clean-code and add params' explanation

This commit is contained in:
wugunjun 2024-06-25 10:03:12 +08:00
parent 25a0c9af31
commit 0bc8fede9e
18 changed files with 64 additions and 427 deletions

View File

@ -25,9 +25,12 @@ Light Gaussian implementation: [This link](https://github.com/pablodawson/4DGaus
## News ## News
2024.6.25: We cleaned the code and added explanations of the parameters.
2024.3.25: Update guidance for hypernerf and dynerf dataset. 2024.3.25: Update guidance for hypernerf and dynerf dataset.
2024.03.04: We change the hyperparameters of the Neu3D dataset, corresponding to our paper 2024.03.04: We change the hyperparameters of the Neu3D dataset, corresponding to our paper.
2024.02.28: Update SIBR viewer guidance. 2024.02.28: Update SIBR viewer guidance.
@ -89,7 +92,7 @@ Meanwhile, [Plenoptic Dataset](https://github.com/facebookresearch/Neural_3D_Vid
``` ```
**For multipleviews scenes:** **For multipleviews scenes:**
If you want to train your own dataset of multipleviews scenes,you can orginize your dataset as follows: If you want to train your own dataset of multipleviews scenes, you can organize your dataset as follows:
``` ```
├── data ├── data
@ -105,11 +108,11 @@ If you want to train your own dataset of multipleviews scenes,you can orginize y
│ ├── ... │ ├── ...
│ | ... │ | ...
``` ```
After that,you can use the `multipleviewprogress.sh` we provided to generate related data of poses and pointcloud.You can use it as follows: After that, you can use the `multipleviewprogress.sh` script we provide to generate the pose and point cloud data. You can use it as follows:
```bash ```bash
bash multipleviewprogress.sh (youe dataset name) bash multipleviewprogress.sh (your dataset name)
``` ```
You need to ensure that the data folder is orginized as follows after running multipleviewprogress.sh: You need to ensure that the data folder is organized as follows after running multipleviewprogress.sh:
``` ```
├── data ├── data
| | multipleview | | multipleview
@ -181,7 +184,7 @@ You can customize your training config through the config files.
## Checkpoint ## Checkpoint
Also, you can training your model with checkpoint. Also, you can train your model from a checkpoint.
```python ```python
python train.py -s data/dnerf/bouncingballs --port 6017 --expname "dnerf/bouncingballs" --configs arguments/dnerf/bouncingballs.py --checkpoint_iterations 200 # change it. python train.py -s data/dnerf/bouncingballs --port 6017 --expname "dnerf/bouncingballs" --configs arguments/dnerf/bouncingballs.py --checkpoint_iterations 200 # change it.
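# To resume from the saved checkpoint, a 3DGS-style --start_checkpoint flag is assumed below
# (hypothetical example -- verify the flag in train.py and the checkpoint filename in your output folder).
python train.py -s data/dnerf/bouncingballs --port 6017 --expname "dnerf/bouncingballs" --configs arguments/dnerf/bouncingballs.py --start_checkpoint "output/dnerf/bouncingballs/chkpnt_coarse_200.pth"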
@ -199,7 +202,7 @@ python train.py -s data/dnerf/bouncingballs --port 6017 --expname "dnerf/bouncin
Run the following script to render the images. Run the following script to render the images.
``` ```
python render.py --model_path "output/dnerf/bouncingballs/" --skip_train --configs arguments/dnerf/bouncingballs.py & python render.py --model_path "output/dnerf/bouncingballs/" --skip_train --configs arguments/dnerf/bouncingballs.py
``` ```
## Evaluation ## Evaluation
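Evaluation follows the upstream 3DGS convention of computing metrics over the rendered views with `metrics.py`; assuming the same interface here, a typical call might be:
```
python metrics.py --model_path "output/dnerf/bouncingballs/"
```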
@ -297,11 +300,13 @@ We would like to express our sincere gratitude to [@zhouzhenghong-gt](https://gi
Some insights about neural voxel grids and dynamic scenes reconstruction originate from [TiNeuVox](https://github.com/hustvl/TiNeuVox). If you find this repository/work helpful in your research, welcome to cite these papers and give a ⭐. Some insights about neural voxel grids and dynamic scenes reconstruction originate from [TiNeuVox](https://github.com/hustvl/TiNeuVox). If you find this repository/work helpful in your research, welcome to cite these papers and give a ⭐.
``` ```
@article{wu20234dgaussians, @InProceedings{Wu_2024_CVPR,
title={4D Gaussian Splatting for Real-Time Dynamic Scene Rendering}, author = {Wu, Guanjun and Yi, Taoran and Fang, Jiemin and Xie, Lingxi and Zhang, Xiaopeng and Wei, Wei and Liu, Wenyu and Tian, Qi and Wang, Xinggang},
author={Wu, Guanjun and Yi, Taoran and Fang, Jiemin and Xie, Lingxi and Zhang, Xiaopeng and Wei Wei and Liu, Wenyu and Tian, Qi and Wang Xinggang}, title = {4D Gaussian Splatting for Real-Time Dynamic Scene Rendering},
journal={arXiv preprint arXiv:2310.08528}, booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2023} month = {June},
year = {2024},
pages = {20310-20320}
} }
@inproceedings{TiNeuVox, @inproceedings{TiNeuVox,

View File

@ -73,35 +73,35 @@ class PipelineParams(ParamGroup):
super().__init__(parser, "Pipeline Parameters") super().__init__(parser, "Pipeline Parameters")
class ModelHiddenParams(ParamGroup): class ModelHiddenParams(ParamGroup):
def __init__(self, parser): def __init__(self, parser):
self.net_width = 64 self.net_width = 64 # width of the deformation MLP; larger values increase rendering quality but decrease training/rendering speed.
self.timebase_pe = 4 self.timebase_pe = 4 # useless
self.defor_depth = 1 self.defor_depth = 1 # depth of the deformation MLP; larger values increase rendering quality but decrease training/rendering speed.
self.posebase_pe = 10 self.posebase_pe = 10 # useless
self.scale_rotation_pe = 2 self.scale_rotation_pe = 2 # useless
self.opacity_pe = 2 self.opacity_pe = 2 # useless
self.timenet_width = 64 self.timenet_width = 64 # useless
self.timenet_output = 32 self.timenet_output = 32 # useless
self.bounds = 1.6 self.bounds = 1.6
self.plane_tv_weight = 0.0001 self.plane_tv_weight = 0.0001 # TV loss of spatial grid
self.time_smoothness_weight = 0.01 self.time_smoothness_weight = 0.01 # TV loss of temporal grid
self.l1_time_planes = 0.0001 self.l1_time_planes = 0.0001 # L1 regularization of the temporal grid planes
self.kplanes_config = { self.kplanes_config = {
'grid_dimensions': 2, 'grid_dimensions': 2,
'input_coordinate_dim': 4, 'input_coordinate_dim': 4,
'output_coordinate_dim': 32, 'output_coordinate_dim': 32,
'resolution': [64, 64, 64, 25] 'resolution': [64, 64, 64, 25] # [64,64,64]: resolution of the spatial grid. 25: resolution of the temporal grid; best set to roughly half the number of dynamic frames
} }
self.multires = [1, 2, 4, 8] self.multires = [1, 2, 4, 8] # multi-resolution levels of the voxel grid
self.no_dx=False self.no_dx=False # disable the deformation of the Gaussians' positions
self.no_grid=False self.no_grid=False # disable the spatial-temporal hexplane.
self.no_ds=False self.no_ds=False # disable the deformation of the Gaussians' scaling
self.no_dr=False self.no_dr=False # disable the deformation of the Gaussians' rotations
self.no_do=True self.no_do=True # disable the deformation of the Gaussians' opacity
self.no_dshs=True self.no_dshs=True # disable the deformation of the SH colors.
self.empty_voxel=False self.empty_voxel=False # useless
self.grid_pe=0 self.grid_pe=0 # unused; an experiment with adding positional encoding to the hexplane features
self.static_mlp=False self.static_mlp=False # useless
self.apply_rotation=False self.apply_rotation=False # useless
super().__init__(parser, "ModelHiddenParams") super().__init__(parser, "ModelHiddenParams")

View File

@ -161,6 +161,7 @@ def save_point_cloud(points, model_path, timestamp):
pcd.points = o3d.utility.Vector3dVector(points) pcd.points = o3d.utility.Vector3dVector(points)
ply_path = os.path.join(output_path,f"points_{timestamp}.ply") ply_path = os.path.join(output_path,f"points_{timestamp}.ply")
o3d.io.write_point_cloud(ply_path, pcd) o3d.io.write_point_cloud(ply_path, pcd)
# This script can help you merge multiple 4DGS models.
parser = ArgumentParser(description="Testing script parameters") parser = ArgumentParser(description="Testing script parameters")
model = ModelParams(parser, sentinel=True) model = ModelParams(parser, sentinel=True)
pipeline = PipelineParams(parser) pipeline = PipelineParams(parser)
@ -228,5 +229,3 @@ for index, viewpoint in tqdm(enumerate(scene1.getVideoCameras())):
torchvision.utils.save_image(result["render"],os.path.join(render_path,f"output_image{index}.png")) torchvision.utils.save_image(result["render"],os.path.join(render_path,f"output_image{index}.png"))
imageio.mimwrite(os.path.join(render_path, 'video_rgb.mp4'), render_images, fps=30, codec='libx265') imageio.mimwrite(os.path.join(render_path, 'video_rgb.mp4'), render_images, fps=30, codec='libx265')
# points = get_state_at_time(gaussians, viewpoint)
# save_point_cloud(points, args.model_path, index)

View File

@ -23,7 +23,6 @@ from argparse import ArgumentParser
from arguments import ModelParams, PipelineParams, get_combined_args, ModelHiddenParams from arguments import ModelParams, PipelineParams, get_combined_args, ModelHiddenParams
from gaussian_renderer import GaussianModel from gaussian_renderer import GaussianModel
from time import time from time import time
# import torch.multiprocessing as mp
import threading import threading
import concurrent.futures import concurrent.futures
def multithread_write(image_list, path): def multithread_write(image_list, path):
@ -53,32 +52,24 @@ def render_set(model_path, name, iteration, views, gaussians, pipeline, backgrou
render_images = [] render_images = []
gt_list = [] gt_list = []
render_list = [] render_list = []
# breakpoint()
print("point nums:",gaussians._xyz.shape[0]) print("point nums:",gaussians._xyz.shape[0])
for idx, view in enumerate(tqdm(views, desc="Rendering progress")): for idx, view in enumerate(tqdm(views, desc="Rendering progress")):
if idx == 0:time1 = time() if idx == 0:time1 = time()
# breakpoint()
rendering = render(view, gaussians, pipeline, background,cam_type=cam_type)["render"] rendering = render(view, gaussians, pipeline, background,cam_type=cam_type)["render"]
# torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png"))
render_images.append(to8b(rendering).transpose(1,2,0)) render_images.append(to8b(rendering).transpose(1,2,0))
# print(to8b(rendering).shape)
render_list.append(rendering) render_list.append(rendering)
if name in ["train", "test"]: if name in ["train", "test"]:
if cam_type != "PanopticSports": if cam_type != "PanopticSports":
gt = view.original_image[0:3, :, :] gt = view.original_image[0:3, :, :]
else: else:
gt = view['image'].cuda() gt = view['image'].cuda()
# torchvision.utils.save_image(gt, os.path.join(gts_path, '{0:05d}'.format(idx) + ".png"))
gt_list.append(gt) gt_list.append(gt)
# if idx >= 10:
# break
time2=time() time2=time()
print("FPS:",(len(views)-1)/(time2-time1)) print("FPS:",(len(views)-1)/(time2-time1))
# print("writing training images.")
multithread_write(gt_list, gts_path) multithread_write(gt_list, gts_path)
# print("writing rendering images.")
multithread_write(render_list, render_path) multithread_write(render_list, render_path)

View File

@ -307,7 +307,6 @@ def read_timeline(path):
timestamp_mapper = {} timestamp_mapper = {}
max_time_float = max(time_line) max_time_float = max(time_line)
for index, time in enumerate(time_line): for index, time in enumerate(time_line):
# timestamp_mapper[time] = index
timestamp_mapper[time] = time/max_time_float timestamp_mapper[time] = time/max_time_float
return timestamp_mapper, max_time_float return timestamp_mapper, max_time_float
@ -548,18 +547,12 @@ def readPanopticmeta(datadir, json_path):
cam_ids = test_meta['cam_id'][index] cam_ids = test_meta['cam_id'][index]
time = index / len(test_meta['fn']) time = index / len(test_meta['fn'])
# breakpoint()
for focal, w2c, fn, cam in zip(focals, w2cs, fns, cam_ids): for focal, w2c, fn, cam in zip(focals, w2cs, fns, cam_ids):
image_path = os.path.join(datadir,"ims") image_path = os.path.join(datadir,"ims")
image_name=fn image_name=fn
# breakpoint()
image = Image.open(os.path.join(datadir,"ims",fn)) image = Image.open(os.path.join(datadir,"ims",fn))
im_data = np.array(image.convert("RGBA")) im_data = np.array(image.convert("RGBA"))
# breakpoint()
im_data = PILtoTorch(im_data,None)[:3,:,:] im_data = PILtoTorch(im_data,None)[:3,:,:]
# breakpoint()
# print(w2c,focal,image_name)
camera = setup_camera(w, h, focal, w2c) camera = setup_camera(w, h, focal, w2c)
cam_infos.append({ cam_infos.append({
"camera":camera, "camera":camera,
@ -568,7 +561,6 @@ def readPanopticmeta(datadir, json_path):
cam_centers = np.linalg.inv(test_meta['w2c'][0])[:, :3, 3] # Get scene radius cam_centers = np.linalg.inv(test_meta['w2c'][0])[:, :3, 3] # Get scene radius
scene_radius = 1.1 * np.max(np.linalg.norm(cam_centers - np.mean(cam_centers, 0)[None], axis=-1)) scene_radius = 1.1 * np.max(np.linalg.norm(cam_centers - np.mean(cam_centers, 0)[None], axis=-1))
# breakpoint()
return cam_infos, max_time, scene_radius return cam_infos, max_time, scene_radius
def readPanopticSportsinfos(datadir): def readPanopticSportsinfos(datadir):

View File

@ -22,7 +22,6 @@ from utils.sh_utils import RGB2SH
from simple_knn._C import distCUDA2 from simple_knn._C import distCUDA2
from utils.graphics_utils import BasicPointCloud from utils.graphics_utils import BasicPointCloud
from utils.general_utils import strip_symmetric, build_scaling_rotation from utils.general_utils import strip_symmetric, build_scaling_rotation
# from utils.point_utils import addpoint, combine_pointcloud, downsample_point_cloud_open3d, find_indices_in_A
from scene.deformation import deform_network from scene.deformation import deform_network
from scene.regulation import compute_plane_smoothness from scene.regulation import compute_plane_smoothness
class GaussianModel: class GaussianModel:
@ -49,9 +48,7 @@ class GaussianModel:
self.active_sh_degree = 0 self.active_sh_degree = 0
self.max_sh_degree = sh_degree self.max_sh_degree = sh_degree
self._xyz = torch.empty(0) self._xyz = torch.empty(0)
# self._deformation = torch.empty(0)
self._deformation = deform_network(args) self._deformation = deform_network(args)
# self.grid = TriPlaneGrid()
self._features_dc = torch.empty(0) self._features_dc = torch.empty(0)
self._features_rest = torch.empty(0) self._features_rest = torch.empty(0)
self._scaling = torch.empty(0) self._scaling = torch.empty(0)
@ -232,9 +229,7 @@ class GaussianModel:
deform = self._deformation[:,:,:time].sum(dim=-1) deform = self._deformation[:,:,:time].sum(dim=-1)
xyz = self._xyz + deform xyz = self._xyz + deform
return xyz return xyz
# def save_ply_dynamic(path):
# for time in range(self._deformation.shape(-1)):
# xyz = self.compute_deformation(time)
def load_model(self, path): def load_model(self, path):
print("loading model from exists{}".format(path)) print("loading model from exists{}".format(path))
weight_dict = torch.load(os.path.join(path,"deformation.pth"),map_location="cuda") weight_dict = torch.load(os.path.join(path,"deformation.pth"),map_location="cuda")
@ -448,49 +443,18 @@ class GaussianModel:
def densify_and_clone(self, grads, grad_threshold, scene_extent, density_threshold=20, displacement_scale=20, model_path=None, iteration=None, stage=None): def densify_and_clone(self, grads, grad_threshold, scene_extent, density_threshold=20, displacement_scale=20, model_path=None, iteration=None, stage=None):
grads_accum_mask = torch.where(torch.norm(grads, dim=-1) >= grad_threshold, True, False) grads_accum_mask = torch.where(torch.norm(grads, dim=-1) >= grad_threshold, True, False)
# actively add sparse points to the point cloud
# if not hasattr(self,"voxel_size"):
# self.voxel_size = 8
# if not hasattr(self,"density_threshold"):
# self.density_threshold = density_threshold
# if not hasattr(self,"displacement_scale"):
# self.displacement_scale = displacement_scale
# point_cloud = self.get_xyz.detach().cpu()
# sparse_point_mask = self.downsample_point(point_cloud)
# _, low_density_points, new_points, low_density_index = addpoint(point_cloud[sparse_point_mask],density_threshold=self.density_threshold,displacement_scale=self.displacement_scale,iter_pass=0)
# sparse_point_mask = sparse_point_mask.to(grads_accum_mask)
# low_density_index = low_density_index.to(grads_accum_mask)
# if new_points.shape[0] < 100 :
# self.density_threshold /= 2
# self.displacement_scale /= 2
# print("reduce diplacement_scale to: ",self.displacement_scale)
# global_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool).to(grads_accum_mask)
# global_mask[sparse_point_mask] = low_density_index
# selected_pts_mask_grow = torch.logical_and(global_mask, grads_accum_mask)
# print("降采样点云:",sparse_point_mask.sum(),"选中的稀疏点云:",global_mask.sum(),"梯度累计点云:",grads_accum_mask.sum(),"选中增长点云:",selected_pts_mask_grow.sum())
# Extract points that satisfy the gradient condition
selected_pts_mask = torch.logical_and(grads_accum_mask, selected_pts_mask = torch.logical_and(grads_accum_mask,
torch.max(self.get_scaling, dim=1).values <= self.percent_dense*scene_extent) torch.max(self.get_scaling, dim=1).values <= self.percent_dense*scene_extent)
# breakpoint()
new_xyz = self._xyz[selected_pts_mask] new_xyz = self._xyz[selected_pts_mask]
# - 0.001 * self._xyz.grad[selected_pts_mask]
new_features_dc = self._features_dc[selected_pts_mask] new_features_dc = self._features_dc[selected_pts_mask]
new_features_rest = self._features_rest[selected_pts_mask] new_features_rest = self._features_rest[selected_pts_mask]
new_opacities = self._opacity[selected_pts_mask] new_opacities = self._opacity[selected_pts_mask]
new_scaling = self._scaling[selected_pts_mask] new_scaling = self._scaling[selected_pts_mask]
new_rotation = self._rotation[selected_pts_mask] new_rotation = self._rotation[selected_pts_mask]
new_deformation_table = self._deformation_table[selected_pts_mask] new_deformation_table = self._deformation_table[selected_pts_mask]
# if opt.add_point:
# selected_xyz, grow_xyz = self.add_point_by_mask(selected_pts_mask_grow.to(self.get_xyz.device), self.displacement_scale)
self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_deformation_table) self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_deformation_table)
# print("被动增加点云:",selected_xyz.shape[0])
# print("主动增加点云:",selected_pts_mask.sum())
# if model_path is not None and iteration is not None:
# point = combine_pointcloud(self.get_xyz.detach().cpu().numpy(), new_xyz.detach().cpu().numpy(), selected_xyz.detach().cpu().numpy())
# write_path = os.path.join(model_path,"add_point_cloud")
# os.makedirs(write_path,exist_ok=True)
# o3d.io.write_point_cloud(os.path.join(write_path,f"iteration_{stage}{iteration}.ply"),point)
# print("write output.")
@property @property
def get_aabb(self): def get_aabb(self):
return self._deformation.get_aabb return self._deformation.get_aabb
@ -505,23 +469,12 @@ class GaussianModel:
mask_d = mask_c.all(dim=1) mask_d = mask_c.all(dim=1)
final_point = final_point[mask_d] final_point = final_point[mask_d]
# while (mask_d.sum()/final_point.shape[0])<0.5:
# perturb/=2
# displacements = torch.randn(selected_point.shape[0], 3).to(selected_point) * perturb
# final_point = selected_point + displacements
# mask_a = final_point<xyz_max
# mask_b = final_point>xyz_min
# mask_c = mask_a & mask_b
# mask_d = mask_c.all(dim=1)
# final_point = final_point[mask_d]
return final_point, mask_d return final_point, mask_d
def add_point_by_mask(self, selected_pts_mask, perturb=0): def add_point_by_mask(self, selected_pts_mask, perturb=0):
selected_xyz = self._xyz[selected_pts_mask] selected_xyz = self._xyz[selected_pts_mask]
new_xyz, mask = self.get_displayment(selected_xyz, self.get_xyz.detach(),perturb) new_xyz, mask = self.get_displayment(selected_xyz, self.get_xyz.detach(),perturb)
# displacements = torch.randn(selected_xyz.shape[0], 3).to(self._xyz) * perturb
# new_xyz = selected_xyz + displacements
# - 0.001 * self._xyz.grad[selected_pts_mask]
new_features_dc = self._features_dc[selected_pts_mask][mask] new_features_dc = self._features_dc[selected_pts_mask][mask]
new_features_rest = self._features_rest[selected_pts_mask][mask] new_features_rest = self._features_rest[selected_pts_mask][mask]
new_opacities = self._opacity[selected_pts_mask][mask] new_opacities = self._opacity[selected_pts_mask][mask]
@ -532,56 +485,7 @@ class GaussianModel:
self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_deformation_table) self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_deformation_table)
return selected_xyz, new_xyz return selected_xyz, new_xyz
def downsample_point(self, point_cloud):
if not hasattr(self,"voxel_size"):
self.voxel_size = 8
point_downsample = point_cloud
flag = False
while point_downsample.shape[0]>1000:
if flag:
self.voxel_size+=8
point_downsample = downsample_point_cloud_open3d(point_cloud,voxel_size=self.voxel_size)
flag = True
print("point size:",point_downsample.shape[0])
# downsampled_point_mask = torch.eq(point_downsample.view(1,-1,3), point_cloud.view(-1,1,3)).all(dim=1)
downsampled_point_index = find_indices_in_A(point_cloud, point_downsample)
downsampled_point_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool).to(point_downsample.device)
downsampled_point_mask[downsampled_point_index]=True
return downsampled_point_mask
def grow(self, density_threshold=20, displacement_scale=20, model_path=None, iteration=None, stage=None):
if not hasattr(self,"voxel_size"):
self.voxel_size = 8
if not hasattr(self,"density_threshold"):
self.density_threshold = density_threshold
if not hasattr(self,"displacement_scale"):
self.displacement_scale = displacement_scale
flag = False
point_cloud = self.get_xyz.detach().cpu()
point_downsample = point_cloud.detach()
downsampled_point_index = self.downsample_point(point_downsample)
_, low_density_points, new_points, low_density_index = addpoint(point_cloud[downsampled_point_index],density_threshold=self.density_threshold,displacement_scale=self.displacement_scale,iter_pass=0)
if new_points.shape[0] < 100 :
self.density_threshold /= 2
self.displacement_scale /= 2
print("reduce diplacement_scale to: ",self.displacement_scale)
elif new_points.shape[0] == 0:
print("no point added")
return
global_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool)
global_mask[downsampled_point_index] = low_density_index
global_mask
selected_xyz, new_xyz = self.add_point_by_mask(global_mask.to(self.get_xyz.device), self.displacement_scale)
print("point growing,add point num:",global_mask.sum())
if model_path is not None and iteration is not None:
point = combine_pointcloud(point_cloud, selected_xyz.detach().cpu().numpy(), new_xyz.detach().cpu().numpy())
write_path = os.path.join(model_path,"add_point_cloud")
os.makedirs(write_path,exist_ok=True)
o3d.io.write_point_cloud(os.path.join(write_path,f"iteration_{stage}{iteration}.ply"),point)
return
def prune(self, max_grad, min_opacity, extent, max_screen_size): def prune(self, max_grad, min_opacity, extent, max_screen_size):
prune_mask = (self.get_opacity < min_opacity).squeeze() prune_mask = (self.get_opacity < min_opacity).squeeze()

View File

@ -17,10 +17,7 @@ class DenseGrid(nn.Module):
super(DenseGrid, self).__init__() super(DenseGrid, self).__init__()
self.channels = channels self.channels = channels
self.world_size = world_size self.world_size = world_size
# self.xyz_max = xyz_max
# self.xyz_min = xyz_min
# self.register_buffer('xyz_min', torch.Tensor(xyz_min))
# self.register_buffer('xyz_max', torch.Tensor(xyz_max))
self.grid = nn.Parameter(torch.ones([1, channels, *world_size])) self.grid = nn.Parameter(torch.ones([1, channels, *world_size]))
def forward(self, xyz): def forward(self, xyz):

View File

@ -134,13 +134,9 @@ class Load_hyper_data(Dataset):
if idx in self.map.keys(): if idx in self.map.keys():
return self.map[idx] return self.map[idx]
camera = self.all_cam_params[idx] camera = self.all_cam_params[idx]
# camera = self.video_path[idx]
w = self.image_one.size[0] w = self.image_one.size[0]
h = self.image_one.size[1] h = self.image_one.size[1]
# image = PILtoTorch(image,None)
# image = image.to(torch.float32)
time = self.video_time[idx] time = self.video_time[idx]
# .astype(np.float32)
R = camera.orientation.T R = camera.orientation.T
T = - camera.position @ R T = - camera.position @ R
FovY = focal2fov(camera.focal_length, self.h) FovY = focal2fov(camera.focal_length, self.h)

View File

@ -1,40 +0,0 @@
import open3d as o3d
import os
# root directory path
root_path = "data/dynerf/sear_steak/"
# file names
input_file = "points3D.ply"
output_file = "points3d_filtered.ply"
# read the point cloud
point_cloud_before = o3d.io.read_point_cloud(os.path.join(root_path, input_file))
# number of points before filtering
num_points_before = len(point_cloud_before.points)
# bounding-box size of the point cloud before filtering
bbox_before = point_cloud_before.get_axis_aligned_bounding_box()
bbox_size_before = bbox_before.get_max_bound() - bbox_before.get_min_bound()
# statistical outlier removal
cl, ind = point_cloud_before.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
# create a new point cloud containing the filtered points
filtered_point_cloud = point_cloud_before.select_by_index(ind)
# save the filtered point cloud to a new file
o3d.io.write_point_cloud(os.path.join(root_path, output_file), filtered_point_cloud)
# number of points after filtering
num_points_after = len(filtered_point_cloud.points)
# bounding-box size after filtering
bbox = filtered_point_cloud.get_axis_aligned_bounding_box()
bbox_size = bbox.get_max_bound() - bbox.get_min_bound()
print(f"Points before filtering: {num_points_before}")
print(f"Bounding-box size before filtering: {bbox_size_before}")
print(f"Points after filtering: {num_points_after}")
print(f"Bounding-box size after filtering: {bbox_size}")
print(f"Outlier filtering complete; result saved to {output_file}")

View File

@ -2,24 +2,14 @@ import open3d as o3d
import numpy as np import numpy as np
def grow_sparse_regions(input_file, output_file): def grow_sparse_regions(input_file, output_file):
# read the input ply file
pcd = o3d.io.read_point_cloud(input_file) pcd = o3d.io.read_point_cloud(input_file)
# compute the point cloud density
densities = o3d.geometry.PointCloud.compute_nearest_neighbor_distance(pcd) densities = o3d.geometry.PointCloud.compute_nearest_neighbor_distance(pcd)
avg_density = np.average(densities) avg_density = np.average(densities)
print(f"Average density: {avg_density}") print(f"Average density: {avg_density}")
sparse_indices = np.where(densities > avg_density * 1.2)[0]
# find the sparse part
sparse_indices = np.where(densities > avg_density * 1.2)[0] # here we assume the sparse part has a density greater than 1.2x the average
sparse_points = np.asarray(pcd.points)[sparse_indices] sparse_points = np.asarray(pcd.points)[sparse_indices]
breakpoint()
# duplicate and grow the sparse part
# for _ in range(5): # here we assume each sparse point is duplicated 5 times
# pcd.points.extend(sparse_points)
# save the result to the given path
o3d.io.write_point_cloud(output_file, pcd) o3d.io.write_point_cloud(output_file, pcd)
# example usage
grow_sparse_regions("data/hypernerf/vrig/chickchicken/dense_downsample.ply", "data/hypernerf/interp/chickchicken/dense_downsample.ply") grow_sparse_regions("data/hypernerf/vrig/chickchicken/dense_downsample.ply", "data/hypernerf/interp/chickchicken/dense_downsample.ply")

View File

@ -41,7 +41,6 @@ for jsonfile in tqdm(cameras):
image_size = cams[0]['image_size'] image_size = cams[0]['image_size']
image = Image.open(os.path.join(image_dir,images[0])) image = Image.open(os.path.join(image_dir,images[0]))
size = image.size size = image.size
# breakpoint()
object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w") object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w")
object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w") object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w")
@ -50,19 +49,13 @@ cnt=0
sizes=2 sizes=2
while len(cams)//sizes > 200: while len(cams)//sizes > 200:
sizes += 1 sizes += 1
# breakpoint()
for cam, image in zip(cams, images): for cam, image in zip(cams, images):
cnt+=1 cnt+=1
# print(image)
# breakpoint()
if cnt % sizes != 0: if cnt % sizes != 0:
continue continue
# print("begin to write")
R = np.array(cam['orientation']).T R = np.array(cam['orientation']).T
# breakpoint()
T = -np.array(cam['position'])@R T = -np.array(cam['position'])@R
# T = -np.matmul(R,T)
T = [str(i) for i in T] T = [str(i) for i in T]
qevc = [str(i) for i in rotmat2qvec(R.T)] qevc = [str(i) for i in rotmat2qvec(R.T)]
@ -73,7 +66,6 @@ for cam, image in zip(cams, images):
shutil.copy(os.path.join(image_dir,image),os.path.join(imagecolmap_dir,image)) shutil.copy(os.path.join(image_dir,image),os.path.join(imagecolmap_dir,image))
print(idx) print(idx)
# write camera information. # write camera information.
# print(1,"SIMPLE_PINHOLE",image_size[0],image_size[1],focal[0],image_sizep0/2,image_size[1]/2,file=object_cameras_file)
object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w") object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w")
object_cameras_file.close() object_cameras_file.close()

View File

@ -98,17 +98,6 @@ H, W, focal = poses[0, :, -1]
focal = focal/2 focal = focal/2
focal = [focal, focal] focal = [focal, focal]
poses = np.concatenate([poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1) poses = np.concatenate([poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1)
# poses, _ = center_poses(
# poses, blender2opencv
# ) # Re-center poses so that the average is near the center.
# near_original = near_fars.min()
# scale_factor = near_original * 0.75
# near_fars /= (
# scale_factor # rescale nearest plane so that it is at z = 4/3.
# )
# poses[..., 3] /= scale_factor
# Sample N_views poses for validation - NeRF-like camera trajectory.
# val_poses = directions
videos = glob.glob(os.path.join(root_dir, "cam[0-9][0-9]")) videos = glob.glob(os.path.join(root_dir, "cam[0-9][0-9]"))
videos = sorted(videos) videos = sorted(videos)
image_paths = [] image_paths = []
@ -132,8 +121,6 @@ for index, image in enumerate(image_paths):
shutil.copy(image,goal_path) shutil.copy(image,goal_path)
print(poses) print(poses)
# breakpoint()
# write image information. # write image information.
object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w") object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w")
for idx, pose in enumerate(poses): for idx, pose in enumerate(poses):
@ -147,15 +134,12 @@ for idx, pose in enumerate(poses):
R = np.linalg.inv(R) R = np.linalg.inv(R)
T = -np.matmul(R,T) T = -np.matmul(R,T)
T = [str(i) for i in T] T = [str(i) for i in T]
# T = ["%.3f"%i for i in pose[:3,3]]
qevc = [str(i) for i in rotmat2qvec(R)] qevc = [str(i) for i in rotmat2qvec(R)]
# breakpoint()
print(idx+1," ".join(qevc)," ".join(T),1,image_name_list[idx],"\n",file=object_images_file) print(idx+1," ".join(qevc)," ".join(T),1,image_name_list[idx],"\n",file=object_images_file)
# breakpoint()
# write camera information. # write camera information.
object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w") object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w")
print(1,"SIMPLE_PINHOLE",1352,1014,focal[0],1352/2,1014/2,file=object_cameras_file) print(1,"SIMPLE_PINHOLE",1352,1014,focal[0],1352/2,1014/2,file=object_cameras_file) #
object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w") object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w")
object_cameras_file.close() object_cameras_file.close()

View File

@ -2,22 +2,15 @@ import open3d as o3d
import os import os
from tqdm import tqdm from tqdm import tqdm
def merge_point_clouds(directory, output_file): def merge_point_clouds(directory, output_file):
# initialize an empty point cloud
merged_pcd = o3d.geometry.PointCloud() merged_pcd = o3d.geometry.PointCloud()
# iterate over all files in the directory
for filename in tqdm(os.listdir(directory)): for filename in tqdm(os.listdir(directory)):
if filename.endswith('.ply'): if filename.endswith('.ply'):
# read the point cloud file
pcd = o3d.io.read_point_cloud(os.path.join(directory, filename)) pcd = o3d.io.read_point_cloud(os.path.join(directory, filename))
# merge the point clouds
merged_pcd += pcd merged_pcd += pcd
# remove points at duplicate positions
merged_pcd = merged_pcd.remove_duplicate_points() merged_pcd = merged_pcd.remove_duplicate_points()
# write the merged point cloud to a single file
o3d.io.write_point_cloud(output_file, merged_pcd) o3d.io.write_point_cloud(output_file, merged_pcd)
# example usage
merge_point_clouds("point_clouds_directory", "merged.ply") merge_point_clouds("point_clouds_directory", "merged.ply")

39
test.py
View File

@ -1,39 +0,0 @@
import cv2
import os
import re
def sorted_alphanumeric(data):
"""
Sort the given data alphanumerically, taking the numeric value of digits into account.
"""
convert = lambda text: int(text) if text.isdigit() else text.lower()
alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
return sorted(data, key=alphanum_key)
def create_video_from_images(folder_path, output_file, frame_rate=30, img_size=None):
images = [img for img in os.listdir(folder_path) if img.endswith(".jpg") or img.endswith(".png")]
images = sorted_alphanumeric(images) # use the custom alphanumeric sort
# get the size of the first image
frame = cv2.imread(os.path.join(folder_path, images[0]))
height, width, layers = frame.shape
# resize if img_size is specified
if img_size is not None:
width, height = img_size
# define the video codec and create the VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # can be changed to another codec
video = cv2.VideoWriter(output_file, fourcc, frame_rate, (width, height))
for image in images:
img = cv2.imread(os.path.join(folder_path, image))
if img_size is not None:
img = cv2.resize(img, img_size)
video.write(img)
cv2.destroyAllWindows()
video.release()
# example usage
folder_path = 'output/editing_render' # replace with your image folder path
output_file = 'output_video.mp4' # output video file name
create_video_from_images(folder_path, output_file)

View File

@ -7,60 +7,34 @@ import open3d as o3d
import numpy as np import numpy as np
from torch_cluster import grid_cluster from torch_cluster import grid_cluster
def voxel_down_sample_custom(points, voxel_size): def voxel_down_sample_custom(points, voxel_size):
# quantize the point cloud into the voxel grid
voxel_grid = torch.floor(points / voxel_size) voxel_grid = torch.floor(points / voxel_size)
# find the unique voxels and their indices in the original voxel grid
unique_voxels, inverse_indices = torch.unique(voxel_grid, dim=0, return_inverse=True) unique_voxels, inverse_indices = torch.unique(voxel_grid, dim=0, return_inverse=True)
# build a new point cloud where each point is the mean of the points in its voxel
new_points = torch.zeros_like(unique_voxels) new_points = torch.zeros_like(unique_voxels)
new_points_count = torch.zeros(unique_voxels.size(0), dtype=torch.long) new_points_count = torch.zeros(unique_voxels.size(0), dtype=torch.long)
# for i in tqdm(range(points.size(0))):
new_points[inverse_indices] = points new_points[inverse_indices] = points
# new_points_count[inverse_indices[i]] += 1
# new_points /= new_points_count.unsqueeze(-1)
return new_points, inverse_indices return new_points, inverse_indices
def downsample_point_cloud(points, ratio): def downsample_point_cloud(points, ratio):
# wrap the points in a TensorDataset
dataset = TensorDataset(points) dataset = TensorDataset(points)
# number of points after downsampling
num_points = len(dataset) num_points = len(dataset)
num_downsampled_points = int(num_points * ratio) num_downsampled_points = int(num_points * ratio)
# downsample with random_split
downsampled_dataset, _ = random_split(dataset, [num_downsampled_points, num_points - num_downsampled_points]) downsampled_dataset, _ = random_split(dataset, [num_downsampled_points, num_points - num_downsampled_points])
# get the indices and the point matrix of the downsampled points
indices = torch.tensor([i for i, _ in enumerate(downsampled_dataset)]) indices = torch.tensor([i for i, _ in enumerate(downsampled_dataset)])
downsampled_points = torch.stack([x for x, in downsampled_dataset]) downsampled_points = torch.stack([x for x, in downsampled_dataset])
return indices, downsampled_points return indices, downsampled_points
def downsample_point_cloud_open3d(points, voxel_size): def downsample_point_cloud_open3d(points, voxel_size):
# create a point cloud object
downsampled_pcd, inverse_indices = voxel_down_sample_custom(points, voxel_size) downsampled_pcd, inverse_indices = voxel_down_sample_custom(points, voxel_size)
downsampled_points = downsampled_pcd downsampled_points = downsampled_pcd
# get the downsampled point matrix
return torch.tensor(downsampled_points) return torch.tensor(downsampled_points)
def downsample_point_cloud_cluster(points, voxel_size): def downsample_point_cloud_cluster(points, voxel_size):
# create a point cloud object
cluster = grid_cluster(points, size=torch.tensor([1,1,1])) cluster = grid_cluster(points, size=torch.tensor([1,1,1]))
# get the downsampled point matrix
# downsampled_points = np.asarray(downsampled_pcd.points)
return cluster, points return cluster, points
import torch import torch
from sklearn.neighbors import NearestNeighbors from sklearn.neighbors import NearestNeighbors
def upsample_point_cloud(points, density_threshold, displacement_scale, iter_pass): def upsample_point_cloud(points, density_threshold, displacement_scale, iter_pass):
# compute the density of each point
# breakpoint()
try: try:
nbrs = NearestNeighbors(n_neighbors=2+iter_pass, algorithm='ball_tree').fit(points) nbrs = NearestNeighbors(n_neighbors=2+iter_pass, algorithm='ball_tree').fit(points)
distances, indices = nbrs.kneighbors(points) distances, indices = nbrs.kneighbors(points)
@ -68,88 +42,52 @@ def upsample_point_cloud(points, density_threshold, displacement_scale, iter_pas
print("no point added") print("no point added")
return points, torch.tensor([]), torch.tensor([]), torch.zeros((points.shape[0]), dtype=torch.bool) return points, torch.tensor([]), torch.tensor([]), torch.zeros((points.shape[0]), dtype=torch.bool)
# find the low-density points
low_density_points = points[distances[:,1] > density_threshold] low_density_points = points[distances[:,1] > density_threshold]
low_density_index = distances[:,1] > density_threshold low_density_index = distances[:,1] > density_threshold
low_density_index = torch.from_numpy(low_density_index) low_density_index = torch.from_numpy(low_density_index)
# duplicate these points and add random displacements
num_points = low_density_points.shape[0] num_points = low_density_points.shape[0]
displacements = torch.randn(num_points, 3) * displacement_scale displacements = torch.randn(num_points, 3) * displacement_scale
new_points = low_density_points + displacements new_points = low_density_points + displacements
# return the new point matrix
return points, low_density_points, new_points, low_density_index return points, low_density_points, new_points, low_density_index
def visualize_point_cloud(points, low_density_points, new_points): def visualize_point_cloud(points, low_density_points, new_points):
# create a point cloud object
pcd = o3d.geometry.PointCloud() pcd = o3d.geometry.PointCloud()
# add a small offset to the selected points
low_density_points += 0.01 low_density_points += 0.01
# merge all points together
all_points = np.concatenate([points, low_density_points, new_points], axis=0) all_points = np.concatenate([points, low_density_points, new_points], axis=0)
pcd.points = o3d.utility.Vector3dVector(all_points) pcd.points = o3d.utility.Vector3dVector(all_points)
# build the color array
colors = np.zeros((all_points.shape[0], 3)) colors = np.zeros((all_points.shape[0], 3))
colors[:points.shape[0]] = [0, 0, 0] # black: the initial points colors[:points.shape[0]] = [0, 0, 0]
colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0] # red: the selected points colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0]
colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0] # green: the newly grown points colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0]
pcd.colors = o3d.utility.Vector3dVector(colors) pcd.colors = o3d.utility.Vector3dVector(colors)
# display the point cloud
o3d.visualization.draw_geometries([pcd]) o3d.visualization.draw_geometries([pcd])
def combine_pointcloud(points, low_density_points, new_points): def combine_pointcloud(points, low_density_points, new_points):
pcd = o3d.geometry.PointCloud() pcd = o3d.geometry.PointCloud()
# add a small offset to the selected points
low_density_points += 0.01 low_density_points += 0.01
new_points -= 0.01 new_points -= 0.01
# merge all points together
all_points = np.concatenate([points, low_density_points, new_points], axis=0) all_points = np.concatenate([points, low_density_points, new_points], axis=0)
pcd.points = o3d.utility.Vector3dVector(all_points) pcd.points = o3d.utility.Vector3dVector(all_points)
# build the color array
colors = np.zeros((all_points.shape[0], 3)) colors = np.zeros((all_points.shape[0], 3))
colors[:points.shape[0]] = [0, 0, 0] # black: the initial points colors[:points.shape[0]] = [0, 0, 0]
colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0] # red: the selected points colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0]
colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0] # green: the newly grown points colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0]
pcd.colors = o3d.utility.Vector3dVector(colors) pcd.colors = o3d.utility.Vector3dVector(colors)
return pcd return pcd
def addpoint(point_cloud,density_threshold,displacement_scale, iter_pass,): def addpoint(point_cloud,density_threshold,displacement_scale, iter_pass,):
# density_threshold: density threshold; larger values select sparser points.
# displacement_scale: new points are generated randomly within a radius of displacement_scale
points, low_density_points, new_points, low_density_index = upsample_point_cloud(point_cloud,density_threshold,displacement_scale, iter_pass) points, low_density_points, new_points, low_density_index = upsample_point_cloud(point_cloud,density_threshold,displacement_scale, iter_pass)
# breakpoint()
# breakpoint()
print("low_density_points",low_density_points.shape[0]) print("low_density_points",low_density_points.shape[0])
return point_cloud, low_density_points, new_points, low_density_index return point_cloud, low_density_points, new_points, low_density_index
def find_point_indices(origin_point, goal_point): def find_point_indices(origin_point, goal_point):
indices = torch.nonzero((origin_point[:, None] == goal_point).all(-1), as_tuple=True)[0] indices = torch.nonzero((origin_point[:, None] == goal_point).all(-1), as_tuple=True)[0]
return indices return indices
def find_indices_in_A(A, B): def find_indices_in_A(A, B):
"""
找出子集矩阵 B 中每个点在点云矩阵 A 中的索引 u
参数:
A (torch.Tensor): 点云矩阵 A大小为 [N, 3]
B (torch.Tensor): 子集矩阵 B大小为 [M, 3]
返回:
torch.Tensor: 包含 B 中每个点在 A 中的索引 u 的张量形状为 (M,)
"""
is_equal = torch.eq(B.view(1, -1, 3), A.view(-1, 1, 3)) is_equal = torch.eq(B.view(1, -1, 3), A.view(-1, 1, 3))
u_indices = torch.nonzero(is_equal, as_tuple=False)[:, 0] u_indices = torch.nonzero(is_equal, as_tuple=False)[:, 0]
return torch.unique(u_indices) return torch.unique(u_indices)
if __name__ =="__main__": if __name__ =="__main__":
#
from time import time from time import time
pass_=0 pass_=0
# filename=f"pointcloud/pass_{pass_}.ply"
filename = "point_cloud.ply" filename = "point_cloud.ply"
pcd = o3d.io.read_point_cloud(filename) pcd = o3d.io.read_point_cloud(filename)
point_cloud = torch.tensor(pcd.points) point_cloud = torch.tensor(pcd.points)
@ -170,7 +108,6 @@ if __name__ =="__main__":
flag = True flag = True
print("point size:",point_downsample.shape[0]) print("point size:",point_downsample.shape[0])
# downsampled_point_index = find_point_indices(point_cloud, point_downsample)
downsampled_point_index = find_indices_in_A(point_cloud, point_downsample) downsampled_point_index = find_indices_in_A(point_cloud, point_downsample)
print("selected_num",point_cloud[downsampled_point_index].shape[0]) print("selected_num",point_cloud[downsampled_point_index].shape[0])
_, low_density_points, new_points, low_density_index = addpoint(point_cloud[downsampled_point_index],density_threshold=density_threshold,displacement_scale=displacement_scale,iter_pass=0) _, low_density_points, new_points, low_density_index = addpoint(point_cloud[downsampled_point_index],density_threshold=density_threshold,displacement_scale=displacement_scale,iter_pass=0)
@ -188,9 +125,7 @@ if __name__ =="__main__":
if low_density_points.shape[0] == 0: if low_density_points.shape[0] == 0:
print("no more points.") print("no more points.")
continue continue
# breakpoint()
point = combine_pointcloud(point_cloud, low_density_points, new_points) point = combine_pointcloud(point_cloud, low_density_points, new_points)
point_cloud = torch.tensor(point.points) point_cloud = torch.tensor(point.points)
o3d.io.write_point_cloud(f"pointcloud/pass_{i}.ply",point) o3d.io.write_point_cloud(f"pointcloud/pass_{i}.ply",point)
# visualize_qpoint_cloud( point_cloud, low_density_points, new_points)

View File

@ -3,15 +3,12 @@ from scipy.spatial.transform import Rotation as R
from scene.utils import Camera from scene.utils import Camera
from copy import deepcopy from copy import deepcopy
def rotation_matrix_to_quaternion(rotation_matrix): def rotation_matrix_to_quaternion(rotation_matrix):
"""将旋转矩阵转换为四元数"""
return R.from_matrix(rotation_matrix).as_quat() return R.from_matrix(rotation_matrix).as_quat()
def quaternion_to_rotation_matrix(quat): def quaternion_to_rotation_matrix(quat):
"""将四元数转换为旋转矩阵"""
return R.from_quat(quat).as_matrix() return R.from_quat(quat).as_matrix()
def quaternion_slerp(q1, q2, t): def quaternion_slerp(q1, q2, t):
"""在两个四元数之间进行球面线性插值SLERP"""
# 计算两个四元数之间的点积 # 计算两个四元数之间的点积
dot = np.dot(q1, q2) dot = np.dot(q1, q2)
@ -32,13 +29,10 @@ def quaternion_slerp(q1, q2, t):
return np.cos(theta) * q1 + np.sin(theta) * q3 return np.cos(theta) * q1 + np.sin(theta) * q3
def bezier_interpolation(p1, p2, t): def bezier_interpolation(p1, p2, t):
"""在两点之间使用贝塞尔曲线进行插值"""
return (1 - t) * p1 + t * p2 return (1 - t) * p1 + t * p2
def linear_interpolation(v1, v2, t): def linear_interpolation(v1, v2, t):
"""线性插值"""
return (1 - t) * v1 + t * v2 return (1 - t) * v1 + t * v2
def smooth_camera_poses(cameras, num_interpolations=5): def smooth_camera_poses(cameras, num_interpolations=5):
"""对一系列相机位姿进行平滑处理,通过在每对位姿之间插入额外的位姿"""
smoothed_cameras = [] smoothed_cameras = []
smoothed_times = [] smoothed_times = []
total_poses = len(cameras) - 1 + (len(cameras) - 1) * num_interpolations total_poses = len(cameras) - 1 + (len(cameras) - 1) * num_interpolations
@ -48,44 +42,26 @@ def smooth_camera_poses(cameras, num_interpolations=5):
cam1 = cameras[i] cam1 = cameras[i]
cam2 = cameras[i + 1] cam2 = cameras[i + 1]
# convert the rotation matrices to quaternions
quat1 = rotation_matrix_to_quaternion(cam1.orientation) quat1 = rotation_matrix_to_quaternion(cam1.orientation)
quat2 = rotation_matrix_to_quaternion(cam2.orientation) quat2 = rotation_matrix_to_quaternion(cam2.orientation)
for j in range(num_interpolations + 1): for j in range(num_interpolations + 1):
t = j / (num_interpolations + 1) t = j / (num_interpolations + 1)
# interpolate the orientation
interp_orientation_quat = quaternion_slerp(quat1, quat2, t) interp_orientation_quat = quaternion_slerp(quat1, quat2, t)
interp_orientation_matrix = quaternion_to_rotation_matrix(interp_orientation_quat) interp_orientation_matrix = quaternion_to_rotation_matrix(interp_orientation_quat)
# interpolate the position
interp_position = linear_interpolation(cam1.position, cam2.position, t) interp_position = linear_interpolation(cam1.position, cam2.position, t)
# compute the interpolated timestamp
interp_time = i*10 / (len(cameras) - 1) + time_increment * j interp_time = i*10 / (len(cameras) - 1) + time_increment * j
# append the new camera pose and timestamp
newcam = deepcopy(cam1) newcam = deepcopy(cam1)
newcam.orientation = interp_orientation_matrix newcam.orientation = interp_orientation_matrix
newcam.position = interp_position newcam.position = interp_position
smoothed_cameras.append(newcam) smoothed_cameras.append(newcam)
smoothed_times.append(interp_time) smoothed_times.append(interp_time)
# append the last original pose and timestamp
smoothed_cameras.append(cameras[-1]) smoothed_cameras.append(cameras[-1])
smoothed_times.append(1.0) smoothed_times.append(1.0)
print(smoothed_times) print(smoothed_times)
return smoothed_cameras, smoothed_times return smoothed_cameras, smoothed_times
# # Example: two camera poses
# cam1 = Camera(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), np.array([0, 0, 0]))
# cam2 = Camera(np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]]), np.array([1, 1, 1]))
# # apply smoothing
# smoothed_cameras = smooth_camera_poses([cam1, cam2], num_interpolations=5)
# # print the results
# for cam in smoothed_cameras:
# print("Orientation:\n", cam.orientation)
# print("Position:", cam.position)

View File

@ -11,15 +11,8 @@ def get_state_at_time(pc,viewpoint_camera):
scales = pc._scaling scales = pc._scaling
rotations = pc._rotation rotations = pc._rotation
cov3D_precomp = None cov3D_precomp = None
# time0 = get_time()
# means3D_deform, scales_deform, rotations_deform, opacity_deform = pc._deformation(means3D[deformation_point], scales[deformation_point],
# rotations[deformation_point], opacity[deformation_point],
# time[deformation_point])
means3D_final, scales_final, rotations_final, opacity_final, shs_final = pc._deformation(means3D, scales, means3D_final, scales_final, rotations_final, opacity_final, shs_final = pc._deformation(means3D, scales,
rotations, opacity, shs, rotations, opacity, shs,
time) time)
# scales_final = pc.scaling_activation(scales_final)
# rotations_final = pc.rotation_activation(rotations_final)
# opacity = pc.opacity_activation(opacity_final)
return means3D_final, scales_final, rotations_final, opacity, shs_final return means3D_final, scales_final, rotations_final, opacity, shs_final

View File

@ -25,26 +25,17 @@ def render_training_image(scene, gaussians, viewpoints, render_func, pipe, backg
gt_np = viewpoint['image'].permute(1,2,0).cpu().numpy() gt_np = viewpoint['image'].permute(1,2,0).cpu().numpy()
else: else:
gt_np = viewpoint.original_image.permute(1,2,0).cpu().numpy() gt_np = viewpoint.original_image.permute(1,2,0).cpu().numpy()
image_np = image.permute(1, 2, 0).cpu().numpy() # 转换通道顺序为 (H, W, 3) image_np = image.permute(1, 2, 0).cpu().numpy() # (H, W, 3)
depth_np = depth.permute(1, 2, 0).cpu().numpy() depth_np = depth.permute(1, 2, 0).cpu().numpy()
depth_np /= depth_np.max() depth_np /= depth_np.max()
depth_np = np.repeat(depth_np, 3, axis=2) depth_np = np.repeat(depth_np, 3, axis=2)
image_np = np.concatenate((gt_np, image_np, depth_np), axis=1) image_np = np.concatenate((gt_np, image_np, depth_np), axis=1)
image_with_labels = Image.fromarray((np.clip(image_np,0,1) * 255).astype('uint8')) # convert to an 8-bit image image_with_labels = Image.fromarray((np.clip(image_np,0,1) * 255).astype('uint8'))
# create a copy of the PIL image to draw the labels on
draw1 = ImageDraw.Draw(image_with_labels) draw1 = ImageDraw.Draw(image_with_labels)
font = ImageFont.truetype('./utils/TIMES.TTF', size=40)
# choose the font and font size text_color = (255, 0, 0)
font = ImageFont.truetype('./utils/TIMES.TTF', size=40) # replace the path with the font file of your choice
# choose the text color
text_color = (255, 0, 0) # red
# choose the label positions (top-left coordinates)
label1_position = (10, 10) label1_position = (10, 10)
label2_position = (image_with_labels.width - 100 - len(label2) * 10, 10) # top-right coordinates label2_position = (image_with_labels.width - 100 - len(label2) * 10, 10)
# draw the labels on the image
draw1.text(label1_position, label1, fill=text_color, font=font) draw1.text(label1_position, label1, fill=text_color, font=font)
draw1.text(label2_position, label2, fill=text_color, font=font) draw1.text(label2_position, label2, fill=text_color, font=font)
@ -58,42 +49,20 @@ def render_training_image(scene, gaussians, viewpoints, render_func, pipe, backg
os.makedirs(point_cloud_path) os.makedirs(point_cloud_path)
if not os.path.exists(image_path): if not os.path.exists(image_path):
os.makedirs(image_path) os.makedirs(image_path)
# image:3,800,800
# point_save_path = os.path.join(point_cloud_path,f"{iteration}.jpg")
for idx in range(len(viewpoints)): for idx in range(len(viewpoints)):
image_save_path = os.path.join(image_path,f"{iteration}_{idx}.jpg") image_save_path = os.path.join(image_path,f"{iteration}_{idx}.jpg")
render(gaussians,viewpoints[idx],image_save_path,scaling = 1,cam_type=dataset_type) render(gaussians,viewpoints[idx],image_save_path,scaling = 1,cam_type=dataset_type)
# render(gaussians,point_save_path,scaling = 0.1)
# save the labeled image
pc_mask = gaussians.get_opacity pc_mask = gaussians.get_opacity
pc_mask = pc_mask > 0.1 pc_mask = pc_mask > 0.1
xyz = gaussians.get_xyz.detach()[pc_mask.squeeze()].cpu().permute(1,0).numpy()
# visualize_and_save_point_cloud(xyz, viewpoint.R, viewpoint.T, point_save_path)
# if needed, you can convert the PIL image back to a PyTorch tensor
# return image
# image_with_labels_tensor = torch.tensor(image_with_labels, dtype=torch.float32).permute(2, 0, 1) / 255.0
def visualize_and_save_point_cloud(point_cloud, R, T, filename): def visualize_and_save_point_cloud(point_cloud, R, T, filename):
# create a 3D scatter plot
fig = plt.figure() fig = plt.figure()
ax = fig.add_subplot(111, projection='3d') ax = fig.add_subplot(111, projection='3d')
R = R.T R = R.T
# apply the rotation and translation
T = -R.dot(T) T = -R.dot(T)
transformed_point_cloud = np.dot(R, point_cloud) + T.reshape(-1, 1) transformed_point_cloud = np.dot(R, point_cloud) + T.reshape(-1, 1)
# pcd = o3d.geometry.PointCloud()
# pcd.points = o3d.utility.Vector3dVector(transformed_point_cloud.T) # transpose the point cloud to match Open3D's format
# transformed_point_cloud[2,:] = -transformed_point_cloud[2,:]
# visualize the point cloud
ax.scatter(transformed_point_cloud[0], transformed_point_cloud[1], transformed_point_cloud[2], c='g', marker='o') ax.scatter(transformed_point_cloud[0], transformed_point_cloud[1], transformed_point_cloud[2], c='g', marker='o')
ax.axis("off") ax.axis("off")
# ax.set_xlabel('X Label')
# ax.set_ylabel('Y Label')
# ax.set_zlabel('Z Label')
# save the rendering as an image
plt.savefig(filename) plt.savefig(filename)