From 0bc8fede9ec760beaeda93fa8b7948e0d1ed92da Mon Sep 17 00:00:00 2001 From: wugunjun <985091524@qq.com> Date: Tue, 25 Jun 2024 10:03:12 +0800 Subject: [PATCH] clean-code and add params' explanation --- README.md | 27 ++++++---- arguments/__init__.py | 48 ++++++++--------- merge_many_4dgs.py | 3 +- render.py | 11 +--- scene/dataset_readers.py | 8 --- scene/gaussian_model.py | 104 ++---------------------------------- scene/grid.py | 5 +- scene/hyper_loader.py | 4 -- scripts/fliter_point.py | 40 -------------- scripts/grow_point.py | 14 +---- scripts/hypernerf2colmap.py | 8 --- scripts/llff2colmap.py | 18 +------ scripts/merge_point.py | 7 --- test.py | 39 -------------- utils/point_utils.py | 79 +++------------------------ utils/pose_utils.py | 24 --------- utils/render_utils.py | 9 +--- utils/scene_utils.py | 43 +++------------ 18 files changed, 64 insertions(+), 427 deletions(-) delete mode 100644 scripts/fliter_point.py delete mode 100644 test.py diff --git a/README.md b/README.md index 4ede4e9..1199bfb 100644 --- a/README.md +++ b/README.md @@ -25,9 +25,12 @@ Light Gaussian implementation: [This link](https://github.com/pablodawson/4DGaus ## News + +2024.6.25: we clean the code and add an explanation of the parameters. + 2024.3.25: Update guidance for hypernerf and dynerf dataset. -2024.03.04: We change the hyperparameters of the Neu3D dataset, corresponding to our paper +2024.03.04: We change the hyperparameters of the Neu3D dataset, corresponding to our paper. 2024.02.28: Update SIBR viewer guidance. @@ -89,7 +92,7 @@ Meanwhile, [Plenoptic Dataset](https://github.com/facebookresearch/Neural_3D_Vid ``` **For multipleviews scenes:** -If you want to train your own dataset of multipleviews scenes,you can orginize your dataset as follows: +If you want to train your own dataset of multipleviews scenes, you can orginize your dataset as follows: ``` ├── data @@ -105,11 +108,11 @@ If you want to train your own dataset of multipleviews scenes,you can orginize y │ ├── ... │ | ... ``` -After that,you can use the `multipleviewprogress.sh` we provided to generate related data of poses and pointcloud.You can use it as follows: +After that, you can use the `multipleviewprogress.sh` we provided to generate related data of poses and pointcloud.You can use it as follows: ```bash bash multipleviewprogress.sh (youe dataset name) ``` -You need to ensure that the data folder is orginized as follows after running multipleviewprogress.sh: +You need to ensure that the data folder is organized as follows after running multipleviewprogress.sh: ``` ├── data | | multipleview @@ -181,7 +184,7 @@ You can customize your training config through the config files. ## Checkpoint -Also, you can training your model with checkpoint. +Also, you can train your model with checkpoint. ```python python train.py -s data/dnerf/bouncingballs --port 6017 --expname "dnerf/bouncingballs" --configs arguments/dnerf/bouncingballs.py --checkpoint_iterations 200 # change it. @@ -199,7 +202,7 @@ python train.py -s data/dnerf/bouncingballs --port 6017 --expname "dnerf/bouncin Run the following script to render the images. 
```
-python render.py --model_path "output/dnerf/bouncingballs/" --skip_train --configs arguments/dnerf/bouncingballs.py &
+python render.py --model_path "output/dnerf/bouncingballs/" --skip_train --configs arguments/dnerf/bouncingballs.py
```

## Evaluation
@@ -297,11 +300,13 @@ We would like to express our sincere gratitude to [@zhouzhenghong-gt](https://gi
Some insights about neural voxel grids and dynamic scenes reconstruction originate from [TiNeuVox](https://github.com/hustvl/TiNeuVox). If you find this repository/work helpful in your research, welcome to cite these papers and give a ⭐.

```
-@article{wu20234dgaussians,
- title={4D Gaussian Splatting for Real-Time Dynamic Scene Rendering},
- author={Wu, Guanjun and Yi, Taoran and Fang, Jiemin and Xie, Lingxi and Zhang, Xiaopeng and Wei Wei and Liu, Wenyu and Tian, Qi and Wang Xinggang},
- journal={arXiv preprint arXiv:2310.08528},
- year={2023}
+@InProceedings{Wu_2024_CVPR,
+ author = {Wu, Guanjun and Yi, Taoran and Fang, Jiemin and Xie, Lingxi and Zhang, Xiaopeng and Wei, Wei and Liu, Wenyu and Tian, Qi and Wang, Xinggang},
+ title = {4D Gaussian Splatting for Real-Time Dynamic Scene Rendering},
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2024},
+ pages = {20310-20320}
}

@inproceedings{TiNeuVox,
diff --git a/arguments/__init__.py b/arguments/__init__.py
index 9fa2857..7f2e8d6 100644
--- a/arguments/__init__.py
+++ b/arguments/__init__.py
@@ -73,35 +73,35 @@ class PipelineParams(ParamGroup):
 super().__init__(parser, "Pipeline Parameters")

class ModelHiddenParams(ParamGroup):
 def __init__(self, parser):
- self.net_width = 64
- self.timebase_pe = 4
- self.defor_depth = 1
- self.posebase_pe = 10
- self.scale_rotation_pe = 2
- self.opacity_pe = 2
- self.timenet_width = 64
- self.timenet_output = 32
- self.bounds = 1.6
- self.plane_tv_weight = 0.0001
- self.time_smoothness_weight = 0.01
- self.l1_time_planes = 0.0001
+ self.net_width = 64 # width of the deformation MLP; a larger value increases rendering quality but decreases training/rendering speed.
+ self.timebase_pe = 4 # useless
+ self.defor_depth = 1 # depth of the deformation MLP; a larger value increases rendering quality but decreases training/rendering speed.
+ self.posebase_pe = 10 # useless
+ self.scale_rotation_pe = 2 # useless
+ self.opacity_pe = 2 # useless
+ self.timenet_width = 64 # useless
+ self.timenet_output = 32 # useless
+ self.bounds = 1.6
+ self.plane_tv_weight = 0.0001 # TV loss weight of the spatial grid
+ self.time_smoothness_weight = 0.01 # TV loss weight of the temporal grid
+ self.l1_time_planes = 0.0001 # L1 loss weight of the temporal grid
 self.kplanes_config = {
 'grid_dimensions': 2,
 'input_coordinate_dim': 4,
 'output_coordinate_dim': 32,
- 'resolution': [64, 64, 64, 25]
+ 'resolution': [64, 64, 64, 25] # [64, 64, 64]: resolution of the spatial grid. 25: resolution of the temporal grid; best set to roughly half the number of dynamic frames
 }
- self.multires = [1, 2, 4, 8]
- self.no_dx=False
- self.no_grid=False
- self.no_ds=False
- self.no_dr=False
- self.no_do=True
- self.no_dshs=True
- self.empty_voxel=False
- self.grid_pe=0
- self.static_mlp=False
- self.apply_rotation=False
+ self.multires = [1, 2, 4, 8] # multi-resolution scales of the voxel grid
+ self.no_dx=False # cancel the deformation of Gaussians' positions
+ self.no_grid=False # cancel the spatial-temporal hexplane.
+ self.no_ds=False # cancel the deformation of Gaussians' scaling
+ self.no_dr=False # cancel the deformation of Gaussians' rotations
+ self.no_do=True # cancel the deformation of Gaussians' opacity
+ self.no_dshs=True # cancel the deformation of SH colors.
+ self.empty_voxel=False # useless
+ self.grid_pe=0 # useless; an attempt to add positional encoding to the hexplane's features
+ self.static_mlp=False # useless
+ self.apply_rotation=False # useless

 super().__init__(parser, "ModelHiddenParams")
diff --git a/merge_many_4dgs.py b/merge_many_4dgs.py
index 86d3629..3a81597 100644
--- a/merge_many_4dgs.py
+++ b/merge_many_4dgs.py
@@ -161,6 +161,7 @@ def save_point_cloud(points, model_path, timestamp):
 pcd.points = o3d.utility.Vector3dVector(points)
 ply_path = os.path.join(output_path,f"points_{timestamp}.ply")
 o3d.io.write_point_cloud(ply_path, pcd)
+# This script helps you merge multiple 4DGS models.
 parser = ArgumentParser(description="Testing script parameters")
 model = ModelParams(parser, sentinel=True)
 pipeline = PipelineParams(parser)
@@ -228,5 +229,3 @@ for index, viewpoint in tqdm(enumerate(scene1.getVideoCameras())):
 torchvision.utils.save_image(result["render"],os.path.join(render_path,f"output_image{index}.png"))

imageio.mimwrite(os.path.join(render_path, 'video_rgb.mp4'), render_images, fps=30, codec='libx265')
- # points = get_state_at_time(gaussians, viewpoint)
- # save_point_cloud(points, args.model_path, index)
\ No newline at end of file
diff --git a/render.py b/render.py
index 45c7da8..35a4db2 100644
--- a/render.py
+++ b/render.py
@@ -23,7 +23,6 @@ from argparse import ArgumentParser
 from arguments import ModelParams, PipelineParams, get_combined_args, ModelHiddenParams
 from gaussian_renderer import GaussianModel
 from time import time
-# import torch.multiprocessing as mp
 import threading
 import concurrent.futures
 def multithread_write(image_list, path):
@@ -53,32 +52,24 @@ def render_set(model_path, name, iteration, views, gaussians, pipeline, backgrou
 render_images = []
 gt_list = []
 render_list = []
- # breakpoint()
 print("point nums:",gaussians._xyz.shape[0])
 for idx, view in enumerate(tqdm(views, desc="Rendering progress")):
 if idx == 0:time1 = time()
- # breakpoint()
 rendering = render(view, gaussians, pipeline, background,cam_type=cam_type)["render"]
- # torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png"))
 render_images.append(to8b(rendering).transpose(1,2,0))
- # print(to8b(rendering).shape)
 render_list.append(rendering)
 if name in ["train", "test"]:
 if cam_type != "PanopticSports":
 gt = view.original_image[0:3, :, :]
 else:
 gt = view['image'].cuda()
- # torchvision.utils.save_image(gt, os.path.join(gts_path, '{0:05d}'.format(idx) + ".png"))
 gt_list.append(gt)
- # if idx >= 10:
- # break
+
 time2=time()
 print("FPS:",(len(views)-1)/(time2-time1))
- # print("writing training images.")
 multithread_write(gt_list, gts_path)
- # print("writing rendering images.")
 multithread_write(render_list, render_path)

diff --git a/scene/dataset_readers.py b/scene/dataset_readers.py
index c392f76..8a6cc42 100644
--- a/scene/dataset_readers.py
+++ b/scene/dataset_readers.py
@@ -307,7 +306,6 @@ def read_timeline(path):
 timestamp_mapper = {}
 max_time_float = max(time_line)
 for index, time in enumerate(time_line):
- # timestamp_mapper[time] = index
 timestamp_mapper[time] = time/max_time_float

 return timestamp_mapper, max_time_float
@@ -548,18 +547,12 @@ def readPanopticmeta(datadir, json_path):
 cam_ids = test_meta['cam_id'][index]

 time = index /
len(test_meta['fn']) - # breakpoint() for focal, w2c, fn, cam in zip(focals, w2cs, fns, cam_ids): image_path = os.path.join(datadir,"ims") image_name=fn - - # breakpoint() image = Image.open(os.path.join(datadir,"ims",fn)) im_data = np.array(image.convert("RGBA")) - # breakpoint() im_data = PILtoTorch(im_data,None)[:3,:,:] - # breakpoint() - # print(w2c,focal,image_name) camera = setup_camera(w, h, focal, w2c) cam_infos.append({ "camera":camera, @@ -568,7 +561,6 @@ def readPanopticmeta(datadir, json_path): cam_centers = np.linalg.inv(test_meta['w2c'][0])[:, :3, 3] # Get scene radius scene_radius = 1.1 * np.max(np.linalg.norm(cam_centers - np.mean(cam_centers, 0)[None], axis=-1)) - # breakpoint() return cam_infos, max_time, scene_radius def readPanopticSportsinfos(datadir): diff --git a/scene/gaussian_model.py b/scene/gaussian_model.py index 5225c74..918c5e5 100644 --- a/scene/gaussian_model.py +++ b/scene/gaussian_model.py @@ -22,7 +22,6 @@ from utils.sh_utils import RGB2SH from simple_knn._C import distCUDA2 from utils.graphics_utils import BasicPointCloud from utils.general_utils import strip_symmetric, build_scaling_rotation -# from utils.point_utils import addpoint, combine_pointcloud, downsample_point_cloud_open3d, find_indices_in_A from scene.deformation import deform_network from scene.regulation import compute_plane_smoothness class GaussianModel: @@ -49,9 +48,7 @@ class GaussianModel: self.active_sh_degree = 0 self.max_sh_degree = sh_degree self._xyz = torch.empty(0) - # self._deformation = torch.empty(0) self._deformation = deform_network(args) - # self.grid = TriPlaneGrid() self._features_dc = torch.empty(0) self._features_rest = torch.empty(0) self._scaling = torch.empty(0) @@ -232,9 +229,7 @@ class GaussianModel: deform = self._deformation[:,:,:time].sum(dim=-1) xyz = self._xyz + deform return xyz - # def save_ply_dynamic(path): - # for time in range(self._deformation.shape(-1)): - # xyz = self.compute_deformation(time) + def load_model(self, path): print("loading model from exists{}".format(path)) weight_dict = torch.load(os.path.join(path,"deformation.pth"),map_location="cuda") @@ -448,49 +443,18 @@ class GaussianModel: def densify_and_clone(self, grads, grad_threshold, scene_extent, density_threshold=20, displacement_scale=20, model_path=None, iteration=None, stage=None): grads_accum_mask = torch.where(torch.norm(grads, dim=-1) >= grad_threshold, True, False) - # 主动增加稀疏点云 - # if not hasattr(self,"voxel_size"): - # self.voxel_size = 8 - # if not hasattr(self,"density_threshold"): - # self.density_threshold = density_threshold - # if not hasattr(self,"displacement_scale"): - # self.displacement_scale = displacement_scale - # point_cloud = self.get_xyz.detach().cpu() - # sparse_point_mask = self.downsample_point(point_cloud) - # _, low_density_points, new_points, low_density_index = addpoint(point_cloud[sparse_point_mask],density_threshold=self.density_threshold,displacement_scale=self.displacement_scale,iter_pass=0) - # sparse_point_mask = sparse_point_mask.to(grads_accum_mask) - # low_density_index = low_density_index.to(grads_accum_mask) - # if new_points.shape[0] < 100 : - # self.density_threshold /= 2 - # self.displacement_scale /= 2 - # print("reduce diplacement_scale to: ",self.displacement_scale) - # global_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool).to(grads_accum_mask) - # global_mask[sparse_point_mask] = low_density_index - # selected_pts_mask_grow = torch.logical_and(global_mask, grads_accum_mask) - # 
print("降采样点云:",sparse_point_mask.sum(),"选中的稀疏点云:",global_mask.sum(),"梯度累计点云:",grads_accum_mask.sum(),"选中增长点云:",selected_pts_mask_grow.sum()) - # Extract points that satisfy the gradient condition + selected_pts_mask = torch.logical_and(grads_accum_mask, torch.max(self.get_scaling, dim=1).values <= self.percent_dense*scene_extent) - # breakpoint() new_xyz = self._xyz[selected_pts_mask] - # - 0.001 * self._xyz.grad[selected_pts_mask] new_features_dc = self._features_dc[selected_pts_mask] new_features_rest = self._features_rest[selected_pts_mask] new_opacities = self._opacity[selected_pts_mask] new_scaling = self._scaling[selected_pts_mask] new_rotation = self._rotation[selected_pts_mask] new_deformation_table = self._deformation_table[selected_pts_mask] - # if opt.add_point: - # selected_xyz, grow_xyz = self.add_point_by_mask(selected_pts_mask_grow.to(self.get_xyz.device), self.displacement_scale) self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_deformation_table) - # print("被动增加点云:",selected_xyz.shape[0]) - # print("主动增加点云:",selected_pts_mask.sum()) - # if model_path is not None and iteration is not None: - # point = combine_pointcloud(self.get_xyz.detach().cpu().numpy(), new_xyz.detach().cpu().numpy(), selected_xyz.detach().cpu().numpy()) - # write_path = os.path.join(model_path,"add_point_cloud") - # os.makedirs(write_path,exist_ok=True) - # o3d.io.write_point_cloud(os.path.join(write_path,f"iteration_{stage}{iteration}.ply"),point) - # print("write output.") + @property def get_aabb(self): return self._deformation.get_aabb @@ -505,23 +469,12 @@ class GaussianModel: mask_d = mask_c.all(dim=1) final_point = final_point[mask_d] - # while (mask_d.sum()/final_point.shape[0])<0.5: - # perturb/=2 - # displacements = torch.randn(selected_point.shape[0], 3).to(selected_point) * perturb - # final_point = selected_point + displacements - # mask_a = final_pointxyz_min - # mask_c = mask_a & mask_b - # mask_d = mask_c.all(dim=1) - # final_point = final_point[mask_d] + return final_point, mask_d def add_point_by_mask(self, selected_pts_mask, perturb=0): selected_xyz = self._xyz[selected_pts_mask] new_xyz, mask = self.get_displayment(selected_xyz, self.get_xyz.detach(),perturb) - # displacements = torch.randn(selected_xyz.shape[0], 3).to(self._xyz) * perturb - # new_xyz = selected_xyz + displacements - # - 0.001 * self._xyz.grad[selected_pts_mask] new_features_dc = self._features_dc[selected_pts_mask][mask] new_features_rest = self._features_rest[selected_pts_mask][mask] new_opacities = self._opacity[selected_pts_mask][mask] @@ -532,56 +485,7 @@ class GaussianModel: self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_deformation_table) return selected_xyz, new_xyz - def downsample_point(self, point_cloud): - if not hasattr(self,"voxel_size"): - self.voxel_size = 8 - point_downsample = point_cloud - flag = False - while point_downsample.shape[0]>1000: - if flag: - self.voxel_size+=8 - point_downsample = downsample_point_cloud_open3d(point_cloud,voxel_size=self.voxel_size) - flag = True - print("point size:",point_downsample.shape[0]) - # downsampled_point_mask = torch.eq(point_downsample.view(1,-1,3), point_cloud.view(-1,1,3)).all(dim=1) - downsampled_point_index = find_indices_in_A(point_cloud, point_downsample) - downsampled_point_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool).to(point_downsample.device) - 
downsampled_point_mask[downsampled_point_index]=True - return downsampled_point_mask - def grow(self, density_threshold=20, displacement_scale=20, model_path=None, iteration=None, stage=None): - if not hasattr(self,"voxel_size"): - self.voxel_size = 8 - if not hasattr(self,"density_threshold"): - self.density_threshold = density_threshold - if not hasattr(self,"displacement_scale"): - self.displacement_scale = displacement_scale - flag = False - point_cloud = self.get_xyz.detach().cpu() - point_downsample = point_cloud.detach() - downsampled_point_index = self.downsample_point(point_downsample) - - _, low_density_points, new_points, low_density_index = addpoint(point_cloud[downsampled_point_index],density_threshold=self.density_threshold,displacement_scale=self.displacement_scale,iter_pass=0) - if new_points.shape[0] < 100 : - self.density_threshold /= 2 - self.displacement_scale /= 2 - print("reduce diplacement_scale to: ",self.displacement_scale) - - elif new_points.shape[0] == 0: - print("no point added") - return - global_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool) - - global_mask[downsampled_point_index] = low_density_index - global_mask - selected_xyz, new_xyz = self.add_point_by_mask(global_mask.to(self.get_xyz.device), self.displacement_scale) - print("point growing,add point num:",global_mask.sum()) - if model_path is not None and iteration is not None: - point = combine_pointcloud(point_cloud, selected_xyz.detach().cpu().numpy(), new_xyz.detach().cpu().numpy()) - write_path = os.path.join(model_path,"add_point_cloud") - os.makedirs(write_path,exist_ok=True) - o3d.io.write_point_cloud(os.path.join(write_path,f"iteration_{stage}{iteration}.ply"),point) - return def prune(self, max_grad, min_opacity, extent, max_screen_size): prune_mask = (self.get_opacity < min_opacity).squeeze() diff --git a/scene/grid.py b/scene/grid.py index ba2622c..7d08038 100644 --- a/scene/grid.py +++ b/scene/grid.py @@ -17,10 +17,7 @@ class DenseGrid(nn.Module): super(DenseGrid, self).__init__() self.channels = channels self.world_size = world_size - # self.xyz_max = xyz_max - # self.xyz_min = xyz_min - # self.register_buffer('xyz_min', torch.Tensor(xyz_min)) - # self.register_buffer('xyz_max', torch.Tensor(xyz_max)) + self.grid = nn.Parameter(torch.ones([1, channels, *world_size])) def forward(self, xyz): diff --git a/scene/hyper_loader.py b/scene/hyper_loader.py index 7865e98..cd3b00d 100644 --- a/scene/hyper_loader.py +++ b/scene/hyper_loader.py @@ -134,13 +134,9 @@ class Load_hyper_data(Dataset): if idx in self.map.keys(): return self.map[idx] camera = self.all_cam_params[idx] - # camera = self.video_path[idx] w = self.image_one.size[0] h = self.image_one.size[1] - # image = PILtoTorch(image,None) - # image = image.to(torch.float32) time = self.video_time[idx] - # .astype(np.float32) R = camera.orientation.T T = - camera.position @ R FovY = focal2fov(camera.focal_length, self.h) diff --git a/scripts/fliter_point.py b/scripts/fliter_point.py deleted file mode 100644 index fa49fc6..0000000 --- a/scripts/fliter_point.py +++ /dev/null @@ -1,40 +0,0 @@ -import open3d as o3d -import os -# 指定根目录路径 -root_path = "data/dynerf/sear_steak/" - -# 文件名 -input_file = "points3D.ply" -output_file = "points3d_filtered.ply" - -# 读取点云数据 -point_cloud_before = o3d.io.read_point_cloud(os.path.join(root_path, input_file)) - -# 计算过滤前的点的数量 -num_points_before = len(point_cloud_before.points) - -# 计算过滤前的点云的边界框大小 -bbox_before = point_cloud_before.get_axis_aligned_bounding_box() -bbox_size_before = 
bbox_before.get_max_bound() - bbox_before.get_min_bound() - -# 进行离群点滤波 -cl, ind = point_cloud_before.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) - -# 创建一个新的点云对象,包含滤波后的点 -filtered_point_cloud = point_cloud_before.select_by_index(ind) - -# 保存滤波后的点云到新文件 -o3d.io.write_point_cloud(os.path.join(root_path, output_file), filtered_point_cloud) - -# 计算过滤后的点的数量 -num_points_after = len(filtered_point_cloud.points) - -# 计算边界框的大小 -bbox = filtered_point_cloud.get_axis_aligned_bounding_box() -bbox_size = bbox.get_max_bound() - bbox.get_min_bound() - -print(f"过滤前的点数: {num_points_before}") -print(f"过滤前的点云边界框大小: {bbox_size_before}") -print(f"过滤后的点数: {num_points_after}") -print(f"过滤后的点云边界框大小: {bbox_size}") -print(f"离群点过滤完成,结果已保存到 {output_file}") diff --git a/scripts/grow_point.py b/scripts/grow_point.py index bb30c88..4265c25 100644 --- a/scripts/grow_point.py +++ b/scripts/grow_point.py @@ -2,24 +2,14 @@ import open3d as o3d import numpy as np def grow_sparse_regions(input_file, output_file): - # 读取输入的ply文件 pcd = o3d.io.read_point_cloud(input_file) - - # 计算点云的密度 densities = o3d.geometry.PointCloud.compute_nearest_neighbor_distance(pcd) avg_density = np.average(densities) print(f"Average density: {avg_density}") - - # 找到稀疏部分 - sparse_indices = np.where(densities > avg_density * 1.2)[0] # 这里我们假设稀疏部分的密度大于平均密度的1.2倍 + sparse_indices = np.where(densities > avg_density * 1.2)[0] sparse_points = np.asarray(pcd.points)[sparse_indices] - breakpoint() - # 复制并增长稀疏部分 - # for _ in range(5): # 这里我们假设每个稀疏点复制5次 - # pcd.points.extend(sparse_points) - # 将结果保存到输入的路径中 + o3d.io.write_point_cloud(output_file, pcd) -# 使用函数 grow_sparse_regions("data/hypernerf/vrig/chickchicken/dense_downsample.ply", "data/hypernerf/interp/chickchicken/dense_downsample.ply") \ No newline at end of file diff --git a/scripts/hypernerf2colmap.py b/scripts/hypernerf2colmap.py index 1846ef4..3221dd3 100644 --- a/scripts/hypernerf2colmap.py +++ b/scripts/hypernerf2colmap.py @@ -41,7 +41,6 @@ for jsonfile in tqdm(cameras): image_size = cams[0]['image_size'] image = Image.open(os.path.join(image_dir,images[0])) size = image.size -# breakpoint() object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w") object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w") @@ -50,19 +49,13 @@ cnt=0 sizes=2 while len(cams)//sizes > 200: sizes += 1 -# breakpoint() for cam, image in zip(cams, images): cnt+=1 - # print(image) - # breakpoint() if cnt % sizes != 0: continue - # print("begin to write") R = np.array(cam['orientation']).T - # breakpoint() T = -np.array(cam['position'])@R - # T = -np.matmul(R,T) T = [str(i) for i in T] qevc = [str(i) for i in rotmat2qvec(R.T)] @@ -73,7 +66,6 @@ for cam, image in zip(cams, images): shutil.copy(os.path.join(image_dir,image),os.path.join(imagecolmap_dir,image)) print(idx) # write camera infomation. -# print(1,"SIMPLE_PINHOLE",image_size[0],image_size[1],focal[0],image_sizep0/2,image_size[1]/2,file=object_cameras_file) object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w") object_cameras_file.close() diff --git a/scripts/llff2colmap.py b/scripts/llff2colmap.py index e836cd5..507b3d4 100644 --- a/scripts/llff2colmap.py +++ b/scripts/llff2colmap.py @@ -98,17 +98,6 @@ H, W, focal = poses[0, :, -1] focal = focal/2 focal = [focal, focal] poses = np.concatenate([poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1) -# poses, _ = center_poses( -# poses, blender2opencv -# ) # Re-center poses so that the average is near the center. 
-# near_original = near_fars.min() -# scale_factor = near_original * 0.75 -# near_fars /= ( -# scale_factor # rescale nearest plane so that it is at z = 4/3. -# ) -# poses[..., 3] /= scale_factor -# Sample N_views poses for validation - NeRF-like camera trajectory. -# val_poses = directions videos = glob.glob(os.path.join(root_dir, "cam[0-9][0-9]")) videos = sorted(videos) image_paths = [] @@ -132,8 +121,6 @@ for index, image in enumerate(image_paths): shutil.copy(image,goal_path) print(poses) -# breakpoint() - # write image information. object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w") for idx, pose in enumerate(poses): @@ -147,15 +134,12 @@ for idx, pose in enumerate(poses): R = np.linalg.inv(R) T = -np.matmul(R,T) T = [str(i) for i in T] - # T = ["%.3f"%i for i in pose[:3,3]] qevc = [str(i) for i in rotmat2qvec(R)] - # breakpoint() print(idx+1," ".join(qevc)," ".join(T),1,image_name_list[idx],"\n",file=object_images_file) -# breakpoint() # write camera infomation. object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w") -print(1,"SIMPLE_PINHOLE",1352,1014,focal[0],1352/2,1014/2,file=object_cameras_file) +print(1,"SIMPLE_PINHOLE",1352,1014,focal[0],1352/2,1014/2,file=object_cameras_file) # object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w") object_cameras_file.close() diff --git a/scripts/merge_point.py b/scripts/merge_point.py index 4bfc95b..fba4a57 100644 --- a/scripts/merge_point.py +++ b/scripts/merge_point.py @@ -2,22 +2,15 @@ import open3d as o3d import os from tqdm import tqdm def merge_point_clouds(directory, output_file): - # 初始化一个空的点云 merged_pcd = o3d.geometry.PointCloud() - # 遍历文件夹下的所有文件 for filename in tqdm(os.listdir(directory)): if filename.endswith('.ply'): - # 读取点云文件 pcd = o3d.io.read_point_cloud(os.path.join(directory, filename)) - # 将点云合并 merged_pcd += pcd - # 移除位置相同的点 merged_pcd = merged_pcd.remove_duplicate_points() - # 将合并后的点云输出到一个文件中 o3d.io.write_point_cloud(output_file, merged_pcd) -# 使用函数 merge_point_clouds("point_clouds_directory", "merged.ply") \ No newline at end of file diff --git a/test.py b/test.py deleted file mode 100644 index e2e328e..0000000 --- a/test.py +++ /dev/null @@ -1,39 +0,0 @@ -import cv2 -import os -import re -def sorted_alphanumeric(data): - """ - 对给定的数据进行字母数字排序(考虑数字的数值大小) - """ - convert = lambda text: int(text) if text.isdigit() else text.lower() - alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] - return sorted(data, key=alphanum_key) -def create_video_from_images(folder_path, output_file, frame_rate=30, img_size=None): - images = [img for img in os.listdir(folder_path) if img.endswith(".jpg") or img.endswith(".png")] - images = sorted_alphanumeric(images) # 使用自定义的排序函数 - - # 获取第一张图片的尺寸 - frame = cv2.imread(os.path.join(folder_path, images[0])) - height, width, layers = frame.shape - - # 如果指定了img_size,则调整尺寸 - if img_size is not None: - width, height = img_size - - # 定义视频编码和创建VideoWriter对象 - fourcc = cv2.VideoWriter_fourcc(*'mp4v') # 可以更改为其他编码器 - video = cv2.VideoWriter(output_file, fourcc, frame_rate, (width, height)) - - for image in images: - img = cv2.imread(os.path.join(folder_path, image)) - if img_size is not None: - img = cv2.resize(img, img_size) - video.write(img) - - cv2.destroyAllWindows() - video.release() - -# 使用示例 -folder_path = 'output/editing_render' # 替换为您的图片文件夹路径 -output_file = 'output_video.mp4' # 输出视频文件名 -create_video_from_images(folder_path, output_file) diff --git a/utils/point_utils.py b/utils/point_utils.py index cef4db9..f79f728 100644 --- 
a/utils/point_utils.py +++ b/utils/point_utils.py @@ -7,60 +7,34 @@ import open3d as o3d import numpy as np from torch_cluster import grid_cluster def voxel_down_sample_custom(points, voxel_size): - # 将点云归一化到体素网格 voxel_grid = torch.floor(points / voxel_size) - - # 找到唯一的体素,并获取它们在原始体素网格中的索引 unique_voxels, inverse_indices = torch.unique(voxel_grid, dim=0, return_inverse=True) - - # 创建一个新的点云,其中每个点是其对应体素中所有点的平均值 new_points = torch.zeros_like(unique_voxels) new_points_count = torch.zeros(unique_voxels.size(0), dtype=torch.long) - # for i in tqdm(range(points.size(0))): new_points[inverse_indices] = points - # new_points_count[inverse_indices[i]] += 1 - # new_points /= new_points_count.unsqueeze(-1) + return new_points, inverse_indices def downsample_point_cloud(points, ratio): - # 创建一个TensorDataset dataset = TensorDataset(points) - - # 计算下采样后的点的数量 num_points = len(dataset) num_downsampled_points = int(num_points * ratio) - - # 使用random_split进行下采样 downsampled_dataset, _ = random_split(dataset, [num_downsampled_points, num_points - num_downsampled_points]) - - # 获取下采样后的点的index和点云矩阵 indices = torch.tensor([i for i, _ in enumerate(downsampled_dataset)]) downsampled_points = torch.stack([x for x, in downsampled_dataset]) - return indices, downsampled_points def downsample_point_cloud_open3d(points, voxel_size): - # 创建一个点云对象 - downsampled_pcd, inverse_indices = voxel_down_sample_custom(points, voxel_size) downsampled_points = downsampled_pcd - # 获取下采样后的点云矩阵 - return torch.tensor(downsampled_points) def downsample_point_cloud_cluster(points, voxel_size): - # 创建一个点云对象 cluster = grid_cluster(points, size=torch.tensor([1,1,1])) - - # 获取下采样后的点云矩阵 - # downsampled_points = np.asarray(downsampled_pcd.points) - return cluster, points import torch from sklearn.neighbors import NearestNeighbors def upsample_point_cloud(points, density_threshold, displacement_scale, iter_pass): - # 计算每个点的密度 - # breakpoint() try: nbrs = NearestNeighbors(n_neighbors=2+iter_pass, algorithm='ball_tree').fit(points) distances, indices = nbrs.kneighbors(points) @@ -68,88 +42,52 @@ def upsample_point_cloud(points, density_threshold, displacement_scale, iter_pas print("no point added") return points, torch.tensor([]), torch.tensor([]), torch.zeros((points.shape[0]), dtype=torch.bool) - # 找出密度低的点 low_density_points = points[distances[:,1] > density_threshold] low_density_index = distances[:,1] > density_threshold low_density_index = torch.from_numpy(low_density_index) - # 复制这些点并添加随机位移 num_points = low_density_points.shape[0] displacements = torch.randn(num_points, 3) * displacement_scale new_points = low_density_points + displacements - # 返回新的点云矩阵 return points, low_density_points, new_points, low_density_index def visualize_point_cloud(points, low_density_points, new_points): - # 创建一个点云对象 pcd = o3d.geometry.PointCloud() - - # 给被选中的点云添加一个小的偏移量 low_density_points += 0.01 - - # 将所有的点合并到一起 all_points = np.concatenate([points, low_density_points, new_points], axis=0) pcd.points = o3d.utility.Vector3dVector(all_points) - - # 创建颜色数组 colors = np.zeros((all_points.shape[0], 3)) - colors[:points.shape[0]] = [0, 0, 0] # 黑色表示初始化的点云 - colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0] # 红色表示被选中的点云 - colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0] # 绿色表示增长的点云 + colors[:points.shape[0]] = [0, 0, 0] + colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0] + colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0] pcd.colors = o3d.utility.Vector3dVector(colors) - - # 显示点云 
o3d.visualization.draw_geometries([pcd]) def combine_pointcloud(points, low_density_points, new_points): pcd = o3d.geometry.PointCloud() - - # 给被选中的点云添加一个小的偏移量 low_density_points += 0.01 new_points -= 0.01 - # 将所有的点合并到一起 all_points = np.concatenate([points, low_density_points, new_points], axis=0) pcd.points = o3d.utility.Vector3dVector(all_points) - - # 创建颜色数组 colors = np.zeros((all_points.shape[0], 3)) - colors[:points.shape[0]] = [0, 0, 0] # 黑色表示初始化的点云 - colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0] # 红色表示被选中的点云 - colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0] # 绿色表示增长的点云 + colors[:points.shape[0]] = [0, 0, 0] + colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0] + colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0] pcd.colors = o3d.utility.Vector3dVector(colors) return pcd def addpoint(point_cloud,density_threshold,displacement_scale, iter_pass,): - # density_threshold: 密度的阈值,越大能筛选出越稀疏的点。 - # displacement_scale: 在以displacement_scale的圆心内随机生成点 - points, low_density_points, new_points, low_density_index = upsample_point_cloud(point_cloud,density_threshold,displacement_scale, iter_pass) - # breakpoint() - # breakpoint() print("low_density_points",low_density_points.shape[0]) - - return point_cloud, low_density_points, new_points, low_density_index def find_point_indices(origin_point, goal_point): indices = torch.nonzero((origin_point[:, None] == goal_point).all(-1), as_tuple=True)[0] return indices def find_indices_in_A(A, B): - """ - 找出子集矩阵 B 中每个点在点云矩阵 A 中的索引 u。 - - 参数: - A (torch.Tensor): 点云矩阵 A,大小为 [N, 3]。 - B (torch.Tensor): 子集矩阵 B,大小为 [M, 3]。 - - 返回: - torch.Tensor: 包含 B 中每个点在 A 中的索引 u 的张量,形状为 (M,)。 - """ is_equal = torch.eq(B.view(1, -1, 3), A.view(-1, 1, 3)) u_indices = torch.nonzero(is_equal, as_tuple=False)[:, 0] return torch.unique(u_indices) if __name__ =="__main__": - # from time import time pass_=0 - # filename=f"pointcloud/pass_{pass_}.ply" filename = "point_cloud.ply" pcd = o3d.io.read_point_cloud(filename) point_cloud = torch.tensor(pcd.points) @@ -170,7 +108,6 @@ if __name__ =="__main__": flag = True print("point size:",point_downsample.shape[0]) - # downsampled_point_index = find_point_indices(point_cloud, point_downsample) downsampled_point_index = find_indices_in_A(point_cloud, point_downsample) print("selected_num",point_cloud[downsampled_point_index].shape[0]) _, low_density_points, new_points, low_density_index = addpoint(point_cloud[downsampled_point_index],density_threshold=density_threshold,displacement_scale=displacement_scale,iter_pass=0) @@ -188,9 +125,7 @@ if __name__ =="__main__": if low_density_points.shape[0] == 0: print("no more points.") continue - # breakpoint() point = combine_pointcloud(point_cloud, low_density_points, new_points) point_cloud = torch.tensor(point.points) o3d.io.write_point_cloud(f"pointcloud/pass_{i}.ply",point) - # visualize_qpoint_cloud( point_cloud, low_density_points, new_points) \ No newline at end of file diff --git a/utils/pose_utils.py b/utils/pose_utils.py index 80e251b..6cbc577 100644 --- a/utils/pose_utils.py +++ b/utils/pose_utils.py @@ -3,15 +3,12 @@ from scipy.spatial.transform import Rotation as R from scene.utils import Camera from copy import deepcopy def rotation_matrix_to_quaternion(rotation_matrix): - """将旋转矩阵转换为四元数""" return R.from_matrix(rotation_matrix).as_quat() def quaternion_to_rotation_matrix(quat): - """将四元数转换为旋转矩阵""" return R.from_quat(quat).as_matrix() def quaternion_slerp(q1, q2, t): - 
"""在两个四元数之间进行球面线性插值(SLERP)""" # 计算两个四元数之间的点积 dot = np.dot(q1, q2) @@ -32,13 +29,10 @@ def quaternion_slerp(q1, q2, t): return np.cos(theta) * q1 + np.sin(theta) * q3 def bezier_interpolation(p1, p2, t): - """在两点之间使用贝塞尔曲线进行插值""" return (1 - t) * p1 + t * p2 def linear_interpolation(v1, v2, t): - """线性插值""" return (1 - t) * v1 + t * v2 def smooth_camera_poses(cameras, num_interpolations=5): - """对一系列相机位姿进行平滑处理,通过在每对位姿之间插入额外的位姿""" smoothed_cameras = [] smoothed_times = [] total_poses = len(cameras) - 1 + (len(cameras) - 1) * num_interpolations @@ -48,44 +42,26 @@ def smooth_camera_poses(cameras, num_interpolations=5): cam1 = cameras[i] cam2 = cameras[i + 1] - # 将旋转矩阵转换为四元数 quat1 = rotation_matrix_to_quaternion(cam1.orientation) quat2 = rotation_matrix_to_quaternion(cam2.orientation) for j in range(num_interpolations + 1): t = j / (num_interpolations + 1) - # 插值方向 interp_orientation_quat = quaternion_slerp(quat1, quat2, t) interp_orientation_matrix = quaternion_to_rotation_matrix(interp_orientation_quat) - # 插值位置 interp_position = linear_interpolation(cam1.position, cam2.position, t) - # 计算插值时间戳 interp_time = i*10 / (len(cameras) - 1) + time_increment * j - # 添加新的相机位姿和时间戳 newcam = deepcopy(cam1) newcam.orientation = interp_orientation_matrix newcam.position = interp_position smoothed_cameras.append(newcam) smoothed_times.append(interp_time) - - # 添加最后一个原始位姿和时间戳 smoothed_cameras.append(cameras[-1]) smoothed_times.append(1.0) print(smoothed_times) return smoothed_cameras, smoothed_times -# # 示例:使用两个相机位姿 -# cam1 = Camera(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), np.array([0, 0, 0])) -# cam2 = Camera(np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]]), np.array([1, 1, 1])) - -# # 应用平滑处理 -# smoothed_cameras = smooth_camera_poses([cam1, cam2], num_interpolations=5) - -# # 打印结果 -# for cam in smoothed_cameras: -# print("Orientation:\n", cam.orientation) -# print("Position:", cam.position) diff --git a/utils/render_utils.py b/utils/render_utils.py index e06f263..2b4848d 100644 --- a/utils/render_utils.py +++ b/utils/render_utils.py @@ -11,15 +11,8 @@ def get_state_at_time(pc,viewpoint_camera): scales = pc._scaling rotations = pc._rotation cov3D_precomp = None - - # time0 = get_time() - # means3D_deform, scales_deform, rotations_deform, opacity_deform = pc._deformation(means3D[deformation_point], scales[deformation_point], - # rotations[deformation_point], opacity[deformation_point], - # time[deformation_point]) means3D_final, scales_final, rotations_final, opacity_final, shs_final = pc._deformation(means3D, scales, rotations, opacity, shs, time) - # scales_final = pc.scaling_activation(scales_final) - # rotations_final = pc.rotation_activation(rotations_final) - # opacity = pc.opacity_activation(opacity_final) + return means3D_final, scales_final, rotations_final, opacity, shs_final \ No newline at end of file diff --git a/utils/scene_utils.py b/utils/scene_utils.py index 5c9cf00..b364504 100644 --- a/utils/scene_utils.py +++ b/utils/scene_utils.py @@ -25,26 +25,17 @@ def render_training_image(scene, gaussians, viewpoints, render_func, pipe, backg gt_np = viewpoint['image'].permute(1,2,0).cpu().numpy() else: gt_np = viewpoint.original_image.permute(1,2,0).cpu().numpy() - image_np = image.permute(1, 2, 0).cpu().numpy() # 转换通道顺序为 (H, W, 3) + image_np = image.permute(1, 2, 0).cpu().numpy() # (H, W, 3) depth_np = depth.permute(1, 2, 0).cpu().numpy() depth_np /= depth_np.max() depth_np = np.repeat(depth_np, 3, axis=2) image_np = np.concatenate((gt_np, image_np, depth_np), axis=1) - image_with_labels = 
Image.fromarray((np.clip(image_np,0,1) * 255).astype('uint8')) # 转换为8位图像 - # 创建PIL图像对象的副本以绘制标签 + image_with_labels = Image.fromarray((np.clip(image_np,0,1) * 255).astype('uint8')) draw1 = ImageDraw.Draw(image_with_labels) - - # 选择字体和字体大小 - font = ImageFont.truetype('./utils/TIMES.TTF', size=40) # 请将路径替换为您选择的字体文件路径 - - # 选择文本颜色 - text_color = (255, 0, 0) # 白色 - - # 选择标签的位置(左上角坐标) + font = ImageFont.truetype('./utils/TIMES.TTF', size=40) + text_color = (255, 0, 0) label1_position = (10, 10) - label2_position = (image_with_labels.width - 100 - len(label2) * 10, 10) # 右上角坐标 - - # 在图像上添加标签 + label2_position = (image_with_labels.width - 100 - len(label2) * 10, 10) draw1.text(label1_position, label1, fill=text_color, font=font) draw1.text(label2_position, label2, fill=text_color, font=font) @@ -58,42 +49,20 @@ def render_training_image(scene, gaussians, viewpoints, render_func, pipe, backg os.makedirs(point_cloud_path) if not os.path.exists(image_path): os.makedirs(image_path) - # image:3,800,800 - # point_save_path = os.path.join(point_cloud_path,f"{iteration}.jpg") for idx in range(len(viewpoints)): image_save_path = os.path.join(image_path,f"{iteration}_{idx}.jpg") render(gaussians,viewpoints[idx],image_save_path,scaling = 1,cam_type=dataset_type) - # render(gaussians,point_save_path,scaling = 0.1) - # 保存带有标签的图像 - - - pc_mask = gaussians.get_opacity pc_mask = pc_mask > 0.1 - xyz = gaussians.get_xyz.detach()[pc_mask.squeeze()].cpu().permute(1,0).numpy() - # visualize_and_save_point_cloud(xyz, viewpoint.R, viewpoint.T, point_save_path) - # 如果需要,您可以将PIL图像转换回PyTorch张量 - # return image - # image_with_labels_tensor = torch.tensor(image_with_labels, dtype=torch.float32).permute(2, 0, 1) / 255.0 + def visualize_and_save_point_cloud(point_cloud, R, T, filename): - # 创建3D散点图 fig = plt.figure() ax = fig.add_subplot(111, projection='3d') R = R.T - # 应用旋转和平移变换 T = -R.dot(T) transformed_point_cloud = np.dot(R, point_cloud) + T.reshape(-1, 1) - # pcd = o3d.geometry.PointCloud() - # pcd.points = o3d.utility.Vector3dVector(transformed_point_cloud.T) # 转置点云数据以匹配Open3D的格式 - # transformed_point_cloud[2,:] = -transformed_point_cloud[2,:] - # 可视化点云 ax.scatter(transformed_point_cloud[0], transformed_point_cloud[1], transformed_point_cloud[2], c='g', marker='o') ax.axis("off") - # ax.set_xlabel('X Label') - # ax.set_ylabel('Y Label') - # ax.set_zlabel('Z Label') - - # 保存渲染结果为图片 plt.savefig(filename)
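As a usage note for the parameter explanations this patch adds to `arguments/__init__.py`: the documented `ModelHiddenParams` defaults are normally overridden by the file passed via `--configs` (e.g. `arguments/dnerf/bouncingballs.py` in the README's training commands). Below is a minimal sketch of such a config, assuming the repository's existing convention that config files define plain `ModelHiddenParams`/`OptimizationParams` dictionaries; the file name `arguments/dnerf/my_scene.py` and all values are illustrative assumptions, not recommended settings.

```python
# Hypothetical example config, e.g. arguments/dnerf/my_scene.py (values are illustrative only).
ModelHiddenParams = dict(
    net_width=128,        # wider deformation MLP: higher quality, slower training/rendering
    defor_depth=2,        # deeper deformation MLP
    kplanes_config={
        'grid_dimensions': 2,
        'input_coordinate_dim': 4,
        'output_coordinate_dim': 32,
        # [64, 64, 64]: spatial resolution; 50: temporal resolution,
        # roughly half the number of frames for a ~100-frame dynamic sequence
        'resolution': [64, 64, 64, 50],
    },
    multires=[1, 2, 4, 8],          # multi-resolution scales of the voxel grid
    plane_tv_weight=0.0002,         # TV regularization on the spatial planes
    time_smoothness_weight=0.001,   # smoothness regularization on the temporal planes
    no_do=True,                     # keep Gaussians' opacity static
    no_dshs=True,                   # keep SH colors static
)
```

Training would then be launched as in the README, e.g. `python train.py -s data/dnerf/my_scene --expname "dnerf/my_scene" --configs arguments/dnerf/my_scene.py` (dataset path hypothetical).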