guanjunwu 2023-10-29 18:27:07 +08:00
parent 54f3436156
commit 9bde22336e
87 changed files with 265667 additions and 810 deletions

2
.gitignore vendored
View File

@ -8,5 +8,3 @@ tensorboard_3d
screenshots screenshots
data/ data/
data data
argument/
scripts/

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,255 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import torch\n",
"import os\n",
"import imageio"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
"# path = \"/data3/guanjunwu/project_scp/TiNeuVox/logs/interp_data/interp/chicken/render_test_fine_last\"\n",
"path = \"output/hypernerf4/interp/americano/test/ours_14000/renders\"\n",
"# \n",
"# path = \"output/dynamic3dgs/dynamic3dgs/basketball/test/ours_30000/renders\"\n",
"image_list = os.listdir(path)\n",
"len_image = len(image_list)\n",
"tile = image_list[0].split('.')[-1]"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"def sort_numeric_filenames(filenames):\n",
" \"\"\"\n",
" Sort a list of filenames based on the numeric part of the filename.\n",
" Assumes filenames have a format like '0000.png', '0001.png', etc.\n",
" \"\"\"\n",
" def extract_number(filename):\n",
" # 使用正则表达式提取文件名中的数字\n",
" match = re.search(r'\\d+', filename)\n",
" return int(match.group()) if match else 0\n",
"\n",
" # 使用提取的数字进行排序\n",
" return sorted(filenames, key=extract_number)\n",
"\n",
"# 示例文件名列表\n",
"filenames = image_list\n",
"\n",
"# 进行排序\n",
"sorted_filenames = sort_numeric_filenames(filenames)\n",
"sorted_filenames = [i for i in sorted_filenames if 'png' in i]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['000.png',\n",
" '001.png',\n",
" '002.png',\n",
" '003.png',\n",
" '004.png',\n",
" '005.png',\n",
" '006.png',\n",
" '007.png',\n",
" '008.png',\n",
" '009.png',\n",
" '010.png',\n",
" '011.png',\n",
" '012.png',\n",
" '013.png',\n",
" '014.png',\n",
" '015.png',\n",
" '016.png',\n",
" '017.png',\n",
" '018.png',\n",
" '019.png',\n",
" '020.png',\n",
" '021.png',\n",
" '022.png',\n",
" '023.png',\n",
" '024.png',\n",
" '025.png',\n",
" '026.png',\n",
" '027.png',\n",
" '028.png',\n",
" '029.png',\n",
" '030.png',\n",
" '031.png',\n",
" '032.png',\n",
" '033.png',\n",
" '034.png',\n",
" '035.png',\n",
" '036.png',\n",
" '037.png',\n",
" '038.png',\n",
" '039.png',\n",
" '040.png',\n",
" '041.png',\n",
" '042.png',\n",
" '043.png',\n",
" '044.png',\n",
" '045.png',\n",
" '046.png',\n",
" '047.png',\n",
" '048.png',\n",
" '049.png',\n",
" '050.png',\n",
" '051.png',\n",
" '052.png',\n",
" '053.png',\n",
" '054.png',\n",
" '055.png',\n",
" '056.png',\n",
" '057.png',\n",
" '058.png',\n",
" '059.png',\n",
" '060.png',\n",
" '061.png',\n",
" '062.png',\n",
" '063.png',\n",
" '064.png',\n",
" '065.png',\n",
" '066.png',\n",
" '067.png',\n",
" '068.png',\n",
" '069.png',\n",
" '070.png',\n",
" '071.png',\n",
" '072.png',\n",
" '073.png',\n",
" '074.png',\n",
" '075.png',\n",
" '076.png',\n",
" '077.png',\n",
" '078.png',\n",
" '079.png',\n",
" '080.png',\n",
" '081.png',\n",
" '082.png',\n",
" '083.png',\n",
" '084.png',\n",
" '085.png',\n",
" '086.png',\n",
" '087.png',\n",
" '088.png',\n",
" '089.png',\n",
" '090.png',\n",
" '091.png',\n",
" '092.png',\n",
" '093.png',\n",
" '094.png',\n",
" '095.png',\n",
" '096.png',\n",
" '097.png',\n",
" '098.png',\n",
" '099.png',\n",
" '100.png',\n",
" '101.png',\n",
" '102.png',\n",
" '103.png',\n",
" '104.png',\n",
" '105.png',\n",
" '106.png',\n",
" '107.png',\n",
" '108.png',\n",
" '109.png',\n",
" '110.png',\n",
" '111.png',\n",
" '112.png']"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted_filenames"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/data/guanjunwu/disk2/miniconda3/envs/Gaussians4D/lib/python3.7/site-packages/ipykernel_launcher.py:6: DeprecationWarning: Starting with ImageIO v3 the behavior of this function will switch to that of iio.v3.imread. To keep the current behavior (and make this warning disappear) use `import imageio.v2 as imageio` or call `imageio.v2.imread` directly.\n",
" \n",
"IMAGEIO FFMPEG_WRITER WARNING: input image is not divisible by macro_block_size=16, resizing from (536, 960) to (544, 960) to ensure video compatibility with most codecs and players. To prevent resizing, make your input image divisible by the macro_block_size or set the macro_block_size to 1 (risking incompatibility).\n",
"[swscaler @ 0x67a2580] Warning: data is not aligned! This can lead to a speed loss\n"
]
}
],
"source": [
"writer = imageio.get_writer(os.path.join(path,\"video111.mp4\"),fps=10)\n",
"video_num = 1\n",
"video_list = [[] for i in range(video_num)]\n",
"for i, image in enumerate(sorted_filenames):\n",
" if i % video_num == 0:\n",
" image = imageio.imread(os.path.join(path,image))\n",
" writer.append_data(image)\n",
"writer.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install imageio[ffmpeg]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.16"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
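
For reference, here is a minimal standalone version of the video-export cell above. It is a sketch, not part of the commit: it assumes `imageio[ffmpeg]` is installed and that `frames_dir` points at a folder of numbered `.png` renders (the example path is taken from the notebook), and it avoids both the ImageIO v2 deprecation warning and the `macro_block_size` resize noted in the cell output.

```python
import os
import re
import imageio.v2 as imageio  # silences the ImageIO v3 deprecation warning shown above

frames_dir = "output/hypernerf4/interp/americano/test/ours_14000/renders"  # example path from the notebook

def frame_number(name):
    # Sort by the numeric part of the filename ('000.png', '001.png', ...).
    match = re.search(r"\d+", name)
    return int(match.group()) if match else 0

frames = sorted((f for f in os.listdir(frames_dir) if f.endswith(".png")), key=frame_number)

# macro_block_size=1 keeps the original resolution instead of padding to a multiple of 16,
# at the cost of possible codec/player incompatibility (exactly the trade-off the warning describes).
writer = imageio.get_writer(os.path.join(frames_dir, "video.mp4"), fps=10, macro_block_size=1)
for name in frames:
    writer.append_data(imageio.imread(os.path.join(frames_dir, name)))
writer.close()
```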

File diff suppressed because one or more lines are too long

View File

@ -1,28 +1,24 @@
# 4D Gaussian Splatting for Real-Time Dynamic Scene Rendering # 4D Gaussian Splatting for Real-Time Dynamic Scene Rendering
## Arxiv Preprint ## arXiv Preprint
### [Project Page](https://guanjunwu.github.io/4dgs/index.html)| [Arxiv Paper](https://arxiv.org/abs/2310.08528) ### [Project Page](https://guanjunwu.github.io/4dgs/index.html)| [arXiv Paper](https://arxiv.org/abs/2310.08528)
[Guanjun Wu](https://guanjunwu.github.io/)<sup>1*</sup>, [Taoran Yi](https://github.com/taoranyi)<sup>2*</sup>, [Guanjun Wu](https://guanjunwu.github.io/)<sup>1*</sup>, [Taoran Yi](https://github.com/taoranyi)<sup>2*</sup>,
[Jiemin Fang](https://jaminfong.cn/)<sup>3</sup>, [Lingxi Xie](http://lingxixie.com/)<sup>3</sup>, </br>[Xiaopeng Zhang](https://sites.google.com/site/zxphistory/)<sup>3</sup>, [Wei Wei](https://www.eric-weiwei.com/)<sup>1</sup>,[Wenyu Liu](http://eic.hust.edu.cn/professor/liuwenyu/)<sup>2</sup>, [Qi Tian](https://scholar.google.com/citations?hl=en&user=61b6eYkAAAAJ)<sup>3</sup> , [Xinggang Wang](https://xinggangw.info/)<sup>2</sup> [Jiemin Fang](https://jaminfong.cn/)<sup>3</sup>, [Lingxi Xie](http://lingxixie.com/)<sup>3</sup>, </br>[Xiaopeng Zhang](https://scholar.google.com/citations?user=Ud6aBAcAAAAJ&hl=zh-CN)<sup>3</sup>, [Wei Wei](https://www.eric-weiwei.com/)<sup>1</sup>,[Wenyu Liu](http://eic.hust.edu.cn/professor/liuwenyu/)<sup>2</sup>, [Qi Tian](https://www.qitian1987.com/)<sup>3</sup> , [Xinggang Wang](https://xwcv.github.io)<sup>2‡</sup>
<sup>1</sup>School of CS, HUST &emsp; <sup>2</sup>School of EIC, HUST &emsp; <sup>3</sup>Huawei Inc. &emsp; <sup>1</sup>School of CS, HUST &emsp; <sup>2</sup>School of EIC, HUST &emsp; <sup>3</sup>Huawei Inc. &emsp;
<sup>\*</sup> Equal Contributions. <sup>$\ddagger$</sup> Project Lead. <sup></sup> Corresponding Author.
--------------------------------------------------- ---------------------------------------------------
![block](assets/teaserfig.png) ![block](assets/teaserfig.jpg)
Our method converges very quickly. And achieves real-time rendering speed. Our method converges very quickly and achieves real-time rendering speed.
Colab demo:[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/hustvl/4DGaussians/blob/master/4DGaussians.ipynb) (Thanks [camenduru](https://github.com/camenduru/4DGaussians-colab).)
<video width="320" height="240" controls>
<sourc src="assets/teaservideo.mp4" type="video/mp4">
</video>
<video width="320" height="240" controls>
<source src="assets/cut_roasted_beef_time.mp4" type="video/mp4">
</video>
## Environmental Setups ## Environmental Setups
@ -30,13 +26,13 @@ Please follow the [3D-GS](https://github.com/graphdeco-inria/gaussian-splatting)
```bash ```bash
git clone https://github.com/hustvl/4DGaussians git clone https://github.com/hustvl/4DGaussians
cd 4DGaussians cd 4DGaussians
git submodule update --init --recursive
conda create -n Gaussians4D python=3.7 conda create -n Gaussians4D python=3.7
conda activate Gaussians4D conda activate Gaussians4D
pip install -r requirements.txt pip install -r requirements.txt
cd submodules pip install -e submodules/depth-diff-gaussian-rasterization
git clone https://github.com/ingra14m/depth-diff-gaussian-rasterization pip install -e submodules/simple-knn
pip install -e depth-diff-gaussian-rasterization
``` ```
In our environment, we use pytorch=1.13.1+cu116. In our environment, we use pytorch=1.13.1+cu116.
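A quick sanity check after installation (a suggestion, not part of the README); the expected values are the ones the authors report above:

```python
import torch
# Expect 1.13.1 and 11.6 in the authors' environment.
print(torch.__version__, torch.version.cuda)
```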
## Data Preparation ## Data Preparation
@ -44,7 +40,7 @@ In our environment, we use pytorch=1.13.1+cu116.
The dataset provided in [D-NeRF](https://github.com/albertpumarola/D-NeRF) is used. You can download the dataset from [dropbox](https://www.dropbox.com/s/0bf6fl0ye2vz3vr/data.zip?dl=0). The dataset provided in [D-NeRF](https://github.com/albertpumarola/D-NeRF) is used. You can download the dataset from [dropbox](https://www.dropbox.com/s/0bf6fl0ye2vz3vr/data.zip?dl=0).
**For real dynamic scenes:** **For real dynamic scenes:**
The dataset provided in [HyperNeRF](https://github.com/google/hypernerf) is used. You can download scenes from [Hypernerf Dataset](https://github.com/google/hypernerf/releases/tag/v0.1) and organize them as [Nerfies](https://github.com/google/nerfies#datasets). Meanwhile, [Plenoptic Dataset](https://github.com/facebookresearch/Neural_3D_Video) could be downloaded from their offical websites, to save the memory, you should extract the frames of each video, them organize your dataset as follows. The dataset provided in [HyperNeRF](https://github.com/google/hypernerf) is used. You can download scenes from [Hypernerf Dataset](https://github.com/google/hypernerf/releases/tag/v0.1) and organize them as [Nerfies](https://github.com/google/nerfies#datasets). Meanwhile, [Plenoptic Dataset](https://github.com/facebookresearch/Neural_3D_Video) could be downloaded from their official websites. To save the memory, you should extract the frames of each video and then organize your dataset as follows.
``` ```
├── data ├── data
│ | dnerf │ | dnerf
@ -74,11 +70,11 @@ The dataset provided in [HyperNeRF](https://github.com/google/hypernerf) is used
## Training ## Training
For training synthetic scenes such as `lego`, run For training synthetic scenes such as `bouncingballs`, run
``` ```
python train.py -s data/dnerf/bouncingballs --port 6017 --expname "dnerf/bouncingballs" --configs arguments/dnerf/bouncingballs.py python train.py -s data/dnerf/bouncingballs --port 6017 --expname "dnerf/bouncingballs" --configs arguments/dnerf/bouncingballs.py
``` ```
You can custom your training config through the config files. You can customize your training config through the config files.
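As a rough illustration of what such a config file looks like, here is a hypothetical override (the filename and values are assumptions, modeled on the `arguments/` files touched in this commit): it inherits the shared defaults through `_base_` and overrides only the fields it lists.

```python
# arguments/dnerf/my_scene.py  (hypothetical file name)
_base_ = './dnerf_default.py'   # inherit the shared defaults

OptimizationParams = dict(
    iterations = 20000,
    batch_size = 1,
)

ModelHiddenParams = dict(
    multires = [1, 2],
    defor_depth = 0,
    net_width = 64,
)
```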
## Rendering ## Rendering
Run the following script to render the images. Run the following script to render the images.
@ -88,24 +84,66 @@ python render.py --model_path "output/dnerf/bouncingballs/" --skip_train --conf
## Evaluation ## Evaluation
Run the following script to evaluate the model. You can just run the following script to evaluate the model.
``` ```
python metrics.py --model_path "output/dnerf/bouncingballs/" python metrics.py --model_path "output/dnerf/bouncingballs/"
``` ```
## Scripts ## Scripts
There are some helpful scripts in `scripts/`, please feel free to use them.
`vis_point.py`:
get all point clouds at each timestamp.
usage:
```python
export exp_name="hypernerf"
python vis_point.py --model_path output/$exp_name/interp/aleks-teapot --configs arguments/$exp_name/default.py
```
`weight_visualization.ipynb`:
visualize the weights of the multi-resolution HexPlane module.
`merge_many_4dgs.py`:
merge your trained 4DGS models.
usage:
```python
export exp_name="dynerf"
python merge_many_4dgs.py --model_path output/$exp_name/flame_salmon_1
```
`colmap.sh`:
generate point clouds from input data
```bash
bash colmap.sh data/hypernerf/virg/vrig-chicken hypernerf
bash colmap.sh data/dynerf/flame_salmon_1 llff
```
**Blender** format doesn't seem to work yet. Pull requests to fix it are welcome.
`downsample_point.py`: downsample the point clouds generated by SfM.
```python
python scripts/downsample_point.py data/dynerf/sear_steak/points3D_downsample.ply data/dynerf/sear_steak/points3D_downsample2.ply
```
In my paper, I always use `colmap.sh` to generate dense point clouds and then downsample them to fewer than 40,000 points.
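A minimal sketch of that kind of downsampling with Open3D (the paths, voxel size, and growth factor are assumptions for illustration, not the logic of `scripts/downsample_point.py`):

```python
import open3d as o3d

pcd = o3d.io.read_point_cloud("data/dynerf/sear_steak/points3D.ply")  # hypothetical dense input

# Grow the voxel size until the cloud drops below ~40,000 points, as suggested above.
voxel = 0.01
down = pcd.voxel_down_sample(voxel_size=voxel)
while len(down.points) > 40000:
    voxel *= 1.5
    down = pcd.voxel_down_sample(voxel_size=voxel)

o3d.io.write_point_cloud("data/dynerf/sear_steak/points3D_downsample.ply", down)
```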
Here are some scripts that may be useful but were never adopted in my paper; you are welcome to try them.
--- ---
## Contributions
Some source code of ours is borrowed from [3DGS](https://github.com/graphdeco-inria/gaussian-splatting), [k-planes](https://github.com/Giodiro/kplanes_nerfstudio),[HexPlane](https://github.com/Caoang327/HexPlane), [TiNeuVox](https://github.com/hustvl/TiNeuVox). We sincerely appreciate the excellent works of these authors. **This project is still under development. Please feel free to raise issues or submit pull requests to contribute to our codebase.**
---
Some source code of ours is borrowed from [3DGS](https://github.com/graphdeco-inria/gaussian-splatting), [k-planes](https://github.com/Giodiro/kplanes_nerfstudio),[HexPlane](https://github.com/Caoang327/HexPlane), [TiNeuVox](https://github.com/hustvl/TiNeuVox). We sincerely appreciate the excellent works of these authors.
## Acknowledgement ## Acknowledgement
We would like to express our sincere gratitude to @zhouzhenghong-gt for his revisions to our code and discussions on the content of our paper. We would like to express our sincere gratitude to [@zhouzhenghong-gt](https://github.com/zhouzhenghong-gt/) for his revisions to our code and discussions on the content of our paper.
## Citation ## Citation
If you find this repository/work helpful in your research, welcome to cite the paper and give a ⭐. Some insights about neural voxel grids and dynamic scenes reconstruction originate from [TiNeuVox](https://github.com/hustvl/TiNeuVox). If you find this repository/work helpful in your research, welcome to cite these papers and give a ⭐.
``` ```
@article{wu20234dgaussians, @article{wu20234dgaussians,
title={4D Gaussian Splatting for Real-Time Dynamic Scene Rendering}, title={4D Gaussian Splatting for Real-Time Dynamic Scene Rendering},
@ -113,4 +151,11 @@ If you find this repository/work helpful in your research, welcome to cite the p
journal={arXiv preprint arXiv:2310.08528}, journal={arXiv preprint arXiv:2310.08528},
year={2023} year={2023}
} }
@inproceedings{TiNeuVox,
author = {Fang, Jiemin and Yi, Taoran and Wang, Xinggang and Xie, Lingxi and Zhang, Xiaopeng and Liu, Wenyu and Nie\ss{}ner, Matthias and Tian, Qi},
title = {Fast Dynamic Radiance Fields with Time-Aware Neural Voxels},
year = {2022},
booktitle = {SIGGRAPH Asia 2022 Conference Papers}
}
``` ```

BIN
alex.pth Normal file

Binary file not shown.

View File

@ -55,6 +55,9 @@ class ModelParams(ParamGroup):
self.data_device = "cuda" self.data_device = "cuda"
self.eval = True self.eval = True
self.render_process=False self.render_process=False
self.add_points=False
self.extension=".png"
self.llffhold=8
super().__init__(parser, "Loading Parameters", sentinel) super().__init__(parser, "Loading Parameters", sentinel)
def extract(self, args): def extract(self, args):
@ -66,7 +69,7 @@ class PipelineParams(ParamGroup):
def __init__(self, parser): def __init__(self, parser):
self.convert_SHs_python = False self.convert_SHs_python = False
self.compute_cov3D_python = False self.compute_cov3D_python = False
self.debug = False self.debug = True
super().__init__(parser, "Pipeline Parameters") super().__init__(parser, "Pipeline Parameters")
class ModelHiddenParams(ParamGroup): class ModelHiddenParams(ParamGroup):
def __init__(self, parser): def __init__(self, parser):
@ -89,10 +92,16 @@ class ModelHiddenParams(ParamGroup):
'resolution': [64, 64, 64, 25] 'resolution': [64, 64, 64, 25]
} }
self.multires = [1, 2, 4, 8] self.multires = [1, 2, 4, 8]
self.no_dx=False
self.no_grid=False self.no_grid=False
self.no_ds=False self.no_ds=False
self.no_dr=False self.no_dr=False
self.no_do=True self.no_do=True
self.no_dshs=True
self.empty_voxel=False
self.grid_pe=0
self.static_mlp=False
self.apply_rotation=False
super().__init__(parser, "ModelHiddenParams") super().__init__(parser, "ModelHiddenParams")
@ -100,6 +109,8 @@ class ModelHiddenParams(ParamGroup):
class OptimizationParams(ParamGroup): class OptimizationParams(ParamGroup):
def __init__(self, parser): def __init__(self, parser):
self.dataloader=False self.dataloader=False
self.zerostamp_init=False
self.custom_sampler=None
self.iterations = 30_000 self.iterations = 30_000
self.coarse_iterations = 3000 self.coarse_iterations = 3000
self.position_lr_init = 0.00016 self.position_lr_init = 0.00016
@ -134,8 +145,8 @@ class OptimizationParams(ParamGroup):
self.opacity_threshold_coarse = 0.005 self.opacity_threshold_coarse = 0.005
self.opacity_threshold_fine_init = 0.005 self.opacity_threshold_fine_init = 0.005
self.opacity_threshold_fine_after = 0.005 self.opacity_threshold_fine_after = 0.005
self.batch_size=1, self.batch_size=1
self.add_point=False
super().__init__(parser, "Optimization Parameters") super().__init__(parser, "Optimization Parameters")
def get_combined_args(parser : ArgumentParser): def get_combined_args(parser : ArgumentParser):

View File

@ -11,18 +11,22 @@ OptimizationParams = dict(
iterations = 20000, iterations = 20000,
pruning_interval = 8000, pruning_interval = 8000,
percent_dense = 0.01, percent_dense = 0.01,
render_process=True,
# no_do=False,
# no_dshs=False
# opacity_reset_interval=30000 # opacity_reset_interval=30000
) )
ModelHiddenParams = dict( ModelHiddenParams = dict(
multires = [1, 2, 4, 8 ], multires = [1, 2],
defor_depth = 0, defor_depth = 0,
net_width = 64, net_width = 64,
plane_tv_weight = 0, plane_tv_weight = 0.0001,
time_smoothness_weight = 0, time_smoothness_weight = 0.01,
l1_time_planes = 0, l1_time_planes = 0.0001,
weight_decay_iteration=0, weight_decay_iteration=0,
bounds=1.6 bounds=1.6
) )

View File

@ -1,10 +0,0 @@
_base_ = './dnerf_default.py'
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 32,
'resolution': [64, 64, 64, 75]
}
)

View File

@ -1,28 +0,0 @@
OptimizationParams = dict(
coarse_iterations = 3000,
deformation_lr_init = 0.00016,
deformation_lr_final = 0.0000016,
deformation_lr_delay_mult = 0.01,
grid_lr_init = 0.0016,
grid_lr_final = 0.000016,
iterations = 20000,
pruning_interval = 8000,
percent_dense = 0.01,
# opacity_reset_interval=30000
)
ModelHiddenParams = dict(
multires = [1, 2, 4, 8 ],
defor_depth = 0,
net_width = 64,
plane_tv_weight = 0.0002,
time_smoothness_weight = 0.001,
l1_time_planes = 0.001,
weight_decay_iteration=0,
bounds=1.6
)

View File

@ -1,10 +0,0 @@
_base_ = './dnerf_default.py'
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 32,
'resolution': [64, 64, 64, 50]
}
)

View File

@ -1,10 +0,0 @@
_base_ = './dnerf_default.py'
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 32,
'resolution': [64, 64, 64, 50]
}
)

View File

@ -1,16 +0,0 @@
_base_ = './dnerf_default.py'
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 32,
'resolution': [64, 64, 64, 25]
},
# deformation_lr_init = 0.001,
# deformation_lr_final = 0.001,
# deformation_lr_delay_mult = 0.01,
# grid_lr_init = 0.001,
# grid_lr_final = 0.001,
)

View File

@ -1,10 +0,0 @@
_base_ = './dnerf_default.py'
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 32,
'resolution': [64, 64, 64, 75]
}
)

View File

@ -1,10 +0,0 @@
_base_ = './dnerf_default.py'
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 32,
'resolution': [64, 64, 64, 75]
}
)

View File

@ -0,0 +1,4 @@
_base_ = './default.py'
OptimizationParams = dict(
)

View File

@ -0,0 +1,4 @@
_base_ = './default.py'
OptimizationParams = dict(
)

View File

@ -0,0 +1,4 @@
_base_ = './default.py'
OptimizationParams = dict(
)

View File

@ -5,25 +5,29 @@ ModelHiddenParams = dict(
'output_coordinate_dim': 16, 'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 150] 'resolution': [64, 64, 64, 150]
}, },
multires = [1,2,4,8], multires = [1,2],
defor_depth = 1, defor_depth = 0,
net_width = 256, net_width = 128,
plane_tv_weight = 0.0002, plane_tv_weight = 0.0002,
time_smoothness_weight = 0.001, time_smoothness_weight = 0.001,
l1_time_planes = 0.001, l1_time_planes = 0.0001,
no_do=False no_do=False,
no_dshs=False,
no_ds=False,
empty_voxel=False,
render_process=False,
static_mlp=False
) )
OptimizationParams = dict( OptimizationParams = dict(
dataloader=True, dataloader=True,
iterations = 30_000, iterations = 15000,
batch_size=4, batch_size=1,
coarse_iterations = 3000, coarse_iterations = 3000,
densify_until_iter = 20_000, densify_until_iter = 10_000,
opacity_reset_interval = 20000, # opacity_reset_interval = 60000,
opacity_threshold_coarse = 0.005,
opacity_threshold_coarse = 0.05, opacity_threshold_fine_init = 0.005,
opacity_threshold_fine_init = 0.05, opacity_threshold_fine_after = 0.005,
opacity_threshold_fine_after = 0.05,
# pruning_interval = 2000 # pruning_interval = 2000
) )

View File

@ -0,0 +1,4 @@
_base_ = './default.py'
OptimizationParams = dict(
)

View File

@ -0,0 +1,4 @@
_base_ = './default.py'
OptimizationParams = dict(
)

View File

@ -0,0 +1,4 @@
_base_ = './default.py'
OptimizationParams = dict(
)

View File

@ -1,29 +0,0 @@
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 150]
},
multires = [1,2,4,8],
defor_depth = 1,
net_width = 256,
plane_tv_weight = 0.0002,
time_smoothness_weight = 0.001,
l1_time_planes = 0.001,
no_do=False
)
OptimizationParams = dict(
dataloader=True,
iterations = 20_000,
batch_size=8,
coarse_iterations = 3000,
densify_until_iter = 20_000,
opacity_reset_interval = 3000,
opacity_threshold_coarse = 0.05,
opacity_threshold_fine_init = 0.05,
opacity_threshold_fine_after = 0.05,
# pruning_interval = 2000
)

View File

@ -1,29 +0,0 @@
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 150]
},
multires = [1,2,4,8],
defor_depth = 1,
net_width = 256,
plane_tv_weight = 0.0002,
time_smoothness_weight = 0.001,
l1_time_planes = 0.001,
no_do=False
)
OptimizationParams = dict(
dataloader=True,
iterations = 60_000,
batch_size=8,
coarse_iterations = 3000,
densify_until_iter = 20_000,
opacity_reset_interval = 20000,
opacity_threshold_coarse = 0.05,
opacity_threshold_fine_init = 0.05,
opacity_threshold_fine_after = 0.05,
# pruning_interval = 2000
)

View File

@ -1,29 +0,0 @@
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 150]
},
multires = [1,2,4,8],
defor_depth = 1,
net_width = 256,
plane_tv_weight = 0.0002,
time_smoothness_weight = 0.001,
l1_time_planes = 0.001,
no_do=False
)
OptimizationParams = dict(
dataloader=True,
iterations = 60_000,
batch_size=1,
coarse_iterations = 3000,
densify_until_iter = 40_000,
opacity_reset_interval = 20000,
opacity_threshold_coarse = 0.05,
opacity_threshold_fine_init = 0.05,
opacity_threshold_fine_after = 0.05,
# pruning_interval = 2000
)

View File

@ -1,34 +0,0 @@
ModelHiddenParams = dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 150]
},
multires = [1,2,4,8],
defor_depth = 1,
net_width = 256,
plane_tv_weight = 0.0002,
time_smoothness_weight = 0.001,
l1_time_planes = 0.001,
no_do=False
)
OptimizationParams = dict(
dataloader=True,
iterations = 10_000,
batch_size=8,
coarse_iterations = 10000,
densify_until_iter = 20_000,
opacity_reset_interval = 3000,
opacity_threshold_coarse = 0.05,
opacity_threshold_fine_init = 0.05,
opacity_threshold_fine_after = 0.05,
# pruning_interval = 2000
# deformation_lr_init = 0.00016,
# deformation_lr_final = 0.000016,
# deformation_lr_delay_mult = 0.01,
# grid_lr_init = 0.0016,
# grid_lr_final = 0.00016,
)

View File

@ -1,10 +1,11 @@
_base_ = './dnerf_default.py' _base_="default.py"
ModelParams=dict(
ModelHiddenParams = dict(
kplanes_config = { kplanes_config = {
'grid_dimensions': 2, 'grid_dimensions': 2,
'input_coordinate_dim': 4, 'input_coordinate_dim': 4,
'output_coordinate_dim': 32, 'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 100] 'resolution': [64, 64, 64, 100]
} },
)
OptimizationParams=dict(
) )

View File

@ -0,0 +1,11 @@
_base_="default.py"
ModelParams=dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 250]
},
)
OptimizationParams=dict(
)

View File

@ -1,10 +1,11 @@
_base_ = './dnerf_default.py' _base_="default.py"
ModelParams=dict(
ModelHiddenParams = dict(
kplanes_config = { kplanes_config = {
'grid_dimensions': 2, 'grid_dimensions': 2,
'input_coordinate_dim': 4, 'input_coordinate_dim': 4,
'output_coordinate_dim': 32, 'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 100] 'resolution': [64, 64, 64, 100]
} },
)
OptimizationParams=dict(
) )

View File

@ -0,0 +1,11 @@
_base_="default.py"
ModelParams=dict(
kplanes_config = {
'grid_dimensions': 2,
'input_coordinate_dim': 4,
'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 80]
},
)
OptimizationParams=dict(
)

View File

@ -5,33 +5,25 @@ ModelHiddenParams = dict(
'output_coordinate_dim': 16, 'output_coordinate_dim': 16,
'resolution': [64, 64, 64, 150] 'resolution': [64, 64, 64, 150]
}, },
multires = [1,2,4,8], multires = [1,2,4],
defor_depth = 2, defor_depth = 1,
net_width = 256, net_width = 128,
plane_tv_weight = 0.0002, plane_tv_weight = 0.0002,
time_smoothness_weight = 0.001, time_smoothness_weight = 0.001,
l1_time_planes = 0.001, l1_time_planes = 0.0001,
render_process=True
) )
OptimizationParams = dict( OptimizationParams = dict(
dataloader=False, # dataloader=True,
iterations = 30000, iterations = 14_000,
batch_size=1, batch_size=2,
coarse_iterations = 3000, coarse_iterations = 3000,
densify_until_iter = 20_000, densify_until_iter = 10_000,
opacity_reset_interval = 6000, opacity_reset_interval = 300000,
# position_lr_init = 0.00016, # grid_lr_init = 0.0016,
# position_lr_final = 0.0000016, # grid_lr_final = 16,
# position_lr_delay_mult = 0.01, # opacity_threshold_coarse = 0.005,
# position_lr_max_steps = 60_000, # opacity_threshold_fine_init = 0.005,
deformation_lr_init = 0.0016, # opacity_threshold_fine_after = 0.005,
deformation_lr_final = 0.00016,
deformation_lr_delay_mult = 0.01,
grid_lr_init = 0.016,
grid_lr_final = 0.0016,
# densify_until_iter = 50_000,
opacity_threshold_coarse = 0.005,
opacity_threshold_fine_init = 0.005,
opacity_threshold_fine_after = 0.005,
# pruning_interval = 2000 # pruning_interval = 2000
) )

BIN
assets/teaserfig.jpg Normal file

Binary file not shown.

After: 632 KiB

Binary file not shown.

Before: 893 KiB

25
colmap.sh Normal file
View File

@ -0,0 +1,25 @@
workdir=$1
datatype=$2 # blender, hypernerf, llff
export CUDA_VISIBLE_DEVICES=1
rm -rf $workdir/sparse_
rm -rf $workdir/image_colmap
python scripts/"$datatype"2colmap.py $workdir
rm -rf $workdir/colmap
rm -rf $workdir/colmap/sparse/0
mkdir $workdir/colmap
cp -r $workdir/image_colmap $workdir/colmap/images
cp -r $workdir/sparse_ $workdir/colmap/sparse_custom
colmap feature_extractor --database_path $workdir/colmap/database.db --image_path $workdir/colmap/images --SiftExtraction.max_image_size 4096 --SiftExtraction.max_num_features 16384 --SiftExtraction.estimate_affine_shape 1 --SiftExtraction.domain_size_pooling 1
python database.py --database_path $workdir/colmap/database.db --txt_path $workdir/colmap/sparse_custom/cameras.txt
colmap exhaustive_matcher --database_path $workdir/colmap/database.db
mkdir -p $workdir/colmap/sparse/0
colmap point_triangulator --database_path $workdir/colmap/database.db --image_path $workdir/colmap/images --input_path $workdir/colmap/sparse_custom --output_path $workdir/colmap/sparse/0 --clear_points 1
mkdir -p $workdir/colmap/dense/workspace
colmap image_undistorter --image_path $workdir/colmap/images --input_path $workdir/colmap/sparse/0 --output_path $workdir/colmap/dense/workspace
colmap patch_match_stereo --workspace_path $workdir/colmap/dense/workspace
colmap stereo_fusion --workspace_path $workdir/colmap/dense/workspace --output_path $workdir/colmap/dense/workspace/fused.ply

View File

@ -14,8 +14,8 @@ import math
from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer
from scene.gaussian_model import GaussianModel from scene.gaussian_model import GaussianModel
from utils.sh_utils import eval_sh from utils.sh_utils import eval_sh
from time import time as get_time
def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, override_color = None, stage="fine"): def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, override_color = None, stage="fine", cam_type=None):
""" """
Render the scene. Render the scene.
@ -31,33 +31,40 @@ def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor,
# Set up rasterization configuration # Set up rasterization configuration
tanfovx = math.tan(viewpoint_camera.FoVx * 0.5) means3D = pc.get_xyz
tanfovy = math.tan(viewpoint_camera.FoVy * 0.5) if cam_type != "PanopticSports":
tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
raster_settings = GaussianRasterizationSettings(
image_height=int(viewpoint_camera.image_height),
image_width=int(viewpoint_camera.image_width),
tanfovx=tanfovx,
tanfovy=tanfovy,
bg=bg_color,
scale_modifier=scaling_modifier,
viewmatrix=viewpoint_camera.world_view_transform.cuda(),
projmatrix=viewpoint_camera.full_proj_transform.cuda(),
sh_degree=pc.active_sh_degree,
campos=viewpoint_camera.camera_center.cuda(),
prefiltered=False,
debug=pipe.debug
)
time = torch.tensor(viewpoint_camera.time).to(means3D.device).repeat(means3D.shape[0],1)
else:
raster_settings = viewpoint_camera['camera']
time=torch.tensor(viewpoint_camera['time']).to(means3D.device).repeat(means3D.shape[0],1)
raster_settings = GaussianRasterizationSettings(
image_height=int(viewpoint_camera.image_height),
image_width=int(viewpoint_camera.image_width),
tanfovx=tanfovx,
tanfovy=tanfovy,
bg=bg_color,
scale_modifier=scaling_modifier,
viewmatrix=viewpoint_camera.world_view_transform.cuda(),
projmatrix=viewpoint_camera.full_proj_transform.cuda(),
sh_degree=pc.active_sh_degree,
campos=viewpoint_camera.camera_center.cuda(),
prefiltered=False,
# debug=pipe.debug
)
rasterizer = GaussianRasterizer(raster_settings=raster_settings) rasterizer = GaussianRasterizer(raster_settings=raster_settings)
# means3D = pc.get_xyz # means3D = pc.get_xyz
# add deformation to each points # add deformation to each points
# deformation = pc.get_deformation # deformation = pc.get_deformation
means3D = pc.get_xyz
time = torch.tensor(viewpoint_camera.time).to(means3D.device).repeat(means3D.shape[0],1)
means2D = screenspace_points means2D = screenspace_points
opacity = pc._opacity opacity = pc._opacity
shs = pc.get_features
# If precomputed 3d covariance is provided, use it. If not, then it will be computed from # If precomputed 3d covariance is provided, use it. If not, then it will be computed from
# scaling / rotation by the rasterizer. # scaling / rotation by the rasterizer.
@ -71,35 +78,30 @@ def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor,
rotations = pc._rotation rotations = pc._rotation
deformation_point = pc._deformation_table deformation_point = pc._deformation_table
if stage == "coarse" : if stage == "coarse" :
means3D_deform, scales_deform, rotations_deform, opacity_deform = means3D, scales, rotations, opacity means3D_final, scales_final, rotations_final, opacity_final, shs_final = means3D, scales, rotations, opacity, shs
else: else:
means3D_deform, scales_deform, rotations_deform, opacity_deform = pc._deformation(means3D[deformation_point], scales[deformation_point], # time0 = get_time()
rotations[deformation_point], opacity[deformation_point], # means3D_deform, scales_deform, rotations_deform, opacity_deform = pc._deformation(means3D[deformation_point], scales[deformation_point],
time[deformation_point]) # rotations[deformation_point], opacity[deformation_point],
# time[deformation_point])
means3D_final, scales_final, rotations_final, opacity_final, shs_final = pc._deformation(means3D, scales,
rotations, opacity, shs,
time)
# time1 = get_time()
# print("deformation forward:",time1-time0)
# print(time.max()) # print(time.max())
with torch.no_grad():
pc._deformation_accum[deformation_point] += torch.abs(means3D_deform-means3D[deformation_point])
means3D_final = torch.zeros_like(means3D)
rotations_final = torch.zeros_like(rotations)
scales_final = torch.zeros_like(scales)
opacity_final = torch.zeros_like(opacity)
means3D_final[deformation_point] = means3D_deform
rotations_final[deformation_point] = rotations_deform
scales_final[deformation_point] = scales_deform
opacity_final[deformation_point] = opacity_deform
means3D_final[~deformation_point] = means3D[~deformation_point]
rotations_final[~deformation_point] = rotations[~deformation_point]
scales_final[~deformation_point] = scales[~deformation_point]
opacity_final[~deformation_point] = opacity[~deformation_point]
# time2 = get_time()
# print("asset value:",time2-time1)
scales_final = pc.scaling_activation(scales_final) scales_final = pc.scaling_activation(scales_final)
rotations_final = pc.rotation_activation(rotations_final) rotations_final = pc.rotation_activation(rotations_final)
opacity = pc.opacity_activation(opacity) opacity = pc.opacity_activation(opacity_final)
# print(opacity.max()) # print(opacity.max())
# If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
# from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer. # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
shs = None # shs = None
colors_precomp = None colors_precomp = None
if override_color is None: if override_color is None:
if pipe.convert_SHs_python: if pipe.convert_SHs_python:
@ -109,21 +111,25 @@ def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor,
sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized) sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0) colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
else: else:
shs = pc.get_features pass
# shs =
else: else:
colors_precomp = override_color colors_precomp = override_color
# Rasterize visible Gaussians to image, obtain their radii (on screen). # Rasterize visible Gaussians to image, obtain their radii (on screen).
# time3 = get_time()
rendered_image, radii, depth = rasterizer( rendered_image, radii, depth = rasterizer(
means3D = means3D_final, means3D = means3D_final,
means2D = means2D, means2D = means2D,
shs = shs, shs = shs_final,
colors_precomp = colors_precomp, colors_precomp = colors_precomp,
opacities = opacity, opacities = opacity,
scales = scales_final, scales = scales_final,
rotations = rotations_final, rotations = rotations_final,
cov3D_precomp = cov3D_precomp) cov3D_precomp = cov3D_precomp)
# time4 = get_time()
# print("rasterization:",time4-time3)
# breakpoint()
# Those Gaussians that were frustum culled or had a radius of 0 were not visible. # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
# They will be excluded from value updates used in the splitting criteria. # They will be excluded from value updates used in the splitting criteria.
return {"render": rendered_image, return {"render": rendered_image,

230
merge_many_4dgs.py Normal file
View File

@ -0,0 +1,230 @@
import imageio
import numpy as np
import torch
from scene import Scene
import os
import cv2
from tqdm import tqdm
from os import makedirs
from gaussian_renderer import render
import torchvision
from utils.general_utils import safe_state
from argparse import ArgumentParser
from arguments import ModelParams, PipelineParams, get_combined_args, ModelHiddenParams
from gaussian_renderer import GaussianModel
from time import time
import open3d as o3d
# import torch.multiprocessing as mp
import threading
import concurrent.futures
from copy import deepcopy
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#
import torch
import math
from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer
from scene.gaussian_model import GaussianModel
from utils.render_utils import get_state_at_time
from tqdm import tqdm
def rotate_point_cloud(point_cloud, displacement, rotation_angles, scales_bias):
theta, phi = rotation_angles
rotation_matrix_z = torch.tensor([
[torch.cos(theta), -torch.sin(theta), 0],
[torch.sin(theta), torch.cos(theta), 0],
[0, 0, 1]
]).to(point_cloud)
rotation_matrix_x = torch.tensor([
[1, 0, 0],
[0, torch.cos(phi), -torch.sin(phi)],
[0, torch.sin(phi), torch.cos(phi)]
]).to(point_cloud)
rotation_matrix = torch.matmul(rotation_matrix_z, rotation_matrix_x)
# print(rotation_matrix)
point_cloud = point_cloud*scales_bias
rotated_point_cloud = torch.matmul(point_cloud, rotation_matrix.t())
displaced_point_cloud = rotated_point_cloud + displacement
return displaced_point_cloud
@torch.no_grad()
def render(viewpoint_camera, gaussians, bg_color : torch.Tensor, scaling_modifier = 1.0, motion_bias = [torch.tensor([0,0,0])], rotation_bias = [torch.tensor([0,0])],
scales_bias=[1,1]):
"""
Render the scene.
Background tensor (bg_color) must be on GPU!
"""
# Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
# Set up rasterization configuration
tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
screenspace_points = None
for pc in gaussians:
if screenspace_points is None:
screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0
else:
screenspace_points1 = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0
screenspace_points = torch.cat([screenspace_points,screenspace_points1],dim=0)
try:
screenspace_points.retain_grad()
except:
pass
raster_settings = GaussianRasterizationSettings(
image_height=int(viewpoint_camera.image_height),
image_width=int(viewpoint_camera.image_width),
tanfovx=tanfovx,
tanfovy=tanfovy,
bg=bg_color,
scale_modifier=scaling_modifier,
viewmatrix=viewpoint_camera.world_view_transform.cuda(),
projmatrix=viewpoint_camera.full_proj_transform.cuda(),
sh_degree=gaussians[0].active_sh_degree,
campos=viewpoint_camera.camera_center.cuda(),
prefiltered=False,
debug=False
)
rasterizer = GaussianRasterizer(raster_settings=raster_settings)
# means3D = pc.get_xyz
# add deformation to each points
# deformation = pc.get_deformation
means3D_final, scales_final, rotations_final, opacity_final, shs_final = None, None, None, None, None
for index, pc in enumerate(gaussians):
means3D_final1, scales_final1, rotations_final1, opacity_final1, shs_final1 = get_state_at_time(pc, viewpoint_camera)
if index == 0:
means3D_final, scales_final, rotations_final, opacity_final, shs_final = means3D_final1, scales_final1, rotations_final1, opacity_final1, shs_final1
else:
motion_bias_t = motion_bias[index-1].to(means3D_final)
rotation_bias_t = rotation_bias[index-1].to(means3D_final)
means3D_final1 = rotate_point_cloud(means3D_final1,motion_bias_t,rotation_bias_t,scales_bias[index-1])
# breakpoint()
scales_final1 = scales_final1*scales_bias[index-1]
means3D_final = torch.cat([means3D_final,means3D_final1],dim=0)
scales_final = torch.cat([scales_final,scales_final1],dim=0)
rotations_final = torch.cat([rotations_final,rotations_final1],dim=0)
opacity_final = torch.cat([opacity_final,opacity_final1],dim=0)
shs_final = torch.cat([shs_final,shs_final1],dim=0)
colors_precomp = None
cov3D_precomp = None
rendered_image, radii, depth = rasterizer(
means3D = means3D_final,
means2D = screenspace_points,
shs = shs_final,
colors_precomp = colors_precomp,
opacities = opacity_final,
scales = scales_final,
rotations = rotations_final,
cov3D_precomp = cov3D_precomp)
return {"render": rendered_image,
"viewspace_points": screenspace_points,
"visibility_filter" : radii > 0,
"radii": radii,
"depth":depth}
def init_gaussians(dataset : ModelParams, hyperparam, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, skip_video: bool):
with torch.no_grad():
gaussians = GaussianModel(dataset.sh_degree, hyperparam)
scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False)
bg_color = [1,1,1] if dataset.white_background else [0, 0, 0]
background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
print("hello!!")
return gaussians, scene, background
def save_point_cloud(points, model_path, timestamp):
output_path = os.path.join(model_path,"point_pertimestamp")
if not os.path.exists(output_path):
os.makedirs(output_path,exist_ok=True)
points = points.detach().cpu().numpy()
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
ply_path = os.path.join(output_path,f"points_{timestamp}.ply")
o3d.io.write_point_cloud(ply_path, pcd)
parser = ArgumentParser(description="Testing script parameters")
model = ModelParams(parser, sentinel=True)
pipeline = PipelineParams(parser)
hyperparam = ModelHiddenParams(parser)
parser.add_argument("--iteration", default=-1, type=int)
parser.add_argument("--skip_train", action="store_true")
parser.add_argument("--skip_test", action="store_true")
parser.add_argument("--quiet", action="store_true")
parser.add_argument("--skip_video", action="store_true")
parser.add_argument("--configs1", type=str, default="arguments/dynerf_9/flame_salmon_1.py")
parser.add_argument("--configs2", type=str, default="arguments/dnerf_tv_2/hellwarrior.py")
parser.add_argument("--modelpath2", type=str, default="output/dnerf_tv_2/hellwarrior")
parser.add_argument("--configs3", type=str, default="arguments/dnerf_tv_2/mutant.py")
parser.add_argument("--modelpath3", type=str, default="output/dnerf_tv_2/mutant")
render_path = "output/editing_render_flame_salmon"
args = get_combined_args(parser)
print("Rendering " , args.model_path)
args2 = deepcopy(args)
args3 = deepcopy(args)
if args.configs1:
import mmcv
from utils.params_utils import merge_hparams
config = mmcv.Config.fromfile(args.configs1)
args1 = merge_hparams(args, config)
# breakpoint()
if args2.configs2:
import mmcv
from utils.params_utils import merge_hparams
config = mmcv.Config.fromfile(args2.configs2)
args2 = merge_hparams(args2, config)
args2.model_path = args2.modelpath2
if args3.configs3:
import mmcv
from utils.params_utils import merge_hparams
config = mmcv.Config.fromfile(args3.configs3)
args3 = merge_hparams(args3, config)
args3.model_path = args3.modelpath3
safe_state(args.quiet)
gaussians1, scene1, background = init_gaussians(model.extract(args1), hyperparam.extract(args1), args1.iteration, pipeline.extract(args1), args1.skip_train, args1.skip_test, args1.skip_video)
gaussians2, scene2, background = init_gaussians(model.extract(args2), hyperparam.extract(args2), args2.iteration, pipeline.extract(args2), args2.skip_train, args2.skip_test, args2.skip_video)
gaussians3, scene3, background = init_gaussians(model.extract(args3), hyperparam.extract(args3), args3.iteration, pipeline.extract(args3), args3.skip_train, args3.skip_test, args3.skip_video)
gaussians = [gaussians1,gaussians2,gaussians3]
# breakpoint()
to8b = lambda x : (255*np.clip(x.cpu().numpy(),0,1)).astype(np.uint8)
render_images=[]
if not os.path.exists(render_path):
os.makedirs(render_path,exist_ok=True)
for index, viewpoint in tqdm(enumerate(scene1.getVideoCameras())):
result = render(viewpoint, gaussians,
bg_color=background,
motion_bias=[
torch.tensor([4,4,12]),
torch.tensor([-2,4,12])
]
,rotation_bias=[
torch.tensor([0,1.9*np.pi/4]),
torch.tensor([0,1.9*np.pi/4])
],
scales_bias = [1,1])
render_images.append(to8b(result["render"]).transpose(1,2,0))
torchvision.utils.save_image(result["render"],os.path.join(render_path,f"output_image{index}.png"))
imageio.mimwrite(os.path.join(render_path, 'video_rgb.mp4'), render_images, fps=30, codec='libx265')
# points = get_state_at_time(gaussians, viewpoint)
# save_point_cloud(points, args.model_path, index)
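In other words, `rotate_point_cloud` above applies to every point $p$ of a merged model the scale-rotate-translate transform

$$p' = R_z(\theta)\,R_x(\phi)\,(s\,p) + t,$$

where $\theta, \phi$ are the two entries of `rotation_angles`, $s$ is `scales_bias`, and $t$ is `displacement`.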

View File

@ -20,7 +20,7 @@ import json
from tqdm import tqdm from tqdm import tqdm
from utils.image_utils import psnr from utils.image_utils import psnr
from argparse import ArgumentParser from argparse import ArgumentParser
from pytorch_msssim import ms_ssim
def readImages(renders_dir, gt_dir): def readImages(renders_dir, gt_dir):
renders = [] renders = []
gts = [] gts = []
@ -67,30 +67,50 @@ def evaluate(model_paths):
ssims = [] ssims = []
psnrs = [] psnrs = []
lpipss = [] lpipss = []
lpipsa = []
ms_ssims = []
Dssims = []
for idx in tqdm(range(len(renders)), desc="Metric evaluation progress"): for idx in tqdm(range(len(renders)), desc="Metric evaluation progress"):
ssims.append(ssim(renders[idx], gts[idx])) ssims.append(ssim(renders[idx], gts[idx]))
psnrs.append(psnr(renders[idx], gts[idx])) psnrs.append(psnr(renders[idx], gts[idx]))
lpipss.append(lpips(renders[idx], gts[idx], net_type='vgg')) lpipss.append(lpips(renders[idx], gts[idx], net_type='vgg'))
ms_ssims.append(ms_ssim(renders[idx], gts[idx],data_range=1, size_average=True ))
lpipsa.append(lpips(renders[idx], gts[idx], net_type='alex'))
Dssims.append((1-ms_ssims[-1])/2)
print("Scene: ", scene_dir, "SSIM : {:>12.7f}".format(torch.tensor(ssims).mean(), ".5")) print("Scene: ", scene_dir, "SSIM : {:>12.7f}".format(torch.tensor(ssims).mean(), ".5"))
print("Scene: ", scene_dir, "PSNR : {:>12.7f}".format(torch.tensor(psnrs).mean(), ".5")) print("Scene: ", scene_dir, "PSNR : {:>12.7f}".format(torch.tensor(psnrs).mean(), ".5"))
print("Scene: ", scene_dir, "LPIPS: {:>12.7f}".format(torch.tensor(lpipss).mean(), ".5")) print("Scene: ", scene_dir, "LPIPS-vgg: {:>12.7f}".format(torch.tensor(lpipss).mean(), ".5"))
print("") print("Scene: ", scene_dir, "LPIPS-alex: {:>12.7f}".format(torch.tensor(lpipsa).mean(), ".5"))
print("Scene: ", scene_dir, "MS-SSIM: {:>12.7f}".format(torch.tensor(ms_ssims).mean(), ".5"))
print("Scene: ", scene_dir, "D-SSIM: {:>12.7f}".format(torch.tensor(Dssims).mean(), ".5"))
full_dict[scene_dir][method].update({"SSIM": torch.tensor(ssims).mean().item(), full_dict[scene_dir][method].update({"SSIM": torch.tensor(ssims).mean().item(),
"PSNR": torch.tensor(psnrs).mean().item(), "PSNR": torch.tensor(psnrs).mean().item(),
"LPIPS": torch.tensor(lpipss).mean().item()}) "LPIPS-vgg": torch.tensor(lpipss).mean().item(),
"LPIPS-alex": torch.tensor(lpipsa).mean().item(),
"MS-SSIM": torch.tensor(ms_ssims).mean().item(),
"D-SSIM": torch.tensor(Dssims).mean().item()},
)
per_view_dict[scene_dir][method].update({"SSIM": {name: ssim for ssim, name in zip(torch.tensor(ssims).tolist(), image_names)}, per_view_dict[scene_dir][method].update({"SSIM": {name: ssim for ssim, name in zip(torch.tensor(ssims).tolist(), image_names)},
"PSNR": {name: psnr for psnr, name in zip(torch.tensor(psnrs).tolist(), image_names)}, "PSNR": {name: psnr for psnr, name in zip(torch.tensor(psnrs).tolist(), image_names)},
"LPIPS": {name: lp for lp, name in zip(torch.tensor(lpipss).tolist(), image_names)}}) "LPIPS-vgg": {name: lp for lp, name in zip(torch.tensor(lpipss).tolist(), image_names)},
"LPIPS-alex": {name: lp for lp, name in zip(torch.tensor(lpipsa).tolist(), image_names)},
"MS-SSIM": {name: lp for lp, name in zip(torch.tensor(ms_ssims).tolist(), image_names)},
"D-SSIM": {name: lp for lp, name in zip(torch.tensor(Dssims).tolist(), image_names)},
}
)
with open(scene_dir + "/results.json", 'w') as fp: with open(scene_dir + "/results.json", 'w') as fp:
json.dump(full_dict[scene_dir], fp, indent=True) json.dump(full_dict[scene_dir], fp, indent=True)
with open(scene_dir + "/per_view.json", 'w') as fp: with open(scene_dir + "/per_view.json", 'w') as fp:
json.dump(per_view_dict[scene_dir], fp, indent=True) json.dump(per_view_dict[scene_dir], fp, indent=True)
except: except Exception as e:
print("Unable to compute metrics for model", scene_dir) print("Unable to compute metrics for model", scene_dir)
raise e
if __name__ == "__main__": if __name__ == "__main__":
device = torch.device("cuda:0") device = torch.device("cuda:0")
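
For reference, a self-contained sketch of the MS-SSIM / D-SSIM computation added above, assuming `pytorch_msssim` (added to `requirements.txt` in this commit) and images as float tensors in `[0, 1]` with shape `(1, 3, H, W)`; the random tensors are placeholders for a render / ground-truth pair.

```python
import torch
from pytorch_msssim import ms_ssim

render = torch.rand(1, 3, 512, 512)  # placeholder render (H and W must exceed 160 for MS-SSIM's downsampling)
gt = torch.rand(1, 3, 512, 512)      # placeholder ground truth

msssim = ms_ssim(render, gt, data_range=1, size_average=True)
dssim = (1 - msssim) / 2  # D-SSIM, as appended to Dssims above
print(float(msssim), float(dssim))
```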

View File

@ -23,8 +23,28 @@ from argparse import ArgumentParser
from arguments import ModelParams, PipelineParams, get_combined_args, ModelHiddenParams from arguments import ModelParams, PipelineParams, get_combined_args, ModelHiddenParams
from gaussian_renderer import GaussianModel from gaussian_renderer import GaussianModel
from time import time from time import time
# import torch.multiprocessing as mp
import threading
import concurrent.futures
def multithread_write(image_list, path):
executor = concurrent.futures.ThreadPoolExecutor(max_workers=None)
def write_image(image, count, path):
try:
torchvision.utils.save_image(image, os.path.join(path, '{0:05d}'.format(count) + ".png"))
return count, True
except:
return count, False
tasks = []
for index, image in enumerate(image_list):
tasks.append(executor.submit(write_image, image, index, path))
executor.shutdown()
for index, task in enumerate(tasks):
_, status = task.result()
if status == False:
write_image(image_list[index], index, path)
to8b = lambda x : (255*np.clip(x.cpu().numpy(),0,1)).astype(np.uint8) to8b = lambda x : (255*np.clip(x.cpu().numpy(),0,1)).astype(np.uint8)
def render_set(model_path, name, iteration, views, gaussians, pipeline, background): def render_set(model_path, name, iteration, views, gaussians, pipeline, background, cam_type):
render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders") render_path = os.path.join(model_path, name, "ours_{}".format(iteration), "renders")
gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt") gts_path = os.path.join(model_path, name, "ours_{}".format(iteration), "gt")
@ -33,49 +53,52 @@ def render_set(model_path, name, iteration, views, gaussians, pipeline, backgrou
render_images = [] render_images = []
gt_list = [] gt_list = []
render_list = [] render_list = []
# breakpoint()
print("point nums:",gaussians._xyz.shape[0])
for idx, view in enumerate(tqdm(views, desc="Rendering progress")): for idx, view in enumerate(tqdm(views, desc="Rendering progress")):
if idx == 0:time1 = time() if idx == 0:time1 = time()
rendering = render(view, gaussians, pipeline, background)["render"] # breakpoint()
rendering = render(view, gaussians, pipeline, background,cam_type=cam_type)["render"]
# torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png")) # torchvision.utils.save_image(rendering, os.path.join(render_path, '{0:05d}'.format(idx) + ".png"))
render_images.append(to8b(rendering).transpose(1,2,0)) render_images.append(to8b(rendering).transpose(1,2,0))
# print(to8b(rendering).shape) # print(to8b(rendering).shape)
render_list.append(rendering) render_list.append(rendering)
if name in ["train", "test"]: if name in ["train", "test"]:
gt = view.original_image[0:3, :, :] if cam_type != "PanopticSports":
gt = view.original_image[0:3, :, :]
else:
gt = view['image'].cuda()
# torchvision.utils.save_image(gt, os.path.join(gts_path, '{0:05d}'.format(idx) + ".png")) # torchvision.utils.save_image(gt, os.path.join(gts_path, '{0:05d}'.format(idx) + ".png"))
gt_list.append(gt) gt_list.append(gt)
# if idx >= 10:
# break
time2=time() time2=time()
print("FPS:",(len(views)-1)/(time2-time1)) print("FPS:",(len(views)-1)/(time2-time1))
count = 0 # print("writing training images.")
print("writing training images.")
if len(gt_list) != 0: multithread_write(gt_list, gts_path)
for image in tqdm(gt_list): # print("writing rendering images.")
torchvision.utils.save_image(image, os.path.join(gts_path, '{0:05d}'.format(count) + ".png"))
count+=1 multithread_write(render_list, render_path)
count = 0
print("writing rendering images.")
if len(render_list) != 0:
for image in tqdm(render_list):
torchvision.utils.save_image(image, os.path.join(render_path, '{0:05d}'.format(count) + ".png"))
count +=1
imageio.mimwrite(os.path.join(model_path, name, "ours_{}".format(iteration), 'video_rgb.mp4'), render_images, fps=30, quality=8) imageio.mimwrite(os.path.join(model_path, name, "ours_{}".format(iteration), 'video_rgb.mp4'), render_images, fps=30)
def render_sets(dataset : ModelParams, hyperparam, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, skip_video: bool): def render_sets(dataset : ModelParams, hyperparam, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, skip_video: bool):
with torch.no_grad(): with torch.no_grad():
gaussians = GaussianModel(dataset.sh_degree, hyperparam) gaussians = GaussianModel(dataset.sh_degree, hyperparam)
scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False) scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False)
cam_type=scene.dataset_type
bg_color = [1,1,1] if dataset.white_background else [0, 0, 0] bg_color = [1,1,1] if dataset.white_background else [0, 0, 0]
background = torch.tensor(bg_color, dtype=torch.float32, device="cuda") background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
if not skip_train: if not skip_train:
render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background) render_set(dataset.model_path, "train", scene.loaded_iter, scene.getTrainCameras(), gaussians, pipeline, background,cam_type)
if not skip_test: if not skip_test:
render_set(dataset.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, pipeline, background) render_set(dataset.model_path, "test", scene.loaded_iter, scene.getTestCameras(), gaussians, pipeline, background,cam_type)
if not skip_video: if not skip_video:
render_set(dataset.model_path,"video",scene.loaded_iter,scene.getVideoCameras(),gaussians,pipeline,background) render_set(dataset.model_path,"video",scene.loaded_iter,scene.getVideoCameras(),gaussians,pipeline,background,cam_type)
if __name__ == "__main__": if __name__ == "__main__":
# Set up command line argument parser # Set up command line argument parser
parser = ArgumentParser(description="Testing script parameters") parser = ArgumentParser(description="Testing script parameters")

View File

@ -6,3 +6,5 @@ matplotlib
argparse argparse
lpips lpips
plyfile plyfile
pytorch_msssim
open3d

27
run.sh Normal file
View File

@ -0,0 +1,27 @@
# bash scripts/process_dnerf.sh dnerf_ab/dnerf_tv_30000
# bash scripts/process_dnerf.sh dnerf_ab/dnerf_tv_depth2
# bash scripts/process_dnerf_2.sh dnerf_tv_dshs
# bash scripts/process_dnerf_2.sh dnerf_tv_do
# bash scripts/process_dnerf_2.sh dnerf_tv_2
# bash scripts/process_dnerf_2.sh dnerf_tv_8
# bash scripts/process_dnerf_2.sh dnerf_tv_deepmlp
# bash scripts/process_dnerf_2.sh dnerf_tv_nods
# bash scripts/process_dnerf.sh dnerf_ab/dnerf_tv
# exp_name1="hypernerf_3dgs"
# export CUDA_VISIBLE_DEVICES=2&&python render2.py --model_path "output/$exp_name1/3dprinter/" --skip_train --configs arguments/$exp_name1/3dprinter.py &
# export CUDA_VISIBLE_DEVICES=3&&python render2.py --model_path "output/$exp_name1/broom2/" --skip_train --configs arguments/$exp_name1/broom2.py &
# #
# wait
# export CUDA_VISIBLE_DEVICES=2&&python render2.py --model_path "output/$exp_name1/peel-banana/" --skip_train --configs arguments/$exp_name1/banana.py &
# export CUDA_VISIBLE_DEVICES=3&&python render2.py --model_path "output/$exp_name1/vrig-chicken/" --skip_train --configs arguments/$exp_name1/chicken.py &
# wait
# exp_name="hypernerf_3dgs"
# export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/vrig-chicken/" &
# export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/peel-banana/" &
# export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/broom2/" &
# export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/3dprinter/" &
# bash scripts/train_ablation.sh dnerf_tv_2_1
bash scripts/train_ablation.sh dnerf_tv_2_1_nodrds


@ -19,7 +19,7 @@ from scene.dataset import FourDGSdataset
from arguments import ModelParams from arguments import ModelParams
from utils.camera_utils import cameraList_from_camInfos, camera_to_JSON from utils.camera_utils import cameraList_from_camInfos, camera_to_JSON
from torch.utils.data import Dataset from torch.utils.data import Dataset
from scene.dataset_readers import add_points
class Scene: class Scene:
gaussians : GaussianModel gaussians : GaussianModel
@ -43,54 +43,41 @@ class Scene:
self.test_cameras = {} self.test_cameras = {}
self.video_cameras = {} self.video_cameras = {}
if os.path.exists(os.path.join(args.source_path, "sparse")): if os.path.exists(os.path.join(args.source_path, "sparse")):
scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.eval) scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.eval, args.llffhold)
dataset_type="colmap"
elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")): elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")):
print("Found transforms_train.json file, assuming Blender data set!") print("Found transforms_train.json file, assuming Blender data set!")
scene_info = sceneLoadTypeCallbacks["Blender"](args.source_path, args.white_background, args.eval) scene_info = sceneLoadTypeCallbacks["Blender"](args.source_path, args.white_background, args.eval, args.extension)
dataset_type="blender"
elif os.path.exists(os.path.join(args.source_path, "poses_bounds.npy")): elif os.path.exists(os.path.join(args.source_path, "poses_bounds.npy")):
scene_info = sceneLoadTypeCallbacks["dynerf"](args.source_path, args.white_background, args.eval) scene_info = sceneLoadTypeCallbacks["dynerf"](args.source_path, args.white_background, args.eval)
dataset_type="dynerf"
elif os.path.exists(os.path.join(args.source_path,"dataset.json")): elif os.path.exists(os.path.join(args.source_path,"dataset.json")):
scene_info = sceneLoadTypeCallbacks["nerfies"](args.source_path, False, args.eval) scene_info = sceneLoadTypeCallbacks["nerfies"](args.source_path, False, args.eval)
dataset_type="nerfies"
elif os.path.exists(os.path.join(args.source_path,"train_meta.json")):
scene_info = sceneLoadTypeCallbacks["PanopticSports"](args.source_path)
dataset_type="PanopticSports"
else: else:
assert False, "Could not recognize scene type!" assert False, "Could not recognize scene type!"
self.maxtime = scene_info.maxtime self.maxtime = scene_info.maxtime
# if not self.loaded_iter: self.dataset_type = dataset_type
# with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply") , 'wb') as dest_file:
# dest_file.write(src_file.read())
# json_cams = []
# camlist = []
# if scene_info.test_cameras:
# camlist.extend(scene_info.test_cameras)
# if scene_info.train_cameras:
# camlist.extend(scene_info.train_cameras)
# for id, cam in enumerate(camlist):
# json_cams.append(camera_to_JSON(id, cam))
# with open(os.path.join(self.model_path, "cameras.json"), 'w') as file:
# json.dump(json_cams, file)
# if shuffle:
# random.shuffle(scene_info.train_cameras) # Multi-res consistent random shuffling
# random.shuffle(scene_info.test_cameras) # Multi-res consistent random shuffling
self.cameras_extent = scene_info.nerf_normalization["radius"] self.cameras_extent = scene_info.nerf_normalization["radius"]
# for resolution_scale in resolution_scales:
# print("Loading Training Cameras")
# self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args)
# print("Loading Test Cameras")
# self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args)
# print("Loading Video Cameras")
# self.video_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.video_cameras, resolution_scale, args)
print("Loading Training Cameras") print("Loading Training Cameras")
self.train_camera = FourDGSdataset(scene_info.train_cameras, args) self.train_camera = FourDGSdataset(scene_info.train_cameras, args, dataset_type)
print("Loading Test Cameras") print("Loading Test Cameras")
self.test_camera = FourDGSdataset(scene_info.test_cameras, args) self.test_camera = FourDGSdataset(scene_info.test_cameras, args, dataset_type)
print("Loading Video Cameras") print("Loading Video Cameras")
self.video_camera = cameraList_from_camInfos(scene_info.video_cameras,-1,args) self.video_camera = FourDGSdataset(scene_info.video_cameras, args, dataset_type)
# self.video_camera = cameraList_from_camInfos(scene_info.video_cameras,-1,args)
xyz_max = scene_info.point_cloud.points.max(axis=0) xyz_max = scene_info.point_cloud.points.max(axis=0)
xyz_min = scene_info.point_cloud.points.min(axis=0) xyz_min = scene_info.point_cloud.points.min(axis=0)
self.gaussians._deformation.deformation_net.grid.set_aabb(xyz_max,xyz_min) if args.add_points:
print("add points.")
# breakpoint()
scene_info = scene_info._replace(point_cloud=add_points(scene_info.point_cloud, xyz_max=xyz_max, xyz_min=xyz_min))
self.gaussians._deformation.deformation_net.set_aabb(xyz_max,xyz_min)
if self.loaded_iter: if self.loaded_iter:
self.gaussians.load_ply(os.path.join(self.model_path, self.gaussians.load_ply(os.path.join(self.model_path,
"point_cloud", "point_cloud",
@ -100,16 +87,6 @@ class Scene:
"point_cloud", "point_cloud",
"iteration_" + str(self.loaded_iter), "iteration_" + str(self.loaded_iter),
)) ))
# elif load_coarse:
# self.gaussians.load_ply(os.path.join(self.model_path,
# "point_cloud",
# "coarse_iteration_" + str(load_coarse),
# "point_cloud.ply"))
# self.gaussians.load_model(os.path.join(self.model_path,
# "point_cloud",
# "coarse_iteration_" + str(load_coarse),
# ))
# print("load coarse stage gaussians")
else: else:
self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent, self.maxtime) self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent, self.maxtime)

scene/camera.py Normal file

@ -0,0 +1,307 @@
import numpy as np
import os,sys,time
import torch
import torch.nn.functional as torch_F
import collections
from easydict import EasyDict as edict
import util
from util import log,debug
class Pose():
"""
A class of operations on camera poses (PyTorch tensors with shape [...,3,4])
each [3,4] camera pose takes the form of [R|t]
"""
def __call__(self,R=None,t=None):
# construct a camera pose from the given R and/or t
assert(R is not None or t is not None)
if R is None:
if not isinstance(t,torch.Tensor): t = torch.tensor(t)
R = torch.eye(3,device=t.device).repeat(*t.shape[:-1],1,1)
elif t is None:
if not isinstance(R,torch.Tensor): R = torch.tensor(R)
t = torch.zeros(R.shape[:-1],device=R.device)
else:
if not isinstance(R,torch.Tensor): R = torch.tensor(R)
if not isinstance(t,torch.Tensor): t = torch.tensor(t)
assert(R.shape[:-1]==t.shape and R.shape[-2:]==(3,3))
R = R.float()
t = t.float()
pose = torch.cat([R,t[...,None]],dim=-1) # [...,3,4]
assert(pose.shape[-2:]==(3,4))
return pose
def invert(self,pose,use_inverse=False):
# invert a camera pose
R,t = pose[...,:3],pose[...,3:]
R_inv = R.inverse() if use_inverse else R.transpose(-1,-2)
t_inv = (-R_inv@t)[...,0]
pose_inv = self(R=R_inv,t=t_inv)
return pose_inv
def compose(self,pose_list):
# compose a sequence of poses together
# pose_new(x) = poseN o ... o pose2 o pose1(x)
pose_new = pose_list[0]
for pose in pose_list[1:]:
pose_new = self.compose_pair(pose_new,pose)
return pose_new
def compose_pair(self,pose_a,pose_b):
# pose_new(x) = pose_b o pose_a(x)
R_a,t_a = pose_a[...,:3],pose_a[...,3:]
R_b,t_b = pose_b[...,:3],pose_b[...,3:]
R_new = R_b@R_a
t_new = (R_b@t_a+t_b)[...,0]
pose_new = self(R=R_new,t=t_new)
return pose_new
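# --- Illustrative usage sketch (not part of the original commit): how the Pose helper
# --- above is typically exercised. The rotation/translation values are placeholders.
def _example_pose_usage():
    pose_util = Pose()                                       # hypothetical local instance
    yaw = np.pi / 6
    R = torch.tensor([[np.cos(yaw), -np.sin(yaw), 0.0],
                      [np.sin(yaw),  np.cos(yaw), 0.0],
                      [0.0,          0.0,         1.0]])
    p_rot = pose_util(R=R)                                   # [3,4] rotation-only pose
    p_trans = pose_util(t=torch.tensor([0.0, 0.0, 2.0]))     # [3,4] translation-only pose
    p_both = pose_util.compose([p_rot, p_trans])             # apply p_rot first, then p_trans
    p_roundtrip = pose_util.compose([p_both, pose_util.invert(p_both)])
    return p_roundtrip                                       # ~[I|0] up to numerical error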
class Lie():
"""
Lie algebra for SO(3) and SE(3) operations in PyTorch
"""
def so3_to_SO3(self,w): # [...,3]
wx = self.skew_symmetric(w)
theta = w.norm(dim=-1)[...,None,None]
I = torch.eye(3,device=w.device,dtype=torch.float32)
A = self.taylor_A(theta)
B = self.taylor_B(theta)
R = I+A*wx+B*wx@wx
return R
def SO3_to_so3(self,R,eps=1e-7): # [...,3,3]
trace = R[...,0,0]+R[...,1,1]+R[...,2,2]
theta = ((trace-1)/2).clamp(-1+eps,1-eps).acos_()[...,None,None]%np.pi # ln(R) will explode if theta==pi
lnR = 1/(2*self.taylor_A(theta)+1e-8)*(R-R.transpose(-2,-1)) # FIXME: wei-chiu finds it weird
w0,w1,w2 = lnR[...,2,1],lnR[...,0,2],lnR[...,1,0]
w = torch.stack([w0,w1,w2],dim=-1)
return w
def se3_to_SE3(self,wu): # [...,3]
w,u = wu.split([3,3],dim=-1)
wx = self.skew_symmetric(w)
theta = w.norm(dim=-1)[...,None,None]
I = torch.eye(3,device=w.device,dtype=torch.float32)
A = self.taylor_A(theta)
B = self.taylor_B(theta)
C = self.taylor_C(theta)
R = I+A*wx+B*wx@wx
V = I+B*wx+C*wx@wx
Rt = torch.cat([R,(V@u[...,None])],dim=-1)
return Rt
def SE3_to_se3(self,Rt,eps=1e-8): # [...,3,4]
R,t = Rt.split([3,1],dim=-1)
w = self.SO3_to_so3(R)
wx = self.skew_symmetric(w)
theta = w.norm(dim=-1)[...,None,None]
I = torch.eye(3,device=w.device,dtype=torch.float32)
A = self.taylor_A(theta)
B = self.taylor_B(theta)
invV = I-0.5*wx+(1-A/(2*B))/(theta**2+eps)*wx@wx
u = (invV@t)[...,0]
wu = torch.cat([w,u],dim=-1)
return wu
def skew_symmetric(self,w):
w0,w1,w2 = w.unbind(dim=-1)
O = torch.zeros_like(w0)
wx = torch.stack([torch.stack([O,-w2,w1],dim=-1),
torch.stack([w2,O,-w0],dim=-1),
torch.stack([-w1,w0,O],dim=-1)],dim=-2)
return wx
def taylor_A(self,x,nth=10):
# Taylor expansion of sin(x)/x
ans = torch.zeros_like(x)
denom = 1.
for i in range(nth+1):
if i>0: denom *= (2*i)*(2*i+1)
ans = ans+(-1)**i*x**(2*i)/denom
return ans
def taylor_B(self,x,nth=10):
# Taylor expansion of (1-cos(x))/x**2
ans = torch.zeros_like(x)
denom = 1.
for i in range(nth+1):
denom *= (2*i+1)*(2*i+2)
ans = ans+(-1)**i*x**(2*i)/denom
return ans
def taylor_C(self,x,nth=10):
# Taylor expansion of (x-sin(x))/x**3
ans = torch.zeros_like(x)
denom = 1.
for i in range(nth+1):
denom *= (2*i+2)*(2*i+3)
ans = ans+(-1)**i*x**(2*i)/denom
return ans
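# --- Illustrative sketch (not in the original commit): round-trip check of the
# --- exponential/log maps above; the axis-angle vector is a placeholder.
def _example_lie_roundtrip():
    lie_util = Lie()                          # hypothetical local instance
    w = torch.tensor([[0.1, -0.2, 0.3]])      # [B,3] axis-angle (so(3)) vector
    R = lie_util.so3_to_SO3(w)                # Rodrigues: I + A*wx + B*wx@wx
    w_back = lie_util.SO3_to_so3(R)           # should recover w up to numerical error
    return torch.allclose(w, w_back, atol=1e-5)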
class Quaternion():
def q_to_R(self,q):
# https://en.wikipedia.org/wiki/Rotation_matrix#Quaternion
qa,qb,qc,qd = q.unbind(dim=-1)
R = torch.stack([torch.stack([1-2*(qc**2+qd**2),2*(qb*qc-qa*qd),2*(qa*qc+qb*qd)],dim=-1),
torch.stack([2*(qb*qc+qa*qd),1-2*(qb**2+qd**2),2*(qc*qd-qa*qb)],dim=-1),
torch.stack([2*(qb*qd-qa*qc),2*(qa*qb+qc*qd),1-2*(qb**2+qc**2)],dim=-1)],dim=-2)
return R
def R_to_q(self,R,eps=1e-8): # [B,3,3]
# https://en.wikipedia.org/wiki/Rotation_matrix#Quaternion
# FIXME: this function seems a bit problematic, need to double-check
row0,row1,row2 = R.unbind(dim=-2)
R00,R01,R02 = row0.unbind(dim=-1)
R10,R11,R12 = row1.unbind(dim=-1)
R20,R21,R22 = row2.unbind(dim=-1)
t = R[...,0,0]+R[...,1,1]+R[...,2,2]
r = (1+t+eps).sqrt()
qa = 0.5*r
qb = (R21-R12).sign()*0.5*(1+R00-R11-R22+eps).sqrt()
qc = (R02-R20).sign()*0.5*(1-R00+R11-R22+eps).sqrt()
qd = (R10-R01).sign()*0.5*(1-R00-R11+R22+eps).sqrt()
q = torch.stack([qa,qb,qc,qd],dim=-1)
for i,qi in enumerate(q):
if torch.isnan(qi).any():
K = torch.stack([torch.stack([R00-R11-R22,R10+R01,R20+R02,R12-R21],dim=-1),
torch.stack([R10+R01,R11-R00-R22,R21+R12,R20-R02],dim=-1),
torch.stack([R20+R02,R21+R12,R22-R00-R11,R01-R10],dim=-1),
torch.stack([R12-R21,R20-R02,R01-R10,R00+R11+R22],dim=-1)],dim=-2)/3.0
K = K[i]
eigval,eigvec = torch.linalg.eigh(K)
V = eigvec[:,eigval.argmax()]
q[i] = torch.stack([V[3],V[0],V[1],V[2]])
return q
def invert(self,q):
qa,qb,qc,qd = q.unbind(dim=-1)
norm = q.norm(dim=-1,keepdim=True)
q_inv = torch.stack([qa,-qb,-qc,-qd],dim=-1)/norm**2
return q_inv
def product(self,q1,q2): # [B,4]
q1a,q1b,q1c,q1d = q1.unbind(dim=-1)
q2a,q2b,q2c,q2d = q2.unbind(dim=-1)
hamil_prod = torch.stack([q1a*q2a-q1b*q2b-q1c*q2c-q1d*q2d,
q1a*q2b+q1b*q2a+q1c*q2d-q1d*q2c,
q1a*q2c-q1b*q2d+q1c*q2a+q1d*q2b,
q1a*q2d+q1b*q2c-q1c*q2b+q1d*q2a],dim=-1)
return hamil_prod
pose = Pose()
lie = Lie()
quaternion = Quaternion()
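# --- Illustrative sketch (not in the original commit): quaternion <-> rotation-matrix
# --- conversion with the module-level `quaternion` helper; the sample value is a placeholder.
def _example_quaternion_roundtrip():
    q = torch.tensor([[0.9239, 0.3827, 0.0, 0.0]])   # ~45-degree rotation about x, [B,4]
    R = quaternion.q_to_R(q)
    q_back = quaternion.R_to_q(R)
    return torch.allclose(q, q_back, atol=1e-3)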
def to_hom(X):
# get homogeneous coordinates of the input
X_hom = torch.cat([X,torch.ones_like(X[...,:1])],dim=-1)
return X_hom
# basic operations of transforming 3D points between world/camera/image coordinates
def world2cam(X,pose): # [B,N,3]
X_hom = to_hom(X)
return X_hom@pose.transpose(-1,-2)
def cam2img(X,cam_intr):
return X@cam_intr.transpose(-1,-2)
def img2cam(X,cam_intr):
return X@cam_intr.inverse().transpose(-1,-2)
def cam2world(X,pose):
X_hom = to_hom(X)
pose_inv = Pose().invert(pose)
return X_hom@pose_inv.transpose(-1,-2)
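# --- Illustrative sketch (not in the original commit): projecting world points to pixel
# --- coordinates with the helpers above. Intrinsics, pose, and points are placeholders.
def _example_project_points():
    pts_world = torch.rand(1, 8, 3)                          # [B,N,3] world-space points
    intr = torch.tensor([[[500.0, 0.0, 320.0],
                          [0.0, 500.0, 240.0],
                          [0.0, 0.0, 1.0]]])                 # [B,3,3] pinhole intrinsics
    w2c = pose(R=torch.eye(3), t=torch.tensor([0.0, 0.0, 4.0]))[None]  # [B,3,4]
    pts_cam = world2cam(pts_world, w2c)                      # camera coordinates
    pix_hom = cam2img(pts_cam, intr)                         # homogeneous pixel coordinates
    return pix_hom[..., :2] / pix_hom[..., 2:]               # [B,N,2] pixel coordinates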
def angle_to_rotation_matrix(a,axis):
# get the rotation matrix from Euler angle around specific axis
roll = dict(X=1,Y=2,Z=0)[axis]
O = torch.zeros_like(a)
I = torch.ones_like(a)
M = torch.stack([torch.stack([a.cos(),-a.sin(),O],dim=-1),
torch.stack([a.sin(),a.cos(),O],dim=-1),
torch.stack([O,O,I],dim=-1)],dim=-2)
M = M.roll((roll,roll),dims=(-2,-1))
return M
def get_center_and_ray(opt,pose,intr=None): # [HW,2]
# given the intrinsic/extrinsic matrices, get the camera center and ray directions
assert(opt.camera.model=="perspective")
with torch.no_grad():
# compute image coordinate grid
y_range = torch.arange(opt.H,dtype=torch.float32,device=opt.device).add_(0.5)
x_range = torch.arange(opt.W,dtype=torch.float32,device=opt.device).add_(0.5)
Y,X = torch.meshgrid(y_range,x_range) # [H,W]
xy_grid = torch.stack([X,Y],dim=-1).view(-1,2) # [HW,2]
# compute center and ray
batch_size = len(pose)
xy_grid = xy_grid.repeat(batch_size,1,1) # [B,HW,2]
grid_3D = img2cam(to_hom(xy_grid),intr) # [B,HW,3]
center_3D = torch.zeros_like(grid_3D) # [B,HW,3]
# transform from camera to world coordinates
grid_3D = cam2world(grid_3D,pose) # [B,HW,3]
center_3D = cam2world(center_3D,pose) # [B,HW,3]
ray = grid_3D-center_3D # [B,HW,3]
return center_3D,ray
def get_3D_points_from_depth(opt,center,ray,depth,multi_samples=False):
if multi_samples: center,ray = center[:,:,None],ray[:,:,None]
# x = c+dv
points_3D = center+ray*depth # [B,HW,3]/[B,HW,N,3]/[N,3]
return points_3D
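# --- Illustrative sketch (not in the original commit): shooting one ray per pixel for a
# --- tiny 4x4 perspective camera and lifting unit depths to 3D. `opt` is a placeholder
# --- options object carrying the fields the helpers above expect.
def _example_rays_and_depth():
    opt = edict(H=4, W=4, device="cpu", camera=edict(model="perspective"))
    intr = torch.tensor([[[2.0, 0.0, 2.0],
                          [0.0, 2.0, 2.0],
                          [0.0, 0.0, 1.0]]])                      # [1,3,3]
    cam_pose = pose(R=torch.eye(3), t=torch.zeros(3))[None]       # [1,3,4]
    center, ray = get_center_and_ray(opt, cam_pose, intr=intr)    # [1,16,3] each
    depth = torch.ones(1, opt.H * opt.W, 1)
    return get_3D_points_from_depth(opt, center, ray, depth)      # x = c + d*v, [1,16,3]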
def convert_NDC(opt,center,ray,intr,near=1):
# shift camera center (ray origins) to near plane (z=1)
# (unlike conventional NDC, we assume the cameras are facing towards the +z direction)
center = center+(near-center[...,2:])/ray[...,2:]*ray
# projection
cx,cy,cz = center.unbind(dim=-1) # [B,HW]
rx,ry,rz = ray.unbind(dim=-1) # [B,HW]
scale_x = intr[:,0,0]/intr[:,0,2] # [B]
scale_y = intr[:,1,1]/intr[:,1,2] # [B]
cnx = scale_x[:,None]*(cx/cz)
cny = scale_y[:,None]*(cy/cz)
cnz = 1-2*near/cz
rnx = scale_x[:,None]*(rx/rz-cx/cz)
rny = scale_y[:,None]*(ry/rz-cy/cz)
rnz = 2*near/cz
center_ndc = torch.stack([cnx,cny,cnz],dim=-1) # [B,HW,3]
ray_ndc = torch.stack([rnx,rny,rnz],dim=-1) # [B,HW,3]
return center_ndc,ray_ndc
def rotation_distance(R1,R2,eps=1e-7):
# http://www.boris-belousov.net/2016/12/01/quat-dist/
R_diff = R1@R2.transpose(-2,-1)
trace = R_diff[...,0,0]+R_diff[...,1,1]+R_diff[...,2,2]
angle = ((trace-1)/2).clamp(-1+eps,1-eps).acos_() # numerical stability near -1/+1
return angle
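# --- Illustrative sketch (not in the original commit): rotation_distance returns the
# --- geodesic angle between two rotations; a 90-degree yaw should give ~pi/2.
def _example_rotation_distance():
    R1 = torch.eye(3)
    R2 = torch.tensor([[0.0, -1.0, 0.0],
                       [1.0,  0.0, 0.0],
                       [0.0,  0.0, 1.0]])   # 90-degree rotation about z
    return rotation_distance(R1, R2)        # ~1.5708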
def procrustes_analysis(X0,X1): # [N,3]
# translation
t0 = X0.mean(dim=0,keepdim=True)
t1 = X1.mean(dim=0,keepdim=True)
X0c = X0-t0
X1c = X1-t1
# scale
s0 = (X0c**2).sum(dim=-1).mean().sqrt()
s1 = (X1c**2).sum(dim=-1).mean().sqrt()
X0cs = X0c/s0
X1cs = X1c/s1
# rotation (use double for SVD, float loses precision)
U,S,V = (X0cs.t()@X1cs).double().svd(some=True)
R = (U@V.t()).float()
if R.det()<0: R[2] *= -1
# align X1 to X0: X1to0 = (X1-t1)/s1@R.t()*s0+t0
sim3 = edict(t0=t0[0],t1=t1[0],s0=s0,s1=s1,R=R)
return sim3
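# --- Illustrative sketch (not in the original commit): aligning a shifted and scaled copy
# --- of a point set back onto the original with procrustes_analysis. X0/X1 are synthetic.
def _example_procrustes():
    X0 = torch.randn(100, 3)
    X1 = (X0 - torch.tensor([1.0, 0.0, -0.5])) / 2.0          # translated, scaled copy
    sim3 = procrustes_analysis(X0, X1)
    X1to0 = (X1 - sim3.t1) / sim3.s1 @ sim3.R.t() * sim3.s0 + sim3.t0
    return torch.allclose(X1to0, X0, atol=1e-4)               # expected: True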
def get_novel_view_poses(opt,pose_anchor,N=60,scale=1):
# create circular viewpoints (small oscillations)
theta = torch.arange(N)/N*2*np.pi
R_x = angle_to_rotation_matrix((theta.sin()*0.05).asin(),"X")
R_y = angle_to_rotation_matrix((theta.cos()*0.05).asin(),"Y")
pose_rot = pose(R=R_y@R_x)
pose_shift = pose(t=[0,0,-4*scale])
pose_shift2 = pose(t=[0,0,3.8*scale])
pose_oscil = pose.compose([pose_shift,pose_rot,pose_shift2])
pose_novel = pose.compose([pose_oscil,pose_anchor.cpu()[None]])
return pose_novel


@ -17,7 +17,8 @@ from utils.graphics_utils import getWorld2View2, getProjectionMatrix
class Camera(nn.Module): class Camera(nn.Module):
def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask, def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask,
image_name, uid, image_name, uid,
trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda", time = 0 trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda", time = 0,
mask = None, depth=None
): ):
super(Camera, self).__init__() super(Camera, self).__init__()
@ -35,7 +36,8 @@ class Camera(nn.Module):
print(e) print(e)
print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" ) print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" )
self.data_device = torch.device("cuda") self.data_device = torch.device("cuda")
self.original_image = image.clamp(0.0, 1.0) self.original_image = image.clamp(0.0, 1.0)[:3,:,:]
# breakpoint()
# .to(self.data_device) # .to(self.data_device)
self.image_width = self.original_image.shape[2] self.image_width = self.original_image.shape[2]
self.image_height = self.original_image.shape[1] self.image_height = self.original_image.shape[1]
@ -46,8 +48,8 @@ class Camera(nn.Module):
else: else:
self.original_image *= torch.ones((1, self.image_height, self.image_width)) self.original_image *= torch.ones((1, self.image_height, self.image_width))
# , device=self.data_device) # , device=self.data_device)
self.depth = depth
self.mask = mask
self.zfar = 100.0 self.zfar = 100.0
self.znear = 0.01 self.znear = 0.01


@ -10,27 +10,37 @@ class FourDGSdataset(Dataset):
def __init__( def __init__(
self, self,
dataset, dataset,
args args,
dataset_type
): ):
self.dataset = dataset self.dataset = dataset
self.args = args self.args = args
self.dataset_type=dataset_type
def __getitem__(self, index): def __getitem__(self, index):
# breakpoint()
try: if self.dataset_type != "PanopticSports":
image, w2c, time = self.dataset[index] try:
R,T = w2c image, w2c, time = self.dataset[index]
FovX = focal2fov(self.dataset.focal[0], image.shape[2]) R,T = w2c
FovY = focal2fov(self.dataset.focal[0], image.shape[1]) FovX = focal2fov(self.dataset.focal[0], image.shape[2])
except: FovY = focal2fov(self.dataset.focal[0], image.shape[1])
caminfo = self.dataset[index] mask=None
image = caminfo.image except:
R = caminfo.R caminfo = self.dataset[index]
T = caminfo.T image = caminfo.image
FovX = caminfo.FovX R = caminfo.R
FovY = caminfo.FovY T = caminfo.T
time = caminfo.time FovX = caminfo.FovX
return Camera(colmap_id=index,R=R,T=T,FoVx=FovX,FoVy=FovY,image=image,gt_alpha_mask=None, FovY = caminfo.FovY
image_name=f"{index}",uid=index,data_device=torch.device("cuda"),time=time) time = caminfo.time
mask = caminfo.mask
return Camera(colmap_id=index,R=R,T=T,FoVx=FovX,FoVy=FovY,image=image,gt_alpha_mask=None,
image_name=f"{index}",uid=index,data_device=torch.device("cuda"),time=time,
mask=mask)
else:
return self.dataset[index]
def __len__(self): def __len__(self):
return len(self.dataset) return len(self.dataset)


@ -12,6 +12,8 @@
import os import os
import sys import sys
from PIL import Image from PIL import Image
from scene.cameras import Camera
from typing import NamedTuple from typing import NamedTuple
from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \ from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \
read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text
@ -40,6 +42,7 @@ class CameraInfo(NamedTuple):
width: int width: int
height: int height: int
time : float time : float
mask: np.array
class SceneInfo(NamedTuple): class SceneInfo(NamedTuple):
point_cloud: BasicPointCloud point_cloud: BasicPointCloud
@ -70,7 +73,7 @@ def getNerfppNorm(cam_info):
radius = diagonal * 1.1 radius = diagonal * 1.1
translate = -center translate = -center
# breakpoint()
return {"translate": translate, "radius": radius} return {"translate": translate, "radius": radius}
def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder): def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
@ -113,7 +116,7 @@ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
image = PILtoTorch(image,None) image = PILtoTorch(image,None)
cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image, cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
image_path=image_path, image_name=image_name, width=width, height=height, image_path=image_path, image_name=image_name, width=width, height=height,
time = 0) time = 0, mask=None)
cam_infos.append(cam_info) cam_infos.append(cam_info)
sys.stdout.write('\n') sys.stdout.write('\n')
return cam_infos return cam_infos
@ -130,11 +133,12 @@ def storePly(path, xyz, rgb):
# Define the dtype for the structured array # Define the dtype for the structured array
dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'), dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'), ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'),
('red', 'u1'), ('green', 'u1'), ('blue', 'u1')] ('red', 'f4'), ('green', 'f4'), ('blue', 'f4')]
normals = np.zeros_like(xyz) normals = np.zeros_like(xyz)
elements = np.empty(xyz.shape[0], dtype=dtype) elements = np.empty(xyz.shape[0], dtype=dtype)
# breakpoint()
attributes = np.concatenate((xyz, normals, rgb), axis=1) attributes = np.concatenate((xyz, normals, rgb), axis=1)
elements[:] = list(map(tuple, attributes)) elements[:] = list(map(tuple, attributes))
@ -158,7 +162,7 @@ def readColmapSceneInfo(path, images, eval, llffhold=8):
reading_dir = "images" if images == None else images reading_dir = "images" if images == None else images
cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, images_folder=os.path.join(path, reading_dir)) cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, images_folder=os.path.join(path, reading_dir))
cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : x.image_name) cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : x.image_name)
# breakpoint()
if eval: if eval:
train_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold != 0] train_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold != 0]
test_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold == 0] test_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % llffhold == 0]
@ -184,7 +188,7 @@ def readColmapSceneInfo(path, images, eval, llffhold=8):
except: except:
pcd = None pcd = None
scene_info = SceneInfo(point_cloud=pcd, scene_info = SceneInfo(point_cloud=pcd,
train_cameras=train_cam_infos, train_cameras=train_cam_infos,
test_cameras=test_cam_infos, test_cameras=test_cam_infos,
@ -219,11 +223,16 @@ def generateCamerasFromTransforms(path, template_transformsfile, extension, maxt
return c2w return c2w
cam_infos = [] cam_infos = []
# generate render poses and times # generate render poses and times
render_poses = torch.stack([pose_spherical(angle, -30.0, 4.0) for angle in np.linspace(-180,180,40+1)[:-1]], 0) render_poses = torch.stack([pose_spherical(angle, -30.0, 4.0) for angle in np.linspace(-180,180,160+1)[:-1]], 0)
render_times = torch.linspace(0,maxtime,render_poses.shape[0]) render_times = torch.linspace(0,maxtime,render_poses.shape[0])
with open(os.path.join(path, template_transformsfile)) as json_file: with open(os.path.join(path, template_transformsfile)) as json_file:
template_json = json.load(json_file) template_json = json.load(json_file)
fovx = template_json["camera_angle_x"] try:
fovx = template_json["camera_angle_x"]
except:
fovx = focal2fov(template_json["fl_x"], template_json['w'])
print("hello!!!!")
# breakpoint()
# load a single image to get image info. # load a single image to get image info.
for idx, frame in enumerate(template_json["frames"]): for idx, frame in enumerate(template_json["frames"]):
cam_name = os.path.join(path, frame["file_path"] + extension) cam_name = os.path.join(path, frame["file_path"] + extension)
@ -245,15 +254,17 @@ def generateCamerasFromTransforms(path, template_transformsfile, extension, maxt
FovX = fovx FovX = fovx
cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image, cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
image_path=None, image_name=None, width=image.shape[1], height=image.shape[2], image_path=None, image_name=None, width=image.shape[1], height=image.shape[2],
time = time)) time = time, mask=None))
return cam_infos return cam_infos
def readCamerasFromTransforms(path, transformsfile, white_background, extension=".png", mapper = {}): def readCamerasFromTransforms(path, transformsfile, white_background, extension=".png", mapper = {}):
cam_infos = [] cam_infos = []
with open(os.path.join(path, transformsfile)) as json_file: with open(os.path.join(path, transformsfile)) as json_file:
contents = json.load(json_file) contents = json.load(json_file)
fovx = contents["camera_angle_x"] try:
fovx = contents["camera_angle_x"]
except:
fovx = focal2fov(contents['fl_x'],contents['w'])
frames = contents["frames"] frames = contents["frames"]
for idx, frame in enumerate(frames): for idx, frame in enumerate(frames):
cam_name = os.path.join(path, frame["file_path"] + extension) cam_name = os.path.join(path, frame["file_path"] + extension)
@ -281,7 +292,7 @@ def readCamerasFromTransforms(path, transformsfile, white_background, extension=
cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image, cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
image_path=image_path, image_name=image_name, width=image.shape[1], height=image.shape[2], image_path=image_path, image_name=image_name, width=image.shape[1], height=image.shape[2],
time = time)) time = time, mask=None))
return cam_infos return cam_infos
def read_timeline(path): def read_timeline(path):
@ -314,20 +325,23 @@ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
nerf_normalization = getNerfppNorm(train_cam_infos) nerf_normalization = getNerfppNorm(train_cam_infos)
ply_path = os.path.join(path, "points3d.ply") ply_path = os.path.join(path, "fused.ply")
# Since this data set has no colmap data, we start with random points if not os.path.exists(ply_path):
num_pts = 2000 # Since this data set has no colmap data, we start with random points
print(f"Generating random point cloud ({num_pts})...") num_pts = 2000
print(f"Generating random point cloud ({num_pts})...")
# We create random points inside the bounds of the synthetic Blender scenes
xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3 # We create random points inside the bounds of the synthetic Blender scenes
shs = np.random.random((num_pts, 3)) / 255.0 # xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3
pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3))) xyz = np.random.random((num_pts, 3)) * 0.5 - 0.25
storePly(ply_path, xyz, SH2RGB(shs) * 255) shs = np.random.random((num_pts, 3)) / 255.0
try: pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3)))
# storePly(ply_path, xyz, SH2RGB(shs) * 255)
else:
pcd = fetchPly(ply_path) pcd = fetchPly(ply_path)
except: # xyz = -np.array(pcd.points)
pcd = None # pcd = pcd._replace(points=xyz)
scene_info = SceneInfo(point_cloud=pcd, scene_info = SceneInfo(point_cloud=pcd,
train_cameras=train_cam_infos, train_cameras=train_cam_infos,
@ -353,7 +367,7 @@ def format_infos(dataset,split):
FovY = focal2fov(dataset.focal[0], image.shape[2]) FovY = focal2fov(dataset.focal[0], image.shape[2])
cameras.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image, cameras.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
image_path=image_path, image_name=image_name, width=image.shape[2], height=image.shape[1], image_path=image_path, image_name=image_name, width=image.shape[2], height=image.shape[1],
time = time)) time = time, mask=None))
return cameras return cameras
@ -361,24 +375,30 @@ def format_infos(dataset,split):
def readHyperDataInfos(datadir,use_bg_points,eval): def readHyperDataInfos(datadir,use_bg_points,eval):
train_cam_infos = Load_hyper_data(datadir,0.5,use_bg_points,split ="train") train_cam_infos = Load_hyper_data(datadir,0.5,use_bg_points,split ="train")
test_cam_infos = Load_hyper_data(datadir,0.5,use_bg_points,split="test") test_cam_infos = Load_hyper_data(datadir,0.5,use_bg_points,split="test")
print("load finished")
train_cam = format_hyper_data(train_cam_infos,"train") train_cam = format_hyper_data(train_cam_infos,"train")
print("format finished")
max_time = train_cam_infos.max_time max_time = train_cam_infos.max_time
video_cam_infos = copy.deepcopy(test_cam_infos) video_cam_infos = copy.deepcopy(test_cam_infos)
video_cam_infos.split="video" video_cam_infos.split="video"
ply_path = os.path.join(datadir, "points.npy") # ply_path = os.path.join(datadir, "points.npy")
xyz = np.load(ply_path,allow_pickle=True) # xyz = np.load(ply_path,allow_pickle=True)
xyz -= train_cam_infos.scene_center # xyz -= train_cam_infos.scene_center
xyz *= train_cam_infos.coord_scale # xyz *= train_cam_infos.coord_scale
xyz = xyz.astype(np.float32) # xyz = xyz.astype(np.float32)
shs = np.random.random((xyz.shape[0], 3)) / 255.0 # shs = np.random.random((xyz.shape[0], 3)) / 255.0
pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((xyz.shape[0], 3))) # pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((xyz.shape[0], 3)))
ply_path = os.path.join(datadir, "points3D_downsample.ply")
# ply_path = os.path.join(datadir, "points3D.ply")
pcd = fetchPly(ply_path)
xyz = np.array(pcd.points)
# xyz -= train_cam_infos.scene_center
# xyz *= train_cam_infos.coord_scale
pcd = pcd._replace(points=xyz)
nerf_normalization = getNerfppNorm(train_cam) nerf_normalization = getNerfppNorm(train_cam)
plot_camera_orientations(train_cam_infos, pcd.points)
scene_info = SceneInfo(point_cloud=pcd, scene_info = SceneInfo(point_cloud=pcd,
train_cameras=train_cam_infos, train_cameras=train_cam_infos,
test_cameras=test_cam_infos, test_cameras=test_cam_infos,
@ -411,14 +431,33 @@ def format_render_poses(poses,data_infos):
FovY = focal2fov(data_infos.focal[0], image.shape[1]) FovY = focal2fov(data_infos.focal[0], image.shape[1])
cameras.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image, cameras.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
image_path=image_path, image_name=image_name, width=image.shape[2], height=image.shape[1], image_path=image_path, image_name=image_name, width=image.shape[2], height=image.shape[1],
time = time)) time = time, mask=None))
return cameras return cameras
# plydata = PlyData.read(path)
# vertices = plydata['vertex']
# positions = np.vstack([vertices['x'], vertices['y'], vertices['z']]).T
# colors = np.vstack([vertices['red'], vertices['green'], vertices['blue']]).T / 255.0
# normals = np.vstack([vertices['nx'], vertices['ny'], vertices['nz']]).T
# return BasicPointCloud(points=positions, colors=colors, normals=normals)
def add_points(pointsclouds, xyz_min, xyz_max):
add_points = (np.random.random((100000, 3)))* (xyz_max-xyz_min) + xyz_min
add_points = add_points.astype(np.float32)
addcolors = np.random.random((100000, 3)).astype(np.float32)
addnormals = np.random.random((100000, 3)).astype(np.float32)
# breakpoint()
new_points = np.vstack([pointsclouds.points,add_points])
new_colors = np.vstack([pointsclouds.colors,addcolors])
new_normals = np.vstack([pointsclouds.normals,addnormals])
pointsclouds=pointsclouds._replace(points=new_points)
pointsclouds=pointsclouds._replace(colors=new_colors)
pointsclouds=pointsclouds._replace(normals=new_normals)
return pointsclouds
# breakpoint()
# new_
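# --- Illustrative sketch (not in the original commit): padding a sparse initial point cloud
# --- with 100k random points inside its bounding box via add_points above. The input
# --- BasicPointCloud is a synthetic placeholder; np and BasicPointCloud are already in scope here.
def _example_add_points():
    pts = np.random.rand(500, 3).astype(np.float32)
    pcd = BasicPointCloud(points=pts,
                          colors=np.random.rand(500, 3).astype(np.float32),
                          normals=np.zeros((500, 3), dtype=np.float32))
    padded = add_points(pcd, xyz_min=pts.min(axis=0), xyz_max=pts.max(axis=0))
    return padded.points.shape               # (100500, 3)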
def readdynerfInfo(datadir,use_bg_points,eval): def readdynerfInfo(datadir,use_bg_points,eval):
# loading all the data follow hexplane format # loading all the data follow hexplane format
ply_path = os.path.join(datadir, "points3d.ply") # ply_path = os.path.join(datadir, "points3D_dense.ply")
ply_path = os.path.join(datadir, "points3D_downsample2.ply")
from scene.neural_3D_dataset_NDC import Neural3D_NDC_Dataset from scene.neural_3D_dataset_NDC import Neural3D_NDC_Dataset
train_dataset = Neural3D_NDC_Dataset( train_dataset = Neural3D_NDC_Dataset(
datadir, datadir,
@ -446,24 +485,26 @@ def readdynerfInfo(datadir,use_bg_points,eval):
# create pcd # create pcd
# if not os.path.exists(ply_path): # if not os.path.exists(ply_path):
# Since this data set has no colmap data, we start with random points # Since this data set has no colmap data, we start with random points
num_pts = 2000 # num_pts = 2000
print(f"Generating random point cloud ({num_pts})...") # print(f"Generating random point cloud ({num_pts})...")
threshold = 3 # threshold = 3
# xyz_max = np.array([1.5*threshold, 1.5*threshold, 1.5*threshold]) # xyz_max = np.array([1.5*threshold, 1.5*threshold, 1.5*threshold])
# xyz_min = np.array([-1.5*threshold, -1.5*threshold, -3*threshold]) # xyz_min = np.array([-1.5*threshold, -1.5*threshold, -3*threshold])
xyz_max = np.array([1.5*threshold, 1.5*threshold, 1.5*threshold]) # xyz_max = np.array([1.5*threshold, 1.5*threshold, 1.5*threshold])
xyz_min = np.array([-1.5*threshold, -1.5*threshold, -1.5*threshold]) # xyz_min = np.array([-1.5*threshold, -1.5*threshold, -1.5*threshold])
# We create random points inside the bounds of the synthetic Blender scenes # We create random points inside the bounds of the synthetic Blender scenes
xyz = (np.random.random((num_pts, 3)))* (xyz_max-xyz_min) + xyz_min # xyz = (np.random.random((num_pts, 3)))* (xyz_max-xyz_min) + xyz_min
print("point cloud initialization:",xyz.max(axis=0),xyz.min(axis=0)) # print("point cloud initialization:",xyz.max(axis=0),xyz.min(axis=0))
shs = np.random.random((num_pts, 3)) / 255.0 # shs = np.random.random((num_pts, 3)) / 255.0
pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3))) # pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3)))
storePly(ply_path, xyz, SH2RGB(shs) * 255) # storePly(ply_path, xyz, SH2RGB(shs) * 255)
try:
# xyz = np.load # xyz = np.load
pcd = fetchPly(ply_path) pcd = fetchPly(ply_path)
except: print("origin points,",pcd.points.shape[0])
pcd = None
print("after points,",pcd.points.shape[0])
scene_info = SceneInfo(point_cloud=pcd, scene_info = SceneInfo(point_cloud=pcd,
train_cameras=train_dataset, train_cameras=train_dataset,
test_cameras=test_dataset, test_cameras=test_dataset,
@ -473,11 +514,132 @@ def readdynerfInfo(datadir,use_bg_points,eval):
maxtime=300 maxtime=300
) )
return scene_info return scene_info
def setup_camera(w, h, k, w2c, near=0.01, far=100):
from diff_gaussian_rasterization import GaussianRasterizationSettings as Camera
fx, fy, cx, cy = k[0][0], k[1][1], k[0][2], k[1][2]
w2c = torch.tensor(w2c).cuda().float()
cam_center = torch.inverse(w2c)[:3, 3]
w2c = w2c.unsqueeze(0).transpose(1, 2)
opengl_proj = torch.tensor([[2 * fx / w, 0.0, -(w - 2 * cx) / w, 0.0],
[0.0, 2 * fy / h, -(h - 2 * cy) / h, 0.0],
[0.0, 0.0, far / (far - near), -(far * near) / (far - near)],
[0.0, 0.0, 1.0, 0.0]]).cuda().float().unsqueeze(0).transpose(1, 2)
full_proj = w2c.bmm(opengl_proj)
cam = Camera(
image_height=h,
image_width=w,
tanfovx=w / (2 * fx),
tanfovy=h / (2 * fy),
bg=torch.tensor([0, 0, 0], dtype=torch.float32, device="cuda"),
scale_modifier=1.0,
viewmatrix=w2c,
projmatrix=full_proj,
sh_degree=0,
campos=cam_center,
prefiltered=False,
debug=True
)
return cam
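# --- Illustrative sketch (not in the original commit): building rasterization settings for
# --- one view with setup_camera above. Intrinsics/extrinsics are placeholders; running this
# --- needs a CUDA device and the diff_gaussian_rasterization package installed.
def _example_setup_camera():
    K = [[1000.0, 0.0, 320.0],
         [0.0, 1000.0, 180.0],
         [0.0, 0.0, 1.0]]
    w2c = np.eye(4)
    w2c[2, 3] = 3.0                          # camera 3 units from the origin along +z
    return setup_camera(640, 360, K, w2c, near=0.01, far=100)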
def plot_camera_orientations(cam_list, xyz):
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# ax2 = fig.add_subplot(122, projection='3d')
# xyz = xyz[xyz[:,0]<1]
threshold=2
xyz = xyz[(xyz[:, 0] >= -threshold) & (xyz[:, 0] <= threshold) &
(xyz[:, 1] >= -threshold) & (xyz[:, 1] <= threshold) &
(xyz[:, 2] >= -threshold) & (xyz[:, 2] <= threshold)]
ax.scatter(xyz[:,0],xyz[:,1],xyz[:,2],c='r',s=0.1)
for cam in tqdm(cam_list):
# extract R and T
R = cam.R
T = cam.T
# print(R,T)
# breakpoint()
# compute the camera viewing direction (a unit vector)
direction = R @ np.array([0, 0, 1])
# draw the camera position and orientation
ax.quiver(T[0], T[1], T[2], direction[0], direction[1], direction[2], length=1)
ax.set_xlabel('X Axis')
ax.set_ylabel('Y Axis')
ax.set_zlabel('Z Axis')
plt.savefig("output.png")
# breakpoint()
def readPanopticmeta(datadir, json_path):
with open(os.path.join(datadir,json_path)) as f:
test_meta = json.load(f)
w = test_meta['w']
h = test_meta['h']
max_time = len(test_meta['fn'])
cam_infos = []
for index in range(len(test_meta['fn'])):
focals = test_meta['k'][index]
w2cs = test_meta['w2c'][index]
fns = test_meta['fn'][index]
cam_ids = test_meta['cam_id'][index]
time = index / len(test_meta['fn'])
# breakpoint()
for focal, w2c, fn, cam in zip(focals, w2cs, fns, cam_ids):
image_path = os.path.join(datadir,"ims")
image_name=fn
# breakpoint()
image = Image.open(os.path.join(datadir,"ims",fn))
im_data = np.array(image.convert("RGBA"))
# breakpoint()
im_data = PILtoTorch(im_data,None)[:3,:,:]
# breakpoint()
# print(w2c,focal,image_name)
camera = setup_camera(w, h, focal, w2c)
cam_infos.append({
"camera":camera,
"time":time,
"image":im_data})
cam_centers = np.linalg.inv(test_meta['w2c'][0])[:, :3, 3] # Get scene radius
scene_radius = 1.1 * np.max(np.linalg.norm(cam_centers - np.mean(cam_centers, 0)[None], axis=-1))
# breakpoint()
return cam_infos, max_time, scene_radius
def readPanopticSportsinfos(datadir):
train_cam_infos, max_time, scene_radius = readPanopticmeta(datadir, "train_meta.json")
test_cam_infos,_, _ = readPanopticmeta(datadir, "test_meta.json")
nerf_normalization = {
"radius":scene_radius,
"translate":torch.tensor([0,0,0])
}
ply_path = os.path.join(datadir, "pointd3D.ply")
# Since this data set has no colmap data, we start with random points
plz_path = os.path.join(datadir, "init_pt_cld.npz")
data = np.load(plz_path)["data"]
xyz = data[:,:3]
rgb = data[:,3:6]
num_pts = xyz.shape[0]
pcd = BasicPointCloud(points=xyz, colors=rgb, normals=np.ones((num_pts, 3)))
storePly(ply_path, xyz, rgb)
# pcd = fetchPly(ply_path)
# breakpoint()
scene_info = SceneInfo(point_cloud=pcd,
train_cameras=train_cam_infos,
test_cameras=test_cam_infos,
video_cameras=test_cam_infos,
nerf_normalization=nerf_normalization,
ply_path=ply_path,
maxtime=max_time,
)
return scene_info
sceneLoadTypeCallbacks = { sceneLoadTypeCallbacks = {
"Colmap": readColmapSceneInfo, "Colmap": readColmapSceneInfo,
"Blender" : readNerfSyntheticInfo, "Blender" : readNerfSyntheticInfo,
"dynerf" : readdynerfInfo, "dynerf" : readdynerfInfo,
"nerfies": readHyperDataInfos, # NeRFies & HyperNeRF dataset proposed by [https://github.com/google/hypernerf/releases/tag/v0.1] "nerfies": readHyperDataInfos, # NeRFies & HyperNeRF dataset proposed by [https://github.com/google/hypernerf/releases/tag/v0.1]
"PanopticSports" : readPanopticSportsinfos
} }


@ -8,88 +8,144 @@ import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch.utils.cpp_extension import load
import torch.nn.init as init import torch.nn.init as init
from utils.graphics_utils import apply_rotation, batch_quaternion_multiply
from scene.hexplane import HexPlaneField from scene.hexplane import HexPlaneField
from scene.grid import DenseGrid
# from scene.grid import HashHexPlane
class Deformation(nn.Module): class Deformation(nn.Module):
def __init__(self, D=8, W=256, input_ch=27, input_ch_time=9, skips=[], args=None): def __init__(self, D=8, W=256, input_ch=27, input_ch_time=9, grid_pe=0, skips=[], args=None):
super(Deformation, self).__init__() super(Deformation, self).__init__()
self.D = D self.D = D
self.W = W self.W = W
self.input_ch = input_ch self.input_ch = input_ch
self.input_ch_time = input_ch_time self.input_ch_time = input_ch_time
self.skips = skips self.skips = skips
self.grid_pe = grid_pe
self.no_grid = args.no_grid self.no_grid = args.no_grid
self.grid = HexPlaneField(args.bounds, args.kplanes_config, args.multires) self.grid = HexPlaneField(args.bounds, args.kplanes_config, args.multires)
self.pos_deform, self.scales_deform, self.rotations_deform, self.opacity_deform = self.create_net()
self.args = args
def create_net(self):
self.args = args
# self.args.empty_voxel=True
if self.args.empty_voxel:
self.empty_voxel = DenseGrid(channels=1, world_size=[64,64,64])
if self.args.static_mlp:
self.static_mlp = nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 1))
self.ratio=0
self.create_net()
@property
def get_aabb(self):
return self.grid.get_aabb
def set_aabb(self, xyz_max, xyz_min):
print("Deformation Net Set aabb",xyz_max, xyz_min)
self.grid.set_aabb(xyz_max, xyz_min)
if self.args.empty_voxel:
self.empty_voxel.set_aabb(xyz_max, xyz_min)
def create_net(self):
mlp_out_dim = 0 mlp_out_dim = 0
if self.grid_pe !=0:
grid_out_dim = self.grid.feat_dim+(self.grid.feat_dim)*2
else:
grid_out_dim = self.grid.feat_dim
if self.no_grid: if self.no_grid:
self.feature_out = [nn.Linear(4,self.W)] self.feature_out = [nn.Linear(4,self.W)]
else: else:
self.feature_out = [nn.Linear(mlp_out_dim + self.grid.feat_dim ,self.W)] self.feature_out = [nn.Linear(mlp_out_dim + grid_out_dim ,self.W)]
for i in range(self.D-1): for i in range(self.D-1):
self.feature_out.append(nn.ReLU()) self.feature_out.append(nn.ReLU())
self.feature_out.append(nn.Linear(self.W,self.W)) self.feature_out.append(nn.Linear(self.W,self.W))
self.feature_out = nn.Sequential(*self.feature_out) self.feature_out = nn.Sequential(*self.feature_out)
output_dim = self.W self.pos_deform = nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 3))
return \ self.scales_deform = nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 3))
nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 3)),\ self.rotations_deform = nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 4))
nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 3)),\ self.opacity_deform = nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 1))
nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 4)), \ self.shs_deform = nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 16*3))
nn.Sequential(nn.ReLU(),nn.Linear(self.W,self.W),nn.ReLU(),nn.Linear(self.W, 1))
def query_time(self, rays_pts_emb, scales_emb, rotations_emb, time_feature, time_emb):
def query_time(self, rays_pts_emb, scales_emb, rotations_emb, time_emb):
if self.no_grid: if self.no_grid:
h = torch.cat([rays_pts_emb[:,:3],time_emb[:,:1]],-1) h = torch.cat([rays_pts_emb[:,:3],time_emb[:,:1]],-1)
else: else:
grid_feature = self.grid(rays_pts_emb[:,:3], time_emb[:,:1]) grid_feature = self.grid(rays_pts_emb[:,:3], time_emb[:,:1])
# breakpoint()
h = grid_feature if self.grid_pe > 1:
grid_feature = poc_fre(grid_feature,self.grid_pe)
hidden = torch.cat([grid_feature],-1)
h = self.feature_out(h)
hidden = self.feature_out(hidden)
return h
def forward(self, rays_pts_emb, scales_emb=None, rotations_emb=None, opacity = None, time_emb=None): return hidden
@property
def get_empty_ratio(self):
return self.ratio
def forward(self, rays_pts_emb, scales_emb=None, rotations_emb=None, opacity = None,shs_emb=None, time_feature=None, time_emb=None):
if time_emb is None: if time_emb is None:
return self.forward_static(rays_pts_emb[:,:3]) return self.forward_static(rays_pts_emb[:,:3])
else: else:
return self.forward_dynamic(rays_pts_emb, scales_emb, rotations_emb, opacity, time_emb) return self.forward_dynamic(rays_pts_emb, scales_emb, rotations_emb, opacity, shs_emb, time_feature, time_emb)
def forward_static(self, rays_pts_emb): def forward_static(self, rays_pts_emb):
grid_feature = self.grid(rays_pts_emb[:,:3]) grid_feature = self.grid(rays_pts_emb[:,:3])
dx = self.static_mlp(grid_feature) dx = self.static_mlp(grid_feature)
return rays_pts_emb[:, :3] + dx return rays_pts_emb[:, :3] + dx
def forward_dynamic(self,rays_pts_emb, scales_emb, rotations_emb, opacity_emb, time_emb): def forward_dynamic(self,rays_pts_emb, scales_emb, rotations_emb, opacity_emb, shs_emb, time_feature, time_emb):
hidden = self.query_time(rays_pts_emb, scales_emb, rotations_emb, time_emb).float() hidden = self.query_time(rays_pts_emb, scales_emb, rotations_emb, time_feature, time_emb)
dx = self.pos_deform(hidden) if self.args.static_mlp:
pts = rays_pts_emb[:, :3] + dx mask = self.static_mlp(hidden)
if self.args.no_ds: elif self.args.empty_voxel:
mask = self.empty_voxel(rays_pts_emb[:,:3])
else:
mask = torch.ones_like(opacity_emb[:,0]).unsqueeze(-1)
# breakpoint()
if self.args.no_dx:
pts = rays_pts_emb[:,:3]
else:
dx = self.pos_deform(hidden)
pts = torch.zeros_like(rays_pts_emb[:,:3])
pts = rays_pts_emb[:,:3]*mask + dx
if self.args.no_ds :
scales = scales_emb[:,:3] scales = scales_emb[:,:3]
else: else:
ds = self.scales_deform(hidden) ds = self.scales_deform(hidden)
scales = scales_emb[:,:3] + ds
if self.args.no_dr: scales = torch.zeros_like(scales_emb[:,:3])
scales = scales_emb[:,:3]*mask + ds
if self.args.no_dr :
rotations = rotations_emb[:,:4] rotations = rotations_emb[:,:4]
else: else:
dr = self.rotations_deform(hidden) dr = self.rotations_deform(hidden)
rotations = rotations_emb[:,:4] + dr
if self.args.no_do: rotations = torch.zeros_like(rotations_emb[:,:4])
if self.args.apply_rotation:
rotations = batch_quaternion_multiply(rotations_emb, dr)
else:
rotations = rotations_emb[:,:4] + dr
if self.args.no_do :
opacity = opacity_emb[:,:1] opacity = opacity_emb[:,:1]
else: else:
do = self.opacity_deform(hidden) do = self.opacity_deform(hidden)
opacity = opacity_emb[:,:1] + do
# + do opacity = torch.zeros_like(opacity_emb[:,:1])
# print("deformation value:","pts:",torch.abs(dx).mean(),"rotation:",torch.abs(dr).mean()) opacity = opacity_emb[:,:1]*mask + do
if self.args.no_dshs:
shs = shs_emb
else:
dshs = self.shs_deform(hidden).reshape([shs_emb.shape[0],16,3])
return pts, scales, rotations, opacity shs = torch.zeros_like(shs_emb)
# breakpoint()
shs = shs_emb*mask.unsqueeze(-1) + dshs
return pts, scales, rotations, opacity, shs
def get_mlp_parameters(self): def get_mlp_parameters(self):
parameter_list = [] parameter_list = []
for name, param in self.named_parameters(): for name, param in self.named_parameters():
@ -97,8 +153,11 @@ class Deformation(nn.Module):
parameter_list.append(param) parameter_list.append(param)
return parameter_list return parameter_list
def get_grid_parameters(self): def get_grid_parameters(self):
return list(self.grid.parameters() ) parameter_list = []
# + list(self.timegrid.parameters()) for name, param in self.named_parameters():
if "grid" in name:
parameter_list.append(param)
return parameter_list
class deform_network(nn.Module): class deform_network(nn.Module):
def __init__(self, args) : def __init__(self, args) :
super(deform_network, self).__init__() super(deform_network, self).__init__()
@ -110,11 +169,12 @@ class deform_network(nn.Module):
opacity_pe = args.opacity_pe opacity_pe = args.opacity_pe
timenet_width = args.timenet_width timenet_width = args.timenet_width
timenet_output = args.timenet_output timenet_output = args.timenet_output
grid_pe = args.grid_pe
times_ch = 2*timebase_pe+1 times_ch = 2*timebase_pe+1
self.timenet = nn.Sequential( self.timenet = nn.Sequential(
nn.Linear(times_ch, timenet_width), nn.ReLU(), nn.Linear(times_ch, timenet_width), nn.ReLU(),
nn.Linear(timenet_width, timenet_output)) nn.Linear(timenet_width, timenet_output))
self.deformation_net = Deformation(W=net_width, D=defor_depth, input_ch=(4+3)+((4+3)*scale_rotation_pe)*2, input_ch_time=timenet_output, args=args) self.deformation_net = Deformation(W=net_width, D=defor_depth, input_ch=(3)+(3*(posbase_pe))*2, grid_pe=grid_pe, input_ch_time=timenet_output, args=args)
self.register_buffer('time_poc', torch.FloatTensor([(2**i) for i in range(timebase_pe)])) self.register_buffer('time_poc', torch.FloatTensor([(2**i) for i in range(timebase_pe)]))
self.register_buffer('pos_poc', torch.FloatTensor([(2**i) for i in range(posbase_pe)])) self.register_buffer('pos_poc', torch.FloatTensor([(2**i) for i in range(posbase_pe)]))
self.register_buffer('rotation_scaling_poc', torch.FloatTensor([(2**i) for i in range(scale_rotation_pe)])) self.register_buffer('rotation_scaling_poc', torch.FloatTensor([(2**i) for i in range(scale_rotation_pe)]))
@ -122,26 +182,34 @@ class deform_network(nn.Module):
self.apply(initialize_weights) self.apply(initialize_weights)
# print(self) # print(self)
def forward(self, point, scales=None, rotations=None, opacity=None, times_sel=None): def forward(self, point, scales=None, rotations=None, opacity=None, shs=None, times_sel=None):
if times_sel is not None: return self.forward_dynamic(point, scales, rotations, opacity, shs, times_sel)
return self.forward_dynamic(point, scales, rotations, opacity, times_sel) @property
else: def get_aabb(self):
return self.forward_static(point)
return self.deformation_net.get_aabb
@property
def get_empty_ratio(self):
return self.deformation_net.get_empty_ratio
def forward_static(self, points): def forward_static(self, points):
points = self.deformation_net(points) points = self.deformation_net(points)
return points return points
def forward_dynamic(self, point, scales=None, rotations=None, opacity=None, times_sel=None): def forward_dynamic(self, point, scales=None, rotations=None, opacity=None, shs=None, times_sel=None):
# times_emb = poc_fre(times_sel, self.time_poc) # times_emb = poc_fre(times_sel, self.time_poc)
point_emb = poc_fre(point,self.pos_poc)
means3D, scales, rotations, opacity = self.deformation_net( point, scales_emb = poc_fre(scales,self.rotation_scaling_poc)
scales, rotations_emb = poc_fre(rotations,self.rotation_scaling_poc)
rotations, # time_emb = poc_fre(times_sel, self.time_poc)
# times_feature = self.timenet(time_emb)
means3D, scales, rotations, opacity, shs = self.deformation_net( point_emb,
scales_emb,
rotations_emb,
opacity, opacity,
# times_feature, shs,
None,
times_sel) times_sel)
return means3D, scales, rotations, opacity return means3D, scales, rotations, opacity, shs
def get_mlp_parameters(self): def get_mlp_parameters(self):
return self.deformation_net.get_mlp_parameters() + list(self.timenet.parameters()) return self.deformation_net.get_mlp_parameters() + list(self.timenet.parameters())
def get_grid_parameters(self): def get_grid_parameters(self):
@ -154,3 +222,10 @@ def initialize_weights(m):
if m.bias is not None: if m.bias is not None:
init.xavier_uniform_(m.weight,gain=1) init.xavier_uniform_(m.weight,gain=1)
# init.constant_(m.bias, 0) # init.constant_(m.bias, 0)
def poc_fre(input_data,poc_buf):
input_data_emb = (input_data.unsqueeze(-1) * poc_buf).flatten(-2)
input_data_sin = input_data_emb.sin()
input_data_cos = input_data_emb.cos()
input_data_emb = torch.cat([input_data, input_data_sin,input_data_cos], -1)
return input_data_emb
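# --- Illustrative sketch (not in the original commit): poc_fre concatenates the raw input
# --- with sin/cos features at each frequency, so a [N,3] input with L frequencies in
# --- poc_buf becomes [N, 3 + 2*3*L].
def _example_poc_fre():
    pos_poc = torch.FloatTensor([2 ** i for i in range(4)])   # L = 4 frequencies
    xyz = torch.rand(1024, 3)
    xyz_emb = poc_fre(xyz, pos_poc)
    return xyz_emb.shape                      # torch.Size([1024, 27]) = 3 + 12 + 12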


@ -14,6 +14,7 @@ import numpy as np
from utils.general_utils import inverse_sigmoid, get_expon_lr_func, build_rotation from utils.general_utils import inverse_sigmoid, get_expon_lr_func, build_rotation
from torch import nn from torch import nn
import os import os
import open3d as o3d
from utils.system_utils import mkdir_p from utils.system_utils import mkdir_p
from plyfile import PlyData, PlyElement from plyfile import PlyData, PlyElement
from random import randint from random import randint
@ -21,6 +22,7 @@ from utils.sh_utils import RGB2SH
from simple_knn._C import distCUDA2 from simple_knn._C import distCUDA2
from utils.graphics_utils import BasicPointCloud from utils.graphics_utils import BasicPointCloud
from utils.general_utils import strip_symmetric, build_scaling_rotation from utils.general_utils import strip_symmetric, build_scaling_rotation
# from utils.point_utils import addpoint, combine_pointcloud, downsample_point_cloud_open3d, find_indices_in_A
from scene.deformation import deform_network from scene.deformation import deform_network
from scene.regulation import compute_plane_smoothness from scene.regulation import compute_plane_smoothness
class GaussianModel: class GaussianModel:
@ -135,6 +137,7 @@ class GaussianModel:
def create_from_pcd(self, pcd : BasicPointCloud, spatial_lr_scale : float, time_line: int): def create_from_pcd(self, pcd : BasicPointCloud, spatial_lr_scale : float, time_line: int):
self.spatial_lr_scale = spatial_lr_scale self.spatial_lr_scale = spatial_lr_scale
# breakpoint()
fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda() fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()
fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda()) fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())
features = torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2)).float().cuda() features = torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2)).float().cuda()
@ -418,6 +421,8 @@ class GaussianModel:
padded_grad = torch.zeros((n_init_points), device="cuda") padded_grad = torch.zeros((n_init_points), device="cuda")
padded_grad[:grads.shape[0]] = grads.squeeze() padded_grad[:grads.shape[0]] = grads.squeeze()
selected_pts_mask = torch.where(padded_grad >= grad_threshold, True, False) selected_pts_mask = torch.where(padded_grad >= grad_threshold, True, False)
# breakpoint()
selected_pts_mask = torch.logical_and(selected_pts_mask, selected_pts_mask = torch.logical_and(selected_pts_mask,
torch.max(self.get_scaling, dim=1).values > self.percent_dense*scene_extent) torch.max(self.get_scaling, dim=1).values > self.percent_dense*scene_extent)
if not selected_pts_mask.any(): if not selected_pts_mask.any():
@ -438,12 +443,33 @@ class GaussianModel:
prune_filter = torch.cat((selected_pts_mask, torch.zeros(N * selected_pts_mask.sum(), device="cuda", dtype=bool))) prune_filter = torch.cat((selected_pts_mask, torch.zeros(N * selected_pts_mask.sum(), device="cuda", dtype=bool)))
self.prune_points(prune_filter) self.prune_points(prune_filter)
def densify_and_clone(self, grads, grad_threshold, scene_extent): def densify_and_clone(self, grads, grad_threshold, scene_extent, density_threshold=20, displacement_scale=20, model_path=None, iteration=None, stage=None):
# Extract points that satisfy the gradient condition grads_accum_mask = torch.where(torch.norm(grads, dim=-1) >= grad_threshold, True, False)
selected_pts_mask = torch.where(torch.norm(grads, dim=-1) >= grad_threshold, True, False)
selected_pts_mask = torch.logical_and(selected_pts_mask,
torch.max(self.get_scaling, dim=1).values <= self.percent_dense*scene_extent)
# proactively add points to sparse regions of the point cloud
# if not hasattr(self,"voxel_size"):
# self.voxel_size = 8
# if not hasattr(self,"density_threshold"):
# self.density_threshold = density_threshold
# if not hasattr(self,"displacement_scale"):
# self.displacement_scale = displacement_scale
# point_cloud = self.get_xyz.detach().cpu()
# sparse_point_mask = self.downsample_point(point_cloud)
# _, low_density_points, new_points, low_density_index = addpoint(point_cloud[sparse_point_mask],density_threshold=self.density_threshold,displacement_scale=self.displacement_scale,iter_pass=0)
# sparse_point_mask = sparse_point_mask.to(grads_accum_mask)
# low_density_index = low_density_index.to(grads_accum_mask)
# if new_points.shape[0] < 100 :
# self.density_threshold /= 2
# self.displacement_scale /= 2
# print("reduce diplacement_scale to: ",self.displacement_scale)
# global_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool).to(grads_accum_mask)
# global_mask[sparse_point_mask] = low_density_index
# selected_pts_mask_grow = torch.logical_and(global_mask, grads_accum_mask)
# print("降采样点云:",sparse_point_mask.sum(),"选中的稀疏点云:",global_mask.sum(),"梯度累计点云:",grads_accum_mask.sum(),"选中增长点云:",selected_pts_mask_grow.sum())
# Extract points that satisfy the gradient condition
selected_pts_mask = torch.logical_and(grads_accum_mask,
torch.max(self.get_scaling, dim=1).values <= self.percent_dense*scene_extent)
# breakpoint()
new_xyz = self._xyz[selected_pts_mask]
# - 0.001 * self._xyz.grad[selected_pts_mask]
new_features_dc = self._features_dc[selected_pts_mask]
@@ -452,15 +478,111 @@ class GaussianModel:
new_scaling = self._scaling[selected_pts_mask]
new_rotation = self._rotation[selected_pts_mask]
new_deformation_table = self._deformation_table[selected_pts_mask]
# if opt.add_point:
# selected_xyz, grow_xyz = self.add_point_by_mask(selected_pts_mask_grow.to(self.get_xyz.device), self.displacement_scale)
self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_deformation_table)
# print("被动增加点云:",selected_xyz.shape[0])
# print("主动增加点云:",selected_pts_mask.sum())
# if model_path is not None and iteration is not None:
# point = combine_pointcloud(self.get_xyz.detach().cpu().numpy(), new_xyz.detach().cpu().numpy(), selected_xyz.detach().cpu().numpy())
# write_path = os.path.join(model_path,"add_point_cloud")
# os.makedirs(write_path,exist_ok=True)
# o3d.io.write_point_cloud(os.path.join(write_path,f"iteration_{stage}{iteration}.ply"),point)
# print("write output.")
@property
def get_aabb(self):
return self._deformation.get_aabb
def get_displayment(self,selected_point, point, perturb):
xyz_max, xyz_min = self.get_aabb
displacements = torch.randn(selected_point.shape[0], 3).to(selected_point) * perturb
final_point = selected_point + displacements
mask_a = final_point<xyz_max
mask_b = final_point>xyz_min
mask_c = mask_a & mask_b
mask_d = mask_c.all(dim=1)
final_point = final_point[mask_d]
# while (mask_d.sum()/final_point.shape[0])<0.5:
# perturb/=2
# displacements = torch.randn(selected_point.shape[0], 3).to(selected_point) * perturb
# final_point = selected_point + displacements
# mask_a = final_point<xyz_max
# mask_b = final_point>xyz_min
# mask_c = mask_a & mask_b
# mask_d = mask_c.all(dim=1)
# final_point = final_point[mask_d]
return final_point, mask_d
def add_point_by_mask(self, selected_pts_mask, perturb=0):
selected_xyz = self._xyz[selected_pts_mask]
new_xyz, mask = self.get_displayment(selected_xyz, self.get_xyz.detach(),perturb)
# displacements = torch.randn(selected_xyz.shape[0], 3).to(self._xyz) * perturb
# new_xyz = selected_xyz + displacements
# - 0.001 * self._xyz.grad[selected_pts_mask]
new_features_dc = self._features_dc[selected_pts_mask][mask]
new_features_rest = self._features_rest[selected_pts_mask][mask]
new_opacities = self._opacity[selected_pts_mask][mask]
new_scaling = self._scaling[selected_pts_mask][mask]
new_rotation = self._rotation[selected_pts_mask][mask]
new_deformation_table = self._deformation_table[selected_pts_mask][mask]
self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation, new_deformation_table)
return selected_xyz, new_xyz
def downsample_point(self, point_cloud):
if not hasattr(self,"voxel_size"):
self.voxel_size = 8
point_downsample = point_cloud
flag = False
while point_downsample.shape[0]>1000:
if flag:
self.voxel_size+=8
point_downsample = downsample_point_cloud_open3d(point_cloud,voxel_size=self.voxel_size)
flag = True
print("point size:",point_downsample.shape[0])
# downsampled_point_mask = torch.eq(point_downsample.view(1,-1,3), point_cloud.view(-1,1,3)).all(dim=1)
downsampled_point_index = find_indices_in_A(point_cloud, point_downsample)
downsampled_point_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool).to(point_downsample.device)
downsampled_point_mask[downsampled_point_index]=True
return downsampled_point_mask
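# downsample_point_cloud_open3d and find_indices_in_A are imported from the repo's utils
# and are not shown in this diff; rough sketches of plausible implementations (assumptions,
# not the repo's exact code):
import numpy as np
import torch
import open3d as o3d

def downsample_point_cloud_open3d(points: torch.Tensor, voxel_size: float) -> torch.Tensor:
    # voxel-grid downsampling via Open3D, returning the surviving points
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points.detach().cpu().numpy())
    down = pcd.voxel_down_sample(voxel_size=voxel_size)
    return torch.from_numpy(np.asarray(down.points)).to(points)

def find_indices_in_A(A: torch.Tensor, B: torch.Tensor) -> torch.Tensor:
    # for every point in B, the index of its nearest point in A (B is a subset of A here)
    return torch.cdist(B.float(), A.float()).argmin(dim=1)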
def grow(self, density_threshold=20, displacement_scale=20, model_path=None, iteration=None, stage=None):
if not hasattr(self,"voxel_size"):
self.voxel_size = 8
if not hasattr(self,"density_threshold"):
self.density_threshold = density_threshold
if not hasattr(self,"displacement_scale"):
self.displacement_scale = displacement_scale
flag = False
point_cloud = self.get_xyz.detach().cpu()
point_downsample = point_cloud.detach()
downsampled_point_index = self.downsample_point(point_downsample)
_, low_density_points, new_points, low_density_index = addpoint(point_cloud[downsampled_point_index],density_threshold=self.density_threshold,displacement_scale=self.displacement_scale,iter_pass=0)
if new_points.shape[0] == 0:
print("no point added")
return
elif new_points.shape[0] < 100 :
self.density_threshold /= 2
self.displacement_scale /= 2
print("reduce displacement_scale to: ",self.displacement_scale)
global_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool)
global_mask[downsampled_point_index] = low_density_index
selected_xyz, new_xyz = self.add_point_by_mask(global_mask.to(self.get_xyz.device), self.displacement_scale)
print("point growing,add point num:",global_mask.sum())
if model_path is not None and iteration is not None:
point = combine_pointcloud(point_cloud, selected_xyz.detach().cpu().numpy(), new_xyz.detach().cpu().numpy())
write_path = os.path.join(model_path,"add_point_cloud")
os.makedirs(write_path,exist_ok=True)
o3d.io.write_point_cloud(os.path.join(write_path,f"iteration_{stage}{iteration}.ply"),point)
return
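# addpoint also lives in the repo's utils and is not reproduced in this diff; the rough
# idea (an assumption, not the repo's exact code) is to use nearest-neighbour spacing as a
# density proxy, flag sparse points, and propose displaced copies next to them.
import torch

def addpoint_sketch(points, density_threshold=20, displacement_scale=20, iter_pass=0):
    k = min(4, points.shape[0])
    knn = torch.cdist(points, points).topk(k, largest=False).values[:, 1:]
    spacing = knn.mean(dim=1)                              # large spacing = low density
    low_density_index = spacing > density_threshold
    low_density_points = points[low_density_index]
    new_points = low_density_points + torch.randn_like(low_density_points) * displacement_scale
    return points, low_density_points, new_points, low_density_index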
def prune(self, max_grad, min_opacity, extent, max_screen_size):
prune_mask = (self.get_opacity < min_opacity).squeeze()
# prune_mask_2 = torch.logical_and(self.get_opacity <= inverse_sigmoid(0.101 , dtype=torch.float, device="cuda"), self.get_opacity >= inverse_sigmoid(0.999 , dtype=torch.float, device="cuda"))
# prune_mask = torch.logical_or(prune_mask, prune_mask_2)
# deformation_sum = abs(self._deformation).sum(dim=-1).mean(dim=-1)
# deformation_mask = (deformation_sum < torch.quantile(deformation_sum, torch.tensor([0.5]).to("cuda")))
# prune_mask = prune_mask & deformation_mask
if max_screen_size:
big_points_vs = self.max_radii2D > max_screen_size
big_points_ws = self.get_scaling.max(dim=1).values > 0.1 * extent
@@ -470,11 +592,11 @@ class GaussianModel:
self.prune_points(prune_mask)
torch.cuda.empty_cache()
- def densify(self, max_grad, min_opacity, extent, max_screen_size):
+ def densify(self, max_grad, min_opacity, extent, max_screen_size, density_threshold, displacement_scale, model_path=None, iteration=None, stage=None):
grads = self.xyz_gradient_accum / self.denom
grads[grads.isnan()] = 0.0
- self.densify_and_clone(grads, max_grad, extent)
+ self.densify_and_clone(grads, max_grad, extent, density_threshold, displacement_scale, model_path, iteration, stage)
self.densify_and_split(grads, max_grad, extent)
def standard_constaint(self):


@@ -6,29 +6,22 @@ import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# import tinycudann as tcnn
parent_dir = os.path.dirname(os.path.abspath(__file__))
- def create_grid(type, **kwargs):
- if type == 'DenseGrid':
- return DenseGrid(**kwargs)
- elif type == 'TensoRFGrid':
- return TensoRFGrid(**kwargs)
- else:
- raise NotImplementedError
''' Dense 3D grid
'''
class DenseGrid(nn.Module):
- def __init__(self, channels, world_size, xyz_min, xyz_max, **kwargs):
+ def __init__(self, channels, world_size, **kwargs):
super(DenseGrid, self).__init__()
self.channels = channels
self.world_size = world_size
- self.register_buffer('xyz_min', torch.Tensor(xyz_min))
- self.register_buffer('xyz_max', torch.Tensor(xyz_max))
- self.grid = nn.Parameter(torch.zeros([1, channels, *world_size]))
+ # self.xyz_max = xyz_max
+ # self.xyz_min = xyz_min
+ # self.register_buffer('xyz_min', torch.Tensor(xyz_min))
# self.register_buffer('xyz_max', torch.Tensor(xyz_max))
self.grid = nn.Parameter(torch.ones([1, channels, *world_size]))
def forward(self, xyz):
'''
@@ -39,17 +32,19 @@ class DenseGrid(nn.Module):
ind_norm = ((xyz - self.xyz_min) / (self.xyz_max - self.xyz_min)).flip((-1,)) * 2 - 1
out = F.grid_sample(self.grid, ind_norm, mode='bilinear', align_corners=True)
out = out.reshape(self.channels,-1).T.reshape(*shape,self.channels)
- if self.channels == 1:
- out = out.squeeze(-1)
+ # if self.channels == 1:
+ # out = out.squeeze(-1)
return out
def scale_volume_grid(self, new_world_size):
if self.channels == 0:
- self.grid = nn.Parameter(torch.zeros([1, self.channels, *new_world_size]))
+ self.grid = nn.Parameter(torch.ones([1, self.channels, *new_world_size]))
else:
self.grid = nn.Parameter(
F.interpolate(self.grid.data, size=tuple(new_world_size), mode='trilinear', align_corners=True))
def set_aabb(self, xyz_max, xyz_min):
self.register_buffer('xyz_min', torch.Tensor(xyz_min))
self.register_buffer('xyz_max', torch.Tensor(xyz_max))
def get_dense_grid(self):
return self.grid
@@ -59,5 +54,38 @@ class DenseGrid(nn.Module):
return self
def extra_repr(self):
- return f'channels={self.channels}, world_size={self.world_size.tolist()}'
+ return f'channels={self.channels}, world_size={self.world_size}'
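# Quick illustration of the DenseGrid lookup above: F.grid_sample expects coordinates in
# [-1, 1], ordered (x, y, z) against a (W, H, D) volume, hence the flip before sampling.
# Shapes below are arbitrary for the sketch.
import torch
import torch.nn.functional as F
channels, world_size = 4, (16, 16, 16)
grid = torch.ones(1, channels, *world_size)
xyz = torch.rand(10, 3) * 2 - 1                        # pretend these are already normalized
ind = xyz.flip(-1).view(1, 1, 1, -1, 3)                # (N, D, H, W, 3) sampling grid
out = F.grid_sample(grid, ind, mode='bilinear', align_corners=True)
print(out.reshape(channels, -1).T.shape)               # (10, 4): one feature vector per query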
# class HashHexPlane(nn.Module):
# def __init__(self,hparams,
# desired_resolution=1024,
# base_solution=128,
# n_levels=4,
# ):
# super(HashHexPlane, self).__init__()
# per_level_scale = np.exp2(np.log2(desired_resolution / base_solution) / (int(n_levels) - 1))
# encoding_2d_config = {
# "otype": "Grid",
# "type": "Hash",
# "n_levels": n_levels,
# "n_features_per_level": 2,
# "base_resolution": base_solution,
# "per_level_scale":per_level_scale,
# }
# self.xy = tcnn.Encoding(n_input_dims=2, encoding_config=encoding_2d_config)
# self.yz = tcnn.Encoding(n_input_dims=2, encoding_config=encoding_2d_config)
# self.xz = tcnn.Encoding(n_input_dims=2, encoding_config=encoding_2d_config)
# self.xt = tcnn.Encoding(n_input_dims=2, encoding_config=encoding_2d_config)
# self.yt = tcnn.Encoding(n_input_dims=2, encoding_config=encoding_2d_config)
# self.zt = tcnn.Encoding(n_input_dims=2, encoding_config=encoding_2d_config)
# self.feat_dim = n_levels * 2 *3
# def forward(self, x, bound):
# x = (x + bound) / (2 * bound) # zyq: map to [0, 1]
# xy_feat = self.xy(x[:, [0, 1]])
# yz_feat = self.yz(x[:, [0, 2]])
# xz_feat = self.xz(x[:, [1, 2]])
# xt_feat = self.xt(x[:, []])
# return torch.cat([xy_feat, yz_feat, xz_feat], dim=-1)


@@ -146,19 +146,20 @@ class HexPlaneField(nn.Module):
self.grids.append(gp)
# print(f"Initialized model grids: {self.grids}")
print("feature_dim:",self.feat_dim)
@property
def get_aabb(self):
return self.aabb[0], self.aabb[1]
def set_aabb(self,xyz_max, xyz_min):
aabb = torch.tensor([
xyz_max,
xyz_min
- ])
- self.aabb = nn.Parameter(aabb,requires_grad=True)
+ ],dtype=torch.float32)
+ self.aabb = nn.Parameter(aabb,requires_grad=False)
print("Voxel Plane: set aabb=",self.aabb)
def get_density(self, pts: torch.Tensor, timestamps: Optional[torch.Tensor] = None):
"""Computes and returns the densities."""
# breakpoint()
pts = normalize_aabb(pts, self.aabb)
pts = torch.cat((pts, timestamps), dim=-1)  # [n_rays, n_samples, 4]
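# normalize_aabb is defined elsewhere in the repo and not shown here; given that set_aabb
# stores the box as (xyz_max, xyz_min), a typical implementation (assumption) maps points
# into [-1, 1] roughly like this:
import torch
def normalize_aabb_sketch(pts: torch.Tensor, aabb: torch.Tensor) -> torch.Tensor:
    xyz_max, xyz_min = aabb[0], aabb[1]
    return (pts - xyz_min) / (xyz_max - xyz_min) * 2.0 - 1.0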


@@ -16,8 +16,9 @@ from typing import NamedTuple
from torch.utils.data import Dataset
from utils.general_utils import PILtoTorch
# from scene.dataset_readers import
import torch.nn.functional as F
from utils.graphics_utils import getWorld2View2, focal2fov, fov2focal
- import copy
+ from utils.pose_utils import smooth_camera_poses
class CameraInfo(NamedTuple):
uid: int
R: np.array
@@ -30,6 +31,7 @@ class CameraInfo(NamedTuple):
width: int
height: int
time : float
mask: np.array
class Load_hyper_data(Dataset):
@@ -72,7 +74,6 @@ class Load_hyper_data(Dataset):
self.i_test.append(i)
if id in self.train_id:
self.i_train.append(i) self.i_train.append(i)
self.all_cam = [meta_json[i]['camera_id'] for i in self.all_img]
self.all_time = [meta_json[i]['warp_id'] for i in self.all_img]
@@ -84,21 +85,34 @@ class Load_hyper_data(Dataset):
self.min_time = min(self.all_time)
self.i_video = [i for i in range(len(self.all_img))]
self.i_video.sort()
# all poses
self.all_cam_params = []
for im in self.all_img:
camera = Camera.from_json(f'{datadir}/camera/{im}.json')
camera = camera.scale(ratio)
camera.position -= self.scene_center
camera.position *= self.coord_scale
self.all_cam_params.append(camera)
self.all_img_origin = self.all_img
self.all_depth = [f'{datadir}/depth/{int(1/ratio)}x/{i}.npy' for i in self.all_img]
self.all_img = [f'{datadir}/rgb/{int(1/ratio)}x/{i}.png' for i in self.all_img]
self.h, self.w = self.all_cam_params[0].image_shape
self.map = {}
self.image_one = Image.open(self.all_img[0])
self.image_one_torch = PILtoTorch(self.image_one,None).to(torch.float32)
if os.path.exists(os.path.join(datadir,"covisible")):
self.image_mask = [f'{datadir}/covisible/{int(2)}x/val/{i}.png' for i in self.all_img_origin]
else:
self.image_mask = None
self.generate_video_path()
def generate_video_path(self):
self.select_video_cams = [item for i, item in enumerate(self.all_cam_params) if i % 1 == 0 ]
self.video_path, self.video_time = smooth_camera_poses(self.select_video_cams,10)
# breakpoint()
self.video_path = self.video_path[:500]
self.video_time = self.video_time[:500]
# breakpoint()
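# smooth_camera_poses comes from utils/pose_utils.py and is not shown in this diff; the
# gist (an assumption) is to build a smooth novel-view trajectory by interpolating between
# the selected cameras, e.g. slerp for orientation, linear interpolation for position, and
# evenly spaced timestamps. A rough sketch:
import numpy as np
from scipy.spatial.transform import Rotation, Slerp

def smooth_camera_poses_sketch(cams, n_between=10):
    rots = Rotation.from_matrix(np.stack([c.orientation for c in cams]))
    slerp = Slerp(np.arange(len(cams)), rots)
    poses, times = [], []
    for i in range(len(cams) - 1):
        for a in np.linspace(0.0, 1.0, n_between, endpoint=False):
            t = i + a
            pos = (1 - a) * np.asarray(cams[i].position) + a * np.asarray(cams[i + 1].position)
            poses.append((slerp(t).as_matrix(), pos))
            times.append(t / (len(cams) - 1))
    return poses, np.array(times)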
def __getitem__(self, index):
if self.split == "train":
return self.load_raw(self.i_train[index])
@@ -106,24 +120,26 @@ class Load_hyper_data(Dataset):
elif self.split == "test":
return self.load_raw(self.i_test[index])
elif self.split == "video":
- return self.load_video(self.i_video[index])
+ return self.load_video(index)
def __len__(self):
if self.split == "train":
return len(self.i_train)
elif self.split == "test":
return len(self.i_test)
elif self.split == "video":
- # return len(self.i_video)
- return len(self.video_v2)
+ return len(self.video_path)
+ # return len(self.video_v2)
def load_video(self, idx):
if idx in self.map.keys():
return self.map[idx]
camera = self.all_cam_params[idx]
# camera = self.video_path[idx]
w = self.image_one.size[0]
h = self.image_one.size[1]
# image = PILtoTorch(image,None)
# image = image.to(torch.float32)
- time = self.all_time[idx]
+ time = self.video_time[idx]
# .astype(np.float32)
R = camera.orientation.T
T = - camera.position @ R
FovY = focal2fov(camera.focal_length, self.h)
@@ -131,7 +147,7 @@ class Load_hyper_data(Dataset):
image_path = "/".join(self.all_img[idx].split("/")[:-1])
image_name = self.all_img[idx].split("/")[-1]
caminfo = CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=self.image_one_torch,
- image_path=image_path, image_name=image_name, width=w, height=h, time=time,
+ image_path=image_path, image_name=image_name, width=w, height=h, time=time, mask=None
)
self.map[idx] = caminfo
return caminfo
@@ -143,7 +159,7 @@ class Load_hyper_data(Dataset):
w = image.size[0]
h = image.size[1]
image = PILtoTorch(image,None)
- image = image.to(torch.float32)
+ image = image.to(torch.float32)[:3,:,:]
time = self.all_time[idx]
R = camera.orientation.T
T = - camera.position @ R
@@ -151,8 +167,18 @@ class Load_hyper_data(Dataset):
FovX = focal2fov(camera.focal_length, self.w)
image_path = "/".join(self.all_img[idx].split("/")[:-1])
image_name = self.all_img[idx].split("/")[-1]
if self.image_mask is not None and self.split == "test":
mask = Image.open(self.image_mask[idx])
mask = PILtoTorch(mask,None)
mask = mask.to(torch.float32)[0:1,:,:]
mask = F.interpolate(mask.unsqueeze(0), size=[self.h, self.w], mode='bilinear', align_corners=False).squeeze(0)
else:
mask = None
caminfo = CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
- image_path=image_path, image_name=image_name, width=w, height=h, time=time,
+ image_path=image_path, image_name=image_name, width=w, height=h, time=time, mask=mask
)
self.map[idx] = caminfo
return caminfo
@@ -177,12 +203,19 @@ def format_hyper_data(data_class, split):
FovX = focal2fov(camera.focal_length, data_class.w)
image_path = "/".join(data_class.all_img[index].split("/")[:-1])
image_name = data_class.all_img[index].split("/")[-1]
if data_class.image_mask is not None and data_class.split == "test":
mask = Image.open(data_class.image_mask[index])
mask = PILtoTorch(mask,None)
mask = mask.to(torch.float32)[0:1,:,:]
else:
mask = None
cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=None,
- image_path=image_path, image_name=image_name, width=int(data_class.w), height=int(data_class.h), time=time,
+ image_path=image_path, image_name=image_name, width=int(data_class.w),
height=int(data_class.h), time=time, mask=mask
)
cam_infos.append(cam_info)
return cam_infos
# matrix = np.linalg.inv(np.array(poses))
# R = -np.transpose(matrix[:3,:3])
# R[:,0] = -R[:,0]
# T = -matrix[:3, 3]
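# PILtoTorch is the repo's image helper (utils/general_utils.py) and is not part of this
# diff; in 3DGS-derived code it typically converts a PIL image into a CxHxW float tensor
# in [0, 1], optionally resizing first. A sketch under that assumption:
import numpy as np
import torch

def PILtoTorch_sketch(pil_image, resolution=None):
    if resolution is not None:
        pil_image = pil_image.resize(resolution)
    arr = torch.from_numpy(np.array(pil_image)) / 255.0
    if arr.ndim == 3:
        return arr.permute(2, 0, 1)                # HxWxC -> CxHxW
    return arr.unsqueeze(-1).permute(2, 0, 1)      # grayscale -> 1xHxW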


@@ -264,26 +264,26 @@ class Neural3D_NDC_Dataset(Dataset):
self.near_fars = poses_arr[:, -2:]
videos = glob.glob(os.path.join(self.root_dir, "cam*"))
videos = sorted(videos)
- breakpoint()
+ # breakpoint()
assert len(videos) == poses_arr.shape[0]
H, W, focal = poses[0, :, -1]
focal = focal / self.downsample
self.focal = [focal, focal]
poses = np.concatenate([poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1)
- poses, _ = center_poses(
- poses, self.blender2opencv
- ) # Re-center poses so that the average is near the center.
- near_original = self.near_fars.min()
- scale_factor = near_original * 0.75
- self.near_fars /= (
- scale_factor # rescale nearest plane so that it is at z = 4/3.
- )
- poses[..., 3] /= scale_factor
+ # poses, _ = center_poses(
+ # poses, self.blender2opencv
+ # ) # Re-center poses so that the average is near the center.
+ # near_original = self.near_fars.min()
+ # scale_factor = near_original * 0.75
+ # self.near_fars /= (
+ # scale_factor # rescale nearest plane so that it is at z = 4/3.
+ # )
+ # poses[..., 3] /= scale_factor
# Sample N_views poses for validation - NeRF-like camera trajectory.
- N_views = 120
+ N_views = 300
self.val_poses = get_spiral(poses, self.near_fars, N_views=N_views)
# self.val_poses = self.directions
W, H = self.img_wh

script.sh Normal file

@@ -0,0 +1,95 @@
# bash colmap.sh data/hypernerf/interp/aleks-teapot
# bash colmap.sh data/hypernerf/interp/chickchicken
# bash colmap.sh data/hypernerf/interp/cut-lemon1
# bash colmap.sh data/hypernerf/interp/hand1-dense-v2
# bash colmap.sh data/hypernerf/interp/slice-banana
# bash colmap.sh data/hypernerf/interp/torchocolate
# bash colmap.sh data/hypernerf/virg/broom2
# bash colmap.sh data/hypernerf/virg/peel-banana
# bash colmap.sh data/hypernerf/virg/vrig-3dprinter
# bash colmap.sh data/hypernerf/virg/vrig-chicken
# python scripts/downsample_point.py data/dynerf/coffee_martini/points3D_downsample.ply data/dynerf/coffee_martini/points3D_downsample2.ply
# python scripts/downsample_point.py data/dynerf/flame_salmon_1/points3D_downsample.ply data/dynerf/flame_salmon_1/points3D_downsample2.ply
# python scripts/downsample_point.py data/dynerf/cut_roasted_beef/points3D_downsample.ply data/dynerf/cut_roasted_beef/points3D_downsample2.ply
# python scripts/downsample_point.py data/dynerf/cook_spinach/points3D_downsample.ply data/dynerf/cook_spinach/points3D_downsample2.ply
# python scripts/downsample_point.py data/dynerf/flame_steak/points3D_downsample.ply data/dynerf/flame_steak/points3D_downsample2.ply
# python scripts/downsample_point.py data/dynerf/sear_steak/points3D_downsample.ply data/dynerf/sear_steak/points3D_downsample2.ply
# python scripts/downsample_point.py data/hypernerf/virg/broom2/dense.ply data/hypernerf/virg/broom2/dense_downsample.ply
# python scripts/downsample_point.py data/hypernerf/virg/peel-banana/dense.ply data/hypernerf/virg/peel-banana/dense_downsample.ply
# python scripts/downsample_point.py data/hypernerf/virg/vrig-chicken/dense.ply data/hypernerf/virg/vrig-chicken/dense_downsample.ply
# python scripts/downsample_point.py data/hypernerf/virg/vrig-3dprinter/dense.ply data/hypernerf/virg/vrig-3dprinter/dense_downsample.ply
# bash colmap.sh data/dycheck/sriracha-tree
# bash colmap.sh data/dycheck/apple
# bash colmap.sh data/dycheck/space-out
# bash colmap.sh data/dycheck/teddy
# bash colmap.sh data/dycheck/wheel
# bash colmap.sh data/dycheck/spin
# bash colmap.sh data/dnerf/hook/
# bash colmap.sh data/dnerf/mutant
# bash colmap.sh data/dnerf/standup
# bash colmap.sh data/dnerf/lego
# bash colmap.sh data/dnerf/trex
# bash colmap.sh data/dnerf/bouncingballs
# bash colmap.sh data/dnerf/hellwarrior
# bash colmap.sh data/nerf_synthetic/chair
# bash colmap.sh data/nerf_synthetic/drums
# bash colmap.sh data/nerf_synthetic/ficus
# bash colmap.sh data/nerf_synthetic/hotdog
# bash colmap.sh data/nerf_synthetic/lego
# bash colmap.sh data/nerf_synthetic/materials
# bash colmap.sh data/nerf_synthetic/mic
# bash colmap.sh data/nerf_synthetic/ship
# bash scripts/metric_dynerf.sh dynerf_batch4_do
# wait
# bash scripts/metric_hyper_one.sh hypernerf2
# wait
# bash scripts/metric_hyper_one.sh hypernerf_emptyvoxel2
# wait
# bash scripts/metric_hyper_one.sh hypernerf_emptyvoxel
# wait
# bash scripts/metric_dynerf.sh dynerf_batch1_do
# wait
# bash scripts/metric_dynerf.sh dynerf_res124
# wait
# bash scripts/metric_dynerf.sh dynerf_emptyvoxel1
# wait
# bash scripts/metric_dynerf.sh dynerf_emptyvoxel2
# wait
# exp_name="dynerf_static"
# export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dynerf/flame_salmon_1/colmap/dense/workspace --port 6368 --expname "$exp_name/flame_salmon_1" --configs arguments/$exp_name/default.py &
# export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dynerf/coffee_martini/colmap/dense/workspace --port 6369 --expname "$exp_name/coffee_martini" --configs arguments/$exp_name/default.py
# exp_name="dynerf_4_batch1"
# bash scripts/train_dynerf_ab1.sh dynerf_4_batch1_2 &
# bash scripts/train_dynerf_ab2.sh dynerf_4_batch4_2
# wait
# bash scripts/train_hyper_virg.sh hypernerf3
# bash scripts/train_hyper_interp.sh hypernerf4
# bash scripts/train_hyper_virg.sh hypernerf_3dgs
# exp_name="hypernerf4"
# export CUDA_VISIBLE_DEVICES=0&&python vis_point.py --model_path output/$exp_name/broom2 --configs arguments/$exp_name/broom2.py &
# export CUDA_VISIBLE_DEVICES=2&&python vis_point.py --model_path output/$exp_name/3dprinter --configs arguments/$exp_name/3dprinter.py &
# export CUDA_VISIBLE_DEVICES=2&&python vis_point.py --model_path output/$exp_name/peel-banana --configs arguments/$exp_name/banana.py&
# export CUDA_VISIBLE_DEVICES=3&&python vis_point.py --model_path output/$exp_name/vrig-chicken --configs arguments/$exp_name/chicken.py &
# wait
# exp_name="dnerf_tv_2"
# export CUDA_VISIBLE_DEVICES=3&&python editing.py --model_path output/$exp_name/lego
# exp_name="dnerf_tv_2_1"
# export CUDA_VISIBLE_DEVICES=3&&python vis_point.py --model_path output/ablation/$exp_name/hook --configs arguments/$exp_name/hook.py
# export CUDA_VISIBLE_DEVICES=3&&python vis_point.py --model_path output/ablation/$exp_name/hellwarrior --configs arguments/$exp_name/hellwarrior.py
# export CUDA_VISIBLE_DEVICES=3&&python vis_point.py --model_path output/ablation/$exp_name/jumpingjacks --configs arguments/$exp_name/jumpingjacks.py
# export CUDA_VISIBLE_DEVICES=3&&python vis_point.py --model_path output/ablation/$exp_name/standup --configs arguments/$exp_name/standup.py
exp_name1="medical"
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/medicaldata/images --port 6068 --expname "medical/$exp_name1/" --configs arguments/$exp_name1/bouncingballs.py

scripts/blender2colmap.py Normal file

@@ -0,0 +1,88 @@
import os
import numpy as np
import glob
import sys
import json
from PIL import Image
from tqdm import tqdm
import shutil
import math
def fov2focal(fov, pixels):
return pixels / (2 * math.tan(fov / 2))
def rotmat2qvec(R):
Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
K = np.array([
[Rxx - Ryy - Rzz, 0, 0, 0],
[Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
[Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
[Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
eigvals, eigvecs = np.linalg.eigh(K)
qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
if qvec[0] < 0:
qvec *= -1
return qvec
root_dir = sys.argv[1]
colmap_dir = os.path.join(root_dir,"sparse_")
if not os.path.exists(colmap_dir):
os.makedirs(colmap_dir)
imagecolmap_dir = os.path.join(root_dir,"image_colmap")
if not os.path.exists(imagecolmap_dir):
os.makedirs(imagecolmap_dir)
image_dir = os.path.join(root_dir)
images = os.listdir(image_dir)
images.sort()
camera_json = os.path.join(root_dir,"transforms_train.json")
with open (camera_json) as f:
meta = json.load(f)
try:
image_size = meta['w'], meta['h']
focal = [meta['fl_x'],meta['fl_y']]
except:
try:
image_size = meta['frames'][0]['w'], meta['frames'][0]['h']
focal = [meta['frames'][0]['fl_x'],meta['frames'][0]['fl_y']]
except:
image_size = 800,800
focal = fov2focal(meta['camera_angle_x'], 800)
focal = [focal,focal]
# size = image.size
# breakpoint()
object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w")
object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w")
idx=0
sizes=1
cnt=0
while len(meta['frames'])//sizes > 200:
sizes += 1
for frame in meta['frames']:
cnt+=1
if cnt % sizes != 0:
continue
matrix = np.linalg.inv(np.array(frame["transform_matrix"]))
R = -np.transpose(matrix[:3,:3])
R[:,0] = -R[:,0]
T = -matrix[:3, 3]
T = -np.matmul(R,T)
T = [str(i) for i in T]
qevc = [str(i) for i in rotmat2qvec(np.transpose(R))]
print(idx+1," ".join(qevc)," ".join(T),1,frame['file_path'].split('/')[-1]+".png","\n",file=object_images_file)
print(idx,"SIMPLE_PINHOLE",image_size[0],image_size[1],focal[0],image_size[0]/2,image_size[1]/2,file=object_cameras_file)
idx+=1
# breakpoint()
print(os.path.join(image_dir,frame['file_path']),os.path.join(imagecolmap_dir,frame['file_path'].split('/')[-1]+".png"))
shutil.copy(os.path.join(image_dir,frame['file_path']+".png"),os.path.join(imagecolmap_dir,frame['file_path'].split('/')[-1]+".png"))
# write camera information.
# print(1,"SIMPLE_PINHOLE",image_size[0],image_size[1],focal[0],image_size[0]/2,image_size[1]/2,file=object_cameras_file)
object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w")
object_cameras_file.close()
object_images_file.close()
object_point_file.close()
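# For reference, the COLMAP text files written by this script follow the standard layout:
#   cameras.txt : CAMERA_ID MODEL WIDTH HEIGHT PARAMS[]   (SIMPLE_PINHOLE params: f cx cy)
#   images.txt  : IMAGE_ID QW QX QY QZ TX TY TZ CAMERA_ID NAME
#                 followed by one POINTS2D[] line per image (left empty here)
#   points3D.txt: left empty; 3D points are triangulated later by COLMAP.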

scripts/colmap_converter.py Normal file

@@ -0,0 +1,472 @@
import os
import collections
import numpy as np
import struct
import argparse
CameraModel = collections.namedtuple(
"CameraModel", ["model_id", "model_name", "num_params"])
Camera = collections.namedtuple(
"Camera", ["id", "model", "width", "height", "params"])
BaseImage = collections.namedtuple(
"Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
Point3D = collections.namedtuple(
"Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
class Image(BaseImage):
def qvec2rotmat(self):
return qvec2rotmat(self.qvec)
CAMERA_MODELS = {
CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
CameraModel(model_id=3, model_name="RADIAL", num_params=5),
CameraModel(model_id=4, model_name="OPENCV", num_params=8),
CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
CameraModel(model_id=7, model_name="FOV", num_params=5),
CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
}
CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
for camera_model in CAMERA_MODELS])
CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
for camera_model in CAMERA_MODELS])
def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
"""Read and unpack the next bytes from a binary file.
:param fid:
:param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
:param endian_character: Any of {@, =, <, >, !}
:return: Tuple of read and unpacked values.
"""
data = fid.read(num_bytes)
return struct.unpack(endian_character + format_char_sequence, data)
def write_next_bytes(fid, data, format_char_sequence, endian_character="<"):
"""pack and write to a binary file.
:param fid:
:param data: data to send, if multiple elements are sent at the same time,
they should be encapsulated either in a list or a tuple
:param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
should be the same length as the data list or tuple
:param endian_character: Any of {@, =, <, >, !}
"""
if isinstance(data, (list, tuple)):
bytes = struct.pack(endian_character + format_char_sequence, *data)
else:
bytes = struct.pack(endian_character + format_char_sequence, data)
fid.write(bytes)
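# Tiny illustration of the two struct-based helpers above: packing a little-endian
# (int32, double) pair and reading it back round-trips the values.
import io
buf = io.BytesIO()
write_next_bytes(buf, [7, 3.14], "id")
buf.seek(0)
print(read_next_bytes(buf, num_bytes=12, format_char_sequence="id"))   # -> (7, 3.14)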
def read_cameras_text(path):
"""
see: src/base/reconstruction.cc
void Reconstruction::WriteCamerasText(const std::string& path)
void Reconstruction::ReadCamerasText(const std::string& path)
"""
cameras = {}
with open(path, "r") as fid:
while True:
line = fid.readline()
if not line:
break
line = line.strip()
if len(line) > 0 and line[0] != "#":
elems = line.split()
camera_id = int(elems[0])
model = elems[1]
width = int(elems[2])
height = int(elems[3])
params = np.array(tuple(map(float, elems[4:])))
cameras[camera_id] = Camera(id=camera_id, model=model,
width=width, height=height,
params=params)
return cameras
def read_cameras_binary(path_to_model_file):
"""
see: src/base/reconstruction.cc
void Reconstruction::WriteCamerasBinary(const std::string& path)
void Reconstruction::ReadCamerasBinary(const std::string& path)
"""
cameras = {}
with open(path_to_model_file, "rb") as fid:
num_cameras = read_next_bytes(fid, 8, "Q")[0]
for _ in range(num_cameras):
camera_properties = read_next_bytes(
fid, num_bytes=24, format_char_sequence="iiQQ")
camera_id = camera_properties[0]
model_id = camera_properties[1]
model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
width = camera_properties[2]
height = camera_properties[3]
num_params = CAMERA_MODEL_IDS[model_id].num_params
params = read_next_bytes(fid, num_bytes=8*num_params,
format_char_sequence="d"*num_params)
cameras[camera_id] = Camera(id=camera_id,
model=model_name,
width=width,
height=height,
params=np.array(params))
assert len(cameras) == num_cameras
return cameras
def write_cameras_text(cameras, path):
"""
see: src/base/reconstruction.cc
void Reconstruction::WriteCamerasText(const std::string& path)
void Reconstruction::ReadCamerasText(const std::string& path)
"""
HEADER = "# Camera list with one line of data per camera:\n" + \
"# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n" + \
"# Number of cameras: {}\n".format(len(cameras))
with open(path, "w") as fid:
fid.write(HEADER)
for _, cam in cameras.items():
to_write = [cam.id, cam.model, cam.width, cam.height, *cam.params]
line = " ".join([str(elem) for elem in to_write])
fid.write(line + "\n")
def write_cameras_binary(cameras, path_to_model_file):
"""
see: src/base/reconstruction.cc
void Reconstruction::WriteCamerasBinary(const std::string& path)
void Reconstruction::ReadCamerasBinary(const std::string& path)
"""
with open(path_to_model_file, "wb") as fid:
write_next_bytes(fid, len(cameras), "Q")
for _, cam in cameras.items():
model_id = CAMERA_MODEL_NAMES[cam.model].model_id
camera_properties = [cam.id,
model_id,
cam.width,
cam.height]
write_next_bytes(fid, camera_properties, "iiQQ")
for p in cam.params:
write_next_bytes(fid, float(p), "d")
return cameras
def read_images_text(path):
"""
see: src/base/reconstruction.cc
void Reconstruction::ReadImagesText(const std::string& path)
void Reconstruction::WriteImagesText(const std::string& path)
"""
images = {}
with open(path, "r") as fid:
while True:
line = fid.readline()
if not line:
break
line = line.strip()
if len(line) > 0 and line[0] != "#":
elems = line.split()
image_id = int(elems[0])
qvec = np.array(tuple(map(float, elems[1:5])))
tvec = np.array(tuple(map(float, elems[5:8])))
camera_id = int(elems[8])
image_name = elems[9]
elems = fid.readline().split()
xys = np.column_stack([tuple(map(float, elems[0::3])),
tuple(map(float, elems[1::3]))])
point3D_ids = np.array(tuple(map(int, elems[2::3])))
images[image_id] = Image(
id=image_id, qvec=qvec, tvec=tvec,
camera_id=camera_id, name=image_name,
xys=xys, point3D_ids=point3D_ids)
return images
def read_images_binary(path_to_model_file):
"""
see: src/base/reconstruction.cc
void Reconstruction::ReadImagesBinary(const std::string& path)
void Reconstruction::WriteImagesBinary(const std::string& path)
"""
images = {}
with open(path_to_model_file, "rb") as fid:
num_reg_images = read_next_bytes(fid, 8, "Q")[0]
for _ in range(num_reg_images):
binary_image_properties = read_next_bytes(
fid, num_bytes=64, format_char_sequence="idddddddi")
image_id = binary_image_properties[0]
qvec = np.array(binary_image_properties[1:5])
tvec = np.array(binary_image_properties[5:8])
camera_id = binary_image_properties[8]
image_name = ""
current_char = read_next_bytes(fid, 1, "c")[0]
while current_char != b"\x00": # look for the ASCII 0 entry
image_name += current_char.decode("utf-8")
current_char = read_next_bytes(fid, 1, "c")[0]
num_points2D = read_next_bytes(fid, num_bytes=8,
format_char_sequence="Q")[0]
x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D,
format_char_sequence="ddq"*num_points2D)
xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
tuple(map(float, x_y_id_s[1::3]))])
point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
images[image_id] = Image(
id=image_id, qvec=qvec, tvec=tvec,
camera_id=camera_id, name=image_name,
xys=xys, point3D_ids=point3D_ids)
return images
def write_images_text(images, path):
"""
see: src/base/reconstruction.cc
void Reconstruction::ReadImagesText(const std::string& path)
void Reconstruction::WriteImagesText(const std::string& path)
"""
if len(images) == 0:
mean_observations = 0
else:
mean_observations = sum((len(img.point3D_ids) for _, img in images.items()))/len(images)
HEADER = "# Image list with two lines of data per image:\n" + \
"# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n" + \
"# POINTS2D[] as (X, Y, POINT3D_ID)\n" + \
"# Number of images: {}, mean observations per image: {}\n".format(len(images), mean_observations)
with open(path, "w") as fid:
fid.write(HEADER)
for _, img in images.items():
image_header = [img.id, *img.qvec, *img.tvec, img.camera_id, img.name]
first_line = " ".join(map(str, image_header))
fid.write(first_line + "\n")
points_strings = []
for xy, point3D_id in zip(img.xys, img.point3D_ids):
points_strings.append(" ".join(map(str, [*xy, point3D_id])))
fid.write(" ".join(points_strings) + "\n")
def write_images_binary(images, path_to_model_file):
"""
see: src/base/reconstruction.cc
void Reconstruction::ReadImagesBinary(const std::string& path)
void Reconstruction::WriteImagesBinary(const std::string& path)
"""
with open(path_to_model_file, "wb") as fid:
write_next_bytes(fid, len(images), "Q")
for _, img in images.items():
write_next_bytes(fid, img.id, "i")
write_next_bytes(fid, img.qvec.tolist(), "dddd")
write_next_bytes(fid, img.tvec.tolist(), "ddd")
write_next_bytes(fid, img.camera_id, "i")
for char in img.name:
write_next_bytes(fid, char.encode("utf-8"), "c")
write_next_bytes(fid, b"\x00", "c")
write_next_bytes(fid, len(img.point3D_ids), "Q")
for xy, p3d_id in zip(img.xys, img.point3D_ids):
write_next_bytes(fid, [*xy, p3d_id], "ddq")
def read_points3D_text(path):
"""
see: src/base/reconstruction.cc
void Reconstruction::ReadPoints3DText(const std::string& path)
void Reconstruction::WritePoints3DText(const std::string& path)
"""
points3D = {}
with open(path, "r") as fid:
while True:
line = fid.readline()
if not line:
break
line = line.strip()
if len(line) > 0 and line[0] != "#":
elems = line.split()
point3D_id = int(elems[0])
xyz = np.array(tuple(map(float, elems[1:4])))
rgb = np.array(tuple(map(int, elems[4:7])))
error = float(elems[7])
image_ids = np.array(tuple(map(int, elems[8::2])))
point2D_idxs = np.array(tuple(map(int, elems[9::2])))
points3D[point3D_id] = Point3D(id=point3D_id, xyz=xyz, rgb=rgb,
error=error, image_ids=image_ids,
point2D_idxs=point2D_idxs)
return points3D
def read_points3D_binary(path_to_model_file):
"""
see: src/base/reconstruction.cc
void Reconstruction::ReadPoints3DBinary(const std::string& path)
void Reconstruction::WritePoints3DBinary(const std::string& path)
"""
points3D = {}
with open(path_to_model_file, "rb") as fid:
num_points = read_next_bytes(fid, 8, "Q")[0]
for _ in range(num_points):
binary_point_line_properties = read_next_bytes(
fid, num_bytes=43, format_char_sequence="QdddBBBd")
point3D_id = binary_point_line_properties[0]
xyz = np.array(binary_point_line_properties[1:4])
rgb = np.array(binary_point_line_properties[4:7])
error = np.array(binary_point_line_properties[7])
track_length = read_next_bytes(
fid, num_bytes=8, format_char_sequence="Q")[0]
track_elems = read_next_bytes(
fid, num_bytes=8*track_length,
format_char_sequence="ii"*track_length)
image_ids = np.array(tuple(map(int, track_elems[0::2])))
point2D_idxs = np.array(tuple(map(int, track_elems[1::2])))
points3D[point3D_id] = Point3D(
id=point3D_id, xyz=xyz, rgb=rgb,
error=error, image_ids=image_ids,
point2D_idxs=point2D_idxs)
return points3D
def write_points3D_text(points3D, path):
"""
see: src/base/reconstruction.cc
void Reconstruction::ReadPoints3DText(const std::string& path)
void Reconstruction::WritePoints3DText(const std::string& path)
"""
if len(points3D) == 0:
mean_track_length = 0
else:
mean_track_length = sum((len(pt.image_ids) for _, pt in points3D.items()))/len(points3D)
HEADER = "# 3D point list with one line of data per point:\n" + \
"# POINT3D_ID, X, Y, Z, R, G, B, ERROR, TRACK[] as (IMAGE_ID, POINT2D_IDX)\n" + \
"# Number of points: {}, mean track length: {}\n".format(len(points3D), mean_track_length)
with open(path, "w") as fid:
fid.write(HEADER)
for _, pt in points3D.items():
point_header = [pt.id, *pt.xyz, *pt.rgb, pt.error]
fid.write(" ".join(map(str, point_header)) + " ")
track_strings = []
for image_id, point2D in zip(pt.image_ids, pt.point2D_idxs):
track_strings.append(" ".join(map(str, [image_id, point2D])))
fid.write(" ".join(track_strings) + "\n")
def write_points3D_binary(points3D, path_to_model_file):
"""
see: src/base/reconstruction.cc
void Reconstruction::ReadPoints3DBinary(const std::string& path)
void Reconstruction::WritePoints3DBinary(const std::string& path)
"""
with open(path_to_model_file, "wb") as fid:
write_next_bytes(fid, len(points3D), "Q")
for _, pt in points3D.items():
write_next_bytes(fid, pt.id, "Q")
write_next_bytes(fid, pt.xyz.tolist(), "ddd")
write_next_bytes(fid, pt.rgb.tolist(), "BBB")
write_next_bytes(fid, pt.error, "d")
track_length = pt.image_ids.shape[0]
write_next_bytes(fid, track_length, "Q")
for image_id, point2D_id in zip(pt.image_ids, pt.point2D_idxs):
write_next_bytes(fid, [image_id, point2D_id], "ii")
def detect_model_format(path, ext):
if os.path.isfile(os.path.join(path, "cameras" + ext)) and \
os.path.isfile(os.path.join(path, "images" + ext)) and \
os.path.isfile(os.path.join(path, "points3D" + ext)):
print("Detected model format: '" + ext + "'")
return True
return False
def read_model(path, ext=""):
# try to detect the extension automatically
if ext == "":
if detect_model_format(path, ".bin"):
ext = ".bin"
elif detect_model_format(path, ".txt"):
ext = ".txt"
else:
print("Provide model format: '.bin' or '.txt'")
return
if ext == ".txt":
cameras = read_cameras_text(os.path.join(path, "cameras" + ext))
images = read_images_text(os.path.join(path, "images" + ext))
points3D = read_points3D_text(os.path.join(path, "points3D") + ext)
else:
cameras = read_cameras_binary(os.path.join(path, "cameras" + ext))
images = read_images_binary(os.path.join(path, "images" + ext))
points3D = read_points3D_binary(os.path.join(path, "points3D") + ext)
return cameras, images, points3D
def write_model(cameras, images, points3D, path, ext=".bin"):
if ext == ".txt":
write_cameras_text(cameras, os.path.join(path, "cameras" + ext))
write_images_text(images, os.path.join(path, "images" + ext))
write_points3D_text(points3D, os.path.join(path, "points3D") + ext)
else:
write_cameras_binary(cameras, os.path.join(path, "cameras" + ext))
write_images_binary(images, os.path.join(path, "images" + ext))
write_points3D_binary(points3D, os.path.join(path, "points3D") + ext)
return cameras, images, points3D
def qvec2rotmat(qvec):
return np.array([
[1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
[2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
[2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])
def rotmat2qvec(R):
Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
K = np.array([
[Rxx - Ryy - Rzz, 0, 0, 0],
[Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
[Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
[Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
eigvals, eigvecs = np.linalg.eigh(K)
qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
if qvec[0] < 0:
qvec *= -1
return qvec
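# Illustrative sanity check for the two conversions above: quaternion -> matrix ->
# quaternion should round-trip up to numerical precision.
q = np.array([0.9238795, 0.0, 0.3826834, 0.0])   # 45 degrees about the y axis
R = qvec2rotmat(q)
assert np.allclose(qvec2rotmat(rotmat2qvec(R)), R, atol=1e-6)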
def main():
parser = argparse.ArgumentParser(description="Read and write COLMAP binary and text models")
parser.add_argument("--input_model", help="path to input model folder")
parser.add_argument("--input_format", choices=[".bin", ".txt"],
help="input model format", default="")
parser.add_argument("--output_model",
help="path to output model folder")
parser.add_argument("--output_format", choices=[".bin", ".txt"],
help="outut model format", default=".txt")
args = parser.parse_args()
cameras, images, points3D = read_model(path=args.input_model, ext=args.input_format)
print("num_cameras:", len(cameras))
print("num_images:", len(images))
print("num_points3D:", len(points3D))
if args.output_model is not None:
write_model(cameras, images, points3D, path=args.output_model, ext=args.output_format)
if __name__ == "__main__":
main()


@@ -0,0 +1,19 @@
import open3d as o3d
import sys
def process_ply_file(input_file, output_file):
# read the input ply file
pcd = o3d.io.read_point_cloud(input_file)
print(f"Total points: {len(pcd.points)}")
# reduce the input point cloud by voxel downsampling
voxel_size=0.02
while len(pcd.points) > 40000:
pcd = pcd.voxel_down_sample(voxel_size=voxel_size)
print(f"Downsampled points: {len(pcd.points)}")
voxel_size+=0.01
# save the result to the given output path
o3d.io.write_point_cloud(output_file, pcd)
# example usage
process_ply_file(sys.argv[1], sys.argv[2])

scripts/fliter_point.py Normal file

@@ -0,0 +1,40 @@
import open3d as o3d
import os
# root directory path
root_path = "data/dynerf/sear_steak/"
# file names
input_file = "points3D.ply"
output_file = "points3d_filtered.ply"
# load the point cloud
point_cloud_before = o3d.io.read_point_cloud(os.path.join(root_path, input_file))
# number of points before filtering
num_points_before = len(point_cloud_before.points)
# bounding-box size before filtering
bbox_before = point_cloud_before.get_axis_aligned_bounding_box()
bbox_size_before = bbox_before.get_max_bound() - bbox_before.get_min_bound()
# statistical outlier removal
cl, ind = point_cloud_before.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
# build a new point cloud that contains only the inlier points
filtered_point_cloud = point_cloud_before.select_by_index(ind)
# save the filtered point cloud to a new file
o3d.io.write_point_cloud(os.path.join(root_path, output_file), filtered_point_cloud)
# number of points after filtering
num_points_after = len(filtered_point_cloud.points)
# bounding-box size after filtering
bbox = filtered_point_cloud.get_axis_aligned_bounding_box()
bbox_size = bbox.get_max_bound() - bbox.get_min_bound()
print(f"points before filtering: {num_points_before}")
print(f"bounding-box size before filtering: {bbox_size_before}")
print(f"points after filtering: {num_points_after}")
print(f"bounding-box size after filtering: {bbox_size}")
print(f"outlier filtering finished, result saved to {output_file}")

scripts/grow_point.py Normal file

@@ -0,0 +1,25 @@
import open3d as o3d
import numpy as np
def grow_sparse_regions(input_file, output_file):
# read the input ply file
pcd = o3d.io.read_point_cloud(input_file)
# estimate point density via nearest-neighbour distances
densities = o3d.geometry.PointCloud.compute_nearest_neighbor_distance(pcd)
avg_density = np.average(densities)
print(f"Average density: {avg_density}")
# find the sparse part
sparse_indices = np.where(densities > avg_density * 1.2)[0]  # assume points whose spacing exceeds 1.2x the average are "sparse"
sparse_points = np.asarray(pcd.points)[sparse_indices]
breakpoint()
# duplicate and grow the sparse part
# for _ in range(5):  # assume each sparse point is duplicated 5 times
# pcd.points.extend(sparse_points)
# save the result to the output path
o3d.io.write_point_cloud(output_file, pcd)
# example usage
grow_sparse_regions("data/hypernerf/vrig/chickchicken/dense_downsample.ply", "data/hypernerf/interp/chickchicken/dense_downsample.ply")


@@ -0,0 +1,81 @@
import os
import numpy as np
import glob
import sys
import json
from PIL import Image
from tqdm import tqdm
import shutil
def rotmat2qvec(R):
Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
K = np.array([
[Rxx - Ryy - Rzz, 0, 0, 0],
[Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
[Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
[Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
eigvals, eigvecs = np.linalg.eigh(K)
qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
if qvec[0] < 0:
qvec *= -1
return qvec
root_dir = sys.argv[1]
colmap_dir = os.path.join(root_dir,"sparse_")
if not os.path.exists(colmap_dir):
os.makedirs(colmap_dir)
imagecolmap_dir = os.path.join(root_dir,"image_colmap")
if not os.path.exists(imagecolmap_dir):
os.makedirs(imagecolmap_dir)
image_dir = os.path.join(root_dir,"rgb","2x")
images = os.listdir(image_dir)
images.sort()
camera_dir = os.path.join(root_dir,"camera")
cameras = os.listdir(camera_dir)
cameras.sort()
cams = []
for jsonfile in tqdm(cameras):
with open (os.path.join(camera_dir,jsonfile)) as f:
cams.append(json.load(f))
image_size = cams[0]['image_size']
image = Image.open(os.path.join(image_dir,images[0]))
size = image.size
# breakpoint()
object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w")
object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w")
idx=0
cnt=0
sizes=2
while len(cams)//sizes > 200:
sizes += 1
# breakpoint()
for cam, image in zip(cams, images):
cnt+=1
# print(image)
# breakpoint()
if cnt % sizes != 0:
continue
# print("begin to write")
R = np.array(cam['orientation']).T
# breakpoint()
T = -np.array(cam['position'])@R
# T = -np.matmul(R,T)
T = [str(i) for i in T]
qevc = [str(i) for i in rotmat2qvec(R.T)]
print(idx+1," ".join(qevc)," ".join(T),1,image,"\n",file=object_images_file)
print(idx,"SIMPLE_PINHOLE",image_size[0]/2,image_size[1]/2,cam['focal_length']/2,cam['principal_point'][0]/2,cam['principal_point'][1]/2,file=object_cameras_file)
idx+=1
shutil.copy(os.path.join(image_dir,image),os.path.join(imagecolmap_dir,image))
print(idx)
# write camera information.
# print(1,"SIMPLE_PINHOLE",image_size[0],image_size[1],focal[0],image_size[0]/2,image_size[1]/2,file=object_cameras_file)
object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w")
object_cameras_file.close()
object_images_file.close()
object_point_file.close()

scripts/llff2colmap.py Normal file

@@ -0,0 +1,163 @@
import os
import numpy as np
import glob
import sys
def rotmat2qvec(R):
Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
K = np.array([
[Rxx - Ryy - Rzz, 0, 0, 0],
[Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
[Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
[Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
eigvals, eigvecs = np.linalg.eigh(K)
qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
if qvec[0] < 0:
qvec *= -1
return qvec
def normalize(v):
"""Normalize a vector."""
return v / np.linalg.norm(v)
def average_poses(poses):
"""
Calculate the average pose, which is then used to center all poses
using @center_poses. Its computation is as follows:
1. Compute the center: the average of pose centers.
2. Compute the z axis: the normalized average z axis.
3. Compute axis y': the average y axis.
4. Compute x' = y' cross product z, then normalize it as the x axis.
5. Compute the y axis: z cross product x.
Note that at step 3, we cannot directly use y' as y axis since it's
not necessarily orthogonal to z axis. We need to pass from x to y.
Inputs:
poses: (N_images, 3, 4)
Outputs:
pose_avg: (3, 4) the average pose
"""
# 1. Compute the center
center = poses[..., 3].mean(0) # (3)
# 2. Compute the z axis
z = normalize(poses[..., 2].mean(0)) # (3)
# 3. Compute axis y' (no need to normalize as it's not the final output)
y_ = poses[..., 1].mean(0) # (3)
# 4. Compute the x axis
x = normalize(np.cross(z, y_)) # (3)
# 5. Compute the y axis (as z and x are normalized, y is already of norm 1)
y = np.cross(x, z) # (3)
pose_avg = np.stack([x, y, z, center], 1) # (3, 4)
return pose_avg
blender2opencv = np.eye(4)
def center_poses(poses, blender2opencv):
"""
Center the poses so that we can use NDC.
See https://github.com/bmild/nerf/issues/34
Inputs:
poses: (N_images, 3, 4)
Outputs:
poses_centered: (N_images, 3, 4) the centered poses
pose_avg: (3, 4) the average pose
"""
poses = poses @ blender2opencv
pose_avg = average_poses(poses) # (3, 4)
pose_avg_homo = np.eye(4)
pose_avg_homo[
:3
] = pose_avg # convert to homogeneous coordinate for faster computation
pose_avg_homo = pose_avg_homo
# by simply adding 0, 0, 0, 1 as the last row
last_row = np.tile(np.array([0, 0, 0, 1]), (len(poses), 1, 1)) # (N_images, 1, 4)
poses_homo = np.concatenate(
[poses, last_row], 1
) # (N_images, 4, 4) homogeneous coordinate
poses_centered = np.linalg.inv(pose_avg_homo) @ poses_homo # (N_images, 4, 4)
# poses_centered = poses_centered @ blender2opencv
poses_centered = poses_centered[:, :3] # (N_images, 3, 4)
return poses_centered, pose_avg_homo
root_dir = sys.argv[1]
colmap_dir = os.path.join(root_dir,"sparse_")
if not os.path.exists(colmap_dir):
os.makedirs(colmap_dir)
poses_arr = np.load(os.path.join(root_dir, "poses_bounds.npy"))
poses = poses_arr[:, :-2].reshape([-1, 3, 5]) # (N_cams, 3, 5)
near_fars = poses_arr[:, -2:]
videos = glob.glob(os.path.join(root_dir, "cam*"))
videos = sorted(videos)
assert len(videos) == poses_arr.shape[0]
H, W, focal = poses[0, :, -1]
focal = focal/2
focal = [focal, focal]
poses = np.concatenate([poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1)
# poses, _ = center_poses(
# poses, blender2opencv
# ) # Re-center poses so that the average is near the center.
# near_original = near_fars.min()
# scale_factor = near_original * 0.75
# near_fars /= (
# scale_factor # rescale nearest plane so that it is at z = 4/3.
# )
# poses[..., 3] /= scale_factor
# Sample N_views poses for validation - NeRF-like camera trajectory.
# val_poses = directions
videos = glob.glob(os.path.join(root_dir, "cam*"))
videos = sorted(videos)
image_paths = []
for index, video_path in enumerate(videos):
image_path = os.path.join(video_path,"images","0000.png")
image_paths.append(image_path)
print(image_paths)
goal_dir = os.path.join(root_dir,"image_colmap")
if not os.path.exists(goal_dir):
os.makedirs(goal_dir)
import shutil
image_name_list =[]
for index, image in enumerate(image_paths):
image_name = image.split("/")[-1].split('.')
image_name[0] = "r_%03d" % index
print(image_name)
# breakpoint()
image_name = ".".join(image_name)
image_name_list.append(image_name)
goal_path = os.path.join(goal_dir,image_name)
shutil.copy(image,goal_path)
print(poses)
# breakpoint()
# write image information.
object_images_file = open(os.path.join(colmap_dir,"images.txt"),"w")
for idx, pose in enumerate(poses):
# pose_44 = np.eye(4)
R = pose[:3,:3]
R = -R
R[:,0] = -R[:,0]
T = pose[:3,3]
R = np.linalg.inv(R)
T = -np.matmul(R,T)
T = [str(i) for i in T]
# T = ["%.3f"%i for i in pose[:3,3]]
qevc = [str(i) for i in rotmat2qvec(R)]
# breakpoint()
print(idx+1," ".join(qevc)," ".join(T),1,image_name_list[idx],"\n",file=object_images_file)
# breakpoint()
# write camera information.
object_cameras_file = open(os.path.join(colmap_dir,"cameras.txt"),"w")
print(1,"SIMPLE_PINHOLE",1352,1014,focal[0],1352/2,1014/2,file=object_cameras_file)
object_point_file = open(os.path.join(colmap_dir,"points3D.txt"),"w")
object_cameras_file.close()
object_images_file.close()
object_point_file.close()

scripts/merge_point.py Normal file

@@ -0,0 +1,23 @@
import open3d as o3d
import os
from tqdm import tqdm
def merge_point_clouds(directory, output_file):
    # Initialize an empty point cloud
    merged_pcd = o3d.geometry.PointCloud()
    # Iterate over every file in the directory
    for filename in tqdm(os.listdir(directory)):
        if filename.endswith('.ply'):
            # Read the point cloud file
            pcd = o3d.io.read_point_cloud(os.path.join(directory, filename))
            # Merge it into the accumulated point cloud
            merged_pcd += pcd
    # Remove points that share the same position
    merged_pcd = merged_pcd.remove_duplicated_points()
    # Write the merged point cloud to a single output file
    o3d.io.write_point_cloud(output_file, merged_pcd)
# Example usage
merge_point_clouds("point_clouds_directory", "merged.ply")

View File

@ -1,13 +0,0 @@
exp_name1=$1
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name1/standup/" &
export CUDA_VISIBLE_DEVICES=1&&python metrics.py --model_path "output/$exp_name1/jumpingjacks/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name1/bouncingballs/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name1/lego/"
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name1/hellwarrior/" &
export CUDA_VISIBLE_DEVICES=1&&python metrics.py --model_path "output/$exp_name1/hook/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name1/trex/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name1/mutant/" &
wait
echo "Done"

View File

@ -1,43 +1,42 @@
exp_name1=$1
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dnerf/lego --port 6068 --expname "$exp_name1/lego" --configs arguments/$exp_name1/lego.py &
export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/dnerf/bouncingballs --port 6066 --expname "$exp_name1/bouncingballs" --configs arguments/$exp_name1/bouncingballs.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dnerf/jumpingjacks --port 6069 --expname "$exp_name1/jumpingjacks" --configs arguments/$exp_name1/jumpingjacks.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/trex --port 6070 --expname "$exp_name1/trex" --configs arguments/$exp_name1/trex.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/jumpingjacks --port 7169 --expname "$exp_name1/jumpingjacks" --configs arguments/$exp_name1/jumpingjacks.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dnerf/trex --port 7170 --expname "$exp_name1/trex" --configs arguments/$exp_name1/trex.py
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/jumpingjacks/" --skip_train --configs arguments/$exp_name1/jumpingjacks.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/trex/" --skip_train --configs arguments/$exp_name1/trex.py
wait
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dnerf/mutant --port 6068 --expname "$exp_name1/mutant" --configs arguments/$exp_name1/mutant.py &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name1/jumpingjacks/" &
export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/dnerf/standup --port 6066 --expname "$exp_name1/standup" --configs arguments/$exp_name1/standup.py &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name1/trex/"
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dnerf/hook --port 6069 --expname "$exp_name1/hook" --configs arguments/$exp_name1/hook.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/hellwarrior --port 6070 --expname "$exp_name1/hellwarrior" --configs arguments/$exp_name1/hellwarrior.py &
wait
echo "Done"
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dnerf/mutant --port 7168 --expname "$exp_name1/mutant" --configs arguments/$exp_name1/mutant.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/standup --port 7166 --expname "$exp_name1/standup" --configs arguments/$exp_name1/standup.py
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/mutant/" --skip_train --configs arguments/$exp_name1/mutant.py &
exp_name1=$1
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/standup/" --skip_train --configs arguments/$exp_name1/standup.py
wait
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path "output/$exp_name1/standup/" --skip_train --configs arguments/$exp_name1/standup.py & export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name1/mutant/" &
export CUDA_VISIBLE_DEVICES=1&&python render.py --model_path "output/$exp_name1/jumpingjacks/" --skip_train --configs arguments/$exp_name1/jumpingjacks.py & export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name1/standup/"
wait
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dnerf/hook --port 7369 --expname "$exp_name1/hook" --configs arguments/$exp_name1/hook.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/hellwarrior --port 7370 --expname "$exp_name1/hellwarrior" --configs arguments/$exp_name1/hellwarrior.py
wait
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/hellwarrior/" --skip_train --configs arguments/$exp_name1/hellwarrior.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/hook/" --skip_train --configs arguments/$exp_name1/hook.py
wait
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name1/hellwarrior/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name1/hook/"
wait
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dnerf/lego --port 7168 --expname "$exp_name1/lego" --configs arguments/$exp_name1/lego.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/bouncingballs --port 7166 --expname "$exp_name1/bouncingballs" --configs arguments/$exp_name1/bouncingballs.py
wait
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/bouncingballs/" --skip_train --configs arguments/$exp_name1/bouncingballs.py & export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/bouncingballs/" --skip_train --configs arguments/$exp_name1/bouncingballs.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/lego/" --skip_train --configs arguments/$exp_name1/lego.py & export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/lego/" --skip_train --configs arguments/$exp_name1/lego.py
wait wait
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path "output/$exp_name1/hellwarrior/" --skip_train --configs arguments/$exp_name1/hellwarrior.py &
export CUDA_VISIBLE_DEVICES=1&&python render.py --model_path "output/$exp_name1/hook/" --skip_train --configs arguments/$exp_name1/hook.py &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/trex/" --skip_train --configs arguments/$exp_name1/trex.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/mutant/" --skip_train --configs arguments/$exp_name1/mutant.py &
# wait
echo "Done"
exp_name1=$1
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name1/standup/" &
export CUDA_VISIBLE_DEVICES=1&&python metrics.py --model_path "output/$exp_name1/jumpingjacks/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name1/bouncingballs/" & export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name1/bouncingballs/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name1/lego/" export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name1/lego/"
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name1/hellwarrior/" &
export CUDA_VISIBLE_DEVICES=1&&python metrics.py --model_path "output/$exp_name1/hook/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name1/trex/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name1/mutant/" &
wait
echo "Done"

View File

@ -1,9 +1,15 @@
import json
import os
exp_name = ["dnerf_tv_test"]
# exp_name = ["dnerf_tv_nodx","dnerf_tv_nodr","dnerf_tv_nods","dnerf_tv","dnerf_tv_dshs","dnerf_tv_do",
scene_name = ["bouncingballs","jumpingjacks","lego","standup","hook","mutant","hellwarrior","trex"]
# "dnerf_tv_2","dnerf_tv_8","dnerf_tv_deepmlp"]
# exp_name= ["dnerf_tv_2_slim"]
exp_name=["dynerf_default","dynerf_9"]
# exp_name = ["hypernerf_3dgs"]
scene_name = ["coffee_martini", "cook_spinach", "cut_roasted_beef", "flame_salmon_1", "flame_steak", "sear_steak"]
# scene_name = ["bouncingballs","jumpingjacks","lego","standup","hook","mutant","hellwarrior","trex"]
# scene_name = ["3dprinter","broom2","peel-banana","vrig-chicken"]
json_name = "results.json"
result_json = {"SSIM":0,"PSNR":0,"LPIPS":0}
result_json = {"PSNR":0,"SSIM":0,"MS-SSIM":0,"D-SSIM":0,"LPIPS-vgg":0,"LPIPS-alex":0,"LPIPS":0}
exp_json = {}
for exps in exp_name:
exp_json[exps] = result_json.copy()
@ -14,10 +20,12 @@ for scene in scene_name:
js = json.load(f)
# print(js)
# print(scene, experiment, js["ours_20000"])
for res in ["ours_30000","ours_20000","ours_14000","ours_7000","ours_3000"]:
for res in ["ours_30000","ours_20000","ours_14000","ours_10000","ours_7000","ours_3000"]:
if res in js.keys():
for key, item in js[res].items():
exp_json[experiment][key] += item
if key in exp_json[experiment].keys():
exp_json[experiment][key] += item
print(scene, key, item)
break
# for scene in scene_name:
@ -25,8 +33,11 @@ for scene in scene_name:
for experiment in exp_name:
print(exp_json[experiment])
for key, item in exp_json[experiment].items():
exp_json[experiment][key] /= 8
exp_json[experiment][key] /= len(scene_name)
for key,item in exp_json.items():
print(key)
print("%.4f"%item["PSNR"],"&","%.4f"%item["SSIM"],"&","%.4f"%item["LPIPS"],)
print("PSNR,SSIM,D-SSIM,MS-SSIM,LPIPS-alex,LPIPS-vgg","LPIPS")
print("%.4f"%item["PSNR"],"&","%.4f"%item["SSIM"],"%.4f"%item["D-SSIM"],
"%.4f"%item["MS-SSIM"],"&","%.4f"%item["LPIPS-alex"],"%.4f"%item["LPIPS-vgg"],
"%.4f"%item["LPIPS"])
# break

View File

@ -1,13 +0,0 @@
exp_name1=$1
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/standup/" --skip_train --configs arguments/$exp_name1/standup.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/jumpingjacks/" --skip_train --configs arguments/$exp_name1/jumpingjacks.py &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/bouncingballs/" --skip_train --configs arguments/$exp_name1/bouncingballs.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/lego/" --skip_train --configs arguments/$exp_name1/lego.py &
wait
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/hellwarrior/" --skip_train --configs arguments/$exp_name1/hellwarrior.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/hook/" --skip_train --configs arguments/$exp_name1/hook.py &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path "output/$exp_name1/trex/" --skip_train --configs arguments/$exp_name1/trex.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path "output/$exp_name1/mutant/" --skip_train --configs arguments/$exp_name1/mutant.py &
# wait
echo "Done"

View File

@ -1,7 +1,7 @@
exp_name1=$1
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dnerf/lego --port 6068 --expname "$exp_name1/lego" --configs arguments/$exp_name1/lego.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/bouncingballs --port 6066 --expname "$exp_name1/bouncingballs" --configs arguments/$exp_name1/bouncingballs.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/bouncingballs --port 6266 --expname "$exp_name1/bouncingballs" --configs arguments/$exp_name1/bouncingballs.py &
wait
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dnerf/jumpingjacks --port 6069 --expname "$exp_name1/jumpingjacks" --configs arguments/$exp_name1/jumpingjacks.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dnerf/trex --port 6070 --expname "$exp_name1/trex" --configs arguments/$exp_name1/trex.py &

View File

@ -1,21 +0,0 @@
bash scripts/process_dnerf.sh dnerf_tv_test
wait
# bash scripts/train_ablation.sh dnerf_3dgs
# wait
# bash scripts/train_ablation.sh dnerf_gridlarge
# wait
# bash scripts/train_ablation.sh dnerf_gridsmall
# wait
# bash scripts/train_ablation.sh dnerf_gridsmaller
# wait
# bash scripts/train_ablation.sh dnerf_mlplarge
# wait
# bash scripts/train_ablation.sh dnerf_mlplarger
# wait
# bash scripts/train_ablation.sh dnerf_nocoarse
# wait
# bash scripts/train_ablation.sh dnerf_slim
# wait
# bash scripts/train_ablation.sh dnerf_notv
# wait
# bash scripts/train_ablation.sh dnerf_imageloss

21
scripts/train_dycheck.sh Normal file
View File

@ -0,0 +1,21 @@
exp_name1=$1
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dycheck/spin --port 6084 --expname $exp_name1/spin/ --configs arguments/$exp_name1/default.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dycheck/space-out --port 6083 --expname $exp_name1/space-out/ --configs arguments/$exp_name1/default.py &
wait
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name1/space-out/ --configs arguments/$exp_name1/default.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name1/spin/ --configs arguments/$exp_name1/default.py
wait
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dycheck/teddy/ --port 6081 --expname $exp_name1/teddy/ --configs arguments/$exp_name1/default.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dycheck/apple/ --port 6082 --expname $exp_name1/apple/ --configs arguments/$exp_name1/default.py
wait
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name1/teddy/ --skip_train --configs arguments/$exp_name1/default.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name1/apple/ --skip_train --configs arguments/$exp_name1/default.py
wait
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path output/$exp_name1/apple/ &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path output/$exp_name1/teddy/ &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path output/$exp_name1/space-out/ &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path output/$exp_name1/spin/
echo "Done"

View File

@ -0,0 +1,25 @@
exp_name1=$1
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dynamic3dgs/data/basketball --port 6068 --expname "$exp_name1/dynamic3dgs/basketball" --configs arguments/$exp_name1/default.py
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dynamic3dgs/data/boxes --port 6069 --expname "$exp_name1/dynamic3dgs/boxes" --configs arguments/$exp_name1/default.py
wait
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dynamic3dgs/data/football --port 6068 --expname "$exp_name1/dynamic3dgs/football" --configs arguments/$exp_name1/default.py
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dynamic3dgs/data/juggle --port 6069 --expname "$exp_name1/dynamic3dgs/juggle" --configs arguments/$exp_name1/default.py
wait
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dynamic3dgs/data/softball --port 6068 --expname "$exp_name1/dynamic3dgs/softball" --configs arguments/$exp_name1/default.py
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dynamic3dgs/data/tennis --port 6069 --expname "$exp_name1/dynamic3dgs/tennis" --configs arguments/$exp_name1/default.py
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name1/dynamic3dgs/basketball --configs arguments/$exp_name1/default.py --skip_train
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name1/dynamic3dgs/boxes --configs arguments/$exp_name1/default.py --skip_train
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name1/dynamic3dgs/football --configs arguments/$exp_name1/default.py --skip_train
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name1/dynamic3dgs/juggle --configs arguments/$exp_name1/default.py --skip_train
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name1/dynamic3dgs/softball --configs arguments/$exp_name1/default.py --skip_train
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name1/dynamic3dgs/tennis --configs arguments/$exp_name1/default.py --skip_train
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path output/$exp_name1/dynamic3dgs/basketball
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path output/$exp_name1/dynamic3dgs/boxes
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path output/$exp_name1/dynamic3dgs/football
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path output/$exp_name1/dynamic3dgs/juggle
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path output/$exp_name1/dynamic3dgs/softball
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path output/$exp_name1/dynamic3dgs/tennis

View File

@ -1,27 +1,30 @@
exp_name=$1
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dynerf/cut_roasted_beef --port 6068 --expname "$exp_name/cut_roasted_beef" --configs arguments/$exp_name/default.py &
# export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/dynerf/flame_salmon_1 --port 6468 --expname "$exp_name/flame_salmon_1" --configs arguments/$exp_name/flame_salmon_1.py &
export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/dynerf/cook_spinach --port 6066 --expname "$exp_name/cook_spinach" --configs arguments/$exp_name/default.py &
# export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dynerf/coffee_martini --port 6472 --expname "$exp_name/coffee_martini" --configs arguments/$exp_name/coffee_martini.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dynerf/sear_steak --port 6069 --expname "$exp_name/sear_steak" --configs arguments/$exp_name/default.py &
#
# export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dynerf/cook_spinach --port 6436 --expname "$exp_name/cook_spinach" --configs arguments/$exp_name/cook_spinach.py &
# wait
# export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/dynerf/cut_roasted_beef --port 6470 --expname "$exp_name/cut_roasted_beef" --configs arguments/$exp_name/cut_roasted_beef.py
#
# export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/dynerf/flame_steak --port 6471 --expname "$exp_name/flame_steak" --configs arguments/$exp_name/flame_steak.py &
# export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dynerf/sear_steak --port 6569 --expname "$exp_name/sear_steak" --configs arguments/$exp_name/sear_steak.py
# wait
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/cut_roasted_beef --configs arguments/$exp_name/cut_roasted_beef.py --skip_train &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/sear_steak --configs arguments/$exp_name/sear_steak.py --skip_train
wait
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/dynerf/flame_salmon_1 --port 6070 --expname "$exp_name/flame_salmon_1" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/flame_steak --configs arguments/$exp_name/flame_steak.py --skip_train &
export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/dynerf/flame_steak --port 6071 --expname "$exp_name/flame_steak" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/flame_salmon_1 --configs arguments/$exp_name/flame_salmon_1.py --skip_train
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/dynerf/coffee_martini --port 6071 --expname "$exp_name/coffee_martini" --configs arguments/$exp_name/default.py &
wait
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name/cut_roasted_beef --configs arguments/$exp_name/default.py --skip_train &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/cook_spinach --configs arguments/$exp_name/cook_spinach.py --skip_train &
export CUDA_VISIBLE_DEVICES=1&&python render.py --model_path output/$exp_name/cook_spinach --configs arguments/$exp_name/default.py --skip_train &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/coffee_martini --configs arguments/$exp_name/coffee_martini.py --skip_train &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/sear_steak --configs arguments/$exp_name/default.py --skip_train &
# export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/hand1-dense-v2 --configs arguments/$exp_name/hand1-dense-v2.py --skip_train
wait
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name/flame_salmon_1 --configs arguments/$exp_name/default.py --skip_train &
# export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/cut_roasted_beef/" &
export CUDA_VISIBLE_DEVICES=1&&python render.py --model_path output/$exp_name/flame_steak --configs arguments/$exp_name/default.py --skip_train &
# export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/cook_spinach/"
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/coffee_martini --configs arguments/$exp_name/default.py --skip_train &
# wait
wait
# export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/sear_steak/" &
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name/cut_roasted_beef/" &
# export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/flame_salmon_1/"
export CUDA_VISIBLE_DEVICES=1&&python metrics.py --model_path "output/$exp_name/cook_spinach/" &
# wait
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/sear_steak/" &
# export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/flame_steak/" &
# export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/hand1-dense-v2/"
# export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/coffee_martini/"
wait
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name/flame_salmon_1/" &
export CUDA_VISIBLE_DEVICES=1&&python metrics.py --model_path "output/$exp_name/flame_steak/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/coffee_martini/" &
echo "Done" echo "Done"

View File

@ -1,20 +0,0 @@
exp_name=$1
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/hypernerf/misc/split-cookie --port 6068 --expname "$exp_name/split-cookie" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/hypernerf/virg/vrig-3dprinter --port 6066 --expname "$exp_name/3dprinter" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/hypernerf/interp/chickchicken --port 6069 --expname "$exp_name/interp-chicken" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/hypernerf/interp/cut-lemon1 --port 6070 --expname "$exp_name/cut-lemon1" --configs arguments/$exp_name/cut-lemon1.py &
# export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/hypernerf/interp/hand1-dense-v2 --port 6071 --expname "$exp_name/hand1-dense-v2" --configs arguments/$exp_name/hand1-dense-v2.py
wait
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/split-cookie --configs arguments/$exp_name/default.py --skip_train --skip_test &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/3dprinter --configs arguments/$exp_name/default.py --skip_train --skip_test &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/interp-chicken --configs arguments/$exp_name/default.py --skip_train --skip_test&
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/cut-lemon1 --configs arguments/$exp_name/cut-lemon1.py --skip_train --skip_test&
# export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/hand1-dense-v2 --configs arguments/$exp_name/hand1-dense-v2.py --skip_train
wait
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name/split-cookie/" &
export CUDA_VISIBLE_DEVICES=1&&python metrics.py --model_path "output/$exp_name/3dprinter/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/interp-chicken/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/cut-lemon1/" &
# export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/hand1-dense-v2/"
wait
echo "Done"

View File

@ -1,13 +0,0 @@
# bash scripts/train_3dvideo.sh
# wait
bash scripts/train_hyper_one.sh hypernerf_format2_virg2
wait
bash scripts/train_hyper_one.sh hypernerf_format2_virg3
wait
# bash scripts/train_hyper.sh hypernerf_format2_lr2
# wait
# bash scripts/train_hyper.sh hypernerf_format2_prune
# wait
# wait
# bash scripts/train_ablation.sh dnerf_imageloss

View File

@ -0,0 +1,27 @@
exp_name=$1
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/hypernerf/interp/aleks-teapot --port 6568 --expname "$exp_name/interp/aleks-teapot" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/hypernerf/interp/slice-banana --port 6566 --expname "$exp_name/interp/slice-banana" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/hypernerf/interp/chickchicken --port 6569 --expname "$exp_name/interp/interp-chicken" --configs arguments/$exp_name/default.py &
wait
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/hypernerf/interp/cut-lemon1 --port 6670 --expname $exp_name/interp/cut-lemon1 --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/hypernerf/interp/hand1-dense-v2 --port 6671 --expname $exp_name/interp/hand1-dense-v2 --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/hypernerf/interp/torchocolate --port 6672 --expname $exp_name/interp/torchocolate --configs arguments/$exp_name/default.py &
wait
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name/interp/aleks-teapot --configs arguments/$exp_name/default.py --skip_train &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/interp/slice-banana --configs arguments/$exp_name/default.py --skip_train &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/interp/interp-chicken --configs arguments/$exp_name/default.py --skip_train &
wait
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name/interp/cut-lemon1 --configs arguments/$exp_name/default.py --skip_train &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/interp/hand1-dense-v2 --configs arguments/$exp_name/default.py --skip_train&
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/interp/torchocolate --configs arguments/$exp_name/default.py --skip_train &
wait
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name/interp/aleks-teapot/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/interp/slice-banana/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/interp/interp-chicken/"
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name/interp/cut-lemon1/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/interp/hand1-dense-v2/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/interp/torchocolate/"
wait
echo "Done"

View File

@ -1,20 +1,17 @@
exp_name=$1
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/hypernerf/virg/broom2 --port 6068 --expname "$exp_name/broom2" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=0&&python train.py -s data/hypernerf/virg/broom2 --port 6068 --expname "$exp_name/broom2" --configs arguments/$exp_name/broom2.py &
export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/hypernerf/virg/vrig-3dprinter --port 6066 --expname "$exp_name/3dprinter" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=1&&python train.py -s data/hypernerf/virg/vrig-3dprinter --port 6066 --expname "$exp_name/3dprinter" --configs arguments/$exp_name/3dprinter.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/hypernerf/virg/peel-banana --port 6069 --expname "$exp_name/peel-banana" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=2&&python train.py -s data/hypernerf/virg/peel-banana --port 6069 --expname "$exp_name/peel-banana" --configs arguments/$exp_name/banana.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/hypernerf/virg/vrig-chicken --port 6070 --expname "$exp_name/vrig-chicken" --configs arguments/$exp_name/default.py &
export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/hypernerf/virg/vrig-chicken --port 6070 --expname "$exp_name/vrig-chicken" --configs arguments/$exp_name/chicken.py
# export CUDA_VISIBLE_DEVICES=3&&python train.py -s data/hypernerf/interp/hand1-dense-v2 --port 6071 --expname "$exp_name/hand1-dense-v2" --configs arguments/$exp_name/hand1-dense-v2.py
wait
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name/broom2 --configs arguments/$exp_name/default.py --skip_train &
export CUDA_VISIBLE_DEVICES=0&&python render.py --model_path output/$exp_name/broom2 --configs arguments/$exp_name/broom2.py --skip_train --skip_test &
export CUDA_VISIBLE_DEVICES=1&&python render.py --model_path output/$exp_name/3dprinter --configs arguments/$exp_name/default.py --skip_train &
export CUDA_VISIBLE_DEVICES=1&&python render.py --model_path output/$exp_name/3dprinter --configs arguments/$exp_name/3dprinter.py --skip_train --skip_test &
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/peel-banana --configs arguments/$exp_name/default.py --skip_train&
export CUDA_VISIBLE_DEVICES=2&&python render.py --model_path output/$exp_name/peel-banana --configs arguments/$exp_name/banana.py --skip_train --skip_test &
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/vrig-chicken --configs arguments/$exp_name/default.py --skip_train&
export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/vrig-chicken --configs arguments/$exp_name/chicken.py --skip_train --skip_test &
# export CUDA_VISIBLE_DEVICES=3&&python render.py --model_path output/$exp_name/hand1-dense-v2 --configs arguments/$exp_name/hand1-dense-v2.py --skip_train
wait
export CUDA_VISIBLE_DEVICES=0&&python metrics.py --model_path "output/$exp_name/broom2/" &
export CUDA_VISIBLE_DEVICES=1&&python metrics.py --model_path "output/$exp_name/3dprinter/" &
export CUDA_VISIBLE_DEVICES=2&&python metrics.py --model_path "output/$exp_name/peel-banana/" &
export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/vrig-chicken/" &
# export CUDA_VISIBLE_DEVICES=3&&python metrics.py --model_path "output/$exp_name/hand1-dense-v2/"
wait
echo "Done"

View File

@ -0,0 +1,70 @@
import numpy as np
import cv2
import os
import shutil
from tqdm import tqdm
def resort(frames):
newframes = {}
min_frameid = 10000000
for frame in frames:
frameid = int(frame["file_path"].split('/')[1].split('.')[0])
# print()
if frameid < min_frameid:min_frameid = frameid
newframes[frameid] = frame
return [newframes[i+min_frameid] for i in range(len(frames))]
inputpath = "data/custom/wave-ns/"
outputpath = "data/custom/wave-train/"
testskip = 10
if not os.path.exists(outputpath):
os.makedirs(outputpath)
image_path = os.listdir(os.path.join(inputpath,"images"))
import json
with open(os.path.join(inputpath,"transforms.json"),"r") as f:
meta = json.load(f)
cnt = 0
train_json = {
"w": meta["w"],
"h": meta["h"],
"fl_x": meta["fl_x"],
"fl_y": meta["fl_y"],
"cx": meta["cx"],
"cy": meta["cy"],
"camera_model" : meta["camera_model"],
"frames":[]
}
test_json = {
"w": meta["w"],
"h": meta["h"],
"fl_x": meta["fl_x"],
"fl_y": meta["fl_y"],
"cx": meta["cx"],
"cy": meta["cy"],
"camera_model" : meta["camera_model"],
"frames":[]
}
train_image_path = os.path.join(outputpath,"train")
os.makedirs(train_image_path)
test_image_path = os.path.join(outputpath,"test")
os.makedirs(test_image_path)
# meta["frames"] = resort(meta["frames"])
totallen = len(meta["frames"])
for index, frame in tqdm(enumerate(meta["frames"])):
image_path = os.path.join(inputpath,frame["file_path"])
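# normalize the timestamp to [0, 1) below so downstream loaders see a continuous time value per frame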
frame["time"] = index/totallen
if index % testskip == 0:
frame["file_path"] = "test/" + frame["file_path"].split("/")[-1]
test_json["frames"].append(frame)
shutil.copy(image_path, test_image_path)
else:
frame["file_path"] = "train/" + frame["file_path"].split("/")[-1]
train_json["frames"].append(frame)
shutil.copy(image_path, train_image_path)
with open(os.path.join(outputpath,"transforms_train.json"),"w") as f:
json.dump(train_json, f)
with open(os.path.join(outputpath,"transforms_test.json"),"w") as f:
json.dump(test_json, f)
print("done")

@ -1 +1 @@
Subproject commit f2d8fa9921ea9a6cb9ac1c33a34ebd1b11510657
Subproject commit 2eb32ea251d3b339dab3af8b6fd78d7dec3caf8e

39
test.py Normal file
View File

@ -0,0 +1,39 @@
import cv2
import os
import re
def sorted_alphanumeric(data):
"""
Sort the given filenames alphanumerically, comparing embedded digits by their numeric value.
"""
convert = lambda text: int(text) if text.isdigit() else text.lower()
alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
return sorted(data, key=alphanum_key)
def create_video_from_images(folder_path, output_file, frame_rate=30, img_size=None):
images = [img for img in os.listdir(folder_path) if img.endswith(".jpg") or img.endswith(".png")]
images = sorted_alphanumeric(images) # use the custom numeric-aware sort
# Read the first image to get the frame size
frame = cv2.imread(os.path.join(folder_path, images[0]))
height, width, layers = frame.shape
# If img_size is given, override the frame size
if img_size is not None:
width, height = img_size
# Define the video codec and create the VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # can be swapped for another codec
video = cv2.VideoWriter(output_file, fourcc, frame_rate, (width, height))
for image in images:
img = cv2.imread(os.path.join(folder_path, image))
if img_size is not None:
img = cv2.resize(img, img_size)
video.write(img)
cv2.destroyAllWindows()
video.release()
# Example usage
folder_path = 'output/editing_render' # replace with your image folder path
output_file = 'output_video.mp4' # output video file name
create_video_from_images(folder_path, output_file)

157
train.py
View File

@ -10,7 +10,7 @@
#
import numpy as np
import random
import os
import os, sys
import torch
from random import randint
from utils.loss_utils import l1_loss, ssim, l2_loss, lpips_loss
@ -25,10 +25,12 @@ from argparse import ArgumentParser, Namespace
from arguments import ModelParams, PipelineParams, OptimizationParams, ModelHiddenParams
from torch.utils.data import DataLoader
from utils.timer import Timer
from utils.loader_utils import FineSampler, get_stamp_list
import lpips
from utils.scene_utils import render_training_image
from time import time
import copy
to8b = lambda x : (255*np.clip(x.cpu().numpy(),0,1)).astype(np.uint8)
try:
@ -60,8 +62,41 @@ def scene_reconstruction(dataset, opt, hyper, pipe, testing_iterations, saving_i
progress_bar = tqdm(range(first_iter, final_iter), desc="Training progress")
first_iter += 1
lpips_model = lpips.LPIPS(net="alex").cuda()
# lpips_model = lpips.LPIPS(net="alex").cuda()
video_cams = scene.getVideoCameras()
test_cams = scene.getTestCameras()
train_cams = scene.getTrainCameras()
if not viewpoint_stack and not opt.dataloader:
# dnerf's branch
viewpoint_stack = [i for i in train_cams]
temp_list = copy.deepcopy(viewpoint_stack)
#
batch_size = opt.batch_size
print("data loading done")
if opt.dataloader:
viewpoint_stack = scene.getTrainCameras()
if opt.custom_sampler is not None:
sampler = FineSampler(viewpoint_stack)
viewpoint_stack_loader = DataLoader(viewpoint_stack, batch_size=batch_size,sampler=sampler,num_workers=32,collate_fn=list)
random_loader = False
else:
viewpoint_stack_loader = DataLoader(viewpoint_stack, batch_size=batch_size,shuffle=True,num_workers=32,collate_fn=list)
random_loader = True
loader = iter(viewpoint_stack_loader)
# dynerf, zerostamp_init
# breakpoint()
if stage == "coarse" and opt.zerostamp_init:
load_in_memory = True
# batch_size = 4
temp_list = get_stamp_list(viewpoint_stack,0)
viewpoint_stack = temp_list.copy()
else:
load_in_memory = False
for iteration in range(first_iter, final_iter+1):
if network_gui.conn == None:
network_gui.try_connect()
@ -70,7 +105,7 @@ def scene_reconstruction(dataset, opt, hyper, pipe, testing_iterations, saving_i
net_image_bytes = None
custom_cam, do_training, pipe.convert_SHs_python, pipe.compute_cov3D_python, keep_alive, scaling_modifer, ts = network_gui.receive()
if custom_cam != None:
net_image = render(custom_cam, gaussians, pipe, background, scaling_modifer, stage="stage")["render"]
net_image = render(custom_cam, gaussians, pipe, background, scaling_modifer, stage=stage, cam_type=scene.dataset_type)["render"]
net_image_bytes = memoryview((torch.clamp(net_image, min=0, max=1.0) * 255).byte().permute(1, 2, 0).contiguous().cpu().numpy())
network_gui.send(net_image_bytes, dataset.source_path)
if do_training and ((iteration < int(opt.iterations)) or not keep_alive):
@ -87,22 +122,33 @@ def scene_reconstruction(dataset, opt, hyper, pipe, testing_iterations, saving_i
gaussians.oneupSHdegree()
# Pick a random Camera
if not viewpoint_stack:
viewpoint_stack = scene.getTrainCameras()
# dynerf's branch
batch_size = opt.batch_size
if opt.dataloader and not load_in_memory:
viewpoint_stack_loader = DataLoader(viewpoint_stack, batch_size=batch_size,shuffle=True,num_workers=32,collate_fn=list)
loader = iter(viewpoint_stack_loader)
if opt.dataloader:
try:
viewpoint_cams = next(loader)
except StopIteration:
print("reset dataloader")
print("reset dataloader into random dataloader.")
batch_size = opt.batch_size
if not random_loader:
viewpoint_stack_loader = DataLoader(viewpoint_stack, batch_size=opt.batch_size,shuffle=True,num_workers=32,collate_fn=list)
random_loader = True
loader = iter(viewpoint_stack_loader)
else:
idx = randint(0, len(viewpoint_stack)-1)
viewpoint_cams = [viewpoint_stack[idx]]
else:
idx = 0
viewpoint_cams = []
while idx < batch_size :
viewpoint_cam = viewpoint_stack.pop(randint(0,len(viewpoint_stack)-1))
if not viewpoint_stack :
viewpoint_stack = temp_list.copy()
viewpoint_cams.append(viewpoint_cam)
idx +=1
if len(viewpoint_cams) == 0:
continue
# print(len(viewpoint_cams))
# breakpoint()
# Render
if (iteration - 1) == debug_from:
pipe.debug = True
@ -112,10 +158,14 @@ def scene_reconstruction(dataset, opt, hyper, pipe, testing_iterations, saving_i
visibility_filter_list = []
viewspace_point_tensor_list = []
for viewpoint_cam in viewpoint_cams:
render_pkg = render(viewpoint_cam, gaussians, pipe, background, stage=stage)
render_pkg = render(viewpoint_cam, gaussians, pipe, background, stage=stage,cam_type=scene.dataset_type)
image, viewspace_point_tensor, visibility_filter, radii = render_pkg["render"], render_pkg["viewspace_points"], render_pkg["visibility_filter"], render_pkg["radii"]
images.append(image.unsqueeze(0))
gt_image = viewpoint_cam.original_image.cuda()
if scene.dataset_type!="PanopticSports":
gt_image = viewpoint_cam.original_image.cuda()
else:
gt_image = viewpoint_cam['image'].cuda()
gt_images.append(gt_image.unsqueeze(0))
radii_list.append(radii.unsqueeze(0))
visibility_filter_list.append(visibility_filter.unsqueeze(0))
@ -127,8 +177,8 @@ def scene_reconstruction(dataset, opt, hyper, pipe, testing_iterations, saving_i
image_tensor = torch.cat(images,0)
gt_image_tensor = torch.cat(gt_images,0)
# Loss
Ll1 = l1_loss(image_tensor, gt_image_tensor)
# breakpoint()
# Ll1 = l2_loss(image, gt_image)
Ll1 = l1_loss(image_tensor, gt_image_tensor[:,:3,:,:])
psnr_ = psnr(image_tensor, gt_image_tensor).mean().double()
# norm # norm
@ -137,16 +187,19 @@ def scene_reconstruction(dataset, opt, hyper, pipe, testing_iterations, saving_i
loss = Ll1
if stage == "fine" and hyper.time_smoothness_weight != 0:
# tv_loss = 0
tv_loss = gaussians.compute_regulation(hyper.time_smoothness_weight, hyper.plane_tv_weight, hyper.l1_time_planes)
tv_loss = gaussians.compute_regulation(hyper.time_smoothness_weight, hyper.l1_time_planes, hyper.plane_tv_weight)
loss += tv_loss
if opt.lambda_dssim != 0:
ssim_loss = ssim(image_tensor,gt_image_tensor)
loss += opt.lambda_dssim * (1.0-ssim_loss)
if opt.lambda_lpips !=0:
# if opt.lambda_lpips !=0:
lpipsloss = lpips_loss(image_tensor,gt_image_tensor,lpips_model)
# lpipsloss = lpips_loss(image_tensor,gt_image_tensor,lpips_model)
loss += opt.lambda_lpips * lpipsloss
# loss += opt.lambda_lpips * lpipsloss
loss.backward()
if torch.isnan(loss).any():
print("loss is nan,end training, reexecv program now.")
os.execv(sys.executable, [sys.executable] + sys.argv)
viewspace_point_tensor_grad = torch.zeros_like(viewspace_point_tensor)
for idx in range(0, len(viewspace_point_tensor_list)):
viewspace_point_tensor_grad = viewspace_point_tensor_grad + viewspace_point_tensor_list[idx].grad
@ -167,17 +220,19 @@ def scene_reconstruction(dataset, opt, hyper, pipe, testing_iterations, saving_i
# Log and save
timer.pause()
training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, [pipe, background], stage)
training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, [pipe, background], stage, scene.dataset_type)
if (iteration in saving_iterations):
print("\n[ITER {}] Saving Gaussians".format(iteration))
scene.save(iteration, stage)
if dataset.render_process:
if (iteration < 1000 and iteration % 10 == 1) \
if (iteration < 1000 and iteration % 10 == 9) \
or (iteration < 3000 and iteration % 50 == 1) \
or (iteration < 3000 and iteration % 50 == 49) \
or (iteration < 10000 and iteration % 100 == 1) \
or (iteration < 60000 and iteration % 100 == 99) :
or (iteration < 60000 and iteration % 100 ==1):
# breakpoint()
render_training_image(scene, gaussians, [test_cams[iteration%len(test_cams)]], render, pipe, background, stage+"test", iteration,timer.get_elapsed_time(),scene.dataset_type)
render_training_image(scene, gaussians, [train_cams[iteration%len(train_cams)]], render, pipe, background, stage+"train", iteration,timer.get_elapsed_time(),scene.dataset_type)
# render_training_image(scene, gaussians, train_cams, render, pipe, background, stage+"train", iteration,timer.get_elapsed_time(),scene.dataset_type)
render_training_image(scene, gaussians, video_cams, render, pipe, background, stage, iteration-1,timer.get_elapsed_time())
# total_images.append(to8b(temp_image).transpose(1,2,0))
timer.start()
# Densification
@ -192,20 +247,24 @@ def scene_reconstruction(dataset, opt, hyper, pipe, testing_iterations, saving_i
else:
opacity_threshold = opt.opacity_threshold_fine_init - iteration*(opt.opacity_threshold_fine_init - opt.opacity_threshold_fine_after)/(opt.densify_until_iter)
densify_threshold = opt.densify_grad_threshold_fine_init - iteration*(opt.densify_grad_threshold_fine_init - opt.densify_grad_threshold_after)/(opt.densify_until_iter )
if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0 and gaussians.get_xyz.shape[0]<360000:
if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0 :
size_threshold = 20 if iteration > opt.opacity_reset_interval else None
gaussians.densify(densify_threshold, opacity_threshold, scene.cameras_extent, size_threshold)
gaussians.densify(densify_threshold, opacity_threshold, scene.cameras_extent, size_threshold, 5, 5, scene.model_path, iteration, stage)
if iteration > opt.pruning_from_iter and iteration % opt.pruning_interval == 0:
if iteration > opt.pruning_from_iter and iteration % opt.pruning_interval == 0 and gaussians.get_xyz.shape[0]>200000:
size_threshold = 20 if iteration > opt.opacity_reset_interval else None
gaussians.prune(densify_threshold, opacity_threshold, scene.cameras_extent, size_threshold)
if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter):
# if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0 :
if iteration % opt.densification_interval == 0 and gaussians.get_xyz.shape[0]<360000 and opt.add_point:
gaussians.grow(5,5,scene.model_path,iteration,stage)
# torch.cuda.empty_cache()
if iteration % opt.opacity_reset_interval == 0:
print("reset opacity")
gaussians.reset_opacity()
# Optimizer step
if iteration < opt.iterations:
@ -253,7 +312,7 @@ def prepare_output_and_logger(expname):
print("Tensorboard not available: not logging progress") print("Tensorboard not available: not logging progress")
return tb_writer return tb_writer
def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs, stage): def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_iterations, scene : Scene, renderFunc, renderArgs, stage, dataset_type):
if tb_writer: if tb_writer:
tb_writer.add_scalar(f'{stage}/train_loss_patches/l1_loss', Ll1.item(), iteration) tb_writer.add_scalar(f'{stage}/train_loss_patches/l1_loss', Ll1.item(), iteration)
tb_writer.add_scalar(f'{stage}/train_loss_patchestotal_loss', loss.item(), iteration) tb_writer.add_scalar(f'{stage}/train_loss_patchestotal_loss', loss.item(), iteration)
@ -263,6 +322,7 @@ def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_i
# Report test and samples of training set
if iteration in testing_iterations:
torch.cuda.empty_cache()
#
validation_configs = ({'name': 'test', 'cameras' : [scene.getTestCameras()[idx % len(scene.getTestCameras())] for idx in range(10, 5000, 299)]},
{'name': 'train', 'cameras' : [scene.getTrainCameras()[idx % len(scene.getTrainCameras())] for idx in range(10, 5000, 299)]})
@ -271,17 +331,26 @@ def training_report(tb_writer, iteration, Ll1, loss, l1_loss, elapsed, testing_i
l1_test = 0.0
psnr_test = 0.0
for idx, viewpoint in enumerate(config['cameras']):
image = torch.clamp(renderFunc(viewpoint, scene.gaussians,stage=stage, *renderArgs)["render"], 0.0, 1.0)
image = torch.clamp(renderFunc(viewpoint, scene.gaussians,stage=stage, cam_type=dataset_type, *renderArgs)["render"], 0.0, 1.0)
gt_image = torch.clamp(viewpoint.original_image.to("cuda"), 0.0, 1.0)
if dataset_type == "PanopticSports":
if tb_writer and (idx < 5):
gt_image = torch.clamp(viewpoint["image"].to("cuda"), 0.0, 1.0)
tb_writer.add_images(stage + "/"+config['name'] + "_view_{}/render".format(viewpoint.image_name), image[None], global_step=iteration)
else:
if iteration == testing_iterations[0]:
gt_image = torch.clamp(viewpoint.original_image.to("cuda"), 0.0, 1.0)
tb_writer.add_images(stage + "/"+config['name'] + "_view_{}/ground_truth".format(viewpoint.image_name), gt_image[None], global_step=iteration)
try:
if tb_writer and (idx < 5):
tb_writer.add_images(stage + "/"+config['name'] + "_view_{}/render".format(viewpoint.image_name), image[None], global_step=iteration)
if iteration == testing_iterations[0]:
tb_writer.add_images(stage + "/"+config['name'] + "_view_{}/ground_truth".format(viewpoint.image_name), gt_image[None], global_step=iteration)
except:
pass
l1_test += l1_loss(image, gt_image).mean().double()
psnr_test += psnr(image, gt_image).mean().double()
# mask=viewpoint.mask
psnr_test += psnr(image, gt_image, mask=None).mean().double()
psnr_test /= len(config['cameras']) psnr_test /= len(config['cameras'])
l1_test /= len(config['cameras']) l1_test /= len(config['cameras'])
print("\n[ITER {}] Evaluating {}: L1 {} PSNR {}".format(iteration, config['name'], l1_test, psnr_test)) print("\n[ITER {}] Evaluating {}: L1 {} PSNR {}".format(iteration, config['name'], l1_test, psnr_test))
# print("sh feature",scene.gaussians.get_features.shape)
if tb_writer: if tb_writer:
tb_writer.add_scalar(stage + "/"+config['name'] + '/loss_viewpoint - l1_loss', l1_test, iteration) tb_writer.add_scalar(stage + "/"+config['name'] + '/loss_viewpoint - l1_loss', l1_test, iteration)
tb_writer.add_scalar(stage+"/"+config['name'] + '/loss_viewpoint - psnr', psnr_test, iteration) tb_writer.add_scalar(stage+"/"+config['name'] + '/loss_viewpoint - psnr', psnr_test, iteration)
@ -314,8 +383,8 @@ if __name__ == "__main__":
parser.add_argument('--port', type=int, default=6009)
parser.add_argument('--debug_from', type=int, default=-1)
parser.add_argument('--detect_anomaly', action='store_true', default=False)
parser.add_argument("--test_iterations", nargs="+", type=int, default=[i*500 for i in range(0,120)])
parser.add_argument("--test_iterations", nargs="+", type=int, default=[500*i for i in range(100)])
parser.add_argument("--save_iterations", nargs="+", type=int, default=[2000, 3000, 7_000, 8000, 9000, 14000, 20000, 30_000,45000,60000])
parser.add_argument("--save_iterations", nargs="+", type=int, default=[1000, 3000, 4000, 5000, 6000, 7_000, 9000, 10000, 12000, 14000, 20000, 30_000, 45000, 60000])
parser.add_argument("--quiet", action="store_true")
parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[])
parser.add_argument("--start_checkpoint", type=str, default = None)

View File

@ -23,7 +23,10 @@ def PILtoTorch(pil_image, resolution):
resized_image_PIL = pil_image.resize(resolution)
else:
resized_image_PIL = pil_image
resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0
if np.array(resized_image_PIL).max()!=1:
resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0
else:
resized_image = torch.from_numpy(np.array(resized_image_PIL))
if len(resized_image.shape) == 3:
return resized_image.permute(2, 0, 1)
else:

View File

@ -74,4 +74,59 @@ def fov2focal(fov, pixels):
return pixels / (2 * math.tan(fov / 2)) return pixels / (2 * math.tan(fov / 2))
def focal2fov(focal, pixels): def focal2fov(focal, pixels):
return 2*math.atan(pixels/(2*focal)) return 2*math.atan(pixels/(2*focal))
def apply_rotation(q1, q2):
"""
Applies a rotation to a quaternion.
Parameters:
q1 (Tensor): The original quaternion.
q2 (Tensor): The rotation quaternion to be applied.
Returns:
Tensor: The resulting quaternion after applying the rotation.
"""
# Extract components for readability
w1, x1, y1, z1 = q1
w2, x2, y2, z2 = q2
# Compute the product of the two quaternions
w3 = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
x3 = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
y3 = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
z3 = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
# Combine the components into a new quaternion tensor
q3 = torch.tensor([w3, x3, y3, z3])
# Normalize the resulting quaternion
q3_normalized = q3 / torch.norm(q3)
return q3_normalized
def batch_quaternion_multiply(q1, q2):
"""
Multiply batches of quaternions.
Args:
- q1 (torch.Tensor): A tensor of shape [N, 4] representing the first batch of quaternions.
- q2 (torch.Tensor): A tensor of shape [N, 4] representing the second batch of quaternions.
Returns:
- torch.Tensor: The resulting batch of quaternions after applying the rotation.
"""
# Calculate the product of each quaternion in the batch
w = q1[:, 0] * q2[:, 0] - q1[:, 1] * q2[:, 1] - q1[:, 2] * q2[:, 2] - q1[:, 3] * q2[:, 3]
x = q1[:, 0] * q2[:, 1] + q1[:, 1] * q2[:, 0] + q1[:, 2] * q2[:, 3] - q1[:, 3] * q2[:, 2]
y = q1[:, 0] * q2[:, 2] - q1[:, 1] * q2[:, 3] + q1[:, 2] * q2[:, 0] + q1[:, 3] * q2[:, 1]
z = q1[:, 0] * q2[:, 3] + q1[:, 1] * q2[:, 2] - q1[:, 2] * q2[:, 1] + q1[:, 3] * q2[:, 0]
# Combine into new quaternions
q3 = torch.stack((w, x, y, z), dim=1)
# Normalize the quaternions
norm_q3 = q3 / torch.norm(q3, dim=1, keepdim=True)
return norm_q3
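Both helpers assume (w, x, y, z) ordering and renormalize their output. A minimal sanity-check sketch, assuming unit quaternion inputs: multiplying by the identity quaternion should leave a unit quaternion unchanged, and the batched product should agree with apply_rotation row by row.

import torch
import torch.nn.functional as F

q = F.normalize(torch.randn(5, 4), dim=1)                  # random unit quaternions, (w, x, y, z)
identity = torch.tensor([[1.0, 0.0, 0.0, 0.0]]).repeat(5, 1)
out = batch_quaternion_multiply(q, identity)
print(torch.allclose(out, q, atol=1e-6))                   # True: identity rotation changes nothing
single = apply_rotation(q[0], q[1])
print(torch.allclose(batch_quaternion_multiply(q[:1], q[1:2])[0], single, atol=1e-6))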

View File

@@ -14,6 +14,25 @@ import torch
def mse(img1, img2):
return (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True)
@torch.no_grad()
def psnr(img1, img2):
mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True)
return 20 * torch.log10(1.0 / torch.sqrt(mse))
def psnr(img1, img2, mask=None):
if mask is not None:
img1 = img1.flatten(1)
img2 = img2.flatten(1)
mask = mask.flatten(1).repeat(3,1)
mask = torch.where(mask!=0,True,False)
img1 = img1[mask]
img2 = img2[mask]
mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True)
else:
mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True)
psnr = 20 * torch.log10(1.0 / torch.sqrt(mse.float()))
if mask is not None:
if torch.isinf(psnr).any():
print(mse.mean(),psnr.mean())
psnr = 20 * torch.log10(1.0 / torch.sqrt(mse.float()))
psnr = psnr[~torch.isinf(psnr)]
return psnr
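With the optional mask, only pixels where the mask is non-zero contribute to the error; in that branch the function returns one value per surviving element rather than a single scalar, so callers will typically average the result. A small usage sketch on synthetic tensors:

import torch

gt = torch.rand(3, 64, 64)
pred = (gt + 0.01 * torch.randn(3, 64, 64)).clamp(0, 1)
mask = torch.zeros(1, 64, 64)
mask[:, 16:48, 16:48] = 1.0                                # score only the central crop

print(psnr(pred, gt).mean().item())                        # unmasked PSNR
print(psnr(pred, gt, mask).mean().item())                  # PSNR restricted to the masked region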

52
utils/loader_utils.py Normal file
View File

@@ -0,0 +1,52 @@
import os
import cv2
import random
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import Sampler
from torchvision import transforms, utils
import random
def get_stamp_list(dataset, timestamp):
frame_length = int(len(dataset)/len(dataset.dataset.poses))
# print(frame_length)
if timestamp > frame_length:
raise IndexError("input timestamp bigger than total timestamp.")
print("select index:",[i*frame_length+timestamp for i in range(len(dataset.dataset.poses))])
return [dataset[i*frame_length+timestamp] for i in range(len(dataset.dataset.poses))]
class FineSampler(Sampler):
def __init__(self, dataset):
self.len_dataset = len(dataset)
self.len_pose = len(dataset.dataset.poses)
self.frame_length = int(self.len_dataset/ self.len_pose)
sample_list = []
for i in range(self.frame_length):
for j in range(4):
idx = torch.randperm(self.len_pose) *self.frame_length + i
# print(idx)
# breakpoint()
now_list = []
cnt = 0
for item in idx.tolist():
now_list.append(item)
cnt+=1
if cnt % 2 == 0 and len(sample_list)>2:
select_element = [x for x in random.sample(sample_list,2)]
now_list += select_element
sample_list += now_list
self.sample_list = sample_list
# print(self.sample_list)
# breakpoint()
print("one epoch containing:",len(self.sample_list))
def __iter__(self):
return iter(self.sample_list)
def __len__(self):
return len(self.sample_list)
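FineSampler yields camera indices grouped per timestamp, occasionally mixing in a couple of already-visited samples, which is meant to stabilise training on multi-view video. A minimal sketch of wiring it into a DataLoader, assuming train_dataset is a placeholder for one of the dataset wrappers in this repo whose .dataset.poses holds the training poses:

from torch.utils.data import DataLoader
from utils.loader_utils import FineSampler

sampler = FineSampler(train_dataset)                       # train_dataset is an assumed placeholder
loader = DataLoader(train_dataset, batch_size=1, sampler=sampler,
                    num_workers=4, collate_fn=list)
for batch in loader:
    viewpoint_cam = batch[0]                               # a single camera/timestamp sample
    break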

196
utils/point_utils.py Normal file
View File

@@ -0,0 +1,196 @@
import torch
import open3d as o3d
from torch.utils.data import TensorDataset, random_split
from tqdm import tqdm
import open3d as o3d
import numpy as np
from torch_cluster import grid_cluster
def voxel_down_sample_custom(points, voxel_size):
# Quantize the point cloud onto a voxel grid
voxel_grid = torch.floor(points / voxel_size)
# Find the unique voxels and the inverse index mapping each point to its voxel
unique_voxels, inverse_indices = torch.unique(voxel_grid, dim=0, return_inverse=True)
# Build a new point cloud intended to hold, per voxel, the average of the points that fall into it
new_points = torch.zeros_like(unique_voxels)
new_points_count = torch.zeros(unique_voxels.size(0), dtype=torch.long)
# for i in tqdm(range(points.size(0))):
new_points[inverse_indices] = points
# new_points_count[inverse_indices[i]] += 1
# new_points /= new_points_count.unsqueeze(-1)
return new_points, inverse_indices
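voxel_down_sample_custom quantises each point to a voxel of edge length voxel_size and keeps one entry per occupied voxel; with the averaging lines commented out, each voxel effectively ends up holding one of the points scattered into it. A small sketch, assuming an [N, 3] float tensor:

import torch

pts = torch.rand(10000, 3) * 100.0                         # synthetic cloud in a 100^3 box
down, inverse = voxel_down_sample_custom(pts, voxel_size=8.0)
print(pts.shape[0], "->", down.shape[0])                   # one representative per occupied voxel
print(inverse.shape)                                       # [N]: voxel id of every original point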
def downsample_point_cloud(points, ratio):
# Wrap the points in a TensorDataset
dataset = TensorDataset(points)
# Number of points to keep after downsampling
num_points = len(dataset)
num_downsampled_points = int(num_points * ratio)
# Downsample with random_split
downsampled_dataset, _ = random_split(dataset, [num_downsampled_points, num_points - num_downsampled_points])
# Collect the indices and the downsampled point matrix
indices = torch.tensor([i for i, _ in enumerate(downsampled_dataset)])
downsampled_points = torch.stack([x for x, in downsampled_dataset])
return indices, downsampled_points
def downsample_point_cloud_open3d(points, voxel_size):
# Create a point cloud object
downsampled_pcd, inverse_indices = voxel_down_sample_custom(points, voxel_size)
downsampled_points = downsampled_pcd
# Get the downsampled point matrix
return torch.tensor(downsampled_points)
def downsample_point_cloud_cluster(points, voxel_size):
# Create a point cloud object
cluster = grid_cluster(points, size=torch.tensor([1,1,1]))
# Get the downsampled point matrix
# downsampled_points = np.asarray(downsampled_pcd.points)
return cluster, points
import torch
from sklearn.neighbors import NearestNeighbors
def upsample_point_cloud(points, density_threshold, displacement_scale, iter_pass):
# Estimate the local density of every point from its nearest-neighbour distance
# breakpoint()
try:
nbrs = NearestNeighbors(n_neighbors=2+iter_pass, algorithm='ball_tree').fit(points)
distances, indices = nbrs.kneighbors(points)
except:
print("no point added")
return points, torch.tensor([]), torch.tensor([]), torch.zeros((points.shape[0]), dtype=torch.bool)
# Pick out the low-density points
low_density_points = points[distances[:,1] > density_threshold]
low_density_index = distances[:,1] > density_threshold
low_density_index = torch.from_numpy(low_density_index)
# Duplicate these points and add a random displacement
num_points = low_density_points.shape[0]
displacements = torch.randn(num_points, 3) * displacement_scale
new_points = low_density_points + displacements
# Return the new point matrix
return points, low_density_points, new_points, low_density_index
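upsample_point_cloud flags points whose distance to their nearest neighbour exceeds density_threshold and spawns one jittered copy of each, drawn from a Gaussian scaled by displacement_scale. A rough sketch on a synthetic cloud (shapes only; the thresholds here are arbitrary):

import torch

cloud = torch.rand(500, 3) * 10.0
pts, sparse_pts, new_pts, sparse_mask = upsample_point_cloud(
    cloud, density_threshold=1.0, displacement_scale=0.2, iter_pass=0)
print("flagged as sparse:", sparse_pts.shape[0], "spawned:", new_pts.shape[0])
print(sparse_mask.shape)                                   # [N] boolean mask over the input cloud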
def visualize_point_cloud(points, low_density_points, new_points):
# Create a point cloud object
pcd = o3d.geometry.PointCloud()
# Give the selected points a small offset so they stay visible
low_density_points += 0.01
# Merge all the points together
all_points = np.concatenate([points, low_density_points, new_points], axis=0)
pcd.points = o3d.utility.Vector3dVector(all_points)
# Build the colour array
colors = np.zeros((all_points.shape[0], 3))
colors[:points.shape[0]] = [0, 0, 0] # black: the initial point cloud
colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0] # red: the selected (low-density) points
colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0] # green: the newly grown points
pcd.colors = o3d.utility.Vector3dVector(colors)
# Display the point cloud
o3d.visualization.draw_geometries([pcd])
def combine_pointcloud(points, low_density_points, new_points):
pcd = o3d.geometry.PointCloud()
# Give the selected points a small offset so they stay visible
low_density_points += 0.01
new_points -= 0.01
# Merge all the points together
all_points = np.concatenate([points, low_density_points, new_points], axis=0)
pcd.points = o3d.utility.Vector3dVector(all_points)
# Build the colour array
colors = np.zeros((all_points.shape[0], 3))
colors[:points.shape[0]] = [0, 0, 0] # black: the initial point cloud
colors[points.shape[0]:points.shape[0]+low_density_points.shape[0]] = [1, 0, 0] # red: the selected (low-density) points
colors[points.shape[0]+low_density_points.shape[0]:] = [0, 1, 0] # green: the newly grown points
pcd.colors = o3d.utility.Vector3dVector(colors)
return pcd
def addpoint(point_cloud,density_threshold,displacement_scale, iter_pass,):
# density_threshold: density threshold; the larger it is, the sparser the points that get selected.
# displacement_scale: new points are generated at random within a ball of radius displacement_scale around each selected point.
points, low_density_points, new_points, low_density_index = upsample_point_cloud(point_cloud,density_threshold,displacement_scale, iter_pass)
# breakpoint()
# breakpoint()
print("low_density_points",low_density_points.shape[0])
return point_cloud, low_density_points, new_points, low_density_index
def find_point_indices(origin_point, goal_point):
indices = torch.nonzero((origin_point[:, None] == goal_point).all(-1), as_tuple=True)[0]
return indices
def find_indices_in_A(A, B):
"""
Find, for every point in the subset matrix B, its index u in the point cloud matrix A.
Args:
A (torch.Tensor): point cloud matrix A of size [N, 3]
B (torch.Tensor): subset matrix B of size [M, 3]
Returns:
torch.Tensor: tensor of shape (M,) containing, for every point in B, its index u in A
"""
is_equal = torch.eq(B.view(1, -1, 3), A.view(-1, 1, 3))
u_indices = torch.nonzero(is_equal, as_tuple=False)[:, 0]
return torch.unique(u_indices)
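find_indices_in_A materialises an [N, M, 3] element-wise comparison, so it is quadratic in memory; that is acceptable for the already-downsampled clouds it is applied to in the loop below. A tiny check of the expected behaviour when B is an exact subset of A:

import torch

A = torch.tensor([[0.0, 0.0, 0.0],
                  [1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0],
                  [7.0, 8.0, 9.0]])
B = A[[1, 3]]                                              # exact subset of A
print(find_indices_in_A(A, B))                             # tensor([1, 3])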
if __name__ =="__main__":
#
from time import time
pass_=0
# filename=f"pointcloud/pass_{pass_}.ply"
filename = "point_cloud.ply"
pcd = o3d.io.read_point_cloud(filename)
point_cloud = torch.tensor(pcd.points)
voxel_size = 8
density_threshold=20
displacement_scale=5
for i in range(pass_+1, 50):
print("pass ",i)
time0 = time()
point_downsample = point_cloud
flag = False
while point_downsample.shape[0]>1000:
if flag:
voxel_size+=8
print("point size:",point_downsample.shape[0])
point_downsample = downsample_point_cloud_open3d(point_cloud,voxel_size=voxel_size)
flag = True
print("point size:",point_downsample.shape[0])
# downsampled_point_index = find_point_indices(point_cloud, point_downsample)
downsampled_point_index = find_indices_in_A(point_cloud, point_downsample)
print("selected_num",point_cloud[downsampled_point_index].shape[0])
_, low_density_points, new_points, low_density_index = addpoint(point_cloud[downsampled_point_index],density_threshold=density_threshold,displacement_scale=displacement_scale,iter_pass=0)
if new_points.shape[0] < 100:
density_threshold /= 2
displacement_scale /= 2
print("reduce diplacement_scale to: ",displacement_scale)
global_mask = torch.zeros((point_cloud.shape[0]), dtype=torch.bool)
global_mask[downsampled_point_index] = low_density_index
time1 = time()
print("time cost:",time1-time0,"new_points:",new_points.shape[0])
if low_density_points.shape[0] == 0:
print("no more points.")
continue
# breakpoint()
point = combine_pointcloud(point_cloud, low_density_points, new_points)
point_cloud = torch.tensor(point.points)
o3d.io.write_point_cloud(f"pointcloud/pass_{i}.ply",point)
# visualize_point_cloud( point_cloud, low_density_points, new_points)

91
utils/pose_utils.py Normal file
View File

@@ -0,0 +1,91 @@
import numpy as np
from scipy.spatial.transform import Rotation as R
from scene.utils import Camera
from copy import deepcopy
def rotation_matrix_to_quaternion(rotation_matrix):
"""将旋转矩阵转换为四元数"""
return R.from_matrix(rotation_matrix).as_quat()
def quaternion_to_rotation_matrix(quat):
"""将四元数转换为旋转矩阵"""
return R.from_quat(quat).as_matrix()
def quaternion_slerp(q1, q2, t):
"""在两个四元数之间进行球面线性插值SLERP"""
# 计算两个四元数之间的点积
dot = np.dot(q1, q2)
# 如果点积为负,取反一个四元数以保证最短路径插值
if dot < 0.0:
q1 = -q1
dot = -dot
# 防止数值误差导致的问题
dot = np.clip(dot, -1.0, 1.0)
# 计算插值参数
theta = np.arccos(dot) * t
q3 = q2 - q1 * dot
q3 = q3 / np.linalg.norm(q3)
# 计算插值结果
return np.cos(theta) * q1 + np.sin(theta) * q3
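Since the quaternions here come from scipy's as_quat(), they use (x, y, z, w) ordering; the SLERP itself is convention-agnostic. A quick check, assuming scipy >= 1.4 for Rotation.identity(): the halfway interpolation between the identity and a 90 degree rotation about z should be a 45 degree rotation about z.

import numpy as np
from scipy.spatial.transform import Rotation as R

q1 = R.identity().as_quat()                                # (0, 0, 0, 1)
q2 = R.from_euler('z', 90, degrees=True).as_quat()
q_mid = quaternion_slerp(q1, q2, 0.5)
expected = R.from_euler('z', 45, degrees=True).as_matrix()
print(np.allclose(quaternion_to_rotation_matrix(q_mid), expected, atol=1e-6))   # True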
def bezier_interpolation(p1, p2, t):
"""在两点之间使用贝塞尔曲线进行插值"""
return (1 - t) * p1 + t * p2
def linear_interpolation(v1, v2, t):
"""线性插值"""
return (1 - t) * v1 + t * v2
def smooth_camera_poses(cameras, num_interpolations=5):
"""对一系列相机位姿进行平滑处理,通过在每对位姿之间插入额外的位姿"""
smoothed_cameras = []
smoothed_times = []
total_poses = len(cameras) - 1 + (len(cameras) - 1) * num_interpolations
time_increment = 10 / total_poses
for i in range(len(cameras) - 1):
cam1 = cameras[i]
cam2 = cameras[i + 1]
# Convert the rotation matrices to quaternions
quat1 = rotation_matrix_to_quaternion(cam1.orientation)
quat2 = rotation_matrix_to_quaternion(cam2.orientation)
for j in range(num_interpolations + 1):
t = j / (num_interpolations + 1)
# Interpolate the orientation
interp_orientation_quat = quaternion_slerp(quat1, quat2, t)
interp_orientation_matrix = quaternion_to_rotation_matrix(interp_orientation_quat)
# Interpolate the position
interp_position = linear_interpolation(cam1.position, cam2.position, t)
# Compute the interpolated timestamp
interp_time = i*10 / (len(cameras) - 1) + time_increment * j
# Append the new camera pose and timestamp
newcam = deepcopy(cam1)
newcam.orientation = interp_orientation_matrix
newcam.position = interp_position
smoothed_cameras.append(newcam)
smoothed_times.append(interp_time)
# Append the last original pose and timestamp
smoothed_cameras.append(cameras[-1])
smoothed_times.append(1.0)
print(smoothed_times)
return smoothed_cameras, smoothed_times
# # Example: two camera poses
# cam1 = Camera(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), np.array([0, 0, 0]))
# cam2 = Camera(np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]]), np.array([1, 1, 1]))
# # Apply the smoothing
# smoothed_cameras = smooth_camera_poses([cam1, cam2], num_interpolations=5)
# # Print the results
# for cam in smoothed_cameras:
# print("Orientation:\n", cam.orientation)
# print("Position:", cam.position)

25
utils/render_utils.py Normal file
View File

@@ -0,0 +1,25 @@
import torch
@torch.no_grad()
def get_state_at_time(pc,viewpoint_camera):
means3D = pc.get_xyz
time = torch.tensor(viewpoint_camera.time).to(means3D.device).repeat(means3D.shape[0],1)
opacity = pc._opacity
shs = pc.get_features
# If precomputed 3d covariance is provided, use it. If not, then it will be computed from
# scaling / rotation by the rasterizer.
scales = pc._scaling
rotations = pc._rotation
cov3D_precomp = None
# time0 = get_time()
# means3D_deform, scales_deform, rotations_deform, opacity_deform = pc._deformation(means3D[deformation_point], scales[deformation_point],
# rotations[deformation_point], opacity[deformation_point],
# time[deformation_point])
means3D_final, scales_final, rotations_final, opacity_final, shs_final = pc._deformation(means3D, scales,
rotations, opacity, shs,
time)
scales_final = pc.scaling_activation(scales_final)
rotations_final = pc.rotation_activation(rotations_final)
opacity = pc.opacity_activation(opacity_final)
return means3D_final, scales_final, rotations_final, opacity, shs

View File

@@ -8,10 +8,10 @@ import numpy as np
import copy
@torch.no_grad()
def render_training_image(scene, gaussians, viewpoints, render_func, pipe, background, stage, iteration, time_now):
def render_training_image(scene, gaussians, viewpoints, render_func, pipe, background, stage, iteration, time_now, dataset_type):
def render(gaussians, viewpoint, path, scaling):
def render(gaussians, viewpoint, path, scaling, cam_type):
# scaling_copy = gaussians._scaling
render_pkg = render_func(viewpoint, gaussians, pipe, background, stage=stage)
render_pkg = render_func(viewpoint, gaussians, pipe, background, stage=stage, cam_type=cam_type)
label1 = f"stage:{stage},iter:{iteration}"
times = time_now/60
if times < 1:
@@ -21,11 +21,15 @@ def render_training_image(scene, gaussians, viewpoints, render_func, pipe, backg
label2 = "time:%.2f" % times + end
image = render_pkg["render"]
depth = render_pkg["depth"]
if dataset_type == "PanopticSports":
gt_np = viewpoint['image'].permute(1,2,0).cpu().numpy()
else:
gt_np = viewpoint.original_image.permute(1,2,0).cpu().numpy()
image_np = image.permute(1, 2, 0).cpu().numpy() # reorder the channels to (H, W, 3)
depth_np = depth.permute(1, 2, 0).cpu().numpy()
depth_np /= depth_np.max()
depth_np = np.repeat(depth_np, 3, axis=2)
image_np = np.concatenate((image_np, depth_np), axis=1)
image_np = np.concatenate((gt_np, image_np, depth_np), axis=1)
image_with_labels = Image.fromarray((np.clip(image_np,0,1) * 255).astype('uint8')) # convert to an 8-bit image
# Make a copy of the PIL image object to draw the labels on
draw1 = ImageDraw.Draw(image_with_labels)
@@ -59,7 +63,7 @@ def render_training_image(scene, gaussians, viewpoints, render_func, pipe, backg
# point_save_path = os.path.join(point_cloud_path,f"{iteration}.jpg")
for idx in range(len(viewpoints)):
image_save_path = os.path.join(image_path,f"{iteration}_{idx}.jpg")
render(gaussians,viewpoints[idx],image_save_path,scaling = 1)
render(gaussians,viewpoints[idx],image_save_path,scaling = 1,cam_type=dataset_type)
# render(gaussians,point_save_path,scaling = 0.1)
# Save the image with its labels

62
vis_point.py Normal file
View File

@@ -0,0 +1,62 @@
import imageio
import numpy as np
import torch
from scene import Scene
import os
import cv2
from tqdm import tqdm
from os import makedirs
from gaussian_renderer import render
import torchvision
from utils.general_utils import safe_state
from argparse import ArgumentParser
from arguments import ModelParams, PipelineParams, get_combined_args, ModelHiddenParams
from gaussian_renderer import GaussianModel
from time import time
import open3d as o3d
# import torch.multiprocessing as mp
import threading
from utils.render_utils import get_state_at_time
import concurrent.futures
def render_sets(dataset : ModelParams, hyperparam, iteration : int, pipeline : PipelineParams, skip_train : bool, skip_test : bool, skip_video: bool):
with torch.no_grad():
gaussians = GaussianModel(dataset.sh_degree, hyperparam)
scene = Scene(dataset, gaussians, load_iteration=iteration, shuffle=False)
bg_color = [1,1,1] if dataset.white_background else [0, 0, 0]
background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
return gaussians, scene
def save_point_cloud(points, model_path, timestamp):
output_path = os.path.join(model_path,"point_pertimestamp")
if not os.path.exists(output_path):
os.makedirs(output_path,exist_ok=True)
points = points.detach().cpu().numpy()
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(points)
ply_path = os.path.join(output_path,f"points_{timestamp}.ply")
o3d.io.write_point_cloud(ply_path, pcd)
parser = ArgumentParser(description="Testing script parameters")
model = ModelParams(parser, sentinel=True)
pipeline = PipelineParams(parser)
hyperparam = ModelHiddenParams(parser)
parser.add_argument("--iteration", default=-1, type=int)
parser.add_argument("--skip_train", action="store_true")
parser.add_argument("--skip_test", action="store_true")
parser.add_argument("--quiet", action="store_true")
parser.add_argument("--skip_video", action="store_true")
parser.add_argument("--configs", type=str)
args = get_combined_args(parser)
print("Rendering " , args.model_path)
if args.configs:
import mmcv
from utils.params_utils import merge_hparams
config = mmcv.Config.fromfile(args.configs)
args = merge_hparams(args, config)
# Initialize system state (RNG)
safe_state(args.quiet)
gaussians, scene = render_sets(model.extract(args), hyperparam.extract(args), args.iteration, pipeline.extract(args), args.skip_train, args.skip_test, args.skip_video)
for index, viewpoint in enumerate(scene.getVideoCameras()):
points, scales_final, rotations_final, opacity_final, shs_final = get_state_at_time(gaussians, viewpoint)
save_point_cloud(points, args.model_path, index)

155
weight_visualization.ipynb Normal file

File diff suppressed because one or more lines are too long