diff --git a/.gitignore b/.gitignore
index 1a55b6d..5f6705d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
pixel-nerf/checkpoints
+*.pyc
diff --git a/pixel-nerf/.ipynb_checkpoints/pose_estimation-checkpoint.ipynb b/pixel-nerf/.ipynb_checkpoints/pose_estimation-checkpoint.ipynb
new file mode 100644
index 0000000..3ecd5d5
--- /dev/null
+++ b/pixel-nerf/.ipynb_checkpoints/pose_estimation-checkpoint.ipynb
@@ -0,0 +1,533 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# iNeRF"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "import os\n",
+ "\n",
+ "ROOT_DIR = os.getcwd()\n",
+ "sys.path.insert(0, os.path.join(ROOT_DIR, \"src\"))\n",
+ "\n",
+ "import json\n",
+ "import util\n",
+ "import torch\n",
+ "import numpy as np\n",
+ "from model import make_model\n",
+ "from render import NeRFRenderer\n",
+ "import torchvision.transforms as T\n",
+ "import tqdm\n",
+ "import imageio\n",
+ "import cv2\n",
+ "import mediapy as media\n",
+ "import matplotlib.pyplot as plt\n",
+ "from PIL import Image"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Config\n",
+ "\n",
+ "- `input`: the path of the source image for pixelNeRF.\n",
+ "- `target`: the path of the target whose pose we want to estimate.\n",
+ "- `output`: the directory to save rendered output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config = {\n",
+ " 'input': './input/1.png',\n",
+ " 'target': './input/2.png',\n",
+ " 'output': './pose_estimation'\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Visualize the input data\n",
+ "\n",
+ "We show both the source image and the target image. Our goal is to 1) use pixelNeRF to generate a NeRF based on the source image and 2) estimate the relative pose between them."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "input_image_np = np.array(Image.open(config['input']).convert(\"RGB\"))\n",
+ "target_image_np = np.array(Image.open(config['target']).convert(\"RGB\"))\n",
+ "\n",
+ "media.show_images({\n",
+ " 'Source': input_image_np,\n",
+ " 'Target': target_image_np\n",
+ "})"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## PixelNeRF\n",
+ "\n",
+ "Load the pixelNeRF. Make sure the `./checkpoints/srn_car/pixel_nerf_latest` exist."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "EXPERIMENT NAME: srn_car\n",
+ "* Config file: conf/exp/srn.conf\n",
+ "* Dataset format: srn\n",
+ "* Dataset location: data\n",
+ "Using torchvision resnet34 encoder\n",
+ "Load checkpoints/srn_car/pixel_nerf_latest\n"
+ ]
+ }
+ ],
+ "source": [
+ "def extra_args(parser):\n",
+ " parser.add_argument(\n",
+ " \"--input\",\n",
+ " \"-I\",\n",
+ " type=str,\n",
+ " help=\"Input image to condition on.\",\n",
+ " )\n",
+ " parser.add_argument(\n",
+ " \"--target\",\n",
+ " \"-T\",\n",
+ " type=str,\n",
+ " help=\"Target image to estimate the pose.\",\n",
+ " )\n",
+ " parser.add_argument(\n",
+ " \"--output\",\n",
+ " \"-O\",\n",
+ " type=str,\n",
+ " default=os.path.join(ROOT_DIR, \"pose_estimation\"),\n",
+ " help=\"Output directory\",\n",
+ " )\n",
+ " parser.add_argument(\"--size\", type=int, default=128, help=\"Input image maxdim\")\n",
+ " parser.add_argument(\n",
+ " \"--out_size\",\n",
+ " type=str,\n",
+ " default=\"128\",\n",
+ " help=\"Output image size, either 1 or 2 number (w h)\",\n",
+ " )\n",
+ "\n",
+ " parser.add_argument(\"--focal\", type=float, default=131.25, help=\"Focal length\")\n",
+ " parser.add_argument(\"--radius\", type=float, default=1.3, help=\"Camera distance\")\n",
+ " parser.add_argument(\"--z_near\", type=float, default=0.8)\n",
+ " parser.add_argument(\"--z_far\", type=float, default=1.8)\n",
+ " parser.add_argument(\n",
+ " \"--elevation\",\n",
+ " \"-e\",\n",
+ " type=float,\n",
+ " default=0.0,\n",
+ " help=\"Elevation angle (negative is above)\",\n",
+ " )\n",
+ " parser.add_argument(\n",
+ " \"--num_views\",\n",
+ " type=int,\n",
+ " default=1,\n",
+ " help=\"Number of video frames (rotated views)\",\n",
+ " )\n",
+ " parser.add_argument(\"--fps\", type=int, default=15, help=\"FPS of video\")\n",
+ " parser.add_argument(\"--gif\", action=\"store_true\", help=\"Store gif instead of mp4\")\n",
+ " parser.add_argument(\n",
+ " \"--no_vid\",\n",
+ " action=\"store_true\",\n",
+ " help=\"Do not store video (only image frames will be written)\",\n",
+ " )\n",
+ " parser.add_argument(\"--lrate\", type=float, default=1e-2)\n",
+ " parser.add_argument(\"--n_steps\", type=int, default=500, help=\"Number of steps for pose optimization.\")\n",
+ " return parser\n",
+ "\n",
+ "\n",
+ "args, conf = util.args.parse_args(\n",
+ " extra_args, default_expname=\"srn_car\", default_data_format=\"srn\", jupyter=True\n",
+ ")\n",
+ "args.resume = True\n",
+ "os.makedirs(args.output, exist_ok=True)\n",
+ "\n",
+ "device = util.get_cuda(args.gpu_id[0])\n",
+ "\n",
+ "z_near, z_far = args.z_near, args.z_far\n",
+ "focal = torch.tensor(args.focal, dtype=torch.float32, device=device)\n",
+ "\n",
+ "in_sz = args.size\n",
+ "sz = list(map(int, args.out_size.split()))\n",
+ "if len(sz) == 1:\n",
+ " H = W = sz[0]\n",
+ "else:\n",
+ " assert len(sz) == 2\n",
+ " W, H = sz\n",
+ " \n",
+ "net = make_model(conf[\"model\"]).to(device=device).load_weights(args)\n",
+ "\n",
+ "# Create the renderer.\n",
+ "renderer = NeRFRenderer.from_conf(\n",
+ " conf[\"renderer\"], eval_batch_size=args.ray_batch_size\n",
+ ").to(device=device)\n",
+ "render_par = renderer.bind_parallel(net, args.gpu_id, simple_output=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## iNeRF"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Input image: ./input/1.png\n",
+ "Target image: ./input/2.png\n",
+ "Input pose:\n",
+ "tensor([[1.0000, 0.0000, 0.0000, 0.0000],\n",
+ " [0.0000, 1.0000, 0.0000, 0.0000],\n",
+ " [0.0000, 0.0000, 1.0000, 1.3000],\n",
+ " [0.0000, 0.0000, 0.0000, 1.0000]])\n",
+ "Init pose:\n",
+ "tensor([[1.0000, 0.0000, 0.0000, 0.0000],\n",
+ " [0.0000, 1.0000, 0.0000, 0.0000],\n",
+ " [0.0000, 0.0000, 1.0000, 1.3000],\n",
+ " [0.0000, 0.0000, 0.0000, 1.0000]], grad_fn=)\n",
+ "Step 0, loss: 0.08879142999649048\n",
+ "Step 10, loss: 0.04623040556907654\n",
+ "Step 20, loss: 0.016977649182081223\n",
+ "Step 30, loss: 0.012130280956625938\n",
+ "Step 40, loss: 0.01018441841006279\n",
+ "Step 50, loss: 0.009968521073460579\n",
+ "Step 60, loss: 0.010097332298755646\n",
+ "Step 70, loss: 0.010672736912965775\n",
+ "Step 80, loss: 0.009122783318161964\n",
+ "Step 90, loss: 0.008157029747962952\n",
+ "Step 100, loss: 0.008524651639163494\n"
+ ]
+ }
+ ],
+ "source": [
+ "image_to_tensor = util.get_image_to_tensor_balanced()\n",
+ "\n",
+ "# Encoding the input image.\n",
+ "print(f\"Input image: {config['input']}\")\n",
+ "input_image = Image.fromarray(input_image_np)\n",
+ "input_image = T.Resize(in_sz)(input_image)\n",
+ "input_image = image_to_tensor(input_image).to(device=device)\n",
+ "input_pose = torch.eye(4)\n",
+ "input_pose[2, -1] = args.radius\n",
+ "\n",
+ "print(f\"Target image: {config['target']}\")\n",
+ "target_image = Image.fromarray(target_image_np)\n",
+ "target_image = T.Resize(in_sz)(target_image)\n",
+ "target_image_flatten = np.reshape(target_image, [-1, 3]) / 255.0\n",
+ "target_image_flatten = torch.from_numpy(target_image_flatten).float().to(device=device)\n",
+ "\n",
+ "cam_pose = torch.clone(input_pose.detach()).unsqueeze(0)\n",
+ "cam_pose.requires_grad = True\n",
+ "\n",
+ "print(\"Input pose:\")\n",
+ "print(f\"{input_pose}\")\n",
+ "print(\"Init pose:\")\n",
+ "print(f\"{cam_pose[0]}\")\n",
+ "\n",
+ "# Create optimizer.\n",
+ "optimizer = torch.optim.Adam(params=[cam_pose], lr=args.lrate)\n",
+ "n_steps = 100 + 1\n",
+ "\n",
+ "# Loss.\n",
+ "mse_loss = torch.nn.MSELoss()\n",
+ "\n",
+ "# Sampling.\n",
+ "n_rays = 1024\n",
+ "sampling = 'center'\n",
+ "\n",
+ "# Pose optimization.\n",
+ "predicted_poses = []\n",
+ "fine_patches = []\n",
+ "gt_patches = []\n",
+ "\n",
+ "for i_step in range(n_steps):\n",
+ " # Encode.\n",
+ " net.encode(\n",
+ " input_image.unsqueeze(0), input_pose.unsqueeze(0).to(device=device), focal,\n",
+ " )\n",
+ "\n",
+ " render_rays = util.gen_rays(cam_pose, W, H, focal, z_near, z_far)\n",
+ " render_rays_flatten = render_rays.view(-1, 8)\n",
+ " assert render_rays_flatten.shape[0] == H*W\n",
+ " if sampling == 'random':\n",
+ " idxs_sampled = torch.randint(0, H*W, (n_rays,))\n",
+ " elif sampling == 'center':\n",
+ " frac = 0.5\n",
+ " mask = torch.zeros((H, W))\n",
+ " h_low = int(0.5*(1-frac)*H)\n",
+ " h_high = int(0.5*(1+frac)*H)\n",
+ " w_low = int(0.5*(1-frac)*W)\n",
+ " w_high = int(0.5*(1+frac)*W)\n",
+ " mask[h_low:h_high, w_low:w_high] = 1\n",
+ " mask = mask.reshape(H*W)\n",
+ "\n",
+ " idxs_masked = torch.where(mask>0)[0]\n",
+ " idxs_sampled = idxs_masked[torch.randint(0, idxs_masked.shape[0], (n_rays,))]\n",
+ " elif sampling == 'patch':\n",
+ " frac = 0.25\n",
+ " mask = torch.zeros((H, W))\n",
+ " h_low = int(0.5*(1-frac)*H)\n",
+ " h_high = int(0.5*(1+frac)*H)\n",
+ " w_low = int(0.5*(1-frac)*W)\n",
+ " w_high = int(0.5*(1+frac)*W)\n",
+ " mask[h_low:h_high, w_low:w_high] = 1\n",
+ " mask = mask.reshape(H*W)\n",
+ "\n",
+ " idxs_sampled = torch.where(mask>0)[0]\n",
+ "\n",
+ " render_rays_sampled = render_rays_flatten[idxs_sampled].to(device=device)\n",
+ "\n",
+ " rgb, _ = render_par(render_rays_sampled[None])\n",
+ " loss = mse_loss(rgb, target_image_flatten[idxs_sampled][None])\n",
+ "\n",
+ " optimizer.zero_grad()\n",
+ " loss.backward()\n",
+ "\n",
+ " if i_step % 10 == 0: \n",
+ " predicted_poses.append(torch.clone(cam_pose[0]).detach().numpy())\n",
+ " fine_patches.append(torch.clone(rgb[0]).detach().cpu().numpy().reshape(32, 32, 3))\n",
+ " gt_patches.append(torch.clone(target_image_flatten[idxs_sampled]).detach().cpu().numpy().reshape(32, 32, 3))\n",
+ "\n",
+ "# pose_pred = predicted_poses[-1].copy()\n",
+ "# pose_pred[2, -1] -= args.radius\n",
+ "# pose_pred = pose_input @ pose_pred\n",
+ "# error_R, error_t = compute_pose_error(pose_pred, pose_target)\n",
+ " print(f\"Step {i_step}, loss: {loss}\")\n",
+ " \n",
+ " optimizer.step()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Render the results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ " 0%| | 0/3 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Rendering 180224 rays\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 3/3 [00:25<00:00, 8.43s/it]\n"
+ ]
+ }
+ ],
+ "source": [
+ "def create_image(patch):\n",
+ " image = np.zeros((128, 128, 3))\n",
+ " image[48:80, 48:80, :] = patch\n",
+ " image = (image * 255.0).astype(np.uint8)\n",
+ " return image\n",
+ "\n",
+ "# Rendering.\n",
+ "overlay_frames = []\n",
+ "n_poses = len(predicted_poses)\n",
+ "render_poses = torch.from_numpy(np.array(predicted_poses))\n",
+ "render_rays = util.gen_rays(render_poses, W, H, focal, z_near, z_far).to(device=device)\n",
+ "with torch.no_grad():\n",
+ " print(\"Rendering\", n_poses * H * W, \"rays\")\n",
+ " all_rgb_fine = []\n",
+ " for rays in tqdm.tqdm(torch.split(render_rays.view(-1, 8), 80000, dim=0)):\n",
+ " rgb, _depth = render_par(rays[None])\n",
+ " all_rgb_fine.append(rgb[0])\n",
+ " _depth = None\n",
+ " rgb_fine = torch.cat(all_rgb_fine)\n",
+ " frames = (rgb_fine.view(n_poses, H, W, 3).cpu().numpy() * 255).astype(\n",
+ " np.uint8\n",
+ " )\n",
+ " target_image = (target_image_flatten.cpu().numpy().reshape([H, W, 3]) * 255.0).astype(np.uint8)\n",
+ " target_images = np.stack([np.array(target_image)]*n_poses, 0)\n",
+ " \n",
+ " im_name = os.path.basename(os.path.splitext(config['input'])[0])\n",
+ "\n",
+ " frames_dir_name = os.path.join(config['output'], im_name + \"_frames\")\n",
+ " os.makedirs(frames_dir_name, exist_ok=True)\n",
+ "\n",
+ " for i in range(n_poses):\n",
+ " if sampling == 'patch':\n",
+ " pred_patch_path = os.path.join(config['output'], f'./pred_patch_{i}.png')\n",
+ " pred_image = create_image(fine_patches[i])\n",
+ "\n",
+ " gt_patch_path = os.path.join(config['output'], f'./gt_patch_{i}.png')\n",
+ " gt_image = create_image(gt_patches[i])\n",
+ " overlay_frame = (pred_image*0.5).astype(np.uint8) + (gt_image*0.5).astype(np.uint8)\n",
+ " else:\n",
+ " overlay_frame = (frames[i]*0.5).astype(np.uint8) + (target_images[i]*0.5).astype(np.uint8)\n",
+ " overlay_frames.append(overlay_frame)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Visualize the results\n",
+ "\n",
+ "We show the overlay of the image rendered with our predicted pose and the target image."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "data = {}\n",
+ "for i, f in enumerate(overlay_frames):\n",
+ " step = i*10\n",
+ " data[f\"Step {step}\"] = f\n",
+ "media.show_images(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/pixel-nerf/conf/exp/srn.conf b/pixel-nerf/conf/exp/srn.conf
index 8fe7a0f..c453480 100644
--- a/pixel-nerf/conf/exp/srn.conf
+++ b/pixel-nerf/conf/exp/srn.conf
@@ -1,5 +1,108 @@
# SRN experiments config
-include required("../default_mv.conf")
+# Single-view only base model
+# (Not used in experiments; resnet_fine_mv.conf inherits)
+model {
+ # MLP architecture
+ # Adapted for multiview
+ # Possibly too big
+
+ # Condition on local encoder
+ use_encoder = True
+
+ # Condition also on a global encoder?
+ use_global_encoder = False
+
+ # Use xyz input instead of just z
+ # (didn't ablate)
+ use_xyz = True
+
+ # Canonical space xyz (default view space)
+ canon_xyz = False
+
+ # Positional encoding
+ use_code = True
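+ # (each coordinate x is expanded to sin/cos of freq_factor * 2^k * x for k = 0..num_freqs-1; include_input also keeps the raw coordinate)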
+ code {
+ num_freqs = 6
+ freq_factor = 1.5
+ include_input = True
+ }
+
+ # View directions
+ use_viewdirs = True
+ # Apply pos. enc. to viewdirs?
+ use_code_viewdirs = False
+
+ # MLP architecture
+ mlp_coarse {
+ type = resnet # Can change to mlp
+ n_blocks = 3
+ d_hidden = 512
+ # Combine after 3rd layer by average
+ combine_layer = 3
+ combine_type = average
+ }
+ mlp_fine {
+ type = resnet
+ n_blocks = 3
+ d_hidden = 512
+ combine_layer = 3
+ combine_type = average
+ }
+
+ # Encoder architecture
+ encoder {
+ backbone = resnet34
+ pretrained = True
+ num_layers = 4
+ }
+}
+renderer {
+ n_coarse = 64
+ n_fine = 32
+ # Try using expected depth sample
+ n_fine_depth = 16
+ # Noise to add to depth sample
+ depth_std = 0.01
+ # Decay schedule, not used
+ sched = []
+ # White background color (false : black)
+ white_bkgd = True
+}
+loss {
+ # RGB losses coarse/fine
+ rgb {
+ use_l1 = False
+ }
+ rgb_fine {
+ use_l1 = False
+ }
+ # Alpha regularization (disabled in final version)
+ alpha {
+ # lambda_alpha = 0.0001
+ lambda_alpha = 0.0
+ clamp_alpha = 100
+ init_epoch = 5
+ }
+ # Coarse/fine weighting (nerf = equal)
+ lambda_coarse = 1.0 # loss = lambda_coarse * loss_coarse + lambda_fine * loss_fine
+ lambda_fine = 1.0 # loss = lambda_coarse * loss_coarse + lambda_fine * loss_fine
+}
+train {
+ # Training
+ print_interval = 2
+ save_interval = 50
+ vis_interval = 100
+ eval_interval = 50
+
+ # Accumulating gradients. Not really recommended.
+ # 1 = disable
+ accu_grad = 1
+
+ # Number of times to repeat dataset per 'epoch'
+ # Useful if dataset is extremely small, like DTU
+ num_epoch_repeats = 1
+}
+
data {
format = srn
}
diff --git a/pixel-nerf/environment.yml b/pixel-nerf/environment.yml
index c67449d..f9dfecd 100644
--- a/pixel-nerf/environment.yml
+++ b/pixel-nerf/environment.yml
@@ -7,7 +7,8 @@ dependencies:
- python>=3.8
- pip
- pip:
- - pyhocon
+ - pyhocon==0.3.6
+ - pyparsing==2.3.1
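+ # pyparsing is pinned because pyhocon 0.3.x is known to break with pyparsing >= 3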
- opencv-python
- dotmap
- tensorboard
@@ -19,7 +20,7 @@ dependencies:
- scipy
- numpy
- matplotlib
- - pytorch==1.6.0
- - torchvision==0.7.0
+ - pytorch==1.8.0
+ - torchvision==0.9.0
- scikit-image==0.17.2
- tqdm
diff --git a/pixel-nerf/pose_estimation.ipynb b/pixel-nerf/pose_estimation.ipynb
index 5d00dae..94ec702 100644
--- a/pixel-nerf/pose_estimation.ipynb
+++ b/pixel-nerf/pose_estimation.ipynb
@@ -88,9 +88,9 @@
"text/html": [
""
+ " Target
"
],
"text/plain": [
""
@@ -132,8 +132,16 @@
"* Config file: conf/exp/srn.conf\n",
"* Dataset format: srn\n",
"* Dataset location: data\n",
- "Using torchvision resnet34 encoder\n",
- "Load checkpoints/srn_car/pixel_nerf_latest\n"
+ "Using torchvision resnet34 encoder\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/prashantdandriyal/Desktop/Home/github/inerf/pixel-nerf/src/model/models.py:291: UserWarning: WARNING: checkpoints/srn_car/pixel_nerf_latest does not exist, not loaded!! Model will be re-initialized.\n",
+ "If you are trying to load a pretrained model, STOP since it's not in the right place. If training, unless you are startin a new experiment, please remember to pass --resume.\n",
+ " warnings.warn(\n"
]
}
],
@@ -232,7 +240,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -251,17 +259,7 @@
" [0.0000, 1.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 1.0000, 1.3000],\n",
" [0.0000, 0.0000, 0.0000, 1.0000]], grad_fn=)\n",
- "Step 0, loss: 0.08879142999649048\n",
- "Step 10, loss: 0.04623040556907654\n",
- "Step 20, loss: 0.016977649182081223\n",
- "Step 30, loss: 0.012130280956625938\n",
- "Step 40, loss: 0.01018441841006279\n",
- "Step 50, loss: 0.009968521073460579\n",
- "Step 60, loss: 0.010097332298755646\n",
- "Step 70, loss: 0.010672736912965775\n",
- "Step 80, loss: 0.009122783318161964\n",
- "Step 90, loss: 0.008157029747962952\n",
- "Step 100, loss: 0.008524651639163494\n"
+ "Step 0, loss: 0.18002356588840485\n"
]
}
],
@@ -372,34 +370,11 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\r",
- " 0%| | 0/3 [00:00, ?it/s]"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Rendering 180224 rays\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|██████████| 3/3 [00:25<00:00, 8.43s/it]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"def create_image(patch):\n",
" image = np.zeros((128, 128, 3))\n",
@@ -455,44 +430,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- ""
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"data = {}\n",
"for i, f in enumerate(overlay_frames):\n",
@@ -511,9 +451,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "pixelnerf2",
"language": "python",
- "name": "python3"
+ "name": "pixelnerf2"
},
"language_info": {
"codemirror_mode": {
@@ -525,7 +465,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.9.16"
}
},
"nbformat": 4,
diff --git a/pixel-nerf/src/model/__pycache__/__init__.cpython-39.pyc b/pixel-nerf/src/model/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..2711a35
Binary files /dev/null and b/pixel-nerf/src/model/__pycache__/__init__.cpython-39.pyc differ
diff --git a/pixel-nerf/src/model/__pycache__/code.cpython-39.pyc b/pixel-nerf/src/model/__pycache__/code.cpython-39.pyc
new file mode 100644
index 0000000..d6ce9dd
Binary files /dev/null and b/pixel-nerf/src/model/__pycache__/code.cpython-39.pyc differ
diff --git a/pixel-nerf/src/model/__pycache__/custom_encoder.cpython-39.pyc b/pixel-nerf/src/model/__pycache__/custom_encoder.cpython-39.pyc
new file mode 100644
index 0000000..dadcc11
Binary files /dev/null and b/pixel-nerf/src/model/__pycache__/custom_encoder.cpython-39.pyc differ
diff --git a/pixel-nerf/src/model/__pycache__/encoder.cpython-39.pyc b/pixel-nerf/src/model/__pycache__/encoder.cpython-39.pyc
new file mode 100644
index 0000000..23c9563
Binary files /dev/null and b/pixel-nerf/src/model/__pycache__/encoder.cpython-39.pyc differ
diff --git a/pixel-nerf/src/model/__pycache__/model_util.cpython-39.pyc b/pixel-nerf/src/model/__pycache__/model_util.cpython-39.pyc
new file mode 100644
index 0000000..de528b6
Binary files /dev/null and b/pixel-nerf/src/model/__pycache__/model_util.cpython-39.pyc differ
diff --git a/pixel-nerf/src/model/__pycache__/models.cpython-39.pyc b/pixel-nerf/src/model/__pycache__/models.cpython-39.pyc
new file mode 100644
index 0000000..6b5fa7b
Binary files /dev/null and b/pixel-nerf/src/model/__pycache__/models.cpython-39.pyc differ
diff --git a/pixel-nerf/src/model/__pycache__/resnetfc.cpython-39.pyc b/pixel-nerf/src/model/__pycache__/resnetfc.cpython-39.pyc
new file mode 100644
index 0000000..b6a9d83
Binary files /dev/null and b/pixel-nerf/src/model/__pycache__/resnetfc.cpython-39.pyc differ
diff --git a/pixel-nerf/src/model/models.py b/pixel-nerf/src/model/models.py
index 8660720..878229e 100644
--- a/pixel-nerf/src/model/models.py
+++ b/pixel-nerf/src/model/models.py
@@ -265,7 +265,7 @@ def forward(self, xyz, coarse=True, viewdirs=None, far=False):
output = output.reshape(SB, B, -1)
return output
- def load_weights(self, args, opt_init=False, strict=True, device=None):
+ def load_weights(self, args, opt_init=False, strict=False, device=None):
"""
Helper for loading weights according to argparse arguments.
You can put a checkpoint at checkpoints/<exp>/pixel_nerf_init to use as initialization.
diff --git a/pixel-nerf/src/render/__pycache__/__init__.cpython-39.pyc b/pixel-nerf/src/render/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..1fed827
Binary files /dev/null and b/pixel-nerf/src/render/__pycache__/__init__.cpython-39.pyc differ
diff --git a/pixel-nerf/src/render/__pycache__/nerf.cpython-39.pyc b/pixel-nerf/src/render/__pycache__/nerf.cpython-39.pyc
new file mode 100644
index 0000000..5dbfcd1
Binary files /dev/null and b/pixel-nerf/src/render/__pycache__/nerf.cpython-39.pyc differ
diff --git a/pixel-nerf/src/util/__pycache__/__init__.cpython-39.pyc b/pixel-nerf/src/util/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000..b5962d9
Binary files /dev/null and b/pixel-nerf/src/util/__pycache__/__init__.cpython-39.pyc differ
diff --git a/pixel-nerf/src/util/__pycache__/args.cpython-39.pyc b/pixel-nerf/src/util/__pycache__/args.cpython-39.pyc
new file mode 100644
index 0000000..432168e
Binary files /dev/null and b/pixel-nerf/src/util/__pycache__/args.cpython-39.pyc differ
diff --git a/pixel-nerf/src/util/__pycache__/util.cpython-39.pyc b/pixel-nerf/src/util/__pycache__/util.cpython-39.pyc
new file mode 100644
index 0000000..6345608
Binary files /dev/null and b/pixel-nerf/src/util/__pycache__/util.cpython-39.pyc differ