Commit
Minor changes to flagging for 5.0 (#9166)
* init
* add changeset
* rename
* flagging
* flagging
* changes
* update
* changes
* more
* more
* changes
* add changeset
* fix test
* changes
* update demos

---------

Co-authored-by: gradio-pr-bot <[email protected]>
1 parent e9e737e · commit 8a75559
Showing 16 changed files with 223 additions and 72 deletions.
A new changeset file is added, marking a minor version bump for gradio:

@@ -0,0 +1,5 @@
+---
+"gradio": minor
+---
+
+feat:Minor changes to flagging for 5.0
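For context, this file follows the Changesets convention: the front matter lists each package to release along with its semver bump level, and the text below the front matter becomes the changelog entry. A hypothetical patch-level changeset for comparison (the package name is real; the wording and the fix: prefix are invented for illustration, by analogy with the feat: prefix above):

---
"gradio": patch
---

fix:Correct a typo in the flagging docs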
The depth_estimation demo notebook (under demo/depth_estimation) is stored as a single-line JSON file, so the whole line registers as changed, but the only difference between the old and new versions is the renamed flagging parameter in the gr.Interface call. The notebook's remaining cells (the pip install line, the example-image download, and the DPT depth-prediction / Open3D mesh-reconstruction code) are identical on both sides of the diff.

@@ -1 +1 @@
 iface = gr.Interface(fn=process_image,
                      inputs=[gr.Image(
                          type="filepath", label="Input Image")],
                      outputs=[gr.Image(label="predicted depth", type="pil"),
                               gr.Model3D(label="3d mesh reconstruction", clear_color=(
                                   1.0, 1.0, 1.0, 1.0)),
                               gr.File(label="3d gLTF")],
                      title=title,
                      description=description,
                      examples=examples,
-                     allow_flagging="never",
+                     flagging_mode="never",
                      cache_examples=False)
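For reference, here is what the rename means for user code outside this demo. This is a minimal sketch, not taken from the commit: the greet function and its wiring are invented for illustration, and the only detail confirmed by this diff is that flagging_mode replaces allow_flagging and accepts the value "never".

import gradio as gr

def greet(name):
    # Toy stand-in for a real inference function (hypothetical).
    return f"Hello, {name}!"

# Gradio 4.x spelling (pre-5.0):
#   iface = gr.Interface(fn=greet, inputs="text", outputs="text",
#                        allow_flagging="never")

# Gradio 5.0 spelling, matching this commit's rename:
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
    flagging_mode="never",  # renamed from allow_flagging
)

iface.launch()

The "never" value carries over unchanged in the demo diff above; other flagging values do not appear in this commit, so none are assumed here.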