diff --git a/app/packages/looker/package.json b/app/packages/looker/package.json index ae997e4b95..a150cccaf1 100644 --- a/app/packages/looker/package.json +++ b/app/packages/looker/package.json @@ -26,6 +26,7 @@ "lodash": "^4.17.21", "lru-cache": "^6.0.0", "mime": "^2.5.2", + "monotone-convex-hull-2d": "^1.0.1", "uuid": "^8.3.2" }, "devDependencies": { diff --git a/app/packages/looker/src/overlays/detection.ts b/app/packages/looker/src/overlays/detection.ts index f95dfaefba..4930771692 100644 --- a/app/packages/looker/src/overlays/detection.ts +++ b/app/packages/looker/src/overlays/detection.ts @@ -21,6 +21,7 @@ export interface DetectionLabel extends RegularLabel { dimensions?: [number, number, number]; location?: [number, number, number]; rotation?: [number, number, number]; + convexHull?: Coordinates[]; } export default class DetectionOverlay< @@ -229,48 +230,32 @@ export default class DetectionOverlay< state: Readonly, color: string ) { - const [tlx, tly, w, h] = this.label.bounding_box; - const [boxCenterX, boxCenterY] = t(state, tlx + w / 2, tly + h / 2); - - const hasRotationAroundZAxis = - this.label.rotation && this.label.rotation[2] !== 0; - - if (hasRotationAroundZAxis) { - // translate to center of box before rotating - ctx.translate(boxCenterX, boxCenterY); - // modifies current transformation matrix so that all subsequent drawings are rotated - ctx.rotate(-this.label.rotation[2]); - // translate back to undo the translation into the center of the box - ctx.translate(-boxCenterX, -boxCenterY); - } + const convexHull = this.label.convexHull; const previousAlpha = ctx.globalAlpha; - ctx.beginPath(); // use double stoke width to make the box more visible ctx.lineWidth = state.strokeWidth * 2; ctx.fillStyle = color; ctx.strokeStyle = color; - ctx.moveTo(...t(state, tlx, tly)); - ctx.lineTo(...t(state, tlx + w, tly)); - ctx.lineTo(...t(state, tlx + w, tly + h)); - ctx.lineTo(...t(state, tlx, tly + h)); + + ctx.beginPath(); + + // draw a polyline that defines the convex hull of the projected corners and fill it + ctx.moveTo(...t(state, convexHull[0][0], convexHull[0][1])); + for (let i = 1; i < convexHull.length; i++) { + ctx.lineTo(...t(state, convexHull[i][0], convexHull[i][1])); + } + ctx.closePath(); ctx.stroke(); // fill with some transparency - ctx.globalAlpha = state.options.alpha * 0.5; - ctx.fillRect(...t(state, tlx, tly), w, h); + ctx.globalAlpha = state.options.alpha * 0.3; + ctx.fill(); // restore previous alpha ctx.globalAlpha = previousAlpha; - - if (hasRotationAroundZAxis) { - // undo rotation to reset current transformation matrix - ctx.translate(boxCenterX, boxCenterY); - ctx.rotate(this.label.rotation[2]); - ctx.translate(-boxCenterX, -boxCenterY); - } } private strokeRect( diff --git a/app/packages/looker/src/state.ts b/app/packages/looker/src/state.ts index b350bfcca6..ae6d57a67e 100644 --- a/app/packages/looker/src/state.ts +++ b/app/packages/looker/src/state.ts @@ -65,8 +65,9 @@ export type OrthogrpahicProjectionMetadata = { filepath: string; height: number; width: number; - min_bound: [number, number]; - max_bound: [number, number]; + min_bound: [number, number, number]; + max_bound: [number, number, number]; + normal: [number, number, number]; }; export type GenericLabel = { diff --git a/app/packages/looker/src/worker/label-3d-projection-utils.test.ts b/app/packages/looker/src/worker/label-3d-projection-utils.test.ts new file mode 100644 index 0000000000..7ade666097 --- /dev/null +++ b/app/packages/looker/src/worker/label-3d-projection-utils.test.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from "vitest"; +import { Vec3, projectTo2D } from "./label-3d-projection-utils"; + +describe("projectTo2D", () => { + it("should project a point to the xz plane", () => { + const point: Vec3 = [1, 2, 3]; + const projectedPoint = projectTo2D(point, "xz"); + expect(projectedPoint).toEqual([1, 3]); + }); + + it("should project a point to the xy plane", () => { + const point: Vec3 = [1, 2, 3]; + const projectedPoint = projectTo2D(point, "xy"); + expect(projectedPoint).toEqual([1, 2]); + }); + + it("should project a point to the yz plane", () => { + const point: Vec3 = [1, 2, 3]; + const projectedPoint = projectTo2D(point, "yz"); + expect(projectedPoint).toEqual([2, 3]); + }); +}); diff --git a/app/packages/looker/src/worker/label-3d-projection-utils.ts b/app/packages/looker/src/worker/label-3d-projection-utils.ts new file mode 100644 index 0000000000..1e6e2b9cfa --- /dev/null +++ b/app/packages/looker/src/worker/label-3d-projection-utils.ts @@ -0,0 +1,83 @@ +import { Euler, Vector3 } from "three"; + +export type Vec3 = [number, number, number]; +export type Vec2 = [number, number]; + +export interface BoundingBox3D { + dimensions: Vec3; + location: Vec3; + rotation: Vec3; // rotation angles in radians +} + +export interface BoundingBox2D { + tlx: number; // top-left corner of the bounding box, x + tly: number; // top-left corner of the bounding box, y + width: number; // width of the bounding box + height: number; // height of the bounding box +} + +export const rotatePoint = (point: Vec3, rotation: Vec3): Vec3 => { + const threePoint = new Vector3(...point); + const threeRotation = new Euler(...rotation); + + return threePoint.applyEuler(threeRotation).toArray() as Vec3; +}; + +export const projectTo2D = (point: Vec3, plane: "xz" | "xy" | "yz"): Vec2 => { + switch (plane) { + case "xz": + return [point[0], point[2]]; + case "xy": + return [point[0], point[1]]; + case "yz": + return [point[1], point[2]]; + } +}; + +export const getProjectedCorners = ( + box: BoundingBox3D, + plane: "xz" | "xy" | "yz" +) => { + const { dimensions, location, rotation } = box; + const [dx, dy, dz] = dimensions; + const halfDimensions = [dx / 2, dy / 2, dz / 2] as Vec3; + + // Generate the 8 corners of the 3D bounding box + const corners: Vec3[] = [ + // left bottom back + [-halfDimensions[0], -halfDimensions[1], -halfDimensions[2]], + // left bottom front + [-halfDimensions[0], -halfDimensions[1], halfDimensions[2]], + // left top back + [-halfDimensions[0], halfDimensions[1], -halfDimensions[2]], + // left top front + [-halfDimensions[0], halfDimensions[1], halfDimensions[2]], + // right bottom back + [halfDimensions[0], -halfDimensions[1], -halfDimensions[2]], + // right bottom front + [halfDimensions[0], -halfDimensions[1], halfDimensions[2]], + // right top back + [halfDimensions[0], halfDimensions[1], -halfDimensions[2]], + // right top front + [halfDimensions[0], halfDimensions[1], halfDimensions[2]], + ]; + + // rotate first, and translate + const transformedCorners = corners.map((corner) => { + const newRotation = rotation; + + const rotated = rotatePoint(corner, newRotation); + return [ + rotated[0] + location[0], + rotated[1] + location[1], + rotated[2] + location[2], + ] as Vec3; + }); + + // project the 3D points to 2D based on the specified plane + const projectedCorners: Vec2[] = transformedCorners.map((corner) => + projectTo2D(corner, plane) + ); + + return { projectedCorners }; +}; diff --git a/app/packages/looker/src/worker/label-3d-transformation.ts b/app/packages/looker/src/worker/label-3d-transformation.ts deleted file mode 100644 index d5fd7a95dc..0000000000 --- a/app/packages/looker/src/worker/label-3d-transformation.ts +++ /dev/null @@ -1,59 +0,0 @@ -export const getTransformedCoordinates = ( - location, - dimensions, - scalingFactors, - orthographicProjectionParams, - { round = true } -) => { - // location of centroid of box - const [x, y] = location; - - const [lx, ly] = dimensions; - - const [_, __, xminCartesian, xmaxCartesian, yminCartesian, ymaxCartesian] = - orthographicProjectionParams; - - const canvasXMin = - scalingFactors.xScale * (x - lx / 2 + (xmaxCartesian - xminCartesian) / 2); - const canvasYMin = - scalingFactors.yScale * (y - ly / 2 + (ymaxCartesian - yminCartesian) / 2); - - const canvasXMax = - scalingFactors.xScale * (x + lx / 2 + (xmaxCartesian - xminCartesian) / 2); - const canvasYMax = - scalingFactors.yScale * (y + ly / 2 + (ymaxCartesian - yminCartesian) / 2); - - if (round) { - return [ - Math.round(canvasXMin), - Math.round(canvasXMax), - Math.round(canvasYMin), - Math.round(canvasYMax), - ]; - } - - return [canvasXMin, canvasXMax, canvasYMin, canvasYMax]; -}; - -export const applyRotation = (x, y, z, rotX, rotY, rotZ) => { - const cosx = Math.cos(rotX); - const cosy = Math.cos(rotY); - const cosz = Math.cos(rotZ); - const sinx = Math.sin(rotX); - const siny = Math.sin(rotY); - const sinz = Math.sin(rotZ); - - // Apply rotation in x-axis - const y1 = y * cosx - z * sinx; - const z1 = y * sinx + z * cosx; - - // Apply rotation in y-axis - const x2 = x * cosy - z1 * siny; - const z2 = x * siny + z1 * cosy; - - // Apply rotation in z-axis - const x3 = x2 * cosz - y1 * sinz; - const y3 = x2 * sinz + y1 * cosz; - - return [x3, y3, z2]; -}; diff --git a/app/packages/looker/src/worker/threed-label-processor.ts b/app/packages/looker/src/worker/threed-label-processor.ts index dc11c0f074..eaf19eafd6 100644 --- a/app/packages/looker/src/worker/threed-label-processor.ts +++ b/app/packages/looker/src/worker/threed-label-processor.ts @@ -1,7 +1,12 @@ import { DETECTIONS, getCls, Schema } from "@fiftyone/utilities"; +import ch from "monotone-convex-hull-2d"; import { POINTCLOUD_OVERLAY_PADDING } from "../constants"; import { DetectionLabel } from "../overlays/detection"; import { OrthogrpahicProjectionMetadata, Sample } from "../state"; +import { + BoundingBox3D, + getProjectedCorners, +} from "./label-3d-projection-utils"; import { mapId } from "./shared"; type DetectionsLabel = { @@ -10,47 +15,8 @@ type DetectionsLabel = { type ThreeDLabel = DetectionsLabel | DetectionLabel; -type LabelId = string; - const COLLECTION_TYPES = new Set(["Detections"]); -const scalingFactorCache: Record< - LabelId, - { - scalingFactor?: { xScale: number; yScale: number }; - } -> = {}; - -/** - * Get scaling parameters from pointcloud bound range. - * - * Cache results of this function because it is called for every label in a sample. - */ -const getScalingFactorForLabel = ( - labelId: LabelId, - width: number, - height: number, - xmin: number, - xmax: number, - ymin: number, - ymax: number -) => { - if (scalingFactorCache[labelId]?.scalingFactor) { - return scalingFactorCache[labelId].scalingFactor; - } - - if (!scalingFactorCache[labelId]) { - scalingFactorCache[labelId] = {}; - } - - scalingFactorCache[labelId].scalingFactor = { - xScale: width / (xmax - xmin), - yScale: height / (ymax - ymin), - }; - - return scalingFactorCache[labelId].scalingFactor; -}; - // cache between sample id and inferred projection params const inferredParamsCache: Record< Sample["id"], @@ -103,6 +69,7 @@ const getInferredParamsForUndefinedProjection = ( inferredParamsCache[sample.id] = { width: minX === Infinity ? 512 : maxX - minX + POINTCLOUD_OVERLAY_PADDING, height: minY === Infinity ? 512 : maxY - minY + POINTCLOUD_OVERLAY_PADDING, + normal: [0, 0, 1], min_bound: [ minX === Infinity ? -POINTCLOUD_OVERLAY_PADDING @@ -110,6 +77,7 @@ const getInferredParamsForUndefinedProjection = ( minY === Infinity ? -POINTCLOUD_OVERLAY_PADDING : minY - POINTCLOUD_OVERLAY_PADDING, + 0, ], max_bound: [ maxX === Infinity @@ -118,6 +86,7 @@ const getInferredParamsForUndefinedProjection = ( maxY === Infinity ? POINTCLOUD_OVERLAY_PADDING : maxY + POINTCLOUD_OVERLAY_PADDING, + 0, ], } as OrthogrpahicProjectionMetadata; @@ -139,35 +108,67 @@ const PainterFactory3D = ( * Impute bounding box parameters. */ Detection: (label: DetectionLabel) => { - const { - width: canvasWidth, - height: canvasHeight, - min_bound, - max_bound, - } = orthographicProjectionParams; - const [xmin, ymin] = min_bound; - const [xmax, ymax] = max_bound; - - const [x, y, z] = label.location; // centroid of bounding box - const [lx, ly, lz] = label.dimensions; // length of bounding box in each dimension - - const { xScale, yScale } = getScalingFactorForLabel( - label._id, - canvasWidth, - canvasHeight, - xmin, - xmax, - ymin, - ymax - ); + const { min_bound, max_bound, normal } = orthographicProjectionParams; + const [xmin, ymin, zmin] = min_bound; + const [xmax, ymax, zmax] = max_bound; + + const [lx, ly, lz] = label.location; // centroid of bounding box + const [dx, dy, dz] = label.dimensions; // length of bounding box in each dimension + const [rx, ry, rz] = label.rotation ?? [0, 0, 0]; // rotation of bounding box + + const [nx, ny, nz] = normal ?? [0, 0, 1]; + + const box: BoundingBox3D = { + dimensions: [dx, dy, dz], + location: [lx, ly, lz], + rotation: [rx, ry, rz], + }; + + let projectionPlane: "xy" | "xz" | "yz" = "xy"; + + if (nx === 1 || nx === -1) { + // project on yz plane + projectionPlane = "yz"; + } else if (ny === 1 || ny === -1) { + // project on xz plane + projectionPlane = "xz"; + } else if (nz === 1 || nz === -1) { + // project on xy plane + projectionPlane = "xy"; + } + + const { projectedCorners } = getProjectedCorners(box, projectionPlane); + + const xRange = xmax - xmin; + const yRange = ymax - ymin; + const zRange = zmax - zmin; + + const newProjectedCorners = projectedCorners.map(([x, y]) => { + let px, py; + + // todo: need to account for negative / positive normals + switch (projectionPlane) { + case "xy": + px = (x - xmin) / xRange; + py = (ymax - y) / yRange; + break; + case "xz": + px = (x - xmin) / xRange; + py = (zmax - y) / zRange; + break; + case "yz": + px = (y - ymin) / yRange; + py = (zmax - x) / zRange; + break; + } + return [px, py]; + }); - const tlx = (xScale * (x - lx / 2 - xmin)) / canvasWidth; // top left x, normalized to [0, 1] - const tly = (yScale * (-y - ly / 2 + ymax)) / canvasHeight; // top left y, normalized to [0, 1] + const convexHullIndices = ch(newProjectedCorners); - const boxWidth = (lx * xScale) / canvasWidth; // width of projected bounding box, normalized to [0, 1] - const boxHeight = (ly * yScale) / canvasHeight; // height of projected bounding box, normalized to [0, 1] + const convexHull = convexHullIndices.map((i) => newProjectedCorners[i]); - label.bounding_box = [tlx, tly, boxWidth, boxHeight]; + label.convexHull = convexHull; }, }); diff --git a/app/yarn.lock b/app/yarn.lock index d36348f2a7..fb3e5fe8b6 100644 --- a/app/yarn.lock +++ b/app/yarn.lock @@ -2602,6 +2602,7 @@ __metadata: lodash: ^4.17.21 lru-cache: ^6.0.0 mime: ^2.5.2 + monotone-convex-hull-2d: ^1.0.1 prettier: ^2.7.1 typescript: ^4.7.4 typescript-plugin-css-modules: ^5.0.2 @@ -14024,6 +14025,15 @@ __metadata: languageName: node linkType: hard +"monotone-convex-hull-2d@npm:^1.0.1": + version: 1.0.1 + resolution: "monotone-convex-hull-2d@npm:1.0.1" + dependencies: + robust-orientation: ^1.1.3 + checksum: 2d788534b29ab568387e2da43057e3fa9912fbac5e73a9e1bd78fae15951258c66d2e4655cdf2df4db7a944f1db619828030ba4824ac5fe794edefd8e8377440 + languageName: node + linkType: hard + "mouse-change@npm:^1.4.0": version: 1.4.0 resolution: "mouse-change@npm:1.4.0" @@ -16442,6 +16452,42 @@ __metadata: languageName: node linkType: hard +"robust-orientation@npm:^1.1.3": + version: 1.2.1 + resolution: "robust-orientation@npm:1.2.1" + dependencies: + robust-scale: ^1.0.2 + robust-subtract: ^1.0.0 + robust-sum: ^1.0.0 + two-product: ^1.0.2 + checksum: 83b87300009716d96cf17af27b2c787bb7cabe00e82b6740ff4777a601babfcf132b3ec3d10cb1a91886423aa51863026d3befd58058af3b90be98abbda0056e + languageName: node + linkType: hard + +"robust-scale@npm:^1.0.2": + version: 1.0.2 + resolution: "robust-scale@npm:1.0.2" + dependencies: + two-product: ^1.0.2 + two-sum: ^1.0.0 + checksum: 4217f15c94bc803c0c78f6011507102cb603a4e9f71721d44e155c17c1fbe989382c8a150d20e23ca51164077395dab698498b9650d2377cc0a69902d73d0a1c + languageName: node + linkType: hard + +"robust-subtract@npm:^1.0.0": + version: 1.0.0 + resolution: "robust-subtract@npm:1.0.0" + checksum: e9dcc39a1a802d4a34d338844d9382ad7e49f58c5d01ce0d66cd18d6477069475af11a80fba0c0e158211c2b272c1c05950e78cbfc29ea7005f4ecc9e9f9d492 + languageName: node + linkType: hard + +"robust-sum@npm:^1.0.0": + version: 1.0.0 + resolution: "robust-sum@npm:1.0.0" + checksum: b9f32829ba3d6fd9cffeee440e1fb93a7d42f264540bd631abf13d0e8737f3a15a16a15764fa8a2fe86d3db6a1970361cf7ad2ed536c858b59e45f6f493a454b + languageName: node + linkType: hard + "rollup-plugin-external-globals@npm:^0.6.1": version: 0.6.1 resolution: "rollup-plugin-external-globals@npm:0.6.1" @@ -18120,6 +18166,20 @@ __metadata: languageName: node linkType: hard +"two-product@npm:^1.0.2": + version: 1.0.2 + resolution: "two-product@npm:1.0.2" + checksum: b289814957df58b91c910c944e7e247aa01a0a70e8fafdf58f01baf7fa1f96c06dc1cbb6cdafb39525e9a5ac0a9566875f1a76a02ef1f736f26e56fca2f0c847 + languageName: node + linkType: hard + +"two-sum@npm:^1.0.0": + version: 1.0.0 + resolution: "two-sum@npm:1.0.0" + checksum: 2c6a995b555233b989f473a5d039bd237d75f4824b9b54dc9d9ab28157f3e412b37156acbb48b322c817a26f3cc85e3da281c9aed4b06e892d2d27ae88db7d32 + languageName: node + linkType: hard + "type-check@npm:^0.4.0, type-check@npm:~0.4.0": version: 0.4.0 resolution: "type-check@npm:0.4.0" diff --git a/fiftyone/utils/utils3d.py b/fiftyone/utils/utils3d.py index ec5d696def..388f092033 100644 --- a/fiftyone/utils/utils3d.py +++ b/fiftyone/utils/utils3d.py @@ -426,6 +426,8 @@ class OrthographicProjectionMetadata(DynamicEmbeddedDocument, fol._HasMedia): plane max_bound (None): the ``[xmax, ymax]`` of the image in the projection plane + normal (None): the normal vector of the plane onto which the projection + was performed width: the width of the image, in pixels height: the height of the image, in pixels """ @@ -435,6 +437,7 @@ class OrthographicProjectionMetadata(DynamicEmbeddedDocument, fol._HasMedia): filepath = fof.StringField() min_bound = fof.ListField(fof.FloatField()) max_bound = fof.ListField(fof.FloatField()) + normal = fof.ListField(fof.FloatField()) width = fof.IntField() height = fof.IntField() @@ -857,6 +860,9 @@ def _parse_point_cloud( ].as_matrix() pc = pc.rotate(R, center=[0, 0, 0]) + if projection_normal is None: + projection_normal = [0, 0, 1] + if bounds is None: min_bound, max_bound = None, None else: @@ -898,6 +904,7 @@ def _parse_point_cloud( metadata = OrthographicProjectionMetadata( min_bound=min_bound, max_bound=max_bound, + normal=projection_normal, width=width, height=height, ) diff --git a/tests/unittests/utils3d_tests.py b/tests/unittests/utils3d_tests.py index fbe517ae65..e27b49ced0 100644 --- a/tests/unittests/utils3d_tests.py +++ b/tests/unittests/utils3d_tests.py @@ -285,6 +285,7 @@ def test_orthographic_projection_metadata_field(self): metadata.filepath = "test_path" metadata.min_bound = (1, 2, 3) metadata.max_bound = (4, 5, 6) + metadata.normal = (0, 0, 1) metadata.width = 100 metadata.height = 100 @@ -304,6 +305,7 @@ def test_orthographic_projection_metadata_field(self): # tuples after deserialized are converted into np arrays self.assertTrue(np.array_equal(field["min_bound"], (1, 2, 3))) self.assertTrue(np.array_equal(field["max_bound"], (4, 5, 6))) + self.assertTrue(np.array_equal(field["normal"], (0, 0, 1))) self.assertEqual(field["width"], 100) self.assertEqual(field["height"], 100)