From 3065b392f3d50e147e0e8b371678e0d5ea01da0a Mon Sep 17 00:00:00 2001
From: Manav Rathi <manav@mrmr.io>
Date: Thu, 4 Jul 2024 11:33:33 +0530
Subject: [PATCH] cmp

---
 web/packages/new/photos/services/ml/crop.ts | 69 ++++++++++++++-------
 web/packages/new/photos/services/ml/face.ts | 32 +++++++++-
 2 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/web/packages/new/photos/services/ml/crop.ts b/web/packages/new/photos/services/ml/crop.ts
index d4ec5e0f2e..863ceb0750 100644
--- a/web/packages/new/photos/services/ml/crop.ts
+++ b/web/packages/new/photos/services/ml/crop.ts
@@ -1,6 +1,12 @@
 import { blobCache } from "@/next/blob-cache";
 import { ensure } from "@/utils/ensure";
-import type { Box, FaceAlignment, FaceIndex } from "./face";
+import {
+    computeFaceAlignment,
+    restoreToImageDimensions,
+    type Box,
+    type FaceAlignment,
+    type FaceIndex,
+} from "./face";
 import { clamp } from "./image";
 
 /**
@@ -23,9 +29,13 @@ export const saveFaceCrops = async (
 
     return Promise.all(
         faceIndex.faceEmbedding.faces.map(({ faceID, detection }) =>
-            extractFaceCrop2(imageBitmap, detection.box).then((b) =>
-                cache.put(faceID, b),
-            ),
+            // extractFaceCrop2(imageBitmap, detection.box).then((b) =>
+            extractFaceCrop(
+                imageBitmap,
+                computeFaceAlignment(
+                    restoreToImageDimensions(detection, imageBitmap),
+                ),
+            ).then((b) => cache.put(faceID, b)),
         ),
     );
 };
@@ -81,31 +91,46 @@ export const extractFaceCrop2 = (imageBitmap: ImageBitmap, faceBox: Box) => {
     return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
 };
 
-export const saveFaceCrop = async (
-    imageBitmap: ImageBitmap,
-    faceID: string,
-    alignment: FaceAlignment,
-) => {
-    const faceCrop = extractFaceCrop(imageBitmap, alignment);
-    const blob = await imageBitmapToBlob(faceCrop);
-    faceCrop.close();
+// export const saveFaceCrop = async (
+//     imageBitmap: ImageBitmap,
+//     faceID: string,
+//     alignment: FaceAlignment,
+// ) => {
+//     const faceCrop = extractFaceCrop(imageBitmap, alignment);
+//     const blob = await imageBitmapToBlob(faceCrop);
+//     faceCrop.close();
 
-    const cache = await blobCache("face-crops");
-    await cache.put(faceID, blob);
+//     const cache = await blobCache("face-crops");
+//     await cache.put(faceID, blob);
 
-    return blob;
-};
+//     return blob;
+// };
 
-const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
-    const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
-    ensure(canvas.getContext("2d")).drawImage(imageBitmap, 0, 0);
-    return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
+// const imageBitmapToBlob = (imageBitmap: ImageBitmap) => {
+//     const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
+//     ensure(canvas.getContext("2d")).drawImage(imageBitmap, 0, 0);
+//     return canvas.convertToBlob({ type: "image/jpeg", quality: 0.8 });
+// };
+
+const unnormalizeBox = (imageBitmap: ImageBitmap, alignment: FaceAlignment) => {
+    const { width: imageWidth, height: imageHeight } = imageBitmap;
+
+    const obb = alignment.boundingBox;
+    // The bounding box is relative to the image size, so we need to convert
+    // its components to absolute values first.
+    const faceX = obb.x * imageWidth;
+    const faceY = obb.y * imageHeight;
+    const faceWidth = obb.width * imageWidth;
+    const faceHeight = obb.height * imageHeight;
+
+    const bb = { x: faceX, y: faceY, width: faceWidth, height: faceHeight };
+    return bb;
 };
 
 const extractFaceCrop = (
     imageBitmap: ImageBitmap,
     alignment: FaceAlignment,
-): ImageBitmap => {
+) => {
     // TODO-ML: This algorithm is different from what is used by the mobile app.
     // Also, it needs to be something that can work fully using the embedding we
     // receive from remote - the `alignment.boundingBox` will not be available
@@ -152,7 +177,7 @@ const extractFaceCrop = (
         enlargedOutputBox.height,
     );
 
-    return offscreen.transferToImageBitmap();
+    return offscreen.convertToBlob({ type: "image/jpeg", quality: 0.8 });
 };
 
 /** Round all the components of the box. */
diff --git a/web/packages/new/photos/services/ml/face.ts b/web/packages/new/photos/services/ml/face.ts
index 92d178c46b..aaef3b922b 100644
--- a/web/packages/new/photos/services/ml/face.ts
+++ b/web/packages/new/photos/services/ml/face.ts
@@ -336,7 +336,7 @@ const indexFacesInBitmap = async (
 
     return partialResult.map(({ faceID, detection, score }, i) => ({
         faceID,
-        detection: normalizeToImageDimensions(detection, imageDimensions),
+        detection: normalizeByImageDimensions(detection, imageDimensions),
         score,
         blur: blurs[i]!,
         embedding: Array.from(embeddings[i]!),
@@ -629,7 +629,10 @@ export interface FaceAlignment {
  *
  * @param faceDetection A geometry indicating a face detected in an image.
  */
-const computeFaceAlignment = (faceDetection: FaceDetection): FaceAlignment =>
+// TODO-ML: Unexport?
+export const computeFaceAlignment = (
+    faceDetection: FaceDetection,
+): FaceAlignment =>
     computeFaceAlignmentUsingSimilarityTransform(
         faceDetection,
         normalizeLandmarks(idealMobileFaceNetLandmarks, mobileFaceNetFaceSize),
@@ -936,7 +939,7 @@ const computeEmbeddings = async (
 /**
  * Convert the coordinates to between 0-1, normalized by the image's dimensions.
  */
-const normalizeToImageDimensions = (
+const normalizeByImageDimensions = (
     faceDetection: FaceDetection,
     { width, height }: Dimensions,
 ): FaceDetection => {
@@ -953,3 +956,26 @@ const normalizeToImageDimensions = (
     }));
     return { box, landmarks };
 };
+
+/**
+ * Scale coordinates from the normalized 0-1 range back to the image's dimensions.
+ *
+ * Inverse of {@link normalizeByImageDimensions}.
+ */
+export const restoreToImageDimensions = (
+    faceDetection: FaceDetection,
+    { width, height }: Dimensions,
+): FaceDetection => {
+    const oldBox: Box = faceDetection.box;
+    const box = {
+        x: oldBox.x * width,
+        y: oldBox.y * height,
+        width: oldBox.width * width,
+        height: oldBox.height * height,
+    };
+    const landmarks = faceDetection.landmarks.map((l) => ({
+        x: l.x * width,
+        y: l.y * height,
+    }));
+    return { box, landmarks };
+};