From 95facd60e02c5a470b67b8d8a9b719179af41e23 Mon Sep 17 00:00:00 2001
From: Manav Rathi <manav@mrmr.io>
Date: Wed, 31 Jul 2024 09:20:29 +0530
Subject: [PATCH] integrate wip 1

---
 web/packages/base/types/ipc.ts                |  53 -------
 .../new/photos/services/ml/worker-rpc.ts      | 131 ++++++++++++++++++
 web/packages/new/photos/services/ml/worker.ts |  55 +-------
 3 files changed, 134 insertions(+), 105 deletions(-)
 create mode 100644 web/packages/new/photos/services/ml/worker-rpc.ts
diff --git a/web/packages/base/types/ipc.ts b/web/packages/base/types/ipc.ts
index ed7877c966..4aca8b865d 100644
--- a/web/packages/base/types/ipc.ts
+++ b/web/packages/base/types/ipc.ts
@@ -348,59 +348,6 @@ export interface Electron {
      */
     createMLWorker: () => void;
 
-    /**
-     * Return a CLIP embedding of the given image.
-     *
-     * See: [Note: Natural language search using CLIP]
-     *
-     * The input is a opaque float32 array representing the image. The layout
-     * and exact encoding of the input is specific to our implementation and the
-     * ML model (CLIP) we use.
-     *
-     * @returns A CLIP embedding (an array of 512 floating point values).
-     */
-    computeCLIPImageEmbedding: (input: Float32Array) => Promise<Float32Array>;
-
-    /**
-     * Return a CLIP embedding of the given image if we already have the model
-     * downloaded and prepped. If the model is not available return `undefined`.
-     *
-     * This differs from the other sibling ML functions in that it doesn't wait
-     * for the model download to finish. It does trigger a model download, but
-     * then immediately returns `undefined`. At some future point, when the
-     * model downloaded finishes, calls to this function will start returning
-     * the result we seek.
-     *
-     * The reason for doing it in this asymmetric way is because CLIP text
-     * embeddings are used as part of deducing user initiated search results,
-     * and we don't want to block that interaction on a large network request.
-     *
-     * See: [Note: Natural language search using CLIP]
-     *
-     * @param text The string whose embedding we want to compute.
-     *
-     * @returns A CLIP embedding.
-     */
-    computeCLIPTextEmbeddingIfAvailable: (
-        text: string,
-    ) => Promise<Float32Array | undefined>;
-
-    /**
-     * Detect faces in the given image using YOLO.
-     *
-     * Both the input and output are opaque binary data whose internal structure
-     * is specific to our implementation and the model (YOLO) we use.
-     */
-    detectFaces: (input: Float32Array) => Promise<Float32Array>;
-
-    /**
-     * Return a MobileFaceNet embeddings for the given faces.
-     *
-     * Both the input and output are opaque binary data whose internal structure
-     * is specific to our implementation and the model (MobileFaceNet) we use.
-     */
-    computeFaceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
-
     // - Watch
 
     /**
diff --git a/web/packages/new/photos/services/ml/worker-rpc.ts b/web/packages/new/photos/services/ml/worker-rpc.ts
new file mode 100644
index 0000000000..782c0a3bd3
--- /dev/null
+++ b/web/packages/new/photos/services/ml/worker-rpc.ts
@@ -0,0 +1,131 @@
+import { z } from "zod";
+
+/**
+ * The port used to communicate with the Node.js ML worker process
+ *
+ * See: [Note: ML IPC]
+ * */
+let _port: MessagePort | undefined;
+
+/**
+ * Use the given {@link MessagePort} to communicate with the Node.js ML worker
+ * process.
+ */
+export const startUsingMessagePort = (port: MessagePort) => {
+    _port = port;
+    port.start();
+};
+
+/**
+ * Return a CLIP embedding of the given image.
+ *
+ * See: [Note: Natural language search using CLIP]
+ *
+ * The input is a opaque float32 array representing the image. The layout
+ * and exact encoding of the input is specific to our implementation and the
+ * ML model (CLIP) we use.
+ *
+ * @returns A CLIP embedding (an array of 512 floating point values).
+ */
+export const computeCLIPImageEmbedding = (
+    input: Float32Array,
+): Promise<Float32Array> =>
+    ensureFloat32Array(electronMLWorker("computeCLIPImageEmbedding", input));
+
+/**
+ * Return a CLIP embedding of the given image if we already have the model
+ * downloaded and prepped. If the model is not available return `undefined`.
+ *
+ * This differs from the other sibling ML functions in that it doesn't wait
+ * for the model download to finish. It does trigger a model download, but
+ * then immediately returns `undefined`. At some future point, when the
+ * model downloaded finishes, calls to this function will start returning
+ * the result we seek.
+ *
+ * The reason for doing it in this asymmetric way is because CLIP text
+ * embeddings are used as part of deducing user initiated search results,
+ * and we don't want to block that interaction on a large network request.
+ *
+ * See: [Note: Natural language search using CLIP]
+ *
+ * @param text The string whose embedding we want to compute.
+ *
+ * @returns A CLIP embedding.
+ */
+export const computeCLIPTextEmbeddingIfAvailable = async (
+    text: string,
+): Promise<Float32Array | undefined> =>
+    ensureOptionalFloat32Array(
+        electronMLWorker("computeCLIPTextEmbeddingIfAvailable", text),
+    );
+
+/**
+ * Detect faces in the given image using YOLO.
+ *
+ * Both the input and output are opaque binary data whose internal structure
+ * is specific to our implementation and the model (YOLO) we use.
+ */
+export const detectFaces = (input: Float32Array): Promise<Float32Array> =>
+    ensureFloat32Array(electronMLWorker("detectFaces", input));
+
+/**
+ * Return a MobileFaceNet embeddings for the given faces.
+ *
+ * Both the input and output are opaque binary data whose internal structure
+ * is specific to our implementation and the model (MobileFaceNet) we use.
+ */
+export const computeFaceEmbeddings = (
+    input: Float32Array,
+): Promise<Float32Array> =>
+    ensureFloat32Array(electronMLWorker("computeFaceEmbeddings", input));
+
+const ensureFloat32Array = async (
+    pu: Promise<unknown>,
+): Promise<Float32Array> => {
+    const u = await pu;
+    if (u instanceof Float32Array) return u;
+    throw new Error(`Expected a Float32Array but instead got ${typeof u}`);
+};
+
+const ensureOptionalFloat32Array = async (
+    pu: Promise<unknown>,
+): Promise<Float32Array | undefined> => {
+    const u = await pu;
+    if (u === undefined) return u;
+    if (u instanceof Float32Array) return u;
+    throw new Error(`Expected a Float32Array but instead got ${typeof u}`);
+};
+
+/**
+ * Make a call to the ML worker running in the Node.js layer using our
+ * hand-rolled RPC protocol. See: [Note: Node.js ML worker RPC protocol].
+ */
+const electronMLWorker = async (method: string, p: string | Float32Array) => {
+    const port = _port;
+    if (!port) {
+        throw new Error(
+            "No MessagePort to communicate with Electron ML worker",
+        );
+    }
+
+    // Generate a unique nonce to identify this RPC interaction.
+    const id = Math.random();
+    return new Promise((resolve, reject) => {
+        const handleMessage = (event: MessageEvent) => {
+            const response = RPCResponse.parse(event.data);
+            if (response.id != id) return;
+            port.removeEventListener("message", handleMessage);
+            const error = response.error;
+            if (error) reject(new Error(error));
+            else resolve(response.result);
+        };
+        port.addEventListener("message", handleMessage);
+        port.postMessage({ id, method, p });
+    });
+};
+
+const RPCResponse = z.object({
+    id: z.number(),
+    result: z.any().optional(),
+    error: z.string().optional(),
+});
diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts
index 10e8f2fd7b..03b8adf2a2 100644
--- a/web/packages/new/photos/services/ml/worker.ts
+++ b/web/packages/new/photos/services/ml/worker.ts
@@ -9,7 +9,6 @@ import { ensure } from "@/utils/ensure";
 import { wait } from "@/utils/promise";
 import { DOMParser } from "@xmldom/xmldom";
 import { expose } from "comlink";
-import { z } from "zod";
 import downloadManager from "../download";
 import { cmpNewLib2, extractRawExif } from "../exif";
 import { getAllLocalFiles, getLocalTrashedFiles } from "../files";
@@ -33,6 +32,7 @@ import {
     type RemoteDerivedData,
 } from "./embedding";
 import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face";
+import { startUsingMessagePort } from "./worker-rpc";
 import type { MLWorkerDelegate, MLWorkerElectron } from "./worker-types";
 
 const idleDurationStart = 5; /* 5 seconds */
@@ -47,18 +47,9 @@ interface IndexableItem {
     remoteDerivedData: RemoteDerivedData | undefined;
 }
 
-/**
- * The port used to communicate with the Node.js ML worker process
- *
- * See: [Note: ML IPC]
- * */
-let _port: MessagePort | undefined;
-
 globalThis.onmessage = (event: MessageEvent) => {
-    if (event.data == "createMLWorker/port") {
-        _port = event.ports[0];
-        _port?.start();
-    }
+    if (event.data == "createMLWorker/port")
+        startUsingMessagePort(ensure(event.ports[0]));
 };
 
 /**
@@ -128,12 +119,6 @@ export class MLWorker {
         // need to monkey patch it (This also ensures that it is not tree
         // shaken).
         globalThis.DOMParser = DOMParser;
-
-        void (async () => {
-            console.log("yyy calling foo with 3");
-            const res = await electronMLWorker("foo", "3");
-            console.log("yyy calling foo with 3 result", res);
-        })();
     }
 
     /**
@@ -266,40 +251,6 @@ export class MLWorker {
 
 expose(MLWorker);
 
-/**
- * Make a call to the ML worker running in the Node.js layer using our
- * hand-rolled RPC protocol. See: [Note: Node.js ML worker RPC protocol].
- */
-const electronMLWorker = async (method: string, p: string) => {
-    const port = _port;
-    if (!port) {
-        throw new Error(
-            "No MessagePort to communicate with Electron ML worker",
-        );
-    }
-
-    // Generate a unique nonce to identify this RPC interaction.
-    const id = Math.random();
-    return new Promise((resolve, reject) => {
-        const handleMessage = (event: MessageEvent) => {
-            const response = RPCResponse.parse(event.data);
-            if (response.id != id) return;
-            port.removeEventListener("message", handleMessage);
-            const error = response.error;
-            if (error) reject(new Error(error));
-            else resolve(response.result);
-        };
-        port.addEventListener("message", handleMessage);
-        port.postMessage({ id, method, p });
-    });
-};
-
-const RPCResponse = z.object({
-    id: z.number(),
-    result: z.any().optional(),
-    error: z.string().optional(),
-});
-
 /**
  * Find out files which need to be indexed. Then index the next batch of them.
  *