integrate wip 1

Manav Rathi
2024-07-31 09:20:29 +05:30
parent 7baacc6a77
commit 95facd60e0
3 changed files with 134 additions and 105 deletions

View File

@@ -348,59 +348,6 @@ export interface Electron {
     */
    createMLWorker: () => void;
    /**
     * Return a CLIP embedding of the given image.
     *
     * See: [Note: Natural language search using CLIP]
     *
     * The input is an opaque float32 array representing the image. The layout
     * and exact encoding of the input are specific to our implementation and
     * the ML model (CLIP) we use.
     *
     * @returns A CLIP embedding (an array of 512 floating point values).
     */
    computeCLIPImageEmbedding: (input: Float32Array) => Promise<Float32Array>;
    /**
     * Return a CLIP embedding of the given text if we already have the model
     * downloaded and prepped. If the model is not available, return
     * `undefined`.
     *
     * This differs from the other sibling ML functions in that it doesn't
     * wait for the model download to finish. It does trigger a model
     * download, but then immediately returns `undefined`. At some future
     * point, when the model download finishes, calls to this function will
     * start returning the result we seek.
     *
     * The reason for this asymmetry is that CLIP text embeddings are used
     * when computing results for user-initiated searches, and we don't want
     * to block that interaction on a large network request.
     *
     * See: [Note: Natural language search using CLIP]
     *
     * @param text The string whose embedding we want to compute.
     *
     * @returns A CLIP embedding.
     */
    computeCLIPTextEmbeddingIfAvailable: (
        text: string,
    ) => Promise<Float32Array | undefined>;
    /**
     * Detect faces in the given image using YOLO.
     *
     * Both the input and output are opaque binary data whose internal
     * structure is specific to our implementation and the model (YOLO) we
     * use.
     */
    detectFaces: (input: Float32Array) => Promise<Float32Array>;
    /**
     * Return MobileFaceNet embeddings for the given faces.
     *
     * Both the input and output are opaque binary data whose internal
     * structure is specific to our implementation and the model
     * (MobileFaceNet) we use.
     */
    computeFaceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
    // - Watch
    /**

View File

@@ -0,0 +1,131 @@
import { z } from "zod";
/**
* The port used to communicate with the Node.js ML worker process
*
* See: [Note: ML IPC]
* */
let _port: MessagePort | undefined;
/**
* Use the given {@link MessagePort} to communicate with the Node.js ML worker
* process.
*/
export const startUsingMessagePort = (port: MessagePort) => {
_port = port;
port.start();
};
/**
 * Return a CLIP embedding of the given image.
 *
 * See: [Note: Natural language search using CLIP]
 *
 * The input is an opaque float32 array representing the image. The layout and
 * exact encoding of the input are specific to our implementation and the ML
 * model (CLIP) we use.
 *
 * @returns A CLIP embedding (an array of 512 floating point values).
 */
export const computeCLIPImageEmbedding = (
    input: Float32Array,
): Promise<Float32Array> =>
    ensureFloat32Array(electronMLWorker("computeCLIPImageEmbedding", input));
/**
 * Return a CLIP embedding of the given text if we already have the model
 * downloaded and prepped. If the model is not available, return `undefined`.
 *
 * This differs from the other sibling ML functions in that it doesn't wait
 * for the model download to finish. It does trigger a model download, but
 * then immediately returns `undefined`. At some future point, when the model
 * download finishes, calls to this function will start returning the result
 * we seek.
 *
 * The reason for this asymmetry is that CLIP text embeddings are used when
 * computing results for user-initiated searches, and we don't want to block
 * that interaction on a large network request.
 *
 * See: [Note: Natural language search using CLIP]
 *
 * @param text The string whose embedding we want to compute.
 *
 * @returns A CLIP embedding.
 */
export const computeCLIPTextEmbeddingIfAvailable = async (
    text: string,
): Promise<Float32Array | undefined> =>
    ensureOptionalFloat32Array(
        electronMLWorker("computeCLIPTextEmbeddingIfAvailable", text),
    );
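
A hypothetical caller illustrating the asymmetry described above: if the text model isn't ready yet, the search simply yields no semantic matches instead of stalling on a download. The function name, the embeddings map, and the threshold are all illustrative, not from this commit; `cosineSimilarity` is the sketch shown earlier.

// Hypothetical caller: score stored image embeddings against a search query,
// skipping semantic search entirely while the model is still downloading.
const clipMatches = async (
    query: string,
    imageEmbeddings: Map<number, Float32Array>, // fileID => CLIP embedding
): Promise<number[]> => {
    const textEmbedding = await computeCLIPTextEmbeddingIfAvailable(query);
    // Model not yet available: return no semantic results rather than
    // blocking the user's search interaction on a large network request.
    if (!textEmbedding) return [];
    return [...imageEmbeddings.entries()]
        .filter(([, e]) => cosineSimilarity(e, textEmbedding) > 0.2) // illustrative threshold
        .map(([fileID]) => fileID);
};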
/**
 * Detect faces in the given image using YOLO.
 *
 * Both the input and output are opaque binary data whose internal structure
 * is specific to our implementation and the model (YOLO) we use.
 */
export const detectFaces = (input: Float32Array): Promise<Float32Array> =>
    ensureFloat32Array(electronMLWorker("detectFaces", input));
/**
 * Return MobileFaceNet embeddings for the given faces.
 *
 * Both the input and output are opaque binary data whose internal structure
 * is specific to our implementation and the model (MobileFaceNet) we use.
 */
export const computeFaceEmbeddings = (
    input: Float32Array,
): Promise<Float32Array> =>
    ensureFloat32Array(electronMLWorker("computeFaceEmbeddings", input));
/**
 * Narrow the result of an RPC call, which arrives as `unknown`, to a
 * `Float32Array`, throwing if it is anything else.
 */
const ensureFloat32Array = async (
    pu: Promise<unknown>,
): Promise<Float32Array> => {
    const u = await pu;
    if (u instanceof Float32Array) return u;
    throw new Error(`Expected a Float32Array but instead got ${typeof u}`);
};

/**
 * Like {@link ensureFloat32Array}, but also pass through `undefined`.
 */
const ensureOptionalFloat32Array = async (
    pu: Promise<unknown>,
): Promise<Float32Array | undefined> => {
    const u = await pu;
    if (u === undefined) return u;
    if (u instanceof Float32Array) return u;
    throw new Error(`Expected a Float32Array but instead got ${typeof u}`);
};
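
Both helpers do the same narrowing because the RPC reply crosses the message port as `unknown`. If more result types were to appear, the pattern could be generalized; a possible shape (`ensureInstanceOf` is a suggestion, not something this commit does), used as, say, `ensureInstanceOf(p, Float32Array)`:

// Possible generalization (not in this commit): narrow an RPC reply to any
// expected constructor instead of hardcoding Float32Array.
const ensureInstanceOf = async <T>(
    pu: Promise<unknown>,
    ctor: new (...args: any[]) => T,
): Promise<T> => {
    const u = await pu;
    if (u instanceof ctor) return u;
    throw new Error(`Expected a ${ctor.name} but instead got ${typeof u}`);
};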
/**
 * Make a call to the ML worker running in the Node.js layer using our
 * hand-rolled RPC protocol. See: [Note: Node.js ML worker RPC protocol].
 */
const electronMLWorker = async (method: string, p: string | Float32Array) => {
    const port = _port;
    if (!port) {
        throw new Error(
            "No MessagePort to communicate with Electron ML worker",
        );
    }
    // Generate a unique nonce to identify this RPC interaction.
    const id = Math.random();
    return new Promise((resolve, reject) => {
        const handleMessage = (event: MessageEvent) => {
            const response = RPCResponse.parse(event.data);
            if (response.id != id) return;
            port.removeEventListener("message", handleMessage);
            const error = response.error;
            if (error) reject(new Error(error));
            else resolve(response.result);
        };
        port.addEventListener("message", handleMessage);
        port.postMessage({ id, method, p });
    });
};

/**
 * The shape of a response in the ML worker RPC protocol: the echoed request
 * nonce, and either a result or an error string.
 */
const RPCResponse = z.object({
    id: z.number(),
    result: z.any().optional(),
    error: z.string().optional(),
});
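
This file only contains the requesting half of the protocol; the Node.js worker process holds the other end of the transferred port and mirrors it. A hedged sketch of what that responder might look like (the `handlers` table, the function names, and how the port reaches the worker are all assumptions; the actual worker implementation is not part of this diff):

// Sketch of the responder half of the RPC protocol (assumed, not from this
// diff). Hypothetical dispatch table, populated with the worker's methods.
const handlers: Record<string, (p: unknown) => Promise<unknown>> = {
    // e.g. computeCLIPImageEmbedding: (p) => ...,
};

// `port` is the worker's end of the MessagePort pair. Assigning onmessage
// implicitly starts the port.
const serveMLWorkerRPC = (port: MessagePort) => {
    port.onmessage = async (event: MessageEvent) => {
        const { id, method, p } = event.data as {
            id: number;
            method: string;
            p: unknown;
        };
        try {
            const handler = handlers[method];
            if (!handler) throw new Error(`Unknown method ${method}`);
            // Echo the nonce so the caller can match this response.
            port.postMessage({ id, result: await handler(p) });
        } catch (e) {
            // Errors travel as strings, matching RPCResponse above.
            port.postMessage({ id, error: String(e) });
        }
    };
};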

View File

@@ -9,7 +9,6 @@ import { ensure } from "@/utils/ensure";
import { wait } from "@/utils/promise";
import { DOMParser } from "@xmldom/xmldom";
import { expose } from "comlink";
import { z } from "zod";
import downloadManager from "../download";
import { cmpNewLib2, extractRawExif } from "../exif";
import { getAllLocalFiles, getLocalTrashedFiles } from "../files";
@@ -33,6 +32,7 @@ import {
    type RemoteDerivedData,
} from "./embedding";
import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face";
import { startUsingMessagePort } from "./worker-rpc";
import type { MLWorkerDelegate, MLWorkerElectron } from "./worker-types";
const idleDurationStart = 5; /* 5 seconds */
@@ -47,18 +47,9 @@ interface IndexableItem {
    remoteDerivedData: RemoteDerivedData | undefined;
}

/**
 * The port used to communicate with the Node.js ML worker process.
 *
 * See: [Note: ML IPC]
 */
let _port: MessagePort | undefined;
globalThis.onmessage = (event: MessageEvent) => {
    if (event.data == "createMLWorker/port") {
        _port = event.ports[0];
        _port?.start();
    }
    if (event.data == "createMLWorker/port")
        startUsingMessagePort(ensure(event.ports[0]));
};
/**
@@ -128,12 +119,6 @@ export class MLWorker {
        // need to monkey patch it (This also ensures that it is not tree
        // shaken).
        globalThis.DOMParser = DOMParser;

        void (async () => {
            console.log("yyy calling foo with 3");
            const res = await electronMLWorker("foo", "3");
            console.log("yyy calling foo with 3 result", res);
        })();
    }
/**
@@ -266,40 +251,6 @@ export class MLWorker {
expose(MLWorker);
/**
 * Make a call to the ML worker running in the Node.js layer using our
 * hand-rolled RPC protocol. See: [Note: Node.js ML worker RPC protocol].
 */
const electronMLWorker = async (method: string, p: string) => {
    const port = _port;
    if (!port) {
        throw new Error(
            "No MessagePort to communicate with Electron ML worker",
        );
    }
    // Generate a unique nonce to identify this RPC interaction.
    const id = Math.random();
    return new Promise((resolve, reject) => {
        const handleMessage = (event: MessageEvent) => {
            const response = RPCResponse.parse(event.data);
            if (response.id != id) return;
            port.removeEventListener("message", handleMessage);
            const error = response.error;
            if (error) reject(new Error(error));
            else resolve(response.result);
        };
        port.addEventListener("message", handleMessage);
        port.postMessage({ id, method, p });
    });
};

const RPCResponse = z.object({
    id: z.number(),
    result: z.any().optional(),
    error: z.string().optional(),
});
/**
* Find the files that need to be indexed. Then index the next batch of them.
*