From 95facd60e02c5a470b67b8d8a9b719179af41e23 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 31 Jul 2024 09:20:29 +0530 Subject: [PATCH] integrate wip 1 --- web/packages/base/types/ipc.ts | 53 ------- .../new/photos/services/ml/worker-rpc.ts | 131 ++++++++++++++++++ web/packages/new/photos/services/ml/worker.ts | 55 +------- 3 files changed, 134 insertions(+), 105 deletions(-) create mode 100644 web/packages/new/photos/services/ml/worker-rpc.ts diff --git a/web/packages/base/types/ipc.ts b/web/packages/base/types/ipc.ts index ed7877c966..4aca8b865d 100644 --- a/web/packages/base/types/ipc.ts +++ b/web/packages/base/types/ipc.ts @@ -348,59 +348,6 @@ export interface Electron { */ createMLWorker: () => void; - /** - * Return a CLIP embedding of the given image. - * - * See: [Note: Natural language search using CLIP] - * - * The input is a opaque float32 array representing the image. The layout - * and exact encoding of the input is specific to our implementation and the - * ML model (CLIP) we use. - * - * @returns A CLIP embedding (an array of 512 floating point values). - */ - computeCLIPImageEmbedding: (input: Float32Array) => Promise; - - /** - * Return a CLIP embedding of the given image if we already have the model - * downloaded and prepped. If the model is not available return `undefined`. - * - * This differs from the other sibling ML functions in that it doesn't wait - * for the model download to finish. It does trigger a model download, but - * then immediately returns `undefined`. At some future point, when the - * model downloaded finishes, calls to this function will start returning - * the result we seek. - * - * The reason for doing it in this asymmetric way is because CLIP text - * embeddings are used as part of deducing user initiated search results, - * and we don't want to block that interaction on a large network request. - * - * See: [Note: Natural language search using CLIP] - * - * @param text The string whose embedding we want to compute. - * - * @returns A CLIP embedding. - */ - computeCLIPTextEmbeddingIfAvailable: ( - text: string, - ) => Promise; - - /** - * Detect faces in the given image using YOLO. - * - * Both the input and output are opaque binary data whose internal structure - * is specific to our implementation and the model (YOLO) we use. - */ - detectFaces: (input: Float32Array) => Promise; - - /** - * Return a MobileFaceNet embeddings for the given faces. - * - * Both the input and output are opaque binary data whose internal structure - * is specific to our implementation and the model (MobileFaceNet) we use. - */ - computeFaceEmbeddings: (input: Float32Array) => Promise; - // - Watch /** diff --git a/web/packages/new/photos/services/ml/worker-rpc.ts b/web/packages/new/photos/services/ml/worker-rpc.ts new file mode 100644 index 0000000000..782c0a3bd3 --- /dev/null +++ b/web/packages/new/photos/services/ml/worker-rpc.ts @@ -0,0 +1,131 @@ +import { z } from "zod"; + +/** + * The port used to communicate with the Node.js ML worker process + * + * See: [Note: ML IPC] + * */ +let _port: MessagePort | undefined; + +/** + * Use the given {@link MessagePort} to communicate with the Node.js ML worker + * process. + */ +export const startUsingMessagePort = (port: MessagePort) => { + _port = port; + port.start(); +}; + +/** + * Return a CLIP embedding of the given image. + * + * See: [Note: Natural language search using CLIP] + * + * The input is a opaque float32 array representing the image. The layout + * and exact encoding of the input is specific to our implementation and the + * ML model (CLIP) we use. + * + * @returns A CLIP embedding (an array of 512 floating point values). + */ +export const computeCLIPImageEmbedding = ( + input: Float32Array, +): Promise => + ensureFloat32Array(electronMLWorker("computeCLIPImageEmbedding", input)); + +/** + * Return a CLIP embedding of the given image if we already have the model + * downloaded and prepped. If the model is not available return `undefined`. + * + * This differs from the other sibling ML functions in that it doesn't wait + * for the model download to finish. It does trigger a model download, but + * then immediately returns `undefined`. At some future point, when the + * model downloaded finishes, calls to this function will start returning + * the result we seek. + * + * The reason for doing it in this asymmetric way is because CLIP text + * embeddings are used as part of deducing user initiated search results, + * and we don't want to block that interaction on a large network request. + * + * See: [Note: Natural language search using CLIP] + * + * @param text The string whose embedding we want to compute. + * + * @returns A CLIP embedding. + */ +export const computeCLIPTextEmbeddingIfAvailable = async ( + text: string, +): Promise => + ensureOptionalFloat32Array( + electronMLWorker("computeCLIPTextEmbeddingIfAvailable", text), + ); + +/** + * Detect faces in the given image using YOLO. + * + * Both the input and output are opaque binary data whose internal structure + * is specific to our implementation and the model (YOLO) we use. + */ +export const detectFaces = (input: Float32Array): Promise => + ensureFloat32Array(electronMLWorker("detectFaces", input)); + +/** + * Return a MobileFaceNet embeddings for the given faces. + * + * Both the input and output are opaque binary data whose internal structure + * is specific to our implementation and the model (MobileFaceNet) we use. + */ +export const computeFaceEmbeddings = ( + input: Float32Array, +): Promise => + ensureFloat32Array(electronMLWorker("computeFaceEmbeddings", input)); + +const ensureFloat32Array = async ( + pu: Promise, +): Promise => { + const u = await pu; + if (u instanceof Float32Array) return u; + throw new Error(`Expected a Float32Array but instead got ${typeof u}`); +}; + +const ensureOptionalFloat32Array = async ( + pu: Promise, +): Promise => { + const u = await pu; + if (u === undefined) return u; + if (u instanceof Float32Array) return u; + throw new Error(`Expected a Float32Array but instead got ${typeof u}`); +}; + +/** + * Make a call to the ML worker running in the Node.js layer using our + * hand-rolled RPC protocol. See: [Note: Node.js ML worker RPC protocol]. + */ +const electronMLWorker = async (method: string, p: string | Float32Array) => { + const port = _port; + if (!port) { + throw new Error( + "No MessagePort to communicate with Electron ML worker", + ); + } + + // Generate a unique nonce to identify this RPC interaction. + const id = Math.random(); + return new Promise((resolve, reject) => { + const handleMessage = (event: MessageEvent) => { + const response = RPCResponse.parse(event.data); + if (response.id != id) return; + port.removeEventListener("message", handleMessage); + const error = response.error; + if (error) reject(new Error(error)); + else resolve(response.result); + }; + port.addEventListener("message", handleMessage); + port.postMessage({ id, method, p }); + }); +}; + +const RPCResponse = z.object({ + id: z.number(), + result: z.any().optional(), + error: z.string().optional(), +}); diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 10e8f2fd7b..03b8adf2a2 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -9,7 +9,6 @@ import { ensure } from "@/utils/ensure"; import { wait } from "@/utils/promise"; import { DOMParser } from "@xmldom/xmldom"; import { expose } from "comlink"; -import { z } from "zod"; import downloadManager from "../download"; import { cmpNewLib2, extractRawExif } from "../exif"; import { getAllLocalFiles, getLocalTrashedFiles } from "../files"; @@ -33,6 +32,7 @@ import { type RemoteDerivedData, } from "./embedding"; import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face"; +import { startUsingMessagePort } from "./worker-rpc"; import type { MLWorkerDelegate, MLWorkerElectron } from "./worker-types"; const idleDurationStart = 5; /* 5 seconds */ @@ -47,18 +47,9 @@ interface IndexableItem { remoteDerivedData: RemoteDerivedData | undefined; } -/** - * The port used to communicate with the Node.js ML worker process - * - * See: [Note: ML IPC] - * */ -let _port: MessagePort | undefined; - globalThis.onmessage = (event: MessageEvent) => { - if (event.data == "createMLWorker/port") { - _port = event.ports[0]; - _port?.start(); - } + if (event.data == "createMLWorker/port") + startUsingMessagePort(ensure(event.ports[0])); }; /** @@ -128,12 +119,6 @@ export class MLWorker { // need to monkey patch it (This also ensures that it is not tree // shaken). globalThis.DOMParser = DOMParser; - - void (async () => { - console.log("yyy calling foo with 3"); - const res = await electronMLWorker("foo", "3"); - console.log("yyy calling foo with 3 result", res); - })(); } /** @@ -266,40 +251,6 @@ export class MLWorker { expose(MLWorker); -/** - * Make a call to the ML worker running in the Node.js layer using our - * hand-rolled RPC protocol. See: [Note: Node.js ML worker RPC protocol]. - */ -const electronMLWorker = async (method: string, p: string) => { - const port = _port; - if (!port) { - throw new Error( - "No MessagePort to communicate with Electron ML worker", - ); - } - - // Generate a unique nonce to identify this RPC interaction. - const id = Math.random(); - return new Promise((resolve, reject) => { - const handleMessage = (event: MessageEvent) => { - const response = RPCResponse.parse(event.data); - if (response.id != id) return; - port.removeEventListener("message", handleMessage); - const error = response.error; - if (error) reject(new Error(error)); - else resolve(response.result); - }; - port.addEventListener("message", handleMessage); - port.postMessage({ id, method, p }); - }); -}; - -const RPCResponse = z.object({ - id: z.number(), - result: z.any().optional(), - error: z.string().optional(), -}); - /** * Find out files which need to be indexed. Then index the next batch of them. *