integrate wip 1

Manav Rathi
2024-07-31 09:20:29 +05:30
parent 7baacc6a77
commit 95facd60e0
3 changed files with 134 additions and 105 deletions

View File

@@ -348,59 +348,6 @@ export interface Electron {
     */
    createMLWorker: () => void;
    /**
     * Return a CLIP embedding of the given image.
     *
     * See: [Note: Natural language search using CLIP]
     *
     * The input is an opaque float32 array representing the image. The layout
     * and exact encoding of the input are specific to our implementation and
     * the ML model (CLIP) we use.
     *
     * @returns A CLIP embedding (an array of 512 floating point values).
     */
    computeCLIPImageEmbedding: (input: Float32Array) => Promise<Float32Array>;
    /**
     * Return a CLIP embedding of the given text if we already have the model
     * downloaded and prepped. If the model is not available, return
     * `undefined`.
     *
     * This differs from the other sibling ML functions in that it doesn't
     * wait for the model download to finish. It does trigger a model
     * download, but then immediately returns `undefined`. At some future
     * point, when the model download finishes, calls to this function will
     * start returning the result we seek.
     *
     * The reason for this asymmetry is that CLIP text embeddings are used
     * when computing results for user-initiated searches, and we don't want
     * to block that interaction on a large network request.
     *
     * See: [Note: Natural language search using CLIP]
     *
     * @param text The string whose embedding we want to compute.
     *
     * @returns A CLIP embedding.
     */
    computeCLIPTextEmbeddingIfAvailable: (
        text: string,
    ) => Promise<Float32Array | undefined>;
    /**
     * Detect faces in the given image using YOLO.
     *
     * Both the input and output are opaque binary data whose internal
     * structure is specific to our implementation and the model (YOLO) we
     * use.
     */
    detectFaces: (input: Float32Array) => Promise<Float32Array>;
    /**
     * Return MobileFaceNet embeddings for the given faces.
     *
     * Both the input and output are opaque binary data whose internal
     * structure is specific to our implementation and the model
     * (MobileFaceNet) we use.
     */
    computeFaceEmbeddings: (input: Float32Array) => Promise<Float32Array>;
    // - Watch
    /**

View File

@@ -0,0 +1,131 @@
import { z } from "zod";
/**
* The port used to communicate with the Node.js ML worker process
*
* See: [Note: ML IPC]
* */
let _port: MessagePort | undefined;
/**
* Use the given {@link MessagePort} to communicate with the Node.js ML worker
* process.
*/
export const startUsingMessagePort = (port: MessagePort) => {
_port = port;
port.start();
};
/**
 * Return a CLIP embedding of the given image.
 *
 * See: [Note: Natural language search using CLIP]
 *
 * The input is an opaque float32 array representing the image. The layout and
 * exact encoding of the input are specific to our implementation and the ML
 * model (CLIP) we use.
 *
 * @returns A CLIP embedding (an array of 512 floating point values).
 */
export const computeCLIPImageEmbedding = (
    input: Float32Array,
): Promise<Float32Array> =>
    ensureFloat32Array(electronMLWorker("computeCLIPImageEmbedding", input));
/**
 * Return a CLIP embedding of the given text if we already have the model
 * downloaded and prepped. If the model is not available, return `undefined`.
 *
 * This differs from the other sibling ML functions in that it doesn't wait
 * for the model download to finish. It does trigger a model download, but
 * then immediately returns `undefined`. At some future point, when the model
 * download finishes, calls to this function will start returning the result
 * we seek.
 *
 * The reason for this asymmetry is that CLIP text embeddings are used when
 * computing results for user-initiated searches, and we don't want to block
 * that interaction on a large network request.
 *
 * See: [Note: Natural language search using CLIP]
 *
 * @param text The string whose embedding we want to compute.
 *
 * @returns A CLIP embedding.
 */
export const computeCLIPTextEmbeddingIfAvailable = async (
    text: string,
): Promise<Float32Array | undefined> =>
    ensureOptionalFloat32Array(
        electronMLWorker("computeCLIPTextEmbeddingIfAvailable", text),
    );
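
A hypothetical caller illustrating the asymmetry described above: if the text model isn't ready yet, the search simply yields no semantic matches instead of stalling on a download. The function name, the embeddings map, and the threshold are all illustrative, not from this commit; `cosineSimilarity` is the sketch shown earlier.

// Hypothetical caller: score stored image embeddings against a search query,
// skipping semantic search entirely while the model is still downloading.
const clipMatches = async (
    query: string,
    imageEmbeddings: Map<number, Float32Array>, // fileID => CLIP embedding
): Promise<number[]> => {
    const textEmbedding = await computeCLIPTextEmbeddingIfAvailable(query);
    // Model not yet available: return no semantic results rather than
    // blocking the user's search interaction on a large network request.
    if (!textEmbedding) return [];
    return [...imageEmbeddings.entries()]
        .filter(([, e]) => cosineSimilarity(e, textEmbedding) > 0.2) // illustrative threshold
        .map(([fileID]) => fileID);
};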
/**
 * Detect faces in the given image using YOLO.
 *
 * Both the input and output are opaque binary data whose internal structure
 * is specific to our implementation and the model (YOLO) we use.
 */
export const detectFaces = (input: Float32Array): Promise<Float32Array> =>
    ensureFloat32Array(electronMLWorker("detectFaces", input));
/**
 * Return MobileFaceNet embeddings for the given faces.
 *
 * Both the input and output are opaque binary data whose internal structure
 * is specific to our implementation and the model (MobileFaceNet) we use.
 */
export const computeFaceEmbeddings = (
    input: Float32Array,
): Promise<Float32Array> =>
    ensureFloat32Array(electronMLWorker("computeFaceEmbeddings", input));
/**
 * Narrow the result of an RPC call, which arrives as `unknown`, to a
 * `Float32Array`, throwing if it is anything else.
 */
const ensureFloat32Array = async (
    pu: Promise<unknown>,
): Promise<Float32Array> => {
    const u = await pu;
    if (u instanceof Float32Array) return u;
    throw new Error(`Expected a Float32Array but instead got ${typeof u}`);
};

/**
 * Like {@link ensureFloat32Array}, but also pass through `undefined`.
 */
const ensureOptionalFloat32Array = async (
    pu: Promise<unknown>,
): Promise<Float32Array | undefined> => {
    const u = await pu;
    if (u === undefined) return u;
    if (u instanceof Float32Array) return u;
    throw new Error(`Expected a Float32Array but instead got ${typeof u}`);
};
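
Both helpers do the same narrowing because the RPC reply crosses the message port as `unknown`. If more result types were to appear, the pattern could be generalized; a possible shape (`ensureInstanceOf` is a suggestion, not something this commit does), used as, say, `ensureInstanceOf(p, Float32Array)`:

// Possible generalization (not in this commit): narrow an RPC reply to any
// expected constructor instead of hardcoding Float32Array.
const ensureInstanceOf = async <T>(
    pu: Promise<unknown>,
    ctor: new (...args: any[]) => T,
): Promise<T> => {
    const u = await pu;
    if (u instanceof ctor) return u;
    throw new Error(`Expected a ${ctor.name} but instead got ${typeof u}`);
};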
/**
 * Make a call to the ML worker running in the Node.js layer using our
 * hand-rolled RPC protocol. See: [Note: Node.js ML worker RPC protocol].
 */
const electronMLWorker = async (method: string, p: string | Float32Array) => {
    const port = _port;
    if (!port) {
        throw new Error(
            "No MessagePort to communicate with Electron ML worker",
        );
    }
    // Generate a unique nonce to identify this RPC interaction.
    const id = Math.random();
    return new Promise((resolve, reject) => {
        const handleMessage = (event: MessageEvent) => {
            const response = RPCResponse.parse(event.data);
            if (response.id != id) return;
            port.removeEventListener("message", handleMessage);
            const error = response.error;
            if (error) reject(new Error(error));
            else resolve(response.result);
        };
        port.addEventListener("message", handleMessage);
        port.postMessage({ id, method, p });
    });
};

/**
 * The shape of a response in the ML worker RPC protocol: the echoed request
 * nonce, and either a result or an error string.
 */
const RPCResponse = z.object({
    id: z.number(),
    result: z.any().optional(),
    error: z.string().optional(),
});
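
This file only contains the requesting half of the protocol; the Node.js worker process holds the other end of the transferred port and mirrors it. A hedged sketch of what that responder might look like (the `handlers` table, the function names, and how the port reaches the worker are all assumptions; the actual worker implementation is not part of this diff):

// Sketch of the responder half of the RPC protocol (assumed, not from this
// diff). Hypothetical dispatch table, populated with the worker's methods.
const handlers: Record<string, (p: unknown) => Promise<unknown>> = {
    // e.g. computeCLIPImageEmbedding: (p) => ...,
};

// `port` is the worker's end of the MessagePort pair. Assigning onmessage
// implicitly starts the port.
const serveMLWorkerRPC = (port: MessagePort) => {
    port.onmessage = async (event: MessageEvent) => {
        const { id, method, p } = event.data as {
            id: number;
            method: string;
            p: unknown;
        };
        try {
            const handler = handlers[method];
            if (!handler) throw new Error(`Unknown method ${method}`);
            // Echo the nonce so the caller can match this response.
            port.postMessage({ id, result: await handler(p) });
        } catch (e) {
            // Errors travel as strings, matching RPCResponse above.
            port.postMessage({ id, error: String(e) });
        }
    };
};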

View File

@@ -9,7 +9,6 @@ import { ensure } from "@/utils/ensure";
import { wait } from "@/utils/promise";
import { DOMParser } from "@xmldom/xmldom";
import { expose } from "comlink";
import { z } from "zod";
import downloadManager from "../download";
import { cmpNewLib2, extractRawExif } from "../exif";
import { getAllLocalFiles, getLocalTrashedFiles } from "../files";
@@ -33,6 +32,7 @@ import {
    type RemoteDerivedData,
} from "./embedding";
import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face";
import { startUsingMessagePort } from "./worker-rpc";
import type { MLWorkerDelegate, MLWorkerElectron } from "./worker-types";
const idleDurationStart = 5; /* 5 seconds */
@@ -47,18 +47,9 @@ interface IndexableItem {
    remoteDerivedData: RemoteDerivedData | undefined;
}

/**
 * The port used to communicate with the Node.js ML worker process.
 *
 * See: [Note: ML IPC]
 */
let _port: MessagePort | undefined;
globalThis.onmessage = (event: MessageEvent) => {
    if (event.data == "createMLWorker/port") {
        _port = event.ports[0];
        _port?.start();
    }
    if (event.data == "createMLWorker/port")
        startUsingMessagePort(ensure(event.ports[0]));
};
/**
@@ -128,12 +119,6 @@ export class MLWorker {
        // need to monkey patch it (This also ensures that it is not tree
        // shaken).
        globalThis.DOMParser = DOMParser;

        void (async () => {
            console.log("yyy calling foo with 3");
            const res = await electronMLWorker("foo", "3");
            console.log("yyy calling foo with 3 result", res);
        })();
    }
/**
@@ -266,40 +251,6 @@ export class MLWorker {
expose(MLWorker);
/**
 * Make a call to the ML worker running in the Node.js layer using our
 * hand-rolled RPC protocol. See: [Note: Node.js ML worker RPC protocol].
 */
const electronMLWorker = async (method: string, p: string) => {
    const port = _port;
    if (!port) {
        throw new Error(
            "No MessagePort to communicate with Electron ML worker",
        );
    }
    // Generate a unique nonce to identify this RPC interaction.
    const id = Math.random();
    return new Promise((resolve, reject) => {
        const handleMessage = (event: MessageEvent) => {
            const response = RPCResponse.parse(event.data);
            if (response.id != id) return;
            port.removeEventListener("message", handleMessage);
            const error = response.error;
            if (error) reject(new Error(error));
            else resolve(response.result);
        };
        port.addEventListener("message", handleMessage);
        port.postMessage({ id, method, p });
    });
};

const RPCResponse = z.object({
    id: z.number(),
    result: z.any().optional(),
    error: z.string().optional(),
});
/**
* Find the files that need to be indexed. Then index the next batch of them.
*