Route via workers

Manav Rathi
2024-07-31 12:30:15 +05:30
parent e55a7facc3
commit 5a3838be34
5 changed files with 56 additions and 24 deletions

View File

@@ -1,13 +1,12 @@
 import { isDesktop } from "@/base/app";
-import { ensureElectron } from "@/base/electron";
 import log from "@/base/log";
 import { FileType } from "@/media/file-type";
 import {
+    clipMatches,
     isMLEnabled,
     isMLSupported,
     mlStatusSnapshot,
 } from "@/new/photos/services/ml";
-import { clipMatches } from "@/new/photos/services/ml/clip";
 import type { Person } from "@/new/photos/services/ml/people";
 import { EnteFile } from "@/new/photos/types/file";
 import * as chrono from "chrono-node";
@@ -374,7 +373,7 @@ const searchClip = async (
     searchPhrase: string,
 ): Promise<ClipSearchScores | undefined> => {
     if (!isMLEnabled()) return undefined;
-    const matches = await clipMatches(searchPhrase, ensureElectron());
+    const matches = await clipMatches(searchPhrase);
     log.debug(() => ["clip/scores", matches]);
     return matches;
 };
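Taken together with the diffs below, the search code no longer touches Electron directly; the call hops from the search layer to the ML facade, into the web worker, and only there down to the native layer. A sketch of the resulting path, pieced together from the hunks in this commit:

```ts
// Call path after this commit (names taken from the diffs below):
//
// searchClip(searchPhrase)                          // main thread, search code
//   -> clipMatches(searchPhrase)                    // ML facade, main thread
//     -> worker().then((w) => w.clipMatches(...))   // hop into the web worker
//       -> MLWorker.clipMatches(searchPhrase)       // worker side, via proxy
//         -> clipMatches(searchPhrase, electron)    // CLIP module, in the worker
//           -> electron.computeCLIPTextEmbeddingIfAvailable(searchPhrase)
```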

View File

@@ -1,8 +1,9 @@
-import type { Electron, ElectronMLWorker } from "@/base/types/ipc";
+import type { ElectronMLWorker } from "@/base/types/ipc";
 import type { ImageBitmapAndData } from "./blob";
 import { clipIndexes } from "./db";
 import { pixelRGBBicubic } from "./image";
 import { dotProduct, norm } from "./math";
+import type { CLIPMatches } from "./worker-types";
 
 /**
  * The version of the CLIP indexing pipeline implemented by the current client.
@@ -166,26 +167,15 @@ const normalized = (embedding: Float32Array) => {
 };
 
 /**
- * Use CLIP to perform a natural language search over image embeddings.
- *
- * @param searchPhrase The text entered by the user in the search box.
- *
- * @param electron The {@link Electron} instance to use to communicate with the
- * native code running in our desktop app (the embedding happens in the native
- * layer).
- *
- * It returns file (IDs) that should be shown in the search results. They're
- * returned as a map from fileIDs to the scores they got (higher is better).
- * This map will only contain entries whose score was above our minimum
- * threshold.
+ * Find the files whose CLIP embedding "matches" the given {@link searchPhrase}.
  *
  * The result can also be `undefined`, which indicates that the download for the
  * ML model is still in progress (trying again later should succeed).
 */
 export const clipMatches = async (
     searchPhrase: string,
-    electron: Electron,
-): Promise<Map<number, number> | undefined> => {
+    electron: ElectronMLWorker,
+): Promise<CLIPMatches | undefined> => {
     const t = await electron.computeCLIPTextEmbeddingIfAvailable(searchPhrase);
     if (!t) return undefined;
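The hunk does not show the function body beyond the text-embedding step, but the imports above (clipIndexes, dotProduct) and the doc comments describe the scoring it performs. A minimal sketch of that kind of scoring, assuming unit-normalized embeddings and a hypothetical MIN_SCORE cutoff (the actual threshold lives in this file, not in the diff):

```ts
// Sketch only: score each image embedding against the text embedding.
// For unit vectors, the dot product equals the cosine similarity.
const MIN_SCORE = 0.2; // hypothetical; the real cutoff is defined elsewhere

const dotProduct = (a: Float32Array, b: Float32Array) => {
    let sum = 0;
    for (let i = 0; i < a.length; i++) sum += a[i]! * b[i]!;
    return sum;
};

const scoreMatches = (
    textEmbedding: Float32Array,
    imageEmbeddingsByFileID: Map<number, Float32Array>,
): Map<number, number> => {
    const scores = new Map<number, number>();
    for (const [fileID, imageEmbedding] of imageEmbeddingsByFileID) {
        const score = dotProduct(textEmbedding, imageEmbedding);
        if (score >= MIN_SCORE) scores.set(fileID, score);
    }
    return scores;
};
```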

View File

@@ -19,6 +19,7 @@ import type { UploadItem } from "../upload/types";
 import { regenerateFaceCrops } from "./crop";
 import { clearMLDB, faceIndex, indexableAndIndexedCounts } from "./db";
 import { MLWorker } from "./worker";
+import type { CLIPMatches } from "./worker-types";
 
 /**
  * In-memory flag that tracks if ML is enabled.
@@ -392,6 +393,22 @@ const setInterimScheduledStatus = () => {
 const workerDidProcessFile = throttled(updateMLStatusSnapshot, 2000);
 
+/**
+ * Use CLIP to perform a natural language search over image embeddings.
+ *
+ * @param searchPhrase The text entered by the user in the search box.
+ *
+ * It returns file (IDs) that should be shown in the search results, along with
+ * their scores.
+ *
+ * The result can also be `undefined`, which indicates that the download for the
+ * ML model is still in progress (trying again later should succeed).
+ */
+export const clipMatches = (
+    searchPhrase: string,
+): Promise<CLIPMatches | undefined> =>
+    worker().then((w) => w.clipMatches(searchPhrase));
+
 /**
  * Return the IDs of all the faces in the given {@link enteFile} that are not
  * associated with a person cluster.
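The `worker()` helper that this facade calls is not part of the diff. The old @file comment in worker-types.ts (next diff) says these objects are shared as a Comlink proxy, so a plausible shape for it is a lazily created, Comlink-wrapped singleton; the wiring below is an assumption for illustration, not the app's actual helper:

```ts
import { wrap, type Remote } from "comlink";
import type { MLWorker } from "./worker";

// Hypothetical lazy singleton. Method calls on the returned proxy, such as
// clipMatches, become asynchronous message round-trips into the worker.
let _worker: Remote<MLWorker> | undefined;

const worker = (): Promise<Remote<MLWorker>> => {
    _worker ??= wrap<MLWorker>(
        new Worker(new URL("worker.ts", import.meta.url), { type: "module" }),
    );
    return Promise.resolve(_worker);
};
```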

View File

@@ -1,6 +1,5 @@
 /**
- * @file Types for the objects shared (as a Comlink proxy) by the main thread
- * and the ML worker.
+ * @file Types for the objects shared between the main thread and the ML worker.
  */
 
 /**
@@ -15,3 +14,14 @@ export interface MLWorkerDelegate {
      */
     workerDidProcessFile: () => void;
 }
+
+/**
+ * The file IDs that should be considered matches for a particular search
+ * phrase, each with an associated score.
+ *
+ * This is a map from fileIDs to the scores they got (higher is better). It
+ * will only contain entries whose score was above our minimum threshold.
+ */
+export type CLIPMatches = Map<number, number>;
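Since a CLIPMatches value is just a Map from file ID to score, consumers can rank results with a one-liner. A small usage example:

```ts
// Rank the matching file IDs best-first by their CLIP score.
const rankedFileIDs = (matches: CLIPMatches): number[] =>
    [...matches.entries()]
        .sort(([, scoreA], [, scoreB]) => scoreB - scoreA)
        .map(([fileID]) => fileID);
```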

View File

@@ -19,7 +19,12 @@ import {
     indexableBlobs,
     type ImageBitmapAndData,
 } from "./blob";
-import { clipIndexingVersion, indexCLIP, type CLIPIndex } from "./clip";
+import {
+    clipIndexingVersion,
+    clipMatches,
+    indexCLIP,
+    type CLIPIndex,
+} from "./clip";
 import { saveFaceCrops } from "./crop";
 import {
     indexableFileIDs,
@@ -33,7 +38,7 @@ import {
     type RemoteDerivedData,
 } from "./embedding";
 import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face";
-import type { MLWorkerDelegate } from "./worker-types";
+import type { CLIPMatches, MLWorkerDelegate } from "./worker-types";
 
 const idleDurationStart = 5; /* 5 seconds */
 const idleDurationMax = 16 * 60; /* 16 minutes */
@@ -68,6 +73,9 @@ interface IndexableItem {
  * - "backfillq": fetching remote embeddings of unindexed items, and then
  *   indexing them if needed,
  * - "idle": in between state transitions.
+ *
+ * In addition, MLWorker can also be invoked for interactive tasks: in
+ * particular, for finding the closest CLIP match when the user does a search.
  */
 export class MLWorker {
     private electron: ElectronMLWorker | undefined;
@@ -178,6 +186,13 @@ export class MLWorker {
         return this.state == "indexing";
     }
 
+    /**
+     * Find {@link CLIPMatches} for a given {@link searchPhrase}.
+     */
+    async clipMatches(searchPhrase: string): Promise<CLIPMatches | undefined> {
+        return clipMatches(searchPhrase, ensure(this.electron));
+    }
+
     private async tick() {
         log.debug(() => [
             "ml/tick",
@@ -226,7 +241,7 @@ export class MLWorker {
     }
 
     /** Return the next batch of items to backfill (if any). */
-    async backfillQ() {
+    private async backfillQ() {
         const userID = ensure(await getKVN("userID"));
         // Find files that our local DB thinks need syncing.
         const filesByID = await syncWithLocalFilesAndGetFilesToIndex(
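Both worker methods above guard optional state with `ensure` (`ensure(this.electron)`, `ensure(await getKVN("userID"))`). That helper is not part of this diff; judging from its usage it is a non-null assertion of roughly this shape (a sketch, not the app's actual definition):

```ts
// Throw if the value is missing; otherwise narrow it to its non-nullable type.
const ensure = <T>(v: T | null | undefined): T => {
    if (v === null || v === undefined)
        throw new Error("Required value was null or undefined");
    return v;
};
```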
@@ -278,7 +293,8 @@ const indexNextBatch = async (
         try {
             await index(item, electron);
             delegate?.workerDidProcessFile();
-            // Possibly unnecessary, but let us drain the microtask queue.
+            // Let us drain the microtask queue. This also gives a chance for other
+            // interactive tasks like `clipMatches` to run.
             await wait(0);
         } catch (e) {
             log.warn(`Skipping unindexable file ${item.enteFile.id}`, e);
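The reworded comment is the crux of the routing change: indexing is a long-running loop on the same worker that now also serves interactive `clipMatches` calls, so the loop must periodically yield for those queued messages to be handled. A `wait` built on setTimeout does this by returning control to the worker's event loop; a sketch of such a helper (the app's actual `wait` may differ):

```ts
// Resolve after `ms` milliseconds. Awaiting wait(0) yields to the event
// loop, giving pending worker messages (e.g. a clipMatches call proxied
// from the main thread) a chance to be processed between indexed files.
const wait = (ms: number) =>
    new Promise<void>((resolve) => setTimeout(resolve, ms));
```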