Route via workers
@@ -1,13 +1,12 @@
 import { isDesktop } from "@/base/app";
-import { ensureElectron } from "@/base/electron";
 import log from "@/base/log";
 import { FileType } from "@/media/file-type";
 import {
+    clipMatches,
     isMLEnabled,
     isMLSupported,
     mlStatusSnapshot,
 } from "@/new/photos/services/ml";
-import { clipMatches } from "@/new/photos/services/ml/clip";
 import type { Person } from "@/new/photos/services/ml/people";
 import { EnteFile } from "@/new/photos/types/file";
 import * as chrono from "chrono-node";
@@ -374,7 +373,7 @@ const searchClip = async (
     searchPhrase: string,
 ): Promise<ClipSearchScores | undefined> => {
     if (!isMLEnabled()) return undefined;
-    const matches = await clipMatches(searchPhrase, ensureElectron());
+    const matches = await clipMatches(searchPhrase);
     log.debug(() => ["clip/scores", matches]);
     return matches;
 };
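
For callers, the routing change is invisible except for the simpler signature: the search layer no longer needs to obtain an Electron handle itself. A minimal, hypothetical caller in TypeScript (the function name and logging are illustrative, not part of this commit):

    import { clipMatches } from "@/new/photos/services/ml";

    const showCLIPResults = async (searchPhrase: string) => {
        const matches = await clipMatches(searchPhrase);
        // `undefined` means the CLIP model is still being downloaded.
        if (!matches) return;
        // `matches` maps fileID → score, pre-filtered by the minimum threshold.
        console.log(`${matches.size} files matched "${searchPhrase}"`);
    };
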
@@ -1,8 +1,9 @@
-import type { Electron, ElectronMLWorker } from "@/base/types/ipc";
+import type { ElectronMLWorker } from "@/base/types/ipc";
 import type { ImageBitmapAndData } from "./blob";
 import { clipIndexes } from "./db";
 import { pixelRGBBicubic } from "./image";
 import { dotProduct, norm } from "./math";
+import type { CLIPMatches } from "./worker-types";

 /**
  * The version of the CLIP indexing pipeline implemented by the current client.
@@ -166,26 +167,15 @@ const normalized = (embedding: Float32Array) => {
 };

 /**
- * Use CLIP to perform a natural language search over image embeddings.
- *
- * @param searchPhrase The text entered by the user in the search box.
- *
- * @param electron The {@link Electron} instance to use to communicate with the
- * native code running in our desktop app (the embedding happens in the native
- * layer).
- *
- * It returns file (IDs) that should be shown in the search results. They're
- * returned as a map from fileIDs to the scores they got (higher is better).
- * This map will only contains entries whose score was above our minimum
- * threshold.
+ * Find the files whose CLIP embedding "matches" the given {@link searchPhrase}.
  *
  * The result can also be `undefined`, which indicates that the download for the
  * ML model is still in progress (trying again later should succeed).
  */
 export const clipMatches = async (
     searchPhrase: string,
-    electron: Electron,
-): Promise<Map<number, number> | undefined> => {
+    electron: ElectronMLWorker,
+): Promise<CLIPMatches | undefined> => {
     const t = await electron.computeCLIPTextEmbeddingIfAvailable(searchPhrase);
     if (!t) return undefined;
 
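
The hunk is truncated mid-function; the rest of the body is not shown in this diff. For orientation, a plausible sketch of how such a function would continue, given the `normalized`, `dotProduct`, and `clipIndexes` imports above (the shape of the index records and the exact cutoff value are assumptions, not the commit's code):

    // Normalize the text embedding, then score it against each indexed
    // image embedding via dot product (cosine similarity, since both
    // vectors are unit-normalized), keeping only files above a minimum score.
    const textEmbedding = normalized(t);
    const scores: [number, number][] = [];
    for (const { fileID, embedding } of await clipIndexes()) {
        const score = dotProduct(embedding, textEmbedding);
        if (score >= 0.2 /* illustrative cutoff */) scores.push([fileID, score]);
    }
    return new Map(scores);
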
@@ -19,6 +19,7 @@ import type { UploadItem } from "../upload/types";
|
||||
import { regenerateFaceCrops } from "./crop";
|
||||
import { clearMLDB, faceIndex, indexableAndIndexedCounts } from "./db";
|
||||
import { MLWorker } from "./worker";
|
||||
import type { CLIPMatches } from "./worker-types";
|
||||
|
||||
/**
|
||||
* In-memory flag that tracks if ML is enabled.
|
||||
@@ -392,6 +393,22 @@ const setInterimScheduledStatus = () => {
|
||||
|
||||
const workerDidProcessFile = throttled(updateMLStatusSnapshot, 2000);
|
||||
|
||||
/**
|
||||
* Use CLIP to perform a natural language search over image embeddings.
|
||||
*
|
||||
* @param searchPhrase The text entered by the user in the search box.
|
||||
*
|
||||
* It returns file (IDs) that should be shown in the search results, along with
|
||||
* their scores.
|
||||
*
|
||||
* The result can also be `undefined`, which indicates that the download for the
|
||||
* ML model is still in progress (trying again later should succeed).
|
||||
*/
|
||||
export const clipMatches = (
|
||||
searchPhrase: string,
|
||||
): Promise<CLIPMatches | undefined> =>
|
||||
worker().then((w) => w.clipMatches(searchPhrase));
|
||||
|
||||
/**
|
||||
* Return the IDs of all the faces in the given {@link enteFile} that are not
|
||||
* associated with a person cluster.
|
||||
|
||||
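
This is the crux of the "route via workers" change: the main thread no longer talks to the native layer itself, it asks the shared ML worker to. A sketch of what the `worker()` accessor presumably looks like, assuming a lazily created, Comlink-wrapped web worker (the actual singleton is not part of this diff):

    import { wrap, type Remote } from "comlink";
    import type { MLWorker } from "./worker";

    let _worker: Promise<Remote<MLWorker>> | undefined;

    /** Lazily spawn the ML worker, returning a Comlink proxy to it. */
    const worker = () =>
        (_worker ??= Promise.resolve(
            wrap<MLWorker>(new Worker(new URL("worker.ts", import.meta.url))),
        ));

Each method call on the proxy, like `w.clipMatches(searchPhrase)`, then becomes an async postMessage round trip to the worker thread.
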
@@ -1,6 +1,5 @@
 /**
- * @file Types for the objects shared (as a Comlink proxy) by the main thread
- * and the ML worker.
+ * @file Types for the objects shared between the main thread and the ML worker.
 */

 /**
@@ -15,3 +14,14 @@ export interface MLWorkerDelegate {
      */
     workerDidProcessFile: () => void;
 }
+
+/**
+ * The file IDs that should be considered matches for a particular search
+ * phrase, each with their associated score.
+ *
+ * This is a map of the file (IDs) that should be shown in the search results.
+ * They're returned as a map from fileIDs to the scores they got (higher is
+ * better). This map will only contain entries whose score was above our
+ * minimum threshold.
+ */
+export type CLIPMatches = Map<number, number>;
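
Since CLIPMatches is a plain Map, consumers can post-process it directly. An illustrative helper (not part of this commit) that orders the matched files for display:

    /** Return matched file IDs, best score first. */
    const sortedMatchingFileIDs = (matches: CLIPMatches) =>
        [...matches.entries()]
            .sort(([, a], [, b]) => b - a)
            .map(([fileID]) => fileID);
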
@@ -19,7 +19,12 @@ import {
     indexableBlobs,
     type ImageBitmapAndData,
 } from "./blob";
-import { clipIndexingVersion, indexCLIP, type CLIPIndex } from "./clip";
+import {
+    clipIndexingVersion,
+    clipMatches,
+    indexCLIP,
+    type CLIPIndex,
+} from "./clip";
 import { saveFaceCrops } from "./crop";
 import {
     indexableFileIDs,
@@ -33,7 +38,7 @@ import {
     type RemoteDerivedData,
 } from "./embedding";
 import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face";
-import type { MLWorkerDelegate } from "./worker-types";
+import type { CLIPMatches, MLWorkerDelegate } from "./worker-types";

 const idleDurationStart = 5; /* 5 seconds */
 const idleDurationMax = 16 * 60; /* 16 minutes */
@@ -68,6 +73,9 @@ interface IndexableItem {
  * - "backfillq": fetching remote embeddings of unindexed items, and then
  *   indexing them if needed,
  * - "idle": in between state transitions.
+ *
+ * In addition, MLWorker can also be invoked for interactive tasks: in
+ * particular, for finding the closest CLIP match when the user does a search.
  */
 export class MLWorker {
     private electron: ElectronMLWorker | undefined;
@@ -178,6 +186,13 @@ export class MLWorker {
         return this.state == "indexing";
     }

+    /**
+     * Find {@link CLIPMatches} for a given {@link searchPhrase}.
+     */
+    async clipMatches(searchPhrase: string): Promise<CLIPMatches | undefined> {
+        return clipMatches(searchPhrase, ensure(this.electron));
+    }
+
     private async tick() {
         log.debug(() => [
             "ml/tick",
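
The `ensure` call guards against a search arriving before the worker has been handed its Electron bridge during initialization. A presumed shape for this helper (an assumption; the actual utility lives elsewhere in the codebase):

    /** Narrow `T | null | undefined` to `T`, throwing if the value is missing. */
    const ensure = <T>(v: T | null | undefined): T => {
        if (v === null || v === undefined)
            throw new Error("Required value was missing");
        return v;
    };
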
@@ -226,7 +241,7 @@
     }

     /** Return the next batch of items to backfill (if any). */
-    async backfillQ() {
+    private async backfillQ() {
         const userID = ensure(await getKVN("userID"));
         // Find files that our local DB thinks need syncing.
         const filesByID = await syncWithLocalFilesAndGetFilesToIndex(
@@ -278,7 +293,8 @@ const indexNextBatch = async (
         try {
             await index(item, electron);
             delegate?.workerDidProcessFile();
-            // Possibly unnecessary, but let us drain the microtask queue.
+            // Let us drain the microtask queue. This also gives a chance for other
+            // interactive tasks like `clipMatches` to run.
             await wait(0);
         } catch (e) {
             log.warn(`Skipping unindexable file ${item.enteFile.id}`, e);
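
Since indexing and interactive searches now share one worker, yielding matters: the worker's event loop must get a chance to service an incoming `clipMatches` message between batch items. A presumed shape for the `wait` helper (an assumption; the real utility is defined elsewhere):

    /** Resolve after `ms` milliseconds, yielding to the event loop. */
    const wait = (ms: number) =>
        new Promise<void>((resolve) => setTimeout(resolve, ms));

Awaiting even `wait(0)` schedules a macrotask, so messages queued via Comlink get handled before the next file is indexed.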