From fed7864b11aed78bf0d20c2b1ab0a1f5088773df Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 19 Aug 2024 13:45:35 +0530 Subject: [PATCH 1/6] Fetching state --- web/apps/photos/src/services/searchService.ts | 4 ++ .../new/photos/components/MLSettings.tsx | 14 +++++-- web/packages/new/photos/services/ml/index.ts | 10 +++-- web/packages/new/photos/services/ml/worker.ts | 38 +++++++++++++++---- 4 files changed, 51 insertions(+), 15 deletions(-) diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts index 987a4cdacb..1be9fa01fd 100644 --- a/web/apps/photos/src/services/searchService.ts +++ b/web/apps/photos/src/services/searchService.ts @@ -193,6 +193,10 @@ export async function getMLStatusSuggestion(): Promise { case "indexing": label = t("indexing_photos", status); break; + case "fetching": + // label = pt("Fetching"indexing_photos", status); + label = `Fetching indexes (${status.nSyncedFiles} / ${status.nTotalFiles})`; + break; case "clustering": label = t("indexing_people", status); break; diff --git a/web/packages/new/photos/components/MLSettings.tsx b/web/packages/new/photos/components/MLSettings.tsx index 581869e0bd..87613912d7 100644 --- a/web/packages/new/photos/components/MLSettings.tsx +++ b/web/packages/new/photos/components/MLSettings.tsx @@ -1,6 +1,7 @@ import { EnteDrawer } from "@/base/components/EnteDrawer"; import { MenuItemGroup } from "@/base/components/Menu"; import { Titlebar } from "@/base/components/Titlebar"; +import { pt } from "@/base/i18n"; import log from "@/base/log"; import { disableML, @@ -299,12 +300,15 @@ const ManageML: React.FC = ({ let status: string; switch (phase) { - case "indexing": - status = "running"; - break; case "scheduled": status = "scheduled"; break; + case "fetching": + status = "fetching"; + break; + case "indexing": + status = "running"; + break; // TODO: Clustering default: status = "done"; @@ -352,7 +356,9 @@ const ManageML: React.FC = ({ {t("indexing")} - 
{t("indexing_status", { context: status })} + {status == "fetching" + ? pt("Fetching") + : t("indexing_status", { context: status })} diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index f6e0be04e6..e4c56a4569 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -404,13 +404,16 @@ export type MLStatus = * * - "indexing": The indexer is currently running. * + * - "fetching": The indexer is currently running, but we're primarily + * fetching indexes for existing files. + * * - "clustering": All file we know of have been indexed, and we are now * clustering the faces that were found. * * - "done": ML indexing and face clustering is complete for the user's * library. */ - phase: "scheduled" | "indexing" | "clustering" | "done"; + phase: "scheduled" | "indexing" | "fetching" | "clustering" | "done"; /** The number of files that have already been indexed. */ nSyncedFiles: number; /** The total number of files that are eligible for indexing. */ @@ -478,8 +481,9 @@ const getMLStatus = async (): Promise => { let phase: MLStatus["phase"]; if (indexableCount > 0) { - const isIndexing = await (await worker()).isIndexing(); - phase = !isIndexing ? "scheduled" : "indexing"; + const state = await (await worker()).state; + phase = + state == "indexing" || state == "fetching" ? 
state : "scheduled"; } else { phase = "done"; } diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 7e6825ad9b..79520cc076 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -1,4 +1,5 @@ import { clientPackageName } from "@/base/app"; +import { assertionFailed } from "@/base/assert"; import { isHTTP4xxError } from "@/base/http"; import { getKVN } from "@/base/kv"; import { ensureAuthToken } from "@/base/local-user"; @@ -39,6 +40,20 @@ import { } from "./ml-data"; import type { CLIPMatches, MLWorkerDelegate } from "./worker-types"; +/** + * A rough hint at what the worker is up to. + * + * - "idle": Not doing anything + * - "tick": Transitioning to a new state + * - "indexing": Indexing + * - "fetching": A subset of indexing + * + * During indexing, the state is set to "fetching" whenever remote provided us + * data for more than 50% of the files that we requested from it in the last + * fetch during indexing. + */ +export type WorkerState = "idle" | "tick" | "indexing" | "fetching"; + const idleDurationStart = 5; /* 5 seconds */ const idleDurationMax = 16 * 60; /* 16 minutes */ @@ -76,9 +91,11 @@ interface IndexableItem { * particular, for finding the closest CLIP match when the user does a search. */ export class MLWorker { + /** The last known state of the worker. */ + public state: WorkerState = "idle"; + private electron: ElectronMLWorker | undefined; private delegate: MLWorkerDelegate | undefined; - private state: "idle" | "tick" | "indexing" = "idle"; private liveQ: IndexableItem[] = []; private idleTimeout: ReturnType | undefined; private idleDuration = idleDurationStart; /* unit: seconds */ @@ -164,13 +181,6 @@ export class MLWorker { } } - /** - * Return true if we're currently indexing. - */ - isIndexing() { - return this.state == "indexing"; - } - /** * Find {@link CLIPMatches} for a given {@link searchPhrase}. 
*/ @@ -234,8 +244,20 @@ export class MLWorker { 200, ); if (!filesByID.size) return []; + // Fetch their existing ML data (if any). const mlDataByID = await fetchMLData(filesByID); + + // If the number of files for which remote gave us data is more than 50% + // of what we asked of it, assume we are "fetching", not "indexing". + // This is a heuristic to try and show a better indexing state in the UI + // (so that the user does not think that their files are being + // unnecessarily reindexed). + if (this.state != "indexing" && this.state != "fetching") + assertionFailed(`Unexpected state ${this.state}`); + this.state = + mlDataByID.size * 2 > filesByID.size ? "fetching" : "indexing"; + // Return files after annotating them with their existing ML data. return Array.from(filesByID, ([id, file]) => ({ enteFile: file, From cf3b75702192ceb0baf922015ef25639958eef6e Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 19 Aug 2024 14:26:55 +0530 Subject: [PATCH 2/6] Most recent file IDs first --- web/packages/new/photos/services/ml/db.ts | 17 +++++++++++++---- web/packages/new/photos/services/ml/worker.ts | 2 ++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index 7252483845..91f0dd21fa 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -394,14 +394,23 @@ export const indexableAndIndexedCounts = async () => { * universe, we filter out fileIDs the files corresponding to which have already * been indexed, or which should be ignored. * - * @param count Limit the result to up to {@link count} items. + * @param count Limit the result to up to {@link count} items. If there are more + * than {@link count} items present, the files with the higher file IDs (which + * can be taken as an approximation of their creation order) are preferred. 
*/ -export const indexableFileIDs = async (count?: number) => { +export const indexableFileIDs = async (count: number) => { const db = await mlDB(); const tx = db.transaction("file-status", "readonly"); - return tx.store + let cursor = await tx.store .index("status") - .getAllKeys(IDBKeyRange.only("indexable"), count); + .openKeyCursor(IDBKeyRange.only("indexable"), "prev"); + const result: number[] = []; + while (cursor && count > 0) { + result.push(cursor.primaryKey); + cursor = await cursor.continue(); + count -= 1; + } + return result; }; /** diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 79520cc076..27dd602aef 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -339,6 +339,8 @@ const indexNextBatch = async ( * about. Then return the next {@link count} files that still need to be * indexed. * + * When returning from amongst pending files, prefer the most recent ones first. + * * For specifics of what a "sync" entails, see {@link updateAssumingLocalFiles}. * * @param userID Sync only files owned by a {@link userID} with the face DB. From d96d4773cfee137aa47255058f479948905f149d Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 19 Aug 2024 14:42:46 +0530 Subject: [PATCH 3/6] Fix status during live uploads --- web/packages/new/photos/services/ml/index.ts | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index e4c56a4569..53a57a3b73 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -479,13 +479,19 @@ const getMLStatus = async (): Promise => { const { indexedCount, indexableCount } = await indexableAndIndexedCounts(); + // During live uploads, the indexable count remains zero even as the indexer + // is processing the newly uploaded items. 
This is because these "live + // queue" items do not yet have a "file-status" entry. + // + // So use the state of the worker as a guide for the phase, not the + // indexable count. + let phase: MLStatus["phase"]; - if (indexableCount > 0) { - const state = await (await worker()).state; - phase = - state == "indexing" || state == "fetching" ? state : "scheduled"; + const state = await (await worker()).state; + if (state == "indexing" || state == "fetching") { + phase = state; } else { - phase = "done"; + phase = indexableCount > 0 ? "scheduled" : "done"; } return { From d7fb8cf82b5f593fd2526c1acad6d7a868e45afa Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 19 Aug 2024 14:54:18 +0530 Subject: [PATCH 4/6] Handle the idle transition in the UI --- web/packages/new/photos/services/ml/index.ts | 4 ++-- web/packages/new/photos/services/ml/worker-types.ts | 7 +++---- web/packages/new/photos/services/ml/worker.ts | 3 ++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index 53a57a3b73..07b8afe5cd 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -89,7 +89,7 @@ const worker = () => const createComlinkWorker = async () => { const electron = ensureElectron(); const delegate = { - workerDidProcessFile, + workerDidProcessFileOrIdle, }; // Obtain a message port from the Electron layer. @@ -523,7 +523,7 @@ const setInterimScheduledStatus = () => { setMLStatusSnapshot({ phase: "scheduled", nSyncedFiles, nTotalFiles }); }; -const workerDidProcessFile = throttled(updateMLStatusSnapshot, 2000); +const workerDidProcessFileOrIdle = throttled(updateMLStatusSnapshot, 2000); /** * Use CLIP to perform a natural language search over image embeddings. 
diff --git a/web/packages/new/photos/services/ml/worker-types.ts b/web/packages/new/photos/services/ml/worker-types.ts index 72d6bce61b..446986b8ef 100644 --- a/web/packages/new/photos/services/ml/worker-types.ts +++ b/web/packages/new/photos/services/ml/worker-types.ts @@ -8,11 +8,10 @@ */ export interface MLWorkerDelegate { /** - * Called whenever a file is processed during indexing. - * - * It is called both when the indexing was successful or it failed. + * Called whenever the worker processes a file during indexing (either + * successfully or with errors), or when it goes into the "idle" state. */ - workerDidProcessFile: () => void; + workerDidProcessFileOrIdle: () => void; } /** diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 27dd602aef..0823a5806a 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -233,6 +233,7 @@ export class MLWorker { this.state = "idle"; this.idleDuration = Math.min(this.idleDuration * 2, idleDurationMax); this.idleTimeout = setTimeout(scheduleTick, this.idleDuration * 1000); + this.delegate?.workerDidProcessFileOrIdle(); } /** Return the next batch of items to backfill (if any). */ @@ -320,7 +321,7 @@ const indexNextBatch = async ( await Promise.race(tasks); // Let the main thread now we're doing something. - delegate?.workerDidProcessFile(); + delegate?.workerDidProcessFileOrIdle(); // Let us drain the microtask queue. This also gives a chance for other // interactive tasks like `clipMatches` to run. 
From a43c0baa4610911d69036d784acd3ee5e2f1d248 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 19 Aug 2024 15:00:49 +0530 Subject: [PATCH 5/6] Avoid non-greppable context APIs for i18n --- web/apps/accounts/src/pages/_app.tsx | 4 +--- web/apps/auth/src/pages/_app.tsx | 4 +--- web/apps/photos/src/pages/_app.tsx | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/web/apps/accounts/src/pages/_app.tsx b/web/apps/accounts/src/pages/_app.tsx index 4b9572cdd2..6b19b530fb 100644 --- a/web/apps/accounts/src/pages/_app.tsx +++ b/web/apps/accounts/src/pages/_app.tsx @@ -52,9 +52,7 @@ const App: React.FC = ({ Component, pageProps }) => { setDialogBoxAttributesV2, }; - const title = isI18nReady - ? t("title", { context: "accounts" }) - : staticAppTitle; + const title = isI18nReady ? t("title_accounts") : staticAppTitle; return ( <> diff --git a/web/apps/auth/src/pages/_app.tsx b/web/apps/auth/src/pages/_app.tsx index cab009dd17..f369755cad 100644 --- a/web/apps/auth/src/pages/_app.tsx +++ b/web/apps/auth/src/pages/_app.tsx @@ -150,9 +150,7 @@ const App: React.FC = ({ Component, pageProps }) => { somethingWentWrong, }; - const title = isI18nReady - ? t("title", { context: "auth" }) - : staticAppTitle; + const title = isI18nReady ? t("title_auth") : staticAppTitle; return ( <> diff --git a/web/apps/photos/src/pages/_app.tsx b/web/apps/photos/src/pages/_app.tsx index 195ab2f8ff..15d2c58eee 100644 --- a/web/apps/photos/src/pages/_app.tsx +++ b/web/apps/photos/src/pages/_app.tsx @@ -330,9 +330,7 @@ export default function App({ Component, pageProps }: AppProps) { logout, }; - const title = isI18nReady - ? t("title", { context: "photos" }) - : staticAppTitle; + const title = isI18nReady ? 
t("title_photos") : staticAppTitle; return ( <> From 769b3ab21f166777ca703acc23bf567efc0e3c7e Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 19 Aug 2024 15:04:07 +0530 Subject: [PATCH 6/6] i18n --- web/apps/photos/src/services/searchService.ts | 3 +-- web/packages/base/locales/en-US/translation.json | 2 ++ web/packages/new/photos/components/MLSettings.tsx | 15 +++++---------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts index 1be9fa01fd..3156652a38 100644 --- a/web/apps/photos/src/services/searchService.ts +++ b/web/apps/photos/src/services/searchService.ts @@ -194,8 +194,7 @@ export async function getMLStatusSuggestion(): Promise { label = t("indexing_photos", status); break; case "fetching": - // label = pt("Fetching"indexing_photos", status); - label = `Fetching indexes (${status.nSyncedFiles} / ${status.nTotalFiles})`; + label = t("indexing_fetching", status); break; case "clustering": label = t("indexing_people", status); diff --git a/web/packages/base/locales/en-US/translation.json b/web/packages/base/locales/en-US/translation.json index 9a5114934b..4df6277261 100644 --- a/web/packages/base/locales/en-US/translation.json +++ b/web/packages/base/locales/en-US/translation.json @@ -232,6 +232,7 @@ "PEOPLE": "People", "indexing_scheduled": "Indexing is scheduled...", "indexing_photos": "Indexing photos ({{nSyncedFiles, number}} / {{nTotalFiles, number}})", + "indexing_fetching": "Fetching indexes ({{nSyncedFiles, number}} / {{nTotalFiles, number}})", "indexing_people": "Indexing people in {{nSyncedFiles, number}} photos...", "indexing_done": "Indexed {{nSyncedFiles, number}} photos", "UNIDENTIFIED_FACES": "Unidentified faces", @@ -484,6 +485,7 @@ "indexing": "Indexing", "processed": "Processed", "indexing_status_running": "Running", + "indexing_status_fetching": "Fetching", "indexing_status_scheduled": "Scheduled", "indexing_status_done": "Done", 
"ml_search_disable": "Disable machine learning", diff --git a/web/packages/new/photos/components/MLSettings.tsx b/web/packages/new/photos/components/MLSettings.tsx index 87613912d7..55fc07c466 100644 --- a/web/packages/new/photos/components/MLSettings.tsx +++ b/web/packages/new/photos/components/MLSettings.tsx @@ -1,7 +1,6 @@ import { EnteDrawer } from "@/base/components/EnteDrawer"; import { MenuItemGroup } from "@/base/components/Menu"; import { Titlebar } from "@/base/components/Titlebar"; -import { pt } from "@/base/i18n"; import log from "@/base/log"; import { disableML, @@ -301,17 +300,17 @@ const ManageML: React.FC = ({ let status: string; switch (phase) { case "scheduled": - status = "scheduled"; + status = t("indexing_status_scheduled"); break; case "fetching": - status = "fetching"; + status = t("indexing_status_fetching"); break; case "indexing": - status = "running"; + status = t("indexing_status_running"); break; // TODO: Clustering default: - status = "done"; + status = t("indexing_status_done"); break; } const processed = `${nSyncedFiles} / ${nTotalFiles}`; @@ -355,11 +354,7 @@ const ManageML: React.FC = ({ {t("indexing")} - - {status == "fetching" - ? pt("Fetching") - : t("indexing_status", { context: status })} - + {status}