[desktop] Indexing tweaks (#2749)
This commit is contained in:
@@ -52,9 +52,7 @@ const App: React.FC<AppProps> = ({ Component, pageProps }) => {
|
||||
setDialogBoxAttributesV2,
|
||||
};
|
||||
|
||||
const title = isI18nReady
|
||||
? t("title", { context: "accounts" })
|
||||
: staticAppTitle;
|
||||
const title = isI18nReady ? t("title_accounts") : staticAppTitle;
|
||||
|
||||
return (
|
||||
<>
|
||||
|
||||
@@ -150,9 +150,7 @@ const App: React.FC<AppProps> = ({ Component, pageProps }) => {
|
||||
somethingWentWrong,
|
||||
};
|
||||
|
||||
const title = isI18nReady
|
||||
? t("title", { context: "auth" })
|
||||
: staticAppTitle;
|
||||
const title = isI18nReady ? t("title_auth") : staticAppTitle;
|
||||
|
||||
return (
|
||||
<>
|
||||
|
||||
@@ -330,9 +330,7 @@ export default function App({ Component, pageProps }: AppProps) {
|
||||
logout,
|
||||
};
|
||||
|
||||
const title = isI18nReady
|
||||
? t("title", { context: "photos" })
|
||||
: staticAppTitle;
|
||||
const title = isI18nReady ? t("title_photos") : staticAppTitle;
|
||||
|
||||
return (
|
||||
<>
|
||||
|
||||
@@ -193,6 +193,9 @@ export async function getMLStatusSuggestion(): Promise<Suggestion> {
|
||||
case "indexing":
|
||||
label = t("indexing_photos", status);
|
||||
break;
|
||||
case "fetching":
|
||||
label = t("indexing_fetching", status);
|
||||
break;
|
||||
case "clustering":
|
||||
label = t("indexing_people", status);
|
||||
break;
|
||||
|
||||
@@ -232,6 +232,7 @@
|
||||
"PEOPLE": "People",
|
||||
"indexing_scheduled": "Indexing is scheduled...",
|
||||
"indexing_photos": "Indexing photos ({{nSyncedFiles, number}} / {{nTotalFiles, number}})",
|
||||
"indexing_fetching": "Fetching indexes ({{nSyncedFiles, number}} / {{nTotalFiles, number}})",
|
||||
"indexing_people": "Indexing people in {{nSyncedFiles, number}} photos...",
|
||||
"indexing_done": "Indexed {{nSyncedFiles, number}} photos",
|
||||
"UNIDENTIFIED_FACES": "Unidentified faces",
|
||||
@@ -484,6 +485,7 @@
|
||||
"indexing": "Indexing",
|
||||
"processed": "Processed",
|
||||
"indexing_status_running": "Running",
|
||||
"indexing_status_fetching": "Fetching",
|
||||
"indexing_status_scheduled": "Scheduled",
|
||||
"indexing_status_done": "Done",
|
||||
"ml_search_disable": "Disable machine learning",
|
||||
|
||||
@@ -299,15 +299,18 @@ const ManageML: React.FC<ManageMLProps> = ({
|
||||
|
||||
let status: string;
|
||||
switch (phase) {
|
||||
case "indexing":
|
||||
status = "running";
|
||||
break;
|
||||
case "scheduled":
|
||||
status = "scheduled";
|
||||
status = t("indexing_status_scheduled");
|
||||
break;
|
||||
case "fetching":
|
||||
status = t("indexing_status_fetching");
|
||||
break;
|
||||
case "indexing":
|
||||
status = t("indexing_status_running");
|
||||
break;
|
||||
// TODO: Clustering
|
||||
default:
|
||||
status = "done";
|
||||
status = t("indexing_status_done");
|
||||
break;
|
||||
}
|
||||
const processed = `${nSyncedFiles} / ${nTotalFiles}`;
|
||||
@@ -351,9 +354,7 @@ const ManageML: React.FC<ManageMLProps> = ({
|
||||
<Typography color="text.faint">
|
||||
{t("indexing")}
|
||||
</Typography>
|
||||
<Typography>
|
||||
{t("indexing_status", { context: status })}
|
||||
</Typography>
|
||||
<Typography>{status}</Typography>
|
||||
</Stack>
|
||||
<Divider sx={{ marginInlineStart: 2 }} />
|
||||
<Stack
|
||||
|
||||
@@ -394,14 +394,23 @@ export const indexableAndIndexedCounts = async () => {
|
||||
* universe, we filter out fileIDs the files corresponding to which have already
|
||||
* been indexed, or which should be ignored.
|
||||
*
|
||||
* @param count Limit the result to up to {@link count} items.
|
||||
* @param count Limit the result to up to {@link count} items. If there are more
|
||||
* than {@link count} items present, the files with the higher file IDs (which
|
||||
 * can be taken as an approximation of their creation order) are preferred.
|
||||
*/
|
||||
export const indexableFileIDs = async (count?: number) => {
|
||||
export const indexableFileIDs = async (count: number) => {
|
||||
const db = await mlDB();
|
||||
const tx = db.transaction("file-status", "readonly");
|
||||
return tx.store
|
||||
let cursor = await tx.store
|
||||
.index("status")
|
||||
.getAllKeys(IDBKeyRange.only("indexable"), count);
|
||||
.openKeyCursor(IDBKeyRange.only("indexable"), "prev");
|
||||
const result: number[] = [];
|
||||
while (cursor && count > 0) {
|
||||
result.push(cursor.primaryKey);
|
||||
cursor = await cursor.continue();
|
||||
count -= 1;
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -89,7 +89,7 @@ const worker = () =>
|
||||
const createComlinkWorker = async () => {
|
||||
const electron = ensureElectron();
|
||||
const delegate = {
|
||||
workerDidProcessFile,
|
||||
workerDidProcessFileOrIdle,
|
||||
};
|
||||
|
||||
// Obtain a message port from the Electron layer.
|
||||
@@ -404,13 +404,16 @@ export type MLStatus =
|
||||
*
|
||||
* - "indexing": The indexer is currently running.
|
||||
*
|
||||
* - "fetching": The indexer is currently running, but we're primarily
|
||||
* fetching indexes for existing files.
|
||||
*
|
||||
* - "clustering": All file we know of have been indexed, and we are now
|
||||
* clustering the faces that were found.
|
||||
*
|
||||
* - "done": ML indexing and face clustering is complete for the user's
|
||||
* library.
|
||||
*/
|
||||
phase: "scheduled" | "indexing" | "clustering" | "done";
|
||||
phase: "scheduled" | "indexing" | "fetching" | "clustering" | "done";
|
||||
/** The number of files that have already been indexed. */
|
||||
nSyncedFiles: number;
|
||||
/** The total number of files that are eligible for indexing. */
|
||||
@@ -476,12 +479,19 @@ const getMLStatus = async (): Promise<MLStatus> => {
|
||||
|
||||
const { indexedCount, indexableCount } = await indexableAndIndexedCounts();
|
||||
|
||||
// During live uploads, the indexable count remains zero even as the indexer
|
||||
// is processing the newly uploaded items. This is because these "live
|
||||
// queue" items do not yet have a "file-status" entry.
|
||||
//
|
||||
// So use the state of the worker as a guide for the phase, not the
|
||||
// indexable count.
|
||||
|
||||
let phase: MLStatus["phase"];
|
||||
if (indexableCount > 0) {
|
||||
const isIndexing = await (await worker()).isIndexing();
|
||||
phase = !isIndexing ? "scheduled" : "indexing";
|
||||
const state = await (await worker()).state;
|
||||
if (state == "indexing" || state == "fetching") {
|
||||
phase = state;
|
||||
} else {
|
||||
phase = "done";
|
||||
phase = indexableCount > 0 ? "scheduled" : "done";
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -513,7 +523,7 @@ const setInterimScheduledStatus = () => {
|
||||
setMLStatusSnapshot({ phase: "scheduled", nSyncedFiles, nTotalFiles });
|
||||
};
|
||||
|
||||
const workerDidProcessFile = throttled(updateMLStatusSnapshot, 2000);
|
||||
const workerDidProcessFileOrIdle = throttled(updateMLStatusSnapshot, 2000);
|
||||
|
||||
/**
|
||||
* Use CLIP to perform a natural language search over image embeddings.
|
||||
|
||||
@@ -8,11 +8,10 @@
|
||||
*/
|
||||
export interface MLWorkerDelegate {
|
||||
/**
|
||||
* Called whenever a file is processed during indexing.
|
||||
*
|
||||
* It is called both when the indexing was successful or it failed.
|
||||
* Called whenever the worker processes a file during indexing (either
|
||||
 * successfully or with errors), or when it goes into the "idle" state.
|
||||
*/
|
||||
workerDidProcessFile: () => void;
|
||||
workerDidProcessFileOrIdle: () => void;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { clientPackageName } from "@/base/app";
|
||||
import { assertionFailed } from "@/base/assert";
|
||||
import { isHTTP4xxError } from "@/base/http";
|
||||
import { getKVN } from "@/base/kv";
|
||||
import { ensureAuthToken } from "@/base/local-user";
|
||||
@@ -39,6 +40,20 @@ import {
|
||||
} from "./ml-data";
|
||||
import type { CLIPMatches, MLWorkerDelegate } from "./worker-types";
|
||||
|
||||
/**
|
||||
* A rough hint at what the worker is up to.
|
||||
*
|
||||
* - "idle": Not doing anything
|
||||
* - "tick": Transitioning to a new state
|
||||
* - "indexing": Indexing
|
||||
* - "fetching": A subset of indexing
|
||||
*
|
||||
* During indexing, the state is set to "fetching" whenever remote provided us
|
||||
* data for more than 50% of the files that we requested from it in the last
|
||||
* fetch during indexing.
|
||||
*/
|
||||
export type WorkerState = "idle" | "tick" | "indexing" | "fetching";
|
||||
|
||||
const idleDurationStart = 5; /* 5 seconds */
|
||||
const idleDurationMax = 16 * 60; /* 16 minutes */
|
||||
|
||||
@@ -76,9 +91,11 @@ interface IndexableItem {
|
||||
* particular, for finding the closest CLIP match when the user does a search.
|
||||
*/
|
||||
export class MLWorker {
|
||||
/** The last known state of the worker. */
|
||||
public state: WorkerState = "idle";
|
||||
|
||||
private electron: ElectronMLWorker | undefined;
|
||||
private delegate: MLWorkerDelegate | undefined;
|
||||
private state: "idle" | "tick" | "indexing" = "idle";
|
||||
private liveQ: IndexableItem[] = [];
|
||||
private idleTimeout: ReturnType<typeof setTimeout> | undefined;
|
||||
private idleDuration = idleDurationStart; /* unit: seconds */
|
||||
@@ -164,13 +181,6 @@ export class MLWorker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if we're currently indexing.
|
||||
*/
|
||||
isIndexing() {
|
||||
return this.state == "indexing";
|
||||
}
|
||||
|
||||
/**
|
||||
* Find {@link CLIPMatches} for a given {@link searchPhrase}.
|
||||
*/
|
||||
@@ -223,6 +233,7 @@ export class MLWorker {
|
||||
this.state = "idle";
|
||||
this.idleDuration = Math.min(this.idleDuration * 2, idleDurationMax);
|
||||
this.idleTimeout = setTimeout(scheduleTick, this.idleDuration * 1000);
|
||||
this.delegate?.workerDidProcessFileOrIdle();
|
||||
}
|
||||
|
||||
/** Return the next batch of items to backfill (if any). */
|
||||
@@ -234,8 +245,20 @@ export class MLWorker {
|
||||
200,
|
||||
);
|
||||
if (!filesByID.size) return [];
|
||||
|
||||
// Fetch their existing ML data (if any).
|
||||
const mlDataByID = await fetchMLData(filesByID);
|
||||
|
||||
// If the number of files for which remote gave us data is more than 50%
|
||||
// of what we asked of it, assume we are "fetching", not "indexing".
|
||||
// This is a heuristic to try and show a better indexing state in the UI
|
||||
// (so that the user does not think that their files are being
|
||||
// unnecessarily reindexed).
|
||||
if (this.state != "indexing" && this.state != "fetching")
|
||||
assertionFailed(`Unexpected state ${this.state}`);
|
||||
this.state =
|
||||
mlDataByID.size * 2 > filesByID.size ? "fetching" : "indexing";
|
||||
|
||||
// Return files after annotating them with their existing ML data.
|
||||
return Array.from(filesByID, ([id, file]) => ({
|
||||
enteFile: file,
|
||||
@@ -298,7 +321,7 @@ const indexNextBatch = async (
|
||||
await Promise.race(tasks);
|
||||
|
||||
// Let the main thread know we're doing something.
|
||||
delegate?.workerDidProcessFile();
|
||||
delegate?.workerDidProcessFileOrIdle();
|
||||
|
||||
// Let us drain the microtask queue. This also gives a chance for other
|
||||
// interactive tasks like `clipMatches` to run.
|
||||
@@ -317,6 +340,8 @@ const indexNextBatch = async (
|
||||
* about. Then return the next {@link count} files that still need to be
|
||||
* indexed.
|
||||
*
|
||||
* When returning from amongst pending files, prefer the most recent ones first.
|
||||
*
|
||||
* For specifics of what a "sync" entails, see {@link updateAssumingLocalFiles}.
|
||||
*
|
||||
* @param userID Sync only files owned by a {@link userID} with the face DB.
|
||||
|
||||
Reference in New Issue
Block a user