diff --git a/web/apps/photos/src/services/face/indexer.ts b/web/apps/photos/src/services/face/indexer.ts index 0f78c20e2e..74e700a3f0 100644 --- a/web/apps/photos/src/services/face/indexer.ts +++ b/web/apps/photos/src/services/face/indexer.ts @@ -2,13 +2,16 @@ import { faceIndex, indexableFileIDs, indexedAndIndexableCounts, - syncWithLocalFiles, + syncAssumingLocalFileIDs, } from "@/new/photos/services/face/db"; import { isBetaUser, isInternalUser, } from "@/new/photos/services/feature-flags"; -import { getAllLocalFiles } from "@/new/photos/services/files"; +import { + getAllLocalFiles, + getLocalTrashedFiles, +} from "@/new/photos/services/files"; import type { EnteFile } from "@/new/photos/types/file"; import { ComlinkWorker } from "@/next/worker/comlink-worker"; import { ensure } from "@/utils/ensure"; @@ -229,13 +232,13 @@ export const setIsFaceIndexingEnabled = async (enabled: boolean) => { * about. Then return the next {@link count} files that still need to be * indexed. * - * For more specifics of what a "sync" entails, see {@link syncWithLocalFiles}. + * For specifics of what a "sync" entails, see {@link syncAssumingLocalFileIDs}. * * @param userID Sync only files owned by a {@link userID} with the face DB. * * @param count Limit the resulting list of indexable files to {@link count}. */ -export const syncAndGetFilesToIndex = async ( +export const syncWithLocalFilesAndGetFilesToIndex = async ( userID: number, count: number, ): Promise => { @@ -246,7 +249,14 @@ export const syncAndGetFilesToIndex = async ( localFiles.filter(isIndexable).map((f) => [f.id, f]), ); - await syncWithLocalFiles([...localFilesByID.keys()]); + const localFilesInTrashIDs = (await getLocalTrashedFiles()).map( + (f) => f.id, + ); + + await syncAssumingLocalFileIDs( + [...localFilesByID.keys()], + localFilesInTrashIDs, + ); const fileIDsToIndex = await indexableFileIDs(count); return fileIDsToIndex.map((id) => ensure(localFilesByID.get(id))); diff --git a/web/apps/photos/src/services/machineLearning/machineLearningService.ts b/web/apps/photos/src/services/machineLearning/machineLearningService.ts index 7dbadf9186..be182219a8 100644 --- a/web/apps/photos/src/services/machineLearning/machineLearningService.ts +++ b/web/apps/photos/src/services/machineLearning/machineLearningService.ts @@ -2,7 +2,7 @@ import { EnteFile } from "@/new/photos/types/file"; import log from "@/next/log"; import { CustomError, parseUploadErrorCodes } from "@ente/shared/error"; import PQueue from "p-queue"; -import { syncAndGetFilesToIndex } from "services/face/indexer"; +import { syncWithLocalFilesAndGetFilesToIndex } from "services/face/indexer"; import { FaceIndexerWorker } from "services/face/indexer.worker"; const batchSize = 200; @@ -56,7 +56,7 @@ class MachineLearningService { const syncContext = await this.getSyncContext(token, userID, userAgent); - syncContext.outOfSyncFiles = await syncAndGetFilesToIndex( + syncContext.outOfSyncFiles = await syncWithLocalFilesAndGetFilesToIndex( userID, batchSize, ); diff --git a/web/packages/new/photos/services/embedding.ts b/web/packages/new/photos/services/embedding.ts index 38793680ed..3c2df24ec3 100644 --- a/web/packages/new/photos/services/embedding.ts +++ b/web/packages/new/photos/services/embedding.ts @@ -75,13 +75,12 @@ const RemoteEmbedding = z.object({ type RemoteEmbedding = z.infer; /** - * Fetch new or updated face embeddings with the server and save them locally. - * Also prune local embeddings for any files no longer exist locally. + * Fetch new or updated face embeddings from remote and save them locally. * * It takes no parameters since it saves the last sync time in local storage. * - * Precondition: This function should be called only after we have synced files - * with remote (See: [Note: Ignoring embeddings for unknown files]). + * This function should be called only after we have synced files with remote. + * See: [Note: Ignoring embeddings for unknown files]. */ export const syncRemoteFaceEmbeddings = async () => { // Include files from trash, otherwise they'll get unnecessarily reindexed @@ -91,9 +90,6 @@ export const syncRemoteFaceEmbeddings = async () => { ); const localFilesByID = new Map(localFiles.map((f) => [f.id, f])); - // Delete embeddings for files which are no longer present locally. - // pruneFaceEmbeddings(localFilesByID); - const decryptEmbedding = async (remoteEmbedding: RemoteEmbedding) => { const file = localFilesByID.get(remoteEmbedding.fileID); // [Note: Ignoring embeddings for unknown files] diff --git a/web/packages/new/photos/services/face/db.ts b/web/packages/new/photos/services/face/db.ts index bd8255f40f..5a86605a45 100644 --- a/web/packages/new/photos/services/face/db.ts +++ b/web/packages/new/photos/services/face/db.ts @@ -217,30 +217,54 @@ export const addFileEntry = async (fileID: number) => { * Sync entries in the face DB to align with the state of local files outside * face DB. * - * @param localFileIDs Local {@link EnteFile}s, keyed by their IDs. These are - * all the files that the client is aware of, filtered to only keep the files - * that the user owns and the formats that can be indexed by our current face - * indexing pipeline. + * @param localFileIDs IDs of all the files that the client is aware of filtered + * to only keep the files that the user owns and the formats that can be indexed + * by our current face indexing pipeline. * - * This function syncs the state of file entries in face DB to the state of file - * entries stored otherwise by the client locally. + * @param localFilesInTrashIDs IDs of all the files in trash. * - * - Files (identified by their ID) that are present locally but are not yet in - * face DB get a fresh entry in face DB (and are marked as indexable). + * This function then updates the state of file entries in face DB to the be in + * "sync" with these provided local file IDS. * - * - Files that are not present locally but still exist in face DB are removed - * from face DB (including its face index, if any). + * - Files that are present locally but are not yet in face DB get a fresh entry + * in face DB (and are marked as indexable). + * + * - Files that are not present locally (nor are in trash) but still exist in + * face DB are removed from face DB (including their face index, if any). + * + * - Files that are not present locally but are in the trash are retained in + * face DB if their status is "indexed" (otherwise they too are removed). This + * is prevent churn (re-indexing) if the user moves some files to trash but + * then later restores them before they get permanently deleted. */ -export const syncWithLocalFiles = async (localFileIDs: number[]) => { +export const syncAssumingLocalFileIDs = async ( + localFileIDs: number[], + localFilesInTrashIDs: number[], +) => { const db = await faceDB(); const tx = db.transaction(["face-index", "file-status"], "readwrite"); const fdbFileIDs = await tx.objectStore("file-status").getAllKeys(); + const fdbIndexedFileIDs = await tx + .objectStore("file-status") + .getAllKeys(IDBKeyRange.only("indexed")); const local = new Set(localFileIDs); + const localTrash = new Set(localFilesInTrashIDs); const fdb = new Set(fdbFileIDs); + const fdbIndexed = new Set(fdbIndexedFileIDs); const newFileIDs = localFileIDs.filter((id) => !fdb.has(id)); - const removedFileIDs = fdbFileIDs.filter((id) => !local.has(id)); + const fileIDsToRemove = fdbFileIDs.filter((id) => { + if (local.has(id)) return false; // Still exists + if (localTrash.has(id)) { + // Exists in trash + if (fdbIndexed.has(id)) { + // But is already indexed, so let it be. + return false; + } + } + return true; // Remove + }); await Promise.all( [ @@ -251,10 +275,12 @@ export const syncWithLocalFiles = async (localFileIDs: number[]) => { failureCount: 0, }), ), - removedFileIDs.map((id) => + fileIDsToRemove.map((id) => tx.objectStore("file-status").delete(id), ), - removedFileIDs.map((id) => tx.objectStore("face-index").delete(id)), + fileIDsToRemove.map((id) => + tx.objectStore("face-index").delete(id), + ), tx.done, ].flat(), );