Move trash handling to existing sync

Manav Rathi
2024-06-27 09:59:54 +05:30
parent 53452344f3
commit b63a15a521
4 changed files with 60 additions and 28 deletions

View File

@@ -2,13 +2,16 @@ import {
     faceIndex,
     indexableFileIDs,
     indexedAndIndexableCounts,
-    syncWithLocalFiles,
+    syncAssumingLocalFileIDs,
 } from "@/new/photos/services/face/db";
 import {
     isBetaUser,
     isInternalUser,
 } from "@/new/photos/services/feature-flags";
-import { getAllLocalFiles } from "@/new/photos/services/files";
+import {
+    getAllLocalFiles,
+    getLocalTrashedFiles,
+} from "@/new/photos/services/files";
 import type { EnteFile } from "@/new/photos/types/file";
 import { ComlinkWorker } from "@/next/worker/comlink-worker";
 import { ensure } from "@/utils/ensure";
@@ -229,13 +232,13 @@ export const setIsFaceIndexingEnabled = async (enabled: boolean) => {
  * about. Then return the next {@link count} files that still need to be
  * indexed.
  *
- * For more specifics of what a "sync" entails, see {@link syncWithLocalFiles}.
+ * For specifics of what a "sync" entails, see {@link syncAssumingLocalFileIDs}.
  *
  * @param userID Sync only files owned by a {@link userID} with the face DB.
  *
  * @param count Limit the resulting list of indexable files to {@link count}.
  */
-export const syncAndGetFilesToIndex = async (
+export const syncWithLocalFilesAndGetFilesToIndex = async (
     userID: number,
     count: number,
 ): Promise<EnteFile[]> => {
@@ -246,7 +249,14 @@ export const syncAndGetFilesToIndex = async (
         localFiles.filter(isIndexable).map((f) => [f.id, f]),
     );
-    await syncWithLocalFiles([...localFilesByID.keys()]);
+    const localFilesInTrashIDs = (await getLocalTrashedFiles()).map(
+        (f) => f.id,
+    );
+    await syncAssumingLocalFileIDs(
+        [...localFilesByID.keys()],
+        localFilesInTrashIDs,
+    );
     const fileIDsToIndex = await indexableFileIDs(count);
     return fileIDsToIndex.map((id) => ensure(localFilesByID.get(id)));

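For context, a minimal sketch of how this renamed entry point might be driven. This is not code from the commit; `indexFile` is a hypothetical stand-in for the actual face indexing pipeline.

import type { EnteFile } from "@/new/photos/types/file";
import { syncWithLocalFilesAndGetFilesToIndex } from "services/face/indexer";

// Hypothetical stand-in for the real per-file indexing step.
declare const indexFile: (file: EnteFile) => Promise<void>;

const indexAllPendingFiles = async (userID: number) => {
    const batchSize = 200; // same batch size the ML service uses
    while (true) {
        // Each iteration re-syncs the face DB (now also passing trashed
        // file IDs) before fetching the next batch of indexable files.
        const files = await syncWithLocalFilesAndGetFilesToIndex(
            userID,
            batchSize,
        );
        if (files.length === 0) return;
        for (const file of files) await indexFile(file);
    }
};

Because the sync runs before every batch, entries for files deleted or trashed mid-run get pruned (or retained, per the new trash rule) before they can be handed out for indexing.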
View File

@@ -2,7 +2,7 @@ import { EnteFile } from "@/new/photos/types/file";
 import log from "@/next/log";
 import { CustomError, parseUploadErrorCodes } from "@ente/shared/error";
 import PQueue from "p-queue";
-import { syncAndGetFilesToIndex } from "services/face/indexer";
+import { syncWithLocalFilesAndGetFilesToIndex } from "services/face/indexer";
 import { FaceIndexerWorker } from "services/face/indexer.worker";
 
 const batchSize = 200;
@@ -56,7 +56,7 @@ class MachineLearningService {
         const syncContext = await this.getSyncContext(token, userID, userAgent);
 
-        syncContext.outOfSyncFiles = await syncAndGetFilesToIndex(
+        syncContext.outOfSyncFiles = await syncWithLocalFilesAndGetFilesToIndex(
             userID,
             batchSize,
         );

View File

@@ -75,13 +75,12 @@ const RemoteEmbedding = z.object({
 type RemoteEmbedding = z.infer<typeof RemoteEmbedding>;
 
 /**
- * Fetch new or updated face embeddings with the server and save them locally.
- * Also prune local embeddings for any files no longer exist locally.
+ * Fetch new or updated face embeddings from remote and save them locally.
  *
  * It takes no parameters since it saves the last sync time in local storage.
  *
- * Precondition: This function should be called only after we have synced files
- * with remote (See: [Note: Ignoring embeddings for unknown files]).
+ * This function should be called only after we have synced files with remote.
+ * See: [Note: Ignoring embeddings for unknown files].
  */
 export const syncRemoteFaceEmbeddings = async () => {
     // Include files from trash, otherwise they'll get unnecessarily reindexed
@@ -91,9 +90,6 @@ export const syncRemoteFaceEmbeddings = async () => {
     );
     const localFilesByID = new Map(localFiles.map((f) => [f.id, f]));
-
-    // Delete embeddings for files which are no longer present locally.
-    // pruneFaceEmbeddings(localFilesByID);
 
     const decryptEmbedding = async (remoteEmbedding: RemoteEmbedding) => {
         const file = localFilesByID.get(remoteEmbedding.fileID);
         // [Note: Ignoring embeddings for unknown files]

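The ordering precondition in the doc comment above is load-bearing. A minimal sketch of the intended call order during a full sync, where `syncFiles` and `syncTrash` are hypothetical stand-ins for the client's existing remote sync routines, and `syncRemoteFaceEmbeddings` is declared here only because its module path is not shown in this diff:

declare const syncFiles: () => Promise<void>;
declare const syncTrash: () => Promise<void>;
declare const syncRemoteFaceEmbeddings: () => Promise<void>;

const fullSync = async () => {
    // Bring local files (and the local trash mirror) up to date first...
    await syncFiles();
    await syncTrash();
    // ...and only then pull embeddings, so that embeddings for files the
    // client does not yet know about get ignored rather than misapplied.
    await syncRemoteFaceEmbeddings();
};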
View File

@@ -217,30 +217,54 @@ export const addFileEntry = async (fileID: number) => {
  * Sync entries in the face DB to align with the state of local files outside
  * face DB.
  *
- * @param localFileIDs Local {@link EnteFile}s, keyed by their IDs. These are
- * all the files that the client is aware of, filtered to only keep the files
- * that the user owns and the formats that can be indexed by our current face
- * indexing pipeline.
+ * @param localFileIDs IDs of all the files that the client is aware of, filtered
+ * to only keep the files that the user owns and the formats that can be indexed
+ * by our current face indexing pipeline.
  *
- * This function syncs the state of file entries in face DB to the state of file
- * entries stored otherwise by the client locally.
+ * @param localFilesInTrashIDs IDs of all the files in trash.
  *
- * - Files (identified by their ID) that are present locally but are not yet in
- * face DB get a fresh entry in face DB (and are marked as indexable).
+ * This function then updates the state of file entries in face DB to be in
+ * "sync" with the provided local file IDs.
  *
- * - Files that are not present locally but still exist in face DB are removed
- * from face DB (including its face index, if any).
+ * - Files that are present locally but are not yet in face DB get a fresh entry
+ * in face DB (and are marked as indexable).
  *
+ * - Files that are not present locally (nor are in trash) but still exist in
+ * face DB are removed from face DB (including their face index, if any).
+ *
+ * - Files that are not present locally but are in the trash are retained in
+ * face DB if their status is "indexed" (otherwise they too are removed). This
+ * is to prevent churn (re-indexing) if the user moves some files to trash but
+ * then later restores them before they get permanently deleted.
  */
-export const syncWithLocalFiles = async (localFileIDs: number[]) => {
+export const syncAssumingLocalFileIDs = async (
+    localFileIDs: number[],
+    localFilesInTrashIDs: number[],
+) => {
     const db = await faceDB();
     const tx = db.transaction(["face-index", "file-status"], "readwrite");
     const fdbFileIDs = await tx.objectStore("file-status").getAllKeys();
+    const fdbIndexedFileIDs = await tx
+        .objectStore("file-status")
+        .index("status")
+        .getAllKeys(IDBKeyRange.only("indexed"));
     const local = new Set(localFileIDs);
+    const localTrash = new Set(localFilesInTrashIDs);
     const fdb = new Set(fdbFileIDs);
+    const fdbIndexed = new Set(fdbIndexedFileIDs);
     const newFileIDs = localFileIDs.filter((id) => !fdb.has(id));
-    const removedFileIDs = fdbFileIDs.filter((id) => !local.has(id));
+    const fileIDsToRemove = fdbFileIDs.filter((id) => {
+        if (local.has(id)) return false; // Still exists
+        if (localTrash.has(id)) {
+            // Exists in trash
+            if (fdbIndexed.has(id)) {
+                // But is already indexed, so let it be.
+                return false;
+            }
+        }
+        return true; // Remove
+    });
     await Promise.all(
         [
@@ -251,10 +275,12 @@ export const syncWithLocalFiles = async (localFileIDs: number[]) => {
                     failureCount: 0,
                 }),
             ),
-            removedFileIDs.map((id) =>
+            fileIDsToRemove.map((id) =>
                 tx.objectStore("file-status").delete(id),
             ),
-            removedFileIDs.map((id) => tx.objectStore("face-index").delete(id)),
+            fileIDsToRemove.map((id) =>
+                tx.objectStore("face-index").delete(id),
+            ),
             tx.done,
         ].flat(),
     );
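
The removal rule implemented by the `fileIDsToRemove` filter above can be restated as a standalone predicate. A minimal sketch, not code from this commit:

// An entry is removed only if it is neither a live local file nor an
// already indexed file sitting in trash.
const shouldRemove = (
    id: number,
    local: Set<number>,
    localTrash: Set<number>,
    fdbIndexed: Set<number>,
): boolean => {
    if (local.has(id)) return false; // still exists locally
    if (localTrash.has(id) && fdbIndexed.has(id)) return false; // may be restored
    return true;
};

// Example: file 7 is in trash and already indexed, so its entry (and face
// index) survive; file 9 is gone entirely, so both are deleted.
console.log(shouldRemove(7, new Set([1]), new Set([7]), new Set([7]))); // false
console.log(shouldRemove(9, new Set([1]), new Set([7]), new Set([7]))); // true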