From 97bbf4811fbea83462732fee3ff75bdd8f8575d0 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Thu, 1 Aug 2024 11:45:25 +0530 Subject: [PATCH] Save --- web/packages/new/photos/services/ml/worker.ts | 70 ++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 79124e34cc..035f296346 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -358,7 +358,22 @@ const pullSince = async (sinceTime: number, fetchedCount: number) => { const items = await fetchDerivedData(filesByID); - // TODO: Save items + const save = async ([id, data]: [number, RemoteDerivedData]) => { + try { + await saveDerivedData(id, data); + } catch (e) { + // Ignore errors during saving individual items, let the rest of the + // pull proceed. Failures will not have a lasting impact since the + // file will get revisited anyway as part of a backfill. + log.warn( + `Ignoring error when saving pulled derived data for file id ${id}`, + e, + ); + } + }; + + // Save items. + await Promise.all([...items.entries()].map(save)); // Save the checkpoint. await setLatestDerivedDataUpdatedAt(latestUpdatedAt); @@ -372,6 +387,57 @@ const pullSince = async (sinceTime: number, fetchedCount: number) => { return pullSince(latestUpdatedAt, fetchedCount + items.size); }; +/** + * Save the given {@link remoteDerivedData} for {@link fileID}. + * + * This is a subset of the save sequence during {@link index}. This one is meant + * to be used during a {@link pull}. + */ +const saveDerivedData = async ( + fileID: number, + remoteDerivedData: RemoteDerivedData, +) => { + // Discard any existing data that was made by an older indexing pipeline. 
+ // See: [Note: Embedding versions] + + const existingRemoteFaceIndex = remoteDerivedData.parsed?.face; + const existingRemoteCLIPIndex = remoteDerivedData.parsed?.clip; + + let existingFaceIndex: FaceIndex | undefined; + if ( + existingRemoteFaceIndex && + existingRemoteFaceIndex.version >= faceIndexingVersion + ) { + const { width, height, faces } = existingRemoteFaceIndex; + existingFaceIndex = { width, height, faces }; + } + + let existingCLIPIndex: CLIPIndex | undefined; + if ( + existingRemoteCLIPIndex && + existingRemoteCLIPIndex.version >= clipIndexingVersion + ) { + const { embedding } = existingRemoteCLIPIndex; + existingCLIPIndex = { embedding }; + } + + // If we have all the required embedding types, then save them, marking a + // file as indexed. + // + // In particular, this means that there might be files which we've marked + // indexed but still don't have the optional derived data types like exif. + // This is fine, we wish to compute the optional type of derived data when + // we can, but by themselves they're not reason enough for us to download + // and index the original. + + if (existingFaceIndex && existingCLIPIndex) { + await saveIndexes( + { fileID, ...existingFaceIndex }, + { fileID, ...existingCLIPIndex }, + ); + } +}; + /** * Find out files which need to be indexed. Then index the next batch of them. * @@ -545,6 +611,8 @@ const index = async ( existingRemoteFaceIndex && existingRemoteFaceIndex.version >= faceIndexingVersion ) { + // Destructure the data we got from remote so that we only retain the + // fields we're interested in, in the object that gets put into IndexedDB. const { width, height, faces } = existingRemoteFaceIndex; existingFaceIndex = { width, height, faces }; }