From 3980d6b61448c67098311bb4bad8daa07f190720 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 13:27:03 +0530 Subject: [PATCH] Nomen --- .../new/photos/services/ml/ml-data.ts | 4 +- web/packages/new/photos/services/ml/worker.ts | 51 +++++++++---------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/web/packages/new/photos/services/ml/ml-data.ts b/web/packages/new/photos/services/ml/ml-data.ts index 2956b9dea2..2d99f215d5 100644 --- a/web/packages/new/photos/services/ml/ml-data.ts +++ b/web/packages/new/photos/services/ml/ml-data.ts @@ -35,7 +35,7 @@ import { type RemoteFaceIndex } from "./face"; * * which is then gzipped to get the plaintext data to upload. * - * [Note: Preserve unknown derived data fields] + * [Note: Preserve unknown ML data fields] * * The (unzipped) remote mldata can contain arbitrary keys at the top level * apart from the ones that the current client knows about. We need to preserve @@ -214,7 +214,7 @@ const remoteMLDataFromJSONString = (jsonString: string) => { * client (us) to ensure that we preserve the parts of the pre-existing ML data * (if any) that we did not understand or touch. * - * See: [Note: Preserve unknown derived data fields]. + * See: [Note: Preserve unknown ML data fields]. */ export const putMLData = async (enteFile: EnteFile, mlData: RawRemoteMLData) => putFileData(enteFile, "mldata", await gzip(JSON.stringify(mlData))); diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 9d0d6c6bba..3f211421c0 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -30,13 +30,13 @@ import { saveIndexes, updateAssumingLocalFiles, } from "./db"; +import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face"; import { fetchMLData, putMLData, type RawRemoteMLData, type RemoteMLData, } from "./ml-data"; -import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face"; import type { CLIPMatches, MLWorkerDelegate } from "./worker-types"; const idleDurationStart = 5; /* 5 seconds */ @@ -47,8 +47,8 @@ interface IndexableItem { enteFile: EnteFile; /** If the file was uploaded from the current client, then its contents. */ uploadItem: UploadItem | undefined; - /** The existing derived data on remote corresponding to this file. */ - remoteDerivedData: RemoteMLData | undefined; + /** The existing ML data on remote corresponding to this file. */ + remoteMLData: RemoteMLData | undefined; } /** @@ -111,9 +111,9 @@ export class MLWorker { * This function enqueues a backfill attempt and returns immediately without * waiting for it complete. * - * During a backfill, we first attempt to fetch derived data for files which - * don't have that data locally. If we fetch and find what we need, we save - * it locally. Otherwise we index them. + * During a backfill, we first attempt to fetch ML data for files which + * don't have that data locally. If on fetching we find what we need, we + * save it locally. Otherwise we index them. */ sync() { this.wakeUp(); @@ -156,9 +156,8 @@ export class MLWorker { // the live queue, it'll later get indexed anyway when we backfill. if (this.liveQ.length < 200) { // The file is just being uploaded, and so will not have any - // pre-existing derived data on remote. - const remoteDerivedData = undefined; - this.liveQ.push({ enteFile, uploadItem, remoteDerivedData }); + // pre-existing ML data on remote. + this.liveQ.push({ enteFile, uploadItem, remoteMLData: undefined }); this.wakeUp(); } else { log.debug(() => "Ignoring upload item since liveQ is full"); @@ -235,13 +234,13 @@ export class MLWorker { 200, ); if (!filesByID.size) return []; - // Fetch their existing derived data (if any). - const derivedDataByID = await fetchMLData(filesByID); - // Return files after annotating them with their existing derived data. + // Fetch their existing ML data (if any). + const mlDataByID = await fetchMLData(filesByID); + // Return files after annotating them with their existing ML data. return Array.from(filesByID, ([id, file]) => ({ enteFile: file, uploadItem: undefined, - remoteDerivedData: derivedDataByID.get(id), + remoteMLData: mlDataByID.get(id), })); } } @@ -387,7 +386,7 @@ const syncWithLocalFilesAndGetFilesToIndex = async ( * then remote will return a 413 Request Entity Too Large). */ const index = async ( - { enteFile, uploadItem, remoteDerivedData }: IndexableItem, + { enteFile, uploadItem, remoteMLData }: IndexableItem, electron: ElectronMLWorker, ) => { const f = fileLogID(enteFile); @@ -399,8 +398,8 @@ const index = async ( // Discard any existing data that is made by an older indexing pipelines. // See: [Note: Embedding versions] - const existingRemoteFaceIndex = remoteDerivedData?.parsed?.face; - const existingRemoteCLIPIndex = remoteDerivedData?.parsed?.clip; + const existingRemoteFaceIndex = remoteMLData?.parsed?.face; + const existingRemoteCLIPIndex = remoteMLData?.parsed?.clip; let existingFaceIndex: FaceIndex | undefined; if ( @@ -422,8 +421,8 @@ const index = async ( existingCLIPIndex = { embedding }; } - // If we already have all the derived data fields then just update our local - // db and return. + // If we already have all the ML data types then just update our local db + // and return. if (existingFaceIndex && existingCLIPIndex) { try { @@ -438,7 +437,7 @@ const index = async ( return; } - // There is at least one derived data type that still needs to be indexed. + // There is at least one ML data type that still needs to be indexed. const renderableBlob = await fetchRenderableBlob( enteFile, @@ -501,22 +500,22 @@ const index = async ( // Perform an "upsert" by using the existing raw data we got from the // remote as the base, and inserting or overwriting any newly indexed - // parts. See: [Note: Preserve unknown derived data fields]. + // parts. See: [Note: Preserve unknown ML data fields]. - const existingRawDerivedData = remoteDerivedData?.raw ?? {}; - const rawDerivedData: RawRemoteMLData = { - ...existingRawDerivedData, + const existingRawMLData = remoteMLData?.raw ?? {}; + const rawMLData: RawRemoteMLData = { + ...existingRawMLData, face: remoteFaceIndex, clip: remoteCLIPIndex, }; - log.debug(() => ["Uploading derived data", rawDerivedData]); + log.debug(() => ["Uploading ML data", rawMLData]); try { - await putMLData(enteFile, rawDerivedData); + await putMLData(enteFile, rawMLData); } catch (e) { // See: [Note: Transient and permanent indexing failures] - log.error(`Failed to put derived data for ${f}`, e); + log.error(`Failed to put ML data for ${f}`, e); if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id); throw e; }