From 3980d6b61448c67098311bb4bad8daa07f190720 Mon Sep 17 00:00:00 2001
From: Manav Rathi <manav@mrmr.io>
Date: Tue, 13 Aug 2024 13:27:03 +0530
Subject: [PATCH] Nomen: Rename "derived data" to "ML data"

---
 .../new/photos/services/ml/ml-data.ts         |  4 +-
 web/packages/new/photos/services/ml/worker.ts | 51 +++++++++----------
 2 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/web/packages/new/photos/services/ml/ml-data.ts b/web/packages/new/photos/services/ml/ml-data.ts
index 2956b9dea2..2d99f215d5 100644
--- a/web/packages/new/photos/services/ml/ml-data.ts
+++ b/web/packages/new/photos/services/ml/ml-data.ts
@@ -35,7 +35,7 @@ import { type RemoteFaceIndex } from "./face";
  *
  * which is then gzipped to get the plaintext data to upload.
  *
- * [Note: Preserve unknown derived data fields]
+ * [Note: Preserve unknown ML data fields]
  *
  * The (unzipped) remote mldata can contain arbitrary keys at the top level
  * apart from the ones that the current client knows about. We need to preserve
@@ -214,7 +214,7 @@ const remoteMLDataFromJSONString = (jsonString: string) => {
  * client (us) to ensure that we preserve the parts of the pre-existing ML data
  * (if any) that we did not understand or touch.
  *
- * See: [Note: Preserve unknown derived data fields].
+ * See: [Note: Preserve unknown ML data fields].
  */
 export const putMLData = async (enteFile: EnteFile, mlData: RawRemoteMLData) =>
     putFileData(enteFile, "mldata", await gzip(JSON.stringify(mlData)));
diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts
index 9d0d6c6bba..3f211421c0 100644
--- a/web/packages/new/photos/services/ml/worker.ts
+++ b/web/packages/new/photos/services/ml/worker.ts
@@ -30,13 +30,13 @@ import {
     saveIndexes,
     updateAssumingLocalFiles,
 } from "./db";
+import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face";
 import {
     fetchMLData,
     putMLData,
     type RawRemoteMLData,
     type RemoteMLData,
 } from "./ml-data";
-import { faceIndexingVersion, indexFaces, type FaceIndex } from "./face";
 import type { CLIPMatches, MLWorkerDelegate } from "./worker-types";
 
 const idleDurationStart = 5; /* 5 seconds */
@@ -47,8 +47,8 @@ interface IndexableItem {
     enteFile: EnteFile;
     /** If the file was uploaded from the current client, then its contents. */
     uploadItem: UploadItem | undefined;
-    /** The existing derived data on remote corresponding to this file. */
-    remoteDerivedData: RemoteMLData | undefined;
+    /** The existing ML data on remote corresponding to this file. */
+    remoteMLData: RemoteMLData | undefined;
 }
 
 /**
@@ -111,9 +111,9 @@ export class MLWorker {
      * This function enqueues a backfill attempt and returns immediately without
      * waiting for it complete.
      *
-     * During a backfill, we first attempt to fetch derived data for files which
-     * don't have that data locally. If we fetch and find what we need, we save
-     * it locally. Otherwise we index them.
+     * During a backfill, we first attempt to fetch ML data for files which
+     * don't have that data locally. If on fetching we find what we need, we
+     * save it locally. Otherwise we index them.
      */
     sync() {
         this.wakeUp();
@@ -156,9 +156,8 @@ export class MLWorker {
         // the live queue, it'll later get indexed anyway when we backfill.
         if (this.liveQ.length < 200) {
             // The file is just being uploaded, and so will not have any
-            // pre-existing derived data on remote.
-            const remoteDerivedData = undefined;
-            this.liveQ.push({ enteFile, uploadItem, remoteDerivedData });
+            // pre-existing ML data on remote.
+            this.liveQ.push({ enteFile, uploadItem, remoteMLData: undefined });
             this.wakeUp();
         } else {
             log.debug(() => "Ignoring upload item since liveQ is full");
@@ -235,13 +234,13 @@ export class MLWorker {
             200,
         );
         if (!filesByID.size) return [];
-        // Fetch their existing derived data (if any).
-        const derivedDataByID = await fetchMLData(filesByID);
-        // Return files after annotating them with their existing derived data.
+        // Fetch their existing ML data (if any).
+        const mlDataByID = await fetchMLData(filesByID);
+        // Return files after annotating them with their existing ML data.
         return Array.from(filesByID, ([id, file]) => ({
             enteFile: file,
             uploadItem: undefined,
-            remoteDerivedData: derivedDataByID.get(id),
+            remoteMLData: mlDataByID.get(id),
         }));
     }
 }
@@ -387,7 +386,7 @@ const syncWithLocalFilesAndGetFilesToIndex = async (
  * then remote will return a 413 Request Entity Too Large).
  */
 const index = async (
-    { enteFile, uploadItem, remoteDerivedData }: IndexableItem,
+    { enteFile, uploadItem, remoteMLData }: IndexableItem,
     electron: ElectronMLWorker,
 ) => {
     const f = fileLogID(enteFile);
@@ -399,8 +398,8 @@ const index = async (
     // Discard any existing data that is made by an older indexing pipelines.
     // See: [Note: Embedding versions]
 
-    const existingRemoteFaceIndex = remoteDerivedData?.parsed?.face;
-    const existingRemoteCLIPIndex = remoteDerivedData?.parsed?.clip;
+    const existingRemoteFaceIndex = remoteMLData?.parsed?.face;
+    const existingRemoteCLIPIndex = remoteMLData?.parsed?.clip;
 
     let existingFaceIndex: FaceIndex | undefined;
     if (
@@ -422,8 +421,8 @@ const index = async (
         existingCLIPIndex = { embedding };
     }
 
-    // If we already have all the derived data fields then just update our local
-    // db and return.
+    // If we already have all the ML data types then just update our local db
+    // and return.
 
     if (existingFaceIndex && existingCLIPIndex) {
         try {
@@ -438,7 +437,7 @@ const index = async (
         return;
     }
 
-    // There is at least one derived data type that still needs to be indexed.
+    // There is at least one ML data type that still needs to be indexed.
 
     const renderableBlob = await fetchRenderableBlob(
         enteFile,
@@ -501,22 +500,22 @@ const index = async (
 
         // Perform an "upsert" by using the existing raw data we got from the
         // remote as the base, and inserting or overwriting any newly indexed
-        // parts. See: [Note: Preserve unknown derived data fields].
+        // parts. See: [Note: Preserve unknown ML data fields].
 
-        const existingRawDerivedData = remoteDerivedData?.raw ?? {};
-        const rawDerivedData: RawRemoteMLData = {
-            ...existingRawDerivedData,
+        const existingRawMLData = remoteMLData?.raw ?? {};
+        const rawMLData: RawRemoteMLData = {
+            ...existingRawMLData,
             face: remoteFaceIndex,
             clip: remoteCLIPIndex,
         };
 
-        log.debug(() => ["Uploading derived data", rawDerivedData]);
+        log.debug(() => ["Uploading ML data", rawMLData]);
 
         try {
-            await putMLData(enteFile, rawDerivedData);
+            await putMLData(enteFile, rawMLData);
         } catch (e) {
             // See: [Note: Transient and permanent indexing failures]
-            log.error(`Failed to put derived data for ${f}`, e);
+            log.error(`Failed to put ML data for ${f}`, e);
             if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id);
             throw e;
         }