[desktop] Clustering - Part 2/x (#2647)
@@ -15,30 +15,6 @@ import {
 } from "types/entity";
 import { getLatestVersionEntities } from "utils/entity";
 
-/**
- * The maximum number of items to fetch in a single diff
- *
- * [Note: Limit of returned items in /diff requests]
- *
- * The various GET /diff API methods, which tell the client what all has changed
- * since a timestamp (provided by the client) take a limit parameter.
- *
- * These diff API calls return all items whose updated at is greater
- * (non-inclusive) than the timestamp we provide. So there is no mechanism for
- * pagination of items which have the exact same updated at.
- *
- * Conceptually, it may happen that there are more items than the limit we've
- * provided, but there are practical safeguards.
- *
- * For file diff, the limit is advisory, and remote may return fewer, equal, or
- * more items than the provided limit. The scenario where it returns more is
- * when more files than the limit have the same updated at. Theoretically it
- * would make the diff response unbounded, however in practice file
- * modifications themselves are all batched. Even if the user were to select all
- * the files in their library and update them all in one go in the UI, their
- * client app is required to use batched API calls to make those updates, and
- * each of those batches would get distinct updated at.
- */
-const DIFF_LIMIT = 500;
-
 const ENTITY_TABLES: Record<EntityType, string> = {
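The note above (moved in this commit to user-entity.ts, see below) describes timestamp-cursor pagination. A minimal sketch of how a client would drive such a /diff endpoint; `fetchDiff` is a hypothetical stand-in for the actual API call:

```ts
interface DiffItem {
    updatedAt: number; // epoch microseconds
}

// Pull pages until the endpoint returns an empty page, advancing the cursor
// to the newest `updatedAt` seen so far. Since the server returns items whose
// updatedAt is strictly greater than `sinceTime`, pages never overlap.
const pullAll = async (
    fetchDiff: (sinceTime: number, limit: number) => Promise<DiffItem[]>,
) => {
    const items: DiffItem[] = [];
    let sinceTime = 0;
    while (true) {
        const page = await fetchDiff(sinceTime, 500);
        if (page.length == 0) break;
        items.push(...page);
        sinceTime = page.reduce((t, i) => Math.max(t, i.updatedAt), sinceTime);
    }
    return items;
};
```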
@@ -71,7 +47,8 @@ const getCachedEntityKey = async (type: EntityType) => {
     return entityKey;
 };
 
-const getEntityKey = async (type: EntityType) => {
+// TODO: unexport
+export const getEntityKey = async (type: EntityType) => {
     try {
         const entityKey = await getCachedEntityKey(type);
         if (entityKey) {
@@ -6,8 +6,11 @@ import {
     isMLEnabled,
     isMLSupported,
     mlStatusSnapshot,
+    wipCluster,
+    wipClusterEnable,
 } from "@/new/photos/services/ml";
+import type { Person } from "@/new/photos/services/ml/people";
+import { personDiff } from "@/new/photos/services/user-entity";
 import { EnteFile } from "@/new/photos/types/file";
 import * as chrono from "chrono-node";
 import { t } from "i18next";
@@ -24,7 +27,7 @@ import {
 import ComlinkSearchWorker from "utils/comlink/ComlinkSearchWorker";
 import { getUniqueFiles } from "utils/file";
 import { getFormattedDate } from "utils/search";
-import { getLatestEntities } from "./entityService";
+import { getEntityKey, getLatestEntities } from "./entityService";
 import locationSearchService, { City } from "./locationSearchService";
 
 const DIGITS = new Set(["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]);
@@ -414,12 +417,28 @@ function convertSuggestionToSearchQuery(option: Suggestion): Search {
 }
 
 async function getAllPeople(limit: number = undefined) {
+    if (!(await wipClusterEnable())) return [];
+
+    if (process.env.NEXT_PUBLIC_ENTE_WIP_CL_FETCH) {
+        const entityKey = await getEntityKey("person" as EntityType);
+        const peopleR = await personDiff(entityKey.data);
+        const r = peopleR.length;
+        log.debug(() => ["people", peopleR]);
+
+        if (r) return [];
+        return [];
+    }
+
     let people: Array<Person> = []; // await mlIDbStorage.getAllPeople();
     people = await wipCluster();
     // await mlPeopleStore.iterate<Person, void>((person) => {
     //     people.push(person);
     // });
     people = people ?? [];
-    return people
+    const result = people
         .sort((p1, p2) => p2.files.length - p1.files.length)
         .slice(0, limit);
+    // log.debug(() => ["getAllPeople", result]);
+
+    return result;
 }
web/packages/base/assert.ts (new file, +11)
@@ -0,0 +1,11 @@
+import { isDevBuild } from "./env";
+import log from "./log";
+
+/**
+ * If running in a dev build, throw an exception with the given message.
+ * Otherwise log it as a warning.
+ */
+export const assertionFailed = (message: string) => {
+    if (isDevBuild) throw new Error(message);
+    log.warn(message);
+};
@@ -115,7 +115,7 @@ export const encryptMetadata = async (metadata: unknown, keyB64: string) => {
  *
  * This is the sibling of {@link encryptAssociatedData}.
  *
- * See {@link decryptChaChaOneShot2} for the implementation details.
+ * See {@link decryptChaChaOneShot} for the implementation details.
  *
  * @param encryptedData A {@link Uint8Array} containing the bytes to decrypt.
  *
@@ -166,7 +166,7 @@ export const decryptFileEmbedding = async (
  * Decrypt the metadata associated with an Ente object (file, collection or
  * entity) using the object's key.
  *
- * This is the sibling of {@link decryptMetadata}.
+ * This is the sibling of {@link encryptMetadata}.
  *
  * @param encryptedDataB64 base64 encoded string containing the encrypted data.
  *
@@ -180,7 +180,6 @@ export const decryptFileEmbedding = async (
  * @returns The decrypted JSON value. Since TypeScript does not have a native
  * JSON type, we need to return it as an `unknown`.
  */
-
 export const decryptMetadata = async (
     encryptedDataB64: string,
     decryptionHeaderB64: string,
@@ -188,10 +187,26 @@ export const decryptMetadata = async (
 ) =>
     JSON.parse(
         new TextDecoder().decode(
-            await decryptAssociatedData(
-                await libsodium.fromB64(encryptedDataB64),
+            await decryptMetadataBytes(
+                encryptedDataB64,
                 decryptionHeaderB64,
                 keyB64,
             ),
         ),
     ) as unknown;
+
+/**
+ * A variant of {@link decryptMetadata} that does not attempt to parse the
+ * decrypted data as a JSON string and instead just returns the raw decrypted
+ * bytes that we got.
+ */
+export const decryptMetadataBytes = async (
+    encryptedDataB64: string,
+    decryptionHeaderB64: string,
+    keyB64: string,
+) =>
+    await decryptAssociatedData(
+        await libsodium.fromB64(encryptedDataB64),
+        decryptionHeaderB64,
+        keyB64,
+    );
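With this hunk, decryptMetadata becomes a thin JSON-parsing wrapper over the new decryptMetadataBytes. A self-contained sketch of that layering, with plain base64 decoding standing in for the actual libsodium-backed decryption:

```ts
// Stand-in for decryptMetadataBytes: bytes in, bytes out (here just base64
// decoding; the real function decrypts using the header and key).
const toBytes = async (dataB64: string): Promise<Uint8Array> =>
    Uint8Array.from(atob(dataB64), (c) => c.charCodeAt(0));

// decryptMetadata's shape: the bytes variant, plus decode and JSON.parse.
const toJSON = async (dataB64: string): Promise<unknown> =>
    JSON.parse(new TextDecoder().decode(await toBytes(dataB64)));
```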
@@ -1,8 +1,4 @@
 import { blobCache } from "@/base/blob-cache";
-import {
-    regenerateFaceCropsIfNeeded,
-    unidentifiedFaceIDs,
-} from "@/new/photos/services/ml";
+import { faceCrop, unidentifiedFaceIDs } from "@/new/photos/services/ml";
 import type { Person } from "@/new/photos/services/ml/people";
 import type { EnteFile } from "@/new/photos/types/file";
 import { Skeleton, Typography, styled } from "@mui/material";
@@ -28,7 +24,10 @@ export const PeopleList: React.FC<PeopleListProps> = ({
                     clickable={!!onSelect}
                     onClick={() => onSelect && onSelect(person, index)}
                 >
-                    <FaceCropImageView faceID={person.displayFaceId} />
+                    <FaceCropImageView
+                        faceID={person.displayFaceID}
+                        enteFile={person.displayFaceFile}
+                    />
                 </FaceChip>
             ))}
         </FaceChipContainer>
@@ -80,7 +79,6 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
     enteFile,
 }) => {
     const [faceIDs, setFaceIDs] = useState<string[]>([]);
-    const [didRegen, setDidRegen] = useState(false);
 
     useEffect(() => {
         let didCancel = false;
@@ -88,13 +86,6 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
         const go = async () => {
             const faceIDs = await unidentifiedFaceIDs(enteFile);
             !didCancel && setFaceIDs(faceIDs);
-            // Don't block for the regeneration to happen. If anything got
-            // regenerated, the result will be true, in response to which we'll
-            // change the key of the face list and cause it to be rerendered
-            // (fetching the regenerated crops).
-            void regenerateFaceCropsIfNeeded(enteFile).then((r) =>
-                setDidRegen(r),
-            );
         };
 
         void go();
@@ -111,10 +102,10 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
             <Typography variant="large" p={1}>
                 {t("UNIDENTIFIED_FACES")}
             </Typography>
-            <FaceChipContainer key={didRegen ? 1 : 0}>
+            <FaceChipContainer>
                 {faceIDs.map((faceID) => (
                     <FaceChip key={faceID}>
-                        <FaceCropImageView {...{ faceID }} />
+                        <FaceCropImageView {...{ enteFile, faceID }} />
                     </FaceChip>
                 ))}
             </FaceChipContainer>
@@ -123,39 +114,41 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
 };
 
 interface FaceCropImageViewProps {
     /** The ID of the face to display. */
     faceID: string;
+    /** The {@link EnteFile} which contains this face. */
+    enteFile: EnteFile;
 }
 
 /**
- * An image view showing the face crop for the given {@link faceID}.
+ * An image view showing the face crop for the given face.
  *
- * The image is read from the "face-crops" {@link BlobCache}. While the image is
- * being fetched, or if it doesn't exist, a placeholder is shown.
+ * The image is read from the "face-crops" {@link BlobCache}, regenerating it if
+ * needed (which is why we also need to pass the associated file).
+ *
+ * While the image is being fetched or regenerated, or if it doesn't exist, a
+ * placeholder is shown.
  */
-const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({ faceID }) => {
+const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({
+    faceID,
+    enteFile,
+}) => {
     const [objectURL, setObjectURL] = useState<string | undefined>();
 
     useEffect(() => {
         let didCancel = false;
-        if (faceID) {
-            void blobCache("face-crops")
-                .then((cache) => cache.get(faceID))
-                .then((data) => {
-                    if (data) {
-                        const blob = new Blob([data]);
-                        if (!didCancel) setObjectURL(URL.createObjectURL(blob));
-                    }
-                });
-        } else setObjectURL(undefined);
+        let thisObjectURL: string | undefined;
+
+        void faceCrop(faceID, enteFile).then((blob) => {
+            if (blob && !didCancel)
+                setObjectURL((thisObjectURL = URL.createObjectURL(blob)));
+        });
 
         return () => {
             didCancel = true;
-            if (objectURL) URL.revokeObjectURL(objectURL);
+            if (thisObjectURL) URL.revokeObjectURL(thisObjectURL);
         };
-        // TODO: The linter warning is actually correct, objectURL should be a
-        // dependency, but adding that require reworking this code first.
-        // eslint-disable-next-line react-hooks/exhaustive-deps
-    }, [faceID]);
+    }, [faceID, enteFile]);
 
     return objectURL ? (
         <img style={{ objectFit: "cover" }} src={objectURL} />
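The rewritten effect also fixes a subtle leak: the old cleanup revoked the stale `objectURL` captured by the effect's closure, not the URL created by that run. A sketch of the corrected pattern as a reusable hook, assuming only the web URL and React APIs:

```tsx
import { useEffect, useState } from "react";

// Resolve a blob asynchronously and expose it as an object URL, revoking the
// URL created by each effect run when that run is cleaned up.
const useBlobURL = (getBlob: () => Promise<Blob | undefined>) => {
    const [url, setURL] = useState<string | undefined>();
    useEffect(() => {
        let didCancel = false;
        let thisURL: string | undefined;
        void getBlob().then((blob) => {
            if (blob && !didCancel)
                setURL((thisURL = URL.createObjectURL(blob)));
        });
        return () => {
            didCancel = true;
            if (thisURL) URL.revokeObjectURL(thisURL);
        };
    }, [getBlob]);
    return url;
};
```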
@@ -5,11 +5,11 @@ import type { FaceIndex } from "./face";
 import { dotProduct } from "./math";
 
 /**
- * A cluster is a set of faces.
+ * A face cluster is a set of faces.
  *
 * Each cluster has an id so that a Person (a set of clusters) can refer to it.
 */
-export interface Cluster {
+export interface FaceCluster {
    /**
     * A randomly generated ID to uniquely identify this cluster.
     */
@@ -31,7 +31,7 @@ export interface Cluster {
 *
 * For ease of transportation, the Person entity on remote looks like
 *
- *     { name, clusters: { cluster_id, face_ids }}
+ *     { name, clusters: [{ clusterID, faceIDs }] }
 *
 * That is, it has the clusters embedded within itself.
 */
@@ -78,47 +78,55 @@ export const clusterFaces = (faceIndexes: FaceIndex[]) => {
 
     const faces = [...faceIDAndEmbeddings(faceIndexes)];
 
-    const clusters: Cluster[] = [];
+    let clusters: FaceCluster[] = [];
     const clusterIndexByFaceID = new Map<string, number>();
     for (const [i, { faceID, embedding }] of faces.entries()) {
-        let j = 0;
-        for (; j < i; j++) {
-            // Can't find a better way for avoiding the null assertion.
+        // Find the nearest neighbour from among the faces we have already seen.
+        let nnIndex: number | undefined;
+        let nnCosineSimilarity = 0;
+        for (let j = 0; j < i; j++) {
+            // Can't find a way of avoiding the null assertion.
             // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
             const n = faces[j]!;
 
-            // TODO-ML: The distance metric and the thresholds are placeholders.
-
             // The vectors are already normalized, so we can directly use their
             // dot product as their cosine similarity.
             const csim = dotProduct(embedding, n.embedding);
-            if (csim > 0.5) {
-                // Found a neighbour near enough. Add this face to the
-                // neighbour's cluster and call it a day.
-                const ci = ensure(clusterIndexByFaceID.get(n.faceID));
-                clusters[ci]?.faceIDs.push(faceID);
-                clusterIndexByFaceID.set(faceID, ci);
-                break;
+            if (csim > 0.76 && csim > nnCosineSimilarity) {
+                nnIndex = j;
+                nnCosineSimilarity = csim;
             }
         }
-        if (j == i) {
+        if (nnIndex === undefined) {
             // We didn't find a neighbour. Create a new cluster with this face.
-
             const cluster = {
                 id: newNonSecureID("cluster_"),
                 faceIDs: [faceID],
             };
             clusters.push(cluster);
             clusterIndexByFaceID.set(faceID, clusters.length);
+        } else {
+            // Found a neighbour near enough. Add this face to the neighbour's
+            // cluster.
+
+            // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+            const nn = faces[nnIndex]!;
+            const nnClusterIndex = ensure(clusterIndexByFaceID.get(nn.faceID));
+            clusters[nnClusterIndex]?.faceIDs.push(faceID);
+            clusterIndexByFaceID.set(faceID, nnClusterIndex);
         }
     }
 
-    log.debug(() => ["ml/cluster", { faces, clusters, clusterIndexByFaceID }]);
+    clusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1);
+
+    log.debug(
+        () =>
+            `Clustered ${faces.length} faces into ${clusters.length} clusters (${Date.now() - t} ms)`,
+    );
 
-    return undefined;
+    return clusters;
 };
 
 /**
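The loop above is greedy nearest-neighbour clustering: each face joins the cluster of its most similar already-seen face if the similarity crosses the threshold, and otherwise seeds a new cluster. A standalone sketch over plain number arrays (assumed normalized, so the dot product equals cosine similarity); note the `push(...) - 1` bookkeeping for a freshly created cluster's index:

```ts
const dot = (a: number[], b: number[]) =>
    a.reduce((sum, x, i) => sum + x * b[i]!, 0);

const cluster = (embeddings: number[][], threshold = 0.76) => {
    const clusters: number[][] = []; // each cluster is a list of item indexes
    const clusterOf: number[] = []; // item index -> cluster index
    embeddings.forEach((e, i) => {
        // Nearest already-seen neighbour above the threshold, if any.
        let nn = -1;
        let nnSim = threshold;
        for (let j = 0; j < i; j++) {
            const sim = dot(e, embeddings[j]!);
            if (sim > nnSim) [nn, nnSim] = [j, sim];
        }
        if (nn == -1) {
            clusterOf[i] = clusters.push([i]) - 1; // new cluster's index
        } else {
            const c = clusterOf[nn]!;
            clusters[c]!.push(i);
            clusterOf[i] = c;
        }
    });
    return clusters.filter((c) => c.length > 1); // drop singleton clusters
};
```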
@@ -139,6 +139,17 @@ export interface Face {
      * This ID is guaranteed to be unique for all the faces detected in all the
      * files for the user. In particular, each file can have multiple faces but
      * they all will get their own unique {@link faceID}.
+     *
+     * This ID is also meant to be stable across reindexing. That is, if the
+     * same algorithm and hyperparameters are used to reindex the file, then it
+     * should result in the same face IDs. This allows us leeway in letting
+     * unnecessary reindexing happen in rare cases without invalidating the
+     * clusters that rely on the presence of the given face ID.
+     *
+     * Finally, this face ID is not completely opaque. It consists of underscore
+     * separated components, the first of which is the ID of the
+     * {@link EnteFile} to which this face belongs. Client code can rely on this
+     * structure and can parse it if needed.
      */
     faceID: string;
     /**
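Since the face ID embeds the file ID as its first underscore-separated component, the owning file can be recovered without any lookup. A sketch (this commit adds the real helper, fileIDFromFaceID, further down):

```ts
const fileIDOfFace = (faceID: string): number | undefined => {
    const fileID = parseInt(faceID.split("_")[0] ?? "");
    return isNaN(fileID) ? undefined : fileID;
};

fileIDOfFace("10250_3_a1b2"); // => 10250 (hypothetical face ID)
```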
@@ -3,6 +3,7 @@
  */
 
 import { isDesktop } from "@/base/app";
+import { assertionFailed } from "@/base/assert";
 import { blobCache } from "@/base/blob-cache";
 import { ensureElectron } from "@/base/electron";
 import { isDevBuild } from "@/base/env";
@@ -15,6 +16,7 @@ import { ensure } from "@/utils/ensure";
 import { throttled } from "@/utils/promise";
 import { proxy, transfer } from "comlink";
 import { isInternalUser } from "../feature-flags";
+import { getAllLocalFiles } from "../files";
 import { getRemoteFlag, updateRemoteFlag } from "../remote-store";
 import type { UploadItem } from "../upload/types";
 import { clusterFaces } from "./cluster-new";
@@ -25,43 +27,64 @@ import {
     faceIndexes,
     indexableAndIndexedCounts,
 } from "./db";
+import type { Person } from "./people";
 import { MLWorker } from "./worker";
 import type { CLIPMatches } from "./worker-types";
 
 /**
- * In-memory flag that tracks if ML is enabled.
+ * Internal state of the ML subsystem.
  *
- * - On app start, this is read from local storage during {@link initML}.
- *
- * - It gets updated when we sync with remote (so if the user enables/disables
- *   ML on a different device, this local value will also become true/false).
- *
- * - It gets updated when the user enables/disables ML on this device.
- *
- * - It is cleared in {@link logoutML}.
+ * These are essentially cached values used by the functions of this module.
+ *
+ * This should be cleared on logout.
  */
-let _isMLEnabled = false;
+class MLState {
+    /**
+     * In-memory flag that tracks if ML is enabled.
+     *
+     * - On app start, this is read from local storage during {@link initML}.
+     *
+     * - It gets updated when we sync with remote (so if the user enables/disables
+     *   ML on a different device, this local value will also become true/false).
+     *
+     * - It gets updated when the user enables/disables ML on this device.
+     *
+     * - It is cleared in {@link logoutML}.
+     */
+    isMLEnabled = false;
 
-/** Cached instance of the {@link ComlinkWorker} that wraps our web worker. */
-let _comlinkWorker: Promise<ComlinkWorker<typeof MLWorker>> | undefined;
+    /**
+     * Cached instance of the {@link ComlinkWorker} that wraps our web worker.
+     */
+    comlinkWorker: Promise<ComlinkWorker<typeof MLWorker>> | undefined;
 
-/**
- * Subscriptions to {@link MLStatus}.
- *
- * See {@link mlStatusSubscribe}.
- */
-let _mlStatusListeners: (() => void)[] = [];
+    /**
+     * Subscriptions to {@link MLStatus}.
+     *
+     * See {@link mlStatusSubscribe}.
+     */
+    mlStatusListeners: (() => void)[] = [];
 
-/**
- * Snapshot of {@link MLStatus}.
- *
- * See {@link mlStatusSnapshot}.
- */
-let _mlStatusSnapshot: MLStatus | undefined;
+    /**
+     * Snapshot of {@link MLStatus}.
+     *
+     * See {@link mlStatusSnapshot}.
+     */
+    mlStatusSnapshot: MLStatus | undefined;
+
+    /**
+     * In flight face crop regeneration promises indexed by the IDs of the files
+     * whose faces we are regenerating.
+     */
+    inFlightFaceCropRegens = new Map<number, Promise<void>>();
+}
+
+/** State shared by the functions in this module. See {@link MLState}. */
+let _state = new MLState();
 
 /** Lazily created, cached, instance of {@link MLWorker}. */
 const worker = () =>
-    (_comlinkWorker ??= createComlinkWorker()).then((cw) => cw.remote);
+    (_state.comlinkWorker ??= createComlinkWorker()).then((cw) => cw.remote);
 
 const createComlinkWorker = async () => {
     const electron = ensureElectron();
@@ -95,9 +118,9 @@ const createComlinkWorker = async () => {
  * It is also called when the user pauses or disables ML.
  */
 export const terminateMLWorker = async () => {
-    if (_comlinkWorker) {
-        await _comlinkWorker.then((cw) => cw.terminate());
-        _comlinkWorker = undefined;
+    if (_state.comlinkWorker) {
+        await _state.comlinkWorker.then((cw) => cw.terminate());
+        _state.comlinkWorker = undefined;
     }
 };
@@ -149,7 +172,7 @@ export const canEnableML = async () =>
  * Initialize the ML subsystem if the user has enabled it in preferences.
  */
 export const initML = () => {
-    _isMLEnabled = isMLEnabledLocal();
+    _state.isMLEnabled = isMLEnabledLocal();
 };
 
 export const logoutML = async () => {
@@ -158,9 +181,7 @@ export const logoutML = async () => {
     // execution contexts], it gets called first in the logout sequence, and
     // then this function (`logoutML`) gets called at a later point in time.
 
-    _isMLEnabled = false;
-    _mlStatusListeners = [];
-    _mlStatusSnapshot = undefined;
+    _state = new MLState();
     await clearMLDB();
 };
@@ -173,7 +194,7 @@ export const logoutML = async () => {
  */
 export const isMLEnabled = () =>
     // Implementation note: Keep it fast, it might be called frequently.
-    _isMLEnabled;
+    _state.isMLEnabled;
 
 /**
  * Enable ML.
@@ -183,7 +204,7 @@ export const isMLEnabled = () =>
 export const enableML = async () => {
     await updateIsMLEnabledRemote(true);
     setIsMLEnabledLocal(true);
-    _isMLEnabled = true;
+    _state.isMLEnabled = true;
     setInterimScheduledStatus();
     triggerStatusUpdate();
     triggerMLSync();
@@ -198,7 +219,7 @@ export const enableML = async () => {
 export const disableML = async () => {
     await updateIsMLEnabledRemote(false);
     setIsMLEnabledLocal(false);
-    _isMLEnabled = false;
+    _state.isMLEnabled = false;
     await terminateMLWorker();
     triggerStatusUpdate();
 };
@@ -257,18 +278,18 @@ const updateIsMLEnabledRemote = (enabled: boolean) =>
 export const triggerMLSync = () => void mlSync();
 
 const mlSync = async () => {
-    _isMLEnabled = await getIsMLEnabledRemote();
-    setIsMLEnabledLocal(_isMLEnabled);
+    _state.isMLEnabled = await getIsMLEnabledRemote();
+    setIsMLEnabledLocal(_state.isMLEnabled);
     triggerStatusUpdate();
 
-    if (_isMLEnabled) void worker().then((w) => w.sync());
-    // TODO-ML
-    if (_isMLEnabled) void wipCluster();
+    if (_state.isMLEnabled) void worker().then((w) => w.sync());
 };
 
 /**
  * Run indexing on a file which was uploaded from this client.
  *
  * Indexing only happens if ML is enabled.
  *
  * This function is called by the uploader when it uploads a new file from this
  * client, giving us the opportunity to index it live. This is only an
  * optimization - if we don't index it now it'll anyways get indexed later as
@@ -282,20 +303,54 @@ const mlSync = async () => {
  * image part of the live photo that was uploaded.
  */
 export const indexNewUpload = (enteFile: EnteFile, uploadItem: UploadItem) => {
-    if (!_isMLEnabled) return;
+    if (!isMLEnabled()) return;
     if (enteFile.metadata.fileType !== FileType.image) return;
     log.debug(() => ["ml/liveq", { enteFile, uploadItem }]);
     void worker().then((w) => w.onUpload(enteFile, uploadItem));
 };
 
+let last: Person[] | undefined;
+
 /**
  * WIP! Don't enable, dragon eggs are hatching here.
  */
-export const wipCluster = async () => {
-    if (!isDevBuild || !(await isInternalUser())) return;
-    if (!process.env.NEXT_PUBLIC_ENTE_WIP_CL) return;
+export const wipClusterEnable = async () => {
+    if (!isDevBuild || !(await isInternalUser())) return false;
+    if (!process.env.NEXT_PUBLIC_ENTE_WIP_CL) return false;
+    return true;
+};
 
-    clusterFaces(await faceIndexes());
+export const wipCluster = async () => {
+    if (!(await wipClusterEnable())) return;
+
+    if (last) return last;
+
+    const clusters = clusterFaces(await faceIndexes());
+
+    const localFiles = await getAllLocalFiles();
+    const localFilesByID = new Map(localFiles.map((f) => [f.id, f]));
+
+    const people: Person[] = []; // await mlIDbStorage.getAllPeople();
+    for (const cluster of clusters) {
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        const dfID = cluster.faceIDs[0]!;
+        const dfFile = localFilesByID.get(fileIDFromFaceID(dfID) ?? 0);
+        if (!dfFile) {
+            assertionFailed(`Face ID ${dfID} without local file`);
+            continue;
+        }
+        people.push({
+            id: Math.random(), //cluster.id,
+            name: "test",
+            // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+            files: cluster.faceIDs.map((s) => parseInt(s.split("_")[0]!)),
+            displayFaceID: dfID,
+            displayFaceFile: dfFile,
+        });
+    }
+
+    last = people;
+    return people;
 };
 
 export type MLStatus =
@@ -336,9 +391,11 @@ export type MLStatus =
  * @returns A function that can be used to clear the subscription.
  */
 export const mlStatusSubscribe = (onChange: () => void): (() => void) => {
-    _mlStatusListeners.push(onChange);
+    _state.mlStatusListeners.push(onChange);
     return () => {
-        _mlStatusListeners = _mlStatusListeners.filter((l) => l != onChange);
+        _state.mlStatusListeners = _state.mlStatusListeners.filter(
+            (l) => l != onChange,
+        );
     };
 };
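This subscribe function, paired with mlStatusSnapshot below, has exactly the shape React's useSyncExternalStore expects, so a component can bind to the ML status with no extra glue. A sketch of that hookup (not itself part of this commit):

```tsx
import { useSyncExternalStore } from "react";
import {
    mlStatusSnapshot,
    mlStatusSubscribe,
} from "@/new/photos/services/ml";

// Re-renders whenever a status listener fires; reads the cached snapshot.
const useMLStatus = () =>
    useSyncExternalStore(mlStatusSubscribe, mlStatusSnapshot);
```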
@@ -352,7 +409,7 @@ export const mlStatusSubscribe = (onChange: () => void): (() => void) => {
  * asynchronous tasks that are needed to get the status.
  */
 export const mlStatusSnapshot = (): MLStatus | undefined => {
-    const result = _mlStatusSnapshot;
+    const result = _state.mlStatusSnapshot;
     // We don't have it yet, trigger an update.
     if (!result) triggerStatusUpdate();
     return result;
@@ -369,15 +426,15 @@ const updateMLStatusSnapshot = async () =>
     setMLStatusSnapshot(await getMLStatus());
 
 const setMLStatusSnapshot = (snapshot: MLStatus) => {
-    _mlStatusSnapshot = snapshot;
-    _mlStatusListeners.forEach((l) => l());
+    _state.mlStatusSnapshot = snapshot;
+    _state.mlStatusListeners.forEach((l) => l());
 };
 
 /**
  * Compute the current state of the ML subsystem.
  */
 const getMLStatus = async (): Promise<MLStatus> => {
-    if (!_isMLEnabled) return { phase: "disabled" };
+    if (!_state.isMLEnabled) return { phase: "disabled" };
 
     const { indexedCount, indexableCount } = await indexableAndIndexedCounts();
 
@@ -409,8 +466,11 @@ const getMLStatus = async (): Promise<MLStatus> => {
 const setInterimScheduledStatus = () => {
     let nSyncedFiles = 0,
         nTotalFiles = 0;
-    if (_mlStatusSnapshot && _mlStatusSnapshot.phase != "disabled") {
-        ({ nSyncedFiles, nTotalFiles } = _mlStatusSnapshot);
+    if (
+        _state.mlStatusSnapshot &&
+        _state.mlStatusSnapshot.phase != "disabled"
+    ) {
+        ({ nSyncedFiles, nTotalFiles } = _state.mlStatusSnapshot);
     }
     setMLStatusSnapshot({ phase: "scheduled", nSyncedFiles, nTotalFiles });
 };
@@ -444,25 +504,52 @@ export const unidentifiedFaceIDs = async (
     return index?.faces.map((f) => f.faceID) ?? [];
 };
 
+/**
+ * Extract the ID of the {@link EnteFile} to which a face belongs from its ID.
+ */
+const fileIDFromFaceID = (faceID: string) => {
+    const fileID = parseInt(faceID.split("_")[0] ?? "");
+    if (isNaN(fileID)) {
+        assertionFailed(`Ignoring attempt to parse invalid faceID ${faceID}`);
+        return undefined;
+    }
+    return fileID;
+};
+
+/**
+ * Return the cached face crop for the given face, regenerating it if needed.
+ *
+ * @param faceID The id of the face whose face crop we want.
+ *
+ * @param enteFile The {@link EnteFile} that contains this face.
+ */
+export const faceCrop = async (faceID: string, enteFile: EnteFile) => {
+    let inFlight = _state.inFlightFaceCropRegens.get(enteFile.id);
+
+    if (!inFlight) {
+        inFlight = regenerateFaceCropsIfNeeded(enteFile);
+        _state.inFlightFaceCropRegens.set(enteFile.id, inFlight);
+    }
+
+    await inFlight;
+
+    const cache = await blobCache("face-crops");
+    return cache.get(faceID);
+};
+
 /**
  * Check to see if any of the faces in the given file do not have a face crop
  * present locally. If so, then regenerate the face crops for all the faces in
  * the file (updating the "face-crops" {@link BlobCache}).
- *
- * @returns true if one or more face crops were regenerated; false otherwise.
  */
-export const regenerateFaceCropsIfNeeded = async (enteFile: EnteFile) => {
+const regenerateFaceCropsIfNeeded = async (enteFile: EnteFile) => {
     const index = await faceIndex(enteFile.id);
-    if (!index) return false;
+    if (!index) return;
 
-    const faceIDs = index.faces.map((f) => f.faceID);
     const cache = await blobCache("face-crops");
-    for (const id of faceIDs) {
-        if (!(await cache.has(id))) {
-            await regenerateFaceCrops(enteFile, index);
-            return true;
-        }
-    }
+    const faceIDs = index.faces.map((f) => f.faceID);
+    let needsRegen = false;
+    for (const id of faceIDs) if (!(await cache.has(id))) needsRegen = true;
 
-    return false;
+    if (needsRegen) await regenerateFaceCrops(enteFile, index);
 };
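faceCrop above coalesces concurrent regenerations for the same file by caching the in-flight promise keyed by file ID, and it retains the settled promise so the regeneration check runs at most once per file per session. The same pattern, extracted as a generic sketch:

```ts
const tasks = new Map<number, Promise<void>>();

// Run `task` for `id` at most once; concurrent and later callers share the
// same (possibly already settled) promise.
const runOnce = (id: number, task: () => Promise<void>) => {
    let p = tasks.get(id);
    if (!p) {
        p = task();
        tasks.set(id, p);
    }
    return p;
};
```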
@@ -1,8 +1,11 @@
+import type { EnteFile } from "../../types/file";
+
 export interface Person {
     id: number;
     name?: string;
     files: number[];
-    displayFaceId: string;
+    displayFaceID: string;
+    displayFaceFile: EnteFile;
 }
 
 // Forced disable clustering. It doesn't currently work.
web/packages/new/photos/services/user-entity.ts (new file, +132)
@@ -0,0 +1,132 @@
+import { decryptMetadataBytes } from "@/base/crypto/ente";
+import { authenticatedRequestHeaders, ensureOk } from "@/base/http";
+import { apiURL } from "@/base/origins";
+import { z } from "zod";
+
+/**
+ * User entities are predefined lists of otherwise arbitrary data that the user
+ * can store for their account.
+ *
+ * e.g. location tags, people in their photos.
+ */
+export type EntityType =
+    | "person"
+    /**
+     * A new version of the Person entity where the data is gzipped before
+     * encryption.
+     */
+    | "person_v2";
+
+/**
+ * The maximum number of items to fetch in a single diff
+ *
+ * [Note: Limit of returned items in /diff requests]
+ *
+ * The various GET /diff API methods, which tell the client what all has changed
+ * since a timestamp (provided by the client) take a limit parameter.
+ *
+ * These diff API calls return all items whose updated at is greater
+ * (non-inclusive) than the timestamp we provide. So there is no mechanism for
+ * pagination of items which have the exact same updated at.
+ *
+ * Conceptually, it may happen that there are more items than the limit we've
+ * provided, but there are practical safeguards.
+ *
+ * For file diff, the limit is advisory, and remote may return fewer, equal, or
+ * more items than the provided limit. The scenario where it returns more is
+ * when more files than the limit have the same updated at. Theoretically it
+ * would make the diff response unbounded, however in practice file
+ * modifications themselves are all batched. Even if the user were to select all
+ * the files in their library and update them all in one go in the UI, their
+ * client app is required to use batched API calls to make those updates, and
+ * each of those batches would get distinct updated at.
+ */
+const defaultDiffLimit = 500;
+
+/**
+ * A generic user entity.
+ *
+ * This is an intermediate step, usually what we really want is a version of
+ * this with the {@link data} parsed to the specific type of JSON object
+ * expected to be associated with this entity type.
+ */
+interface UserEntity {
+    /**
+     * Arbitrary data associated with the entity. The format of this data is
+     * specific to each entity type.
+     *
+     * This will not be present for entities that have been deleted on remote.
+     */
+    data: Uint8Array | undefined;
+    /**
+     * Epoch microseconds denoting when this entity was created or last updated.
+     */
+    updatedAt: number;
+}
+
+const RemoteUserEntity = z.object({
+    /** Base64 string containing the encrypted contents of the entity. */
+    encryptedData: z.string(),
+    /** Base64 string containing the decryption header. */
+    header: z.string(),
+    isDeleted: z.boolean(),
+    updatedAt: z.number(),
+});
+
+/**
+ * Fetch all user entities of the given type that have been created or updated
+ * since the given time.
+ *
+ * @param type The type of the entities to fetch.
+ *
+ * @param sinceTime Epoch microseconds. This is used to ask remote to provide
+ * us only entities whose {@link updatedAt} is more than the given value. Set
+ * this to zero to start from the beginning.
+ *
+ * @param entityKeyB64 The base64 encoded key to use for decrypting the
+ * encrypted contents of the user entity.
+ */
+export const userEntityDiff = async (
+    type: EntityType,
+    sinceTime: number,
+    entityKeyB64: string,
+): Promise<UserEntity[]> => {
+    const decrypt = (dataB64: string, headerB64: string) =>
+        decryptMetadataBytes(dataB64, headerB64, entityKeyB64);
+
+    const params = new URLSearchParams({
+        type,
+        sinceTime: sinceTime.toString(),
+        limit: defaultDiffLimit.toString(),
+    });
+    const url = await apiURL(`/user-entity/entity/diff`);
+    const res = await fetch(`${url}?${params.toString()}`, {
+        headers: await authenticatedRequestHeaders(),
+    });
+    ensureOk(res);
+    const entities = z
+        .object({ diff: z.array(RemoteUserEntity) })
+        .parse(await res.json()).diff;
+    return Promise.all(
+        entities.map(
+            async ({ encryptedData, header, isDeleted, updatedAt }) => ({
+                data: isDeleted
+                    ? undefined
+                    : await decrypt(encryptedData, header),
+                updatedAt,
+            }),
+        ),
+    );
+};
+
+/**
+ * Fetch all Person entities that have been created or updated since the last
+ * time we checked.
+ */
+export const personDiff = async (entityKeyB64: string) => {
+    const entities = await userEntityDiff("person", 0, entityKeyB64);
+    return entities.map(({ data }) => {
+        if (!data) return undefined;
+        return JSON.parse(new TextDecoder().decode(data)) as unknown;
+    });
+};
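A usage sketch, mirroring the call site added to getAllPeople earlier in this commit; the import path for getEntityKey is illustrative, and `entityKey.data` holds the base64 entity key:

```ts
import { personDiff } from "@/new/photos/services/user-entity";
import { getEntityKey } from "services/entityService"; // path assumed

const fetchPeople = async () => {
    const entityKey = await getEntityKey("person");
    return await personDiff(entityKey.data);
};
```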