Discussion

2024-08-14 10:56:17 +05:30
parent 1314b8ccbb
commit 050bbfbbb3
2 changed files with 73 additions and 11 deletions
--- a/web/packages/new/photos/services/ml/cluster-new.ts
+++ b/web/packages/new/photos/services/ml/cluster-new.ts
@@ -8,11 +8,11 @@ import { dotProduct } from "./math";
 /**
 * A face cluster is an set of faces.
 *
- * Each cluster has an id so that a Person (a set of clusters) can refer to it.
+ * Each cluster has an id so that a {@link Person} can refer to it.
 *
- * The cluster is not directly synced to remote. But it can indirectly get
- * synced if it gets attached to a person (which can be thought of as a named
- * cluster).
+ * The cluster is not directly synced to remote. But it does indirectly get
+ * synced if it gets promoted or attached to a person (which can be thought of
+ * as a named or hidden clusters).
 */
 export interface FaceCluster {
    /**
@@ -29,13 +29,26 @@ export interface FaceCluster {
 }

 /**
- * A Person is a set of clusters and some attached metadata.
+ * A Person is a set of clusters with some attached metadata.
 *
- * For ease of transportation, the Person entity on remote is something like
+ * More precisely, a person is a a single cluster or a set of clusters that the
+ * user has interacted with.
+ *
+ * The most frequent interaction is naming a {@link FaceCluster}, which promotes
+ * it to a become a {@link Person}. The promotion comes with the ability to be
+ * synced with remote (as a "person_v2" user entity).
+ *
+ * There after, the user may attach more clusters to the same {@link Person}.
+ *
+ * The other form of interaction is hiding. The user may hide a single (unnamed)
+ * cluster, or they may hide a person.
+ *
+ * The Person entity on remote has clusters embedded within itself
 *
 *     { name, clusters: [{ clusterID, faceIDs }] }
 *
- * That is, the Person has the clusters embedded within itself.
+ * Since clusters don't get independently synced, one way to think about a
+ * Person is that it is an interaction with a cluster that we want to sync.
 */
 export interface Person {
    /**
@@ -47,8 +60,21 @@ export interface Person {
    id: string;
    /**
     * A name assigned by the user to this person.
+     *
+     * This can be missing or an empty string for an unnamed cluster that was
+     * hidden.
     */
-    name: string;
+    name: string | undefined;
+    /**
+     * True if this person should be hidden.
+     *
+     * This can also be true for unnamed hidden clusters. When the user hides a
+     * single cluster that was offered as a suggestion to them on a client, then
+     * the client will create a new person entity without a name, and set its
+     * hidden flag to sync it with remote (so that other clients can also stop
+     * showing this cluster).
+     */
+    isHidden: boolean;
    /**
     * An unordered set of ids of the clusters that belong to this person.
     *
@@ -57,10 +83,15 @@ export interface Person {
     */
    clusterIDs: string[];
    /**
-     * The ID of the face that should be used as the display face, to represent
-     * this person in the UI.
+     * The ID of the face that should be used as the cover photo for this person
+     * (if the user has set one).
     */
    avatarFaceID: string | undefined;
+    /**
+     * Locally determined ID of the "best" face that should be used as the
+     * display face, to represent this person in the UI.
+     */
+    displayFaceID: string | undefined;
 }

 /**
@@ -90,6 +121,16 @@ export interface Person {
 * -   They can attach more clusters to a person.
 *
 * -   They can remove a cluster from a person.
+ *
+ * After clustering, we also do some routine cleanup. Faces belonging to files
+ * that have been deleted (including those in Trash) should be pruned off.
+ *
+ * We should not make strict assumptions about the clusters we get from remote.
+ * In particular, the same face ID can be in different clusters. In such cases
+ * we should assign it arbitrarily assign it to the last cluster we find it in.
+ * Such leeway is intentionally provided to allow clients some slack in how they
+ * implement the sync without making an blocking API request for every user
+ * interaction.
 */
 export const clusterFaces = async (faceIndexes: FaceIndex[]) => {
    const t = Date.now();
--- a/web/packages/new/photos/services/user-entity.ts
+++ b/web/packages/new/photos/services/user-entity.ts
@@ -2,6 +2,7 @@ import { decryptAssociatedB64Data } from "@/base/crypto/ente";
 import { authenticatedRequestHeaders, ensureOk } from "@/base/http";
 import { getKVN, setKV } from "@/base/kv";
 import { apiURL } from "@/base/origins";
+import { nullToUndefined } from "@/utils/transform";
 import { z } from "zod";
 import type { Person } from "./ml/cluster-new";
 import { applyPersonDiff } from "./ml/db";
@@ -93,6 +94,24 @@ const RemoteUserEntity = z.object({
 *
 * @param entityKeyB64 The base64 encoded key to use for decrypting the
 * encrypted contents of the user entity.
+ *
+ * [Note: Diff contents]
+ *
+ * Unlike git diffs which track all changes, the diffs we get from remote are
+ * guaranteed to contain only one entry (upsert or delete) for particular Ente
+ * object. This holds true irrespective of the diff limit.
+ *
+ * For example, in the user entity diff response, it is guaranteed that there
+ * will only be at max one entry for a particular entity id. The entry will have
+ * no data to indicate that the corresponding entity was deleted. Otherwise,
+ * when the data is present, it is taken as the creation of a new entity or the
+ * updation of an existing one.
+ *
+ * This behaviour comes from how remote stores the underlying, e.g., entities. A
+ * diff returns just entities whose updation times greater than the provided
+ * since time (limited to the given diff limit). So there will be at most one
+ * row for a particular entity id. And if that entity has been deleted, then the
+ * row will be a tombstone so data will be not be present.
 */
 export const userEntityDiff = async (
    type: EntityType,
@@ -176,7 +195,7 @@ export const syncPersons = async (entityKeyB64: string) => {
 * Zod schema for the "person" entity (the {@link RemotePerson} type).
 */
 const RemotePerson = z.object({
-    name: z.string(),
+    name: z.string().nullish().transform(nullToUndefined),
    assigned: z.array(
        z.object({
            // TODO-Cluster temporary modify
@@ -184,6 +203,8 @@ const RemotePerson = z.object({
            faces: z.string().array(),
        }),
    ),
+    isHidden: z.boolean(),
+    avatarFaceID: z.string().nullish().transform(nullToUndefined),
 });

 /**