diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts index 0c203b1189..987a4cdacb 100644 --- a/web/apps/photos/src/services/searchService.ts +++ b/web/apps/photos/src/services/searchService.ts @@ -9,9 +9,9 @@ import { wipCluster, wipClusterEnable, } from "@/new/photos/services/ml"; -import { persons } from "@/new/photos/services/ml/db"; +import { clusterGroups } from "@/new/photos/services/ml/db"; import type { SearchPerson } from "@/new/photos/services/search"; -import { syncPersons } from "@/new/photos/services/user-entity"; +import { syncCGroups } from "@/new/photos/services/user-entity"; import { EnteFile } from "@/new/photos/types/file"; import * as chrono from "chrono-node"; import { t } from "i18next"; @@ -424,8 +424,8 @@ async function getAllPeople(limit: number = undefined) { done = true; if (process.env.NEXT_PUBLIC_ENTE_WIP_CL_FETCH) { - await syncPersons(); - const people = await persons(); + await syncCGroups(); + const people = await clusterGroups(); log.debug(() => ["people", { people }]); } diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 60a9a1e9fb..ff28b75260 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -1,18 +1,17 @@ import { newNonSecureID } from "@/base/id-worker"; import log from "@/base/log"; import { ensure } from "@/utils/ensure"; -import { faceClusters, persons } from "./db"; +import { clusterGroups, faceClusters } from "./db"; import type { Face, FaceIndex } from "./face"; import { dotProduct } from "./math"; /** * A face cluster is an set of faces. * - * Each cluster has an id so that a {@link Person} can refer to it. + * Each cluster has an id so that a {@link CGroup} can refer to it. * - * The cluster is not directly synced to remote. 
But it does indirectly get - * synced if it gets promoted or attached to a person (which can be thought of - * as a named or hidden clusters). + * The cluster is not directly synced to remote. Only clusters that the user + * interacts with get synced to remote, as part of a {@link CGroup}. */ export interface FaceCluster { /** @@ -29,67 +28,77 @@ export interface FaceCluster { } /** - * A Person is a set of clusters with some attached metadata. + * A cgroup ("cluster group") is a group of clusters (possibly containing a + * single cluster) that the user has interacted with. * - * More precisely, a person is a a single cluster or a set of clusters that the - * user has interacted with. + * Interactions include hiding, merging and giving a name and/or a cover photo. * * The most frequent interaction is naming a {@link FaceCluster}, which promotes - * it to a become a {@link Person}. The promotion comes with the ability to be - * synced with remote (as a "person_v2" user entity). + * it to become a {@link CGroup}. The promotion comes with the ability to be + * synced with remote (as a "cgroup" user entity). * - * There after, the user may attach more clusters to the same {@link Person}. + * Thereafter, the user may attach more clusters to the same {@link CGroup}. + * + * > A named cluster group can be thought of as a "person", though this is not + * > necessarily an accurate characterization. e.g. there can be a named cluster + * > group that contains face clusters of pets. * * The other form of interaction is hiding. The user may hide a single (unnamed) - * cluster, or they may hide a person. + * cluster, or they may hide a named {@link CGroup}. In both cases, we promote + * the cluster to a CGroup if needed so that their request to hide gets synced. 
* - * The Person entity on remote has clusters embedded within itself + * While in our local representation we separately maintain clusters and link to + * them from within CGroups by their clusterID, in the remote representation + * clusters themselves don't get synced. Instead, the "cgroup" entities synced + * with remote contain the clusters within themselves. So a group that gets + * synced with remote looks something like: * - * { name, clusters: [{ clusterID, faceIDs }] } + * { id, name, clusters: [{ clusterID, faceIDs }] } * - * Since clusters don't get independently synced, one way to think about a - * Person is that it is an interaction with a cluster that we want to sync. */ -export interface Person { +export interface CGroup { /** - * A UUID or nanoid for this person. + * A nanoid for this cluster group. * - * This is the ID of the Person user entity, it is not contained as part of - * the Person entity payload. + * This is the ID of the "cgroup" user entity, it is not contained as part + * of the group entity payload itself. */ id: string; /** - * A name assigned by the user to this person. + * A name assigned by the user to this cluster group. * - * This can be missing or an empty string for an unnamed cluster that was + * This should be set to an empty string for an unnamed cluster that was * hidden. */ name: string | undefined; /** - * An unordered set of ids of the clusters that belong to this person. + * An unordered set of ids of the clusters that belong to this group. * * For ergonomics of transportation and persistence this is an array, but it * should conceptually be thought of as a set. */ clusterIDs: string[]; /** - * True if this person should be hidden. + * True if this cluster group should be hidden. * - * This can also be true for unnamed hidden clusters. 
When the user hides a - * single cluster that was offered as a suggestion to them on a client, then - * the client will create a new person entity without a name, and set its - * hidden flag to sync it with remote (so that other clients can also stop - * showing this cluster). + * The user can hide both named cluster groups and single unnamed clusters. + * If the user hides a single cluster that was offered as a suggestion to + * them on a client, the client will create a new unnamed cgroup containing + * it, and set its hidden flag to sync it with remote (so that other clients + * can also stop showing this cluster). */ isHidden: boolean; /** - * The ID of the face that should be used as the cover photo for this person - * (if the user has set one). + * The ID of the face that should be used as the cover photo for this + * cluster group (if the user has set one). + * + * {@link avatarFaceID} is the user selected face. {@link displayFaceID} is + * the automatic placeholder. */ avatarFaceID: string | undefined; /** * Locally determined ID of the "best" face that should be used as the - * display face, to represent this person in the UI. + * display face, to represent this cluster group in the UI. */ displayFaceID: string | undefined; } @@ -99,9 +108,11 @@ export interface Person { * * [Note: Face clustering algorithm] * - * A person consists of clusters, each of which itself is a set of faces. + * A (cluster) group consists of clusters, each of which itself is a set of + * faces. * - * The clusters are generated using locally by clients using this algorithm: + * The clusters are generated locally by clients using the following + * (pseudo-) algorithm: * * 1. clusters = [] initially, or fetched from remote. * @@ -116,11 +127,11 @@ export interface Person { * following actions to the list of clusters that they can see: * * - They can provide a name for a cluster. This upgrades a cluster into a - * "Person", which then gets synced via remote to all their devices. 
+ * "cgroup", which then gets synced via remote to all their devices. * - * - They can attach more clusters to a person. + * - They can attach more clusters to a cgroup. * - * - They can remove a cluster from a person. + * - They can remove a cluster from a cgroup. * * After clustering, we also do some routine cleanup. Faces belonging to files * that have been deleted (including those in Trash) should be pruned off. @@ -226,14 +237,14 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { // Prune too small clusters. const validClusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1); - // For each person, use the highest scoring face in any of its clusters as - // its display face. + // For each cluster group, use the highest scoring face in any of its + // clusters as its display face. const faceForFaceID = new Map(faces.map((f) => [f.faceID, f])); - const people = await persons(); + const cgroups = await clusterGroups(); - for (const person of people) { - person.avatarFaceID = person.clusterIDs + for (const cgroup of cgroups) { + cgroup.avatarFaceID = cgroup.clusterIDs .map((clusterID) => clusterIndexForClusterID.get(clusterID)) .map((clusterIndex) => clusterIndex ? 
clusters[clusterIndex] : undefined, @@ -254,7 +265,7 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { validClusters, clusterIndexForClusterID, clusterIDForFaceID, - people, + cgroups, }, ]); log.debug( @@ -262,7 +273,7 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { `Clustered ${faces.length} faces into ${validClusters.length} clusters (${Date.now() - t} ms)`, ); - return { clusters: validClusters, people }; + return { clusters: validClusters, cgroups }; }; /** diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index 6ae98519c3..7252483845 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -3,32 +3,47 @@ import log from "@/base/log"; import localForage from "@ente/shared/storage/localForage"; import { deleteDB, openDB, type DBSchema } from "idb"; import type { LocalCLIPIndex } from "./clip"; -import type { FaceCluster, Person } from "./cluster-new"; +import type { CGroup, FaceCluster } from "./cluster-new"; import type { LocalFaceIndex } from "./face"; /** * ML DB schema. * - * The "ML" database is made of three object stores: + * The "ML" database is made of the lower level "index" object stores, and + * higher level "cluster" object stores. * - * - "file-status": Contains {@link FileStatus} objects, one for each - * {@link EnteFile} that the ML subsystem knows about. Periodically (and when - * required), this is synced with the list of files that the current client - * knows about locally. + * The index related object stores are the following: * - * - "face-index": Contains {@link LocalFaceIndex} objects, either indexed - * locally or fetched from remote. + * - "file-status": Contains {@link FileStatus} objects, one for each + * {@link EnteFile} that the ML subsystem knows about. Periodically (and + * when required), this is synced with the list of files that the current + * client knows about locally. 
* - * - "clip-index": Contains {@link LocalCLIPIndex} objects, either indexed - * locally or fetched from remote. + * - "face-index": Contains {@link LocalFaceIndex} objects, either indexed + * locally or fetched from remote. * - * All the stores are keyed by {@link fileID}. The "file-status" contains + * - "clip-index": Contains {@link LocalCLIPIndex} objects, either indexed + * locally or fetched from remote. + * + * These three stores are keyed by {@link fileID}. The "file-status" contains * book-keeping about the indexing process (whether or not a file needs * indexing, or if there were errors doing so), while the other stores contain * the actual indexing results. * - * In tandem, these serve as the underlying storage for the functions exposed by - * the ML database. + * In tandem, these serve as the underlying storage for the indexes maintained + * in the ML database. + * + * The cluster related object stores are the following: + * + * - "face-cluster": Contains {@link FaceCluster} objects, one for each + * cluster of faces that either the clustering algorithm produced locally or + * were synced from remote. It is indexed by the (cluster) ID. + * + * - "cluster-group": Contains {@link CGroup} objects, one for each group of + * clusters that were synced from remote. The client can also locally + * generate cluster groups on certain user interactions, but these too will + * eventually get synced with remote. This object store is indexed by the + * (cgroup) ID. 
*/ interface MLDBSchema extends DBSchema { "file-status": { @@ -48,9 +63,9 @@ interface MLDBSchema extends DBSchema { key: string; value: FaceCluster; }; - person: { + "cluster-group": { key: string; - value: Person; + value: CGroup; }; } @@ -111,7 +126,7 @@ const openMLDB = async () => { if (oldVersion < 3) { if (process.env.NEXT_PUBLIC_ENTE_WIP_CL) { db.createObjectStore("face-cluster", { keyPath: "id" }); - db.createObjectStore("person", { keyPath: "id" }); + db.createObjectStore("cluster-group", { keyPath: "id" }); } } }, @@ -419,18 +434,18 @@ export const faceClusters = async () => { }; /** - * Return all person entries (aka "people") present locally. + * Return all cluster group entries (aka "cgroups") present locally. */ -export const persons = async () => { +export const clusterGroups = async () => { const db = await mlDB(); - return db.getAll("person"); + return db.getAll("cluster-group"); }; /** * Replace the face clusters stored locally with the given ones. * - * This function deletes all entries from the person object store, and then - * inserts the given {@link clusters} into it. + * This function deletes all entries from the face cluster object store, and + * then inserts the given {@link clusters} into it. */ export const setFaceClusters = async (clusters: FaceCluster[]) => { const db = await mlDB(); @@ -441,19 +456,19 @@ export const setFaceClusters = async (clusters: FaceCluster[]) => { }; /** - * Update the person store to reflect the given changes, in order. + * Update the cluster group store to reflect the given changes. * * @param diff A list of changes to apply. 
Each entry is either * - * - A string, in which case the person with the given string as their ID - * should be deleted from the store, or + * - A string, in which case the cluster group with the given string as its + * ID should be deleted from the store, or * - * - A person, in which case it should add or overwrite the entry for the - * corresponding person (as identified by their {@link id}). + * - A cgroup, in which case it should add or overwrite the entry for the + * corresponding cluster group (as identified by its {@link id}). */ -export const applyPersonDiff = async (diff: (string | Person)[]) => { +export const applyCGroupDiff = async (diff: (string | CGroup)[]) => { const db = await mlDB(); - const tx = db.transaction("person", "readwrite"); + const tx = db.transaction("cluster-group", "readwrite"); // See: [Note: Diff response will have at most one entry for an id] await Promise.all( diff.map((d) => @@ -464,37 +479,22 @@ export const applyPersonDiff = async (diff: (string | Person)[]) => { }; /** - * Add or overwrite the entry for the given {@link person}, as identified by + * Add or overwrite the entry for the given {@link cgroup}, as identified by * their {@link id}. */ // TODO-Cluster: Remove me -export const savePerson = async (person: Person) => { +export const saveClusterGroup = async (cgroup: CGroup) => { const db = await mlDB(); - const tx = db.transaction("person", "readwrite"); - await Promise.all([tx.store.put(person), tx.done]); + const tx = db.transaction("cluster-group", "readwrite"); + await Promise.all([tx.store.put(cgroup), tx.done]); }; /** - * Delete the entry for the persons with the given {@link id}, if any. + * Delete the entry (if any) for the cluster group with the given {@link id}. 
*/ // TODO-Cluster: Remove me -export const deletePerson = async (id: string) => { +export const deleteClusterGroup = async (id: string) => { const db = await mlDB(); - const tx = db.transaction("person", "readwrite"); + const tx = db.transaction("cluster-group", "readwrite"); await Promise.all([tx.store.delete(id), tx.done]); }; - -/** - * Replace the persons stored locally with the given ones. - * - * This function deletes all entries from the person object store, and then - * inserts the given {@link persons} into it. - */ -// TODO-Cluster: Remove me -export const setPersons = async (persons: Person[]) => { - const db = await mlDB(); - const tx = db.transaction("person", "readwrite"); - await tx.store.clear(); - await Promise.all(persons.map((person) => tx.store.put(person))); - return tx.done; -}; diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index 65a36c5981..670bc5cc58 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -347,7 +347,7 @@ export const wipCluster = async () => { if (last) return last; - const { clusters, people } = await clusterFaces(await faceIndexes()); + const { clusters, cgroups } = await clusterFaces(await faceIndexes()); const clusterByID = new Map( clusters.map((cluster) => [cluster.id, cluster]), ); @@ -356,31 +356,31 @@ export const wipCluster = async () => { const localFilesByID = new Map(localFiles.map((f) => [f.id, f])); const result: SearchPerson[] = []; - for (const person of people) { - let avatarFaceID = person.avatarFaceID; + for (const cgroup of cgroups) { + let avatarFaceID = cgroup.avatarFaceID; // TODO-Cluster // Temp if (!avatarFaceID) { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - avatarFaceID = person.clusterIDs + avatarFaceID = cgroup.clusterIDs .map((id) => clusterByID.get(id)) .flatMap((cluster) => cluster?.faceIDs ?? 
[])[0]!; } - person.clusterIDs; + cgroup.clusterIDs; const avatarFaceFileID = fileIDFromFaceID(avatarFaceID); const avatarFaceFile = localFilesByID.get(avatarFaceFileID ?? 0); if (!avatarFaceFileID || !avatarFaceFile) { assertionFailed(`Face ID ${avatarFaceID} without local file`); continue; } - const files = person.clusterIDs + const files = cgroup.clusterIDs .map((id) => clusterByID.get(id)) .flatMap((cluster) => cluster?.faceIDs ?? []) .map((faceID) => fileIDFromFaceID(faceID)) .filter((fileID) => fileID !== undefined); result.push({ - id: person.id, - name: person.name, + id: cgroup.id, + name: cgroup.name, files, displayFaceID: avatarFaceID, displayFaceFile: avatarFaceFile, diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 3f211421c0..7e6825ad9b 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -78,7 +78,7 @@ interface IndexableItem { export class MLWorker { private electron: ElectronMLWorker | undefined; private delegate: MLWorkerDelegate | undefined; - private state: "idle" | "tick" | "pull" | "indexing" = "idle"; + private state: "idle" | "tick" | "indexing" = "idle"; private liveQ: IndexableItem[] = []; private idleTimeout: ReturnType | undefined; private idleDuration = idleDurationStart; /* unit: seconds */ diff --git a/web/packages/new/photos/services/search.ts b/web/packages/new/photos/services/search.ts index de11843164..587485d571 100644 --- a/web/packages/new/photos/services/search.ts +++ b/web/packages/new/photos/services/search.ts @@ -1,7 +1,7 @@ import type { EnteFile } from "@/new/photos/types/file"; /** - * A massaged version of {@link Person} suitable for being shown in search + * A massaged version of {@link CGroup} suitable for being shown in search * results. 
*/ export interface SearchPerson { diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 260317341a..ee3e5edd8c 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -7,21 +7,23 @@ import { usersEncryptionKeyB64 } from "@/base/session-store"; import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; import { gunzip } from "./gzip"; -import type { Person } from "./ml/cluster-new"; -import { applyPersonDiff } from "./ml/db"; +import type { CGroup } from "./ml/cluster-new"; +import { applyCGroupDiff } from "./ml/db"; /** * User entities are predefined lists of otherwise arbitrary data that the user * can store for their account. * - * e.g. location tags, people in their photos. + * e.g. location tags, cluster groups. */ export type EntityType = /** - * The latest iteration of the Person entity format, where the data is - * gzipped before encryption. + * A cluster group. + * + * Format: An encrypted string containing a gzipped JSON string representing + * the cgroup data. */ - "person_v2"; + "cgroup"; /** * The maximum number of items to fetch in a single diff @@ -313,21 +315,21 @@ const saveLatestUpdatedAt = (type: EntityType, value: number) => setKV(latestUpdatedAtKey(type), value); /** - * Sync the {@link Person} entities that we have locally with remote. + * Sync the {@link CGroup} entities that we have locally with remote. * - * This fetches all the user entities corresponding to the "person_v2" entity - * type from remote that have been created, updated or deleted since the last - * time we checked. + * This fetches all the user entities corresponding to the "cgroup" entity type + * from remote that have been created, updated or deleted since the last time we + * checked. * * This diff is then applied to the data we have persisted locally. 
*/ -export const syncPersons = async () => { - const type: EntityType = "person_v2"; +export const syncCGroups = async () => { + const type: EntityType = "cgroup"; const entityKeyB64 = await getOrCreateEntityKeyB64(type); - const parse = async (id: string, data: Uint8Array): Promise => { - const rp = RemotePerson.parse(JSON.parse(await gunzip(data))); + const parse = async (id: string, data: Uint8Array): Promise => { + const rp = RemoteCGroup.parse(JSON.parse(await gunzip(data))); return { id, name: rp.name, @@ -344,7 +346,7 @@ export const syncPersons = async () => { const entities = await userEntityDiff(type, sinceTime, entityKeyB64); if (entities.length == 0) break; - await applyPersonDiff( + await applyCGroupDiff( await Promise.all( entities.map(async ({ id, data }) => data ? await parse(id, data) : id, @@ -360,8 +362,8 @@ export const syncPersons = async () => { } }; -/** Zod schema for the {@link RemotePerson} type. */ -const RemotePerson = z.object({ +/** Zod schema for the {@link RemoteCGroup} type. */ +const RemoteCGroup = z.object({ name: z.string().nullish().transform(nullToUndefined), assigned: z.array( z.object({ @@ -374,6 +376,6 @@ const RemotePerson = z.object({ }); /** - * A "person_v2" entity as synced via remote. + * Contents of a "cgroup" user entity, as synced via remote. */ -type RemotePerson = z.infer; +type RemoteCGroup = z.infer;