From 2376327e5224a0d4a4799973c6312cc8ceb9af04 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 14:18:43 +0530 Subject: [PATCH 01/31] zt --- .../new/photos/services/ml/cluster-new.ts | 2 +- web/packages/new/photos/services/user-entity.ts | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index acb74639f7..356c9885d5 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -17,7 +17,7 @@ export interface FaceCluster { /** * An unordered set of ids of the faces that belong to the cluster. * - * For ergonomics of transportation and persistence this is an array but it + * For ergonomics of transportation and persistence this is an array, but it * should conceptually be thought of as a set. */ faceIDs: string[]; diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 6be7c699be..700c8ca976 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -1,6 +1,7 @@ import { decryptAssociatedB64Data } from "@/base/crypto/ente"; import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; import { apiURL } from "@/base/origins"; +import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; /** @@ -131,6 +132,19 @@ export const personDiff = async (entityKeyB64: string) => { const entities = await userEntityDiff("person", 0, entityKeyB64); return entities.map(({ data }) => { if (!data) return undefined; - return JSON.parse(new TextDecoder().decode(data)) as unknown; + return RemotePerson.parse(JSON.parse(new TextDecoder().decode(data))); }); }; + +/** + * Zod schema for the "person" entity + */ +const RemotePerson = z.object({ + name: z.string().nullish().transform(nullToUndefined), + assigned: z.array( + z.object({ + 
id: z.number(), // TODO z.string person_v2 + faces: z.string().array(), + }), + ), +}); From ef7b978cd586547ed17a39801a57b249aed567c8 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 14:34:11 +0530 Subject: [PATCH 02/31] Outline --- .../new/photos/services/ml/cluster-new.ts | 37 ++++++++++++------- .../new/photos/services/user-entity.ts | 3 +- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 356c9885d5..79252ac206 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -8,6 +8,10 @@ import { dotProduct } from "./math"; * A face cluster is an set of faces. * * Each cluster has an id so that a Person (a set of clusters) can refer to it. + * + * The cluster is not directly synced to remote. But it can indirectly get + * synced if it gets attached to a person (which can be thought of as a named + * cluster). */ export interface FaceCluster { /** @@ -15,7 +19,7 @@ export interface FaceCluster { */ id: string; /** - * An unordered set of ids of the faces that belong to the cluster. + * An unordered set of ids of the faces that belong to this cluster. * * For ergonomics of transportation and persistence this is an array, but it * should conceptually be thought of as a set. @@ -24,16 +28,13 @@ export interface FaceCluster { } /** - * A Person is a set of clusters, with some attached metadata. + * A Person is a set of clusters and some attached metadata. * - * The person is the user visible concept. It consists of a set of clusters, - * each of which itself is a set of faces. - * - * For ease of transportation, the Person entity on remote looks like + * For ease of transportation, the Person entity on remote is something like * * { name, clusters: [{ clusterID, faceIDs }] } * - * That is, it has the clusters embedded within itself. 
+ * That is, the Person has the clusters embedded within itself. */ export interface Person { /** @@ -43,11 +44,11 @@ export interface Person { /** * An optional name assigned by the user to this person. */ - name: string | undefined; + name: string; /** * An unordered set of ids of the clusters that belong to this person. * - * For ergonomics of transportation and persistence this is an array but it + * For ergonomics of transportation and persistence this is an array, but it * should conceptually be thought of as a set. */ clusterIDs: string[]; @@ -58,20 +59,28 @@ export interface Person { * * [Note: Face clustering algorithm] * + * A person consists of clusters, each of which itself is a set of faces. + * + * The clusters are generated using locally by clients using this algorithm: + * * 1. clusters = [] + * * 2. For each face, find its nearest neighbour in the embedding space from * amongst the faces that have already been clustered. + * * 3. If no such neighbour is found within our threshold, create a new cluster. + * * 4. Otherwise assign this face to the same cluster as its nearest neighbour. * - * [Note: Face clustering feedback] + * This user can then tweak the output of the algorithm by performing the + * following actions to the list of clusters that they can see: * - * This user can tweak the output of the algorithm by providing feedback. They - * can perform the following actions: + * - They can provide a name for a cluster. This upgrades a cluster into a + * "Person", which then gets synced via remote to all their devices. * - * 1. Move a cluster from one person to another. - * 2. Break a cluster. + * - They can attach more clusters to a person. * + * - They can remove a cluster from a person. 
*/ export const clusterFaces = (faceIndexes: FaceIndex[]) => { const t = Date.now(); diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 700c8ca976..61dc93d464 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -1,7 +1,6 @@ import { decryptAssociatedB64Data } from "@/base/crypto/ente"; import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; import { apiURL } from "@/base/origins"; -import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; /** @@ -140,7 +139,7 @@ export const personDiff = async (entityKeyB64: string) => { * Zod schema for the "person" entity */ const RemotePerson = z.object({ - name: z.string().nullish().transform(nullToUndefined), + name: z.string(), assigned: z.array( z.object({ id: z.number(), // TODO z.string person_v2 From 5081dc904b4553d2f04e7135dc5f32736d3c8a6d Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 14:54:07 +0530 Subject: [PATCH 03/31] Enhance --- .../new/photos/services/user-entity.ts | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 61dc93d464..63b8fda9a5 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -124,19 +124,7 @@ export const userEntityDiff = async ( }; /** - * Fetch all Person entities that have been created or updated since the last - * time we checked. 
- */ -export const personDiff = async (entityKeyB64: string) => { - const entities = await userEntityDiff("person", 0, entityKeyB64); - return entities.map(({ data }) => { - if (!data) return undefined; - return RemotePerson.parse(JSON.parse(new TextDecoder().decode(data))); - }); -}; - -/** - * Zod schema for the "person" entity + * Zod schema for the "person" entity (the {@link RemotePerson} type). */ const RemotePerson = z.object({ name: z.string(), @@ -147,3 +135,33 @@ const RemotePerson = z.object({ }), ), }); + +/** + * A "person" entity as synced via remote. + */ +export type RemotePerson = z.infer; + +/** + * Fetch all Person entities that have been created or updated since the last + * time we checked. + */ +export const personDiff = async ( + entityKeyB64: string, +): Promise => { + const sinceTime = 0; + const entities = await userEntityDiff("person", 0, entityKeyB64); + const latestUpdatedAt = entities.reduce( + (max, e) => Math.max(max, e.updatedAt), + sinceTime, + ); + const people = entities + .map(({ data }) => + data + ? RemotePerson.parse(JSON.parse(new TextDecoder().decode(data))) + : undefined, + ) + .filter((p) => !!p); + // TODO-Cluster + console.log({ latestUpdatedAt, people }); + return people; +}; From 614c3128763b0bd797092da71d0e38f422937083 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 15:19:02 +0530 Subject: [PATCH 04/31] Tentative DB schema --- .../new/photos/services/ml/cluster-new.ts | 2 +- web/packages/new/photos/services/ml/db.ts | 36 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 79252ac206..c412c43f9a 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -42,7 +42,7 @@ export interface Person { */ id: string; /** - * An optional name assigned by the user to this person. 
+ * A name assigned by the user to this person. */ name: string; /** diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index 3fe18d0731..4ee9539bd6 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -3,6 +3,7 @@ import log from "@/base/log"; import localForage from "@ente/shared/storage/localForage"; import { deleteDB, openDB, type DBSchema } from "idb"; import type { LocalCLIPIndex } from "./clip"; +import type { FaceCluster, Person } from "./cluster-new"; import type { LocalFaceIndex } from "./face"; /** @@ -43,6 +44,14 @@ interface MLDBSchema extends DBSchema { key: number; value: LocalCLIPIndex; }; + "face-cluster": { + key: string; + value: FaceCluster; + }; + person: { + key: string; + value: Person; + }; } interface FileStatus { @@ -98,6 +107,17 @@ const openMLDB = async () => { if (oldVersion < 2) { db.createObjectStore("clip-index", { keyPath: "fileID" }); } + // TODO-Cluster + if (oldVersion < 3) { + if ( + newVersion && + newVersion > 10 && + process.env.NEXT_PUBLIC_ENTE_WIP_CL + ) { + db.createObjectStore("face-cluster", { keyPath: "id" }); + db.createObjectStore("person", { keyPath: "id" }); + } + } }, blocking() { log.info( @@ -393,3 +413,19 @@ export const markIndexingFailed = async (fileID: number) => { fileStatus.failureCount = fileStatus.failureCount + 1; await Promise.all([tx.store.put(fileStatus), tx.done]); }; + +/** + * Return all face clusters present locally. + */ +export const faceClusters = async () => { + const db = await mlDB(); + return await db.getAll("face-cluster"); +}; + +/** + * Return all people present locally. 
+ */ +export const people = async () => { + const db = await mlDB(); + return await db.getAll("person"); +}; From 7f9391f89f3deedfd0f9281d403db16124cf23d4 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 18:50:14 +0530 Subject: [PATCH 05/31] Parse --- web/packages/new/photos/services/ml/index.ts | 2 +- web/packages/new/photos/services/user-entity.ts | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index ebee2fb790..e3e23d616f 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -330,7 +330,7 @@ export const wipCluster = async () => { if (last) return last; - const clusters = clusterFaces(await faceIndexes()); + const clusters = await clusterFaces(await faceIndexes()); const localFiles = await getAllLocalFiles(); const localFilesByID = new Map(localFiles.map((f) => [f.id, f])); diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 63b8fda9a5..b78f246120 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -130,7 +130,8 @@ const RemotePerson = z.object({ name: z.string(), assigned: z.array( z.object({ - id: z.number(), // TODO z.string person_v2 + // TODO-Cluster temporary modify + id: z.number().transform((n) => n.toString()), // TODO z.string person_v2 faces: z.string().array(), }), ), @@ -154,12 +155,10 @@ export const personDiff = async ( (max, e) => Math.max(max, e.updatedAt), sinceTime, ); + const parse = (data: Uint8Array) => + RemotePerson.parse(JSON.parse(new TextDecoder().decode(data))); const people = entities - .map(({ data }) => - data - ? RemotePerson.parse(JSON.parse(new TextDecoder().decode(data))) - : undefined, - ) + .map(({ data }) => (data ? 
parse(data) : undefined)) .filter((p) => !!p); // TODO-Cluster console.log({ latestUpdatedAt, people }); From 5e4f0d4caf57c3c915be3c523ace6864ca0700af Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 19:14:25 +0530 Subject: [PATCH 06/31] A2 --- .../new/photos/services/ml/cluster-new.ts | 96 ++++++++++++++----- 1 file changed, 74 insertions(+), 22 deletions(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index c412c43f9a..0e4919d517 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -1,6 +1,7 @@ import { newNonSecureID } from "@/base/id-worker"; import log from "@/base/log"; import { ensure } from "@/utils/ensure"; +import { faceClusters } from "./db"; import type { FaceIndex } from "./face"; import { dotProduct } from "./math"; @@ -63,7 +64,7 @@ export interface Person { * * The clusters are generated using locally by clients using this algorithm: * - * 1. clusters = [] + * 1. clusters = [] initially, or fetched from remote. * * 2. For each face, find its nearest neighbour in the embedding space from * amongst the faces that have already been clustered. @@ -82,18 +83,45 @@ export interface Person { * * - They can remove a cluster from a person. */ -export const clusterFaces = (faceIndexes: FaceIndex[]) => { +export const clusterFaces = async (faceIndexes: FaceIndex[]) => { const t = Date.now(); + // The face data that we need (face ID and its embedding). const faces = [...faceIDAndEmbeddings(faceIndexes)]; - let clusters: FaceCluster[] = []; - const clusterIndexByFaceID = new Map(); + // Start with the clusters we already have (either from a previous indexing, + // or fetched from remote). + const clusters = await faceClusters(); + + // For fast reverse lookup - map from cluster ids to the index in the + // clusters array. 
+ const clusterIndexForClusterID = new Map(clusters.map((c, i) => [c.id, i])); + + // For fast reverse lookup - map from face ids to the id of the cluster to + // which they belong. + const clusterIDForFaceID = new Map( + clusters.flatMap((c) => + c.faceIDs.map((faceID) => [faceID, c.id] as const), + ), + ); + + // Generate a new cluster ID + const newClusterID = () => newNonSecureID("cluster_"); + + // For each face for (const [i, { faceID, embedding }] of faces.entries()) { - // Find the nearest neighbour from among the faces we have already seen. + // If the face is already part of a cluster, then skip it. + if (clusterIDForFaceID.get(faceID)) continue; + + // Find the nearest neighbour from among all the other faces. let nnIndex: number | undefined; let nnCosineSimilarity = 0; - for (let j = 0; j < i; j++) { + for (let j = 0; j < faces.length; j++) { + // ! This is an O(n^2) loop, be careful when adding more code here. + + // Skip itself + if (i == j) continue; + // Can't find a way of avoiding the null assertion. // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const n = faces[j]!; @@ -106,36 +134,60 @@ export const clusterFaces = (faceIndexes: FaceIndex[]) => { nnCosineSimilarity = csim; } } - if (nnIndex === undefined) { - // We didn't find a neighbour. Create a new cluster with this face. - const cluster = { - id: newNonSecureID("cluster_"), - faceIDs: [faceID], - }; + if (nnIndex === undefined) { + // We didn't find a neighbour within the threshold. Create a new + // cluster with this face. + + const cluster = { id: newClusterID(), faceIDs: [faceID] }; clusters.push(cluster); - clusterIndexByFaceID.set(faceID, clusters.length); + clusterIndexForClusterID.set(cluster.id, clusters.length); + clusterIDForFaceID.set(faceID, cluster.id); } else { - // Found a neighbour near enough. Add this face to the neighbour's - // cluster. + // Found a neighbour near enough. 
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion const nn = faces[nnIndex]!; - const nnClusterIndex = ensure(clusterIndexByFaceID.get(nn.faceID)); - clusters[nnClusterIndex]?.faceIDs.push(faceID); - clusterIndexByFaceID.set(faceID, nnClusterIndex); + + // Find the cluster the nearest neighbour belongs to, if any. + const nnClusterID = clusterIDForFaceID.get(nn.faceID); + + if (nnClusterID) { + // If the neighbour is already part of a cluster, also add + // ourselves to that cluster. + + const nnClusterIndex = ensure( + clusterIndexForClusterID.get(nnClusterID), + ); + clusters[nnClusterIndex]?.faceIDs.push(faceID); + clusterIDForFaceID.set(faceID, nnClusterID); + } else { + // Create a new cluster with us and our nearest neighbour. + + const cluster = { + id: newClusterID(), + faceIDs: [faceID, nn.faceID], + }; + clusters.push(cluster); + clusterIndexForClusterID.set(cluster.id, clusters.length); + clusterIDForFaceID.set(faceID, cluster.id); + clusterIDForFaceID.set(nn.faceID, cluster.id); + } } } - clusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1); + const validClusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1); - log.debug(() => ["ml/cluster", { faces, clusters, clusterIndexByFaceID }]); + log.debug(() => [ + "ml/cluster", + { faces, validClusters, clusterIndexForClusterID, clusterIDForFaceID }, + ]); log.debug( () => - `Clustered ${faces.length} faces into ${clusters.length} clusters (${Date.now() - t} ms)`, + `Clustered ${faces.length} faces into ${validClusters.length} clusters (${Date.now() - t} ms)`, ); - return clusters; + return validClusters; }; /** From 1c9a14cfdc7874b6539e43d7aa3350138c32b5f0 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 19:26:32 +0530 Subject: [PATCH 07/31] Tweak --- .../new/photos/services/ml/cluster-new.ts | 39 ++++++++++--------- web/packages/new/photos/services/ml/db.ts | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git 
a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 0e4919d517..55b3510825 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -40,6 +40,9 @@ export interface FaceCluster { export interface Person { /** * A nanoid for this person. + * + * This is the ID of the Person user entity, it is not contained as part of + * the Person entity payload. */ id: string; /** @@ -105,24 +108,24 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { ), ); - // Generate a new cluster ID + // New cluster ID generator function. const newClusterID = () => newNonSecureID("cluster_"); - // For each face + // For each face, for (const [i, { faceID, embedding }] of faces.entries()) { // If the face is already part of a cluster, then skip it. if (clusterIDForFaceID.get(faceID)) continue; // Find the nearest neighbour from among all the other faces. - let nnIndex: number | undefined; + let nn: (typeof faces)[number] | undefined; let nnCosineSimilarity = 0; for (let j = 0; j < faces.length; j++) { // ! This is an O(n^2) loop, be careful when adding more code here. - // Skip itself + // Skip ourselves. if (i == j) continue; - // Can't find a way of avoiding the null assertion. + // Can't find a way of avoiding the null assertion here. // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const n = faces[j]!; @@ -130,25 +133,14 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { // dot product as their cosine similarity. const csim = dotProduct(embedding, n.embedding); if (csim > 0.76 && csim > nnCosineSimilarity) { - nnIndex = j; + nn = n; nnCosineSimilarity = csim; } } - if (nnIndex === undefined) { - // We didn't find a neighbour within the threshold. Create a new - // cluster with this face. 
- - const cluster = { id: newClusterID(), faceIDs: [faceID] }; - clusters.push(cluster); - clusterIndexForClusterID.set(cluster.id, clusters.length); - clusterIDForFaceID.set(faceID, cluster.id); - } else { + if (nn) { // Found a neighbour near enough. - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const nn = faces[nnIndex]!; - // Find the cluster the nearest neighbour belongs to, if any. const nnClusterID = clusterIDForFaceID.get(nn.faceID); @@ -162,7 +154,8 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { clusters[nnClusterIndex]?.faceIDs.push(faceID); clusterIDForFaceID.set(faceID, nnClusterID); } else { - // Create a new cluster with us and our nearest neighbour. + // Otherwise create a new cluster with us and our nearest + // neighbour. const cluster = { id: newClusterID(), @@ -173,6 +166,14 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { clusterIDForFaceID.set(faceID, cluster.id); clusterIDForFaceID.set(nn.faceID, cluster.id); } + } else { + // We didn't find a neighbour within the threshold. Create a new + // cluster with only this face. + + const cluster = { id: newClusterID(), faceIDs: [faceID] }; + clusters.push(cluster); + clusterIndexForClusterID.set(cluster.id, clusters.length); + clusterIDForFaceID.set(faceID, cluster.id); } } diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index 4ee9539bd6..f33271188a 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -425,7 +425,7 @@ export const faceClusters = async () => { /** * Return all people present locally. 
*/ -export const people = async () => { +export const persons = async () => { const db = await mlDB(); return await db.getAll("person"); }; From 207f9c50cf9da95af81820ef4ce67b2df584c1d3 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 19:39:43 +0530 Subject: [PATCH 08/31] Clean --- web/packages/new/photos/services/ml/people.ts | 43 +------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/web/packages/new/photos/services/ml/people.ts b/web/packages/new/photos/services/ml/people.ts index a85cc2f20a..6420794e1d 100644 --- a/web/packages/new/photos/services/ml/people.ts +++ b/web/packages/new/photos/services/ml/people.ts @@ -37,14 +37,6 @@ export const syncPeopleIndex = async () => { } - // TODO: have faces addresable through fileId + faceId - // to avoid index based addressing, which is prone to wrong results - // one way could be to match nearest face within threshold in the file - - const allFacesMap = - syncContext.allSyncedFacesMap ?? - (syncContext.allSyncedFacesMap = await mlIDbStorage.getAllFacesMap()); - // await this.init(); @@ -86,30 +78,13 @@ export const syncPeopleIndex = async () => { : best, ); -export async function getLocalFile(fileId: number) { - const localFiles = await getLocalFiles(); - return localFiles.find((f) => f.id === fileId); -} - - if (personFace && !personFace.crop?.cacheKey) { - const file = await getLocalFile(personFace.fileId); - const imageBitmap = await fetchImageBitmap(file); - await saveFaceCrop(imageBitmap, personFace); - } - - - const person: Person = { - id: index, - files: faces.map((f) => f.fileId), - displayFaceId: personFace?.id, - }; await mlIDbStorage.putPerson(person); faces.forEach((face) => { face.personId = person.id; }); - // log.info("Creating person: ", person, faces); + } await mlIDbStorage.updateFaces(allFacesMap); @@ -117,20 +92,4 @@ export async function getLocalFile(fileId: number) { // await mlIDbStorage.setIndexVersion("people", filesVersion); }; - public async 
regenerateFaceCrop(token: string, faceID: string) { - await downloadManager.init(APPS.PHOTOS, { token }); - return mlService.regenerateFaceCrop(faceID); - } - -export const regenerateFaceCrop = async (faceID: string) => { - const fileID = Number(faceID.split("-")[0]); - const personFace = await mlIDbStorage.getFace(fileID, faceID); - if (!personFace) { - throw Error("Face not found"); - } - - const file = await getLocalFile(personFace.fileId); - const imageBitmap = await fetchImageBitmap(file); - return await saveFaceCrop(imageBitmap, personFace); -}; */ From 3097810f2c281fc5ef7e72b4b174ecc6f233c73a Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 20:14:30 +0530 Subject: [PATCH 09/31] Top scorer --- .../new/photos/services/ml/cluster-new.ts | 49 ++++++++++++++++--- web/packages/new/photos/services/ml/index.ts | 3 +- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 55b3510825..202d656ec0 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -1,8 +1,8 @@ import { newNonSecureID } from "@/base/id-worker"; import log from "@/base/log"; import { ensure } from "@/utils/ensure"; -import { faceClusters } from "./db"; -import type { FaceIndex } from "./face"; +import { faceClusters, persons } from "./db"; +import type { Face, FaceIndex } from "./face"; import { dotProduct } from "./math"; /** @@ -56,6 +56,11 @@ export interface Person { * should conceptually be thought of as a set. */ clusterIDs: string[]; + /** + * The ID of the face that should be used as the display face, to represent + * this person in the UI. + */ + avatarFaceID: string; } /** @@ -89,8 +94,8 @@ export interface Person { export const clusterFaces = async (faceIndexes: FaceIndex[]) => { const t = Date.now(); - // The face data that we need (face ID and its embedding). 
- const faces = [...faceIDAndEmbeddings(faceIndexes)]; + // A flattened array of faces. + const faces = [...enumerateFaces(faceIndexes)]; // Start with the clusters we already have (either from a previous indexing, // or fetched from remote). @@ -117,7 +122,7 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { if (clusterIDForFaceID.get(faceID)) continue; // Find the nearest neighbour from among all the other faces. - let nn: (typeof faces)[number] | undefined; + let nn: Face | undefined; let nnCosineSimilarity = 0; for (let j = 0; j < faces.length; j++) { // ! This is an O(n^2) loop, be careful when adding more code here. @@ -177,11 +182,39 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { } } + // Prune too small clusters. const validClusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1); + // For each person, use the highest scoring face in any of its clusters as + // its display face. + + const faceForFaceID = new Map(faces.map((f) => [f.faceID, f])); + const people = await persons(); + + for (const person of people) { + person.avatarFaceID = person.clusterIDs + .map((clusterID) => clusterIndexForClusterID.get(clusterID)) + .map((clusterIndex) => + clusterIndex ? clusters[clusterIndex] : undefined, + ) + .filter((cluster) => !!cluster) + .flatMap((cluster) => cluster.faceIDs) + .map((id) => faceForFaceID.get(id)) + .filter((face) => !!face) + .reduce((topFace, face) => + topFace.score > face.score ? topFace : face, + ).faceID; + } + log.debug(() => [ "ml/cluster", - { faces, validClusters, clusterIndexForClusterID, clusterIDForFaceID }, + { + faces, + validClusters, + clusterIndexForClusterID, + clusterIDForFaceID, + people, + }, ]); log.debug( () => @@ -195,10 +228,10 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { * A generator function that returns a stream of {faceID, embedding} values, * flattening all the all the faces present in the given {@link faceIndices}. 
*/ -function* faceIDAndEmbeddings(faceIndices: FaceIndex[]) { +function* enumerateFaces(faceIndices: FaceIndex[]) { for (const fi of faceIndices) { for (const f of fi.faces) { - yield { faceID: f.faceID, embedding: f.embedding }; + yield f; } } } diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index e3e23d616f..76ce68da36 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -510,7 +510,8 @@ export const unidentifiedFaceIDs = async ( }; /** - * Extract the ID of the {@link EnteFile} to which a face belongs from its ID. + * Extract the fileID of the {@link EnteFile} to which the face belongs from its + * faceID. */ const fileIDFromFaceID = (faceID: string) => { const fileID = parseInt(faceID.split("_")[0] ?? ""); From 113bd9744eab08b3b8be953bf839fdaa2cd917ff Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 20:23:04 +0530 Subject: [PATCH 10/31] Update all --- web/packages/new/photos/services/ml/db.ts | 34 +++++++++++++++++-- web/packages/new/photos/services/ml/people.ts | 7 ---- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index f33271188a..be35790591 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -419,13 +419,41 @@ export const markIndexingFailed = async (fileID: number) => { */ export const faceClusters = async () => { const db = await mlDB(); - return await db.getAll("face-cluster"); + return db.getAll("face-cluster"); }; /** - * Return all people present locally. + * Return all person entries (aka "people") present locally. */ export const persons = async () => { const db = await mlDB(); - return await db.getAll("person"); + return db.getAll("person"); +}; + +/** + * Replace the face clusters stored locally with the given ones. 
+ * + * This function deletes all entries from the person object store, and then + * inserts the given {@link clusters} into it. + */ +export const setFaceClusters = async (clusters: FaceCluster[]) => { + const db = await mlDB(); + const tx = db.transaction("face-cluster", "readwrite"); + await tx.store.clear(); + await Promise.all(clusters.map((cluster) => tx.store.put(cluster))); + return tx.done; +}; + +/** + * Replace the persons stored locally with the given ones. + * + * This function deletes all entries from the person object store, and then + * inserts the given {@link persons} into it. + */ +export const setPersons = async (persons: Person[]) => { + const db = await mlDB(); + const tx = db.transaction("person", "readwrite"); + await tx.store.clear(); + await Promise.all(persons.map((person) => tx.store.put(person))); + return tx.done; }; diff --git a/web/packages/new/photos/services/ml/people.ts b/web/packages/new/photos/services/ml/people.ts index 6420794e1d..d2d9c884a3 100644 --- a/web/packages/new/photos/services/ml/people.ts +++ b/web/packages/new/photos/services/ml/people.ts @@ -71,13 +71,6 @@ export const syncPeopleIndex = async () => { for (const [index, cluster] of clusters.entries()) { const faces = cluster.map((f) => allFaces[f]).filter((f) => f); - // TODO: take default display face from last leaves of hdbscan clusters - const personFace = faces.reduce((best, face) => - face.detection.probability > best.detection.probability - ? 
face - : best, - ); - await mlIDbStorage.putPerson(person); From 565546755a75c6e61b9bc7d7865d984c19432af9 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 20:35:30 +0530 Subject: [PATCH 11/31] Split the type --- .../Search/SearchBar/searchInput/index.tsx | 4 +- web/apps/photos/src/services/searchService.ts | 6 +- web/apps/photos/src/types/search/index.ts | 6 +- .../new/photos/components/PeopleList.tsx | 8 +- web/packages/new/photos/services/ml/index.ts | 7 +- web/packages/new/photos/services/ml/people.ts | 88 ------------------- web/packages/new/photos/services/search.ts | 13 +++ 7 files changed, 29 insertions(+), 103 deletions(-) delete mode 100644 web/packages/new/photos/services/ml/people.ts create mode 100644 web/packages/new/photos/services/search.ts diff --git a/web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx b/web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx index 9f493ba9c7..80a863b62e 100644 --- a/web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx +++ b/web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx @@ -1,6 +1,5 @@ import { FileType } from "@/media/file-type"; import { isMLEnabled } from "@/new/photos/services/ml"; -import type { Person } from "@/new/photos/services/ml/people"; import { EnteFile } from "@/new/photos/types/file"; import CloseIcon from "@mui/icons-material/Close"; import { IconButton } from "@mui/material"; @@ -19,6 +18,7 @@ import { } from "services/searchService"; import { Collection } from "types/collection"; import { LocationTagData } from "types/entity"; +import type { SearchPerson } from "@/new/photos/services/search"; import { ClipSearchScores, DateValue, @@ -146,7 +146,7 @@ export default function SearchInput(props: Iprops) { search = { files: selectedOption.value as number[] }; break; case SuggestionType.PERSON: - search = { person: selectedOption.value as Person }; + search = { person: selectedOption.value as SearchPerson }; break; 
case SuggestionType.FILE_TYPE: search = { fileType: selectedOption.value as FileType }; diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts index c469c4eec5..f9507bd675 100644 --- a/web/apps/photos/src/services/searchService.ts +++ b/web/apps/photos/src/services/searchService.ts @@ -9,7 +9,7 @@ import { wipCluster, wipClusterEnable, } from "@/new/photos/services/ml"; -import type { Person } from "@/new/photos/services/ml/people"; +import type { SearchPerson } from "@/new/photos/services/search"; import { personDiff } from "@/new/photos/services/user-entity"; import { EnteFile } from "@/new/photos/types/file"; import * as chrono from "chrono-node"; @@ -406,7 +406,7 @@ function convertSuggestionToSearchQuery(option: Suggestion): Search { return { files: option.value as number[] }; case SuggestionType.PERSON: - return { person: option.value as Person }; + return { person: option.value as SearchPerson }; case SuggestionType.FILE_TYPE: return { fileType: option.value as FileType }; @@ -429,7 +429,7 @@ async function getAllPeople(limit: number = undefined) { return []; } - let people: Array = []; // await mlIDbStorage.getAllPeople(); + let people: Array = []; // await mlIDbStorage.getAllPeople(); people = await wipCluster(); // await mlPeopleStore.iterate((person) => { // people.push(person); diff --git a/web/apps/photos/src/types/search/index.ts b/web/apps/photos/src/types/search/index.ts index fdb054f7f5..5c9158958a 100644 --- a/web/apps/photos/src/types/search/index.ts +++ b/web/apps/photos/src/types/search/index.ts @@ -1,6 +1,6 @@ import { FileType } from "@/media/file-type"; import type { MLStatus } from "@/new/photos/services/ml"; -import type { Person } from "@/new/photos/services/ml/people"; +import type { SearchPerson } from "@/new/photos/services/search"; import { EnteFile } from "@/new/photos/types/file"; import { City } from "services/locationSearchService"; import { LocationTagData } from "types/entity"; 
@@ -30,7 +30,7 @@ export interface Suggestion { value: | DateValue | number[] - | Person + | SearchPerson | MLStatus | LocationTagData | City @@ -45,7 +45,7 @@ export type Search = { city?: City; collection?: number; files?: number[]; - person?: Person; + person?: SearchPerson; fileType?: FileType; clip?: ClipSearchScores; }; diff --git a/web/packages/new/photos/components/PeopleList.tsx b/web/packages/new/photos/components/PeopleList.tsx index adc53dbaf2..d3d6fe0c9c 100644 --- a/web/packages/new/photos/components/PeopleList.tsx +++ b/web/packages/new/photos/components/PeopleList.tsx @@ -1,14 +1,14 @@ import { faceCrop, unidentifiedFaceIDs } from "@/new/photos/services/ml"; -import type { Person } from "@/new/photos/services/ml/people"; import type { EnteFile } from "@/new/photos/types/file"; import { Skeleton, Typography, styled } from "@mui/material"; import { t } from "i18next"; import React, { useEffect, useState } from "react"; +import type { SearchPerson } from "../services/search"; export interface PeopleListProps { - people: Person[]; + people: SearchPerson[]; maxRows: number; - onSelect?: (person: Person, index: number) => void; + onSelect?: (person: SearchPerson, index: number) => void; } export const PeopleList: React.FC = ({ @@ -60,7 +60,7 @@ const FaceChip = styled("div")<{ clickable?: boolean }>` export interface PhotoPeopleListProps { file: EnteFile; - onSelect?: (person: Person, index: number) => void; + onSelect?: (person: SearchPerson, index: number) => void; } export function PhotoPeopleList() { diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index 76ce68da36..5ca2bbe950 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -27,9 +27,9 @@ import { faceIndexes, indexableAndIndexedCounts, } from "./db"; -import type { Person } from "./people"; import { MLWorker } from "./worker"; import type { CLIPMatches } from "./worker-types"; 
+import type { SearchPerson } from "../search"; /** * Internal state of the ML subsystem. @@ -314,7 +314,8 @@ export const indexNewUpload = (enteFile: EnteFile, uploadItem: UploadItem) => { void worker().then((w) => w.onUpload(enteFile, uploadItem)); }; -let last: Person[] | undefined; +// TODO-Cluster temporary import here +let last: SearchPerson[] | undefined; /** * WIP! Don't enable, dragon eggs are hatching here. @@ -335,7 +336,7 @@ export const wipCluster = async () => { const localFiles = await getAllLocalFiles(); const localFilesByID = new Map(localFiles.map((f) => [f.id, f])); - const people: Person[] = []; // await mlIDbStorage.getAllPeople(); + const people: SearchPerson[] = []; // await mlIDbStorage.getAllPeople(); for (const cluster of clusters) { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion const dfID = cluster.faceIDs[0]!; diff --git a/web/packages/new/photos/services/ml/people.ts b/web/packages/new/photos/services/ml/people.ts deleted file mode 100644 index d2d9c884a3..0000000000 --- a/web/packages/new/photos/services/ml/people.ts +++ /dev/null @@ -1,88 +0,0 @@ -import type { EnteFile } from "../../types/file"; - -export interface Person { - id: number; - name?: string; - files: number[]; - displayFaceID: string; - displayFaceFile: EnteFile; -} - -// Forced disable clustering. It doesn't currently work. 
-// -// > Error: Failed to execute 'transferToImageBitmap' on -// > 'OffscreenCanvas': ImageBitmap construction failed - -/* -export const syncPeopleIndex = async () => { - - if ( - syncContext.outOfSyncFiles.length <= 0 || - (syncContext.nSyncedFiles === batchSize && Math.random() < 0) - ) { - await this.syncIndex(syncContext); - } - - public async syncIndex(syncContext: MLSyncContext) { - await this.getMLLibraryData(syncContext); - - await syncPeopleIndex(syncContext); - - await this.persistMLLibraryData(syncContext); - } - - const filesVersion = await mlIDbStorage.getIndexVersion("files"); - if (filesVersion <= (await mlIDbStorage.getIndexVersion("people"))) { - return; - } - - - - // await this.init(); - - const allFacesMap = await mlIDbStorage.getAllFacesMap(); - const allFaces = [...allFacesMap.values()].flat(); - - if (!allFaces || allFaces.length < 50) { - log.info( - `Skipping clustering since number of faces (${allFaces.length}) is less than the clustering threshold (50)`, - ); - return; - } - - log.info("Running clustering allFaces: ", allFaces.length); - const faceClusteringResults = await clusterFaces( - allFaces.map((f) => Array.from(f.embedding)), - ); - log.info( - "[MLService] Got face clustering results: ", - JSON.stringify(faceClusteringResults), - ); - - const clusters = faceClusteringResults?.clusters; - if (!clusters || clusters.length < 1) { - return; - } - - for (const face of allFaces) { - face.personId = undefined; - } - await mlIDbStorage.clearAllPeople(); - for (const [index, cluster] of clusters.entries()) { - const faces = cluster.map((f) => allFaces[f]).filter((f) => f); - - - await mlIDbStorage.putPerson(person); - - faces.forEach((face) => { - face.personId = person.id; - }); - - } - - await mlIDbStorage.updateFaces(allFacesMap); - - // await mlIDbStorage.setIndexVersion("people", filesVersion); -}; - -*/ diff --git a/web/packages/new/photos/services/search.ts b/web/packages/new/photos/services/search.ts new file mode 100644 index 
0000000000..300fad5c24 --- /dev/null +++ b/web/packages/new/photos/services/search.ts @@ -0,0 +1,13 @@ +import type { EnteFile } from "@/new/photos/types/file"; + +/** + * A massaged version of {@link Person} suitable for being shown in search + * results. + */ +export interface SearchPerson { + id: number; + name?: string; + files: number[]; + displayFaceID: string; + displayFaceFile: EnteFile; +} From f802e87215986a8d23fc73ba445fdfeb9867ae89 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 Aug 2024 20:47:13 +0530 Subject: [PATCH 12/31] To search person --- .../new/photos/services/ml/cluster-new.ts | 2 +- web/packages/new/photos/services/ml/index.ts | 43 +++++++++++-------- web/packages/new/photos/services/search.ts | 2 +- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 202d656ec0..9bf342bab9 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -221,7 +221,7 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { `Clustered ${faces.length} faces into ${validClusters.length} clusters (${Date.now() - t} ms)`, ); - return validClusters; + return { clusters: validClusters, people }; }; /** diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index 5ca2bbe950..1981df4394 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -18,6 +18,7 @@ import { proxy, transfer } from "comlink"; import { isInternalUser } from "../feature-flags"; import { getAllLocalFiles } from "../files"; import { getRemoteFlag, updateRemoteFlag } from "../remote-store"; +import type { SearchPerson } from "../search"; import type { UploadItem } from "../upload/types"; import { clusterFaces } from "./cluster-new"; import { regenerateFaceCrops } from "./crop"; @@ -29,7 +30,6 @@ 
import { } from "./db"; import { MLWorker } from "./worker"; import type { CLIPMatches } from "./worker-types"; -import type { SearchPerson } from "../search"; /** * Internal state of the ML subsystem. @@ -331,32 +331,39 @@ export const wipCluster = async () => { if (last) return last; - const clusters = await clusterFaces(await faceIndexes()); + const { clusters, people } = await clusterFaces(await faceIndexes()); + const clusterByID = new Map( + clusters.map((cluster) => [cluster.id, cluster]), + ); const localFiles = await getAllLocalFiles(); const localFilesByID = new Map(localFiles.map((f) => [f.id, f])); - const people: SearchPerson[] = []; // await mlIDbStorage.getAllPeople(); - for (const cluster of clusters) { - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const dfID = cluster.faceIDs[0]!; - const dfFile = localFilesByID.get(fileIDFromFaceID(dfID) ?? 0); - if (!dfFile) { - assertionFailed(`Face ID ${dfID} without local file`); + const result: SearchPerson[] = []; + for (const person of people) { + const avatarFaceID = person.avatarFaceID; + const avatarFaceFileID = fileIDFromFaceID(avatarFaceID); + const avatarFaceFile = localFilesByID.get(avatarFaceFileID ?? 0); + if (!avatarFaceFileID || !avatarFaceFile) { + assertionFailed(`Face ID ${avatarFaceID} without local file`); continue; } - people.push({ - id: Math.random(), //cluster.id, - name: "test", - // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - files: cluster.faceIDs.map((s) => parseInt(s.split("_")[0]!)), - displayFaceID: dfID, - displayFaceFile: dfFile, + const files = person.clusterIDs + .map((id) => clusterByID.get(id)) + .flatMap((cluster) => cluster?.faceIDs ?? 
[]) + .map((faceID) => fileIDFromFaceID(faceID)) + .filter((fileID) => fileID !== undefined); + result.push({ + id: person.id, + name: person.name, + files, + displayFaceID: avatarFaceID, + displayFaceFile: avatarFaceFile, }); } - last = people; - return people; + last = result; + return result; }; export type MLStatus = diff --git a/web/packages/new/photos/services/search.ts b/web/packages/new/photos/services/search.ts index 300fad5c24..de11843164 100644 --- a/web/packages/new/photos/services/search.ts +++ b/web/packages/new/photos/services/search.ts @@ -5,7 +5,7 @@ import type { EnteFile } from "@/new/photos/types/file"; * results. */ export interface SearchPerson { - id: number; + id: string; name?: string; files: number[]; displayFaceID: string; From 82b8658268271ebff2a518f206548f293c3f950e Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 08:45:00 +0530 Subject: [PATCH 13/31] lint-fix --- .../src/components/Search/SearchBar/searchInput/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx b/web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx index 80a863b62e..1317464b49 100644 --- a/web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx +++ b/web/apps/photos/src/components/Search/SearchBar/searchInput/index.tsx @@ -1,5 +1,6 @@ import { FileType } from "@/media/file-type"; import { isMLEnabled } from "@/new/photos/services/ml"; +import type { SearchPerson } from "@/new/photos/services/search"; import { EnteFile } from "@/new/photos/types/file"; import CloseIcon from "@mui/icons-material/Close"; import { IconButton } from "@mui/material"; @@ -18,7 +19,6 @@ import { } from "services/searchService"; import { Collection } from "types/collection"; import { LocationTagData } from "types/entity"; -import type { SearchPerson } from "@/new/photos/services/search"; import { ClipSearchScores, DateValue, From 
d5d0e98197e9728ed7234f4b95302c3495358307 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 09:11:56 +0530 Subject: [PATCH 14/31] Person --- .../new/photos/services/user-entity.ts | 86 +++++++++++++------ 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index b78f246120..c695be5b93 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -1,5 +1,6 @@ import { decryptAssociatedB64Data } from "@/base/crypto/ente"; import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; +import { getKVN, setKV } from "@/base/kv"; import { apiURL } from "@/base/origins"; import { z } from "zod"; @@ -51,6 +52,10 @@ const defaultDiffLimit = 500; * expected to be associated with this entity type. */ interface UserEntity { + /** + * A UUID or nanoid for the entity. + */ + id: string; /** * Arbitrary data associated with the entity. The format of this data is * specific to each entity type. @@ -65,6 +70,7 @@ interface UserEntity { } const RemoteUserEntity = z.object({ + id: z.string(), /** Base64 string containing the encrypted contents of the entity. */ encryptedData: z.string(), /** Base64 string containing the decryption header. */ @@ -74,8 +80,8 @@ const RemoteUserEntity = z.object({ }); /** - * Fetch all user entities of the given type that have been created or updated - * since the given time. + * Fetch the next batch of user entities of the given type that have been + * created or updated since the given time. * * @param type The type of the entities to fetch. * @@ -113,7 +119,8 @@ export const userEntityDiff = async ( .parse(await res.json()).diff; return Promise.all( entities.map( - async ({ encryptedData, header, isDeleted, updatedAt }) => ({ + async ({ id, encryptedData, header, isDeleted, updatedAt }) => ({ + id, data: isDeleted ? 
undefined : await decrypt(encryptedData, header), @@ -123,6 +130,43 @@ export const userEntityDiff = async ( ); }; +/** + * Sync the {@link Person} entities that we have locally with remote. + * + * This fetches all the user entities corresponding to the "person_v2" entity + * type from remote that have been created, updated or deleted since the last + * time we checked. This diff is then applied to the data we have persisted + * locally. + */ +export const personDiff = async ( + entityKeyB64: string, +): Promise => { + const sinceTime = 0; + + const parse = (data: Uint8Array) => + RemotePerson.parse(JSON.parse(new TextDecoder().decode(data))); + + const result: RemotePerson[] = []; + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, no-constant-condition + while (true) { + const entities = await userEntityDiff("person", 0, entityKeyB64); + if (entities.length == 0) break; + + const latestUpdatedAt = entities.reduce( + (max, e) => Math.max(max, e.updatedAt), + sinceTime, + ); + + const people = entities + .map(({ data }) => (data ? parse(data) : undefined)) + .filter((p) => !!p); + // TODO-Cluster + console.log({ latestUpdatedAt, people }); + return people; + } + return result; +}; + /** * Zod schema for the "person" entity (the {@link RemotePerson} type). */ @@ -140,27 +184,21 @@ const RemotePerson = z.object({ /** * A "person" entity as synced via remote. */ -export type RemotePerson = z.infer; +type RemotePerson = z.infer; + +const latestUpdatedAtKey = (type: EntityType) => `latestUpdatedAt/${type}`; /** - * Fetch all Person entities that have been created or updated since the last - * time we checked. + * Return the locally persisted value for the latest `updatedAt` time for the + * given entity type. + * + * This is used to checkpoint diffs, so that we can resume fetching from the + * last time we did a fetch. 
*/ -export const personDiff = async ( - entityKeyB64: string, -): Promise => { - const sinceTime = 0; - const entities = await userEntityDiff("person", 0, entityKeyB64); - const latestUpdatedAt = entities.reduce( - (max, e) => Math.max(max, e.updatedAt), - sinceTime, - ); - const parse = (data: Uint8Array) => - RemotePerson.parse(JSON.parse(new TextDecoder().decode(data))); - const people = entities - .map(({ data }) => (data ? parse(data) : undefined)) - .filter((p) => !!p); - // TODO-Cluster - console.log({ latestUpdatedAt, people }); - return people; -}; +const latestUpdatedAt = (type: EntityType) => getKVN(latestUpdatedAtKey(type)); + +/** + * Setter for {@link latestUpdatedAt}. + */ +const setLatestUpdatedAt = (type: EntityType, value: number) => + setKV(latestUpdatedAtKey(type), value); From 1314b8ccbb3d298975a7d21792940dd79dcbbc5e Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 09:48:08 +0530 Subject: [PATCH 15/31] sync 2 --- web/apps/photos/src/services/searchService.ts | 4 +- .../new/photos/services/ml/cluster-new.ts | 2 +- web/packages/new/photos/services/ml/db.ts | 43 +++++++++++++++++++ .../new/photos/services/user-entity.ts | 43 +++++++++++-------- 4 files changed, 70 insertions(+), 22 deletions(-) diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts index f9507bd675..4ece8ab439 100644 --- a/web/apps/photos/src/services/searchService.ts +++ b/web/apps/photos/src/services/searchService.ts @@ -10,7 +10,7 @@ import { wipClusterEnable, } from "@/new/photos/services/ml"; import type { SearchPerson } from "@/new/photos/services/search"; -import { personDiff } from "@/new/photos/services/user-entity"; +import { syncPersons } from "@/new/photos/services/user-entity"; import { EnteFile } from "@/new/photos/types/file"; import * as chrono from "chrono-node"; import { t } from "i18next"; @@ -421,7 +421,7 @@ async function getAllPeople(limit: number = undefined) { if 
(process.env.NEXT_PUBLIC_ENTE_WIP_CL_FETCH) { const entityKey = await getEntityKey("person" as EntityType); - const peopleR = await personDiff(entityKey.data); + const peopleR = await syncPersons(entityKey.data); const r = peopleR.length; log.debug(() => ["people", peopleR]); diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 9bf342bab9..9953e474b8 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -60,7 +60,7 @@ export interface Person { * The ID of the face that should be used as the display face, to represent * this person in the UI. */ - avatarFaceID: string; + avatarFaceID: string | undefined; } /** diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index be35790591..4b8d621370 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -444,12 +444,55 @@ export const setFaceClusters = async (clusters: FaceCluster[]) => { return tx.done; }; +/** + * Update the person store to reflect the given changes, in order. + * + * @param changes A list of changes to apply. Each entry is either + * + * - A string, in which case the person with the given string as their ID + * should be deleted from the store, or + * + * - A person, in which case it should add or overwrite the entry for the + * corresponding person (as identified by their {@link id}). + */ +export const applyPersonDiff = async (changes: (string | Person)[]) => { + const db = await mlDB(); + const tx = db.transaction("person", "readwrite"); + // We want to do the changes in order, so we shouldn't use Promise.all. + for (const change of changes) { + await (typeof change == "string" + ? tx.store.delete(change) + : tx.store.put(change)); + } + return tx.done; +}; + +/** + * Add or overwrite the entry for the given {@link person}, as identified by + * their {@link id}. 
+ */ +export const savePerson = async (person: Person) => { + const db = await mlDB(); + const tx = db.transaction("person", "readwrite"); + await Promise.all([tx.store.put(person), tx.done]); +}; + +/** + * Delete the entry for the persons with the given {@link id}, if any. + */ +export const deletePerson = async (id: string) => { + const db = await mlDB(); + const tx = db.transaction("person", "readwrite"); + await Promise.all([tx.store.delete(id), tx.done]); +}; + /** * Replace the persons stored locally with the given ones. * * This function deletes all entries from the person object store, and then * inserts the given {@link persons} into it. */ +// TODO-Cluster: Remove me export const setPersons = async (persons: Person[]) => { const db = await mlDB(); const tx = db.transaction("person", "readwrite"); diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index c695be5b93..c952c61f40 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -3,6 +3,8 @@ import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; import { getKVN, setKV } from "@/base/kv"; import { apiURL } from "@/base/origins"; import { z } from "zod"; +import type { Person } from "./ml/cluster-new"; +import { applyPersonDiff } from "./ml/db"; /** * User entities are predefined lists of otherwise arbitrary data that the user @@ -135,36 +137,39 @@ export const userEntityDiff = async ( * * This fetches all the user entities corresponding to the "person_v2" entity * type from remote that have been created, updated or deleted since the last - * time we checked. This diff is then applied to the data we have persisted - * locally. + * time we checked. + * + * This diff is then applied to the data we have persisted locally. 
*/ -export const personDiff = async ( - entityKeyB64: string, -): Promise => { - const sinceTime = 0; +export const syncPersons = async (entityKeyB64: string) => { + const type: EntityType = "person"; - const parse = (data: Uint8Array) => - RemotePerson.parse(JSON.parse(new TextDecoder().decode(data))); + const parse = ({ id, data }: UserEntity): Person => { + const rp = RemotePerson.parse( + JSON.parse(new TextDecoder().decode(data)), + ); + return { + id, + name: rp.name, + clusterIDs: rp.assigned.map(({ id }) => id), + avatarFaceID: undefined, + }; + }; - const result: RemotePerson[] = []; + let sinceTime = (await latestUpdatedAt(type)) ?? 0; // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, no-constant-condition while (true) { - const entities = await userEntityDiff("person", 0, entityKeyB64); + const entities = await userEntityDiff(type, sinceTime, entityKeyB64); if (entities.length == 0) break; - const latestUpdatedAt = entities.reduce( + await applyPersonDiff(entities.map((e) => (e.data ? parse(e) : e.id))); + + sinceTime = entities.reduce( (max, e) => Math.max(max, e.updatedAt), sinceTime, ); - - const people = entities - .map(({ data }) => (data ? 
parse(data) : undefined)) - .filter((p) => !!p); - // TODO-Cluster - console.log({ latestUpdatedAt, people }); - return people; + await setLatestUpdatedAt(type, sinceTime); } - return result; }; /** From 050bbfbbb351509b96bf289819ebb97c2d7bee73 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 10:56:17 +0530 Subject: [PATCH 16/31] Discussion --- .../new/photos/services/ml/cluster-new.ts | 61 ++++++++++++++++--- .../new/photos/services/user-entity.ts | 23 ++++++- 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 9953e474b8..93d87e3fed 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -8,11 +8,11 @@ import { dotProduct } from "./math"; /** * A face cluster is an set of faces. * - * Each cluster has an id so that a Person (a set of clusters) can refer to it. + * Each cluster has an id so that a {@link Person} can refer to it. * - * The cluster is not directly synced to remote. But it can indirectly get - * synced if it gets attached to a person (which can be thought of as a named - * cluster). + * The cluster is not directly synced to remote. But it does indirectly get + * synced if it gets promoted or attached to a person (which can be thought of + * as a named or hidden clusters). */ export interface FaceCluster { /** @@ -29,13 +29,26 @@ export interface FaceCluster { } /** - * A Person is a set of clusters and some attached metadata. + * A Person is a set of clusters with some attached metadata. * - * For ease of transportation, the Person entity on remote is something like + * More precisely, a person is a a single cluster or a set of clusters that the + * user has interacted with. + * + * The most frequent interaction is naming a {@link FaceCluster}, which promotes + * it to a become a {@link Person}. 
The promotion comes with the ability to be + * synced with remote (as a "person_v2" user entity). + * + * There after, the user may attach more clusters to the same {@link Person}. + * + * The other form of interaction is hiding. The user may hide a single (unnamed) + * cluster, or they may hide a person. + * + * The Person entity on remote has clusters embedded within itself * * { name, clusters: [{ clusterID, faceIDs }] } * - * That is, the Person has the clusters embedded within itself. + * Since clusters don't get independently synced, one way to think about a + * Person is that it is an interaction with a cluster that we want to sync. */ export interface Person { /** @@ -47,8 +60,21 @@ export interface Person { id: string; /** * A name assigned by the user to this person. + * + * This can be missing or an empty string for an unnamed cluster that was + * hidden. */ - name: string; + name: string | undefined; + /** + * True if this person should be hidden. + * + * This can also be true for unnamed hidden clusters. When the user hides a + * single cluster that was offered as a suggestion to them on a client, then + * the client will create a new person entity without a name, and set its + * hidden flag to sync it with remote (so that other clients can also stop + * showing this cluster). + */ + isHidden: boolean; /** * An unordered set of ids of the clusters that belong to this person. * @@ -57,10 +83,15 @@ export interface Person { */ clusterIDs: string[]; /** - * The ID of the face that should be used as the display face, to represent - * this person in the UI. + * The ID of the face that should be used as the cover photo for this person + * (if the user has set one). */ avatarFaceID: string | undefined; + /** + * Locally determined ID of the "best" face that should be used as the + * display face, to represent this person in the UI. 
+ */ + displayFaceID: string | undefined; } /** @@ -90,6 +121,16 @@ export interface Person { * - They can attach more clusters to a person. * * - They can remove a cluster from a person. + * + * After clustering, we also do some routine cleanup. Faces belonging to files + * that have been deleted (including those in Trash) should be pruned off. + * + * We should not make strict assumptions about the clusters we get from remote. + * In particular, the same face ID can be in different clusters. In such cases + * we should assign it arbitrarily assign it to the last cluster we find it in. + * Such leeway is intentionally provided to allow clients some slack in how they + * implement the sync without making an blocking API request for every user + * interaction. */ export const clusterFaces = async (faceIndexes: FaceIndex[]) => { const t = Date.now(); diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index c952c61f40..6fc1fcb0ee 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -2,6 +2,7 @@ import { decryptAssociatedB64Data } from "@/base/crypto/ente"; import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; import { getKVN, setKV } from "@/base/kv"; import { apiURL } from "@/base/origins"; +import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; import type { Person } from "./ml/cluster-new"; import { applyPersonDiff } from "./ml/db"; @@ -93,6 +94,24 @@ const RemoteUserEntity = z.object({ * * @param entityKeyB64 The base64 encoded key to use for decrypting the * encrypted contents of the user entity. + * + * [Note: Diff contents] + * + * Unlike git diffs which track all changes, the diffs we get from remote are + * guaranteed to contain only one entry (upsert or delete) for particular Ente + * object. This holds true irrespective of the diff limit. 
+ * + * For example, in the user entity diff response, it is guaranteed that there + * will only be at max one entry for a particular entity id. The entry will have + * no data to indicate that the corresponding entity was deleted. Otherwise, + * when the data is present, it is taken as the creation of a new entity or the + * updation of an existing one. + * + * This behaviour comes from how remote stores the underlying, e.g., entities. A + * diff returns just entities whose updation times greater than the provided + * since time (limited to the given diff limit). So there will be at most one + * row for a particular entity id. And if that entity has been deleted, then the + * row will be a tombstone so data will be not be present. */ export const userEntityDiff = async ( type: EntityType, @@ -176,7 +195,7 @@ export const syncPersons = async (entityKeyB64: string) => { * Zod schema for the "person" entity (the {@link RemotePerson} type). */ const RemotePerson = z.object({ - name: z.string(), + name: z.string().nullish().transform(nullToUndefined), assigned: z.array( z.object({ // TODO-Cluster temporary modify @@ -184,6 +203,8 @@ const RemotePerson = z.object({ faces: z.string().array(), }), ), + isHidden: z.boolean(), + avatarFaceID: z.string().nullish().transform(nullToUndefined), }); /** From ad156bc33af83b8f580224e03fa8dd15094ee321 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 11:16:26 +0530 Subject: [PATCH 17/31] Diff --- web/packages/new/photos/services/ml/db.ts | 18 ++++++++++-------- .../new/photos/services/user-entity.ts | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index 4b8d621370..14bd0a58f9 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -447,7 +447,7 @@ export const setFaceClusters = async (clusters: FaceCluster[]) => { /** * Update the person store to reflect the 
given changes, in order. * - * @param changes A list of changes to apply. Each entry is either + * @param diff A list of changes to apply. Each entry is either * * - A string, in which case the person with the given string as their ID * should be deleted from the store, or @@ -455,15 +455,15 @@ export const setFaceClusters = async (clusters: FaceCluster[]) => { * - A person, in which case it should add or overwrite the entry for the * corresponding person (as identified by their {@link id}). */ -export const applyPersonDiff = async (changes: (string | Person)[]) => { +export const applyPersonDiff = async (diff: (string | Person)[]) => { const db = await mlDB(); const tx = db.transaction("person", "readwrite"); - // We want to do the changes in order, so we shouldn't use Promise.all. - for (const change of changes) { - await (typeof change == "string" - ? tx.store.delete(change) - : tx.store.put(change)); - } + // See: [Note: Diff response will have at most one entry for an id] + await Promise.all( + diff.map((d) => + typeof d == "string" ? tx.store.delete(d) : tx.store.put(d), + ), + ); return tx.done; }; @@ -471,6 +471,7 @@ export const applyPersonDiff = async (changes: (string | Person)[]) => { * Add or overwrite the entry for the given {@link person}, as identified by * their {@link id}. */ +// TODO-Cluster: Remove me export const savePerson = async (person: Person) => { const db = await mlDB(); const tx = db.transaction("person", "readwrite"); @@ -480,6 +481,7 @@ export const savePerson = async (person: Person) => { /** * Delete the entry for the persons with the given {@link id}, if any. 
*/ +// TODO-Cluster: Remove me export const deletePerson = async (id: string) => { const db = await mlDB(); const tx = db.transaction("person", "readwrite"); diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 6fc1fcb0ee..f4e171559d 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -95,7 +95,7 @@ const RemoteUserEntity = z.object({ * @param entityKeyB64 The base64 encoded key to use for decrypting the * encrypted contents of the user entity. * - * [Note: Diff contents] + * [Note: Diff response will have at most one entry for an id] * * Unlike git diffs which track all changes, the diffs we get from remote are * guaranteed to contain only one entry (upsert or delete) for particular Ente From 81885d6814b87f9dfdeaf6d232ab089f40b079b0 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 11:21:04 +0530 Subject: [PATCH 18/31] Tweak --- web/packages/new/photos/services/ml/cluster-new.ts | 14 +++++++------- web/packages/new/photos/services/user-entity.ts | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index 93d87e3fed..a483d6f0ec 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -65,6 +65,13 @@ export interface Person { * hidden. */ name: string | undefined; + /** + * An unordered set of ids of the clusters that belong to this person. + * + * For ergonomics of transportation and persistence this is an array, but it + * should conceptually be thought of as a set. + */ + clusterIDs: string[]; /** * True if this person should be hidden. * @@ -75,13 +82,6 @@ export interface Person { * showing this cluster). */ isHidden: boolean; - /** - * An unordered set of ids of the clusters that belong to this person. 
- * - * For ergonomics of transportation and persistence this is an array, but it - * should conceptually be thought of as a set. - */ - clusterIDs: string[]; /** * The ID of the face that should be used as the cover photo for this person * (if the user has set one). diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index f4e171559d..e469c72663 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -16,8 +16,8 @@ import { applyPersonDiff } from "./ml/db"; export type EntityType = | "person" /** - * A new version of the Person entity where the data is gzipped before - * encryption. + * The latest iteration of the Person entity format, where the data is + * gzipped before encryption. */ | "person_v2"; @@ -171,7 +171,9 @@ export const syncPersons = async (entityKeyB64: string) => { id, name: rp.name, clusterIDs: rp.assigned.map(({ id }) => id), - avatarFaceID: undefined, + isHidden: rp.isHidden, + avatarFaceID: rp.avatarFaceID, + displayFaceID: undefined, }; }; @@ -191,9 +193,7 @@ export const syncPersons = async (entityKeyB64: string) => { } }; -/** - * Zod schema for the "person" entity (the {@link RemotePerson} type). - */ +/** Zod schema for the {@link RemotePerson} type. */ const RemotePerson = z.object({ name: z.string().nullish().transform(nullToUndefined), assigned: z.array( @@ -208,7 +208,7 @@ const RemotePerson = z.object({ }); /** - * A "person" entity as synced via remote. + * A "person_v2" entity as synced via remote. 
*/ type RemotePerson = z.infer; From c164b0710a964c3a16af6fb5645ecdac36e8c829 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 11:24:29 +0530 Subject: [PATCH 19/31] key --- .../new/photos/services/user-entity.ts | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index e469c72663..e1d74504b3 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -1,6 +1,6 @@ import { decryptAssociatedB64Data } from "@/base/crypto/ente"; import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; -import { getKVN, setKV } from "@/base/kv"; +import { getKV, getKVN, setKV } from "@/base/kv"; import { apiURL } from "@/base/origins"; import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; @@ -151,6 +151,37 @@ export const userEntityDiff = async ( ); }; +const latestUpdatedAtKey = (type: EntityType) => `latestUpdatedAt/${type}`; + +/** + * Return the locally persisted value for the latest `updatedAt` time for the + * given entity {@link type}. + * + * This is used to checkpoint diffs, so that we can resume fetching from the + * last time we did a fetch. + */ +const latestUpdatedAt = (type: EntityType) => getKVN(latestUpdatedAtKey(type)); + +/** + * Setter for {@link latestUpdatedAt}. + */ +const setLatestUpdatedAt = (type: EntityType, value: number) => + setKV(latestUpdatedAtKey(type), value); + +const entityKeyKey = (type: EntityType) => `entityKey/${type}`; + +/** + * Return the locally persisted value for the entity key to use for decrypting + * the contents of entities of the given {@link type}. + */ +const entityKey = (type: EntityType) => getKV(entityKeyKey(type)); + +/** + * Setter for {@link entityKey}. 
+ */ +const setEntityKey = (type: EntityType, value: string) => + setKV(entityKeyKey(type), value); + /** * Sync the {@link Person} entities that we have locally with remote. * @@ -211,20 +242,3 @@ const RemotePerson = z.object({ * A "person_v2" entity as synced via remote. */ type RemotePerson = z.infer; - -const latestUpdatedAtKey = (type: EntityType) => `latestUpdatedAt/${type}`; - -/** - * Return the locally persisted value for the latest `updatedAt` time for the - * given entity type. - * - * This is used to checkpoint diffs, so that we can resume fetching from the - * last time we did a fetch. - */ -const latestUpdatedAt = (type: EntityType) => getKVN(latestUpdatedAtKey(type)); - -/** - * Setter for {@link latestUpdatedAt}. - */ -const setLatestUpdatedAt = (type: EntityType, value: number) => - setKV(latestUpdatedAtKey(type), value); From da4ba85c7c836bcdcede30e1921011207aa09d76 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 12:26:11 +0530 Subject: [PATCH 20/31] wip ek --- .../new/photos/services/user-entity.ts | 70 +++++++++++++++---- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index e1d74504b3..0c4764d6c3 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -151,6 +151,62 @@ export const userEntityDiff = async ( ); }; +/** + * Return the entity key that can be used to decrypt the encrypted contents of + * user entities of the given {@link type}. + * + * 1. We'll see if we have the (encrypted) entity key present locally. If so, + * we'll decrypt it using the user's master key and return it. + * + * 2. Otherwise we'll fetch the entity key for that type from remote. If found, + * we'll decrypte it using the user's master key and return it, also saving + * it locally for future use. + * + * 3. Otherwise we'll create a new one, save it locally and put it to remote. 
+ */ +const entityKey = (type: EntityType) => { + + +}; + +const savedRemoteUserEntityKeyKey = (type: EntityType) => `entityKey/${type}`; + +/** + * Return the locally persisted value for the entity key to use for decrypting + * the contents of entities of the given {@link type}. + */ +const savedRemoteUserEntityKey = (type: EntityType) => + getKV(savedRemoteUserEntityKeyKey(type)); + +/** + * Setter for {@link entityKey}. + */ +const setSavedRemoteUserEntityKey = (type: EntityType, value: string) => + setKV(savedRemoteUserEntityKeyKey(type), value); + +/** + * Fetch the latest encryption key for the given user entity {@link} type from + * remote. + */ +const getUserEntityKey = async ( + type: EntityType, +): Promise => { + const params = new URLSearchParams({ type }); + const url = await apiURL("/user-entity/key"); + const res = await fetch(`${url}?${params.toString()}`, { + headers: await authenticatedRequestHeaders(), + }); + ensureOk(res); + return RemoteUserEntityKey.parse(await res.json()); +}; + +const RemoteUserEntityKey = z.object({ + encryptedKey: z.string(), + header: z.string(), +}); + +type RemoteUserEntityKey = z.infer; + const latestUpdatedAtKey = (type: EntityType) => `latestUpdatedAt/${type}`; /** @@ -168,20 +224,6 @@ const latestUpdatedAt = (type: EntityType) => getKVN(latestUpdatedAtKey(type)); const setLatestUpdatedAt = (type: EntityType, value: number) => setKV(latestUpdatedAtKey(type), value); -const entityKeyKey = (type: EntityType) => `entityKey/${type}`; - -/** - * Return the locally persisted value for the entity key to use for decrypting - * the contents of entities of the given {@link type}. - */ -const entityKey = (type: EntityType) => getKV(entityKeyKey(type)); - -/** - * Setter for {@link entityKey}. - */ -const setEntityKey = (type: EntityType, value: string) => - setKV(entityKeyKey(type), value); - /** * Sync the {@link Person} entities that we have locally with remote. 
* From dffe364c51ccc653012f09a9bc3c1c1fea79094c Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 12:49:21 +0530 Subject: [PATCH 21/31] Read ss --- web/packages/base/session-store.ts | 31 +++++++++++++++++++ .../new/photos/services/user-entity.ts | 8 ++--- 2 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 web/packages/base/session-store.ts diff --git a/web/packages/base/session-store.ts b/web/packages/base/session-store.ts new file mode 100644 index 0000000000..81b162d3ba --- /dev/null +++ b/web/packages/base/session-store.ts @@ -0,0 +1,31 @@ +import { sharedCryptoWorker } from "@/base/crypto"; +import { z } from "zod"; + +/** + * Return the user's encryption key from session storage. + * + * Precondition: The user should be logged in. + */ +export const userEncryptionKey = async () => { + // TODO: Same value as the deprecated SESSION_KEYS.ENCRYPTION_KEY. + const value = sessionStorage.getItem("encryptionKey"); + if (!value) { + throw new Error( + "The user's encryption key was not found in session storage. Likely they are not logged in.", + ); + } + + const { encryptedData, key, nonce } = EncryptionKeyAttributes.parse( + JSON.parse(value), + ); + + const cryptoWorker = await sharedCryptoWorker(); + return cryptoWorker.decryptB64(encryptedData, nonce, key); +}; + +// TODO: Same as B64EncryptionResult. Revisit. +const EncryptionKeyAttributes = z.object({ + encryptedData: z.string(), + key: z.string(), + nonce: z.string(), +}); diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 0c4764d6c3..8b1734f096 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -165,24 +165,24 @@ export const userEntityDiff = async ( * 3. Otherwise we'll create a new one, save it locally and put it to remote. 
*/ const entityKey = (type: EntityType) => { - + const masterKey = await getActualKey(); }; -const savedRemoteUserEntityKeyKey = (type: EntityType) => `entityKey/${type}`; +const savedRemoteEntityKeyKey = (type: EntityType) => `entityKey/${type}`; /** * Return the locally persisted value for the entity key to use for decrypting * the contents of entities of the given {@link type}. */ const savedRemoteUserEntityKey = (type: EntityType) => - getKV(savedRemoteUserEntityKeyKey(type)); + getKV(savedRemoteEntityKeyKey(type)); /** * Setter for {@link entityKey}. */ const setSavedRemoteUserEntityKey = (type: EntityType, value: string) => - setKV(savedRemoteUserEntityKeyKey(type), value); + setKV(savedRemoteEntityKeyKey(type), value); /** * Fetch the latest encryption key for the given user entity {@link} type from From 3a85e9dcaeb1a79fee75052d9679acfc39463179 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 13:04:08 +0530 Subject: [PATCH 22/31] ek type --- web/packages/base/session-store.ts | 2 +- .../new/photos/services/user-entity.ts | 48 +++++++++++++------ 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/web/packages/base/session-store.ts b/web/packages/base/session-store.ts index 81b162d3ba..87a044fa56 100644 --- a/web/packages/base/session-store.ts +++ b/web/packages/base/session-store.ts @@ -6,7 +6,7 @@ import { z } from "zod"; * * Precondition: The user should be logged in. */ -export const userEncryptionKey = async () => { +export const usersEncryptionKey = async () => { // TODO: Same value as the deprecated SESSION_KEYS.ENCRYPTION_KEY. 
const value = sessionStorage.getItem("encryptionKey"); if (!value) { diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 8b1734f096..d3468f317a 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -1,7 +1,9 @@ +import { sharedCryptoWorker } from "@/base/crypto"; import { decryptAssociatedB64Data } from "@/base/crypto/ente"; import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; import { getKV, getKVN, setKV } from "@/base/kv"; import { apiURL } from "@/base/origins"; +import { usersEncryptionKey } from "@/base/session-store"; import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; import type { Person } from "./ml/cluster-new"; @@ -164,25 +166,40 @@ export const userEntityDiff = async ( * * 3. Otherwise we'll create a new one, save it locally and put it to remote. */ -const entityKey = (type: EntityType) => { - const masterKey = await getActualKey(); - +const entityKey = async (type: EntityType) => { + const encryptionKey = await usersEncryptionKey(); + const worker = await sharedCryptoWorker(); + const saved = await savedRemoteUserEntityKey(type); + if (saved) { + return worker.decryptB64( + saved.encryptedKey, + saved.header, + encryptionKey, + ); + } + return undefined; }; -const savedRemoteEntityKeyKey = (type: EntityType) => `entityKey/${type}`; +const entityKeyKey = (type: EntityType) => `entityKey/${type}`; /** - * Return the locally persisted value for the entity key to use for decrypting - * the contents of entities of the given {@link type}. + * Return the locally persisted {@link RemoteUserEntityKey}, if any, + * corresponding the given {@link type}. */ -const savedRemoteUserEntityKey = (type: EntityType) => - getKV(savedRemoteEntityKeyKey(type)); +const savedRemoteUserEntityKey = ( + type: EntityType, +): Promise => + getKV(entityKeyKey(type)).then((s) => + s ? 
RemoteUserEntityKey.parse(JSON.parse(s)) : undefined, + ); /** * Setter for {@link entityKey}. */ -const setSavedRemoteUserEntityKey = (type: EntityType, value: string) => - setKV(savedRemoteEntityKeyKey(type), value); +const saveRemoteUserEntityKey = ( + type: EntityType, + entityKey: RemoteUserEntityKey, +) => setKV(entityKeyKey(type), JSON.stringify(entityKey)); /** * Fetch the latest encryption key for the given user entity {@link} type from @@ -216,12 +233,13 @@ const latestUpdatedAtKey = (type: EntityType) => `latestUpdatedAt/${type}`; * This is used to checkpoint diffs, so that we can resume fetching from the * last time we did a fetch. */ -const latestUpdatedAt = (type: EntityType) => getKVN(latestUpdatedAtKey(type)); +const savedLatestUpdatedAt = (type: EntityType) => + getKVN(latestUpdatedAtKey(type)); /** - * Setter for {@link latestUpdatedAt}. + * Setter for {@link savedLatestUpdatedAt}. */ -const setLatestUpdatedAt = (type: EntityType, value: number) => +const saveLatestUpdatedAt = (type: EntityType, value: number) => setKV(latestUpdatedAtKey(type), value); /** @@ -250,7 +268,7 @@ export const syncPersons = async (entityKeyB64: string) => { }; }; - let sinceTime = (await latestUpdatedAt(type)) ?? 0; + let sinceTime = (await savedLatestUpdatedAt(type)) ?? 
0; // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, no-constant-condition while (true) { const entities = await userEntityDiff(type, sinceTime, entityKeyB64); @@ -262,7 +280,7 @@ export const syncPersons = async (entityKeyB64: string) => { (max, e) => Math.max(max, e.updatedAt), sinceTime, ); - await setLatestUpdatedAt(type, sinceTime); + await saveLatestUpdatedAt(type, sinceTime); } }; From ee5acf6a2e86b44524b7ed2a7fd83c0107e37cb2 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 13:46:27 +0530 Subject: [PATCH 23/31] Sketch --- web/packages/base/session-store.ts | 4 +- .../new/photos/services/user-entity.ts | 83 +++++++++++++++---- 2 files changed, 70 insertions(+), 17 deletions(-) diff --git a/web/packages/base/session-store.ts b/web/packages/base/session-store.ts index 87a044fa56..6e37bac534 100644 --- a/web/packages/base/session-store.ts +++ b/web/packages/base/session-store.ts @@ -2,11 +2,11 @@ import { sharedCryptoWorker } from "@/base/crypto"; import { z } from "zod"; /** - * Return the user's encryption key from session storage. + * Return the base64 encoded user's encryption key from session storage. * * Precondition: The user should be logged in. */ -export const usersEncryptionKey = async () => { +export const usersEncryptionKeyB64 = async () => { // TODO: Same value as the deprecated SESSION_KEYS.ENCRYPTION_KEY. 
const value = sessionStorage.getItem("encryptionKey"); if (!value) { diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index d3468f317a..8aa12b1810 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -1,9 +1,9 @@ import { sharedCryptoWorker } from "@/base/crypto"; import { decryptAssociatedB64Data } from "@/base/crypto/ente"; -import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; +import { authenticatedRequestHeaders, ensureOk, HTTPError } from "@/base/http"; import { getKV, getKVN, setKV } from "@/base/kv"; import { apiURL } from "@/base/origins"; -import { usersEncryptionKey } from "@/base/session-store"; +import { usersEncryptionKeyB64 } from "@/base/session-store"; import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; import type { Person } from "./ml/cluster-new"; @@ -165,19 +165,42 @@ export const userEntityDiff = async ( * it locally for future use. * * 3. Otherwise we'll create a new one, save it locally and put it to remote. + * + * See also, [Note: User entity keys]. */ const entityKey = async (type: EntityType) => { - const encryptionKey = await usersEncryptionKey(); + const encryptionKeyB64 = await usersEncryptionKeyB64(); const worker = await sharedCryptoWorker(); + + const decrypt = async ({ encryptedKey, header }: RemoteUserEntityKey) => { + return worker.decryptB64(encryptedKey, header, encryptionKeyB64); + }; + + // See if we already have it locally. const saved = await savedRemoteUserEntityKey(type); - if (saved) { - return worker.decryptB64( - saved.encryptedKey, - saved.header, - encryptionKey, - ); + if (saved) return decrypt(saved); + + // See if remote already has it. + const existing = await getUserEntityKey(type); + if (existing) { + // Only save it if we can decrypt it to avoid corrupting our local state + // in unforeseen circumstances. 
+ const result = decrypt(existing); + await saveRemoteUserEntityKey(type, existing); + return result; } - return undefined; + + // Nada. Create a new one, put it to remote, save it locally, and return. + // TODO-Cluster Keep this read only, only add the writeable bits after other + // stuff has been tested. + throw new Error("Not implemented"); + // const generatedKeyB64 = await worker.generateEncryptionKey(); + // const encryptedNewKey = await worker.encryptToB64( + // generatedKeyB64, + // encryptionKeyB64, + // ); + // await postUserEntityKey(type, newKey); + // return decrypt(newKey); }; const entityKeyKey = (type: EntityType) => `entityKey/${type}`; @@ -202,19 +225,30 @@ const saveRemoteUserEntityKey = ( ) => setKV(entityKeyKey(type), JSON.stringify(entityKey)); /** - * Fetch the latest encryption key for the given user entity {@link} type from - * remote. + * Fetch the encryption key for the given user entity {@link type} from remote. + * + * [Note: User entity keys] + * + * There is one encryption key (itself encrypted with the user's encryption key) + * for each user entity type. If the key doesn't exist on remote, then the + * client is expected to create one on the user's behalf. Remote will disallow + * attempts to multiple keys for the same user entity type. */ const getUserEntityKey = async ( type: EntityType, -): Promise => { +): Promise => { const params = new URLSearchParams({ type }); const url = await apiURL("/user-entity/key"); const res = await fetch(`${url}?${params.toString()}`, { headers: await authenticatedRequestHeaders(), }); - ensureOk(res); - return RemoteUserEntityKey.parse(await res.json()); + if (!res.ok) { + // Remote says HTTP 404 Not Found if there is no key yet for the user. 
+ if (res.status == 404) return undefined; + throw new HTTPError(res); + } else { + return RemoteUserEntityKey.parse(await res.json()); + } }; const RemoteUserEntityKey = z.object({ @@ -224,6 +258,25 @@ const RemoteUserEntityKey = z.object({ type RemoteUserEntityKey = z.infer; +/** + * Create a new encryption key for the given user entity {@link type} on remote. + * + * See: [Note: User entity keys] + */ +// TODO-Cluster remove export +export const postUserEntityKey = async ( + type: EntityType, + entityKey: RemoteUserEntityKey, +) => { + const url = await apiURL("/user-entity/key"); + const res = await fetch(url, { + method: "POST", + headers: await authenticatedRequestHeaders(), + body: JSON.stringify({ type, ...entityKey }), + }); + ensureOk(res); +}; + const latestUpdatedAtKey = (type: EntityType) => `latestUpdatedAt/${type}`; /** From fd1f3c6710b60dc4f3ea4accf29cdc53474bfde0 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 13:51:43 +0530 Subject: [PATCH 24/31] Sync 1 --- web/packages/new/photos/services/user-entity.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index 8aa12b1810..ba04f3082a 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -168,7 +168,7 @@ export const userEntityDiff = async ( * * See also, [Note: User entity keys]. */ -const entityKey = async (type: EntityType) => { +const getOrCreateEntityKeyB64 = async (type: EntityType) => { const encryptionKeyB64 = await usersEncryptionKeyB64(); const worker = await sharedCryptoWorker(); @@ -304,9 +304,11 @@ const saveLatestUpdatedAt = (type: EntityType, value: number) => * * This diff is then applied to the data we have persisted locally. 
*/ -export const syncPersons = async (entityKeyB64: string) => { +export const syncPersons = async () => { const type: EntityType = "person"; + const entityKeyB64 = await getOrCreateEntityKeyB64(type); + const parse = ({ id, data }: UserEntity): Person => { const rp = RemotePerson.parse( JSON.parse(new TextDecoder().decode(data)), @@ -327,10 +329,12 @@ export const syncPersons = async (entityKeyB64: string) => { const entities = await userEntityDiff(type, sinceTime, entityKeyB64); if (entities.length == 0) break; - await applyPersonDiff(entities.map((e) => (e.data ? parse(e) : e.id))); + await applyPersonDiff( + entities.map((entity) => (entity.data ? parse(entity) : entity.id)), + ); sinceTime = entities.reduce( - (max, e) => Math.max(max, e.updatedAt), + (max, entity) => Math.max(max, entity.updatedAt), sinceTime, ); await saveLatestUpdatedAt(type, sinceTime); From 4e1d80380c74ffa206254de6dda1d8128b03e838 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 13:56:19 +0530 Subject: [PATCH 25/31] pv2 --- web/apps/photos/src/services/searchService.ts | 11 +++++----- .../new/photos/services/user-entity.ts | 21 ++++++++++--------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts index 4ece8ab439..19719f351a 100644 --- a/web/apps/photos/src/services/searchService.ts +++ b/web/apps/photos/src/services/searchService.ts @@ -9,6 +9,7 @@ import { wipCluster, wipClusterEnable, } from "@/new/photos/services/ml"; +import { persons } from "@/new/photos/services/ml/db"; import type { SearchPerson } from "@/new/photos/services/search"; import { syncPersons } from "@/new/photos/services/user-entity"; import { EnteFile } from "@/new/photos/types/file"; @@ -27,7 +28,7 @@ import { import ComlinkSearchWorker from "utils/comlink/ComlinkSearchWorker"; import { getUniqueFiles } from "utils/file"; import { getFormattedDate } from "utils/search"; -import { getEntityKey, 
getLatestEntities } from "./entityService"; +import { getLatestEntities } from "./entityService"; import locationSearchService, { City } from "./locationSearchService"; const DIGITS = new Set(["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]); @@ -420,12 +421,10 @@ async function getAllPeople(limit: number = undefined) { if (!(await wipClusterEnable())) return []; if (process.env.NEXT_PUBLIC_ENTE_WIP_CL_FETCH) { - const entityKey = await getEntityKey("person" as EntityType); - const peopleR = await syncPersons(entityKey.data); - const r = peopleR.length; - log.debug(() => ["people", peopleR]); + await syncPersons(); + const people = await persons(); + log.debug(() => ["people", { people }]); - if (r) return []; return []; } diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index ba04f3082a..db276a7054 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -6,6 +6,7 @@ import { apiURL } from "@/base/origins"; import { usersEncryptionKeyB64 } from "@/base/session-store"; import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; +import { gunzip } from "./gzip"; import type { Person } from "./ml/cluster-new"; import { applyPersonDiff } from "./ml/db"; @@ -16,12 +17,11 @@ import { applyPersonDiff } from "./ml/db"; * e.g. location tags, people in their photos. */ export type EntityType = - | "person" /** * The latest iteration of the Person entity format, where the data is * gzipped before encryption. */ - | "person_v2"; + "person_v2"; /** * The maximum number of items to fetch in a single diff @@ -305,14 +305,12 @@ const saveLatestUpdatedAt = (type: EntityType, value: number) => * This diff is then applied to the data we have persisted locally. 
*/ export const syncPersons = async () => { - const type: EntityType = "person"; + const type: EntityType = "person_v2"; const entityKeyB64 = await getOrCreateEntityKeyB64(type); - const parse = ({ id, data }: UserEntity): Person => { - const rp = RemotePerson.parse( - JSON.parse(new TextDecoder().decode(data)), - ); + const parse = async (id: string, data: Uint8Array): Promise => { + const rp = RemotePerson.parse(JSON.parse(await gunzip(data))); return { id, name: rp.name, @@ -330,7 +328,11 @@ export const syncPersons = async () => { if (entities.length == 0) break; await applyPersonDiff( - entities.map((entity) => (entity.data ? parse(entity) : entity.id)), + await Promise.all( + entities.map(async ({ id, data }) => + data ? await parse(id, data) : id, + ), + ), ); sinceTime = entities.reduce( @@ -346,8 +348,7 @@ const RemotePerson = z.object({ name: z.string().nullish().transform(nullToUndefined), assigned: z.array( z.object({ - // TODO-Cluster temporary modify - id: z.number().transform((n) => n.toString()), // TODO z.string person_v2 + id: z.string(), faces: z.string().array(), }), ), From bb56fddd45dd8cebb5b33a8333d4c3259a79f19b Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 14:20:23 +0530 Subject: [PATCH 26/31] lf --- web/apps/photos/src/services/searchService.ts | 2 -- web/packages/new/photos/services/ml/index.ts | 11 ++++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts index 19719f351a..adc33fa40d 100644 --- a/web/apps/photos/src/services/searchService.ts +++ b/web/apps/photos/src/services/searchService.ts @@ -424,8 +424,6 @@ async function getAllPeople(limit: number = undefined) { await syncPersons(); const people = await persons(); log.debug(() => ["people", { people }]); - - return []; } let people: Array = []; // await mlIDbStorage.getAllPeople(); diff --git a/web/packages/new/photos/services/ml/index.ts 
b/web/packages/new/photos/services/ml/index.ts index 1981df4394..1ee85c37e6 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -341,7 +341,16 @@ export const wipCluster = async () => { const result: SearchPerson[] = []; for (const person of people) { - const avatarFaceID = person.avatarFaceID; + let avatarFaceID = person.avatarFaceID; + // TODO-Cluster + // Temp + if (!avatarFaceID) { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + avatarFaceID = person.clusterIDs + .map((id) => clusterByID.get(id)) + .flatMap((cluster) => cluster?.faceIDs ?? [])[0]!; + } + person.clusterIDs; const avatarFaceFileID = fileIDFromFaceID(avatarFaceID); const avatarFaceFile = localFilesByID.get(avatarFaceFileID ?? 0); if (!avatarFaceFileID || !avatarFaceFile) { From 6a8fe7100031c502e85c37da8e0e104638fc6eac Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 14:38:33 +0530 Subject: [PATCH 27/31] Extra --- web/apps/photos/src/services/searchService.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web/apps/photos/src/services/searchService.ts b/web/apps/photos/src/services/searchService.ts index adc33fa40d..0c203b1189 100644 --- a/web/apps/photos/src/services/searchService.ts +++ b/web/apps/photos/src/services/searchService.ts @@ -417,9 +417,12 @@ function convertSuggestionToSearchQuery(option: Suggestion): Search { } } +let done = false; async function getAllPeople(limit: number = undefined) { if (!(await wipClusterEnable())) return []; + if (done) return []; + done = true; if (process.env.NEXT_PUBLIC_ENTE_WIP_CL_FETCH) { await syncPersons(); const people = await persons(); From 367a715aa8c8aacfa5d7bf6f1ff12cd53e94e97f Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 15:19:10 +0530 Subject: [PATCH 28/31] Reduce CLIP threshold to 0.175 --- web/packages/new/photos/services/ml/clip.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/web/packages/new/photos/services/ml/clip.ts b/web/packages/new/photos/services/ml/clip.ts index b226ef10cb..e81036ae56 100644 --- a/web/packages/new/photos/services/ml/clip.ts +++ b/web/packages/new/photos/services/ml/clip.ts @@ -186,5 +186,8 @@ export const clipMatches = async ( // This code is on the hot path, so these optimizations help. [fileID, dotProduct(embedding, textEmbedding)] as const, ); - return new Map(items.filter(([, score]) => score >= 0.2)); + // This score threshold was obtain heuristically. 0.2 generally gives solid + // results, and around 0.15 we start getting many false positives (all this + // is query dependent too). + return new Map(items.filter(([, score]) => score >= 0.175)); }; From b6b87c196fcddff01916a9857d9131584457ab2e Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 15:20:05 +0530 Subject: [PATCH 29/31] Update comment --- web/packages/new/photos/services/ml/cluster-new.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/packages/new/photos/services/ml/cluster-new.ts b/web/packages/new/photos/services/ml/cluster-new.ts index a483d6f0ec..60a9a1e9fb 100644 --- a/web/packages/new/photos/services/ml/cluster-new.ts +++ b/web/packages/new/photos/services/ml/cluster-new.ts @@ -52,7 +52,7 @@ export interface FaceCluster { */ export interface Person { /** - * A nanoid for this person. + * A UUID or nanoid for this person. * * This is the ID of the Person user entity, it is not contained as part of * the Person entity payload. 
From e946749b2e4f1978d61f82bbcf02e223c32a0122 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 15:29:28 +0530 Subject: [PATCH 30/31] Fixes for person v2 --- web/packages/new/photos/services/ml/db.ts | 6 +----- web/packages/new/photos/services/user-entity.ts | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index 14bd0a58f9..6ae98519c3 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -109,11 +109,7 @@ const openMLDB = async () => { } // TODO-Cluster if (oldVersion < 3) { - if ( - newVersion && - newVersion > 10 && - process.env.NEXT_PUBLIC_ENTE_WIP_CL - ) { + if (process.env.NEXT_PUBLIC_ENTE_WIP_CL) { db.createObjectStore("face-cluster", { keyPath: "id" }); db.createObjectStore("person", { keyPath: "id" }); } diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index db276a7054..fb8330beb2 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -76,10 +76,18 @@ interface UserEntity { const RemoteUserEntity = z.object({ id: z.string(), - /** Base64 string containing the encrypted contents of the entity. */ - encryptedData: z.string(), - /** Base64 string containing the decryption header. */ - header: z.string(), + /** + * Base64 string containing the encrypted contents of the entity. + * + * Will be `null` when isDeleted is true. + */ + encryptedData: z.string().nullable(), + /** + * Base64 string containing the decryption header. + * + * Will be `null` when isDeleted is true. 
+ */ + header: z.string().nullable(), isDeleted: z.boolean(), updatedAt: z.number(), }); From e21a4b4f9e7a9635136d968d31dfee0984ac3c86 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Wed, 14 Aug 2024 15:39:04 +0530 Subject: [PATCH 31/31] Handle deleted better --- .../new/photos/services/user-entity.ts | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts index fb8330beb2..260317341a 100644 --- a/web/packages/new/photos/services/user-entity.ts +++ b/web/packages/new/photos/services/user-entity.ts @@ -74,6 +74,7 @@ interface UserEntity { updatedAt: number; } +/** Zod schema for {@link RemoteUserEntity} */ const RemoteUserEntity = z.object({ id: z.string(), /** @@ -92,6 +93,9 @@ const RemoteUserEntity = z.object({ updatedAt: z.number(), }); +/** An item in the user entity diff response we get from remote. */ +type RemoteUserEntity = z.infer<typeof RemoteUserEntity>; + /** * Fetch the next batch of user entities of the given type that have been * created or updated since the given time. @@ -128,6 +132,21 @@ export const userEntityDiff = async ( sinceTime: number, entityKeyB64: string, ): Promise<UserEntity[]> => { + const parse = async ({ + id, + encryptedData, + header, + isDeleted, + updatedAt, + }: RemoteUserEntity) => ({ + id, + data: + encryptedData && header && !isDeleted + ? await decrypt(encryptedData, header) + : undefined, + updatedAt, + }); + const decrypt = (encryptedDataB64: string, decryptionHeaderB64: string) => decryptAssociatedB64Data({ encryptedDataB64, @@ -148,17 +167,7 @@ export const userEntityDiff = async ( const entities = z .object({ diff: z.array(RemoteUserEntity) }) .parse(await res.json()).diff; - return Promise.all( - entities.map( - async ({ id, encryptedData, header, isDeleted, updatedAt }) => ({ - id, - data: isDeleted - ?
undefined - : await decrypt(encryptedData, header), - updatedAt, - }), - ), - ); + return Promise.all(entities.map(parse)); }; /**