[desktop] person => cgroup (#2727)

https://github.com/ente-io/ente/pull/2718
This commit is contained in:
Manav Rathi
2024-08-16 15:45:54 +05:30
committed by GitHub
7 changed files with 140 additions and 127 deletions

View File

@@ -9,9 +9,9 @@ import {
wipCluster,
wipClusterEnable,
} from "@/new/photos/services/ml";
import { persons } from "@/new/photos/services/ml/db";
import { clusterGroups } from "@/new/photos/services/ml/db";
import type { SearchPerson } from "@/new/photos/services/search";
import { syncPersons } from "@/new/photos/services/user-entity";
import { syncCGroups } from "@/new/photos/services/user-entity";
import { EnteFile } from "@/new/photos/types/file";
import * as chrono from "chrono-node";
import { t } from "i18next";
@@ -424,8 +424,8 @@ async function getAllPeople(limit: number = undefined) {
done = true;
if (process.env.NEXT_PUBLIC_ENTE_WIP_CL_FETCH) {
await syncPersons();
const people = await persons();
await syncCGroups();
const people = await clusterGroups();
log.debug(() => ["people", { people }]);
}

View File

@@ -1,18 +1,17 @@
import { newNonSecureID } from "@/base/id-worker";
import log from "@/base/log";
import { ensure } from "@/utils/ensure";
import { faceClusters, persons } from "./db";
import { clusterGroups, faceClusters } from "./db";
import type { Face, FaceIndex } from "./face";
import { dotProduct } from "./math";
/**
* A face cluster is a set of faces.
*
* Each cluster has an id so that a {@link Person} can refer to it.
* Each cluster has an id so that a {@link CGroup} can refer to it.
*
* The cluster is not directly synced to remote. But it does indirectly get
* synced if it gets promoted or attached to a person (which can be thought of
* as a named or hidden clusters).
* The cluster is not directly synced to remote. Only clusters that the user
* interacts with get synced to remote, as part of a {@link CGroup}.
*/
export interface FaceCluster {
/**
@@ -29,67 +28,77 @@ export interface FaceCluster {
}
/**
* A Person is a set of clusters with some attached metadata.
* A cgroup ("cluster group") is a group of clusters (possibly containing a
* single cluster) that the user has interacted with.
*
* More precisely, a person is a a single cluster or a set of clusters that the
* user has interacted with.
* Interactions include hiding, merging and giving a name and/or a cover photo.
*
* The most frequent interaction is naming a {@link FaceCluster}, which promotes
* it to a become a {@link Person}. The promotion comes with the ability to be
* synced with remote (as a "person_v2" user entity).
* it to become a {@link CGroup}. The promotion comes with the ability to be
* synced with remote (as a "cgroup" user entity).
*
* There after, the user may attach more clusters to the same {@link Person}.
* Thereafter, the user may attach more clusters to the same {@link CGroup}.
*
* > A named cluster group can be thought of as a "person", though this is not
* > necessarily an accurate characterization. e.g. there can be a named cluster
* > group that contains face clusters of pets.
*
* The other form of interaction is hiding. The user may hide a single (unnamed)
* cluster, or they may hide a person.
* cluster, or they may hide a named {@link CGroup}. In both cases, we promote
* the cluster to a CGroup if needed so that their request to hide gets synced.
*
* The Person entity on remote has clusters embedded within itself
* While in our local representation we separately maintain clusters and link to
* them from within CGroups by their clusterID, in the remote representation
* clusters themselves don't get synced. Instead, the "cgroup" entities synced
* with remote contain the clusters within themselves. So a group that gets
* synced with remote looks something like:
*
* { name, clusters: [{ clusterID, faceIDs }] }
* { id, name, clusters: [{ clusterID, faceIDs }] }
*
* Since clusters don't get independently synced, one way to think about a
* Person is that it is an interaction with a cluster that we want to sync.
*/
export interface Person {
export interface CGroup {
/**
* A UUID or nanoid for this person.
* A nanoid for this cluster group.
*
* This is the ID of the Person user entity, it is not contained as part of
* the Person entity payload.
* This is the ID of the "cgroup" user entity, it is not contained as part
* of the group entity payload itself.
*/
id: string;
/**
* A name assigned by the user to this person.
* A name assigned by the user to this cluster group.
*
* This can be missing or an empty string for an unnamed cluster that was
* This should be set to an empty string for an unnamed cluster that was
* hidden.
*/
name: string | undefined;
/**
* An unordered set of ids of the clusters that belong to this person.
* An unordered set of ids of the clusters that belong to this group.
*
* For ergonomics of transportation and persistence this is an array, but it
* should conceptually be thought of as a set.
*/
clusterIDs: string[];
/**
* True if this person should be hidden.
* True if this cluster group should be hidden.
*
* This can also be true for unnamed hidden clusters. When the user hides a
* single cluster that was offered as a suggestion to them on a client, then
* the client will create a new person entity without a name, and set its
* hidden flag to sync it with remote (so that other clients can also stop
* showing this cluster).
* The user can hide both named cluster groups and single unnamed clusters.
* If the user hides a single cluster that was offered as a suggestion to
* them on a client, the client will create a new unnamed cgroup containing
* it, and set its hidden flag to sync it with remote (so that other clients
* can also stop showing this cluster).
*/
isHidden: boolean;
/**
* The ID of the face that should be used as the cover photo for this person
* (if the user has set one).
* The ID of the face that should be used as the cover photo for this
* cluster group (if the user has set one).
*
* {@link avatarFaceID} is the user selected face. {@link displayFaceID} is
* the automatic placeholder.
*/
avatarFaceID: string | undefined;
/**
* Locally determined ID of the "best" face that should be used as the
* display face, to represent this person in the UI.
* display face, to represent this cluster group in the UI.
*/
displayFaceID: string | undefined;
}
@@ -99,9 +108,11 @@ export interface Person {
*
* [Note: Face clustering algorithm]
*
* A person consists of clusters, each of which itself is a set of faces.
* A (cluster) group consists of clusters, each of which itself is a set of
* faces.
*
* The clusters are generated using locally by clients using this algorithm:
* The clusters are generated locally by clients using the following
* (pseudo-) algorithm:
*
* 1. clusters = [] initially, or fetched from remote.
*
@@ -116,11 +127,11 @@ export interface Person {
* following actions to the list of clusters that they can see:
*
* - They can provide a name for a cluster. This upgrades a cluster into a
* "Person", which then gets synced via remote to all their devices.
* "cgroup", which then gets synced via remote to all their devices.
*
* - They can attach more clusters to a person.
* - They can attach more clusters to a cgroup.
*
* - They can remove a cluster from a person.
* - They can remove a cluster from a cgroup.
*
* After clustering, we also do some routine cleanup. Faces belonging to files
* that have been deleted (including those in Trash) should be pruned off.
@@ -226,14 +237,14 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => {
// Prune too small clusters.
const validClusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1);
// For each person, use the highest scoring face in any of its clusters as
// its display face.
// For each cluster group, use the highest scoring face in any of its
// clusters as its display face.
const faceForFaceID = new Map(faces.map((f) => [f.faceID, f]));
const people = await persons();
const cgroups = await clusterGroups();
for (const person of people) {
person.avatarFaceID = person.clusterIDs
for (const cgroup of cgroups) {
cgroup.avatarFaceID = cgroup.clusterIDs
.map((clusterID) => clusterIndexForClusterID.get(clusterID))
.map((clusterIndex) =>
clusterIndex ? clusters[clusterIndex] : undefined,
@@ -254,7 +265,7 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => {
validClusters,
clusterIndexForClusterID,
clusterIDForFaceID,
people,
cgroups,
},
]);
log.debug(
@@ -262,7 +273,7 @@ export const clusterFaces = async (faceIndexes: FaceIndex[]) => {
`Clustered ${faces.length} faces into ${validClusters.length} clusters (${Date.now() - t} ms)`,
);
return { clusters: validClusters, people };
return { clusters: validClusters, cgroups };
};
/**

View File

@@ -3,32 +3,47 @@ import log from "@/base/log";
import localForage from "@ente/shared/storage/localForage";
import { deleteDB, openDB, type DBSchema } from "idb";
import type { LocalCLIPIndex } from "./clip";
import type { FaceCluster, Person } from "./cluster-new";
import type { CGroup, FaceCluster } from "./cluster-new";
import type { LocalFaceIndex } from "./face";
/**
* ML DB schema.
*
* The "ML" database is made of three object stores:
* The "ML" database is made of the lower level "index" object stores, and
* higher level "cluster" object stores.
*
* - "file-status": Contains {@link FileStatus} objects, one for each
* {@link EnteFile} that the ML subsystem knows about. Periodically (and when
* required), this is synced with the list of files that the current client
* knows about locally.
* The index related object stores are the following:
*
* - "face-index": Contains {@link LocalFaceIndex} objects, either indexed
* locally or fetched from remote.
* - "file-status": Contains {@link FileStatus} objects, one for each
* {@link EnteFile} that the ML subsystem knows about. Periodically (and
* when required), this is synced with the list of files that the current
* client knows about locally.
*
* - "clip-index": Contains {@link LocalCLIPIndex} objects, either indexed
* locally or fetched from remote.
* - "face-index": Contains {@link LocalFaceIndex} objects, either indexed
* locally or fetched from remote.
*
* All the stores are keyed by {@link fileID}. The "file-status" contains
* - "clip-index": Contains {@link LocalCLIPIndex} objects, either indexed
* locally or fetched from remote.
*
* These three stores are keyed by {@link fileID}. The "file-status" contains
* book-keeping about the indexing process (whether or not a file needs
* indexing, or if there were errors doing so), while the other stores contain
* the actual indexing results.
*
* In tandem, these serve as the underlying storage for the functions exposed by
* the ML database.
* In tandem, these serve as the underlying storage for the indexes maintained
* in the ML database.
*
* The cluster related object stores are the following:
*
* - "face-cluster": Contains {@link FaceCluster} objects, one for each
* cluster of faces that was either produced locally by the clustering
* algorithm or synced from remote. It is indexed by the (cluster) ID.
*
* - "cluster-group": Contains {@link CGroup} objects, one for each group of
* clusters that were synced from remote. The client can also locally
* generate cluster groups on certain user interactions, but these too will
* eventually get synced with remote. This object store is indexed by the
* (cgroup) ID.
*/
interface MLDBSchema extends DBSchema {
"file-status": {
@@ -48,9 +63,9 @@ interface MLDBSchema extends DBSchema {
key: string;
value: FaceCluster;
};
person: {
"cluster-group": {
key: string;
value: Person;
value: CGroup;
};
}
@@ -111,7 +126,7 @@ const openMLDB = async () => {
if (oldVersion < 3) {
if (process.env.NEXT_PUBLIC_ENTE_WIP_CL) {
db.createObjectStore("face-cluster", { keyPath: "id" });
db.createObjectStore("person", { keyPath: "id" });
db.createObjectStore("cluster-group", { keyPath: "id" });
}
}
},
@@ -419,18 +434,18 @@ export const faceClusters = async () => {
};
/**
* Return all person entries (aka "people") present locally.
* Return all cluster group entries (aka "cgroups") present locally.
*/
export const persons = async () => {
export const clusterGroups = async () => {
const db = await mlDB();
return db.getAll("person");
return db.getAll("cluster-group");
};
/**
* Replace the face clusters stored locally with the given ones.
*
* This function deletes all entries from the person object store, and then
* inserts the given {@link clusters} into it.
* This function deletes all entries from the face cluster object store, and
* then inserts the given {@link clusters} into it.
*/
export const setFaceClusters = async (clusters: FaceCluster[]) => {
const db = await mlDB();
@@ -441,19 +456,19 @@ export const setFaceClusters = async (clusters: FaceCluster[]) => {
};
/**
* Update the person store to reflect the given changes, in order.
* Update the cluster group store to reflect the given changes.
*
* @param diff A list of changes to apply. Each entry is either
*
* - A string, in which case the person with the given string as their ID
* should be deleted from the store, or
* - A string, in which case the cluster group with the given string as its
* ID should be deleted from the store, or
*
* - A person, in which case it should add or overwrite the entry for the
* corresponding person (as identified by their {@link id}).
* - A cgroup, in which case it should add or overwrite the entry for the
* corresponding cluster group (as identified by its {@link id}).
*/
export const applyPersonDiff = async (diff: (string | Person)[]) => {
export const applyCGroupDiff = async (diff: (string | CGroup)[]) => {
const db = await mlDB();
const tx = db.transaction("person", "readwrite");
const tx = db.transaction("cluster-group", "readwrite");
// See: [Note: Diff response will have at most one entry for an id]
await Promise.all(
diff.map((d) =>
@@ -464,37 +479,22 @@ export const applyPersonDiff = async (diff: (string | Person)[]) => {
};
/**
* Add or overwrite the entry for the given {@link person}, as identified by
* Add or overwrite the entry for the given {@link cgroup}, as identified by
* its {@link id}.
*/
// TODO-Cluster: Remove me
export const savePerson = async (person: Person) => {
export const saveClusterGroup = async (cgroup: CGroup) => {
const db = await mlDB();
const tx = db.transaction("person", "readwrite");
await Promise.all([tx.store.put(person), tx.done]);
const tx = db.transaction("cluster-group", "readwrite");
await Promise.all([tx.store.put(cgroup), tx.done]);
};
/**
* Delete the entry for the persons with the given {@link id}, if any.
* Delete the entry (if any) for the cluster group with the given {@link id}.
*/
// TODO-Cluster: Remove me
export const deletePerson = async (id: string) => {
export const deleteClusterGroup = async (id: string) => {
const db = await mlDB();
const tx = db.transaction("person", "readwrite");
const tx = db.transaction("cluster-group", "readwrite");
await Promise.all([tx.store.delete(id), tx.done]);
};
/**
* Replace the persons stored locally with the given ones.
*
* This function deletes all entries from the person object store, and then
* inserts the given {@link persons} into it.
*/
// TODO-Cluster: Remove me
export const setPersons = async (persons: Person[]) => {
const db = await mlDB();
const tx = db.transaction("person", "readwrite");
await tx.store.clear();
await Promise.all(persons.map((person) => tx.store.put(person)));
return tx.done;
};

View File

@@ -347,7 +347,7 @@ export const wipCluster = async () => {
if (last) return last;
const { clusters, people } = await clusterFaces(await faceIndexes());
const { clusters, cgroups } = await clusterFaces(await faceIndexes());
const clusterByID = new Map(
clusters.map((cluster) => [cluster.id, cluster]),
);
@@ -356,31 +356,31 @@ export const wipCluster = async () => {
const localFilesByID = new Map(localFiles.map((f) => [f.id, f]));
const result: SearchPerson[] = [];
for (const person of people) {
let avatarFaceID = person.avatarFaceID;
for (const cgroup of cgroups) {
let avatarFaceID = cgroup.avatarFaceID;
// TODO-Cluster
// Temp
if (!avatarFaceID) {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
avatarFaceID = person.clusterIDs
avatarFaceID = cgroup.clusterIDs
.map((id) => clusterByID.get(id))
.flatMap((cluster) => cluster?.faceIDs ?? [])[0]!;
}
person.clusterIDs;
cgroup.clusterIDs;
const avatarFaceFileID = fileIDFromFaceID(avatarFaceID);
const avatarFaceFile = localFilesByID.get(avatarFaceFileID ?? 0);
if (!avatarFaceFileID || !avatarFaceFile) {
assertionFailed(`Face ID ${avatarFaceID} without local file`);
continue;
}
const files = person.clusterIDs
const files = cgroup.clusterIDs
.map((id) => clusterByID.get(id))
.flatMap((cluster) => cluster?.faceIDs ?? [])
.map((faceID) => fileIDFromFaceID(faceID))
.filter((fileID) => fileID !== undefined);
result.push({
id: person.id,
name: person.name,
id: cgroup.id,
name: cgroup.name,
files,
displayFaceID: avatarFaceID,
displayFaceFile: avatarFaceFile,

View File

@@ -78,7 +78,7 @@ interface IndexableItem {
export class MLWorker {
private electron: ElectronMLWorker | undefined;
private delegate: MLWorkerDelegate | undefined;
private state: "idle" | "tick" | "pull" | "indexing" = "idle";
private state: "idle" | "tick" | "indexing" = "idle";
private liveQ: IndexableItem[] = [];
private idleTimeout: ReturnType<typeof setTimeout> | undefined;
private idleDuration = idleDurationStart; /* unit: seconds */

View File

@@ -1,7 +1,7 @@
import type { EnteFile } from "@/new/photos/types/file";
/**
* A massaged version of {@link Person} suitable for being shown in search
* A massaged version of {@link CGroup} suitable for being shown in search
* results.
*/
export interface SearchPerson {

View File

@@ -7,21 +7,23 @@ import { usersEncryptionKeyB64 } from "@/base/session-store";
import { nullToUndefined } from "@/utils/transform";
import { z } from "zod";
import { gunzip } from "./gzip";
import type { Person } from "./ml/cluster-new";
import { applyPersonDiff } from "./ml/db";
import type { CGroup } from "./ml/cluster-new";
import { applyCGroupDiff } from "./ml/db";
/**
* User entities are predefined lists of otherwise arbitrary data that the user
* can store for their account.
*
* e.g. location tags, people in their photos.
* e.g. location tags, cluster groups.
*/
export type EntityType =
/**
* The latest iteration of the Person entity format, where the data is
* gzipped before encryption.
* A cluster group.
*
* Format: An encrypted string containing a gzipped JSON string representing
* the cgroup data.
*/
"person_v2";
"cgroup";
/**
* The maximum number of items to fetch in a single diff
@@ -313,21 +315,21 @@ const saveLatestUpdatedAt = (type: EntityType, value: number) =>
setKV(latestUpdatedAtKey(type), value);
/**
* Sync the {@link Person} entities that we have locally with remote.
* Sync the {@link CGroup} entities that we have locally with remote.
*
* This fetches all the user entities corresponding to the "person_v2" entity
* type from remote that have been created, updated or deleted since the last
* time we checked.
* This fetches all the user entities corresponding to the "cgroup" entity type
* from remote that have been created, updated or deleted since the last time we
* checked.
*
* This diff is then applied to the data we have persisted locally.
*/
export const syncPersons = async () => {
const type: EntityType = "person_v2";
export const syncCGroups = async () => {
const type: EntityType = "cgroup";
const entityKeyB64 = await getOrCreateEntityKeyB64(type);
const parse = async (id: string, data: Uint8Array): Promise<Person> => {
const rp = RemotePerson.parse(JSON.parse(await gunzip(data)));
const parse = async (id: string, data: Uint8Array): Promise<CGroup> => {
const rp = RemoteCGroup.parse(JSON.parse(await gunzip(data)));
return {
id,
name: rp.name,
@@ -344,7 +346,7 @@ export const syncPersons = async () => {
const entities = await userEntityDiff(type, sinceTime, entityKeyB64);
if (entities.length == 0) break;
await applyPersonDiff(
await applyCGroupDiff(
await Promise.all(
entities.map(async ({ id, data }) =>
data ? await parse(id, data) : id,
@@ -360,8 +362,8 @@ export const syncPersons = async () => {
}
};
/** Zod schema for the {@link RemotePerson} type. */
const RemotePerson = z.object({
/** Zod schema for the {@link RemoteCGroup} type. */
const RemoteCGroup = z.object({
name: z.string().nullish().transform(nullToUndefined),
assigned: z.array(
z.object({
@@ -374,6 +376,6 @@ const RemotePerson = z.object({
});
/**
* A "person_v2" entity as synced via remote.
* Contents of a "cgroup" user entity, as synced via remote.
*/
type RemotePerson = z.infer<typeof RemotePerson>;
type RemoteCGroup = z.infer<typeof RemoteCGroup>;