From 3664532f9164bb3dfb6af83c19c8a9df1a47c28d Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 28 May 2024 13:49:03 +0530 Subject: [PATCH] Document --- web/apps/photos/src/services/face/db.ts | 122 ++++++++++++++++++++++-- 1 file changed, 116 insertions(+), 6 deletions(-) diff --git a/web/apps/photos/src/services/face/db.ts b/web/apps/photos/src/services/face/db.ts index 8edaaca6b8..ff0a1061b9 100644 --- a/web/apps/photos/src/services/face/db.ts +++ b/web/apps/photos/src/services/face/db.ts @@ -1,3 +1,5 @@ +import type { Box, Point } from "./types"; + /** * The faces in a file (and an embedding for each of them). * @@ -14,14 +16,122 @@ * In both these scenarios (whether generated locally or fetched from remote), * we end up with an face index described by this {@link FaceIndex} interface. * - * It has a top level envelope with information about the client (in particular + * It has a top level envelope with information about the file (in particular * the primary key {@link fileID}), an inner envelope {@link faceEmbedding} with * metadata about the indexing, and an array of {@link faces} each containing * the result of a face detection and an embedding for that detected face. * - * This last one (faceEmbedding > faces > embedding) is the "actual" embedding, - * but sometimes we colloquially refer to the inner envelope (the - * "faceEmbedding") also an embedding since a file can have other types of - * embedding (envelopes) like a "clipEmbedding". + * The word embedding is used to refer to two things: The last one (faceEmbedding > + * faces > embedding) is the "actual" embedding, but sometimes we colloquially + * refer to the inner envelope (the "faceEmbedding") also as an embedding since a + * file can have other types of embedding (envelopes), e.g. a "clipEmbedding". */ -export interface FaceIndex {} +export interface FaceIndex { + /** + * The ID of the {@link EnteFile} whose index this is. 
+ * + * This is used as the primary key when storing the index locally (An + * {@link EnteFile} is guaranteed to have its fileID be unique in the + * namespace of the user. Even if someone shares a file with the user the + * user will get a file entry with a fileID unique to them). + */ + fileID: number; + /** + * The width (in px) of the image (file). + */ + width: number; + /** + * The height (in px) of the image (file). + */ + height: number; + /** + * The "face embedding" for the file. + * + * This is an envelope that contains a list of indexed faces and metadata + * about the indexing. + */ + faceEmbedding: { + /** + * An integral version number of the indexing algorithm / pipeline. + * + * Clients agree out of band what a particular version means. The + * guarantee is that an embedding with a particular version will be the + * same (to negligible floating point epsilons) irrespective of the + * client that indexed the file. + */ + version: number; + /** The UA for the client which generated this embedding. */ + client: string; + /** The list of faces (and their embeddings) detected in the file. */ + faces: Face[]; + }; +} + +/** + * A face detected in a file, and an embedding for this detected face. + * + * During face indexing, we first detect all the faces in a particular file. + * Then for each such detected region, we compute an embedding of that part of + * the file. This detection region and the embedding travel together in + * this {@link Face} interface. + */ +export interface Face { + /** + * A unique identifier for the face. + * + * This ID is guaranteed to be unique for all the faces detected in all the + * files for the user. In particular, each file can have multiple faces but + * they all will get their own unique {@link faceID}. + */ + faceID: string; + /** + * The face detection. Describes the region within the image that was + * detected to be a face, and a set of landmarks (e.g. "eyes") of the + * detection. 
+ * + * All coordinates are relative to the image's dimensions, i.e. they have + * been normalized to lie between 0 and 1, with 0 being the left (or top) + * and 1 being the width (or height) of the image. + */ + detection: { + /** + * The region within the image that contains the face. + * + * All coordinates and sizes are between 0 and 1, normalized by the + * dimensions of the image. + */ + box: Box; + /** + * Face "landmarks", e.g. eyes. + * + * The exact landmarks and their order depend on the face detection + * algorithm being used. + * + * The coordinates are between 0 and 1, normalized by the dimensions of + * the image. + */ + landmarks: Point[]; + }; + /** + * A correctness probability (0 to 1) that the face detection algorithm + * gave to the detection. Higher values are better. + */ + score: number; + /** + * The computed blur for the detected face. + * + * The exact semantics and range for these (floating point) values depend on + * the face indexing algorithm / pipeline version being used. + */ + blur: number; + /** + * An embedding for the face. + * + * This is an opaque numeric (signed floating point) vector whose semantics + * and length depend on the version of the face indexing algorithm / + * pipeline that we are using. However, within a set of embeddings with the + * same version, the property is that two such embedding vectors will be + * "cosine similar" to each other if they are both faces of the same person. + */ + embedding: number[]; +}