Manav Rathi
2024-05-28 13:49:03 +05:30
parent 8ea7a742b1
commit 3664532f91


@@ -1,3 +1,5 @@
import type { Box, Point } from "./types";
/**
* The faces in a file (and an embedding for each of them).
*
@@ -14,14 +16,122 @@
* In both these scenarios (whether generated locally or fetched from remote),
* we end up with a face index described by this {@link FaceIndex} interface.
*
* It has a top level envelope with information about the client (in particular
* It has a top level envelope with information about the file (in particular
* the primary key {@link fileID}), an inner envelope {@link faceEmbedding} with
* metadata about the indexing, and an array of {@link faces} each containing
* the result of a face detection and an embedding for that detected face.
*
* This last one (faceEmbedding > faces > embedding) is the "actual" embedding,
* but sometimes we colloquially refer to the inner envelope (the
* "faceEmbedding") also an embedding since a file can have other types of
* embedding (envelopes) like a "clipEmbedding".
* The word "embedding" is used to refer to two things: the innermost one
* (faceEmbedding > faces > embedding) is the "actual" embedding, but sometimes
* we colloquially refer to the inner envelope (the "faceEmbedding") also as an
* embedding, since a file can have other types of embedding (envelopes), e.g.
* a "clipEmbedding".
*/
export interface FaceIndex {}
export interface FaceIndex {
/**
* The ID of the {@link EnteFile} whose index this is.
*
* This is used as the primary key when storing the index locally. (An
* {@link EnteFile} is guaranteed to have a fileID that is unique within the
* namespace of the user; even if someone shares a file with the user, the
* user will get a file entry with a fileID unique to them.)
*/
fileID: number;
/**
* The width (in px) of the image (file).
*/
width: number;
/**
* The height (in px) of the image (file).
*/
height: number;
/**
* The "face embedding" for the file.
*
* This is an envelope that contains a list of indexed faces and metadata
* about the indexing.
*/
faceEmbedding: {
/**
* An integral version number of the indexing algorithm / pipeline.
*
* Clients agree out of band on what a particular version means. The
* guarantee is that an embedding with a particular version will be the
* same (to negligible floating point epsilons) irrespective of the
* client that indexed the file.
*/
version: number;
/** The user agent of the client that generated this embedding. */
client: string;
/** The list of faces (and their embeddings) detected in the file. */
faces: Face[];
};
}
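// For illustration only (not part of the original file): a sketch of what a
// FaceIndex value might look like. The concrete numbers and the client
// string below are hypothetical.
const exampleFaceIndex: FaceIndex = {
    fileID: 1234,
    width: 4032,
    height: 3024,
    faceEmbedding: {
        version: 1,
        client: "example-client/0.0.0",
        faces: [],
    },
};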
/**
* A face detected in a file, and an embedding for this detected face.
*
* During face indexing, we first detect all the faces in a particular file.
* Then, for each such detected region, we compute an embedding of that part of
* the file. This detection region and the corresponding embedding travel
* together in this {@link Face} interface.
*/
export interface Face {
/**
* A unique identifier for the face.
*
* This ID is guaranteed to be unique across all the faces detected in all
* the files of the user. In particular, a file can have multiple faces, and
* each of them will get its own unique {@link faceID}.
*/
faceID: string;
/**
* The face detection. Describes the region within the image that was
* detected to be a face, and a set of landmarks (e.g. "eyes") of the
* detection.
*
* All coordinates are relative to the image's dimensions, i.e. they have
* been normalized to lie between 0 and 1, with 0 being the left (or top)
* edge and 1 being the right (or bottom) edge of the image.
*/
detection: {
/**
* The region within the image that contains the face.
*
* All coordinates and sizes are between 0 and 1, normalized by the
* dimensions of the image.
*/
box: Box;
/**
* Face "landmarks", e.g. eyes.
*
* The exact landmarks and their order depend on the face detection
* algorithm being used.
*
* The coordinates are between 0 and 1, normalized by the dimensions of
* the image.
*/
landmarks: Point[];
};
/**
* A correctness probability (0 to 1) that the face detection algorithm
* assigned to the detection. Higher values are better.
*/
score: number;
/**
* The computed blur for the detected face.
*
* The exact semantics and range for these (floating point) values depend on
* the face indexing algorithm / pipeline version being used.
*/
blur: number;
/**
* An embedding for the face.
*
* This is an opaque numeric (signed floating point) vector whose semantics
* and length depend on the version of the face indexing algorithm /
* pipeline that we are using. However, within a set of embeddings with the
* same version, two such embedding vectors will be "cosine similar" to each
* other if they belong to faces of the same person.
*/
embedding: number[];
}
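// A minimal sketch (not part of the original file) of converting a face's
// normalized detection box back to pixel coordinates. It assumes that the
// Box type imported from "./types" has numeric x, y, width, and height
// fields; if the actual shape differs, adjust accordingly.
const faceBoxInPixels = (index: FaceIndex, face: Face) => ({
    x: face.detection.box.x * index.width,
    y: face.detection.box.y * index.height,
    width: face.detection.box.width * index.width,
    height: face.detection.box.height * index.height,
});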
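// A minimal sketch (not part of the original file) of comparing two face
// embeddings of the same pipeline version using cosine similarity. Vectors
// of differing lengths (i.e. differing versions) are not comparable.
const cosineSimilarity = (a: number[], b: number[]) => {
    if (a.length !== b.length) throw new Error("Embedding length mismatch");
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};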