diff --git a/web/apps/photos/src/services/face/crop.ts b/web/apps/photos/src/services/face/crop.ts
index 369dfc654a..d4d3753825 100644
--- a/web/apps/photos/src/services/face/crop.ts
+++ b/web/apps/photos/src/services/face/crop.ts
@@ -1,5 +1,5 @@
 import { blobCache } from "@/next/blob-cache";
-import type { Box, Face, FaceAlignment } from "./types";
+import type { Box, Face, FaceAlignment } from "./types-old";
 
 export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
     const faceCrop = extractFaceCrop(imageBitmap, face.alignment);
diff --git a/web/apps/photos/src/services/face/db-old.ts b/web/apps/photos/src/services/face/db-old.ts
index 4742dd9d73..a70e94bee7 100644
--- a/web/apps/photos/src/services/face/db-old.ts
+++ b/web/apps/photos/src/services/face/db-old.ts
@@ -10,7 +10,7 @@ import {
 } from "idb";
 import isElectron from "is-electron";
 import type { Person } from "services/face/people";
-import type { MlFileData } from "services/face/types";
+import type { MlFileData } from "services/face/types-old";
 import {
     DEFAULT_ML_SEARCH_CONFIG,
     MAX_ML_SYNC_ERROR_COUNT,
diff --git a/web/apps/photos/src/services/face/db.ts b/web/apps/photos/src/services/face/db.ts
deleted file mode 100644
index ff0a1061b9..0000000000
--- a/web/apps/photos/src/services/face/db.ts
+++ /dev/null
@@ -1,137 +0,0 @@
-import type { Box, Point } from "./types";
-
-/**
- * The faces in a file (and an embedding for each of them).
- *
- * This interface describes the format of both local and remote face data.
- *
- * - Local face detections and embeddings (collectively called as the face
- *   index) are generated by the current client when uploading a file (or when
- *   noticing a file which doesn't yet have a face index), stored in the local
- *   IndexedDB ("face/db") and also uploaded (E2EE) to remote.
- *
- * - Remote embeddings are fetched by subsequent clients to avoid them having to
- *   reindex (indexing faces is a costly operation, esp for mobile clients).
- *
- * In both these scenarios (whether generated locally or fetched from remote),
- * we end up with an face index described by this {@link FaceIndex} interface.
- *
- * It has a top level envelope with information about the file (in particular
- * the primary key {@link fileID}), an inner envelope {@link faceEmbedding} with
- * metadata about the indexing, and an array of {@link faces} each containing
- * the result of a face detection and an embedding for that detected face.
- *
- * The word embedding is used to refer two things: The last one (faceEmbedding >
- * faces > embedding) is the "actual" embedding, but sometimes we colloquially
- * refer to the inner envelope (the "faceEmbedding") also an embedding since a
- * file can have other types of embedding (envelopes), e.g. a "clipEmbedding".
- */
-export interface FaceIndex {
-    /**
-     * The ID of the {@link EnteFile} whose index this is.
-     *
-     * This is used as the primary key when storing the index locally (An
-     * {@link EnteFile} is guaranteed to have its fileID be unique in the
-     * namespace of the user. Even if someone shares a file with the user the
-     * user will get a file entry with a fileID unique to them).
-     */
-    fileID: number;
-    /**
-     * The width (in px) of the image (file).
-     */
-    width: number;
-    /**
-     * The height (in px) of the image (file).
-     */
-    height: number;
-    /**
-     * The "face embedding" for the file.
-     *
-     * This is an envelope that contains a list of indexed faces and metadata
-     * about the indexing.
-     */
-    faceEmbedding: {
-        /**
-         * An integral version number of the indexing algorithm / pipeline.
-         *
-         * Clients agree out of band what a particular version means. The
-         * guarantee is that an embedding with a particular version will be the
-         * same (to negligible floating point epsilons) irrespective of the
-         * client that indexed the file.
-         */
-        version: number;
-        /** The UA for the client which generated this embedding. */
-        client: string;
-        /** The list of faces (and their embeddings) detected in the file. */
-        faces: Face[];
-    };
-}
-
-/**
- * A face detected in a file, and an embedding for this detected face.
- *
- * During face indexing, we first detect all the faces in a particular file.
- * Then for each such detected region, we compute an embedding of that part of
- * the file. Together, this detection region and the emedding travel together in
- * this {@link Face} interface.
- */
-export interface Face {
-    /**
-     * A unique identifier for the face.
-     *
-     * This ID is guaranteed to be unique for all the faces detected in all the
-     * files for the user. In particular, each file can have multiple faces but
-     * they all will get their own unique {@link faceID}.
-     */
-    faceID: string;
-    /**
-     * The face detection. Describes the region within the image that was
-     * detected to be a face, and a set of landmarks (e.g. "eyes") of the
-     * detection.
-     *
-     * All coordinates are relative within the image's dimension, i.e. they have
-     * been normalized to lie between 0 and 1, with 0 being the left (or top)
-     * and 1 being the width (or height) of the image.
-     */
-    detection: {
-        /**
-         * The region within the image that contains the face.
-         *
-         * All coordinates and sizes are between 0 and 1, normalized by the
-         * dimensions of the image.
-         * */
-        box: Box;
-        /**
-         * Face "landmarks", e.g. eyes.
-         *
-         * The exact landmarks and their order depends on the face detection
-         * algorithm being used.
-         *
-         * The coordinatesare between 0 and 1, normalized by the dimensions of
-         * the image.
-         */
-        landmarks: Point[];
-    };
-    /**
-     * An correctness probability (0 to 1) that the face detection algorithm
-     * gave to the detection. Higher values are better.
-     */
-    score: number;
-    /**
-     * The computed blur for the detected face.
-     *
-     * The exact semantics and range for these (floating point) values depend on
-     * the face indexing algorithm / pipeline version being used.
-     * */
-    blur: number;
-    /**
-     * An embedding for the face.
-     *
-     * This is an opaque numeric (signed floating point) vector whose semantics
-     * and length depend on the version of the face indexing algorithm /
-     * pipeline that we are using. However, within a set of embeddings with the
-     * same version, the property is that two such embedding vectors will be
-     * "cosine similar" to each other if they are both faces of the same person.
-     */
-    embedding: number[];
-}
diff --git a/web/apps/photos/src/services/face/f-index.ts b/web/apps/photos/src/services/face/f-index.ts
index 5197214b24..5e93f60bd6 100644
--- a/web/apps/photos/src/services/face/f-index.ts
+++ b/web/apps/photos/src/services/face/f-index.ts
@@ -2,14 +2,6 @@ import { FILE_TYPE } from "@/media/file-type";
 import log from "@/next/log";
 import { workerBridge } from "@/next/worker/worker-bridge";
 import { Matrix } from "ml-matrix";
-import type {
-    Box,
-    Dimensions,
-    Face,
-    FaceAlignment,
-    FaceDetection,
-    MlFileData,
-} from "services/face/types";
 import { defaultMLVersion } from "services/machineLearning/machineLearningService";
 import { getSimilarityTransformation } from "similarity-transformation";
 import {
@@ -28,6 +20,13 @@ import {
     pixelRGBBilinear,
     warpAffineFloat32List,
 } from "./image";
+import type { Box, Dimensions } from "./types";
+import type {
+    Face,
+    FaceAlignment,
+    FaceDetection,
+    MlFileData,
+} from "./types-old";
 
 /**
  * Index faces in the given file.
diff --git a/web/apps/photos/src/services/face/remote.ts b/web/apps/photos/src/services/face/remote.ts
index 3c64ca30cc..32d0fddad8 100644
--- a/web/apps/photos/src/services/face/remote.ts
+++ b/web/apps/photos/src/services/face/remote.ts
@@ -2,7 +2,7 @@ import log from "@/next/log";
 import ComlinkCryptoWorker from "@ente/shared/crypto";
 import { putEmbedding } from "services/embeddingService";
 import type { EnteFile } from "types/file";
-import type { Face, FaceDetection, MlFileData, Point } from "./types";
+import type { Face, FaceDetection, MlFileData, Point } from "./types-old";
 
 export const putFaceEmbedding = async (
     enteFile: EnteFile,
diff --git a/web/apps/photos/src/services/face/types-old.ts b/web/apps/photos/src/services/face/types-old.ts
new file mode 100644
index 0000000000..66eec9cf55
--- /dev/null
+++ b/web/apps/photos/src/services/face/types-old.ts
@@ -0,0 +1,46 @@
+import type { Box, Dimensions, Point } from "./types";
+
+export interface FaceDetection {
+    // box and landmarks are relative to image dimensions stored at mlFileData
+    box: Box;
+    landmarks?: Point[];
+    probability?: number;
+}
+
+export interface FaceAlignment {
+    /**
+     * An affine transformation matrix (rotation, translation, scaling) to align
+     * the face extracted from the image.
+     */
+    affineMatrix: number[][];
+    /**
+     * The bounding box of the transformed box.
+     *
+     * The affine transformation shifts the original detection box to a new,
+     * transformed, box (possibly rotated). This property is the bounding box
+     * of that transformed box. It is in the coordinate system of the original,
+     * full, image on which the detection occurred.
+     */
+    boundingBox: Box;
+}
+
+export interface Face {
+    fileId: number;
+    detection: FaceDetection;
+    id: string;
+
+    alignment?: FaceAlignment;
+    blurValue?: number;
+
+    embedding?: Float32Array;
+
+    personId?: number;
+}
+
+export interface MlFileData {
+    fileId: number;
+    faces?: Face[];
+    imageDimensions?: Dimensions;
+    mlVersion: number;
+    errorCount: number;
+}
diff --git a/web/apps/photos/src/services/face/types.ts b/web/apps/photos/src/services/face/types.ts
index 0b1b2f9757..a1db97a9af 100644
--- a/web/apps/photos/src/services/face/types.ts
+++ b/web/apps/photos/src/services/face/types.ts
@@ -1,3 +1,139 @@
+/**
+ * The faces in a file (and an embedding for each of them).
+ *
+ * This interface describes the format of both local and remote face data.
+ *
+ * - Local face detections and embeddings (collectively called the face
+ *   index) are generated by the current client when uploading a file (or when
+ *   noticing a file which doesn't yet have a face index), stored in the local
+ *   IndexedDB ("face/db") and also uploaded (E2EE) to remote.
+ *
+ * - Remote embeddings are fetched by subsequent clients to avoid them having to
+ *   reindex (indexing faces is a costly operation, esp for mobile clients).
+ *
+ * In both these scenarios (whether generated locally or fetched from remote),
+ * we end up with a face index described by this {@link FaceIndex} interface.
+ *
+ * It has a top level envelope with information about the file (in particular
+ * the primary key {@link fileID}), an inner envelope {@link faceEmbedding} with
+ * metadata about the indexing, and an array of {@link faces} each containing
+ * the result of a face detection and an embedding for that detected face.
+ *
+ * The word embedding is used to refer to two things: The last one (faceEmbedding >
+ * faces > embedding) is the "actual" embedding, but sometimes we colloquially
+ * refer to the inner envelope (the "faceEmbedding") also as an embedding since a
+ * file can have other types of embedding (envelopes), e.g. a "clipEmbedding".
+ */
+export interface FaceIndex {
+    /**
+     * The ID of the {@link EnteFile} whose index this is.
+     *
+     * This is used as the primary key when storing the index locally (An
+     * {@link EnteFile} is guaranteed to have its fileID be unique in the
+     * namespace of the user. Even if someone shares a file with the user, the
+     * user will get a file entry with a fileID unique to them).
+     */
+    fileID: number;
+    /**
+     * The width (in px) of the image (file).
+     */
+    width: number;
+    /**
+     * The height (in px) of the image (file).
+     */
+    height: number;
+    /**
+     * The "face embedding" for the file.
+     *
+     * This is an envelope that contains a list of indexed faces and metadata
+     * about the indexing.
+     */
+    faceEmbedding: {
+        /**
+         * An integral version number of the indexing algorithm / pipeline.
+         *
+         * Clients agree out of band what a particular version means. The
+         * guarantee is that an embedding with a particular version will be the
+         * same (to negligible floating point epsilons) irrespective of the
+         * client that indexed the file.
+         */
+        version: number;
+        /** The UA for the client which generated this embedding. */
+        client: string;
+        /** The list of faces (and their embeddings) detected in the file. */
+        faces: Face[];
+    };
+}
+
+/**
+ * A face detected in a file, and an embedding for this detected face.
+ *
+ * During face indexing, we first detect all the faces in a particular file.
+ * Then for each such detected region, we compute an embedding of that part of
+ * the file. Together, this detection region and the embedding travel together in
+ * this {@link Face} interface.
+ */
+export interface Face {
+    /**
+     * A unique identifier for the face.
+     *
+     * This ID is guaranteed to be unique for all the faces detected in all the
+     * files for the user. In particular, each file can have multiple faces but
+     * they all will get their own unique {@link faceID}.
+     */
+    faceID: string;
+    /**
+     * The face detection. Describes the region within the image that was
+     * detected to be a face, and a set of landmarks (e.g. "eyes") of the
+     * detection.
+     *
+     * All coordinates are relative to and normalized by the image's dimension,
+     * i.e. they have been normalized to lie between 0 and 1, with 0 being the
+     * left (or top) and 1 being the width (or height) of the image.
+     */
+    detection: {
+        /**
+         * The region within the image that contains the face.
+         *
+         * All coordinates and sizes are between 0 and 1, normalized by the
+         * dimensions of the image.
+         * */
+        box: Box;
+        /**
+         * Face "landmarks", e.g. eyes.
+         *
+         * The exact landmarks and their order depend on the face detection
+         * algorithm being used.
+         *
+         * The coordinates are between 0 and 1, normalized by the dimensions of
+         * the image.
+         */
+        landmarks: Point[];
+    };
+    /**
+     * A correctness probability (0 to 1) that the face detection algorithm
+     * gave to the detection. Higher values are better.
+     */
+    score: number;
+    /**
+     * The computed blur for the detected face.
+     *
+     * The exact semantics and range for these (floating point) values depend on
+     * the face indexing algorithm / pipeline version being used.
+     * */
+    blur: number;
+    /**
+     * An embedding for the face.
+     *
+     * This is an opaque numeric (signed floating point) vector whose semantics
+     * and length depend on the version of the face indexing algorithm /
+     * pipeline that we are using. However, within a set of embeddings with the
+     * same version, the property is that two such embedding vectors will be
+     * "cosine similar" to each other if they are both faces of the same person.
+     */
+    embedding: number[];
+}
+
 /** The x and y coordinates of a point. */
 export interface Point {
     x: number;
@@ -21,48 +157,3 @@
     /** The height of the box. */
     height: number;
 }
-
-export interface FaceDetection {
-    // box and landmarks is relative to image dimentions stored at mlFileData
-    box: Box;
-    landmarks?: Point[];
-    probability?: number;
-}
-
-export interface FaceAlignment {
-    /**
-     * An affine transformation matrix (rotation, translation, scaling) to align
-     * the face extracted from the image.
-     */
-    affineMatrix: number[][];
-    /**
-     * The bounding box of the transformed box.
-     *
-     * The affine transformation shifts the original detection box a new,
-     * transformed, box (possibily rotated). This property is the bounding box
-     * of that transformed box. It is in the coordinate system of the original,
-     * full, image on which the detection occurred.
-     */
-    boundingBox: Box;
-}
-
-export interface Face {
-    fileId: number;
-    detection: FaceDetection;
-    id: string;
-
-    alignment?: FaceAlignment;
-    blurValue?: number;
-
-    embedding?: Float32Array;
-
-    personId?: number;
-}
-
-export interface MlFileData {
-    fileId: number;
-    faces?: Face[];
-    imageDimensions?: Dimensions;
-    mlVersion: number;
-    errorCount: number;
-}
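
Reviewer note: the normalization contract documented on `FaceIndex` and `Face.detection` in the new `types.ts` is easy to get wrong at call sites. A minimal sketch of consuming it, converting a face's normalized detection box back into pixel coordinates of the original image; `toPixelBox` is a hypothetical helper name and not part of this diff:

```ts
// Hypothetical helper, not part of this diff: maps a normalized detection box
// (values in 0 to 1, per the FaceIndex documentation) into pixel coordinates
// of the image the index was computed for.
import type { Box, Face, FaceIndex } from "./types";

const toPixelBox = (index: FaceIndex, face: Face): Box => ({
    x: face.detection.box.x * index.width,
    y: face.detection.box.y * index.height,
    width: face.detection.box.width * index.width,
    height: face.detection.box.height * index.height,
});
```

The same scaling would apply to `detection.landmarks`, since they are normalized by the same image dimensions.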
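Similarly, a sketch of how the `FaceAlignment.affineMatrix` retained in `types-old.ts` might be applied to a point. The row-major `[[a, b, tx], [c, d, ty], [0, 0, 1]]` layout is an assumption here, and `applyAffine` is a hypothetical name; the pipeline in `f-index.ts` imports `warpAffineFloat32List` from `./image` for the actual pixel-level warping:

```ts
// Hypothetical helper, not part of this diff. Assumes affineMatrix is a 3x3
// row-major matrix [[a, b, tx], [c, d, ty], [0, 0, 1]].
import type { Point } from "./types";
import type { FaceAlignment } from "./types-old";

const applyAffine = (alignment: FaceAlignment, p: Point): Point => {
    const [[a, b, tx], [c, d, ty]] = alignment.affineMatrix;
    return { x: a * p.x + b * p.y + tx, y: c * p.x + d * p.y + ty };
};
```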