This commit is contained in:
Manav Rathi
2024-05-28 13:52:40 +05:30
parent 3664532f91
commit 5e49b8a528
7 changed files with 192 additions and 193 deletions

View File

@@ -1,5 +1,5 @@
import { blobCache } from "@/next/blob-cache";
import type { Box, Face, FaceAlignment } from "./types";
import type { Box, Face, FaceAlignment } from "./types-old";
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
const faceCrop = extractFaceCrop(imageBitmap, face.alignment);

View File

@@ -10,7 +10,7 @@ import {
} from "idb";
import isElectron from "is-electron";
import type { Person } from "services/face/people";
import type { MlFileData } from "services/face/types";
import type { MlFileData } from "services/face/types-old";
import {
DEFAULT_ML_SEARCH_CONFIG,
MAX_ML_SYNC_ERROR_COUNT,

View File

@@ -1,137 +0,0 @@
import type { Box, Point } from "./types";
/**
* The faces in a file (and an embedding for each of them).
*
* This interface describes the format of both local and remote face data.
*
* - Local face detections and embeddings (collectively called the face
* index) are generated by the current client when uploading a file (or when
* noticing a file which doesn't yet have a face index), stored in the local
* IndexedDB ("face/db") and also uploaded (E2EE) to remote.
*
* - Remote embeddings are fetched by subsequent clients so that they do not
* have to reindex (indexing faces is a costly operation, especially for
* mobile clients).
*
* In both these scenarios (whether generated locally or fetched from remote),
* we end up with a face index described by this {@link FaceIndex} interface.
*
* It has a top level envelope with information about the file (in particular
* the primary key {@link fileID}), an inner envelope {@link faceEmbedding} with
* metadata about the indexing, and an array of {@link faces} each containing
* the result of a face detection and an embedding for that detected face.
*
* The word embedding is used to refer to two things: the innermost one
* (faceEmbedding > faces > embedding) is the "actual" embedding, but sometimes
* we colloquially also call the inner envelope (the "faceEmbedding") an
* embedding, since a file can have other types of embedding envelopes, e.g. a
* "clipEmbedding".
*/
export interface FaceIndex {
/**
* The ID of the {@link EnteFile} whose index this is.
*
* This is used as the primary key when storing the index locally (an
* {@link EnteFile} is guaranteed to have a fileID that is unique within the
* namespace of the user; even if someone shares a file with the user, the
* user will get a file entry with a fileID unique to them).
*/
fileID: number;
/**
* The width (in px) of the image (file).
*/
width: number;
/**
* The height (in px) of the image (file).
*/
height: number;
/**
* The "face embedding" for the file.
*
* This is an envelope that contains a list of indexed faces and metadata
* about the indexing.
*/
faceEmbedding: {
/**
* An integral version number of the indexing algorithm / pipeline.
*
* Clients agree out of band what a particular version means. The
* guarantee is that an embedding with a particular version will be the
* same (to negligible floating point epsilons) irrespective of the
* client that indexed the file.
*/
version: number;
/** The UA for the client which generated this embedding. */
client: string;
/** The list of faces (and their embeddings) detected in the file. */
faces: Face[];
};
}
/**
* A face detected in a file, and an embedding for this detected face.
*
* During face indexing, we first detect all the faces in a particular file.
* Then for each such detected region, we compute an embedding of that part of
* the file. This detection region and the embedding then travel together in
* this {@link Face} interface.
*/
export interface Face {
/**
* A unique identifier for the face.
*
* This ID is guaranteed to be unique for all the faces detected in all the
* files for the user. In particular, each file can have multiple faces but
* they all will get their own unique {@link faceID}.
*/
faceID: string;
/**
* The face detection. Describes the region within the image that was
* detected to be a face, and a set of landmarks (e.g. "eyes") of the
* detection.
*
* All coordinates are relative to and normalized by the image's dimensions,
* i.e. they lie between 0 and 1, with 0 being the left (or top) edge and 1
* being the right (or bottom) edge of the image.
*/
detection: {
/**
* The region within the image that contains the face.
*
* All coordinates and sizes are between 0 and 1, normalized by the
* dimensions of the image.
*/
box: Box;
/**
* Face "landmarks", e.g. eyes.
*
* The exact landmarks and their order depend on the face detection
* algorithm being used.
*
* The coordinates are between 0 and 1, normalized by the dimensions of
* the image.
*/
landmarks: Point[];
};
/**
* The correctness probability (0 to 1) that the face detection algorithm
* assigned to this detection. Higher values are better.
*/
score: number;
/**
* The computed blur for the detected face.
*
* The exact semantics and range for these (floating point) values depend on
* the face indexing algorithm / pipeline version being used.
*/
blur: number;
/**
* An embedding for the face.
*
* This is an opaque numeric (signed floating point) vector whose semantics
* and length depend on the version of the face indexing algorithm /
* pipeline that we are using. However, within a set of embeddings with the
* same version, two such embedding vectors will be "cosine similar" to each
* other if they both correspond to faces of the same person.
*/
embedding: number[];
}

View File

@@ -2,14 +2,6 @@ import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import { workerBridge } from "@/next/worker/worker-bridge";
import { Matrix } from "ml-matrix";
import type {
Box,
Dimensions,
Face,
FaceAlignment,
FaceDetection,
MlFileData,
} from "services/face/types";
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
import { getSimilarityTransformation } from "similarity-transformation";
import {
@@ -28,6 +20,13 @@ import {
pixelRGBBilinear,
warpAffineFloat32List,
} from "./image";
import type { Box, Dimensions } from "./types";
import type {
Face,
FaceAlignment,
FaceDetection,
MlFileData,
} from "./types-old";
/**
* Index faces in the given file.

View File

@@ -2,7 +2,7 @@ import log from "@/next/log";
import ComlinkCryptoWorker from "@ente/shared/crypto";
import { putEmbedding } from "services/embeddingService";
import type { EnteFile } from "types/file";
import type { Face, FaceDetection, MlFileData, Point } from "./types";
import type { Face, FaceDetection, MlFileData, Point } from "./types-old";
export const putFaceEmbedding = async (
enteFile: EnteFile,

View File

@@ -0,0 +1,46 @@
import type { Box, Dimensions, Point } from "./types";
export interface FaceDetection {
// box and landmarks are relative to the image dimensions stored in mlFileData
box: Box;
landmarks?: Point[];
probability?: number;
}
export interface FaceAlignment {
/**
* An affine transformation matrix (rotation, translation, scaling) to align
* the face extracted from the image.
*/
affineMatrix: number[][];
/**
* The bounding box of the transformed box.
*
* The affine transformation maps the original detection box to a new,
* transformed box (possibly rotated). This property is the bounding box
* of that transformed box, in the coordinate system of the original,
* full image on which the detection occurred.
*/
boundingBox: Box;
}
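As a sketch of what affineMatrix is for (an illustration under assumptions, not code from this commit): assuming the matrix is row-major with its first two rows being [a, b, tx] and [c, d, ty], applying it to a point in homogeneous coordinates maps coordinates from the original image into the aligned face crop:

// Apply an affine transformation matrix (as in FaceAlignment.affineMatrix)
// to a point. Assumes a row-major [[a, b, tx], [c, d, ty], ...] layout;
// the helper name is ours, for illustration only.
const applyAffine = (m: number[][], p: Point): Point => ({
    x: m[0][0] * p.x + m[0][1] * p.y + m[0][2],
    y: m[1][0] * p.x + m[1][1] * p.y + m[1][2],
});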
export interface Face {
fileId: number;
detection: FaceDetection;
id: string;
alignment?: FaceAlignment;
blurValue?: number;
embedding?: Float32Array;
personId?: number;
}
export interface MlFileData {
fileId: number;
faces?: Face[];
imageDimensions?: Dimensions;
mlVersion: number;
errorCount: number;
}

View File

@@ -1,3 +1,139 @@
/**
* The faces in a file (and an embedding for each of them).
*
* This interface describes the format of both local and remote face data.
*
* - Local face detections and embeddings (collectively called the face
* index) are generated by the current client when uploading a file (or when
* noticing a file which doesn't yet have a face index), stored in the local
* IndexedDB ("face/db") and also uploaded (E2EE) to remote.
*
* - Remote embeddings are fetched by subsequent clients so that they do not
* have to reindex (indexing faces is a costly operation, especially for
* mobile clients).
*
* In both these scenarios (whether generated locally or fetched from remote),
* we end up with a face index described by this {@link FaceIndex} interface.
*
* It has a top level envelope with information about the file (in particular
* the primary key {@link fileID}), an inner envelope {@link faceEmbedding} with
* metadata about the indexing, and an array of {@link faces} each containing
* the result of a face detection and an embedding for that detected face.
*
* The word embedding is used to refer to two things: the innermost one
* (faceEmbedding > faces > embedding) is the "actual" embedding, but sometimes
* we colloquially also call the inner envelope (the "faceEmbedding") an
* embedding, since a file can have other types of embedding envelopes, e.g. a
* "clipEmbedding".
*/
export interface FaceIndex {
/**
* The ID of the {@link EnteFile} whose index this is.
*
* This is used as the primary key when storing the index locally (an
* {@link EnteFile} is guaranteed to have a fileID that is unique within the
* namespace of the user; even if someone shares a file with the user, the
* user will get a file entry with a fileID unique to them).
*/
fileID: number;
/**
* The width (in px) of the image (file).
*/
width: number;
/**
* The height (in px) of the image (file).
*/
height: number;
/**
* The "face embedding" for the file.
*
* This is an envelope that contains a list of indexed faces and metadata
* about the indexing.
*/
faceEmbedding: {
/**
* An integral version number of the indexing algorithm / pipeline.
*
* Clients agree out of band what a particular version means. The
* guarantee is that an embedding with a particular version will be the
* same (to negligible floating point epsilons) irrespective of the
* client that indexed the file.
*/
version: number;
/** The UA for the client which generated this embedding. */
client: string;
/** The list of faces (and their embeddings) detected in the file. */
faces: Face[];
};
}
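To make the envelope structure concrete, here is a hypothetical value conforming to this interface (every value, including the faceID format and the Box fields, is made up for illustration):

// A hypothetical FaceIndex value; illustrative data only.
const exampleIndex: FaceIndex = {
    fileID: 12345,
    width: 4032,
    height: 3024,
    faceEmbedding: {
        version: 1,
        client: "example-client/1.0", // a made-up UA string
        faces: [
            {
                faceID: "example-face-0", // made-up ID format
                detection: {
                    box: { x: 0.4, y: 0.3, width: 0.1, height: 0.15 },
                    landmarks: [
                        { x: 0.43, y: 0.35 },
                        { x: 0.47, y: 0.35 },
                    ],
                },
                score: 0.98,
                blur: 42, // semantics depend on the pipeline version
                embedding: [0.12, -0.08, 0.56], // real vectors are much longer
            },
        ],
    },
};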
/**
* A face detected in a file, and an embedding for this detected face.
*
* During face indexing, we first detect all the faces in a particular file.
* Then for each such detected region, we compute an embedding of that part of
* the file. This detection region and the embedding then travel together in
* this {@link Face} interface.
*/
export interface Face {
/**
* A unique identifier for the face.
*
* This ID is guaranteed to be unique for all the faces detected in all the
* files for the user. In particular, each file can have multiple faces but
* they all will get their own unique {@link faceID}.
*/
faceID: string;
/**
* The face detection. Describes the region within the image that was
* detected to be a face, and a set of landmarks (e.g. "eyes") of the
* detection.
*
* All coordinates are relative to and normalized by the image's dimensions,
* i.e. they lie between 0 and 1, with 0 being the left (or top) edge and 1
* being the right (or bottom) edge of the image.
*/
detection: {
/**
* The region within the image that contains the face.
*
* All coordinates and sizes are between 0 and 1, normalized by the
* dimensions of the image.
*/
box: Box;
/**
* Face "landmarks", e.g. eyes.
*
* The exact landmarks and their order depend on the face detection
* algorithm being used.
*
* The coordinates are between 0 and 1, normalized by the dimensions of
* the image.
*/
landmarks: Point[];
};
/**
* The correctness probability (0 to 1) that the face detection algorithm
* assigned to this detection. Higher values are better.
*/
score: number;
/**
* The computed blur for the detected face.
*
* The exact semantics and range for these (floating point) values depend on
* the face indexing algorithm / pipeline version being used.
*/
blur: number;
/**
* An embedding for the face.
*
* This is an opaque numeric (signed floating point) vector whose semantics
* and length depend on the version of the face indexing algorithm /
* pipeline that we are using. However, within a set of embeddings with the
* same version, two such embedding vectors will be "cosine similar" to each
* other if they both correspond to faces of the same person.
*/
embedding: number[];
}
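Since the doc comment above characterizes same-person embeddings as "cosine similar", here is a minimal sketch of how a client might compare two same-version embeddings (any matching threshold would be pipeline-specific and is not part of this commit):

// Cosine similarity between two same-version face embeddings.
// Returns a value in [-1, 1]; higher means more similar.
const cosineSimilarity = (a: number[], b: number[]): number => {
    if (a.length !== b.length) throw new Error("embedding length mismatch");
    let dot = 0, normA = 0, normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};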
/** The x and y coordinates of a point. */
export interface Point {
x: number;
@@ -21,48 +157,3 @@ export interface Box {
/** The height of the box. */
height: number;
}
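Because boxes are normalized by the image dimensions, mapping one back to pixels is a simple scale by the width and height recorded in the FaceIndex. A sketch, assuming Box also carries normalized x and y origins (elided by the diff above):

// Convert a normalized Box (0 to 1 coordinates) into pixel coordinates.
// Illustrative helper; not part of this commit.
const boxToPixels = (box: Box, imageWidth: number, imageHeight: number) => ({
    x: box.x * imageWidth,
    y: box.y * imageHeight,
    width: box.width * imageWidth,
    height: box.height * imageHeight,
});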
export interface FaceDetection {
// box and landmarks are relative to the image dimensions stored in mlFileData
box: Box;
landmarks?: Point[];
probability?: number;
}
export interface FaceAlignment {
/**
* An affine transformation matrix (rotation, translation, scaling) to align
* the face extracted from the image.
*/
affineMatrix: number[][];
/**
* The bounding box of the transformed box.
*
* The affine transformation maps the original detection box to a new,
* transformed box (possibly rotated). This property is the bounding box
* of that transformed box, in the coordinate system of the original,
* full image on which the detection occurred.
*/
boundingBox: Box;
}
export interface Face {
fileId: number;
detection: FaceDetection;
id: string;
alignment?: FaceAlignment;
blurValue?: number;
embedding?: Float32Array;
personId?: number;
}
export interface MlFileData {
fileId: number;
faces?: Face[];
imageDimensions?: Dimensions;
mlVersion: number;
errorCount: number;
}