Propagate

Manav Rathi
2024-07-16 12:33:57 +05:30
parent f44a476285
commit 2dbfa17a45
4 changed files with 62 additions and 83 deletions

View File

@@ -14,7 +14,7 @@ export const clipIndexingVersion = 1;
 /**
  * The CLIP embedding for a file (and some metadata).
  *
- * See {@link RemoteFaceIndex} for a similar structure with more comprehensive
+ * See {@link FaceIndex} for a similar structure with more comprehensive
  * documentation.
  *
  * ---
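
For orientation, the shape change being propagated can be read off the hunks below: the new code reaches for faceIndex.faces directly, while the removed code went through faceIndex.faceEmbedding.faces and carried file metadata alongside. The following is a minimal TypeScript sketch of the before/after shapes, reconstructed only from the call sites and the removed object literal in this diff; the names FaceSketch, FaceIndexBefore, and FaceIndexAfter are placeholders for this note, not identifiers from the codebase, and any field not visible in the diff is an assumption.

// Sketch only; reconstructed from this diff, not copied from the "./face" module.
interface FaceSketch {
    faceID: string; // assumed to be a string: it is used as a blob cache key
    detection: { box: unknown /* the Box type exported by "./face" */ };
    // ...embedding, blur, and other per-face fields elided
}

// Before: file metadata plus a wrapped embedding (see the removed object
// literal inside indexFaces further down).
interface FaceIndexBefore {
    fileID: number;
    width: number;
    height: number;
    faceEmbedding: {
        version: number; // faceIndexingVersion
        client: string; // the indexing client's user agent
        faces: FaceSketch[];
    };
}

// After: only the per-file face results; the metadata travels separately.
interface FaceIndexAfter {
    faces: FaceSketch[];
}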

View File

@@ -2,7 +2,7 @@ import { blobCache } from "@/next/blob-cache";
 import { ensure } from "@/utils/ensure";
 import type { EnteFile } from "../../types/file";
 import { renderableEnteFileBlob } from "./blob";
-import { type Box, type LocalFaceIndex } from "./face";
+import { type Box, type FaceIndex } from "./face";
 import { clamp } from "./math";
 /**
@@ -16,15 +16,15 @@ import { clamp } from "./math";
  *
  * @param enteFile The {@link EnteFile} whose face crops we want to generate.
  *
- * @param faces The {@link LocalFaceIndex} containing information about the
- * faces detected in the given image.
+ * @param faceIndex The {@link FaceIndex} containing information about the faces
+ * detected in the given image.
  *
  * The generated face crops are saved in a local cache and can subsequently be
  * retrieved from the {@link BlobCache} named "face-crops".
  */
 export const regenerateFaceCrops = async (
     enteFile: EnteFile,
-    faces: Face[],
+    faceIndex: FaceIndex,
 ) => {
     const imageBitmap = await createImageBitmap(
         await renderableEnteFileBlob(enteFile),
@@ -51,12 +51,12 @@ export const regenerateFaceCrops = async (
  */
 export const saveFaceCrops = async (
     imageBitmap: ImageBitmap,
-    faceIndex: LocalFaceIndex,
+    faceIndex: FaceIndex,
 ) => {
     const cache = await blobCache("face-crops");
     return Promise.all(
-        faceIndex.faceEmbedding.faces.map(({ faceID, detection }) =>
+        faceIndex.faces.map(({ faceID, detection }) =>
             extractFaceCrop(imageBitmap, detection.box).then((b) =>
                 cache.put(faceID, b),
             ),
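
With the slimmer FaceIndex, a caller that already has the decoded bitmap just passes both through, and the crops end up in the "face-crops" blob cache keyed by face ID. A minimal usage sketch follows; cacheAndReadFirstCrop is a hypothetical helper, and it assumes the cache returned by blobCache exposes a get counterpart to the put used above (only put appears in this diff).

import { blobCache } from "@/next/blob-cache";
import { saveFaceCrops } from "./crop";
import type { FaceIndex } from "./face";

// Hypothetical helper: generate crops for every detected face, then read one
// back by its faceID.
const cacheAndReadFirstCrop = async (
    imageBitmap: ImageBitmap,
    faceIndex: FaceIndex,
): Promise<Blob | undefined> => {
    await saveFaceCrops(imageBitmap, faceIndex);

    const faceID = faceIndex.faces[0]?.faceID;
    if (!faceID) return undefined;

    const cache = await blobCache("face-crops");
    // Assumption: BlobCache has a get(key) mirroring the put(key, blob) above.
    return await cache.get(faceID);
};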

View File

@@ -8,7 +8,6 @@
 /* eslint-disable @typescript-eslint/no-non-null-assertion */
 import type { EnteFile } from "@/new/photos/types/file";
-import log from "@/next/log";
 import { Matrix } from "ml-matrix";
 import { getSimilarityTransformation } from "similarity-transformation";
 import {
@@ -19,7 +18,6 @@ import {
     type Matrix as TransformationMatrix,
 } from "transformation-matrix";
 import type { ImageBitmapAndData } from "./blob";
-import { saveFaceCrops } from "./crop";
 import {
     grayscaleIntMatrixFromNormalized2List,
     pixelRGBBilinear,
@@ -192,63 +190,29 @@ export interface Box {
  * This function is the entry point to the face indexing pipeline. The file goes
  * through various stages:
  *
- * 1. Downloading the original if needed.
- * 2. Detect faces using ONNX/YOLO
- * 3. Align the face rectangles, compute blur.
- * 4. Compute embeddings using ONNX/MFNT for the detected face (crop).
+ * 1. Detect faces using ONNX/YOLO
+ * 2. Align the face rectangles, compute blur.
+ * 3. Compute embeddings using ONNX/MFNT for the detected face (crop).
  *
  * Once all of it is done, it returns the face rectangles and embeddings so that
  * they can be saved locally (for offline use), and also uploaded to the user's
  * remote storage so that their other devices can download them instead of
  * needing to reindex.
  *
- * As an optimization, it also saves the face crops of the detected faces to the
- * local cache (they can be regenerated independently too by using
- * {@link regenerateFaceCrops}).
- *
  * @param enteFile The {@link EnteFile} to index.
  *
  * @param image The file's contents.
  *
  * @param electron The {@link MLWorkerElectron} instance that allows us to call
- * our Node.js layer for various functionality.
- *
- * @param userAgent The UA of the client that is doing the indexing (us).
+ * our Node.js layer to run the ONNX inference.
  */
 export const indexFaces = async (
     enteFile: EnteFile,
-    image: ImageBitmapAndData,
+    { data: imageData }: ImageBitmapAndData,
     electron: MLWorkerElectron,
     userAgent: string,
-): Promise<FaceIndex> => {
-    const { bitmap: imageBitmap, data: imageData } = image;
-    const { width, height } = imageBitmap;
-    const fileID = enteFile.id;
-    const faceIndex = {
-        fileID,
-        width,
-        height,
-        faceEmbedding: {
-            version: faceIndexingVersion,
-            client: userAgent,
-            faces: await indexFaces_(fileID, imageData, electron),
-        },
-    };
-    // This step, saving face crops, is not part of the indexing pipeline;
-    // we just do it here since we have already have the ImageBitmap at
-    // hand. Ignore errors that happen during this since it does not impact
-    // the generated face index.
-    try {
-        await saveFaceCrops(imageBitmap, faceIndex);
-    } catch (e) {
-        log.error(`Failed to save face crops for file ${fileID}`, e);
-    }
-    return faceIndex;
-};
+): Promise<FaceIndex> => ({
+    faces: await indexFaces_(enteFile.id, imageData, electron),
+});
 const indexFaces_ = async (
     fileID: number,
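
The new indexFaces destructures only the pixel data out of the image pair, while the caller keeps the bitmap alive for saveFaceCrops (see the final file in this commit). Below is a sketch of the assumed shape of ImageBitmapAndData, inferred from the removed destructuring above and from the image.bitmap usage elsewhere in this commit; the real declaration lives in "./blob" and is not shown in this diff.

// Assumed shape; not part of this diff.
interface ImageBitmapAndDataSketch {
    bitmap: ImageBitmap; // stays with the caller, which must close() it when done
    data: ImageData; // assumption: whatever pixel buffer indexFaces_ consumes
}

Destructuring the parameter as { data: imageData } makes it explicit that indexFaces itself never touches the bitmap; closing the bitmap remains the caller's job, which is why the finally block moves in the last hunk of this commit.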

View File

@@ -17,6 +17,7 @@ import {
     type ImageBitmapAndData,
 } from "./blob";
 import { indexCLIP, type CLIPIndex } from "./clip";
+import { saveFaceCrops } from "./crop";
 import {
     indexableFileIDs,
     markIndexingFailed,
@@ -433,40 +434,54 @@ const index = async (
         throw e;
     }
-    let faceIndex: FaceIndex;
-    let clipIndex: CLIPIndex;
-    try {
-        [faceIndex, clipIndex] = await Promise.all([
-            indexFaces(enteFile, image, electron, userAgent),
-            indexCLIP(enteFile, image, electron, userAgent),
-        ]);
-    } catch (e) {
-        // See: [Note: Transient and permanent indexing failures]
-        log.error(`Failed to index ${f}`, e);
-        await markIndexingFailed(enteFile.id);
-        throw e;
+    try {
+        let faceIndex: FaceIndex;
+        let clipIndex: CLIPIndex;
+        try {
+            [faceIndex, clipIndex] = await Promise.all([
+                indexFaces(enteFile, image, electron, userAgent),
+                indexCLIP(enteFile, image, electron, userAgent),
+            ]);
+        } catch (e) {
+            // See: [Note: Transient and permanent indexing failures]
+            log.error(`Failed to index ${f}`, e);
+            await markIndexingFailed(enteFile.id);
+            throw e;
+        }
+        log.debug(() => {
+            const ms = Date.now() - startTime;
+            const nf = faceIndex.faces.length;
+            return `Indexed ${nf} faces and clip in ${f} (${ms} ms)`;
+        });
+        try {
+            await putFaceIndex(enteFile, faceIndex);
+            await putCLIPIndex(enteFile, clipIndex);
+            await saveFaceIndex(faceIndex);
+            await saveCLIPIndex(clipIndex);
+        } catch (e) {
+            // Not sure if DB failures should be considered permanent or
+            // transient. There isn't a known case where writing to the local
+            // indexedDB would fail.
+            //
+            // See: [Note: Transient and permanent indexing failures]
+            log.error(`Failed to put/save face index for ${f}`, e);
+            if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id);
+            throw e;
+        }
+        // This step, saving face crops, is conceptually not part of the
+        // indexing pipeline; we just do it here since we have already have the
+        // ImageBitmap at hand. Ignore errors that happen during this since it
+        // does not impact the generated face index.
+        try {
+            await saveFaceCrops(image.bitmap, faceIndex);
+        } catch (e) {
+            log.error(`Failed to save face crops for ${f}`, e);
+        }
     } finally {
         image.bitmap.close();
     }
-    log.debug(() => {
-        const ms = Date.now() - startTime;
-        const nf = faceIndex.faceEmbedding.faces.length;
-        return `Indexed ${nf} faces and clip in ${f} (${ms} ms)`;
-    });
-    try {
-        await putFaceIndex(enteFile, faceIndex);
-        await putCLIPIndex(enteFile, clipIndex);
-        await saveFaceIndex(faceIndex);
-        await saveCLIPIndex(clipIndex);
-    } catch (e) {
-        // Not sure if DB failures should be considered permanent or transient.
-        // There isn't a known case where writing to the local indexedDB would
-        // fail. See: [Note: Transient and permanent indexing failures].
-        log.error(`Failed to put/save face index for ${f}`, e);
-        if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id);
-        throw e;
-    }
 };
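
Net effect on index(): with saveFaceCrops moved here, the bitmap has to outlive the persistence step, so the close() moves from the indexing try into an outer finally that wraps the whole pipeline. A condensed sketch of the resulting control flow follows; it is a summary reconstructed from the hunk above, not the literal source, and indexFlowSketch is a placeholder name.

// Condensed sketch of the new flow of index(); not the literal source.
const indexFlowSketch = async (image: { bitmap: ImageBitmap }) => {
    try {
        // 1. Index faces and CLIP in parallel; on failure, mark the file as
        //    failed and rethrow.
        // 2. Log how many faces were found (faceIndex.faces.length).
        // 3. Persist remotely (putFaceIndex/putCLIPIndex) and locally
        //    (saveFaceIndex/saveCLIPIndex); only HTTP 4xx marks the file as
        //    permanently failed.
        // 4. Best effort: saveFaceCrops(image.bitmap, faceIndex); errors here
        //    are logged and ignored since they don't affect the index.
    } finally {
        // The bitmap is still needed by step 4, so it is closed only here now.
        image.bitmap.close();
    }
};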