[web] Create the face DB layer (#1915)

Manav Rathi
2024-05-29 13:31:17 +05:30
committed by GitHub
20 changed files with 862 additions and 461 deletions

View File

@@ -12,19 +12,22 @@ import { watchReset } from "./watch";
* See: [Note: Do not throw during logout].
*/
export const logout = (watcher: FSWatcher) => {
const ignoreError = (label: string, e: unknown) =>
log.error(`Ignoring error during logout (${label})`, e);
try {
watchReset(watcher);
} catch (e) {
log.error("Ignoring error during logout (FS watch)", e);
ignoreError("FS watch", e);
}
try {
clearConvertToMP4Results();
} catch (e) {
log.error("Ignoring error during logout (convert-to-mp4)", e);
ignoreError("convert-to-mp4", e);
}
try {
clearStores();
} catch (e) {
log.error("Ignoring error during logout (native stores)", e);
ignoreError("native stores", e);
}
};

View File

@@ -22,7 +22,7 @@
"ffmpeg-wasm": "file:./thirdparty/ffmpeg-wasm",
"formik": "^2.1.5",
"hdbscan": "0.0.1-alpha.5",
"idb": "^7.1.1",
"idb": "^8",
"leaflet": "^1.9.4",
"leaflet-defaulticon-compatibility": "^0.1.1",
"localforage": "^1.9.0",

View File

@@ -5,7 +5,7 @@ import { t } from "i18next";
import { AppContext } from "pages/_app";
import { useContext } from "react";
import { components } from "react-select";
import { IndexStatus } from "services/face/db";
import { IndexStatus } from "services/face/db-old";
import { Suggestion, SuggestionType } from "types/search";
const { Menu } = components;

View File

@@ -4,7 +4,7 @@ import { Skeleton, styled } from "@mui/material";
import { Legend } from "components/PhotoViewer/styledComponents/Legend";
import { t } from "i18next";
import React, { useEffect, useState } from "react";
import mlIDbStorage from "services/face/db";
import mlIDbStorage from "services/face/db-old";
import type { Person } from "services/face/people";
import { EnteFile } from "types/file";

View File

@@ -1,5 +1,6 @@
import { blobCache } from "@/next/blob-cache";
import type { Box, Face, FaceAlignment } from "./types";
import type { Box } from "./types";
import type { Face, FaceAlignment } from "./types-old";
export const saveFaceCrop = async (imageBitmap: ImageBitmap, face: Face) => {
const faceCrop = extractFaceCrop(imageBitmap, face.alignment);

View File

@@ -0,0 +1,387 @@
import { haveWindow } from "@/next/env";
import log from "@/next/log";
import {
DBSchema,
IDBPDatabase,
IDBPTransaction,
StoreNames,
deleteDB,
openDB,
} from "idb";
import isElectron from "is-electron";
import type { Person } from "services/face/people";
import type { MlFileData } from "services/face/types-old";
import {
DEFAULT_ML_SEARCH_CONFIG,
MAX_ML_SYNC_ERROR_COUNT,
} from "services/machineLearning/machineLearningService";
export interface IndexStatus {
outOfSyncFilesExists: boolean;
nSyncedFiles: number;
nTotalFiles: number;
localFilesSynced: boolean;
peopleIndexSynced: boolean;
}
/**
* TODO(MR): Transient type with an intersection of values that both existing
* and new types during the migration will have. Eventually we'll store the
* server ML data shape here exactly.
*/
export interface MinimalPersistedFileData {
fileId: number;
mlVersion: number;
errorCount: number;
faces?: { personId?: number; id: string }[];
}
interface Config {}
export const ML_SEARCH_CONFIG_NAME = "ml-search";
const MLDATA_DB_NAME = "mldata";
interface MLDb extends DBSchema {
files: {
key: number;
value: MinimalPersistedFileData;
indexes: { mlVersion: [number, number] };
};
people: {
key: number;
value: Person;
};
// Unused, we only retain this in the schema so that we can delete it during
// migration.
things: {
key: number;
value: unknown;
};
versions: {
key: string;
value: number;
};
library: {
key: string;
value: unknown;
};
configs: {
key: string;
value: Config;
};
}
class MLIDbStorage {
public _db: Promise<IDBPDatabase<MLDb>>;
constructor() {
if (!haveWindow() || !isElectron()) {
return;
}
this.db;
}
private openDB(): Promise<IDBPDatabase<MLDb>> {
return openDB<MLDb>(MLDATA_DB_NAME, 4, {
terminated: async () => {
log.error("ML Indexed DB terminated");
this._db = undefined;
// TODO: remove if there is chance of this going into recursion in some case
await this.db;
},
blocked() {
// TODO: make sure we dont allow multiple tabs of app
log.error("ML Indexed DB blocked");
},
blocking() {
// TODO: make sure we dont allow multiple tabs of app
log.error("ML Indexed DB blocking");
},
async upgrade(db, oldVersion, newVersion, tx) {
let wasMLSearchEnabled = false;
try {
const searchConfig: unknown = await tx
.objectStore("configs")
.get(ML_SEARCH_CONFIG_NAME);
if (
searchConfig &&
typeof searchConfig == "object" &&
"enabled" in searchConfig &&
typeof searchConfig.enabled == "boolean"
) {
wasMLSearchEnabled = searchConfig.enabled;
}
} catch (e) {
// The configs store might not exist (e.g. during logout).
// Ignore.
}
log.info(
`Previous ML database v${oldVersion} had ML search ${wasMLSearchEnabled ? "enabled" : "disabled"}`,
);
if (oldVersion < 1) {
const filesStore = db.createObjectStore("files", {
keyPath: "fileId",
});
filesStore.createIndex("mlVersion", [
"mlVersion",
"errorCount",
]);
db.createObjectStore("people", {
keyPath: "id",
});
db.createObjectStore("things", {
keyPath: "id",
});
db.createObjectStore("versions");
db.createObjectStore("library");
}
if (oldVersion < 2) {
// TODO: update configs if version is updated in defaults
db.createObjectStore("configs");
/*
await tx
.objectStore("configs")
.add(
DEFAULT_ML_SYNC_JOB_CONFIG,
"ml-sync-job",
);
await tx
.objectStore("configs")
.add(DEFAULT_ML_SYNC_CONFIG, ML_SYNC_CONFIG_NAME);
*/
}
if (oldVersion < 3) {
await tx
.objectStore("configs")
.add(DEFAULT_ML_SEARCH_CONFIG, ML_SEARCH_CONFIG_NAME);
}
/*
This'll go in version 5. Note that version 4 was never released,
but it was in main for a while, so we'll just skip it to avoid
breaking the upgrade path for people who ran the mainline.
*/
if (oldVersion < 4) {
/*
try {
await tx
.objectStore("configs")
.delete(ML_SEARCH_CONFIG_NAME);
await tx
.objectStore("configs")
.delete(""ml-sync"");
await tx
.objectStore("configs")
.delete("ml-sync-job");
await tx
.objectStore("configs")
.add(
{ enabled: wasMLSearchEnabled },
ML_SEARCH_CONFIG_NAME,
);
db.deleteObjectStore("library");
db.deleteObjectStore("things");
} catch {
// TODO: ignore for now as we finalize the new version
// the shipped implementation should have a more
// deterministic migration.
}
*/
}
log.info(
`ML DB upgraded from version ${oldVersion} to version ${newVersion}`,
);
},
});
}
public get db(): Promise<IDBPDatabase<MLDb>> {
if (!this._db) {
this._db = this.openDB();
log.info("Opening Ml DB");
}
return this._db;
}
public async clearMLDB() {
const db = await this.db;
db.close();
await deleteDB(MLDATA_DB_NAME);
log.info("Cleared Ml DB");
this._db = undefined;
await this.db;
}
public async getAllFileIdsForUpdate(
tx: IDBPTransaction<MLDb, ["files"], "readwrite">,
) {
return tx.store.getAllKeys();
}
public async getFileIds(
count: number,
limitMlVersion: number,
maxErrorCount: number,
) {
const db = await this.db;
const tx = db.transaction("files", "readonly");
const index = tx.store.index("mlVersion");
let cursor = await index.openKeyCursor(
IDBKeyRange.upperBound([limitMlVersion], true),
);
const fileIds: number[] = [];
while (cursor && fileIds.length < count) {
if (
cursor.key[0] < limitMlVersion &&
cursor.key[1] <= maxErrorCount
) {
fileIds.push(cursor.primaryKey);
}
cursor = await cursor.continue();
}
await tx.done;
return fileIds;
}
public async getFile(fileId: number): Promise<MinimalPersistedFileData> {
const db = await this.db;
return db.get("files", fileId);
}
public async putFile(mlFile: MlFileData) {
const db = await this.db;
return db.put("files", mlFile);
}
public async upsertFileInTx(
fileId: number,
upsert: (mlFile: MinimalPersistedFileData) => MinimalPersistedFileData,
) {
const db = await this.db;
const tx = db.transaction("files", "readwrite");
const existing = await tx.store.get(fileId);
const updated = upsert(existing);
await tx.store.put(updated);
await tx.done;
return updated;
}
public async putAllFiles(
mlFiles: MinimalPersistedFileData[],
tx: IDBPTransaction<MLDb, ["files"], "readwrite">,
) {
await Promise.all(mlFiles.map((mlFile) => tx.store.put(mlFile)));
}
public async removeAllFiles(
fileIds: Array<number>,
tx: IDBPTransaction<MLDb, ["files"], "readwrite">,
) {
await Promise.all(fileIds.map((fileId) => tx.store.delete(fileId)));
}
public async getPerson(id: number) {
const db = await this.db;
return db.get("people", id);
}
public async getAllPeople() {
const db = await this.db;
return db.getAll("people");
}
public async incrementIndexVersion(index: StoreNames<MLDb>) {
if (index === "versions") {
throw new Error("versions store can not be versioned");
}
const db = await this.db;
const tx = db.transaction(["versions", index], "readwrite");
let version = await tx.objectStore("versions").get(index);
version = (version || 0) + 1;
tx.objectStore("versions").put(version, index);
await tx.done;
return version;
}
public async getConfig<T extends Config>(name: string, def: T) {
const db = await this.db;
const tx = db.transaction("configs", "readwrite");
let config = (await tx.store.get(name)) as T;
if (!config) {
config = def;
await tx.store.put(def, name);
}
await tx.done;
return config;
}
public async putConfig(name: string, data: Config) {
const db = await this.db;
return db.put("configs", data, name);
}
public async getIndexStatus(latestMlVersion: number): Promise<IndexStatus> {
const db = await this.db;
const tx = db.transaction(["files", "versions"], "readonly");
const mlVersionIdx = tx.objectStore("files").index("mlVersion");
let outOfSyncCursor = await mlVersionIdx.openKeyCursor(
IDBKeyRange.upperBound([latestMlVersion], true),
);
let outOfSyncFilesExists = false;
while (outOfSyncCursor && !outOfSyncFilesExists) {
if (
outOfSyncCursor.key[0] < latestMlVersion &&
outOfSyncCursor.key[1] <= MAX_ML_SYNC_ERROR_COUNT
) {
outOfSyncFilesExists = true;
}
outOfSyncCursor = await outOfSyncCursor.continue();
}
const nSyncedFiles = await mlVersionIdx.count(
IDBKeyRange.lowerBound([latestMlVersion]),
);
const nTotalFiles = await mlVersionIdx.count();
const filesIndexVersion = await tx.objectStore("versions").get("files");
const peopleIndexVersion = await tx
.objectStore("versions")
.get("people");
const filesIndexVersionExists =
filesIndexVersion !== null && filesIndexVersion !== undefined;
const peopleIndexVersionExists =
peopleIndexVersion !== null && peopleIndexVersion !== undefined;
await tx.done;
return {
outOfSyncFilesExists,
nSyncedFiles,
nTotalFiles,
localFilesSynced: filesIndexVersionExists,
peopleIndexSynced:
peopleIndexVersionExists &&
peopleIndexVersion === filesIndexVersion,
};
}
}
export default new MLIDbStorage();
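The class above is the legacy layer, consumed as a singleton by callers. A minimal usage sketch (the `markIndexed` helper and the version bump are hypothetical, for illustration only):

```ts
import mlIDbStorage from "services/face/db-old";

// Hypothetical helper: record that a file was indexed at ML version 2,
// creating a minimal entry if the file has none yet.
const markIndexed = (fileID: number) =>
    mlIDbStorage.upsertFileInTx(fileID, (existing) => ({
        ...(existing ?? { fileId: fileID, mlVersion: 0, errorCount: 0 }),
        mlVersion: 2,
    }));
```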

View File

@@ -1,387 +1,233 @@
import log from "@/next/log";
import { deleteDB, openDB, type DBSchema } from "idb";
import type { FaceIndex } from "./types";
/**
* [Note: Face DB schema]
*
* There "face" database is made of two object stores:
*
* - "face-index": Contains {@link FaceIndex} objects, either indexed locally or
* fetched from remote storage.
*
* - "file-status": Contains {@link FileStatus} objects, one for each
* {@link EnteFile} that the current client knows about.
*
* Both the stores are keyed by {@link fileID}, and are expected to contain the
* exact same set of {@link fileID}s. The face-index can be thought of as the
* "original" indexing result, whilst file-status bookkeeps information about
* the indexing process (whether or not a file needs indexing, or if there were
* errors doing so).
*
* In tandem, these serve as the underlying storage for the functions exposed by
* this file.
*/
interface FaceDBSchema extends DBSchema {
"face-index": {
key: number;
value: FaceIndex;
};
"file-status": {
key: number;
value: FileStatus;
indexes: { isIndexable: number };
};
}
interface FileStatus {
/** The ID of the {@link EnteFile} whose indexing status we represent. */
fileID: number;
/**
* `1` if this file needs to be indexed, `0` otherwise.
*
* > Somewhat confusingly, we also have an (IndexedDB) "index" on this field.
* That (IDB) index allows us to efficiently select {@link fileIDs} that
* still need indexing (i.e. entries where {@link isIndexable} is `1`).
*
* [Note: Boolean IndexedDB indexes].
*
* IndexedDB does not (currently) support indexes on boolean fields.
* https://github.com/w3c/IndexedDB/issues/76
*
* As a workaround, we use numeric fields where `0` denotes `false` and `1`
* denotes `true`.
*/
isIndexable: number;
/**
* The number of times attempts to index this file failed.
*
* This is guaranteed to be `0` for files which have already been
* successfully indexed (i.e. files for which `isIndexable` is 0 and which
* have a corresponding entry in the "face-index" object store).
*/
failureCount: number;
}
/**
* A promise to the face DB.
*
* We open the database once (lazily), and thereafter save and reuse the promise
* each time something wants to connect to it.
*
* This promise can subsequently get cleared if we need to relinquish our
* connection (e.g. if another client wants to open the face DB with a newer
* version of the schema).
*
* Note that this is module specific state, so the main thread and each worker
* thread that calls the functions in this module will have their own promises.
* To ensure that all connections get torn down correctly, we need to call
* {@link closeFaceDBConnectionsIfNeeded} from both the main thread and all the
* worker threads that use this module.
*/
let _faceDB: ReturnType<typeof openFaceDB> | undefined;
const openFaceDB = async () => {
const db = await openDB<FaceDBSchema>("face", 1, {
upgrade(db, oldVersion, newVersion) {
log.info(`Upgrading face DB ${oldVersion} => ${newVersion}`);
if (oldVersion < 1) {
db.createObjectStore("face-index", { keyPath: "fileID" });
db.createObjectStore("file-status", {
keyPath: "fileID",
}).createIndex("isIndexable", "isIndexable");
}
},
blocking() {
log.info(
"Another client is attempting to open a new version of face DB",
);
db.close();
_faceDB = undefined;
},
blocked() {
log.warn(
"Waiting for an existing client to close their connection so that we can update the face DB version",
);
},
terminated() {
log.warn("Our connection to face DB was unexpectedly terminated");
_faceDB = undefined;
},
});
return db;
};
/**
* @returns a lazily created, cached connection to the face DB.
*/
const faceDB = () => (_faceDB ??= openFaceDB());
/**
* Close the face DB connection (if any) opened by this module.
*
* To ensure proper teardown of the DB connections, this function must be called
* at least once by any execution context that has called any of the other
* functions in this module.
*/
export const closeFaceDBConnectionsIfNeeded = async () => {
try {
if (_faceDB) (await _faceDB).close();
} finally {
_faceDB = undefined;
}
};
/**
* Clear any data stored by the face module.
*
* Meant to be called during logout.
*/
export const clearFaceData = async () => {
await closeFaceDBConnectionsIfNeeded();
return deleteDB("face", {
blocked() {
log.warn(
"Waiting for an existing client to close their connection so that we can delete the face DB",
);
},
});
};
/**
* Save the given {@link faceIndex} locally.
*
* @param faceIndex A {@link FaceIndex} representing the faces that we detected
* (and their corresponding embeddings) in some file.
*
* This function adds a new entry, overwriting any existing one (no merging is
* performed; the existing entry is unconditionally overwritten).
*/
export const saveFaceIndex = async (faceIndex: FaceIndex) => {
const db = await faceDB();
const tx = db.transaction(["face-index", "file-status"], "readwrite");
const indexStore = tx.objectStore("face-index");
const statusStore = tx.objectStore("file-status");
return Promise.all([
indexStore.put(faceIndex),
statusStore.put({
fileID: faceIndex.fileID,
isIndexable: 0,
failureCount: 0,
}),
tx.done,
]);
};
db.createObjectStore("people", {
keyPath: "id",
});
db.createObjectStore("things", {
keyPath: "id",
});
db.createObjectStore("versions");
db.createObjectStore("library");
}
if (oldVersion < 2) {
// TODO: update configs if version is updated in defaults
db.createObjectStore("configs");
/*
await tx
.objectStore("configs")
.add(
DEFAULT_ML_SYNC_JOB_CONFIG,
"ml-sync-job",
);
await tx
.objectStore("configs")
.add(DEFAULT_ML_SYNC_CONFIG, ML_SYNC_CONFIG_NAME);
*/
}
if (oldVersion < 3) {
await tx
.objectStore("configs")
.add(DEFAULT_ML_SEARCH_CONFIG, ML_SEARCH_CONFIG_NAME);
}
/*
This'll go in version 5. Note that version 4 was never released,
but it was in main for a while, so we'll just skip it to avoid
breaking the upgrade path for people who ran the mainline.
*/
if (oldVersion < 4) {
/*
try {
await tx
.objectStore("configs")
.delete(ML_SEARCH_CONFIG_NAME);
await tx
.objectStore("configs")
.delete(""ml-sync"");
await tx
.objectStore("configs")
.delete("ml-sync-job");
await tx
.objectStore("configs")
.add(
{ enabled: wasMLSearchEnabled },
ML_SEARCH_CONFIG_NAME,
);
db.deleteObjectStore("library");
db.deleteObjectStore("things");
} catch {
// TODO: ignore for now as we finalize the new version
// the shipped implementation should have a more
// deterministic migration.
}
*/
}
log.info(
`ML DB upgraded from version ${oldVersion} to version ${newVersion}`,
);
},
/**
* Record the existence of a file so that entities in the face indexing universe
* know about it (e.g. can index it if it is new and it needs indexing).
*
* @param fileID The ID of an {@link EnteFile}.
*
* This function does not overwrite existing entries. If an entry already exists
* for the given {@link fileID} (e.g. if it was indexed and
* {@link saveFaceIndex} called with the result), its existing status remains
* unperturbed.
*/
export const addFileEntry = async (fileID: number) => {
const db = await faceDB();
const tx = db.transaction("file-status", "readwrite");
if ((await tx.store.getKey(fileID)) === undefined) {
await tx.store.put({
fileID,
isIndexable: 1,
failureCount: 0,
});
}
return tx.done;
};
/**
* Return a list of fileIDs that need to be indexed.
*
* This list is from the universe of the file IDs that the face DB knows about
* (use {@link addFileEntry} to inform it about new files). From this
* universe, we filter out the fileIDs of files that have already been
* indexed, or for which we attempted indexing but failed.
*/
export const unindexedFileIDs = async () => {
const db = await faceDB();
const tx = db.transaction("file-status", "readonly");
return tx.store.index("isIndexable").getAllKeys(IDBKeyRange.only(1));
};
/**
* Increment the failure count associated with the given {@link fileID}.
*
* @param fileID The ID of an {@link EnteFile}.
*
* If an entry does not exist yet for the given file, then a new one is created
* and its failure count is set to 1. Otherwise the failure count of the
* existing entry is incremented.
*/
export const markIndexingFailed = async (fileID: number) => {
const db = await faceDB();
const tx = db.transaction("file-status", "readwrite");
const failureCount = ((await tx.store.get(fileID))?.failureCount ?? 0) + 1;
await tx.store.put({
fileID,
isIndexable: 0,
failureCount,
});
return tx.done;
};
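Taken together, these functions suggest a simple indexing loop. A sketch of how a caller might drive them; `indexFaces` is a hypothetical stand-in for the actual detection and embedding pipeline, only the DB calls are from this file:

```ts
import log from "@/next/log";
import {
    addFileEntry,
    markIndexingFailed,
    saveFaceIndex,
    unindexedFileIDs,
} from "services/face/db";
import type { FaceIndex } from "services/face/types";

// Hypothetical stand-in for the detection + embedding pipeline.
declare const indexFaces: (fileID: number) => Promise<FaceIndex>;

export const indexPendingFiles = async (allFileIDs: number[]) => {
    // Let the face DB know about every file we can see. addFileEntry leaves
    // the status of files it already knows about untouched.
    for (const fileID of allFileIDs) await addFileEntry(fileID);
    // Index whatever still needs indexing, recording successes and failures.
    for (const fileID of await unindexedFileIDs()) {
        try {
            await saveFaceIndex(await indexFaces(fileID));
        } catch (e) {
            log.error(`Indexing failed for file ${fileID}`, e);
            await markIndexingFailed(fileID);
        }
    }
};
```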

View File

@@ -2,14 +2,6 @@ import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import { workerBridge } from "@/next/worker/worker-bridge";
import { Matrix } from "ml-matrix";
import type {
Box,
Dimensions,
Face,
FaceAlignment,
FaceDetection,
MlFileData,
} from "services/face/types";
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
import { getSimilarityTransformation } from "similarity-transformation";
import {
@@ -28,6 +20,13 @@ import {
pixelRGBBilinear,
warpAffineFloat32List,
} from "./image";
import type { Box, Dimensions } from "./types";
import type {
Face,
FaceAlignment,
FaceDetection,
MlFileData,
} from "./types-old";
/**
* Index faces in the given file.

View File

@@ -2,7 +2,8 @@ import log from "@/next/log";
import ComlinkCryptoWorker from "@ente/shared/crypto";
import { putEmbedding } from "services/embeddingService";
import type { EnteFile } from "types/file";
import type { Face, FaceDetection, MlFileData, Point } from "./types";
import type { Point } from "./types";
import type { Face, FaceDetection, MlFileData } from "./types-old";
export const putFaceEmbedding = async (
enteFile: EnteFile,

View File

@@ -0,0 +1,46 @@
import type { Box, Dimensions, Point } from "./types";
export interface FaceDetection {
// box and landmarks are relative to the image dimensions stored at mlFileData
box: Box;
landmarks?: Point[];
probability?: number;
}
export interface FaceAlignment {
/**
* An affine transformation matrix (rotation, translation, scaling) to align
* the face extracted from the image.
*/
affineMatrix: number[][];
/**
* The bounding box of the transformed box.
*
* The affine transformation shifts the original detection box to a new,
* transformed box (possibly rotated). This property is the bounding box
* of that transformed box. It is in the coordinate system of the original,
* full, image on which the detection occurred.
*/
boundingBox: Box;
}
export interface Face {
fileId: number;
detection: FaceDetection;
id: string;
alignment?: FaceAlignment;
blurValue?: number;
embedding?: Float32Array;
personId?: number;
}
export interface MlFileData {
fileId: number;
faces?: Face[];
imageDimensions?: Dimensions;
mlVersion: number;
errorCount: number;
}
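For intuition, applying `affineMatrix` to a point is a single matrix-vector multiplication in homogeneous coordinates. A sketch, assuming a row-major 3×3 matrix (an assumption; the interface only promises `number[][]`):

```ts
import type { Point } from "services/face/types";

// Map a point through a row-major 3×3 affine matrix
// [[a, b, tx], [c, d, ty], [0, 0, 1]].
const applyAffine = (m: number[][], p: Point): Point => ({
    x: m[0][0] * p.x + m[0][1] * p.y + m[0][2],
    y: m[1][0] * p.x + m[1][1] * p.y + m[1][2],
});
```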

View File

@@ -1,3 +1,139 @@
/**
* The faces in a file (and an embedding for each of them).
*
* This interface describes the format of both local and remote face data.
*
* - Local face detections and embeddings (collectively called as the face
* index) are generated by the current client when uploading a file (or when
* noticing a file which doesn't yet have a face index), stored in the local
* IndexedDB ("face/db") and also uploaded (E2EE) to remote.
*
* - Remote embeddings are fetched by subsequent clients to avoid them having to
* reindex (indexing faces is a costly operation, esp for mobile clients).
*
* In both these scenarios (whether generated locally or fetched from remote),
* we end up with a face index described by this {@link FaceIndex} interface.
*
* It has a top level envelope with information about the file (in particular
* the primary key {@link fileID}), an inner envelope {@link faceEmbedding} with
* metadata about the indexing, and an array of {@link faces} each containing
* the result of a face detection and an embedding for that detected face.
*
* The word embedding is used to refer to two things: The last one (faceEmbedding >
* faces > embedding) is the "actual" embedding, but sometimes we colloquially
* refer to the inner envelope (the "faceEmbedding") also as an embedding since a
* file can have other types of embedding (envelopes), e.g. a "clipEmbedding".
*/
export interface FaceIndex {
/**
* The ID of the {@link EnteFile} whose index this is.
*
* This is used as the primary key when storing the index locally (An
* {@link EnteFile} is guaranteed to have its fileID be unique in the
* namespace of the user. Even if someone shares a file with the user, the
* user will get a file entry with a fileID unique to them).
*/
fileID: number;
/**
* The width (in px) of the image (file).
*/
width: number;
/**
* The height (in px) of the image (file).
*/
height: number;
/**
* The "face embedding" for the file.
*
* This is an envelope that contains a list of indexed faces and metadata
* about the indexing.
*/
faceEmbedding: {
/**
* An integral version number of the indexing algorithm / pipeline.
*
* Clients agree out of band what a particular version means. The
* guarantee is that an embedding with a particular version will be the
* same (to negligible floating point epsilons) irrespective of the
* client that indexed the file.
*/
version: number;
/** The UA for the client which generated this embedding. */
client: string;
/** The list of faces (and their embeddings) detected in the file. */
faces: Face[];
};
}
/**
* A face detected in a file, and an embedding for this detected face.
*
* During face indexing, we first detect all the faces in a particular file.
* Then for each such detected region, we compute an embedding of that part of
* the file. This detection region and the embedding travel together in
* this {@link Face} interface.
*/
export interface Face {
/**
* A unique identifier for the face.
*
* This ID is guaranteed to be unique for all the faces detected in all the
* files for the user. In particular, each file can have multiple faces but
* they all will get their own unique {@link faceID}.
*/
faceID: string;
/**
* The face detection. Describes the region within the image that was
* detected to be a face, and a set of landmarks (e.g. "eyes") of the
* detection.
*
* All coordinates are relative to and normalized by the image's dimension,
* i.e. they have been normalized to lie between 0 and 1, with 0 being the
* left (or top) and 1 being the width (or height) of the image.
*/
detection: {
/**
* The region within the image that contains the face.
*
* All coordinates and sizes are between 0 and 1, normalized by the
* dimensions of the image.
*/
box: Box;
/**
* Face "landmarks", e.g. eyes.
*
* The exact landmarks and their order depends on the face detection
* algorithm being used.
*
* The coordinates are between 0 and 1, normalized by the dimensions of
* the image.
*/
landmarks: Point[];
};
/**
* The correctness probability (0 to 1) that the face detection algorithm
* gave to the detection. Higher values are better.
*/
score: number;
/**
* The computed blur for the detected face.
*
* The exact semantics and range for these (floating point) values depend on
* the face indexing algorithm / pipeline version being used.
*/
blur: number;
/**
* An embedding for the face.
*
* This is an opaque numeric (signed floating point) vector whose semantics
* and length depend on the version of the face indexing algorithm /
* pipeline that we are using. However, within a set of embeddings with the
* same version, the property is that two such embedding vectors will be
* "cosine similar" to each other if they are both faces of the same person.
*/
embedding: number[];
}
/** The x and y coordinates of a point. */
export interface Point {
x: number;
@@ -21,48 +157,3 @@ export interface Box {
/** The height of the box. */
height: number;
}
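To make the shape concrete, an illustrative `FaceIndex` value (all numbers are made up; real embedding vectors are much longer, and the landmark count depends on the detection model):

```ts
import type { FaceIndex } from "services/face/types";

// Illustrative values only.
const example: FaceIndex = {
    fileID: 1234,
    width: 4000,
    height: 3000,
    faceEmbedding: {
        version: 1,
        client: "web (example)",
        faces: [
            {
                faceID: "1234_0", // hypothetical ID format
                detection: {
                    // All values normalized to [0, 1] by the image dimensions.
                    box: { x: 0.42, y: 0.31, width: 0.1, height: 0.13 },
                    landmarks: [
                        { x: 0.45, y: 0.35 },
                        { x: 0.49, y: 0.35 },
                    ],
                },
                score: 0.97,
                blur: 12.5,
                embedding: [0.012, -0.034, 0.051],
            },
        ],
    },
};
```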

View File

@@ -3,6 +3,7 @@ import { accountLogout } from "@ente/accounts/services/logout";
import { clipService } from "services/clip-service";
import DownloadManager from "./download";
import exportService from "./export";
import { clearFaceData } from "./face/db";
import mlWorkManager from "./machineLearning/mlWorkManager";
/**
@@ -13,18 +14,21 @@ import mlWorkManager from "./machineLearning/mlWorkManager";
* See: [Note: Do not throw during logout].
*/
export const photosLogout = async () => {
const ignoreError = (label: string, e: unknown) =>
log.error(`Ignoring error during logout (${label})`, e);
await accountLogout();
try {
await DownloadManager.logout();
} catch (e) {
log.error("Ignoring error during logout (download)", e);
ignoreError("download", e);
}
try {
await clipService.logout();
} catch (e) {
log.error("Ignoring error during logout (CLIP)", e);
ignoreError("CLIP", e);
}
const electron = globalThis.electron;
@@ -32,19 +36,25 @@ export const photosLogout = async () => {
try {
await mlWorkManager.logout();
} catch (e) {
log.error("Ignoring error during logout (ML)", e);
ignoreError("ML", e);
}
try {
await clearFaceData();
} catch (e) {
ignoreError("face", e);
}
try {
exportService.disableContinuousExport();
} catch (e) {
log.error("Ignoring error during logout (export)", e);
ignoreError("export", e);
}
try {
await electron?.logout();
} catch (e) {
log.error("Ignoring error during logout (electron)", e);
ignoreError("electron", e);
}
}
};

View File

@@ -4,7 +4,7 @@ import PQueue from "p-queue";
import mlIDbStorage, {
ML_SEARCH_CONFIG_NAME,
type MinimalPersistedFileData,
} from "services/face/db";
} from "services/face/db-old";
import { putFaceEmbedding } from "services/face/remote";
import { getLocalFiles } from "services/fileService";
import { EnteFile } from "types/file";

View File

@@ -8,7 +8,7 @@ import { getToken, getUserID } from "@ente/shared/storage/localStorage/helpers";
import debounce from "debounce";
import PQueue from "p-queue";
import { createFaceComlinkWorker } from "services/face";
import mlIDbStorage from "services/face/db";
import mlIDbStorage from "services/face/db-old";
import type { DedicatedMLWorker } from "services/face/face.worker";
import { EnteFile } from "types/file";

View File

@@ -2,7 +2,7 @@ import { FILE_TYPE } from "@/media/file-type";
import log from "@/next/log";
import * as chrono from "chrono-node";
import { t } from "i18next";
import mlIDbStorage from "services/face/db";
import mlIDbStorage from "services/face/db-old";
import type { Person } from "services/face/people";
import { defaultMLVersion } from "services/machineLearning/machineLearningService";
import { Collection } from "types/collection";

View File

@@ -1,5 +1,5 @@
import { FILE_TYPE } from "@/media/file-type";
import { IndexStatus } from "services/face/db";
import { IndexStatus } from "services/face/db-old";
import type { Person } from "services/face/people";
import { City } from "services/locationSearchService";
import { LocationTagData } from "types/entity";

View File

@@ -161,11 +161,16 @@ some cases.
- [heic-convert](https://github.com/catdad-experiments/heic-convert) is used
for converting HEIC files (which browsers don't natively support) into JPEG.
## Processing
## General
- [comlink](https://github.com/GoogleChromeLabs/comlink) provides a minimal
layer on top of Web Workers to make them easier to use (see the sketch after this list).
- [idb](https://github.com/jakearchibald/idb) provides a promise API over the
browser-native IndexedDB APIs.
> For more details about IDB and its role, see [storage.md](storage.md).
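A minimal comlink sketch for the bullet above (the worker module and its `add` function are made-up examples):

```ts
// math.worker.ts (runs inside the Web Worker)
import { expose } from "comlink";

const api = { add: (a: number, b: number) => a + b };
export type MathWorker = typeof api;
expose(api);

// main.ts (runs on the main thread)
import { wrap } from "comlink";
import type { MathWorker } from "./math.worker";

const worker = new Worker(new URL("./math.worker.ts", import.meta.url));
const math = wrap<MathWorker>(worker);
console.log(await math.add(2, 3)); // 5; calls return promises across the bridge
```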
## Photos app specific
- [react-dropzone](https://github.com/react-dropzone/react-dropzone/) is a

View File

@@ -1,9 +1,15 @@
# Storage
## Session Storage
Data tied to the browser tab's lifetime.
We store the user's encryption key here.
## Local Storage
Data in the local storage is persisted even after the user closes the tab (or
the browser itself). This is in contrast with session storage, where the data is
Data in the local storage is persisted even after the user closes the tab, or
the browser itself. This is in contrast with session storage, where the data is
cleared when the browser tab is closed.
The data in local storage is tied to the Document's origin (scheme + host).
@@ -15,19 +21,22 @@ Some things that get stored here are:
- Various user preferences
## IndexedDB
IndexedDB is a transactional NoSQL store provided by browsers. It has quite
large storage limits, and data is stored per origin (and remains persistent
across tab restarts).
Older code used the LocalForage library for storing things in Indexed DB. This
library falls back to localStorage in case Indexed DB storage is not available.
Newer code uses the idb library, which provides a promise API over IndexedDB
but otherwise does not introduce any new abstractions.
For more details, see:
- https://web.dev/articles/indexeddb
- https://github.com/jakearchibald/idb
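As a minimal example of the idb promise API (the database and store names here are arbitrary):

```ts
import { openDB } from "idb";

// Open (creating if necessary) a small key-value database.
const db = await openDB("example-db", 1, {
    upgrade(db) {
        db.createObjectStore("kv");
    },
});

await db.put("kv", "world", "hello"); // value first, then key
console.log(await db.get("kv", "hello")); // "world"
```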
## OPFS

View File

@@ -17,34 +17,37 @@ import { logout as remoteLogout } from "../api/user";
* gets in an unexpected state.
*/
export const accountLogout = async () => {
const ignoreError = (label: string, e: unknown) =>
log.error(`Ignoring error during logout (${label})`, e);
try {
await remoteLogout();
} catch (e) {
log.error("Ignoring error during logout (remote)", e);
ignoreError("remote", e);
}
try {
InMemoryStore.clear();
} catch (e) {
log.error("Ignoring error during logout (in-memory store)", e);
ignoreError("in-memory store", e);
}
try {
clearKeys();
} catch (e) {
log.error("Ignoring error during logout (session store)", e);
ignoreError("session store", e);
}
try {
clearData();
} catch (e) {
log.error("Ignoring error during logout (local storage)", e);
ignoreError("local storage", e);
}
try {
await localForage.clear();
} catch (e) {
log.error("Ignoring error during logout (local forage)", e);
ignoreError("local forage", e);
}
try {
await clearBlobCaches();
} catch (e) {
log.error("Ignoring error during logout (cache)", e);
ignoreError("cache", e);
}
};

View File

@@ -2922,10 +2922,10 @@ i18next@^23.10:
dependencies:
"@babel/runtime" "^7.23.2"
idb@^7.1.1:
version "7.1.1"
resolved "https://registry.yarnpkg.com/idb/-/idb-7.1.1.tgz#d910ded866d32c7ced9befc5bfdf36f572ced72b"
integrity sha512-gchesWBzyvGHRO9W8tzUWFDycow5gwjvFKfyV9FF32Y7F50yZMp7mP+T2mJIWFx49zicqyC4uefHM17o6xKIVQ==
idb@^8:
version "8.0.0"
resolved "https://registry.yarnpkg.com/idb/-/idb-8.0.0.tgz#33d7ed894ed36e23bcb542fb701ad579bfaad41f"
integrity sha512-l//qvlAKGmQO31Qn7xdzagVPPaHTxXx199MhrAFuVBTPqydcPYBWjkrbv4Y0ktB+GmWOiwHl237UUOrLmQxLvw==
ieee754@^1.2.1:
version "1.2.1"