[desktop] Clustering - Part 2/x (#2647)

This commit is contained in:
Manav Rathi
2024-08-10 09:33:25 +05:30
committed by GitHub
10 changed files with 406 additions and 150 deletions

View File

@@ -15,30 +15,6 @@ import {
} from "types/entity";
import { getLatestVersionEntities } from "utils/entity";
/**
* The maximum number of items to fetch in a single diff
*
* [Note: Limit of returned items in /diff requests]
*
* The various GET /diff API methods, which tell the client what all has changed
* since a timestamp (provided by the client) take a limit parameter.
*
* These diff API calls return all items whose updated at is greater
* (non-inclusive) than the timestamp we provide. So there is no mechanism for
* pagination of items which have the same exact updated at.
*
* Conceptually, it may happen that there are more items than the limit we've
* provided, but there are practical safeguards.
*
* For file diff, the limit is advisory, and remote may return less, equal or
* more items than the provided limit. The scenario where it returns more is
* when more files than the limit have the same updated at. Theoretically it
* would make the diff response unbounded, however in practice file
* modifications themselves are all batched. Even if the user were to select all
* the files in their library and updates them all in one go in the UI, their
* client app is required to use batched API calls to make those updates, and
* each of those batches would get distinct updated at.
*/
const DIFF_LIMIT = 500;
const ENTITY_TABLES: Record<EntityType, string> = {
@@ -71,7 +47,8 @@ const getCachedEntityKey = async (type: EntityType) => {
return entityKey;
};
const getEntityKey = async (type: EntityType) => {
// TODO: unexport
export const getEntityKey = async (type: EntityType) => {
try {
const entityKey = await getCachedEntityKey(type);
if (entityKey) {

View File

@@ -6,8 +6,11 @@ import {
isMLEnabled,
isMLSupported,
mlStatusSnapshot,
wipCluster,
wipClusterEnable,
} from "@/new/photos/services/ml";
import type { Person } from "@/new/photos/services/ml/people";
import { personDiff } from "@/new/photos/services/user-entity";
import { EnteFile } from "@/new/photos/types/file";
import * as chrono from "chrono-node";
import { t } from "i18next";
@@ -24,7 +27,7 @@ import {
import ComlinkSearchWorker from "utils/comlink/ComlinkSearchWorker";
import { getUniqueFiles } from "utils/file";
import { getFormattedDate } from "utils/search";
import { getLatestEntities } from "./entityService";
import { getEntityKey, getLatestEntities } from "./entityService";
import locationSearchService, { City } from "./locationSearchService";
const DIGITS = new Set(["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]);
@@ -414,12 +417,28 @@ function convertSuggestionToSearchQuery(option: Suggestion): Search {
}
async function getAllPeople(limit: number = undefined) {
if (!(await wipClusterEnable())) return [];
if (process.env.NEXT_PUBLIC_ENTE_WIP_CL_FETCH) {
const entityKey = await getEntityKey("person" as EntityType);
const peopleR = await personDiff(entityKey.data);
const r = peopleR.length;
log.debug(() => ["people", peopleR]);
if (r) return [];
return [];
}
let people: Array<Person> = []; // await mlIDbStorage.getAllPeople();
people = await wipCluster();
// await mlPeopleStore.iterate<Person, void>((person) => {
// people.push(person);
// });
people = people ?? [];
return people
const result = people
.sort((p1, p2) => p2.files.length - p1.files.length)
.slice(0, limit);
// log.debug(() => ["getAllPeople", result]);
return result;
}

View File

@@ -0,0 +1,11 @@
import { isDevBuild } from "./env";
import log from "./log";
/**
 * Signal that an invariant we rely on does not hold.
 *
 * Dev builds fail fast by throwing an {@link Error} with the given
 * {@link message}; release builds merely log it as a warning so that users
 * are not disrupted.
 */
export const assertionFailed = (message: string) => {
    if (!isDevBuild) {
        log.warn(message);
        return;
    }
    throw new Error(message);
};

View File

@@ -115,7 +115,7 @@ export const encryptMetadata = async (metadata: unknown, keyB64: string) => {
*
* This is the sibling of {@link encryptAssociatedData}.
*
* See {@link decryptChaChaOneShot2} for the implementation details.
* See {@link decryptChaChaOneShot} for the implementation details.
*
* @param encryptedData A {@link Uint8Array} containing the bytes to decrypt.
*
@@ -166,7 +166,7 @@ export const decryptFileEmbedding = async (
* Decrypt the metadata associated with an Ente object (file, collection or
* entity) using the object's key.
*
* This is the sibling of {@link decryptMetadata}.
* This is the sibling of {@link encryptMetadata}.
*
* @param encryptedDataB64 base64 encoded string containing the encrypted data.
*
@@ -180,7 +180,6 @@ export const decryptFileEmbedding = async (
* @returns The decrypted JSON value. Since TypeScript does not have a native
* JSON type, we need to return it as an `unknown`.
*/
export const decryptMetadata = async (
encryptedDataB64: string,
decryptionHeaderB64: string,
@@ -188,10 +187,26 @@ export const decryptMetadata = async (
) =>
JSON.parse(
new TextDecoder().decode(
await decryptAssociatedData(
await libsodium.fromB64(encryptedDataB64),
await decryptMetadataBytes(
encryptedDataB64,
decryptionHeaderB64,
keyB64,
),
),
) as unknown;
/**
 * A variant of {@link decryptMetadata} that does not attempt to parse the
 * decrypted data as a JSON string and instead just returns the raw decrypted
 * bytes that we got.
 */
export const decryptMetadataBytes = async (
    encryptedDataB64: string,
    decryptionHeaderB64: string,
    keyB64: string,
) => {
    const encryptedData = await libsodium.fromB64(encryptedDataB64);
    return decryptAssociatedData(encryptedData, decryptionHeaderB64, keyB64);
};

View File

@@ -1,8 +1,4 @@
import { blobCache } from "@/base/blob-cache";
import {
regenerateFaceCropsIfNeeded,
unidentifiedFaceIDs,
} from "@/new/photos/services/ml";
import { faceCrop, unidentifiedFaceIDs } from "@/new/photos/services/ml";
import type { Person } from "@/new/photos/services/ml/people";
import type { EnteFile } from "@/new/photos/types/file";
import { Skeleton, Typography, styled } from "@mui/material";
@@ -28,7 +24,10 @@ export const PeopleList: React.FC<PeopleListProps> = ({
clickable={!!onSelect}
onClick={() => onSelect && onSelect(person, index)}
>
<FaceCropImageView faceID={person.displayFaceId} />
<FaceCropImageView
faceID={person.displayFaceID}
enteFile={person.displayFaceFile}
/>
</FaceChip>
))}
</FaceChipContainer>
@@ -80,7 +79,6 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
enteFile,
}) => {
const [faceIDs, setFaceIDs] = useState<string[]>([]);
const [didRegen, setDidRegen] = useState(false);
useEffect(() => {
let didCancel = false;
@@ -88,13 +86,6 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
const go = async () => {
const faceIDs = await unidentifiedFaceIDs(enteFile);
!didCancel && setFaceIDs(faceIDs);
// Don't block for the regeneration to happen. If anything got
// regenerated, the result will be true, in response to which we'll
// change the key of the face list and cause it to be rerendered
// (fetching the regenerated crops).
void regenerateFaceCropsIfNeeded(enteFile).then((r) =>
setDidRegen(r),
);
};
void go();
@@ -111,10 +102,10 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
<Typography variant="large" p={1}>
{t("UNIDENTIFIED_FACES")}
</Typography>
<FaceChipContainer key={didRegen ? 1 : 0}>
<FaceChipContainer>
{faceIDs.map((faceID) => (
<FaceChip key={faceID}>
<FaceCropImageView {...{ faceID }} />
<FaceCropImageView {...{ enteFile, faceID }} />
</FaceChip>
))}
</FaceChipContainer>
@@ -123,39 +114,41 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
};
interface FaceCropImageViewProps {
/** The ID of the face to display. */
faceID: string;
/** The {@link EnteFile} which contains this face. */
enteFile: EnteFile;
}
/**
* An image view showing the face crop for the given {@link faceID}.
* An image view showing the face crop for the given face.
*
* The image is read from the "face-crops" {@link BlobCache}. While the image is
* being fetched, or if it doesn't exist, a placeholder is shown.
* The image is read from the "face-crops" {@link BlobCache}, regenerating it if
* needed (which is why we also need to pass the associated file).
*
* While the image is being fetched or regenerated, or if it doesn't exist, a
* placeholder is shown.
*/
const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({ faceID }) => {
const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({
faceID,
enteFile,
}) => {
const [objectURL, setObjectURL] = useState<string | undefined>();
useEffect(() => {
let didCancel = false;
if (faceID) {
void blobCache("face-crops")
.then((cache) => cache.get(faceID))
.then((data) => {
if (data) {
const blob = new Blob([data]);
if (!didCancel) setObjectURL(URL.createObjectURL(blob));
}
});
} else setObjectURL(undefined);
let thisObjectURL: string | undefined;
void faceCrop(faceID, enteFile).then((blob) => {
if (blob && !didCancel)
setObjectURL((thisObjectURL = URL.createObjectURL(blob)));
});
return () => {
didCancel = true;
if (objectURL) URL.revokeObjectURL(objectURL);
if (thisObjectURL) URL.revokeObjectURL(thisObjectURL);
};
// TODO: The linter warning is actually correct, objectURL should be a
// dependency, but adding that require reworking this code first.
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [faceID]);
}, [faceID, enteFile]);
return objectURL ? (
<img style={{ objectFit: "cover" }} src={objectURL} />

View File

@@ -5,11 +5,11 @@ import type { FaceIndex } from "./face";
import { dotProduct } from "./math";
/**
* A cluster is an set of faces.
* A face cluster is a set of faces.
*
* Each cluster has an id so that a Person (a set of clusters) can refer to it.
*/
export interface Cluster {
export interface FaceCluster {
/**
* A randomly generated ID to uniquely identify this cluster.
*/
@@ -31,7 +31,7 @@ export interface Cluster {
*
* For ease of transportation, the Person entity on remote looks like
*
* { name, clusters: { cluster_id, face_ids }}
* { name, clusters: [{ clusterID, faceIDs }] }
*
* That is, it has the clusters embedded within itself.
*/
@@ -78,47 +78,55 @@ export const clusterFaces = (faceIndexes: FaceIndex[]) => {
const faces = [...faceIDAndEmbeddings(faceIndexes)];
const clusters: Cluster[] = [];
let clusters: FaceCluster[] = [];
const clusterIndexByFaceID = new Map<string, number>();
for (const [i, { faceID, embedding }] of faces.entries()) {
let j = 0;
for (; j < i; j++) {
// Can't find a better way for avoiding the null assertion.
// Find the nearest neighbour from among the faces we have already seen.
let nnIndex: number | undefined;
let nnCosineSimilarity = 0;
for (let j = 0; j < i; j++) {
// Can't find a way of avoiding the null assertion.
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const n = faces[j]!;
// TODO-ML: The distance metric and the thresholds are placeholders.
// The vectors are already normalized, so we can directly use their
// dot product as their cosine similarity.
const csim = dotProduct(embedding, n.embedding);
if (csim > 0.5) {
// Found a neighbour near enough. Add this face to the
// neighbour's cluster and call it a day.
const ci = ensure(clusterIndexByFaceID.get(n.faceID));
clusters[ci]?.faceIDs.push(faceID);
clusterIndexByFaceID.set(faceID, ci);
break;
if (csim > 0.76 && csim > nnCosineSimilarity) {
nnIndex = j;
nnCosineSimilarity = csim;
}
}
if (j == i) {
if (nnIndex === undefined) {
// We didn't find a neighbour. Create a new cluster with this face.
const cluster = {
id: newNonSecureID("cluster_"),
faceIDs: [faceID],
};
clusters.push(cluster);
clusterIndexByFaceID.set(faceID, clusters.length);
} else {
// Found a neighbour near enough. Add this face to the neighbour's
// cluster.
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const nn = faces[nnIndex]!;
const nnClusterIndex = ensure(clusterIndexByFaceID.get(nn.faceID));
clusters[nnClusterIndex]?.faceIDs.push(faceID);
clusterIndexByFaceID.set(faceID, nnClusterIndex);
}
}
clusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1);
log.debug(() => ["ml/cluster", { faces, clusters, clusterIndexByFaceID }]);
log.debug(
() =>
`Clustered ${faces.length} faces into ${clusters.length} clusters (${Date.now() - t} ms)`,
);
return undefined;
return clusters;
};
/**

View File

@@ -139,6 +139,17 @@ export interface Face {
* This ID is guaranteed to be unique for all the faces detected in all the
* files for the user. In particular, each file can have multiple faces but
* they all will get their own unique {@link faceID}.
*
* This ID is also meant to be stable across reindexing. That is, if the
* same algorithm and hyperparameters are used to reindex the file, then it
* should result in the same face IDs. This gives us leeway in letting
* unnecessary reindexing happen in rare cases without invalidating the
* clusters that rely on the presence of the given face ID.
*
* Finally, this face ID is not completely opaque. It consists of underscore
* separated components, the first of which is the ID of the
* {@link EnteFile} to which this face belongs. Client code can rely on this
* structure and can parse it if needed.
*/
faceID: string;
/**

View File

@@ -3,6 +3,7 @@
*/
import { isDesktop } from "@/base/app";
import { assertionFailed } from "@/base/assert";
import { blobCache } from "@/base/blob-cache";
import { ensureElectron } from "@/base/electron";
import { isDevBuild } from "@/base/env";
@@ -15,6 +16,7 @@ import { ensure } from "@/utils/ensure";
import { throttled } from "@/utils/promise";
import { proxy, transfer } from "comlink";
import { isInternalUser } from "../feature-flags";
import { getAllLocalFiles } from "../files";
import { getRemoteFlag, updateRemoteFlag } from "../remote-store";
import type { UploadItem } from "../upload/types";
import { clusterFaces } from "./cluster-new";
@@ -25,43 +27,64 @@ import {
faceIndexes,
indexableAndIndexedCounts,
} from "./db";
import type { Person } from "./people";
import { MLWorker } from "./worker";
import type { CLIPMatches } from "./worker-types";
/**
* In-memory flag that tracks if ML is enabled.
* Internal state of the ML subsystem.
*
* - On app start, this is read from local storage during {@link initML}.
* These are essentially cached values used by the functions of this module.
*
* - It gets updated when we sync with remote (so if the user enables/disables
* ML on a different device, this local value will also become true/false).
*
* - It gets updated when the user enables/disables ML on this device.
*
* - It is cleared in {@link logoutML}.
* This should be cleared on logout.
*/
let _isMLEnabled = false;
class MLState {
/**
* In-memory flag that tracks if ML is enabled.
*
* - On app start, this is read from local storage during {@link initML}.
*
* - It gets updated when we sync with remote (so if the user enables/disables
* ML on a different device, this local value will also become true/false).
*
* - It gets updated when the user enables/disables ML on this device.
*
* - It is cleared in {@link logoutML}.
*/
isMLEnabled = false;
/** Cached instance of the {@link ComlinkWorker} that wraps our web worker. */
let _comlinkWorker: Promise<ComlinkWorker<typeof MLWorker>> | undefined;
/**
* Cached instance of the {@link ComlinkWorker} that wraps our web worker.
*/
comlinkWorker: Promise<ComlinkWorker<typeof MLWorker>> | undefined;
/**
* Subscriptions to {@link MLStatus}.
*
* See {@link mlStatusSubscribe}.
*/
let _mlStatusListeners: (() => void)[] = [];
/**
* Subscriptions to {@link MLStatus}.
*
* See {@link mlStatusSubscribe}.
*/
mlStatusListeners: (() => void)[] = [];
/**
* Snapshot of {@link MLStatus}.
*
* See {@link mlStatusSnapshot}.
*/
let _mlStatusSnapshot: MLStatus | undefined;
/**
* Snapshot of {@link MLStatus}.
*
* See {@link mlStatusSnapshot}.
*/
mlStatusSnapshot: MLStatus | undefined;
/**
* In flight face crop regeneration promises indexed by the IDs of the files
* whose faces we are regenerating.
*/
inFlightFaceCropRegens = new Map<number, Promise<void>>();
}
/** State shared by the functions in this module. See {@link MLState}. */
let _state = new MLState();
/** Lazily created, cached, instance of {@link MLWorker}. */
const worker = () =>
(_comlinkWorker ??= createComlinkWorker()).then((cw) => cw.remote);
(_state.comlinkWorker ??= createComlinkWorker()).then((cw) => cw.remote);
const createComlinkWorker = async () => {
const electron = ensureElectron();
@@ -95,9 +118,9 @@ const createComlinkWorker = async () => {
* It is also called when the user pauses or disables ML.
*/
export const terminateMLWorker = async () => {
if (_comlinkWorker) {
await _comlinkWorker.then((cw) => cw.terminate());
_comlinkWorker = undefined;
if (_state.comlinkWorker) {
await _state.comlinkWorker.then((cw) => cw.terminate());
_state.comlinkWorker = undefined;
}
};
@@ -149,7 +172,7 @@ export const canEnableML = async () =>
* Initialize the ML subsystem if the user has enabled it in preferences.
*/
export const initML = () => {
_isMLEnabled = isMLEnabledLocal();
_state.isMLEnabled = isMLEnabledLocal();
};
export const logoutML = async () => {
@@ -158,9 +181,7 @@ export const logoutML = async () => {
// execution contexts], it gets called first in the logout sequence, and
// then this function (`logoutML`) gets called at a later point in time.
_isMLEnabled = false;
_mlStatusListeners = [];
_mlStatusSnapshot = undefined;
_state = new MLState();
await clearMLDB();
};
@@ -173,7 +194,7 @@ export const logoutML = async () => {
*/
export const isMLEnabled = () =>
// Implementation note: Keep it fast, it might be called frequently.
_isMLEnabled;
_state.isMLEnabled;
/**
* Enable ML.
@@ -183,7 +204,7 @@ export const isMLEnabled = () =>
export const enableML = async () => {
await updateIsMLEnabledRemote(true);
setIsMLEnabledLocal(true);
_isMLEnabled = true;
_state.isMLEnabled = true;
setInterimScheduledStatus();
triggerStatusUpdate();
triggerMLSync();
@@ -198,7 +219,7 @@ export const enableML = async () => {
export const disableML = async () => {
await updateIsMLEnabledRemote(false);
setIsMLEnabledLocal(false);
_isMLEnabled = false;
_state.isMLEnabled = false;
await terminateMLWorker();
triggerStatusUpdate();
};
@@ -257,18 +278,18 @@ const updateIsMLEnabledRemote = (enabled: boolean) =>
export const triggerMLSync = () => void mlSync();
const mlSync = async () => {
_isMLEnabled = await getIsMLEnabledRemote();
setIsMLEnabledLocal(_isMLEnabled);
_state.isMLEnabled = await getIsMLEnabledRemote();
setIsMLEnabledLocal(_state.isMLEnabled);
triggerStatusUpdate();
if (_isMLEnabled) void worker().then((w) => w.sync());
// TODO-ML
if (_isMLEnabled) void wipCluster();
if (_state.isMLEnabled) void worker().then((w) => w.sync());
};
/**
* Run indexing on a file which was uploaded from this client.
*
* Indexing only happens if ML is enabled.
*
* This function is called by the uploader when it uploads a new file from this
* client, giving us the opportunity to index it live. This is only an
* optimization - if we don't index it now it'll anyways get indexed later as
@@ -282,20 +303,54 @@ const mlSync = async () => {
* image part of the live photo that was uploaded.
*/
export const indexNewUpload = (enteFile: EnteFile, uploadItem: UploadItem) => {
if (!_isMLEnabled) return;
if (!isMLEnabled()) return;
if (enteFile.metadata.fileType !== FileType.image) return;
log.debug(() => ["ml/liveq", { enteFile, uploadItem }]);
void worker().then((w) => w.onUpload(enteFile, uploadItem));
};
let last: Person[] | undefined;
/**
* WIP! Don't enable, dragon eggs are hatching here.
*/
export const wipCluster = async () => {
if (!isDevBuild || !(await isInternalUser())) return;
if (!process.env.NEXT_PUBLIC_ENTE_WIP_CL) return;
/**
 * WIP clustering is only available to internal users on dev builds, and even
 * then only when explicitly opted-in via an environment variable.
 */
export const wipClusterEnable = async () => {
    if (!isDevBuild || !(await isInternalUser())) return false;
    return !!process.env.NEXT_PUBLIC_ENTE_WIP_CL;
};
clusterFaces(await faceIndexes());
/**
 * Construct ad-hoc {@link Person} objects from locally computed face
 * clusters.
 *
 * WIP — the IDs and names it produces are placeholders. The result is
 * memoized (in the module-level `last`) for the lifetime of the session and
 * is never invalidated, so new indexing results will not be picked up until
 * a reload. TODO confirm if that is intended.
 */
export const wipCluster = async () => {
    // No-op unless WIP clustering is enabled (dev builds, internal users).
    if (!(await wipClusterEnable())) return;
    // Return the session-memoized result of a previous invocation, if any.
    if (last) return last;
    const clusters = clusterFaces(await faceIndexes());
    // Index local files by ID so each face can be resolved to its file.
    const localFiles = await getAllLocalFiles();
    const localFilesByID = new Map(localFiles.map((f) => [f.id, f]));
    const people: Person[] = []; // await mlIDbStorage.getAllPeople();
    for (const cluster of clusters) {
        // Use the cluster's first face as its display face. Clusters always
        // have at least one face, hence the non-null assertion.
        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
        const dfID = cluster.faceIDs[0]!;
        // `?? 0` turns an unparseable face ID into a lookup that will miss,
        // funnelling both failure modes into the assertion below.
        const dfFile = localFilesByID.get(fileIDFromFaceID(dfID) ?? 0);
        if (!dfFile) {
            assertionFailed(`Face ID ${dfID} without local file`);
            continue;
        }
        people.push({
            // Placeholder ID and name until persistence is wired up.
            id: Math.random(), //cluster.id,
            name: "test",
            // The first underscore separated component of a face ID is the
            // ID of the file to which the face belongs.
            // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
            files: cluster.faceIDs.map((s) => parseInt(s.split("_")[0]!)),
            displayFaceID: dfID,
            displayFaceFile: dfFile,
        });
    }
    last = people;
    return people;
};
export type MLStatus =
@@ -336,9 +391,11 @@ export type MLStatus =
* @returns A function that can be used to clear the subscription.
*/
export const mlStatusSubscribe = (onChange: () => void): (() => void) => {
_mlStatusListeners.push(onChange);
_state.mlStatusListeners.push(onChange);
return () => {
_mlStatusListeners = _mlStatusListeners.filter((l) => l != onChange);
_state.mlStatusListeners = _state.mlStatusListeners.filter(
(l) => l != onChange,
);
};
};
@@ -352,7 +409,7 @@ export const mlStatusSubscribe = (onChange: () => void): (() => void) => {
* asynchronous tasks that are needed to get the status.
*/
export const mlStatusSnapshot = (): MLStatus | undefined => {
const result = _mlStatusSnapshot;
const result = _state.mlStatusSnapshot;
// We don't have it yet, trigger an update.
if (!result) triggerStatusUpdate();
return result;
@@ -369,15 +426,15 @@ const updateMLStatusSnapshot = async () =>
setMLStatusSnapshot(await getMLStatus());
const setMLStatusSnapshot = (snapshot: MLStatus) => {
_mlStatusSnapshot = snapshot;
_mlStatusListeners.forEach((l) => l());
_state.mlStatusSnapshot = snapshot;
_state.mlStatusListeners.forEach((l) => l());
};
/**
* Compute the current state of the ML subsystem.
*/
const getMLStatus = async (): Promise<MLStatus> => {
if (!_isMLEnabled) return { phase: "disabled" };
if (!_state.isMLEnabled) return { phase: "disabled" };
const { indexedCount, indexableCount } = await indexableAndIndexedCounts();
@@ -409,8 +466,11 @@ const getMLStatus = async (): Promise<MLStatus> => {
const setInterimScheduledStatus = () => {
let nSyncedFiles = 0,
nTotalFiles = 0;
if (_mlStatusSnapshot && _mlStatusSnapshot.phase != "disabled") {
({ nSyncedFiles, nTotalFiles } = _mlStatusSnapshot);
if (
_state.mlStatusSnapshot &&
_state.mlStatusSnapshot.phase != "disabled"
) {
({ nSyncedFiles, nTotalFiles } = _state.mlStatusSnapshot);
}
setMLStatusSnapshot({ phase: "scheduled", nSyncedFiles, nTotalFiles });
};
@@ -444,25 +504,52 @@ export const unidentifiedFaceIDs = async (
return index?.faces.map((f) => f.faceID) ?? [];
};
/**
 * Extract the ID of the {@link EnteFile} to which a face belongs from its ID.
 *
 * Face IDs consist of underscore separated components, the first of which is
 * the numeric file ID. Returns `undefined` (after flagging an assertion
 * failure) when the given string does not begin with a number.
 */
const fileIDFromFaceID = (faceID: string) => {
    const prefix = faceID.split("_")[0] ?? "";
    const fileID = parseInt(prefix);
    if (Number.isNaN(fileID)) {
        assertionFailed(`Ignoring attempt to parse invalid faceID ${faceID}`);
        return undefined;
    }
    return fileID;
};
/**
 * Return the cached face crop for the given face, regenerating it if needed.
 *
 * Concurrent requests for faces belonging to the same file share a single
 * regeneration pass via {@link MLState.inFlightFaceCropRegens}.
 *
 * @param faceID The id of the face whose face crop we want.
 *
 * @param enteFile The {@link EnteFile} that contains this face.
 *
 * @returns The face crop blob, or `undefined` if the "face-crops" cache does
 * not have an entry for this face even after regeneration.
 */
export const faceCrop = async (faceID: string, enteFile: EnteFile) => {
    let inFlight = _state.inFlightFaceCropRegens.get(enteFile.id);
    if (!inFlight) {
        // Remove the entry once the promise settles so that (a) a failed
        // regeneration is retried on the next request instead of being cached
        // forever, and (b) the map doesn't grow unboundedly.
        inFlight = regenerateFaceCropsIfNeeded(enteFile).finally(() =>
            _state.inFlightFaceCropRegens.delete(enteFile.id),
        );
        _state.inFlightFaceCropRegens.set(enteFile.id, inFlight);
    }
    await inFlight;
    const cache = await blobCache("face-crops");
    return cache.get(faceID);
};
/**
* Check to see if any of the faces in the given file do not have a face crop
* present locally. If so, then regenerate the face crops for all the faces in
* the file (updating the "face-crops" {@link BlobCache}).
*
* @returns true if one or more face crops were regenerated; false otherwise.
*/
export const regenerateFaceCropsIfNeeded = async (enteFile: EnteFile) => {
const regenerateFaceCropsIfNeeded = async (enteFile: EnteFile) => {
const index = await faceIndex(enteFile.id);
if (!index) return false;
if (!index) return;
const faceIDs = index.faces.map((f) => f.faceID);
const cache = await blobCache("face-crops");
for (const id of faceIDs) {
if (!(await cache.has(id))) {
await regenerateFaceCrops(enteFile, index);
return true;
}
}
const faceIDs = index.faces.map((f) => f.faceID);
let needsRegen = false;
for (const id of faceIDs) if (!(await cache.has(id))) needsRegen = true;
return false;
if (needsRegen) await regenerateFaceCrops(enteFile, index);
};

View File

@@ -1,8 +1,11 @@
import type { EnteFile } from "../../types/file";
export interface Person {
id: number;
name?: string;
files: number[];
displayFaceId: string;
displayFaceID: string;
displayFaceFile: EnteFile;
}
// Forced disable clustering. It doesn't currently work.

View File

@@ -0,0 +1,132 @@
import { decryptMetadataBytes } from "@/base/crypto/ente";
import { authenticatedRequestHeaders, ensureOk } from "@/base/http";
import { apiURL } from "@/base/origins";
import { z } from "zod";
/**
* User entities are predefined lists of otherwise arbitrary data that the user
* can store for their account.
*
* e.g. location tags, people in their photos.
*/
export type EntityType =
| "person"
/**
* A new version of the Person entity where the data is gzipped before
* encryption.
*/
| "person_v2";
/**
* The maximum number of items to fetch in a single diff
*
* [Note: Limit of returned items in /diff requests]
*
* The various GET /diff API methods, which tell the client what all has changed
* since a timestamp (provided by the client) take a limit parameter.
*
* These diff API calls return all items whose updated at is greater
* (non-inclusive) than the timestamp we provide. So there is no mechanism for
* pagination of items which have the exact same updated at.
*
* Conceptually, it may happen that there are more items than the limit we've
* provided, but there are practical safeguards.
*
* For file diff, the limit is advisory, and remote may return less, equal or
* more items than the provided limit. The scenario where it returns more is
* when more files than the limit have the same updated at. Theoretically it
* would make the diff response unbounded, however in practice file
* modifications themselves are all batched. Even if the user were to select all
* the files in their library and updates them all in one go in the UI, their
* client app is required to use batched API calls to make those updates, and
* each of those batches would get distinct updated at.
*/
const defaultDiffLimit = 500;
/**
* A generic user entity.
*
* This is an intermediate step, usually what we really want is a version
* of this with the {@link data} parsed to the specific type of JSON object
* expected to be associated with this entity type.
*/
interface UserEntity {
/**
* Arbitrary data associated with the entity. The format of this data is
* specific to each entity type.
*
* This will not be present for entities that have been deleted on remote.
*/
data: Uint8Array | undefined;
/**
* Epoch microseconds denoting when this entity was created or last updated.
*/
updatedAt: number;
}
// Zod schema for each item in the "diff" array returned by the remote
// GET /user-entity/entity/diff endpoint.
const RemoteUserEntity = z.object({
    /** Base64 string containing the encrypted contents of the entity. */
    encryptedData: z.string(),
    /** Base64 string containing the decryption header. */
    header: z.string(),
    /** `true` if the entity has been deleted on remote. */
    isDeleted: z.boolean(),
    /** Epoch microseconds when the entity was created or last updated. */
    updatedAt: z.number(),
});
/**
 * Fetch all user entities of the given type that have been created or updated
 * since the given time.
 *
 * @param type The type of the entities to fetch.
 *
 * @param sinceTime Epoch microseconds (the same unit as {@link updatedAt}).
 * Remote will only return entities whose {@link updatedAt} is greater
 * (non-inclusive) than this value. Set this to zero to start from the
 * beginning.
 *
 * @param entityKeyB64 The base64 encoded key to use for decrypting the
 * encrypted contents of the user entity.
 *
 * @returns The matching entities, decrypted. Entities that have been deleted
 * on remote will have an `undefined` {@link data}.
 */
export const userEntityDiff = async (
    type: EntityType,
    sinceTime: number,
    entityKeyB64: string,
): Promise<UserEntity[]> => {
    const decrypt = (dataB64: string, headerB64: string) =>
        decryptMetadataBytes(dataB64, headerB64, entityKeyB64);
    const params = new URLSearchParams({
        type,
        sinceTime: sinceTime.toString(),
        limit: defaultDiffLimit.toString(),
    });
    const url = await apiURL(`/user-entity/entity/diff`);
    const res = await fetch(`${url}?${params.toString()}`, {
        headers: await authenticatedRequestHeaders(),
    });
    ensureOk(res);
    // Validate the response shape before touching its contents.
    const { diff } = z
        .object({ diff: z.array(RemoteUserEntity) })
        .parse(await res.json());
    // Decrypt all (non-deleted) entities in parallel.
    return Promise.all(
        diff.map(async ({ encryptedData, header, isDeleted, updatedAt }) => ({
            data: isDeleted ? undefined : await decrypt(encryptedData, header),
            updatedAt,
        })),
    );
};
/**
 * Fetch all Person entities that have been created or updated since the last
 * time we checked.
 *
 * Entities that were deleted on remote are surfaced as `undefined` entries.
 */
export const personDiff = async (entityKeyB64: string) => {
    const entities = await userEntityDiff("person", 0, entityKeyB64);
    const decoder = new TextDecoder();
    return entities.map(({ data }) =>
        data ? (JSON.parse(decoder.decode(data)) as unknown) : undefined,
    );
};