[desktop] Clustering - Part 2/x (#2647)
@@ -15,30 +15,6 @@ import {
 } from "types/entity";
 import { getLatestVersionEntities } from "utils/entity";
 
-/**
- * The maximum number of items to fetch in a single diff
- *
- * [Note: Limit of returned items in /diff requests]
- *
- * The various GET /diff API methods, which tell the client what all has changed
- * since a timestamp (provided by the client) take a limit parameter.
- *
- * These diff API calls return all items whose updated at is greater
- * (non-inclusive) than the timestamp we provide. So there is no mechanism for
- * pagination of items which have the exact same updated at.
- *
- * Conceptually, it may happen that there are more items than the limit we've
- * provided, but there are practical safeguards.
- *
- * For file diff, the limit is advisory, and remote may return fewer, equal, or
- * more items than the provided limit. The scenario where it returns more is
- * when more files than the limit have the same updated at. Theoretically it
- * would make the diff response unbounded, however in practice file
- * modifications themselves are all batched. Even if the user were to select all
- * the files in their library and update them all in one go in the UI, their
- * client app is required to use batched API calls to make those updates, and
- * each of those batches would get distinct updated at.
- */
-const DIFF_LIMIT = 500;
-
 const ENTITY_TABLES: Record<EntityType, string> = {
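The note above (moved in this commit to user-entity.ts, see below) describes timestamp-cursor pagination. A minimal sketch of how a client would drive such a /diff endpoint; `fetchDiff` is a hypothetical stand-in for the actual API call:

```ts
interface DiffItem {
    updatedAt: number; // epoch microseconds
}

// Pull pages until the endpoint returns an empty page, advancing the cursor
// to the newest `updatedAt` seen so far. Since the server returns items whose
// updatedAt is strictly greater than `sinceTime`, pages never overlap.
const pullAll = async (
    fetchDiff: (sinceTime: number, limit: number) => Promise<DiffItem[]>,
) => {
    const items: DiffItem[] = [];
    let sinceTime = 0;
    while (true) {
        const page = await fetchDiff(sinceTime, 500);
        if (page.length == 0) break;
        items.push(...page);
        sinceTime = page.reduce((t, i) => Math.max(t, i.updatedAt), sinceTime);
    }
    return items;
};
```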
@@ -71,7 +47,8 @@ const getCachedEntityKey = async (type: EntityType) => {
     return entityKey;
 };
 
-const getEntityKey = async (type: EntityType) => {
+// TODO: unexport
+export const getEntityKey = async (type: EntityType) => {
     try {
         const entityKey = await getCachedEntityKey(type);
         if (entityKey) {
@@ -6,8 +6,11 @@ import {
     isMLEnabled,
     isMLSupported,
     mlStatusSnapshot,
+    wipCluster,
+    wipClusterEnable,
 } from "@/new/photos/services/ml";
+import type { Person } from "@/new/photos/services/ml/people";
+import { personDiff } from "@/new/photos/services/user-entity";
 import { EnteFile } from "@/new/photos/types/file";
 import * as chrono from "chrono-node";
 import { t } from "i18next";
@@ -24,7 +27,7 @@ import {
 import ComlinkSearchWorker from "utils/comlink/ComlinkSearchWorker";
 import { getUniqueFiles } from "utils/file";
 import { getFormattedDate } from "utils/search";
-import { getLatestEntities } from "./entityService";
+import { getEntityKey, getLatestEntities } from "./entityService";
 import locationSearchService, { City } from "./locationSearchService";
 
 const DIGITS = new Set(["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]);
@@ -414,12 +417,28 @@ function convertSuggestionToSearchQuery(option: Suggestion): Search {
 }
 
 async function getAllPeople(limit: number = undefined) {
+    if (!(await wipClusterEnable())) return [];
+
+    if (process.env.NEXT_PUBLIC_ENTE_WIP_CL_FETCH) {
+        const entityKey = await getEntityKey("person" as EntityType);
+        const peopleR = await personDiff(entityKey.data);
+        const r = peopleR.length;
+        log.debug(() => ["people", peopleR]);
+
+        if (r) return [];
+        return [];
+    }
+
     let people: Array<Person> = []; // await mlIDbStorage.getAllPeople();
     people = await wipCluster();
     // await mlPeopleStore.iterate<Person, void>((person) => {
     //     people.push(person);
     // });
     people = people ?? [];
-    return people
+    const result = people
         .sort((p1, p2) => p2.files.length - p1.files.length)
         .slice(0, limit);
+    // log.debug(() => ["getAllPeople", result]);
+
+    return result;
 }
web/packages/base/assert.ts (new file, +11)
@@ -0,0 +1,11 @@
+import { isDevBuild } from "./env";
+import log from "./log";
+
+/**
+ * If running in a dev build, throw an exception with the given message.
+ * Otherwise log it as a warning.
+ */
+export const assertionFailed = (message: string) => {
+    if (isDevBuild) throw new Error(message);
+    log.warn(message);
+};
@@ -115,7 +115,7 @@ export const encryptMetadata = async (metadata: unknown, keyB64: string) => {
  *
  * This is the sibling of {@link encryptAssociatedData}.
  *
- * See {@link decryptChaChaOneShot2} for the implementation details.
+ * See {@link decryptChaChaOneShot} for the implementation details.
  *
  * @param encryptedData A {@link Uint8Array} containing the bytes to decrypt.
  *
@@ -166,7 +166,7 @@ export const decryptFileEmbedding = async (
  * Decrypt the metadata associated with an Ente object (file, collection or
  * entity) using the object's key.
  *
- * This is the sibling of {@link decryptMetadata}.
+ * This is the sibling of {@link encryptMetadata}.
  *
  * @param encryptedDataB64 base64 encoded string containing the encrypted data.
  *
@@ -180,7 +180,6 @@ export const decryptFileEmbedding = async (
  * @returns The decrypted JSON value. Since TypeScript does not have a native
  * JSON type, we need to return it as an `unknown`.
  */
-
 export const decryptMetadata = async (
     encryptedDataB64: string,
     decryptionHeaderB64: string,
@@ -188,10 +187,26 @@ export const decryptMetadata = async (
 ) =>
     JSON.parse(
         new TextDecoder().decode(
-            await decryptAssociatedData(
-                await libsodium.fromB64(encryptedDataB64),
+            await decryptMetadataBytes(
+                encryptedDataB64,
                 decryptionHeaderB64,
                 keyB64,
             ),
         ),
     ) as unknown;
+
+/**
+ * A variant of {@link decryptMetadata} that does not attempt to parse the
+ * decrypted data as a JSON string and instead just returns the raw decrypted
+ * bytes that we got.
+ */
+export const decryptMetadataBytes = async (
+    encryptedDataB64: string,
+    decryptionHeaderB64: string,
+    keyB64: string,
+) =>
+    await decryptAssociatedData(
+        await libsodium.fromB64(encryptedDataB64),
+        decryptionHeaderB64,
+        keyB64,
+    );
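With this hunk, decryptMetadata becomes a thin JSON-parsing wrapper over the new decryptMetadataBytes. A self-contained sketch of that layering, with plain base64 decoding standing in for the actual libsodium-backed decryption:

```ts
// Stand-in for decryptMetadataBytes: bytes in, bytes out (here just base64
// decoding; the real function decrypts using the header and key).
const toBytes = async (dataB64: string): Promise<Uint8Array> =>
    Uint8Array.from(atob(dataB64), (c) => c.charCodeAt(0));

// decryptMetadata's shape: the bytes variant, plus decode and JSON.parse.
const toJSON = async (dataB64: string): Promise<unknown> =>
    JSON.parse(new TextDecoder().decode(await toBytes(dataB64)));
```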
@@ -1,8 +1,4 @@
 import { blobCache } from "@/base/blob-cache";
-import {
-    regenerateFaceCropsIfNeeded,
-    unidentifiedFaceIDs,
-} from "@/new/photos/services/ml";
+import { faceCrop, unidentifiedFaceIDs } from "@/new/photos/services/ml";
 import type { Person } from "@/new/photos/services/ml/people";
 import type { EnteFile } from "@/new/photos/types/file";
 import { Skeleton, Typography, styled } from "@mui/material";
@@ -28,7 +24,10 @@ export const PeopleList: React.FC<PeopleListProps> = ({
                     clickable={!!onSelect}
                     onClick={() => onSelect && onSelect(person, index)}
                 >
-                    <FaceCropImageView faceID={person.displayFaceId} />
+                    <FaceCropImageView
+                        faceID={person.displayFaceID}
+                        enteFile={person.displayFaceFile}
+                    />
                 </FaceChip>
             ))}
         </FaceChipContainer>
@@ -80,7 +79,6 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
     enteFile,
 }) => {
     const [faceIDs, setFaceIDs] = useState<string[]>([]);
-    const [didRegen, setDidRegen] = useState(false);
 
     useEffect(() => {
         let didCancel = false;
@@ -88,13 +86,6 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
         const go = async () => {
             const faceIDs = await unidentifiedFaceIDs(enteFile);
             !didCancel && setFaceIDs(faceIDs);
-            // Don't block for the regeneration to happen. If anything got
-            // regenerated, the result will be true, in response to which we'll
-            // change the key of the face list and cause it to be rerendered
-            // (fetching the regenerated crops).
-            void regenerateFaceCropsIfNeeded(enteFile).then((r) =>
-                setDidRegen(r),
-            );
         };
 
         void go();
@@ -111,10 +102,10 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
             <Typography variant="large" p={1}>
                 {t("UNIDENTIFIED_FACES")}
             </Typography>
-            <FaceChipContainer key={didRegen ? 1 : 0}>
+            <FaceChipContainer>
                 {faceIDs.map((faceID) => (
                     <FaceChip key={faceID}>
-                        <FaceCropImageView {...{ faceID }} />
+                        <FaceCropImageView {...{ enteFile, faceID }} />
                     </FaceChip>
                 ))}
             </FaceChipContainer>
@@ -123,39 +114,41 @@ export const UnidentifiedFaces: React.FC<UnidentifiedFacesProps> = ({
 };
 
 interface FaceCropImageViewProps {
     /** The ID of the face to display. */
     faceID: string;
+    /** The {@link EnteFile} which contains this face. */
+    enteFile: EnteFile;
 }
 
 /**
- * An image view showing the face crop for the given {@link faceID}.
+ * An image view showing the face crop for the given face.
  *
- * The image is read from the "face-crops" {@link BlobCache}. While the image is
- * being fetched, or if it doesn't exist, a placeholder is shown.
+ * The image is read from the "face-crops" {@link BlobCache}, regenerating it if
+ * needed (which is why we also need to pass the associated file).
+ *
+ * While the image is being fetched or regenerated, or if it doesn't exist, a
+ * placeholder is shown.
  */
-const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({ faceID }) => {
+const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({
+    faceID,
+    enteFile,
+}) => {
     const [objectURL, setObjectURL] = useState<string | undefined>();
 
     useEffect(() => {
         let didCancel = false;
-        if (faceID) {
-            void blobCache("face-crops")
-                .then((cache) => cache.get(faceID))
-                .then((data) => {
-                    if (data) {
-                        const blob = new Blob([data]);
-                        if (!didCancel) setObjectURL(URL.createObjectURL(blob));
-                    }
-                });
-        } else setObjectURL(undefined);
+        let thisObjectURL: string | undefined;
+
+        void faceCrop(faceID, enteFile).then((blob) => {
+            if (blob && !didCancel)
+                setObjectURL((thisObjectURL = URL.createObjectURL(blob)));
+        });
 
         return () => {
             didCancel = true;
-            if (objectURL) URL.revokeObjectURL(objectURL);
+            if (thisObjectURL) URL.revokeObjectURL(thisObjectURL);
         };
-        // TODO: The linter warning is actually correct, objectURL should be a
-        // dependency, but adding that require reworking this code first.
-        // eslint-disable-next-line react-hooks/exhaustive-deps
-    }, [faceID]);
+    }, [faceID, enteFile]);
 
     return objectURL ? (
         <img style={{ objectFit: "cover" }} src={objectURL} />
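The rewritten effect also fixes a subtle leak: the old cleanup revoked the stale `objectURL` captured by the effect's closure, not the URL created by that run. A sketch of the corrected pattern as a reusable hook, assuming only the web URL and React APIs:

```tsx
import { useEffect, useState } from "react";

// Resolve a blob asynchronously and expose it as an object URL, revoking the
// URL created by each effect run when that run is cleaned up.
const useBlobURL = (getBlob: () => Promise<Blob | undefined>) => {
    const [url, setURL] = useState<string | undefined>();
    useEffect(() => {
        let didCancel = false;
        let thisURL: string | undefined;
        void getBlob().then((blob) => {
            if (blob && !didCancel)
                setURL((thisURL = URL.createObjectURL(blob)));
        });
        return () => {
            didCancel = true;
            if (thisURL) URL.revokeObjectURL(thisURL);
        };
    }, [getBlob]);
    return url;
};
```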
@@ -5,11 +5,11 @@ import type { FaceIndex } from "./face";
 import { dotProduct } from "./math";
 
 /**
- * A cluster is a set of faces.
+ * A face cluster is a set of faces.
  *
 * Each cluster has an id so that a Person (a set of clusters) can refer to it.
 */
-export interface Cluster {
+export interface FaceCluster {
    /**
     * A randomly generated ID to uniquely identify this cluster.
     */
@@ -31,7 +31,7 @@ export interface Cluster {
 *
 * For ease of transportation, the Person entity on remote looks like
 *
- *     { name, clusters: { cluster_id, face_ids }}
+ *     { name, clusters: [{ clusterID, faceIDs }] }
 *
 * That is, it has the clusters embedded within itself.
 */
@@ -78,47 +78,55 @@ export const clusterFaces = (faceIndexes: FaceIndex[]) => {
 
     const faces = [...faceIDAndEmbeddings(faceIndexes)];
 
-    const clusters: Cluster[] = [];
+    let clusters: FaceCluster[] = [];
     const clusterIndexByFaceID = new Map<string, number>();
     for (const [i, { faceID, embedding }] of faces.entries()) {
-        let j = 0;
-        for (; j < i; j++) {
-            // Can't find a better way for avoiding the null assertion.
+        // Find the nearest neighbour from among the faces we have already seen.
+        let nnIndex: number | undefined;
+        let nnCosineSimilarity = 0;
+        for (let j = 0; j < i; j++) {
+            // Can't find a way of avoiding the null assertion.
             // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
             const n = faces[j]!;
 
-            // TODO-ML: The distance metric and the thresholds are placeholders.
-
             // The vectors are already normalized, so we can directly use their
             // dot product as their cosine similarity.
             const csim = dotProduct(embedding, n.embedding);
-            if (csim > 0.5) {
-                // Found a neighbour near enough. Add this face to the
-                // neighbour's cluster and call it a day.
-                const ci = ensure(clusterIndexByFaceID.get(n.faceID));
-                clusters[ci]?.faceIDs.push(faceID);
-                clusterIndexByFaceID.set(faceID, ci);
-                break;
+            if (csim > 0.76 && csim > nnCosineSimilarity) {
+                nnIndex = j;
+                nnCosineSimilarity = csim;
             }
         }
-        if (j == i) {
+        if (nnIndex === undefined) {
             // We didn't find a neighbour. Create a new cluster with this face.
-
             const cluster = {
                 id: newNonSecureID("cluster_"),
                 faceIDs: [faceID],
             };
             clusters.push(cluster);
             clusterIndexByFaceID.set(faceID, clusters.length);
+        } else {
+            // Found a neighbour near enough. Add this face to the neighbour's
+            // cluster.
+
+            // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+            const nn = faces[nnIndex]!;
+            const nnClusterIndex = ensure(clusterIndexByFaceID.get(nn.faceID));
+            clusters[nnClusterIndex]?.faceIDs.push(faceID);
+            clusterIndexByFaceID.set(faceID, nnClusterIndex);
         }
     }
 
-    log.debug(() => ["ml/cluster", { faces, clusters, clusterIndexByFaceID }]);
+    clusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1);
+
+    log.debug(
+        () =>
+            `Clustered ${faces.length} faces into ${clusters.length} clusters (${Date.now() - t} ms)`,
+    );
 
-    return undefined;
+    return clusters;
 };
 
 /**
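The loop above is greedy nearest-neighbour clustering: each face joins the cluster of its most similar already-seen face if the similarity crosses the threshold, and otherwise seeds a new cluster. A standalone sketch over plain number arrays (assumed normalized, so the dot product equals cosine similarity); note the `push(...) - 1` bookkeeping for a freshly created cluster's index:

```ts
const dot = (a: number[], b: number[]) =>
    a.reduce((sum, x, i) => sum + x * b[i]!, 0);

const cluster = (embeddings: number[][], threshold = 0.76) => {
    const clusters: number[][] = []; // each cluster is a list of item indexes
    const clusterOf: number[] = []; // item index -> cluster index
    embeddings.forEach((e, i) => {
        // Nearest already-seen neighbour above the threshold, if any.
        let nn = -1;
        let nnSim = threshold;
        for (let j = 0; j < i; j++) {
            const sim = dot(e, embeddings[j]!);
            if (sim > nnSim) [nn, nnSim] = [j, sim];
        }
        if (nn == -1) {
            clusterOf[i] = clusters.push([i]) - 1; // new cluster's index
        } else {
            const c = clusterOf[nn]!;
            clusters[c]!.push(i);
            clusterOf[i] = c;
        }
    });
    return clusters.filter((c) => c.length > 1); // drop singleton clusters
};
```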
@@ -139,6 +139,17 @@ export interface Face {
      * This ID is guaranteed to be unique for all the faces detected in all the
      * files for the user. In particular, each file can have multiple faces but
      * they all will get their own unique {@link faceID}.
+     *
+     * This ID is also meant to be stable across reindexing. That is, if the
+     * same algorithm and hyperparameters are used to reindex the file, then it
+     * should result in the same face IDs. This allows us leeway in letting
+     * unnecessary reindexing happen in rare cases without invalidating the
+     * clusters that rely on the presence of the given face ID.
+     *
+     * Finally, this face ID is not completely opaque. It consists of underscore
+     * separated components, the first of which is the ID of the
+     * {@link EnteFile} to which this face belongs. Client code can rely on this
+     * structure and can parse it if needed.
      */
     faceID: string;
     /**
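Since the face ID embeds the file ID as its first underscore-separated component, the owning file can be recovered without any lookup. A sketch (this commit adds the real helper, fileIDFromFaceID, further down):

```ts
const fileIDOfFace = (faceID: string): number | undefined => {
    const fileID = parseInt(faceID.split("_")[0] ?? "");
    return isNaN(fileID) ? undefined : fileID;
};

fileIDOfFace("10250_3_a1b2"); // => 10250 (hypothetical face ID)
```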
@@ -3,6 +3,7 @@
  */
 
 import { isDesktop } from "@/base/app";
+import { assertionFailed } from "@/base/assert";
 import { blobCache } from "@/base/blob-cache";
 import { ensureElectron } from "@/base/electron";
 import { isDevBuild } from "@/base/env";
@@ -15,6 +16,7 @@ import { ensure } from "@/utils/ensure";
 import { throttled } from "@/utils/promise";
 import { proxy, transfer } from "comlink";
 import { isInternalUser } from "../feature-flags";
+import { getAllLocalFiles } from "../files";
 import { getRemoteFlag, updateRemoteFlag } from "../remote-store";
 import type { UploadItem } from "../upload/types";
 import { clusterFaces } from "./cluster-new";
@@ -25,43 +27,64 @@ import {
     faceIndexes,
     indexableAndIndexedCounts,
 } from "./db";
+import type { Person } from "./people";
 import { MLWorker } from "./worker";
 import type { CLIPMatches } from "./worker-types";
 
 /**
- * In-memory flag that tracks if ML is enabled.
+ * Internal state of the ML subsystem.
  *
- * - On app start, this is read from local storage during {@link initML}.
- *
- * - It gets updated when we sync with remote (so if the user enables/disables
- *   ML on a different device, this local value will also become true/false).
- *
- * - It gets updated when the user enables/disables ML on this device.
- *
- * - It is cleared in {@link logoutML}.
+ * These are essentially cached values used by the functions of this module.
+ *
+ * This should be cleared on logout.
  */
-let _isMLEnabled = false;
+class MLState {
+    /**
+     * In-memory flag that tracks if ML is enabled.
+     *
+     * - On app start, this is read from local storage during {@link initML}.
+     *
+     * - It gets updated when we sync with remote (so if the user enables/disables
+     *   ML on a different device, this local value will also become true/false).
+     *
+     * - It gets updated when the user enables/disables ML on this device.
+     *
+     * - It is cleared in {@link logoutML}.
+     */
+    isMLEnabled = false;
 
-/** Cached instance of the {@link ComlinkWorker} that wraps our web worker. */
-let _comlinkWorker: Promise<ComlinkWorker<typeof MLWorker>> | undefined;
+    /**
+     * Cached instance of the {@link ComlinkWorker} that wraps our web worker.
+     */
+    comlinkWorker: Promise<ComlinkWorker<typeof MLWorker>> | undefined;
 
-/**
- * Subscriptions to {@link MLStatus}.
- *
- * See {@link mlStatusSubscribe}.
- */
-let _mlStatusListeners: (() => void)[] = [];
+    /**
+     * Subscriptions to {@link MLStatus}.
+     *
+     * See {@link mlStatusSubscribe}.
+     */
+    mlStatusListeners: (() => void)[] = [];
 
-/**
- * Snapshot of {@link MLStatus}.
- *
- * See {@link mlStatusSnapshot}.
- */
-let _mlStatusSnapshot: MLStatus | undefined;
+    /**
+     * Snapshot of {@link MLStatus}.
+     *
+     * See {@link mlStatusSnapshot}.
+     */
+    mlStatusSnapshot: MLStatus | undefined;
+
+    /**
+     * In flight face crop regeneration promises indexed by the IDs of the files
+     * whose faces we are regenerating.
+     */
+    inFlightFaceCropRegens = new Map<number, Promise<void>>();
+}
+
+/** State shared by the functions in this module. See {@link MLState}. */
+let _state = new MLState();
 
 /** Lazily created, cached, instance of {@link MLWorker}. */
 const worker = () =>
-    (_comlinkWorker ??= createComlinkWorker()).then((cw) => cw.remote);
+    (_state.comlinkWorker ??= createComlinkWorker()).then((cw) => cw.remote);
 
 const createComlinkWorker = async () => {
     const electron = ensureElectron();
@@ -95,9 +118,9 @@ const createComlinkWorker = async () => {
  * It is also called when the user pauses or disables ML.
  */
 export const terminateMLWorker = async () => {
-    if (_comlinkWorker) {
-        await _comlinkWorker.then((cw) => cw.terminate());
-        _comlinkWorker = undefined;
+    if (_state.comlinkWorker) {
+        await _state.comlinkWorker.then((cw) => cw.terminate());
+        _state.comlinkWorker = undefined;
     }
 };
@@ -149,7 +172,7 @@ export const canEnableML = async () =>
  * Initialize the ML subsystem if the user has enabled it in preferences.
  */
 export const initML = () => {
-    _isMLEnabled = isMLEnabledLocal();
+    _state.isMLEnabled = isMLEnabledLocal();
 };
 
 export const logoutML = async () => {
@@ -158,9 +181,7 @@ export const logoutML = async () => {
     // execution contexts], it gets called first in the logout sequence, and
     // then this function (`logoutML`) gets called at a later point in time.
 
-    _isMLEnabled = false;
-    _mlStatusListeners = [];
-    _mlStatusSnapshot = undefined;
+    _state = new MLState();
     await clearMLDB();
 };
@@ -173,7 +194,7 @@ export const logoutML = async () => {
  */
 export const isMLEnabled = () =>
     // Implementation note: Keep it fast, it might be called frequently.
-    _isMLEnabled;
+    _state.isMLEnabled;
 
 /**
  * Enable ML.
@@ -183,7 +204,7 @@ export const isMLEnabled = () =>
 export const enableML = async () => {
     await updateIsMLEnabledRemote(true);
     setIsMLEnabledLocal(true);
-    _isMLEnabled = true;
+    _state.isMLEnabled = true;
     setInterimScheduledStatus();
     triggerStatusUpdate();
     triggerMLSync();
@@ -198,7 +219,7 @@ export const enableML = async () => {
 export const disableML = async () => {
     await updateIsMLEnabledRemote(false);
     setIsMLEnabledLocal(false);
-    _isMLEnabled = false;
+    _state.isMLEnabled = false;
     await terminateMLWorker();
     triggerStatusUpdate();
 };
@@ -257,18 +278,18 @@ const updateIsMLEnabledRemote = (enabled: boolean) =>
 export const triggerMLSync = () => void mlSync();
 
 const mlSync = async () => {
-    _isMLEnabled = await getIsMLEnabledRemote();
-    setIsMLEnabledLocal(_isMLEnabled);
+    _state.isMLEnabled = await getIsMLEnabledRemote();
+    setIsMLEnabledLocal(_state.isMLEnabled);
     triggerStatusUpdate();
 
-    if (_isMLEnabled) void worker().then((w) => w.sync());
-    // TODO-ML
-    if (_isMLEnabled) void wipCluster();
+    if (_state.isMLEnabled) void worker().then((w) => w.sync());
 };
 
 /**
  * Run indexing on a file which was uploaded from this client.
  *
  * Indexing only happens if ML is enabled.
  *
  * This function is called by the uploader when it uploads a new file from this
  * client, giving us the opportunity to index it live. This is only an
  * optimization - if we don't index it now it'll anyways get indexed later as
@@ -282,20 +303,54 @@ const mlSync = async () => {
  * image part of the live photo that was uploaded.
  */
 export const indexNewUpload = (enteFile: EnteFile, uploadItem: UploadItem) => {
-    if (!_isMLEnabled) return;
+    if (!isMLEnabled()) return;
     if (enteFile.metadata.fileType !== FileType.image) return;
     log.debug(() => ["ml/liveq", { enteFile, uploadItem }]);
     void worker().then((w) => w.onUpload(enteFile, uploadItem));
 };
 
+let last: Person[] | undefined;
+
 /**
  * WIP! Don't enable, dragon eggs are hatching here.
  */
-export const wipCluster = async () => {
-    if (!isDevBuild || !(await isInternalUser())) return;
-    if (!process.env.NEXT_PUBLIC_ENTE_WIP_CL) return;
+export const wipClusterEnable = async () => {
+    if (!isDevBuild || !(await isInternalUser())) return false;
+    if (!process.env.NEXT_PUBLIC_ENTE_WIP_CL) return false;
+    return true;
+};
 
-    clusterFaces(await faceIndexes());
+export const wipCluster = async () => {
+    if (!(await wipClusterEnable())) return;
+
+    if (last) return last;
+
+    const clusters = clusterFaces(await faceIndexes());
+
+    const localFiles = await getAllLocalFiles();
+    const localFilesByID = new Map(localFiles.map((f) => [f.id, f]));
+
+    const people: Person[] = []; // await mlIDbStorage.getAllPeople();
+    for (const cluster of clusters) {
+        // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+        const dfID = cluster.faceIDs[0]!;
+        const dfFile = localFilesByID.get(fileIDFromFaceID(dfID) ?? 0);
+        if (!dfFile) {
+            assertionFailed(`Face ID ${dfID} without local file`);
+            continue;
+        }
+        people.push({
+            id: Math.random(), //cluster.id,
+            name: "test",
+            // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+            files: cluster.faceIDs.map((s) => parseInt(s.split("_")[0]!)),
+            displayFaceID: dfID,
+            displayFaceFile: dfFile,
+        });
+    }
+
+    last = people;
+    return people;
 };
 
 export type MLStatus =
@@ -336,9 +391,11 @@ export type MLStatus =
  * @returns A function that can be used to clear the subscription.
  */
 export const mlStatusSubscribe = (onChange: () => void): (() => void) => {
-    _mlStatusListeners.push(onChange);
+    _state.mlStatusListeners.push(onChange);
     return () => {
-        _mlStatusListeners = _mlStatusListeners.filter((l) => l != onChange);
+        _state.mlStatusListeners = _state.mlStatusListeners.filter(
+            (l) => l != onChange,
+        );
     };
 };
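This subscribe function, paired with mlStatusSnapshot below, has exactly the shape React's useSyncExternalStore expects, so a component can bind to the ML status with no extra glue. A sketch of that hookup (not itself part of this commit):

```tsx
import { useSyncExternalStore } from "react";
import {
    mlStatusSnapshot,
    mlStatusSubscribe,
} from "@/new/photos/services/ml";

// Re-renders whenever a status listener fires; reads the cached snapshot.
const useMLStatus = () =>
    useSyncExternalStore(mlStatusSubscribe, mlStatusSnapshot);
```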
@@ -352,7 +409,7 @@ export const mlStatusSubscribe = (onChange: () => void): (() => void) => {
  * asynchronous tasks that are needed to get the status.
  */
 export const mlStatusSnapshot = (): MLStatus | undefined => {
-    const result = _mlStatusSnapshot;
+    const result = _state.mlStatusSnapshot;
     // We don't have it yet, trigger an update.
     if (!result) triggerStatusUpdate();
     return result;
@@ -369,15 +426,15 @@ const updateMLStatusSnapshot = async () =>
     setMLStatusSnapshot(await getMLStatus());
 
 const setMLStatusSnapshot = (snapshot: MLStatus) => {
-    _mlStatusSnapshot = snapshot;
-    _mlStatusListeners.forEach((l) => l());
+    _state.mlStatusSnapshot = snapshot;
+    _state.mlStatusListeners.forEach((l) => l());
 };
 
 /**
  * Compute the current state of the ML subsystem.
  */
 const getMLStatus = async (): Promise<MLStatus> => {
-    if (!_isMLEnabled) return { phase: "disabled" };
+    if (!_state.isMLEnabled) return { phase: "disabled" };
 
     const { indexedCount, indexableCount } = await indexableAndIndexedCounts();
 
@@ -409,8 +466,11 @@ const getMLStatus = async (): Promise<MLStatus> => {
 const setInterimScheduledStatus = () => {
     let nSyncedFiles = 0,
         nTotalFiles = 0;
-    if (_mlStatusSnapshot && _mlStatusSnapshot.phase != "disabled") {
-        ({ nSyncedFiles, nTotalFiles } = _mlStatusSnapshot);
+    if (
+        _state.mlStatusSnapshot &&
+        _state.mlStatusSnapshot.phase != "disabled"
+    ) {
+        ({ nSyncedFiles, nTotalFiles } = _state.mlStatusSnapshot);
     }
     setMLStatusSnapshot({ phase: "scheduled", nSyncedFiles, nTotalFiles });
 };
@@ -444,25 +504,52 @@ export const unidentifiedFaceIDs = async (
     return index?.faces.map((f) => f.faceID) ?? [];
 };
 
+/**
+ * Extract the ID of the {@link EnteFile} to which a face belongs from its ID.
+ */
+const fileIDFromFaceID = (faceID: string) => {
+    const fileID = parseInt(faceID.split("_")[0] ?? "");
+    if (isNaN(fileID)) {
+        assertionFailed(`Ignoring attempt to parse invalid faceID ${faceID}`);
+        return undefined;
+    }
+    return fileID;
+};
+
+/**
+ * Return the cached face crop for the given face, regenerating it if needed.
+ *
+ * @param faceID The id of the face whose face crop we want.
+ *
+ * @param enteFile The {@link EnteFile} that contains this face.
+ */
+export const faceCrop = async (faceID: string, enteFile: EnteFile) => {
+    let inFlight = _state.inFlightFaceCropRegens.get(enteFile.id);
+
+    if (!inFlight) {
+        inFlight = regenerateFaceCropsIfNeeded(enteFile);
+        _state.inFlightFaceCropRegens.set(enteFile.id, inFlight);
+    }
+
+    await inFlight;
+
+    const cache = await blobCache("face-crops");
+    return cache.get(faceID);
+};
+
 /**
  * Check to see if any of the faces in the given file do not have a face crop
  * present locally. If so, then regenerate the face crops for all the faces in
  * the file (updating the "face-crops" {@link BlobCache}).
- *
- * @returns true if one or more face crops were regenerated; false otherwise.
  */
-export const regenerateFaceCropsIfNeeded = async (enteFile: EnteFile) => {
+const regenerateFaceCropsIfNeeded = async (enteFile: EnteFile) => {
     const index = await faceIndex(enteFile.id);
-    if (!index) return false;
+    if (!index) return;
 
-    const faceIDs = index.faces.map((f) => f.faceID);
     const cache = await blobCache("face-crops");
-    for (const id of faceIDs) {
-        if (!(await cache.has(id))) {
-            await regenerateFaceCrops(enteFile, index);
-            return true;
-        }
-    }
+    const faceIDs = index.faces.map((f) => f.faceID);
+    let needsRegen = false;
+    for (const id of faceIDs) if (!(await cache.has(id))) needsRegen = true;
 
-    return false;
+    if (needsRegen) await regenerateFaceCrops(enteFile, index);
 };
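faceCrop above coalesces concurrent regenerations for the same file by caching the in-flight promise keyed by file ID, and it retains the settled promise so the regeneration check runs at most once per file per session. The same pattern, extracted as a generic sketch:

```ts
const tasks = new Map<number, Promise<void>>();

// Run `task` for `id` at most once; concurrent and later callers share the
// same (possibly already settled) promise.
const runOnce = (id: number, task: () => Promise<void>) => {
    let p = tasks.get(id);
    if (!p) {
        p = task();
        tasks.set(id, p);
    }
    return p;
};
```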
@@ -1,8 +1,11 @@
+import type { EnteFile } from "../../types/file";
+
 export interface Person {
     id: number;
     name?: string;
     files: number[];
-    displayFaceId: string;
+    displayFaceID: string;
+    displayFaceFile: EnteFile;
 }
 
 // Forced disable clustering. It doesn't currently work.
web/packages/new/photos/services/user-entity.ts (new file, +132)
@@ -0,0 +1,132 @@
+import { decryptMetadataBytes } from "@/base/crypto/ente";
+import { authenticatedRequestHeaders, ensureOk } from "@/base/http";
+import { apiURL } from "@/base/origins";
+import { z } from "zod";
+
+/**
+ * User entities are predefined lists of otherwise arbitrary data that the user
+ * can store for their account.
+ *
+ * e.g. location tags, people in their photos.
+ */
+export type EntityType =
+    | "person"
+    /**
+     * A new version of the Person entity where the data is gzipped before
+     * encryption.
+     */
+    | "person_v2";
+
+/**
+ * The maximum number of items to fetch in a single diff
+ *
+ * [Note: Limit of returned items in /diff requests]
+ *
+ * The various GET /diff API methods, which tell the client what all has changed
+ * since a timestamp (provided by the client) take a limit parameter.
+ *
+ * These diff API calls return all items whose updated at is greater
+ * (non-inclusive) than the timestamp we provide. So there is no mechanism for
+ * pagination of items which have the exact same updated at.
+ *
+ * Conceptually, it may happen that there are more items than the limit we've
+ * provided, but there are practical safeguards.
+ *
+ * For file diff, the limit is advisory, and remote may return fewer, equal, or
+ * more items than the provided limit. The scenario where it returns more is
+ * when more files than the limit have the same updated at. Theoretically it
+ * would make the diff response unbounded, however in practice file
+ * modifications themselves are all batched. Even if the user were to select all
+ * the files in their library and update them all in one go in the UI, their
+ * client app is required to use batched API calls to make those updates, and
+ * each of those batches would get distinct updated at.
+ */
+const defaultDiffLimit = 500;
+
+/**
+ * A generic user entity.
+ *
+ * This is an intermediate step, usually what we really want is a version of
+ * this with the {@link data} parsed to the specific type of JSON object
+ * expected to be associated with this entity type.
+ */
+interface UserEntity {
+    /**
+     * Arbitrary data associated with the entity. The format of this data is
+     * specific to each entity type.
+     *
+     * This will not be present for entities that have been deleted on remote.
+     */
+    data: Uint8Array | undefined;
+    /**
+     * Epoch microseconds denoting when this entity was created or last updated.
+     */
+    updatedAt: number;
+}
+
+const RemoteUserEntity = z.object({
+    /** Base64 string containing the encrypted contents of the entity. */
+    encryptedData: z.string(),
+    /** Base64 string containing the decryption header. */
+    header: z.string(),
+    isDeleted: z.boolean(),
+    updatedAt: z.number(),
+});
+
+/**
+ * Fetch all user entities of the given type that have been created or updated
+ * since the given time.
+ *
+ * @param type The type of the entities to fetch.
+ *
+ * @param sinceTime Epoch microseconds. This is used to ask remote to provide
+ * us only entities whose {@link updatedAt} is more than the given value. Set
+ * this to zero to start from the beginning.
+ *
+ * @param entityKeyB64 The base64 encoded key to use for decrypting the
+ * encrypted contents of the user entity.
+ */
+export const userEntityDiff = async (
+    type: EntityType,
+    sinceTime: number,
+    entityKeyB64: string,
+): Promise<UserEntity[]> => {
+    const decrypt = (dataB64: string, headerB64: string) =>
+        decryptMetadataBytes(dataB64, headerB64, entityKeyB64);
+
+    const params = new URLSearchParams({
+        type,
+        sinceTime: sinceTime.toString(),
+        limit: defaultDiffLimit.toString(),
+    });
+    const url = await apiURL(`/user-entity/entity/diff`);
+    const res = await fetch(`${url}?${params.toString()}`, {
+        headers: await authenticatedRequestHeaders(),
+    });
+    ensureOk(res);
+    const entities = z
+        .object({ diff: z.array(RemoteUserEntity) })
+        .parse(await res.json()).diff;
+    return Promise.all(
+        entities.map(
+            async ({ encryptedData, header, isDeleted, updatedAt }) => ({
+                data: isDeleted
+                    ? undefined
+                    : await decrypt(encryptedData, header),
+                updatedAt,
+            }),
+        ),
+    );
+};
+
+/**
+ * Fetch all Person entities that have been created or updated since the last
+ * time we checked.
+ */
+export const personDiff = async (entityKeyB64: string) => {
+    const entities = await userEntityDiff("person", 0, entityKeyB64);
+    return entities.map(({ data }) => {
+        if (!data) return undefined;
+        return JSON.parse(new TextDecoder().decode(data)) as unknown;
+    });
+};
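A usage sketch, mirroring the call site added to getAllPeople earlier in this commit; the import path for getEntityKey is illustrative, and `entityKey.data` holds the base64 entity key:

```ts
import { personDiff } from "@/new/photos/services/user-entity";
import { getEntityKey } from "services/entityService"; // path assumed

const fetchPeople = async () => {
    const entityKey = await getEntityKey("person");
    return await personDiff(entityKey.data);
};
```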