Split
This commit is contained in:
112
web/packages/new/photos/services/file-data.ts
Normal file
112
web/packages/new/photos/services/file-data.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
import { encryptFileEmbedding } from "@/base/crypto/ente";
|
||||
import { authenticatedRequestHeaders, ensureOk } from "@/base/http";
|
||||
import { apiURL } from "@/base/origins";
|
||||
import type { EnteFile } from "@/new/photos/types/file";
|
||||
import { z } from "zod";
|
||||
|
||||
/**
|
||||
* [Note: Derived embeddings model]
|
||||
*
|
||||
* The API endpoints related to embeddings are parameterized by a "model"
|
||||
* enum. This is a bit of a misnomer, since the contents of the payload are not
|
||||
* just the raw embeddings themselves, but also additional data generated by the
|
||||
* ML model.
|
||||
*
|
||||
* For example, the face indexing process generates multiple face embeddings per
|
||||
* file, each with an associated detection box. So instead of storing just a
|
||||
* singular embedding, the data is an entire face index structure containing
|
||||
* multiple embeddings and associated data.
|
||||
*
|
||||
* Further down, it was realized that the fan out caused on remote when trying
|
||||
* to fetch both CLIP and face embeddings was problematic, and also that their
|
||||
* raw JSON was unnecessarily big. To deal with these better, we now have a
|
||||
* single "derived" model type, whose data is a gzipped map of the form:
|
||||
*
|
||||
* {
|
||||
* "face": ... the face indexing result ...
|
||||
* "clip": ... the CLIP indexing result ...
|
||||
* ... more in the future ...
|
||||
* }
|
||||
*/
|
||||
type FileDataType = "mldata";
|
||||
|
||||
/**
 * Zod schema for one item of encrypted file data as returned by remote.
 *
 * {@link fetchFileData} validates each element of the response's `data` array
 * against this schema.
 */
const RemoteFileData = z.object({
    /** ID of the file that this embedding belongs to. */
    fileID: z.number(),
    /** The encrypted (model specific) embedding JSON, encoded as base64. */
    encryptedData: z.string(),
    /**
     * The base64 encoded header that needs to be provided when decrypting
     * {@link encryptedData}. See the {@link decryptMetadata} function in the
     * crypto layer.
     */
    decryptionHeader: z.string(),
});

/** The static type inferred from the {@link RemoteFileData} zod schema. */
type RemoteFileData = z.infer<typeof RemoteFileData>;
|
||||
|
||||
/**
 * Fetch the file data of the given type for the given list of files.
 *
 * Makes an authenticated POST request to the "/files/data/fetch" endpoint and
 * validates the shape of the JSON response before returning it.
 *
 * @param model The {@link FileDataType} which we want. It is sent to remote as
 * the `type` field of the request body.
 *
 * @param fileIDs The ids of the files for which we want the data.
 *
 * @returns a list of {@link RemoteFileData} for the files which had data (and
 * that remote was able to successfully retrieve). The order of this list is
 * arbitrary, and the caller should use the {@link fileID} present within the
 * {@link RemoteFileData} to associate an item in the result back to a file
 * instead of relying on the order or count of items in the result.
 */
export const fetchFileData = async (
    model: FileDataType,
    fileIDs: number[],
): Promise<RemoteFileData[]> => {
    const res = await fetch(await apiURL("/files/data/fetch"), {
        method: "POST",
        headers: await authenticatedRequestHeaders(),
        // Forward the requested type instead of hard-coding "mldata", so that
        // the parameter is honoured if FileDataType ever gains more members.
        body: JSON.stringify({ type: model, fileIDs }),
    });
    // NOTE(review): ensureOk presumably rejects non-OK responses - confirm
    // against its definition in "@/base/http".
    ensureOk(res);
    // Parse (and thereby validate) the response; only the `data` array is
    // handed back to the caller.
    return z.object({ data: z.array(RemoteFileData) }).parse(await res.json())
        .data;
};
|
||||
|
||||
/**
 * Upload an embedding to remote.
 *
 * This function will save or update the given embedding as the latest embedding
 * associated with the given {@link enteFile} for {@link type}.
 *
 * The embedding is encrypted using the file's key before being sent in an
 * authenticated PUT request to the "/files/data" endpoint.
 *
 * @param enteFile The {@link EnteFile} to which this embedding relates.
 *
 * @param type The {@link FileDataType} which we are uploading. It is sent to
 * remote as the `type` field of the request body.
 *
 * @param embedding The binary data of the embedding. The exact contents of the
 * embedding are {@link type} specific.
 */
export const putFileData = async (
    enteFile: EnteFile,
    type: FileDataType,
    embedding: Uint8Array,
) => {
    const { encryptedDataB64, decryptionHeaderB64 } =
        await encryptFileEmbedding({ data: embedding, keyB64: enteFile.key });

    const res = await fetch(await apiURL("/files/data"), {
        method: "PUT",
        headers: await authenticatedRequestHeaders(),
        body: JSON.stringify({
            fileID: enteFile.id,
            encryptedData: encryptedDataB64,
            decryptionHeader: decryptionHeaderB64,
            // Forward the caller's type instead of hard-coding "mldata", so
            // that the parameter is honoured if FileDataType gains members.
            type,
        }),
    });
    // NOTE(review): ensureOk presumably rejects non-OK responses - confirm
    // against its definition in "@/base/http".
    ensureOk(res);
};
|
||||
@@ -1,57 +1,13 @@
|
||||
import { decryptFileEmbedding, encryptFileEmbedding } from "@/base/crypto/ente";
|
||||
import { authenticatedRequestHeaders, ensureOk } from "@/base/http";
|
||||
import { decryptFileEmbedding } from "@/base/crypto/ente";
|
||||
import log from "@/base/log";
|
||||
import { apiURL } from "@/base/origins";
|
||||
import type { EnteFile } from "@/new/photos/types/file";
|
||||
import { nullToUndefined } from "@/utils/transform";
|
||||
import { z } from "zod";
|
||||
import { fetchFileData, putFileData } from "../file-data";
|
||||
import { gunzip, gzip } from "../gzip";
|
||||
import { type RemoteCLIPIndex } from "./clip";
|
||||
import { type RemoteFaceIndex } from "./face";
|
||||
|
||||
/**
|
||||
* [Note: Derived embeddings model]
|
||||
*
|
||||
* The API endpoints related to embeddings are parameterized by a "model"
|
||||
* enum. This is a bit of a misnomer, since the contents of the payload are not
|
||||
* just the raw embeddings themselves, but also additional data generated by the
|
||||
* ML model.
|
||||
*
|
||||
* For example, the face indexing process generates multiple face embeddings per
|
||||
* file, each with an associated detection box. So instead of storing just a
|
||||
* singular embedding, the data is an entire face index structure containing
|
||||
* multiple embeddings and associated data.
|
||||
*
|
||||
* Further down, it was realized that the fan out caused on remote when trying
|
||||
* to fetch both CLIP and face embeddings was problematic, and also that their
|
||||
* raw JSON was unnecessarily big. To deal with these better, we now have a
|
||||
* single "derived" model type, whose data is a gzipped map of the form:
|
||||
*
|
||||
* {
|
||||
* "face": ... the face indexing result ...
|
||||
* "clip": ... the CLIP indexing result ...
|
||||
* ... more in the future ...
|
||||
* }
|
||||
*/
|
||||
type FileDataType = "mldata";
|
||||
|
||||
/**
 * Zod schema for one item of encrypted file data as returned by remote.
 *
 * {@link fetchFileData} validates each element of the response's `data` array
 * against this schema.
 */
const RemoteFileData = z.object({
    /** ID of the file that this embedding belongs to. */
    fileID: z.number(),
    /** The encrypted (model specific) embedding JSON, encoded as base64. */
    encryptedData: z.string(),
    /**
     * The base64 encoded header that needs to be provided when decrypting
     * {@link encryptedData}. See the {@link decryptMetadata} function in the
     * crypto layer.
     */
    decryptionHeader: z.string(),
});

/** The static type inferred from the {@link RemoteFileData} zod schema. */
type RemoteFileData = z.infer<typeof RemoteFileData>;
|
||||
|
||||
/**
 * The ML data of a file in its raw, untyped form: a map from string keys to
 * values of unknown shape.
 *
 * {@link putMLData} serializes a value of this type to JSON and gzips it before
 * uploading it.
 */
export type RawRemoteMLData = Record<string, unknown>;
|
||||
|
||||
export type ParsedRemoteMLData = Partial<{
|
||||
@@ -224,36 +180,6 @@ const remoteMLDataFromJSONString = (jsonString: string) => {
|
||||
return { raw, parsed };
|
||||
};
|
||||
|
||||
/**
 * Fetch the file data of the given type for the given list of files.
 *
 * Makes an authenticated POST request to the "/files/data/fetch" endpoint and
 * validates the shape of the JSON response before returning it.
 *
 * @param model The {@link FileDataType} which we want. It is sent to remote as
 * the `type` field of the request body.
 *
 * @param fileIDs The ids of the files for which we want the data.
 *
 * @returns a list of {@link RemoteFileData} for the files which had data (and
 * that remote was able to successfully retrieve). The order of this list is
 * arbitrary, and the caller should use the {@link fileID} present within the
 * {@link RemoteFileData} to associate an item in the result back to a file
 * instead of relying on the order or count of items in the result.
 */
const fetchFileData = async (
    model: FileDataType,
    fileIDs: number[],
): Promise<RemoteFileData[]> => {
    const res = await fetch(await apiURL("/files/data/fetch"), {
        method: "POST",
        headers: await authenticatedRequestHeaders(),
        // Forward the requested type instead of hard-coding "mldata", so that
        // the parameter is honoured if FileDataType ever gains more members.
        body: JSON.stringify({ type: model, fileIDs }),
    });
    // NOTE(review): ensureOk presumably rejects non-OK responses - confirm
    // against its definition in "@/base/http".
    ensureOk(res);
    // Parse (and thereby validate) the response; only the `data` array is
    // handed back to the caller.
    return z.object({ data: z.array(RemoteFileData) }).parse(await res.json())
        .data;
};
|
||||
|
||||
/**
|
||||
* Update the derived data stored for given {@link enteFile} on remote.
|
||||
*
|
||||
@@ -270,37 +196,3 @@ export const putMLData = async (
|
||||
enteFile: EnteFile,
|
||||
derivedData: RawRemoteMLData,
|
||||
) => putFileData(enteFile, "mldata", await gzip(JSON.stringify(derivedData)));
|
||||
|
||||
/**
 * Upload an embedding to remote.
 *
 * This function will save or update the given embedding as the latest embedding
 * associated with the given {@link enteFile} for {@link type}.
 *
 * The embedding is encrypted using the file's key before being sent in an
 * authenticated PUT request to the "/files/data" endpoint.
 *
 * @param enteFile The {@link EnteFile} to which this embedding relates.
 *
 * @param type The {@link FileDataType} which we are uploading. It is sent to
 * remote as the `type` field of the request body.
 *
 * @param embedding The binary data of the embedding. The exact contents of the
 * embedding are {@link type} specific.
 */
const putFileData = async (
    enteFile: EnteFile,
    type: FileDataType,
    embedding: Uint8Array,
) => {
    const { encryptedDataB64, decryptionHeaderB64 } =
        await encryptFileEmbedding({ data: embedding, keyB64: enteFile.key });

    const res = await fetch(await apiURL("/files/data"), {
        method: "PUT",
        headers: await authenticatedRequestHeaders(),
        body: JSON.stringify({
            fileID: enteFile.id,
            encryptedData: encryptedDataB64,
            decryptionHeader: decryptionHeaderB64,
            // Forward the caller's type instead of hard-coding "mldata", so
            // that the parameter is honoured if FileDataType gains members.
            type,
        }),
    });
    // NOTE(review): ensureOk presumably rejects non-OK responses - confirm
    // against its definition in "@/base/http".
    ensureOk(res);
};
|
||||
|
||||
Reference in New Issue
Block a user