[desktop] Incorporate mldata version check

This commit is contained in:
Manav Rathi
2025-05-13 18:24:46 +05:30
parent 236301dc16
commit 9d5960c6fe
3 changed files with 54 additions and 7 deletions

View File

@@ -7,6 +7,7 @@ import {
} from "ente-base/http";
import { apiURL } from "ente-base/origins";
import type { EnteFile } from "ente-media/file";
import { nullToUndefined } from "ente-utils/transform";
import { z } from "zod";
/**
@@ -43,6 +44,29 @@ const RemoteFileData = z.object({
* crypto layer.
*/
decryptionHeader: z.string(),
/**
* The epoch microseconds when this file data entry was last upserted.
*
* [Note: PUT "mldata" version check]
*
* When PUT-ting mldata onto remote, the client is expected to pass the
* {@link updatedAt} of the existing {@link RemoteFileData} which it is
* updating (this field), or 0 if the client is creating a new entity.
*
* This allows remote to detect and reject cases where the client is trying
* to overwrite a version it hasn't yet pulled.
*
* About the optionality of this field: Newer museums are expected to always
* provide the {@link updatedAt} in the response, but for ease of self
* hosters we don't take a hard dependency on the latest museum and instead
* allow this field to be optional. When it is not present, effectively
* we'll pass 0 as {@link lastUpdatedAt} in the "mldata" PUT API call, but
* since it's an old museum it'll anyway ignore it.
*
* > This note was added May 2025, and the optionality can be removed in a
* > few months when museums should've updated (tag: Migration).
*/
updatedAt: z.number().nullish().transform(nullToUndefined),
});
type RemoteFileData = z.infer<typeof RemoteFileData>;
@@ -257,11 +281,15 @@ export const syncUpdatedFileDataFileIDs = async (
*
* @param data The binary data to upload. The exact contents of the data are
* {@link type} specific.
*
* @param lastUpdatedAt The {@link updatedAt} of the {@link RemoteFileData}
* which we are updating, or 0 to indicate a new entity.
*/
export const putFileData = async (
file: EnteFile,
type: FileDataType,
data: Uint8Array,
lastUpdatedAt: number,
) => {
const { encryptedData, decryptionHeader } = await encryptBlobB64(
data,
@@ -276,6 +304,7 @@ export const putFileData = async (
type,
encryptedData,
decryptionHeader,
lastUpdatedAt,
}),
});
ensureOk(res);

View File

@@ -56,6 +56,8 @@ import { type RemoteFaceIndex } from "./face";
export interface RemoteMLData {
/**
 * The JSON object obtained from the remote payload, kept as an untyped
 * string-keyed record.
 */
raw: RawRemoteMLData;
/**
 * The result of validating {@link raw} against the
 * {@link ParsedRemoteMLData} schema, or `undefined` if that validation did
 * not succeed.
 */
parsed: ParsedRemoteMLData | undefined;
/**
 * The `updatedAt` of the remote file data entry from which this ML data was
 * obtained, or `undefined` if remote did not provide one.
 *
 * When the ML data for the file is subsequently PUT back, this value (or 0
 * when it is `undefined`) is what gets passed as `lastUpdatedAt`.
 *
 * See: [Note: PUT "mldata" version check]
 */
updatedAt: number | undefined;
}
export type RawRemoteMLData = Record<string, unknown>;
@@ -159,7 +161,7 @@ export const fetchMLData = async (
const result = new Map<number, RemoteMLData>();
for (const remoteFileData of remoteFileDatas) {
const { fileID } = remoteFileData;
const { fileID, updatedAt } = remoteFileData;
const file = filesByID.get(fileID);
if (!file) {
log.warn(`Ignoring ML data for unknown file id ${fileID}`);
@@ -173,7 +175,10 @@ export const fetchMLData = async (
// @ts-ignore
const decryptedBytes = await decryptBlob(remoteFileData, file.key);
const jsonString = await gunzip(decryptedBytes);
result.set(fileID, remoteMLDataFromJSONString(jsonString));
result.set(
fileID,
remoteMLDataFromJSONString(jsonString, updatedAt),
);
} catch (e) {
// This shouldn't happen. Best guess is that some client has
// uploaded a corrupted ML index. Ignore it so that it gets
@@ -185,7 +190,10 @@ export const fetchMLData = async (
return result;
};
const remoteMLDataFromJSONString = (jsonString: string) => {
const remoteMLDataFromJSONString = (
jsonString: string,
updatedAt: number | undefined,
) => {
const raw = RawRemoteMLData.parse(JSON.parse(jsonString));
const parseResult = ParsedRemoteMLData.safeParse(raw);
// TODO: [Note: strict mode migration]
@@ -199,7 +207,7 @@ const remoteMLDataFromJSONString = (jsonString: string) => {
const parsed = parseResult.success
? (parseResult.data as ParsedRemoteMLData)
: undefined;
return { raw, parsed };
return { raw, parsed, updatedAt };
};
/**
@@ -214,5 +222,14 @@ const remoteMLDataFromJSONString = (jsonString: string) => {
*
* See: [Note: Preserve unknown ML data fields].
*/
export const putMLData = async (file: EnteFile, mlData: RawRemoteMLData) =>
putFileData(file, "mldata", await gzip(JSON.stringify(mlData)));
export const putMLData = async (
    file: EnteFile,
    mlData: RawRemoteMLData,
    lastUpdatedAt: number,
) => {
    // Serialize the ML data as JSON, then compress it; the gzipped bytes are
    // what get stored as the file's "mldata".
    const serialized = JSON.stringify(mlData);
    const compressed = await gzip(serialized);

    // Forward lastUpdatedAt untouched; see: [Note: PUT "mldata" version check].
    return putFileData(file, "mldata", compressed, lastUpdatedAt);
};

View File

@@ -614,7 +614,8 @@ const index = async (
log.debug(() => ["Uploading ML data", rawMLData]);
try {
await putMLData(file, rawMLData);
const lastUpdatedAt = remoteMLData?.updatedAt ?? 0;
await putMLData(file, rawMLData, lastUpdatedAt);
} catch (e) {
// See: [Note: Transient and permanent indexing failures]
if (isHTTP4xxError(e)) await markIndexingFailed(fileID);