From 9d5960c6fe693a63231e33c707fcfbc74e25d24e Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 13 May 2025 18:24:46 +0530 Subject: [PATCH] [desktop] Incorporate mldata version check --- web/packages/gallery/services/file-data.ts | 29 +++++++++++++++++++ .../new/photos/services/ml/ml-data.ts | 29 +++++++++++++++---- web/packages/new/photos/services/ml/worker.ts | 3 +- 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/web/packages/gallery/services/file-data.ts b/web/packages/gallery/services/file-data.ts index 966ec00363..68d0b6a8c5 100644 --- a/web/packages/gallery/services/file-data.ts +++ b/web/packages/gallery/services/file-data.ts @@ -7,6 +7,7 @@ import { } from "ente-base/http"; import { apiURL } from "ente-base/origins"; import type { EnteFile } from "ente-media/file"; +import { nullToUndefined } from "ente-utils/transform"; import { z } from "zod"; /** @@ -43,6 +44,29 @@ const RemoteFileData = z.object({ * crypto layer. */ decryptionHeader: z.string(), + /** + * The epoch microseconds when this file data entry was last upserted. + * + * [Note: PUT "mldata" version check] + * + * When PUT-ting mldata onto remote, the client is expected to pass the + * updated at of the existing {@link RemoteFileData} which it is updating + * (this field), or 0 if the client is creating a new entity. + * + * This allows remote to detect and reject cases where the client is trying + * to overwrite a version it hasn't yet pulled. + * + * About the optionality of this field: Newer museums are expected to always + * provide the {@link updatedAt} in the response, but for ease of self + * hosters we don't take a hard dependency on the latest museum and instead + * allow this field to be optional. When it is not present, effectively + * we'll pass 0 as {@link lastUpdatedAt} in the "mldata" PUT API call, but + * since it's an old museum it'll anyway ignore it. + * + * > This note was added May 2025, and the optionality can be removed in a + * > few months when museums should've updated (tag: Migration). + */ + updatedAt: z.number().nullish().transform(nullToUndefined), }); type RemoteFileData = z.infer; @@ -257,11 +281,15 @@ export const syncUpdatedFileDataFileIDs = async ( * * @param data The binary data to upload. The exact contents of the data are * {@link type} specific. + * + * @param lastUpdatedAt The {@link updatedAt} of the {@link RemoteFileData} + * which we are updating, or 0 to indicate a new entity. */ export const putFileData = async ( file: EnteFile, type: FileDataType, data: Uint8Array, + lastUpdatedAt: number, ) => { const { encryptedData, decryptionHeader } = await encryptBlobB64( data, @@ -276,6 +304,7 @@ export const putFileData = async ( type, encryptedData, decryptionHeader, + lastUpdatedAt, }), }); ensureOk(res); diff --git a/web/packages/new/photos/services/ml/ml-data.ts b/web/packages/new/photos/services/ml/ml-data.ts index f2257369ac..228c915a88 100644 --- a/web/packages/new/photos/services/ml/ml-data.ts +++ b/web/packages/new/photos/services/ml/ml-data.ts @@ -56,6 +56,8 @@ import { type RemoteFaceIndex } from "./face"; export interface RemoteMLData { raw: RawRemoteMLData; parsed: ParsedRemoteMLData | undefined; + // See: [Note: PUT "mldata" version check] + updatedAt: number | undefined; } export type RawRemoteMLData = Record; @@ -159,7 +161,7 @@ export const fetchMLData = async ( const result = new Map(); for (const remoteFileData of remoteFileDatas) { - const { fileID } = remoteFileData; + const { fileID, updatedAt } = remoteFileData; const file = filesByID.get(fileID); if (!file) { log.warn(`Ignoring ML data for unknown file id ${fileID}`); @@ -173,7 +175,10 @@ export const fetchMLData = async ( // @ts-ignore const decryptedBytes = await decryptBlob(remoteFileData, file.key); const jsonString = await gunzip(decryptedBytes); - result.set(fileID, remoteMLDataFromJSONString(jsonString)); + result.set( + fileID, + remoteMLDataFromJSONString(jsonString, updatedAt), + ); } catch (e) { // This shouldn't happen. Best guess is that some client has // uploaded a corrupted ML index. Ignore it so that it gets @@ -185,7 +190,10 @@ export const fetchMLData = async ( return result; }; -const remoteMLDataFromJSONString = (jsonString: string) => { +const remoteMLDataFromJSONString = ( + jsonString: string, + updatedAt: number | undefined, +) => { const raw = RawRemoteMLData.parse(JSON.parse(jsonString)); const parseResult = ParsedRemoteMLData.safeParse(raw); // TODO: [Note: strict mode migration] @@ -199,7 +207,7 @@ const remoteMLDataFromJSONString = (jsonString: string) => { const parsed = parseResult.success ? (parseResult.data as ParsedRemoteMLData) : undefined; - return { raw, parsed }; + return { raw, parsed, updatedAt }; }; /** @@ -214,5 +222,14 @@ const remoteMLDataFromJSONString = (jsonString: string) => { * * See: [Note: Preserve unknown ML data fields]. */ -export const putMLData = async (file: EnteFile, mlData: RawRemoteMLData) => - putFileData(file, "mldata", await gzip(JSON.stringify(mlData))); +export const putMLData = async ( + file: EnteFile, + mlData: RawRemoteMLData, + lastUpdatedAt: number, +) => + putFileData( + file, + "mldata", + await gzip(JSON.stringify(mlData)), + lastUpdatedAt, + ); diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 5609c0c67e..7a82cff193 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -614,7 +614,8 @@ const index = async ( log.debug(() => ["Uploading ML data", rawMLData]); try { - await putMLData(file, rawMLData); + const lastUpdatedAt = remoteMLData?.updatedAt ?? 0; + await putMLData(file, rawMLData, lastUpdatedAt); } catch (e) { // See: [Note: Transient and permanent indexing failures] if (isHTTP4xxError(e)) await markIndexingFailed(fileID);