From a7e0c5b61d1d8365455a4291615970c677048c9a Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 12:08:32 +0530 Subject: [PATCH 01/10] Use the new server enum --- .../new/photos/services/ml/embedding.ts | 78 ++++++------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/web/packages/new/photos/services/ml/embedding.ts b/web/packages/new/photos/services/ml/embedding.ts index 18d71ed87e..6da4e765d1 100644 --- a/web/packages/new/photos/services/ml/embedding.ts +++ b/web/packages/new/photos/services/ml/embedding.ts @@ -10,31 +10,24 @@ import { type RemoteCLIPIndex } from "./clip"; import { type RemoteFaceIndex } from "./face"; /** - * The embeddings that we (the current client) knows how to handle. + * [Note: Derived embeddings and other metadata] * - * This is an exhaustive set of values we pass when GET-ing or PUT-ting - * encrypted embeddings from remote. However, we should be prepared to receive a - * {@link RemoteEmbedding} with a model value different from these. + * The APIs they deal with derived data started in a ML context, and would store + * embeddings generated by particular models. Thus the API endpoints use the + * name "embedding", and are parameterized by a "model" enum. * - * [Note: Embedding/model vs derived data] + * Next step in the evolution was that instead of just storing the embedding, + * the code also started storing various additional data generated by the ML + * model. For example, the face indexing process generates multiple face + * embeddings per file, each with an associated detection box. So instead of + * storing just a singular embedding, the data that got stored was this entire + * face index structure containing multiple embeddings and associated data. * - * Historically, this has been called an "embedding" or a "model" in the API - * terminology. However, it is more like derived data. - * - * It started off being called as "model", but strictly speaking it was not just - * the model, but the embedding produced by a particular ML model when used with - * a particular set of pre-/post- processing steps and related hyperparameters. - * It is better thought of as an "type" of embedding produced or consumed by the - * client, e.g. the "face" embedding, or the "clip" embedding. - * - * Even the word embedding is a synedoche, since it might have other data. For - * example, for faces, it in not just the face embedding, but also the detection - * regions, landmarks etc: What we've come to refer as the "face index" in our - * client code terminology. - * - * Later on, to avoid the proliferation of small files (one per embedding), we - * combined all these embeddings into a single "embedding", which is a map of - * the form: + * Further down, it was realized that the fan out caused on remote when trying + * to fetch all derived data - both ML ("clip", "face") and non-ML ("exif") - + * was problematic, and also their raw JSON was unnecessarily big. To deal with + * these better, we now have a single "derived" model type, whose data is a + * gzipped map of the form: * * { * "face": ... the face indexing result ... @@ -42,26 +35,12 @@ import { type RemoteFaceIndex } from "./face"; * "exif": ... the Exif extracted from the file ... * ... more in the future ... * } - * - * Thus, now this is best thought of a tag for a particular format of encoding - * all the derived data associated with a file. */ -// TODO-ML: Fix name to "combined" before release -const wipModelName = process.env.NEXT_PUBLIC_ENTE_ENABLE_WIP_ML_DONT_USE ?? ""; -// type EmbeddingModel = "xxxx-xxxx" /* Combined format */; -type EmbeddingModel = string; // "xxxx-xxxx" /* Combined format */; +type EmbeddingModel = "derived"; const RemoteEmbedding = z.object({ /** The ID of the file whose embedding this is. */ fileID: z.number(), - /** - * The embedding "type". - * - * This can be an arbitrary string since there might be models the current - * client does not know about; we limit our interactions to values that are - * one of {@link EmbeddingModel}. - */ - model: z.string(), /** * Base64 representation of the encrypted (model specific) embedding JSON. */ @@ -84,7 +63,7 @@ export type ParsedRemoteDerivedData = Partial<{ }>; /** - * The decrypted payload of a {@link RemoteEmbedding} for the "combined" + * The decrypted payload of a {@link RemoteEmbedding} for the "derived" * {@link EmbeddingModel}. * * [Note: Preserve unknown derived data fields] @@ -202,8 +181,7 @@ const ParsedRemoteDerivedData = z.object({ export const fetchDerivedData = async ( filesByID: Map, ): Promise> => { - // TODO-ML: Fix name to "combined" before release - const remoteEmbeddings = await fetchEmbeddings(wipModelName, [ + const remoteEmbeddings = await fetchEmbeddings("derived", [ ...filesByID.keys(), ]); @@ -212,7 +190,7 @@ export const fetchDerivedData = async ( const { fileID } = remoteEmbedding; const file = filesByID.get(fileID); if (!file) { - log.warn(`Ignoring derived data for unknown fileID ${fileID}`); + log.warn(`Ignoring derived data for unknown file id ${fileID}`); continue; } @@ -225,13 +203,13 @@ export const fetchDerivedData = async ( const jsonString = await gunzip(decryptedBytes); result.set(fileID, remoteDerivedDataFromJSONString(jsonString)); } catch (e) { - // This shouldn't happen. Likely some client has uploaded a - // corrupted embedding. Ignore it so that it gets reindexed and - // uploaded correctly again. - log.warn(`Ignoring unparseable embedding for ${fileID}`, e); + // This shouldn't happen. Best guess is that some client has + // uploaded a corrupted embedding. Ignore it so that it gets + // reindexed and uploaded correctly again. + log.warn(`Ignoring unparseable embedding for file id ${fileID}`, e); } } - log.debug(() => `Fetched ${result.size} combined embeddings`); + log.debug(() => `Fetched derived data for ${result.size} files`); return result; }; @@ -295,13 +273,7 @@ const fetchEmbeddings = async ( export const putDerivedData = async ( enteFile: EnteFile, derivedData: RawRemoteDerivedData, -) => - // TODO-ML: Fix name to "combined" before release - putEmbedding( - enteFile, - wipModelName, - await gzip(JSON.stringify(derivedData)), - ); +) => putEmbedding(enteFile, "derived", await gzip(JSON.stringify(derivedData))); /** * Upload an embedding to remote. From b61e4f4ac6d4427ac3964eab11a8be3e4b56cc00 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 13:55:37 +0530 Subject: [PATCH 02/10] Integrate exif --- web/packages/new/photos/services/ml/worker.ts | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index b2cda8170f..d19c4e221d 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -315,7 +315,7 @@ const syncWithLocalFilesAndGetFilesToIndex = async ( * * So this function also does things that are not related to ML and/or indexing: * - * - Extracting and updating Exif. + * - Extracting Exif. * - Saving face crops. * * --- @@ -368,6 +368,12 @@ const index = async ( const existingRemoteFaceIndex = remoteDerivedData?.parsed?.face; const existingRemoteCLIPIndex = remoteDerivedData?.parsed?.clip; + // exif is expected to be a JSON object in the shape of RawExifTags, but + // this function don't care what's inside it and can just treat it as an + // opaque blob. + const existingExif = remoteDerivedData?.raw.exif; + const hasExistingExif = existingExif !== undefined && existingExif !== null; + let existingFaceIndex: FaceIndex | undefined; if ( existingRemoteFaceIndex && @@ -389,18 +395,14 @@ const index = async ( // See if we already have all the derived data fields that we need. If so, // just update our local db and return. - if ( - existingFaceIndex && - existingCLIPIndex && - !process.env.NEXT_PUBLIC_ENTE_ENABLE_WIP_ML_DONT_USE /* TODO-ML: WIP */ - ) { + if (existingFaceIndex && existingCLIPIndex && hasExistingExif) { try { await saveIndexes( { fileID, ...existingFaceIndex }, { fileID, ...existingCLIPIndex }, ); } catch (e) { - log.error(`Failed to save indexes data for ${f}`, e); + log.error(`Failed to save indexes for ${f}`, e); throw e; } return; @@ -419,8 +421,9 @@ const index = async ( image = await imageBitmapAndData(renderableBlob); } catch (e) { // If we cannot get the raw image data for the file, then retrying again - // won't help. It'd only make sense to retry later if modify - // `renderableBlob` to be do something different for this type of file. + // won't help (if in the future we enhance the underlying code for + // `indexableBlobs` to handle this failing type we can trigger a + // reindexing attempt for failed files). // // See: [Note: Transient and permanent indexing failures] log.error(`Failed to get image data for indexing ${f}`, e); @@ -439,7 +442,8 @@ const index = async ( [faceIndex, clipIndex, exif] = await Promise.all([ existingFaceIndex ?? indexFaces(enteFile, image, electron), existingCLIPIndex ?? indexCLIP(image, electron), - originalBlob ? extractRawExif(originalBlob) : undefined, + existingExif ?? + (originalBlob ? extractRawExif(originalBlob) : undefined), ]); } catch (e) { // See: [Note: Transient and permanent indexing failures] @@ -453,9 +457,8 @@ const index = async ( const msg = []; if (!existingFaceIndex) msg.push(`${faceIndex.faces.length} faces`); if (!existingCLIPIndex) msg.push("clip"); - if (exif) - return ["exif", exif]; // TODO: Exif - else return `Indexed ${msg.join(" and ")} in ${f} (${ms} ms)`; + if (!hasExistingExif && originalBlob) msg.push("exif"); + return `Indexed ${msg.join(" and ")} in ${f} (${ms} ms)`; }); const remoteFaceIndex = existingRemoteFaceIndex ?? { @@ -479,6 +482,7 @@ const index = async ( ...existingRawDerivedData, face: remoteFaceIndex, clip: remoteCLIPIndex, + exif, }; log.debug(() => ["Uploading derived data", rawDerivedData]); From bae717dc69ab8a933556199d774d6a7e80285d8a Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 14:19:17 +0530 Subject: [PATCH 03/10] Prepare for internal users --- web/packages/new/photos/services/feature-flags.ts | 10 +++------- web/packages/new/photos/services/ml/index.ts | 12 ++++++------ web/packages/new/photos/services/ml/people.ts | 3 +-- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/web/packages/new/photos/services/feature-flags.ts b/web/packages/new/photos/services/feature-flags.ts index fb48086d46..1f2e4329bc 100644 --- a/web/packages/new/photos/services/feature-flags.ts +++ b/web/packages/new/photos/services/feature-flags.ts @@ -1,4 +1,3 @@ -import { isDevBuild } from "@/base/env"; import { authenticatedRequestHeaders, ensureOk } from "@/base/http"; import { localUser } from "@/base/local-user"; import log from "@/base/log"; @@ -104,16 +103,13 @@ const remoteFeatureFlagsFetchingIfNeeded = async () => { /** * Return `true` if the current user is marked as an "internal" user. * - * 1. Everyone is considered as an internal user in dev builds. - * 2. Emails that end in `@ente.io` are always considered as internal users. - * 3. If the "internalUser" remote feature flag is set, the user is internal. - * 4. Otherwise false. + * 1. Emails that end in `@ente.io` are considered as internal users. + * 2. If the "internalUser" remote feature flag is set, the user is internal. + * 3. Otherwise false. * * See also: [Note: Feature Flags]. */ export const isInternalUser = async () => { - if (isDevBuild) return true; - const user = localUser(); if (user?.email.endsWith("@ente.io")) return true; diff --git a/web/packages/new/photos/services/ml/index.ts b/web/packages/new/photos/services/ml/index.ts index 91144b4f91..c3432b1023 100644 --- a/web/packages/new/photos/services/ml/index.ts +++ b/web/packages/new/photos/services/ml/index.ts @@ -11,7 +11,7 @@ import { FileType } from "@/media/file-type"; import type { EnteFile } from "@/new/photos/types/file"; import { throttled } from "@/utils/promise"; import { proxy } from "comlink"; -import { isBetaUser, isInternalUser } from "../feature-flags"; +import { isInternalUser } from "../feature-flags"; import { getRemoteFlag, updateRemoteFlag } from "../remote-store"; import type { UploadItem } from "../upload/types"; import { regenerateFaceCrops } from "./crop"; @@ -97,17 +97,17 @@ export const terminateMLWorker = () => { * * ML currently only works when we're running in our desktop app. */ -// TODO-ML: -export const isMLSupported = - isDesktop && process.env.NEXT_PUBLIC_ENTE_ENABLE_WIP_ML_DONT_USE; +export const isMLSupported = isDesktop; /** + * TODO-ML: This will not be needed when we move to a public beta. * Was this someone who might've enabled the beta ML? If so, show them the * coming back soon banner while we finalize it. - * TODO-ML: */ export const canEnableML = async () => - (await isInternalUser()) || (await isBetaUser()); + // TODO-ML: The interim condition should be + // isDevBuild || (await isInternalUser()) || (await isBetaUser()); + await isInternalUser(); /** * Initialize the ML subsystem if the user has enabled it in preferences. diff --git a/web/packages/new/photos/services/ml/people.ts b/web/packages/new/photos/services/ml/people.ts index 97118ea94a..90e1314571 100644 --- a/web/packages/new/photos/services/ml/people.ts +++ b/web/packages/new/photos/services/ml/people.ts @@ -5,8 +5,7 @@ export interface Person { displayFaceId: string; } -// TODO-ML(MR): Forced disable clustering. It doesn't currently work, -// need to finalize it before we move out of beta. +// Forced disable clustering. It doesn't currently work. // // > Error: Failed to execute 'transferToImageBitmap' on // > 'OffscreenCanvas': ImageBitmap construction failed From b121daa607f6aa234e4276e74e2693aff0b18cc7 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 14:40:11 +0530 Subject: [PATCH 04/10] Trace --- web/packages/new/photos/services/exif.ts | 43 ++++++++++++++++--- web/packages/new/photos/services/ml/blob.ts | 20 ++++----- web/packages/new/photos/services/ml/worker.ts | 13 ++++-- 3 files changed, 56 insertions(+), 20 deletions(-) diff --git a/web/packages/new/photos/services/exif.ts b/web/packages/new/photos/services/exif.ts index e3bc7a221a..ee41a0127c 100644 --- a/web/packages/new/photos/services/exif.ts +++ b/web/packages/new/photos/services/exif.ts @@ -6,31 +6,62 @@ import { type ParsedMetadataDate, } from "@/media/file-metadata"; import ExifReader from "exifreader"; +import type { EnteFile } from "../types/file"; import type { ParsedExtractedMetadata } from "../types/metadata"; import { isInternalUser } from "./feature-flags"; // TODO: Exif: WIP flag to inspect the migration from old to new lib. export const wipNewLib = async () => isDevBuild && (await isInternalUser()); +const cmpTsEq = (a: number | undefined | null, b: number | undefined) => { + if (!a || !b) return false; + if (a == b) return true; + if (a / 1e6 == b / 1e6) return true; + return false; +}; + export const cmpNewLib = ( oldLib: ParsedExtractedMetadata, newLib: ParsedMetadata, ) => { + const logM = (r: string) => + log.info("[exif]", r, JSON.stringify({ old: oldLib, new: newLib })); if ( - oldLib.creationTime == newLib.creationDate?.timestamp && + cmpTsEq(oldLib.creationTime, newLib.creationDate?.timestamp) && oldLib.location.latitude == newLib.location?.latitude && oldLib.location.longitude == newLib.location?.longitude ) { - if (oldLib.width == newLib.width && oldLib.height == newLib.height) - log.info("Exif migration ✅"); - else log.info("Exif migration ✅✨"); + if ( + oldLib.width == newLib.width && + oldLib.height == newLib.height && + oldLib.creationTime == newLib.creationDate?.timestamp + ) + logM("exact match"); + else logM("enhanced match"); log.debug(() => ["exif/cmp", { oldLib, newLib }]); } else { - log.info("Exif migration - Potential mismatch ❗️🚩"); - log.info({ oldLib, newLib }); + logM("potential mismatch ❗️🚩"); } }; +export const cmpNewLib2 = (enteFile: EnteFile, _exif: unknown) => { + // cast is fine here, this is just temporary debugging code. + const rawExif = _exif as RawExifTags; + const newLib = parseExif(rawExif); + const pem = { + location: { + latitude: enteFile.metadata.latitude, + longitude: enteFile.metadata.longitude, + }, + creationTime: enteFile.metadata.creationTime, + // eslint-disable-next-line @typescript-eslint/prefer-nullish-coalescing + width: enteFile.w || null, + // eslint-disable-next-line @typescript-eslint/prefer-nullish-coalescing + height: enteFile.h || null, + }; + cmpNewLib(pem, newLib); +}; + /** * Extract Exif and other metadata from the given file. * diff --git a/web/packages/new/photos/services/ml/blob.ts b/web/packages/new/photos/services/ml/blob.ts index e425f03911..015dc7462d 100644 --- a/web/packages/new/photos/services/ml/blob.ts +++ b/web/packages/new/photos/services/ml/blob.ts @@ -22,7 +22,7 @@ export interface IndexableBlobs { * - For live photos it will the (original) image component of the live * photo. */ - originalBlob: Blob | undefined; + originalImageBlob: Blob | undefined; /** * The original (if the browser possibly supports rendering this type of * images) or otherwise a converted JPEG blob. @@ -121,19 +121,19 @@ const indexableUploadItemBlobs = async ( electron: MLWorkerElectron, ) => { const fileType = enteFile.metadata.fileType; - let originalBlob: Blob | undefined; + let originalImageBlob: Blob | undefined; let renderableBlob: Blob; if (fileType == FileType.video) { const thumbnailData = await DownloadManager.getThumbnail(enteFile); renderableBlob = new Blob([ensure(thumbnailData)]); } else { - originalBlob = await readNonVideoUploadItem(uploadItem, electron); + originalImageBlob = await readNonVideoUploadItem(uploadItem, electron); renderableBlob = await renderableImageBlob( enteFile.metadata.title, - originalBlob, + originalImageBlob, ); } - return { originalBlob, renderableBlob }; + return { originalImageBlob, renderableBlob }; }; /** @@ -185,19 +185,19 @@ export const indexableEnteFileBlobs = async ( if (fileType == FileType.video) { const thumbnailData = await DownloadManager.getThumbnail(enteFile); return { - originalBlob: undefined, + originalImageBlob: undefined, renderableBlob: new Blob([ensure(thumbnailData)]), }; } const fileStream = await DownloadManager.getFile(enteFile); - const originalBlob = await new Response(fileStream).blob(); + const originalImageBlob = await new Response(fileStream).blob(); let renderableBlob: Blob; if (fileType == FileType.livePhoto) { const { imageFileName, imageData } = await decodeLivePhoto( enteFile.metadata.title, - originalBlob, + originalImageBlob, ); renderableBlob = await renderableImageBlob( imageFileName, @@ -206,12 +206,12 @@ export const indexableEnteFileBlobs = async ( } else if (fileType == FileType.image) { renderableBlob = await renderableImageBlob( enteFile.metadata.title, - originalBlob, + originalImageBlob, ); } else { // A layer above us should've already filtered these out. throw new Error(`Cannot index unsupported file type ${fileType}`); } - return { originalBlob, renderableBlob }; + return { originalImageBlob, renderableBlob }; }; diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index d19c4e221d..9070536da5 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -9,7 +9,7 @@ import { ensure } from "@/utils/ensure"; import { wait } from "@/utils/promise"; import { expose } from "comlink"; import downloadManager from "../download"; -import { extractRawExif } from "../exif"; +import { cmpNewLib2, extractRawExif } from "../exif"; import { getAllLocalFiles, getLocalTrashedFiles } from "../files"; import type { UploadItem } from "../upload/types"; import { @@ -410,7 +410,8 @@ const index = async ( // There is at least one derived data type that still needs to be indexed. - const { originalBlob, renderableBlob } = await indexableBlobs( + // Videos will not have an original blob whilst having a renderable blob. + const { originalImageBlob, renderableBlob } = await indexableBlobs( enteFile, uploadItem, electron, @@ -443,7 +444,9 @@ const index = async ( existingFaceIndex ?? indexFaces(enteFile, image, electron), existingCLIPIndex ?? indexCLIP(image, electron), existingExif ?? - (originalBlob ? extractRawExif(originalBlob) : undefined), + (originalImageBlob + ? extractRawExif(originalImageBlob) + : undefined), ]); } catch (e) { // See: [Note: Transient and permanent indexing failures] @@ -452,12 +455,14 @@ const index = async ( throw e; } + if (originalImageBlob) cmpNewLib2(enteFile, exif); + log.debug(() => { const ms = Date.now() - startTime; const msg = []; if (!existingFaceIndex) msg.push(`${faceIndex.faces.length} faces`); if (!existingCLIPIndex) msg.push("clip"); - if (!hasExistingExif && originalBlob) msg.push("exif"); + if (!hasExistingExif && originalImageBlob) msg.push("exif"); return `Indexed ${msg.join(" and ")} in ${f} (${ms} ms)`; }); From 30db24721abb44fa5fb56dd16b44e04a39fed1a5 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 14:46:29 +0530 Subject: [PATCH 05/10] Fix video spinner --- web/apps/photos/src/components/PhotoViewer/index.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/web/apps/photos/src/components/PhotoViewer/index.tsx b/web/apps/photos/src/components/PhotoViewer/index.tsx index 705e93eaaf..3e444b93df 100644 --- a/web/apps/photos/src/components/PhotoViewer/index.tsx +++ b/web/apps/photos/src/components/PhotoViewer/index.tsx @@ -310,7 +310,10 @@ function PhotoViewer(props: Iprops) { function updateExif(file: EnteFile) { if (file.metadata.fileType === FileType.video) { - setExif({ key: file.src, value: undefined }); + setExif({ + key: file.src, + value: { tags: undefined, parsed: undefined }, + }); return; } if (!file || !file.isSourceLoaded || file.conversionFailed) { From 9e2e8e4d63d8984ab0a2100eb196ec3c9d99d582 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 14:54:31 +0530 Subject: [PATCH 06/10] Fix --- web/packages/new/photos/services/exif.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/packages/new/photos/services/exif.ts b/web/packages/new/photos/services/exif.ts index ee41a0127c..7aa76272a7 100644 --- a/web/packages/new/photos/services/exif.ts +++ b/web/packages/new/photos/services/exif.ts @@ -14,9 +14,10 @@ import { isInternalUser } from "./feature-flags"; export const wipNewLib = async () => isDevBuild && (await isInternalUser()); const cmpTsEq = (a: number | undefined | null, b: number | undefined) => { + if (!a && !b) return true; if (!a || !b) return false; if (a == b) return true; - if (a / 1e6 == b / 1e6) return true; + if (Math.floor(a / 1e6) == Math.floor(b / 1e6)) return true; return false; }; From 0290991e2cb13a5bd9088d6dfd9aa347d3664586 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 15:09:10 +0530 Subject: [PATCH 07/10] DOMParser is not available in web workers Exif extraction from XMP thus fails with the following console warning Warning: DOMParser is not available. It is needed to be able to parse XMP tags Thus we need to explicitly take a dep on xmldom. --- web/apps/photos/package.json | 1 + web/packages/new/photos/services/ml/worker.ts | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/web/apps/photos/package.json b/web/apps/photos/package.json index 78f850a3c5..b881b9ac6b 100644 --- a/web/apps/photos/package.json +++ b/web/apps/photos/package.json @@ -12,6 +12,7 @@ "@ente/shared": "*", "@mui/x-date-pickers": "^5.0.0-alpha.6", "@stripe/stripe-js": "^1.13.2", + "@xmldom/xmldom": "^0.8.10", "bip39": "^3.0.4", "bs58": "^5.0.0", "chrono-node": "^2.2.6", diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 9070536da5..e50781fe20 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -7,6 +7,7 @@ import type { EnteFile } from "@/new/photos/types/file"; import { fileLogID } from "@/new/photos/utils/file"; import { ensure } from "@/utils/ensure"; import { wait } from "@/utils/promise"; +import { DOMParser } from "@xmldom/xmldom"; import { expose } from "comlink"; import downloadManager from "../download"; import { cmpNewLib2, extractRawExif } from "../exif"; @@ -94,6 +95,21 @@ export class MLWorker { // Initialize the downloadManager running in the web worker with the // user's token. It'll be used to download files to index if needed. await downloadManager.init(await ensureAuthToken()); + + // Take an explicit dependency on the DOMParser provided by xmldom so + // that webpack does not tree shake it. + // + // Normally, DOMParser is available to web code, so our Exif library + // (ExifReader) has an optional dependency on the the non-browser + // alternative DOMParser provided by @xmldom/xmldom. + // + // But window.DOMParser is not available to web workers. + // + // So we want to use the @xmldom/xmldom version. For this, we need to + // also explicitly refer to it so that webpack does not tree shake it, + // since inside ExifReader it is weakly referenced using + // `__non_webpack_require__('@xmldom/xmldom')`. + DOMParser; } /** From aad7300e4bc7c185e066f8ef44ac5db92d535bdf Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 15:41:44 +0530 Subject: [PATCH 08/10] Take 2 --- web/docs/dependencies.md | 7 +++++-- web/packages/new/photos/services/ml/worker.ts | 19 +++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/web/docs/dependencies.md b/web/docs/dependencies.md index e78b2c9907..c087e92811 100644 --- a/web/docs/dependencies.md +++ b/web/docs/dependencies.md @@ -192,8 +192,11 @@ For more details, see [translations.md](translations.md). ## Media - [ExifReader](https://github.com/mattiasw/ExifReader) is used for Exif - parsing. [piexifjs](https://github.com/hMatoba/piexifjs) is used for writing - back Exif (only supports JPEG). + parsing. We also need its optional peer dependency + [@xmldom/xmldom](https://github.com/xmldom/xmldom) since the browser's + DOMParser is not available in web workers. + [piexifjs](https://github.com/hMatoba/piexifjs) is used for writing back + Exif (only supports JPEG). - [jszip](https://github.com/Stuk/jszip) is used for reading zip files in the web code (Live photos are zip files under the hood). Note that the desktop diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index e50781fe20..86f7dc9f86 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -96,20 +96,23 @@ export class MLWorker { // user's token. It'll be used to download files to index if needed. await downloadManager.init(await ensureAuthToken()); - // Take an explicit dependency on the DOMParser provided by xmldom so - // that webpack does not tree shake it. - // // Normally, DOMParser is available to web code, so our Exif library // (ExifReader) has an optional dependency on the the non-browser // alternative DOMParser provided by @xmldom/xmldom. // // But window.DOMParser is not available to web workers. // - // So we want to use the @xmldom/xmldom version. For this, we need to - // also explicitly refer to it so that webpack does not tree shake it, - // since inside ExifReader it is weakly referenced using - // `__non_webpack_require__('@xmldom/xmldom')`. - DOMParser; + // So we need to get ExifReader to use the @xmldom/xmldom version. + // ExifReader references it using the following code: + // + // __non_webpack_require__('@xmldom/xmldom') + // + // So we need to explicitly reference it to ensure that it does not get + // tree shaken by webpack. But ensuring it is part of the bundle does + // not seem to work (for reasons I don't yet understand), so we also + // need to monkey patch it (This also ensures that it is not tree + // shaken). + globalThis.DOMParser = DOMParser; } /** From d116c5ccb1cd4ad888216c4d8c7a85d03bd144dd Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 15:57:04 +0530 Subject: [PATCH 09/10] Prune false positives for times picked from file names --- web/apps/photos/src/services/fix-exif.ts | 2 +- .../src/services/upload/uploadService.ts | 2 +- web/packages/new/photos/services/exif.ts | 30 +++++++++++-------- web/packages/new/photos/services/ml/worker.ts | 3 +- .../shared/utils/exif-old.ts} | 0 5 files changed, 21 insertions(+), 16 deletions(-) rename web/{apps/photos/src/services/exif.ts => packages/shared/utils/exif-old.ts} (100%) diff --git a/web/apps/photos/src/services/fix-exif.ts b/web/apps/photos/src/services/fix-exif.ts index 31b42495fb..4ebd9dc65f 100644 --- a/web/apps/photos/src/services/fix-exif.ts +++ b/web/apps/photos/src/services/fix-exif.ts @@ -4,12 +4,12 @@ import downloadManager from "@/new/photos/services/download"; import { EnteFile } from "@/new/photos/types/file"; import { detectFileTypeInfo } from "@/new/photos/utils/detect-type"; import { validateAndGetCreationUnixTimeInMicroSeconds } from "@ente/shared/time"; +import { getParsedExifData } from "@ente/shared/utils/exif-old"; import type { FixOption } from "components/FixCreationTime"; import { changeFileCreationTime, updateExistingFilePubMetadata, } from "utils/file"; -import { getParsedExifData } from "./exif"; const EXIF_TIME_TAGS = [ "DateTimeOriginal", diff --git a/web/apps/photos/src/services/upload/uploadService.ts b/web/apps/photos/src/services/upload/uploadService.ts index a41ba6607f..d28d211cd4 100644 --- a/web/apps/photos/src/services/upload/uploadService.ts +++ b/web/apps/photos/src/services/upload/uploadService.ts @@ -31,9 +31,9 @@ import { DedicatedCryptoWorker } from "@ente/shared/crypto/internal/crypto.worke import type { B64EncryptionResult } from "@ente/shared/crypto/internal/libsodium"; import { ENCRYPTION_CHUNK_SIZE } from "@ente/shared/crypto/internal/libsodium"; import { CustomError, handleUploadError } from "@ente/shared/error"; +import { parseImageMetadata } from "@ente/shared/utils/exif-old"; import type { Remote } from "comlink"; import { addToCollection } from "services/collectionService"; -import { parseImageMetadata } from "services/exif"; import { PublicUploadProps, type LivePhotoAssets, diff --git a/web/packages/new/photos/services/exif.ts b/web/packages/new/photos/services/exif.ts index 7aa76272a7..28da2882a0 100644 --- a/web/packages/new/photos/services/exif.ts +++ b/web/packages/new/photos/services/exif.ts @@ -1,10 +1,13 @@ import { isDevBuild } from "@/base/env"; +import { nameAndExtension } from "@/base/file"; import log from "@/base/log"; import { parseMetadataDate, type ParsedMetadata, type ParsedMetadataDate, } from "@/media/file-metadata"; +import { FileType } from "@/media/file-type"; +import { parseImageMetadata } from "@ente/shared/utils/exif-old"; import ExifReader from "exifreader"; import type { EnteFile } from "../types/file"; import type { ParsedExtractedMetadata } from "../types/metadata"; @@ -45,22 +48,23 @@ export const cmpNewLib = ( } }; -export const cmpNewLib2 = (enteFile: EnteFile, _exif: unknown) => { +export const cmpNewLib2 = async ( + enteFile: EnteFile, + blob: Blob, + _exif: unknown, +) => { + const [, ext] = nameAndExtension(enteFile.metadata.title); + const oldLib = await parseImageMetadata( + new File([blob], enteFile.metadata.title), + { + fileType: FileType.image, + extension: ext ?? "", + }, + ); // cast is fine here, this is just temporary debugging code. const rawExif = _exif as RawExifTags; const newLib = parseExif(rawExif); - const pem = { - location: { - latitude: enteFile.metadata.latitude, - longitude: enteFile.metadata.longitude, - }, - creationTime: enteFile.metadata.creationTime, - // eslint-disable-next-line @typescript-eslint/prefer-nullish-coalescing - width: enteFile.w || null, - // eslint-disable-next-line @typescript-eslint/prefer-nullish-coalescing - height: enteFile.h || null, - }; - cmpNewLib(pem, newLib); + cmpNewLib(oldLib, newLib); }; /** diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 86f7dc9f86..ca9e4bd054 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -474,7 +474,8 @@ const index = async ( throw e; } - if (originalImageBlob) cmpNewLib2(enteFile, exif); + if (originalImageBlob) + await cmpNewLib2(enteFile, originalImageBlob, exif); log.debug(() => { const ms = Date.now() - startTime; diff --git a/web/apps/photos/src/services/exif.ts b/web/packages/shared/utils/exif-old.ts similarity index 100% rename from web/apps/photos/src/services/exif.ts rename to web/packages/shared/utils/exif-old.ts From 505e1de14c0d5f23a05895d4bfdfbc7a5e77175f Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Mon, 29 Jul 2024 16:09:33 +0530 Subject: [PATCH 10/10] lint --- web/packages/shared/utils/exif-old.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/web/packages/shared/utils/exif-old.ts b/web/packages/shared/utils/exif-old.ts index 32d36f0ead..b0041edc43 100644 --- a/web/packages/shared/utils/exif-old.ts +++ b/web/packages/shared/utils/exif-old.ts @@ -1,3 +1,8 @@ +// The code in this file is deprecated and meant to be deleted. +// +// eslint-disable-next-line @typescript-eslint/ban-ts-comment +// @ts-nocheck + import log from "@/base/log"; import { type FileTypeInfo } from "@/media/file-type"; import { NULL_LOCATION } from "@/new/photos/services/upload/types";