[web] Finalize migration to new Exif library (#2630)
@@ -10,12 +10,10 @@
         "@ente/eslint-config": "*",
         "@ente/shared": "*",
         "@stripe/stripe-js": "^1.13.2",
-        "@xmldom/xmldom": "^0.8.10",
         "bip39": "^3.0.4",
         "bs58": "^5.0.0",
         "chrono-node": "^2.2.6",
         "debounce": "^2.0.0",
-        "exifr": "^7.1.3",
         "exifreader": "^4",
         "fast-srp-hap": "^2.0.4",
         "ffmpeg-wasm": "file:./thirdparty/ffmpeg-wasm",
@@ -1,6 +1,5 @@
 import log from "@/base/log";
-import type { Location } from "@/new/photos/types/metadata";
-import type { LocationTagData } from "types/entity";
+import type { Location, LocationTagData } from "types/entity";
 
 export interface City {
     city: string;
@@ -1,5 +1,4 @@
 import log from "@/base/log";
-import { validateAndGetCreationUnixTimeInMicroSeconds } from "@ente/shared/time";
 
 /**
  * Try to extract a date (as epoch microseconds) from a file name by matching it
@@ -41,6 +40,21 @@ export const tryParseEpochMicrosecondsFromFileName = (
     }
 };
 
+export function validateAndGetCreationUnixTimeInMicroSeconds(dateTime: Date) {
+    if (!dateTime || isNaN(dateTime.getTime())) {
+        return null;
+    }
+    const unixTime = dateTime.getTime() * 1000;
+    // Ignore dateTimeString = "0000:00:00 00:00:00"
+    if (unixTime === Date.UTC(0, 0, 0, 0, 0, 0, 0) || unixTime === 0) {
+        return null;
+    } else if (unixTime > Date.now() * 1000) {
+        return null;
+    } else {
+        return unixTime;
+    }
+}
+
 interface DateComponent<T = number> {
     year: T;
     month: T;
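The helper moved above converts a Date to epoch microseconds, rejecting invalid, zero, and future dates. To illustrate its observable behavior (not part of the diff itself):

    validateAndGetCreationUnixTimeInMicroSeconds(new Date("2021-03-01T12:00:00Z"));
    // => 1614600000000000 (epoch microseconds)

    validateAndGetCreationUnixTimeInMicroSeconds(new Date("9999-01-01"));
    // => null, since dates in the future are implausible creation times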
@@ -1,5 +1,3 @@
-import { Location } from "@/new/photos/types/metadata";
-
 export enum EntityType {
     LOCATION_TAG = "location",
 }
@@ -27,6 +25,11 @@ export interface EncryptedEntity {
     userID: number;
 }
 
+export interface Location {
+    latitude: number | null;
+    longitude: number | null;
+}
+
 export interface LocationTagData {
     name: string;
     radius: number;
@@ -201,11 +201,8 @@ For more details, see [translations.md](translations.md).
 ## Media
 
 - [ExifReader](https://github.com/mattiasw/ExifReader) is used for Exif
-  parsing. We also need its optional peer dependency
-  [@xmldom/xmldom](https://github.com/xmldom/xmldom) since the browser's
-  DOMParser is not available in web workers.
-  [piexifjs](https://github.com/hMatoba/piexifjs) is used for writing back
-  Exif (only supports JPEG).
+  parsing. [piexifjs](https://github.com/hMatoba/piexifjs) is used for writing
+  back Exif (only supports JPEG).
 
 - [jszip](https://github.com/Stuk/jszip) is used for reading zip files in the
   web code (Live photos are zip files under the hood). Note that the desktop
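For reference, reading tags with ExifReader matches the call that appears later in this diff (the load options are the ones used by extractRawExif below; the tag access is merely illustrative):

    import ExifReader from "exifreader";

    const readTags = async (blob: Blob) => {
        const tags = await ExifReader.load(await blob.arrayBuffer(), {
            async: true,
            expanded: true,
        });
        // With expanded: true, tags come grouped, e.g. tags.exif, tags.gps, tags.xmp.
        return tags.exif?.DateTimeOriginal?.description;
    };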
@@ -1,66 +1,10 @@
-import { nameAndExtension } from "@/base/file";
-import log from "@/base/log";
+import { inWorker } from "@/base/env";
 import {
     parseMetadataDate,
     type ParsedMetadata,
     type ParsedMetadataDate,
 } from "@/media/file-metadata";
-import { FileType } from "@/media/file-type";
-import { parseImageMetadata } from "@ente/shared/utils/exif-old";
 import ExifReader from "exifreader";
-import type { EnteFile } from "../types/file";
-import type { ParsedExtractedMetadata } from "../types/metadata";
 
-const cmpTsEq = (a: number | undefined | null, b: number | undefined) => {
-    if (!a && !b) return true;
-    if (!a || !b) return false;
-    if (a == b) return true;
-    if (Math.floor(a / 1e6) == Math.floor(b / 1e6)) return true;
-    return false;
-};
-
-export const cmpNewLib = (
-    oldLib: ParsedExtractedMetadata,
-    newLib: ParsedMetadata,
-) => {
-    const logM = (r: string) =>
-        log.info("[exif]", r, JSON.stringify({ old: oldLib, new: newLib }));
-    if (
-        cmpTsEq(oldLib.creationTime, newLib.creationDate?.timestamp) &&
-        oldLib.location.latitude == newLib.location?.latitude &&
-        oldLib.location.longitude == newLib.location?.longitude
-    ) {
-        if (
-            oldLib.width == newLib.width &&
-            oldLib.height == newLib.height &&
-            oldLib.creationTime == newLib.creationDate?.timestamp
-        )
-            logM("exact match");
-        else logM("enhanced match");
-        log.debug(() => ["exif/cmp", { oldLib, newLib }]);
-    } else {
-        logM("potential mismatch ❗️🚩");
-    }
-};
-
-export const cmpNewLib2 = async (
-    enteFile: EnteFile,
-    blob: Blob,
-    _exif: unknown,
-) => {
-    const [, ext] = nameAndExtension(enteFile.metadata.title);
-    const oldLib = await parseImageMetadata(
-        new File([blob], enteFile.metadata.title),
-        {
-            fileType: FileType.image,
-            extension: ext ?? "",
-        },
-    );
-    // cast is fine here, this is just temporary debugging code.
-    const rawExif = _exif as RawExifTags;
-    const newLib = parseExif(rawExif);
-    cmpNewLib(oldLib, newLib);
-};
-
 /**
  * Extract Exif and other metadata from the given file.
@@ -166,8 +110,6 @@ const parseDates = (tags: RawExifTags) => {
     const iptc = parseIPTCDates(tags);
     const xmp = parseXMPDates(tags);
 
-    log.debug(() => ["exif/dates", { exif, iptc, xmp }]);
-
     return {
         DateTimeOriginal:
             valid(xmp.DateTimeOriginal) ??
@@ -533,6 +475,17 @@ export type RawExifTags = Omit<ExifReader.ExpandedTags, "Thumbnail" | "xmp"> & {
  * to know about ExifReader specifically.
  */
 export const extractRawExif = async (blob: Blob): Promise<RawExifTags> => {
+    // The browser's DOMParser is not available in web workers. So if this
+    // function gets called from a web worker, then it would not be able to
+    // parse XMP tags.
+    //
+    // There is a way around this problem, by also installing ExifReader's
+    // optional peer dependency "@xmldom/xmldom". But since we currently have no
+    // use case for calling this code in a web worker, we just abort immediately
+    // to let future us know that we need to install it.
+    if (inWorker())
+        throw new Error("DOMParser is not available in web workers");
+
     const tags = await ExifReader.load(await blob.arrayBuffer(), {
         async: true,
         expanded: true,
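The implementation of inWorker from "@/base/env" is not shown in this diff. A typical way to write such a guard (a sketch of an assumed helper, not necessarily Ente's actual code) is:

    // Hypothetical sketch: importScripts only exists in worker global scopes,
    // so its presence is a reasonable proxy for "are we in a web worker?".
    export const inWorker = () => typeof importScripts === "function";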
@@ -10,41 +10,10 @@ import DownloadManager from "../download";
 import type { UploadItem } from "../upload/types";
 
-/**
- * A pair of blobs - the original, and a possibly converted "renderable" one -
- * for a file that we're trying to index.
- */
-export interface IndexableBlobs {
-    /**
-     * The original file's data (as a {@link Blob}).
-     *
-     * - For images this is guaranteed to be present.
-     * - For videos it will not be present.
-     * - For live photos it will be the (original) image component of the live
-     *   photo.
-     */
-    originalImageBlob: Blob | undefined;
-    /**
-     * The original (if the browser possibly supports rendering this type of
-     * images) or otherwise a converted JPEG blob.
-     *
-     * This blob is meant to be used to construct the {@link ImageBitmap}
-     * that'll be used for further operations that need access to the RGB data
-     * of the image.
-     *
-     * - For images this is constructed from the image.
-     * - For videos this is constructed from the thumbnail.
-     * - For live photos this is constructed from the image component of the
-     *   live photo.
-     */
-    renderableBlob: Blob;
-}
-
 /**
- * Indexable blobs augmented with the image bitmap and RGBA data.
+ * An image bitmap and its RGBA data.
  *
  * This is a data structure containing data about an image in all formats that the
- * various indexing steps need. Consolidating all the data here and parsing them
- * in one go obviates the need for each indexing step to roll their own parsing.
+ * various indexing steps need.
  */
 export interface ImageBitmapAndData {
     /**
@@ -66,7 +35,7 @@ export interface ImageBitmapAndData {
  * Create an {@link ImageBitmap} from the given {@link imageBlob}, and return
  * both the image bitmap and its {@link ImageData}.
  */
-export const imageBitmapAndData = async (
+export const createImageBitmapAndData = async (
     imageBlob: Blob,
 ): Promise<ImageBitmapAndData> => {
     const imageBitmap = await createImageBitmap(imageBlob);
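The remainder of the function body is elided by the diff. The ImageData half is typically obtained by drawing the bitmap onto a canvas; a minimal sketch, assuming an OffscreenCanvas is available (it is inside web workers, where this indexing code runs):

    // Sketch: extract the RGBA pixels from an ImageBitmap via an OffscreenCanvas.
    const imageDataOf = (bitmap: ImageBitmap): ImageData => {
        const { width, height } = bitmap;
        const canvas = new OffscreenCanvas(width, height);
        const ctx = canvas.getContext("2d")!;
        ctx.drawImage(bitmap, 0, 0);
        return ctx.getImageData(0, 0, width, height);
    };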
@@ -83,15 +52,15 @@ export const imageBitmapAndData = async (
 };
 
 /**
- * Return a pair of blobs for the given data - the original, and a renderable
- * one (possibly involving a JPEG conversion).
+ * Return a renderable blob (converting to JPEG if needed) for the given data.
  *
  * The blob from the relevant image component is either constructed using the
  * given {@link uploadItem} if present, otherwise it is downloaded from remote.
  *
- * - For images the original is used.
- * - For live photos the original image component is used.
- * - For videos the thumbnail is used.
+ * - For images it is constructed from the image.
+ * - For videos it is constructed from the thumbnail.
+ * - For live photos it is constructed from the image component of the live
+ *   photo.
  *
  * Then, if the image blob we have seems to be something that the browser cannot
  * handle, we convert it into a JPEG blob so that it can subsequently be used to
@@ -107,34 +76,28 @@ export const imageBitmapAndData = async (
  * witness that we're actually running in our desktop app (and thus can safely
  * call our Node.js layer for various functionality).
  */
-export const indexableBlobs = async (
+export const fetchRenderableBlob = async (
     enteFile: EnteFile,
     uploadItem: UploadItem | undefined,
     electron: ElectronMLWorker,
-): Promise<IndexableBlobs> =>
+): Promise<Blob> =>
     uploadItem
-        ? await indexableUploadItemBlobs(enteFile, uploadItem, electron)
-        : await indexableEnteFileBlobs(enteFile);
+        ? await fetchRenderableUploadItemBlob(enteFile, uploadItem, electron)
+        : await fetchRenderableEnteFileBlob(enteFile);
 
-const indexableUploadItemBlobs = async (
+const fetchRenderableUploadItemBlob = async (
     enteFile: EnteFile,
     uploadItem: UploadItem,
     electron: ElectronMLWorker,
 ) => {
     const fileType = enteFile.metadata.fileType;
-    let originalImageBlob: Blob | undefined;
-    let renderableBlob: Blob;
     if (fileType == FileType.video) {
         const thumbnailData = await DownloadManager.getThumbnail(enteFile);
-        renderableBlob = new Blob([ensure(thumbnailData)]);
+        return new Blob([ensure(thumbnailData)]);
     } else {
-        originalImageBlob = await readNonVideoUploadItem(uploadItem, electron);
-        renderableBlob = await renderableImageBlob(
-            enteFile.metadata.title,
-            originalImageBlob,
-        );
+        const blob = await readNonVideoUploadItem(uploadItem, electron);
+        return renderableImageBlob(enteFile.metadata.title, blob);
     }
-    return { originalImageBlob, renderableBlob };
 };
 
 /**
@@ -173,39 +136,32 @@ const readNonVideoUploadItem = async (
 };
 
 /**
- * Return a pair of blobs for the given file - the original, and a renderable
- * one (possibly involving a JPEG conversion).
+ * Return a renderable blob (possibly involving a JPEG conversion) for the
+ * given {@link EnteFile}.
  *
- * - The original will be downloaded if needed
- * - The original will be converted to JPEG if needed
+ * - The original will be downloaded if needed.
+ * - The original will be converted to JPEG if needed.
  */
-export const indexableEnteFileBlobs = async (
+export const fetchRenderableEnteFileBlob = async (
     enteFile: EnteFile,
-): Promise<IndexableBlobs> => {
+): Promise<Blob> => {
     const fileType = enteFile.metadata.fileType;
     if (fileType == FileType.video) {
         const thumbnailData = await DownloadManager.getThumbnail(enteFile);
-        return {
-            originalImageBlob: undefined,
-            renderableBlob: new Blob([ensure(thumbnailData)]),
-        };
+        return new Blob([ensure(thumbnailData)]);
     }
 
     const fileStream = await DownloadManager.getFile(enteFile);
     const originalImageBlob = await new Response(fileStream).blob();
 
-    let renderableBlob: Blob;
     if (fileType == FileType.livePhoto) {
         const { imageFileName, imageData } = await decodeLivePhoto(
             enteFile.metadata.title,
             originalImageBlob,
         );
-        renderableBlob = await renderableImageBlob(
-            imageFileName,
-            new Blob([imageData]),
-        );
+        return renderableImageBlob(imageFileName, new Blob([imageData]));
     } else if (fileType == FileType.image) {
-        renderableBlob = await renderableImageBlob(
+        return await renderableImageBlob(
             enteFile.metadata.title,
             originalImageBlob,
         );
@@ -213,6 +169,4 @@ export const indexableEnteFileBlobs = async (
         // A layer above us should've already filtered these out.
         throw new Error(`Cannot index unsupported file type ${fileType}`);
     }
-
-    return { originalImageBlob, renderableBlob };
 };
@@ -1,7 +1,7 @@
 import { blobCache } from "@/base/blob-cache";
 import { ensure } from "@/utils/ensure";
 import type { EnteFile } from "../../types/file";
-import { indexableEnteFileBlobs } from "./blob";
+import { fetchRenderableEnteFileBlob } from "./blob";
 import { type Box, type FaceIndex } from "./face";
 import { clamp } from "./math";
 
@@ -26,7 +26,7 @@ export const regenerateFaceCrops = async (
     enteFile: EnteFile,
     faceIndex: FaceIndex,
 ) => {
-    const { renderableBlob } = await indexableEnteFileBlobs(enteFile);
+    const renderableBlob = await fetchRenderableEnteFileBlob(enteFile);
     const imageBitmap = await createImageBitmap(renderableBlob);
 
     try {
@@ -10,29 +10,26 @@ import { type RemoteCLIPIndex } from "./clip";
 import { type RemoteFaceIndex } from "./face";
 
 /**
- * [Note: Derived embeddings and other metadata]
+ * [Note: Derived embeddings model]
  *
- * The APIs that deal with derived data started in an ML context, and would store
- * embeddings generated by particular models. Thus the API endpoints use the
- * name "embedding", and are parameterized by a "model" enum.
+ * The API endpoints related to embeddings are parameterized by a "model"
+ * enum. This is a bit of a misnomer, since the contents of the payload are not
+ * just the raw embeddings themselves, but also additional data generated by the
+ * ML model.
  *
- * Next step in the evolution was that instead of just storing the embedding,
- * the code also started storing various additional data generated by the ML
- * model. For example, the face indexing process generates multiple face
- * embeddings per file, each with an associated detection box. So instead of
- * storing just a singular embedding, the data that got stored was this entire
- * face index structure containing multiple embeddings and associated data.
+ * For example, the face indexing process generates multiple face embeddings per
+ * file, each with an associated detection box. So instead of storing just a
+ * singular embedding, the data is an entire face index structure containing
+ * multiple embeddings and associated data.
  *
  * Further down, it was realized that the fan out caused on remote when trying
- * to fetch all derived data - both ML ("clip", "face") and non-ML ("exif") -
- * was problematic, and also their raw JSON was unnecessarily big. To deal with
- * these better, we now have a single "derived" model type, whose data is a
- * gzipped map of the form:
+ * to fetch both CLIP and face embeddings was problematic, and also that their
+ * raw JSON was unnecessarily big. To deal with these better, we now have a
+ * single "derived" model type, whose data is a gzipped map of the form:
  *
  *     {
  *       "face": ... the face indexing result ...
  *       "clip": ... the CLIP indexing result ...
- *       "exif": ... the Exif extracted from the file ...
  *       ... more in the future ...
  *     }
  */
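To make the note concrete, a client could produce such a gzipped map using the standard CompressionStream API. A sketch only, not the actual packing code (which is not part of this diff):

    // Sketch: gzip a JSON map of the form { face, clip } into a Blob for upload.
    const gzipDerivedData = async (map: Record<string, unknown>): Promise<Blob> => {
        const json = new Blob([JSON.stringify(map)]);
        const gzipped = json.stream().pipeThrough(new CompressionStream("gzip"));
        return new Response(gzipped).blob();
    };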
@@ -8,15 +8,13 @@ import type { EnteFile } from "@/new/photos/types/file";
 import { fileLogID } from "@/new/photos/utils/file";
 import { ensure } from "@/utils/ensure";
 import { wait } from "@/utils/promise";
-import { DOMParser } from "@xmldom/xmldom";
 import { expose, wrap } from "comlink";
 import downloadManager from "../download";
-import { cmpNewLib2, extractRawExif, type RawExifTags } from "../exif";
 import { getAllLocalFiles, getLocalTrashedFiles } from "../files";
 import type { UploadItem } from "../upload/types";
 import {
-    imageBitmapAndData,
-    indexableBlobs,
+    createImageBitmapAndData,
+    fetchRenderableBlob,
     type ImageBitmapAndData,
 } from "./blob";
 import {
@@ -105,24 +103,6 @@ export class MLWorker {
         // Initialize the downloadManager running in the web worker with the
         // user's token. It'll be used to download files to index if needed.
         await downloadManager.init(await ensureAuthToken());
-
-        // Normally, DOMParser is available to web code, so our Exif library
-        // (ExifReader) has an optional dependency on the non-browser
-        // alternative DOMParser provided by @xmldom/xmldom.
-        //
-        // But window.DOMParser is not available to web workers.
-        //
-        // So we need to get ExifReader to use the @xmldom/xmldom version.
-        // ExifReader references it using the following code:
-        //
-        //     __non_webpack_require__('@xmldom/xmldom')
-        //
-        // So we need to explicitly reference it to ensure that it does not get
-        // tree shaken by webpack. But ensuring it is part of the bundle does
-        // not seem to work (for reasons I don't yet understand), so we also
-        // need to monkey patch it (This also ensures that it is not tree
-        // shaken).
-        globalThis.DOMParser = DOMParser;
     }
 
     /**
@@ -371,19 +351,8 @@ const syncWithLocalFilesAndGetFilesToIndex = async (
 /**
  * Index a file, persist the results locally, and put them on remote.
  *
- * [Note: ML indexing does more ML]
- *
- * Nominally, and primarily, indexing a file involves computing its various ML
- * embeddings: faces and CLIP. However, since this is an occasion where we have
- * the original file in memory, it is a great time to also compute other derived
- * data related to the file (instead of re-downloading it again).
- *
- * So this function also does things that are not related to ML and/or indexing:
- *
- * - Extracting Exif.
- * - Saving face crops.
- *
- * ---
+ * Indexing a file involves computing its various ML embeddings: faces and CLIP.
+ * Since we have the original file in memory, we also save the face crops.
  *
  * [Note: Transient and permanent indexing failures]
  *
@@ -433,11 +402,6 @@ const index = async (
     const existingRemoteFaceIndex = remoteDerivedData?.parsed?.face;
     const existingRemoteCLIPIndex = remoteDerivedData?.parsed?.clip;
 
-    // exif is expected to be a JSON object in the shape of RawExifTags, but
-    // this function doesn't care what's inside it and can just treat it as an
-    // opaque blob.
-    const existingExif = remoteDerivedData?.raw.exif;
-
     let existingFaceIndex: FaceIndex | undefined;
     if (
         existingRemoteFaceIndex &&
@@ -458,8 +422,8 @@
         existingCLIPIndex = { embedding };
     }
 
-    // See if we already have all the mandatory derived data fields. If so, just
-    // update our local db and return.
+    // If we already have all the derived data fields then just update our local
+    // db and return.
 
     if (existingFaceIndex && existingCLIPIndex) {
         try {
@@ -476,8 +440,7 @@
 
     // There is at least one derived data type that still needs to be indexed.
 
-    // Videos will not have an original blob whilst having a renderable blob.
-    const { originalImageBlob, renderableBlob } = await indexableBlobs(
+    const renderableBlob = await fetchRenderableBlob(
         enteFile,
         uploadItem,
         electron,
@@ -485,7 +448,7 @@
 
     let image: ImageBitmapAndData;
     try {
-        image = await imageBitmapAndData(renderableBlob);
+        image = await createImageBitmapAndData(renderableBlob);
     } catch (e) {
         // If we cannot get the raw image data for the file, then retrying again
         // won't help (if in the future we enhance the underlying code for
@@ -501,15 +464,13 @@
     try {
         let faceIndex: FaceIndex;
         let clipIndex: CLIPIndex;
-        let exif: unknown;
 
         const startTime = Date.now();
 
         try {
-            [faceIndex, clipIndex, exif] = await Promise.all([
+            [faceIndex, clipIndex] = await Promise.all([
                 existingFaceIndex ?? indexFaces(enteFile, image, electron),
                 existingCLIPIndex ?? indexCLIP(image, electron),
-                existingExif ?? tryExtractExif(originalImageBlob, f),
             ]);
         } catch (e) {
             // See: [Note: Transient and permanent indexing failures]
@@ -518,19 +479,11 @@
             throw e;
         }
 
-        try {
-            if (originalImageBlob && exif)
-                await cmpNewLib2(enteFile, originalImageBlob, exif);
-        } catch (e) {
-            log.warn(`Skipping exif cmp for ${f}`, e);
-        }
-
         log.debug(() => {
             const ms = Date.now() - startTime;
             const msg = [];
             if (!existingFaceIndex) msg.push(`${faceIndex.faces.length} faces`);
             if (!existingCLIPIndex) msg.push("clip");
-            if (!existingExif && originalImageBlob) msg.push("exif");
             return `Indexed ${msg.join(" and ")} in ${f} (${ms} ms)`;
         });
 
@@ -555,23 +508,17 @@
             ...existingRawDerivedData,
             face: remoteFaceIndex,
             clip: remoteCLIPIndex,
-            ...(exif ? { exif } : {}),
         };
 
-        if (existingFaceIndex && existingCLIPIndex && !exif) {
-            // If we were indexing just for exif, but exif generation didn't
-            // happen, there is no need to upload.
-        } else {
-            log.debug(() => ["Uploading derived data", rawDerivedData]);
+        log.debug(() => ["Uploading derived data", rawDerivedData]);
 
-            try {
-                await putDerivedData(enteFile, rawDerivedData);
-            } catch (e) {
-                // See: [Note: Transient and permanent indexing failures]
-                log.error(`Failed to put derived data for ${f}`, e);
-                if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id);
-                throw e;
-            }
-        }
+        try {
+            await putDerivedData(enteFile, rawDerivedData);
+        } catch (e) {
+            // See: [Note: Transient and permanent indexing failures]
+            log.error(`Failed to put derived data for ${f}`, e);
+            if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id);
+            throw e;
+        }
 
         try {
@@ -582,7 +529,8 @@
     } catch (e) {
         // Not sure if DB failures should be considered permanent or
         // transient. There isn't a known case where writing to the local
-        // indexedDB would fail.
+        // indexedDB should systematically fail. It could fail if there was
+        // no space on device, but that's eminently retriable.
         log.error(`Failed to save indexes for ${f}`, e);
         throw e;
     }
@@ -602,34 +550,3 @@ const index = async (
         image.bitmap.close();
     }
 };
-
-/**
- * A helper function that tries to extract the raw Exif, but returns `undefined`
- * if something goes wrong (or it isn't possible) instead of throwing.
- *
- * Exif extraction is not a critical item, we don't want the actual indexing to
- * fail because we were unable to extract Exif. This is not rare: one scenario
- * is if we were trying to index a file in an exotic format. The ML indexing
- * will succeed (because we convert it to a renderable blob), but the Exif
- * extraction will fail (since it needs the original blob, but the original blob
- * can be an arbitrary format).
- *
- * @param originalImageBlob A {@link Blob} containing the original data for the
- * image (or the image component of a live photo) whose Exif we're trying to
- * extract. If this is not available, we skip the extraction and return
- * `undefined`.
- *
- * @param f The {@link fileLogID} for the file this blob corresponds to.
- */
-export const tryExtractExif = async (
-    originalImageBlob: Blob | undefined,
-    f: string,
-): Promise<RawExifTags | undefined> => {
-    if (!originalImageBlob) return undefined;
-    try {
-        return await extractRawExif(originalImageBlob);
-    } catch (e) {
-        log.warn(`Ignoring error during Exif extraction for ${f}`, e);
-        return undefined;
-    }
-};
@@ -1,5 +1,4 @@
 import type { ZipItem } from "@/base/types/ipc";
-import type { Location } from "../../types/metadata";
 
 /**
  * An item to upload is one of the following:
@@ -59,8 +58,6 @@ export const toDataOrPathOrZipEntry = (desktopUploadItem: DesktopUploadItem) =>
 
 export const RANDOM_PERCENTAGE_PROGRESS_FOR_PUT = () => 90 + 10 * Math.random();
 
-export const NULL_LOCATION: Location = { latitude: null, longitude: null };
-
 export enum UPLOAD_STAGES {
     START,
     READING_GOOGLE_METADATA_FILES,
@@ -1,11 +0,0 @@
-export interface Location {
-    latitude: number | null;
-    longitude: number | null;
-}
-
-export interface ParsedExtractedMetadata {
-    location: Location;
-    creationTime: number | null;
-    width: number | null;
-    height: number | null;
-}
@@ -22,21 +22,6 @@ export function getUnixTimeInMicroSecondsWithDelta(delta: TimeDelta): number {
     return currentDate.getTime() * 1000;
 }
 
-export function validateAndGetCreationUnixTimeInMicroSeconds(dateTime: Date) {
-    if (!dateTime || isNaN(dateTime.getTime())) {
-        return null;
-    }
-    const unixTime = dateTime.getTime() * 1000;
-    //ignoring dateTimeString = "0000:00:00 00:00:00"
-    if (unixTime === Date.UTC(0, 0, 0, 0, 0, 0, 0) || unixTime === 0) {
-        return null;
-    } else if (unixTime > Date.now() * 1000) {
-        return null;
-    } else {
-        return unixTime;
-    }
-}
-
 function _addDays(date: Date, days: number): Date {
     const result = new Date(date);
     result.setDate(date.getDate() + days);
@@ -1,340 +0,0 @@
-// The code in this file is deprecated and meant to be deleted.
-//
-// eslint-disable-next-line @typescript-eslint/ban-ts-comment
-// @ts-nocheck
-
-import log from "@/base/log";
-import { type FileTypeInfo } from "@/media/file-type";
-import { NULL_LOCATION } from "@/new/photos/services/upload/types";
-import type {
-    Location,
-    ParsedExtractedMetadata,
-} from "@/new/photos/types/metadata";
-import { validateAndGetCreationUnixTimeInMicroSeconds } from "@ente/shared/time";
-import exifr from "exifr";
-
-type ParsedEXIFData = Record<string, any> &
-    Partial<{
-        DateTimeOriginal: Date;
-        CreateDate: Date;
-        ModifyDate: Date;
-        DateCreated: Date;
-        MetadataDate: Date;
-        latitude: number;
-        longitude: number;
-        imageWidth: number;
-        imageHeight: number;
-    }>;
-
-type RawEXIFData = Record<string, any> &
-    Partial<{
-        DateTimeOriginal: string;
-        CreateDate: string;
-        ModifyDate: string;
-        DateCreated: string;
-        MetadataDate: string;
-        GPSLatitude: number[];
-        GPSLongitude: number[];
-        GPSLatitudeRef: string;
-        GPSLongitudeRef: string;
-        ImageWidth: number;
-        ImageHeight: number;
-    }>;
-
-const exifTagsNeededForParsingImageMetadata = [
-    "DateTimeOriginal",
-    "CreateDate",
-    "ModifyDate",
-    "GPSLatitude",
-    "GPSLongitude",
-    "GPSLatitudeRef",
-    "GPSLongitudeRef",
-    "DateCreated",
-    "ExifImageWidth",
-    "ExifImageHeight",
-    "ImageWidth",
-    "ImageHeight",
-    "PixelXDimension",
-    "PixelYDimension",
-    "MetadataDate",
-];
-
-/**
- * Read Exif data from an image {@link file} and use that to construct and
- * return an {@link ParsedExtractedMetadata}.
- *
- * This function is tailored for use when we upload files.
- */
-export const parseImageMetadata = async (
-    file: File,
-    fileTypeInfo: FileTypeInfo,
-): Promise<ParsedExtractedMetadata> => {
-    const exifData = await getParsedExifData(
-        file,
-        fileTypeInfo,
-        exifTagsNeededForParsingImageMetadata,
-    );
-
-    // TODO: Exif- remove me.
-    log.debug(() => ["exif/old", exifData]);
-    return {
-        location: getEXIFLocation(exifData),
-        creationTime: getEXIFTime(exifData),
-        width: exifData?.imageWidth ?? null,
-        height: exifData?.imageHeight ?? null,
-    };
-};
-
-export async function getParsedExifData(
-    receivedFile: File,
-    { extension }: FileTypeInfo,
-    tags?: string[],
-): Promise<ParsedEXIFData> {
-    const exifLessFormats = ["gif", "bmp"];
-    const exifrUnsupportedFileFormatMessage = "Unknown file format";
-
-    try {
-        if (exifLessFormats.includes(extension)) return null;
-
-        const exifData: RawEXIFData = await exifr.parse(receivedFile, {
-            reviveValues: false,
-            tiff: true,
-            xmp: true,
-            icc: true,
-            iptc: true,
-            jfif: true,
-            ihdr: true,
-        });
-        if (!exifData) {
-            return null;
-        }
-        const filteredExifData = tags
-            ? Object.fromEntries(
-                  Object.entries(exifData).filter(([key]) =>
-                      tags.includes(key),
-                  ),
-              )
-            : exifData;
-        return parseExifData(filteredExifData);
-    } catch (e) {
-        if (e.message == exifrUnsupportedFileFormatMessage) {
-            log.error(`EXIFR does not support ${extension} files`, e);
-            return undefined;
-        } else {
-            log.error(`Failed to parse Exif data for a ${extension} file`, e);
-            throw e;
-        }
-    }
-}
-
-function parseExifData(exifData: RawEXIFData): ParsedEXIFData {
-    if (!exifData) {
-        return null;
-    }
-    const {
-        DateTimeOriginal,
-        CreateDate,
-        ModifyDate,
-        DateCreated,
-        ImageHeight,
-        ImageWidth,
-        ExifImageHeight,
-        ExifImageWidth,
-        PixelXDimension,
-        PixelYDimension,
-        MetadataDate,
-        ...rest
-    } = exifData;
-    const parsedExif: ParsedEXIFData = { ...rest };
-    if (DateTimeOriginal) {
-        parsedExif.DateTimeOriginal = parseEXIFDate(exifData.DateTimeOriginal);
-    }
-    if (CreateDate) {
-        parsedExif.CreateDate = parseEXIFDate(exifData.CreateDate);
-    }
-    if (ModifyDate) {
-        parsedExif.ModifyDate = parseEXIFDate(exifData.ModifyDate);
-    }
-    if (DateCreated) {
-        parsedExif.DateCreated = parseEXIFDate(exifData.DateCreated);
-    }
-    if (MetadataDate) {
-        parsedExif.MetadataDate = parseEXIFDate(exifData.MetadataDate);
-    }
-    if (exifData.GPSLatitude && exifData.GPSLongitude) {
-        const parsedLocation = parseEXIFLocation(
-            exifData.GPSLatitude,
-            exifData.GPSLatitudeRef,
-            exifData.GPSLongitude,
-            exifData.GPSLongitudeRef,
-        );
-        parsedExif.latitude = parsedLocation.latitude;
-        parsedExif.longitude = parsedLocation.longitude;
-    }
-    if (ImageWidth && ImageHeight) {
-        if (typeof ImageWidth === "number" && typeof ImageHeight === "number") {
-            parsedExif.imageWidth = ImageWidth;
-            parsedExif.imageHeight = ImageHeight;
-        } else {
-            log.warn("Exif: Ignoring non-numeric ImageWidth or ImageHeight");
-        }
-    } else if (ExifImageWidth && ExifImageHeight) {
-        if (
-            typeof ExifImageWidth === "number" &&
-            typeof ExifImageHeight === "number"
-        ) {
-            parsedExif.imageWidth = ExifImageWidth;
-            parsedExif.imageHeight = ExifImageHeight;
-        } else {
-            log.warn(
-                "Exif: Ignoring non-numeric ExifImageWidth or ExifImageHeight",
-            );
-        }
-    } else if (PixelXDimension && PixelYDimension) {
-        if (
-            typeof PixelXDimension === "number" &&
-            typeof PixelYDimension === "number"
-        ) {
-            parsedExif.imageWidth = PixelXDimension;
-            parsedExif.imageHeight = PixelYDimension;
-        } else {
-            log.warn(
-                "Exif: Ignoring non-numeric PixelXDimension or PixelYDimension",
-            );
-        }
-    }
-    return parsedExif;
-}
-
-function parseEXIFDate(dateTimeString: string) {
-    try {
-        if (typeof dateTimeString !== "string" || dateTimeString === "") {
-            throw new Error("Invalid date string");
-        }
-
-        // Check and parse date in the format YYYYMMDD
-        if (dateTimeString.length === 8) {
-            const year = Number(dateTimeString.slice(0, 4));
-            const month = Number(dateTimeString.slice(4, 6));
-            const day = Number(dateTimeString.slice(6, 8));
-            if (
-                !Number.isNaN(year) &&
-                !Number.isNaN(month) &&
-                !Number.isNaN(day)
-            ) {
-                const date = new Date(year, month - 1, day);
-                if (!Number.isNaN(+date)) {
-                    return date;
-                }
-            }
-        }
-        const [year, month, day, hour, minute, second] = dateTimeString
-            .match(/\d+/g)
-            .map(Number);
-
-        if (
-            typeof year === "undefined" ||
-            Number.isNaN(year) ||
-            typeof month === "undefined" ||
-            Number.isNaN(month) ||
-            typeof day === "undefined" ||
-            Number.isNaN(day)
-        ) {
-            throw new Error("Invalid date");
-        }
-        let date: Date;
-        if (
-            typeof hour === "undefined" ||
-            Number.isNaN(hour) ||
-            typeof minute === "undefined" ||
-            Number.isNaN(minute) ||
-            typeof second === "undefined" ||
-            Number.isNaN(second)
-        ) {
-            date = new Date(year, month - 1, day);
-        } else {
-            date = new Date(year, month - 1, day, hour, minute, second);
-        }
-        if (Number.isNaN(+date)) {
-            throw new Error("Invalid date");
-        }
-        return date;
-    } catch (e) {
-        log.error(`Failed to parseEXIFDate ${dateTimeString}`, e);
-        return null;
-    }
-}
-
-export function parseEXIFLocation(
-    gpsLatitude: number[],
-    gpsLatitudeRef: string,
-    gpsLongitude: number[],
-    gpsLongitudeRef: string,
-) {
-    try {
-        if (
-            !Array.isArray(gpsLatitude) ||
-            !Array.isArray(gpsLongitude) ||
-            gpsLatitude.length !== 3 ||
-            gpsLongitude.length !== 3
-        ) {
-            throw new Error("Invalid Exif location");
-        }
-        const latitude = convertDMSToDD(
-            gpsLatitude[0],
-            gpsLatitude[1],
-            gpsLatitude[2],
-            gpsLatitudeRef,
-        );
-        const longitude = convertDMSToDD(
-            gpsLongitude[0],
-            gpsLongitude[1],
-            gpsLongitude[2],
-            gpsLongitudeRef,
-        );
-        return { latitude, longitude };
-    } catch (e) {
-        const p = {
-            gpsLatitude,
-            gpsLatitudeRef,
-            gpsLongitude,
-            gpsLongitudeRef,
-        };
-        log.error(`Failed to parse Exif location ${JSON.stringify(p)}`, e);
-        return { ...NULL_LOCATION };
-    }
-}
-
-function convertDMSToDD(
-    degrees: number,
-    minutes: number,
-    seconds: number,
-    direction: string,
-) {
-    let dd = degrees + minutes / 60 + seconds / (60 * 60);
-    if (direction === "S" || direction === "W") dd *= -1;
-    return dd;
-}
-
-export function getEXIFLocation(exifData: ParsedEXIFData): Location {
-    if (!exifData || (!exifData.latitude && exifData.latitude !== 0)) {
-        return { ...NULL_LOCATION };
-    }
-    return { latitude: exifData.latitude, longitude: exifData.longitude };
-}
-
-export function getEXIFTime(exifData: ParsedEXIFData): number {
-    if (!exifData) {
-        return null;
-    }
-    const dateTime =
-        exifData.DateTimeOriginal ??
-        exifData.DateCreated ??
-        exifData.CreateDate ??
-        exifData.MetadataDate ??
-        exifData.ModifyDate;
-    if (!dateTime) {
-        return null;
-    }
-    return validateAndGetCreationUnixTimeInMicroSeconds(dateTime);
-}
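For reference, the convertDMSToDD helper in the file deleted above maps a degrees/minutes/seconds triple plus a hemisphere reference to signed decimal degrees, e.g.:

    // 37° 46' 30" S  →  -(37 + 46/60 + 30/3600) = -37.775
    convertDMSToDD(37, 46, 30, "S"); // -37.775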
@@ -2486,11 +2486,6 @@ esutils@^2.0.2:
   resolved "https://registry.yarnpkg.com/esutils/-/esutils-2.0.3.tgz#74d2eb4de0b8da1293711910d50775b9b710ef64"
   integrity sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==
 
-exifr@^7.1.3:
-  version "7.1.3"
-  resolved "https://registry.yarnpkg.com/exifr/-/exifr-7.1.3.tgz#f6218012c36dbb7d843222011b27f065fddbab6f"
-  integrity sha512-g/aje2noHivrRSLbAUtBPWFbxKdKhgj/xr1vATDdUXPOFYJlQ62Ft0oy+72V6XLIpDJfHs6gXLbBLAolqOXYRw==
-
 exifreader@^4:
   version "4.23.3"
   resolved "https://registry.yarnpkg.com/exifreader/-/exifreader-4.23.3.tgz#3389c2dab3ab2501562ebdef4115ea34ab9d9aa4"