[web] Finalize migration to new Exif library (#2630)

This commit is contained in:
Manav Rathi
2024-08-07 21:07:37 +05:30
committed by GitHub
15 changed files with 93 additions and 635 deletions

View File

@@ -10,12 +10,10 @@
"@ente/eslint-config": "*",
"@ente/shared": "*",
"@stripe/stripe-js": "^1.13.2",
"@xmldom/xmldom": "^0.8.10",
"bip39": "^3.0.4",
"bs58": "^5.0.0",
"chrono-node": "^2.2.6",
"debounce": "^2.0.0",
"exifr": "^7.1.3",
"exifreader": "^4",
"fast-srp-hap": "^2.0.4",
"ffmpeg-wasm": "file:./thirdparty/ffmpeg-wasm",

View File

@@ -1,6 +1,5 @@
import log from "@/base/log";
import type { Location } from "@/new/photos/types/metadata";
import type { LocationTagData } from "types/entity";
import type { Location, LocationTagData } from "types/entity";
export interface City {
city: string;

View File

@@ -1,5 +1,4 @@
import log from "@/base/log";
import { validateAndGetCreationUnixTimeInMicroSeconds } from "@ente/shared/time";
/**
* Try to extract a date (as epoch microseconds) from a file name by matching it
@@ -41,6 +40,21 @@ export const tryParseEpochMicrosecondsFromFileName = (
}
};
export function validateAndGetCreationUnixTimeInMicroSeconds(dateTime: Date) {
if (!dateTime || isNaN(dateTime.getTime())) {
return null;
}
const unixTime = dateTime.getTime() * 1000;
//ignoring dateTimeString = "0000:00:00 00:00:00"
if (unixTime === Date.UTC(0, 0, 0, 0, 0, 0, 0) || unixTime === 0) {
return null;
} else if (unixTime > Date.now() * 1000) {
return null;
} else {
return unixTime;
}
}
interface DateComponent<T = number> {
year: T;
month: T;

View File

@@ -1,5 +1,3 @@
import { Location } from "@/new/photos/types/metadata";
export enum EntityType {
LOCATION_TAG = "location",
}
@@ -27,6 +25,11 @@ export interface EncryptedEntity {
userID: number;
}
export interface Location {
latitude: number | null;
longitude: number | null;
}
export interface LocationTagData {
name: string;
radius: number;

View File

@@ -201,11 +201,8 @@ For more details, see [translations.md](translations.md).
## Media
- [ExifReader](https://github.com/mattiasw/ExifReader) is used for Exif
parsing. We also need its optional peer dependency
[@xmldom/xmldom](https://github.com/xmldom/xmldom) since the browser's
DOMParser is not available in web workers.
[piexifjs](https://github.com/hMatoba/piexifjs) is used for writing back
Exif (only supports JPEG).
parsing. [piexifjs](https://github.com/hMatoba/piexifjs) is used for writing
back Exif (only supports JPEG).
- [jszip](https://github.com/Stuk/jszip) is used for reading zip files in the
web code (Live photos are zip files under the hood). Note that the desktop

View File

@@ -1,66 +1,10 @@
import { nameAndExtension } from "@/base/file";
import log from "@/base/log";
import { inWorker } from "@/base/env";
import {
parseMetadataDate,
type ParsedMetadata,
type ParsedMetadataDate,
} from "@/media/file-metadata";
import { FileType } from "@/media/file-type";
import { parseImageMetadata } from "@ente/shared/utils/exif-old";
import ExifReader from "exifreader";
import type { EnteFile } from "../types/file";
import type { ParsedExtractedMetadata } from "../types/metadata";
const cmpTsEq = (a: number | undefined | null, b: number | undefined) => {
if (!a && !b) return true;
if (!a || !b) return false;
if (a == b) return true;
if (Math.floor(a / 1e6) == Math.floor(b / 1e6)) return true;
return false;
};
export const cmpNewLib = (
oldLib: ParsedExtractedMetadata,
newLib: ParsedMetadata,
) => {
const logM = (r: string) =>
log.info("[exif]", r, JSON.stringify({ old: oldLib, new: newLib }));
if (
cmpTsEq(oldLib.creationTime, newLib.creationDate?.timestamp) &&
oldLib.location.latitude == newLib.location?.latitude &&
oldLib.location.longitude == newLib.location?.longitude
) {
if (
oldLib.width == newLib.width &&
oldLib.height == newLib.height &&
oldLib.creationTime == newLib.creationDate?.timestamp
)
logM("exact match");
else logM("enhanced match");
log.debug(() => ["exif/cmp", { oldLib, newLib }]);
} else {
logM("potential mismatch ❗️🚩");
}
};
export const cmpNewLib2 = async (
enteFile: EnteFile,
blob: Blob,
_exif: unknown,
) => {
const [, ext] = nameAndExtension(enteFile.metadata.title);
const oldLib = await parseImageMetadata(
new File([blob], enteFile.metadata.title),
{
fileType: FileType.image,
extension: ext ?? "",
},
);
// cast is fine here, this is just temporary debugging code.
const rawExif = _exif as RawExifTags;
const newLib = parseExif(rawExif);
cmpNewLib(oldLib, newLib);
};
/**
* Extract Exif and other metadata from the given file.
@@ -166,8 +110,6 @@ const parseDates = (tags: RawExifTags) => {
const iptc = parseIPTCDates(tags);
const xmp = parseXMPDates(tags);
log.debug(() => ["exif/dates", { exif, iptc, xmp }]);
return {
DateTimeOriginal:
valid(xmp.DateTimeOriginal) ??
@@ -533,6 +475,17 @@ export type RawExifTags = Omit<ExifReader.ExpandedTags, "Thumbnail" | "xmp"> & {
* to know about ExifReader specifically.
*/
export const extractRawExif = async (blob: Blob): Promise<RawExifTags> => {
// The browser's DOMParser is not available in web workers. So if this
    // function gets called from a web worker, then it would not be able to
// parse XMP tags.
//
// There is a way around this problem, by also installing ExifReader's
// optional peer dependency "@xmldom/xmldom". But since we currently have no
// use case for calling this code in a web worker, we just abort immediately
// to let future us know that we need to install it.
if (inWorker())
throw new Error("DOMParser is not available in web workers");
const tags = await ExifReader.load(await blob.arrayBuffer(), {
async: true,
expanded: true,

View File

@@ -10,41 +10,10 @@ import DownloadManager from "../download";
import type { UploadItem } from "../upload/types";
/**
* A pair of blobs - the original, and a possibly converted "renderable" one -
* for a file that we're trying to index.
*/
export interface IndexableBlobs {
/**
* The original file's data (as a {@link Blob}).
*
* - For images this is guaranteed to be present.
* - For videos it will not be present.
* - For live photos it will the (original) image component of the live
* photo.
*/
originalImageBlob: Blob | undefined;
/**
* The original (if the browser possibly supports rendering this type of
* images) or otherwise a converted JPEG blob.
*
* This blob is meant to be used to construct the {@link ImageBitmap}
* that'll be used for further operations that need access to the RGB data
* of the image.
*
* - For images this is constructed from the image.
* - For videos this is constructed from the thumbnail.
* - For live photos this is constructed from the image component of the
* live photo.
*/
renderableBlob: Blob;
}
/**
* Indexable blobs augmented with the image bitmap and RGBA data.
* An image bitmap and its RGBA data.
*
* This is data structure containing data about an image in all formats that the
* various indexing steps need. Consolidating all the data here and parsing them
* in one go obviates the need for each indexing step to roll their own parsing.
* various indexing steps need.
*/
export interface ImageBitmapAndData {
/**
@@ -66,7 +35,7 @@ export interface ImageBitmapAndData {
* Create an {@link ImageBitmap} from the given {@link imageBlob}, and return
* both the image bitmap and its {@link ImageData}.
*/
export const imageBitmapAndData = async (
export const createImageBitmapAndData = async (
imageBlob: Blob,
): Promise<ImageBitmapAndData> => {
const imageBitmap = await createImageBitmap(imageBlob);
@@ -83,15 +52,15 @@ export const imageBitmapAndData = async (
};
/**
* Return a pair of blobs for the given data - the original, and a renderable
* one (possibly involving a JPEG conversion).
* Return a renderable blob (converting to JPEG if needed) for the given data.
*
* The blob from the relevant image component is either constructed using the
* given {@link uploadItem} if present, otherwise it is downloaded from remote.
*
* - For images the original is used.
* - For live photos the original image component is used.
* - For videos the thumbnail is used.
* - For images it is constructed from the image.
* - For videos it is constructed from the thumbnail.
* - For live photos it is constructed from the image component of the live
* photo.
*
* Then, if the image blob we have seems to be something that the browser cannot
* handle, we convert it into a JPEG blob so that it can subsequently be used to
@@ -107,34 +76,28 @@ export const imageBitmapAndData = async (
* witness that we're actually running in our desktop app (and thus can safely
* call our Node.js layer for various functionality).
*/
export const indexableBlobs = async (
export const fetchRenderableBlob = async (
enteFile: EnteFile,
uploadItem: UploadItem | undefined,
electron: ElectronMLWorker,
): Promise<IndexableBlobs> =>
): Promise<Blob> =>
uploadItem
? await indexableUploadItemBlobs(enteFile, uploadItem, electron)
: await indexableEnteFileBlobs(enteFile);
? await fetchRenderableUploadItemBlob(enteFile, uploadItem, electron)
: await fetchRenderableEnteFileBlob(enteFile);
const indexableUploadItemBlobs = async (
const fetchRenderableUploadItemBlob = async (
enteFile: EnteFile,
uploadItem: UploadItem,
electron: ElectronMLWorker,
) => {
const fileType = enteFile.metadata.fileType;
let originalImageBlob: Blob | undefined;
let renderableBlob: Blob;
if (fileType == FileType.video) {
const thumbnailData = await DownloadManager.getThumbnail(enteFile);
renderableBlob = new Blob([ensure(thumbnailData)]);
return new Blob([ensure(thumbnailData)]);
} else {
originalImageBlob = await readNonVideoUploadItem(uploadItem, electron);
renderableBlob = await renderableImageBlob(
enteFile.metadata.title,
originalImageBlob,
);
const blob = await readNonVideoUploadItem(uploadItem, electron);
return renderableImageBlob(enteFile.metadata.title, blob);
}
return { originalImageBlob, renderableBlob };
};
/**
@@ -173,39 +136,32 @@ const readNonVideoUploadItem = async (
};
/**
* Return a pair of blobs for the given file - the original, and a renderable
* one (possibly involving a JPEG conversion).
 * Return a renderable blob (possibly involving a JPEG conversion) for the
 * given {@link EnteFile}.
*
* - The original will be downloaded if needed
* - The original will be converted to JPEG if needed
* - The original will be downloaded if needed.
* - The original will be converted to JPEG if needed.
*/
export const indexableEnteFileBlobs = async (
export const fetchRenderableEnteFileBlob = async (
enteFile: EnteFile,
): Promise<IndexableBlobs> => {
): Promise<Blob> => {
const fileType = enteFile.metadata.fileType;
if (fileType == FileType.video) {
const thumbnailData = await DownloadManager.getThumbnail(enteFile);
return {
originalImageBlob: undefined,
renderableBlob: new Blob([ensure(thumbnailData)]),
};
return new Blob([ensure(thumbnailData)]);
}
const fileStream = await DownloadManager.getFile(enteFile);
const originalImageBlob = await new Response(fileStream).blob();
let renderableBlob: Blob;
if (fileType == FileType.livePhoto) {
const { imageFileName, imageData } = await decodeLivePhoto(
enteFile.metadata.title,
originalImageBlob,
);
renderableBlob = await renderableImageBlob(
imageFileName,
new Blob([imageData]),
);
return renderableImageBlob(imageFileName, new Blob([imageData]));
} else if (fileType == FileType.image) {
renderableBlob = await renderableImageBlob(
return await renderableImageBlob(
enteFile.metadata.title,
originalImageBlob,
);
@@ -213,6 +169,4 @@ export const indexableEnteFileBlobs = async (
// A layer above us should've already filtered these out.
throw new Error(`Cannot index unsupported file type ${fileType}`);
}
return { originalImageBlob, renderableBlob };
};

View File

@@ -1,7 +1,7 @@
import { blobCache } from "@/base/blob-cache";
import { ensure } from "@/utils/ensure";
import type { EnteFile } from "../../types/file";
import { indexableEnteFileBlobs } from "./blob";
import { fetchRenderableEnteFileBlob } from "./blob";
import { type Box, type FaceIndex } from "./face";
import { clamp } from "./math";
@@ -26,7 +26,7 @@ export const regenerateFaceCrops = async (
enteFile: EnteFile,
faceIndex: FaceIndex,
) => {
const { renderableBlob } = await indexableEnteFileBlobs(enteFile);
const renderableBlob = await fetchRenderableEnteFileBlob(enteFile);
const imageBitmap = await createImageBitmap(renderableBlob);
try {

View File

@@ -10,29 +10,26 @@ import { type RemoteCLIPIndex } from "./clip";
import { type RemoteFaceIndex } from "./face";
/**
* [Note: Derived embeddings and other metadata]
* [Note: Derived embeddings model]
*
* The APIs they deal with derived data started in a ML context, and would store
* embeddings generated by particular models. Thus the API endpoints use the
* name "embedding", and are parameterized by a "model" enum.
 * The API endpoints related to embeddings are parameterized by a "model"
 * enum. This is a bit of a misnomer, since the contents of the payload are not
* just the raw embeddings themselves, but also additional data generated by the
* ML model.
*
* Next step in the evolution was that instead of just storing the embedding,
* the code also started storing various additional data generated by the ML
* model. For example, the face indexing process generates multiple face
* embeddings per file, each with an associated detection box. So instead of
* storing just a singular embedding, the data that got stored was this entire
* face index structure containing multiple embeddings and associated data.
* For example, the face indexing process generates multiple face embeddings per
* file, each with an associated detection box. So instead of storing just a
* singular embedding, the data is an entire face index structure containing
* multiple embeddings and associated data.
*
* Further down, it was realized that the fan out caused on remote when trying
* to fetch all derived data - both ML ("clip", "face") and non-ML ("exif") -
* was problematic, and also their raw JSON was unnecessarily big. To deal with
* these better, we now have a single "derived" model type, whose data is a
* gzipped map of the form:
* to fetch both CLIP and face embeddings was problematic, and also that their
* raw JSON was unnecessarily big. To deal with these better, we now have a
* single "derived" model type, whose data is a gzipped map of the form:
*
* {
* "face": ... the face indexing result ...
* "clip": ... the CLIP indexing result ...
* "exif": ... the Exif extracted from the file ...
* ... more in the future ...
* }
*/

View File

@@ -8,15 +8,13 @@ import type { EnteFile } from "@/new/photos/types/file";
import { fileLogID } from "@/new/photos/utils/file";
import { ensure } from "@/utils/ensure";
import { wait } from "@/utils/promise";
import { DOMParser } from "@xmldom/xmldom";
import { expose, wrap } from "comlink";
import downloadManager from "../download";
import { cmpNewLib2, extractRawExif, type RawExifTags } from "../exif";
import { getAllLocalFiles, getLocalTrashedFiles } from "../files";
import type { UploadItem } from "../upload/types";
import {
imageBitmapAndData,
indexableBlobs,
createImageBitmapAndData,
fetchRenderableBlob,
type ImageBitmapAndData,
} from "./blob";
import {
@@ -105,24 +103,6 @@ export class MLWorker {
// Initialize the downloadManager running in the web worker with the
// user's token. It'll be used to download files to index if needed.
await downloadManager.init(await ensureAuthToken());
// Normally, DOMParser is available to web code, so our Exif library
// (ExifReader) has an optional dependency on the the non-browser
// alternative DOMParser provided by @xmldom/xmldom.
//
// But window.DOMParser is not available to web workers.
//
// So we need to get ExifReader to use the @xmldom/xmldom version.
// ExifReader references it using the following code:
//
// __non_webpack_require__('@xmldom/xmldom')
//
// So we need to explicitly reference it to ensure that it does not get
// tree shaken by webpack. But ensuring it is part of the bundle does
// not seem to work (for reasons I don't yet understand), so we also
// need to monkey patch it (This also ensures that it is not tree
// shaken).
globalThis.DOMParser = DOMParser;
}
/**
@@ -371,19 +351,8 @@ const syncWithLocalFilesAndGetFilesToIndex = async (
/**
* Index file, save the persist the results locally, and put them on remote.
*
* [Note: ML indexing does more ML]
*
* Nominally, and primarily, indexing a file involves computing its various ML
* embeddings: faces and CLIP. However, since this is a occasion where we have
* the original file in memory, it is a great time to also compute other derived
* data related to the file (instead of re-downloading it again).
*
* So this function also does things that are not related to ML and/or indexing:
*
* - Extracting Exif.
* - Saving face crops.
*
* ---
* Indexing a file involves computing its various ML embeddings: faces and CLIP.
* Since we have the original file in memory, we also save the face crops.
*
* [Note: Transient and permanent indexing failures]
*
@@ -433,11 +402,6 @@ const index = async (
const existingRemoteFaceIndex = remoteDerivedData?.parsed?.face;
const existingRemoteCLIPIndex = remoteDerivedData?.parsed?.clip;
// exif is expected to be a JSON object in the shape of RawExifTags, but
// this function don't care what's inside it and can just treat it as an
// opaque blob.
const existingExif = remoteDerivedData?.raw.exif;
let existingFaceIndex: FaceIndex | undefined;
if (
existingRemoteFaceIndex &&
@@ -458,8 +422,8 @@ const index = async (
existingCLIPIndex = { embedding };
}
// See if we already have all the mandatory derived data fields. If so, just
// update our local db and return.
// If we already have all the derived data fields then just update our local
// db and return.
if (existingFaceIndex && existingCLIPIndex) {
try {
@@ -476,8 +440,7 @@ const index = async (
// There is at least one derived data type that still needs to be indexed.
// Videos will not have an original blob whilst having a renderable blob.
const { originalImageBlob, renderableBlob } = await indexableBlobs(
const renderableBlob = await fetchRenderableBlob(
enteFile,
uploadItem,
electron,
@@ -485,7 +448,7 @@ const index = async (
let image: ImageBitmapAndData;
try {
image = await imageBitmapAndData(renderableBlob);
image = await createImageBitmapAndData(renderableBlob);
} catch (e) {
// If we cannot get the raw image data for the file, then retrying again
// won't help (if in the future we enhance the underlying code for
@@ -501,15 +464,13 @@ const index = async (
try {
let faceIndex: FaceIndex;
let clipIndex: CLIPIndex;
let exif: unknown;
const startTime = Date.now();
try {
[faceIndex, clipIndex, exif] = await Promise.all([
[faceIndex, clipIndex] = await Promise.all([
existingFaceIndex ?? indexFaces(enteFile, image, electron),
existingCLIPIndex ?? indexCLIP(image, electron),
existingExif ?? tryExtractExif(originalImageBlob, f),
]);
} catch (e) {
// See: [Note: Transient and permanent indexing failures]
@@ -518,19 +479,11 @@ const index = async (
throw e;
}
try {
if (originalImageBlob && exif)
await cmpNewLib2(enteFile, originalImageBlob, exif);
} catch (e) {
log.warn(`Skipping exif cmp for ${f}`, e);
}
log.debug(() => {
const ms = Date.now() - startTime;
const msg = [];
if (!existingFaceIndex) msg.push(`${faceIndex.faces.length} faces`);
if (!existingCLIPIndex) msg.push("clip");
if (!existingExif && originalImageBlob) msg.push("exif");
return `Indexed ${msg.join(" and ")} in ${f} (${ms} ms)`;
});
@@ -555,23 +508,17 @@ const index = async (
...existingRawDerivedData,
face: remoteFaceIndex,
clip: remoteCLIPIndex,
...(exif ? { exif } : {}),
};
if (existingFaceIndex && existingCLIPIndex && !exif) {
// If we were indexing just for exif, but exif generation didn't
// happen, there is no need to upload.
} else {
log.debug(() => ["Uploading derived data", rawDerivedData]);
log.debug(() => ["Uploading derived data", rawDerivedData]);
try {
await putDerivedData(enteFile, rawDerivedData);
} catch (e) {
// See: [Note: Transient and permanent indexing failures]
log.error(`Failed to put derived data for ${f}`, e);
if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id);
throw e;
}
try {
await putDerivedData(enteFile, rawDerivedData);
} catch (e) {
// See: [Note: Transient and permanent indexing failures]
log.error(`Failed to put derived data for ${f}`, e);
if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id);
throw e;
}
try {
@@ -582,7 +529,8 @@ const index = async (
} catch (e) {
// Not sure if DB failures should be considered permanent or
// transient. There isn't a known case where writing to the local
// indexedDB would fail.
// indexedDB should systematically fail. It could fail if there was
// no space on device, but that's eminently retriable.
log.error(`Failed to save indexes for ${f}`, e);
throw e;
}
@@ -602,34 +550,3 @@ const index = async (
image.bitmap.close();
}
};
/**
* A helper function that tries to extract the raw Exif, but returns `undefined`
* if something goes wrong (or it isn't possible) instead of throwing.
*
* Exif extraction is not a critical item, we don't want the actual indexing to
* fail because we were unable to extract Exif. This is not rare: one scenario
* is if we were trying to index a file in an exotic format. The ML indexing
* will succeed (because we convert it to a renderable blob), but the Exif
* extraction will fail (since it needs the original blob, but the original blob
* can be an arbitrary format).
*
* @param originalImageBlob A {@link Blob} containing the original data for the
* image (or the image component of a live photo) whose Exif we're trying to
* extract. If this is not available, we skip the extraction and return
* `undefined`.
*
* @param f The {@link fileLogID} for the file this blob corresponds to.
*/
export const tryExtractExif = async (
originalImageBlob: Blob | undefined,
f: string,
): Promise<RawExifTags | undefined> => {
if (!originalImageBlob) return undefined;
try {
return await extractRawExif(originalImageBlob);
} catch (e) {
log.warn(`Ignoring error during Exif extraction for ${f}`, e);
return undefined;
}
};

View File

@@ -1,5 +1,4 @@
import type { ZipItem } from "@/base/types/ipc";
import type { Location } from "../../types/metadata";
/**
* An item to upload is one of the following:
@@ -59,8 +58,6 @@ export const toDataOrPathOrZipEntry = (desktopUploadItem: DesktopUploadItem) =>
export const RANDOM_PERCENTAGE_PROGRESS_FOR_PUT = () => 90 + 10 * Math.random();
export const NULL_LOCATION: Location = { latitude: null, longitude: null };
export enum UPLOAD_STAGES {
START,
READING_GOOGLE_METADATA_FILES,

View File

@@ -1,11 +0,0 @@
export interface Location {
latitude: number | null;
longitude: number | null;
}
export interface ParsedExtractedMetadata {
location: Location;
creationTime: number | null;
width: number | null;
height: number | null;
}

View File

@@ -22,21 +22,6 @@ export function getUnixTimeInMicroSecondsWithDelta(delta: TimeDelta): number {
return currentDate.getTime() * 1000;
}
export function validateAndGetCreationUnixTimeInMicroSeconds(dateTime: Date) {
if (!dateTime || isNaN(dateTime.getTime())) {
return null;
}
const unixTime = dateTime.getTime() * 1000;
//ignoring dateTimeString = "0000:00:00 00:00:00"
if (unixTime === Date.UTC(0, 0, 0, 0, 0, 0, 0) || unixTime === 0) {
return null;
} else if (unixTime > Date.now() * 1000) {
return null;
} else {
return unixTime;
}
}
function _addDays(date: Date, days: number): Date {
const result = new Date(date);
result.setDate(date.getDate() + days);

View File

@@ -1,340 +0,0 @@
// The code in this file is deprecated and meant to be deleted.
//
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-nocheck
import log from "@/base/log";
import { type FileTypeInfo } from "@/media/file-type";
import { NULL_LOCATION } from "@/new/photos/services/upload/types";
import type {
Location,
ParsedExtractedMetadata,
} from "@/new/photos/types/metadata";
import { validateAndGetCreationUnixTimeInMicroSeconds } from "@ente/shared/time";
import exifr from "exifr";
type ParsedEXIFData = Record<string, any> &
Partial<{
DateTimeOriginal: Date;
CreateDate: Date;
ModifyDate: Date;
DateCreated: Date;
MetadataDate: Date;
latitude: number;
longitude: number;
imageWidth: number;
imageHeight: number;
}>;
type RawEXIFData = Record<string, any> &
Partial<{
DateTimeOriginal: string;
CreateDate: string;
ModifyDate: string;
DateCreated: string;
MetadataDate: string;
GPSLatitude: number[];
GPSLongitude: number[];
GPSLatitudeRef: string;
GPSLongitudeRef: string;
ImageWidth: number;
ImageHeight: number;
}>;
const exifTagsNeededForParsingImageMetadata = [
"DateTimeOriginal",
"CreateDate",
"ModifyDate",
"GPSLatitude",
"GPSLongitude",
"GPSLatitudeRef",
"GPSLongitudeRef",
"DateCreated",
"ExifImageWidth",
"ExifImageHeight",
"ImageWidth",
"ImageHeight",
"PixelXDimension",
"PixelYDimension",
"MetadataDate",
];
/**
* Read Exif data from an image {@link file} and use that to construct and
* return an {@link ParsedExtractedMetadata}.
*
* This function is tailored for use when we upload files.
*/
export const parseImageMetadata = async (
file: File,
fileTypeInfo: FileTypeInfo,
): Promise<ParsedExtractedMetadata> => {
const exifData = await getParsedExifData(
file,
fileTypeInfo,
exifTagsNeededForParsingImageMetadata,
);
// TODO: Exif- remove me.
log.debug(() => ["exif/old", exifData]);
return {
location: getEXIFLocation(exifData),
creationTime: getEXIFTime(exifData),
width: exifData?.imageWidth ?? null,
height: exifData?.imageHeight ?? null,
};
};
export async function getParsedExifData(
receivedFile: File,
{ extension }: FileTypeInfo,
tags?: string[],
): Promise<ParsedEXIFData> {
const exifLessFormats = ["gif", "bmp"];
const exifrUnsupportedFileFormatMessage = "Unknown file format";
try {
if (exifLessFormats.includes(extension)) return null;
const exifData: RawEXIFData = await exifr.parse(receivedFile, {
reviveValues: false,
tiff: true,
xmp: true,
icc: true,
iptc: true,
jfif: true,
ihdr: true,
});
if (!exifData) {
return null;
}
const filteredExifData = tags
? Object.fromEntries(
Object.entries(exifData).filter(([key]) =>
tags.includes(key),
),
)
: exifData;
return parseExifData(filteredExifData);
} catch (e) {
if (e.message == exifrUnsupportedFileFormatMessage) {
log.error(`EXIFR does not support ${extension} files`, e);
return undefined;
} else {
log.error(`Failed to parse Exif data for a ${extension} file`, e);
throw e;
}
}
}
function parseExifData(exifData: RawEXIFData): ParsedEXIFData {
if (!exifData) {
return null;
}
const {
DateTimeOriginal,
CreateDate,
ModifyDate,
DateCreated,
ImageHeight,
ImageWidth,
ExifImageHeight,
ExifImageWidth,
PixelXDimension,
PixelYDimension,
MetadataDate,
...rest
} = exifData;
const parsedExif: ParsedEXIFData = { ...rest };
if (DateTimeOriginal) {
parsedExif.DateTimeOriginal = parseEXIFDate(exifData.DateTimeOriginal);
}
if (CreateDate) {
parsedExif.CreateDate = parseEXIFDate(exifData.CreateDate);
}
if (ModifyDate) {
parsedExif.ModifyDate = parseEXIFDate(exifData.ModifyDate);
}
if (DateCreated) {
parsedExif.DateCreated = parseEXIFDate(exifData.DateCreated);
}
if (MetadataDate) {
parsedExif.MetadataDate = parseEXIFDate(exifData.MetadataDate);
}
if (exifData.GPSLatitude && exifData.GPSLongitude) {
const parsedLocation = parseEXIFLocation(
exifData.GPSLatitude,
exifData.GPSLatitudeRef,
exifData.GPSLongitude,
exifData.GPSLongitudeRef,
);
parsedExif.latitude = parsedLocation.latitude;
parsedExif.longitude = parsedLocation.longitude;
}
if (ImageWidth && ImageHeight) {
if (typeof ImageWidth === "number" && typeof ImageHeight === "number") {
parsedExif.imageWidth = ImageWidth;
parsedExif.imageHeight = ImageHeight;
} else {
log.warn("Exif: Ignoring non-numeric ImageWidth or ImageHeight");
}
} else if (ExifImageWidth && ExifImageHeight) {
if (
typeof ExifImageWidth === "number" &&
typeof ExifImageHeight === "number"
) {
parsedExif.imageWidth = ExifImageWidth;
parsedExif.imageHeight = ExifImageHeight;
} else {
log.warn(
"Exif: Ignoring non-numeric ExifImageWidth or ExifImageHeight",
);
}
} else if (PixelXDimension && PixelYDimension) {
if (
typeof PixelXDimension === "number" &&
typeof PixelYDimension === "number"
) {
parsedExif.imageWidth = PixelXDimension;
parsedExif.imageHeight = PixelYDimension;
} else {
log.warn(
"Exif: Ignoring non-numeric PixelXDimension or PixelYDimension",
);
}
}
return parsedExif;
}
function parseEXIFDate(dateTimeString: string) {
try {
if (typeof dateTimeString !== "string" || dateTimeString === "") {
throw new Error("Invalid date string");
}
// Check and parse date in the format YYYYMMDD
if (dateTimeString.length === 8) {
const year = Number(dateTimeString.slice(0, 4));
const month = Number(dateTimeString.slice(4, 6));
const day = Number(dateTimeString.slice(6, 8));
if (
!Number.isNaN(year) &&
!Number.isNaN(month) &&
!Number.isNaN(day)
) {
const date = new Date(year, month - 1, day);
if (!Number.isNaN(+date)) {
return date;
}
}
}
const [year, month, day, hour, minute, second] = dateTimeString
.match(/\d+/g)
.map(Number);
if (
typeof year === "undefined" ||
Number.isNaN(year) ||
typeof month === "undefined" ||
Number.isNaN(month) ||
typeof day === "undefined" ||
Number.isNaN(day)
) {
throw new Error("Invalid date");
}
let date: Date;
if (
typeof hour === "undefined" ||
Number.isNaN(hour) ||
typeof minute === "undefined" ||
Number.isNaN(minute) ||
typeof second === "undefined" ||
Number.isNaN(second)
) {
date = new Date(year, month - 1, day);
} else {
date = new Date(year, month - 1, day, hour, minute, second);
}
if (Number.isNaN(+date)) {
throw new Error("Invalid date");
}
return date;
} catch (e) {
log.error(`Failed to parseEXIFDate ${dateTimeString}`, e);
return null;
}
}
export function parseEXIFLocation(
gpsLatitude: number[],
gpsLatitudeRef: string,
gpsLongitude: number[],
gpsLongitudeRef: string,
) {
try {
if (
!Array.isArray(gpsLatitude) ||
!Array.isArray(gpsLongitude) ||
gpsLatitude.length !== 3 ||
gpsLongitude.length !== 3
) {
throw new Error("Invalid Exif location");
}
const latitude = convertDMSToDD(
gpsLatitude[0],
gpsLatitude[1],
gpsLatitude[2],
gpsLatitudeRef,
);
const longitude = convertDMSToDD(
gpsLongitude[0],
gpsLongitude[1],
gpsLongitude[2],
gpsLongitudeRef,
);
return { latitude, longitude };
} catch (e) {
const p = {
gpsLatitude,
gpsLatitudeRef,
gpsLongitude,
gpsLongitudeRef,
};
log.error(`Failed to parse Exif location ${JSON.stringify(p)}`, e);
return { ...NULL_LOCATION };
}
}
function convertDMSToDD(
degrees: number,
minutes: number,
seconds: number,
direction: string,
) {
let dd = degrees + minutes / 60 + seconds / (60 * 60);
if (direction === "S" || direction === "W") dd *= -1;
return dd;
}
export function getEXIFLocation(exifData: ParsedEXIFData): Location {
if (!exifData || (!exifData.latitude && exifData.latitude !== 0)) {
return { ...NULL_LOCATION };
}
return { latitude: exifData.latitude, longitude: exifData.longitude };
}
export function getEXIFTime(exifData: ParsedEXIFData): number {
if (!exifData) {
return null;
}
const dateTime =
exifData.DateTimeOriginal ??
exifData.DateCreated ??
exifData.CreateDate ??
exifData.MetadataDate ??
exifData.ModifyDate;
if (!dateTime) {
return null;
}
return validateAndGetCreationUnixTimeInMicroSeconds(dateTime);
}

View File

@@ -2486,11 +2486,6 @@ esutils@^2.0.2:
resolved "https://registry.yarnpkg.com/esutils/-/esutils-2.0.3.tgz#74d2eb4de0b8da1293711910d50775b9b710ef64"
integrity sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==
exifr@^7.1.3:
version "7.1.3"
resolved "https://registry.yarnpkg.com/exifr/-/exifr-7.1.3.tgz#f6218012c36dbb7d843222011b27f065fddbab6f"
integrity sha512-g/aje2noHivrRSLbAUtBPWFbxKdKhgj/xr1vATDdUXPOFYJlQ62Ft0oy+72V6XLIpDJfHs6gXLbBLAolqOXYRw==
exifreader@^4:
version "4.23.3"
resolved "https://registry.yarnpkg.com/exifreader/-/exifreader-4.23.3.tgz#3389c2dab3ab2501562ebdef4115ea34ab9d9aa4"