From 6d2c317877ea800b7d5e76c3fbd2ba89703c124d Mon Sep 17 00:00:00 2001 From: Gary Peck Date: Sun, 2 Feb 2025 20:08:22 +0100 Subject: [PATCH] Add support for Takeout's new .supplemental-metadata.json files In recent Google Takeout archives, the metadata JSON files are named "${original_filename}.supplemental-metadata.json" instead of "${original_filename}.json" as before. I refactored the previous code so that `getMetadataJSONMapKeyForJSON()` only removes the ".json" suffix from the metadata filename and does not make any other changes. All of the filename munging is now done to the name of the media file. That was the only way I could make the process deterministic. As far as I can figure out, there's no deterministic way of deriving the media filename from the metadata filename -- it's only deterministic going from the media filename to the metadata filename. These new names are still subject to the 46-character clipping limit, with some specific rules about how the filename is clipped: - The ".json" suffix is never clipped; only the ".supplemental-metadata" portion is. - If the original filename is 46 characters or longer, then the ".supplemental-metadata" suffix gets completely removed during the clipping (and, if the filename is longer than 46 characters, a portion of the original filename is removed too, as before). - The numbered suffix (if present) is also never clipped. It is, however, added after the (possibly clipped) ".supplemental-metadata" portion, instead of after the original filename. E.g. "IMG_1234(1).jpg" would previously use a metadata filename of "IMG_1234.jpg(1).json". Now it uses a metadata filename of "IMG_1234.jpg.supplemental-metadata(1).json". But if the filename is too long, it gets turned into something like "IMG_1234.jpg.suppl(1).json". - Note that if the original filename is exactly 45 characters long, then everything except for the leading "." of ".supplemental-metadata" will get clipped. So the metadata file ends up with a filename like "filename_that_is_45_chars_long.jpg..json". 
I added a bunch of additional test cases in `upload.test.ts` based on actual filenames I have in my Google Photos Takeout archives. The new code passes all of the new test cases, as well as the original ones. --- .../photos/src/services/upload/takeout.ts | 97 ++++++++++++------- web/apps/photos/tests/upload.test.ts | 45 +++++++-- 2 files changed, 99 insertions(+), 43 deletions(-) diff --git a/web/apps/photos/src/services/upload/takeout.ts b/web/apps/photos/src/services/upload/takeout.ts index 4fc723a9f7..013830ff89 100644 --- a/web/apps/photos/src/services/upload/takeout.ts +++ b/web/apps/photos/src/services/upload/takeout.ts @@ -23,65 +23,81 @@ export interface ParsedMetadataJSON { description?: string; } +export interface FileNameComponents { + originalName: string; + numberedSuffix: string; + extension: string; + isEditedFile: boolean; +} + export const MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT = 46; +const EDITED_FILE_SUFFIX = "-edited"; +const METADATA_SUFFIX = ".supplemental-metadata"; export const getMetadataJSONMapKeyForJSON = ( collectionID: number, jsonFileName: string, ) => { - let title = jsonFileName.slice(0, -1 * ".json".length); - const endsWithNumberedSuffixWithBrackets = /\(\d+\)$/.exec(title); - if (endsWithNumberedSuffixWithBrackets) { - title = title.slice( - 0, - -1 * endsWithNumberedSuffixWithBrackets[0].length, - ); - const [name, extension] = nameAndExtension(title); - return `${collectionID}-${name}${endsWithNumberedSuffixWithBrackets[0]}.${extension}`; - } - return `${collectionID}-${title}`; + return `${collectionID}-${jsonFileName.slice(0, -1 * ".json".length)}`; }; // if the file name is greater than MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT(46) , then google photos clips the file name // so we need to use the clipped file name to get the metadataJSON file export const getClippedMetadataJSONMapKeyForFile = ( collectionID: number, - fileName: string, + components: FileNameComponents, ) => { - return `${collectionID}-${fileName.slice( - 0, - 
MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT, - )}`; + const baseFileName = `${components.originalName}${components.extension}`; + return `${collectionID}-${baseFileName.slice(0, MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT)}${components.numberedSuffix ?? ""}`; +}; + +// newer Takeout exports are attaching a ".supplemental-metadata" suffix to the file name of the metadataJSON file, +// and then clipping the file name if it's too long (ending up with filenames like +// "very_long_file_name.jpg.supple.json") +export const getSupplementaryMetadataJSONMapKeyForFile = ( + collectionID: number, + components: FileNameComponents, +) => { + const baseFileName = `${components.originalName}${components.extension}${METADATA_SUFFIX}`; + return `${collectionID}-${baseFileName.slice(0, MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT)}${components.numberedSuffix ?? ""}`; }; export const getMetadataJSONMapKeyForFile = ( collectionID: number, - fileName: string, + components: FileNameComponents, ) => { - return `${collectionID}-${getFileOriginalName(fileName)}`; + const baseFileName = `${components.originalName}${components.extension}`; + return `${collectionID}-${baseFileName}${components.numberedSuffix ?? ""}`; }; -const EDITED_FILE_SUFFIX = "-edited"; - /* - Get the original file name for edited file to associate it to original file's metadataJSON file - as edited file doesn't have their own metadata file + Get the components of the file name. Also removes the "-edited" suffix, if present, so that the edited file can be + associated to the original file's metadataJSON file as edited files don't have their own metadata files. */ -function getFileOriginalName(fileName: string) { - let originalName: string = null; - const [name, extension] = nameAndExtension(fileName); +export const getFileNameComponents = (fileName: string): FileNameComponents => { + let [name, extension] = nameAndExtension(fileName); + if (extension) { + extension = "." 
+ extension; + } + let numberedSuffix: string = null; + const endsWithNumberedSuffixWithBrackets = /\(\d+\)$/.exec(name); + if (endsWithNumberedSuffixWithBrackets) { + name = name.slice(0, -1 * endsWithNumberedSuffixWithBrackets[0].length); + numberedSuffix = endsWithNumberedSuffixWithBrackets[0]; + } const isEditedFile = name.endsWith(EDITED_FILE_SUFFIX); if (isEditedFile) { - originalName = name.slice(0, -1 * EDITED_FILE_SUFFIX.length); - } else { - originalName = name; + name = name.slice(0, -1 * EDITED_FILE_SUFFIX.length); } - if (extension) { - originalName += "." + extension; - } - return originalName; -} + + return { + originalName: name, + numberedSuffix, + extension, + isEditedFile, + }; +}; /** Try to parse the contents of a metadata JSON file from a Google Takeout. */ export const tryParseTakeoutMetadataJSON = async ( @@ -194,11 +210,20 @@ export const matchTakeoutMetadata = ( collectionID: number, parsedMetadataJSONMap: Map, ) => { - let key = getMetadataJSONMapKeyForFile(collectionID, fileName); + const components = getFileNameComponents(fileName); + let key = getMetadataJSONMapKeyForFile(collectionID, components); let takeoutMetadata = parsedMetadataJSONMap.get(key); - if (!takeoutMetadata && key.length > MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT) { - key = getClippedMetadataJSONMapKeyForFile(collectionID, fileName); + if (!takeoutMetadata) { + key = getClippedMetadataJSONMapKeyForFile(collectionID, components); + takeoutMetadata = parsedMetadataJSONMap.get(key); + } + + if (!takeoutMetadata) { + key = getSupplementaryMetadataJSONMapKeyForFile( + collectionID, + components, + ); takeoutMetadata = parsedMetadataJSONMap.get(key); } diff --git a/web/apps/photos/tests/upload.test.ts b/web/apps/photos/tests/upload.test.ts index 85be8e583a..b690f1c4cf 100644 --- a/web/apps/photos/tests/upload.test.ts +++ b/web/apps/photos/tests/upload.test.ts @@ -7,10 +7,11 @@ import { } from "@/new/photos/services/files"; import { parseDateFromDigitGroups } from 
"services/upload/date"; import { - MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT, getClippedMetadataJSONMapKeyForFile, getMetadataJSONMapKeyForFile, getMetadataJSONMapKeyForJSON, + getSupplementaryMetadataJSONMapKeyForFile, + getFileNameComponents, } from "services/upload/takeout"; import { getUserDetailsV2 } from "services/userService"; @@ -100,6 +101,30 @@ const FILE_NAME_TO_JSON_NAME = [ filename: "IMG2021021(1)74722(1).jpg", jsonFilename: "IMG2021021(1)74722.jpg(1).json", }, + { + filename: "IMG_1159.HEIC", + jsonFilename: "IMG_1159.HEIC.supplemental-metadata.json", + }, + { + filename: "PXL_20241231_151646544.MP.jpg", + jsonFilename: "PXL_20241231_151646544.MP.jpg.supplemental-met.json", + }, + { + filename: "PXL_20240827_094331806.PORTRAIT(1).jpg", + jsonFilename: "PXL_20240827_094331806.PORTRAIT.jpg.supplement(1).json", + }, + { + filename: "PXL_20240506_142610305.LONG_EXPOSURE-01.COVER.jpg", + jsonFilename: "PXL_20240506_142610305.LONG_EXPOSURE-01.COVER..json", + }, + { + filename: "PXL_20211120_223243932.MOTION-02.ORIGINAL.jpg", + jsonFilename: "PXL_20211120_223243932.MOTION-02.ORIGINAL.jpg..json", + }, + { + filename: "20220322_205147-edited(1).jpg", + jsonFilename: "20220322_205147.jpg.supplemental-metadata(1).json", + }, ]; export async function testUpload() { @@ -401,14 +426,20 @@ function mappingFileAndJSONFileCheck() { 0, jsonFilename, ); - let fileNameGeneratedKey = getMetadataJSONMapKeyForFile(0, filename); - if ( - fileNameGeneratedKey !== jsonFileNameGeneratedKey && - filename.length > MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT - ) { + + // this duplicates somewhat the logic in takeout.ts:matchTakeoutMetadata() + const components = getFileNameComponents(filename); + let fileNameGeneratedKey = getMetadataJSONMapKeyForFile(0, components); + if (fileNameGeneratedKey !== jsonFileNameGeneratedKey) { fileNameGeneratedKey = getClippedMetadataJSONMapKeyForFile( 0, - filename, + components, + ); + } + if (fileNameGeneratedKey !== jsonFileNameGeneratedKey) { + 
fileNameGeneratedKey = getSupplementaryMetadataJSONMapKeyForFile( + 0, + components, ); }