Add support for Takeout's new .supplemental-metadata.json files
In recent Google Takeout archives, the metadata JSON files are named
"${original_filename}.supplemental-metadata.json" instead of
"${original_filename}.json", as before.
I refactored the previous code so that `getMetadataJSONMapKeyForJSON()`
only removes the ".json" suffix from the metadata filename and does not
make any other changes. All of the filename munging is now done to the
name of the media file. That was the only way I could make the process
deterministic. As far as I can figure out, there's no deterministic way
of deriving the media filename from the metadata filename -- it's only
deterministic going from the media filename to the metadata filename.
These new names are still subject to the 46-character clipping limit,
with some specific rules about how the filename is clipped:
- The ".json" suffix is never clipped, only the ".supplemental-metadata"
portion is.
- If the original filename is 46 characters or longer, then the
".supplemental-metadata" suffix gets completely removed during the
clipping, along with any portion of the original filename beyond the
first 46 characters (as before).
- The numbered suffix (if present) is also never clipped. It is, however,
appended after the (possibly clipped) ".supplemental-metadata" portion
instead of directly after the original filename. E.g. "IMG_1234(1).jpg"
would previously use a metadata filename of "IMG_1234.jpg(1).json". Now it
uses a metadata filename of
"IMG_1234.jpg.supplemental-metadata(1).json". If the filename is long
enough for clipping to apply, it instead becomes something like
"IMG_1234.jpg.suppl(1).json".
- Worth noting is that if the original filename (including its extension)
is exactly 45 characters long, then everything except for the leading "."
of ".supplemental-metadata" will get clipped. So the metadata file ends up
with a filename ending in "..json", like
"filename_that_is_45_chars_long.jpg..json".
I added a bunch of additional test cases in `upload.test.ts` based on
actual filenames I have in my Google Photos Takeout archives. The new
code passes all of the new test cases, as well as the original ones.
This commit is contained in:
@@ -23,65 +23,81 @@ export interface ParsedMetadataJSON {
|
||||
description?: string;
|
||||
}
|
||||
|
||||
export interface FileNameComponents {
|
||||
originalName: string;
|
||||
numberedSuffix: string;
|
||||
extension: string;
|
||||
isEditedFile: boolean;
|
||||
}
|
||||
|
||||
export const MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT = 46;
|
||||
const EDITED_FILE_SUFFIX = "-edited";
|
||||
const METADATA_SUFFIX = ".supplemental-metadata";
|
||||
|
||||
export const getMetadataJSONMapKeyForJSON = (
|
||||
collectionID: number,
|
||||
jsonFileName: string,
|
||||
) => {
|
||||
let title = jsonFileName.slice(0, -1 * ".json".length);
|
||||
const endsWithNumberedSuffixWithBrackets = /\(\d+\)$/.exec(title);
|
||||
if (endsWithNumberedSuffixWithBrackets) {
|
||||
title = title.slice(
|
||||
0,
|
||||
-1 * endsWithNumberedSuffixWithBrackets[0].length,
|
||||
);
|
||||
const [name, extension] = nameAndExtension(title);
|
||||
return `${collectionID}-${name}${endsWithNumberedSuffixWithBrackets[0]}.${extension}`;
|
||||
}
|
||||
return `${collectionID}-${title}`;
|
||||
return `${collectionID}-${jsonFileName.slice(0, -1 * ".json".length)}`;
|
||||
};
|
||||
|
||||
// if the file name is longer than MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT (46), then Google Photos clips the file name
|
||||
// so we need to use the clipped file name to get the metadataJSON file
|
||||
export const getClippedMetadataJSONMapKeyForFile = (
|
||||
collectionID: number,
|
||||
fileName: string,
|
||||
components: FileNameComponents,
|
||||
) => {
|
||||
return `${collectionID}-${fileName.slice(
|
||||
0,
|
||||
MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT,
|
||||
)}`;
|
||||
const baseFileName = `${components.originalName}${components.extension}`;
|
||||
return `${collectionID}-${baseFileName.slice(0, MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT)}${components.numberedSuffix ?? ""}`;
|
||||
};
|
||||
|
||||
// newer Takeout exports are attaching a ".supplemental-metadata" suffix to the file name of the metadataJSON file,
|
||||
// and then clipping the file name if it's too long (ending up with filenames like
|
||||
// "very_long_file_name.jpg.supple.json")
|
||||
export const getSupplementaryMetadataJSONMapKeyForFile = (
|
||||
collectionID: number,
|
||||
components: FileNameComponents,
|
||||
) => {
|
||||
const baseFileName = `${components.originalName}${components.extension}${METADATA_SUFFIX}`;
|
||||
return `${collectionID}-${baseFileName.slice(0, MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT)}${components.numberedSuffix ?? ""}`;
|
||||
};
|
||||
|
||||
export const getMetadataJSONMapKeyForFile = (
|
||||
collectionID: number,
|
||||
fileName: string,
|
||||
components: FileNameComponents,
|
||||
) => {
|
||||
return `${collectionID}-${getFileOriginalName(fileName)}`;
|
||||
const baseFileName = `${components.originalName}${components.extension}`;
|
||||
return `${collectionID}-${baseFileName}${components.numberedSuffix ?? ""}`;
|
||||
};
|
||||
|
||||
const EDITED_FILE_SUFFIX = "-edited";
|
||||
|
||||
/*
|
||||
Get the original file name for edited file to associate it to original file's metadataJSON file
|
||||
as edited file doesn't have their own metadata file
|
||||
Get the components of the file name. Also removes the "-edited" suffix, if present, so that the edited file can be
|
||||
associated to the original file's metadataJSON file as edited files don't have their own metadata files.
|
||||
*/
|
||||
function getFileOriginalName(fileName: string) {
|
||||
let originalName: string = null;
|
||||
const [name, extension] = nameAndExtension(fileName);
|
||||
export const getFileNameComponents = (fileName: string): FileNameComponents => {
|
||||
let [name, extension] = nameAndExtension(fileName);
|
||||
if (extension) {
|
||||
extension = "." + extension;
|
||||
}
|
||||
let numberedSuffix: string = null;
|
||||
|
||||
const endsWithNumberedSuffixWithBrackets = /\(\d+\)$/.exec(name);
|
||||
if (endsWithNumberedSuffixWithBrackets) {
|
||||
name = name.slice(0, -1 * endsWithNumberedSuffixWithBrackets[0].length);
|
||||
numberedSuffix = endsWithNumberedSuffixWithBrackets[0];
|
||||
}
|
||||
const isEditedFile = name.endsWith(EDITED_FILE_SUFFIX);
|
||||
if (isEditedFile) {
|
||||
originalName = name.slice(0, -1 * EDITED_FILE_SUFFIX.length);
|
||||
} else {
|
||||
originalName = name;
|
||||
name = name.slice(0, -1 * EDITED_FILE_SUFFIX.length);
|
||||
}
|
||||
if (extension) {
|
||||
originalName += "." + extension;
|
||||
}
|
||||
return originalName;
|
||||
}
|
||||
|
||||
return {
|
||||
originalName: name,
|
||||
numberedSuffix,
|
||||
extension,
|
||||
isEditedFile,
|
||||
};
|
||||
};
|
||||
|
||||
/** Try to parse the contents of a metadata JSON file from a Google Takeout. */
|
||||
export const tryParseTakeoutMetadataJSON = async (
|
||||
@@ -194,11 +210,20 @@ export const matchTakeoutMetadata = (
|
||||
collectionID: number,
|
||||
parsedMetadataJSONMap: Map<string, ParsedMetadataJSON>,
|
||||
) => {
|
||||
let key = getMetadataJSONMapKeyForFile(collectionID, fileName);
|
||||
const components = getFileNameComponents(fileName);
|
||||
let key = getMetadataJSONMapKeyForFile(collectionID, components);
|
||||
let takeoutMetadata = parsedMetadataJSONMap.get(key);
|
||||
|
||||
if (!takeoutMetadata && key.length > MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT) {
|
||||
key = getClippedMetadataJSONMapKeyForFile(collectionID, fileName);
|
||||
if (!takeoutMetadata) {
|
||||
key = getClippedMetadataJSONMapKeyForFile(collectionID, components);
|
||||
takeoutMetadata = parsedMetadataJSONMap.get(key);
|
||||
}
|
||||
|
||||
if (!takeoutMetadata) {
|
||||
key = getSupplementaryMetadataJSONMapKeyForFile(
|
||||
collectionID,
|
||||
components,
|
||||
);
|
||||
takeoutMetadata = parsedMetadataJSONMap.get(key);
|
||||
}
|
||||
|
||||
|
||||
@@ -7,10 +7,11 @@ import {
|
||||
} from "@/new/photos/services/files";
|
||||
import { parseDateFromDigitGroups } from "services/upload/date";
|
||||
import {
|
||||
MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT,
|
||||
getClippedMetadataJSONMapKeyForFile,
|
||||
getMetadataJSONMapKeyForFile,
|
||||
getMetadataJSONMapKeyForJSON,
|
||||
getSupplementaryMetadataJSONMapKeyForFile,
|
||||
getFileNameComponents,
|
||||
} from "services/upload/takeout";
|
||||
import { getUserDetailsV2 } from "services/userService";
|
||||
|
||||
@@ -100,6 +101,30 @@ const FILE_NAME_TO_JSON_NAME = [
|
||||
filename: "IMG2021021(1)74722(1).jpg",
|
||||
jsonFilename: "IMG2021021(1)74722.jpg(1).json",
|
||||
},
|
||||
{
|
||||
filename: "IMG_1159.HEIC",
|
||||
jsonFilename: "IMG_1159.HEIC.supplemental-metadata.json",
|
||||
},
|
||||
{
|
||||
filename: "PXL_20241231_151646544.MP.jpg",
|
||||
jsonFilename: "PXL_20241231_151646544.MP.jpg.supplemental-met.json",
|
||||
},
|
||||
{
|
||||
filename: "PXL_20240827_094331806.PORTRAIT(1).jpg",
|
||||
jsonFilename: "PXL_20240827_094331806.PORTRAIT.jpg.supplement(1).json",
|
||||
},
|
||||
{
|
||||
filename: "PXL_20240506_142610305.LONG_EXPOSURE-01.COVER.jpg",
|
||||
jsonFilename: "PXL_20240506_142610305.LONG_EXPOSURE-01.COVER..json",
|
||||
},
|
||||
{
|
||||
filename: "PXL_20211120_223243932.MOTION-02.ORIGINAL.jpg",
|
||||
jsonFilename: "PXL_20211120_223243932.MOTION-02.ORIGINAL.jpg..json",
|
||||
},
|
||||
{
|
||||
filename: "20220322_205147-edited(1).jpg",
|
||||
jsonFilename: "20220322_205147.jpg.supplemental-metadata(1).json",
|
||||
},
|
||||
];
|
||||
|
||||
export async function testUpload() {
|
||||
@@ -401,14 +426,20 @@ function mappingFileAndJSONFileCheck() {
|
||||
0,
|
||||
jsonFilename,
|
||||
);
|
||||
let fileNameGeneratedKey = getMetadataJSONMapKeyForFile(0, filename);
|
||||
if (
|
||||
fileNameGeneratedKey !== jsonFileNameGeneratedKey &&
|
||||
filename.length > MAX_FILE_NAME_LENGTH_GOOGLE_EXPORT
|
||||
) {
|
||||
|
||||
// this somewhat duplicates the logic in takeout.ts:matchTakeoutMetadata()
|
||||
const components = getFileNameComponents(filename);
|
||||
let fileNameGeneratedKey = getMetadataJSONMapKeyForFile(0, components);
|
||||
if (fileNameGeneratedKey !== jsonFileNameGeneratedKey) {
|
||||
fileNameGeneratedKey = getClippedMetadataJSONMapKeyForFile(
|
||||
0,
|
||||
filename,
|
||||
components,
|
||||
);
|
||||
}
|
||||
if (fileNameGeneratedKey !== jsonFileNameGeneratedKey) {
|
||||
fileNameGeneratedKey = getSupplementaryMetadataJSONMapKeyForFile(
|
||||
0,
|
||||
components,
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user