[web] Remove unnecessary sax dependency by reworking multipart uploads (#5997)

Manav Rathi
2025-05-21 16:39:35 +05:30
committed by GitHub
6 changed files with 218 additions and 79 deletions

View File

@@ -22,8 +22,7 @@
"react-virtualized-auto-sizer": "^1.0.26",
"react-window": "^1.8.11",
"sanitize-filename": "^1.6.3",
"similarity-transformation": "^0.0.1",
"xml-js": "^1.6.11"
"similarity-transformation": "^0.0.1"
},
"devDependencies": {
"@types/node": "^22.15.18",

View File

@@ -311,7 +311,15 @@ export const FileList: React.FC<FileListProps> = ({
return timeStampList;
}
// TODO(RE): Remove after audit.
if (isDevBuild) throw new Error("Unexpected footer change");
if (
isDevBuild &&
(footer ||
publicCollectionGalleryContext.credentials ||
showAppDownloadBanner)
) {
console.log({ timeStampList, footer, showAppDownloadBanner });
throw new Error("Unexpected footer change");
}
if (footer) {
return [
...timeStampList,

View File

@@ -18,7 +18,8 @@ export const authenticatedRequestHeaders = async () => ({
/**
* Return headers that should be passed along with (almost) all unauthenticated
* `fetch` calls that we make to our API servers.
* `fetch` calls that we make to our remotes like our API servers (museum), or
* to presigned URLs that are handled by the S3 storage buckets themselves.
*
* - The client package name.
*/
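
For context, the body of this helper isn't shown in the diff. A plausible sketch of what it might look like; both the header name and the clientPackageName value are assumptions for illustration, not confirmed by this diff:

const clientPackageName = "io.ente.photos.web"; // assumed value
export const publicRequestHeadersSketch = () => ({
    // Identify the calling client to remote (assumed header name).
    "X-Client-Package": clientPackageName,
});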

View File

@@ -6,7 +6,9 @@ import {
authenticatedPublicAlbumsRequestHeaders,
authenticatedRequestHeaders,
ensureOk,
publicRequestHeaders,
retryAsyncOperation,
retryEnsuringHTTPOk,
type PublicAlbumsCredentials,
} from "ente-base/http";
import log from "ente-base/log";
@@ -18,12 +20,18 @@ import { z } from "zod";
import type { MultipartUploadURLs, UploadFile } from "./upload-service";
/**
* A pre-signed URL along with the associated object key.
* A pre-signed URL along with the associated object key that is later used to
* refer to file contents (the "object") that were uploaded to this URL.
*/
const ObjectUploadURL = z.object({
/** The objectKey with which remote will refer to this object. */
/**
* The objectKey with which remote (museum) will refer to this object.
*/
objectKey: z.string(),
/** A pre-signed URL that can be used to upload data to S3. */
/**
* A pre-signed URL that can be used to upload data to an S3-compatible
* remote.
*/
url: z.string(),
});
@@ -233,40 +241,179 @@ export class PhotosUploadHTTPClient {
throw e;
}
}
async completeMultipartUpload(completeURL: string, reqBody: unknown) {
try {
await retryAsyncOperation(() =>
// @ts-ignore
HTTPService.post(completeURL, reqBody, null, {
"content-type": "text/xml",
}),
);
} catch (e) {
log.error("put file in parts failed", e);
throw e;
}
}
async completeMultipartUploadV2(completeURL: string, reqBody: unknown) {
try {
const origin = await uploaderOrigin();
await retryAsyncOperation(() =>
HTTPService.post(
`${origin}/multipart-complete`,
reqBody,
// @ts-ignore
null,
{ "content-type": "text/xml", "UPLOAD-URL": completeURL },
),
);
} catch (e) {
log.error("put file in parts failed", e);
throw e;
}
}
}
/**
* Information about an individual part of a multipart upload that has been
* uploaded to the remote (S3 or proxy).
*
* See: [Note: Multipart uploads].
*/
export interface MultipartCompletedPart {
/**
* The part number (1-indexed).
*
* The part number indicates where this part belongs in the sequential
* ordering of the overall file's data.
*/
partNumber: number;
/**
* The part "ETag".
*
* This is the Entity tag (retrieved as the "ETag" response header) returned
* by remote when the part was uploaded.
*/
eTag: string;
}
/**
* Construct an XML string of the format expected as the request body for
* {@link _completeMultipartUpload} or
* {@link _completeMultipartUploadViaWorker}.
*
* @param parts Information about the parts that were uploaded.
*/
const createMultipartUploadRequestBody = (
parts: MultipartCompletedPart[],
): string => {
// To avoid introducing a dependency on an XML library, we construct the
// requisite XML by hand.
//
// Example:
//
// <CompleteMultipartUpload>
// <Part>
// <PartNumber>1</PartNumber>
// <ETag>"1b3e6cdb1270c0b664076f109a7137c1"</ETag>
// </Part>
// <Part>
// <PartNumber>2</PartNumber>
// <ETag>"6049d6384a9e65694c833a3aca6584fd"</ETag>
// </Part>
// <Part>
// <PartNumber>3</PartNumber>
// <ETag>"331747eae8068f03b844e6f28cc0ed23"</ETag>
// </Part>
// </CompleteMultipartUpload>
//
// Spec:
// https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html
//
// <CompleteMultipartUpload>
// <Part>
// <PartNumber>integer</PartNumber>
// <ETag>string</ETag>
// </Part>
// ...
// </CompleteMultipartUpload>
//
// Note that in the example given on the spec page, the etag strings are quoted:
//
// <CompleteMultipartUpload>
// <Part>
// <PartNumber>1</PartNumber>
// <ETag>"a54357aff0632cce46d942af68356b38"</ETag>
// </Part>
// ...
// </CompleteMultipartUpload>
//
// No extra quotes need to be added; the ETag values we get from remote are
// already quoted, so we just need to pass them along verbatim.
const resultParts = parts.map(
(part) =>
`<Part><PartNumber>${part.partNumber}</PartNumber><ETag>${part.eTag}</ETag></Part>`,
);
return `<CompleteMultipartUpload>\n${resultParts.join("\n")}\n</CompleteMultipartUpload>`;
};
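
As a concrete illustration (not part of the diff), two completed parts run through this helper produce:

const body = createMultipartUploadRequestBody([
    { partNumber: 1, eTag: '"1b3e6cdb1270c0b664076f109a7137c1"' },
    { partNumber: 2, eTag: '"6049d6384a9e65694c833a3aca6584fd"' },
]);
// body is now:
// <CompleteMultipartUpload>
// <Part><PartNumber>1</PartNumber><ETag>"1b3e6cdb1270c0b664076f109a7137c1"</ETag></Part>
// <Part><PartNumber>2</PartNumber><ETag>"6049d6384a9e65694c833a3aca6584fd"</ETag></Part>
// </CompleteMultipartUpload>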
/**
* Complete a multipart upload by reporting information about all the uploaded
* parts to the provided {@link completionURL}.
*
* @param completionURL A presigned URL to which the final status of the
* uploaded parts should be reported.
*
* @param completedParts Information about all the parts of the file that have
* been uploaded. The part numbers must start at 1 and must be consecutive.
*
* [Note: Multipart uploads]
*
* Multipart uploads are a mechanism to upload a large file onto a remote
* storage bucket by breaking it into smaller chunks ("parts"), uploading each
* part separately, and then reporting the consolidated information about all
* the uploaded parts to a URL that marks the upload as complete on remote.
*
* This allows greater resilience since uploads of individual parts can be
* retried independently without failing the entire upload on transient network
* issues. This also helps self-hosters, since cloud providers often have limits
* on the size of individual requests that they'll allow through (e.g. the
* Cloudflare free plan currently has a 100 MB request size limit).
*
* The flow is implemented in two ways:
*
* a. The normal way, where each request is made directly to a remote S3 bucket
* using the presigned URL.
*
* b. Using workers, where the requests are proxied via a worker near the
* user's network to speed them up.
*
* See the documentation of {@link shouldDisableCFUploadProxy} for more details
* about the via-worker flow.
*
* In both cases, the overall flow is roughly like the following:
*
* 1. Obtain multiple presigned URLs from remote (museum). The specific API call
* will differ (because of the different authentication mechanisms) depending
* on whether we're running in the context of the photos app or of the public
* albums app.
*
* 2. Break the file to be uploaded into parts, and upload each part using a PUT
* request to one of the presigned URLs we got in step 1. There are two
* variants of this - one where we directly upload to the remote (S3), and
* one where we go via a worker.
*
* 3. Once all the parts have been uploaded, send a consolidated report of all
* the uploaded parts (from step 2) to remote via another presigned
* "completion URL" that we also got in step 1. Like step 2, there are two
* variants of this - one where we directly tell the remote (S3)
* ({@link completeMultipartUpload}), and one where we report via a worker
* ({@link completeMultipartUploadViaWorker}).
*/
export const completeMultipartUpload = (
completionURL: string,
completedParts: MultipartCompletedPart[],
) =>
retryEnsuringHTTPOk(() =>
fetch(completionURL, {
method: "POST",
headers: { ...publicRequestHeaders(), "Content-Type": "text/xml" },
body: createMultipartUploadRequestBody(completedParts),
}),
);
/**
* Variant of {@link completeMultipartUpload} that uses the CF worker.
*/
export const completeMultipartUploadViaWorker = async (
completionURL: string,
completedParts: MultipartCompletedPart[],
) => {
const origin = await uploaderOrigin();
return retryEnsuringHTTPOk(() =>
fetch(`${origin}/multipart-complete`, {
method: "POST",
headers: {
...publicRequestHeaders(),
"Content-Type": "text/xml",
"UPLOAD-URL": completionURL,
},
body: createMultipartUploadRequestBody(completedParts),
}),
);
};
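
Both variants rely on retryEnsuringHTTPOk from ente-base/http, whose implementation isn't part of this diff. A minimal sketch of what such a wrapper might look like, assuming a fixed attempt count and linear backoff (both assumptions):

const retryEnsuringHTTPOkSketch = async (request: () => Promise<Response>) => {
    const maxAttempts = 3; // assumed retry count
    for (let attempt = 1; ; attempt++) {
        try {
            const res = await request();
            // Surface non-2xx responses as errors so that they too are retried.
            if (!res.ok) throw new Error(`HTTP error: ${res.status}`);
            return res;
        } catch (e) {
            if (attempt == maxAttempts) throw e;
            // Wait a bit longer after each failed attempt (assumed strategy).
            await new Promise((resolve) => setTimeout(resolve, attempt * 1000));
        }
    }
};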
/**
* Lowest layer for file upload related HTTP operations when we're running in
* the context of the public albums app.

View File

@@ -44,7 +44,6 @@ import { settingsSnapshot } from "ente-new/photos/services/settings";
import { CustomError, handleUploadError } from "ente-shared/error";
import { mergeUint8Arrays } from "ente-utils/array";
import { ensureInteger, ensureNumber } from "ente-utils/ensure";
import * as convert from "xml-js";
import type { UploadableUploadItem, UploadItem } from ".";
import {
RANDOM_PERCENTAGE_PROGRESS_FOR_PUT,
@@ -53,8 +52,11 @@ import {
} from ".";
import { tryParseEpochMicrosecondsFromFileName } from "./date";
import {
completeMultipartUpload,
completeMultipartUploadViaWorker,
PhotosUploadHTTPClient,
PublicAlbumsUploadHTTPClient,
type MultipartCompletedPart,
type ObjectUploadURL,
} from "./remote";
import type { ParsedMetadataJSON } from "./takeout";
@@ -1540,11 +1542,6 @@ const uploadToBucket = async (
}
};
interface PartEtag {
PartNumber: number;
ETag: string;
}
async function uploadStreamUsingMultipart(
fileLocalID: number,
dataStream: EncryptedFileStream,
@@ -1563,7 +1560,7 @@ async function uploadStreamUsingMultipart(
const streamReader = stream.getReader();
const percentPerPart =
RANDOM_PERCENTAGE_PROGRESS_FOR_PUT() / uploadPartCount;
const partEtags: PartEtag[] = [];
const completedParts: MultipartCompletedPart[] = [];
let fileSize = 0;
for (const [
index,
@@ -1571,8 +1568,8 @@ async function uploadStreamUsingMultipart(
] of multipartUploadURLs.partURLs.entries()) {
abortIfCancelled();
const uploadChunk = await combineChunksToFormUploadPart(streamReader);
fileSize += uploadChunk.length;
const uploadPart = await nextMultipartUploadPart(streamReader);
fileSize += uploadPart.length;
const progressTracker = makeProgressTracker(
fileLocalID,
percentPerPart,
@@ -1582,45 +1579,44 @@ async function uploadStreamUsingMultipart(
if (!isCFUploadProxyDisabled) {
eTag = await photosHTTPClient.putFilePartV2(
fileUploadURL,
uploadChunk,
uploadPart,
progressTracker,
);
} else {
eTag = await photosHTTPClient.putFilePart(
fileUploadURL,
uploadChunk,
uploadPart,
progressTracker,
);
}
partEtags.push({ PartNumber: index + 1, ETag: eTag });
completedParts.push({ partNumber: index + 1, eTag });
}
const { done } = await streamReader.read();
if (!done) throw new Error("More chunks than expected");
const completeURL = multipartUploadURLs.completeURL;
const cBody = convert.js2xml(
{ CompleteMultipartUpload: { Part: partEtags } },
{ compact: true, ignoreComment: true, spaces: 4 },
);
const completionURL = multipartUploadURLs.completeURL;
if (!isCFUploadProxyDisabled) {
await photosHTTPClient.completeMultipartUploadV2(completeURL, cBody);
await completeMultipartUploadViaWorker(completionURL, completedParts);
} else {
await photosHTTPClient.completeMultipartUpload(completeURL, cBody);
await completeMultipartUpload(completionURL, completedParts);
}
return { objectKey: multipartUploadURLs.objectKey, fileSize };
}
async function combineChunksToFormUploadPart(
/**
* Construct byte arrays, up to 20 MB each, containing the contents of (up to)
* the next 5 {@link streamEncryptionChunkSize}-sized chunks read from the
* given {@link streamReader}.
* {@link streamReader}.
*/
const nextMultipartUploadPart = async (
streamReader: ReadableStreamDefaultReader<Uint8Array>,
) {
const combinedChunks = [];
) => {
const chunks = [];
for (let i = 0; i < multipartChunksPerPart; i++) {
const { done, value: chunk } = await streamReader.read();
if (done) {
break;
}
combinedChunks.push(chunk);
if (done) break;
chunks.push(chunk);
}
return mergeUint8Arrays(combinedChunks);
}
return mergeUint8Arrays(chunks);
};
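
As an aside, the "up to 20 MB" figure in the doc comment above follows from the constants involved. A sketch of the arithmetic, assuming a 4 MB streamEncryptionChunkSize (the constant's actual value isn't shown in this diff):

// Assumed values, for illustration only.
const streamEncryptionChunkSize = 4 * 1024 * 1024; // 4 MB per encrypted chunk
const multipartChunksPerPart = 5; // chunks combined into a single upload part
// Maximum size of one multipart upload part: 5 * 4 MB = 20 MB.
const maxPartSize = multipartChunksPerPart * streamEncryptionChunkSize;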

View File

@@ -3643,11 +3643,6 @@ sanitize-filename@^1.6.3:
dependencies:
truncate-utf8-bytes "^1.0.0"
sax@^1.2.4:
version "1.4.1"
resolved "https://registry.yarnpkg.com/sax/-/sax-1.4.1.tgz#44cc8988377f126304d3b3fc1010c733b929ef0f"
integrity sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==
scheduler@^0.26.0:
version "0.26.0"
resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.26.0.tgz#4ce8a8c2a2095f13ea11bf9a445be50c555d6337"
@@ -4250,13 +4245,6 @@ wrap-ansi@^7.0.0:
string-width "^4.1.0"
strip-ansi "^6.0.0"
xml-js@^1.6.11:
version "1.6.11"
resolved "https://registry.yarnpkg.com/xml-js/-/xml-js-1.6.11.tgz#927d2f6947f7f1c19a316dd8eea3614e8b18f8e9"
integrity sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==
dependencies:
sax "^1.2.4"
y18n@^5.0.5:
version "5.0.8"
resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55"