[web] Remove unnecessary sax dependency by reworking multipart uploads (#5997)
@@ -22,8 +22,7 @@
         "react-virtualized-auto-sizer": "^1.0.26",
         "react-window": "^1.8.11",
         "sanitize-filename": "^1.6.3",
-        "similarity-transformation": "^0.0.1",
-        "xml-js": "^1.6.11"
+        "similarity-transformation": "^0.0.1"
     },
     "devDependencies": {
         "@types/node": "^22.15.18",

@@ -311,7 +311,15 @@ export const FileList: React.FC<FileListProps> = ({
         return timeStampList;
     }
     // TODO(RE): Remove after audit.
-    if (isDevBuild) throw new Error("Unexpected footer change");
+    if (
+        isDevBuild &&
+        (footer ||
+            publicCollectionGalleryContext.credentials ||
+            showAppDownloadBanner)
+    ) {
+        console.log({ timeStampList, footer, showAppDownloadBanner });
+        throw new Error("Unexpected footer change");
+    }
     if (footer) {
         return [
             ...timeStampList,

@@ -18,7 +18,8 @@ export const authenticatedRequestHeaders = async () => ({
 
 /**
  * Return headers that should be passed along with (almost) all unauthenticated
- * `fetch` calls that we make to our API servers.
+ * `fetch` calls that we make to our remotes like our API servers (museum), or
+ * to presigned URLs that are handled by the S3 storage buckets themselves.
  *
  * - The client package name.
  */

@@ -6,7 +6,9 @@ import {
     authenticatedPublicAlbumsRequestHeaders,
     authenticatedRequestHeaders,
     ensureOk,
+    publicRequestHeaders,
     retryAsyncOperation,
+    retryEnsuringHTTPOk,
     type PublicAlbumsCredentials,
 } from "ente-base/http";
 import log from "ente-base/log";

@@ -18,12 +20,18 @@ import { z } from "zod";
 import type { MultipartUploadURLs, UploadFile } from "./upload-service";
 
 /**
- * A pre-signed URL along with the associated object key.
+ * A pre-signed URL along with the associated object key that is later used to
+ * refer to file contents (the "object") that were uploaded to this URL.
  */
 const ObjectUploadURL = z.object({
-    /** The objectKey with which remote will refer to this object. */
+    /**
+     * The objectKey with which remote (museum) will refer to this object.
+     */
     objectKey: z.string(),
-    /** A pre-signed URL that can be used to upload data to S3. */
+    /**
+     * A pre-signed URL that can be used to upload data to an S3-compatible
+     * remote.
+     */
     url: z.string(),
 });

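As an aside on usage, here is a minimal sketch of how a zod schema like
ObjectUploadURL validates a response from remote; the sample values below are
invented for illustration and are not part of this change.

    // Minimal usage sketch; the values are made up.
    const uploadURL = ObjectUploadURL.parse({
        objectKey: "object-key-from-remote",
        url: "https://bucket.example.org/presigned?X-Amz-Signature=...",
    });
    // On success, `uploadURL.objectKey` and `uploadURL.url` are typed strings;
    // on a shape mismatch, `parse` throws a ZodError.
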
@@ -233,40 +241,179 @@ export class PhotosUploadHTTPClient {
             throw e;
         }
     }
 
-    async completeMultipartUpload(completeURL: string, reqBody: unknown) {
-        try {
-            await retryAsyncOperation(() =>
-                // @ts-ignore
-                HTTPService.post(completeURL, reqBody, null, {
-                    "content-type": "text/xml",
-                }),
-            );
-        } catch (e) {
-            log.error("put file in parts failed", e);
-            throw e;
-        }
-    }
-
-    async completeMultipartUploadV2(completeURL: string, reqBody: unknown) {
-        try {
-            const origin = await uploaderOrigin();
-            await retryAsyncOperation(() =>
-                HTTPService.post(
-                    `${origin}/multipart-complete`,
-                    reqBody,
-                    // @ts-ignore
-                    null,
-                    { "content-type": "text/xml", "UPLOAD-URL": completeURL },
-                ),
-            );
-        } catch (e) {
-            log.error("put file in parts failed", e);
-            throw e;
-        }
-    }
 }
 
+/**
+ * Information about an individual part of a multipart upload that has been
+ * uploaded to the remote (S3 or proxy).
+ *
+ * See: [Note: Multipart uploads].
+ */
+export interface MultipartCompletedPart {
+    /**
+     * The part number (1-indexed).
+     *
+     * The part number indicates the sequential ordering of this part within
+     * the overall file's data.
+     */
+    partNumber: number;
+    /**
+     * The part "ETag".
+     *
+     * This is the entity tag (retrieved as the "ETag" response header)
+     * returned by remote when the part was uploaded.
+     */
+    eTag: string;
+}
+
+/**
+ * Construct an XML string of the format expected as the request body for
+ * {@link completeMultipartUpload} or
+ * {@link completeMultipartUploadViaWorker}.
+ *
+ * @param parts Information about the parts that were uploaded.
+ */
+const createMultipartUploadRequestBody = (
+    parts: MultipartCompletedPart[],
+): string => {
+    // To avoid introducing a dependency on an XML library, we construct the
+    // requisite XML by hand.
+    //
+    // Example:
+    //
+    //     <CompleteMultipartUpload>
+    //       <Part>
+    //         <PartNumber>1</PartNumber>
+    //         <ETag>"1b3e6cdb1270c0b664076f109a7137c1"</ETag>
+    //       </Part>
+    //       <Part>
+    //         <PartNumber>2</PartNumber>
+    //         <ETag>"6049d6384a9e65694c833a3aca6584fd"</ETag>
+    //       </Part>
+    //       <Part>
+    //         <PartNumber>3</PartNumber>
+    //         <ETag>"331747eae8068f03b844e6f28cc0ed23"</ETag>
+    //       </Part>
+    //     </CompleteMultipartUpload>
+    //
+    // Spec:
+    // https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html
+    //
+    //     <CompleteMultipartUpload>
+    //       <Part>
+    //         <PartNumber>integer</PartNumber>
+    //         <ETag>string</ETag>
+    //       </Part>
+    //       ...
+    //     </CompleteMultipartUpload>
+    //
+    // Note that in the example given on the spec page, the etag strings are
+    // quoted:
+    //
+    //     <CompleteMultipartUpload>
+    //       <Part>
+    //         <PartNumber>1</PartNumber>
+    //         <ETag>"a54357aff0632cce46d942af68356b38"</ETag>
+    //       </Part>
+    //       ...
+    //     </CompleteMultipartUpload>
+    //
+    // No extra quotes need to be added; the etag values we get from remote are
+    // already quoted, so we just need to pass them along verbatim.
+
+    const resultParts = parts.map(
+        (part) =>
+            `<Part><PartNumber>${part.partNumber}</PartNumber><ETag>${part.eTag}</ETag></Part>`,
+    );
+    return `<CompleteMultipartUpload>\n${resultParts.join("\n")}\n</CompleteMultipartUpload>`;
+};
+
+/**
+ * Complete a multipart upload by reporting information about all the uploaded
+ * parts to the provided {@link completionURL}.
+ *
+ * @param completionURL A presigned URL to which the final status of the
+ * uploaded parts should be reported.
+ *
+ * @param completedParts Information about all the parts of the file that have
+ * been uploaded. The part numbers must start at 1 and must be consecutive.
+ *
+ * [Note: Multipart uploads]
+ *
+ * Multipart uploads are a mechanism to upload a large file onto a remote
+ * storage bucket by breaking it into smaller chunks / "parts", uploading each
+ * part separately, and then reporting the consolidated information of all the
+ * uploaded parts to a URL that marks the upload as complete on remote.
+ *
+ * This allows greater resilience since uploads of individual parts can be
+ * retried independently without failing the entire upload on transient network
+ * issues. This also helps self-hosters, since cloud providers often have
+ * limits on the size of individual requests that they'll allow through (e.g.
+ * the Cloudflare free plan currently has a 100 MB request size limit).
+ *
+ * The flow is implemented in two ways:
+ *
+ * a. The normal way, where each request is made to a remote S3 bucket directly
+ *    using the presigned URL.
+ *
+ * b. Using workers, where the requests are proxied via a worker near to the
+ *    user's network to speed the requests up.
+ *
+ * See the documentation of {@link shouldDisableCFUploadProxy} for more details
+ * about the via-worker flow.
+ *
+ * In both cases, the overall flow is roughly like the following:
+ *
+ * 1. Obtain multiple presigned URLs from remote (museum). The specific API
+ *    call will be different (because of the different authentication
+ *    mechanisms) when we're running in the context of the photos app and when
+ *    we're running in the context of the public albums app.
+ *
+ * 2. Break the file to be uploaded into parts, and upload each part using a
+ *    PUT request to one of the presigned URLs we got in step 1. There are two
+ *    variants of this - one where we directly upload to the remote (S3), and
+ *    one where we go via a worker.
+ *
+ * 3. Once all the parts have been uploaded, send a consolidated report of the
+ *    parts uploaded in step 2 to remote via another presigned "completion URL"
+ *    that we also got in step 1. Like step 2, there are two variants of this -
+ *    one where we directly tell the remote (S3)
+ *    ({@link completeMultipartUpload}), and one where we report via a worker
+ *    ({@link completeMultipartUploadViaWorker}).
+ */
+export const completeMultipartUpload = (
+    completionURL: string,
+    completedParts: MultipartCompletedPart[],
+) =>
+    retryEnsuringHTTPOk(() =>
+        fetch(completionURL, {
+            method: "POST",
+            headers: { ...publicRequestHeaders(), "Content-Type": "text/xml" },
+            body: createMultipartUploadRequestBody(completedParts),
+        }),
+    );
+
+/**
+ * Variant of {@link completeMultipartUpload} that uses the CF worker.
+ */
+export const completeMultipartUploadViaWorker = async (
+    completionURL: string,
+    completedParts: MultipartCompletedPart[],
+) => {
+    const origin = await uploaderOrigin();
+    return retryEnsuringHTTPOk(() =>
+        fetch(`${origin}/multipart-complete`, {
+            method: "POST",
+            headers: {
+                ...publicRequestHeaders(),
+                "Content-Type": "text/xml",
+                "UPLOAD-URL": completionURL,
+            },
+            body: createMultipartUploadRequestBody(completedParts),
+        }),
+    );
+};
+
 /**
  * Lowest layer for file upload related HTTP operations when we're running in
  * the context of the public albums app.

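As a usage illustration of the body builder added above, here is the string it
produces for two completed parts. The ETag values are taken from the sample in
the comment; this snippet itself is a sketch, not part of the change.

    const body = createMultipartUploadRequestBody([
        { partNumber: 1, eTag: '"1b3e6cdb1270c0b664076f109a7137c1"' },
        { partNumber: 2, eTag: '"6049d6384a9e65694c833a3aca6584fd"' },
    ]);
    // body is now:
    //
    //   <CompleteMultipartUpload>
    //   <Part><PartNumber>1</PartNumber><ETag>"1b3e6cdb1270c0b664076f109a7137c1"</ETag></Part>
    //   <Part><PartNumber>2</PartNumber><ETag>"6049d6384a9e65694c833a3aca6584fd"</ETag></Part>
    //   </CompleteMultipartUpload>
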
@@ -44,7 +44,6 @@ import { settingsSnapshot } from "ente-new/photos/services/settings";
 import { CustomError, handleUploadError } from "ente-shared/error";
 import { mergeUint8Arrays } from "ente-utils/array";
 import { ensureInteger, ensureNumber } from "ente-utils/ensure";
-import * as convert from "xml-js";
 import type { UploadableUploadItem, UploadItem } from ".";
 import {
     RANDOM_PERCENTAGE_PROGRESS_FOR_PUT,

@@ -53,8 +52,11 @@ import {
 } from ".";
 import { tryParseEpochMicrosecondsFromFileName } from "./date";
 import {
+    completeMultipartUpload,
+    completeMultipartUploadViaWorker,
     PhotosUploadHTTPClient,
     PublicAlbumsUploadHTTPClient,
+    type MultipartCompletedPart,
     type ObjectUploadURL,
 } from "./remote";
 import type { ParsedMetadataJSON } from "./takeout";

@@ -1540,11 +1542,6 @@ const uploadToBucket = async (
     }
 };
 
-interface PartEtag {
-    PartNumber: number;
-    ETag: string;
-}
-
 async function uploadStreamUsingMultipart(
     fileLocalID: number,
     dataStream: EncryptedFileStream,

@@ -1563,7 +1560,7 @@ async function uploadStreamUsingMultipart(
     const streamReader = stream.getReader();
     const percentPerPart =
         RANDOM_PERCENTAGE_PROGRESS_FOR_PUT() / uploadPartCount;
-    const partEtags: PartEtag[] = [];
+    const completedParts: MultipartCompletedPart[] = [];
     let fileSize = 0;
     for (const [
         index,

@@ -1571,8 +1568,8 @@ async function uploadStreamUsingMultipart(
     ] of multipartUploadURLs.partURLs.entries()) {
         abortIfCancelled();
 
-        const uploadChunk = await combineChunksToFormUploadPart(streamReader);
-        fileSize += uploadChunk.length;
+        const uploadPart = await nextMultipartUploadPart(streamReader);
+        fileSize += uploadPart.length;
         const progressTracker = makeProgressTracker(
             fileLocalID,
             percentPerPart,

@@ -1582,45 +1579,44 @@ async function uploadStreamUsingMultipart(
         if (!isCFUploadProxyDisabled) {
             eTag = await photosHTTPClient.putFilePartV2(
                 fileUploadURL,
-                uploadChunk,
+                uploadPart,
                 progressTracker,
             );
         } else {
             eTag = await photosHTTPClient.putFilePart(
                 fileUploadURL,
-                uploadChunk,
+                uploadPart,
                 progressTracker,
             );
         }
-        partEtags.push({ PartNumber: index + 1, ETag: eTag });
+        completedParts.push({ partNumber: index + 1, eTag });
     }
     const { done } = await streamReader.read();
     if (!done) throw new Error("More chunks than expected");
 
-    const completeURL = multipartUploadURLs.completeURL;
-    const cBody = convert.js2xml(
-        { CompleteMultipartUpload: { Part: partEtags } },
-        { compact: true, ignoreComment: true, spaces: 4 },
-    );
+    const completionURL = multipartUploadURLs.completeURL;
     if (!isCFUploadProxyDisabled) {
-        await photosHTTPClient.completeMultipartUploadV2(completeURL, cBody);
+        await completeMultipartUploadViaWorker(completionURL, completedParts);
     } else {
-        await photosHTTPClient.completeMultipartUpload(completeURL, cBody);
+        await completeMultipartUpload(completionURL, completedParts);
     }
 
     return { objectKey: multipartUploadURLs.objectKey, fileSize };
 }
 
-async function combineChunksToFormUploadPart(
+/**
+ * Construct byte arrays, up to 20 MB each, containing the contents of (up to)
+ * the next 5 {@link streamEncryptionChunkSize} chunks read from the given
+ * {@link streamReader}.
+ */
+const nextMultipartUploadPart = async (
     streamReader: ReadableStreamDefaultReader<Uint8Array>,
-) {
-    const combinedChunks = [];
+) => {
+    const chunks = [];
     for (let i = 0; i < multipartChunksPerPart; i++) {
         const { done, value: chunk } = await streamReader.read();
-        if (done) {
-            break;
-        }
-        combinedChunks.push(chunk);
+        if (done) break;
+        chunks.push(chunk);
     }
-    return mergeUint8Arrays(combinedChunks);
-}
+    return mergeUint8Arrays(chunks);
+};

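A rough sizing note on the docstring above: with five chunks per part and at
most 20 MB per part, the implied streamEncryptionChunkSize is 4 MB. That chunk
size is an inference from those two numbers, not something this diff states,
and the sketch below is illustrative only.

    // Sketch of the part sizing implied by the docstring above. The 4 MB
    // chunk size is an assumption inferred from "up to 20 MB" / 5 chunks.
    const streamEncryptionChunkSize = 4 * 1024 * 1024; // assumed, 4 MB
    const multipartChunksPerPart = 5; // per the docstring above
    const partSize = streamEncryptionChunkSize * multipartChunksPerPart; // 20 MB
    // An encrypted stream of n bytes would thus need this many part URLs:
    const partCountFor = (n: number) => Math.ceil(n / partSize);
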
@@ -3643,11 +3643,6 @@ sanitize-filename@^1.6.3:
   dependencies:
     truncate-utf8-bytes "^1.0.0"
 
-sax@^1.2.4:
-  version "1.4.1"
-  resolved "https://registry.yarnpkg.com/sax/-/sax-1.4.1.tgz#44cc8988377f126304d3b3fc1010c733b929ef0f"
-  integrity sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg==
-
 scheduler@^0.26.0:
   version "0.26.0"
   resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.26.0.tgz#4ce8a8c2a2095f13ea11bf9a445be50c555d6337"

@@ -4250,13 +4245,6 @@ wrap-ansi@^7.0.0:
     string-width "^4.1.0"
     strip-ansi "^6.0.0"
 
-xml-js@^1.6.11:
-  version "1.6.11"
-  resolved "https://registry.yarnpkg.com/xml-js/-/xml-js-1.6.11.tgz#927d2f6947f7f1c19a316dd8eea3614e8b18f8e9"
-  integrity sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==
-  dependencies:
-    sax "^1.2.4"
-
 y18n@^5.0.5:
   version "5.0.8"
   resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55"