diff --git a/web/apps/photos/package.json b/web/apps/photos/package.json index ac79884f49..4de13c539e 100644 --- a/web/apps/photos/package.json +++ b/web/apps/photos/package.json @@ -22,8 +22,7 @@ "react-virtualized-auto-sizer": "^1.0.26", "react-window": "^1.8.11", "sanitize-filename": "^1.6.3", - "similarity-transformation": "^0.0.1", - "xml-js": "^1.6.11" + "similarity-transformation": "^0.0.1" }, "devDependencies": { "@types/node": "^22.15.18", diff --git a/web/apps/photos/src/components/FileList.tsx b/web/apps/photos/src/components/FileList.tsx index 1452befb9c..f1ae399605 100644 --- a/web/apps/photos/src/components/FileList.tsx +++ b/web/apps/photos/src/components/FileList.tsx @@ -311,7 +311,15 @@ export const FileList: React.FC = ({ return timeStampList; } // TODO(RE): Remove after audit. - if (isDevBuild) throw new Error("Unexpected footer change"); + if ( + isDevBuild && + (footer || + publicCollectionGalleryContext.credentials || + showAppDownloadBanner) + ) { + console.log({ timeStampList, footer, showAppDownloadBanner }); + throw new Error("Unexpected footer change"); + } if (footer) { return [ ...timeStampList, diff --git a/web/packages/base/http.ts b/web/packages/base/http.ts index 6a12185c56..6b43bb4e9a 100644 --- a/web/packages/base/http.ts +++ b/web/packages/base/http.ts @@ -18,7 +18,8 @@ export const authenticatedRequestHeaders = async () => ({ /** * Return headers that should be passed alongwith (almost) all unauthenticated - * `fetch` calls that we make to our API servers. + * `fetch` calls that we make to our remotes like our API servers (museum), or + * to presigned URLs that are handled by the S3 storage buckets themselves. * * - The client package name. 
*/ diff --git a/web/packages/gallery/services/upload/remote.ts b/web/packages/gallery/services/upload/remote.ts index 976a7ca3b8..46007067d9 100644 --- a/web/packages/gallery/services/upload/remote.ts +++ b/web/packages/gallery/services/upload/remote.ts @@ -6,7 +6,9 @@ import { authenticatedPublicAlbumsRequestHeaders, authenticatedRequestHeaders, ensureOk, + publicRequestHeaders, retryAsyncOperation, + retryEnsuringHTTPOk, type PublicAlbumsCredentials, } from "ente-base/http"; import log from "ente-base/log"; @@ -18,12 +20,18 @@ import { z } from "zod"; import type { MultipartUploadURLs, UploadFile } from "./upload-service"; /** - * A pre-signed URL alongwith the associated object key. + * A pre-signed URL alongwith the associated object key that is later used to + * refer to file contents (the "object") that were uploaded to this URL. */ const ObjectUploadURL = z.object({ - /** A pre-signed URL that can be used to upload data to S3. */ + /** + * A pre-signed URL that can be used to upload data to an S3-compatible + * remote. + */ objectKey: z.string(), - /** The objectKey with which remote will refer to this object. */ + /** + * The objectKey with which remote (museum) will refer to this object. 
+ */ url: z.string(), }); @@ -233,40 +241,179 @@ export class PhotosUploadHTTPClient { throw e; } } - - async completeMultipartUpload(completeURL: string, reqBody: unknown) { - try { - await retryAsyncOperation(() => - // @ts-ignore - HTTPService.post(completeURL, reqBody, null, { - "content-type": "text/xml", - }), - ); - } catch (e) { - log.error("put file in parts failed", e); - throw e; - } - } - - async completeMultipartUploadV2(completeURL: string, reqBody: unknown) { - try { - const origin = await uploaderOrigin(); - await retryAsyncOperation(() => - HTTPService.post( - `${origin}/multipart-complete`, - reqBody, - // @ts-ignore - null, - { "content-type": "text/xml", "UPLOAD-URL": completeURL }, - ), - ); - } catch (e) { - log.error("put file in parts failed", e); - throw e; - } - } } +/** + * Information about an individual part of a multipart upload that has been + * uploaded to the remote (S3 or proxy). + * + * See: [Note: Multipart uploads]. + */ +export interface MultipartCompletedPart { + /** + * The part number (1-indexed). + * + * The part number indicates the sequential ordering where this part belongs + * in the overall file's data. + */ + partNumber: number; + /** + * The part "ETag". + * + * This is the Entity tag (retrieved as the "ETag" response header) returned + * by remote when the part was uploaded. + */ + eTag: string; +} + +/** + * Construct an XML string of the format expected as the request body for + * {@link _completeMultipartUpload} or + * {@link _completeMultipartUploadViaWorker}. + * + * @param parts Information about the parts that were uploaded. + */ +const createMultipartUploadRequestBody = ( + parts: MultipartCompletedPart[], +): string => { + // To avoid introducing a dependency on a XML library, we construct the + // requisite XML by hand. 
+ // + // Example: + // + // <CompleteMultipartUpload> + // <Part> + // <PartNumber>1</PartNumber> + // <ETag>"1b3e6cdb1270c0b664076f109a7137c1"</ETag> + // </Part> + // <Part> + // <PartNumber>2</PartNumber> + // <ETag>"6049d6384a9e65694c833a3aca6584fd"</ETag> + // </Part> + // <Part> + // <PartNumber>3</PartNumber> + // <ETag>"331747eae8068f03b844e6f28cc0ed23"</ETag> + // </Part> + // </CompleteMultipartUpload> + // + // + // Spec: + // https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html + // + // <CompleteMultipartUpload xmlns="http://s3.amazonaws.com/doc/2006-03-01/"> + // <Part> + // <PartNumber>integer</PartNumber> + // <ETag>string</ETag> + // </Part> + // ... + // </CompleteMultipartUpload> + // + // Note that in the example given on the spec page, the etag strings are quoted: + // + // <CompleteMultipartUpload> + // <Part> + // <PartNumber>1</PartNumber> + // <ETag>"a54357aff0632cce46d942af68356b38"</ETag> + // </Part> + // ... + // </CompleteMultipartUpload> + // + // No extra quotes need to be added, the etag values we get from remote + // are already quoted, we just need to pass them verbatim. + + const resultParts = parts.map( + (part) => + `<Part><PartNumber>${part.partNumber}</PartNumber><ETag>${part.eTag}</ETag></Part>`, + ); + return `<CompleteMultipartUpload>\n${resultParts.join("\n")}\n</CompleteMultipartUpload>`; +}; + +/** + * Complete a multipart upload by reporting information about all the uploaded + * parts to the provided {@link completionURL}. + * + * @param completionURL A presigned URL to which the final status of the + * uploaded parts should be reported. + * + * @param completedParts Information about all the parts of the file that have + * been uploaded. The part numbers must start at 1 and must be consecutive. + * + * [Note: Multipart uploads] + * + * Multipart uploads are a mechanism to upload large files onto a remote + * storage bucket by breaking it into smaller chunks / "parts", uploading each + * part separately, and then reporting the consolidated information of all the + * uploaded parts to a URL that marks the upload as complete on remote. + * + * This allows greater resilience since uploads of individual parts can be + * retried independently without failing the entire upload on transient network + * issues. This also helps self hosters, since often cloud providers have limits + * to the size of single requests that they'll allow through (e.g. the + * Cloudflare free plan currently has a 100 MB request size limit). 
+ * + * The flow is implemented in two ways: + * + * a. The normal way, where each request is made to a remote S3 bucket directly + * using the presigned URL. + * + * b. Using workers, where the requests are proxied via a worker near to the + * user's network to speed the requests up. + * + * See the documentation of {@link shouldDisableCFUploadProxy} for more details + * about the via-worker flow. + * + * In both cases, the overall flow is roughly like the following: + * + * 1. Obtain multiple presigned URLs from remote (museum). The specific API call + * will be different (because of the different authentication mechanisms) + * when we're running in the context of the photos app and when we're running + * in the context of the public albums app. + * + * 2. Break the file to be uploaded into parts, and upload each part using a PUT + * request to one of the presigned URLs we got in step 1. There are two + * variants of this - one where we directly upload to the remote (S3), and + * one where we go via a worker. + * + * 3. Once all the parts have been uploaded, send a consolidated report of all + * the uploaded parts (from step 2) to remote via another presigned + * "completion URL" that we also got in step 1. Like step 2, there are 2 + * variants of this - one where we directly tell the remote (S3) + * ({@link completeMultipartUpload}), and one where we report via a worker + * ({@link completeMultipartUploadViaWorker}). + */ +export const completeMultipartUpload = ( + completionURL: string, + completedParts: MultipartCompletedPart[], +) => + retryEnsuringHTTPOk(() => + fetch(completionURL, { + method: "POST", + headers: { ...publicRequestHeaders(), "Content-Type": "text/xml" }, + body: createMultipartUploadRequestBody(completedParts), + }), + ); + +/** + * Variant of {@link completeMultipartUpload} that uses the CF worker. 
+ */ +export const completeMultipartUploadViaWorker = async ( + completionURL: string, + completedParts: MultipartCompletedPart[], +) => { + const origin = await uploaderOrigin(); + return retryEnsuringHTTPOk(() => + fetch(`${origin}/multipart-complete`, { + method: "POST", + headers: { + ...publicRequestHeaders(), + "Content-Type": "text/xml", + "UPLOAD-URL": completionURL, + }, + body: createMultipartUploadRequestBody(completedParts), + }), + ); +}; + /** * Lowest layer for file upload related HTTP operations when we're running in * the context of the public albums app. diff --git a/web/packages/gallery/services/upload/upload-service.ts b/web/packages/gallery/services/upload/upload-service.ts index 2ce0f46852..fb7beff2a3 100644 --- a/web/packages/gallery/services/upload/upload-service.ts +++ b/web/packages/gallery/services/upload/upload-service.ts @@ -44,7 +44,6 @@ import { settingsSnapshot } from "ente-new/photos/services/settings"; import { CustomError, handleUploadError } from "ente-shared/error"; import { mergeUint8Arrays } from "ente-utils/array"; import { ensureInteger, ensureNumber } from "ente-utils/ensure"; -import * as convert from "xml-js"; import type { UploadableUploadItem, UploadItem } from "."; import { RANDOM_PERCENTAGE_PROGRESS_FOR_PUT, @@ -53,8 +52,11 @@ import { } from "."; import { tryParseEpochMicrosecondsFromFileName } from "./date"; import { + completeMultipartUpload, + completeMultipartUploadViaWorker, PhotosUploadHTTPClient, PublicAlbumsUploadHTTPClient, + type MultipartCompletedPart, type ObjectUploadURL, } from "./remote"; import type { ParsedMetadataJSON } from "./takeout"; @@ -1540,11 +1542,6 @@ const uploadToBucket = async ( } }; -interface PartEtag { - PartNumber: number; - ETag: string; -} - async function uploadStreamUsingMultipart( fileLocalID: number, dataStream: EncryptedFileStream, @@ -1563,7 +1560,7 @@ async function uploadStreamUsingMultipart( const streamReader = stream.getReader(); const percentPerPart = 
RANDOM_PERCENTAGE_PROGRESS_FOR_PUT() / uploadPartCount; - const partEtags: PartEtag[] = []; + const completedParts: MultipartCompletedPart[] = []; let fileSize = 0; for (const [ index, @@ -1571,8 +1568,8 @@ async function uploadStreamUsingMultipart( ] of multipartUploadURLs.partURLs.entries()) { abortIfCancelled(); - const uploadChunk = await combineChunksToFormUploadPart(streamReader); - fileSize += uploadChunk.length; + const uploadPart = await nextMultipartUploadPart(streamReader); + fileSize += uploadPart.length; const progressTracker = makeProgressTracker( fileLocalID, percentPerPart, @@ -1582,45 +1579,44 @@ async function uploadStreamUsingMultipart( if (!isCFUploadProxyDisabled) { eTag = await photosHTTPClient.putFilePartV2( fileUploadURL, - uploadChunk, + uploadPart, progressTracker, ); } else { eTag = await photosHTTPClient.putFilePart( fileUploadURL, - uploadChunk, + uploadPart, progressTracker, ); } - partEtags.push({ PartNumber: index + 1, ETag: eTag }); + completedParts.push({ partNumber: index + 1, eTag }); } const { done } = await streamReader.read(); if (!done) throw new Error("More chunks than expected"); - const completeURL = multipartUploadURLs.completeURL; - const cBody = convert.js2xml( - { CompleteMultipartUpload: { Part: partEtags } }, - { compact: true, ignoreComment: true, spaces: 4 }, - ); + const completionURL = multipartUploadURLs.completeURL; if (!isCFUploadProxyDisabled) { - await photosHTTPClient.completeMultipartUploadV2(completeURL, cBody); + await completeMultipartUploadViaWorker(completionURL, completedParts); } else { - await photosHTTPClient.completeMultipartUpload(completeURL, cBody); + await completeMultipartUpload(completionURL, completedParts); } return { objectKey: multipartUploadURLs.objectKey, fileSize }; } -async function combineChunksToFormUploadPart( +/** + * Construct byte arrays, up to 20 MB each, containing the contents of (up to) + * the next 5 {@link streamEncryptionChunkSize} chunks read from the given + * {@link 
streamReader}. + */ +const nextMultipartUploadPart = async ( streamReader: ReadableStreamDefaultReader, -) { - const combinedChunks = []; +) => { + const chunks = []; for (let i = 0; i < multipartChunksPerPart; i++) { const { done, value: chunk } = await streamReader.read(); - if (done) { - break; - } - combinedChunks.push(chunk); + if (done) break; + chunks.push(chunk); } - return mergeUint8Arrays(combinedChunks); -} + return mergeUint8Arrays(chunks); +}; diff --git a/web/yarn.lock b/web/yarn.lock index 8685b8af75..158cb5b835 100644 --- a/web/yarn.lock +++ b/web/yarn.lock @@ -3643,11 +3643,6 @@ sanitize-filename@^1.6.3: dependencies: truncate-utf8-bytes "^1.0.0" -sax@^1.2.4: - version "1.4.1" - resolved "https://registry.yarnpkg.com/sax/-/sax-1.4.1.tgz#44cc8988377f126304d3b3fc1010c733b929ef0f" - integrity sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg== - scheduler@^0.26.0: version "0.26.0" resolved "https://registry.yarnpkg.com/scheduler/-/scheduler-0.26.0.tgz#4ce8a8c2a2095f13ea11bf9a445be50c555d6337" @@ -4250,13 +4245,6 @@ wrap-ansi@^7.0.0: string-width "^4.1.0" strip-ansi "^6.0.0" -xml-js@^1.6.11: - version "1.6.11" - resolved "https://registry.yarnpkg.com/xml-js/-/xml-js-1.6.11.tgz#927d2f6947f7f1c19a316dd8eea3614e8b18f8e9" - integrity sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g== - dependencies: - sax "^1.2.4" - y18n@^5.0.5: version "5.0.8" resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55"