diff --git a/desktop/src/main/services/ffmpeg-worker.ts b/desktop/src/main/services/ffmpeg-worker.ts new file mode 100644 index 0000000000..f6af11358c --- /dev/null +++ b/desktop/src/main/services/ffmpeg-worker.ts @@ -0,0 +1,821 @@ +/** + * @file ffmpeg invocations. This code runs in a utility process. + */ + +// See [Note: Using Electron APIs in UtilityProcess] about what we can and +// cannot import. +import { expose } from "comlink"; +import pathToFfmpeg from "ffmpeg-static"; +import { randomBytes } from "node:crypto"; +import fs_ from "node:fs"; +import fs from "node:fs/promises"; +import path from "node:path"; +import { Readable } from "node:stream"; +import type { FFmpegCommand } from "../../types/ipc"; +import log from "../log-worker"; +import { messagePortMainEndpoint } from "../utils/comlink"; +import { wait } from "../utils/common"; +import { execAsyncWorker } from "../utils/exec-worker"; + +/* Ditto in the web app's code (used by the Wasm FFmpeg invocation). */ +const ffmpegPathPlaceholder = "FFMPEG"; +const inputPathPlaceholder = "INPUT"; +const outputPathPlaceholder = "OUTPUT"; + +/** + * The interface of the object exposed by `ffmpeg-worker.ts` on the message port + * pair that the main process creates to communicate with it. + * + * @see {@link ffmpegUtilityProcessEndpoint}. + */ +export interface FFmpegUtilityProcess { + ffmpegExec: ( + command: FFmpegCommand, + inputFilePath: string, + outputFilePath: string, + ) => Promise<void>; + + ffmpegConvertToMP4: ( + inputFilePath: string, + outputFilePath: string, + ) => Promise<void>; + + ffmpegGenerateHLSPlaylistAndSegments: ( + inputFilePath: string, + outputPathPrefix: string, + outputUploadURL: string, + ) => Promise<FFmpegGenerateHLSPlaylistAndSegmentsResult | undefined>; +} + +log.debugString("Started ffmpeg utility process"); + +process.parentPort.once("message", (e) => { + // Expose an instance of `FFmpegUtilityProcess` on the port we got from our + // parent. + expose( + { + ffmpegExec, + ffmpegConvertToMP4, + ffmpegGenerateHLSPlaylistAndSegments, + } satisfies FFmpegUtilityProcess, + messagePortMainEndpoint(e.ports[0]!), + ); + mainProcess("ack", undefined); +}); + +/** + * Send a message to the main process using a barebones RPC protocol. + */ +const mainProcess = (method: string, param: unknown) => + process.parentPort.postMessage({ method, p: param }); + +/** + * Run an FFmpeg command + * + * [Note: FFmpeg in Electron] + * + * There is a Wasm build of FFmpeg, but that is currently 10-20 times slower + * than the native build. That is slow enough to be unusable for our purposes. + * https://ffmpegwasm.netlify.app/docs/performance + * + * So the alternative is to bundle an FFmpeg executable binary with our app. e.g. + * + * yarn add fluent-ffmpeg ffmpeg-static ffprobe-static + * + * (we only use ffmpeg-static, the rest are mentioned for completeness' sake). + * + * Interestingly, Electron already bundles a binary FFmpeg library (it comes + * from the ffmpeg fork maintained by Chromium). + * https://chromium.googlesource.com/chromium/third_party/ffmpeg + * https://stackoverflow.com/questions/53963672/what-version-of-ffmpeg-is-bundled-inside-electron + * + * This can be found (e.g. on macOS) at + * + * $ file ente.app/Contents/Frameworks/Electron\ Framework.framework/Versions/Current/Libraries/libffmpeg.dylib + * .../libffmpeg.dylib: Mach-O 64-bit dynamically linked shared library arm64 + * + * But I'm not sure if our code is supposed to be able to use it, and how.
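 + * + * The {@link command} is an array of (string) arguments, or an object providing `hdr` and `default` variants of such an array. Arguments may use the FFMPEG / INPUT / OUTPUT placeholders defined above, which get substituted with the actual paths before execution; e.g. a purely illustrative template (hypothetical, not necessarily one we ship) would be ["FFMPEG", "-i", "INPUT", "-frames:v", "1", "OUTPUT"].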
+ */ +const ffmpegExec = async ( + command: FFmpegCommand, + inputFilePath: string, + outputFilePath: string, +): Promise<void> => { + let resolvedCommand: string[]; + if (Array.isArray(command)) { + resolvedCommand = command; + } else { + const isHDR = await isHDRVideo(inputFilePath); + resolvedCommand = isHDR ? command.hdr : command.default; + } + + const cmd = substitutePlaceholders( + resolvedCommand, + inputFilePath, + outputFilePath, + ); + + await execAsyncWorker(cmd); +}; + +const substitutePlaceholders = ( + command: string[], + inputFilePath: string, + outputFilePath: string, +) => + command.map((segment) => { + if (segment == ffmpegPathPlaceholder) { + return ffmpegBinaryPath(); + } else if (segment == inputPathPlaceholder) { + return inputFilePath; + } else if (segment == outputPathPlaceholder) { + return outputFilePath; + } else { + return segment; + } + }); + +/** + * Return the path to the `ffmpeg` binary. + * + * At runtime, the FFmpeg binary is present in a path like (macOS example): + * `ente.app/Contents/Resources/app.asar.unpacked/node_modules/ffmpeg-static/ffmpeg` + */ +const ffmpegBinaryPath = () => { + // This substitution of app.asar by app.asar.unpacked is suggested by the + // ffmpeg-static library author themselves: + // https://github.com/eugeneware/ffmpeg-static/issues/16 + return pathToFfmpeg!.replace("app.asar", "app.asar.unpacked"); +}; + +/** + * A variant of {@link ffmpegExec} adapted to work with streams so that it can + * handle the MP4 conversion of large video files. + * + * @param inputFilePath The path to a file on the user's local file system. This + * is the video we want to convert. + * + * @param outputFilePath The path to a file on the user's local file system where + * we should write the converted MP4 video. + */ +const ffmpegConvertToMP4 = async ( + inputFilePath: string, + outputFilePath: string, +): Promise<void> => { + const command = [ + ffmpegPathPlaceholder, + "-i", + inputPathPlaceholder, + "-preset", + "ultrafast", + outputPathPlaceholder, + ]; + + const cmd = substitutePlaceholders(command, inputFilePath, outputFilePath); + + await execAsyncWorker(cmd); +}; + +export interface FFmpegGenerateHLSPlaylistAndSegmentsResult { + playlistPath: string; + dimensions: { width: number; height: number }; + videoSize: number; +} + +/** + * A bespoke variant of {@link ffmpegExec} for generation of HLS playlists for + * videos. + * + * Overview of the cases: + * + * H.264, <= 10 MB - Skip + * H.264, <= 4000 kb/s bitrate - Don't re-encode video stream + * BT.709, <= 2000 kb/s bitrate - Don't apply the scale+fps filter + * !BT.709 - Apply tonemap (zscale+tonemap+zscale) + * + * Example invocation: + * + * ffmpeg -i in.mov -vf 'scale=-2:720,fps=30,zscale=transfer=linear,tonemap=tonemap=hable:desat=0,zscale=primaries=709:transfer=709:matrix=709,format=yuv420p' -c:v libx264 -c:a aac -f hls -hls_key_info_file out.m3u8.info -hls_list_size 0 -hls_flags single_file out.m3u8 + * + * See: [Note: Preview variant of videos] + * + * @param inputFilePath The path to a file on the user's local file system. This + * is the video we want to generate a streamable HLS playlist for. + * + * @param outputPathPrefix The path to a unique, unused and temporary prefix on + * the user's local file system. This function will write the generated HLS + * playlist and video segments under this prefix.
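 + * + * @param outputUploadURL A pre-signed URL to which the transcoded and encrypted video segments will be uploaded (via an HTTP PUT; see {@link uploadVideoSegments}).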
+ * + * @returns The path to a file on the user's local file system containing the + * generated HLS playlist, along with the dimensions and size of the generated + * video. The transcoded and encrypted video segments that the playlist refers + * to are uploaded to the provided {@link outputUploadURL}. + * + * If the video is such that it doesn't require stream generation, then this + * function returns `undefined`. + */ +const ffmpegGenerateHLSPlaylistAndSegments = async ( + inputFilePath: string, + outputPathPrefix: string, + outputUploadURL: string, +): Promise<FFmpegGenerateHLSPlaylistAndSegmentsResult | undefined> => { + const { isH264, isBT709, bitrate } = + await detectVideoCharacteristics(inputFilePath); + + log.debugString(JSON.stringify({ isH264, isBT709, bitrate })); + + // If the video is smaller than 10 MB, and already H.264 (the codec we are + // going to use for the conversion), then a streaming variant is not much + // use. Skip such cases. + // + // --- + // + // [Note: HEVC/H.265 issues] + // + // We've observed two issues out in the wild with HEVC videos: + // + // 1. On Linux, HEVC video streams don't play. However, since the audio + // stream plays, the browser tells us that the "video" itself is + // playable, but the user sees a blank screen with only audio. + // + // 2. HEVC + HDR videos taken on an iPhone have a rotation (`Side data: + // displaymatrix` in the ffmpeg output) that Chrome (and thus Electron) + // doesn't take into account, so these play upside down. + // + // Not fully related to this case, but mentioned here to explain why both + // the size and codec need to be checked before skipping stream generation. + if (isH264) { + const inputVideoSize = await fs + .stat(inputFilePath) + .then((st) => st.size); + if (inputVideoSize <= 10 * 1024 * 1024 /* 10 MB */) { + return undefined; + } + } + + // If the video is already H.264 with a bitrate less than 4000 kbps, then we + // do not need to reencode the video stream (by _far_ the costliest part of + // the HLS stream generation). + const reencodeVideo = !(isH264 && bitrate && bitrate <= 4000 * 1000); + + // If the bitrate is not too high, then we don't need to rescale the video + // when generating the video stream. This is not a performance optimization, + // but rather for avoiding making the video unnecessarily smaller. + const rescaleVideo = !(bitrate && bitrate <= 2000 * 1000); + + // [Note: Tonemapping HDR to HD] + // + // BT.709 ("HD") is a standard that describes things like how color is + // encoded, the range of values, and their "meaning" - i.e. how to map the + // values in the video to the pixels on the screen. + // + // It is not the only such standard, there are three common examples: + // + // - BT.601 ("Standard-Definition" or SD) + // - BT.709 ("High-Definition" or HD) + // - BT.2020 ("Ultra-High-Definition" or UHD, aka HDR^). + // + // ^ HDR ("High-Dynamic-Range") is an addendum to BT.2020, but for our + // purpose here we can treat it as an alias. + // + // BT.709 is the most common amongst these for older files stored on + // computers, and such files mostly conform to the standard (one notable + // exception is that the BT.709 standard also recommends using the yuv422p + // pixel format, but de facto yuv420p is used because many video players + // only support yuv420p). + // + // Since BT.709 is the most widely supported standard, we use it when + // generating the HLS playlist so as to allow playback across the widest + // possible hardware/OS/browser combinations. + // + // If we convert HDR to HD naively, then the colors look washed out + // compared to the original.
To resolve this, we use an ffmpeg filterchain + // that uses the tonemap filter. + // + // However, applying this tonemap to videos that are already HD leads to a + // brightness drop. So we conditionally apply this filter chain only if the + // colorspace is not already BT.709. + // + // See also: [Note: Alternative FFmpeg command for HDR videos], although + // that uses an allow-list based check (while here we use a deny-list). + // + // Reference: + // - https://trac.ffmpeg.org/wiki/colorspace + const tonemap = !isBT709; + + // We want the generated playlist to refer to the chunks as "output.ts". + // + // So we arrange things accordingly: We use the `outputPathPrefix` as our + // working directory, and then ask ffmpeg to generate a playlist with the + // name "output.m3u8". + // + // ffmpeg will automatically place the segments in a file with the same base + // name as the playlist, but with a ".ts" extension. And since we use the + // "single_file" option, all the segments will be placed in a file named + // "output.ts". + + await fs.mkdir(outputPathPrefix); + + const playlistPath = path.join(outputPathPrefix, "output.m3u8"); + const videoPath = path.join(outputPathPrefix, "output.ts"); + + // Generate a cryptographically secure random key (16 bytes). + const keyBytes = randomBytes(16); + const keyB64 = keyBytes.toString("base64"); + + // Convert it to a data: URI that will be added to the playlist. + const keyURI = `data:text/plain;base64,${keyB64}`; + + // Determine two paths - one where we will write the key itself, and another + // where we will write the "key info" that provides ffmpeg the `keyURI` and + // the `keyPath`. + const keyPath = playlistPath + ".key"; + const keyInfoPath = playlistPath + ".key-info"; + + // Generate a "key info": + // + // - the first line specifies the key URI that is written into the playlist. + // - the second line specifies the path to the local file system file from + // where ffmpeg should read the key. + const keyInfo = [keyURI, keyPath].join("\n"); + + // Overview: + // + // - Video H.264 HD 720p 30fps. + // - Audio AAC 128 kbps. + // - Encrypted HLS playlist with a single file containing all the chunks. + // + // Reference: + // - `man ffmpeg-all` + // - https://trac.ffmpeg.org/wiki/Encode/H.264 + // + const command = [ + ffmpegBinaryPath(), + // Reduce the amount of output lines we have to parse. + ["-hide_banner"], + // Input file. We don't need any extra options that apply to the input file. + "-i", + inputFilePath, + // The remaining options apply to the next output file (`playlistPath`). + reencodeVideo + ? [ + // `-vf` creates a filter graph for the video stream. It is a + // comma separated list of filters chained together, e.g. + // `filter1=key=value:key=value,filter2=key=value`. + "-vf", + [ + // Always do the rescaling to an even number of pixels if + // the tonemapping is going to be applied subsequently, + // otherwise the tonemapping will fail with "image + // dimensions must be divisible by subsampling factor". + // + // While we add the extra condition here for completeness, + // it won't usually matter since a non-BT.709 video is + // likely using a new codec, and as such would have a high + // enough bitrate to require rescaling anyways. + rescaleVideo || tonemap + ? [ + // Scale the video to a maximum 720p height, + // keeping the aspect ratio and the calculated + // dimension divisible by 2 (some of the other + // operations require an even pixel count).
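 + // In "scale=-2:720", 720 is the target height, while + // -2 asks ffmpeg to derive the width from the input's + // aspect ratio, rounded to be divisible by 2.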
+ "scale=-2:720", + // Convert the video to a constant 30 fps, + // duplicating or dropping frames as necessary. + "fps=30", + ] + : [], + // Convert the colorspace if the video is not in the HD + // color space (bt709). Before conversion, tone map colors + // so that they work the same across the change in the + // dyamic range. + // + // 1. The tonemap filter only works linear light, so we + // first use zscale with transfer=linear to linearize + // the input. + // + // 2. Then we use the tonemap, with the hable option that + // is best for preserving details. desat=0 turns off + // the default desaturation. + // + // 3. Use zscale again to "convert to BT.709" by asking it + // to set the all three of color primaries, transfer + // characteristics and colorspace matrix to 709 (Note: + // the constants specified in the tonemap filter help + // do not include the "bt" prefix) + // + // See: https://ffmpeg.org/ffmpeg-filters.html#tonemap-1 + // + // See: [Note: Tonemapping HDR to HD] + tonemap + ? [ + "zscale=transfer=linear", + "tonemap=tonemap=hable:desat=0", + "zscale=primaries=709:transfer=709:matrix=709", + ] + : [], + // Output using the well supported pixel format: 8-bit YUV + // planar color space with 4:2:0 chroma subsampling. + "format=yuv420p", + ] + .flat() + .join(","), + ] + : [], + reencodeVideo + ? // Video codec H.264 + // + // - `-c:v libx264` converts the video stream to the H.264 codec. + // + // - We don't supply a bitrate, instead it uses the default CRF + // ("23") as recommended in the ffmpeg trac. + // + // - We don't supply a preset, it'll use the default ("medium"). + ["-c:v", "libx264"] + : // Keep the video stream unchanged + ["-c:v", "copy"], + // Audio codec AAC + // + // - `-c:a aac` converts the audio stream to use the AAC codec + // + // - We don't supply a bitrate, it'll use the AAC default 128k bps. + ["-c:a", "aac"], + // Generate a HLS playlist. + ["-f", "hls"], + // Tell ffmpeg where to find the key, and the URI for the key to write + // into the generated playlist. Implies "-hls_enc 1". + ["-hls_key_info_file", keyInfoPath], + // Generate as many playlist entries as needed (default limit is 5). + ["-hls_list_size", "0"], + // Place all the video segments within the same .ts file (with the same + // path as the playlist file but with a ".ts" extension). + ["-hls_flags", "single_file"], + // Output path where the playlist should be generated. + playlistPath, + ].flat(); + + let dimensions: ReturnType; + let videoSize: number; + + try { + // Write the key and the keyInfo to their desired paths. + await Promise.all([ + fs.writeFile(keyPath, keyBytes), + fs.writeFile(keyInfoPath, keyInfo, { encoding: "utf8" }), + ]); + + // Run the ffmpeg command to generate the HLS playlist and segments. + // + // Note: Depending on the size of the input file, this may take long! + const { stderr: conversionStderr } = await execAsyncWorker(command); + + // Determine the dimensions of the generated video from the stderr + // output produced by ffmpeg during the conversion. + dimensions = detectVideoDimensions(conversionStderr); + + // Find the size of the generated video segments by reading the size of + // the generated .ts file. 
+ videoSize = await fs.stat(videoPath).then((st) => st.size); + + await uploadVideoSegments(videoPath, videoSize, outputUploadURL); + } catch (e) { + log.error("HLS generation failed", e); + await deletePathIgnoringErrors(playlistPath); + throw e; + } finally { + await Promise.all([ + deletePathIgnoringErrors(keyInfoPath), + deletePathIgnoringErrors(keyPath), + deletePathIgnoringErrors(videoPath), + // ffmpeg writes a /path/output.ts.tmp, clear it out too. + deletePathIgnoringErrors(videoPath + ".tmp"), + ]); + } + + return { playlistPath, dimensions, videoSize }; +}; + +/** + * A variant of {@link deleteTempFileIgnoringErrors} (which we can't directly + * use in the utility process). It unconditionally removes the item at the + * provided path; in particular, this will not raise any errors if there is no + * item at the given path (as can happen when we run during catch handlers). + */ +const deletePathIgnoringErrors = async (tempFilePath: string) => { + try { + await fs.rm(tempFilePath, { force: true }); + } catch (e) { + log.error(`Could not delete item at path ${tempFilePath}`, e); + } +}; + +/** + * A regex that matches the first line of the form + * + * Stream #0:0: Video: h264 (High 10) ([27][0][0][0] / 0x001B), yuv420p10le(tv, bt2020nc/bt2020/arib-std-b67), 1920x1080, 30 fps, 30 tbr, 90k tbn + * + * The part after Video: is the first capture group. + * + * Another example: + * + * Stream #0:1[0x2](und): Video: h264 (Constrained Baseline) (avc1 / 0x31637661), yuv420p(progressive), 480x270 [SAR 1:1 DAR 16:9], 539 kb/s, 29.97 fps, 29.97 tbr, 30k tbn (default) + */ +const videoStreamLineRegex = /Stream #.+: Video:(.+)\n/; + +/** {@link videoStreamLineRegex}, but global. */ +const videoStreamLinesRegex = /Stream #.+: Video:(.+)\n/g; + +/** + * A regex that matches a "<number> kb/s" bitrate preceded by a space. See + * {@link videoStreamLineRegex} for the context in which it is used. + */ +const videoBitrateRegex = / ([1-9]\d*) kb\/s/; + +/** + * A regex that matches a "<width>x<height>" pair preceded by a space. See + * {@link videoStreamLineRegex} for the context in which it is used. + * + * We constrain the digit sequence not to begin with 0 to exclude hexadecimal + * representations of various constants that ffmpeg prints on this line (e.g. + * "avc1 / 0x31637661"). + */ +const videoDimensionsRegex = / ([1-9]\d*)x([1-9]\d*)/; + +interface VideoCharacteristics { + isH264: boolean; + isBT709: boolean; + bitrate: number | undefined; +} +/** + * Heuristically determine information about the video at the given + * {@link inputFilePath}: + * + * - If it is encoded using the H.264 codec. + * - If it uses the BT.709 colorspace. + * - Its bitrate. + * + * The defaults are tailored for the cases in which these conditions are used, + * so that even if we get the detection wrong we'll only end up encoding videos + * that could've possibly been skipped as an optimization. + * + * [Note: Parsing CLI output might break on ffmpeg updates] + * + * This function tries to determine these bits of information about the + * given video by scanning the ffmpeg info output for the video stream line, and + * doing various string matches and regex extractions. + * + * Needless to say, while this works currently, this is liable to break in the + * future. So if something stops working after updating ffmpeg, look here! + * + * Ideally, we'd have done this using `ffprobe`, but we don't have the ffprobe + * binary at hand, so we make do by grepping the log output of ffmpeg.
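 + * + * (Had we bundled ffprobe, this could've presumably been a single structured query - something like `ffprobe -v error -select_streams v:0 -show_entries stream=codec_name,color_space,bit_rate in.mov`; an illustrative sketch only, since we don't ship ffprobe.)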
+ * + * For reference, + * + * - codec and colorspace are printed by the `avcodec_string` function in the + * ffmpeg source: + * https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/avcodec.c + * + * - bitrate is printed by the `dump_stream_format` function in `dump.c`. + */ +const detectVideoCharacteristics = async (inputFilePath: string) => { + const videoInfo = await pseudoFFProbeVideo(inputFilePath); + const videoStreamLine = videoStreamLineRegex.exec(videoInfo)?.at(1)?.trim(); + + // Since the checks are heuristic, start with defaults that would cause the + // codec conversion to happen, even if it is unnecessary. + const res: VideoCharacteristics = { + isH264: false, + isBT709: false, + bitrate: undefined, + }; + if (!videoStreamLine) return res; + + res.isH264 = videoStreamLine.startsWith("h264 "); + res.isBT709 = videoStreamLine.includes("bt709"); + // The regex matches "\d kb/s", but there can be other units for the + // bitrate. However, (a) "kb/s" is the most common for videos out in the + // wild, and (b) even if we guess wrong, we'll just do "-c:v libx264" + // instead of "-c:v copy", so only unnecessary processing but no change in + // output. + const brs = videoBitrateRegex.exec(videoStreamLine)?.at(1); + if (brs) { + const br = parseInt(brs, 10); + // The regex captures the value in kb/s; store it in bits/s to match + // the thresholds it gets compared against. + if (br) res.bitrate = br * 1000; + } + + return res; +}; + +/** + * Heuristically detect the dimensions of the given video from the log output of + * the ffmpeg invocation during the HLS playlist generation. + * + * This function tries to determine the width and height of the generated video + * from the output log written by ffmpeg on its stderr during the generation + * process, scanning it for the last video stream line, and trying to match a + * "<width>x<height>" regex. + * + * See: [Note: Parsing CLI output might break on ffmpeg updates]. + */ +const detectVideoDimensions = (conversionStderr: string) => { + // There is a nicer way to do it - by running `pseudoFFProbeVideo` on the + // generated playlist. However, that playlist includes a data URL that + // specifies the encryption info, and ffmpeg refuses to read that unless we + // specify the "-allowed_extensions ALL" or something to that effect. + // + // Unfortunately, our current ffmpeg binary (5.x) does not support that + // option. So we instead parse the conversion output itself. + // + // This is also nice, since it saves on an extra ffmpeg invocation. But we + // now need to be careful to find the right video stream line, since the + // conversion output includes both the input and output video stream lines. + // + // To match the right (output) video stream line, we use a global regex, and + // use the last match since that'd correspond to the single video stream + // written in the output. + const videoStreamLine = Array.from( + conversionStderr.matchAll(videoStreamLinesRegex), + ) + .at(-1) /* Last Stream...: Video: line in the output */ + ?.at(1); /* First capture group */ + if (videoStreamLine) { + const [, ws, hs] = videoDimensionsRegex.exec(videoStreamLine) ?? []; + if (ws && hs) { + const w = parseInt(ws, 10); + const h = parseInt(hs, 10); + if (w && h) { + return { width: w, height: h }; + } + } + } + throw new Error( + `Unable to detect video dimensions from stream line [${videoStreamLine ?? ""}]`, + ); +}; + +/** + * Heuristically detect if the file at the given path is an HDR video. + * + * This is similar to {@link detectVideoCharacteristics}; see that + * function's documentation for all the caveats.
However, this function uses an + * allow-list instead, and considers any file with color transfer "smpte2084" or + * "arib-std-b67" to be HDR. While this is in some sense a more exact check, it + * comes with different caveats: + * + * - These particular constants are not guaranteed to be correct; these are just + * what I saw on the internet as being used / recommended for detecting HDR. + * + * - Since we don't have ffprobe, we're not checking the color space value + * itself but a substring of the stream line in the ffmpeg stderr output. + * + * In particular, we use this more exact check for places where we have less + * leeway. e.g. when generating thumbnails, if we apply the tonemapping to any + * non-BT.709 file (as the HLS stream generation does), we start getting the + * "code 3074: no path between colorspaces" error during the JPEG conversion + * (this is not a problem in the H.264 conversion). + * + * - See: [Note: Alternative FFmpeg command for HDR videos] + * - See: [Note: Tonemapping HDR to HD] + * + * @param inputFilePath The path to a video file on the user's machine. + * + * @returns `true` if this file is likely an HDR video. Exceptions are treated as + * `false` to make this function safe to invoke without breaking the happy path. + */ +const isHDRVideo = async (inputFilePath: string) => { + try { + const videoInfo = await pseudoFFProbeVideo(inputFilePath); + const vs = videoStreamLineRegex.exec(videoInfo)?.at(1); + if (!vs) return false; + return vs.includes("smpte2084") || vs.includes("arib-std-b67"); + } catch (e) { + log.warn(`Could not detect HDR status of ${inputFilePath}`, e); + return false; + } +}; + +/** + * Return the stderr of ffmpeg in an attempt to gain information about the video + * at the given {@link inputFilePath}. + * + * We don't have the ffprobe binary at hand, which is why we need to use this + * alternative. See: [Note: Parsing CLI output might break on ffmpeg updates] + * + * @returns the stderr of ffmpeg after running it on the input file. The exact + * command we run is: + * + * ffmpeg -i in.mov -an -frames:v 0 -f null - 2>info.txt + * + * And the returned string is the contents of the `info.txt` thus produced. + */ +const pseudoFFProbeVideo = async (inputFilePath: string) => { + const command = [ + ffmpegPathPlaceholder, + // Reduce the amount of output lines we have to parse. + ["-hide_banner"], + ["-i", inputPathPlaceholder], + "-an", + ["-frames:v", "0"], + ["-f", "null"], + "-", + ].flat(); + + const cmd = substitutePlaceholders(command, inputFilePath, /* NA */ ""); + + const { stderr } = await execAsyncWorker(cmd); + + return stderr; +}; + +/** + * Upload the file at the given {@link videoFilePath} to the provided presigned + * {@link objectUploadURL} using an HTTP PUT request. + * + * In case of non-HTTP-4xx errors, retry a couple of times (3 attempts in + * total) with increasing backoff. + * + * See: [Note: Upload HLS video segment from node side]. + * + * @param videoFilePath The path to the file on the user's file system to + * upload. + * + * @param videoSize The size in bytes of the file at {@link videoFilePath}. + * + * @param objectUploadURL A pre-signed URL to upload the file. + * + * --- + * + * This is an inlined but bespoke reimplementation of `retryEnsuringHTTPOkOr4xx` + * from `web/packages/base/http.ts` + * + * - We don't have the rest of the scaffolding used by that function, which is + * why it was initially inlined here in a bespoke form.
+ * + * - It handles the specific use case of uploading videos since generating the + * HLS stream is a fairly expensive operation, so a retry to discount + * transient network issues is called for. There are only 2 retries for a + * total of 3 attempts, and the retry gaps are more spaced out. + * + * - Later it was discovered that net.fetch is much slower than node's native + * fetch, so this implementation has further diverged. + * + * - This also moved to a utility process, where we have a more restricted + * ability to import Electron APIs. + */ +const uploadVideoSegments = async ( + videoFilePath: string, + videoSize: number, + objectUploadURL: string, +) => { + const waitTimeBeforeNextTry = [5000, 20000]; + + while (true) { + let abort = false; + try { + const nodeStream = fs_.createReadStream(videoFilePath); + const webStream = Readable.toWeb(nodeStream); + + // net.fetch is 40-50x slower than the native fetch for this + // particular PUT request. This is easily reproducible - replace + // `fetch` with `net.fetch`, then even on localhost the PUT requests + // start taking a minute or so, while they take second(s) with + // node's native fetch. + const res = await fetch(objectUploadURL, { + method: "PUT", + // net.fetch apparently deduces and inserts a content-length, + // but when we use the node native fetch we need to + // provide it explicitly. + headers: { "Content-Length": `${videoSize}` }, + // The duplex option is required since we're passing a stream. + // + // @ts-expect-error TypeScript's libdom.d.ts does not include + // the "duplex" parameter, e.g. see + // https://github.com/node-fetch/node-fetch/issues/1769. + duplex: "half", + body: webStream, + }); + + if (res.ok) { + // Success. + return; + } + if (res.status >= 400 && res.status < 500) { + // HTTP 4xx. + abort = true; + } + throw new Error( + `Failed to upload generated HLS video: HTTP ${res.status} ${res.statusText}`, + ); + } catch (e) { + if (abort) { + throw e; + } + const t = waitTimeBeforeNextTry.shift(); + if (!t) { + throw e; + } else { + log.warn("Will retry potentially transient request failure", e); + } + await wait(t); + } + } +}; diff --git a/desktop/src/main/services/ffmpeg.ts b/desktop/src/main/services/ffmpeg.ts index b5587b6ba1..1b0e623faa 100644 --- a/desktop/src/main/services/ffmpeg.ts +++ b/desktop/src/main/services/ffmpeg.ts @@ -1,53 +1,39 @@ -import pathToFfmpeg from "ffmpeg-static"; -import { randomBytes } from "node:crypto"; +/** + * @file A bridge to the ffmpeg utility process. This code runs in the main + * process. + */ + +import { wrap } from "comlink"; import fs from "node:fs/promises"; -import path, { basename } from "node:path"; import type { FFmpegCommand, ZipItem } from "../../types/ipc"; -import log from "../log"; -import { execAsync } from "../utils/electron"; import { deleteTempFileIgnoringErrors, makeFileForDataOrStreamOrPathOrZipItem, makeTempFilePath, } from "../utils/temp"; - -/* Ditto in the web app's code (used by the Wasm FFmpeg invocation). */ -const ffmpegPathPlaceholder = "FFMPEG"; -const inputPathPlaceholder = "INPUT"; -const outputPathPlaceholder = "OUTPUT"; +import type { FFmpegUtilityProcess } from "./ffmpeg-worker"; +import { ffmpegUtilityProcessEndpoint } from "./workers"; /** - * Run a FFmpeg command - * - * [Note: FFmpeg in Electron] - * - * There is a Wasm build of FFmpeg, but that is currently 10-20 times slower - * that the native build. That is slow enough to be unusable for our purposes.
- * https://ffmpegwasm.netlify.app/docs/performance - * - * So the alternative is to bundle a FFmpeg executable binary with our app. e.g. - * - * yarn add fluent-ffmpeg ffmpeg-static ffprobe-static - * - * (we only use ffmpeg-static, the rest are mentioned for completeness' sake). - * - * Interestingly, Electron already bundles an binary FFmpeg library (it comes - * from the ffmpeg fork maintained by Chromium). - * https://chromium.googlesource.com/chromium/third_party/ffmpeg - * https://stackoverflow.com/questions/53963672/what-version-of-ffmpeg-is-bundled-inside-electron - * - * This can be found in (e.g. on macOS) at - * - * $ file ente.app/Contents/Frameworks/Electron\ Framework.framework/Versions/Current/Libraries/libffmpeg.dylib - * .../libffmpeg.dylib: Mach-O 64-bit dynamically linked shared library arm64 - * - * But I'm not sure if our code is supposed to be able to use it, and how. + * Return a handle to the ffmpeg utility process, starting it if needed. + */ +export const ffmpegUtilityProcess = () => + ffmpegUtilityProcessEndpoint().then((port) => + wrap<FFmpegUtilityProcess>(port), + ); + +/** + * Implement the IPC "ffmpegExec" contract, writing the input and output to + * temporary files as needed, and then forwarding to the {@link ffmpegExec} + * running in the utility process. */ export const ffmpegExec = async ( command: FFmpegCommand, dataOrPathOrZipItem: Uint8Array | string | ZipItem, outputFileExtension: string, ): Promise<Uint8Array> => { + const worker = await ffmpegUtilityProcess(); + const { path: inputFilePath, isFileTemporary: isInputFileTemporary, @@ -58,22 +44,7 @@ export const ffmpegExec = async ( try { await writeToTemporaryInputFile(); - let resolvedCommand: string[]; - if (Array.isArray(command)) { - resolvedCommand = command; - } else { - const isHDR = await isHDRVideo(inputFilePath); - log.debug(() => [basename(inputFilePath), { isHDR }]); - resolvedCommand = isHDR ? command.hdr : command.default; - } - - const cmd = substitutePlaceholders( - resolvedCommand, - inputFilePath, - outputFilePath, - ); - - await execAsync(cmd); + await worker.ffmpegExec(command, inputFilePath, outputFilePath); return await fs.readFile(outputFilePath); } finally { @@ -82,597 +53,3 @@ export const ffmpegExec = async ( await deleteTempFileIgnoringErrors(outputFilePath); } }; - -const substitutePlaceholders = ( - command: string[], - inputFilePath: string, - outputFilePath: string, -) => - command.map((segment) => { - if (segment == ffmpegPathPlaceholder) { - return ffmpegBinaryPath(); - } else if (segment == inputPathPlaceholder) { - return inputFilePath; - } else if (segment == outputPathPlaceholder) { - return outputFilePath; - } else { - return segment; - } - }); - -/** - * Return the path to the `ffmpeg` binary. - * - * At runtime, the FFmpeg binary is present in a path like (macOS example): - * `ente.app/Contents/Resources/app.asar.unpacked/node_modules/ffmpeg-static/ffmpeg` - */ -const ffmpegBinaryPath = () => { - // This substitution of app.asar by app.asar.unpacked is suggested by the - // ffmpeg-static library author themselves: - // https://github.com/eugeneware/ffmpeg-static/issues/16 - return pathToFfmpeg!.replace("app.asar", "app.asar.unpacked"); -}; - -/** - * A variant of {@link ffmpegExec} adapted to work with streams so that it can - * handle the MP4 conversion of large video files. - * - * @param inputFilePath The path to a file on the user's local file system. This - * is the video we want to convert.
- * - * @param outputFilePath The path to a file on the user's local file system where - * we should write the converted MP4 video. - */ -export const ffmpegConvertToMP4 = async ( - inputFilePath: string, - outputFilePath: string, -): Promise => { - const command = [ - ffmpegPathPlaceholder, - "-i", - inputPathPlaceholder, - "-preset", - "ultrafast", - outputPathPlaceholder, - ]; - - const cmd = substitutePlaceholders(command, inputFilePath, outputFilePath); - - await execAsync(cmd); -}; - -export interface FFmpegGenerateHLSPlaylistAndSegmentsResult { - playlistPath: string; - videoPath: string; - dimensions: { width: number; height: number }; - videoSize: number; -} - -/** - * A bespoke variant of {@link ffmpegExec} for generation of HLS playlists for - * videos. - * - * Overview of the cases: - * - * H.264, <= 10 MB - Skip - * H.264, <= 4000 kb/s bitrate - Don't re-encode video stream - * BT.709, <= 2000 kb/s bitrate - Don't apply the scale+fps filter - * !BT.709 - Apply tonemap (zscale+tonemap+zscale) - * - * Example invocation: - * - * ffmpeg -i in.mov -vf 'scale=-2:720,fps=30,zscale=transfer=linear,tonemap=tonemap=hable:desat=0,zscale=primaries=709:transfer=709:matrix=709,format=yuv420p' -c:v libx264 -c:a aac -f hls -hls_key_info_file out.m3u8.info -hls_list_size 0 -hls_flags single_file out.m3u8 - * - * See: [Note: Preview variant of videos] - * - * @param inputFilePath The path to a file on the user's local file system. This - * is the video we want to generate an streamable HLS playlist for. - * - * @param outputPathPrefix The path to unique, unused and temporary prefix on - * the user's local file system. This function will write the generated HLS - * playlist and video segments under this prefix. - * - * @returns The paths to two files on the user's local file system - one - * containing the generated HLS playlist, and the other containing the - * transcoded and encrypted video segments that the HLS playlist refers to. - * - * If the video is such that it doesn't require stream generation, then this - * function returns `undefined`. - */ -export const ffmpegGenerateHLSPlaylistAndSegments = async ( - inputFilePath: string, - outputPathPrefix: string, -): Promise => { - const { isH264, isBT709, bitrate } = - await detectVideoCharacteristics(inputFilePath); - - log.debug(() => [basename(inputFilePath), { isH264, isBT709, bitrate }]); - - // If the video is smaller than 10 MB, and already H.264 (the codec we are - // going to use for the conversion), then a streaming variant is not much - // use. Skip such cases. - // - // --- - // - // [Note: HEVC/H.265 issues] - // - // We've observed two issues out in the wild with HEVC videos: - // - // 1. On Linux, HEVC video streams don't play. However, since the audio - // stream plays, the browser tells us that the "video" itself is - // playable, but the user sees a blank screen with only audio. - // - // 2. HEVC + HDR videos taken on an iPhone have a rotation (`Side data: - // displaymatrix` in the ffmpeg output) that Chrome (and thus Electron) - // doesn't take into account, so these play upside down. - // - // Not fully related to this case, but mentioning here as to why both the - // size and codec need to be checked before skipping stream generation. 
- if (isH264) { - const inputVideoSize = await fs - .stat(inputFilePath) - .then((st) => st.size); - if (inputVideoSize <= 10 * 1024 * 1024 /* 10 MB */) { - return undefined; - } - } - - // If the video is already H.264 with a bitrate less than 4000 kbps, then we - // do not need to reencode the video stream (by _far_ the costliest part of - // the HLS stream generation). - const reencodeVideo = !(isH264 && bitrate && bitrate <= 4000 * 1000); - - // If the bitrate is not too high, then we don't need to rescale the video - // when generating the video stream. This is not a performance optimization, - // but more for avoiding making the video size smaller unnecessarily. - const rescaleVideo = !(bitrate && bitrate <= 2000 * 1000); - - // [Note: Tonemapping HDR to HD] - // - // BT.709 ("HD") is a standard that describes things like how color is - // encoded, the range of values, and their "meaning" - i.e. how to map the - // values in the video to the pixels on the screen. - // - // It is not the only such standard, there are three common examples: - // - // - BT.601 ("Standard-Definition" or SD) - // - BT.709 ("High-Definition" or HD) - // - BT.2020 ("Ultra-High-Definition" or UHD, aka HDR^). - // - // ^ HDR ("High-Dynamic-Range") is an addendum to BT.2020, but for our - // purpose here we can treat it as as alias. - // - // BT.709 is the most common amongst these for older files out stored on - // computers, and they conform mostly to the standard (one notable exception - // is that the BT.709 standard also recommends using the yuv422p pixel - // format, but de facto yuv420p is used because many video players only - // support yuv420p). - // - // Since BT.709 is the most widely supported standard, we use it when - // generating the HLS playlist so to allow playback across the widest - // possible hardware/OS/browser combinations. - // - // If we convert HDR to HD without naively, then the colors look washed out - // compared to the original. To resolve this, we use a ffmpeg filterchain - // that uses the tonemap filter. - // - // However applying this tonemap to videos that are already HD leads to a - // brightness drop. So we conditionally apply this filter chain only if the - // colorspace is not already BT.709. - // - // See also: [Note: Alternative FFmpeg command for HDR videos], although - // that uses a allow-list based check (while here we use deny-list). - // - // Reference: - // - https://trac.ffmpeg.org/wiki/colorspace - const tonemap = !isBT709; - - // We want the generated playlist to refer to the chunks as "output.ts". - // - // So we arrange things accordingly: We use the `outputPathPrefix` as our - // working directory, and then ask ffmpeg to generate a playlist with the - // name "output.m3u8". - // - // ffmpeg will automatically place the segments in a file with the same base - // name as the playlist, but with a ".ts" extension. And since we use the - // "single_file" option, all the segments will be placed in a file named - // "output.ts". - - await fs.mkdir(outputPathPrefix); - - const playlistPath = path.join(outputPathPrefix, "output.m3u8"); - const videoPath = path.join(outputPathPrefix, "output.ts"); - - // Generate a cryptographically secure random key (16 bytes). - const keyBytes = randomBytes(16); - const keyB64 = keyBytes.toString("base64"); - - // Convert it to a data: URI that will be added to the playlist. 
- const keyURI = `data:text/plain;base64,${keyB64}`; - - // Determine two paths - one where we will write the key itself, and where - // we will write the "key info" that provides ffmpeg the `keyURI` and the - // `keyPath;. - const keyPath = playlistPath + ".key"; - const keyInfoPath = playlistPath + ".key-info"; - - // Generate a "key info": - // - // - the first line specifies the key URI that is written into the playlist. - // - the second line specifies the path to the local file system file from - // where ffmpeg should read the key. - const keyInfo = [keyURI, keyPath].join("\n"); - - // Overview: - // - // - Video H.264 HD 720p 30fps. - // - Audio AAC 128kbps. - // - Encrypted HLS playlist with a single file containing all the chunks. - // - // Reference: - // - `man ffmpeg-all` - // - https://trac.ffmpeg.org/wiki/Encode/H.264 - // - const command = [ - ffmpegBinaryPath(), - // Reduce the amount of output lines we have to parse. - ["-hide_banner"], - // Input file. We don't need any extra options that apply to the input file. - "-i", - inputFilePath, - // The remaining options apply to the next output file (`playlistPath`). - reencodeVideo - ? [ - // `-vf` creates a filter graph for the video stream. It is a - // comma separated list of filters chained together, e.g. - // `filter1=key=value:key=value.filter2=key=value`. - "-vf", - [ - // Do the rescaling to even number of pixels always if the - // tonemapping is going to be applied subsequently, - // otherwise the tonemapping will fail with "image - // dimensions must be divisible by subsampling factor". - // - // While we add the extra condition here for completeness, - // it won't usually matter since a non-BT.709 video is - // likely using a new codec, and as such would've a high - // enough bitrate to require rescaling anyways. - rescaleVideo || tonemap - ? [ - // Scales the video to maximum 720p height, - // keeping aspect ratio and the calculated - // dimension divisible by 2 (some of the other - // operations require an even pixel count). - "scale=-2:720", - // Convert the video to a constant 30 fps, - // duplicating or dropping frames as necessary. - "fps=30", - ] - : [], - // Convert the colorspace if the video is not in the HD - // color space (bt709). Before conversion, tone map colors - // so that they work the same across the change in the - // dyamic range. - // - // 1. The tonemap filter only works linear light, so we - // first use zscale with transfer=linear to linearize - // the input. - // - // 2. Then we use the tonemap, with the hable option that - // is best for preserving details. desat=0 turns off - // the default desaturation. - // - // 3. Use zscale again to "convert to BT.709" by asking it - // to set the all three of color primaries, transfer - // characteristics and colorspace matrix to 709 (Note: - // the constants specified in the tonemap filter help - // do not include the "bt" prefix) - // - // See: https://ffmpeg.org/ffmpeg-filters.html#tonemap-1 - // - // See: [Note: Tonemapping HDR to HD] - tonemap - ? [ - "zscale=transfer=linear", - "tonemap=tonemap=hable:desat=0", - "zscale=primaries=709:transfer=709:matrix=709", - ] - : [], - // Output using the well supported pixel format: 8-bit YUV - // planar color space with 4:2:0 chroma subsampling. - "format=yuv420p", - ] - .flat() - .join(","), - ] - : [], - reencodeVideo - ? // Video codec H.264 - // - // - `-c:v libx264` converts the video stream to the H.264 codec. 
- // - // - We don't supply a bitrate, instead it uses the default CRF - // ("23") as recommended in the ffmpeg trac. - // - // - We don't supply a preset, it'll use the default ("medium"). - ["-c:v", "libx264"] - : // Keep the video stream unchanged - ["-c:v", "copy"], - // Audio codec AAC - // - // - `-c:a aac` converts the audio stream to use the AAC codec - // - // - We don't supply a bitrate, it'll use the AAC default 128k bps. - ["-c:a", "aac"], - // Generate a HLS playlist. - ["-f", "hls"], - // Tell ffmpeg where to find the key, and the URI for the key to write - // into the generated playlist. Implies "-hls_enc 1". - ["-hls_key_info_file", keyInfoPath], - // Generate as many playlist entries as needed (default limit is 5). - ["-hls_list_size", "0"], - // Place all the video segments within the same .ts file (with the same - // path as the playlist file but with a ".ts" extension). - ["-hls_flags", "single_file"], - // Output path where the playlist should be generated. - playlistPath, - ].flat(); - - let dimensions: ReturnType; - let videoSize: number; - - try { - // Write the key and the keyInfo to their desired paths. - await Promise.all([ - fs.writeFile(keyPath, keyBytes), - fs.writeFile(keyInfoPath, keyInfo, { encoding: "utf8" }), - ]); - - // Run the ffmpeg command to generate the HLS playlist and segments. - // - // Note: Depending on the size of the input file, this may take long! - const { stderr: conversionStderr } = await execAsync(command); - - // Determine the dimensions of the generated video from the stderr - // output produced by ffmpeg during the conversion. - dimensions = detectVideoDimensions(conversionStderr); - - // Find the size of the generated video segments by reading the size of - // the generated .ts file. - videoSize = await fs.stat(videoPath).then((st) => st.size); - } catch (e) { - log.error("HLS generation failed", e); - await Promise.all([ - deleteTempFileIgnoringErrors(playlistPath), - deleteTempFileIgnoringErrors(videoPath), - ]); - throw e; - } finally { - await Promise.all([ - deleteTempFileIgnoringErrors(keyInfoPath), - deleteTempFileIgnoringErrors(keyPath), - // ffmpeg writes a /path/output.ts.tmp, clear it out too. - deleteTempFileIgnoringErrors(videoPath + ".tmp"), - ]); - } - - return { playlistPath, videoPath, dimensions, videoSize }; -}; - -/** - * A regex that matches the first line of the form - * - * Stream #0:0: Video: h264 (High 10) ([27][0][0][0] / 0x001B), yuv420p10le(tv, bt2020nc/bt2020/arib-std-b67), 1920x1080, 30 fps, 30 tbr, 90k tbn - * - * The part after Video: is the first capture group. - * - * Another example: - * - * Stream #0:1[0x2](und): Video: h264 (Constrained Baseline) (avc1 / 0x31637661), yuv420p(progressive), 480x270 [SAR 1:1 DAR 16:9], 539 kb/s, 29.97 fps, 29.97 tbr, 30k tbn (default) - */ -const videoStreamLineRegex = /Stream #.+: Video:(.+)\n/; - -/** {@link videoStreamLineRegex}, but global. */ -const videoStreamLinesRegex = /Stream #.+: Video:(.+)\n/g; - -/** - * A regex that matches " kb/s" preceded by a space. See - * {@link videoStreamLineRegex} for the context in which it is used. - */ -const videoBitrateRegex = / ([1-9]\d*) kb\/s/; - -/** - * A regex that matches x pair preceded by a space. See - * {@link videoStreamLineRegex} for the context in which it is used. - * - * We constrain the digit sequence not to begin with 0 to exclude hexadecimal - * representations of various constants that ffmpeg prints on this line (e.g. - * "avc1 / 0x31637661"). 
- */ -const videoDimensionsRegex = / ([1-9]\d*)x([1-9]\d*)/; - -interface VideoCharacteristics { - isH264: boolean; - isBT709: boolean; - bitrate: number | undefined; -} -/** - * Heuristically determine information about the video at the given - * {@link inputFilePath}: - * - * - If is encoded using H.264 codec. - * - If it uses the BT.709 colorspace. - * - Its bitrate. - * - * The defaults are tailored for the cases in which these conditions are used, - * so that even if we get the detection wrong we'll only end up encoding videos - * that could've possibly been skipped as an optimization. - * - * [Note: Parsing CLI output might break on ffmpeg updates] - * - * This function tries to determine the these bits of information about the - * given video by scanning the ffmpeg info output for the video stream line, and - * doing various string matches and regex extractions. - * - * Needless to say, while this works currently, this is liable to break in the - * future. So if something stops working after updating ffmpeg, look here! - * - * Ideally, we'd have done this using `ffprobe`, but we don't have the ffprobe - * binary at hand, so we make do by grepping the log output of ffmpeg. - * - * For reference, - * - * - codec and colorspace are printed by the `avcodec_string` function in the - * ffmpeg source: - * https://github.com/FFmpeg/FFmpeg/blob/master/libavcodec/avcodec.c - * - * - bitrate is printed by the `dump_stream_format` function in `dump.c`. - */ -const detectVideoCharacteristics = async (inputFilePath: string) => { - const videoInfo = await pseudoFFProbeVideo(inputFilePath); - const videoStreamLine = videoStreamLineRegex.exec(videoInfo)?.at(1)?.trim(); - - // Since the checks are heuristic, start with defaults that would cause the - // codec conversion to happen, even if it is unnecessary. - const res: VideoCharacteristics = { - isH264: false, - isBT709: false, - bitrate: undefined, - }; - if (!videoStreamLine) return res; - - res.isH264 = videoStreamLine.startsWith("h264 "); - res.isBT709 = videoStreamLine.includes("bt709"); - // The regex matches "\d kb/s", but there can be other units for the - // bitrate. However, (a) "kb/s" is the most common for videos out in the - // wild, and (b) even if we guess wrong it we'll just do "-v:c x264" instead - // of "-v:c copy", so only unnecessary processing but no change in output. - const brs = videoBitrateRegex.exec(videoStreamLine)?.at(0); - if (brs) { - const br = parseInt(brs, 10); - if (br) res.bitrate = br; - } - - return res; -}; - -/** - * Heuristically detect the dimensions of the given video from the log output of - * the ffmpeg invocation during the HLS playlist generation. - * - * This function tries to determine the width and height of the generated video - * from the output log written by ffmpeg on its stderr during the generation - * process, scanning it for the last video stream line, and trying to match a - * "x" regex. - * - * See: [Note: Parsing CLI output might break on ffmpeg updates]. - */ -const detectVideoDimensions = (conversionStderr: string) => { - // There is a nicer way to do it - by running `pseudoFFProbeVideo` on the - // generated playlist. However, that playlist includes a data URL that - // specifies the encryption info, and ffmpeg refuses to read that unless we - // specify the "-allowed_extensions ALL" or something to that effect. - // - // Unfortunately, our current ffmpeg binary (5.x) does not support that - // option. So we instead parse the conversion output itself. 
- // - // This is also nice, since it saves on an extra ffmpeg invocation. But we - // now need to be careful to find the right video stream line, since the - // conversion output includes both the input and output video stream lines. - // - // To match the right (output) video stream line, we use a global regex, and - // use the last match since that'd correspond to the single video stream - // written in the output. - const videoStreamLine = Array.from( - conversionStderr.matchAll(videoStreamLinesRegex), - ) - .at(-1) /* Last Stream...: Video: line in the output */ - ?.at(1); /* First capture group */ - if (videoStreamLine) { - const [, ws, hs] = videoDimensionsRegex.exec(videoStreamLine) ?? []; - if (ws && hs) { - const w = parseInt(ws, 10); - const h = parseInt(hs, 10); - if (w && h) { - return { width: w, height: h }; - } - } - } - throw new Error( - `Unable to detect video dimensions from stream line [${videoStreamLine ?? ""}]`, - ); -}; - -/** - * Heuristically detect if the file at given path is a HDR video. - * - * This is similar to {@link detectVideoCharacteristics}, and see that - * function's documentation for all the caveats. However, this function uses an - * allow-list instead, and considers any file with color transfer "smpte2084" or - * "arib-std-b67" to be HDR. While this is in some sense a more exact check, it - * comes with different caveats: - * - * - These particular constants are not guaranteed to be correct; these are just - * what I saw on the internet as being used / recommended for detecting HDR. - * - * - Since we don't have ffprobe, we're not checking the color space value - * itself but a substring of the stream line in the ffmpeg stderr output. - * - * In particular, we use this more exact check for places where we have less - * leeway. e.g. when generating thumbnails, if we apply the tonemapping to any - * non-BT.709 file (as the HLS stream generation does), we start getting the - * "code 3074: no path between colorspaces" error during the JPEG conversion - * (this is not a problem in the H.264 conversion). - * - * - See: [Note: Alternative FFmpeg command for HDR videos] - * - See: [Note: Tonemapping HDR to HD] - * - * @param inputFilePath The path to a video file on the user's machine. - * - * @returns `true` if this file is likely a HDR video. Exceptions are treated as - * `false` to make this function safe to invoke without breaking the happy path. - */ -const isHDRVideo = async (inputFilePath: string) => { - try { - const videoInfo = await pseudoFFProbeVideo(inputFilePath); - const vs = videoStreamLineRegex.exec(videoInfo)?.at(1); - if (!vs) return false; - return vs.includes("smpte2084") || vs.includes("arib-std-b67"); - } catch (e) { - log.warn(`Could not detect HDR status of ${inputFilePath}`, e); - return false; - } -}; - -/** - * Return the stderr of ffmpeg in an attempt to gain information about the video - * at the given {@link inputFilePath}. - * - * We don't have the ffprobe binary at hand, which is why we need to use this - * alternative. See: [Note: Parsing CLI output might break on ffmpeg updates] - * - * @returns the stderr of ffmpeg after running it on the input file. The exact - * command we run is: - * - * ffmpeg -i in.mov -an -frames:v 0 -f null - 2>info.txt - * - * And the returned string is the contents of the `info.txt` thus produced. - */ -const pseudoFFProbeVideo = async (inputFilePath: string) => { - const command = [ - ffmpegPathPlaceholder, - // Reduce the amount of output lines we have to parse. 
- ["-hide_banner"], - ["-i", inputPathPlaceholder], - "-an", - ["-frames:v", "0"], - ["-f", "null"], - "-", - ].flat(); - - const cmd = substitutePlaceholders(command, inputFilePath, /* NA */ ""); - - const { stderr } = await execAsync(cmd); - - return stderr; -}; diff --git a/desktop/src/main/services/workers.ts b/desktop/src/main/services/workers.ts index cb6923bc62..e44b72cd3b 100644 --- a/desktop/src/main/services/workers.ts +++ b/desktop/src/main/services/workers.ts @@ -3,6 +3,7 @@ * utility processes that we create. */ +import type { Endpoint } from "comlink"; import { MessageChannelMain, type BrowserWindow, @@ -12,12 +13,26 @@ import { app, utilityProcess } from "electron/main"; import path from "node:path"; import type { UtilityProcessType } from "../../types/ipc"; import log, { processUtilityProcessLogMessage } from "../log"; +import { messagePortMainEndpoint } from "../utils/comlink"; /** The active ML utility process, if any. */ -let _child: UtilityProcess | undefined; +let _utilityProcessML: UtilityProcess | undefined; /** - * Create a new ML utility process, terminating the older ones (if any). + * A promise to a comlink {@link Endpoint} that can be used to communicate with + * the active ffmpeg utility process (if any). + */ +let _utilityProcessFFmpegEndpoint: Promise | undefined; + +/** + * Create a new utility process of the given {@link type}, terminating the older + * ones (if any). + * + * Currently the only type is "ml". The following note explains the reasoning + * why utility processes were used for the first workload (ML) that was handled + * this way. Similar reasoning applies to subsequent workloads (ffmpeg) that + * have been offloaded to utility processes in a slightly different manner to + * avoid stutter in the UI. * * [Note: ML IPC] * @@ -75,20 +90,13 @@ let _child: UtilityProcess | undefined; export const triggerCreateUtilityProcess = ( type: UtilityProcessType, window: BrowserWindow, -) => { - switch (type) { - // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition - case "ml": - triggerCreateMLUtilityProcess(window); - break; - } -}; +) => triggerCreateMLUtilityProcess(window); export const triggerCreateMLUtilityProcess = (window: BrowserWindow) => { - if (_child) { + if (_utilityProcessML) { log.debug(() => "Terminating previous ML utility process"); - _child.kill(); - _child = undefined; + _utilityProcessML.kill(); + _utilityProcessML = undefined; } const { port1, port2 } = new MessageChannelMain(); @@ -99,9 +107,9 @@ export const triggerCreateMLUtilityProcess = (window: BrowserWindow) => { window.webContents.postMessage("utilityProcessPort/ml", undefined, [port2]); - handleMessagesFromUtilityProcess(child); + handleMessagesFromMLUtilityProcess(child); - _child = child; + _utilityProcessML = child; }; /** @@ -127,7 +135,7 @@ export const triggerCreateMLUtilityProcess = (window: BrowserWindow) => { * - When we need to communicate from the utility process to the main process, * we use the `parentPort` in the utility process. */ -const handleMessagesFromUtilityProcess = (child: UtilityProcess) => { +const handleMessagesFromMLUtilityProcess = (child: UtilityProcess) => { child.on("message", (m: unknown) => { if (processUtilityProcessLogMessage("[ml-worker]", m)) { return; @@ -135,3 +143,65 @@ const handleMessagesFromUtilityProcess = (child: UtilityProcess) => { log.info("Ignoring unknown message from ML utility process", m); }); }; + +/** + * A comlink endpoint that can be used to communicate with the ffmpeg utility + * process. 
+ *
+ * If there is no ffmpeg utility process, a new one is created on demand.
+ *
+ * See [Note: ML IPC] for a general outline of why utility processes are needed
+ * (tl;dr: to avoid stutter in the UI).
+ *
+ * In the case of ffmpeg, the IPC flow is a bit different: the utility process
+ * is not exposed to the web layer, and is internal to the node layer. The
+ * reason for this difference is that we need to create temporary files etc.,
+ * and doing that in a utility process would require access to the `app`
+ * module, which is not accessible there (See: [Note: Using Electron APIs in
+ * UtilityProcess]).
+ *
+ * There could've been reasonable workarounds, but instead we've adopted an
+ * architecture with three layers:
+ *
+ *     Renderer (web) <-> Node.js main <-> Node.js ffmpeg utility process
+ *
+ * Temporary file creation etc. is handled in the Node.js main process, and
+ * paths to the files are forwarded to the ffmpeg utility process to act on.
+ *
+ * @returns an endpoint that can be used to communicate with the utility
+ * process. The utility process is expected to expose an object conforming to
+ * the {@link FFmpegUtilityProcess} interface on this endpoint.
+ */
+export const ffmpegUtilityProcessEndpoint = () =>
+    (_utilityProcessFFmpegEndpoint ??= createFFmpegUtilityProcessEndpoint());
+
+const createFFmpegUtilityProcessEndpoint = () => {
+    // Promise.withResolvers is not yet available in the Node version available
+    // to us, so wire up the resolver by hand.
+    let resolve: ((endpoint: Endpoint) => void) | undefined;
+    const promise = new Promise<Endpoint>((r) => (resolve = r));
+
+    const { port1, port2 } = new MessageChannelMain();
+
+    const child = utilityProcess.fork(path.join(__dirname, "ffmpeg-worker.js"));
+    // Send a handle to the port (one end of the message channel) to the
+    // utility process. The utility process will reply with an "ack" when it
+    // gets it.
+    child.postMessage({}, [port1]);
+
+    child.on("message", (m: unknown) => {
+        if (m && typeof m == "object" && "method" in m) {
+            switch (m.method) {
+                case "ack":
+                    resolve!(messagePortMainEndpoint(port2));
+                    return;
+            }
+        }
+
+        if (processUtilityProcessLogMessage("[ffmpeg-worker]", m)) {
+            return;
+        }
+
+        log.info("Ignoring unknown message from ffmpeg utility process", m);
+    });
+
+    // The returned promise resolves with the other end of the message channel
+    // once we get an "ack" from the utility process.
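+    //
+    // Illustrative aside: once Promise.withResolvers becomes available to us,
+    // the manual resolver plumbing above could presumably be replaced by
+    //
+    //     const { promise, resolve } = Promise.withResolvers<Endpoint>();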
+    return promise;
+};
diff --git a/desktop/src/main/stream.ts b/desktop/src/main/stream.ts
index 01d591042f..85d572870f 100644
--- a/desktop/src/main/stream.ts
+++ b/desktop/src/main/stream.ts
@@ -3,18 +3,13 @@
  */
 import { net, protocol } from "electron/main";
 import { randomUUID } from "node:crypto";
-import fs_ from "node:fs";
 import fs from "node:fs/promises";
-import { Readable, Writable } from "node:stream";
+import { Writable } from "node:stream";
 import { pathToFileURL } from "node:url";
 import log from "./log";
-import {
-    ffmpegConvertToMP4,
-    ffmpegGenerateHLSPlaylistAndSegments,
-    type FFmpegGenerateHLSPlaylistAndSegmentsResult,
-} from "./services/ffmpeg";
+import { ffmpegUtilityProcess } from "./services/ffmpeg";
+import { type FFmpegGenerateHLSPlaylistAndSegmentsResult } from "./services/ffmpeg-worker";
 import { markClosableZip, openZip } from "./services/zip";
-import { wait } from "./utils/common";
 import { writeStream } from "./utils/stream";
 import {
     deleteTempFile,
@@ -234,12 +229,14 @@ export const clearPendingVideoResults = () => pendingVideoResults.clear();
  * See also: [Note: IPC streams]
  */
 const handleConvertToMP4Write = async (request: Request) => {
+    const worker = await ffmpegUtilityProcess();
+
     const inputTempFilePath = await makeTempFilePath();
     await writeStream(inputTempFilePath, request.body!);
 
     const outputTempFilePath = await makeTempFilePath("mp4");
     try {
-        await ffmpegConvertToMP4(inputTempFilePath, outputTempFilePath);
+        await worker.ffmpegConvertToMP4(inputTempFilePath, outputTempFilePath);
     } catch (e) {
         log.error("Conversion to MP4 failed", e);
         await deleteTempFileIgnoringErrors(outputTempFilePath);
@@ -311,6 +308,8 @@ const handleGenerateHLSWrite = async (
         }
     }
 
+    const worker = await ffmpegUtilityProcess();
+
     const {
         path: inputFilePath,
         isFileTemporary: isInputFileTemporary,
@@ -322,9 +321,10 @@
     try {
         await writeToTemporaryInputFile();
 
-        result = await ffmpegGenerateHLSPlaylistAndSegments(
+        result = await worker.ffmpegGenerateHLSPlaylistAndSegments(
             inputFilePath,
             outputFilePathPrefix,
+            objectUploadURL,
         );
 
         if (!result) {
@@ -332,115 +332,17 @@
             return new Response(null, { status: 204 });
         }
 
-        const { playlistPath, videoPath, videoSize, dimensions } = result;
-        try {
-            await uploadVideoSegments(videoPath, videoSize, objectUploadURL);
+        const { playlistPath, videoSize, dimensions } = result;
 
-            const playlistToken = randomUUID();
-            pendingVideoResults.set(playlistToken, playlistPath);
+        const playlistToken = randomUUID();
+        pendingVideoResults.set(playlistToken, playlistPath);
 
-            return new Response(
-                JSON.stringify({ playlistToken, dimensions, videoSize }),
-                { status: 200 },
-            );
-        } catch (e) {
-            await deleteTempFileIgnoringErrors(playlistPath);
-            throw e;
-        } finally {
-            await deleteTempFileIgnoringErrors(videoPath);
-        }
+        return new Response(
+            JSON.stringify({ playlistToken, videoSize, dimensions }),
+            { status: 200 },
+        );
     } finally {
         if (isInputFileTemporary)
            await deleteTempFileIgnoringErrors(inputFilePath);
    }
};
-
-/**
- * Upload the file at the given {@link videoFilePath} to the provided presigned
- * {@link objectUploadURL} using an HTTP PUT request.
- *
- * In case of non-HTTP-4xx errors, retry up to 2 more times with increasingly
- * spaced out gaps.
- *
- * See: [Note: Upload HLS video segment from node side].
- *
- * @param videoFilePath The path to the file on the user's file system to
- * upload.
- *
- * @param videoSize The size in bytes of the file at {@link videoFilePath}.
- *
- * @param objectUploadURL A pre-signed URL to upload the file.
- *
- * ---
- *
- * This is an inlined but bespoke reimplementation of `retryEnsuringHTTPOkOr4xx`
- * from `web/packages/base/http.ts`:
- *
- * - We don't have the rest of the scaffolding used by that function, which is
- *   why it is initially inlined and bespoke.
- *
- * - It handles the specific use case of uploading videos: since generating the
- *   HLS stream is a fairly expensive operation, a retry to discount transient
- *   network issues is called for. There are only 2 retries for a total of 3
- *   attempts, and the retry gaps are more spaced out.
- *
- * - Later it was discovered that net.fetch is much slower than node's native
- *   fetch, so this implementation has further diverged.
- */
-export const uploadVideoSegments = async (
-    videoFilePath: string,
-    videoSize: number,
-    objectUploadURL: string,
-) => {
-    const waitTimeBeforeNextTry = [5000, 20000];
-
-    while (true) {
-        let abort = false;
-        try {
-            const nodeStream = fs_.createReadStream(videoFilePath);
-            const webStream = Readable.toWeb(nodeStream);
-
-            // net.fetch is 40-50x slower than the native fetch for this
-            // particular PUT request. This is easily reproducible (replace
-            // `fetch` with `net.fetch`, then even on localhost the PUT requests
-            // start taking a minute or so; with node's native fetch, it is
-            // second(s)).
-            const res = await fetch(objectUploadURL, {
-                method: "PUT",
-                // net.fetch apparently deduces and inserts a content-length,
-                // but when we use node's native fetch we need to provide it
-                // explicitly.
-                headers: { "Content-Length": `${videoSize}` },
-                // The duplex option is required since we're passing a stream.
-                //
-                // @ts-expect-error TypeScript's libdom.d.ts does not include
-                // the "duplex" parameter, e.g. see
-                // https://github.com/node-fetch/node-fetch/issues/1769.
-                duplex: "half",
-                body: webStream,
-            });
-
-            if (res.ok) {
-                // Success.
-                return;
-            }
-            if (res.status >= 400 && res.status < 500) {
-                // HTTP 4xx.
-                abort = true;
-            }
-            throw new Error(
-                `Failed to upload generated HLS video: HTTP ${res.status} ${res.statusText}`,
-            );
-        } catch (e) {
-            if (abort) {
-                throw e;
-            }
-            const t = waitTimeBeforeNextTry.shift();
-            if (!t) {
-                throw e;
-            } else {
-                log.warn("Will retry potentially transient request failure", e);
-            }
-            await wait(t);
-        }
-    }
-};
diff --git a/desktop/src/main/utils/comlink.ts b/desktop/src/main/utils/comlink.ts
index d2006e795b..f0edd758af 100644
--- a/desktop/src/main/utils/comlink.ts
+++ b/desktop/src/main/utils/comlink.ts
@@ -19,7 +19,7 @@ export const messagePortMainEndpoint = (mp: MessagePortMain): Endpoint => {
     const listeners = new WeakMap();
     return {
         postMessage: (message, transfer) => {
-            mp.postMessage(message, transfer as unknown as MessagePortMain[]);
+            mp.postMessage(message, (transfer ?? []) as MessagePortMain[]);
         },
         addEventListener: (_, eh) => {
             const l: EL = (data) =>
diff --git a/desktop/src/main/utils/exec-worker.ts b/desktop/src/main/utils/exec-worker.ts
new file mode 100644
index 0000000000..02ac116a6d
--- /dev/null
+++ b/desktop/src/main/utils/exec-worker.ts
@@ -0,0 +1,23 @@
+import shellescape from "any-shell-escape";
+import { exec } from "node:child_process";
+import { promisify } from "node:util";
+import log from "../log-worker";
+
+/**
+ * Run a shell command asynchronously (utility process edition).
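+ *
+ * For example, a (hypothetical) invocation might look like:
+ *
+ *     const { stdout } = await execAsyncWorker(["echo", "hello"]);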
+ *
+ * This is an almost verbatim copy of {@link execAsync} from `electron.ts`,
+ * except it is meant to be usable from a utility process where only a subset
+ * of imports are available. See [Note: Using Electron APIs in UtilityProcess].
+ */
+export const execAsyncWorker = async (command: string | string[]) => {
+    const escapedCommand = Array.isArray(command)
+        ? shellescape(command)
+        : command;
+    const startTime = Date.now();
+    const result = await execAsync_(escapedCommand);
+    log.debugString(`${escapedCommand} (${Date.now() - startTime} ms)`);
+    return result;
+};
+
+const execAsync_ = promisify(exec);
diff --git a/web/packages/base/types/ipc.ts b/web/packages/base/types/ipc.ts
index 25f5ce2126..083cc81037 100644
--- a/web/packages/base/types/ipc.ts
+++ b/web/packages/base/types/ipc.ts
@@ -370,7 +370,7 @@ export interface Electron {
         outputFileExtension: string,
     ) => Promise;
 
-    // - ML
+    // - Utility process
 
     /**
      * Trigger the creation of a new utility process of the given {@link type},
diff --git a/web/packages/gallery/services/ffmpeg/index.ts b/web/packages/gallery/services/ffmpeg/index.ts
index 1dd4f7f4b7..a182ee1400 100644
--- a/web/packages/gallery/services/ffmpeg/index.ts
+++ b/web/packages/gallery/services/ffmpeg/index.ts
@@ -15,7 +15,6 @@ import {
     parseMetadataDate,
     type ParsedMetadata,
 } from "ente-media/file-metadata";
-import { settingsSnapshot } from "ente-new/photos/services/settings";
 import {
     ffmpegPathPlaceholder,
     inputPathPlaceholder,
@@ -38,14 +37,7 @@ import { ffmpegExecWeb } from "./web";
  */
 export const generateVideoThumbnailWeb = async (blob: Blob) =>
     _generateVideoThumbnail((seekTime: number) =>
-        ffmpegExecWeb(
-            // TODO(HLS): Enable for all
-            settingsSnapshot().isInternalUser
-                ? makeGenThumbnailCommand(seekTime)
-                : _makeGenThumbnailCommand(seekTime, false),
-            blob,
-            "jpeg",
-        ),
+        ffmpegExecWeb(makeGenThumbnailCommand(seekTime), blob, "jpeg"),
     );
 
 const _generateVideoThumbnail = async (
diff --git a/web/packages/gallery/services/ffmpeg/web.ts b/web/packages/gallery/services/ffmpeg/web.ts
index 4c63762b0a..09152e6e46 100644
--- a/web/packages/gallery/services/ffmpeg/web.ts
+++ b/web/packages/gallery/services/ffmpeg/web.ts
@@ -94,7 +94,6 @@ const ffmpegExec = async (
         resolvedCommand = command;
     } else {
         const isHDR = await isHDRVideo(ffmpeg, inputPath);
-        log.debug(() => `[wasm] input file is ${isHDR ? "" : "not "}HDR`);
         resolvedCommand = isHDR ? command.hdr : command.default;
     }
 
diff --git a/web/packages/gallery/utils/native-stream.ts b/web/packages/gallery/utils/native-stream.ts
index 43e4523224..41d39064ef 100644
--- a/web/packages/gallery/utils/native-stream.ts
+++ b/web/packages/gallery/utils/native-stream.ts
@@ -178,8 +178,9 @@ export type GenerateHLSResult = z.infer;
  * @param video The video to convert.
  *
  * - If we're called during the upload process, then this will be set to the
- *   {@link FileSystemUploadItem} that was uploaded. This way, we can directly use
- *   the on-disk file instead of needing to download the original from remote.
+ *   {@link FileSystemUploadItem} that was uploaded. This way, we can directly
+ *   use the on-disk file instead of needing to download the original from
+ *   remote.
  *
  * - Otherwise it should be a {@link ReadableStream} of the video contents.
 *