Manav Rathi
2025-04-30 08:10:36 +05:30
parent 9405d549c7
commit da60436e91
7 changed files with 52 additions and 34 deletions

View File

@@ -76,6 +76,10 @@ export interface FileAndPath {
/**
* The subset of cases of {@link UploadItem} that apply when we're running in
* the context of our desktop app.
*
* If we know that we're running in the context of the desktop app, then
* {@link toDesktopUploadItem} can be used to convert an {@link UploadItem}
* into a {@link DesktopUploadItem}.
*/
export type DesktopUploadItem = Exclude<UploadItem, File>;
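
For illustration, a minimal sketch of what such a conversion could look like. It assumes that the non-File cases of UploadItem are a path string, a [zipPath, entryName] tuple, or a FileAndPath, and that the desktop bridge exposes a pathForFile helper; both are assumptions, not the actual implementation.

import type { Electron } from "ente-base/types/ipc";

// Hypothetical sketch, not the actual implementation. A File picked in the
// desktop app has an on-disk path that the Electron bridge can resolve
// (pathForFile is an assumed name); pairing the two yields a FileAndPath.
// All other cases are already DesktopUploadItems.
export const toDesktopUploadItem = (
    electron: Electron,
    uploadItem: UploadItem,
): DesktopUploadItem =>
    uploadItem instanceof File
        ? { file: uploadItem, path: electron.pathForFile(uploadItem) }
        : uploadItem;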

View File

@@ -314,6 +314,9 @@ const blobToDataURL = (blob: Blob) =>
* client, allowing us to create its streamable variant without needing to
* redownload the video.
*
* It only does the processing if we're running in the context of the desktop
* app, since video processing is resource intensive (see the sketch below).
*
* Note that this is an optimization. Even if we don't process the video at this
* time (e.g. if the video processor can't keep up with the uploads), we will
* eventually process it later as part of a backfill.
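
A minimal sketch of that gating, following the globalThis.electron pattern that appears later in this commit; the snippet is assumed to sit at the top of the processing function:

// Minimal sketch of the guard. globalThis.electron is only defined when
// we're running inside our desktop (Electron) app, so on the web we bail
// out early instead of attempting the resource intensive processing.
const electron = globalThis.electron;
if (!electron) return;
// ... proceed to enqueue the video for stream generation ...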

View File

@@ -170,7 +170,7 @@ export type GenerateHLSResult = z.infer<typeof GenerateHLSResult>;
*
* This is a variant of {@link writeStream} tailored for the HLS generation. It
* is similar to {@link initiateConvertToMP4}, but also supports streaming
* {@link UploadItem}s and {@link ReadableStream}s.
* {@link DesktopUploadItem}s and {@link ReadableStream}s.
*
* @param _ An {@link Electron} instance, witness to the fact that we're running
* in the context of the desktop app. It is otherwise not used.
@@ -178,8 +178,8 @@ export type GenerateHLSResult = z.infer<typeof GenerateHLSResult>;
* @param video The video to convert.
*
* - If we're called during the upload process, then this will be set to the
* {@link UploadItem} that was uploaded. This way, we can directly use the
* on-disk file instead of needing to download the original from remote.
* {@link DesktopUploadItem} that was uploaded. This way, we can directly use
* the on-disk file instead of needing to download the original from remote.
*
* - Otherwise it should be a {@link ReadableStream} of the video contents.
*
@@ -207,7 +207,7 @@ export const initiateGenerateHLS = async (
if (video instanceof ReadableStream) {
body = video;
} else {
// video is an UploadItem
// video is a DesktopUploadItem
body = null;
if (typeof video == "string") {
// Path to a regular file on the user's filesystem.
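
Since DesktopUploadItem excludes File, the remaining cases can be told apart structurally. A hedged sketch of the narrowing, with the three shapes assumed from the checks visible in this commit (typeof, Array.isArray, and a FileAndPath fallback):

// Hypothetical sketch: structural narrowing of a DesktopUploadItem into
// its three assumed shapes.
const describeDesktopUploadItem = (item: DesktopUploadItem): string => {
    if (typeof item == "string") {
        // Path to a regular file on the user's filesystem.
        return `file at ${item}`;
    } else if (Array.isArray(item)) {
        // A [zipPath, entryName] tuple identifying an entry in a zip.
        const [zipPath, entryName] = item;
        return `entry ${entryName} of ${zipPath}`;
    } else {
        // A FileAndPath: an in-memory File alongside its on-disk path.
        return `in-memory File backed by ${item.path}`;
    }
};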

View File

@@ -2,7 +2,7 @@ import { basename } from "ente-base/file-name";
import type { ElectronMLWorker } from "ente-base/types/ipc";
import { renderableImageBlob } from "ente-gallery/services/convert";
import { downloadManager } from "ente-gallery/services/download";
import type { UploadItem } from "ente-gallery/services/upload";
import type { DesktopUploadItem } from "ente-gallery/services/upload";
import { readStream } from "ente-gallery/utils/native-stream";
import type { EnteFile } from "ente-media/file";
import { FileType } from "ente-media/file-type";
@@ -68,8 +68,8 @@ export const createImageBitmapAndData = async (
* @param file The {@link EnteFile} to index.
*
* @param uploadItem If we're called during the upload process, then this will
* be set to the {@link UploadItem} that was uploaded. This way, we can directly
* use the on-disk file instead of needing to download the original from remote.
* be set to the {@link DesktopUploadItem} that was uploaded so that we can
* directly use the on-disk file instead of needing to download the original.
*
* @param electron The {@link ElectronMLWorker} instance that stands as a
* witness that we're actually running in our desktop app (and thus can safely
@@ -77,7 +77,7 @@ export const createImageBitmapAndData = async (
*/
export const fetchRenderableBlob = async (
file: EnteFile,
uploadItem: UploadItem | undefined,
uploadItem: DesktopUploadItem | undefined,
electron: ElectronMLWorker,
): Promise<Blob> =>
uploadItem
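
The hunk breaks off at the start of the dispatch. A hedged sketch of its likely shape; fetchRenderableRemoteBlob is an assumed name for the download-from-remote fallback:

// Hypothetical sketch: prefer the on-disk upload item when we have one,
// otherwise fall back to downloading the original from remote.
export const fetchRenderableBlob = async (
    file: EnteFile,
    uploadItem: DesktopUploadItem | undefined,
    electron: ElectronMLWorker,
): Promise<Blob> =>
    uploadItem
        ? fetchRenderableUploadItemBlob(file, uploadItem, electron)
        : fetchRenderableRemoteBlob(file);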
@@ -86,7 +86,7 @@ export const fetchRenderableBlob = async (
const fetchRenderableUploadItemBlob = async (
file: EnteFile,
uploadItem: UploadItem,
uploadItem: DesktopUploadItem,
electron: ElectronMLWorker,
) => {
const fileType = file.metadata.fileType;
@@ -104,14 +104,15 @@ const fetchRenderableUploadItemBlob = async (
*
* See: [Note: Reading a UploadItem]
*
* @param uploadItem An {@link UploadItem} which we are trying to index. The
* code calling us guarantees that this function will not be called for videos.
* @param uploadItem A {@link DesktopUploadItem} which we are trying to index.
* The code calling us guarantees that this function will not be called for
* videos.
*
* @returns a web {@link File} that can be used to access the upload item's
* contents.
*/
const readNonVideoUploadItem = async (
uploadItem: UploadItem,
uploadItem: DesktopUploadItem,
electron: ElectronMLWorker,
): Promise<File> => {
if (typeof uploadItem == "string" || Array.isArray(uploadItem)) {
@@ -126,11 +127,7 @@ const readNonVideoUploadItem = async (
lastModified: lastModifiedMs,
});
} else {
if (uploadItem instanceof File) {
return uploadItem;
} else {
return uploadItem.file;
}
return uploadItem.file;
}
};
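
Stitching the hunks together, a hedged sketch of the full function after this change; the { response, lastModifiedMs } result shape of readStream is assumed from the visible usage:

// Hypothetical consolidated sketch, not the verbatim source.
const readNonVideoUploadItem = async (
    uploadItem: DesktopUploadItem,
    electron: ElectronMLWorker,
): Promise<File> => {
    if (typeof uploadItem == "string" || Array.isArray(uploadItem)) {
        // A path, or a [zipPath, entryName] tuple: stream the bytes over
        // IPC and wrap them in a web File.
        const fileName = basename(
            typeof uploadItem == "string" ? uploadItem : uploadItem[1],
        );
        const { response, lastModifiedMs } = await readStream(
            electron,
            uploadItem,
        );
        return new File([await response.blob()], fileName, {
            lastModified: lastModifiedMs,
        });
    } else {
        // A FileAndPath: the web File is already in hand.
        return uploadItem.file;
    }
};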

View File

@@ -81,22 +81,22 @@ export type LocalCLIPIndex = CLIPIndex & {
};
/**
* Compute the CLIP embedding of a given file.
* Compute the CLIP embedding of a given {@link image}.
*
* This function is the entry point to the CLIP indexing pipeline. The file goes
* through various stages:
* This function implements the third and fourth stages of the CLIP indexing
* pipeline. Overall, the file goes through these stages (sketched below):
*
* 1. Downloading the original if needed.
* 2. Convert (if needed) and pre-process.
* 3. Compute embeddings using ONNX/CLIP.
* 1. Download the original (if needed).
* 2. Convert (if needed) to obtain an image bitmap.
* 3. Preprocess the image bitmap.
* 4. Compute embeddings of this preprocessed image using ONNX/CLIP.
*
* Once all of this is done, it returns the CLIP embedding (wrapped as a
* {@link CLIPIndex} so that it can be saved locally and also uploaded to the
* user's remote storage for use on their other devices).
*
* @param uploadItem If we're called during the upload process, then this will
* be set to the {@link UploadItem} that was uploaded. This way, we can directly
* use the on-disk file instead of needing to download the original from remote.
* @param image The image bitmap (and its associated data) of the image file
* whose CLIP embedding we're computing.
*
* @param electron The {@link ElectronMLWorker} instance that allows us to call
* our Node.js layer to run the ONNX inference.
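
A hedged sketch of how stages 3 and 4 might compose; the preprocessing step is passed in as a function since its details aren't shown here, and computeCLIPImageEmbedding is assumed to be the IPC entry point for the ONNX inference:

// Hypothetical sketch (names assumed): stages 3 and 4 of the pipeline.
// Stages 1 and 2 (download and conversion to a bitmap) happen upstream.
const computeCLIPEmbedding = async (
    image: { bitmap: ImageBitmap; data: ImageData },
    preprocessForCLIP: (data: ImageData) => Float32Array,
    electron: ElectronMLWorker,
): Promise<Float32Array> => {
    // 3. Preprocess the bitmap's pixels into the tensor layout expected
    //    by the CLIP image encoder.
    const clipInput = preprocessForCLIP(image.data);
    // 4. Run the ONNX CLIP image encoder in the Node.js layer.
    return await electron.computeCLIPImageEmbedding(clipInput);
};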

View File

@@ -10,7 +10,10 @@ import log from "ente-base/log";
import { masterKeyFromSession } from "ente-base/session";
import type { Electron } from "ente-base/types/ipc";
import { ComlinkWorker } from "ente-base/worker/comlink-worker";
import type { UploadItem } from "ente-gallery/services/upload";
import {
toDesktopUploadItem,
type UploadItem,
} from "ente-gallery/services/upload";
import type { EnteFile } from "ente-media/file";
import { FileType } from "ente-media/file-type";
import { throttled } from "ente-utils/promise";
@@ -424,7 +427,8 @@ const workerDidUnawaitedIndex = () => void debounceUpdateClustersAndPeople();
/**
* Run indexing on a file which was uploaded from this client.
*
* Indexing only happens if ML is enabled.
* Indexing only happens if ML is enabled and we're running in the desktop app,
* since indexing is resource intensive.
*
* This function is called by the uploader when it uploads a new file from this
* client, giving us the opportunity to index it live. This is only an
@@ -440,9 +444,14 @@ const workerDidUnawaitedIndex = () => void debounceUpdateClustersAndPeople();
*/
export const indexNewUpload = (file: EnteFile, uploadItem: UploadItem) => {
if (!isMLEnabled()) return;
const electron = globalThis.electron;
if (!electron) return;
const desktopUploadItem = toDesktopUploadItem(electron, uploadItem);
if (file.metadata.fileType !== FileType.image) return;
log.debug(() => ["ml/liveq", { file, uploadItem }]);
void worker().then((w) => w.onUpload(file, uploadItem));
log.debug(() => ["ml/liveq", { file, uploadItem: desktopUploadItem }]);
void worker().then((w) => w.onUpload(file, desktopUploadItem));
};
export type MLStatus =

View File

@@ -6,7 +6,7 @@ import log from "ente-base/log";
import { logUnhandledErrorsAndRejectionsInWorker } from "ente-base/log-web";
import type { ElectronMLWorker } from "ente-base/types/ipc";
import { isNetworkDownloadError } from "ente-gallery/services/download";
import type { UploadItem } from "ente-gallery/services/upload";
import type { DesktopUploadItem } from "ente-gallery/services/upload";
import { fileLogID, type EnteFile } from "ente-media/file";
import { wait } from "ente-utils/promise";
import { getAllLocalFiles, getLocalTrashedFiles } from "../files";
@@ -66,8 +66,13 @@ const idleDurationMax = 16 * 60; /* 16 minutes */
interface IndexableItem {
/** The {@link EnteFile} to (potentially) index. */
file: EnteFile;
/** If the file was uploaded from the current client, then its contents. */
uploadItem: UploadItem | undefined;
/**
* If the file was uploaded from the current client, then its contents.
*
* Since indexing only happens in the desktop app, this is the more specific
* type {@link DesktopUploadItem}.
*/
uploadItem: DesktopUploadItem | undefined;
/** The existing ML data on remote corresponding to this file. */
remoteMLData: RemoteMLData | undefined;
}
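
For example, the entry for a file that was just uploaded from this client would carry its contents but no remote ML data yet (a hypothetical construction):

// Hypothetical example of an IndexableItem for a fresh upload: we have
// the on-disk contents, and remote has no ML data for it yet.
const item: IndexableItem = {
    file,
    uploadItem: desktopUploadItem,
    remoteMLData: undefined,
};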
@@ -177,7 +182,7 @@ export class MLWorker {
* representation of the file's contents with us and won't need to download
* the file from remote.
*/
onUpload(file: EnteFile, uploadItem: UploadItem) {
onUpload(file: EnteFile, uploadItem: DesktopUploadItem) {
// Add the recently uploaded file to the live indexing queue.
//
// Limit the queue to some maximum so that we don't keep growing
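
The comment is cut off by the hunk, but it describes a bounded queue. A minimal sketch of one way such a cap could work inside this method; the cap value and the liveQ field are assumed:

// Hypothetical sketch: bound the live queue so that a long upload burst
// can't grow it indefinitely; files we skip here will be picked up by
// the periodic backfill instead.
const liveQMaxSize = 200;
if (this.liveQ.length < liveQMaxSize) {
    this.liveQ.push({ file, uploadItem, remoteMLData: undefined });
}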