From 82f808e5334ca2cd2995ae0ba28c1bc4a2d1aba5 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 30 Jul 2024 09:59:33 +0530 Subject: [PATCH] Outline --- desktop/src/main/services/ml.ts | 60 ++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/desktop/src/main/services/ml.ts b/desktop/src/main/services/ml.ts index 55bb8d79c2..dcda511df6 100644 --- a/desktop/src/main/services/ml.ts +++ b/desktop/src/main/services/ml.ts @@ -5,10 +5,6 @@ * * The ML runtime we use for inference is [ONNX](https://onnxruntime.ai). Models * for various tasks are not shipped with the app but are downloaded on demand. - * - * The primary reason for doing these tasks in the Node.js layer is so that we - * can use the binary ONNX runtime which is 10-20x faster than the WASM based - * web one. */ import { app, net } from "electron/main"; @@ -19,6 +15,62 @@ import * as ort from "onnxruntime-node"; import log from "../log"; import { writeStream } from "../stream"; +/** + * Create a new ML session. + * + * [Note: ML IPC] + * + * The primary reason for doing ML tasks in the Node.js layer is so that we can + * use the binary ONNX runtime, which is 10-20x faster than the WASM one that + * can be used directly on the web layer. + * + * For this to work, the main and renderer processes need to communicate with each + * other. Further, in the web layer the ML indexing runs in a web worker (so as + * to not get in the way of the main thread). So the communication has 2 hops: + * + * Node.js main <-> Renderer main <-> Renderer web worker + * + * This naive way works, but has a problem. The Node.js main process is in the + * code path for delivering user events to the renderer process. The ML tasks we + * do take in the order of 100-300 ms (possibly more) for each individual + * inference. Thus, the Node.js main process is busy for those 100-300 ms, and + * does not forward events to the renderer, causing the UI to jitter.
+ * + * The solution for this is to spawn an Electron UtilityProcess, which we can + * think of as a regular Node.js child process. This frees up the Node.js main + * process, and would remove the jitter. + * https://www.electronjs.org/docs/latest/tutorial/process-model + * + * It would seem that this introduces another hop in our IPC + * + * Node.js utility process <-> Node.js main <-> ... + * + * but here we can use the special bit about Electron utility processes that + * separates them from regular Node.js child processes: their support for + * message ports. https://www.electronjs.org/docs/latest/tutorial/message-ports + * + * As a brief summary, a MessagePort is a web feature that allows two contexts + * to communicate. A pair of message ports is called a message channel. The cool + * thing about these is that we can pass these ports themselves over IPC. + * + * So we + * + * 1. Spawn a utility process. + * 2. In the utility process create a message channel. + * 3. Keep one port of the pair with us, and send the other over IPC to the + * _web worker_ that is coordinating the ML indexing on the web layer. + * + * Thereafter, the utility process and web worker can directly talk to each + * other! + * + * Node.js utility process <-> Renderer web worker + * + */ +export const createMLSession = () => { + // }: Promise => { + throw new Error("Not implemented"); +}; + /** + * Return a function that can be used to trigger a download of the specified + * model, and the creating of an ONNX inference session initialized using it.