From 82f808e5334ca2cd2995ae0ba28c1bc4a2d1aba5 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Tue, 30 Jul 2024 09:59:33 +0530 Subject: [PATCH] Outline --- desktop/src/main/services/ml.ts | 60 ++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/desktop/src/main/services/ml.ts b/desktop/src/main/services/ml.ts index 55bb8d79c2..dcda511df6 100644 --- a/desktop/src/main/services/ml.ts +++ b/desktop/src/main/services/ml.ts @@ -5,10 +5,6 @@ * * The ML runtime we use for inference is [ONNX](https://onnxruntime.ai). Models * for various tasks are not shipped with the app but are downloaded on demand. - * - * The primary reason for doing these tasks in the Node.js layer is so that we - * can use the binary ONNX runtime which is 10-20x faster than the WASM based - * web one. */ import { app, net } from "electron/main"; @@ -19,6 +15,62 @@ import * as ort from "onnxruntime-node"; import log from "../log"; import { writeStream } from "../stream"; +/** + * Create a new ML session. + * + * [Note: ML IPC] + * + * The primary reason for doing ML tasks in the Node.js layer is so that we can + * use the binary ONNX runtime, which is 10-20x faster than the WASM one that + * can be used directly on the web layer. + * + * For this to work, the main and renderer processes need to communicate with each + * other. Further, in the web layer the ML indexing runs in a web worker (so as + * to not get in the way of the main thread). So the communication has 2 hops: + * + * Node.js main <-> Renderer main <-> Renderer web worker + * + * This naive way works, but has a problem. The Node.js main process is in the + * code path for delivering user events to the renderer process. The ML tasks we + * do take in the order of 100-300 ms (possibly more) for each individual + * inference. Thus, the Node.js main process is busy for those 100-300 ms, and + * does not forward events to the renderer, causing the UI to jitter.
+ * + * The solution for this is to spawn an Electron UtilityProcess, which we can + * think of as a regular Node.js child process. This frees up the Node.js main + * process, and would remove the jitter. + * https://www.electronjs.org/docs/latest/tutorial/process-model + * + * It would seem that this introduces another hop in our IPC + * + * Node.js utility process <-> Node.js main <-> ... + * + * but here we can use the special bit about Electron utility processes that + * separates them from regular Node.js child processes: their support for + * message ports. https://www.electronjs.org/docs/latest/tutorial/message-ports + * + * As a brief summary, a MessagePort is a web feature that allows two contexts + * to communicate. A pair of message ports is called a message channel. The cool + * thing about these is that we can pass these ports themselves over IPC. + * + * So we + * + * 1. Spawn a utility process. + * 2. In the utility process create a message channel. + * 3. Keep one port of the pair with us, and send the other over IPC to the + * _web worker_ that is coordinating the ML indexing on the web layer. + * + * Thereafter, the utility process and web worker can directly talk to each + * other! + * + * Node.js utility process <-> Renderer web worker + * + */ +export const createMLSession = () => { + // }: Promise => { + throw new Error("Not implemented"); +}; + /** + * Return a function that can be used to trigger a download of the specified + * model, and the creating of an ONNX inference session initialized using it.