Tweaks (non-functional)

Manav Rathi
2024-09-26 07:25:25 +05:30
parent c8ab6be9f8
commit cda925fc80
3 changed files with 11 additions and 5 deletions


@@ -203,8 +203,7 @@ const createInferenceSession = async (modelPath: string) => {
 const cachedCLIPImageSession = makeCachedInferenceSession(
     "mobileclip_s2_image_opset18_rgba_sim.onnx",
-    143061211 /* 143 MB */,
-    // TODO: manav: check above number, because I got 143093992 but might be calculating wrong
+    143093992 /* 143 MB */,
 );
 /**
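For context, a minimal sketch of what a helper like makeCachedInferenceSession might do, assuming onnxruntime-node: memoize the session creation, and use the expected byte size to detect a truncated earlier download. The path resolution and fetch steps (modelPathFor, downloadModel) are hypothetical stand-ins, not the repository's actual code.

import * as os from "node:os";
import * as path from "node:path";
import { existsSync, statSync } from "node:fs";
import * as ort from "onnxruntime-node";

// Hypothetical stand-ins for the app's actual model storage and fetch logic.
const modelPathFor = (name: string) => path.join(os.tmpdir(), name);
declare function downloadModel(name: string, dest: string): Promise<void>;

const makeCachedInferenceSession = (name: string, expectedByteSize: number) => {
    let session: Promise<ort.InferenceSession> | undefined;
    return () =>
        (session ??= (async () => {
            const modelPath = modelPathFor(name);
            // Re-fetch if the file is missing, or if its on-disk size doesn't
            // match the expected byte count (an interrupted download).
            if (
                !existsSync(modelPath) ||
                statSync(modelPath).size != expectedByteSize
            )
                await downloadModel(name, modelPath);
            return ort.InferenceSession.create(modelPath);
        })());
};

This would also explain why getting the byte count exactly right matters: a mismatched size would cause the model to be re-downloaded on every check.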


@@ -553,8 +553,9 @@ export interface ElectronMLWorker {
  * See: [Note: Natural language search using CLIP]
  *
  * The input is an opaque float32 array representing the image. The layout
- * and exact encoding of the input is specific to our implementation and the
- * ML model (CLIP) we use.
+ * and exact encoding of the input is specific to the runtime (ONNX) and the
+ * ML model (a MobileCLIP variant) we use. In particular, the image
+ * pre-processing happens within our model itself.
  *
  * @returns A CLIP embedding (an array of 512 floating point values).
  */
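Piecing this docstring together with the call site in the next hunk, the method being documented plausibly has a shape like the following. This is inferred, not quoted from the source: the parameter types come from ImageData.data (a Uint8ClampedArray) and the [height, width, 4] shape array passed below.

export interface ElectronMLWorker {
    /**
     * Compute a CLIP embedding for an RGBA image.
     *
     * @param input Raw RGBA pixel data (e.g. ImageData.data).
     * @param inputShape The dimensions of input, as [height, width, 4].
     *
     * @returns A CLIP embedding (an array of 512 floating point values).
     */
    computeCLIPImageEmbedding: (
        input: Uint8ClampedArray,
        inputShape: number[],
    ) => Promise<Float32Array>;
}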


@@ -112,9 +112,15 @@ const computeEmbedding = async (
     imageData: ImageData,
     electron: ElectronMLWorker,
 ): Promise<Float32Array> => {
+    // In contrast to the face detection model, the image pre-processing
+    // happens within the model itself, using ONNX primitives. This is more
+    // performant and also saves us from having to reinvent (say) the
+    // antialiasing wheel.
     const { height, width, data: pixelData } = imageData;
     const inputShape = [height, width, 4]; // [H, W, C]
-    return normalized(await electron.computeCLIPImageEmbedding(pixelData, inputShape));
+    return normalized(
+        await electron.computeCLIPImageEmbedding(pixelData, inputShape),
+    );
 };
 
 const normalized = (embedding: Float32Array) => {
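The hunk ends just as normalized begins. CLIP embeddings are conventionally scaled to unit length so that cosine similarity reduces to a dot product; a plausible body, assuming plain L2 normalization (the diff does not show the real one):

const normalized = (embedding: Float32Array) => {
    // L2 norm: the square root of the sum of squared components.
    const norm = Math.sqrt(embedding.reduce((a, v) => a + v * v, 0));
    // Divide each component by the norm to obtain a unit vector.
    return embedding.map((v) => v / norm);
};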