Tweaks (non-functional)
@@ -203,8 +203,7 @@ const createInferenceSession = async (modelPath: string) => {
 
 const cachedCLIPImageSession = makeCachedInferenceSession(
     "mobileclip_s2_image_opset18_rgba_sim.onnx",
-    143061211 /* 143 MB */,
-    // TODO: manav: check above number, because I got 143093992 but might be calculating wrong
+    143093992 /* 143 MB */,
 );
 
 /**
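The corrected constant is the expected on-disk byte size of the ONNX model, and 143093992 bytes is ≈143.09 MB in decimal (10^6) units, so the /* 143 MB */ annotation remains accurate after the fix. A minimal sketch of how such a size could be checked against the downloaded file; the isModelDownloadValid helper and its fs.stat based check are illustrative assumptions, not this repository's actual implementation:

import { stat } from "node:fs/promises";

/**
 * Hypothetical helper: report whether the model file at {@link modelPath}
 * has exactly the byte size we expect (e.g. 143093992 for the MobileCLIP
 * image model). A mismatch would indicate a truncated or stale download.
 */
const isModelDownloadValid = async (
    modelPath: string,
    expectedByteSize: number,
): Promise<boolean> => {
    try {
        const { size } = await stat(modelPath);
        return size === expectedByteSize;
    } catch {
        // A missing or unreadable file counts as an invalid download.
        return false;
    }
};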
@@ -553,8 +553,9 @@ export interface ElectronMLWorker {
      * See: [Note: Natural language search using CLIP]
      *
      * The input is an opaque float32 array representing the image. The layout
-     * and exact encoding of the input is specific to our implementation and the
-     * ML model (CLIP) we use.
+     * and exact encoding of the input is specific to the runtime (ONNX) and the
+     * ML model (a MobileCLIP variant) we use. In particular, the image
+     * pre-processing happens within our model itself.
      *
      * @returns A CLIP embedding (an array of 512 floating point values).
      */
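As a consumer-side illustration of the documented return value: the embedding is a 512-element vector, and it is L2-normalized before being returned (see the normalized call in the next hunk), so comparing two embeddings for natural language search reduces to a dot product. A sketch under that assumption, not code from this commit:

/**
 * Cosine similarity between two CLIP embeddings.
 *
 * Assumes both vectors are already L2-normalized, in which case the cosine
 * of the angle between them is just their dot product.
 */
const clipSimilarity = (a: Float32Array, b: Float32Array): number => {
    let dot = 0;
    for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
    return dot;
};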
@@ -112,9 +112,15 @@ const computeEmbedding = async (
     imageData: ImageData,
     electron: ElectronMLWorker,
 ): Promise<Float32Array> => {
+    // In contrast to the face detection model, the image pre-processing
+    // happens within the model itself, using ONNX primitives. This is more
+    // performant and also saves us from having to reinvent (say) the
+    // antialiasing wheels.
     const { height, width, data: pixelData } = imageData;
     const inputShape = [height, width, 4]; // [H, W, C]
-    return normalized(await electron.computeCLIPImageEmbedding(pixelData, inputShape));
+    return normalized(
+        await electron.computeCLIPImageEmbedding(pixelData, inputShape),
+    );
 };
 
 const normalized = (embedding: Float32Array) => {
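The hunk is truncated at the opening line of normalized. Assuming it performs the standard L2 normalization (the usual prelude to the dot-product comparison sketched above), its body would look roughly like this; a sketch, not the file's actual contents:

const normalized = (embedding: Float32Array) => {
    // Euclidean (L2) norm of the vector.
    let sumOfSquares = 0;
    for (const v of embedding) sumOfSquares += v * v;
    const norm = Math.sqrt(sumOfSquares);
    // Scale each component so the result has unit length.
    return embedding.map((v) => v / norm);
};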