Mob antialias (#3191)

## Description - Stop using unnecessary ByteData and Color classes, using simple Uint8List and RGB integers instead - Use antialias for clip image preprocessing ## Tests Tested by comparing resulting embedding to Python.
2024-09-09 22:59:18 -04:00
parent 1189610b42 762fa7150e
commit 642b7ae5ba
6 changed files with 194 additions and 155 deletions
--- a/mobile/lib/services/machine_learning/face_ml/face_detection/face_detection_service.dart
+++ b/mobile/lib/services/machine_learning/face_ml/face_detection/face_detection_service.dart
@@ -1,5 +1,5 @@
 import "dart:async";
-import 'dart:typed_data' show ByteData, Float32List;
+import 'dart:typed_data' show Uint8List, Float32List;
 import 'dart:ui' as ui show Image;

 import 'package:logging/logging.dart';
@@ -44,7 +44,7 @@ class FaceDetectionService extends MlModel {
  /// Detects faces in the given image data.
  static Future<List<FaceDetectionRelative>> predict(
    ui.Image image,
-    ByteData imageByteData,
+    Uint8List rawRgbaBytes,
    int sessionAddress,
  ) async {
    assert(
@@ -59,7 +59,7 @@ class FaceDetectionService extends MlModel {
    final (inputImageList, newSize) =
        await preprocessImageToFloat32ChannelsFirst(
      image,
-      imageByteData,
+      rawRgbaBytes,
      normalization: 1,
      requiredWidth: kInputWidth,
      requiredHeight: kInputHeight,
--- a/mobile/lib/services/machine_learning/face_ml/face_recognition_service.dart
+++ b/mobile/lib/services/machine_learning/face_ml/face_recognition_service.dart
@@ -1,5 +1,5 @@
 import "dart:async" show unawaited;
-import "dart:typed_data" show ByteData, Float32List;
+import "dart:typed_data" show Uint8List, Float32List;
 import "dart:ui" show Image;

 import "package:logging/logging.dart";
@@ -70,7 +70,7 @@ class FaceRecognitionService {
  static Future<List<FaceResult>> runFacesPipeline(
    int enteFileID,
    Image image,
-    ByteData imageByteData,
+    Uint8List rawRgbaBytes,
    int faceDetectionAddress,
    int faceEmbeddingAddress,
  ) async {
@@ -82,7 +82,7 @@ class FaceRecognitionService {
        await _detectFacesSync(
      enteFileID,
      image,
-      imageByteData,
+      rawRgbaBytes,
      faceDetectionAddress,
      faceResults,
    );
@@ -100,7 +100,7 @@ class FaceRecognitionService {
    // Align the faces
    final Float32List faceAlignmentResult = await _alignFacesSync(
      image,
-      imageByteData,
+      rawRgbaBytes,
      faceDetectionResult,
      faceResults,
    );
@@ -130,7 +130,7 @@ class FaceRecognitionService {
  static Future<List<FaceDetectionRelative>> _detectFacesSync(
    int fileID,
    Image image,
-    ByteData imageByteData,
+    Uint8List rawRgbaBytes,
    int interpreterAddress,
    List<FaceResult> faceResults,
  ) async {
@@ -139,7 +139,7 @@ class FaceRecognitionService {
      final List<FaceDetectionRelative> faces =
          await FaceDetectionService.predict(
        image,
-        imageByteData,
+        rawRgbaBytes,
        interpreterAddress,
      );

@@ -166,7 +166,7 @@ class FaceRecognitionService {
  /// Returns a list of the aligned faces as image data.
  static Future<Float32List> _alignFacesSync(
    Image image,
-    ByteData imageByteData,
+    Uint8List rawRgbaBytes,
    List<FaceDetectionRelative> faces,
    List<FaceResult> faceResults,
  ) async {
@@ -174,7 +174,7 @@ class FaceRecognitionService {
      final (alignedFaces, alignmentResults, _, blurValues, _) =
          await preprocessToMobileFaceNetFloat32List(
        image,
-        imageByteData,
+        rawRgbaBytes,
        faces,
      );

--- a/mobile/lib/services/machine_learning/semantic_search/clip/clip_image_encoder.dart
+++ b/mobile/lib/services/machine_learning/semantic_search/clip/clip_image_encoder.dart
@@ -1,4 +1,4 @@
-import "dart:typed_data";
+import "dart:typed_data" show Uint8List, Float32List;
 import "dart:ui" show Image;

 import "package:logging/logging.dart";
@@ -29,12 +29,12 @@ class ClipImageEncoder extends MlModel {

  static Future<List<double>> predict(
    Image image,
-    ByteData imageByteData,
+    Uint8List rawRgbaBytes,
    int sessionAddress, [
    int? enteFileID,
  ]) async {
    final startTime = DateTime.now();
-    final inputList = await preprocessImageClip(image, imageByteData);
+    final inputList = await preprocessImageClip(image, rawRgbaBytes);
    final preprocessingTime = DateTime.now();
    final preprocessingMs =
        preprocessingTime.difference(startTime).inMilliseconds;
--- a/mobile/lib/services/machine_learning/semantic_search/semantic_search_service.dart
+++ b/mobile/lib/services/machine_learning/semantic_search/semantic_search_service.dart
@@ -258,12 +258,12 @@ class SemanticSearchService {
  static Future<ClipResult> runClipImage(
    int enteFileID,
    Image image,
-    ByteData imageByteData,
+    Uint8List rawRgbaBytes,
    int clipImageAddress,
  ) async {
    final embedding = await ClipImageEncoder.predict(
      image,
-      imageByteData,
+      rawRgbaBytes,
      clipImageAddress,
      enteFileID,
    );
--- a/mobile/lib/utils/image_ml_util.dart
+++ b/mobile/lib/utils/image_ml_util.dart
@@ -1,8 +1,7 @@
 import "dart:async";
-import "dart:developer" show log;
 import "dart:io" show File, Platform;
-import "dart:math" show max, min;
-import "dart:typed_data" show Float32List, Uint8List, ByteData;
+import "dart:math" show exp, max, min, pi;
+import "dart:typed_data" show Float32List, Uint8List;
 import "dart:ui";

 import 'package:flutter/painting.dart' as paint show decodeImageFromList;
@@ -21,12 +20,24 @@ import 'package:photos/services/machine_learning/face_ml/face_filtering/blur_det

 final _logger = Logger("ImageMlUtil");

-Future<(Image, ByteData)> decodeImageFromPath(String imagePath) async {
+/// These are 8 bit unsigned integers in range 0-255 for each RGB channel
+typedef RGB = (int, int, int);
+
+const gaussianKernelSize = 5;
+const gaussianKernelRadius = gaussianKernelSize ~/ 2;
+const gaussianSigma = 10.0;
+final List<List<double>> gaussianKernel =
+    create2DGaussianKernel(gaussianKernelSize, gaussianSigma);
+
+const maxKernelSize = gaussianKernelSize;
+const maxKernelRadius = maxKernelSize ~/ 2;
+
+Future<(Image, Uint8List)> decodeImageFromPath(String imagePath) async {
  try {
    final imageData = await File(imagePath).readAsBytes();
    final image = await decodeImageFromData(imageData);
-    final ByteData imageByteData = await getByteDataFromImage(image);
-    return (image, imageByteData);
+    final rawRgbaBytes = await _getRawRgbaBytes(image);
+    return (image, rawRgbaBytes);
  } catch (e, s) {
    final format = imagePath.split('.').last;
    if (Platform.isAndroid) {
@@ -37,8 +48,8 @@ Future<(Image, ByteData)> decodeImageFromPath(String imagePath) async {
        _logger.info('Conversion successful, decoding JPG');
        final imageData = await File(jpgPath).readAsBytes();
        final image = await decodeImageFromData(imageData);
-        final ByteData imageByteData = await getByteDataFromImage(image);
-        return (image, imageByteData);
+        final rawRgbaBytes = await _getRawRgbaBytes(image);
+        return (image, rawRgbaBytes);
      }
      _logger.info('Unable to convert $format to JPG');
    }
@@ -78,25 +89,31 @@ Future<Image> decodeImageFromData(Uint8List imageData) async {
  // final Image image = await completer.future;
  // stream.removeListener(listener);
  // return image;
+}

-  // // Decoding using the ImageProvider from material.Image. This is not faster than the above, and also the code below is not finished!
-  // final materialImage = material.Image.memory(imageData);
-  // final ImageProvider uiImage = await materialImage.image;
+Future<Uint8List> _getRawRgbaBytes(Image image) async {
+  return await _getByteDataFromImage(image, format: ImageByteFormat.rawRgba);
+}
+
+/// Encodes an [Image] object to a [Uint8List], in the png format.
+/// Can be used with `Image.memory()`.
+Future<Uint8List> _encodeImageToPng(Image image) async {
+  return await _getByteDataFromImage(image, format: ImageByteFormat.png);
 }

 /// Returns the [ByteData] object of the image, in rawRgba format.
 ///
 /// Throws an exception if the image could not be converted to ByteData.
-Future<ByteData> getByteDataFromImage(
+Future<Uint8List> _getByteDataFromImage(
  Image image, {
-  ImageByteFormat format = ImageByteFormat.rawRgba,
+  required ImageByteFormat format,
 }) async {
-  final ByteData? byteDataRgba = await image.toByteData(format: format);
-  if (byteDataRgba == null) {
-    log('[ImageMlUtils] Could not convert image to ByteData');
-    throw Exception('Could not convert image to ByteData');
+  final byteData = await image.toByteData(format: format);
+  if (byteData == null) {
+    _logger.severe('Failed to get byte data in $format from image');
+    throw Exception('Failed to get byte data in $format from image');
  }
-  return byteDataRgba;
+  return byteData.buffer.asUint8List();
 }

 /// Generates a face thumbnail from [imageData] and [faceBoxes].
@@ -161,11 +178,11 @@ Future<List<Uint8List>> generateFaceThumbnailsUsingCanvas(

 Future<(Float32List, Dimensions)> preprocessImageToFloat32ChannelsFirst(
  Image image,
-  ByteData imgByteData, {
+  Uint8List rawRgbaBytes, {
  required int normalization,
  required int requiredWidth,
  required int requiredHeight,
-  Color Function(num, num, Image, ByteData) getPixel = _getPixelBilinear,
+  RGB Function(num, num, Image, Uint8List) getPixel = _getPixelBilinear,
  maintainAspectRatio = true,
 }) async {
  final normFunction = normalization == 2
@@ -193,20 +210,20 @@ Future<(Float32List, Dimensions)> preprocessImageToFloat32ChannelsFirst(
  final int channelOffsetBlue = 2 * requiredHeight * requiredWidth;
  for (var h = 0; h < requiredHeight; h++) {
    for (var w = 0; w < requiredWidth; w++) {
-      late Color pixel;
+      late RGB pixel;
      if (w >= scaledWidth || h >= scaledHeight) {
-        pixel = const Color.fromRGBO(114, 114, 114, 1.0);
+        pixel = const (114, 114, 114);
      } else {
        pixel = getPixel(
          w / scaleW,
          h / scaleH,
          image,
-          imgByteData,
+          rawRgbaBytes,
        );
      }
-      buffer[pixelIndex] = normFunction(pixel.red);
-      buffer[pixelIndex + channelOffsetGreen] = normFunction(pixel.green);
-      buffer[pixelIndex + channelOffsetBlue] = normFunction(pixel.blue);
+      buffer[pixelIndex] = normFunction(pixel.$1);
+      buffer[pixelIndex + channelOffsetGreen] = normFunction(pixel.$2);
+      buffer[pixelIndex + channelOffsetBlue] = normFunction(pixel.$3);
      pixelIndex++;
    }
  }
@@ -216,12 +233,13 @@ Future<(Float32List, Dimensions)> preprocessImageToFloat32ChannelsFirst(

 Future<Float32List> preprocessImageClip(
  Image image,
-  ByteData imgByteData,
+  Uint8List rawRgbaBytes,
 ) async {
  const int requiredWidth = 256;
  const int requiredHeight = 256;
  const int requiredSize = 3 * requiredWidth * requiredHeight;
  final scale = max(requiredWidth / image.width, requiredHeight / image.height);
+  final bool useAntiAlias = scale < 0.8;
  final scaledWidth = (image.width * scale).round();
  final scaledHeight = (image.height * scale).round();
  final widthOffset = max(0, scaledWidth - requiredWidth) / 2;
@@ -234,15 +252,16 @@ Future<Float32List> preprocessImageClip(
  const int blueOff = 2 * requiredHeight * requiredWidth;
  for (var h = 0 + heightOffset; h < scaledHeight - heightOffset; h++) {
    for (var w = 0 + widthOffset; w < scaledWidth - widthOffset; w++) {
-      final Color pixel = _getPixelBilinear(
+      final RGB pixel = _getPixelBilinear(
        w / scale,
        h / scale,
        image,
-        imgByteData,
+        rawRgbaBytes,
+        antiAlias: useAntiAlias,
      );
-      buffer[pixelIndex] = pixel.red / 255;
-      buffer[pixelIndex + greenOff] = pixel.green / 255;
-      buffer[pixelIndex + blueOff] = pixel.blue / 255;
+      buffer[pixelIndex] = pixel.$1 / 255;
+      buffer[pixelIndex + greenOff] = pixel.$2 / 255;
+      buffer[pixelIndex + blueOff] = pixel.$3 / 255;
      pixelIndex++;
    }
  }
@@ -253,7 +272,7 @@ Future<Float32List> preprocessImageClip(
 Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
    preprocessToMobileFaceNetFloat32List(
  Image image,
-  ByteData imageByteData,
+  Uint8List rawRgbaBytes,
  List<FaceDetectionRelative> relativeFaces, {
  int width = 112,
  int height = 112,
@@ -290,7 +309,7 @@ Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>

    _warpAffineFloat32List(
      image,
-      imageByteData,
+      rawRgbaBytes,
      alignmentResult.affineMatrix,
      alignedImagesFloat32List,
      alignedImageIndex,
@@ -320,29 +339,55 @@ Future<(Float32List, List<AlignmentResult>, List<bool>, List<double>, Size)>
 }

 /// Reads the pixel color at the specified coordinates.
-Color _readPixelColor(
-  Image image,
-  ByteData byteData,
+RGB _readPixelColor(
  int x,
  int y,
+  Image image,
+  Uint8List rgbaBytes,
 ) {
-  if (x < 0 || x >= image.width || y < 0 || y >= image.height) {
-    // throw ArgumentError('Invalid pixel coordinates.');
-    if (y != -1) {
-      log('[WARNING] `readPixelColor`: Invalid pixel coordinates, out of bounds');
+  if (y < 0 || y >= image.height || x < 0 || x >= image.width) {
+    if (y < -maxKernelRadius ||
+        y >= image.height + maxKernelRadius ||
+        x < -maxKernelRadius ||
+        x >= image.width + maxKernelRadius) {
+      _logger.severe(
+        '`readPixelColor`: Invalid pixel coordinates, out of bounds. x: $x, y: $y',
+      );
    }
-    return const Color.fromARGB(0, 0, 0, 0);
+    return const (114, 114, 114);
  }
-  assert(byteData.lengthInBytes == 4 * image.width * image.height);
+
+  assert(rgbaBytes.lengthInBytes == 4 * image.width * image.height);

  final int byteOffset = 4 * (image.width * y + x);
-  return Color(_rgbaToArgb(byteData.getUint32(byteOffset)));
+  return (
+    rgbaBytes[byteOffset], // red
+    rgbaBytes[byteOffset + 1], // green
+    rgbaBytes[byteOffset + 2] // blue
+  );
 }

-int _rgbaToArgb(int rgbaColor) {
-  final int a = rgbaColor & 0xFF;
-  final int rgb = rgbaColor >> 8;
-  return rgb + (a << 24);
+RGB _getPixelBlurred(
+  int x,
+  int y,
+  Image image,
+  Uint8List rgbaBytes,
+) {
+  double r = 0, g = 0, b = 0;
+  for (int ky = 0; ky < gaussianKernelSize; ky++) {
+    for (int kx = 0; kx < gaussianKernelSize; kx++) {
+      final int px = (x - gaussianKernelRadius + kx);
+      final int py = (y - gaussianKernelRadius + ky);
+
+      final RGB pixelRgbTuple = _readPixelColor(px, py, image, rgbaBytes);
+      final double weight = gaussianKernel[ky][kx];
+
+      r += pixelRgbTuple.$1 * weight;
+      g += pixelRgbTuple.$2 * weight;
+      b += pixelRgbTuple.$3 * weight;
+    }
+  }
+  return (r.round(), g.round(), b.round());
 }

 List<List<int>> _createGrayscaleIntMatrixFromNormalized2List(
@@ -373,29 +418,6 @@ List<List<int>> _createGrayscaleIntMatrixFromNormalized2List(
  );
 }

-Float32List _createFloat32ListFromImageChannelsFirst(
-  Image image,
-  ByteData byteDataRgba, {
-  double Function(num) normFunction = _normalizePixelRange2,
-}) {
-  final convertedBytes = Float32List(3 * image.height * image.width);
-  final buffer = Float32List.view(convertedBytes.buffer);
-
-  int pixelIndex = 0;
-  final int channelOffsetGreen = image.height * image.width;
-  final int channelOffsetBlue = 2 * image.height * image.width;
-  for (var h = 0; h < image.height; h++) {
-    for (var w = 0; w < image.width; w++) {
-      final pixel = _readPixelColor(image, byteDataRgba, w, h);
-      buffer[pixelIndex] = normFunction(pixel.red);
-      buffer[pixelIndex + channelOffsetGreen] = normFunction(pixel.green);
-      buffer[pixelIndex + channelOffsetBlue] = normFunction(pixel.blue);
-      pixelIndex++;
-    }
-  }
-  return convertedBytes.buffer.asFloat32List();
-}
-
 /// Function normalizes the pixel value to be in range [-1, 1].
 ///
 /// It assumes that the pixel value is originally in range [0, 255]
@@ -421,20 +443,6 @@ double _normalizePixelNoRange(num pixelValue) {
  return pixelValue.toDouble();
 }

-/// Encodes an [Image] object to a [Uint8List], by default in the png format.
-///
-/// Note that the result can be used with `Image.memory()` only if the [format] is png.
-Future<Uint8List> _encodeImageToUint8List(
-  Image image, {
-  ImageByteFormat format = ImageByteFormat.png,
-}) async {
-  final ByteData byteDataPng =
-      await getByteDataFromImage(image, format: format);
-  final encodedImage = byteDataPng.buffer.asUint8List();
-
-  return encodedImage;
-}
-
 Future<Image> _cropImage(
  Image image, {
  required double x,
@@ -470,7 +478,7 @@ Future<Image> _cropImage(

 void _warpAffineFloat32List(
  Image inputImage,
-  ByteData imgByteDataRgba,
+  Uint8List rawRgbaBytes,
  List<List<double>> affineMatrix,
  Float32List outputList,
  int startIndex, {
@@ -524,16 +532,16 @@ void _warpAffineFloat32List(
      final num xOrigin = (xTrans - b00) * a00Prime + (yTrans - b10) * a01Prime;
      final num yOrigin = (xTrans - b00) * a10Prime + (yTrans - b10) * a11Prime;

-      final Color pixel =
-          _getPixelBicubic(xOrigin, yOrigin, inputImage, imgByteDataRgba);
+      final RGB pixel =
+          _getPixelBicubic(xOrigin, yOrigin, inputImage, rawRgbaBytes);

      // Set the new pixel
      outputList[startIndex + 3 * (yTrans * width + xTrans)] =
-          _normalizePixelRange2(pixel.red);
+          _normalizePixelRange2(pixel.$1);
      outputList[startIndex + 3 * (yTrans * width + xTrans) + 1] =
-          _normalizePixelRange2(pixel.green);
+          _normalizePixelRange2(pixel.$2);
      outputList[startIndex + 3 * (yTrans * width + xTrans) + 2] =
-          _normalizePixelRange2(pixel.blue);
+          _normalizePixelRange2(pixel.$3);
    }
  }
 }
@@ -552,13 +560,16 @@ Future<Uint8List> _cropAndEncodeCanvas(
    width: width,
    height: height,
  );
-  return await _encodeImageToUint8List(
-    croppedImage,
-    format: ImageByteFormat.png,
-  );
+  return await _encodeImageToPng(croppedImage);
 }

-Color _getPixelBilinear(num fx, num fy, Image image, ByteData byteDataRgba) {
+RGB _getPixelBilinear(
+  num fx,
+  num fy,
+  Image image,
+  Uint8List rawRgbaBytes, {
+  bool antiAlias = false,
+}) {
  // Clamp to image boundaries
  fx = fx.clamp(0, image.width - 1);
  fy = fy.clamp(0, image.height - 1);
@@ -573,11 +584,13 @@ Color _getPixelBilinear(num fx, num fy, Image image, ByteData byteDataRgba) {
  final dx1 = 1.0 - dx;
  final dy1 = 1.0 - dy;

-  // Get the original pixels
-  final Color pixel1 = _readPixelColor(image, byteDataRgba, x0, y0);
-  final Color pixel2 = _readPixelColor(image, byteDataRgba, x1, y0);
-  final Color pixel3 = _readPixelColor(image, byteDataRgba, x0, y1);
-  final Color pixel4 = _readPixelColor(image, byteDataRgba, x1, y1);
+  // Get the original pixels (with gaussian blur if antialias)
+  final RGB Function(int, int, Image, Uint8List) readPixel =
+      antiAlias ? _getPixelBlurred : _readPixelColor;
+  final RGB pixel1 = readPixel(x0, y0, image, rawRgbaBytes);
+  final RGB pixel2 = readPixel(x1, y0, image, rawRgbaBytes);
+  final RGB pixel3 = readPixel(x0, y1, image, rawRgbaBytes);
+  final RGB pixel4 = readPixel(x1, y1, image, rawRgbaBytes);

  int bilinear(
    num val1,
@@ -589,16 +602,15 @@ Color _getPixelBilinear(num fx, num fy, Image image, ByteData byteDataRgba) {
          .round();

  // Calculate the weighted sum of pixels
-  final int r = bilinear(pixel1.red, pixel2.red, pixel3.red, pixel4.red);
-  final int g =
-      bilinear(pixel1.green, pixel2.green, pixel3.green, pixel4.green);
-  final int b = bilinear(pixel1.blue, pixel2.blue, pixel3.blue, pixel4.blue);
+  final int r = bilinear(pixel1.$1, pixel2.$1, pixel3.$1, pixel4.$1);
+  final int g = bilinear(pixel1.$2, pixel2.$2, pixel3.$2, pixel4.$2);
+  final int b = bilinear(pixel1.$3, pixel2.$3, pixel3.$3, pixel4.$3);

-  return Color.fromRGBO(r, g, b, 1.0);
+  return (r, g, b);
 }

 /// Get the pixel value using Bicubic Interpolation. Code taken mainly from https://github.com/brendan-duncan/image/blob/6e407612752ffdb90b28cd5863c7f65856349348/lib/src/image/image.dart#L697
-Color _getPixelBicubic(num fx, num fy, Image image, ByteData byteDataRgba) {
+RGB _getPixelBicubic(num fx, num fy, Image image, Uint8List rawRgbaBytes) {
  fx = fx.clamp(0, image.width - 1);
  fy = fy.clamp(0, image.height - 1);

@@ -619,66 +631,66 @@ Color _getPixelBicubic(num fx, num fy, Image image, ByteData byteDataRgba) {
              dx * dx * (2 * ipp - 5 * icp + 4 * inp - iap) +
              dx * dx * dx * (-ipp + 3 * icp - 3 * inp + iap));

-  final icc = _readPixelColor(image, byteDataRgba, x, y);
+  final icc = _readPixelColor(x, y, image, rawRgbaBytes);

  final ipp =
-      px < 0 || py < 0 ? icc : _readPixelColor(image, byteDataRgba, px, py);
-  final icp = px < 0 ? icc : _readPixelColor(image, byteDataRgba, x, py);
+      px < 0 || py < 0 ? icc : _readPixelColor(px, py, image, rawRgbaBytes);
+  final icp = px < 0 ? icc : _readPixelColor(x, py, image, rawRgbaBytes);
  final inp = py < 0 || nx >= image.width
      ? icc
-      : _readPixelColor(image, byteDataRgba, nx, py);
+      : _readPixelColor(nx, py, image, rawRgbaBytes);
  final iap = ax >= image.width || py < 0
      ? icc
-      : _readPixelColor(image, byteDataRgba, ax, py);
+      : _readPixelColor(ax, py, image, rawRgbaBytes);

-  final ip0 = cubic(dx, ipp.red, icp.red, inp.red, iap.red);
-  final ip1 = cubic(dx, ipp.green, icp.green, inp.green, iap.green);
-  final ip2 = cubic(dx, ipp.blue, icp.blue, inp.blue, iap.blue);
+  final ip0 = cubic(dx, ipp.$1, icp.$1, inp.$1, iap.$1);
+  final ip1 = cubic(dx, ipp.$2, icp.$2, inp.$2, iap.$2);
+  final ip2 = cubic(dx, ipp.$3, icp.$3, inp.$3, iap.$3);
  // final ip3 = cubic(dx, ipp.a, icp.a, inp.a, iap.a);

-  final ipc = px < 0 ? icc : _readPixelColor(image, byteDataRgba, px, y);
+  final ipc = px < 0 ? icc : _readPixelColor(px, y, image, rawRgbaBytes);
  final inc =
-      nx >= image.width ? icc : _readPixelColor(image, byteDataRgba, nx, y);
+      nx >= image.width ? icc : _readPixelColor(nx, y, image, rawRgbaBytes);
  final iac =
-      ax >= image.width ? icc : _readPixelColor(image, byteDataRgba, ax, y);
+      ax >= image.width ? icc : _readPixelColor(ax, y, image, rawRgbaBytes);

-  final ic0 = cubic(dx, ipc.red, icc.red, inc.red, iac.red);
-  final ic1 = cubic(dx, ipc.green, icc.green, inc.green, iac.green);
-  final ic2 = cubic(dx, ipc.blue, icc.blue, inc.blue, iac.blue);
+  final ic0 = cubic(dx, ipc.$1, icc.$1, inc.$1, iac.$1);
+  final ic1 = cubic(dx, ipc.$2, icc.$2, inc.$2, iac.$2);
+  final ic2 = cubic(dx, ipc.$3, icc.$3, inc.$3, iac.$3);
  // final ic3 = cubic(dx, ipc.a, icc.a, inc.a, iac.a);

  final ipn = px < 0 || ny >= image.height
      ? icc
-      : _readPixelColor(image, byteDataRgba, px, ny);
+      : _readPixelColor(px, ny, image, rawRgbaBytes);
  final icn =
-      ny >= image.height ? icc : _readPixelColor(image, byteDataRgba, x, ny);
+      ny >= image.height ? icc : _readPixelColor(x, ny, image, rawRgbaBytes);
  final inn = nx >= image.width || ny >= image.height
      ? icc
-      : _readPixelColor(image, byteDataRgba, nx, ny);
+      : _readPixelColor(nx, ny, image, rawRgbaBytes);
  final ian = ax >= image.width || ny >= image.height
      ? icc
-      : _readPixelColor(image, byteDataRgba, ax, ny);
+      : _readPixelColor(ax, ny, image, rawRgbaBytes);

-  final in0 = cubic(dx, ipn.red, icn.red, inn.red, ian.red);
-  final in1 = cubic(dx, ipn.green, icn.green, inn.green, ian.green);
-  final in2 = cubic(dx, ipn.blue, icn.blue, inn.blue, ian.blue);
+  final in0 = cubic(dx, ipn.$1, icn.$1, inn.$1, ian.$1);
+  final in1 = cubic(dx, ipn.$2, icn.$2, inn.$2, ian.$2);
+  final in2 = cubic(dx, ipn.$3, icn.$3, inn.$3, ian.$3);
  // final in3 = cubic(dx, ipn.a, icn.a, inn.a, ian.a);

  final ipa = px < 0 || ay >= image.height
      ? icc
-      : _readPixelColor(image, byteDataRgba, px, ay);
+      : _readPixelColor(px, ay, image, rawRgbaBytes);
  final ica =
-      ay >= image.height ? icc : _readPixelColor(image, byteDataRgba, x, ay);
+      ay >= image.height ? icc : _readPixelColor(x, ay, image, rawRgbaBytes);
  final ina = nx >= image.width || ay >= image.height
      ? icc
-      : _readPixelColor(image, byteDataRgba, nx, ay);
+      : _readPixelColor(nx, ay, image, rawRgbaBytes);
  final iaa = ax >= image.width || ay >= image.height
      ? icc
-      : _readPixelColor(image, byteDataRgba, ax, ay);
+      : _readPixelColor(ax, ay, image, rawRgbaBytes);

-  final ia0 = cubic(dx, ipa.red, ica.red, ina.red, iaa.red);
-  final ia1 = cubic(dx, ipa.green, ica.green, ina.green, iaa.green);
-  final ia2 = cubic(dx, ipa.blue, ica.blue, ina.blue, iaa.blue);
+  final ia0 = cubic(dx, ipa.$1, ica.$1, ina.$1, iaa.$1);
+  final ia1 = cubic(dx, ipa.$2, ica.$2, ina.$2, iaa.$2);
+  final ia2 = cubic(dx, ipa.$3, ica.$3, ina.$3, iaa.$3);
  // final ia3 = cubic(dx, ipa.a, ica.a, ina.a, iaa.a);

  final c0 = cubic(dy, ip0, ic0, in0, ia0).clamp(0, 255).toInt();
@@ -686,5 +698,32 @@ Color _getPixelBicubic(num fx, num fy, Image image, ByteData byteDataRgba) {
  final c2 = cubic(dy, ip2, ic2, in2, ia2).clamp(0, 255).toInt();
  // final c3 = cubic(dy, ip3, ic3, in3, ia3);

-  return Color.fromRGBO(c0, c1, c2, 1.0);
+  return (c0, c1, c2); // (red, green, blue)
+}
+
+List<List<double>> create2DGaussianKernel(int size, double sigma) {
+  final List<List<double>> kernel =
+      List.generate(size, (_) => List<double>.filled(size, 0));
+  double sum = 0.0;
+  final int center = size ~/ 2;
+
+  for (int y = 0; y < size; y++) {
+    for (int x = 0; x < size; x++) {
+      final int dx = x - center;
+      final int dy = y - center;
+      final double g = (1 / (2 * pi * sigma * sigma)) *
+          exp(-(dx * dx + dy * dy) / (2 * sigma * sigma));
+      kernel[y][x] = g;
+      sum += g;
+    }
+  }
+
+  // Normalize the kernel
+  for (int y = 0; y < size; y++) {
+    for (int x = 0; x < size; x++) {
+      kernel[y][x] /= sum;
+    }
+  }
+
+  return kernel;
 }
--- a/mobile/lib/utils/ml_util.dart
+++ b/mobile/lib/utils/ml_util.dart
@@ -396,7 +396,7 @@ Future<MLResult> analyzeImageStatic(Map args) async {
    final startTime = DateTime.now();

    // Decode the image once to use for both face detection and alignment
-    final (image, imageByteData) = await decodeImageFromPath(imagePath);
+    final (image, rawRgbaBytes) = await decodeImageFromPath(imagePath);
    final decodedImageSize =
        Dimensions(height: image.height, width: image.width);
    final result = MLResult.fromEnteFileID(enteFileID);
@@ -408,7 +408,7 @@ Future<MLResult> analyzeImageStatic(Map args) async {
      final resultFaces = await FaceRecognitionService.runFacesPipeline(
        enteFileID,
        image,
-        imageByteData,
+        rawRgbaBytes,
        faceDetectionAddress,
        faceEmbeddingAddress,
      );
@@ -426,7 +426,7 @@ Future<MLResult> analyzeImageStatic(Map args) async {
      final clipResult = await SemanticSearchService.runClipImage(
        enteFileID,
        image,
-        imageByteData,
+        rawRgbaBytes,
        clipImageAddress,
      );
      result.clip = clipResult;