Add option for exact search of similar files

This commit is contained in:
laurenspriem
2025-08-05 11:52:03 +05:30
parent 6dfcc58144
commit b5113dd420
4 changed files with 63 additions and 8 deletions

View File

@@ -176,11 +176,15 @@ class ClipVectorDB {
Future<(Uint64List, Float32List)> searchClosestVectors(
List<double> query,
int count,
bool exact,
) async {
final db = await _vectorDB;
try {
final result =
await db.searchVectors(query: query, count: BigInt.from(count));
final result = await db.searchVectors(
query: query,
count: BigInt.from(count),
exact: exact,
);
return result;
} catch (e, s) {
_logger.severe("Error searching closest vectors", e, s);
@@ -190,10 +194,12 @@ class ClipVectorDB {
Future<(BigInt, double)> searchClosestVector(
List<double> query,
bool exact,
) async {
final db = await _vectorDB;
try {
final result = await db.searchVectors(query: query, count: BigInt.one);
final result =
await db.searchVectors(query: query, count: BigInt.one, exact: exact);
return (result.$1[0], result.$2[0]);
} catch (e, s) {
_logger.severe("Error searching closest vector", e, s);
@@ -204,12 +210,14 @@ class ClipVectorDB {
Future<(List<Uint64List>, List<Float32List>)> bulkSearchVectors(
List<Float32List> queries,
BigInt count,
bool exact,
) async {
final db = await _vectorDB;
try {
final result = await db.bulkSearchVectors(
queries: queries,
count: count,
exact: exact,
);
return result;
} catch (e, s) {
@@ -230,7 +238,7 @@ class ClipVectorDB {
final minimumSimilarity = minimumSimilarityMap[query]!;
final textEmbedding = entry.value;
final (potentialFileIDs, distances) =
await searchClosestVectors(textEmbedding, 1000);
await searchClosestVectors(textEmbedding, 1000, true);
final queryResults = <QueryResult>[];
for (var i = 0; i < potentialFileIDs.length; i++) {
final similarity = 1 - distances[i];

View File

@@ -1,11 +1,13 @@
import "dart:math" show max;
import "dart:typed_data" show Float32List;
import "package:flutter/foundation.dart" show kDebugMode;
import "package:flutter_rust_bridge/flutter_rust_bridge_for_generated.dart"
show Uint64List;
import 'package:logging/logging.dart';
import "package:photos/db/ml/clip_vector_db.dart";
import "package:photos/db/ml/db.dart";
import "package:photos/extensions/stop_watch.dart";
import 'package:photos/models/file/file.dart';
import "package:photos/models/file/file_type.dart";
import "package:photos/models/ml/vector.dart";
@@ -21,10 +23,18 @@ class SimilarImagesService {
/// Returns a list of [SimilarFiles], where each [SimilarFiles] object contains
/// a list of files that are perceptually similar.
Future<List<SimilarFiles>> getSimilarFiles(double distanceThreshold) async {
Future<List<SimilarFiles>> getSimilarFiles(
double distanceThreshold,
bool exact,
) async {
try {
final now = DateTime.now();
final List<SimilarFiles> result =
await _getSimilarFiles(distanceThreshold);
await _getSimilarFiles(distanceThreshold, exact);
final duration = DateTime.now().difference(now);
_logger.info(
"Found ${result.length} similar files in ${duration.inSeconds} seconds for threshold $distanceThreshold and exact $exact",
);
return result;
} catch (e, s) {
_logger.severe("failed to get similar files", e, s);
@@ -32,10 +42,15 @@ class SimilarImagesService {
}
}
Future<List<SimilarFiles>> _getSimilarFiles(double distanceThreshold) async {
Future<List<SimilarFiles>> _getSimilarFiles(
double distanceThreshold,
bool exact,
) async {
final w = (kDebugMode ? EnteWatch('getSimilarFiles') : null)?..start();
final mlDataDB = MLDataDB.instance;
_logger.info("Checking migration and filling clip vector DB");
await mlDataDB.checkMigrateFillClipVectorDB();
w?.log("checkMigrateFillClipVectorDB");
// Get the embeddings ready for vector search
final List<EmbeddingVector> allImageEmbeddings =
@@ -48,13 +63,16 @@ class SimilarImagesService {
final keys = Uint64List.fromList(
allImageEmbeddings.map((e) => BigInt.from(e.fileID)).toList(),
);
w?.log("getAllClipVectors");
// Run bulk vector search
final (vectorKeys, distances) =
await ClipVectorDB.instance.bulkSearchVectors(
clipFloat32,
BigInt.from(100),
exact,
);
w?.log("bulkSearchVectors");
// Get all files, and create a map of fileID to file
final allFiles = Set<EnteFile>.from(
@@ -66,6 +84,7 @@ class SimilarImagesService {
allFileIdsToFile[file.uploadedFileID!] = file;
}
}
w?.log("getAllFilesForSearch");
// Run through the vector search results and create SimilarFiles objects
final alreadyUsedFileIDs = <int>{};
@@ -111,9 +130,11 @@ class SimilarImagesService {
allSimilarFiles.add(similarFiles);
}
}
w?.log("going through files");
// Sort the similar files by total size in descending order
allSimilarFiles.sort((a, b) => b.totalSize.compareTo(a.totalSize));
w?.log("sort similar files");
return allSimilarFiles;
}

View File

@@ -420,6 +420,7 @@ class _MLDebugSectionWidgetState extends State<MLDebugSectionWidget> {
final (vectorKeys, distances) = await vectorDB.bulkSearchVectors(
queries: queries,
count: count,
exact: false,
);
w?.log(
@@ -504,6 +505,7 @@ class _MLDebugSectionWidgetState extends State<MLDebugSectionWidget> {
final (vectorKeys, distances) = await vectorDB.bulkSearchVectors(
queries: clipFloat32,
count: count,
exact: false,
);
w?.log(

View File

@@ -12,6 +12,7 @@ import 'package:photos/theme/ente_theme.dart';
import "package:photos/ui/common/loading_widget.dart";
import 'package:photos/ui/components/buttons/button_widget.dart';
import "package:photos/ui/components/models/button_type.dart";
import "package:photos/ui/components/toggle_switch_widget.dart";
import "package:photos/ui/viewer/file/detail_page.dart";
import "package:photos/ui/viewer/file/thumbnail_widget.dart";
import "package:photos/utils/dialog_util.dart";
@@ -49,6 +50,7 @@ class _SimilarImagesPageState extends State<SimilarImagesPage> {
double _distanceThreshold = 0.04; // Default value
List<SimilarFiles> _similarFilesList = [];
SortKey _sortKey = SortKey.size;
bool _exactSearch = false;
@override
void initState() {
@@ -159,6 +161,25 @@ class _SimilarImagesPageState extends State<SimilarImagesPage> {
textAlign: TextAlign.center,
),
const SizedBox(height: 48),
Row(
mainAxisAlignment: MainAxisAlignment.spaceBetween,
children: [
Text(
"Exact search", // TODO: lau: extract string
style: getEnteTextTheme(context).bodyBold,
),
ToggleSwitchWidget(
value: () => _exactSearch,
onChanged: () async {
if (_isDisposed) return;
setState(() {
_exactSearch = !_exactSearch;
});
},
),
],
),
const SizedBox(height: 32),
ButtonWidget(
labelText: "Find similar images", // TODO: lau: extract string
buttonType: ButtonType.primary,
@@ -269,8 +290,11 @@ class _SimilarImagesPageState extends State<SimilarImagesPage> {
});
try {
// Pass the user's exact-search toggle (_exactSearch) through to the service
_logger.info("exact mode: $_exactSearch");
final similarFiles = await SimilarImagesService.instance
.getSimilarFiles(_distanceThreshold);
.getSimilarFiles(_distanceThreshold, _exactSearch);
_logger.info(
"Found ${similarFiles.length} groups of similar images",
);