[mob][photos] Add text embeddings cache service (#7130)
## Description

Add a text embeddings cache service to prevent recomputing embeddings for:

- Memories
- Magic cache

## Tests

Tested in debug mode on my Pixel phone.
This commit is contained in:
@@ -196,6 +196,11 @@ lib/
|
||||
- Ensure documentation reflects the current implementation
|
||||
- Update examples in specs if behavior changes
|
||||
|
||||
### 5. Database Methods - BEST PRACTICE
|
||||
**Prioritize readability in database methods**
|
||||
- For small result sets (e.g., 1-2 stale entries), prefer filtering in Dart for cleaner, more readable code
|
||||
- For large datasets, use SQL WHERE clauses for performance - they're much more efficient in SQLite
|
||||
|
||||
## Important Notes
|
||||
|
||||
- Large service files (some 70k+ lines) - consider file context when editing
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -230,7 +230,6 @@ class SuperLogging {
|
||||
} catch (e) {
|
||||
$.warning("Failed to initialize log viewer: $e");
|
||||
}
|
||||
}
|
||||
|
||||
if (appConfig.body == null) return;
|
||||
|
||||
@@ -313,14 +312,14 @@ class SuperLogging {
|
||||
saveLogString(str, rec.error);
|
||||
// Hook for external log viewer (if available)
|
||||
// This allows the log_viewer package to capture logs without creating a dependency
|
||||
if(_logViewerCallback != null) {
|
||||
try {
|
||||
if (_logViewerCallback != null) {
|
||||
_logViewerCallback!(rec, config.prefix);
|
||||
if (_logViewerCallback != null) {
|
||||
try {
|
||||
if (_logViewerCallback != null) {
|
||||
_logViewerCallback!(rec, config.prefix);
|
||||
}
|
||||
} catch (_) {
|
||||
// Silently ignore any errors from the log viewer
|
||||
}
|
||||
} catch (_) {
|
||||
// Silently ignore any errors from the log viewer
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -62,6 +62,7 @@ class MLDataDB with SqlDbBase implements IMLDataDB<int> {
|
||||
createClipEmbeddingsTable,
|
||||
createFileDataTable,
|
||||
createFaceCacheTable,
|
||||
createTextEmbeddingsCacheTable,
|
||||
];
|
||||
|
||||
// only have a single app-wide reference to the database
|
||||
@@ -1429,6 +1430,56 @@ class MLDataDB with SqlDbBase implements IMLDataDB<int> {
|
||||
Bus.instance.fire(EmbeddingUpdatedEvent());
|
||||
}
|
||||
|
||||
/// Stores [embedding] as the cached text embedding for [query].
///
/// WARNING: don't confuse this with [putClip]. If you're not sure, use [putClip]
///
/// The vector is narrowed to 32-bit floats and written as a blob, together
/// with the current [clipMlVersion] and the current time in milliseconds
/// since epoch. The statement is `INSERT OR REPLACE`, so a previous row for
/// the same [query] is overwritten.
Future<void> putRepeatedTextEmbeddingCache(
  String query,
  List<double> embedding,
) async {
  final db = await asyncDB;

  final insertSql = 'INSERT OR REPLACE INTO $textEmbeddingsCacheTable '
      '($textQueryColumn, $embeddingColumn, $mlVersionColumn, $createdAtColumn) '
      'VALUES (?, ?, ?, ?)';
  final embeddingBlob = Float32List.fromList(embedding).buffer.asUint8List();
  final createdAtMillis = DateTime.now().millisecondsSinceEpoch;

  await db.execute(
    insertSql,
    [query, embeddingBlob, clipMlVersion, createdAtMillis],
  );
}
|
||||
|
||||
/// Returns the cached text embedding for [query], or `null` on a cache miss.
///
/// WARNING: don't confuse this with [getAllClipVectors]. If you're not sure, use [getAllClipVectors]
///
/// A row counts as valid only when it was written with the current
/// [clipMlVersion] and is less than 90 days old. When rows exist for [query]
/// but none of them is valid, they are deleted before `null` is returned so
/// the caller can recompute and store a fresh embedding.
Future<List<double>?> getRepeatedTextEmbeddingCache(String query) async {
  final db = await asyncDB;
  final rows = await db.getAll(
    'SELECT $embeddingColumn, $mlVersionColumn, $createdAtColumn '
    'FROM $textEmbeddingsCacheTable '
    'WHERE $textQueryColumn = ?',
    [query],
  );

  if (rows.isEmpty) return null;

  // Entries created at or before this instant are considered stale.
  final staleCutoffMillis = DateTime.now()
      .subtract(const Duration(days: 90))
      .millisecondsSinceEpoch;

  for (final row in rows) {
    if (row[mlVersionColumn] != clipMlVersion) continue;
    if ((row[createdAtColumn] as int) <= staleCutoffMillis) continue;

    final blob = row[embeddingColumn] as Uint8List;
    // Decode exactly the bytes of the blob. Viewing `blob.buffer` directly
    // would ignore the blob's offset and length when it is itself a view
    // into a larger buffer. A float view needs a 4-byte aligned start, so
    // copy only when the blob is not aligned.
    final aligned = blob.offsetInBytes % Float32List.bytesPerElement == 0
        ? blob
        : Uint8List.fromList(blob);
    return Float32List.sublistView(aligned);
  }

  // No valid entry found, clean up
  await db.execute(
    'DELETE FROM $textEmbeddingsCacheTable WHERE $textQueryColumn = ?',
    [query],
  );
  return null;
}
|
||||
|
||||
@override
|
||||
Future<void> deleteClipEmbeddings(List<int> fileIDs) async {
|
||||
final db = await instance.asyncDB;
|
||||
|
||||
@@ -16,6 +16,8 @@ const mlVersionColumn = 'ml_version';
|
||||
const personIdColumn = 'person_id';
|
||||
const clusterIDColumn = 'cluster_id';
|
||||
const personOrClusterIdColumn = 'person_or_cluster_id';
|
||||
const textQueryColumn = 'text_query';
|
||||
const createdAtColumn = 'created_at';
|
||||
|
||||
const createFacesTable = '''CREATE TABLE IF NOT EXISTS $facesTable (
|
||||
$fileIDColumn INTEGER NOT NULL,
|
||||
@@ -137,3 +139,18 @@ CREATE TABLE IF NOT EXISTS $faceCacheTable (
|
||||
''';
|
||||
|
||||
const deleteFaceCacheTable = 'DELETE FROM $faceCacheTable';
|
||||
|
||||
// ## TEXT EMBEDDINGS CACHE TABLE
// One row per text query: the embedding blob plus the ML version and the
// creation timestamp that were recorded when the row was written.
|
||||
const textEmbeddingsCacheTable = 'text_embeddings_cache';
|
||||
|
||||
// Creates the cache table if it does not exist yet. The query text is the
// primary key, so there is at most one row per query.
const createTextEmbeddingsCacheTable = '''
|
||||
CREATE TABLE IF NOT EXISTS $textEmbeddingsCacheTable (
|
||||
$textQueryColumn TEXT NOT NULL,
|
||||
$embeddingColumn BLOB NOT NULL,
|
||||
$mlVersionColumn INTEGER NOT NULL,
|
||||
$createdAtColumn INTEGER NOT NULL,
|
||||
PRIMARY KEY ($textQueryColumn)
|
||||
);
|
||||
''';
|
||||
|
||||
// Removes every row from the cache table; the table itself is kept.
const deleteTextEmbeddingsCacheTable = 'DELETE FROM $textEmbeddingsCacheTable';
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import 'dart:async';
|
||||
import 'dart:async';
|
||||
import 'dart:io';
|
||||
|
||||
import "package:adaptive_theme/adaptive_theme.dart";
|
||||
|
||||
@@ -19,6 +19,7 @@ import "package:photos/services/smart_albums_service.dart";
|
||||
import "package:photos/services/smart_memories_service.dart";
|
||||
import "package:photos/services/storage_bonus_service.dart";
|
||||
import "package:photos/services/sync/trash_sync_service.dart";
|
||||
import "package:photos/services/text_embeddings_cache_service.dart";
|
||||
import "package:photos/services/update_service.dart";
|
||||
import "package:photos/utils/local_settings.dart";
|
||||
import "package:shared_preferences/shared_preferences.dart";
|
||||
@@ -136,6 +137,12 @@ SmartMemoriesService get smartMemoriesService {
|
||||
return _smartMemoriesService!;
|
||||
}
|
||||
|
||||
/// Backing field for [textEmbeddingsCacheService]; filled on first access.
TextEmbeddingsCacheService? _textEmbeddingsCacheService;

/// The text embeddings cache service, resolved lazily from
/// [TextEmbeddingsCacheService.instance] the first time it is requested.
TextEmbeddingsCacheService get textEmbeddingsCacheService =>
    _textEmbeddingsCacheService ??= TextEmbeddingsCacheService.instance;
|
||||
|
||||
BillingService? _billingService;
|
||||
BillingService get billingService {
|
||||
_billingService ??= BillingService(
|
||||
|
||||
@@ -193,15 +193,22 @@ class SemanticSearchService {
|
||||
return results;
|
||||
}
|
||||
|
||||
Future<Map<String, List<int>>> getMatchingFileIDs(
|
||||
/// Get matching file IDs for common repeated queries like smart memories and magic cache.
|
||||
/// WARNING: Use this method carefully - it uses persistent caching which is only
|
||||
/// beneficial for queries that are repeated across app sessions.
|
||||
/// For regular user searches, use getMatchingFiles instead.
|
||||
Future<Map<String, List<int>>> getMatchingFileIDsForCommonQueries(
|
||||
Map<String, double> queryToScore,
|
||||
) async {
|
||||
final textEmbeddings = <String, List<double>>{};
|
||||
final minimumSimilarityMap = <String, double>{};
|
||||
|
||||
for (final entry in queryToScore.entries) {
|
||||
final query = entry.key;
|
||||
final score = entry.value;
|
||||
final textEmbedding = await _getTextEmbedding(query);
|
||||
// Use cache service instead of _getTextEmbedding
|
||||
final textEmbedding =
|
||||
await textEmbeddingsCacheService.getEmbedding(query);
|
||||
textEmbeddings[query] = textEmbedding;
|
||||
minimumSimilarityMap[query] = score;
|
||||
}
|
||||
@@ -210,6 +217,7 @@ class SemanticSearchService {
|
||||
textEmbeddings,
|
||||
minimumSimilarityMap: minimumSimilarityMap,
|
||||
);
|
||||
|
||||
final result = <String, List<int>>{};
|
||||
for (final entry in queryResults.entries) {
|
||||
final query = entry.key;
|
||||
|
||||
@@ -401,8 +401,8 @@ class MagicCacheService {
|
||||
for (Prompt prompt in magicPromptsData) {
|
||||
queryToScore[prompt.query] = prompt.minScore;
|
||||
}
|
||||
final clipResults =
|
||||
await SemanticSearchService.instance.getMatchingFileIDs(queryToScore);
|
||||
final clipResults = await SemanticSearchService.instance
|
||||
.getMatchingFileIDsForCommonQueries(queryToScore);
|
||||
for (Prompt prompt in magicPromptsData) {
|
||||
final List<int> fileUploadedIDs = clipResults[prompt.query] ?? [];
|
||||
if (fileUploadedIDs.isNotEmpty) {
|
||||
|
||||
@@ -37,7 +37,6 @@ import "package:photos/services/location_service.dart";
|
||||
import "package:photos/services/machine_learning/face_ml/person/person_service.dart";
|
||||
import "package:photos/services/machine_learning/ml_result.dart";
|
||||
import "package:photos/services/search_service.dart";
|
||||
import "package:photos/utils/text_embeddings_util.dart";
|
||||
|
||||
class MemoriesResult {
|
||||
final List<SmartMemory> memories;
|
||||
@@ -103,18 +102,29 @@ class SmartMemoriesService {
|
||||
'allImageEmbeddings has ${allImageEmbeddings.length} entries $t',
|
||||
);
|
||||
|
||||
// Load pre-computed text embeddings from assets
|
||||
final textEmbeddings = await loadTextEmbeddingsFromAssets();
|
||||
if (textEmbeddings == null) {
|
||||
_logger.severe('Failed to load pre-computed text embeddings');
|
||||
throw Exception(
|
||||
'Failed to load pre-computed text embeddings',
|
||||
_logger.info('Loading text embeddings via cache service');
|
||||
final clipPositiveTextVector = Vector.fromList(
|
||||
await textEmbeddingsCacheService.getEmbedding(
|
||||
"Photo of a precious and nostalgic memory radiating warmth, vibrant energy, or quiet beauty — alive with color, light, or emotion",
|
||||
),
|
||||
);
|
||||
|
||||
final clipPeopleActivityVectors = <PeopleActivity, Vector>{};
|
||||
for (final activity in PeopleActivity.values) {
|
||||
final query = activityQuery(activity);
|
||||
clipPeopleActivityVectors[activity] = Vector.fromList(
|
||||
await textEmbeddingsCacheService.getEmbedding(query),
|
||||
);
|
||||
}
|
||||
_logger.info('Using pre-computed text embeddings from assets');
|
||||
final clipPositiveTextVector = textEmbeddings.clipPositiveVector;
|
||||
final clipPeopleActivityVectors = textEmbeddings.peopleActivityVectors;
|
||||
final clipMemoryTypeVectors = textEmbeddings.clipMemoryTypeVectors;
|
||||
|
||||
final clipMemoryTypeVectors = <ClipMemoryType, Vector>{};
|
||||
for (final memoryType in ClipMemoryType.values) {
|
||||
final query = clipQuery(memoryType);
|
||||
clipMemoryTypeVectors[memoryType] = Vector.fromList(
|
||||
await textEmbeddingsCacheService.getEmbedding(query),
|
||||
);
|
||||
}
|
||||
_logger.info('Text embeddings loaded via cache service');
|
||||
|
||||
final local = await getLocale();
|
||||
final languageCode = local?.languageCode ?? "en";
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
import 'package:logging/logging.dart';
|
||||
import 'package:photos/db/ml/db.dart';
|
||||
import 'package:photos/services/machine_learning/ml_computer.dart';
|
||||
|
||||
/// Database-backed cache in front of text embedding computation.
///
/// Look-ups go to [MLDataDB] first; on a miss the embedding is computed with
/// [MLComputer] and written back to the database for later calls.
class TextEmbeddingsCacheService {
  static final _logger = Logger('TextEmbeddingsCacheService');

  TextEmbeddingsCacheService._privateConstructor();

  /// The single shared instance of this service.
  static final instance = TextEmbeddingsCacheService._privateConstructor();

  /// Returns the text embedding for [query], using the database cache when
  /// it holds an entry and computing (then caching) the embedding otherwise.
  ///
  /// Storing a freshly computed embedding is best-effort: if the cache write
  /// fails, the failure is logged and the computed embedding is still
  /// returned, so a cache problem never discards work that already succeeded.
  /// Errors from the cache look-up or from the computation itself propagate
  /// to the caller.
  Future<List<double>> getEmbedding(String query) async {
    // 1. Check database cache
    final cached = await MLDataDB.instance.getRepeatedTextEmbeddingCache(query);
    if (cached != null) {
      _logger.info('Text embedding cache hit for query');
      return cached;
    }

    // 2. Compute new embedding
    _logger.info('Computing new text embedding for query');
    final embedding = await MLComputer.instance.runClipText(query);

    // 3. Store in database cache (best-effort, see doc comment)
    try {
      await MLDataDB.instance.putRepeatedTextEmbeddingCache(query, embedding);
    } catch (e, s) {
      _logger.warning('Failed to store text embedding in cache', e, s);
    }

    return embedding;
  }
}
|
||||
@@ -1,172 +0,0 @@
|
||||
import 'dart:convert';
|
||||
import "dart:developer" as dev show log;
|
||||
import "dart:io" show File;
|
||||
|
||||
import 'package:flutter/services.dart';
|
||||
import 'package:logging/logging.dart';
|
||||
import 'package:ml_linalg/vector.dart';
|
||||
import "package:path_provider/path_provider.dart"
|
||||
show getExternalStorageDirectory;
|
||||
import 'package:photos/models/memories/clip_memory.dart';
|
||||
import 'package:photos/models/memories/people_memory.dart';
|
||||
import "package:photos/services/machine_learning/ml_computer.dart"
|
||||
show MLComputer;
|
||||
|
||||
final _logger = Logger('TextEmbeddingsUtil');
|
||||
|
||||
/// Loads pre-computed text embeddings from assets
///
/// Reads `assets/ml/text_embeddings.json`, parses the clip-positive vector
/// and one vector per [PeopleActivity] and [ClipMemoryType] value, and
/// returns them as a [TextEmbeddings].
///
/// Returns `null` (after logging) when the asset cannot be read or parsed,
/// or when any required vector is missing or empty.
Future<TextEmbeddings?> loadTextEmbeddingsFromAssets() async {
  try {
    _logger.info('Loading text embeddings from assets');
    final jsonString =
        await rootBundle.loadString('assets/ml/text_embeddings.json');
    final data = json.decode(jsonString) as Map<String, dynamic>;

    final embeddings = data['embeddings'] as Map<String, dynamic>;

    // Parse clip positive embedding
    final clipPositiveVector = _parseEmbeddingVector(
      embeddings['clip_positive'] as Map<String, dynamic>,
    );

    // Parse people activities embeddings
    final peopleActivityVectors = _parseEnumEmbeddings<PeopleActivity>(
      PeopleActivity.values,
      embeddings['people_activities'] as Map<String, dynamic>,
    );

    // Parse clip memory types embeddings
    final clipMemoryTypeVectors = _parseEnumEmbeddings<ClipMemoryType>(
      ClipMemoryType.values,
      embeddings['clip_memory_types'] as Map<String, dynamic>,
    );

    // Check if we have all required embeddings
    if (clipPositiveVector == null) {
      _logger.severe('Clip positive vector is missing');
      throw Exception('Clip positive vector is missing');
    }

    if (peopleActivityVectors.length != PeopleActivity.values.length) {
      _logger.severe('Some people activity vectors are missing');
      throw Exception('Some people activity vectors are missing');
    }

    if (clipMemoryTypeVectors.length != ClipMemoryType.values.length) {
      _logger.severe('Some clip memory type vectors are missing');
      throw Exception('Some clip memory type vectors are missing');
    }

    _logger.info('Text embeddings loaded successfully from JSON assets');
    return TextEmbeddings(
      clipPositiveVector: clipPositiveVector,
      peopleActivityVectors: peopleActivityVectors,
      clipMemoryTypeVectors: clipMemoryTypeVectors,
    );
  } catch (e, stackTrace) {
    _logger.severe('Failed to load text embeddings from JSON', e, stackTrace);
    return null;
  }
}

/// Converts the `vector` list of a JSON embedding [entry] into a [Vector],
/// or returns `null` when the list is empty.
///
/// Each element is converted through [num.toDouble] because JSON numbers
/// without a fractional part decode as `int`, which a plain
/// `cast<double>()` would reject when the element is read.
Vector? _parseEmbeddingVector(Map<String, dynamic> entry) {
  final values = [
    for (final value in entry['vector'] as List) (value as num).toDouble(),
  ];
  return values.isEmpty ? null : Vector.fromList(values);
}

/// Builds a map from each of [enumValues] to its vector in [section].
///
/// Entries are looked up by the enum value's short name (the part after the
/// dot in `toString()`); values with no entry or an empty vector are skipped.
Map<T, Vector> _parseEnumEmbeddings<T extends Object>(
  List<T> enumValues,
  Map<String, dynamic> section,
) {
  final vectors = <T, Vector>{};
  for (final enumValue in enumValues) {
    final key = enumValue.toString().split('.').last;
    if (!section.containsKey(key)) continue;
    final vector = _parseEmbeddingVector(section[key] as Map<String, dynamic>);
    if (vector != null) {
      vectors[enumValue] = vector;
    }
  }
  return vectors;
}
|
||||
|
||||
/// Immutable bundle of the text embedding vectors used together: one
/// clip-positive vector plus one vector per [PeopleActivity] and per
/// [ClipMemoryType].
class TextEmbeddings {
  /// Creates a bundle; all three groups of vectors are required.
  const TextEmbeddings({
    required this.clipPositiveVector,
    required this.peopleActivityVectors,
    required this.clipMemoryTypeVectors,
  });

  /// Embedding of the clip-positive prompt.
  final Vector clipPositiveVector;

  /// Embedding vector for each people activity.
  final Map<PeopleActivity, Vector> peopleActivityVectors;

  /// Embedding vector for each clip memory type.
  final Map<ClipMemoryType, Vector> clipMemoryTypeVectors;
}
|
||||
|
||||
/// Helper function to generate text embeddings and save them to a JSON file
/// Run this once to generate the embeddings, then copy the output
/// to assets/ml/text_embeddings.json
///
/// Computes an embedding for the clip-positive prompt and for the prompt of
/// every [PeopleActivity] and [ClipMemoryType], then writes them as indented
/// JSON to `text_embeddings.json` in the external storage directory.
///
/// Throws an [Exception] when no external storage directory is available,
/// instead of failing on a null assertion.
Future<void> generateAndSaveTextEmbeddings() async {
  // Generate clip positive embedding
  const String clipPositiveQuery =
      'Photo of a precious and nostalgic memory radiating warmth, vibrant energy, or quiet beauty — alive with color, light, or emotion';
  final clipPositive = await _promptEmbeddingEntry(clipPositiveQuery);

  // Generate people activity embeddings
  final peopleActivities = <String, dynamic>{};
  for (final activity in PeopleActivity.values) {
    final activityName = activity.toString().split('.').last;
    peopleActivities[activityName] =
        await _promptEmbeddingEntry(activityQuery(activity));
  }

  // Generate clip memory type embeddings
  final clipMemoryTypes = <String, dynamic>{};
  for (final memoryType in ClipMemoryType.values) {
    final typeName = memoryType.toString().split('.').last;
    clipMemoryTypes[typeName] =
        await _promptEmbeddingEntry(clipQuery(memoryType));
  }

  final Map<String, dynamic> embeddingsData = {
    'version': '1.0.0',
    'embeddings': {
      'clip_positive': clipPositive,
      'people_activities': peopleActivities,
      'clip_memory_types': clipMemoryTypes,
    },
  };

  // Convert to JSON and log it
  final jsonString = const JsonEncoder.withIndent(' ').convert(embeddingsData);
  dev.log(
    '_generateAndSaveTextEmbeddings: Generated text embeddings JSON',
  );

  // The directory is nullable; fail with a clear message rather than a bare
  // null-check error when it is unavailable.
  final outputDir = await getExternalStorageDirectory();
  if (outputDir == null) {
    throw Exception(
      'External storage directory is unavailable; cannot save text embeddings',
    );
  }
  final file = File('${outputDir.path}/text_embeddings.json');
  await file.writeAsString(jsonString);
  dev.log(
    '_generateAndSaveTextEmbeddings: Saved text embeddings to ${file.path}',
  );

  dev.log(
    '_generateAndSaveTextEmbeddings: Text embeddings generation complete! Copy the JSON output above to assets/ml/text_embeddings.json',
  );
}

/// Computes the embedding for [prompt] and returns the JSON entry that pairs
/// the prompt text with its vector.
Future<Map<String, dynamic>> _promptEmbeddingEntry(String prompt) async {
  final vector = await MLComputer.instance.runClipText(prompt);
  return {
    'prompt': prompt,
    'vector': vector,
  };
}
|
||||
@@ -349,7 +349,6 @@ flutter:
|
||||
- assets/image-editor/
|
||||
- assets/icons/
|
||||
- assets/launcher_icon/
|
||||
- assets/ml/
|
||||
fonts:
|
||||
- family: Inter
|
||||
fonts:
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
- Neeraj: Potential fix for ios in-app payment
|
||||
- Neeraj: (i) Debug option to enable logViewer
|
||||
- Laurens: text embedding caching for memories and discover
|
||||
- Neeraj: (i) Debug option to enable logViewer
|
||||
- Neeraj: Potential fix for ios in-app payment
|
||||
Reference in New Issue
Block a user