This commit is contained in:
Manav Rathi
2024-09-03 15:13:05 +05:30
parent 041aaea0b9
commit 875fd10b50
2 changed files with 3 additions and 2 deletions

View File

@@ -178,7 +178,8 @@ export const clipMatches = async (
const textEmbedding = normalized(t);
const items = (await cachedOrReadCLIPIndexes()).map(
({ fileID, embedding }) =>
// The dot product gives us cosine similarity here since both the
// vectors are already normalized.
[fileID, dotProduct(embedding, textEmbedding)] as const,
);
// This score threshold was obtained heuristically. 0.2 generally gives solid

View File

@@ -24,7 +24,7 @@ export const clamp = (value: number, min: number, max: number) =>
* product. When we're computing dot products in a hot loop, skipping over that
* unnecessary renormalization matters.
*
* When comparing embeddings we usually want is the cosine similarity, but when
* When comparing embeddings what we usually want is the cosine similarity, but if
* both the embeddings involved are already normalized, we can save the norm
* calculations and directly do their `dotProduct`.
*