Rearrange

This commit is contained in:
Manav Rathi
2024-07-01 14:52:11 +05:30
parent b9972e131a
commit eaecf91aa1

View File

@@ -167,6 +167,76 @@ const embeddingSyncTime = (model: EmbeddingModel) =>
/**
 * Persist {@link t} in local storage as the embedding sync time for the given
 * {@link model}.
 *
 * The value is stored as a string under a per-model key.
 */
const saveEmbeddingSyncTime = (t: number, model: EmbeddingModel) => {
    const keyPrefix = "embeddingSyncTime:";
    localStorage.setItem(keyPrefix + model, String(t));
};
/**
 * The maximum number of items to fetch in a single GET /embeddings/diff
 * request.
 *
 * [Note: Limit of returned items in /diff requests]
 *
 * The various GET /diff API methods, which tell the client what all has changed
 * since a timestamp (provided by the client), take a limit parameter.
 *
 * These diff API calls return all items whose updated at is greater
 * (non-inclusive) than the timestamp we provide. So there is no mechanism for
 * pagination of items which have the exact same updated at. Conceptually, it
 * may happen that there are more such items than the limit we've provided.
 *
 * The behaviour of this limit is different for file diff and embeddings diff.
 *
 * - For file diff, the limit is advisory, and remote may return fewer, equal
 *   or more items than the provided limit. The scenario where it returns more
 *   is when more files than the limit have the same updated at. Theoretically
 *   this would make the diff response unbounded, however in practice file
 *   modifications themselves are all batched. Even if the user selects all
 *   the files in their library and updates them all in one go in the UI,
 *   their client app must use batched API calls to make those updates, and
 *   each of those batches would get a distinct updated at.
 *
 * - For embeddings diff, there are no bulk updates and this limit is enforced
 *   as a maximum. While theoretically it is possible for an arbitrary number
 *   of files to have the same updated at, in practice it is not possible with
 *   the current set of APIs where clients PUT individual embeddings (the
 *   updated at is a server timestamp). And even if somehow a large number of
 *   files get the same updated at and thus get truncated in the response, it
 *   won't lead to any data loss: the client which requested that particular
 *   truncated diff will just regenerate them.
 */
const diffLimit = 500;
/**
 * Fetch a single batch of the embeddings diff for the given model from remote.
 *
 * Makes an authenticated GET /embeddings/diff request, asking for at most
 * {@link diffLimit} embeddings whose updatedAt is greater than (non-inclusive)
 * the given {@link sinceTime}.
 *
 * @param model The {@link EmbeddingModel} whose diff we wish for.
 *
 * @param sinceTime The updatedAt of the last embedding we've synced (epoch ms).
 * Pass 0 to fetch everything from the beginning.
 *
 * @returns an array of {@link RemoteEmbedding}, obtained by validating the
 * JSON body of the response. The returned array is limited to a maximum count
 * of {@link diffLimit}.
 *
 * @throws an Error if the HTTP response status is not ok.
 *
 * > See [Note: Limit of returned items in /diff requests].
 */
const getEmbeddingsDiff = async (
    model: EmbeddingModel,
    sinceTime: number,
): Promise<RemoteEmbedding[]> => {
    const query = new URLSearchParams({
        model,
        sinceTime: String(sinceTime),
        limit: String(diffLimit),
    });
    const url = await apiURL("/embeddings/diff");
    const headers = await authenticatedRequestHeaders();
    const res = await fetch(`${url}?${query.toString()}`, { headers });
    if (!res.ok) {
        throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`);
    }
    // Validate the untyped JSON before handing it back to the caller.
    const payload: unknown = await res.json();
    return z.array(RemoteEmbedding).parse(payload);
};
// MARK: - Face
/**
* Fetch new or updated face embeddings from remote and save them locally.
*
@@ -190,7 +260,7 @@ export const pullFaceEmbeddings = () =>
* This is a variant of {@link saveFaceIndex} that performs version checking as
* described in [Note: Handling versioning of embeddings].
*/
export const saveFaceIndexIfNewer = async (index: FaceIndex) => {
const saveFaceIndexIfNewer = async (index: FaceIndex) => {
const version = index.faceEmbedding.version;
if (version <= faceIndexingVersion) {
log.info(
@@ -269,71 +339,3 @@ const FaceIndex = z
})
// Retain fields we might not (currently) understand.
.passthrough();
/**
 * The maximum number of items to fetch in a single GET /embeddings/diff
 * request.
 *
 * [Note: Limit of returned items in /diff requests]
 *
 * The various GET /diff API methods, which tell the client what all has changed
 * since a timestamp (provided by the client), take a limit parameter.
 *
 * These diff API calls return all items whose updated at is greater
 * (non-inclusive) than the timestamp we provide. So there is no mechanism for
 * pagination of items which have the exact same updated at. Conceptually, it
 * may happen that there are more such items than the limit we've provided.
 *
 * The behaviour of this limit is different for file diff and embeddings diff.
 *
 * - For file diff, the limit is advisory, and remote may return fewer, equal
 *   or more items than the provided limit. The scenario where it returns more
 *   is when more files than the limit have the same updated at. Theoretically
 *   this would make the diff response unbounded, however in practice file
 *   modifications themselves are all batched. Even if the user selects all
 *   the files in their library and updates them all in one go in the UI,
 *   their client app must use batched API calls to make those updates, and
 *   each of those batches would get a distinct updated at.
 *
 * - For embeddings diff, there are no bulk updates and this limit is enforced
 *   as a maximum. While theoretically it is possible for an arbitrary number
 *   of files to have the same updated at, in practice it is not possible with
 *   the current set of APIs where clients PUT individual embeddings (the
 *   updated at is a server timestamp). And even if somehow a large number of
 *   files get the same updated at and thus get truncated in the response, it
 *   won't lead to any data loss: the client which requested that particular
 *   truncated diff will just regenerate them.
 */
const diffLimit = 500;
/**
 * Fetch a single batch of the embeddings diff for the given model from remote.
 *
 * Makes an authenticated GET /embeddings/diff request, asking for at most
 * {@link diffLimit} embeddings whose updatedAt is greater than (non-inclusive)
 * the given {@link sinceTime}.
 *
 * @param model The {@link EmbeddingModel} whose diff we wish for.
 *
 * @param sinceTime The updatedAt of the last embedding we've synced (epoch ms).
 * Pass 0 to fetch everything from the beginning.
 *
 * @returns an array of {@link RemoteEmbedding}, obtained by validating the
 * JSON body of the response. The returned array is limited to a maximum count
 * of {@link diffLimit}.
 *
 * @throws an Error if the HTTP response status is not ok.
 *
 * > See [Note: Limit of returned items in /diff requests].
 */
const getEmbeddingsDiff = async (
    model: EmbeddingModel,
    sinceTime: number,
): Promise<RemoteEmbedding[]> => {
    const query = new URLSearchParams({
        model,
        sinceTime: String(sinceTime),
        limit: String(diffLimit),
    });
    const url = await apiURL("/embeddings/diff");
    const headers = await authenticatedRequestHeaders();
    const res = await fetch(`${url}?${query.toString()}`, { headers });
    if (!res.ok) {
        throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`);
    }
    // Validate the untyped JSON before handing it back to the caller.
    const payload: unknown = await res.json();
    return z.array(RemoteEmbedding).parse(payload);
};