Rearrange
This commit is contained in:
@@ -167,6 +167,76 @@ const embeddingSyncTime = (model: EmbeddingModel) =>
|
||||
/**
 * Persist {@link t} in local storage as the embedding sync time for the given
 * {@link model}.
 *
 * The value is written under the key `embeddingSyncTime:<model>`, in its
 * string form.
 *
 * @param t The sync time to save.
 *
 * @param model The {@link EmbeddingModel} whose sync time is being saved.
 */
const saveEmbeddingSyncTime = (t: number, model: EmbeddingModel) =>
    localStorage.setItem("embeddingSyncTime:" + model, `${t}`);
|
||||
|
||||
/**
 * The maximum number of items to fetch in a single GET /embeddings/diff
 * request.
 *
 * [Note: Limit of returned items in /diff requests]
 *
 * The various GET /diff API methods, which tell the client what has changed
 * since a timestamp (provided by the client), take a limit parameter.
 *
 * These diff API calls return all items whose updated at is greater
 * (non-inclusive) than the timestamp we provide. So there is no mechanism for
 * pagination of items which have the same exact updated at. Conceptually, it
 * may happen that there are more items than the limit we've provided.
 *
 * The behaviour of this limit is different for file diff and embeddings diff.
 *
 * - For file diff, the limit is advisory, and remote may return less, equal
 *   or more items than the provided limit. The scenario where it returns more
 *   is when more files than the limit have the same updated at. Theoretically
 *   it would make the diff response unbounded, however in practice file
 *   modifications themselves are all batched. Even if the user selects all
 *   the files in their library and updates them all in one go in the UI,
 *   their client app must use batched API calls to make those updates, and
 *   each of those batches would get a distinct updated at.
 *
 * - For embeddings diff, there are no bulk updates and this limit is enforced
 *   as a maximum. While theoretically it is possible for an arbitrary number
 *   of files to have the same updated at, in practice it is not possible with
 *   the current set of APIs where clients PUT individual embeddings (the
 *   updated at is a server timestamp). And even if somehow a large number of
 *   files get the same updated at and thus get truncated in the response, it
 *   won't lead to any data loss: the client which requested that particular
 *   truncated diff will just regenerate them.
 */
const diffLimit = 500;
|
||||
|
||||
/**
 * GET embeddings for the given model that have been updated since
 * {@link sinceTime}.
 *
 * This fetches the next {@link diffLimit} embeddings whose {@link updatedAt} is
 * greater than the given {@link sinceTime} (non-inclusive).
 *
 * @param model The {@link EmbeddingModel} whose diff we wish for.
 *
 * @param sinceTime The updatedAt of the last embedding we've synced (epoch ms).
 * Pass 0 to fetch everything from the beginning.
 *
 * @returns an array of {@link RemoteEmbedding}. The returned array is limited
 * to a maximum count of {@link diffLimit}.
 *
 * @throws If the HTTP response is not OK. Parsing the response body against
 * the {@link RemoteEmbedding} array schema may also throw.
 *
 * > See [Note: Limit of returned items in /diff requests].
 */
const getEmbeddingsDiff = async (
    model: EmbeddingModel,
    sinceTime: number,
): Promise<RemoteEmbedding[]> => {
    // Query parameters must be strings, so stringify the numeric ones.
    const params = new URLSearchParams({
        model,
        sinceTime: `${sinceTime}`,
        limit: `${diffLimit}`,
    });
    const url = await apiURL("/embeddings/diff");
    const res = await fetch(`${url}?${params.toString()}`, {
        headers: await authenticatedRequestHeaders(),
    });
    if (!res.ok) throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`);
    // Validate the untyped JSON payload before handing it to callers.
    return z.array(RemoteEmbedding).parse(await res.json());
};
|
||||
|
||||
// MARK: - Face
|
||||
|
||||
/**
|
||||
* Fetch new or updated face embeddings from remote and save them locally.
|
||||
*
|
||||
@@ -190,7 +260,7 @@ export const pullFaceEmbeddings = () =>
|
||||
* This is a variant of {@link saveFaceIndex} that performs version checking as
|
||||
* described in [Note: Handling versioning of embeddings].
|
||||
*/
|
||||
export const saveFaceIndexIfNewer = async (index: FaceIndex) => {
|
||||
const saveFaceIndexIfNewer = async (index: FaceIndex) => {
|
||||
const version = index.faceEmbedding.version;
|
||||
if (version <= faceIndexingVersion) {
|
||||
log.info(
|
||||
@@ -269,71 +339,3 @@ const FaceIndex = z
|
||||
})
|
||||
// Retain fields we might not (currently) understand.
|
||||
.passthrough();
|
||||
|
||||
/**
 * The maximum number of items to fetch in a single GET /embeddings/diff
 * request.
 *
 * [Note: Limit of returned items in /diff requests]
 *
 * The various GET /diff API methods, which tell the client what has changed
 * since a timestamp (provided by the client), take a limit parameter.
 *
 * These diff API calls return all items whose updated at is greater
 * (non-inclusive) than the timestamp we provide. So there is no mechanism for
 * pagination of items which have the same exact updated at. Conceptually, it
 * may happen that there are more items than the limit we've provided.
 *
 * The behaviour of this limit is different for file diff and embeddings diff.
 *
 * - For file diff, the limit is advisory, and remote may return less, equal
 *   or more items than the provided limit. The scenario where it returns more
 *   is when more files than the limit have the same updated at. Theoretically
 *   it would make the diff response unbounded, however in practice file
 *   modifications themselves are all batched. Even if the user selects all
 *   the files in their library and updates them all in one go in the UI,
 *   their client app must use batched API calls to make those updates, and
 *   each of those batches would get a distinct updated at.
 *
 * - For embeddings diff, there are no bulk updates and this limit is enforced
 *   as a maximum. While theoretically it is possible for an arbitrary number
 *   of files to have the same updated at, in practice it is not possible with
 *   the current set of APIs where clients PUT individual embeddings (the
 *   updated at is a server timestamp). And even if somehow a large number of
 *   files get the same updated at and thus get truncated in the response, it
 *   won't lead to any data loss: the client which requested that particular
 *   truncated diff will just regenerate them.
 */
const diffLimit = 500;
|
||||
|
||||
/**
 * GET embeddings for the given model that have been updated since
 * {@link sinceTime}.
 *
 * This fetches the next {@link diffLimit} embeddings whose {@link updatedAt} is
 * greater than the given {@link sinceTime} (non-inclusive).
 *
 * @param model The {@link EmbeddingModel} whose diff we wish for.
 *
 * @param sinceTime The updatedAt of the last embedding we've synced (epoch ms).
 * Pass 0 to fetch everything from the beginning.
 *
 * @returns an array of {@link RemoteEmbedding}. The returned array is limited
 * to a maximum count of {@link diffLimit}.
 *
 * @throws If the HTTP response is not OK. Parsing the response body against
 * the {@link RemoteEmbedding} array schema may also throw.
 *
 * > See [Note: Limit of returned items in /diff requests].
 */
const getEmbeddingsDiff = async (
    model: EmbeddingModel,
    sinceTime: number,
): Promise<RemoteEmbedding[]> => {
    // Query parameters must be strings, so stringify the numeric ones.
    const params = new URLSearchParams({
        model,
        sinceTime: `${sinceTime}`,
        limit: `${diffLimit}`,
    });
    const url = await apiURL("/embeddings/diff");
    const res = await fetch(`${url}?${params.toString()}`, {
        headers: await authenticatedRequestHeaders(),
    });
    if (!res.ok) throw new Error(`Failed to fetch ${url}: HTTP ${res.status}`);
    // Validate the untyped JSON payload before handing it to callers.
    return z.array(RemoteEmbedding).parse(await res.json());
};
|
||||
|
||||
Reference in New Issue
Block a user