[web] Store user entities verbatim (#3458)

Manav Rathi
2024-09-25 13:58:46 +05:30
committed by GitHub
16 changed files with 598 additions and 517 deletions

View File

@@ -9,12 +9,16 @@ import log from "./log";
* storage is limited to the main thread).
*
* The "kv" database consists of one object store, "kv". Keys are strings.
* Values can be strings or number or booleans.
* Values can be arbitrary JSON objects.
*/
interface KVDBSchema extends DBSchema {
kv: {
key: string;
value: string | number | boolean;
/**
* Typescript doesn't have a native JSON type, so this needs to be typed
* as `unknown`.
*/
value: unknown;
};
}
@@ -101,8 +105,16 @@ export const clearKVDB = async () => {
/**
* Return the value stored corresponding to {@link key}, or `undefined`
* if there is no such entry.
*
* Typescript doesn't have a native JSON type, so the return value is typed as
* `unknown`. For primitive types, you can avoid casting by using the
* {@link getKVS} (string), {@link getKVN} (number) or {@link getKVB} (boolean)
* methods that do an additional runtime check of the type.
*/
export const getKV = async (key: string) => _getKV<string>(key, "string");
export const getKV = async (key: string) => {
const db = await kvDB();
return db.get("kv", key);
};
export const _getKV = async <T extends string | number | boolean>(
key: string,
@@ -113,11 +125,14 @@ export const _getKV = async <T extends string | number | boolean>(
if (v === undefined) return undefined;
if (typeof v != type)
throw new Error(
`Expected the value corresponding to key ${key} to be a ${type}, but instead got ${v}`,
`Expected the value corresponding to key ${key} to be a ${type}, but instead got ${String(v)}`,
);
return v as T;
};
/** String variant of {@link getKV}. */
export const getKVS = async (key: string) => _getKV<string>(key, "string");
/** Numeric variant of {@link getKV}. */
export const getKVN = async (key: string) => _getKV<number>(key, "number");
@@ -127,8 +142,11 @@ export const getKVB = async (key: string) => _getKV<boolean>(key, "boolean");
/**
* Save the given {@link value} corresponding to {@link key}, overwriting any
* existing value.
*
* @param value Any arbitrary JSON object. Typescript doesn't have a native JSON
* type, so this is typed as an `unknown`.
*/
export const setKV = async (key: string, value: string | number | boolean) => {
export const setKV = async (key: string, value: unknown) => {
const db = await kvDB();
await db.put("kv", value, key);
};
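// Illustrative usage from a caller module (not part of this commit; the
// "example/items" key and its value are hypothetical, while "token" is used
// elsewhere in this diff): setKV now accepts any JSON-serializable value,
// getKV returns it as `unknown`, and the typed variants add a runtime check
// for primitives.
//
// import { getKV, getKVS, setKV } from "@/base/kv";
const kvUsageExample = async () => {
    // Store an arbitrary JSON value verbatim.
    await setKV("example/items", [{ id: "1", data: { name: "Home" } }]);
    // Read it back as `unknown`; callers narrow or validate it themselves
    // (e.g. with a zod schema, as the user entity DB code in this diff does).
    const items = await getKV("example/items");
    // For primitive values, the typed variants avoid a cast.
    const token = await getKVS("token"); // string | undefined
    console.log(items, token);
};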

View File

@@ -2,7 +2,7 @@
import { ensure } from "@/utils/ensure";
import { z } from "zod";
import { getKV } from "./kv";
import { getKVS } from "./kv";
// TODO: During login the only field present is email, which makes the
// optionality indicated by these types incorrect.
@@ -57,4 +57,4 @@ export const ensureLocalUser = (): LocalUser => {
* The underlying data is stored in IndexedDB, and can be accessed from web
* workers.
*/
export const ensureAuthToken = async () => ensure(await getKV("token"));
export const ensureAuthToken = async () => ensure(await getKVS("token"));

View File

@@ -1,4 +1,4 @@
import { getKV } from "@/base/kv";
import { getKVS } from "@/base/kv";
/**
* Return the origin (scheme, host, port triple) that should be used for making
@@ -35,7 +35,7 @@ export const apiURL = async (path: string) => (await apiOrigin()) + path;
* Otherwise return undefined.
*/
export const customAPIOrigin = async () =>
(await getKV("apiOrigin")) ??
(await getKVS("apiOrigin")) ??
process.env.NEXT_PUBLIC_ENTE_ENDPOINT ??
undefined;

View File

@@ -1,6 +1,6 @@
import { useIsMobileWidth } from "@/base/hooks";
import { ensureOk } from "@/base/http";
import { getKV, removeKV, setKV } from "@/base/kv";
import { getKVS, removeKV, setKV } from "@/base/kv";
import log from "@/base/log";
import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined";
import {
@@ -69,7 +69,8 @@ const Contents: React.FC<ContentsProps> = (props) => {
>();
useEffect(
() => void getKV("apiOrigin").then((o) => setInitialAPIOrigin(o ?? "")),
() =>
void getKVS("apiOrigin").then((o) => setInitialAPIOrigin(o ?? "")),
[],
);

View File

@@ -29,11 +29,12 @@ import { deleteDB } from "idb";
*/
export const runMigrations = async () => {
const m = (await getKVN("migrationLevel")) ?? 0;
const latest = 2;
const latest = 3;
if (m < latest) {
log.info(`Running migrations ${m} => ${latest}`);
if (m < 1 && isDesktop) await m0();
if (m < 2) await m1();
if (m < 1 && isDesktop) await m1();
if (m < 2) await m2();
if (m < 3) await m3();
await setKV("migrationLevel", latest);
}
};
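// Illustrative pattern (hypothetical, not part of this commit): a future
// migration would be added by bumping `latest` and appending another guarded
// step, e.g.
//
//     const latest = 4;
//     ...
//     if (m < 3) await m3();
//     if (m < 4) await m4();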
@@ -42,7 +43,7 @@ export const runMigrations = async () => {
// almost all clients would've migrated over.
// Added: Aug 2024 (v1.7.3). Prunable.
const m0 = () =>
const m1 = () =>
Promise.all([
// Delete the legacy face DB v1.
deleteDB("mldata"),
@@ -67,7 +68,7 @@ const m0 = () =>
});
// Added: Sep 2024 (v1.7.5-beta). Prunable.
const m1 = () =>
const m2 = () =>
// Older versions of the user-entities code kept the diff related state
// in a different place. These entries are not needed anymore (the tags
// themselves will get resynced).
@@ -75,4 +76,27 @@ const m1 = () =>
localForage.removeItem("location_tags"),
localForage.removeItem("location_tags_key"),
localForage.removeItem("location_tags_time"),
// Remove data from an intermediate format that stored user-entities
// piecewise instead of as generic, verbatim, entities.
removeKV("locationTags"),
removeKV("entityKey/locationTags"),
removeKV("latestUpdatedAt/locationTags"),
]);
// Added: Sep 2024 (v1.7.5-beta). Prunable.
const m3 = () =>
Promise.all([
// Delete the legacy face DB v1.
//
// This was already removed in m1, but that was behind an isDesktop
// check; later I found out that old web versions also created this
// DB (although empty).
deleteDB("mldata"),
// Remove data from an intermediate format that stored user-entities in
// their parsed form instead of as generic, verbatim, entities.
removeKV("locationTags"),
removeKV("entityKey/location"),
removeKV("latestUpdatedAt/location"),
]);

View File

@@ -130,6 +130,8 @@ export const clusterFaces = async (
// The resultant clusters.
// TODO-Cluster Later on, instead of starting from a blank slate, this will
// be the list of existing clusters we fetch from remote.
// - fetchRemoteClusterFeedback
// [..local, ..remote]
let clusters: FaceCluster[] = [];
// Process the faces in batches, but keep an overlap between batches to
@@ -188,6 +190,10 @@ export const clusterFaces = async (
`Generated ${sortedClusters.length} clusters from ${faces.length} faces (${clusteredFaceCount} clustered ${faces.length - clusteredFaceCount} unclustered) (${timeTakenMs} ms)`,
);
// reconcileClusters
// Save local
// updated map -> remote - Put
return { clusters: sortedClusters, people };
};

View File

@@ -431,6 +431,7 @@ export const setFaceClusters = async (clusters: FaceCluster[]) => {
* - A cgroup, in which case it should add or overwrite the entry for the
* corresponding cluster group (as identified by its {@link id}).
*/
// TODO-Cluster delete me
export const applyCGroupDiff = async (diff: (string | CGroup)[]) => {
const db = await mlDB();
const tx = db.transaction("cluster-group", "readwrite");

View File

@@ -3,8 +3,8 @@ import log from "@/base/log";
import type { EnteFile } from "@/new/photos/types/file";
import { nullToUndefined } from "@/utils/transform";
import { z } from "zod";
import { gunzip, gzip } from "../../utils/gzip";
import { fetchFileData, putFileData } from "../file-data";
import { gunzip, gzip } from "../gzip";
import { type RemoteCLIPIndex } from "./clip";
import { type RemoteFaceIndex } from "./face";

View File

@@ -2,9 +2,9 @@ import { masterKeyFromSession } from "@/base/session-store";
import { wipClusterEnable } from ".";
import type { EnteFile } from "../../types/file";
import { getLocalFiles } from "../files";
import { pullCGroups } from "../user-entity";
import { pullUserEntities, savedCGroups } from "../user-entity";
import type { FaceCluster } from "./cluster";
import { getClusterGroups, getFaceIndexes } from "./db";
import { getFaceIndexes } from "./db";
import { fileIDFromFaceID } from "./face";
/**
@@ -131,7 +131,7 @@ export const syncCGroups = async () => {
if (!(await wipClusterEnable())) return;
const masterKey = await masterKeyFromSession();
await pullCGroups(masterKey);
await pullUserEntities("cgroup", masterKey);
};
export type NamedPerson = Omit<Person, "name"> & {
@@ -180,9 +180,9 @@ export const namedPeopleFromCGroups = async (): Promise<NamedPerson[]> => {
}
// Convert cgroups to people.
const cgroups = await getClusterGroups();
const cgroups = await savedCGroups();
return cgroups
.map((cgroup) => {
.map(({ id, data: cgroup }) => {
// Hidden cgroups are clusters specifically marked so as to not be shown
// in the UI.
if (cgroup.isHidden) return undefined;
@@ -229,8 +229,6 @@ export const namedPeopleFromCGroups = async (): Promise<NamedPerson[]> => {
displayFaceFile = highestScoringFace.file;
}
const id = cgroup.id;
return { id, name, fileIDs, displayFaceID, displayFaceFile };
})
.filter((c) => !!c)

View File

@@ -12,7 +12,7 @@ import * as chrono from "chrono-node";
import { expose } from "comlink";
import { z } from "zod";
import {
pullLocationTags,
pullUserEntities,
savedLocationTags,
type LocationTag,
} from "../user-entity";
@@ -47,7 +47,7 @@ export class SearchWorker {
*/
async sync(masterKey: Uint8Array) {
return Promise.all([
pullLocationTags(masterKey)
pullUserEntities("location", masterKey)
.then(() => savedLocationTags())
.then((ts) => (this.locationTags = ts)),
fetchCities().then((cs) => (this.cities = cs)),

View File

@@ -1,489 +0,0 @@
import {
decryptBlob,
decryptBoxB64,
encryptBoxB64,
generateNewBlobOrStreamKey,
} from "@/base/crypto";
import { authenticatedRequestHeaders, ensureOk, HTTPError } from "@/base/http";
import { getKV, getKVN, setKV } from "@/base/kv";
import { apiURL } from "@/base/origins";
import { ensure } from "@/utils/ensure";
import { nullToUndefined } from "@/utils/transform";
import { z } from "zod";
import { gunzip } from "./gzip";
import { applyCGroupDiff } from "./ml/db";
import type { CGroup } from "./ml/people";
/**
* User entities are predefined lists of otherwise arbitrary data that the user
* can store for their account.
*
* e.g. location tags, cluster groups.
*/
export type EntityType =
/**
* A location tag.
*
* The entity data is base64(encrypt(json))
*/
| "location"
/**
* A cluster group.
*
* The entity data is base64(encrypt(gzip(json)))
*/
| "cgroup";
/**
* Update our local location tags with changes from remote.
*
* This function fetches all the location tag user entities from remote and
* updates our local database. It uses local state to remember the latest entry
* the last time it did a pull, so each subsequent pull is a lightweight diff.
*
* @param masterKey The user's master key. This is used to encrypt and decrypt
* the location tags specific entity key.
*/
export const pullLocationTags = async (masterKey: Uint8Array) => {
const decoder = new TextDecoder();
const parse = (id: string, data: Uint8Array): LocationTag => ({
id,
...RemoteLocationTag.parse(JSON.parse(decoder.decode(data))),
});
const processBatch = async (entities: UserEntityChange[]) => {
const existingTagsByID = new Map(
(await savedLocationTags()).map((t) => [t.id, t]),
);
entities.forEach(({ id, data }) =>
data
? existingTagsByID.set(id, parse(id, data))
: existingTagsByID.delete(id),
);
return saveLocationTags([...existingTagsByID.values()]);
};
return pullUserEntities("location", masterKey, processBatch);
};
/** Zod schema for the tag that we get from or put to remote. */
const RemoteLocationTag = z.object({
name: z.string(),
radius: z.number(),
centerPoint: z.object({
latitude: z.number(),
longitude: z.number(),
}),
});
/** Zod schema for the tag that we persist locally. */
const LocalLocationTag = RemoteLocationTag.extend({
id: z.string(),
});
export type LocationTag = z.infer<typeof LocalLocationTag>;
const saveLocationTags = (tags: LocationTag[]) =>
setKV("locationTags", JSON.stringify(tags));
/**
* Return all the location tags that are present locally.
*
* Use {@link pullLocationTags} to synchronize this list with remote.
*/
export const savedLocationTags = async () =>
LocalLocationTag.array().parse(
JSON.parse((await getKV("locationTags")) ?? "[]"),
);
/**
* Update our local cgroups with changes from remote.
*
* This fetches all the user entities corresponding to the "cgroup" entity type
* from remote that have been created, updated or deleted since the last time we
* checked.
*
* This diff is then applied to the data we have persisted locally.
*
* @param masterKey The user's master key. This is used to encrypt and decrypt
* the cgroup specific entity key.
*/
export const pullCGroups = (masterKey: Uint8Array) => {
// See: [Note: strict mode migration]
//
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
const parse = async (id: string, data: Uint8Array): Promise<CGroup> => ({
id,
name: undefined,
avatarFaceID: undefined,
...RemoteCGroup.parse(JSON.parse(await gunzip(data))),
});
const processBatch = async (entities: UserEntityChange[]) =>
await applyCGroupDiff(
await Promise.all(
entities.map(async ({ id, data }) =>
data ? await parse(id, data) : id,
),
),
);
return pullUserEntities("cgroup", masterKey, processBatch);
};
const RemoteFaceCluster = z.object({
id: z.string(),
faces: z.string().array(),
});
const RemoteCGroup = z.object({
name: z.string().nullish().transform(nullToUndefined),
assigned: z.array(RemoteFaceCluster),
// The remote cgroup also has a "rejected" property, but that is not
// currently used by any of the clients.
isHidden: z.boolean(),
avatarFaceID: z.string().nullish().transform(nullToUndefined),
});
/**
* The maximum number of items to fetch in a single diff
*
* [Note: Limit of returned items in /diff requests]
*
* The various GET /diff API methods, which tell the client what all has changed
* since a timestamp (provided by the client) take a limit parameter.
*
* These diff API calls return all items whose updated at is greater
* (non-inclusive) than the timestamp we provide. So there is no mechanism for
* pagination of items which have the exact same updated at.
*
* Conceptually, it may happen that there are more items than the limit we've
* provided, but there are practical safeguards.
*
* For file diff, the limit is advisory, and remote may return less, equal or
* more items than the provided limit. The scenario where it returns more is
* when more files than the limit have the same updated at. Theoretically it
* would make the diff response unbounded, however in practice file
* modifications themselves are all batched. Even if the user were to select all
* the files in their library and updates them all in one go in the UI, their
* client app is required to use batched API calls to make those updates, and
* each of those batches would get distinct updated at.
*/
const defaultDiffLimit = 500;
/**
* An entry in the user entity diff.
*
* Each change either contains the latest data associated with a particular user
* entity that has been created or updated, or indicates that the corresponding
* entity has been deleted.
*/
interface UserEntityChange {
/**
* A UUID or nanoid of the entity.
*/
id: string;
/**
* Arbitrary (decrypted) data associated with the entity. The format of this
* data is specific to each entity type.
*
* This will not be present for entities that have been deleted on remote.
*/
data: Uint8Array | undefined;
/**
* Epoch microseconds denoting when this entity was last changed (created or
* updated or deleted).
*/
updatedAt: number;
}
/**
* Sync of the given {@link type} entities that we have locally with remote.
*
* This fetches all the user entities of {@link type} from remote that have been
* created, updated or deleted since the last time we checked.
*
* For each diff response, the {@link processBatch} is invoked to give a chance
* to caller to apply the updates to the data we have persisted locally.
*
* The user's {@link masterKey} is used to decrypt (or encrypt, when generating
* a new one) the entity key.
*/
const pullUserEntities = async (
type: EntityType,
masterKey: Uint8Array,
processBatch: (entities: UserEntityChange[]) => Promise<void>,
) => {
const entityKeyB64 = await getOrCreateEntityKeyB64(type, masterKey);
let sinceTime = (await savedLatestUpdatedAt(type)) ?? 0;
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, no-constant-condition
while (true) {
const entities = await userEntityDiff(type, sinceTime, entityKeyB64);
if (entities.length == 0) break;
await processBatch(entities);
sinceTime = entities.reduce(
(max, entity) => Math.max(max, entity.updatedAt),
sinceTime,
);
await saveLatestUpdatedAt(type, sinceTime);
}
};
/**
* Zod schema for a item in the user entity diff.
*/
const RemoteUserEntityChange = z.object({
id: z.string(),
/**
* Base64 string containing the encrypted contents of the entity.
*
* Will be `null` when isDeleted is true.
*/
encryptedData: z.string().nullable(),
/**
* Base64 string containing the decryption header.
*
* Will be `null` when isDeleted is true.
*/
header: z.string().nullable(),
isDeleted: z.boolean(),
updatedAt: z.number(),
});
/**
* Fetch the next set of changes (upsert or deletion) to user entities of the
* given type since the given time.
*
* @param type The type of the entities to fetch.
*
* @param sinceTime Epoch milliseconds. This is used to ask remote to provide us
* only entities whose {@link updatedAt} is more than the given value. Set this
* to zero to start from the beginning.
*
* @param entityKeyB64 The base64 encoded key to use for decrypting the
* encrypted contents of the user entity.
*
* [Note: Diff response will have at most one entry for an id]
*
* Unlike git diffs which track all changes, the diffs we get from remote are
* guaranteed to contain only one entry (upsert or delete) for a particular Ente
* object. This holds true irrespective of the diff limit.
*
* For example, in a user entity diff, it is guaranteed that there will only be
* at max one entry for a particular entity id. The entry will have no data to
* indicate that the corresponding entity was deleted. Otherwise, when the data
* is present, it is taken as the creation of a new entity or the updation of an
* existing one.
*
* This behaviour comes from how remote stores the underlying, say, entities. A
* diff returns just entities whose updation times greater than the provided
* since time (limited to the given diff limit). So there will be at most one
* row for a particular entity id. And if that entity has been deleted, then the
* row will be a tombstone, so data be absent.
*/
const userEntityDiff = async (
type: EntityType,
sinceTime: number,
entityKeyB64: string,
): Promise<UserEntityChange[]> => {
const decrypt = (encryptedData: string, decryptionHeader: string) =>
decryptBlob({ encryptedData, decryptionHeader }, entityKeyB64);
const params = new URLSearchParams({
type,
sinceTime: sinceTime.toString(),
limit: defaultDiffLimit.toString(),
});
const url = await apiURL(`/user-entity/entity/diff`);
const res = await fetch(`${url}?${params.toString()}`, {
headers: await authenticatedRequestHeaders(),
});
ensureOk(res);
const diff = z
.object({ diff: z.array(RemoteUserEntityChange) })
.parse(await res.json()).diff;
return Promise.all(
diff.map(
async ({ id, encryptedData, header, isDeleted, updatedAt }) => ({
id,
data: !isDeleted
? await decrypt(ensure(encryptedData), ensure(header))
: undefined,
updatedAt,
}),
),
);
};
/**
* Return the entity key that can be used to decrypt the encrypted contents of
* user entities of the given {@link type}.
*
* 1. See if we have the encrypted entity key present locally. If so, return
* the entity key by decrypting it using with the user's master key.
*
* 2. Otherwise fetch the encrypted entity key for that type from remote. If we
* get one, obtain the entity key by decrypt the encrypted one using the
* user's master key, save it locally for future use, and return it.
*
* 3. Otherwise generate a new entity key, encrypt it using the user's master
* key, putting the encrypted one to remote and also saving it locally, and
* return it.
*
* See also, [Note: User entity keys].
*/
const getOrCreateEntityKeyB64 = async (
type: EntityType,
masterKey: Uint8Array,
) => {
// See if we already have it locally.
const saved = await savedRemoteUserEntityKey(type);
if (saved) return decryptEntityKey(saved, masterKey);
// See if remote already has it.
const existing = await getUserEntityKey(type);
if (existing) {
// Only save it if we can decrypt it to avoid corrupting our local state
// in unforeseen circumstances.
const result = await decryptEntityKey(existing, masterKey);
await saveRemoteUserEntityKey(type, existing);
return result;
}
// Nada. Create a new one, put it to remote, save it locally, and return.
// As a sanity check, genarate the key but immediately encrypt it as if it
// were fetched from remote and then try to decrypt it before doing anything
// with it.
const generated = await generateNewEncryptedEntityKey(masterKey);
const result = decryptEntityKey(generated, masterKey);
await postUserEntityKey(type, generated);
await saveRemoteUserEntityKey(type, generated);
return result;
};
const entityKeyKey = (type: EntityType) => `entityKey/${type}`;
/**
* Return the locally persisted {@link RemoteUserEntityKey}, if any,
* corresponding the given {@link type}.
*/
const savedRemoteUserEntityKey = (
type: EntityType,
): Promise<RemoteUserEntityKey | undefined> =>
getKV(entityKeyKey(type)).then((s) =>
s ? RemoteUserEntityKey.parse(JSON.parse(s)) : undefined,
);
/**
* Setter for {@link entityKey}.
*/
const saveRemoteUserEntityKey = (
type: EntityType,
entityKey: RemoteUserEntityKey,
) => setKV(entityKeyKey(type), JSON.stringify(entityKey));
/**
* Generate a new entity key and return it in the shape of an
* {@link RemoteUserEntityKey} after encrypting it using the user's master key.
*/
const generateNewEncryptedEntityKey = async (masterKey: Uint8Array) => {
const { encryptedData, nonce } = await encryptBoxB64(
await generateNewBlobOrStreamKey(),
masterKey,
);
// Remote calls it the header, but it really is the nonce.
return { encryptedKey: encryptedData, header: nonce };
};
/**
* Decrypt an encrypted entity key using the user's master key.
*/
const decryptEntityKey = async (
remote: RemoteUserEntityKey,
masterKey: Uint8Array,
) =>
decryptBoxB64(
{
encryptedData: remote.encryptedKey,
// Remote calls it the header, but it really is the nonce.
nonce: remote.header,
},
masterKey,
);
/**
* Fetch the encryption key for the given user entity {@link type} from remote.
*
* [Note: User entity keys]
*
* There is one encryption key (itself encrypted with the user's master key) for
* each user entity type. If the key doesn't exist on remote, then the client is
* expected to create one on the user's behalf. Remote will disallow attempts to
* multiple keys for the same user entity type.
*/
const getUserEntityKey = async (
type: EntityType,
): Promise<RemoteUserEntityKey | undefined> => {
const params = new URLSearchParams({ type });
const url = await apiURL("/user-entity/key");
const res = await fetch(`${url}?${params.toString()}`, {
headers: await authenticatedRequestHeaders(),
});
if (!res.ok) {
// Remote says HTTP 404 Not Found if there is no key yet for the user.
if (res.status == 404) return undefined;
throw new HTTPError(res);
} else {
return RemoteUserEntityKey.parse(await res.json());
}
};
const RemoteUserEntityKey = z.object({
/** Base64 encoded entity key, encrypted with the user's master key. */
encryptedKey: z.string(),
/** Base64 encoded nonce used during encryption of this entity key. */
header: z.string(),
});
type RemoteUserEntityKey = z.infer<typeof RemoteUserEntityKey>;
/**
* Create a new encryption key for the given user entity {@link type} on remote.
*
* See: [Note: User entity keys]
*/
const postUserEntityKey = async (
type: EntityType,
entityKey: RemoteUserEntityKey,
) => {
const url = await apiURL("/user-entity/key");
const res = await fetch(url, {
method: "POST",
headers: await authenticatedRequestHeaders(),
body: JSON.stringify({ type, ...entityKey }),
});
ensureOk(res);
};
const latestUpdatedAtKey = (type: EntityType) => `latestUpdatedAt/${type}`;
/**
* Return the locally persisted value for the latest `updatedAt` time for the
* given entity {@link type}.
*
* This is used to checkpoint diffs, so that we can resume fetching from the
* last time we did a fetch.
*/
const savedLatestUpdatedAt = (type: EntityType) =>
getKVN(latestUpdatedAtKey(type));
/**
* Setter for {@link savedLatestUpdatedAt}.
*/
const saveLatestUpdatedAt = (type: EntityType, value: number) =>
setKV(latestUpdatedAtKey(type), value);

View File

@@ -0,0 +1,97 @@
import { getKV, getKVN, setKV } from "@/base/kv";
import { z } from "zod";
import { type EntityType } from ".";
import { RemoteUserEntityKey } from "./remote";
// Our DB footprint---
//
// All these are stored in the kv db.
const entitiesKey = (type: EntityType) => `entity/${type}`;
const entityKeyKey = (type: EntityType) => `entity/${type}/key`;
const latestUpdatedAtKey = (type: EntityType) => `entity/${type}/time`;
// ^---
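// For example, for the "location" entity type these helpers resolve to the
// keys "entity/location", "entity/location/key" and "entity/location/time"
// (the older "entityKey/location" and "latestUpdatedAt/location" keys are
// cleaned up by migration m3 elsewhere in this commit).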
/**
* A locally persisted user entity.
*/
export interface LocalUserEntity {
/**
* The UUID or nanoid of the entity.
*/
id: string;
/**
* Arbitrary (decrypted) data associated with the entity. The format of this
* data is specific to each entity type, but it is guaranteed to be JSON
* serializable.
*/
data: unknown;
/**
* Epoch microseconds denoting when this entity was last changed (created or
* updated).
*/
updatedAt: number;
}
const LocalUserEntity = z.object({
id: z.string(),
// Retain the data verbatim.
data: z.object({}).passthrough(),
updatedAt: z.number(),
});
/**
* Update the list of locally persisted user entities of the given {@link type}.
*/
export const saveEntities = (type: EntityType, items: LocalUserEntity[]) =>
setKV(entitiesKey(type), items);
/**
* Return the list of locally persisted user entities of the given {@link type}.
*/
export const savedEntities = async (
type: EntityType,
): Promise<LocalUserEntity[]> =>
// See: [Note: strict mode migration]
//
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
LocalUserEntity.array().parse((await getKV(entitiesKey(type))) ?? []);
/**
* Save the {@link entityKey} for the given user entity {@link type} to our
* local database.
*/
export const saveRemoteUserEntityKey = (
type: EntityType,
entityKey: RemoteUserEntityKey,
) => setKV(entityKeyKey(type), entityKey);
/**
* Return the locally persisted {@link RemoteUserEntityKey}, if any,
* corresponding to the given user entity {@link type}.
*/
export const savedRemoteUserEntityKey = (
type: EntityType,
): Promise<RemoteUserEntityKey | undefined> =>
getKV(entityKeyKey(type)).then((s) =>
s ? RemoteUserEntityKey.parse(s) : undefined,
);
/**
* Save the latest `updatedAt` {@link value} for the given user entity
* {@link type} to our local database.
*/
export const saveLatestUpdatedAt = (type: EntityType, value: number) =>
setKV(latestUpdatedAtKey(type), value);
/**
* Return the locally persisted value for the latest `updatedAt` time for the
* given user entity {@link type}.
*
* This is used to checkpoint diffs, so that we can resume fetching from the
* last time we did a fetch.
*/
export const savedLatestUpdatedAt = (type: EntityType) =>
getKVN(latestUpdatedAtKey(type));
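// Illustrative round trip (not part of this commit; the id, data and
// updatedAt values below are hypothetical): entities are persisted verbatim
// as JSON and read back without re-interpreting their data.
const dbUsageExample = async () => {
    await saveEntities("location", [
        {
            id: "abc123",
            data: { name: "Home", radius: 1.2 },
            updatedAt: 1727250000000000,
        },
    ]);
    const entities = await savedEntities("location");
    // entities[0].data comes back verbatim (typed as `unknown`); callers such
    // as savedLocationTags parse it with their own zod schema.
    console.log(entities);
};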

View File

@@ -0,0 +1,223 @@
import {
decryptBoxB64,
encryptBoxB64,
generateNewBlobOrStreamKey,
} from "@/base/crypto";
import { nullToUndefined } from "@/utils/transform";
import { z } from "zod";
import { gunzip } from "../../utils/gzip";
import {
savedEntities,
savedLatestUpdatedAt,
savedRemoteUserEntityKey,
saveEntities,
saveLatestUpdatedAt,
saveRemoteUserEntityKey,
type LocalUserEntity,
} from "./db";
import {
getUserEntityKey,
postUserEntityKey,
RemoteUserEntityKey,
userEntityDiff,
} from "./remote";
/**
* User entities are predefined lists of otherwise arbitrary data that the user
* can store for their account.
*
* e.g. location tags, cluster groups.
*/
export type EntityType =
/**
* A location tag.
*
* The entity data is base64(encrypt(json))
*/
| "location"
/**
* A cluster group.
*
* The entity data is base64(encrypt(gzip(json)))
*/
| "cgroup";
/**
* Zod schema for the fields of interest in the location tag that we get from
* remote.
*/
const RemoteLocationTag = z.object({
name: z.string(),
radius: z.number(),
centerPoint: z.object({
latitude: z.number(),
longitude: z.number(),
}),
});
/**
* A view of the location tag data suitable for use by the rest of the app.
*/
export type LocationTag = z.infer<typeof RemoteLocationTag>;
/**
* Return the list of locally available location tags.
*/
export const savedLocationTags = (): Promise<LocationTag[]> =>
savedEntities("location").then((es) =>
es.map((e) => RemoteLocationTag.parse(e.data)),
);
const RemoteFaceCluster = z.object({
id: z.string(),
faces: z.string().array(),
});
/**
* Zod schema for the fields of interest in the cgroup that we get from remote.
*/
const RemoteCGroup = z.object({
name: z.string().nullish().transform(nullToUndefined),
assigned: z.array(RemoteFaceCluster),
// The remote cgroup also has a "rejected" property, but that is not
// currently used by any of the clients.
isHidden: z.boolean(),
avatarFaceID: z.string().nullish().transform(nullToUndefined),
});
export type RemoteCGroup = z.infer<typeof RemoteCGroup>;
export type CGroupUserEntity = Omit<LocalUserEntity, "data"> & {
data: RemoteCGroup;
};
/**
* Return the list of locally available cgroup user entities.
*/
export const savedCGroups = (): Promise<CGroupUserEntity[]> =>
savedEntities("cgroup").then((es) =>
es.map((e) => ({ ...e, data: RemoteCGroup.parse(e.data) })),
);
/**
* Update our local entities of the given {@link type} by pulling the latest
* changes from remote.
*
* This fetches all the user entities corresponding to the given user entity
* type from remote that have been created, updated or deleted since the last
* time we checked.
*
* This diff is then applied to the data we have persisted locally.
*
* It uses local state to remember the latest entry from the last time it did a pull,
* so each subsequent pull is a lightweight diff.
*
* @param masterKey The user's masterKey, which is used to decrypt the entity
* key (or encrypt it, when generating a new one).
*/
export const pullUserEntities = async (
type: EntityType,
masterKey: Uint8Array,
) => {
const entityKeyB64 = await getOrCreateEntityKeyB64(type, masterKey);
const isGzipped = type == "cgroup";
let sinceTime = (await savedLatestUpdatedAt(type)) ?? 0;
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, no-constant-condition
while (true) {
const diff = await userEntityDiff(type, sinceTime, entityKeyB64);
if (diff.length == 0) break;
const entityByID = new Map(
(await savedEntities(type)).map((e) => [e.id, e]),
);
for (const { id, data, updatedAt } of diff) {
if (data) {
const s = isGzipped
? await gunzip(data)
: new TextDecoder().decode(data);
entityByID.set(id, { id, data: JSON.parse(s), updatedAt });
} else {
entityByID.delete(id);
}
sinceTime = Math.max(sinceTime, updatedAt);
}
await saveEntities(type, [...entityByID.values()]);
await saveLatestUpdatedAt(type, sinceTime);
}
};
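// Illustrative caller-side flow (not part of this commit; the wrapper name is
// hypothetical), mirroring how the search worker and ML sync code elsewhere in
// this diff use the new API: pull first, then read the locally persisted,
// parsed views.
const pullExample = async (masterKey: Uint8Array) => {
    await pullUserEntities("location", masterKey);
    const locationTags = await savedLocationTags();
    await pullUserEntities("cgroup", masterKey);
    const cgroups = await savedCGroups();
    return { locationTags, cgroups };
};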
/**
* Return the entity key that can be used to decrypt the encrypted contents of
* user entities of the given {@link type}.
*
* 1. See if we have the encrypted entity key present locally. If so, return
* the entity key by decrypting it with the user's master key.
*
* 2. Otherwise fetch the encrypted entity key for that type from remote. If we
* get one, obtain the entity key by decrypting the encrypted one using the
* user's master key, save it locally for future use, and return it.
*
* 3. Otherwise generate a new entity key, encrypt it using the user's master
* key, put the encrypted one to remote, also save it locally, and
* return it.
*
* See also, [Note: User entity keys].
*/
const getOrCreateEntityKeyB64 = async (
type: EntityType,
masterKey: Uint8Array,
) => {
// See if we already have it locally.
const saved = await savedRemoteUserEntityKey(type);
if (saved) return decryptEntityKey(saved, masterKey);
// See if remote already has it.
const existing = await getUserEntityKey(type);
if (existing) {
// Only save it if we can decrypt it to avoid corrupting our local state
// in unforeseen circumstances.
const result = await decryptEntityKey(existing, masterKey);
await saveRemoteUserEntityKey(type, existing);
return result;
}
// Nada. Create a new one, put it to remote, save it locally, and return.
// As a sanity check, generate the key but immediately encrypt it as if it
// were fetched from remote and then try to decrypt it before doing anything
// with it.
const generated = await generateNewEncryptedEntityKey(masterKey);
const result = decryptEntityKey(generated, masterKey);
await postUserEntityKey(type, generated);
await saveRemoteUserEntityKey(type, generated);
return result;
};
const generateNewEncryptedEntityKey = async (masterKey: Uint8Array) => {
const { encryptedData, nonce } = await encryptBoxB64(
await generateNewBlobOrStreamKey(),
masterKey,
);
// Remote calls it the header, but it really is the nonce.
return { encryptedKey: encryptedData, header: nonce };
};
/**
* Decrypt an encrypted entity key using the user's master key.
*/
const decryptEntityKey = async (
remote: RemoteUserEntityKey,
masterKey: Uint8Array,
) =>
decryptBoxB64(
{
encryptedData: remote.encryptedKey,
// Remote calls it the header, but it really is the nonce.
nonce: remote.header,
},
masterKey,
);

View File

@@ -0,0 +1,202 @@
import { decryptBlob } from "@/base/crypto";
import { authenticatedRequestHeaders, ensureOk, HTTPError } from "@/base/http";
import { apiURL } from "@/base/origins";
import { ensure } from "@/utils/ensure";
import { z } from "zod";
import type { EntityType } from ".";
/**
* The maximum number of items to fetch in a single diff
*
* [Note: Limit of returned items in /diff requests]
*
* The various GET /diff API methods, which tell the client what all has changed
* since a timestamp (provided by the client) take a limit parameter.
*
* These diff API calls return all items whose updated at is greater
* (non-inclusive) than the timestamp we provide. So there is no mechanism for
* pagination of items which have the exact same updated at.
*
* Conceptually, it may happen that there are more items than the limit we've
* provided, but there are practical safeguards.
*
* For file diff, the limit is advisory, and remote may return fewer, equal, or
* more items than the provided limit. The scenario where it returns more is
* when more files than the limit have the same updated at. Theoretically this
* would make the diff response unbounded; however, in practice file
* modifications themselves are all batched. Even if the user were to select all
* the files in their library and update them all in one go in the UI, their
* client app is required (with server side enforcement) to use batched API
* calls to make those updates. Thus, each of those batches would get distinct
* updated at values.
*
* For entity diff, there are no bulk operations yet that can result in the
* assignment of the same microsecond to hundreds of items.
*/
const defaultDiffLimit = 500;
/**
* An entry in the user entity diff.
*
* Each change either contains the latest data associated with a particular user
* entity that has been created or updated, or indicates that the corresponding
* entity has been deleted.
*/
export interface UserEntityChange {
/**
* The UUID or nanoid of the entity.
*/
id: string;
/**
* Arbitrary (decrypted) data associated with the entity. The format of this
* data is specific to each entity type.
*
* This will not be present for entities that have been deleted on remote.
*/
data: Uint8Array | undefined;
/**
* Epoch microseconds denoting when this entity was last changed (created or
* updated or deleted).
*/
updatedAt: number;
}
/**
* Zod schema for an item in the user entity diff.
*/
const RemoteUserEntityChange = z.object({
id: z.string(),
/**
* Base64 string containing the encrypted contents of the entity.
*
* Will be `null` when isDeleted is true.
*/
encryptedData: z.string().nullable(),
/**
* Base64 string containing the decryption header.
*
* Will be `null` when isDeleted is true.
*/
header: z.string().nullable(),
isDeleted: z.boolean(),
updatedAt: z.number(),
});
/**
* Fetch the next set of changes (upserts or deletions) to user entities of the
* given type since the given time.
*
* @param type The type of the entities to fetch.
*
* @param sinceTime Epoch microseconds. This is used to ask remote to provide us
* only entities whose {@link updatedAt} is more than the given value. Set this
* to zero to start from the beginning.
*
* @param entityKeyB64 The base64 encoded key to use for decrypting the
* encrypted contents of the user entity.
*
* [Note: Diff response will have at most one entry for an id]
*
* Unlike git diffs which track all changes, the diffs we get from remote are
* guaranteed to contain only one entry (upsert or delete) for a particular Ente
* object. This holds true irrespective of the diff limit.
*
* For example, in a user entity diff, it is guaranteed that there will only be
* at max one entry for a particular entity id. An entry without data indicates
* that the corresponding entity was deleted. Otherwise, when the data is
* present, it is taken as the creation of a new entity or the updation of an
* existing one.
*
* This behaviour comes from how remote stores the underlying entities. A diff
* returns just the entities whose updation time is greater than the provided
* since time (limited to the given diff limit). So there will be at most one
* row for a particular entity id. And if that entity has been deleted, then the
* row will be a tombstone, so its data will be absent.
*/
export const userEntityDiff = async (
type: EntityType,
sinceTime: number,
entityKeyB64: string,
): Promise<UserEntityChange[]> => {
const decrypt = (encryptedData: string, decryptionHeader: string) =>
decryptBlob({ encryptedData, decryptionHeader }, entityKeyB64);
const params = new URLSearchParams({
type,
sinceTime: sinceTime.toString(),
limit: defaultDiffLimit.toString(),
});
const url = await apiURL(`/user-entity/entity/diff`);
const res = await fetch(`${url}?${params.toString()}`, {
headers: await authenticatedRequestHeaders(),
});
ensureOk(res);
const diff = z
.object({ diff: z.array(RemoteUserEntityChange) })
.parse(await res.json()).diff;
return Promise.all(
diff.map(
async ({ id, encryptedData, header, isDeleted, updatedAt }) => ({
id,
data: !isDeleted
? await decrypt(ensure(encryptedData), ensure(header))
: undefined,
updatedAt,
}),
),
);
};
/**
* Fetch the encryption key for the given user entity {@link type} from remote.
*
* [Note: User entity keys]
*
* There is one encryption key (itself encrypted with the user's master key) for
* each user entity type. If the key doesn't exist on remote, then the client is
* expected to create one on the user's behalf. Remote will disallow attempts
* to create multiple keys for the same user entity type.
*/
export const getUserEntityKey = async (
type: EntityType,
): Promise<RemoteUserEntityKey | undefined> => {
const params = new URLSearchParams({ type });
const url = await apiURL("/user-entity/key");
const res = await fetch(`${url}?${params.toString()}`, {
headers: await authenticatedRequestHeaders(),
});
if (!res.ok) {
// Remote says HTTP 404 Not Found if there is no key yet for the user.
if (res.status == 404) return undefined;
throw new HTTPError(res);
} else {
return RemoteUserEntityKey.parse(await res.json());
}
};
export const RemoteUserEntityKey = z.object({
/** Base64 encoded entity key, encrypted with the user's master key. */
encryptedKey: z.string(),
/** Base64 encoded nonce used during encryption of this entity key. */
header: z.string(),
});
export type RemoteUserEntityKey = z.infer<typeof RemoteUserEntityKey>;
/**
* Create a new encryption key for the given user entity {@link type} on remote.
*
* See: [Note: User entity keys]
*/
export const postUserEntityKey = async (
type: EntityType,
entityKey: RemoteUserEntityKey,
) => {
const url = await apiURL("/user-entity/key");
const res = await fetch(url, {
method: "POST",
headers: await authenticatedRequestHeaders(),
body: JSON.stringify({ type, ...entityKey }),
});
ensureOk(res);
};

View File

@@ -1,4 +1,4 @@
import { getKV, removeKV, setKV } from "@/base/kv";
import { getKVS, removeKV, setKV } from "@/base/kv";
import log from "@/base/log";
export enum LS_KEYS {
@@ -78,7 +78,7 @@ export const migrateKVToken = async (user: unknown) => {
typeof oldLSUser == "object" &&
"token" in oldLSUser &&
typeof oldLSUser.token == "string" &&
!(await getKV("token"));
!(await getKVS("token"));
user &&
typeof user == "object" &&