diff --git a/web/packages/base/kv.ts b/web/packages/base/kv.ts index ba00b96d7b..56e47cf30f 100644 --- a/web/packages/base/kv.ts +++ b/web/packages/base/kv.ts @@ -9,12 +9,16 @@ import log from "./log"; * storage is limited to the main thread). * * The "kv" database consists of one object store, "kv". Keys are strings. - * Values can be strings or number or booleans. + * Values can be arbitrary JSON objects. */ interface KVDBSchema extends DBSchema { kv: { key: string; - value: string | number | boolean; + /** + * Typescript doesn't have a native JSON type, so this needs to be + * `unknown`. + */ + value: unknown; }; } @@ -101,8 +105,16 @@ export const clearKVDB = async () => { /** * Return the string value stored corresponding to {@link key}, or `undefined` * if there is no such entry. + * + * Typescript doesn't have a native JSON type, so the return value is typed as an + * `unknown`. For primitive types, you can avoid casting by using the + * {@link getKVS} (string), {@link getKVN} (number) or {@link getKVB} (boolean) + * methods that do an additional runtime check of the type. */ -export const getKV = async (key: string) => _getKV(key, "string"); +export const getKV = async (key: string) => { + const db = await kvDB(); + return db.get("kv", key); +}; export const _getKV = async ( key: string, @@ -113,11 +125,14 @@ export const _getKV = async ( if (v === undefined) return undefined; if (typeof v != type) throw new Error( - `Expected the value corresponding to key ${key} to be a ${type}, but instead got ${v}`, + `Expected the value corresponding to key ${key} to be a ${type}, but instead got ${String(v)}`, ); return v as T; }; +/** String variant of {@link getKV}. */ +export const getKVS = async (key: string) => _getKV(key, "string"); + /** Numeric variant of {@link getKV}. */ export const getKVN = async (key: string) => _getKV(key, "number"); @@ -127,8 +142,11 @@ export const getKVB = async (key: string) => _getKV(key, "boolean"); /** * Save the given {@link value} corresponding to {@link key}, overwriting any * existing value. + * + * @param value Any arbitrary JSON object. Typescript doesn't have a native JSON + * type, so this is typed as an `unknown`. */ -export const setKV = async (key: string, value: string | number | boolean) => { +export const setKV = async (key: string, value: unknown) => { const db = await kvDB(); await db.put("kv", value, key); }; diff --git a/web/packages/base/local-user.ts b/web/packages/base/local-user.ts index d38ed5a69c..5a3eeafaac 100644 --- a/web/packages/base/local-user.ts +++ b/web/packages/base/local-user.ts @@ -2,7 +2,7 @@ import { ensure } from "@/utils/ensure"; import { z } from "zod"; -import { getKV } from "./kv"; +import { getKVS } from "./kv"; // TODO: During login the only field present is email. Which makes this // optionality indicated by these types incorrect. @@ -57,4 +57,4 @@ export const ensureLocalUser = (): LocalUser => { * The underlying data is stored in IndexedDB, and can be accessed from web * workers.
*/ -export const ensureAuthToken = async () => ensure(await getKV("token")); +export const ensureAuthToken = async () => ensure(await getKVS("token")); diff --git a/web/packages/base/origins.ts b/web/packages/base/origins.ts index d1474014a3..f904bebe4d 100644 --- a/web/packages/base/origins.ts +++ b/web/packages/base/origins.ts @@ -1,4 +1,4 @@ -import { getKV } from "@/base/kv"; +import { getKVS } from "@/base/kv"; /** * Return the origin (scheme, host, port triple) that should be used for making @@ -35,7 +35,7 @@ export const apiURL = async (path: string) => (await apiOrigin()) + path; * Otherwise return undefined. */ export const customAPIOrigin = async () => - (await getKV("apiOrigin")) ?? + (await getKVS("apiOrigin")) ?? process.env.NEXT_PUBLIC_ENTE_ENDPOINT ?? undefined; diff --git a/web/packages/new/photos/components/DevSettings.tsx b/web/packages/new/photos/components/DevSettings.tsx index 5a95933216..385acdc585 100644 --- a/web/packages/new/photos/components/DevSettings.tsx +++ b/web/packages/new/photos/components/DevSettings.tsx @@ -1,6 +1,6 @@ import { useIsMobileWidth } from "@/base/hooks"; import { ensureOk } from "@/base/http"; -import { getKV, removeKV, setKV } from "@/base/kv"; +import { getKVS, removeKV, setKV } from "@/base/kv"; import log from "@/base/log"; import InfoOutlinedIcon from "@mui/icons-material/InfoOutlined"; import { @@ -69,7 +69,8 @@ const Contents: React.FC = (props) => { >(); useEffect( - () => void getKV("apiOrigin").then((o) => setInitialAPIOrigin(o ?? "")), + () => + void getKVS("apiOrigin").then((o) => setInitialAPIOrigin(o ?? "")), [], ); diff --git a/web/packages/new/photos/services/migrations.ts b/web/packages/new/photos/services/migrations.ts index 0c4e18d9cf..d1d367c88a 100644 --- a/web/packages/new/photos/services/migrations.ts +++ b/web/packages/new/photos/services/migrations.ts @@ -29,11 +29,12 @@ import { deleteDB } from "idb"; */ export const runMigrations = async () => { const m = (await getKVN("migrationLevel")) ?? 0; - const latest = 2; + const latest = 3; if (m < latest) { log.info(`Running migrations ${m} => ${latest}`); - if (m < 1 && isDesktop) await m0(); - if (m < 2) await m1(); + if (m < 1 && isDesktop) await m1(); + if (m < 2) await m2(); + if (m < 3) await m3(); await setKV("migrationLevel", latest); } }; @@ -42,7 +43,7 @@ export const runMigrations = async () => { // almost all clients would've migrated over. // Added: Aug 2024 (v1.7.3). Prunable. -const m0 = () => +const m1 = () => Promise.all([ // Delete the legacy face DB v1. deleteDB("mldata"), @@ -67,7 +68,7 @@ const m0 = () => }); // Added: Sep 2024 (v1.7.5-beta). Prunable. -const m1 = () => +const m2 = () => // Older versions of the user-entities code kept the diff related state // in a different place. These entries are not needed anymore (the tags // themselves will get resynced). @@ -75,4 +76,27 @@ const m1 = () => localForage.removeItem("location_tags"), localForage.removeItem("location_tags_key"), localForage.removeItem("location_tags_time"), + + // Remove data from an intermediate format that stored user-entities + // piecewise instead of as generic, verbatim, entities. + removeKV("locationTags"), + removeKV("entityKey/locationTags"), + removeKV("latestUpdatedAt/locationTags"), + ]); + +// Added: Sep 2024 (v1.7.5-beta). Prunable. +const m3 = () => + Promise.all([ + // Delete the legacy face DB v1. 
+ // + // This was already removed in m1, but that was behind an isDesktop + // check, but later I found out that old web versions also created this + // DB (although empty). + deleteDB("mldata"), + + // Remove data from an intermediate format that stored user-entities in + // their parsed form instead of as generic, verbatim, entities. + removeKV("locationTags"), + removeKV("entityKey/location"), + removeKV("latestUpdatedAt/location"), ]); diff --git a/web/packages/new/photos/services/ml/cluster.ts b/web/packages/new/photos/services/ml/cluster.ts index 6fa917f2eb..08a566e464 100644 --- a/web/packages/new/photos/services/ml/cluster.ts +++ b/web/packages/new/photos/services/ml/cluster.ts @@ -130,6 +130,8 @@ export const clusterFaces = async ( // The resultant clusters. // TODO-Cluster Later on, instead of starting from a blank slate, this will // be list of existing clusters we fetch from remote. + // - fetchRemoteClusterFeedback + // [..local, ..remote] let clusters: FaceCluster[] = []; // Process the faces in batches, but keep an overlap between batches to @@ -188,6 +190,10 @@ export const clusterFaces = async ( `Generated ${sortedClusters.length} clusters from ${faces.length} faces (${clusteredFaceCount} clustered ${faces.length - clusteredFaceCount} unclustered) (${timeTakenMs} ms)`, ); + // reconcileClusters + // Save local + // updated map -> remote - Put + return { clusters: sortedClusters, people }; }; diff --git a/web/packages/new/photos/services/ml/db.ts b/web/packages/new/photos/services/ml/db.ts index 893ca8ddc1..1894fef33d 100644 --- a/web/packages/new/photos/services/ml/db.ts +++ b/web/packages/new/photos/services/ml/db.ts @@ -431,6 +431,7 @@ export const setFaceClusters = async (clusters: FaceCluster[]) => { * - A cgroup, in which case it should add or overwrite the entry for the * corresponding cluster group (as identified by its {@link id}). 
*/ +// TODO-Cluster delete me export const applyCGroupDiff = async (diff: (string | CGroup)[]) => { const db = await mlDB(); const tx = db.transaction("cluster-group", "readwrite"); diff --git a/web/packages/new/photos/services/ml/ml-data.ts b/web/packages/new/photos/services/ml/ml-data.ts index 9f03c618cd..fbb5709654 100644 --- a/web/packages/new/photos/services/ml/ml-data.ts +++ b/web/packages/new/photos/services/ml/ml-data.ts @@ -3,8 +3,8 @@ import log from "@/base/log"; import type { EnteFile } from "@/new/photos/types/file"; import { nullToUndefined } from "@/utils/transform"; import { z } from "zod"; +import { gunzip, gzip } from "../../utils/gzip"; import { fetchFileData, putFileData } from "../file-data"; -import { gunzip, gzip } from "../gzip"; import { type RemoteCLIPIndex } from "./clip"; import { type RemoteFaceIndex } from "./face"; diff --git a/web/packages/new/photos/services/ml/people.ts b/web/packages/new/photos/services/ml/people.ts index f33e27e53c..5eba77a5b0 100644 --- a/web/packages/new/photos/services/ml/people.ts +++ b/web/packages/new/photos/services/ml/people.ts @@ -2,9 +2,9 @@ import { masterKeyFromSession } from "@/base/session-store"; import { wipClusterEnable } from "."; import type { EnteFile } from "../../types/file"; import { getLocalFiles } from "../files"; -import { pullCGroups } from "../user-entity"; +import { pullUserEntities, savedCGroups } from "../user-entity"; import type { FaceCluster } from "./cluster"; -import { getClusterGroups, getFaceIndexes } from "./db"; +import { getFaceIndexes } from "./db"; import { fileIDFromFaceID } from "./face"; /** @@ -131,7 +131,7 @@ export const syncCGroups = async () => { if (!(await wipClusterEnable())) return; const masterKey = await masterKeyFromSession(); - await pullCGroups(masterKey); + await pullUserEntities("cgroup", masterKey); }; export type NamedPerson = Omit & { @@ -180,9 +180,9 @@ export const namedPeopleFromCGroups = async (): Promise => { } // Convert cgroups to people. - const cgroups = await getClusterGroups(); + const cgroups = await savedCGroups(); return cgroups - .map((cgroup) => { + .map(({ id, data: cgroup }) => { // Hidden cgroups are clusters specifically marked so as to not be shown // in the UI. 
if (cgroup.isHidden) return undefined; @@ -229,8 +229,6 @@ export const namedPeopleFromCGroups = async (): Promise => { displayFaceFile = highestScoringFace.file; } - const id = cgroup.id; - return { id, name, fileIDs, displayFaceID, displayFaceFile }; }) .filter((c) => !!c) diff --git a/web/packages/new/photos/services/search/worker.ts b/web/packages/new/photos/services/search/worker.ts index 9ba9ea8762..ed6b4df859 100644 --- a/web/packages/new/photos/services/search/worker.ts +++ b/web/packages/new/photos/services/search/worker.ts @@ -12,7 +12,7 @@ import * as chrono from "chrono-node"; import { expose } from "comlink"; import { z } from "zod"; import { - pullLocationTags, + pullUserEntities, savedLocationTags, type LocationTag, } from "../user-entity"; @@ -47,7 +47,7 @@ export class SearchWorker { */ async sync(masterKey: Uint8Array) { return Promise.all([ - pullLocationTags(masterKey) + pullUserEntities("location", masterKey) .then(() => savedLocationTags()) .then((ts) => (this.locationTags = ts)), fetchCities().then((cs) => (this.cities = cs)), diff --git a/web/packages/new/photos/services/user-entity.ts b/web/packages/new/photos/services/user-entity.ts deleted file mode 100644 index 67b8eaf744..0000000000 --- a/web/packages/new/photos/services/user-entity.ts +++ /dev/null @@ -1,489 +0,0 @@ -import { - decryptBlob, - decryptBoxB64, - encryptBoxB64, - generateNewBlobOrStreamKey, -} from "@/base/crypto"; -import { authenticatedRequestHeaders, ensureOk, HTTPError } from "@/base/http"; -import { getKV, getKVN, setKV } from "@/base/kv"; -import { apiURL } from "@/base/origins"; -import { ensure } from "@/utils/ensure"; -import { nullToUndefined } from "@/utils/transform"; -import { z } from "zod"; -import { gunzip } from "./gzip"; -import { applyCGroupDiff } from "./ml/db"; -import type { CGroup } from "./ml/people"; - -/** - * User entities are predefined lists of otherwise arbitrary data that the user - * can store for their account. - * - * e.g. location tags, cluster groups. - */ -export type EntityType = - /** - * A location tag. - * - * The entity data is base64(encrypt(json)) - */ - | "location" - /** - * A cluster group. - * - * The entity data is base64(encrypt(gzip(json))) - */ - | "cgroup"; - -/** - * Update our local location tags with changes from remote. - * - * This function fetches all the location tag user entities from remote and - * updates our local database. It uses local state to remember the latest entry - * the last time it did a pull, so each subsequent pull is a lightweight diff. - * - * @param masterKey The user's master key. This is used to encrypt and decrypt - * the location tags specific entity key. - */ -export const pullLocationTags = async (masterKey: Uint8Array) => { - const decoder = new TextDecoder(); - const parse = (id: string, data: Uint8Array): LocationTag => ({ - id, - ...RemoteLocationTag.parse(JSON.parse(decoder.decode(data))), - }); - - const processBatch = async (entities: UserEntityChange[]) => { - const existingTagsByID = new Map( - (await savedLocationTags()).map((t) => [t.id, t]), - ); - entities.forEach(({ id, data }) => - data - ? existingTagsByID.set(id, parse(id, data)) - : existingTagsByID.delete(id), - ); - return saveLocationTags([...existingTagsByID.values()]); - }; - - return pullUserEntities("location", masterKey, processBatch); -}; - -/** Zod schema for the tag that we get from or put to remote. 
*/ -const RemoteLocationTag = z.object({ - name: z.string(), - radius: z.number(), - centerPoint: z.object({ - latitude: z.number(), - longitude: z.number(), - }), -}); - -/** Zod schema for the tag that we persist locally. */ -const LocalLocationTag = RemoteLocationTag.extend({ - id: z.string(), -}); - -export type LocationTag = z.infer; - -const saveLocationTags = (tags: LocationTag[]) => - setKV("locationTags", JSON.stringify(tags)); - -/** - * Return all the location tags that are present locally. - * - * Use {@link pullLocationTags} to synchronize this list with remote. - */ -export const savedLocationTags = async () => - LocalLocationTag.array().parse( - JSON.parse((await getKV("locationTags")) ?? "[]"), - ); - -/** - * Update our local cgroups with changes from remote. - * - * This fetches all the user entities corresponding to the "cgroup" entity type - * from remote that have been created, updated or deleted since the last time we - * checked. - * - * This diff is then applied to the data we have persisted locally. - * - * @param masterKey The user's master key. This is used to encrypt and decrypt - * the cgroup specific entity key. - */ -export const pullCGroups = (masterKey: Uint8Array) => { - // See: [Note: strict mode migration] - // - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore - const parse = async (id: string, data: Uint8Array): Promise => ({ - id, - name: undefined, - avatarFaceID: undefined, - ...RemoteCGroup.parse(JSON.parse(await gunzip(data))), - }); - - const processBatch = async (entities: UserEntityChange[]) => - await applyCGroupDiff( - await Promise.all( - entities.map(async ({ id, data }) => - data ? await parse(id, data) : id, - ), - ), - ); - - return pullUserEntities("cgroup", masterKey, processBatch); -}; - -const RemoteFaceCluster = z.object({ - id: z.string(), - faces: z.string().array(), -}); - -const RemoteCGroup = z.object({ - name: z.string().nullish().transform(nullToUndefined), - assigned: z.array(RemoteFaceCluster), - // The remote cgroup also has a "rejected" property, but that is not - // currently used by any of the clients. - isHidden: z.boolean(), - avatarFaceID: z.string().nullish().transform(nullToUndefined), -}); - -/** - * The maximum number of items to fetch in a single diff - * - * [Note: Limit of returned items in /diff requests] - * - * The various GET /diff API methods, which tell the client what all has changed - * since a timestamp (provided by the client) take a limit parameter. - * - * These diff API calls return all items whose updated at is greater - * (non-inclusive) than the timestamp we provide. So there is no mechanism for - * pagination of items which have the exact same updated at. - * - * Conceptually, it may happen that there are more items than the limit we've - * provided, but there are practical safeguards. - * - * For file diff, the limit is advisory, and remote may return less, equal or - * more items than the provided limit. The scenario where it returns more is - * when more files than the limit have the same updated at. Theoretically it - * would make the diff response unbounded, however in practice file - * modifications themselves are all batched. Even if the user were to select all - * the files in their library and updates them all in one go in the UI, their - * client app is required to use batched API calls to make those updates, and - * each of those batches would get distinct updated at. - */ -const defaultDiffLimit = 500; - -/** - * An entry in the user entity diff. 
- * - * Each change either contains the latest data associated with a particular user - * entity that has been created or updated, or indicates that the corresponding - * entity has been deleted. - */ -interface UserEntityChange { - /** - * A UUID or nanoid of the entity. - */ - id: string; - /** - * Arbitrary (decrypted) data associated with the entity. The format of this - * data is specific to each entity type. - * - * This will not be present for entities that have been deleted on remote. - */ - data: Uint8Array | undefined; - /** - * Epoch microseconds denoting when this entity was last changed (created or - * updated or deleted). - */ - updatedAt: number; -} - -/** - * Sync of the given {@link type} entities that we have locally with remote. - * - * This fetches all the user entities of {@link type} from remote that have been - * created, updated or deleted since the last time we checked. - * - * For each diff response, the {@link processBatch} is invoked to give a chance - * to caller to apply the updates to the data we have persisted locally. - * - * The user's {@link masterKey} is used to decrypt (or encrypt, when generating - * a new one) the entity key. - */ -const pullUserEntities = async ( - type: EntityType, - masterKey: Uint8Array, - processBatch: (entities: UserEntityChange[]) => Promise, -) => { - const entityKeyB64 = await getOrCreateEntityKeyB64(type, masterKey); - - let sinceTime = (await savedLatestUpdatedAt(type)) ?? 0; - // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, no-constant-condition - while (true) { - const entities = await userEntityDiff(type, sinceTime, entityKeyB64); - if (entities.length == 0) break; - - await processBatch(entities); - - sinceTime = entities.reduce( - (max, entity) => Math.max(max, entity.updatedAt), - sinceTime, - ); - await saveLatestUpdatedAt(type, sinceTime); - } -}; - -/** - * Zod schema for a item in the user entity diff. - */ -const RemoteUserEntityChange = z.object({ - id: z.string(), - /** - * Base64 string containing the encrypted contents of the entity. - * - * Will be `null` when isDeleted is true. - */ - encryptedData: z.string().nullable(), - /** - * Base64 string containing the decryption header. - * - * Will be `null` when isDeleted is true. - */ - header: z.string().nullable(), - isDeleted: z.boolean(), - updatedAt: z.number(), -}); - -/** - * Fetch the next set of changes (upsert or deletion) to user entities of the - * given type since the given time. - * - * @param type The type of the entities to fetch. - * - * @param sinceTime Epoch milliseconds. This is used to ask remote to provide us - * only entities whose {@link updatedAt} is more than the given value. Set this - * to zero to start from the beginning. - * - * @param entityKeyB64 The base64 encoded key to use for decrypting the - * encrypted contents of the user entity. - * - * [Note: Diff response will have at most one entry for an id] - * - * Unlike git diffs which track all changes, the diffs we get from remote are - * guaranteed to contain only one entry (upsert or delete) for a particular Ente - * object. This holds true irrespective of the diff limit. - * - * For example, in a user entity diff, it is guaranteed that there will only be - * at max one entry for a particular entity id. The entry will have no data to - * indicate that the corresponding entity was deleted. Otherwise, when the data - * is present, it is taken as the creation of a new entity or the updation of an - * existing one. 
- * - * This behaviour comes from how remote stores the underlying, say, entities. A - * diff returns just entities whose updation times greater than the provided - * since time (limited to the given diff limit). So there will be at most one - * row for a particular entity id. And if that entity has been deleted, then the - * row will be a tombstone, so data be absent. - */ -const userEntityDiff = async ( - type: EntityType, - sinceTime: number, - entityKeyB64: string, -): Promise => { - const decrypt = (encryptedData: string, decryptionHeader: string) => - decryptBlob({ encryptedData, decryptionHeader }, entityKeyB64); - - const params = new URLSearchParams({ - type, - sinceTime: sinceTime.toString(), - limit: defaultDiffLimit.toString(), - }); - const url = await apiURL(`/user-entity/entity/diff`); - const res = await fetch(`${url}?${params.toString()}`, { - headers: await authenticatedRequestHeaders(), - }); - ensureOk(res); - const diff = z - .object({ diff: z.array(RemoteUserEntityChange) }) - .parse(await res.json()).diff; - return Promise.all( - diff.map( - async ({ id, encryptedData, header, isDeleted, updatedAt }) => ({ - id, - data: !isDeleted - ? await decrypt(ensure(encryptedData), ensure(header)) - : undefined, - updatedAt, - }), - ), - ); -}; - -/** - * Return the entity key that can be used to decrypt the encrypted contents of - * user entities of the given {@link type}. - * - * 1. See if we have the encrypted entity key present locally. If so, return - * the entity key by decrypting it using with the user's master key. - * - * 2. Otherwise fetch the encrypted entity key for that type from remote. If we - * get one, obtain the entity key by decrypt the encrypted one using the - * user's master key, save it locally for future use, and return it. - * - * 3. Otherwise generate a new entity key, encrypt it using the user's master - * key, putting the encrypted one to remote and also saving it locally, and - * return it. - * - * See also, [Note: User entity keys]. - */ -const getOrCreateEntityKeyB64 = async ( - type: EntityType, - masterKey: Uint8Array, -) => { - // See if we already have it locally. - const saved = await savedRemoteUserEntityKey(type); - if (saved) return decryptEntityKey(saved, masterKey); - - // See if remote already has it. - const existing = await getUserEntityKey(type); - if (existing) { - // Only save it if we can decrypt it to avoid corrupting our local state - // in unforeseen circumstances. - const result = await decryptEntityKey(existing, masterKey); - await saveRemoteUserEntityKey(type, existing); - return result; - } - - // Nada. Create a new one, put it to remote, save it locally, and return. - - // As a sanity check, genarate the key but immediately encrypt it as if it - // were fetched from remote and then try to decrypt it before doing anything - // with it. - const generated = await generateNewEncryptedEntityKey(masterKey); - const result = decryptEntityKey(generated, masterKey); - await postUserEntityKey(type, generated); - await saveRemoteUserEntityKey(type, generated); - return result; -}; - -const entityKeyKey = (type: EntityType) => `entityKey/${type}`; - -/** - * Return the locally persisted {@link RemoteUserEntityKey}, if any, - * corresponding the given {@link type}. - */ -const savedRemoteUserEntityKey = ( - type: EntityType, -): Promise => - getKV(entityKeyKey(type)).then((s) => - s ? RemoteUserEntityKey.parse(JSON.parse(s)) : undefined, - ); - -/** - * Setter for {@link entityKey}. 
- */ -const saveRemoteUserEntityKey = ( - type: EntityType, - entityKey: RemoteUserEntityKey, -) => setKV(entityKeyKey(type), JSON.stringify(entityKey)); - -/** - * Generate a new entity key and return it in the shape of an - * {@link RemoteUserEntityKey} after encrypting it using the user's master key. - */ -const generateNewEncryptedEntityKey = async (masterKey: Uint8Array) => { - const { encryptedData, nonce } = await encryptBoxB64( - await generateNewBlobOrStreamKey(), - masterKey, - ); - // Remote calls it the header, but it really is the nonce. - return { encryptedKey: encryptedData, header: nonce }; -}; - -/** - * Decrypt an encrypted entity key using the user's master key. - */ -const decryptEntityKey = async ( - remote: RemoteUserEntityKey, - masterKey: Uint8Array, -) => - decryptBoxB64( - { - encryptedData: remote.encryptedKey, - // Remote calls it the header, but it really is the nonce. - nonce: remote.header, - }, - masterKey, - ); - -/** - * Fetch the encryption key for the given user entity {@link type} from remote. - * - * [Note: User entity keys] - * - * There is one encryption key (itself encrypted with the user's master key) for - * each user entity type. If the key doesn't exist on remote, then the client is - * expected to create one on the user's behalf. Remote will disallow attempts to - * multiple keys for the same user entity type. - */ -const getUserEntityKey = async ( - type: EntityType, -): Promise => { - const params = new URLSearchParams({ type }); - const url = await apiURL("/user-entity/key"); - const res = await fetch(`${url}?${params.toString()}`, { - headers: await authenticatedRequestHeaders(), - }); - if (!res.ok) { - // Remote says HTTP 404 Not Found if there is no key yet for the user. - if (res.status == 404) return undefined; - throw new HTTPError(res); - } else { - return RemoteUserEntityKey.parse(await res.json()); - } -}; - -const RemoteUserEntityKey = z.object({ - /** Base64 encoded entity key, encrypted with the user's master key. */ - encryptedKey: z.string(), - /** Base64 encoded nonce used during encryption of this entity key. */ - header: z.string(), -}); - -type RemoteUserEntityKey = z.infer; - -/** - * Create a new encryption key for the given user entity {@link type} on remote. - * - * See: [Note: User entity keys] - */ -const postUserEntityKey = async ( - type: EntityType, - entityKey: RemoteUserEntityKey, -) => { - const url = await apiURL("/user-entity/key"); - const res = await fetch(url, { - method: "POST", - headers: await authenticatedRequestHeaders(), - body: JSON.stringify({ type, ...entityKey }), - }); - ensureOk(res); -}; - -const latestUpdatedAtKey = (type: EntityType) => `latestUpdatedAt/${type}`; - -/** - * Return the locally persisted value for the latest `updatedAt` time for the - * given entity {@link type}. - * - * This is used to checkpoint diffs, so that we can resume fetching from the - * last time we did a fetch. - */ -const savedLatestUpdatedAt = (type: EntityType) => - getKVN(latestUpdatedAtKey(type)); - -/** - * Setter for {@link savedLatestUpdatedAt}. 
- */ -const saveLatestUpdatedAt = (type: EntityType, value: number) => - setKV(latestUpdatedAtKey(type), value); diff --git a/web/packages/new/photos/services/user-entity/db.ts b/web/packages/new/photos/services/user-entity/db.ts new file mode 100644 index 0000000000..f3d5eebcd7 --- /dev/null +++ b/web/packages/new/photos/services/user-entity/db.ts @@ -0,0 +1,97 @@ +import { getKV, getKVN, setKV } from "@/base/kv"; +import { z } from "zod"; +import { type EntityType } from "."; +import { RemoteUserEntityKey } from "./remote"; + +// Our DB footprint--- +// +// All these are stored in the kv db. + +const entitiesKey = (type: EntityType) => `entity/${type}`; +const entityKeyKey = (type: EntityType) => `entity/${type}/key`; +const latestUpdatedAtKey = (type: EntityType) => `entity/${type}/time`; + +// ^--- + +/** + * A locally persisted user entity. + */ +export interface LocalUserEntity { + /** + * The UUID or nanoid of the entity. + */ + id: string; + /** + * Arbitrary (decrypted) data associated with the entity. The format of this + * data is specific to each entity type, but it is guaranteed to be JSON + * serializable. + */ + data: unknown; + /** + * Epoch microseconds denoting when this entity was last changed (created or + * updated). + */ + updatedAt: number; +} + +const LocalUserEntity = z.object({ + id: z.string(), + // Retain the data verbatim. + data: z.object({}).passthrough(), + updatedAt: z.number(), +}); + +/** + * Update the list of locally persisted user entities of the given {@link type}. + */ +export const saveEntities = (type: EntityType, items: LocalUserEntity[]) => + setKV(entitiesKey(type), items); + +/** + * Return the list of locally persisted user entities of the given {@link type}. + */ +export const savedEntities = async ( + type: EntityType, +): Promise<LocalUserEntity[]> => + // See: [Note: strict mode migration] + // + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore + LocalUserEntity.array().parse((await getKV(entitiesKey(type))) ?? []); + +/** + * Save the {@link entityKey} for the given user entity {@link type} to our + * local database. + */ +export const saveRemoteUserEntityKey = ( + type: EntityType, + entityKey: RemoteUserEntityKey, +) => setKV(entityKeyKey(type), entityKey); + +/** + * Return the locally persisted {@link RemoteUserEntityKey}, if any, + * corresponding to the given user entity {@link type}. + */ +export const savedRemoteUserEntityKey = ( + type: EntityType, +): Promise<RemoteUserEntityKey | undefined> => + getKV(entityKeyKey(type)).then((s) => + s ? RemoteUserEntityKey.parse(s) : undefined, + ); + +/** + * Save the latest `updatedAt` {@link value} for the given user entity + * {@link type} to our local database. + */ +export const saveLatestUpdatedAt = (type: EntityType, value: number) => + setKV(latestUpdatedAtKey(type), value); + +/** + * Return the locally persisted value for the latest `updatedAt` time for the + * given user entity {@link type}. + * + * This is used to checkpoint diffs, so that we can resume fetching from the + * last time we did a fetch.
+ */ +export const savedLatestUpdatedAt = (type: EntityType) => + getKVN(latestUpdatedAtKey(type)); diff --git a/web/packages/new/photos/services/user-entity/index.ts b/web/packages/new/photos/services/user-entity/index.ts new file mode 100644 index 0000000000..35e3709288 --- /dev/null +++ b/web/packages/new/photos/services/user-entity/index.ts @@ -0,0 +1,223 @@ +import { + decryptBoxB64, + encryptBoxB64, + generateNewBlobOrStreamKey, +} from "@/base/crypto"; +import { nullToUndefined } from "@/utils/transform"; +import { z } from "zod"; +import { gunzip } from "../../utils/gzip"; +import { + savedEntities, + savedLatestUpdatedAt, + savedRemoteUserEntityKey, + saveEntities, + saveLatestUpdatedAt, + saveRemoteUserEntityKey, + type LocalUserEntity, +} from "./db"; +import { + getUserEntityKey, + postUserEntityKey, + RemoteUserEntityKey, + userEntityDiff, +} from "./remote"; + +/** + * User entities are predefined lists of otherwise arbitrary data that the user + * can store for their account. + * + * e.g. location tags, cluster groups. + */ +export type EntityType = + /** + * A location tag. + * + * The entity data is base64(encrypt(json)) + */ + | "location" + /** + * A cluster group. + * + * The entity data is base64(encrypt(gzip(json))) + */ + | "cgroup"; + +/** + * Zod schema for the fields of interest in the location tag that we get from + * remote. + */ +const RemoteLocationTag = z.object({ + name: z.string(), + radius: z.number(), + centerPoint: z.object({ + latitude: z.number(), + longitude: z.number(), + }), +}); + +/** + * A view of the location tag data suitable for use by the rest of the app. + */ +export type LocationTag = z.infer<typeof RemoteLocationTag>; + +/** + * Return the list of locally available location tags. + */ +export const savedLocationTags = (): Promise<LocationTag[]> => + savedEntities("location").then((es) => + es.map((e) => RemoteLocationTag.parse(e.data)), + ); + +const RemoteFaceCluster = z.object({ + id: z.string(), + faces: z.string().array(), +}); + +/** + * Zod schema for the fields of interest in the cgroup that we get from remote. + */ +const RemoteCGroup = z.object({ + name: z.string().nullish().transform(nullToUndefined), + assigned: z.array(RemoteFaceCluster), + // The remote cgroup also has a "rejected" property, but that is not + // currently used by any of the clients. + isHidden: z.boolean(), + avatarFaceID: z.string().nullish().transform(nullToUndefined), +}); + +export type RemoteCGroup = z.infer<typeof RemoteCGroup>; + +export type CGroupUserEntity = Omit<LocalUserEntity, "data"> & { + data: RemoteCGroup; +}; + +/** + * Return the list of locally available cgroup user entities. + */ +export const savedCGroups = (): Promise<CGroupUserEntity[]> => + savedEntities("cgroup").then((es) => + es.map((e) => ({ ...e, data: RemoteCGroup.parse(e.data) })), + ); + +/** + * Update our local entities of the given {@link type} by pulling the latest + * changes from remote. + * + * This fetches all the user entities corresponding to the given user entity + * type from remote that have been created, updated or deleted since the last + * time we checked. + * + * This diff is then applied to the data we have persisted locally. + * + * It uses local state to remember the latest entry it fetched the last time it + * did a pull, so each subsequent pull is a lightweight diff. + * + * @param masterKey The user's masterKey, which is used to decrypt the entity + * key (or encrypt it, when generating a new one).
+ */ +export const pullUserEntities = async ( + type: EntityType, + masterKey: Uint8Array, +) => { + const entityKeyB64 = await getOrCreateEntityKeyB64(type, masterKey); + + const isGzipped = type == "cgroup"; + + let sinceTime = (await savedLatestUpdatedAt(type)) ?? 0; + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition, no-constant-condition + while (true) { + const diff = await userEntityDiff(type, sinceTime, entityKeyB64); + if (diff.length == 0) break; + + const entityByID = new Map( + (await savedEntities(type)).map((e) => [e.id, e]), + ); + + for (const { id, data, updatedAt } of diff) { + if (data) { + const s = isGzipped + ? await gunzip(data) + : new TextDecoder().decode(data); + entityByID.set(id, { id, data: JSON.parse(s), updatedAt }); + } else { + entityByID.delete(id); + } + sinceTime = Math.max(sinceTime, updatedAt); + } + + await saveEntities(type, [...entityByID.values()]); + await saveLatestUpdatedAt(type, sinceTime); + } +}; + +/** + * Return the entity key that can be used to decrypt the encrypted contents of + * user entities of the given {@link type}. + * + * 1. See if we have the encrypted entity key present locally. If so, return + * the entity key by decrypting it using the user's master key. + * + * 2. Otherwise fetch the encrypted entity key for that type from remote. If we + * get one, obtain the entity key by decrypting the encrypted one using the + * user's master key, save it locally for future use, and return it. + * + * 3. Otherwise generate a new entity key, encrypt it using the user's master + * key, put the encrypted one to remote and also save it locally, and + * return it. + * + * See also, [Note: User entity keys]. + */ +const getOrCreateEntityKeyB64 = async ( + type: EntityType, + masterKey: Uint8Array, +) => { + // See if we already have it locally. + const saved = await savedRemoteUserEntityKey(type); + if (saved) return decryptEntityKey(saved, masterKey); + + // See if remote already has it. + const existing = await getUserEntityKey(type); + if (existing) { + // Only save it if we can decrypt it to avoid corrupting our local state + // in unforeseen circumstances. + const result = await decryptEntityKey(existing, masterKey); + await saveRemoteUserEntityKey(type, existing); + return result; + } + + // Nada. Create a new one, put it to remote, save it locally, and return. + + // As a sanity check, generate the key but immediately encrypt it as if it + // were fetched from remote and then try to decrypt it before doing anything + // with it. + const generated = await generateNewEncryptedEntityKey(masterKey); + const result = decryptEntityKey(generated, masterKey); + await postUserEntityKey(type, generated); + await saveRemoteUserEntityKey(type, generated); + return result; +}; + +const generateNewEncryptedEntityKey = async (masterKey: Uint8Array) => { + const { encryptedData, nonce } = await encryptBoxB64( + await generateNewBlobOrStreamKey(), + masterKey, + ); + // Remote calls it the header, but it really is the nonce. + return { encryptedKey: encryptedData, header: nonce }; +}; + +/** + * Decrypt an encrypted entity key using the user's master key. + */ +const decryptEntityKey = async ( + remote: RemoteUserEntityKey, + masterKey: Uint8Array, +) => + decryptBoxB64( + { + encryptedData: remote.encryptedKey, + // Remote calls it the header, but it really is the nonce.
+ nonce: remote.header, + }, + masterKey, + ); diff --git a/web/packages/new/photos/services/user-entity/remote.ts b/web/packages/new/photos/services/user-entity/remote.ts new file mode 100644 index 0000000000..c32e253af4 --- /dev/null +++ b/web/packages/new/photos/services/user-entity/remote.ts @@ -0,0 +1,202 @@ +import { decryptBlob } from "@/base/crypto"; +import { authenticatedRequestHeaders, ensureOk, HTTPError } from "@/base/http"; +import { apiURL } from "@/base/origins"; +import { ensure } from "@/utils/ensure"; +import { z } from "zod"; +import type { EntityType } from "."; + +/** + * The maximum number of items to fetch in a single diff + * + * [Note: Limit of returned items in /diff requests] + * + * The various GET /diff API methods, which tell the client what all has changed + * since a timestamp (provided by the client) take a limit parameter. + * + * These diff API calls return all items whose updated at is greater + * (non-inclusive) than the timestamp we provide. So there is no mechanism for + * pagination of items which have the exact same updated at. + * + * Conceptually, it may happen that there are more items than the limit we've + * provided, but there are practical safeguards. + * + * For file diff, the limit is advisory, and remote may return less, equal or + * more items than the provided limit. The scenario where it returns more is + * when more files than the limit have the same updated at. Theoretically it + * would make the diff response unbounded, however in practice file + * modifications themselves are all batched. Even if the user were to select all + * the files in their library and updates them all in one go in the UI, their + * client app is required (with server side enforcement) to use batched API + * calls to make those updates. Thus, each of those batches would get distinct + * updated at values. + * + * For entity diff, there are no bulk operations yet that can result in the + * assignment of the same microsecond to hundreds of items. + */ +const defaultDiffLimit = 500; + +/** + * An entry in the user entity diff. + * + * Each change either contains the latest data associated with a particular user + * entity that has been created or updated, or indicates that the corresponding + * entity has been deleted. + */ +export interface UserEntityChange { + /** + * The UUID or nanoid of the entity. + */ + id: string; + /** + * Arbitrary (decrypted) data associated with the entity. The format of this + * data is specific to each entity type. + * + * This will not be present for entities that have been deleted on remote. + */ + data: Uint8Array | undefined; + /** + * Epoch microseconds denoting when this entity was last changed (created or + * updated or deleted). + */ + updatedAt: number; +} + +/** + * Zod schema for a item in the user entity diff. + */ +const RemoteUserEntityChange = z.object({ + id: z.string(), + /** + * Base64 string containing the encrypted contents of the entity. + * + * Will be `null` when isDeleted is true. + */ + encryptedData: z.string().nullable(), + /** + * Base64 string containing the decryption header. + * + * Will be `null` when isDeleted is true. + */ + header: z.string().nullable(), + isDeleted: z.boolean(), + updatedAt: z.number(), +}); + +/** + * Fetch the next set of changes (upserts or deletions) to user entities of the + * given type since the given time. + * + * @param type The type of the entities to fetch. + * + * @param sinceTime Epoch milliseconds. 
This is used to ask remote to provide us + * only entities whose {@link updatedAt} is more than the given value. Set this + * to zero to start from the beginning. + * + * @param entityKeyB64 The base64 encoded key to use for decrypting the + * encrypted contents of the user entity. + * + * [Note: Diff response will have at most one entry for an id] + * + * Unlike git diffs which track all changes, the diffs we get from remote are + * guaranteed to contain only one entry (upsert or delete) for a particular Ente + * object. This holds true irrespective of the diff limit. + * + * For example, in a user entity diff, it is guaranteed that there will only be + * at max one entry for a particular entity id. The entry will have no data to + * indicate that the corresponding entity was deleted. Otherwise, when the data + * is present, it is taken as the creation of a new entity or the updation of an + * existing one. + * + * This behaviour comes from how remote stores the underlying, say, entities. A + * diff returns just entities whose updation times are greater than the provided + * since time (limited to the given diff limit). So there will be at most one + * row for a particular entity id. And if that entity has been deleted, then the + * row will be a tombstone, so its data will be absent. + */ +export const userEntityDiff = async ( + type: EntityType, + sinceTime: number, + entityKeyB64: string, +): Promise<UserEntityChange[]> => { + const decrypt = (encryptedData: string, decryptionHeader: string) => + decryptBlob({ encryptedData, decryptionHeader }, entityKeyB64); + + const params = new URLSearchParams({ + type, + sinceTime: sinceTime.toString(), + limit: defaultDiffLimit.toString(), + }); + const url = await apiURL(`/user-entity/entity/diff`); + const res = await fetch(`${url}?${params.toString()}`, { + headers: await authenticatedRequestHeaders(), + }); + ensureOk(res); + const diff = z + .object({ diff: z.array(RemoteUserEntityChange) }) + .parse(await res.json()).diff; + return Promise.all( + diff.map( + async ({ id, encryptedData, header, isDeleted, updatedAt }) => ({ + id, + data: !isDeleted + ? await decrypt(ensure(encryptedData), ensure(header)) + : undefined, + updatedAt, + }), + ), + ); +}; + +/** + * Fetch the encryption key for the given user entity {@link type} from remote. + * + * [Note: User entity keys] + * + * There is one encryption key (itself encrypted with the user's master key) for + * each user entity type. If the key doesn't exist on remote, then the client is + * expected to create one on the user's behalf. Remote will disallow attempts to + * create multiple keys for the same user entity type. + */ +export const getUserEntityKey = async ( + type: EntityType, +): Promise<RemoteUserEntityKey | undefined> => { + const params = new URLSearchParams({ type }); + const url = await apiURL("/user-entity/key"); + const res = await fetch(`${url}?${params.toString()}`, { + headers: await authenticatedRequestHeaders(), + }); + if (!res.ok) { + // Remote says HTTP 404 Not Found if there is no key yet for the user. + if (res.status == 404) return undefined; + throw new HTTPError(res); + } else { + return RemoteUserEntityKey.parse(await res.json()); + } +}; + +export const RemoteUserEntityKey = z.object({ + /** Base64 encoded entity key, encrypted with the user's master key. */ + encryptedKey: z.string(), + /** Base64 encoded nonce used during encryption of this entity key. */ + header: z.string(), +}); + +export type RemoteUserEntityKey = z.infer<typeof RemoteUserEntityKey>; + +/** + * Create a new encryption key for the given user entity {@link type} on remote.
+ * + * See: [Note: User entity keys] + */ +export const postUserEntityKey = async ( + type: EntityType, + entityKey: RemoteUserEntityKey, +) => { + const url = await apiURL("/user-entity/key"); + const res = await fetch(url, { + method: "POST", + headers: await authenticatedRequestHeaders(), + body: JSON.stringify({ type, ...entityKey }), + }); + ensureOk(res); +}; diff --git a/web/packages/new/photos/services/gzip.ts b/web/packages/new/photos/utils/gzip.ts similarity index 100% rename from web/packages/new/photos/services/gzip.ts rename to web/packages/new/photos/utils/gzip.ts diff --git a/web/packages/shared/storage/localStorage/index.ts b/web/packages/shared/storage/localStorage/index.ts index a90c1d838b..2ba7e17f28 100644 --- a/web/packages/shared/storage/localStorage/index.ts +++ b/web/packages/shared/storage/localStorage/index.ts @@ -1,4 +1,4 @@ -import { getKV, removeKV, setKV } from "@/base/kv"; +import { getKVS, removeKV, setKV } from "@/base/kv"; import log from "@/base/log"; export enum LS_KEYS { @@ -78,7 +78,7 @@ export const migrateKVToken = async (user: unknown) => { typeof oldLSUser == "object" && "token" in oldLSUser && typeof oldLSUser.token == "string" && - !(await getKV("token")); + !(await getKVS("token")); user && typeof user == "object" &&
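
Usage sketch (illustrative, not part of the diff): the snippet below shows how the reworked APIs in this change are expected to compose — the runtime-checked getKVS helper for primitive KV values, the now JSON-valued setKV, and the generic pullUserEntities plus the savedLocationTags/savedCGroups accessors that replace the per-type pullLocationTags/pullCGroups functions. The function name exampleSync and the exact import specifiers are assumptions for illustration, not code from this PR.

    import { getKVS, setKV } from "@/base/kv";
    import { masterKeyFromSession } from "@/base/session-store";
    import {
        pullUserEntities,
        savedCGroups,
        savedLocationTags,
    } from "@/new/photos/services/user-entity";

    export const exampleSync = async () => {
        // Primitive KV values keep a typed accessor that does a runtime check.
        const token = await getKVS("token");
        console.log("have token?", !!token);

        // Arbitrary JSON values can now be stored directly; the old API only
        // accepted strings, numbers, or booleans.
        await setKV("example", { hello: "world" });

        // Location tags and cgroups go through the same generic pull,
        // parameterized by the entity type.
        const masterKey = await masterKeyFromSession();
        await pullUserEntities("location", masterKey);
        await pullUserEntities("cgroup", masterKey);

        // Read back the locally persisted, already-parsed entities.
        const tags = await savedLocationTags();
        const cgroups = await savedCGroups();
        console.log(`${tags.length} location tags, ${cgroups.length} cgroups`);
    };

Callers that relied on the old string-only getKV should move to getKVS, mirroring the updates to local-user.ts, origins.ts, DevSettings.tsx, and the localStorage migration in this diff.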