diff --git a/web/packages/new/photos/services/ml/embedding.ts b/web/packages/new/photos/services/ml/embedding.ts index 6c9e9ad140..743336e035 100644 --- a/web/packages/new/photos/services/ml/embedding.ts +++ b/web/packages/new/photos/services/ml/embedding.ts @@ -47,26 +47,6 @@ import { type RemoteFaceIndex } from "./face"; * * Thus, now this is best thought of a tag for a particular format of encoding * all the derived data associated with a file. - * - * [Note: Handling versioning of embeddings] - * - * The embeddings themselves have a version embedded in them, so it is possible - * for us to make backward compatible updates to the indexing process on newer - * clients (There is also a top level version field too but that is not used). - * - * If we bump the version of same model (say when indexing on a newer client), - * the assumption will be that older client will be able to consume the - * response. e.g. Say if we improve blur detection, older client should just - * consume embeddings with a newer version and not try to index the file again - * locally. - * - * If we get an embedding with version that is older than the version the client - * supports, then the client should ignore it. This way, the file will get - * reindexed locally an embedding with a newer version will get put to remote. - * - * In the case where the changes are not backward compatible and can only be - * consumed by clients with the relevant scaffolding, then we change this - * "model" (i.e "type") field to create a new universe of embeddings. */ export type EmbeddingModel = // TODO-ML: prune diff --git a/web/packages/new/photos/services/ml/face.ts b/web/packages/new/photos/services/ml/face.ts index 139c27ad25..910970d3b9 100644 --- a/web/packages/new/photos/services/ml/face.ts +++ b/web/packages/new/photos/services/ml/face.ts @@ -80,9 +80,30 @@ export type RemoteFaceIndex = FaceIndex & { /** * An integral version number of the indexing algorithm / pipeline. * + * [Note: Embedding versions] + * + * Embeddings have an associated version so it is possible for us to make + * backward compatible updates to the indexing process on newer clients. + * * Clients agree out of band what a particular version means, and guarantee * that an embedding with a particular version will be the same (to epsilon * cosine similarity) irrespective of the client that indexed the file. + * + * If we bump the version of same model (say when indexing on a newer + * client), we will do it in a manner that older client will be able to + * consume the response. The schema should not change in non-additive + * manners. For example, say if we improve blur detection, older client + * should just consume embeddings with a newer version and not try to index + * the file again locally. + * + * When fetching from remote, if we get an embedding with version that is + * older than the version the client supports, then the client should ignore + * it. This way, the file will get reindexed locally and an embedding with a + * newer version will also get saved to remote. + * + * In the case where the changes are not backward compatible and can only be + * consumed by clients by making code changes, then we will introduce a new + * subtype (top level key) in the derived data. */ version: number; /** diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index 61d489d325..54925c8298 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -368,18 +368,27 @@ const index = async ( // Massage the existing data (if any) that we got from remote to the form // that the rest of this function operates on. + // + // Discard any existing data that is made by an older indexing pipelines. + // See: [Note: Embedding versions] const existingRemoteFaceIndex = remoteDerivedData?.parsed?.face; const existingRemoteCLIPIndex = remoteDerivedData?.parsed?.clip; let existingFaceIndex: FaceIndex | undefined; - if (existingRemoteFaceIndex) { + if ( + existingRemoteFaceIndex && + existingRemoteFaceIndex.version >= faceIndexingVersion + ) { const { width, height, faces } = existingRemoteFaceIndex; existingFaceIndex = { width, height, faces }; } let existingCLIPIndex: CLIPIndex | undefined; - if (existingRemoteCLIPIndex) { + if ( + existingRemoteCLIPIndex && + existingRemoteCLIPIndex.version >= clipIndexingVersion + ) { const { embedding } = existingRemoteCLIPIndex; existingCLIPIndex = { embedding }; }