From 4019afdd901138f23038b43a5352bdfc41ebc0b2 Mon Sep 17 00:00:00 2001 From: Manav Rathi Date: Sat, 6 Jul 2024 10:23:33 +0530 Subject: [PATCH] [desktop] Consider HTTP 4xx errors when PUT-ing embeddings as perm failures Ref: - https://github.com/ente-io/ente/pull/2369 - https://github.com/ente-io/ente/pull/2368 --- web/packages/new/photos/services/ml/worker.ts | 35 +++++++++++++++++-- web/packages/next/http.ts | 10 ++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/web/packages/new/photos/services/ml/worker.ts b/web/packages/new/photos/services/ml/worker.ts index c7773b6ead..2377e17aa5 100644 --- a/web/packages/new/photos/services/ml/worker.ts +++ b/web/packages/new/photos/services/ml/worker.ts @@ -1,6 +1,7 @@ import type { EnteFile } from "@/new/photos/types/file"; import { fileLogID } from "@/new/photos/utils/file"; import { clientPackageName } from "@/next/app"; +import { isHTTP4xxError } from "@/next/http"; import { getKVN } from "@/next/kv"; import { ensureAuthToken } from "@/next/local-user"; import log from "@/next/log"; @@ -329,19 +330,47 @@ export const index = async ( try { faceIndex = await indexFaces(enteFile, uploadItem, electron, userAgent); } catch (e) { - // Mark indexing as having failed only if the indexing itself - // failed, not if there were subsequent failures (like when trying - // to put the result to remote or save it to the local face DB). log.error(`Failed to index faces in ${f}`, e); await markIndexingFailed(enteFile.id); throw e; } + // [Note: Transient and permanent indexing failures] + // + // Generally speaking, we mark indexing for a file as having failed only if + // the indexing itself failed, not if there were subsequent failures (like + // when trying to put the result to remote or save it to the local face DB). + // + // When we mark it as failed, then a flag is persisted corresponding to this + // file in the ML DB so that it won't get reindexed in future runs. This are + // thus considered as permanent failures. + // + // > We might retry in future versions if we identify reasons for indexing + // > to fail (it shouldn't) and rectify them. + // + // On the other hand, saving the face index to remote might fail for + // transient issues (network issues, or remote having hiccups). We don't + // mark a file as failed permanently in such cases, so that it gets retried + // at some point. These are considered as transient failures. + // + // However, this opens the possibility of some non-transient failure getting + // classified as a transient failure and causing the client to try and index + // the same file again and again, when in fact there is a issue specific to + // that file which is preventing the index from being saved. What exactly? + // We don't know, but the possibility exists. + // + // To reduce the chances of this happening, we treat HTTP 4xx responses as + // permanent failures too - there are no known cases where a client retrying + // a 4xx response would work, and there are known (but rare) cases where a + // client might get a 4xx (e.g. if the file has over ~700 faces, then remote + // will return a 413 Request Entity Too Large). + try { await putFaceIndex(enteFile, faceIndex); await saveFaceIndex(faceIndex); } catch (e) { log.error(`Failed to put/save face index for ${f}`, e); + if (isHTTP4xxError(e)) await markIndexingFailed(enteFile.id); throw e; } diff --git a/web/packages/next/http.ts b/web/packages/next/http.ts index 8ad2674298..4cfb5cfdff 100644 --- a/web/packages/next/http.ts +++ b/web/packages/next/http.ts @@ -39,3 +39,13 @@ export class HTTPError extends Error { this.res = res; } } + +/** + * Return true if this is a HTTP "client" error. + * + * This is a convenience matcher to check if {@link e} is an instance of + * {@link HTTPError} with a 4xx status code. Such errors are client errors, and + * (generally) retrying them will not help. + */ +export const isHTTP4xxError = (e: unknown) => + e instanceof HTTPError && e.res.status >= 400 && e.res.status <= 499;