[desktop] Clustering WIP - Part x/x (#3040)

Manav Rathi
2024-08-29 21:03:09 +05:30
committed by GitHub
7 changed files with 374 additions and 146 deletions

View File

@@ -4,10 +4,10 @@ import {
faceCrop,
wipClusterDebugPageContents,
type ClusterDebugPageContents,
type FaceFileNeighbour,
type FaceFileNeighbours,
type ClusterPreviewFaceWF,
type ClusterPreviewWF,
} from "@/new/photos/services/ml";
import type { Face } from "@/new/photos/services/ml/face";
import { faceDirection } from "@/new/photos/services/ml/face";
import {
FlexWrapper,
FluidContainer,
@@ -15,7 +15,7 @@ import {
} from "@ente/shared/components/Container";
import EnteSpinner from "@ente/shared/components/EnteSpinner";
import BackButton from "@mui/icons-material/ArrowBackOutlined";
import { Box, IconButton, styled, Typography } from "@mui/material";
import { Box, IconButton, Stack, styled, Typography } from "@mui/material";
import { useRouter } from "next/router";
import { AppContext } from "pages/_app";
import React, { useContext, useEffect, useMemo, useRef, useState } from "react";
@@ -49,13 +49,22 @@ export default function ClusterDebug() {
}
return (
<>
<Typography variant="small">
{`${clusterRes.clusters.length} clusters`}
</Typography>
<Typography variant="small" color="text.muted">
Showing only upto first 30 faces (and only upto 30 nearest
neighbours of each).
</Typography>
<Stack m={1}>
<Typography variant="small" mb={1}>
{`${clusterRes.clusters.length} clusters from ${clusterRes.clusteredCount} faces. ${clusterRes.unclusteredCount} unclustered faces.`}
</Typography>
<Typography variant="small" color="text.muted">
Showing only top 30 and bottom 30 clusters.
</Typography>
<Typography variant="small" color="text.muted">
For each cluster, showing only up to 50 faces, sorted by
cosine similarity to the highest scoring face in the cluster.
</Typography>
<Typography variant="small" color="text.muted">
Below each face is its{" "}
<b>blur - score - cosineSimilarity - direction</b>
</Typography>
</Stack>
<hr />
<Container>
<AutoSizer>
@@ -112,7 +121,7 @@ const ClusterPhotoList: React.FC<ClusterPhotoListProps> = ({
width,
clusterRes,
}) => {
const { faceFNs, clusterIDForFaceID } = clusterRes;
const { clusterPreviewWFs, clusterIDForFaceID } = clusterRes;
const [itemList, setItemList] = useState<ItemListItem[]>([]);
const listRef = useRef(null);
@@ -125,8 +134,8 @@ const ClusterPhotoList: React.FC<ClusterPhotoListProps> = ({
const listItemHeight = 120 * shrinkRatio + 24 + 4;
useEffect(() => {
setItemList(itemListFromFaceFNs(faceFNs, columns));
}, [columns, faceFNs]);
setItemList(itemListFromClusterPreviewWFs(clusterPreviewWFs, columns));
}, [columns, clusterPreviewWFs]);
useEffect(() => {
listRef.current?.resetAfterIndex(0);
@@ -138,7 +147,7 @@ const ClusterPhotoList: React.FC<ClusterPhotoListProps> = ({
const generateKey = (i: number) =>
Array.isArray(itemList[i])
? `${itemList[i][0].enteFile.id}/${itemList[i][0].face.faceID}-${itemList[i].slice(-1)[0].enteFile.id}/${itemList[i].slice(-1)[0].face.faceID}-${i}`
: `${itemList[i].faceID}-${i}`;
: `${itemList[i]}-${i}`;
return (
<VariableSizeList
@@ -163,13 +172,13 @@ const ClusterPhotoList: React.FC<ClusterPhotoListProps> = ({
>
{!Array.isArray(item) ? (
<LabelContainer span={columns}>
{`score ${item.score.toFixed(2)} blur ${item.blur.toFixed(0)}`}
{`cluster size ${item}`}
</LabelContainer>
) : (
item.map((faceFN, i) => (
item.map((faceWF, i) => (
<FaceItem
key={i.toString()}
{...{ faceFN, clusterIDForFaceID }}
{...{ faceWF, clusterIDForFaceID }}
/>
))
)}
@@ -181,19 +190,20 @@ const ClusterPhotoList: React.FC<ClusterPhotoListProps> = ({
);
};
type ItemListItem = Face | FaceFileNeighbour[];
// type ItemListItem = Face | FaceFileNeighbour[];
type ItemListItem = number | ClusterPreviewFaceWF[];
const itemListFromFaceFNs = (
faceFNs: FaceFileNeighbours[],
const itemListFromClusterPreviewWFs = (
clusterPreviewWFs: ClusterPreviewWF[],
columns: number,
) => {
const result: ItemListItem[] = [];
for (let index = 0; index < faceFNs.length; index++) {
const { face, neighbours } = faceFNs[index];
result.push(face);
for (let index = 0; index < clusterPreviewWFs.length; index++) {
const { clusterSize, faces } = clusterPreviewWFs[index];
result.push(clusterSize);
let lastIndex = 0;
while (lastIndex < neighbours.length) {
result.push(neighbours.slice(lastIndex, lastIndex + columns));
while (lastIndex < faces.length) {
result.push(faces.slice(lastIndex, lastIndex + columns));
lastIndex += columns;
}
}
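// Illustration (not part of this diff): with columns = 3, a cluster of
// size 5 yields the items [5, [f1, f2, f3], [f4, f5]]. The bare number
// renders as the cluster's header row, and each array renders as one row
// of up to `columns` faces.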
@@ -210,12 +220,12 @@ const getShrinkRatio = (width: number, columns: number) =>
(columns * 120);
interface FaceItemProps {
faceFN: FaceFileNeighbour;
faceWF: ClusterPreviewFaceWF;
clusterIDForFaceID: Map<string, string>;
}
const FaceItem: React.FC<FaceItemProps> = ({ faceFN, clusterIDForFaceID }) => {
const { face, enteFile, cosineSimilarity } = faceFN;
const FaceItem: React.FC<FaceItemProps> = ({ faceWF, clusterIDForFaceID }) => {
const { face, enteFile, cosineSimilarity } = faceWF;
const { faceID } = face;
const [objectURL, setObjectURL] = useState<string | undefined>();
@@ -235,6 +245,8 @@ const FaceItem: React.FC<FaceItemProps> = ({ faceFN, clusterIDForFaceID }) => {
};
}, [faceID, enteFile]);
const fd = faceDirection(face.detection);
const d = fd == "straight" ? "•" : fd == "left" ? "←" : "→";
return (
<FaceChip
style={{
@@ -252,9 +264,20 @@ const FaceItem: React.FC<FaceItemProps> = ({ faceFN, clusterIDForFaceID }) => {
src={objectURL}
/>
)}
<Typography variant="small" color="text.muted" textAlign="right">
{cosineSimilarity.toFixed(2)}
</Typography>
<Stack direction="row" justifyContent="space-between">
<Typography variant="small" color="text.muted">
{`b${face.blur.toFixed(0)} `}
</Typography>
<Typography variant="small" color="text.muted">
{`s${face.score.toFixed(1)}`}
</Typography>
<Typography variant="small" color="text.muted">
{`c${cosineSimilarity.toFixed(1)}`}
</Typography>
<Typography variant="small" color="text.muted">
{`d${d}`}
</Typography>
</Stack>
</FaceChip>
);
};

View File

@@ -8,7 +8,6 @@ import {
enableML,
mlStatusSnapshot,
mlStatusSubscribe,
wipCluster,
wipClusterEnable,
type MLStatus,
} from "@/new/photos/services/ml";
@@ -341,7 +340,7 @@ const ManageML: React.FC<ManageMLProps> = ({
// TODO-Cluster
const router = useRouter();
const wipClusterNow = () => wipCluster();
// const wipClusterNow = () => wipCluster();
const wipClusterShowNow = () => router.push("/cluster-debug");
return (
@@ -391,18 +390,20 @@ const ManageML: React.FC<ManageMLProps> = ({
<Box>
<MenuItemGroup>
<EnteMenuItem
label={ut("Create clusters • internal only option")}
onClick={wipClusterNow}
label={ut(
"Create clusters • internal only option",
)}
onClick={wipClusterShowNow}
/>
</MenuItemGroup>
<MenuSectionTitle
title={ut(
"Create in-memory clusters from arbitrary 2k photos. Nothing will be saved or synced to remote. You can view the results in search dropdown.",
"Create and show in-memory clusters. Takes ~ 1 min. Nothing will be saved or synced to remote. You can also view all clusters in the search dropdown later.",
)}
/>
</Box>
)}
{showClusterOpt && (
{/* {showClusterOpt && (
<Box>
<MenuItemGroup>
<EnteMenuItem
@@ -416,7 +417,7 @@ const ManageML: React.FC<ManageMLProps> = ({
)}
/>
</Box>
)}
)} */}
</Stack>
);
};

View File

@@ -124,6 +124,16 @@ interface FaceNeighbour {
cosineSimilarity: number;
}
export interface ClusterPreview {
clusterSize: number;
faces: ClusterPreviewFace[];
}
interface ClusterPreviewFace {
face: Face;
cosineSimilarity: number;
}
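// Note: clusterSize is the size of the full cluster, while faces may be
// truncated (the preview construction below keeps only the top 50 faces,
// sorted by cosine similarity to the cluster's highest scoring face).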
/**
* Cluster faces into groups.
*
@@ -338,115 +348,256 @@ function* enumerateFaces(faceIndices: FaceIndex[]) {
}
}
export const clusterFacesHdb = async (faceIndexes: FaceIndex[]) => {
export const clusterFacesHdb = (faceIndexes: FaceIndex[]) => {
const t = Date.now();
// A flattened array of faces.
// TODO-Cluster note the 2k slice
const faces = [...enumerateFaces(faceIndexes)].slice(0, 2000);
// TODO-Cluster ad-hoc filtering and slicing
const faces0 = [...enumerateFaces(faceIndexes)].filter((f) => f.blur > 99);
// .slice(0, 6000);
// TODO-Cluster testing code, can be removed once done
const faces = Array(1)
.fill(0)
.flatMap(() => faces0);
// For fast reverse lookup - map from face ids to the face.
const faceForFaceID = new Map(faces.map((f) => [f.faceID, f]));
const faceEmbeddings = faces.map(({ embedding }) => embedding);
const {
clusters: clusterIndices,
noise,
debugInfo,
} = clusterFacesHdbscan(faceEmbeddings);
log.info({ method: "hdbscan", clusterIndices, noise, debugInfo });
log.info(
`Clustered ${faces.length} faces into ${clusterIndices.length} clusters (${Date.now() - t} ms)`,
);
// For fast reverse lookup - map from cluster ids to their index in the
// clusters array.
const clusterIndexForClusterID = new Map<string, number>();
// For fast reverse lookup - map from face ids to the id of the cluster to
// which they belong.
// For fast reverse lookup - map from the id of a face to the id of the
// cluster to which it belongs.
const clusterIDForFaceID = new Map<string, string>();
// A function to chain the two reverse lookups.
const firstFaceOfCluster = (cluster: FaceCluster) =>
ensure(faceForFaceID.get(ensure(cluster.faceIDs[0])));
// A function to generate new cluster IDs.
const newClusterID = () => newNonSecureID("cluster_");
// Convert the numerical face indices into the result.
// The resultant clusters.
// TODO-Cluster Later on, instead of starting from a blank slate, this will
// be list of existing clusters we fetch from remote.
const clusters: FaceCluster[] = [];
for (const [ci, faceIndices] of clusterIndices.entries()) {
const clusterID = newClusterID();
const faceIDs: string[] = [];
clusterIndexForClusterID.set(clusterID, ci);
for (const fi of faceIndices) {
// Can't find a way of avoiding the null assertion here.
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
const face = faces[fi]!;
clusterIDForFaceID.set(face.faceID, clusterID);
faceIDs.push(face.faceID);
// Process the faces in batches. The faces are already sorted by file ID,
// which is a monotonically increasing integer, so we will also have some
// temporal locality.
//
// The batch size of 2500 was derived from ad-hoc observations; clustering
// a batch of that size takes a few seconds. On a particular test dataset
// and machine, clustering 1k faces took ~2 seconds, 10k took ~2 mins, and
// 20k took ~8 mins.
// Memory usage was constant in all these cases.
//
// At around 100k faces, the clustering starts taking hours, and we start
// running into stack overflows. The stack overflows can perhaps be avoided
// by restructuring the code, but hours of uninterruptible work is not
// feasible anyway.
const batchSize = 2500;
for (let i = 0; i < faceEmbeddings.length; i += batchSize) {
const it = Date.now();
const embeddings = faceEmbeddings.slice(i, i + batchSize);
const { clusters: hdbClusters } = clusterFacesHdbscan(embeddings);
log.info(
`hdbscan produced ${hdbClusters.length} clusters from ${embeddings.length} faces (${Date.now() - it} ms)`,
);
// Merge the new clusters we got from hdbscan into the existing clusters
// if they are "near" them (using some heuristic).
//
// We need to ensure we don't change any of the existing cluster IDs,
// since these might be existing clusters we got from remote.
for (const hdbCluster of hdbClusters) {
// Find the existing cluster whose (arbitrarily chosen) first face
// is the nearest neighbour of the (arbitrarily chosen) first face
// of the cluster produced by hdbscan.
const newFace = ensure(faces[i + ensure(hdbCluster[0])]);
let nnCluster: FaceCluster | undefined;
let nnCosineSimilarity = 0;
for (const existingCluster of clusters) {
const existingFace = firstFaceOfCluster(existingCluster);
// The vectors are already normalized, so we can directly use their
// dot product as their cosine similarity.
const csim = dotProduct(
existingFace.embedding,
newFace.embedding,
);
// Use a higher cosine similarity threshold if either of the two
// faces is blurry.
const threshold =
existingFace.blur < 200 || newFace.blur < 200 ? 0.9 : 0.7;
if (csim > threshold && csim > nnCosineSimilarity) {
nnCluster = existingCluster;
nnCosineSimilarity = csim;
}
}
if (nnCluster) {
// If we found an existing cluster that is near enough, merge the
// cluster produced by hdbscan into it.
for (const j of hdbCluster) {
const { faceID } = ensure(faces[i + j]);
nnCluster.faceIDs.push(faceID);
clusterIDForFaceID.set(faceID, nnCluster.id);
}
} else {
// Otherwise make a new cluster from the cluster produced by
// hdbscan.
const clusterID = newClusterID();
const faceIDs: string[] = [];
for (const j of hdbCluster) {
const { faceID } = ensure(faces[i + j]);
faceIDs.push(faceID);
clusterIDForFaceID.set(faceID, clusterID);
}
clusterIndexForClusterID.set(clusterID, clusters.length);
clusters.push({ id: clusterID, faceIDs });
}
}
clusters.push({ id: clusterID, faceIDs });
}
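// The embeddings are unit normalized, so the dot product above doubles as
// the cosine similarity. A minimal sketch of such a helper, assuming the
// embeddings are plain numeric arrays (illustrative; the actual
// `dotProduct` helper lives elsewhere in the codebase):
//
//     const dotProduct = (a: number[], b: number[]) => {
//         if (a.length != b.length) throw new Error("length mismatch");
//         let sum = 0;
//         for (let i = 0; i < a.length; i++) sum += a[i]! * b[i]!;
//         return sum;
//     };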
// Convert into the data structure we're using to debug/visualize.
const faceAndNeigbours: FaceNeighbours[] = [];
for (const fi of faces) {
let neighbours: FaceNeighbour[] = [];
for (const fj of faces) {
// The vectors are already normalized, so we can directly use their
// dot product as their cosine similarity.
const csim = dotProduct(fi.embedding, fj.embedding);
neighbours.push({ face: fj, cosineSimilarity: csim });
}
// const faceAndNeigbours: FaceNeighbours[] = [];
// const topFaces = faces.sort((a, b) => b.score - a.score).slice(0, 30);
// for (const fi of topFaces) {
// let neighbours: FaceNeighbour[] = [];
// for (const fj of faces) {
// // The vectors are already normalized, so we can directly use their
// // dot product as their cosine similarity.
// const csim = dotProduct(fi.embedding, fj.embedding);
// neighbours.push({ face: fj, cosineSimilarity: csim });
// }
neighbours = neighbours.sort(
(a, b) => b.cosineSimilarity - a.cosineSimilarity,
// neighbours = neighbours
// .sort((a, b) => b.cosineSimilarity - a.cosineSimilarity)
// .slice(0, 30);
// faceAndNeigbours.push({ face: fi, neighbours });
// }
// Convert into the data structure we're using to debug/visualize.
//
// > Showing only top 30 and bottom 30 clusters (and only up to 50 faces in
// > each, sorted by cosine similarity to the highest scoring face in the
// > cluster).
const sortedClusters = clusters.sort(
(a, b) => b.faceIDs.length - a.faceIDs.length,
);
const debugClusters =
sortedClusters.length < 60
? sortedClusters
: sortedClusters.slice(0, 30).concat(sortedClusters.slice(-30));
const clusterPreviews: ClusterPreview[] = [];
for (const cluster of debugClusters) {
const faces = cluster.faceIDs.map((id) =>
ensure(faceForFaceID.get(id)),
);
faceAndNeigbours.push({ face: fi, neighbours });
const topFace = faces.reduce((max, face) =>
max.score > face.score ? max : face,
);
const previewFaces: ClusterPreviewFace[] = [];
for (const face of faces) {
const csim = dotProduct(topFace.embedding, face.embedding);
previewFaces.push({ face, cosineSimilarity: csim });
}
clusterPreviews.push({
clusterSize: cluster.faceIDs.length,
faces: previewFaces
.sort((a, b) => b.cosineSimilarity - a.cosineSimilarity)
.slice(0, 50),
});
}
// Prune clusters that are too small.
// TODO-Cluster this is likely not needed since hdbscan already has a min?
const validClusters = clusters.filter(({ faceIDs }) => faceIDs.length > 1);
let cgroups = await clusterGroups();
// let cgroups = await clusterGroups();
// // TODO-Cluster - Currently we're not syncing with remote or saving anything
// // locally, so cgroups will be empty. Create a temporary (unsaved, unsynced)
// // cgroup, one per cluster.
// cgroups = cgroups.concat(
// validClusters.map((c) => ({
// id: c.id,
// name: undefined,
// clusterIDs: [c.id],
// isHidden: false,
// avatarFaceID: undefined,
// displayFaceID: undefined,
// })),
// );
// // For each cluster group, use the highest scoring face in any of its
// // clusters as its display face.
// for (const cgroup of cgroups) {
// cgroup.displayFaceID = cgroup.clusterIDs
// .map((clusterID) => clusterIndexForClusterID.get(clusterID))
// .filter((i) => i !== undefined) /* 0 is a valid index */
// .flatMap((i) => clusters[i]?.faceIDs ?? [])
// .map((faceID) => faceForFaceID.get(faceID))
// .filter((face) => !!face)
// .reduce((max, face) =>
// max.score > face.score ? max : face,
// ).faceID;
// }
// TODO-Cluster - Currently we're not syncing with remote or saving anything
// locally, so cgroups will be empty. Create a temporary (unsaved, unsynced)
// cgroup, one per cluster.
cgroups = cgroups.concat(
validClusters.map((c) => ({
id: c.id,
const cgroups: CGroup[] = [];
for (const cluster of sortedClusters) {
const faces = cluster.faceIDs.map((id) =>
ensure(faceForFaceID.get(id)),
);
const topFace = faces.reduce((max, face) =>
max.score > face.score ? max : face,
);
cgroups.push({
id: cluster.id,
name: undefined,
clusterIDs: [c.id],
clusterIDs: [cluster.id],
isHidden: false,
avatarFaceID: undefined,
displayFaceID: undefined,
})),
);
// For each cluster group, use the highest scoring face in any of its
// clusters as its display face.
const faceForFaceID = new Map(faces.map((f) => [f.faceID, f]));
for (const cgroup of cgroups) {
cgroup.displayFaceID = cgroup.clusterIDs
.map((clusterID) => clusterIndexForClusterID.get(clusterID))
.filter((i) => i !== undefined) /* 0 is a valid index */
.flatMap((i) => clusters[i]?.faceIDs ?? [])
.map((faceID) => faceForFaceID.get(faceID))
.filter((face) => !!face)
.reduce((max, face) =>
max.score > face.score ? max : face,
).faceID;
displayFaceID: topFace.faceID,
});
}
log.info("ml/cluster", {
faces,
validClusters,
clusterIndexForClusterID: Object.fromEntries(clusterIndexForClusterID),
clusterIDForFaceID: Object.fromEntries(clusterIDForFaceID),
cgroups,
});
// log.info("ml/cluster", {
// faces,
// validClusters,
// clusterIndexForClusterID: Object.fromEntries(clusterIndexForClusterID),
// clusterIDForFaceID: Object.fromEntries(clusterIDForFaceID),
// cgroups,
// });
log.info(
`Clustered ${faces.length} faces into ${validClusters.length} clusters (${Date.now() - t} ms)`,
`Clustered ${faces.length} faces into ${validClusters.length} clusters, with ${faces.length - clusterIDForFaceID.size} faces remaining unclustered (${Date.now() - t} ms)`,
);
return { faces, clusters: validClusters, cgroups, faceAndNeigbours };
const clusteredCount = clusterIDForFaceID.size;
const unclusteredCount = faces.length - clusteredCount;
return {
// faces,
clusteredCount,
unclusteredCount,
clusters: validClusters,
cgroups,
clusterPreviews,
clusterIDForFaceID,
};
};

View File

@@ -24,7 +24,7 @@ export const clusterFacesHdbscan = (
minSamples: 5,
clusterSelectionEpsilon: 0.6,
clusterSelectionMethod: "leaf",
debug: true,
debug: false,
});
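// For reference (standard HDBSCAN semantics, assuming this library follows
// them): minSamples controls how conservative the density estimate is
// (higher values push more faces into noise); clusterSelectionEpsilon
// merges clusters that lie within that distance of each other; and the
// "leaf" selection method picks the leaves of the condensed tree, giving
// finer grained, more homogeneous clusters than the default "eom".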
return {

View File

@@ -714,7 +714,7 @@ const detectBlur = (
type FaceDirection = "left" | "right" | "straight";
const faceDirection = ({ landmarks }: FaceDetection): FaceDirection => {
export const faceDirection = ({ landmarks }: FaceDetection): FaceDirection => {
const leftEye = landmarks[0]!;
const rightEye = landmarks[1]!;
const nose = landmarks[2]!;
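// The rest of the function is truncated in this hunk (only its visibility
// changes in this commit). A hypothetical sketch of such a heuristic,
// assuming landmarks are {x, y} points — not the actual implementation:
//
//     const midEyeX = (leftEye.x + rightEye.x) / 2;
//     const eyeSpan = Math.abs(rightEye.x - leftEye.x);
//     const offset = (nose.x - midEyeX) / eyeSpan;
//     if (offset < -0.2) return "left";
//     if (offset > 0.2) return "right";
//     return "straight";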

View File

@@ -20,14 +20,9 @@ import { getAllLocalFiles } from "../files";
import { getRemoteFlag, updateRemoteFlag } from "../remote-store";
import type { SearchPerson } from "../search/types";
import type { UploadItem } from "../upload/types";
import { clusterFacesHdb, type CGroup, type FaceCluster } from "./cluster-new";
import { type CGroup, type FaceCluster } from "./cluster-new";
import { regenerateFaceCrops } from "./crop";
import {
clearMLDB,
faceIndex,
faceIndexes,
indexableAndIndexedCounts,
} from "./db";
import { clearMLDB, faceIndex, indexableAndIndexedCounts } from "./db";
import type { Face } from "./face";
import { MLWorker } from "./worker";
import type { CLIPMatches } from "./worker-types";
@@ -360,8 +355,23 @@ export interface FaceFileNeighbour {
cosineSimilarity: number;
}
// "with file"
export interface ClusterPreviewWF {
clusterSize: number;
faces: ClusterPreviewFaceWF[];
}
export interface ClusterPreviewFaceWF {
face: Face;
enteFile: EnteFile;
cosineSimilarity: number;
}
export interface ClusterDebugPageContents {
faceFNs: FaceFileNeighbours[];
clusteredCount: number;
unclusteredCount: number;
// faceFNs: FaceFileNeighbours[];
clusterPreviewWFs: ClusterPreviewWF[];
clusters: FaceCluster[];
clusterIDForFaceID: Map<string, string>;
}
@@ -377,48 +387,84 @@ export const wipClusterDebugPageContents = async (): Promise<
triggerStatusUpdate();
// const { faceAndNeigbours, clusters, cgroups } = await clusterFaces(
const { faceAndNeigbours, clusters, cgroups } = await clusterFacesHdb(
await faceIndexes(),
);
const searchPersons = await convertToSearchPersons(clusters, cgroups);
const {
clusteredCount,
unclusteredCount,
clusterPreviews,
clusters,
cgroups,
clusterIDForFaceID,
} = await worker().then((w) => w.clusterFacesHdb());
// const searchPersons = await convertToSearchPersons(clusters, cgroups);
const localFiles = await getAllLocalFiles();
const localFileByID = new Map(localFiles.map((f) => [f.id, f]));
const fileForFace = ({ faceID }: Face) =>
ensure(localFileByID.get(ensure(fileIDFromFaceID(faceID))));
const faceFNs = faceAndNeigbours
.map(({ face, neighbours }) => ({
// const faceFNs = faceAndNeigbours.map(
// ({ topFace: face, faces: neighbours }) => ({
// face,
// neighbours: neighbours.map(({ face, cosineSimilarity }) => ({
// face,
// enteFile: fileForFace(face),
// cosineSimilarity,
// })),
// }),
// );
const clusterPreviewWFs = clusterPreviews.map(({ clusterSize, faces }) => ({
clusterSize,
faces: faces.map(({ face, cosineSimilarity }) => ({
face,
neighbours: neighbours.map(({ face, cosineSimilarity }) => ({
face,
enteFile: fileForFace(face),
cosineSimilarity,
})),
}))
.sort((a, b) => b.face.score - a.face.score);
enteFile: fileForFace(face),
cosineSimilarity,
})),
}));
const clusterIDForFaceID = new Map(
clusters.flatMap((cluster) =>
cluster.faceIDs.map((id) => [id, cluster.id]),
),
);
const clusterByID = new Map(clusters.map((c) => [c.id, c]));
const searchPersons = cgroups
.map((cgroup) => {
const faceID = ensure(cgroup.displayFaceID);
const fileID = ensure(fileIDFromFaceID(faceID));
const file = ensure(localFileByID.get(fileID));
const faceIDs = cgroup.clusterIDs
.map((id) => ensure(clusterByID.get(id)))
.flatMap((cluster) => cluster.faceIDs);
const fileIDs = faceIDs
.map((faceID) => fileIDFromFaceID(faceID))
.filter((fileID) => fileID !== undefined);
return {
id: cgroup.id,
name: cgroup.name,
faceIDs,
files: [...new Set(fileIDs)],
displayFaceID: faceID,
displayFaceFile: file,
};
})
.sort((a, b) => b.faceIDs.length - a.faceIDs.length);
_wip_isClustering = false;
_wip_searchPersons = searchPersons;
triggerStatusUpdate();
const prunedFaceFNs = faceFNs.slice(0, 30).map(({ face, neighbours }) => ({
face,
neighbours: neighbours.slice(0, 30),
}));
return { faceFNs: prunedFaceFNs, clusters, clusterIDForFaceID };
return {
clusteredCount,
unclusteredCount,
clusterPreviewWFs,
clusters,
clusterIDForFaceID,
};
};
export const wipCluster = () => void wipClusterDebugPageContents();
const convertToSearchPersons = async (
// TODO-Cluster remove me
export const convertToSearchPersons = async (
clusters: FaceCluster[],
cgroups: CGroup[],
) => {

View File

@@ -24,8 +24,10 @@ import {
indexCLIP,
type CLIPIndex,
} from "./clip";
import { clusterFacesHdb } from "./cluster-new";
import { saveFaceCrops } from "./crop";
import {
faceIndexes,
indexableFileIDs,
markIndexingFailed,
saveIndexes,
@@ -272,6 +274,11 @@ export class MLWorker {
remoteMLData: mlDataByID.get(id),
}));
}
// TODO-Cluster
async clusterFacesHdb() {
return clusterFacesHdb(await faceIndexes());
}
}
expose(MLWorker);
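// Note: `expose` presumably comes from comlink, so main thread callers
// invoke this method through an async proxy, as seen in ml/index.ts above:
//
//     const result = await worker().then((w) => w.clusterFacesHdb());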