[desktop] Clustering WIP - Part x/x (#3364)
The sync scaffolding is mostly in place now.
This commit is contained in:
@@ -17,7 +17,6 @@
|
||||
"exifreader": "^4",
|
||||
"fast-srp-hap": "^2.0.4",
|
||||
"ffmpeg-wasm": "file:./thirdparty/ffmpeg-wasm",
|
||||
"hdbscan": "0.0.1-alpha.5",
|
||||
"leaflet": "^1.9.4",
|
||||
"leaflet-defaulticon-compatibility": "^0.1.1",
|
||||
"localforage": "^1.9.0",
|
||||
|
||||
@@ -1,581 +0,0 @@
|
||||
import { SelectionBar } from "@/base/components/Navbar";
|
||||
import { pt } from "@/base/i18n";
|
||||
import {
|
||||
faceCrop,
|
||||
wipClusterDebugPageContents,
|
||||
type ClusterDebugPageContents,
|
||||
} from "@/new/photos/services/ml";
|
||||
import {
|
||||
type ClusterFace,
|
||||
type ClusteringOpts,
|
||||
type ClusteringProgress,
|
||||
type OnClusteringProgress,
|
||||
} from "@/new/photos/services/ml/cluster";
|
||||
import { faceDirection } from "@/new/photos/services/ml/face";
|
||||
import type { EnteFile } from "@/new/photos/types/file";
|
||||
import {
|
||||
FlexWrapper,
|
||||
FluidContainer,
|
||||
VerticallyCentered,
|
||||
} from "@ente/shared/components/Container";
|
||||
import BackButton from "@mui/icons-material/ArrowBackOutlined";
|
||||
import {
|
||||
Box,
|
||||
Button,
|
||||
Checkbox,
|
||||
FormControlLabel,
|
||||
IconButton,
|
||||
LinearProgress,
|
||||
Stack,
|
||||
styled,
|
||||
TextField,
|
||||
Typography,
|
||||
} from "@mui/material";
|
||||
import { useFormik, type FormikProps } from "formik";
|
||||
import { useRouter } from "next/router";
|
||||
import { AppContext } from "pages/_app";
|
||||
import React, {
|
||||
memo,
|
||||
useCallback,
|
||||
useContext,
|
||||
useEffect,
|
||||
useMemo,
|
||||
useRef,
|
||||
useState,
|
||||
} from "react";
|
||||
import AutoSizer from "react-virtualized-auto-sizer";
|
||||
import {
|
||||
areEqual,
|
||||
VariableSizeList,
|
||||
type ListChildComponentProps,
|
||||
} from "react-window";
|
||||
|
||||
// TODO-Cluster Temporary component for debugging
/**
 * Debug page that runs face clustering with user supplied parameters and shows
 * the resulting clusters in a virtualized list.
 *
 * The parameters form is rendered as the first row of the list, and the
 * progress of an ongoing clustering is relayed to it via a ref.
 */
export default function ClusterDebug() {
    const { startLoading, finishLoading, showNavBar } = useContext(AppContext);

    // The clustering result.
    const [clusterRes, setClusterRes] = useState<
        ClusterDebugPageContents | undefined
    >();

    // Keep the loading state callback as a ref instead of state to prevent
    // rerendering when the progress gets updated during clustering.
    const onProgressRef = useRef<OnClusteringProgress | undefined>();

    const cluster = useCallback(
        async (opts: ClusteringOpts, onProgress: OnClusteringProgress) => {
            // Clear the previous result so that stale clusters are not shown
            // while the new ones are being computed.
            setClusterRes(undefined);
            startLoading();
            try {
                setClusterRes(
                    await wipClusterDebugPageContents(opts, onProgress),
                );
            } finally {
                // Always dismiss the loading indicator, even if clustering
                // failed, otherwise the app stays stuck in the loading state.
                finishLoading();
            }
        },
        [startLoading, finishLoading],
    );

    // Keep the form state at the top level otherwise it gets reset as we
    // scroll.
    const formik = useFormik<ClusteringOpts>({
        initialValues: {
            minBlur: 10,
            minScore: 0.8,
            minClusterSize: 2,
            joinThreshold: 0.76,
            earlyExitThreshold: 0.9,
            batchSize: 10000,
            offsetIncrement: 7500,
            badFaceHeuristics: true,
        },
        onSubmit: (values) =>
            cluster(
                {
                    // Formik hands back edited numeric fields as strings.
                    minBlur: toFloat(values.minBlur),
                    minScore: toFloat(values.minScore),
                    minClusterSize: toFloat(values.minClusterSize),
                    joinThreshold: toFloat(values.joinThreshold),
                    earlyExitThreshold: toFloat(values.earlyExitThreshold),
                    batchSize: toFloat(values.batchSize),
                    offsetIncrement: toFloat(values.offsetIncrement),
                    badFaceHeuristics: values.badFaceHeuristics,
                },
                // Indirect via the ref so that whichever progress handler is
                // currently registered (if any) receives the update.
                (progress: ClusteringProgress) =>
                    onProgressRef.current?.(progress),
            ),
    });

    // Show the navbar once, when the page mounts.
    useEffect(() => showNavBar(true), []);

    return (
        <>
            <Container>
                <AutoSizer>
                    {({ height, width }) => (
                        <ClusterList {...{ width, height, clusterRes }}>
                            <OptionsForm {...{ formik, onProgressRef }} />
                        </ClusterList>
                    )}
                </AutoSizer>
            </Container>
            <Options />
        </>
    );
}
|
||||
|
||||
/**
 * Coerce a form value into a number.
 *
 * Formik converts nums to a string on edit, so a field that started out as a
 * number can come back as either type.
 *
 * @param n The value of the form field.
 * @returns {@link n} itself if it is already a number, otherwise the result of
 * parsing it with `parseFloat` (which is `NaN` for unparseable strings).
 */
const toFloat = (n: number | string): number =>
    typeof n === "string" ? parseFloat(n) : n;
|
||||
|
||||
/**
 * The bar shown at the top of the page, with a back button that navigates to
 * the gallery and the page title.
 */
const Options: React.FC = () => {
    const router = useRouter();

    // The navigation promise is intentionally not awaited; `void` makes that
    // explicit instead of handing a floating promise to the click handler.
    const close = () => void router.push("/gallery");

    return (
        <SelectionBar>
            <FluidContainer>
                <IconButton onClick={close}>
                    <BackButton />
                </IconButton>
                <Box sx={{ marginInline: "auto" }}>{pt("Face Clusters")}</Box>
            </FluidContainer>
        </SelectionBar>
    );
};
|
||||
|
||||
const Container = styled("div")`
|
||||
display: block;
|
||||
flex: 1;
|
||||
width: 100%;
|
||||
flex-wrap: wrap;
|
||||
overflow: hidden;
|
||||
.pswp-thumbnail {
|
||||
display: inline-block;
|
||||
}
|
||||
`;
|
||||
|
||||
type OptionsFormProps = LoaderProps & {
|
||||
formik: FormikProps<ClusteringOpts>;
|
||||
};
|
||||
|
||||
/**
 * The clustering parameters form, followed by a progress indicator that is
 * shown only while the form is being submitted.
 */
const OptionsForm: React.FC<OptionsFormProps> = (props) => {
    const { formik, onProgressRef } = props;
    const progressIndicator = formik.isSubmitting && (
        <Loader {...{ onProgressRef }} />
    );

    return (
        <Stack>
            <Typography paddingInline={1}>Parameters</Typography>
            <MemoizedForm {...formik} />
            {progressIndicator}
        </Stack>
    );
};
|
||||
|
||||
/**
 * The form with the clustering parameters: one text field per numeric option,
 * a checkbox for the bad face heuristics, and the submit button (disabled
 * while a submission is in progress).
 */
const MemoizedForm = memo((formikProps: FormikProps<ClusteringOpts>) => {
    const { values, handleSubmit, handleChange, isSubmitting } = formikProps;

    // The numeric options, in the order in which their fields are shown. Each
    // option's key doubles as the name and the label of its field.
    const numericFields = [
        "minBlur",
        "minScore",
        "minClusterSize",
        "joinThreshold",
        "earlyExitThreshold",
        "batchSize",
        "offsetIncrement",
    ] as const;

    const numericInputs = numericFields.map((field) => (
        <TextField
            key={field}
            name={field}
            label={field}
            value={values[field]}
            size="small"
            onChange={handleChange}
        />
    ));

    const heuristicsCheckbox = (
        <Checkbox
            name={"badFaceHeuristics"}
            checked={values.badFaceHeuristics}
            size="small"
            onChange={handleChange}
        />
    );

    const heuristicsLabel = (
        <Typography color="text.secondary">Bad face heuristics</Typography>
    );

    return (
        <form onSubmit={handleSubmit}>
            <Stack>
                <Stack
                    direction="row"
                    gap={1}
                    sx={{ ".MuiFormControl-root": { flex: "1" } }}
                >
                    {numericInputs}
                </Stack>
                <Stack direction="row" justifyContent={"space-between"} p={1}>
                    <FormControlLabel
                        control={heuristicsCheckbox}
                        label={heuristicsLabel}
                    />
                    <Button
                        color="secondary"
                        type="submit"
                        disabled={isSubmitting}
                    >
                        Cluster
                    </Button>
                </Stack>
            </Stack>
        </form>
    );
});
|
||||
|
||||
interface LoaderProps {
|
||||
onProgressRef: React.MutableRefObject<OnClusteringProgress | undefined>;
|
||||
}
|
||||
|
||||
/**
 * A progress bar and a "completed / total" label for an ongoing clustering.
 *
 * While mounted, the component registers its state setter in
 * {@link onProgressRef} so that progress updates rerender only this component.
 */
const Loader: React.FC<LoaderProps> = ({ onProgressRef }) => {
    const [progress, setProgress] = useState<ClusteringProgress>({
        completed: 0,
        total: 0,
    });

    // Register (and on unmount, unregister) the progress handler in an effect
    // instead of writing to the ref during render: render should stay free of
    // side effects, and without the cleanup the ref would keep pointing to the
    // state setter of an unmounted component.
    useEffect(() => {
        onProgressRef.current = setProgress;
        return () => {
            // Don't clobber a handler that someone else registered meanwhile.
            if (onProgressRef.current === setProgress)
                onProgressRef.current = undefined;
        };
    }, [onProgressRef]);

    const { completed, total } = progress;

    // Guard against division by zero before the first progress update.
    const percent = total > 0 ? Math.round((completed / total) * 100) : 0;

    return (
        <VerticallyCentered mt={4} gap={2}>
            <Stack
                direction="row"
                gap={1}
                alignItems={"center"}
                paddingInline={"1rem"}
                sx={{
                    width: "100%",
                    "& div": {
                        flex: 1,
                    },
                }}
            >
                <Box sx={{ mr: 1 }}>
                    <LinearProgress variant="determinate" value={percent} />
                </Box>
                <Typography
                    variant="small"
                    sx={{
                        minWidth: "10rem",
                        textAlign: "right",
                    }}
                >{`${completed} / ${total}`}</Typography>
            </Stack>
        </VerticallyCentered>
    );
};
|
||||
|
||||
type ClusterListProps = ClusterResHeaderProps & {
|
||||
height: number;
|
||||
width: number;
|
||||
};
|
||||
|
||||
/**
 * A virtualized list showing the clustering result.
 *
 * The first row renders {@link children} (the options form), the second row
 * the result summary, and the remaining rows are either text labels or rows
 * of faces, as computed by {@link itemsFromClusterRes}.
 */
const ClusterList: React.FC<React.PropsWithChildren<ClusterListProps>> = ({
    width,
    height,
    clusterRes,
    children,
}) => {
    // Typed so that the `resetAfterIndex` call below type checks.
    const listRef = useRef<VariableSizeList | null>(null);

    // Show as many columns as fit, but never fewer than 4.
    const columns = useMemo(
        () => Math.max(Math.floor(getFractionFittableColumns(width)), 4),
        [width],
    );

    const shrinkRatio = getShrinkRatio(width, columns);
    // Height of a row of faces: the (scaled) face tile plus room for the
    // caption and the gap below it.
    const listItemHeight = 120 * shrinkRatio + 24 + 4;

    // Derive the rows during render instead of mirroring them into state from
    // an effect, so that we never render a frame in which the rows are out of
    // sync with the `clusterRes` and `columns` they were derived from.
    const items = useMemo<Item[]>(
        () => (clusterRes ? itemsFromClusterRes(clusterRes, columns) : []),
        [columns, clusterRes],
    );

    // The list caches row sizes, invalidate them whenever the rows change.
    useEffect(() => {
        listRef.current?.resetAfterIndex(0);
    }, [items]);

    const itemSize = (index: number) => {
        if (index === 0) return 140; // The options form.
        if (index === 1) return 110; // The result summary.
        // Rows of faces are arrays; everything else is a text label.
        return Array.isArray(items[index - 2]) ? listItemHeight : 36;
    };

    return (
        <VariableSizeList
            height={height}
            width={width}
            ref={listRef}
            itemData={{ items, clusterRes, columns, shrinkRatio, children }}
            itemCount={2 + items.length}
            itemSize={itemSize}
            overscanCount={3}
        >
            {ClusterListItemRenderer}
        </VariableSizeList>
    );
};
|
||||
|
||||
type Item = string | FaceWithFile[];
|
||||
|
||||
/**
 * Split {@link elements} into consecutive rows of at most {@link size}
 * elements each.
 *
 * A {@link size} that is not a finite number >= 1 is treated as 1 so that the
 * loop always makes progress.
 */
const rowsOf = <T,>(elements: T[], size: number): T[][] => {
    const step = Number.isFinite(size) && size >= 1 ? Math.trunc(size) : 1;
    const rows: T[][] = [];
    for (let start = 0; start < elements.length; start += step)
        rows.push(elements.slice(start, start + step));
    return rows;
};

/**
 * Flatten a clustering result into the rows shown by the list.
 *
 * For each cluster preview this emits a text label with the cluster's size
 * followed by its faces, {@link columns} per row. If there are unclustered
 * faces, they are appended similarly under their own label.
 *
 * @param clusterRes The clustering result to show.
 * @param columns The number of faces to put in each row.
 * @returns The rows: a string for a label, an array for a row of faces.
 */
const itemsFromClusterRes = (
    clusterRes: ClusterDebugPageContents,
    columns: number,
): Item[] => {
    const { clusterPreviewsWithFile, unclusteredFacesWithFile } = clusterRes;

    const result: Item[] = [];

    for (const { clusterSize, faces } of clusterPreviewsWithFile) {
        result.push(`cluster size ${clusterSize.toFixed(2)}`);
        result.push(...rowsOf(faces, columns));
    }

    if (unclusteredFacesWithFile.length) {
        result.push(`•• unclustered faces ${unclusteredFacesWithFile.length}`);
        result.push(...rowsOf(unclusteredFacesWithFile, columns));
    }

    return result;
};
|
||||
|
||||
/** Unscaled width (and height) of a face tile, in pixels. */
const faceTileSizePx = 120;

/** Gap between adjacent face tiles, in pixels. */
const faceTileGapPx = 4;

/**
 * Return the (fractional) number of unscaled face tiles that fit in a row of
 * the given {@link width}, after leaving out the margins on both screen edges.
 */
const getFractionFittableColumns = (width: number) =>
    (width - 2 * getGapFromScreenEdge(width) + faceTileGapPx) /
    (faceTileSizePx + faceTileGapPx);

/**
 * Return the margin to leave on each screen edge: 24 px when the container is
 * wider than 4 unscaled face tiles, 4 px otherwise.
 */
const getGapFromScreenEdge = (width: number) =>
    width > 4 * faceTileSizePx ? 24 : faceTileGapPx;

/**
 * Return the factor by which face tiles need to be scaled so that
 * {@link columns} of them, along with the gaps between them and the margins on
 * the screen edges, exactly span the given {@link width}.
 */
const getShrinkRatio = (width: number, columns: number) =>
    (width - 2 * getGapFromScreenEdge(width) - (columns - 1) * faceTileGapPx) /
    (columns * faceTileSizePx);
|
||||
|
||||
/** The `itemData` that {@link ClusterListItemRenderer} is passed by the list. */
interface ClusterListItemData {
    /** The rows after the first two: text labels or rows of faces. */
    items: Item[];
    /** The clustering result summarized in the second row. */
    clusterRes: ClusterDebugPageContents | undefined;
    /** Number of faces per row. */
    columns: number;
    /** Factor by which the face tiles are scaled to fit the width. */
    shrinkRatio: number;
    /** Contents of the first row. */
    children?: React.ReactNode;
}

// It is necessary to define the item renderer at the top level, otherwise it
// gets recreated every time the parent rerenders, causing the form to lose its
// submitting state.
/**
 * Render the row of the cluster list at the given index.
 *
 * Row 0 shows the children passed to the list, row 1 the result summary, and
 * each subsequent row either a text label or a row of faces.
 */
const ClusterListItemRenderer = React.memo<ListChildComponentProps>(
    ({ index, style, data }) => {
        // `data` is untyped (any) here, give it its actual shape.
        const itemData: ClusterListItemData = data;
        const { clusterRes, columns, shrinkRatio, items, children } = itemData;

        if (index === 0) return <div style={style}>{children}</div>;

        if (index === 1)
            return (
                <div style={style}>
                    <ClusterResHeader clusterRes={clusterRes} />
                </div>
            );

        // The first two rows are not part of items.
        const item = items[index - 2];
        return (
            <ListItem style={style}>
                <ListContainer columns={columns} shrinkRatio={shrinkRatio}>
                    {!Array.isArray(item) ? (
                        <LabelContainer span={columns}>{item}</LabelContainer>
                    ) : (
                        // Key by the face's ID instead of its position in the
                        // row so that a face's state (its crop) is not carried
                        // over to a different face when rows get recomputed.
                        item.map((f) => (
                            <FaceItem key={f.face.faceID} faceWithFile={f} />
                        ))
                    )}
                </ListContainer>
            </ListItem>
        );
    },
    areEqual,
);
|
||||
|
||||
interface ClusterResHeaderProps {
|
||||
clusterRes: ClusterDebugPageContents | undefined;
|
||||
}
|
||||
|
||||
/**
 * A summary of the clustering result (counts and time taken), followed by
 * notes on how to read the list. Renders nothing if there is no result yet.
 */
const ClusterResHeader: React.FC<ClusterResHeaderProps> = (props) => {
    const res = props.clusterRes;
    if (!res) return null;

    const seconds = (res.timeTakenMs / 1000).toFixed(0);
    const summary = `${res.clusters.length} clusters in ${seconds} seconds • ${res.totalFaceCount} faces ${res.filteredFaceCount} filtered ${res.clusteredFaceCount} clustered ${res.unclusteredFaceCount} unclustered`;

    return (
        <Stack m={1}>
            <Typography mb={1} variant="small">
                {summary}
            </Typography>
            <Typography variant="small" color="text.muted">
                Showing only top 30 clusters, bottom 30 clusters, and
                unclustered faces.
            </Typography>
            <Typography variant="small" color="text.muted">
                For each cluster showing only up to 50 faces, sorted by cosine
                similarity to its highest scoring face.
            </Typography>
            <Typography variant="small" color="text.muted">
                Below each face is its blur, score, cosineSimilarity, direction.
                Bad faces are outlined.
            </Typography>
        </Stack>
    );
};
|
||||
|
||||
const ListItem = styled("div")`
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
`;
|
||||
|
||||
/**
 * A grid that lays out a row of the list in {@link columns} columns, each as
 * wide as a face tile scaled by {@link shrinkRatio}.
 */
const ListContainer = styled(Box, {
    // Both props are only meant for computing the styles below, don't let
    // either of them get forwarded as an attribute on the underlying element.
    shouldForwardProp: (propName) =>
        propName !== "shrinkRatio" && propName !== "columns",
})<{
    columns: number;
    shrinkRatio: number;
}>`
    display: grid;
    grid-template-columns: ${({ columns, shrinkRatio }) =>
        `repeat(${columns},${120 * shrinkRatio}px)`};
    grid-column-gap: 4px;
    width: 100%;
    padding: 4px;
`;
|
||||
|
||||
const ListItemContainer = styled(FlexWrapper)<{ span: number }>`
|
||||
grid-column: span ${(props) => props.span};
|
||||
`;
|
||||
|
||||
const LabelContainer = styled(ListItemContainer)`
|
||||
color: ${({ theme }) => theme.colors.text.muted};
|
||||
height: 32px;
|
||||
`;
|
||||
|
||||
interface FaceItemProps {
|
||||
faceWithFile: FaceWithFile;
|
||||
}
|
||||
|
||||
interface FaceWithFile {
|
||||
face: ClusterFace;
|
||||
enteFile: EnteFile;
|
||||
cosineSimilarity?: number;
|
||||
wasMerged?: boolean;
|
||||
}
|
||||
|
||||
/**
 * A face tile: the crop of the face, and below it the face's blur, score,
 * cosine similarity (if present) and direction. Bad faces are outlined.
 */
const FaceItem: React.FC<FaceItemProps> = ({ faceWithFile }) => {
    const { face, enteFile, cosineSimilarity } = faceWithFile;
    const { faceID, isBadFace } = face;

    const [objectURL, setObjectURL] = useState<string | undefined>();

    useEffect(() => {
        let didCancel = false;
        let thisObjectURL: string | undefined;

        void faceCrop(faceID, enteFile).then((blob) => {
            // Don't create an object URL if the effect has already been
            // cleaned up, there would be nobody left to revoke it.
            if (blob && !didCancel)
                setObjectURL((thisObjectURL = URL.createObjectURL(blob)));
        });

        return () => {
            didCancel = true;
            if (thisObjectURL) URL.revokeObjectURL(thisObjectURL);
        };
    }, [faceID, enteFile]);

    const fd = faceDirection(face.detection);
    const d = fd === "straight" ? "•" : fd === "left" ? "←" : "→";

    return (
        <FaceChip
            style={{
                outline: isBadFace ? `1px solid rosybrown` : undefined,
                outlineOffset: "2px",
            }}
        >
            {objectURL && (
                <img
                    // Decorative, the caption below describes the face.
                    alt=""
                    style={{
                        objectFit: "cover",
                        width: "100%",
                        height: "100%",
                    }}
                    src={objectURL}
                />
            )}
            <Stack direction="row" justifyContent="space-between">
                <Typography variant="small" color="text.muted">
                    {`b${face.blur.toFixed(0)} `}
                </Typography>
                <Typography variant="small" color="text.muted">
                    {`s${face.score.toFixed(1)}`}
                </Typography>
                {/* Compare against undefined instead of relying on
                    truthiness: a similarity of 0 is a valid value, and
                    `0 && …` would render a stray "0". */}
                {cosineSimilarity !== undefined && (
                    <Typography variant="small" color="text.muted">
                        {`c${cosineSimilarity.toFixed(1)}`}
                    </Typography>
                )}
                <Typography variant="small" color="text.muted">
                    {`d${d}`}
                </Typography>
            </Stack>
        </FaceChip>
    );
};
|
||||
|
||||
const FaceChip = styled(Box)`
|
||||
width: 120px;
|
||||
height: 120px;
|
||||
`;
|
||||
@@ -14,7 +14,6 @@ import {
|
||||
getLocalFiles,
|
||||
getLocalTrashedFiles,
|
||||
} from "@/new/photos/services/files";
|
||||
import { wipHasSwitchedOnceCmpAndSet } from "@/new/photos/services/ml";
|
||||
import type { Person } from "@/new/photos/services/ml/cgroups";
|
||||
import {
|
||||
filterSearchableFiles,
|
||||
@@ -681,16 +680,6 @@ export default function Gallery() {
|
||||
};
|
||||
}, [selectAll, clearSelection]);
|
||||
|
||||
useEffect(() => {
|
||||
// TODO-Cluster
|
||||
if (process.env.NEXT_PUBLIC_ENTE_WIP_CL_AUTO) {
|
||||
setTimeout(() => {
|
||||
if (!wipHasSwitchedOnceCmpAndSet())
|
||||
router.push("cluster-debug");
|
||||
}, 2000);
|
||||
}
|
||||
}, []);
|
||||
|
||||
const fileToCollectionsMap = useMemoSingleThreaded(() => {
|
||||
return constructFileToCollectionMap(files);
|
||||
}, [files]);
|
||||
|
||||
@@ -30,5 +30,9 @@ export const preFileInfoSync = async () => {
|
||||
* libraries after initial login), and the `preFileInfoSync`, which is called
|
||||
* before doing the file sync and thus should run immediately after login.
|
||||
*/
|
||||
export const sync = () =>
|
||||
Promise.all([syncMapEnabled(), mlSync(), searchDataSync()]);
|
||||
export const sync = async () => {
|
||||
await Promise.all([syncMapEnabled(), searchDataSync()]);
|
||||
// ML sync might take a very long time for initial indexing, so don't wait
|
||||
// for it to finish.
|
||||
void mlSync();
|
||||
};
|
||||
|
||||
@@ -209,9 +209,6 @@ For more details, see [translations.md](translations.md).
|
||||
> provides affine transforms, while `matrix` is for performing computations
|
||||
> on matrices, say inverting them or performing their decomposition.
|
||||
|
||||
- [hdbscan](https://github.com/shaileshpandit/hdbscan-js) is used for face
|
||||
clustering.
|
||||
|
||||
## Auth app specific
|
||||
|
||||
- [otpauth](https://github.com/hectorm/otpauth) is used for the generation of
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
import { EnteDrawer } from "@/base/components/EnteDrawer";
|
||||
import { MenuItemGroup, MenuSectionTitle } from "@/base/components/Menu";
|
||||
import { Titlebar } from "@/base/components/Titlebar";
|
||||
import { pt, ut } from "@/base/i18n";
|
||||
import { ut } from "@/base/i18n";
|
||||
import log from "@/base/log";
|
||||
import {
|
||||
disableML,
|
||||
enableML,
|
||||
mlStatusSnapshot,
|
||||
mlStatusSubscribe,
|
||||
wipCluster,
|
||||
wipClusterEnable,
|
||||
type MLStatus,
|
||||
} from "@/new/photos/services/ml";
|
||||
@@ -27,7 +28,6 @@ import {
|
||||
type DialogProps,
|
||||
} from "@mui/material";
|
||||
import { t } from "i18next";
|
||||
import { useRouter } from "next/router";
|
||||
import React, { useEffect, useState, useSyncExternalStore } from "react";
|
||||
import { Trans } from "react-i18next";
|
||||
import type { NewAppContextPhotos } from "../types/context";
|
||||
@@ -316,7 +316,7 @@ const ManageML: React.FC<ManageMLProps> = ({
|
||||
break;
|
||||
case "clustering":
|
||||
// TODO-Cluster
|
||||
status = pt("Grouping faces");
|
||||
status = t("people");
|
||||
break;
|
||||
default:
|
||||
status = t("indexing_status_done");
|
||||
@@ -338,10 +338,6 @@ const ManageML: React.FC<ManageMLProps> = ({
|
||||
});
|
||||
};
|
||||
|
||||
// TODO-Cluster
|
||||
const router = useRouter();
|
||||
const wipClusterDebug = () => router.push("/cluster-debug");
|
||||
|
||||
return (
|
||||
<Stack px={"16px"} py={"20px"} gap={4}>
|
||||
<Stack gap={3}>
|
||||
@@ -392,12 +388,12 @@ const ManageML: React.FC<ManageMLProps> = ({
|
||||
label={ut(
|
||||
"Create clusters • internal only option",
|
||||
)}
|
||||
onClick={wipClusterDebug}
|
||||
onClick={() => void wipCluster()}
|
||||
/>
|
||||
</MenuItemGroup>
|
||||
<MenuSectionTitle
|
||||
title={ut(
|
||||
"Create and show in-memory clusters (not saved or synced). You can also view them in the search dropdown later.",
|
||||
"Create in-memory clusters (not saved or synced). You can also view them in the search dropdown later.",
|
||||
)}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
@@ -20,7 +20,9 @@ export const SearchPeopleList: React.FC<SearchPeopleListProps> = ({
|
||||
}) => {
|
||||
const isMobileWidth = useIsMobileWidth();
|
||||
return (
|
||||
<SearchPeopleContainer>
|
||||
<SearchPeopleContainer
|
||||
sx={{ justifyContent: people.length > 3 ? "center" : "start" }}
|
||||
>
|
||||
{people.slice(0, isMobileWidth ? 6 : 7).map((person) => (
|
||||
<SearchPeopleButton
|
||||
key={person.id}
|
||||
@@ -40,7 +42,6 @@ export const SearchPeopleList: React.FC<SearchPeopleListProps> = ({
|
||||
const SearchPeopleContainer = styled("div")`
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
gap: 5px;
|
||||
margin-block: 12px;
|
||||
@@ -195,6 +196,10 @@ const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({
|
||||
) : (
|
||||
<Skeleton
|
||||
variant="circular"
|
||||
animation="wave"
|
||||
sx={{
|
||||
backgroundColor: (theme) => theme.colors.background.elevated2,
|
||||
}}
|
||||
width={placeholderDimension}
|
||||
height={placeholderDimension}
|
||||
/>
|
||||
|
||||
@@ -419,6 +419,7 @@ const EmptyState: React.FC<EmptyStateProps> = ({
|
||||
label = t("indexing_fetching", mlStatus);
|
||||
break;
|
||||
case "clustering":
|
||||
// TODO-Cluster
|
||||
label = t("indexing_people", mlStatus);
|
||||
break;
|
||||
case "done":
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import { masterKeyFromSession } from "@/base/session-store";
|
||||
import { fileIDFromFaceID, wipClusterEnable } from ".";
|
||||
import { wipClusterEnable } from ".";
|
||||
import type { EnteFile } from "../../types/file";
|
||||
import { getLocalFiles } from "../files";
|
||||
import { pullCGroups } from "../user-entity";
|
||||
import type { FaceCluster } from "./cluster";
|
||||
import { getClusterGroups, getFaceIndexes } from "./db";
|
||||
import { fileIDFromFaceID } from "./face";
|
||||
|
||||
/**
|
||||
* A cgroup ("cluster group") is a group of clusters (possibly containing just a
|
||||
@@ -78,12 +79,15 @@ export interface CGroup {
|
||||
}
|
||||
|
||||
/**
|
||||
* A massaged version of {@link CGroup} suitable for being shown in the UI.
|
||||
* A massaged version of {@link CGroup} or a {@link FaceCluster} suitable for
|
||||
* being shown in the UI.
|
||||
*
|
||||
* We transform both remote cluster groups and local-only face clusters
|
||||
* into the same "person" object that can be shown in the UI.
|
||||
*
|
||||
* The cgroups synced with remote do not directly correspond to "people".
|
||||
* CGroups represent both positive and negative feedback, where the negations
|
||||
* are specifically feedback meant so that we do not show the corresponding
|
||||
* cluster in the UI.
|
||||
* CGroups represent both positive and negative feedback (i.e., the user does not
|
||||
* wish a particular cluster group to be shown in the UI).
|
||||
*
|
||||
* So while each person has an underlying cgroup, not all cgroups have a
|
||||
* corresponding person.
|
||||
@@ -95,13 +99,15 @@ export interface CGroup {
|
||||
*/
|
||||
export interface Person {
|
||||
/**
|
||||
* Nanoid of the underlying {@link CGroup}.
|
||||
* Nanoid of the underlying {@link CGroup} or {@link FaceCluster}.
|
||||
*/
|
||||
id: string;
|
||||
/**
|
||||
* The name of the person.
|
||||
*
|
||||
* This will only be set for named cgroups.
|
||||
*/
|
||||
name: string;
|
||||
name: string | undefined;
|
||||
/**
|
||||
* IDs of the (unique) files in which this face occurs.
|
||||
*/
|
||||
@@ -117,22 +123,6 @@ export interface Person {
|
||||
displayFaceFile: EnteFile;
|
||||
}
|
||||
|
||||
// TODO-Cluster remove me
|
||||
/**
|
||||
* A {@link CGroup} annotated with various in-memory state to make it easier for
|
||||
* the upper layers of our code to directly use it.
|
||||
*/
|
||||
export type AnnotatedCGroup = CGroup & {
|
||||
/**
|
||||
* Locally determined ID of the "best" face that should be used as the
|
||||
* display face, to represent this cluster group in the UI.
|
||||
*
|
||||
* This property is not synced with remote. For more details, see
|
||||
* {@link avatarFaceID}.
|
||||
*/
|
||||
displayFaceID: string | undefined;
|
||||
};
|
||||
|
||||
/**
|
||||
* Fetch existing cgroups for the user from remote and save them to DB.
|
||||
*/
|
||||
@@ -144,8 +134,12 @@ export const syncCGroups = async () => {
|
||||
await pullCGroups(masterKey);
|
||||
};
|
||||
|
||||
export type NamedPerson = Omit<Person, "name"> & {
|
||||
name: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Construct in-memory "people" from the cgroups present locally.
|
||||
* Construct in-memory {@link NamedPerson}s from the cgroups present locally.
|
||||
*
|
||||
* This function is meant to run after files, cgroups and faces have been synced
|
||||
* with remote. It then uses all the information in the local DBs to construct
|
||||
@@ -154,7 +148,7 @@ export const syncCGroups = async () => {
|
||||
* @return A list of {@link Person}s, sorted by the number of files that they
|
||||
* reference.
|
||||
*/
|
||||
export const updatedPeople = async () => {
|
||||
export const namedPeopleFromCGroups = async (): Promise<NamedPerson[]> => {
|
||||
if (!process.env.NEXT_PUBLIC_ENTE_WIP_CL) return [];
|
||||
if (!(await wipClusterEnable())) return [];
|
||||
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
import { Hdbscan, type DebugInfo } from "hdbscan";
|
||||
|
||||
/**
|
||||
* Each "cluster" is a list of indexes of the embeddings belonging to that
|
||||
* particular cluster.
|
||||
*/
|
||||
export type EmbeddingCluster = number[];
|
||||
|
||||
export interface ClusterHdbscanResult {
|
||||
clusters: EmbeddingCluster[];
|
||||
noise: number[];
|
||||
debugInfo?: DebugInfo;
|
||||
}
|
||||
|
||||
/**
 * Run hdbscan over the given {@link embeddings}.
 *
 * @param embeddings The embeddings to cluster, one array of numbers each.
 * @returns The clusters and the noise reported by hdbscan, along with
 * whatever debug info it provides.
 */
export const clusterHdbscan = (
    embeddings: number[][],
): ClusterHdbscanResult => {
    const clusterer = new Hdbscan({
        input: embeddings,
        minClusterSize: 3,
        minSamples: 5,
        clusterSelectionEpsilon: 0.6,
        clusterSelectionMethod: "leaf",
        debug: false,
    });

    const clusters = clusterer.getClusters();
    const noise = clusterer.getNoise();
    const debugInfo = clusterer.getDebugInfo();

    return { clusters, noise, debugInfo };
};
|
||||
@@ -1,10 +1,15 @@
|
||||
import { assertionFailed } from "@/base/assert";
|
||||
import { newNonSecureID } from "@/base/id-worker";
|
||||
import log from "@/base/log";
|
||||
import { ensure } from "@/utils/ensure";
|
||||
import { wait } from "@/utils/promise";
|
||||
import type { EnteFile } from "../../types/file";
|
||||
import type { AnnotatedCGroup } from "./cgroups";
|
||||
import { faceDirection, type Face, type FaceIndex } from "./face";
|
||||
import type { Person } from "./cgroups";
|
||||
import {
|
||||
faceDirection,
|
||||
fileIDFromFaceID,
|
||||
type Face,
|
||||
type FaceIndex,
|
||||
} from "./face";
|
||||
import { dotProduct } from "./math";
|
||||
|
||||
/**
|
||||
@@ -26,24 +31,22 @@ export interface FaceCluster {
|
||||
faces: string[];
|
||||
}
|
||||
|
||||
export interface ClusteringOpts {
|
||||
minBlur: number;
|
||||
minScore: number;
|
||||
minClusterSize: number;
|
||||
joinThreshold: number;
|
||||
earlyExitThreshold: number;
|
||||
batchSize: number;
|
||||
offsetIncrement: number;
|
||||
badFaceHeuristics: boolean;
|
||||
}
|
||||
const clusteringOptions = {
|
||||
minBlur: 10,
|
||||
minScore: 0.8,
|
||||
minClusterSize: 2,
|
||||
joinThreshold: 0.76,
|
||||
earlyExitThreshold: 0.9,
|
||||
batchSize: 10000,
|
||||
offsetIncrement: 7500,
|
||||
badFaceHeuristics: true,
|
||||
};
|
||||
|
||||
export interface ClusteringProgress {
|
||||
completed: number;
|
||||
total: number;
|
||||
}
|
||||
|
||||
export type OnClusteringProgress = (progress: ClusteringProgress) => void;
|
||||
|
||||
/** A {@link Face} annotated with data needed during clustering. */
|
||||
export type ClusterFace = Omit<Face, "embedding"> & {
|
||||
embedding: Float32Array;
|
||||
@@ -65,12 +68,18 @@ export interface ClusterPreviewFace {
|
||||
* Generates clusters from the given faces using a batched form of linear
|
||||
* clustering, with a bit of lookback (and a dollop of heuristics) to get the
|
||||
* clusters to merge across batches.
|
||||
*
|
||||
* [Note: Draining the event loop during clustering]
|
||||
*
|
||||
* The clustering is a synchronous operation, but we make it async to
|
||||
* artificially drain the worker's event loop after each mini-batch so that
|
||||
* other interactions with the worker (where this code runs) do not get stalled
|
||||
* while clustering is in progress.
|
||||
*/
|
||||
export const clusterFaces = (
|
||||
export const clusterFaces = async (
|
||||
faceIndexes: FaceIndex[],
|
||||
localFiles: EnteFile[],
|
||||
opts: ClusteringOpts,
|
||||
onProgress: OnClusteringProgress,
|
||||
onProgress: (progress: ClusteringProgress) => void,
|
||||
) => {
|
||||
const {
|
||||
minBlur,
|
||||
@@ -81,7 +90,7 @@ export const clusterFaces = (
|
||||
batchSize,
|
||||
offsetIncrement,
|
||||
badFaceHeuristics,
|
||||
} = opts;
|
||||
} = clusteringOptions;
|
||||
const t = Date.now();
|
||||
|
||||
const localFileByID = new Map(localFiles.map((f) => [f.id, f]));
|
||||
@@ -137,7 +146,7 @@ export const clusterFaces = (
|
||||
clusters,
|
||||
};
|
||||
|
||||
const newState = clusterBatchLinear(
|
||||
const newState = await clusterBatchLinear(
|
||||
batch,
|
||||
oldState,
|
||||
joinThreshold,
|
||||
@@ -168,76 +177,18 @@ export const clusterFaces = (
|
||||
(a, b) => b.faces.length - a.faces.length,
|
||||
);
|
||||
|
||||
// Convert into the data structure we're using to debug/visualize.
|
||||
const clusterPreviewClusters =
|
||||
sortedClusters.length < 60
|
||||
? sortedClusters
|
||||
: sortedClusters.slice(0, 30).concat(sortedClusters.slice(-30));
|
||||
const clusterPreviews = clusterPreviewClusters.map((cluster) => {
|
||||
const faces = cluster.faces.map((id) => ensure(faceForFaceID.get(id)));
|
||||
const topFace = faces.reduce((top, face) =>
|
||||
top.score > face.score ? top : face,
|
||||
);
|
||||
const previewFaces: ClusterPreviewFace[] = faces.map((face) => {
|
||||
const csim = dotProduct(topFace.embedding, face.embedding);
|
||||
return { face, cosineSimilarity: csim, wasMerged: false };
|
||||
});
|
||||
return {
|
||||
clusterSize: cluster.faces.length,
|
||||
faces: previewFaces
|
||||
.sort((a, b) => b.cosineSimilarity - a.cosineSimilarity)
|
||||
.slice(0, 50),
|
||||
};
|
||||
});
|
||||
// TODO-Cluster
|
||||
// This isn't really part of the clustering, but help the main thread out by
|
||||
// pre-computing temporary in-memory people, one per cluster.
|
||||
const people = toPeople(sortedClusters, localFileByID, faceForFaceID);
|
||||
|
||||
// TODO-Cluster - Currently we're not syncing with remote or saving anything
|
||||
// locally, so cgroups will be empty. Create a temporary (unsaved, unsynced)
|
||||
// cgroup, one per cluster.
|
||||
|
||||
const cgroups: AnnotatedCGroup[] = [];
|
||||
for (const cluster of sortedClusters) {
|
||||
const faces = cluster.faces.map((id) => ensure(faceForFaceID.get(id)));
|
||||
const topFace = faces.reduce((top, face) =>
|
||||
top.score > face.score ? top : face,
|
||||
);
|
||||
cgroups.push({
|
||||
id: cluster.id,
|
||||
name: undefined,
|
||||
assigned: [cluster],
|
||||
isHidden: false,
|
||||
avatarFaceID: undefined,
|
||||
displayFaceID: topFace.faceID,
|
||||
});
|
||||
}
|
||||
|
||||
// TODO-Cluster the total face count is only needed during debugging
|
||||
let totalFaceCount = 0;
|
||||
for (const fi of faceIndexes) totalFaceCount += fi.faces.length;
|
||||
const filteredFaceCount = faces.length;
|
||||
const clusteredFaceCount = clusterIDForFaceID.size;
|
||||
const unclusteredFaceCount = filteredFaceCount - clusteredFaceCount;
|
||||
|
||||
const unclusteredFaces = faces.filter(
|
||||
({ faceID }) => !clusterIDForFaceID.has(faceID),
|
||||
);
|
||||
|
||||
const timeTakenMs = Date.now() - t;
|
||||
log.info(
|
||||
`Clustered ${faces.length} faces into ${sortedClusters.length} clusters, ${faces.length - clusterIDForFaceID.size} faces remain unclustered (${timeTakenMs} ms)`,
|
||||
`Generated ${sortedClusters.length} clusters from ${faces.length} faces (${clusteredFaceCount} clustered ${faces.length - clusteredFaceCount} unclustered) (${timeTakenMs} ms)`,
|
||||
);
|
||||
|
||||
return {
|
||||
totalFaceCount,
|
||||
filteredFaceCount,
|
||||
clusteredFaceCount,
|
||||
unclusteredFaceCount,
|
||||
localFileByID,
|
||||
clusterPreviews,
|
||||
clusters: sortedClusters,
|
||||
cgroups,
|
||||
unclusteredFaces: unclusteredFaces,
|
||||
timeTakenMs,
|
||||
};
|
||||
return { clusters: sortedClusters, people };
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -289,28 +240,13 @@ const isSidewaysFace = (face: Face) =>
|
||||
/** Generate a new cluster ID. */
|
||||
const newClusterID = () => newNonSecureID("cluster_");
|
||||
|
||||
/**
|
||||
* Extract the fileID of the {@link EnteFile} to which the face belongs from its
|
||||
* faceID.
|
||||
*
|
||||
* TODO-Cluster - duplicated with ml/index.ts
|
||||
*/
|
||||
const fileIDFromFaceID = (faceID: string) => {
|
||||
const fileID = parseInt(faceID.split("_")[0] ?? "");
|
||||
if (isNaN(fileID)) {
|
||||
assertionFailed(`Ignoring attempt to parse invalid faceID ${faceID}`);
|
||||
return undefined;
|
||||
}
|
||||
return fileID;
|
||||
};
|
||||
|
||||
interface ClusteringState {
|
||||
clusterIDForFaceID: Map<string, string>;
|
||||
clusterIndexForFaceID: Map<string, number>;
|
||||
clusters: FaceCluster[];
|
||||
}
|
||||
|
||||
const clusterBatchLinear = (
|
||||
const clusterBatchLinear = async (
|
||||
faces: ClusterFace[],
|
||||
oldState: ClusteringState,
|
||||
joinThreshold: number,
|
||||
@@ -331,7 +267,11 @@ const clusterBatchLinear = (
|
||||
|
||||
// For each face in the batch
|
||||
for (const [i, fi] of faces.entries()) {
|
||||
if (i % 100 == 0) onProgress({ completed: i, total: faces.length });
|
||||
if (i % 100 == 0) {
|
||||
onProgress({ completed: i, total: faces.length });
|
||||
// See: [Note: Draining the event loop during clustering]
|
||||
await wait(0);
|
||||
}
|
||||
|
||||
// If the face is already part of a cluster, then skip it.
|
||||
if (state.clusterIDForFaceID.has(fi.faceID)) continue;
|
||||
@@ -385,3 +325,43 @@ const clusterBatchLinear = (
|
||||
|
||||
return state;
|
||||
};
|
||||
|
||||
/**
|
||||
* Construct a {@link Person} object for each cluster.
|
||||
*/
|
||||
const toPeople = (
|
||||
clusters: FaceCluster[],
|
||||
localFileByID: Map<number, EnteFile>,
|
||||
faceForFaceID: Map<string, ClusterFace>,
|
||||
): Person[] =>
|
||||
clusters
|
||||
.map((cluster) => {
|
||||
const faces = cluster.faces.map((id) =>
|
||||
ensure(faceForFaceID.get(id)),
|
||||
);
|
||||
|
||||
const faceIDs = cluster.faces;
|
||||
const fileIDs = faceIDs.map((faceID) =>
|
||||
ensure(fileIDFromFaceID(faceID)),
|
||||
);
|
||||
|
||||
const topFace = faces.reduce((top, face) =>
|
||||
top.score > face.score ? top : face,
|
||||
);
|
||||
|
||||
const displayFaceID = topFace.faceID;
|
||||
const displayFaceFileID = ensure(fileIDFromFaceID(displayFaceID));
|
||||
const displayFaceFile = ensure(
|
||||
localFileByID.get(displayFaceFileID),
|
||||
);
|
||||
|
||||
return {
|
||||
id: cluster.id,
|
||||
name: undefined,
|
||||
faceIDs,
|
||||
fileIDs: [...new Set(fileIDs)],
|
||||
displayFaceID,
|
||||
displayFaceFile,
|
||||
};
|
||||
})
|
||||
.sort((a, b) => b.faceIDs.length - a.faceIDs.length);
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
//
|
||||
/* eslint-disable @typescript-eslint/no-non-null-assertion */
|
||||
|
||||
import { assertionFailed } from "@/base/assert";
|
||||
import type { ElectronMLWorker } from "@/base/types/ipc";
|
||||
import type { EnteFile } from "@/new/photos/types/file";
|
||||
import { Matrix } from "ml-matrix";
|
||||
@@ -149,7 +150,7 @@ export interface Face {
|
||||
* Finally, this face ID is not completely opaque. It consists of underscore
|
||||
* separated components, the first of which is the ID of the
|
||||
* {@link EnteFile} to which this face belongs. Client code can rely on this
|
||||
* structure and can parse it if needed.
|
||||
* structure and can parse it if needed using {@link fileIDFromFaceID}.
|
||||
*/
|
||||
faceID: string;
|
||||
/**
|
||||
@@ -228,6 +229,19 @@ export interface Box {
|
||||
height: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the fileID of the {@link EnteFile} to which the face belongs from its
|
||||
* faceID.
|
||||
*/
|
||||
export const fileIDFromFaceID = (faceID: string) => {
|
||||
const fileID = parseInt(faceID.split("_")[0] ?? "");
|
||||
if (isNaN(fileID)) {
|
||||
assertionFailed(`Ignoring attempt to parse invalid faceID ${faceID}`);
|
||||
return undefined;
|
||||
}
|
||||
return fileID;
|
||||
};
|
||||
|
||||
/**
|
||||
* Index faces in the given file.
|
||||
*
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
*/
|
||||
|
||||
import { isDesktop } from "@/base/app";
|
||||
import { assertionFailed } from "@/base/assert";
|
||||
import { blobCache } from "@/base/blob-cache";
|
||||
import { ensureElectron } from "@/base/electron";
|
||||
import { isDevBuild } from "@/base/env";
|
||||
@@ -19,14 +18,7 @@ import { isInternalUser } from "../feature-flags";
|
||||
import { getRemoteFlag, updateRemoteFlag } from "../remote-store";
|
||||
import { setSearchPeople } from "../search";
|
||||
import type { UploadItem } from "../upload/types";
|
||||
import { syncCGroups, updatedPeople, type Person } from "./cgroups";
|
||||
import {
|
||||
type ClusterFace,
|
||||
type ClusteringOpts,
|
||||
type ClusterPreviewFace,
|
||||
type FaceCluster,
|
||||
type OnClusteringProgress,
|
||||
} from "./cluster";
|
||||
import { namedPeopleFromCGroups, syncCGroups, type Person } from "./cgroups";
|
||||
import { regenerateFaceCrops } from "./crop";
|
||||
import { clearMLDB, getFaceIndex, getIndexableAndIndexedCounts } from "./db";
|
||||
import { MLWorker } from "./worker";
|
||||
@@ -59,6 +51,11 @@ class MLState {
|
||||
*/
|
||||
comlinkWorker: Promise<ComlinkWorker<typeof MLWorker>> | undefined;
|
||||
|
||||
/**
|
||||
* `true` if a sync is currently in progress.
|
||||
*/
|
||||
isSyncing = false;
|
||||
|
||||
/**
|
||||
* Subscriptions to {@link MLStatus} updates.
|
||||
*
|
||||
@@ -85,6 +82,20 @@ class MLState {
|
||||
*/
|
||||
peopleSnapshot: Person[] | undefined;
|
||||
|
||||
/**
|
||||
* Cached in-memory copy of people generated from local clusters.
|
||||
*
|
||||
* Part of {@link peopleSnapshot}.
|
||||
*/
|
||||
peopleLocal: Person[] = [];
|
||||
|
||||
/**
|
||||
* Cached in-memory copy of people generated from remote cgroups.
|
||||
*
|
||||
* Part of {@link peopleSnapshot}.
|
||||
*/
|
||||
peopleRemote: Person[] = [];
|
||||
|
||||
/**
|
||||
* In flight face crop regeneration promises indexed by the IDs of the files
|
||||
* whose faces we are regenerating.
|
||||
@@ -101,9 +112,7 @@ const worker = () =>
|
||||
|
||||
const createComlinkWorker = async () => {
|
||||
const electron = ensureElectron();
|
||||
const delegate = {
|
||||
workerDidProcessFileOrIdle,
|
||||
};
|
||||
const delegate = { workerDidUpdateStatus };
|
||||
|
||||
// Obtain a message port from the Electron layer.
|
||||
const messagePort = await createMLWorker(electron);
|
||||
@@ -223,6 +232,7 @@ export const disableML = async () => {
|
||||
await updateIsMLEnabledRemote(false);
|
||||
setIsMLEnabledLocal(false);
|
||||
_state.isMLEnabled = false;
|
||||
_state.isSyncing = false;
|
||||
await terminateMLWorker();
|
||||
triggerStatusUpdate();
|
||||
};
|
||||
@@ -304,11 +314,36 @@ export const mlStatusSync = async () => {
|
||||
* least once prior to calling this in the sync sequence.
|
||||
*/
|
||||
export const mlSync = async () => {
|
||||
if (_state.isMLEnabled) {
|
||||
await Promise.all([worker().then((w) => w.sync()), syncCGroups()]).then(
|
||||
updatePeople,
|
||||
);
|
||||
}
|
||||
if (!_state.isMLEnabled) return;
|
||||
if (_state.isSyncing) return;
|
||||
_state.isSyncing = true;
|
||||
|
||||
// Dependency order for the sync
|
||||
//
|
||||
// files -> faces -> cgroups -> clusters
|
||||
//
|
||||
|
||||
// Fetch indexes, or index locally if needed.
|
||||
await worker().then((w) => w.sync());
|
||||
|
||||
// Fetch existing cgroups.
|
||||
await syncCGroups();
|
||||
|
||||
// Generate local clusters
|
||||
// TODO-Cluster
|
||||
// Warning - this is heavily WIP
|
||||
wipClusterLocalOnce();
|
||||
|
||||
// Update our in-memory snapshot of people.
|
||||
const namedPeople = await namedPeopleFromCGroups();
|
||||
_state.peopleRemote = namedPeople;
|
||||
updatePeopleSnapshot();
|
||||
|
||||
// Notify the search subsystem of the update. Since the search only uses
|
||||
// named cgroups, we only give it the people we got from cgroups.
|
||||
setSearchPeople(namedPeople);
|
||||
|
||||
_state.isSyncing = false;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -343,120 +378,25 @@ export const wipClusterEnable = async (): Promise<boolean> =>
|
||||
(await isInternalUser());
|
||||
|
||||
// // TODO-Cluster temporary state here
|
||||
let _wip_isClustering = false;
|
||||
let _wip_peopleLocal: Person[] | undefined;
|
||||
let _wip_peopleRemote: Person[] | undefined;
|
||||
let _wip_hasSwitchedOnce = false;
|
||||
|
||||
export const wipHasSwitchedOnceCmpAndSet = () => {
|
||||
if (_wip_hasSwitchedOnce) return true;
|
||||
export const wipClusterLocalOnce = () => {
|
||||
if (!process.env.NEXT_PUBLIC_ENTE_WIP_CL_AUTO) return;
|
||||
if (_wip_hasSwitchedOnce) return;
|
||||
_wip_hasSwitchedOnce = true;
|
||||
return false;
|
||||
void wipCluster();
|
||||
};
|
||||
|
||||
export interface ClusterPreviewWithFile {
|
||||
clusterSize: number;
|
||||
faces: ClusterPreviewFaceWithFile[];
|
||||
}
|
||||
|
||||
export type ClusterPreviewFaceWithFile = ClusterPreviewFace & {
|
||||
enteFile: EnteFile;
|
||||
};
|
||||
|
||||
export interface ClusterDebugPageContents {
|
||||
totalFaceCount: number;
|
||||
filteredFaceCount: number;
|
||||
clusteredFaceCount: number;
|
||||
unclusteredFaceCount: number;
|
||||
timeTakenMs: number;
|
||||
clusters: FaceCluster[];
|
||||
clusterPreviewsWithFile: ClusterPreviewWithFile[];
|
||||
unclusteredFacesWithFile: {
|
||||
face: ClusterFace;
|
||||
enteFile: EnteFile;
|
||||
}[];
|
||||
}
|
||||
|
||||
export const wipClusterDebugPageContents = async (
|
||||
opts: ClusteringOpts,
|
||||
onProgress: OnClusteringProgress,
|
||||
): Promise<ClusterDebugPageContents> => {
|
||||
export const wipCluster = async () => {
|
||||
if (!(await wipClusterEnable())) throw new Error("Not implemented");
|
||||
|
||||
log.info("clustering", opts);
|
||||
_wip_isClustering = true;
|
||||
_wip_peopleLocal = undefined;
|
||||
triggerStatusUpdate();
|
||||
|
||||
const {
|
||||
localFileByID,
|
||||
clusterPreviews,
|
||||
clusters,
|
||||
cgroups,
|
||||
unclusteredFaces,
|
||||
...rest
|
||||
} = await worker().then((w) => w.clusterFaces(opts, proxy(onProgress)));
|
||||
const { people } = await worker().then((w) => w.clusterFaces());
|
||||
|
||||
const fileForFace = ({ faceID }: { faceID: string }) =>
|
||||
ensure(localFileByID.get(ensure(fileIDFromFaceID(faceID))));
|
||||
|
||||
const clusterPreviewsWithFile = clusterPreviews.map(
|
||||
({ clusterSize, faces }) => ({
|
||||
clusterSize,
|
||||
faces: faces.map(({ face, ...rest }) => ({
|
||||
face,
|
||||
enteFile: fileForFace(face),
|
||||
...rest,
|
||||
})),
|
||||
}),
|
||||
);
|
||||
|
||||
const unclusteredFacesWithFile = unclusteredFaces.map((face) => ({
|
||||
face,
|
||||
enteFile: fileForFace(face),
|
||||
}));
|
||||
|
||||
const clusterByID = new Map(clusters.map((c) => [c.id, c]));
|
||||
|
||||
const people = cgroups
|
||||
// TODO-Cluster
|
||||
.map((cgroup) => ({ ...cgroup, name: cgroup.id }))
|
||||
.map((cgroup) => {
|
||||
if (!cgroup.name) return undefined;
|
||||
const faceID = ensure(cgroup.displayFaceID);
|
||||
const fileID = ensure(fileIDFromFaceID(faceID));
|
||||
const file = ensure(localFileByID.get(fileID));
|
||||
|
||||
const faceIDs = cgroup.assigned
|
||||
.map(({ id }) => ensure(clusterByID.get(id)))
|
||||
.flatMap((cluster) => cluster.faces);
|
||||
const fileIDs = faceIDs
|
||||
.map((faceID) => fileIDFromFaceID(faceID))
|
||||
.filter((fileID) => fileID !== undefined);
|
||||
|
||||
return {
|
||||
id: cgroup.id,
|
||||
name: cgroup.name,
|
||||
faceIDs,
|
||||
fileIDs: [...new Set(fileIDs)],
|
||||
displayFaceID: faceID,
|
||||
displayFaceFile: file,
|
||||
};
|
||||
})
|
||||
.filter((c) => !!c)
|
||||
.sort((a, b) => b.faceIDs.length - a.faceIDs.length);
|
||||
|
||||
_wip_isClustering = false;
|
||||
_wip_peopleLocal = people;
|
||||
_state.peopleLocal = people;
|
||||
updatePeopleSnapshot();
|
||||
triggerStatusUpdate();
|
||||
setPeopleSnapshot((_wip_peopleRemote ?? []).concat(people));
|
||||
|
||||
return {
|
||||
clusters,
|
||||
clusterPreviewsWithFile,
|
||||
unclusteredFacesWithFile,
|
||||
...rest,
|
||||
};
|
||||
};
|
||||
|
||||
export type MLStatus =
|
||||
@@ -545,6 +485,19 @@ const setMLStatusSnapshot = (snapshot: MLStatus) => {
|
||||
const getMLStatus = async (): Promise<MLStatus> => {
|
||||
if (!_state.isMLEnabled) return { phase: "disabled" };
|
||||
|
||||
const w = await worker();
|
||||
|
||||
// The worker has a clustering progress set iff it is clustering. This
|
||||
// overrides other behaviours.
|
||||
const clusteringProgress = await w.clusteringProgess;
|
||||
if (clusteringProgress) {
|
||||
return {
|
||||
phase: "clustering",
|
||||
nSyncedFiles: clusteringProgress.completed,
|
||||
nTotalFiles: clusteringProgress.total,
|
||||
};
|
||||
}
|
||||
|
||||
const { indexedCount, indexableCount } =
|
||||
await getIndexableAndIndexedCounts();
|
||||
|
||||
@@ -556,11 +509,9 @@ const getMLStatus = async (): Promise<MLStatus> => {
|
||||
// indexable count.
|
||||
|
||||
let phase: MLStatus["phase"];
|
||||
const state = await (await worker()).state;
|
||||
const state = await w.state;
|
||||
if (state == "indexing" || state == "fetching") {
|
||||
phase = state;
|
||||
} else if (_wip_isClustering) {
|
||||
phase = "clustering";
|
||||
} else if (state == "init" || indexableCount > 0) {
|
||||
phase = "scheduled";
|
||||
} else {
|
||||
@@ -596,7 +547,7 @@ const setInterimScheduledStatus = () => {
|
||||
setMLStatusSnapshot({ phase: "scheduled", nSyncedFiles, nTotalFiles });
|
||||
};
|
||||
|
||||
const workerDidProcessFileOrIdle = throttled(updateMLStatusSnapshot, 2000);
|
||||
const workerDidUpdateStatus = throttled(updateMLStatusSnapshot, 2000);
|
||||
|
||||
/**
|
||||
* A function that can be used to subscribe to updates to {@link Person}s.
|
||||
@@ -631,22 +582,14 @@ export const peopleSubscribe = (onChange: () => void): (() => void) => {
|
||||
*/
|
||||
export const peopleSnapshot = () => _state.peopleSnapshot;
|
||||
|
||||
const updatePeopleSnapshot = () =>
|
||||
setPeopleSnapshot(_state.peopleRemote.concat(_state.peopleLocal));
|
||||
|
||||
const setPeopleSnapshot = (snapshot: Person[] | undefined) => {
|
||||
_state.peopleSnapshot = snapshot;
|
||||
_state.peopleListeners.forEach((l) => l());
|
||||
};
|
||||
|
||||
/**
|
||||
* Update our in-memory snapshot of people, also notifying the search subsystem
|
||||
* of the update.
|
||||
*/
|
||||
const updatePeople = async () => {
|
||||
const people = await updatedPeople();
|
||||
_wip_peopleRemote = people;
|
||||
setPeopleSnapshot(people.concat(_wip_peopleLocal ?? []));
|
||||
setSearchPeople(people);
|
||||
};
|
||||
|
||||
/**
|
||||
* Use CLIP to perform a natural language search over image embeddings.
|
||||
*
|
||||
@@ -674,20 +617,6 @@ export const unidentifiedFaceIDs = async (
|
||||
return index?.faces.map((f) => f.faceID) ?? [];
|
||||
};
|
||||
|
||||
/**
|
||||
* Extract the fileID of the {@link EnteFile} to which the face belongs from its
|
||||
* faceID.
|
||||
*/
|
||||
// TODO-Cluster
|
||||
export const fileIDFromFaceID = (faceID: string) => {
|
||||
const fileID = parseInt(faceID.split("_")[0] ?? "");
|
||||
if (isNaN(fileID)) {
|
||||
assertionFailed(`Ignoring attempt to parse invalid faceID ${faceID}`);
|
||||
return undefined;
|
||||
}
|
||||
return fileID;
|
||||
};
|
||||
|
||||
/**
|
||||
* Return the cached face crop for the given face, regenerating it if needed.
|
||||
*
|
||||
|
||||
@@ -3,15 +3,16 @@
|
||||
*/
|
||||
|
||||
/**
|
||||
* Callbacks invoked by the worker at various points in the indexing pipeline to
|
||||
* notify the main thread of events it might be interested in.
|
||||
* Callbacks invoked by the worker at various points in the indexing and
|
||||
* clustering pipeline to notify the main thread of events it might be
|
||||
* interested in.
|
||||
*/
|
||||
export interface MLWorkerDelegate {
|
||||
/**
|
||||
* Called whenever the worker processes a file during indexing (either
|
||||
* successfully or with errors), or when in goes into the "idle" state.
|
||||
* Called whenever the worker does some action that might need the UI state
|
||||
* indicating the indexing or clustering status to be updated.
|
||||
*/
|
||||
workerDidProcessFileOrIdle: () => void;
|
||||
workerDidUpdateStatus: () => void;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -25,11 +25,7 @@ import {
|
||||
indexCLIP,
|
||||
type CLIPIndex,
|
||||
} from "./clip";
|
||||
import {
|
||||
clusterFaces,
|
||||
type ClusteringOpts,
|
||||
type OnClusteringProgress,
|
||||
} from "./cluster";
|
||||
import { clusterFaces, type ClusteringProgress } from "./cluster";
|
||||
import { saveFaceCrops } from "./crop";
|
||||
import {
|
||||
getFaceIndexes,
|
||||
@@ -101,6 +97,8 @@ interface IndexableItem {
|
||||
export class MLWorker {
|
||||
/** The last known state of the worker. */
|
||||
public state: WorkerState = "init";
|
||||
/** If the worker is currently clustering, then its last known progress. */
|
||||
public clusteringProgess: ClusteringProgress | undefined;
|
||||
|
||||
private electron: ElectronMLWorker | undefined;
|
||||
private delegate: MLWorkerDelegate | undefined;
|
||||
@@ -246,7 +244,7 @@ export class MLWorker {
|
||||
this.state = "idle";
|
||||
this.idleDuration = Math.min(this.idleDuration * 2, idleDurationMax);
|
||||
this.idleTimeout = setTimeout(scheduleTick, this.idleDuration * 1000);
|
||||
this.delegate?.workerDidProcessFileOrIdle();
|
||||
this.delegate?.workerDidUpdateStatus();
|
||||
}
|
||||
|
||||
/** Return the next batch of items to backfill (if any). */
|
||||
@@ -280,14 +278,25 @@ export class MLWorker {
|
||||
}));
|
||||
}
|
||||
|
||||
// TODO-Cluster
|
||||
async clusterFaces(opts: ClusteringOpts, onProgress: OnClusteringProgress) {
|
||||
return clusterFaces(
|
||||
/**
|
||||
* Run face clustering on all faces.
|
||||
*
|
||||
* This should only be invoked when the face indexing (including syncing
|
||||
* with remote) is complete so that we cluster the latest set of faces.
|
||||
*/
|
||||
async clusterFaces() {
|
||||
const result = await clusterFaces(
|
||||
await getFaceIndexes(),
|
||||
await getAllLocalFiles(),
|
||||
opts,
|
||||
onProgress,
|
||||
(progress) => this.updateClusteringProgress(progress),
|
||||
);
|
||||
this.updateClusteringProgress(undefined);
|
||||
return result;
|
||||
}
|
||||
|
||||
private updateClusteringProgress(progress: ClusteringProgress | undefined) {
|
||||
this.clusteringProgess = progress;
|
||||
this.delegate?.workerDidUpdateStatus();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -347,7 +356,7 @@ const indexNextBatch = async (
|
||||
await Promise.race(tasks);
|
||||
|
||||
// Let the main thread know we're doing something.
|
||||
delegate?.workerDidProcessFileOrIdle();
|
||||
delegate?.workerDidUpdateStatus();
|
||||
|
||||
// Let us drain the microtask queue. This also gives a chance for other
|
||||
// interactive tasks like `clipMatches` to run.
|
||||
|
||||
@@ -4,7 +4,7 @@ import { ComlinkWorker } from "@/base/worker/comlink-worker";
|
||||
import { FileType } from "@/media/file-type";
|
||||
import i18n, { t } from "i18next";
|
||||
import { clipMatches, isMLEnabled, isMLSupported } from "../ml";
|
||||
import type { Person } from "../ml/cgroups";
|
||||
import type { NamedPerson } from "../ml/cgroups";
|
||||
import type {
|
||||
LabelledFileType,
|
||||
LabelledSearchDateComponents,
|
||||
@@ -58,9 +58,9 @@ export const setSearchCollectionsAndFiles = (cf: SearchCollectionsAndFiles) =>
|
||||
void worker().then((w) => w.setCollectionsAndFiles(cf));
|
||||
|
||||
/**
|
||||
* Set the people that we should search across.
|
||||
* Set the (named) people that we should search across.
|
||||
*/
|
||||
export const setSearchPeople = (people: Person[]) =>
|
||||
export const setSearchPeople = (people: NamedPerson[]) =>
|
||||
void worker().then((w) => w.setPeople(people));
|
||||
|
||||
/**
|
||||
|
||||
@@ -2,7 +2,7 @@ import { HTTPError } from "@/base/http";
|
||||
import type { Location } from "@/base/types";
|
||||
import type { Collection } from "@/media/collection";
|
||||
import { fileCreationPhotoDate, fileLocation } from "@/media/file-metadata";
|
||||
import type { Person } from "@/new/photos/services/ml/cgroups";
|
||||
import type { NamedPerson } from "@/new/photos/services/ml/cgroups";
|
||||
import type { EnteFile } from "@/new/photos/types/file";
|
||||
import { ensure } from "@/utils/ensure";
|
||||
import { nullToUndefined } from "@/utils/transform";
|
||||
@@ -37,7 +37,7 @@ export class SearchWorker {
|
||||
collections: [],
|
||||
files: [],
|
||||
};
|
||||
private people: Person[] = [];
|
||||
private people: NamedPerson[] = [];
|
||||
|
||||
/**
|
||||
* Fetch any state we might need when the actual search happens.
|
||||
@@ -62,9 +62,9 @@ export class SearchWorker {
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the people that we should search across.
|
||||
* Set the (named) people that we should search across.
|
||||
*/
|
||||
setPeople(people: Person[]) {
|
||||
setPeople(people: NamedPerson[]) {
|
||||
this.people = people;
|
||||
}
|
||||
|
||||
@@ -122,7 +122,7 @@ const suggestionsForString = (
|
||||
re: RegExp,
|
||||
searchString: string,
|
||||
{ collections, files }: SearchCollectionsAndFiles,
|
||||
people: Person[],
|
||||
people: NamedPerson[],
|
||||
{ locale, holidays, labelledFileTypes }: LocalizedSearchData,
|
||||
locationTags: LocationTag[],
|
||||
cities: City[],
|
||||
@@ -196,7 +196,10 @@ const fileCaptionSuggestion = (
|
||||
: [];
|
||||
};
|
||||
|
||||
const peopleSuggestions = (re: RegExp, people: Person[]): SearchSuggestion[] =>
|
||||
const peopleSuggestions = (
|
||||
re: RegExp,
|
||||
people: NamedPerson[],
|
||||
): SearchSuggestion[] =>
|
||||
people
|
||||
.filter((p) => re.test(p.name))
|
||||
.map((person) => ({ type: "person", person, label: person.name }));
|
||||
|
||||
@@ -2671,13 +2671,6 @@ hasown@^2.0.0, hasown@^2.0.1, hasown@^2.0.2:
|
||||
dependencies:
|
||||
function-bind "^1.1.2"
|
||||
|
||||
hdbscan@0.0.1-alpha.5:
|
||||
version "0.0.1-alpha.5"
|
||||
resolved "https://registry.yarnpkg.com/hdbscan/-/hdbscan-0.0.1-alpha.5.tgz#8b0cd45243fa60d2fe83e31f1e8bc939ff374c0d"
|
||||
integrity sha512-Jv92UaFFRAMcK8GKhyxlSGvkf5pf9Y9HpmRQyyWfWop5nm2zs2NmgGG3wOCYo5zy1AeZFtVJjgbpaPjR0IsR/Q==
|
||||
dependencies:
|
||||
kd-tree-javascript "^1.0.3"
|
||||
|
||||
heic-convert@^2.1.0:
|
||||
version "2.1.0"
|
||||
resolved "https://registry.yarnpkg.com/heic-convert/-/heic-convert-2.1.0.tgz#7f764529e37591ae263ef49582d1d0c13491526e"
|
||||
@@ -3121,11 +3114,6 @@ jszip@^3.10.1:
|
||||
readable-stream "~2.3.6"
|
||||
setimmediate "^1.0.5"
|
||||
|
||||
kd-tree-javascript@^1.0.3:
|
||||
version "1.0.3"
|
||||
resolved "https://registry.yarnpkg.com/kd-tree-javascript/-/kd-tree-javascript-1.0.3.tgz#ab5239ed44e347e10065590fd479e947bedff96c"
|
||||
integrity sha512-7oSugmaxTCJFqey11rlTSEQD3hGDnRgROMj9MEREvDGV8SlIFwN7x3jJRyFoi+mjO0+4wuSuaDLS1reNQHP7uA==
|
||||
|
||||
keyv@^4.5.3:
|
||||
version "4.5.4"
|
||||
resolved "https://registry.yarnpkg.com/keyv/-/keyv-4.5.4.tgz#a879a99e29452f942439f2a405e3af8b31d4de93"
|
||||
|
||||
Reference in New Issue
Block a user