import { LRUCache } from "lru-cache";
import StreamZip from "node-stream-zip";

/**
 * Open zip handles, keyed by the path of the zip file they were opened for.
 *
 * At most 50 handles are retained; beyond that the least recently used entry
 * is dropped from the cache.
 *
 * NOTE: Handles dropped because of the size limit are _not_ closed here, since
 * a caller might still be reading from them. Only {@link clearOpenZipCache}
 * closes handles.
 */
const _cache = new LRUCache<string, StreamZip.StreamZipAsync>({ max: 50 });

/**
 * Cached `StreamZip.async`s
 *
 * This function uses an LRU cache to cache handles to zip files indexed by
 * their path.
 *
 * To clear the cache (which is a good idea to avoid having open file handles
 * lying around), use {@link clearOpenZipCache}.
 *
 * Why was this needed
 * -------------------
 *
 * Caching the StreamZip file handles _significantly_ (hours => seconds)
 * improves the performance of the metadata parsing step during import of large
 * Google Takeout zips.
 *
 * In ad-hoc tests, it seems that beyond a certain zip size (few GBs), reopening
 * the handle to a stream zip overshadows the time taken to read the individual
 * JSONs.
 *
 * @param zipPath Path of the zip file on the local file system. It is used
 * verbatim as the cache key.
 *
 * @returns A `StreamZip.async` handle for the zip at {@link zipPath}. The same
 * handle is returned for subsequent calls with the same path as long as it
 * remains in the cache. Callers should not close the returned handle
 * themselves, otherwise later callers would receive a closed handle.
 */
export const openZip = (zipPath: string): StreamZip.StreamZipAsync => {
    const cached = _cache.get(zipPath);
    if (cached) return cached;

    const zip = new StreamZip.async({ file: zipPath });
    _cache.set(zipPath, zip);
    return zip;
};

/**
 * Clear any entries previously cached by {@link openZip}, closing the
 * underlying zip file handles.
 *
 * The handles are closed in the background: this function returns immediately
 * without waiting for the closes to complete. A failure to close a handle is
 * logged and otherwise ignored, since there is nothing more the caller can do
 * about it.
 *
 * This should be called only once there are no more pending reads on handles
 * obtained from {@link openZip}.
 */
export const clearOpenZipCache = (): void => {
    // Take a snapshot first so that the cache is already empty (and thus
    // cannot hand out a handle that is being closed) when we start closing.
    const handles = [..._cache.values()];
    _cache.clear();

    for (const zip of handles) {
        // Merely dropping the reference does not release the file descriptor
        // held by the StreamZip; it needs an explicit close.
        void zip.close().catch((e: unknown) => {
            console.warn("Failed to close cached zip handle", e);
        });
    }
};