Skip to content

Commit 88839ce

Browse files
committed
Updates... πŸ‘€
1 parent 72e2b48 commit 88839ce

File tree

12 files changed

+1306
-925
lines changed

12 files changed

+1306
-925
lines changed

bin/cache-checksums.js

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
'use strict';

/*
 * Saves checksums for all image files under process.env.FOLDER_PATH in Redis.
 *
 * Required environment:
 *   ACCESS_TOKEN - pCloud API access token.
 *   FOLDER_PATH  - root folder to scan recursively.
 */

const pcloudSdk = require('pcloud-sdk-js');

const {getFilesRecursive} = require('../lib/iter');
const RedisCacher = require('../lib/redis-cacher');

const accessToken = process.env.ACCESS_TOKEN;
const path = process.env.FOLDER_PATH;

const pClient = pcloudSdk.createClient(accessToken);
const rCacher = new RedisCacher({pCloudClient: pClient});

// Lists the whole folder tree in a single API call, then caches a
// checksum for every file found.
async function run() {
  const response = await pClient.api('listfolder', {params: {path, recursive: 1}});
  const files = getFilesRecursive(response.metadata);
  await rCacher.cacheChecksums(files);
}

run().catch(error => {
  console.error(error);
  // Bug fix: exit non-zero on failure so cron/CI callers can detect it
  // (previously the process always exited 0).
  process.exitCode = 1;
}).finally(() => {
  // Always release the Redis connection so the process can exit.
  rCacher.quit();
});

bin/intersects-checksums.js

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
const {promisify} = require('util');
2+
const pcloudSdk = require('pcloud-sdk-js');
3+
const pMap = require('p-map');
4+
const redis = require('redis');
5+
6+
const pClient = pcloudSdk.createClient(process.env.ACCESS_TOKEN);
7+
const rClient = redis.createClient();
8+
const redisHmget = promisify(rClient.hmget).bind(rClient);
9+
10+
// True when the two Sets share at least one element.
function intersects(a, b) {
  return [...a].some(value => b.has(value));
}
19+
20+
/*
 * Groups entries whose hash Sets overlap.
 *
 * Each input item is {name, hashes: Set}. A name joins the first existing
 * group whose accumulated hashUnion intersects its hashes; otherwise it
 * starts a new group seeded with its own Set.
 *
 * NOTE(review): merging is greedy against the first match only — two
 * existing groups later connected by a common hash are not re-merged.
 * Confirm that this is the intended semantics.
 */
function mergeNames(hashesPerName) {
  const groups = [];

  for (const {name, hashes} of hashesPerName) {
    const group = groups.find(g => intersects(hashes, g.hashUnion));

    if (group) {
      group.names.push(name);
      for (const hash of hashes) {
        group.hashUnion.add(hash);
      }
    } else {
      groups.push({names: [name], hashUnion: hashes});
    }
  }

  return groups;
}
35+
36+
/*
 * Looks up the cached checksum for one pCloud file.
 *
 * Resolves to [fileName, checksum]; rejects when the checksum is not
 * present in the Redis 'checksums' hash.
 */
async function getChecksum(file) {
  const [checksum] = await redisHmget('checksums', file.fileid.toString());

  if (checksum === null) {
    throw new Error(`File ${file.name}'s checksum is not cached`);
  }

  return [file.name, checksum];
}
45+
46+
// Entry point: for each subfolder of FOLDER_ID, gathers the cached
// checksums of its category-1 (image) files, then prints the groups of
// folder names whose checksum sets overlap.
async function run() {
  const folderId = Number.parseInt(process.env.FOLDER_ID, 10);
  const allFiles = await pClient.listfolder(folderId, {recursive: true});
  const goodFolders = allFiles.contents.filter(entry => entry.isfolder);

  const hashesPerName = await pMap(goodFolders, async folder => {
    const {name} = folder;
    const imageFiles = folder.contents.filter(f => f.category === 1);
    const checksumPairs = await pMap(imageFiles, f => getChecksum(f), {concurrency: 5});
    console.log('βœ“', name);
    const hashes = new Set(checksumPairs.map(([, checksum]) => checksum));
    return {name, hashes};
  }, {concurrency: 2});

  // Keep only groups that actually merged more than one folder name.
  const merged = mergeNames(hashesPerName);
  const duplicated = merged.filter(({names}) => names.length > 1);

  console.log(duplicated.map(({names}) => names));
}
69+
70+
run().catch(error => {
  console.error(error);
  // Bug fix: exit non-zero on failure so shell callers can detect it
  // (previously the process always exited 0).
  process.exitCode = 1;
}).finally(() => {
  // Close the Redis connection so the event loop can drain and exit.
  rClient.quit();
});

intersects.js renamed to bin/intersects-hashes.js

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ function intersects(a, b) {
1616
function mergeNames(hashesPerName) {
1717
const result = [];
1818
for (const {name, hashes} of hashesPerName) {
19-
const resultItem = result.find((i) => intersects(hashes, i.hashUnion));
19+
const resultItem = result.find(i => intersects(hashes, i.hashUnion));
2020
if (!resultItem) {
2121
result.push({names: [name], hashUnion: hashes});
2222
continue;
2323
}
2424

2525
resultItem.names.push(name);
26-
hashes.forEach((h) => resultItem.hashUnion.add(h));
26+
hashes.forEach(h => resultItem.hashUnion.add(h));
2727
}
2828

2929
return result;
@@ -32,15 +32,15 @@ function mergeNames(hashesPerName) {
3232
async function run() {
3333
const folderId = Number.parseInt(process.env.FOLDER_ID, 10);
3434
const allFiles = await client.listfolder(folderId, {recursive: true});
35-
const goodFolders = allFiles.contents.filter((f) =>
35+
const goodFolders = allFiles.contents.filter(f =>
3636
f.name.match(/^[\w-]+ \d+$/)
3737
);
3838

39-
const hashesPerName = await pMap(goodFolders, async (f) => {
39+
const hashesPerName = await pMap(goodFolders, async f => {
4040
const name = f.name;
4141
const hashList = f.contents
42-
.filter((f) => f.category === 1)
43-
.map((f) => f.hash);
42+
.filter(f => f.category === 1)
43+
.map(f => f.hash);
4444
const hashes = new Set(hashList);
4545
return {name, hashes};
4646
});
@@ -57,6 +57,6 @@ async function run() {
5757
console.log(onlyNames);
5858
}
5959

60-
run().catch((error) => {
60+
run().catch(error => {
6161
console.error(error);
6262
});

bin/rm-dups.js

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Finds duplicates within folders, recursively
2+
3+
const pcloudSdk = require('pcloud-sdk-js');
4+
const pMap = require('p-map');
5+
const delay = require('delay');
6+
7+
const {getFoldersRecursive} = require('../lib/iter');
8+
9+
const client = pcloudSdk.createClient(process.env.ACCESS_TOKEN);
10+
11+
/*
 * Buckets files by their pCloud content hash.
 *
 * Returns Map<hash, file[]>, preserving the input order inside each bucket.
 */
function groupByHash(files) {
  const byHash = new Map();

  for (const file of files) {
    const bucket = byHash.get(file.hash);

    if (bucket === undefined) {
      byHash.set(file.hash, [file]);
    } else {
      bucket.push(file);
    }
  }

  return byHash;
}
25+
26+
// Earliest of a file's created/modified timestamps, as epoch milliseconds.
// (Ternary rather than Math.min so behavior with unparsable dates is
// unchanged: an invalid `created` falls through to `modified`.)
function minDate(file) {
  const createdMs = new Date(file.created).getTime();
  const modifiedMs = new Date(file.modified).getTime();
  return createdMs < modifiedMs ? createdMs : modifiedMs;
}
31+
32+
/*
 * Collects the duplicate image files (category === 1) within one folder.
 *
 * Returns {foldername, fileGroups} where each group holds files sharing a
 * hash, sorted oldest first and reduced to {name, fileid}.
 */
async function cleanFolder(folder) {
  const images = folder.contents.filter(entry => entry.category === 1);
  const duplicateGroups = [...groupByHash(images).values()]
    .filter(group => group.length > 1);

  for (const group of duplicateGroups) {
    group.sort((a, b) => minDate(a) - minDate(b));
  }

  const fileGroups = duplicateGroups.map(
    group => group.map(({name, fileid}) => ({name, fileid}))
  );

  return {foldername: folder.name, fileGroups};
}
48+
49+
// Runs cleanFolder over the folder and every subfolder, keeping only
// results that actually contain duplicate groups.
async function recursiveCleanFolder(folder) {
  const perFolder = await pMap(getFoldersRecursive(folder), folderEntry => cleanFolder(folderEntry));
  return perFolder.filter(result => result.fileGroups.length > 0);
}
53+
54+
// Entry point: lists FOLDER_PATH recursively, finds duplicate images in
// every folder, keeps the oldest copy of each group, and deletes the rest.
async function run() {
  const path = process.env.FOLDER_PATH;
  const response = await client.api('listfolder', {params: {path, recursive: 1}});
  const folder = response.metadata;
  const dupFilesPerFolder = await recursiveCleanFolder(folder);

  // For every folder, the first (oldest) file of each group is kept.
  const deletion = dupFilesPerFolder.map(({foldername, fileGroups}) => {
    const keepDelGroups = fileGroups.map(g => ({
      keep: g[0],
      del: g.slice(1)
    }));
    return {foldername, keepDelGroups};
  });

  const allToDelete = deletion
    .flatMap(({keepDelGroups}) => keepDelGroups.map(({del}) => del))
    .flat();

  await pMap(
    allToDelete,
    async file => {
      // Random delay spreads deletes out to avoid hammering the API.
      const promise = delay(Math.random() * 6000);
      console.log('Deleting', file.name);
      await client.deletefile(file.fileid);
      await promise;
    },
    {concurrency: 10}
  );
}

run().catch(error => {
  console.error(error);
  // Bug fix: exit non-zero on failure so callers can detect it
  // (previously the process always exited 0).
  process.exitCode = 1;
});

bin/rm-empty.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Remove empty folders
2+
3+
const pcloudSdk = require('pcloud-sdk-js');
4+
const pMap = require('p-map');
5+
const pFilter = require('p-filter');
6+
const delay = require('delay');
7+
8+
const client = pcloudSdk.createClient(process.env.ACCESS_TOKEN);
9+
10+
// Entry point: lists FOLDER_ID recursively and deletes every folder
// whose contents array is empty.
async function run() {
  const folderId = Number.parseInt(process.env.FOLDER_ID, 10);
  const allFiles = await client.listfolder(folderId, {recursive: true});
  const foldersToRemove = await pFilter(allFiles.contents, f => {
    return f.isfolder && f.contents.length === 0;
  });
  // Log the candidates before the destructive step, for auditability.
  console.log(foldersToRemove);
  await pMap(foldersToRemove, async f => {
    // Throttle: at most one delete per ~2s per worker.
    const delayPromise = delay(2000);
    await client.deletefolder(f.folderid);
    await delayPromise;
  }, {concurrency: 10});
}

run().catch(error => {
  console.error('Error', error);
  // Bug fix: exit non-zero on failure so callers can detect it
  // (previously the process always exited 0).
  process.exitCode = 1;
});

docker-compose.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
version: "3.8"
2+
services:
3+
redis:
4+
image: redis:6.0.5-alpine
5+
ports: ["6379:6379"]
6+
volumes: [redis-data:/data]
7+
volumes:
8+
redis-data:

dups.js

Lines changed: 0 additions & 81 deletions
This file was deleted.

lib/iter.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
'use strict';
2+
3+
// Depth-first walk over a pCloud folder tree, yielding every folder
// (the root itself first, then subfolders in listing order).
function * getFoldersRecursive(folder) {
  yield folder;

  for (const entry of folder.contents) {
    if (entry.isfolder) {
      yield * getFoldersRecursive(entry);
    }
  }
}
9+
10+
/*
 * Yields every non-folder entry in the tree, depth-first.
 *
 * `filter` (optional) is a predicate applied to each file; when given,
 * only matching files are yielded — in subfolders too.
 */
function * getFilesRecursive(folder, filter = null) {
  for (const file of folder.contents.filter(f => !f.isfolder)) {
    if (!filter || filter(file)) {
      yield file;
    }
  }

  for (const subFolder of folder.contents.filter(f => f.isfolder)) {
    // Bug fix: propagate `filter` into the recursion — it was previously
    // dropped, so files in subfolders were never filtered.
    yield * getFilesRecursive(subFolder, filter);
  }
}
21+
22+
module.exports = {getFoldersRecursive, getFilesRecursive};

0 commit comments

Comments
(0)