Skip to content

Commit 1a13e5c

Browse files
committed
add cmd zipunicode to fix zip enc issues
1 parent 84c9eac commit 1a13e5c

File tree

10 files changed

+426
-78
lines changed

10 files changed

+426
-78
lines changed

cmd/cmd_prefix.js

Lines changed: 39 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,11 @@ import chalk from 'chalk';
1111
import { sify } from 'chinese-conv';
1212
import fs from 'fs-extra';
1313
import inquirer from "inquirer";
14+
import { cpus } from "os";
15+
import pMap from 'p-map';
1416
import path from "path";
15-
16-
1717
import { asyncFilter } from '../lib/core.js';
1818
import * as log from '../lib/debug.js';
19-
import * as enc from '../lib/encoding.js';
2019
import * as mf from '../lib/file.js';
2120
import * as helper from '../lib/helper.js';
2221
import { renameFiles } from "./cmd_shared.js";
@@ -26,8 +25,6 @@ const MODE_DIR = "dirname";
2625
const MODE_PREFIX = "prefix";
2726
const MODE_MEDIA = "media";
2827
const MODE_CLEAN = 'clean';
29-
const MODE_TC2SC = "tc2sc"; // 繁体转简体
30-
const MODE_FIXENC = "fixenc"; // 乱码还原
3128

3229
const NAME_LENGTH = 32;
3330

@@ -67,7 +64,7 @@ const builder = function addOptions(ya, helpOrVersionSet) {
6764
type: "string",
6865
default: MODE_AUTO,
6966
description: "filename prefix mode for output ",
70-
choices: [MODE_AUTO, MODE_DIR, MODE_PREFIX, MODE_MEDIA, MODE_CLEAN, MODE_TC2SC, MODE_FIXENC],
67+
choices: [MODE_AUTO, MODE_DIR, MODE_PREFIX, MODE_MEDIA, MODE_CLEAN],
7168
})
7269
.option("auto", {
7370
type: "boolean",
@@ -93,14 +90,6 @@ const builder = function addOptions(ya, helpOrVersionSet) {
9390
type: "boolean",
9491
description: "mode clean only",
9592
})
96-
.option("tc-to-sc", {
97-
type: "boolean",
98-
description: "mode tc to sc",
99-
})
100-
.option("fix-encoding", {
101-
type: "boolean",
102-
description: "mode fix encoding messy chars",
103-
})
10493
// 清理文件名中的特殊字符和非法字符
10594
.option("clean", {
10695
alias: "c",
@@ -158,7 +147,7 @@ const reImageName = /更新|合集|画师|图片|视频|插画|视图|作品|订
158147
// \p{ASCII} ASCII字符
159148
// \uFE10-\uFE1F 中文全角标点
160149
// \uFF01-\uFF11 中文全角标点
161-
const reNonChars = /[^\p{Unified_Ideograph}\p{P}\p{sc=Hira}0-z]/ugi;
150+
const reNonChars = /[^\p{Unified_Ideograph}\p{sc=Hira}\p{sc=Kana}\w]/ugi;
162151
// 匹配空白字符和特殊字符
163152
// https://www.unicode.org/charts/PDF/U3000.pdf
164153
// https://www.asciitable.com/
@@ -177,7 +166,7 @@ const reMediaDirName = /^图片|视频|电影|电视剧|Image|Video|Thumbs$/gi;
177166
// https://github.com/fujaru/aromanize-js
178167
// https://www.npmjs.com/package/aromanize
179168
// https://www.npmjs.com/package/@lazy-cjk/japanese
180-
function cleanAlbumName(nameString, sep, filename) {
169+
function cleanFileName(nameString, sep, filename, keepNumber = false) {
181170
let nameStr = nameString;
182171
// 去掉方括号 [xxx] 的内容
183172
// nameStr = nameStr.replaceAll(/\[.+?\]/gi, "");
@@ -186,19 +175,21 @@ function cleanAlbumName(nameString, sep, filename) {
186175
// 去掉视频说明文字
187176
nameStr = nameStr.replaceAll(reVideoName, "");
188177
// 去掉日期字符串
189-
nameStr = nameStr.replaceAll(/\d+\d+/gi, "");
190-
nameStr = nameStr.replaceAll(/\d{4}-\d{2}-\d{2}/gi, "");
178+
if (!keepNumber) {
179+
nameStr = nameStr.replaceAll(/\d+\d+/ugi, "");
180+
nameStr = nameStr.replaceAll(/\d{4}-\d{2}-\d{2}/ugi, "");
181+
}
191182
// 去掉 [100P5V 2.25GB] No.46 这种图片集说明
192-
nameStr = nameStr.replaceAll(/\[\d+P.*(\d+V)?.*?\]/gi, "");
193-
nameStr = nameStr.replaceAll(/No\.\d+|\d+\.?\d+GB?|\d+P|\d+V|NO\.(\d+)/gi, "$1");
183+
nameStr = nameStr.replaceAll(/\[\d+P.*(\d+V)?.*?\]/ugi, "");
184+
nameStr = nameStr.replaceAll(/No\.\d+|\d+\.?\d+GB?|\d+P|\d+V|NO\.(\d+)/ugi, "$1");
194185
if (helper.isImageFile(filename)) {
195186
// 去掉 2024.03.22 这种格式的日期
196-
nameStr = nameStr.replaceAll(/\d{4}\.\d{2}\.\d{2}/gi, "");
187+
nameStr = nameStr.replaceAll(/\d{4}\.\d{2}\.\d{2}/ugi, "");
197188
}
198189
// 去掉中文标点特殊符号
199-
nameStr = nameStr.replaceAll(/[\u3000-\u303F\uFE10-\uFE1F\uFF01-\uFF11]/gi, "");
190+
nameStr = nameStr.replaceAll(/[\u3000-\u303F\uFE10-\uFE2F]/ugi, "");
200191
// () [] {} <> . - 改为下划线
201-
nameStr = nameStr.replaceAll(/[\(\)\[\]{}<>\.\-]/gi, sep);
192+
nameStr = nameStr.replaceAll(/[\(\)\[\]{}<>\.\-]/ugi, sep);
202193
// 日文转罗马字母
203194
// nameStr = hepburn.fromKana(nameStr);
204195
// nameStr = wanakana.toRomaji(nameStr);
@@ -232,20 +223,16 @@ function parseNameMode(argv) {
232223
if (argv.dirname) { mode = MODE_DIR; }
233224
if (argv.media) { mode = MODE_MEDIA; }
234225
if (argv.cleanOnly) { mode = MODE_CLEAN; }
235-
if (argv.tcToSc) { mode = MODE_TC2SC; }
236-
if (argv.fixEncoding) { mode = MODE_FIXENC; }
237226
return mode;
238227
}
239228

240-
let badUnicodeCount = 0;
241229
// 重复文件名Set,检测重复,防止覆盖
242230
const nameDuplicateSet = new Set();
243-
function createNewNameByMode(f, argv) {
231+
async function createNewNameByMode(f) {
232+
const argv = f.argv;
244233
const mode = parseNameMode(argv);
245234
const nameLength = (mode === MODE_MEDIA
246-
|| mode === MODE_CLEAN
247-
|| mode === MODE_TC2SC
248-
|| mode === MODE_FIXENC) ?
235+
|| mode === MODE_CLEAN) ?
249236
200 : argv.length || NAME_LENGTH;
250237
const nameSlice = nameLength * -1;
251238
const [dir, base, ext] = helper.pathSplit(f.path);
@@ -264,8 +251,6 @@ function createNewNameByMode(f, argv) {
264251
let oldBase = base;
265252
switch (mode) {
266253
case MODE_CLEAN:
267-
case MODE_TC2SC:
268-
case MODE_FIXENC:
269254
{
270255
sep = ".";
271256
prefix = "";
@@ -311,68 +296,45 @@ function createNewNameByMode(f, argv) {
311296
break;
312297
default:
313298
throw new Error(`Invalid mode: ${mode} ${argv.mode}`)
314-
break;
315299
}
316300

317-
if (mode !== MODE_CLEAN && mode !== MODE_TC2SC && mode !== MODE_FIXENC) {
301+
if (mode !== MODE_CLEAN) {
318302
// 无有效前缀,报错退出
319303
if (!prefix || prefix.length == 0) {
320304
log.warn(logTag, `Invalid Prefix: ${helper.pathShort(f.path)} ${mode}`);
321305
throw new Error(`No prefix supplied!`);
322306
}
323307
}
308+
log.show(prefix)
324309
let newPathFixed = null;
325-
// 此模式仅执行简繁转换,不进行其它操作
326-
if (mode === MODE_TC2SC) {
327-
oldBase = sify(oldBase);
328-
} else if (mode == MODE_FIXENC) {
329-
const strPath = path.resolve(f.path).split(path.sep).join(' ')
330-
if (enc.hasBadUnicode(strPath)) {
331-
log.show(logTag, `Bad:${++badUnicodeCount}`, f.path)
332-
}
333-
// 当模式为MODE_FIXENC时,对文件路径进行特定的编码修复处理
334-
// 对旧基础路径进行中日韩文字编码修复
335-
let [fs, ft] = enc.fixCJKEnc(oldBase);
336-
oldBase = fs.trim();
337-
// 将目录路径分割,并对每个部分进行编码修复
338-
const dirNamesFixed = dir.split(path.sep).map(s => {
339-
let [rs, rt] = enc.fixCJKEnc(s)
340-
return rs.trim();
341-
});
342-
// 重新组合修复后的目录路径
343-
const dirFixed = path.join(...dirNamesFixed);
344-
// 生成修复后的新路径,包括旧基础路径和文件扩展名
345-
newPathFixed = path.join(dirFixed, `${oldBase}${ext}`);
346-
}
347310
// 是否净化文件名,去掉各种特殊字符
348-
else if (argv.clean || mode === MODE_CLEAN) {
349-
prefix = cleanAlbumName(prefix, sep, oldName);
350-
oldBase = cleanAlbumName(oldBase, sep, oldName);
311+
if (argv.clean || mode === MODE_CLEAN) {
312+
prefix = cleanFileName(prefix, sep, oldName, false);
313+
oldBase = cleanFileName(oldBase, sep, oldName, true);
351314
}
352315
// 不添加重复前缀
353316
if (oldBase.includes(prefix)) {
354317
log.info(logTag, `IgnorePrefix: ${ipx} ${helper.pathShort(f.path)}`);
355318
prefix = "";
356319
}
357320
let fullBase = prefix.length > 0 ? (prefix + sep + oldBase) : oldBase;
358-
if (mode !== MODE_TC2SC && mode !== MODE_FIXENC) {
359-
// 去除首位空白和特殊字符
360-
fullBase = fullBase.replaceAll(reStripUglyChars, "");
361-
// 多余空白和字符替换为一个字符 _或.
362-
fullBase = fullBase.replaceAll(reUglyChars, sep);
363-
// 去掉重复词组,如目录名和人名
364-
fullBase = Array.from(new Set(fullBase.split(sep))).join(sep)
365-
fullBase = unicodeStrLength(fullBase) > nameLength ? fullBase.slice(nameSlice) : fullBase;
366-
// 再次去掉首位的特殊字符和空白字符
367-
fullBase = fullBase.replaceAll(reStripUglyChars, "");
368-
}
321+
// 去除首位空白和特殊字符
322+
fullBase = fullBase.replaceAll(reStripUglyChars, "");
323+
// 多余空白和字符替换为一个字符 _或.
324+
fullBase = fullBase.replaceAll(reUglyChars, sep);
325+
// 去掉重复词组,如目录名和人名
326+
fullBase = Array.from(new Set(fullBase.split(sep))).join(sep)
327+
fullBase = unicodeStrLength(fullBase) > nameLength ? fullBase.slice(nameSlice) : fullBase;
328+
// 再次去掉首位的特殊字符和空白字符
329+
fullBase = fullBase.replaceAll(reStripUglyChars, "");
330+
369331
const newName = `${fullBase}${ext}`;
370332
const newPath = newPathFixed ?? path.join(dir, newName);
371333
if (newPath === f.path) {
372334
log.info(logTag, `Same: ${ipx} ${helper.pathShort(newPath)}`);
373335
f.skipped = true;
374336
}
375-
else if (fs.existsSync(newPath)) {
337+
else if (await fs.pathExists(newPath)) {
376338
log.info(logTag, `Exists: ${ipx} ${helper.pathShort(newPath)}`);
377339
f.skipped = true;
378340
}
@@ -444,12 +406,17 @@ const handler = async function cmdPrefix(argv) {
444406
files = files.map((f, i) => {
445407
return {
446408
...f,
409+
argv: argv,
447410
index: i,
448411
total: files.length,
449412
}
450413
})
451414
const fCount = files.length;
452-
const tasks = files.map(f => createNewNameByMode(f, argv)).filter(f => f?.outName)
415+
//const tasks = files.map(f => createNewNameByMode(f, argv)).filter(f => f?.outName)
416+
417+
let tasks = await pMap(files, createNewNameByMode, { concurrency: cpus().length * 4 })
418+
tasks = tasks.filter(f => f?.outName)
419+
453420
const tCount = tasks.length;
454421
log.showYellow(
455422
logTag, `Total ${fCount - tCount} files are skipped.`

0 commit comments

Comments
 (0)