Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions lib/filename-generator/by-site-structure.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import path from 'path';
import url from 'url';
import sanitizeFilename from 'sanitize-filename';
import {getHostFromUrl, getFilepathFromUrl, getFilenameExtension, shortenFilename} from '../utils/index.js';
import {getHostFromUrl, getFilepathFromUrl, getFilenameExtension, sanitizeFilename} from '../utils/index.js';
import resourceTypes from '../config/resource-types.js';
import resourceTypeExtensions from '../config/resource-ext-by-type.js';

Expand Down Expand Up @@ -49,7 +48,6 @@ export default function generateFilename (resource, {defaultFilename}) {

function sanitizeFilepath (filePath) {
filePath = path.normalize(filePath);
const pathParts = filePath.split(path.sep).map(pathPart => sanitizeFilename(pathPart, {replacement: '_'}));
pathParts[pathParts.length - 1] = shortenFilename(pathParts[pathParts.length - 1]);
const pathParts = filePath.split(path.sep).map(pathPart => sanitizeFilename(pathPart));
return pathParts.join(path.sep);
}
13 changes: 6 additions & 7 deletions lib/filename-generator/by-type.js
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
import path from 'path';
import sanitizeFilename from 'sanitize-filename';
import {shortenFilename, getFilenameExtension, getFilenameFromUrl} from '../utils/index.js';
import {sanitizeFilename, getFilenameExtension, getFilenameFromUrl} from '../utils/index.js';
import typeExtensions from '../config/resource-ext-by-type.js';

export default function generateFilename (resource, {subdirectories, defaultFilename}, occupiedFileNames) {
const occupiedNames = getSubDirectoryNames({subdirectories}).concat(occupiedFileNames);

let filename = getFilenameForResource(resource, {subdirectories, defaultFilename});
filename = shortenFilename(sanitizeFilename(filename, {replacement: '_'}));
filename = sanitizeFilename(filename);

const extension = getFilenameExtension(filename);
const directory = getDirectoryByExtension(extension, {subdirectories, defaultFilename});

let currentFilename = path.join(directory, filename);
let currentFilepath = path.join(directory, filename);
const basename = path.basename(filename, extension);
let index = 1;

while (occupiedNames.includes(currentFilename)) {
currentFilename = path.join(directory, `${basename}_${index}${extension}`);
while (occupiedNames.includes(currentFilepath)) {
currentFilepath = path.join(directory, `${basename}_${index}${extension}`);
index++;
}

return currentFilename;
return currentFilepath;
}

function getFilenameForResource (resource, {defaultFilename}) {
Expand Down
19 changes: 10 additions & 9 deletions lib/utils/index.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import url from 'url';
import path from 'path';
import normalize from 'normalize-url';
import { default as filenamifySanitizeFilename } from 'filenamify';
import typeByMime from '../config/resource-type-by-mime.js';
import typeByExt from '../config/resource-type-by-ext.js';

import logger from '../logger.js';

const MAX_FILENAME_LENGTH = 255;

const MAX_FILENAME_LENGTH = 230;
Copy link

Copilot AI Nov 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The MAX_FILENAME_LENGTH is set to 230, but the tests expect filenames to be at most 255 characters (lessThanOrEqual(255)). This creates an inconsistency where the implementation will limit filenames to 230 characters while tests verify against 255. Consider either:

  1. Updating MAX_FILENAME_LENGTH to 255 to match the test expectations, or
  2. Updating all test assertions to use lessThanOrEqual(230) instead of lessThanOrEqual(255)

Note: The value 230 might be intentionally lower than 255 to leave room for path separators or other modifications, but this should be documented and the tests should reflect the actual limit.

Suggested change
const MAX_FILENAME_LENGTH = 230;
const MAX_FILENAME_LENGTH = 255;

Copilot uses AI. Check for mistakes.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests check that the filenames are not longer than allowed by the system.

Existing implementation has the logic to avoid possible filename conflicts by adding the suffix before the extension.

Length 230 was selected as a value which allows to always be within the allowed FS/OS length.

const IS_URL = /^((http[s]?:)?\/\/)/;

function isUrl (path) {
Expand Down Expand Up @@ -97,13 +99,12 @@ function getFilenameExtension (filepath) {
return (typeof filepath === 'string') ? path.extname(filepath).toLowerCase() : null;
}

function shortenFilename (filename) {
if (filename.length >= MAX_FILENAME_LENGTH) {
const shortFilename = filename.substring(0, 20) + getFilenameExtension(filename);
logger.debug(`[utils] shorten filename: ${filename} -> ${shortFilename}`);
return shortFilename;
function sanitizeFilename (filename) {
const sanitized = filenamifySanitizeFilename(filename, {replacement: '_', maxLength: MAX_FILENAME_LENGTH});
if (sanitized !== filename) {
logger.debug(`[utils] sanitize filename: ${filename} -> ${sanitized}`);
}
return filename;
return sanitized;
}

function normalizeUrl (u, opts) {
Expand Down Expand Up @@ -183,7 +184,7 @@ function updateResourceEncoding (resource, encoding) {
const updatedText = Buffer.from(resourceText, resource.getEncoding()).toString(encoding);
resource.setText(updatedText);
}

resource.setEncoding(encoding);
}

Expand All @@ -197,7 +198,7 @@ export {
getFilenameExtension,
getHashFromUrl,
getHostFromUrl,
shortenFilename,
sanitizeFilename,
prettifyFilename,
normalizeUrl,
urlsEqual,
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@
"cheerio": "^1.1.2",
"css-url-parser": "^1.0.0",
"debug": "^4.3.1",
"filenamify": "^7.0.0",
"fs-extra": "^11.1.0",
"got": "^14.4.7",
"normalize-url": "^8.0.0",
"p-queue": "^9.0.0",
"sanitize-filename": "^1.6.3",
"srcset": "^5.0.0"
},
"devDependencies": {
Expand Down
18 changes: 10 additions & 8 deletions test/unit/filename-generator/by-site-structure-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ describe('FilenameGenerator: bySiteStructure', () => {

it('should replace not allowed characters from filename', () => {
const r1 = new Resource('http://example.com/some/path/<*a*>.png');
bySiteStructureFilenameGenerator(r1, options).should.equalFileSystemPath('example.com/some/path/__a__.png');
bySiteStructureFilenameGenerator(r1, options).should.equalFileSystemPath('example.com/some/path/_a_.png');
});

it('should replace not allowed characters from path', () => {
Expand Down Expand Up @@ -74,8 +74,10 @@ describe('FilenameGenerator: bySiteStructure', () => {
it('should shorten filename', () => {
const resourceFilename = new Array(1000).fill('a').join('') + '.png';
const r = new Resource('http://example.com/' + resourceFilename);
const filename = bySiteStructureFilenameGenerator(r, options);
should(filename.length).be.lessThan(255);
const filepath = bySiteStructureFilenameGenerator(r, options);
const filenameParts = filepath.split('/');
const filename = filenameParts[filenameParts.length - 1];
should(filename.length).be.lessThanOrEqual(255);
});

it('should shorten filename if resource is html without ext and default name is too long', () => {
Expand All @@ -85,17 +87,17 @@ describe('FilenameGenerator: bySiteStructure', () => {
const filepath = bySiteStructureFilenameGenerator(r, { defaultFilename: defaultFilename });
const filenameParts = filepath.split('/');
const filename = filenameParts[filenameParts.length - 1];
should(filename.length).be.lessThan(255);
should(filename.length).be.lessThanOrEqual(255);
});

it('should return decoded filepath', () => {
const r = new Resource('https://developer.mozilla.org/ru/docs/JavaScript_%D1%88%D0%B5%D0%BB%D0%BB%D1%8B');
const filename = bySiteStructureFilenameGenerator(r, options);
filename.should.equalFileSystemPath('developer.mozilla.org/ru/docs/JavaScript_шеллы');
const filepath = bySiteStructureFilenameGenerator(r, options);
filepath.should.equalFileSystemPath('developer.mozilla.org/ru/docs/JavaScript_шеллы');

const r2 = new Resource('https://developer.mozilla.org/Hello%20G%C3%BCnter.png');
const filename2 = bySiteStructureFilenameGenerator(r2, options);
filename2.should.equalFileSystemPath('developer.mozilla.org/Hello Günter.png');
const filepath2 = bySiteStructureFilenameGenerator(r2, options);
filepath2.should.equalFileSystemPath('developer.mozilla.org/Hello Günter.png');
});

it('should keep query strings', () => {
Expand Down
96 changes: 50 additions & 46 deletions test/unit/filename-generator/by-type-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,27 @@ import byTypeFilenameGenerator from '../../../lib/filename-generator/by-type.js'
describe('FilenameGenerator: byType', () => {
it('should return resource filename', () => {
const r = new Resource('http://example.com/a.png', 'b.png');
const filename = byTypeFilenameGenerator(r, {}, []);
filename.should.equalFileSystemPath('b.png');
const filepath = byTypeFilenameGenerator(r, {}, []);
filepath.should.equalFileSystemPath('b.png');
});

it('should return url-based filename if resource has no filename', () => {
const r = new Resource('http://example.com/a.png');
const filename = byTypeFilenameGenerator(r, {}, []);
filename.should.equalFileSystemPath('a.png');
const filepath = byTypeFilenameGenerator(r, {}, []);
filepath.should.equalFileSystemPath('a.png');
});

it('should return url-based filename if resource has empty filename', () => {
const r = new Resource('http://example.com/a.png', '');
const filename = byTypeFilenameGenerator(r, {}, []);
filename.should.equalFileSystemPath('a.png');
const filepath = byTypeFilenameGenerator(r, {}, []);
filepath.should.equalFileSystemPath('a.png');
});

it('should add missed extensions for html resources', () => {
const r = new Resource('http://example.com/about', '');
r.getType = sinon.stub().returns('html');
const filename = byTypeFilenameGenerator(r, {}, []);
filename.should.equalFileSystemPath('about.html');
const filepath = byTypeFilenameGenerator(r, {}, []);
filepath.should.equalFileSystemPath('about.html');
});

it('should add missed extensions for css resources', () => {
Expand All @@ -41,15 +41,15 @@ describe('FilenameGenerator: byType', () => {
it('should add missed extensions for js resources', () => {
const r = new Resource('http://example.com/js', '');
r.getType = sinon.stub().returns('js');
const filename = byTypeFilenameGenerator(r, {}, []);
filename.should.equalFileSystemPath('js.js');
const filepath = byTypeFilenameGenerator(r, {}, []);
filepath.should.equalFileSystemPath('js.js');
});

it('should not add missed extensions for other resources', () => {
const r = new Resource('http://1.gravatar.com/avatar/4d63e4a045c7ff22accc33dc08442f86?s=140&amp;d=%2Fwp-content%2Fuploads%2F2015%2F05%2FGood-JOb-150x150.jpg&amp;r=g', '');
r.getType = sinon.stub().returns('home');
const filename = byTypeFilenameGenerator(r, {}, []);
filename.should.equalFileSystemPath('4d63e4a045c7ff22accc33dc08442f86');
const filepath = byTypeFilenameGenerator(r, {}, []);
filepath.should.equalFileSystemPath('4d63e4a045c7ff22accc33dc08442f86');
});

it('should return filename with correct subdirectory', () => {
Expand All @@ -60,8 +60,8 @@ describe('FilenameGenerator: byType', () => {
};

const r = new Resource('http://example.com/a.png');
const filename = byTypeFilenameGenerator(r, options, []);
filename.should.equalFileSystemPath('img/a.png');
const filepath = byTypeFilenameGenerator(r, options, []);
filepath.should.equalFileSystemPath('img/a.png');
});

it('should return filename with correct subdirectory when string cases are different', () => {
Expand All @@ -72,14 +72,14 @@ describe('FilenameGenerator: byType', () => {
};

const r = new Resource('http://example.com/a.PNG');
const f = byTypeFilenameGenerator(r, options, []);
f.should.equalFileSystemPath('img/a.PNG');
const filepath = byTypeFilenameGenerator(r, options, []);
filepath.should.equalFileSystemPath('img/a.PNG');
});

it('should return different filename if desired filename is occupied', () => {
const r = new Resource('http://second-example.com/a.png');
const filename = byTypeFilenameGenerator(r, {}, [ 'a.png' ]);
filename.should.not.equalFileSystemPath('a.png');
const filepath = byTypeFilenameGenerator(r, {}, [ 'a.png' ]);
filepath.should.not.equalFileSystemPath('a.png');
});

it('should return different filename if desired filename is occupied N times', () => {
Expand All @@ -89,30 +89,32 @@ describe('FilenameGenerator: byType', () => {
const r3 = new Resource('http://third-example.com/a.png');
const r4 = new Resource('http://fourth-example.com/a.png');

const f1 = byTypeFilenameGenerator(r1, {}, occupiedFilenames);
f1.should.equalFileSystemPath('a.png');
occupiedFilenames.push(f1);
const fp1 = byTypeFilenameGenerator(r1, {}, occupiedFilenames);
fp1.should.equalFileSystemPath('a.png');
occupiedFilenames.push(fp1);

const f2 = byTypeFilenameGenerator(r2, {}, occupiedFilenames);
f2.should.not.equal(f1);
occupiedFilenames.push(f2);
const fp2 = byTypeFilenameGenerator(r2, {}, occupiedFilenames);
fp2.should.not.equal(fp1);
occupiedFilenames.push(fp2);

const f3 = byTypeFilenameGenerator(r3, {}, occupiedFilenames);
f3.should.not.equal(f1);
f3.should.not.equal(f2);
occupiedFilenames.push(f3);
const fp3 = byTypeFilenameGenerator(r3, {}, occupiedFilenames);
fp3.should.not.equal(fp1);
fp3.should.not.equal(fp2);
occupiedFilenames.push(fp3);

const f4 = byTypeFilenameGenerator(r4, {}, occupiedFilenames);
f4.should.not.equal(f1);
f4.should.not.equal(f2);
f4.should.not.equal(f3);
const fp4 = byTypeFilenameGenerator(r4, {}, occupiedFilenames);
fp4.should.not.equal(fp1);
fp4.should.not.equal(fp2);
fp4.should.not.equal(fp3);
});

it('should shorten filename', () => {
const resourceFilename = new Array(1000).fill('a').join('') + '.png';
const r = new Resource('http://example.com/a.png', resourceFilename);
const filename = byTypeFilenameGenerator(r, {}, []);
should(filename.length).be.lessThan(255);
const filepath = byTypeFilenameGenerator(r, {}, []);
const filenameParts = filepath.split('/');
const filename = filenameParts[filenameParts.length - 1];
should(filename.length).be.lessThanOrEqual(255);
});

it('should return different short filename if first short filename is occupied', () => {
Expand All @@ -121,28 +123,30 @@ describe('FilenameGenerator: byType', () => {
const r1 = new Resource('http://first-example.com/a.png', resourceFilename);
const r2 = new Resource('http://second-example.com/a.png', resourceFilename);

const f1 = byTypeFilenameGenerator(r1, {}, []);
should(f1.length).be.lessThan(255);
const fp1 = byTypeFilenameGenerator(r1, {}, []);
const filenameParts1 = fp1.split('/');
const filename1 = filenameParts1[filenameParts1.length - 1];
should(filename1.length).be.lessThanOrEqual(255);

const f2 = byTypeFilenameGenerator(r2, {}, [ f1 ]);
should(f2.length).be.lessThan(255);
should(f2).not.be.eql(f1);

should(f2).not.be.eql(f1);
const fp2 = byTypeFilenameGenerator(r2, {}, [ fp1 ]);
const filenameParts2 = fp2.split('/');
const filename2 = filenameParts2[filenameParts2.length - 1];
should(filename2.length).be.lessThanOrEqual(255);
should(filename2).not.be.eql(filename1);
});

it('should return decoded url-based filename', () => {
const r = new Resource('https://developer.mozilla.org/ru/docs/JavaScript_%D1%88%D0%B5%D0%BB%D0%BB%D1%8B');
const filename = byTypeFilenameGenerator(r, {}, []);
filename.should.equalFileSystemPath('JavaScript_шеллы');
const filepath = byTypeFilenameGenerator(r, {}, []);
filepath.should.equalFileSystemPath('JavaScript_шеллы');

const r2 = new Resource('https://developer.mozilla.org/Hello%20G%C3%BCnter.png');
const filename2 = byTypeFilenameGenerator(r2, {}, []);
filename2.should.equalFileSystemPath('Hello Günter.png');
const filepath2 = byTypeFilenameGenerator(r2, {}, []);
filepath2.should.equalFileSystemPath('Hello Günter.png');
});

it('should remove not allowed characters from filename', () => {
const r1 = new Resource('http://example.com/some/path/<*a*>.png');
byTypeFilenameGenerator(r1, {}, []).should.equalFileSystemPath('__a__.png');
byTypeFilenameGenerator(r1, {}, []).should.equalFileSystemPath('_a_.png');
});
});
Loading