Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@
},
"devDependencies": {
"c8": "^10.0.0",
"chai": "^6.2.0",
"eslint": "^8.5.0",
"mocha": "^11.0.1",
"nock": "^14.0.0",
"should": "^13.2.3",
"sinon": "^21.0.0"
},
"files": [
Expand Down
7 changes: 4 additions & 3 deletions test/e2e/e2e-test.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import 'should';
import scrape from 'website-scraper';
import fs from 'fs-extra';
import * as chai from 'chai';

import { readFile } from 'fs/promises';
const urls = JSON.parse(await readFile(new URL('./urls.json', import.meta.url)));
const options = JSON.parse(await readFile(new URL('./options.json', import.meta.url)));

const resultDirname = './test/e2e/results';
chai.should();

describe('E2E', function() {
before(function() {
Expand All @@ -26,7 +27,7 @@ describe('E2E', function() {
scraperOptions.urls = [ { url: url, filename: 'index.html' } ];
scraperOptions.filenameGenerator = 'byType';
return scrape(scraperOptions).then(function(result) {
result.should.be.ok();
result.should.be.ok;
});
});

Expand All @@ -37,7 +38,7 @@ describe('E2E', function() {
scraperOptions.urls = [ { url: url } ];
scraperOptions.filenameGenerator = 'bySiteStructure';
return scrape(scraperOptions).then(function(result) {
result.should.be.ok();
result.should.be.ok;
});
});
});
Expand Down
69 changes: 37 additions & 32 deletions test/functional/base/base.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import should from 'should';
import * as chai from 'chai';
chai.should();

import '../../utils/assertions.js';
import nock from 'nock';
import fs from 'fs-extra';
Expand Down Expand Up @@ -72,18 +74,21 @@ describe('Functional: base', function() {
// should return right result
result.should.be.instanceOf(Array).and.have.length(3);

result[0].should.have.properties({ url: 'http://example.com/', filename: 'index.html' });
result[0].should.have.properties('children');
result[0].should.have.property('url', 'http://example.com/');
result[0].should.have.property('filename', 'index.html');
result[0].should.have.property('children');
result[0].children.should.be.instanceOf(Array).and.have.length(4);
result[0].children[0].should.be.instanceOf(Resource);

result[1].should.have.properties({ url: 'http://example.com/about', filename: 'about.html' });
result[1].should.have.properties('children');
result[1].should.have.property('url', 'http://example.com/about');
result[1].should.have.property('filename', 'about.html');
result[1].should.have.property('children');
result[1].children.should.be.instanceOf(Array).and.have.length(4);
result[1].children[0].should.be.instanceOf(Resource);

result[2].should.have.properties({ url: 'http://blog.example.com/', filename: 'blog.html' }); // url after redirect
result[2].should.have.properties('children');
result[2].should.have.property('url', 'http://blog.example.com/'); // url after redirect
result[2].should.have.property('filename', 'blog.html');
result[2].should.have.property('children');
result[2].children.should.be.instanceOf(Array).and.have.length(1);
result[2].children[0].should.be.instanceOf(Resource);

Expand All @@ -102,7 +107,7 @@ describe('Functional: base', function() {
// all sources in index.html should be replaced with local paths
let $ = cheerio.load(fs.readFileSync(testDirname + '/index.html').toString());
$('link[rel="stylesheet"]').attr('href').should.be.eql('css/index.css');
$('style').html().should.containEql('img/background.png');
$('style').html().should.contain('img/background.png');
$('img').attr('src').should.be.eql('img/cat.jpg');
$('script').attr('src').should.be.eql('js/script.min.js');

Expand All @@ -115,22 +120,22 @@ describe('Functional: base', function() {

// all sources in index.css should be replaced with local files recursively
const indexCss = fs.readFileSync(testDirname + '/css/index.css').toString();
indexCss.should.not.containEql('files/index-import-1.css');
indexCss.should.not.containEql('files/index-import-2.css');
indexCss.should.not.containEql('http://example.com/files/index-image-1.png');
indexCss.should.containEql('index-import-1.css');
indexCss.should.containEql('index-import-2.css');
indexCss.should.containEql('../img/index-image-1.png');
indexCss.should.not.contain('files/index-import-1.css');
indexCss.should.not.contain('files/index-import-2.css');
indexCss.should.not.contain('http://example.com/files/index-image-1.png');
indexCss.should.contain('index-import-1.css');
indexCss.should.contain('index-import-2.css');
indexCss.should.contain('../img/index-image-1.png');

const indexImportCss = fs.readFileSync(testDirname + '/css/index-import-2.css').toString();
indexImportCss.should.not.containEql('http://example.com/files/index-image-2.png');
indexImportCss.should.containEql('../img/index-image-2.png');
indexImportCss.should.not.contain('http://example.com/files/index-image-2.png');
indexImportCss.should.contain('../img/index-image-2.png');

// should deal with base tag in about.html and not load new resources
// all sources in about.html should be replaced with already loaded local resources
$ = cheerio.load(fs.readFileSync(testDirname + '/about.html').toString());
$('link[rel="stylesheet"]').attr('href').should.be.eql('css/index.css');
$('style').html().should.containEql('img/background.png');
$('style').html().should.contain('img/background.png');
$('img').attr('src').should.be.eql('img/cat.jpg');
$('script').attr('src').should.be.eql('js/script.min.js');

Expand All @@ -144,21 +149,21 @@ describe('Functional: base', function() {
return scrape({...options, filenameGenerator: 'bySiteStructure'}).then(function(result) {
result.should.be.instanceOf(Array).and.have.length(3);

should(result[0].url).eql('http://example.com/');
should(result[0].filename).equalFileSystemPath('example.com/index.html');
result[0].should.have.properties('children');
result[0].url.should.eql('http://example.com/');
result[0].filename.should.equalFileSystemPath('example.com/index.html');
result[0].should.have.property('children');
result[0].children.should.be.instanceOf(Array).and.have.length(4);
result[0].children[0].should.be.instanceOf(Resource);

should(result[1].url).eql('http://example.com/about');
should(result[1].filename).equalFileSystemPath('example.com/about/index.html');
result[1].should.have.properties('children');
result[1].url.should.eql('http://example.com/about');
result[1].filename.should.equalFileSystemPath('example.com/about/index.html');
result[1].should.have.property('children');
result[1].children.should.be.instanceOf(Array).and.have.length(4);
result[1].children[0].should.be.instanceOf(Resource);

should(result[2].url).eql('http://blog.example.com/'); // url after redirect
should(result[2].filename).equalFileSystemPath('blog.example.com/index.html');
result[2].should.have.properties('children');
result[2].url.should.eql('http://blog.example.com/'); // url after redirect
result[2].filename.should.equalFileSystemPath('blog.example.com/index.html');
result[2].should.have.property('children');
result[2].children.should.be.instanceOf(Array).and.have.length(1);
result[2].children[0].should.be.instanceOf(Resource);

Expand All @@ -177,7 +182,7 @@ describe('Functional: base', function() {
// all sources in index.html should be replaced with local paths
let $ = cheerio.load(fs.readFileSync(testDirname + '/example.com/index.html').toString());
$('link[rel="stylesheet"]').attr('href').should.be.eql('index.css');
$('style').html().should.containEql('background.png');
$('style').html().should.contain('background.png');
$('img').attr('src').should.be.eql('cat.jpg');
$('script').attr('src').should.be.eql('script.min.js');

Expand All @@ -190,18 +195,18 @@ describe('Functional: base', function() {

// all sources in index.css should be replaced with local files recursively
const indexCss = fs.readFileSync(testDirname + '/example.com/index.css').toString();
indexCss.should.containEql('files/index-import-1.css');
indexCss.should.containEql('files/index-import-2.css');
indexCss.should.containEql('files/index-image-1.png');
indexCss.should.contain('files/index-import-1.css');
indexCss.should.contain('files/index-import-2.css');
indexCss.should.contain('files/index-image-1.png');

const indexImportCss = fs.readFileSync(testDirname + '/example.com/files/index-import-2.css').toString();
indexImportCss.should.containEql('index-image-2.png');
indexImportCss.should.contain('index-image-2.png');

// should deal with base tag in about.html and not load new resources
// all sources in about.html should be replaced with already loaded local resources
$ = cheerio.load(fs.readFileSync(testDirname + '/example.com/about/index.html').toString());
$('link[rel="stylesheet"]').attr('href').should.be.eql('../index.css');
$('style').html().should.containEql('../background.png');
$('style').html().should.contain('../background.png');
$('img').attr('src').should.be.eql('../cat.jpg');
$('script').attr('src').should.be.eql('../script.min.js');

Expand Down
9 changes: 5 additions & 4 deletions test/functional/base/check-it-works.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import should from 'should';
import * as chai from 'chai';
chai.should();
import '../../utils/assertions.js';
import nock from 'nock';
import fs from 'fs-extra';
Expand Down Expand Up @@ -28,9 +29,9 @@ describe('Functional: check it works', function() {
};

return scrape(options).then((result) => {
should(result[0].url).be.eql('http://example.com/');
should(result[0].filename).be.eql('index.html');
should(result[0].text).be.eql('<html><head></head><body>TEST PROMISES</body></html>');
result[0].url.should.be.eql('http://example.com/');
result[0].filename.should.be.eql('index.html');
result[0].text.should.be.eql('<html><head></head><body>TEST PROMISES</body></html>');
});
});
});
8 changes: 5 additions & 3 deletions test/functional/binary-resources/images.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import should from 'should';
import * as chai from 'chai';
chai.should();

import '../../utils/assertions.js';
import nock from 'nock';
import fs from 'fs-extra';
Expand Down Expand Up @@ -63,7 +65,7 @@ describe('Functional: images', () => {
const resultPng = fs.readFileSync(testDirname + '/img/test-image.png');
const resultJpg = fs.readFileSync(testDirname + '/img/test-image.jpg');

should(resultPng).be.eql(originalPng);
should(resultJpg).be.eql(originalJpg);
resultPng.should.be.eql(originalPng);
resultJpg.should.be.eql(originalJpg);
});
});
22 changes: 12 additions & 10 deletions test/functional/callbacks/callbacks.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import should from 'should';
import * as chai from 'chai';
chai.should();

import '../../utils/assertions.js';
import nock from 'nock';
import fs from 'fs-extra';
Expand Down Expand Up @@ -45,12 +47,12 @@ describe('Functional: onResourceSaved and onResourceError callbacks in plugin',
};

return scrape(options).then(function() {
should(resourceSavedStub.calledOnce).be.eql(true);
should(resourceSavedStub.args[0][0].resource.url).be.eql('http://example.com/');
resourceSavedStub.calledOnce.should.be.eql(true);
resourceSavedStub.args[0][0].resource.url.should.be.eql('http://example.com/');

should(resourceErrorStub.calledOnce).be.eql(true);
should(resourceErrorStub.args[0][0].resource.url).be.eql('http://nodejs.org/');
should(resourceErrorStub.args[0][0].error.message).be.eql('REQUEST ERROR!!');
resourceErrorStub.calledOnce.should.be.eql(true);
resourceErrorStub.args[0][0].resource.url.should.be.eql('http://nodejs.org/');
resourceErrorStub.args[0][0].error.message.should.be.eql('REQUEST ERROR!!');
});
});

Expand Down Expand Up @@ -80,11 +82,11 @@ describe('Functional: onResourceSaved and onResourceError callbacks in plugin',
};

return scrape(options).then(function() {
should(true).eql(false);
false.should.be.true;
}).catch(() => {
should(resourceErrorStub.calledOnce).be.eql(true);
should(resourceErrorStub.args[0][0].resource.url).be.eql('http://nodejs.org/');
should(resourceErrorStub.args[0][0].error.message).be.eql('REQUEST ERROR!!');
resourceErrorStub.calledOnce.should.be.eql(true);
resourceErrorStub.args[0][0].resource.url.should.be.eql('http://nodejs.org/');
resourceErrorStub.args[0][0].error.message.should.be.eql('REQUEST ERROR!!');
});
});
});
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import 'should';
import * as chai from 'chai';
chai.should();

import '../../utils/assertions.js';
import nock from 'nock';
import fs from 'fs-extra';
Expand Down
10 changes: 6 additions & 4 deletions test/functional/css-handling/css-handling.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import should from 'should';
import * as chai from 'chai';
chai.should();

import '../../utils/assertions.js';
import nock from 'nock';
import fs from 'fs-extra';
Expand Down Expand Up @@ -51,10 +53,10 @@ describe('Functional: css handling', function() {

const indexHtml = fs.readFileSync(testDirname + '/index.html').toString();

should(indexHtml).containEql('local/style-tag.png');
should(indexHtml).containEql('local/style-attr.png');
indexHtml.should.contain('local/style-tag.png');
indexHtml.should.contain('local/style-attr.png');

should(indexHtml).containEql('background: url(\'css-like-text-in-html.png\')');
indexHtml.should.contain("background: url('css-like-text-in-html.png')");
});
});
});
16 changes: 9 additions & 7 deletions test/functional/data-url/data-url.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import should from 'should';
import * as chai from 'chai';
chai.should();

import '../../utils/assertions.js';
import nock from 'nock';
import fs from 'fs-extra';
Expand Down Expand Up @@ -37,12 +39,12 @@ describe('Functional: data urls handling', function () {

const actualIndexHtml = fs.readFileSync(testDirname + '/index.html').toString();

should(actualIndexHtml).containEql('<source media="(max-width: 559px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
should(actualIndexHtml).containEql('<source media="(min-width: 560px) and (max-width: 719px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
should(actualIndexHtml).containEql('<source media="(min-width: 720px) and (max-width: 899px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
should(actualIndexHtml).containEql('<source media="(min-width: 900px) and (max-width: 1199px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
should(actualIndexHtml).containEql('<source media="(min-width: 1200px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
should(actualIndexHtml).containEql('<img src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" srcset="images/521811121-392x351.jpg 2x">');
actualIndexHtml.should.contain('<source media="(max-width: 559px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
actualIndexHtml.should.contain('<source media="(min-width: 560px) and (max-width: 719px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
actualIndexHtml.should.contain('<source media="(min-width: 720px) and (max-width: 899px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
actualIndexHtml.should.contain('<source media="(min-width: 900px) and (max-width: 1199px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
actualIndexHtml.should.contain('<source media="(min-width: 1200px)" srcset="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 1x, data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7 2x">');
actualIndexHtml.should.contain('<img src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7" srcset="images/521811121-392x351.jpg 2x">');
});
});
});
20 changes: 10 additions & 10 deletions test/functional/encoding/encoding.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ describe('Functional: encoding', () => {
await scrape(options);

const scrapedIndex = await fs.readFile(testDirname + '/index.html', { encoding: 'utf8' });
scrapedIndex.should.be.containEql('<div id="special-characters-korean">저는 7년 동안 한국에서 살았어요.</div>');
scrapedIndex.should.be.containEql('<div id="special-characters-ukrainian">Слава Україні!</div>');
scrapedIndex.should.be.containEql('<div id="special-characters-chinese">加入网站</div>');
scrapedIndex.should.be.containEql('<div id="special-characters-ukrainian">Обладнання та ПЗ</div>');
scrapedIndex.should.be.containEql('<div id="special-characters-french">PAR PASSION DU VÉLO</div>');
scrapedIndex.should.contain('<div id="special-characters-korean">\uc800\ub294 7\ub144 \ub3d9\uc548 \ud55c\uad6d\uc5d0\uc11c \uc0b4\uc558\uc5b4\uc694.</div>');
scrapedIndex.should.contain('<div id="special-characters-ukrainian">\u0421\u043b\u0430\u0432\u0430 \u0423\u043a\u0440\u0430\u0457\u043d\u0456!</div>');
scrapedIndex.should.contain('<div id="special-characters-chinese">\u52a0\u5165\u7f51\u7ad9</div>');
scrapedIndex.should.contain('<div id="special-characters-ukrainian">\u041e\u0431\u043b\u0430\u0434\u043d\u0430\u043d\u043d\u044f \u0442\u0430 \u041f\u0417</div>');
scrapedIndex.should.contain('<div id="special-characters-french">PAR PASSION DU V\u00c9LO</div>');
});

it('should save the page with enconding from html meta tag', async () => {
Expand All @@ -45,10 +45,10 @@ describe('Functional: encoding', () => {
await scrape(options);

const scrapedIndex = await fs.readFile(testDirname + '/index.html', { encoding: 'utf8' });
scrapedIndex.should.be.containEql('<div id="special-characters-korean">저는 7년 동안 한국에서 살았어요.</div>');
scrapedIndex.should.be.containEql('<div id="special-characters-ukrainian">Слава Україні!</div>');
scrapedIndex.should.be.containEql('<div id="special-characters-chinese">加入网站</div>');
scrapedIndex.should.be.containEql('<div id="special-characters-ukrainian">Обладнання та ПЗ</div>');
scrapedIndex.should.be.containEql('<div id="special-characters-french">PAR PASSION DU VÉLO</div>');
scrapedIndex.should.contain('<div id="special-characters-korean">\uc800\ub294 7\ub144 \ub3d9\uc548 \ud55c\uad6d\uc5d0\uc11c \uc0b4\uc558\uc5b4\uc694.</div>');
scrapedIndex.should.contain('<div id="special-characters-ukrainian">\u0421\u043b\u0430\u0432\u0430 \u0423\u043a\u0440\u0430\u0457\u043d\u0456!</div>');
scrapedIndex.should.contain('<div id="special-characters-chinese">\u52a0\u5165\u7f51\u7ad9</div>');
scrapedIndex.should.contain('<div id="special-characters-ukrainian">\u041e\u0431\u043b\u0430\u0434\u043d\u0430\u043d\u043d\u044f \u0442\u0430 \u041f\u0417</div>');
scrapedIndex.should.contain('<div id="special-characters-french">PAR PASSION DU V\u00c9LO</div>');
});
});
Loading