Commit 080a170

Added API-related retries
1 parent bcd9daf commit 080a170

File tree

3 files changed, with 66 additions and 156 deletions

package-lock.json

Lines changed: 41 additions & 142 deletions
Some generated files are not rendered by default.

package.json

Lines changed: 3 additions & 3 deletions
@@ -1,6 +1,6 @@
 {
   "name": "@scrapingant/amazon-proxy-scraper",
-  "version": "2.0.2",
+  "version": "2.1.0",
   "description": "Amazon products scraper by keyword with using ScrapingAnt API",
   "main": "index.js",
   "bin": {
@@ -21,11 +21,11 @@
   "author": "ScrapingAnt",
   "license": "MIT",
   "dependencies": {
-    "@scrapingant/scrapingant-client": "0.0.1",
-    "cheerio": "^1.0.0-rc.5",
+    "@scrapingant/scrapingant-client": "0.1.0",
     "cli-progress": "^3.9.0",
     "json2csv": "^5.0.6",
     "json2xls": "^0.1.2",
+    "promise-retry": "^2.0.1",
     "yargs": "^16.2.0"
   },
   "devDependencies": {},

products-scraper.js

Lines changed: 22 additions & 11 deletions
@@ -5,8 +5,10 @@ const writeDataToCsv = require('./utils').writeDataToCsv;
 const writeDataToXls = require('./utils').writeDataToXls;
 const cliProgress = require('cli-progress');
 const querystring = require('querystring');
+const retry = require('promise-retry');
 const ScrapingAntClient = require('@scrapingant/scrapingant-client')
 
+
 const CONSTANTS = require('./constants');
 
 class ProductsScraper {
@@ -131,15 +133,22 @@ class ProductsScraper {
             ...(this.currentSearchPage > 1 ? { page: this.currentSearchPage, ref: `sr_pg_${this.currentSearchPage}` } : {})
         });
 
+        // Retry for avoiding empty or detected result from Amazon
         for (let i = 0; i < CONSTANTS.limit.retry; i++) {
-            const response = await this.client.scrape(
-                `${this.host}/s?${queryParams}`,
-                { proxy_country: this.country }
-            );
-            const pageBody = response.content;
-            const products = this.getProducts(pageBody);
-            if (Object.keys(products).length > 0) {
-                return products;
+            try {
+                // Retry for any network or accessibility cases
+                const response = await retry((attempt) => this.client.scrape(
+                    `${this.host}/s?${queryParams}`,
+                    { proxy_country: this.country }
+                ).catch(attempt), { retries: CONSTANTS.limit.retry });
+
+                const pageBody = response.content;
+                const products = this.getProducts(pageBody);
+                if (Object.keys(products).length > 0) {
+                    return products;
+                }
+            } catch (err) {
+                console.error(`Failed to get page ${this.currentSearchPage} for keyword ${this.keyword}. Going to retry...`);
             }
         }
 
@@ -246,12 +255,14 @@ class ProductsScraper {
      * The main idea of this method is pretty simple - amend existing products object with additional data
      */
     async getProductPageData(amazonId) {
+        // Retry for avoiding empty or detected result from Amazon
        for (let i = 0; i < CONSTANTS.limit.retry; i++) {
            try {
-                const response = await this.client.scrape(
+                // Retry for any network or accessibility cases
+                const response = await retry((attempt) => this.client.scrape(
                    `${this.host}/dp/${amazonId}`,
                    { proxy_country: this.country }
-                );
+                ).catch(attempt), { retries: CONSTANTS.limit.retry });
                const pageBody = response.content;
 
                const dom = cheerio.load(pageBody.replace(/\s\s+/g, '').replace(/\n/g, ''));
@@ -270,7 +281,7 @@ class ProductsScraper {
            }
 
        } catch (exception) {
-            // Hiding the exception for retry
+            console.error(`Failed to get product ${amazonId} for keyword ${this.keyword}. Going to retry...`);
        }
    }
 }
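The diff layers two retry mechanisms: the inner promise-retry call re-issues the HTTP request itself on network or API errors, while the pre-existing outer for loop asks for the page again when the request succeeds but parses to zero products (e.g. Amazon served an empty or bot-detection page). A condensed sketch of the combined pattern, with illustrative names that are not from the repo:

const retry = require('promise-retry');

async function scrapeWithRetries(scrapeOnce, parse, limit) {
    // Outer loop: the request succeeded, but the page yielded nothing,
    // so request it again from scratch.
    for (let i = 0; i < limit; i++) {
        try {
            // Inner layer: promise-retry re-issues the HTTP call itself
            // when it fails outright (network error, API error).
            const response = await retry(
                (attempt) => scrapeOnce().catch(attempt),
                { retries: limit }
            );
            const items = parse(response.content);
            if (Object.keys(items).length > 0) return items;
        } catch (err) {
            console.error(`Attempt ${i + 1} failed: ${err.message}. Going to retry...`);
        }
    }
    return undefined; // every attempt exhausted
}

One consequence of the nesting: the budgets multiply. With CONSTANTS.limit.retry set to N, a single page can cost up to N × (N + 1) requests in the worst case, since promise-retry's retries option counts retries on top of the first attempt.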
