@@ -5,8 +5,10 @@ const writeDataToCsv = require('./utils').writeDataToCsv;
55const writeDataToXls = require ( './utils' ) . writeDataToXls ;
66const cliProgress = require ( 'cli-progress' ) ;
77const querystring = require ( 'querystring' ) ;
8+ const retry = require ( 'promise-retry' ) ;
89const ScrapingAntClient = require ( '@scrapingant/scrapingant-client' )
910
11+
1012const CONSTANTS = require ( './constants' ) ;
1113
1214class ProductsScraper {
@@ -131,15 +133,22 @@ class ProductsScraper {
131133 ...( this . currentSearchPage > 1 ? { page : this . currentSearchPage , ref : `sr_pg_${ this . currentSearchPage } ` } : { } )
132134 } ) ;
133135
136+ // Retry for avoiding empty or detected result from Amazon
134137 for ( let i = 0 ; i < CONSTANTS . limit . retry ; i ++ ) {
135- const response = await this . client . scrape (
136- `${ this . host } /s?${ queryParams } ` ,
137- { proxy_country : this . country }
138- ) ;
139- const pageBody = response . content ;
140- const products = this . getProducts ( pageBody ) ;
141- if ( Object . keys ( products ) . length > 0 ) {
142- return products ;
138+ try {
139+ // Retry for any network or accessibility cases
140+ const response = await retry ( ( attempt ) => this . client . scrape (
141+ `${ this . host } /s?${ queryParams } ` ,
142+ { proxy_country : this . country }
143+ ) . catch ( attempt ) , { retries : CONSTANTS . limit . retry } ) ;
144+
145+ const pageBody = response . content ;
146+ const products = this . getProducts ( pageBody ) ;
147+ if ( Object . keys ( products ) . length > 0 ) {
148+ return products ;
149+ }
150+ } catch ( err ) {
151+ console . error ( `Failed to get page ${ this . currentSearchPage } for keyword ${ this . keyword } . Going to retry...` ) ;
143152 }
144153 }
145154
@@ -246,12 +255,14 @@ class ProductsScraper {
246255 * The main idea of this method is pretty simple - amend existing products object with additional data
247256 */
248257 async getProductPageData ( amazonId ) {
258+ // Retry for avoiding empty or detected result from Amazon
249259 for ( let i = 0 ; i < CONSTANTS . limit . retry ; i ++ ) {
250260 try {
251- const response = await this . client . scrape (
261+ // Retry for any network or accessibility cases
262+ const response = await retry ( ( attempt ) => this . client . scrape (
252263 `${ this . host } /dp/${ amazonId } ` ,
253264 { proxy_country : this . country }
254- ) ;
265+ ) . catch ( attempt ) , { retries : CONSTANTS . limit . retry } ) ;
255266 const pageBody = response . content ;
256267
257268 const dom = cheerio . load ( pageBody . replace ( / \s \s + / g, '' ) . replace ( / \n / g, '' ) ) ;
@@ -270,7 +281,7 @@ class ProductsScraper {
270281 }
271282
272283 } catch ( exception ) {
273- // Hiding the exception for retry
284+ console . error ( `Failed to get product ${ amazonId } for keyword ${ this . keyword } . Going to retry...` ) ;
274285 }
275286 }
276287 }
0 commit comments