Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bot/job_options_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class JobOptionsParser
def initialize
@parser = Trollop::Parser.new do
opt :no_offsite_links, 'Do not fetch offsite links'
opt :no_cookies, 'Do not use cookies'
opt :youtube_dl, 'Use youtube-dl on grabbed pages'
opt :ignore_sets, 'Ignore sets to apply', :type => :string
opt :pipeline, 'Run job on this pipeline', :type => :string
Expand All @@ -23,6 +24,7 @@ def parse(str)
b[0] = (case b[0]
when '--ignoresets','--ignore_sets','--ignoreset','--ignore-set','--ignore_set','--ig-set','--igset' then '--ignore-sets'
when '--nooffsitelinks','--no-offsite','--nooffsite' then '--no-offsite-links'
when '--nocookies' then '--no-cookies'
when '--useragentalias','--user-agent','--useragent' then '--user-agent-alias'
when '--concurrent' then '--concurrency'
when '--reason' then '--explain'
Expand Down
5 changes: 5 additions & 0 deletions bot/pipeline_options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ def run_post_registration_hooks(m, job, params)
messages << 'offsite links: no'
end

if params[:no_cookies]
job.no_cookies!
messages << 'use cookies: no'
end

if !messages.empty?
reply m, "Options: #{messages.join('; ')}"
end
Expand Down
3 changes: 3 additions & 0 deletions doc/commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ Accepted parameters

Aliases: ``--nooffsitelinks``, ``--no-offsite``, ``--nooffsite``

``--no-cookies``
do not use cookies: wpull is run with ``--no-cookies`` instead of ``--save-cookies``

Aliases: ``--nocookies``

``--user-agent-alias ALIAS``
specify a user-agent to use::

Expand Down
4 changes: 4 additions & 0 deletions lib/job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,10 @@ def no_offsite_links!
redis.hset(ident, 'no_offsite_links', true)
end

# Marks this job as one that must run without cookies.
#
# Sets the 'no_cookies' field to true in the Redis hash stored under this
# job's ident. Returns whatever redis.hset returns.
def no_cookies!
  field = 'no_cookies'
  redis.hset(ident, field, true)
end

def yahoo
silently do
set_delay(0, 0)
Expand Down
1 change: 1 addition & 0 deletions pipeline/archivebot/seesaw/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def process(self, item):
item['url_file'] = job_data.get('url_file')
item['user_agent'] = job_data.get('user_agent')
item['no_offsite_links'] = job_data.get('no_offsite_links')
item['no_cookies'] = job_data.get('no_cookies')
item['youtube_dl'] = job_data.get('youtube_dl')

item.log_output('Received item %s.' % ident)
Expand Down
6 changes: 5 additions & 1 deletion pipeline/archivebot/seesaw/wpull.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def make_args(item, default_user_agent, wpull_exe, youtube_dl_exe, finished_warc
'-o', '%(item_dir)s/wpull.log' % item,
'--database', '%(item_dir)s/wpull.db' % item,
'--html-parser', 'libxml2-lxml',
'--save-cookies', '%(cookie_jar)s' % item,
'--no-check-certificate',
'--no-strong-crypto',
'--delete-after',
Expand Down Expand Up @@ -51,6 +50,11 @@ def make_args(item, default_user_agent, wpull_exe, youtube_dl_exe, finished_warc
'--youtube-dl-exe', youtube_dl_exe
]

if item.get('no_cookies'):
args.append('--no-cookies')
else:
add_args(args, ['--save-cookies', '%(cookie_jar)s'], item)

if item['url'].startswith("http://www.reddit.com/") or \
item['url'].startswith("https://www.reddit.com/"):
add_args(args, ['--header', 'Cookie: over18=1'], item)
Expand Down
4 changes: 4 additions & 0 deletions spec/bot/job_options_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
expect(parser.parse('--concurrency=4')[:concurrency]).to eq(4)
end

# The bare --no-cookies flag must parse to a true :no_cookies option.
it 'recognizes --no-cookies' do
  parsed = parser.parse('--no-cookies')

  expect(parsed[:no_cookies]).to eq(true)
end

describe 'when unknown options are present' do
it 'raises UnknownOptionError' do
expect(lambda { parser.parse('--foo=bar') }).to raise_error(JobOptionsParser::UnknownOptionError)
Expand Down