diff --git a/do_authentication.py b/do_authentication.py index f3c15b1..b5c0f24 100644 --- a/do_authentication.py +++ b/do_authentication.py @@ -2,35 +2,70 @@ import requests import sys import datetime +import time +import urllib.parse -def authenticate(username, password, authen_base_url): - authen_headers = {'Content-Type': 'application/json', - 'Accept': 'application/json'} +def authenticate(username, password, authen_base_url, retry_attempts=3): + """Authenticate with ICANN CZDS API and return an access token.""" - credential = {'username': username, - 'password': password} + authen_headers = { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'User-Agent': 'Mozilla/5.0 (compatible; CZDS-Client/1.0; +https://github.com/mthcht/czds-api-client-python)' + } + + credential = { + "username": username, + "password": password, # No encoding for now, unless ICANN blocks special characters + "grant_type": "password" # Some APIs require this + } authen_url = authen_base_url + '/api/authenticate' - response = requests.post(authen_url, data=json.dumps(credential), headers=authen_headers) - - status_code = response.status_code - - # Return the access_token on status code 200. Otherwise, terminate the program. - if status_code == 200: - access_token = response.json()['accessToken'] - print('{0}: Received access_token:'.format(datetime.datetime.now())) - print(access_token) - return access_token - elif status_code == 404: - sys.stderr.write("Invalid url " + authen_url) - exit(1) - elif status_code == 401: - sys.stderr.write("Invalid username/password. Please reset your password via web") - exit(1) - elif status_code == 500: - sys.stderr.write("Internal server error. Please try again later") - exit(1) - else: - sys.stderr.write("Failed to authenticate user {0} with error code {1}".format(username, status_code)) - exit(1) + for attempt in range(1, retry_attempts + 1): + try: + response = requests.post(authen_url, json=credential, headers=authen_headers) + status_code = response.status_code + response_text = response.text + + if status_code == 200: + access_token = response.json().get('accessToken') + print(f"{datetime.datetime.now()}: Authentication successful!") + print(f"Received access token: {access_token[:10]}... [truncated]") + return access_token + + elif status_code == 400: + sys.stderr.write(f"\n{datetime.datetime.now()} - ERROR: 400 Bad Request\n") + sys.stderr.write(f"Request Headers: {json.dumps(authen_headers, indent=2)}\n") + sys.stderr.write(f"Request Body: {json.dumps(credential, indent=2)}\n") + sys.stderr.write(f"Response Headers: {json.dumps(dict(response.headers), indent=2)}\n") + sys.stderr.write(f"Response Body: {response_text}\n") + sys.stderr.write("\n🚨 Possible causes: Invalid request format, missing fields, ICANN API changes, or IP blocking.\n") + exit(1) + + elif status_code == 401: + sys.stderr.write("\nERROR: 401 Unauthorized - Invalid username or password.\n") + exit(1) + + elif status_code == 429: + wait_time = 2 ** attempt # Exponential backoff + sys.stderr.write(f"\nERROR: 429 Too Many Requests - Rate limited. Retrying in {wait_time} seconds...\n") + time.sleep(wait_time) + + elif status_code == 500: + sys.stderr.write("\nERROR: 500 Internal Server Error - ICANN API might be down. Try again later.\n") + exit(1) + + else: + sys.stderr.write(f"\nERROR: Unexpected Status Code {status_code}\n") + sys.stderr.write(f"Response Headers: {json.dumps(dict(response.headers), indent=2)}\n") + sys.stderr.write(f"Response Body: {response_text}\n") + exit(1) + + except requests.exceptions.RequestException as e: + sys.stderr.write(f"\nERROR: Network or Connection Issue\n") + sys.stderr.write(f"Exception: {str(e)}\n") + exit(1) + + sys.stderr.write("\nERROR: Maximum authentication retries exceeded.\n") + exit(1) diff --git a/download.py b/download.py index 976e68d..aac1ca5 100644 --- a/download.py +++ b/download.py @@ -1,9 +1,10 @@ import json import sys -import cgi import os import datetime - +import time +import requests # Ensure requests module is available +from email.message import Message # Replaces deprecated cgi module from do_authentication import authenticate from do_http_get import do_get @@ -16,139 +17,145 @@ config_data = os.environ['CZDS_CONFIG'] config = json.loads(config_data) else: - config_file = open("config.json", "r") - config = json.load(config_file) - config_file.close() -except: - sys.stderr.write("Error loading config.json file.\n") - exit(1) - -# The config.json file must contain the following data: -username = config['icann.account.username'] -password = config['icann.account.password'] -authen_base_url = config['authentication.base.url'] -czds_base_url = config['czds.base.url'] - -# This is optional. Default to current directory -working_directory = config.get('working.directory', '.') # Default to current directory - -if not username: - sys.stderr.write("'icann.account.username' parameter not found in the config.json file\n") - exit(1) - -if not password: - sys.stderr.write("'icann.account.password' parameter not found in the config.json file\n") + with open("config.json", "r") as config_file: + config = json.load(config_file) +except Exception as e: + sys.stderr.write(f"Error loading config.json file: {e}\n") exit(1) -if not authen_base_url: - sys.stderr.write("'authentication.base.url' parameter not found in the config.json file\n") - exit(1) +# Extract configuration +username = config.get('icann.account.username') +password = config.get('icann.account.password') +authen_base_url = config.get('authentication.base.url') +czds_base_url = config.get('czds.base.url') +working_directory = config.get('working.directory', '.') -if not czds_base_url: - sys.stderr.write("'czds.base.url' parameter not found in the config.json file\n") +# Validate required fields +if not all([username, password, authen_base_url, czds_base_url]): + sys.stderr.write("Missing required parameters in config.json\n") exit(1) - - ############################################################################################################## -# Second Step: authenticate the user to get an access_token. -# Note that the access_token is global for all the REST API calls afterwards +# Second Step: Authenticate the user to get an access_token. ############################################################################################################## -print("Authenticate user {0}".format(username)) -access_token = authenticate(username, password, authen_base_url) - +print(f"Authenticating user {username}") +try: + access_token = authenticate(username, password, authen_base_url) + if not access_token: + sys.stderr.write("Authentication failed: No access token received.\n") + exit(1) +except requests.exceptions.RequestException as e: + sys.stderr.write(f"Authentication request failed: {e}\n") + exit(1) ############################################################################################################## # Third Step: Get the download zone file links ############################################################################################################## -# Function definition for listing the zone links -def get_zone_links(czds_base_url): - global access_token +def get_zone_links(czds_base_url, retry_attempts=3): + """Fetches the list of available zone file links with retries.""" + global access_token + links_url = f"{czds_base_url}/czds/downloads/links" - links_url = czds_base_url + "/czds/downloads/links" - links_response = do_get(links_url, access_token) + for attempt in range(1, retry_attempts + 1): + response = do_get(links_url, access_token) - status_code = links_response.status_code + if response.status_code == 200: + zone_links = response.json() + print(f"{datetime.datetime.now()}: Number of zone files to download: {len(zone_links)}") + return zone_links - if status_code == 200: - zone_links = links_response.json() - print("{0}: The number of zone files to be downloaded is {1}".format(datetime.datetime.now(),len(zone_links))) - return zone_links - elif status_code == 401: - print("The access_token has been expired. Re-authenticate user {0}".format(username)) - access_token = authenticate(username, password, authen_base_url) - get_zone_links(czds_base_url) - else: - sys.stderr.write("Failed to get zone links from {0} with error code {1}\n".format(links_url, status_code)) - return None + elif response.status_code == 401: + print("Access token expired. Re-authenticating...") + access_token = authenticate(username, password, authen_base_url) + return get_zone_links(czds_base_url) + elif response.status_code == 429: # Rate limiting + wait_time = 2 ** attempt # Exponential backoff + print(f"Rate limit hit. Retrying in {wait_time} seconds...") + time.sleep(wait_time) + + else: + sys.stderr.write(f"Failed to fetch zone links (HTTP {response.status_code}):\n") + sys.stderr.write(f"Response Headers: {response.headers}\n") + sys.stderr.write(f"Response Body: {response.text}\n") + + sys.stderr.write("Exceeded maximum retry attempts for fetching zone links.\n") + return None -# Get the zone links zone_links = get_zone_links(czds_base_url) if not zone_links: exit(1) - - ############################################################################################################## -# Fourth Step: download zone files +# Fourth Step: Download zone files with enhanced error handling ############################################################################################################## -# Function definition to download one zone file -def download_one_zone(url, output_directory): - print("{0}: Downloading zone file from {1}".format(str(datetime.datetime.now()), url)) - - global access_token - download_zone_response = do_get(url, access_token) - - status_code = download_zone_response.status_code - - if status_code == 200: - # Try to get the filename from the header - _,option = cgi.parse_header(download_zone_response.headers['content-disposition']) - filename = option.get('filename') +def parse_filename(response): + """Extracts filename from content-disposition header or generates a fallback name.""" + content_disposition = response.headers.get('content-disposition') + if content_disposition: + message = Message() + message['content-disposition'] = content_disposition + filename = message.get_param('filename') + if filename: + return filename + return f"{response.url.rsplit('/', 1)[-1].split('.')[0]}.txt.gz" + +def download_one_zone(url, output_directory, retry_attempts=3): + """Downloads a single zone file from the given URL with retries.""" + global access_token + print(f"{datetime.datetime.now()}: Downloading {url}") + + for attempt in range(1, retry_attempts + 1): + response = do_get(url, access_token) + + if response.status_code == 200: + filename = parse_filename(response) + file_path = os.path.join(output_directory, filename) + + with open(file_path, 'wb') as file: + for chunk in response.iter_content(1024): + file.write(chunk) + + print(f"{datetime.datetime.now()}: Downloaded to {file_path}") + return + + elif response.status_code == 401: + print("Access token expired. Re-authenticating...") + access_token = authenticate(username, password, authen_base_url) + return download_one_zone(url, output_directory) + + elif response.status_code == 404: + print(f"No zone file found for {url}") + return + + elif response.status_code == 429: # Rate limiting + wait_time = 2 ** attempt # Exponential backoff + print(f"Rate limit hit. Retrying in {wait_time} seconds...") + time.sleep(wait_time) + + else: + sys.stderr.write(f"Failed to download {url} (HTTP {response.status_code})\n") + sys.stderr.write(f"Response Headers: {response.headers}\n") + sys.stderr.write(f"Response Body: {response.text}\n") + + sys.stderr.write(f"Exceeded maximum retry attempts for {url}\n") - # If could get a filename from the header, then makeup one like [tld].txt.gz - if not filename: - filename = url.rsplit('/', 1)[-1].rsplit('.')[-2] + '.txt.gz' - - # This is where the zone file will be saved - path = '{0}/{1}'.format(output_directory, filename) - - with open(path, 'wb') as f: - for chunk in download_zone_response.iter_content(1024): - f.write(chunk) - - print("{0}: Completed downloading zone to file {1}".format(str(datetime.datetime.now()), path)) - - elif status_code == 401: - print("The access_token has been expired. Re-authenticate user {0}".format(username)) - access_token = authenticate(username, password, authen_base_url) - download_one_zone(url, output_directory) - elif status_code == 404: - print("No zone file found for {0}".format(url)) - else: - sys.stderr.write('Failed to download zone from {0} with code {1}\n'.format(url, status_code)) - -# Function definition for downloading all the zone files def download_zone_files(urls, working_directory): - - # The zone files will be saved in a sub-directory - output_directory = working_directory + "/zonefiles" + """Downloads all zone files into a sub-directory.""" + output_directory = os.path.join(working_directory, "zonefiles") if not os.path.exists(output_directory): os.makedirs(output_directory) - # Download the zone files one by one for link in urls: download_one_zone(link, output_directory) -# Finally, download all zone files start_time = datetime.datetime.now() download_zone_files(zone_links, working_directory) end_time = datetime.datetime.now() -print("{0}: DONE DONE. Completed downloading all zone files. Time spent: {1}".format(str(end_time), (end_time-start_time))) +print(f"{end_time}: Completed downloading all zone files. Time taken: {end_time - start_time}") diff --git a/extract_zonefiles.py b/extract_zonefiles.py new file mode 100644 index 0000000..b04fa3d --- /dev/null +++ b/extract_zonefiles.py @@ -0,0 +1,24 @@ +import os +import gzip +import shutil + +# Define source and destination directories +source_dir = "zones/zonefiles/" +destination_dir = "zones/extracted_zonefiles/" + +# Ensure destination directory exists +os.makedirs(destination_dir, exist_ok=True) + +# Iterate over all files in the source directory +for filename in os.listdir(source_dir): + if filename.endswith(".gz"): + source_file = os.path.join(source_dir, filename) + destination_file = os.path.join(destination_dir, filename[:-3]) # Remove .gz extension + + # Extract the .gz file + with gzip.open(source_file, 'rb') as f_in, open(destination_file, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + + print(f"Extracted: {filename} -> {destination_file}") + +print("Extraction complete.")