Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 62 additions & 27 deletions do_authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,70 @@
import requests
import sys
import datetime
import time
import urllib.parse

def authenticate(username, password, authen_base_url, retry_attempts=3):
    """Authenticate with the ICANN CZDS API and return an access token.

    Args:
        username: Account user name sent in the JSON request body.
        password: Account password sent in the JSON request body.
        authen_base_url: Base URL; ``/api/authenticate`` is appended to it.
        retry_attempts: Maximum number of POST attempts. Only HTTP 429
            responses are retried; every other failure ends the program.

    Returns:
        The ``accessToken`` value taken from the JSON response body.

    Raises:
        SystemExit: With status 1 on any non-retryable failure, or once the
            retry budget has been used up.
    """
    authen_headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'User-Agent': 'Mozilla/5.0 (compatible; CZDS-Client/1.0; +https://github.com/mthcht/czds-api-client-python)',
    }

    credential = {
        "username": username,
        "password": password,  # No encoding for now, unless ICANN blocks special characters
        "grant_type": "password",  # Some APIs require this
    }
    # Diagnostics print this copy so the password never reaches stderr or logs.
    redacted_credential = dict(credential, password="***REDACTED***")

    authen_url = authen_base_url + '/api/authenticate'

    for attempt in range(1, retry_attempts + 1):
        # Keep the try body to the network call only.
        try:
            # Without a timeout, requests can block forever on a stalled server.
            response = requests.post(authen_url, json=credential,
                                     headers=authen_headers, timeout=30)
        except requests.exceptions.RequestException as e:
            sys.stderr.write("\nERROR: Network or Connection Issue\n")
            sys.stderr.write(f"Exception: {str(e)}\n")
            sys.exit(1)

        status_code = response.status_code
        response_text = response.text

        if status_code == 200:
            try:
                access_token = response.json().get('accessToken')
            except ValueError:
                # Body was not valid JSON; handled as a missing token below.
                access_token = None

            if not access_token:
                sys.stderr.write("\nERROR: Authentication response did not contain an access token.\n")
                sys.stderr.write(f"Response Body: {response_text}\n")
                sys.exit(1)

            print(f"{datetime.datetime.now()}: Authentication successful!")
            print(f"Received access token: {access_token[:10]}... [truncated]")
            return access_token

        if status_code == 400:
            sys.stderr.write(f"\n{datetime.datetime.now()} - ERROR: 400 Bad Request\n")
            sys.stderr.write(f"Request Headers: {json.dumps(authen_headers, indent=2)}\n")
            sys.stderr.write(f"Request Body: {json.dumps(redacted_credential, indent=2)}\n")
            sys.stderr.write(f"Response Headers: {json.dumps(dict(response.headers), indent=2)}\n")
            sys.stderr.write(f"Response Body: {response_text}\n")
            sys.stderr.write("\n🚨 Possible causes: Invalid request format, missing fields, ICANN API changes, or IP blocking.\n")
            sys.exit(1)

        if status_code == 401:
            sys.stderr.write("\nERROR: 401 Unauthorized - Invalid username or password.\n")
            sys.exit(1)

        if status_code == 429:
            if attempt == retry_attempts:
                # No attempt left: do not sleep or announce a retry that will not happen.
                break
            wait_time = 2 ** attempt  # Exponential backoff
            sys.stderr.write(f"\nERROR: 429 Too Many Requests - Rate limited. Retrying in {wait_time} seconds...\n")
            time.sleep(wait_time)
            continue

        if status_code == 500:
            sys.stderr.write("\nERROR: 500 Internal Server Error - ICANN API might be down. Try again later.\n")
            sys.exit(1)

        sys.stderr.write(f"\nERROR: Unexpected Status Code {status_code}\n")
        sys.stderr.write(f"Response Headers: {json.dumps(dict(response.headers), indent=2)}\n")
        sys.stderr.write(f"Response Body: {response_text}\n")
        sys.exit(1)

    sys.stderr.write("\nERROR: Maximum authentication retries exceeded.\n")
    sys.exit(1)
205 changes: 106 additions & 99 deletions download.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import sys
import cgi
import os
import datetime

import time
import requests # Ensure requests module is available
from email.message import Message # Replaces deprecated cgi module
from do_authentication import authenticate
from do_http_get import do_get

Expand All @@ -16,139 +17,145 @@
config_data = os.environ['CZDS_CONFIG']
config = json.loads(config_data)
else:
config_file = open("config.json", "r")
config = json.load(config_file)
config_file.close()
except:
sys.stderr.write("Error loading config.json file.\n")
exit(1)

# The config.json file must contain the following data:
username = config['icann.account.username']
password = config['icann.account.password']
authen_base_url = config['authentication.base.url']
czds_base_url = config['czds.base.url']

# This is optional. Default to current directory
working_directory = config.get('working.directory', '.') # Default to current directory

if not username:
sys.stderr.write("'icann.account.username' parameter not found in the config.json file\n")
exit(1)

if not password:
sys.stderr.write("'icann.account.password' parameter not found in the config.json file\n")
with open("config.json", "r") as config_file:
config = json.load(config_file)
except Exception as e:
sys.stderr.write(f"Error loading config.json file: {e}\n")
exit(1)

if not authen_base_url:
sys.stderr.write("'authentication.base.url' parameter not found in the config.json file\n")
exit(1)
# Extract configuration
username = config.get('icann.account.username')
password = config.get('icann.account.password')
authen_base_url = config.get('authentication.base.url')
czds_base_url = config.get('czds.base.url')
# Optional; defaults to the current directory.
working_directory = config.get('working.directory', '.')

# Validate required fields, naming each missing or empty one so the user
# knows exactly which config.json entry to fix.
required_parameters = {
    'icann.account.username': username,
    'icann.account.password': password,
    'authentication.base.url': authen_base_url,
    'czds.base.url': czds_base_url,
}
missing_parameters = [name for name, value in required_parameters.items() if not value]
if missing_parameters:
    for name in missing_parameters:
        sys.stderr.write(f"'{name}' parameter not found in the config.json file\n")
    sys.exit(1)


##############################################################################################################
# Second Step: authenticate the user to get an access_token.
# Note that the access_token is global for all the REST API calls afterwards
# Second Step: Authenticate the user to get an access_token.
##############################################################################################################

print(f"Authenticating user {username}")

# Only the call itself sits inside the try, so the handler cannot mask
# unrelated errors.
try:
    access_token = authenticate(username, password, authen_base_url)
except requests.exceptions.RequestException as e:
    sys.stderr.write(f"Authentication request failed: {e}\n")
    sys.exit(1)

if not access_token:
    sys.stderr.write("Authentication failed: No access token received.\n")
    sys.exit(1)

##############################################################################################################
# Third Step: Get the download zone file links
##############################################################################################################

# Function definition for listing the zone links
def get_zone_links(czds_base_url, retry_attempts=3):
    """Fetch the list of available zone file download links.

    Args:
        czds_base_url: Base URL; ``/czds/downloads/links`` is appended to it.
        retry_attempts: Maximum number of GET attempts. A 401 (after
            re-authenticating) and a 429 (after backing off) each use up one
            attempt; any other error status is not retried.

    Returns:
        The parsed JSON body of a 200 response, or ``None`` when the request
        fails or the attempts are exhausted.
    """
    global access_token
    links_url = f"{czds_base_url}/czds/downloads/links"

    for attempt in range(1, retry_attempts + 1):
        response = do_get(links_url, access_token)
        status_code = response.status_code

        if status_code == 200:
            zone_links = response.json()
            print(f"{datetime.datetime.now()}: Number of zone files to download: {len(zone_links)}")
            return zone_links

        if status_code == 401:
            # Refresh the token and retry inside the loop, so repeated 401s
            # stay bounded by retry_attempts instead of recursing without limit.
            print("Access token expired. Re-authenticating...")
            access_token = authenticate(username, password, authen_base_url)
            continue

        if status_code == 429:  # Rate limiting
            if attempt < retry_attempts:
                wait_time = 2 ** attempt  # Exponential backoff
                print(f"Rate limit hit. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            continue

        # Any other status is treated as non-retryable: report it and stop,
        # rather than re-sending the same request immediately.
        sys.stderr.write(f"Failed to fetch zone links (HTTP {status_code}):\n")
        sys.stderr.write(f"Response Headers: {response.headers}\n")
        sys.stderr.write(f"Response Body: {response.text}\n")
        return None

    sys.stderr.write("Exceeded maximum retry attempts for fetching zone links.\n")
    return None

# Get the zone links
zone_links = get_zone_links(czds_base_url)
if not zone_links:
    # Covers both a failed request (None) and an empty list of links.
    sys.stderr.write("No zone file links available to download.\n")
    sys.exit(1)



##############################################################################################################
# Fourth Step: download zone files
# Fourth Step: Download zone files with enhanced error handling
##############################################################################################################

# Function definition to download one zone file
def download_one_zone(url, output_directory):
print("{0}: Downloading zone file from {1}".format(str(datetime.datetime.now()), url))

global access_token
download_zone_response = do_get(url, access_token)

status_code = download_zone_response.status_code

if status_code == 200:
# Try to get the filename from the header
_,option = cgi.parse_header(download_zone_response.headers['content-disposition'])
filename = option.get('filename')
def parse_filename(response):
    """Return the file name to save a downloaded zone file under.

    The name is taken from the ``filename`` parameter of the response's
    Content-Disposition header when present; otherwise it is built from the
    last path segment of ``response.url`` as ``<first-dot-part>.txt.gz``.

    Args:
        response: Object exposing ``headers`` (with a ``get`` method) and
            ``url``.

    Returns:
        A bare file name with no directory components.
    """
    content_disposition = response.headers.get('content-disposition')
    if content_disposition:
        message = Message()
        message['content-disposition'] = content_disposition
        # get_filename() reads the Content-Disposition header; get_param()
        # would look in Content-Type by default and never find the name.
        filename = message.get_filename()
        if filename:
            # The header is server-controlled: keep only the final path
            # component so the name cannot escape the output directory.
            filename = os.path.basename(filename.replace('\\', '/'))
        if filename:
            return filename
    return f"{response.url.rsplit('/', 1)[-1].split('.')[0]}.txt.gz"

def download_one_zone(url, output_directory, retry_attempts=3):
    """Download a single zone file from ``url`` into ``output_directory``.

    Args:
        url: Download link for one zone file.
        output_directory: Directory the file is written into.
        retry_attempts: Maximum number of GET attempts. A 401 (after
            re-authenticating) and a 429 (after backing off) each use up one
            attempt; a 404 or any other error status is not retried.

    Returns:
        None. Progress is printed to stdout and failures to stderr.
    """
    global access_token
    print(f"{datetime.datetime.now()}: Downloading {url}")

    for attempt in range(1, retry_attempts + 1):
        response = do_get(url, access_token)
        status_code = response.status_code

        if status_code == 200:
            filename = parse_filename(response)
            file_path = os.path.join(output_directory, filename)

            with open(file_path, 'wb') as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)

            print(f"{datetime.datetime.now()}: Downloaded to {file_path}")
            return

        if status_code == 401:
            # Refresh the token and retry inside the loop, so repeated 401s
            # stay bounded by retry_attempts instead of recursing without limit.
            print("Access token expired. Re-authenticating...")
            access_token = authenticate(username, password, authen_base_url)
            continue

        if status_code == 404:
            print(f"No zone file found for {url}")
            return

        if status_code == 429:  # Rate limiting
            if attempt < retry_attempts:
                wait_time = 2 ** attempt  # Exponential backoff
                print(f"Rate limit hit. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            continue

        # Any other status is treated as non-retryable: report it and stop,
        # rather than re-sending the same request immediately.
        sys.stderr.write(f"Failed to download {url} (HTTP {status_code})\n")
        sys.stderr.write(f"Response Headers: {response.headers}\n")
        sys.stderr.write(f"Response Body: {response.text}\n")
        return

    sys.stderr.write(f"Exceeded maximum retry attempts for {url}\n")

# If could get a filename from the header, then makeup one like [tld].txt.gz
if not filename:
filename = url.rsplit('/', 1)[-1].rsplit('.')[-2] + '.txt.gz'

# This is where the zone file will be saved
path = '{0}/{1}'.format(output_directory, filename)

with open(path, 'wb') as f:
for chunk in download_zone_response.iter_content(1024):
f.write(chunk)

print("{0}: Completed downloading zone to file {1}".format(str(datetime.datetime.now()), path))

elif status_code == 401:
print("The access_token has been expired. Re-authenticate user {0}".format(username))
access_token = authenticate(username, password, authen_base_url)
download_one_zone(url, output_directory)
elif status_code == 404:
print("No zone file found for {0}".format(url))
else:
sys.stderr.write('Failed to download zone from {0} with code {1}\n'.format(url, status_code))

# Function definition for downloading all the zone files
def download_zone_files(urls, working_directory):
    """Download every zone file in ``urls`` into ``<working_directory>/zonefiles``.

    Args:
        urls: Iterable of zone file download links.
        working_directory: Directory under which the ``zonefiles``
            sub-directory is created if it does not exist yet.
    """
    output_directory = os.path.join(working_directory, "zonefiles")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)

    # Download the zone files one by one
    for link in urls:
        download_one_zone(link, output_directory)

# Finally, download all zone files and report how long the run took.
start_time = datetime.datetime.now()
download_zone_files(zone_links, working_directory)
end_time = datetime.datetime.now()

print(f"{end_time}: Completed downloading all zone files. Time taken: {end_time - start_time}")
24 changes: 24 additions & 0 deletions extract_zonefiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os
import gzip
import shutil

# Define source and destination directories
source_dir = "zones/zonefiles/"
destination_dir = "zones/extracted_zonefiles/"


def extract_zonefiles(source_dir, destination_dir):
    """Decompress every ``*.gz`` file in ``source_dir`` into ``destination_dir``.

    Each output file keeps the source name with the trailing ``.gz`` removed.
    Entries that are not regular files, or whose name is only ``.gz``, are
    skipped.

    Args:
        source_dir: Directory containing the gzip-compressed files.
        destination_dir: Directory to write the extracted files into; it is
            created if it does not exist.

    Returns:
        The list of extracted file paths, in sorted source-name order.
    """
    # Ensure destination directory exists
    os.makedirs(destination_dir, exist_ok=True)

    extracted = []
    for filename in sorted(os.listdir(source_dir)):
        if not filename.endswith(".gz"):
            continue

        source_file = os.path.join(source_dir, filename)
        stem = filename[:-3]  # Remove .gz extension
        # A directory named "*.gz" or a bare ".gz" entry cannot be extracted.
        if not stem or not os.path.isfile(source_file):
            continue

        destination_file = os.path.join(destination_dir, stem)

        # Stream the decompressed data so large zone files are never held in memory.
        with gzip.open(source_file, 'rb') as f_in, open(destination_file, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

        print(f"Extracted: {filename} -> {destination_file}")
        extracted.append(destination_file)

    return extracted


if __name__ == "__main__":
    if not os.path.isdir(source_dir):
        # SystemExit with a string prints it to stderr and exits with status 1.
        raise SystemExit(f"Source directory not found: {source_dir}")

    extract_zonefiles(source_dir, destination_dir)
    print("Extraction complete.")