diff --git a/08-Check-ODAP-Tables.py b/08-Check-ODAP-Tables.py index 7ffec17..8f6b0fd 100755 --- a/08-Check-ODAP-Tables.py +++ b/08-Check-ODAP-Tables.py @@ -211,4 +211,24 @@ log.error("Found at least one bad author email; ending audit here.") sys.exit(7) + ## + ## Check for stories without chapters + ## + + log.debug("Checking for stories without any chapters.") + found_error = False + + empty_stories = sql.execute_dict( + "SELECT s.id as sid FROM stories s LEFT JOIN chapters c ON c.story_id = s.id WHERE c.story_id IS NULL" + ) + + if empty_stories: + found_error = True + for story in empty_stories: + log.error(f"Found story with no chapters: {story['sid']}") + + if found_error: + log.error("Found at least one story with no chapters; ending audit here.") + sys.exit(8) + log.info("All checks completed successfully.") diff --git a/xx-Remove-emails-from-Open-Doors-Tables.py b/xx-Remove-emails-from-Open-Doors-Tables.py index 6a718a1..ebd34b3 100755 --- a/xx-Remove-emails-from-Open-Doors-Tables.py +++ b/xx-Remove-emails-from-Open-Doors-Tables.py @@ -6,8 +6,11 @@ from prompt_toolkit.formatted_text import FormattedText from prompt_toolkit.shortcuts import clear +# This regex is pulled from the HTML5 spec. Though it is technically not +# compliant with RFC 5322 ("a willful violation"), it's good enough for our +# purposes. email_regex = re.compile( - r"([-!#-'*+/-9=?A-Z^-~]+(\.[-!#-'*+/-9=?A-Z^-~]+)*|\"([]!#-[^-~ \t]|(\\[\t -~]))+\")@([-!#-'*+/-9=?A-Z^-~]+(\.[-!#-'*+/-9=?A-Z^-~]+)*|\[[\t -Z^-~]*])" + r"([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+)@([a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)" ) @@ -44,7 +47,7 @@ def is_mailto(match) -> bool: def ask_user_for_action(match) -> str: start, end = match.span() raw_email = match.string[start:end] - domain = match.group(5) + domain = match.group(2) clear() print_context(match, 50) while True: @@ -85,7 +88,7 @@ def return_from_list(match) -> str: return raw_email elif address_entry is not None: return address_entry - domain = match.group(5) + domain = match.group(2) domain_entry = domains.get(domain) if domain_entry is True: return raw_email