From d4f0b20b71440caa223ed91389dea7186a4fcb20 Mon Sep 17 00:00:00 2001
From: eggy <d7chen@uwaterloo.ca>
Date: Sun, 26 May 2024 15:28:06 -0400
Subject: [PATCH 01/13] fix: specify database when fetching

---
 shared_python/Chapters.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/shared_python/Chapters.py b/shared_python/Chapters.py
index 7bd44f6..10e2eff 100755
--- a/shared_python/Chapters.py
+++ b/shared_python/Chapters.py
@@ -81,6 +81,7 @@ def _gather_and_dedupe(self, chapters_path, extensions, has_ids=False):
                 for cid, duplicate in duplicate_chapters.items():
                     # look up the author id and add that one to the file_names list
                     sql_author_id = self.sql.execute_and_fetchall(
+                        self.sql.database,
                         "SELECT author_id FROM chapters WHERE id = {0}".format(cid)
                     )
                     if len(sql_author_id) > 0:

From f8c98b4a8d7faf8817f9de1cb6c5e322e4e85086 Mon Sep 17 00:00:00 2001
From: eggy <d7chen@uwaterloo.ca>
Date: Sun, 26 May 2024 16:06:02 -0400
Subject: [PATCH 02/13] fix: correct args

---
 03-Export-Tags-Authors-Stories.py |  1 +
 automated_archive/aa.py           | 11 +++++------
 shared_python/Sql.py              |  4 +++-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/03-Export-Tags-Authors-Stories.py b/03-Export-Tags-Authors-Stories.py
index 4604b78..d036adf 100755
--- a/03-Export-Tags-Authors-Stories.py
+++ b/03-Export-Tags-Authors-Stories.py
@@ -22,6 +22,7 @@ def write_csv(data, filename, columns):
         fp.close()
 
 
+
 if __name__ == "__main__":
     """
   This step exports the Tag Wrangling and Authors with stories CSV files which you then have to import into Google
diff --git a/automated_archive/aa.py b/automated_archive/aa.py
index 44b4b4d..28ae35f 100755
--- a/automated_archive/aa.py
+++ b/automated_archive/aa.py
@@ -4,6 +4,7 @@
 import codecs
 import re
 import os
+from pathlib import Path
 from html.parser import HTMLParser
 
 from pymysql import connect
@@ -123,7 +124,7 @@ def _extract_fandoms(args, record):
 
 
 def _create_mysql(args, FILES, log):
-    db = connect(args.db_host, args.db_user, args.db_password, "")
+    db = connect(host=args.db_host, user=args.db_user, password=args.db_password, db="")
     cursor = db.cursor()
     DATABASE_NAME = args.temp_db_database
 
@@ -132,12 +133,10 @@ def _create_mysql(args, FILES, log):
     cursor.execute("create database {0};".format(DATABASE_NAME))
     cursor.execute("use {0}".format(DATABASE_NAME))
 
-    sql = Sql(args)
-    codepath = os.path.dirname(os.path.realpath(__file__))
+    sql = Sql(args, log)
+    script_path = Path(__file__).parent.parent / "shared_python" / "create-open-doors-tables.sql"
 
-    sql.run_script_from_file(
-        codepath + "/shared_python/create-open-doors-tables.sql", database=DATABASE_NAME
-    )
+    sql.run_script_from_file(script_path, database=DATABASE_NAME)
     db.commit()
 
     authors = [
diff --git a/shared_python/Sql.py b/shared_python/Sql.py
index 9a5227b..5bf0100 100755
--- a/shared_python/Sql.py
+++ b/shared_python/Sql.py
@@ -1,4 +1,6 @@
 import re
+from pathlib import Path
+from typing import Union
 import warnings
 
 # ignore unhelpful MySQL warnings
@@ -53,7 +55,7 @@ def execute_and_fetchall(self, database: str, statement: str):
         self.conn.commit()
         return cursor.fetchall()
 
-    def run_script_from_file(self, filename, database, initial_load=False):
+    def run_script_from_file(self, filename: Union[str, Path], database, initial_load=False):
         # Open and read the file as a single buffer
         fd = open(filename, "r")
         sqlFile = fd.read()

From 8ff4a68e851694b930a014a58a99a8b43b7886fe Mon Sep 17 00:00:00 2001
From: eggy <d7chen@uwaterloo.ca>
Date: Sun, 26 May 2024 16:15:55 -0400
Subject: [PATCH 03/13] fix: correct string type

---
 shared_python/Tags.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/shared_python/Tags.py b/shared_python/Tags.py
index e69068f..48b2724 100755
--- a/shared_python/Tags.py
+++ b/shared_python/Tags.py
@@ -94,7 +94,7 @@ def populate_tag_table(
                                 tag_col_lookup[col], str
                             ):  # Probably AA or a custom archive
                                 cleaned_tag = (
-                                    val.encode("utf-8").replace("'", "'").strip()
+                                    val.replace("'", "'").strip()
                                 )
 
                                 values.append(

From 4a08b7edfff6f151e0d0e43effe7b80d0ca24505 Mon Sep 17 00:00:00 2001
From: eggy <d7chen@uwaterloo.ca>
Date: Sun, 26 May 2024 22:17:59 -0400
Subject: [PATCH 04/13] chore: fix lint

---
 automated_archive/aa.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/automated_archive/aa.py b/automated_archive/aa.py
index 28ae35f..06df23d 100755
--- a/automated_archive/aa.py
+++ b/automated_archive/aa.py
@@ -3,7 +3,6 @@
 import datetime
 import codecs
 import re
-import os
 from pathlib import Path
 from html.parser import HTMLParser
 

From b02e2935daae7585d41e979768fab9a8b2e5c604 Mon Sep 17 00:00:00 2001
From: eggy <d7chen@uwaterloo.ca>
Date: Sun, 26 May 2024 22:19:30 -0400
Subject: [PATCH 05/13] chore: fix format

---
 03-Export-Tags-Authors-Stories.py | 1 -
 automated_archive/aa.py           | 4 +++-
 shared_python/Chapters.py         | 2 +-
 shared_python/Sql.py              | 4 +++-
 shared_python/Tags.py             | 4 +---
 5 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/03-Export-Tags-Authors-Stories.py b/03-Export-Tags-Authors-Stories.py
index d036adf..4604b78 100755
--- a/03-Export-Tags-Authors-Stories.py
+++ b/03-Export-Tags-Authors-Stories.py
@@ -22,7 +22,6 @@ def write_csv(data, filename, columns):
         fp.close()
 
 
-
 if __name__ == "__main__":
     """
   This step exports the Tag Wrangling and Authors with stories CSV files which you then have to import into Google
diff --git a/automated_archive/aa.py b/automated_archive/aa.py
index 06df23d..df451b4 100755
--- a/automated_archive/aa.py
+++ b/automated_archive/aa.py
@@ -133,7 +133,9 @@ def _create_mysql(args, FILES, log):
     cursor.execute("use {0}".format(DATABASE_NAME))
 
     sql = Sql(args, log)
-    script_path = Path(__file__).parent.parent / "shared_python" / "create-open-doors-tables.sql"
+    script_path = (
+        Path(__file__).parent.parent / "shared_python" / "create-open-doors-tables.sql"
+    )
 
     sql.run_script_from_file(script_path, database=DATABASE_NAME)
     db.commit()
diff --git a/shared_python/Chapters.py b/shared_python/Chapters.py
index 10e2eff..7189dae 100755
--- a/shared_python/Chapters.py
+++ b/shared_python/Chapters.py
@@ -82,7 +82,7 @@ def _gather_and_dedupe(self, chapters_path, extensions, has_ids=False):
                     # look up the author id and add that one to the file_names list
                     sql_author_id = self.sql.execute_and_fetchall(
                         self.sql.database,
-                        "SELECT author_id FROM chapters WHERE id = {0}".format(cid)
+                        "SELECT author_id FROM chapters WHERE id = {0}".format(cid),
                     )
                     if len(sql_author_id) > 0:
                         author_id = sql_author_id[0][0]
diff --git a/shared_python/Sql.py b/shared_python/Sql.py
index 5bf0100..31e140f 100755
--- a/shared_python/Sql.py
+++ b/shared_python/Sql.py
@@ -55,7 +55,9 @@ def execute_and_fetchall(self, database: str, statement: str):
         self.conn.commit()
         return cursor.fetchall()
 
-    def run_script_from_file(self, filename: Union[str, Path], database, initial_load=False):
+    def run_script_from_file(
+        self, filename: Union[str, Path], database, initial_load=False
+    ):
         # Open and read the file as a single buffer
         fd = open(filename, "r")
         sqlFile = fd.read()
diff --git a/shared_python/Tags.py b/shared_python/Tags.py
index 48b2724..11da9ed 100755
--- a/shared_python/Tags.py
+++ b/shared_python/Tags.py
@@ -93,9 +93,7 @@ def populate_tag_table(
                             if isinstance(
                                 tag_col_lookup[col], str
                             ):  # Probably AA or a custom archive
-                                cleaned_tag = (
-                                    val.replace("'", "'").strip()
-                                )
+                                cleaned_tag = val.replace("'", "'").strip()
 
                                 values.append(
                                     '({0}, "{1}", "{2}", "{3}")'.format(

From 53415c045a58fdf0d5b2eb242e842df298b1f86c Mon Sep 17 00:00:00 2001
From: Brianna Dardin <brianna.dardin@gmail.com>
Date: Sun, 16 Mar 2025 13:43:53 -0700
Subject: [PATCH 06/13] Updated step 1 to use working schema & other changes
 for Unit B

---
 automated_archive/aa.py | 95 +++++++++++++++++++++++++++++------------
 shared_python/Sql.py    |  8 ++--
 2 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/automated_archive/aa.py b/automated_archive/aa.py
index df451b4..ed822ce 100755
--- a/automated_archive/aa.py
+++ b/automated_archive/aa.py
@@ -1,10 +1,10 @@
 # -- coding: utf-8 --
 
-import datetime
+from datetime import datetime
 import codecs
 import re
-from pathlib import Path
-from html.parser import HTMLParser
+import html
+import urllib.request
 
 from pymysql import connect
 
@@ -22,11 +22,17 @@ def _clean_file(filepath, log):
     :param filepath: Path to ARCHIVE_DB.pl
     :return: Python dictionary keyed by original story id
     """
-    h = HTMLParser()
-    archive_db = codecs.open(filepath, "r", encoding="utf-8").read()
+    for i, encoding in enumerate(["utf-8","ascii","Latin-1","Windows-1252"]):
+        try:
+            archive_db = codecs.open(filepath, "r", encoding=encoding).read()
+            break
+        except:
+            log.error(f"{encoding} encoding failed to read ARCHIVE_DB.pl")
+            if i == 3:
+                raise RuntimeError("ARCHIVE_DB.pl can't be read by any of the default encodings, please fix the file and try again.")
 
     # Manually escape single quote entity and reformat file as a Python dictionary
-    step1 = h.unescape(archive_db.replace("&#39;", "\\&#39;"))
+    step1 = html.unescape(archive_db.replace("&#39;", "\\&#39;"))
 
     # Indent the file with a single tab instead of whatever is currently used
     step15 = re.sub(r"^\s+", "\t", step1)
@@ -122,6 +128,30 @@ def _extract_fandoms(args, record):
     return tags.strip(", ")
 
 
+def _extract_date(args, record):
+    date_string = record.get(
+        "PrintTime",
+        record.get(
+            "DatePrint",
+            record.get(
+                "Date", str(datetime.now().strftime("%m/%d/%y"))
+            ),
+        ),
+    )
+    
+    dt = None
+    try:
+        # If the date is in the form of a Unix timestamp
+        if date_string.isdigit():
+            dt = datetime.fromtimestamp(int(date_string))
+        else:
+            dt = datetime.strptime(date_string, "%m/%d/%y")
+    except:
+        log.error("Failed to parse date value: "+date_string)
+    
+    return dt.strftime("%Y-%m-%d") if dt else ""
+
+
 def _create_mysql(args, FILES, log):
     db = connect(host=args.db_host, user=args.db_user, password=args.db_password, db="")
     cursor = db.cursor()
@@ -132,12 +162,13 @@ def _create_mysql(args, FILES, log):
     cursor.execute("create database {0};".format(DATABASE_NAME))
     cursor.execute("use {0}".format(DATABASE_NAME))
 
-    sql = Sql(args, log)
-    script_path = (
-        Path(__file__).parent.parent / "shared_python" / "create-open-doors-tables.sql"
-    )
+    # Instead of duplicating this file in the repo grab it from the master branch of eFiction
+    url = "https://raw.githubusercontent.com/otwcode/open-doors-eFiction/refs/heads/master/opendoors/open-doors-tables-working.sql"
+    with urllib.request.urlopen(url) as response:
+        script = response.read().decode()
 
-    sql.run_script_from_file(script_path, database=DATABASE_NAME)
+    sql = Sql(args, log)
+    sql.run_sql_file(script, database=DATABASE_NAME)
     db.commit()
 
     authors = [
@@ -164,18 +195,7 @@ def _create_mysql(args, FILES, log):
             FILES[i].get("Summary", "").replace("'", "\\'"),
             _extract_tags(args, FILES[i]),
             _extract_characters(args, FILES[i]),
-            datetime.datetime.strptime(
-                FILES[i].get(
-                    "PrintTime",
-                    FILES[i].get(
-                        "DatePrint",
-                        FILES[i].get(
-                            "Date", str(datetime.datetime.now().strftime("%m/%d/%y"))
-                        ),
-                    ),
-                ),
-                "%m/%d/%y",
-            ).strftime("%Y-%m-%d"),
+            _extract_date(args, FILES[i]),
             FILES[i].get("Location", "").replace("'", "\\'"),
             FILES[i]
             .get("LocationURL", FILES[i].get("StoryURL", ""))
@@ -183,7 +203,7 @@ def _create_mysql(args, FILES, log):
             FILES[i].get("Notes", "").replace("'", "\\'"),
             _extract_relationships(args, FILES[i]),
             FILES[i].get("Rating", ""),
-            FILES[i].get("Warnings", "").replace("'", "\\'"),
+            FILES[i].get("Warnings", FILES[i].get("OptionalWarnings", "")).replace("'", "\\'"),
             FILES[i].get("Author", "").strip(),
             FILES[i].get("Email", FILES[i].get("EmailAuthor", "")).lower().strip(),
             FILES[i].get("FileType", args.chapters_file_extensions)
@@ -196,6 +216,7 @@ def _create_mysql(args, FILES, log):
 
     cur = 0
     total = len(FILES)
+    item_dict = {}
     for (
         original_id,
         title,
@@ -225,7 +246,7 @@ def _create_mysql(args, FILES, log):
                 table_name = "stories"
             else:
                 filename = url
-                table_name = "bookmarks"
+                table_name = "story_links"
 
             # Clean up fandoms and add default fandom if it exists
             final_fandoms = fandoms.replace("'", r"\'")
@@ -241,10 +262,14 @@ def _create_mysql(args, FILES, log):
                 if element[1] == author and element[2] == email
             ]
             authorid = result[0][0]
+            item_dict[original_id] = {
+                "authorid": authorid,
+                "itemtype": "story_link" if table_name == "story_links" else "story"
+            }
 
             stor = """
-        INSERT INTO {0} (id, fandoms, title, summary, tags, characters, date, url, notes, relationships, rating, warnings, author_id)
-        VALUES({1}, '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}', '{13}');\n""".format(
+        INSERT INTO {0} (id, fandoms, title, summary, tags, characters, date, url, notes, relationships, rating, warnings)
+        VALUES({1}, '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}');\n""".format(
                 table_name,
                 original_id,
                 final_fandoms.replace(r"\\", "\\"),
@@ -258,7 +283,6 @@ def _create_mysql(args, FILES, log):
                 pairings,
                 rating,
                 warnings,
-                authorid,
             )
             cursor.execute(stor)
         except:
@@ -284,6 +308,21 @@ def _create_mysql(args, FILES, log):
             )
             raise
     db.commit()
+    
+    for itemid, item_info in item_dict.items():
+        try:
+            item_auth = """
+            INSERT INTO item_authors (author_id, item_id, item_type)
+            VALUES({0}, {1}, '{2}');\n""".format(
+                item_info["authorid"],
+                itemid,
+                item_info["itemtype"]
+            )
+            cursor.execute(item_auth)
+        except:
+            log.error(f"Failed to insert item_authors for {item_info['itemtype']} {itemid} with author {item_info['authorid']}")
+            raise
+    db.commit()
 
 
 def clean_and_load_data(args, log):
diff --git a/shared_python/Sql.py b/shared_python/Sql.py
index 31e140f..2d2d6af 100755
--- a/shared_python/Sql.py
+++ b/shared_python/Sql.py
@@ -1,6 +1,4 @@
 import re
-from pathlib import Path
-from typing import Union
 import warnings
 
 # ignore unhelpful MySQL warnings
@@ -56,13 +54,17 @@ def execute_and_fetchall(self, database: str, statement: str):
         return cursor.fetchall()
 
     def run_script_from_file(
-        self, filename: Union[str, Path], database, initial_load=False
+        self, filename, database, initial_load=False
     ):
         # Open and read the file as a single buffer
         fd = open(filename, "r")
         sqlFile = fd.read()
         fd.close()
+        self.run_sql_file(sqlFile, database, initial_load)
 
+    def run_sql_file(
+        self, sqlFile, database, initial_load=False
+    ):
         # replace placeholders and return all SQL commands (split on ';')
         sqlCommands = sqlFile.replace("$DATABASE$", database).split(";\n")
 

From cac604ae00af3960215e22436f24f2b0b9ae1835 Mon Sep 17 00:00:00 2001
From: Brianna Dardin <brianna.dardin@gmail.com>
Date: Sun, 16 Mar 2025 15:37:07 -0700
Subject: [PATCH 07/13] Updated step 2b to insert unique tags and item_tags

---
 02b-Extract-Tags-From-Stories.py |  1 -
 shared_python/Tags.py            | 41 +++++++++++++++++++++-----------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/02b-Extract-Tags-From-Stories.py b/02b-Extract-Tags-From-Stories.py
index dce273e..0b6fa77 100755
--- a/02b-Extract-Tags-From-Stories.py
+++ b/02b-Extract-Tags-From-Stories.py
@@ -22,7 +22,6 @@
             args.temp_db_database
         )
     )
-    tags.create_tags_table()
 
     tag_col_list = {}
     stories_id_name = ""
diff --git a/shared_python/Tags.py b/shared_python/Tags.py
index 11da9ed..9c993fa 100755
--- a/shared_python/Tags.py
+++ b/shared_python/Tags.py
@@ -1,4 +1,5 @@
 import re
+from collections import defaultdict
 from html.parser import HTMLParser
 from logging import Logger
 
@@ -83,8 +84,9 @@ def populate_tag_table(
             )
         )
 
+        tags_to_insert = {}
+        tags_to_story_ids = defaultdict(list)
         for story_tags_row in data:
-            values = []
             for col in tag_columns:
                 needs_fandom = col in tags_with_fandoms
                 if story_tags_row[col] is not None:
@@ -93,25 +95,36 @@ def populate_tag_table(
                             if isinstance(
                                 tag_col_lookup[col], str
                             ):  # Probably AA or a custom archive
-                                cleaned_tag = val.replace("'", "'").strip()
-
-                                values.append(
-                                    '({0}, "{1}", "{2}", "{3}")'.format(
-                                        story_tags_row[story_id_col_name],
-                                        re.sub(r'(?<!\\)"', '\\"', cleaned_tag),
+                                cleaned_tag = re.sub(r'(?<!\\)"', '\\"', val.replace("'", "'").strip())
+                                tags_to_story_ids[cleaned_tag].append(story_tags_row[story_id_col_name])
+                                tags_to_insert[cleaned_tag] = '("{0}", "{1}", "{2}")'.format(
+                                        cleaned_tag,
                                         tag_col_lookup[col],
                                         story_tags_row["fandoms"]
                                         if needs_fandom
                                         else "",
-                                    )
                                 )
 
-            if len(values) > 0:
-                self.sql.execute(
-                    """
-               INSERT INTO tags (storyid, original_tag, original_table, ao3_tag_fandom) VALUES {0}
-             """.format(", ".join(values))
-                )
+        if len(tags_to_insert) > 0:
+            self.sql.execute(
+                """
+           INSERT INTO tags (original_tag, original_type, ao3_tag_fandom) VALUES {0}
+         """.format(", ".join(tags_to_insert.values()))
+            )
+            
+            tag_data = self.sql.execute_dict(
+                "SELECT id, original_tag FROM tags"
+            )
+            for tag_row in tag_data:
+                story_ids = set(tags_to_story_ids[tag_row["original_tag"]])
+                for story_id in story_ids:
+                    self.sql.execute("""
+                    INSERT INTO item_tags (item_id, item_type, tag_id) VALUES ({0}, "{1}", {2})
+                    """.format(
+                        story_id,
+                        "story_link" if table_name == "story_links" else "story",
+                        tag_row["id"]
+                    ))
 
     def distinct_tags(self, database):
         """

From afb8e65c26d587a9f62c8a55c0dfae925984cda1 Mon Sep 17 00:00:00 2001
From: Brianna Dardin <brianna.dardin@gmail.com>
Date: Sun, 16 Mar 2025 16:01:22 -0700
Subject: [PATCH 08/13] Ran ruff formatter

---
 automated_archive/aa.py | 42 ++++++++++++++++++++++-------------------
 shared_python/Sql.py    |  8 ++------
 shared_python/Tags.py   | 38 ++++++++++++++++++++-----------------
 3 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/automated_archive/aa.py b/automated_archive/aa.py
index ed822ce..bc2579b 100755
--- a/automated_archive/aa.py
+++ b/automated_archive/aa.py
@@ -22,14 +22,16 @@ def _clean_file(filepath, log):
     :param filepath: Path to ARCHIVE_DB.pl
     :return: Python dictionary keyed by original story id
     """
-    for i, encoding in enumerate(["utf-8","ascii","Latin-1","Windows-1252"]):
+    for i, encoding in enumerate(["utf-8", "ascii", "Latin-1", "Windows-1252"]):
         try:
             archive_db = codecs.open(filepath, "r", encoding=encoding).read()
             break
-        except:
+        except:  # noqa: E722
             log.error(f"{encoding} encoding failed to read ARCHIVE_DB.pl")
             if i == 3:
-                raise RuntimeError("ARCHIVE_DB.pl can't be read by any of the default encodings, please fix the file and try again.")
+                raise RuntimeError(
+                    "ARCHIVE_DB.pl can't be read by any of the default encodings, please fix the file and try again."
+                )
 
     # Manually escape single quote entity and reformat file as a Python dictionary
     step1 = html.unescape(archive_db.replace("&#39;", "\\&#39;"))
@@ -128,17 +130,15 @@ def _extract_fandoms(args, record):
     return tags.strip(", ")
 
 
-def _extract_date(args, record):
+def _extract_date(args, record, log):
     date_string = record.get(
         "PrintTime",
         record.get(
             "DatePrint",
-            record.get(
-                "Date", str(datetime.now().strftime("%m/%d/%y"))
-            ),
+            record.get("Date", str(datetime.now().strftime("%m/%d/%y"))),
         ),
     )
-    
+
     dt = None
     try:
         # If the date is in the form of a Unix timestamp
@@ -146,9 +146,11 @@ def _extract_date(args, record):
             dt = datetime.fromtimestamp(int(date_string))
         else:
             dt = datetime.strptime(date_string, "%m/%d/%y")
-    except:
-        log.error("Failed to parse date value: "+date_string)
-    
+    except Exception as e:
+        log.error(
+            f"Failed to parse date value '{date_string}' due to exception: {str(e)}"
+        )
+
     return dt.strftime("%Y-%m-%d") if dt else ""
 
 
@@ -195,7 +197,7 @@ def _create_mysql(args, FILES, log):
             FILES[i].get("Summary", "").replace("'", "\\'"),
             _extract_tags(args, FILES[i]),
             _extract_characters(args, FILES[i]),
-            _extract_date(args, FILES[i]),
+            _extract_date(args, FILES[i], log),
             FILES[i].get("Location", "").replace("'", "\\'"),
             FILES[i]
             .get("LocationURL", FILES[i].get("StoryURL", ""))
@@ -203,7 +205,9 @@ def _create_mysql(args, FILES, log):
             FILES[i].get("Notes", "").replace("'", "\\'"),
             _extract_relationships(args, FILES[i]),
             FILES[i].get("Rating", ""),
-            FILES[i].get("Warnings", FILES[i].get("OptionalWarnings", "")).replace("'", "\\'"),
+            FILES[i]
+            .get("Warnings", FILES[i].get("OptionalWarnings", ""))
+            .replace("'", "\\'"),
             FILES[i].get("Author", "").strip(),
             FILES[i].get("Email", FILES[i].get("EmailAuthor", "")).lower().strip(),
             FILES[i].get("FileType", args.chapters_file_extensions)
@@ -264,7 +268,7 @@ def _create_mysql(args, FILES, log):
             authorid = result[0][0]
             item_dict[original_id] = {
                 "authorid": authorid,
-                "itemtype": "story_link" if table_name == "story_links" else "story"
+                "itemtype": "story_link" if table_name == "story_links" else "story",
             }
 
             stor = """
@@ -308,19 +312,19 @@ def _create_mysql(args, FILES, log):
             )
             raise
     db.commit()
-    
+
     for itemid, item_info in item_dict.items():
         try:
             item_auth = """
             INSERT INTO item_authors (author_id, item_id, item_type)
             VALUES({0}, {1}, '{2}');\n""".format(
-                item_info["authorid"],
-                itemid,
-                item_info["itemtype"]
+                item_info["authorid"], itemid, item_info["itemtype"]
             )
             cursor.execute(item_auth)
         except:
-            log.error(f"Failed to insert item_authors for {item_info['itemtype']} {itemid} with author {item_info['authorid']}")
+            log.error(
+                f"Failed to insert item_authors for {item_info['itemtype']} {itemid} with author {item_info['authorid']}"
+            )
             raise
     db.commit()
 
diff --git a/shared_python/Sql.py b/shared_python/Sql.py
index 2d2d6af..54057aa 100755
--- a/shared_python/Sql.py
+++ b/shared_python/Sql.py
@@ -53,18 +53,14 @@ def execute_and_fetchall(self, database: str, statement: str):
         self.conn.commit()
         return cursor.fetchall()
 
-    def run_script_from_file(
-        self, filename, database, initial_load=False
-    ):
+    def run_script_from_file(self, filename, database, initial_load=False):
         # Open and read the file as a single buffer
         fd = open(filename, "r")
         sqlFile = fd.read()
         fd.close()
         self.run_sql_file(sqlFile, database, initial_load)
 
-    def run_sql_file(
-        self, sqlFile, database, initial_load=False
-    ):
+    def run_sql_file(self, sqlFile, database, initial_load=False):
         # replace placeholders and return all SQL commands (split on ';')
         sqlCommands = sqlFile.replace("$DATABASE$", database).split(";\n")
 
diff --git a/shared_python/Tags.py b/shared_python/Tags.py
index 9c993fa..b1c4b53 100755
--- a/shared_python/Tags.py
+++ b/shared_python/Tags.py
@@ -95,14 +95,18 @@ def populate_tag_table(
                             if isinstance(
                                 tag_col_lookup[col], str
                             ):  # Probably AA or a custom archive
-                                cleaned_tag = re.sub(r'(?<!\\)"', '\\"', val.replace("'", "'").strip())
-                                tags_to_story_ids[cleaned_tag].append(story_tags_row[story_id_col_name])
-                                tags_to_insert[cleaned_tag] = '("{0}", "{1}", "{2}")'.format(
-                                        cleaned_tag,
-                                        tag_col_lookup[col],
-                                        story_tags_row["fandoms"]
-                                        if needs_fandom
-                                        else "",
+                                cleaned_tag = re.sub(
+                                    r'(?<!\\)"', '\\"', val.replace("'", "'").strip()
+                                )
+                                tags_to_story_ids[cleaned_tag].append(
+                                    story_tags_row[story_id_col_name]
+                                )
+                                tags_to_insert[
+                                    cleaned_tag
+                                ] = '("{0}", "{1}", "{2}")'.format(
+                                    cleaned_tag,
+                                    tag_col_lookup[col],
+                                    story_tags_row["fandoms"] if needs_fandom else "",
                                 )
 
         if len(tags_to_insert) > 0:
@@ -111,20 +115,20 @@ def populate_tag_table(
            INSERT INTO tags (original_tag, original_type, ao3_tag_fandom) VALUES {0}
          """.format(", ".join(tags_to_insert.values()))
             )
-            
-            tag_data = self.sql.execute_dict(
-                "SELECT id, original_tag FROM tags"
-            )
+
+            tag_data = self.sql.execute_dict("SELECT id, original_tag FROM tags")
             for tag_row in tag_data:
                 story_ids = set(tags_to_story_ids[tag_row["original_tag"]])
                 for story_id in story_ids:
-                    self.sql.execute("""
+                    self.sql.execute(
+                        """
                     INSERT INTO item_tags (item_id, item_type, tag_id) VALUES ({0}, "{1}", {2})
                     """.format(
-                        story_id,
-                        "story_link" if table_name == "story_links" else "story",
-                        tag_row["id"]
-                    ))
+                            story_id,
+                            "story_link" if table_name == "story_links" else "story",
+                            tag_row["id"],
+                        )
+                    )
 
     def distinct_tags(self, database):
         """

From d5e4ab1541af115c70aec1fa4a6c09ed18d093f5 Mon Sep 17 00:00:00 2001
From: Brianna Dardin <brianna.dardin@gmail.com>
Date: Sun, 16 Mar 2025 16:05:05 -0700
Subject: [PATCH 09/13] Changed macos github action from latest to 13

---
 .github/workflows/python-app-macos-windows.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app-macos-windows.yml b/.github/workflows/python-app-macos-windows.yml
index 9d0d720..ec824a5 100644
--- a/.github/workflows/python-app-macos-windows.yml
+++ b/.github/workflows/python-app-macos-windows.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ macos-latest, windows-latest ]
+        os: [ macos-13, windows-latest ] # Using macos-13 since macos-latest no longer supports Python 3.8
 
     steps:
       - uses: actions/checkout@v2

From 75d8f656e06c5b19f69d7884ce16f5cfabf18f37 Mon Sep 17 00:00:00 2001
From: Brianna Dardin <brianna.dardin@gmail.com>
Date: Sun, 16 Mar 2025 18:44:27 -0700
Subject: [PATCH 10/13] Prompt for encoding of ARCHIVE_DB.pl

---
 automated_archive/aa.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/automated_archive/aa.py b/automated_archive/aa.py
index bc2579b..e0593ff 100755
--- a/automated_archive/aa.py
+++ b/automated_archive/aa.py
@@ -22,16 +22,10 @@ def _clean_file(filepath, log):
     :param filepath: Path to ARCHIVE_DB.pl
     :return: Python dictionary keyed by original story id
     """
-    for i, encoding in enumerate(["utf-8", "ascii", "Latin-1", "Windows-1252"]):
-        try:
-            archive_db = codecs.open(filepath, "r", encoding=encoding).read()
-            break
-        except:  # noqa: E722
-            log.error(f"{encoding} encoding failed to read ARCHIVE_DB.pl")
-            if i == 3:
-                raise RuntimeError(
-                    "ARCHIVE_DB.pl can't be read by any of the default encodings, please fix the file and try again."
-                )
+    encoding = input('Encoding for the ARCHIVE_DB.pl file (default: "utf-8"): ')
+    if encoding is None or encoding == "":
+        encoding = "utf-8"
+    archive_db = codecs.open(filepath, "r", encoding=encoding).read()
 
     # Manually escape single quote entity and reformat file as a Python dictionary
     step1 = html.unescape(archive_db.replace("&#39;", "\\&#39;"))

From 30733a73fca5c709ca2e0453c960d061f57faec9 Mon Sep 17 00:00:00 2001
From: Brianna Dardin <brianna.dardin@gmail.com>
Date: Mon, 17 Mar 2025 13:21:40 -0700
Subject: [PATCH 11/13] Updated step 2a so it'll work on windows if chapter
 urls contain forward slashes

---
 shared_python/Chapters.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/shared_python/Chapters.py b/shared_python/Chapters.py
index 7189dae..6d79b35 100755
--- a/shared_python/Chapters.py
+++ b/shared_python/Chapters.py
@@ -143,6 +143,8 @@ def populate_chapters(self, folder=None, extensions=None):
         else:
             for _, chapter_path in file_paths.items():
                 path = chapter_path.replace(self.args.chapters_path, "")[1:]
+                if os.sep == "\\":  # if this script is run on windows
+                    path = path.replace("\\", "/")
                 with codecs.open(chapter_path, "r", encoding=char_encoding) as c:
                     try:
                         cur = Common.print_progress(cur, total)

From c6731cde69b119f3153a12526491329e762a418e Mon Sep 17 00:00:00 2001
From: Brianna Dardin <brianna.dardin@gmail.com>
Date: Tue, 18 Mar 2025 18:15:34 -0700
Subject: [PATCH 12/13] Updated the wording for the ARCHIVE_DB.pl prompt

per Ariana's suggestion

Co-authored-by: Ariana <ariana-paris@users.noreply.github.com>
---
 automated_archive/aa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/automated_archive/aa.py b/automated_archive/aa.py
index e0593ff..2ddc38d 100755
--- a/automated_archive/aa.py
+++ b/automated_archive/aa.py
@@ -22,7 +22,7 @@ def _clean_file(filepath, log):
     :param filepath: Path to ARCHIVE_DB.pl
     :return: Python dictionary keyed by original story id
     """
-    encoding = input('Encoding for the ARCHIVE_DB.pl file (default: "utf-8"): ')
+    encoding = input('Encoding for the ARCHIVE_DB.pl file, e.g. "utf-8", "latin_1", "cp1252" (default: "utf-8"): ')
     if encoding is None or encoding == "":
         encoding = "utf-8"
     archive_db = codecs.open(filepath, "r", encoding=encoding).read()

From 8dfed1f71af63879ec4088683bb79349d1ee3114 Mon Sep 17 00:00:00 2001
From: Brianna Dardin <brianna.dardin@gmail.com>
Date: Tue, 18 Mar 2025 18:20:07 -0700
Subject: [PATCH 13/13] Ran ruff formatter again to fix build checks

---
 automated_archive/aa.py | 4 +++-
 shared_python/Tags.py   | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/automated_archive/aa.py b/automated_archive/aa.py
index 2ddc38d..6d86166 100755
--- a/automated_archive/aa.py
+++ b/automated_archive/aa.py
@@ -22,7 +22,9 @@ def _clean_file(filepath, log):
     :param filepath: Path to ARCHIVE_DB.pl
     :return: Python dictionary keyed by original story id
     """
-    encoding = input('Encoding for the ARCHIVE_DB.pl file, e.g. "utf-8", "latin_1", "cp1252" (default: "utf-8"): ')
+    encoding = input(
+        'Encoding for the ARCHIVE_DB.pl file, e.g. "utf-8", "latin_1", "cp1252" (default: "utf-8"): '
+    )
     if encoding is None or encoding == "":
         encoding = "utf-8"
     archive_db = codecs.open(filepath, "r", encoding=encoding).read()
diff --git a/shared_python/Tags.py b/shared_python/Tags.py
index b1c4b53..1dc780c 100755
--- a/shared_python/Tags.py
+++ b/shared_python/Tags.py
@@ -96,7 +96,9 @@ def populate_tag_table(
                                 tag_col_lookup[col], str
                             ):  # Probably AA or a custom archive
                                 cleaned_tag = re.sub(
-                                    r'(?<!\\)"', '\\"', val.replace("'", "'").strip()
+                                    r'(?<!\\)"',
+                                    '\\"',
+                                    val.replace("'", "'").strip(),
                                 )
                                 tags_to_story_ids[cleaned_tag].append(
                                     story_tags_row[story_id_col_name]