From d4f0b20b71440caa223ed91389dea7186a4fcb20 Mon Sep 17 00:00:00 2001 From: eggy Date: Sun, 26 May 2024 15:28:06 -0400 Subject: [PATCH 01/13] fix: specify database when fetching --- shared_python/Chapters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shared_python/Chapters.py b/shared_python/Chapters.py index 7bd44f6..10e2eff 100755 --- a/shared_python/Chapters.py +++ b/shared_python/Chapters.py @@ -81,6 +81,7 @@ def _gather_and_dedupe(self, chapters_path, extensions, has_ids=False): for cid, duplicate in duplicate_chapters.items(): # look up the author id and add that one to the file_names list sql_author_id = self.sql.execute_and_fetchall( + self.sql.database, "SELECT author_id FROM chapters WHERE id = {0}".format(cid) ) if len(sql_author_id) > 0: From f8c98b4a8d7faf8817f9de1cb6c5e322e4e85086 Mon Sep 17 00:00:00 2001 From: eggy Date: Sun, 26 May 2024 16:06:02 -0400 Subject: [PATCH 02/13] fix: correct args --- 03-Export-Tags-Authors-Stories.py | 1 + automated_archive/aa.py | 11 +++++------ shared_python/Sql.py | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/03-Export-Tags-Authors-Stories.py b/03-Export-Tags-Authors-Stories.py index 4604b78..d036adf 100755 --- a/03-Export-Tags-Authors-Stories.py +++ b/03-Export-Tags-Authors-Stories.py @@ -22,6 +22,7 @@ def write_csv(data, filename, columns): fp.close() + if __name__ == "__main__": """ This step exports the Tag Wrangling and Authors with stories CSV files which you then have to import into Google diff --git a/automated_archive/aa.py b/automated_archive/aa.py index 44b4b4d..28ae35f 100755 --- a/automated_archive/aa.py +++ b/automated_archive/aa.py @@ -4,6 +4,7 @@ import codecs import re import os +from pathlib import Path from html.parser import HTMLParser from pymysql import connect @@ -123,7 +124,7 @@ def _extract_fandoms(args, record): def _create_mysql(args, FILES, log): - db = connect(args.db_host, args.db_user, args.db_password, "") + db = connect(host=args.db_host, user=args.db_user, password=args.db_password, db="") cursor = db.cursor() DATABASE_NAME = args.temp_db_database @@ -132,12 +133,10 @@ def _create_mysql(args, FILES, log): cursor.execute("create database {0};".format(DATABASE_NAME)) cursor.execute("use {0}".format(DATABASE_NAME)) - sql = Sql(args) - codepath = os.path.dirname(os.path.realpath(__file__)) + sql = Sql(args, log) + script_path = Path(__file__).parent.parent / "shared_python" / "create-open-doors-tables.sql" - sql.run_script_from_file( - codepath + "/shared_python/create-open-doors-tables.sql", database=DATABASE_NAME - ) + sql.run_script_from_file(script_path, database=DATABASE_NAME) db.commit() authors = [ diff --git a/shared_python/Sql.py b/shared_python/Sql.py index 9a5227b..5bf0100 100755 --- a/shared_python/Sql.py +++ b/shared_python/Sql.py @@ -1,4 +1,6 @@ import re +from pathlib import Path +from typing import Union import warnings # ignore unhelpful MySQL warnings @@ -53,7 +55,7 @@ def execute_and_fetchall(self, database: str, statement: str): self.conn.commit() return cursor.fetchall() - def run_script_from_file(self, filename, database, initial_load=False): + def run_script_from_file(self, filename: Union[str, Path], database, initial_load=False): # Open and read the file as a single buffer fd = open(filename, "r") sqlFile = fd.read() From 8ff4a68e851694b930a014a58a99a8b43b7886fe Mon Sep 17 00:00:00 2001 From: eggy Date: Sun, 26 May 2024 16:15:55 -0400 Subject: [PATCH 03/13] fix: correct string type --- shared_python/Tags.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared_python/Tags.py b/shared_python/Tags.py index e69068f..48b2724 100755 --- a/shared_python/Tags.py +++ b/shared_python/Tags.py @@ -94,7 +94,7 @@ def populate_tag_table( tag_col_lookup[col], str ): # Probably AA or a custom archive cleaned_tag = ( - val.encode("utf-8").replace("'", "'").strip() + val.replace("'", "'").strip() ) values.append( From 4a08b7edfff6f151e0d0e43effe7b80d0ca24505 Mon Sep 17 00:00:00 2001 From: eggy Date: Sun, 26 May 2024 22:17:59 -0400 Subject: [PATCH 04/13] chore: fix lint --- automated_archive/aa.py | 1 - 1 file changed, 1 deletion(-) diff --git a/automated_archive/aa.py b/automated_archive/aa.py index 28ae35f..06df23d 100755 --- a/automated_archive/aa.py +++ b/automated_archive/aa.py @@ -3,7 +3,6 @@ import datetime import codecs import re -import os from pathlib import Path from html.parser import HTMLParser From b02e2935daae7585d41e979768fab9a8b2e5c604 Mon Sep 17 00:00:00 2001 From: eggy Date: Sun, 26 May 2024 22:19:30 -0400 Subject: [PATCH 05/13] chore: fix format --- 03-Export-Tags-Authors-Stories.py | 1 - automated_archive/aa.py | 4 +++- shared_python/Chapters.py | 2 +- shared_python/Sql.py | 4 +++- shared_python/Tags.py | 4 +--- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/03-Export-Tags-Authors-Stories.py b/03-Export-Tags-Authors-Stories.py index d036adf..4604b78 100755 --- a/03-Export-Tags-Authors-Stories.py +++ b/03-Export-Tags-Authors-Stories.py @@ -22,7 +22,6 @@ def write_csv(data, filename, columns): fp.close() - if __name__ == "__main__": """ This step exports the Tag Wrangling and Authors with stories CSV files which you then have to import into Google diff --git a/automated_archive/aa.py b/automated_archive/aa.py index 06df23d..df451b4 100755 --- a/automated_archive/aa.py +++ b/automated_archive/aa.py @@ -133,7 +133,9 @@ def _create_mysql(args, FILES, log): cursor.execute("use {0}".format(DATABASE_NAME)) sql = Sql(args, log) - script_path = Path(__file__).parent.parent / "shared_python" / "create-open-doors-tables.sql" + script_path = ( + Path(__file__).parent.parent / "shared_python" / "create-open-doors-tables.sql" + ) sql.run_script_from_file(script_path, database=DATABASE_NAME) db.commit() diff --git a/shared_python/Chapters.py b/shared_python/Chapters.py index 10e2eff..7189dae 100755 --- a/shared_python/Chapters.py +++ b/shared_python/Chapters.py @@ -82,7 +82,7 @@ def _gather_and_dedupe(self, chapters_path, extensions, has_ids=False): # look up the author id and add that one to the file_names list sql_author_id = self.sql.execute_and_fetchall( self.sql.database, - "SELECT author_id FROM chapters WHERE id = {0}".format(cid) + "SELECT author_id FROM chapters WHERE id = {0}".format(cid), ) if len(sql_author_id) > 0: author_id = sql_author_id[0][0] diff --git a/shared_python/Sql.py b/shared_python/Sql.py index 5bf0100..31e140f 100755 --- a/shared_python/Sql.py +++ b/shared_python/Sql.py @@ -55,7 +55,9 @@ def execute_and_fetchall(self, database: str, statement: str): self.conn.commit() return cursor.fetchall() - def run_script_from_file(self, filename: Union[str, Path], database, initial_load=False): + def run_script_from_file( + self, filename: Union[str, Path], database, initial_load=False + ): # Open and read the file as a single buffer fd = open(filename, "r") sqlFile = fd.read() diff --git a/shared_python/Tags.py b/shared_python/Tags.py index 48b2724..11da9ed 100755 --- a/shared_python/Tags.py +++ b/shared_python/Tags.py @@ -93,9 +93,7 @@ def populate_tag_table( if isinstance( tag_col_lookup[col], str ): # Probably AA or a custom archive - cleaned_tag = ( - val.replace("'", "'").strip() - ) + cleaned_tag = val.replace("'", "'").strip() values.append( '({0}, "{1}", "{2}", "{3}")'.format( From 53415c045a58fdf0d5b2eb242e842df298b1f86c Mon Sep 17 00:00:00 2001 From: Brianna Dardin Date: Sun, 16 Mar 2025 13:43:53 -0700 Subject: [PATCH 06/13] Updated step 1 to use working schema & other changes for Unit B --- automated_archive/aa.py | 95 +++++++++++++++++++++++++++++------------ shared_python/Sql.py | 8 ++-- 2 files changed, 72 insertions(+), 31 deletions(-) diff --git a/automated_archive/aa.py b/automated_archive/aa.py index df451b4..ed822ce 100755 --- a/automated_archive/aa.py +++ b/automated_archive/aa.py @@ -1,10 +1,10 @@ # -- coding: utf-8 -- -import datetime +from datetime import datetime import codecs import re -from pathlib import Path -from html.parser import HTMLParser +import html +import urllib.request from pymysql import connect @@ -22,11 +22,17 @@ def _clean_file(filepath, log): :param filepath: Path to ARCHIVE_DB.pl :return: Python dictionary keyed by original story id """ - h = HTMLParser() - archive_db = codecs.open(filepath, "r", encoding="utf-8").read() + for i, encoding in enumerate(["utf-8","ascii","Latin-1","Windows-1252"]): + try: + archive_db = codecs.open(filepath, "r", encoding=encoding).read() + break + except: + log.error(f"{encoding} encoding failed to read ARCHIVE_DB.pl") + if i == 3: + raise RuntimeError("ARCHIVE_DB.pl can't be read by any of the default encodings, please fix the file and try again.") # Manually escape single quote entity and reformat file as a Python dictionary - step1 = h.unescape(archive_db.replace("'", "\\'")) + step1 = html.unescape(archive_db.replace("'", "\\'")) # Indent the file with a single tab instead of whatever is currently used step15 = re.sub(r"^\s+", "\t", step1) @@ -122,6 +128,30 @@ def _extract_fandoms(args, record): return tags.strip(", ") +def _extract_date(args, record): + date_string = record.get( + "PrintTime", + record.get( + "DatePrint", + record.get( + "Date", str(datetime.now().strftime("%m/%d/%y")) + ), + ), + ) + + dt = None + try: + # If the date is in the form of a Unix timestamp + if date_string.isdigit(): + dt = datetime.fromtimestamp(int(date_string)) + else: + dt = datetime.strptime(date_string, "%m/%d/%y") + except: + log.error("Failed to parse date value: "+date_string) + + return dt.strftime("%Y-%m-%d") if dt else "" + + def _create_mysql(args, FILES, log): db = connect(host=args.db_host, user=args.db_user, password=args.db_password, db="") cursor = db.cursor() @@ -132,12 +162,13 @@ def _create_mysql(args, FILES, log): cursor.execute("create database {0};".format(DATABASE_NAME)) cursor.execute("use {0}".format(DATABASE_NAME)) - sql = Sql(args, log) - script_path = ( - Path(__file__).parent.parent / "shared_python" / "create-open-doors-tables.sql" - ) + # Instead of duplicating this file in the repo grab it from the master branch of eFiction + url = "https://raw.githubusercontent.com/otwcode/open-doors-eFiction/refs/heads/master/opendoors/open-doors-tables-working.sql" + with urllib.request.urlopen(url) as response: + script = response.read().decode() - sql.run_script_from_file(script_path, database=DATABASE_NAME) + sql = Sql(args, log) + sql.run_sql_file(script, database=DATABASE_NAME) db.commit() authors = [ @@ -164,18 +195,7 @@ def _create_mysql(args, FILES, log): FILES[i].get("Summary", "").replace("'", "\\'"), _extract_tags(args, FILES[i]), _extract_characters(args, FILES[i]), - datetime.datetime.strptime( - FILES[i].get( - "PrintTime", - FILES[i].get( - "DatePrint", - FILES[i].get( - "Date", str(datetime.datetime.now().strftime("%m/%d/%y")) - ), - ), - ), - "%m/%d/%y", - ).strftime("%Y-%m-%d"), + _extract_date(args, FILES[i]), FILES[i].get("Location", "").replace("'", "\\'"), FILES[i] .get("LocationURL", FILES[i].get("StoryURL", "")) @@ -183,7 +203,7 @@ def _create_mysql(args, FILES, log): FILES[i].get("Notes", "").replace("'", "\\'"), _extract_relationships(args, FILES[i]), FILES[i].get("Rating", ""), - FILES[i].get("Warnings", "").replace("'", "\\'"), + FILES[i].get("Warnings", FILES[i].get("OptionalWarnings", "")).replace("'", "\\'"), FILES[i].get("Author", "").strip(), FILES[i].get("Email", FILES[i].get("EmailAuthor", "")).lower().strip(), FILES[i].get("FileType", args.chapters_file_extensions) @@ -196,6 +216,7 @@ def _create_mysql(args, FILES, log): cur = 0 total = len(FILES) + item_dict = {} for ( original_id, title, @@ -225,7 +246,7 @@ def _create_mysql(args, FILES, log): table_name = "stories" else: filename = url - table_name = "bookmarks" + table_name = "story_links" # Clean up fandoms and add default fandom if it exists final_fandoms = fandoms.replace("'", r"\'") @@ -241,10 +262,14 @@ def _create_mysql(args, FILES, log): if element[1] == author and element[2] == email ] authorid = result[0][0] + item_dict[original_id] = { + "authorid": authorid, + "itemtype": "story_link" if table_name == "story_links" else "story" + } stor = """ - INSERT INTO {0} (id, fandoms, title, summary, tags, characters, date, url, notes, relationships, rating, warnings, author_id) - VALUES({1}, '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}', '{13}');\n""".format( + INSERT INTO {0} (id, fandoms, title, summary, tags, characters, date, url, notes, relationships, rating, warnings) + VALUES({1}, '{2}', '{3}', '{4}', '{5}', '{6}', '{7}', '{8}', '{9}', '{10}', '{11}', '{12}');\n""".format( table_name, original_id, final_fandoms.replace(r"\\", "\\"), @@ -258,7 +283,6 @@ def _create_mysql(args, FILES, log): pairings, rating, warnings, - authorid, ) cursor.execute(stor) except: @@ -284,6 +308,21 @@ def _create_mysql(args, FILES, log): ) raise db.commit() + + for itemid, item_info in item_dict.items(): + try: + item_auth = """ + INSERT INTO item_authors (author_id, item_id, item_type) + VALUES({0}, {1}, '{2}');\n""".format( + item_info["authorid"], + itemid, + item_info["itemtype"] + ) + cursor.execute(item_auth) + except: + log.error(f"Failed to insert item_authors for {item_info['itemtype']} {itemid} with author {item_info['authorid']}") + raise + db.commit() def clean_and_load_data(args, log): diff --git a/shared_python/Sql.py b/shared_python/Sql.py index 31e140f..2d2d6af 100755 --- a/shared_python/Sql.py +++ b/shared_python/Sql.py @@ -1,6 +1,4 @@ import re -from pathlib import Path -from typing import Union import warnings # ignore unhelpful MySQL warnings @@ -56,13 +54,17 @@ def execute_and_fetchall(self, database: str, statement: str): return cursor.fetchall() def run_script_from_file( - self, filename: Union[str, Path], database, initial_load=False + self, filename, database, initial_load=False ): # Open and read the file as a single buffer fd = open(filename, "r") sqlFile = fd.read() fd.close() + self.run_sql_file(sqlFile, database, initial_load) + def run_sql_file( + self, sqlFile, database, initial_load=False + ): # replace placeholders and return all SQL commands (split on ';') sqlCommands = sqlFile.replace("$DATABASE$", database).split(";\n") From cac604ae00af3960215e22436f24f2b0b9ae1835 Mon Sep 17 00:00:00 2001 From: Brianna Dardin Date: Sun, 16 Mar 2025 15:37:07 -0700 Subject: [PATCH 07/13] Updated step 2b to insert unique tags and item_tags --- 02b-Extract-Tags-From-Stories.py | 1 - shared_python/Tags.py | 41 +++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/02b-Extract-Tags-From-Stories.py b/02b-Extract-Tags-From-Stories.py index dce273e..0b6fa77 100755 --- a/02b-Extract-Tags-From-Stories.py +++ b/02b-Extract-Tags-From-Stories.py @@ -22,7 +22,6 @@ args.temp_db_database ) ) - tags.create_tags_table() tag_col_list = {} stories_id_name = "" diff --git a/shared_python/Tags.py b/shared_python/Tags.py index 11da9ed..9c993fa 100755 --- a/shared_python/Tags.py +++ b/shared_python/Tags.py @@ -1,4 +1,5 @@ import re +from collections import defaultdict from html.parser import HTMLParser from logging import Logger @@ -83,8 +84,9 @@ def populate_tag_table( ) ) + tags_to_insert = {} + tags_to_story_ids = defaultdict(list) for story_tags_row in data: - values = [] for col in tag_columns: needs_fandom = col in tags_with_fandoms if story_tags_row[col] is not None: @@ -93,25 +95,36 @@ def populate_tag_table( if isinstance( tag_col_lookup[col], str ): # Probably AA or a custom archive - cleaned_tag = val.replace("'", "'").strip() - - values.append( - '({0}, "{1}", "{2}", "{3}")'.format( - story_tags_row[story_id_col_name], - re.sub(r'(? 0: - self.sql.execute( - """ - INSERT INTO tags (storyid, original_tag, original_table, ao3_tag_fandom) VALUES {0} - """.format(", ".join(values)) - ) + if len(tags_to_insert) > 0: + self.sql.execute( + """ + INSERT INTO tags (original_tag, original_type, ao3_tag_fandom) VALUES {0} + """.format(", ".join(tags_to_insert.values())) + ) + + tag_data = self.sql.execute_dict( + "SELECT id, original_tag FROM tags" + ) + for tag_row in tag_data: + story_ids = set(tags_to_story_ids[tag_row["original_tag"]]) + for story_id in story_ids: + self.sql.execute(""" + INSERT INTO item_tags (item_id, item_type, tag_id) VALUES ({0}, "{1}", {2}) + """.format( + story_id, + "story_link" if table_name == "story_links" else "story", + tag_row["id"] + )) def distinct_tags(self, database): """ From afb8e65c26d587a9f62c8a55c0dfae925984cda1 Mon Sep 17 00:00:00 2001 From: Brianna Dardin Date: Sun, 16 Mar 2025 16:01:22 -0700 Subject: [PATCH 08/13] Ran ruff formatter --- automated_archive/aa.py | 42 ++++++++++++++++++++++------------------- shared_python/Sql.py | 8 ++------ shared_python/Tags.py | 38 ++++++++++++++++++++----------------- 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/automated_archive/aa.py b/automated_archive/aa.py index ed822ce..bc2579b 100755 --- a/automated_archive/aa.py +++ b/automated_archive/aa.py @@ -22,14 +22,16 @@ def _clean_file(filepath, log): :param filepath: Path to ARCHIVE_DB.pl :return: Python dictionary keyed by original story id """ - for i, encoding in enumerate(["utf-8","ascii","Latin-1","Windows-1252"]): + for i, encoding in enumerate(["utf-8", "ascii", "Latin-1", "Windows-1252"]): try: archive_db = codecs.open(filepath, "r", encoding=encoding).read() break - except: + except: # noqa: E722 log.error(f"{encoding} encoding failed to read ARCHIVE_DB.pl") if i == 3: - raise RuntimeError("ARCHIVE_DB.pl can't be read by any of the default encodings, please fix the file and try again.") + raise RuntimeError( + "ARCHIVE_DB.pl can't be read by any of the default encodings, please fix the file and try again." + ) # Manually escape single quote entity and reformat file as a Python dictionary step1 = html.unescape(archive_db.replace("'", "\\'")) @@ -128,17 +130,15 @@ def _extract_fandoms(args, record): return tags.strip(", ") -def _extract_date(args, record): +def _extract_date(args, record, log): date_string = record.get( "PrintTime", record.get( "DatePrint", - record.get( - "Date", str(datetime.now().strftime("%m/%d/%y")) - ), + record.get("Date", str(datetime.now().strftime("%m/%d/%y"))), ), ) - + dt = None try: # If the date is in the form of a Unix timestamp @@ -146,9 +146,11 @@ def _extract_date(args, record): dt = datetime.fromtimestamp(int(date_string)) else: dt = datetime.strptime(date_string, "%m/%d/%y") - except: - log.error("Failed to parse date value: "+date_string) - + except Exception as e: + log.error( + f"Failed to parse date value '{date_string}' due to exception: {str(e)}" + ) + return dt.strftime("%Y-%m-%d") if dt else "" @@ -195,7 +197,7 @@ def _create_mysql(args, FILES, log): FILES[i].get("Summary", "").replace("'", "\\'"), _extract_tags(args, FILES[i]), _extract_characters(args, FILES[i]), - _extract_date(args, FILES[i]), + _extract_date(args, FILES[i], log), FILES[i].get("Location", "").replace("'", "\\'"), FILES[i] .get("LocationURL", FILES[i].get("StoryURL", "")) @@ -203,7 +205,9 @@ def _create_mysql(args, FILES, log): FILES[i].get("Notes", "").replace("'", "\\'"), _extract_relationships(args, FILES[i]), FILES[i].get("Rating", ""), - FILES[i].get("Warnings", FILES[i].get("OptionalWarnings", "")).replace("'", "\\'"), + FILES[i] + .get("Warnings", FILES[i].get("OptionalWarnings", "")) + .replace("'", "\\'"), FILES[i].get("Author", "").strip(), FILES[i].get("Email", FILES[i].get("EmailAuthor", "")).lower().strip(), FILES[i].get("FileType", args.chapters_file_extensions) @@ -264,7 +268,7 @@ def _create_mysql(args, FILES, log): authorid = result[0][0] item_dict[original_id] = { "authorid": authorid, - "itemtype": "story_link" if table_name == "story_links" else "story" + "itemtype": "story_link" if table_name == "story_links" else "story", } stor = """ @@ -308,19 +312,19 @@ def _create_mysql(args, FILES, log): ) raise db.commit() - + for itemid, item_info in item_dict.items(): try: item_auth = """ INSERT INTO item_authors (author_id, item_id, item_type) VALUES({0}, {1}, '{2}');\n""".format( - item_info["authorid"], - itemid, - item_info["itemtype"] + item_info["authorid"], itemid, item_info["itemtype"] ) cursor.execute(item_auth) except: - log.error(f"Failed to insert item_authors for {item_info['itemtype']} {itemid} with author {item_info['authorid']}") + log.error( + f"Failed to insert item_authors for {item_info['itemtype']} {itemid} with author {item_info['authorid']}" + ) raise db.commit() diff --git a/shared_python/Sql.py b/shared_python/Sql.py index 2d2d6af..54057aa 100755 --- a/shared_python/Sql.py +++ b/shared_python/Sql.py @@ -53,18 +53,14 @@ def execute_and_fetchall(self, database: str, statement: str): self.conn.commit() return cursor.fetchall() - def run_script_from_file( - self, filename, database, initial_load=False - ): + def run_script_from_file(self, filename, database, initial_load=False): # Open and read the file as a single buffer fd = open(filename, "r") sqlFile = fd.read() fd.close() self.run_sql_file(sqlFile, database, initial_load) - def run_sql_file( - self, sqlFile, database, initial_load=False - ): + def run_sql_file(self, sqlFile, database, initial_load=False): # replace placeholders and return all SQL commands (split on ';') sqlCommands = sqlFile.replace("$DATABASE$", database).split(";\n") diff --git a/shared_python/Tags.py b/shared_python/Tags.py index 9c993fa..b1c4b53 100755 --- a/shared_python/Tags.py +++ b/shared_python/Tags.py @@ -95,14 +95,18 @@ def populate_tag_table( if isinstance( tag_col_lookup[col], str ): # Probably AA or a custom archive - cleaned_tag = re.sub(r'(? 0: @@ -111,20 +115,20 @@ def populate_tag_table( INSERT INTO tags (original_tag, original_type, ao3_tag_fandom) VALUES {0} """.format(", ".join(tags_to_insert.values())) ) - - tag_data = self.sql.execute_dict( - "SELECT id, original_tag FROM tags" - ) + + tag_data = self.sql.execute_dict("SELECT id, original_tag FROM tags") for tag_row in tag_data: story_ids = set(tags_to_story_ids[tag_row["original_tag"]]) for story_id in story_ids: - self.sql.execute(""" + self.sql.execute( + """ INSERT INTO item_tags (item_id, item_type, tag_id) VALUES ({0}, "{1}", {2}) """.format( - story_id, - "story_link" if table_name == "story_links" else "story", - tag_row["id"] - )) + story_id, + "story_link" if table_name == "story_links" else "story", + tag_row["id"], + ) + ) def distinct_tags(self, database): """ From d5e4ab1541af115c70aec1fa4a6c09ed18d093f5 Mon Sep 17 00:00:00 2001 From: Brianna Dardin Date: Sun, 16 Mar 2025 16:05:05 -0700 Subject: [PATCH 09/13] Changed macos github action from latest to 13 --- .github/workflows/python-app-macos-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app-macos-windows.yml b/.github/workflows/python-app-macos-windows.yml index 9d0d720..ec824a5 100644 --- a/.github/workflows/python-app-macos-windows.yml +++ b/.github/workflows/python-app-macos-windows.yml @@ -15,7 +15,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ macos-latest, windows-latest ] + os: [ macos-13, windows-latest ] # Using macos-13 since macos-latest no longer supports 3.8 steps: - uses: actions/checkout@v2 From 75d8f656e06c5b19f69d7884ce16f5cfabf18f37 Mon Sep 17 00:00:00 2001 From: Brianna Dardin Date: Sun, 16 Mar 2025 18:44:27 -0700 Subject: [PATCH 10/13] Prompt for encoding of ARCHIVE_DB.pl --- automated_archive/aa.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/automated_archive/aa.py b/automated_archive/aa.py index bc2579b..e0593ff 100755 --- a/automated_archive/aa.py +++ b/automated_archive/aa.py @@ -22,16 +22,10 @@ def _clean_file(filepath, log): :param filepath: Path to ARCHIVE_DB.pl :return: Python dictionary keyed by original story id """ - for i, encoding in enumerate(["utf-8", "ascii", "Latin-1", "Windows-1252"]): - try: - archive_db = codecs.open(filepath, "r", encoding=encoding).read() - break - except: # noqa: E722 - log.error(f"{encoding} encoding failed to read ARCHIVE_DB.pl") - if i == 3: - raise RuntimeError( - "ARCHIVE_DB.pl can't be read by any of the default encodings, please fix the file and try again." - ) + encoding = input('Encoding for the ARCHIVE_DB.pl file (default: "utf-8"): ') + if encoding is None or encoding == "": + encoding = "utf-8" + archive_db = codecs.open(filepath, "r", encoding=encoding).read() # Manually escape single quote entity and reformat file as a Python dictionary step1 = html.unescape(archive_db.replace("'", "\\'")) From 30733a73fca5c709ca2e0453c960d061f57faec9 Mon Sep 17 00:00:00 2001 From: Brianna Dardin Date: Mon, 17 Mar 2025 13:21:40 -0700 Subject: [PATCH 11/13] Updated step 2a so it'll work on windows if chapter urls contain forward slashes --- shared_python/Chapters.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/shared_python/Chapters.py b/shared_python/Chapters.py index 7189dae..6d79b35 100755 --- a/shared_python/Chapters.py +++ b/shared_python/Chapters.py @@ -143,6 +143,8 @@ def populate_chapters(self, folder=None, extensions=None): else: for _, chapter_path in file_paths.items(): path = chapter_path.replace(self.args.chapters_path, "")[1:] + if os.sep == "\\": # if this script is run on windows + path = path.replace("\\", "/") with codecs.open(chapter_path, "r", encoding=char_encoding) as c: try: cur = Common.print_progress(cur, total) From c6731cde69b119f3153a12526491329e762a418e Mon Sep 17 00:00:00 2001 From: Brianna Dardin Date: Tue, 18 Mar 2025 18:15:34 -0700 Subject: [PATCH 12/13] Updated the wording for the ARCHIVE_DB.pl prompt per Ariana's suggestion Co-authored-by: Ariana --- automated_archive/aa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automated_archive/aa.py b/automated_archive/aa.py index e0593ff..2ddc38d 100755 --- a/automated_archive/aa.py +++ b/automated_archive/aa.py @@ -22,7 +22,7 @@ def _clean_file(filepath, log): :param filepath: Path to ARCHIVE_DB.pl :return: Python dictionary keyed by original story id """ - encoding = input('Encoding for the ARCHIVE_DB.pl file (default: "utf-8"): ') + encoding = input('Encoding for the ARCHIVE_DB.pl file, e.g. "utf-8", "latin_1", "cp1252" (default: "utf-8"): ') if encoding is None or encoding == "": encoding = "utf-8" archive_db = codecs.open(filepath, "r", encoding=encoding).read() From 8dfed1f71af63879ec4088683bb79349d1ee3114 Mon Sep 17 00:00:00 2001 From: Brianna Dardin Date: Tue, 18 Mar 2025 18:20:07 -0700 Subject: [PATCH 13/13] Ran ruff formatter again to fix build checks --- automated_archive/aa.py | 4 +++- shared_python/Tags.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/automated_archive/aa.py b/automated_archive/aa.py index 2ddc38d..6d86166 100755 --- a/automated_archive/aa.py +++ b/automated_archive/aa.py @@ -22,7 +22,9 @@ def _clean_file(filepath, log): :param filepath: Path to ARCHIVE_DB.pl :return: Python dictionary keyed by original story id """ - encoding = input('Encoding for the ARCHIVE_DB.pl file, e.g. "utf-8", "latin_1", "cp1252" (default: "utf-8"): ') + encoding = input( + 'Encoding for the ARCHIVE_DB.pl file, e.g. "utf-8", "latin_1", "cp1252" (default: "utf-8"): ' + ) if encoding is None or encoding == "": encoding = "utf-8" archive_db = codecs.open(filepath, "r", encoding=encoding).read() diff --git a/shared_python/Tags.py b/shared_python/Tags.py index b1c4b53..1dc780c 100755 --- a/shared_python/Tags.py +++ b/shared_python/Tags.py @@ -96,7 +96,9 @@ def populate_tag_table( tag_col_lookup[col], str ): # Probably AA or a custom archive cleaned_tag = re.sub( - r'(?