diff --git a/verifydump/cache.py b/verifydump/cache.py
new file mode 100644
index 0000000..5c13147
--- /dev/null
+++ b/verifydump/cache.py
@@ -0,0 +1,118 @@
+"""SQLite-backed cache of dump verification results.
+
+Entries are keyed by the dump's file name (not its full path) and are only
+trusted while the file's size and modification time still match the values
+recorded when the entry was written.
+"""
+
+import json
+import logging
+import pathlib
+import sqlite3
+import typing
+
+from .dat import Game, Dat
+
+# Shared connection, set by initialize_cache(). Every other function in this
+# module uses it, so initialize_cache() has to be called first.
+cache_connection = None
+
+
+def initialize_cache(cache_path: pathlib.Path):
+    """Open the cache DB at cache_path and create the cache table if it doesn't exist yet."""
+    logging.debug(f'Initializing cache DB at {cache_path.name}')
+    global cache_connection
+    cache_table_creation = "CREATE TABLE IF NOT EXISTS cache (" \
+                           "name TEXT PRIMARY KEY NOT NULL UNIQUE, " \
+                           "size INTEGER NOT NULL, " \
+                           "time INTEGER NOT NULL, " \
+                           "data TEXT NOT NULL)"
+    cache_connection = sqlite3.connect(cache_path)
+    cache_connection.row_factory = sqlite3.Row
+    with cache_connection:
+        cache_connection.execute(cache_table_creation)
+
+
+def _file_signature(path: pathlib.Path) -> tuple[int, int]:
+    """Return (size, mtime in nanoseconds) for path, taken from a single stat() call so both values describe the same file state."""
+    stat_result = path.stat()
+    return (stat_result.st_size, stat_result.st_mtime_ns)
+
+
+def _store(path: pathlib.Path, data: str):
+    """Insert or replace the cache entry for path, recording its current size and modification time alongside data."""
+    (size, mtime_ns) = _file_signature(path)
+    with cache_connection:
+        cache_connection.execute("INSERT OR REPLACE INTO cache (name, size, time, data) VALUES (?,?,?,?)", [path.name, size, mtime_ns, data])
+
+
+def _load(path: pathlib.Path) -> typing.Optional[str]:
+    """Return the cached data for path, or None if there is no entry or the file's size or modification time has changed."""
+    (size, mtime_ns) = _file_signature(path)
+    with cache_connection:
+        row = cache_connection.execute("SELECT size, time, data FROM cache WHERE name = ?", [path.name]).fetchone()
+    if row is None:
+        return None
+    if (row["size"], row["time"]) != (size, mtime_ns):
+        return None
+    return row["data"]
+
+
+def cache_chd(chd_path: pathlib.Path, game: Game, cue_verification_result: str):
+    """Cache the game that chd_path was verified as, along with the name of its cue verification result."""
+    logging.debug(f'Caching {chd_path.name}')
+    # The ROM details are stored so that a later lookup can confirm the Datfile still describes the same game
+    roms = [{"name": rom.name, "size": rom.size, "sha1": rom.sha1hex} for rom in game.roms]
+    data = {"cue_verification_result": cue_verification_result, "name": game.name, "roms": roms}
+    _store(chd_path, json.dumps(data))
+
+
+def cache_rvz(rvz_path: pathlib.Path, sha1: str):
+    """Cache the SHA-1 hex digest that was computed for rvz_path."""
+    logging.debug(f'Caching {rvz_path.name}')
+    _store(rvz_path, sha1)
+
+
+def get_matching_game_from_dat(cached_data: dict, dat: Dat) -> typing.Optional[Game]:
+    """Return the game in dat that has the cached name and exactly the cached ROMs (by name, size and SHA-1), or None."""
+    cached_game_name = cached_data["name"]
+    cached_roms = cached_data["roms"]
+    dat_game = next((game for game in dat.games if game.name == cached_game_name), None)
+    if not dat_game:
+        return None
+    if len(cached_roms) != len(dat_game.roms):
+        return None
+    for cached_rom in cached_roms:
+        dat_rom = next((rom for rom in dat_game.roms if rom.name == cached_rom["name"]), None)
+        if not dat_rom:
+            return None
+        if cached_rom["size"] != dat_rom.size:
+            return None
+        if cached_rom["sha1"] != dat_rom.sha1hex:
+            return None
+    return dat_game
+
+
+def get_cached_chd(chd_path: pathlib.Path, dat: Dat) -> tuple[typing.Optional[Game], typing.Optional[str]]:
+    """Return (game, cue verification result name) from a still-valid cache entry for chd_path, or (None, None) on a miss."""
+    logging.debug(f'Checking cache for {chd_path.name}')
+    cached_data_json = _load(chd_path)
+    if cached_data_json is None:
+        return (None, None)
+    try:
+        cached_data = json.loads(cached_data_json)
+        matched_game = get_matching_game_from_dat(cached_data, dat)
+        cue_verification_result = cached_data["cue_verification_result"]
+    except (ValueError, KeyError, TypeError):
+        # An entry that can't be decoded is treated as a miss so that the dump just gets verified (and cached) again
+        logging.debug(f'Ignoring unreadable cache entry for {chd_path.name}')
+        return (None, None)
+    if not matched_game:
+        return (None, None)
+    return (matched_game, cue_verification_result)
+
+
+def get_cached_rvz(rvz_path: pathlib.Path) -> typing.Optional[str]:
+    """Return the SHA-1 hex digest from a still-valid cache entry for rvz_path, or None on a miss."""
+    logging.debug(f'Checking cache for {rvz_path.name}')
+    return _load(rvz_path)
diff --git a/verifydump/console.py b/verifydump/console.py index 49cd804..235e8d0 100644 --- a/verifydump/console.py +++ b/verifydump/console.py @@ -9,6 +9,7 @@ from .convert import ConversionException, convert_chd_to_normalized_redump_dump_folder, convert_gdi_to_cue from .verify import VerificationException, verify_dumps from .dat import DatParsingException, load_dat +from .cache import initialize_cache def arg_parser_with_common_args() -> argparse.ArgumentParser: @@ -28,12 +29,23 @@ def verifydump_main(): arg_parser = arg_parser_with_common_args() arg_parser.add_argument("--allow-cue-file-mismatches", action=argparse.BooleanOptionalAction, default=False, help=f"If the .cue file that {pathlib.Path(sys.argv[0]).stem} generates doesn't match the original dump or extra provided .cue file then it is usually reported as an error. If this option is used then the mismatch is still reported, but isn't treated as an error.") arg_parser.add_argument("--report-unverified", action=argparse.BooleanOptionalAction, default=False, help="Reports games that are present in the Datfile but were not successfully verified.") + arg_parser.add_argument("--cache_file", metavar="cache_file", help="Optional cache file for caching verification results to avoid re-processing files.") arg_parser.add_argument("dat_file", metavar="dat_file_or_zip", help="The Datfile that your dumps will be verified against. It can be zipped.") arg_parser.add_argument("dump_file_or_folder", nargs="+", help="The dump files to verify. 
Specify any number of .chd files, .rvz files, or folders containing those.") args = arg_parser.parse_args() handle_common_args(args) + if args.cache_file: + try: + cache_file = pathlib.Path(args.cache_file) + initialize_cache(cache_file) + except Exception as e: + print(f"Error initializing cache: {e}") + sys.exit(1) + else: + cache_file = None + try: dat = load_dat(pathlib.Path(args.dat_file)) except DatParsingException as e: @@ -43,7 +55,7 @@ def verifydump_main(): print(f"Error reading Datfile: {e}") sys.exit(1) - (verified_games, errors) = verify_dumps(dat, [pathlib.Path(i) for i in args.dump_file_or_folder], show_command_output=args.show_command_output, allow_cue_mismatches=args.allow_cue_file_mismatches, extra_cue_source=pathlib.Path(args.extra_cue_source) if args.extra_cue_source else None) + (verified_games, errors) = verify_dumps(dat, [pathlib.Path(i) for i in args.dump_file_or_folder], show_command_output=args.show_command_output, allow_cue_mismatches=args.allow_cue_file_mismatches, extra_cue_source=pathlib.Path(args.extra_cue_source) if args.extra_cue_source else None, cache=cache_file) if len(verified_games) > 1: print(f"Successfully verified {len(verified_games)} dumps") diff --git a/verifydump/verify.py b/verifydump/verify.py index 16a2a2a..eb0d901 100644 --- a/verifydump/verify.py +++ b/verifydump/verify.py @@ -9,6 +9,7 @@ import typing import zipfile +from .cache import cache_chd, get_cached_chd, cache_rvz, get_cached_rvz from .convert import ConversionException, convert_chd_to_normalized_redump_dump_folder, get_sha1hex_for_rvz from .dat import Dat, Game @@ -32,12 +33,20 @@ def __init__(self, game: Game, cue_verification_result: CueVerificationResult): self.cue_verification_result = cue_verification_result -def verify_chd(chd_path: pathlib.Path, dat: Dat, show_command_output: bool, allow_cue_mismatches: bool, extra_cue_source: pathlib.Path) -> Game: +def verify_chd(chd_path: pathlib.Path, dat: Dat, show_command_output: bool, allow_cue_mismatches: 
bool, extra_cue_source: pathlib.Path, cache: pathlib.Path) -> Game: logging.debug(f'Verifying dump file "{chd_path}"') with tempfile.TemporaryDirectory() as redump_dump_folder_name: - redump_dump_folder = pathlib.Path(redump_dump_folder_name) - convert_chd_to_normalized_redump_dump_folder(chd_path, redump_dump_folder, system=dat.system, show_command_output=show_command_output) - verification_result = verify_redump_dump_folder(redump_dump_folder, dat=dat, extra_cue_source=extra_cue_source) + verification_result = None + if cache: + already_cached = False + (cache_matched_game, cached_cue_verification_result) = get_cached_chd(chd_path, dat) + if cache_matched_game and cached_cue_verification_result: + already_cached = True + verification_result = VerificationResult(cache_matched_game, CueVerificationResult[cached_cue_verification_result]) + if not verification_result: + redump_dump_folder = pathlib.Path(redump_dump_folder_name) + convert_chd_to_normalized_redump_dump_folder(chd_path, redump_dump_folder, system=dat.system, show_command_output=show_command_output) + verification_result = verify_redump_dump_folder(redump_dump_folder, dat=dat, extra_cue_source=extra_cue_source) if verification_result.cue_verification_result in (CueVerificationResult.NO_CUE_NEEDED, CueVerificationResult.GENERATED_CUE_VERIFIED_EXACTLY): logging.info(f'Dump verified correct and complete: "{verification_result.game.name}"') @@ -62,6 +71,9 @@ def verify_chd(chd_path: pathlib.Path, dat: Dat, show_command_output: bool, allo else: raise Exception(f"Unhandled CueVerificationResult value: {verification_result.cue_verification_result}") + if cache and not already_cached: + cache_chd(chd_path, verification_result.game, verification_result.cue_verification_result.name) + return verification_result.game @@ -207,10 +219,17 @@ def verify_redump_dump_folder(dump_folder: pathlib.Path, dat: Dat, extra_cue_sou return VerificationResult(game=game, 
cue_verification_result=CueVerificationResult.GENERATED_CUE_DOES_NOT_MATCH_ESSENTIALS_FROM_EXTRA_CUE) -def verify_rvz(rvz_path: pathlib.Path, dat: Dat, show_command_output: bool) -> Game: +def verify_rvz(rvz_path: pathlib.Path, dat: Dat, show_command_output: bool, cache: pathlib.Path) -> Game: logging.debug(f'Verifying dump file "{rvz_path}"') - sha1hex = get_sha1hex_for_rvz(rvz_path, show_command_output=show_command_output) + sha1hex = None + if cache: + already_cached = False + sha1hex = get_cached_rvz(rvz_path) + if sha1hex: + already_cached = True + if not sha1hex: + sha1hex = get_sha1hex_for_rvz(rvz_path, show_command_output=show_command_output) roms_with_matching_sha1 = dat.roms_by_sha1hex.get(sha1hex) @@ -226,10 +245,13 @@ def verify_rvz(rvz_path: pathlib.Path, dat: Dat, show_command_output: bool) -> G raise VerificationException(f'Dump file "{rvz_path.name}" found in Dat, but it should be named {list_of_rom_names_that_match_sha1}') logging.info(f'Dump verified correct and complete: "{rom_with_matching_sha1_and_name.game.name}"') + + if cache and not already_cached: + cache_rvz(rvz_path, sha1hex) return rom_with_matching_sha1_and_name.game -def verify_dumps(dat: Dat, dump_file_or_folder_paths: typing.List[pathlib.Path], show_command_output: bool, allow_cue_mismatches: bool, extra_cue_source: pathlib.Path) -> tuple[list, list]: +def verify_dumps(dat: Dat, dump_file_or_folder_paths: typing.List[pathlib.Path], show_command_output: bool, allow_cue_mismatches: bool, extra_cue_source: pathlib.Path, cache: pathlib.Path) -> tuple[list, list]: verified_games = [] errors = [] @@ -237,9 +259,9 @@ def verify_dump_if_format_is_supported(dump_path: pathlib.Path, error_if_unsuppo suffix_lower = dump_path.suffix.lower() try: if suffix_lower == ".chd": - verified_games.append(verify_chd(dump_path, dat=dat, show_command_output=show_command_output, allow_cue_mismatches=allow_cue_mismatches, extra_cue_source=extra_cue_source)) + verified_games.append(verify_chd(dump_path, 
dat=dat, show_command_output=show_command_output, allow_cue_mismatches=allow_cue_mismatches, extra_cue_source=extra_cue_source, cache=cache)) elif suffix_lower == ".rvz": - verified_games.append(verify_rvz(dump_path, dat=dat, show_command_output=show_command_output)) + verified_games.append(verify_rvz(dump_path, dat=dat, show_command_output=show_command_output, cache=cache)) elif error_if_unsupported: raise VerificationException(f'{pathlib.Path(sys.argv[0]).stem} doesn\'t know how to handle "{suffix_lower}" dumps') except VerificationException as e: