diff --git a/README.md b/README.md index c9baa45..5ff3d9c 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,32 @@ cat examples/checksums.csv > examples/example_content/dir/.hidden_dir/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df >``` +- **ZIP Support:** + sum-buddy supports processing ZIP files. When a ZIP file is encountered, it will: + - Calculate the checksum of the ZIP file itself. + - List each file inside the ZIP as `zipfile.zip/filename` with its own checksum, using in-memory streaming (no extraction to disk). + + Example: + ```bash + sum-buddy --output-file examples/checksums_zip.csv examples/example_content/ + ``` + > Output + > ```console + > Calculating md5 checksums on examples/example_content/: 100%|████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 15109.16it/s] + > md5 checksums for examples/example_content/ written to examples/checksums_zip.csv + > ``` + ```bash + cat examples/checksums_zip.csv + ``` + > Output: + > ```console + > filepath,filename,md5 + > examples/example_content/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df + > examples/example_content/testzip.zip,testzip.zip,dcf68ba27f40590ff899b63d44e18836 + > examples/example_content/testzip.zip/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df + > examples/example_content/testzip.zip/dir/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df + > examples/example_content/dir/file.txt,file.txt,7d52c7437e9af58dac029dd11b1024df + > ``` If only a target directory is passed, the default settings are to ignore hidden files and directories (those that begin with a `.`), use the `md5` algorithm, and print output to `stdout`, which can be piped (`|`). @@ -172,9 +198,11 @@ pip install -e ".[dev]" 3. Install pre-commit hook ```bash pre-commit install -pre-commit autoupdate # optionally update ``` -4. Run tests: + +### Tests + +To run all tests: ```bash -pytest +python -m pytest ``` diff --git a/src/sumbuddy/__main__.py b/src/sumbuddy/__main__.py index f681c98..7a15e31 100644 --- a/src/sumbuddy/__main__.py +++ b/src/sumbuddy/__main__.py @@ -7,6 +7,7 @@ from tqdm import tqdm import sys import os +from sumbuddy.archive import ArchiveHandler def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='md5', length=None): """ @@ -24,21 +25,23 @@ def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hi mapper = Mapper() if os.path.isfile(input_path): - file_paths = [input_path] + regular_files = [input_path] + zip_archives = [] if ignore_file: print("Warning: --ignore-file (-i) flag is ignored when input is a single file.") if include_hidden: print("Warning: --include-hidden (-H) flag is ignored when input is a single file.") else: try: - file_paths = mapper.gather_file_paths(input_path, ignore_file=ignore_file, include_hidden=include_hidden) + regular_files, zip_archives = mapper.gather_file_paths(input_path, ignore_file=ignore_file, include_hidden=include_hidden) except (EmptyInputDirectoryError, NoFilesAfterFilteringError) as e: sys.exit(str(e)) # Exclude the output file from being hashed if output_filepath: output_file_abs_path = os.path.abspath(output_filepath) - file_paths = [path for path in file_paths if os.path.abspath(path) != output_file_abs_path] + regular_files = [path for path in regular_files if os.path.abspath(path) != output_file_abs_path] + zip_archives = [path for path in zip_archives if os.path.abspath(path) != output_file_abs_path] hasher = Hasher(algorithm) output_stream = open(output_filepath, 'w', newline='') if output_filepath else sys.stdout @@ -48,10 +51,25 @@ def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hi writer.writerow(["filepath", "filename", f"{algorithm}"]) disable_tqdm = output_filepath is None - for file_path in tqdm(file_paths, desc=f"Calculating {algorithm} checksums on {input_path}", disable=disable_tqdm): - checksum = hasher.checksum_file(file_path, algorithm=algorithm, length=length) - writer.writerow([file_path, os.path.basename(file_path), checksum]) - + total_files = len(regular_files) + sum(1 for z in zip_archives for _ in ArchiveHandler.stream_zip(z)) + len(zip_archives) + with tqdm(total=total_files, desc=f"Calculating {algorithm} checksums on {input_path}", disable=disable_tqdm) as pbar: + # Process regular files + for file_path in regular_files: + checksum = hasher.checksum_file(file_path, algorithm=algorithm, length=length) + writer.writerow([file_path, os.path.basename(file_path), checksum]) + pbar.update(1) + # Process zip archives + for zip_path in zip_archives: + # Write checksum for the zip file itself + checksum = hasher.checksum_file(zip_path, algorithm=algorithm, length=length) + writer.writerow([zip_path, os.path.basename(zip_path), checksum]) + pbar.update(1) + # Write checksums for each file inside the zip + for member, file_obj in ArchiveHandler.stream_zip(zip_path): + virtual_path = f"{zip_path}/{member}" + checksum = hasher.checksum_file(file_obj, algorithm=algorithm, length=length) + writer.writerow([virtual_path, os.path.basename(member), checksum]) + pbar.update(1) finally: if output_filepath: output_stream.close() @@ -60,7 +78,6 @@ def get_checksums(input_path, output_filepath=None, ignore_file=None, include_hi print(f"{algorithm} checksums for {input_path} written to {output_filepath}") def main(): - available_algorithms = ', '.join(hashlib.algorithms_available) parser = argparse.ArgumentParser(description="Generate CSV with filepath, filename, and checksums for all files in a given directory (or a single file)") diff --git a/src/sumbuddy/archive.py b/src/sumbuddy/archive.py new file mode 100644 index 0000000..d04de31 --- /dev/null +++ b/src/sumbuddy/archive.py @@ -0,0 +1,65 @@ +import os +import zipfile +import tempfile +import shutil + +class ArchiveHandler: + def __init__(self): + self.temp_dir = None + + def process_zip(self, zip_path, root_dir): + """ + Process a zip file and return paths to its contents. + + Parameters: + ------------ + zip_path - String. Path to the zip file. + root_dir - String. Root directory for relative path calculations. + + Returns: + --------- + List of tuples (file_path, relative_path) for files in the zip. + """ + if not zipfile.is_zipfile(zip_path): + return [] + + # Create a temporary directory for extraction + self.temp_dir = tempfile.mkdtemp() + + try: + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + # Extract all contents to temp directory + zip_ref.extractall(self.temp_dir) + + # Get list of all files in the zip + file_paths = [] + for member in zip_ref.namelist(): + # Only add files, not directories + if member.endswith('/'): + continue + full_path = os.path.join(self.temp_dir, member) + # The path as it should appear in the CSV: zip_path/member + rel_path = f"{zip_path}/{member}" + file_paths.append((full_path, rel_path)) + return file_paths + except Exception as e: + self.cleanup() + raise e + + def cleanup(self): + """Clean up temporary directory if it exists.""" + if self.temp_dir and os.path.exists(self.temp_dir): + shutil.rmtree(self.temp_dir) + self.temp_dir = None + + @staticmethod + def stream_zip(zip_path): + """ + Yield (name, file-like object) for each file in the ZIP archive. + Only yields regular files (not directories). + """ + with zipfile.ZipFile(zip_path, 'r') as zip_ref: + for member in zip_ref.namelist(): + if member.endswith('/'): + continue # skip directories + yield member, zip_ref.open(member) \ No newline at end of file diff --git a/src/sumbuddy/hasher.py b/src/sumbuddy/hasher.py index a17ff2c..85012bd 100644 --- a/src/sumbuddy/hasher.py +++ b/src/sumbuddy/hasher.py @@ -5,13 +5,13 @@ class Hasher: def __init__(self, algorithm='md5'): self.algorithm = algorithm - def checksum_file(self, file_path, algorithm=None, length=None): + def checksum_file(self, file_path_or_obj, algorithm=None, length=None): """ Calculate the checksum of a file using the specified algorithm. Parameters: ------------ - file_path - String. Path to file to apply checksum function. + file_path_or_obj - String or file-like object. Path to file or file-like object to apply checksum function. algorithm - String. Hash function to use for checksums. Default: 'md5', see options with 'hashlib.algorithms_available'. length - Integer [optional]. Length of the digest for SHAKE and BLAKE algorithms in bytes. @@ -55,9 +55,14 @@ def checksum_file(self, file_path, algorithm=None, length=None): raise LengthUsedForFixedLengthHashError(algorithm) hash_func = hashlib.new(algorithm) - # Read the file and update the hash function - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): + # Handle both file paths and file-like objects + if isinstance(file_path_or_obj, str): + with open(file_path_or_obj, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_func.update(chunk) + else: + # Assume it's a file-like object + for chunk in iter(lambda: file_path_or_obj.read(4096), b""): hash_func.update(chunk) # Return the hash digest diff --git a/src/sumbuddy/mapper.py b/src/sumbuddy/mapper.py index a611872..f0fcdc1 100644 --- a/src/sumbuddy/mapper.py +++ b/src/sumbuddy/mapper.py @@ -1,10 +1,13 @@ import os +import zipfile from sumbuddy.filter import Filter from sumbuddy.exceptions import EmptyInputDirectoryError, NoFilesAfterFilteringError, NotADirectoryError +from sumbuddy.archive import ArchiveHandler class Mapper: def __init__(self): self.filter_manager = Filter() + self.archive_handler = ArchiveHandler() def reset_filter(self, ignore_file=None, include_hidden=False): """ @@ -28,16 +31,7 @@ def reset_filter(self, ignore_file=None, include_hidden=False): def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=False): """ Generate list of file paths in the input directory based on ignore pattern rules. - - Parameters: - ------------ - input_directory - String. Directory to traverse for files. - ignore_file - String [optional]. Filepath for the ignore patterns file. - include_hidden - Boolean [optional]. Whether to include hidden files. - - Returns: - --------- - file_paths - List. Files in input_directory that are not ignored. + Returns a tuple: (regular_files, zip_archives) """ if not os.path.isdir(input_directory): @@ -45,7 +39,8 @@ def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=Fa self.reset_filter(ignore_file=ignore_file, include_hidden=include_hidden) - file_paths = [] + regular_files = [] + zip_archives = [] root_directory = os.path.abspath(input_directory) has_files = False @@ -55,11 +50,14 @@ def gather_file_paths(self, input_directory, ignore_file=None, include_hidden=Fa for name in files: file_path = os.path.join(root, name) if self.filter_manager.should_include(file_path, root_directory): - file_paths.append(file_path) + if zipfile.is_zipfile(file_path): + zip_archives.append(file_path) + else: + regular_files.append(file_path) if not has_files: raise EmptyInputDirectoryError(input_directory) - if not file_paths: + if not (regular_files or zip_archives): raise NoFilesAfterFilteringError(input_directory, ignore_file) - return file_paths + return regular_files, zip_archives diff --git a/tests/test_archive.py b/tests/test_archive.py new file mode 100644 index 0000000..68f76b8 --- /dev/null +++ b/tests/test_archive.py @@ -0,0 +1,118 @@ +import tempfile +import zipfile +from pathlib import Path + +from sumbuddy.archive import ArchiveHandler +from sumbuddy.mapper import Mapper +from sumbuddy.hasher import Hasher + + +class TestArchiveHandler: + """Test cases for ArchiveHandler class.""" + + def test_stream_zip_success(self): + """Test streaming files from a zip archive.""" + test_zip_path = Path(__file__).parent / "test_archive.zip" + assert test_zip_path.exists(), "Test zip file not found" + members = list(ArchiveHandler.stream_zip(str(test_zip_path))) + assert len(members) == 2 + names = [name for name, _ in members] + assert any("test_file.txt" in n for n in names) + assert any("nested_file.txt" in n for n in names) + # Check that file-like objects are readable + for name, file_obj in members: + content = file_obj.read() + assert isinstance(content, bytes) + file_obj.close() + + def test_stream_zip_invalid_file(self): + """Test streaming from a non-zip file raises BadZipFile.""" + with tempfile.TemporaryDirectory() as temp_dir: + non_zip_file = Path(temp_dir) / "not_a_zip.txt" + non_zip_file.write_text("This is not a zip file") + try: + list(ArchiveHandler.stream_zip(str(non_zip_file))) + except zipfile.BadZipFile: + pass # Expected + else: + assert False, "Expected zipfile.BadZipFile to be raised for non-zip file" + + +class TestMapperWithZip: + """Test cases for Mapper class with zip file support.""" + + def test_gather_file_paths_with_zip(self): + """Test gathering file paths including zip files.""" + mapper = Mapper() + test_zip_path = Path(__file__).parent / "test_archive.zip" + with tempfile.TemporaryDirectory() as temp_dir: + temp_zip_path = Path(temp_dir) / "test_archive.zip" + import shutil + shutil.copy2(test_zip_path, temp_zip_path) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir) + assert str(temp_zip_path) in zip_archives + assert isinstance(regular_files, list) + assert isinstance(zip_archives, list) + + def test_gather_file_paths_with_zip_and_filter(self): + """Test gathering file paths with zip files and filters.""" + mapper = Mapper() + test_zip_path = Path(__file__).parent / "test_archive.zip" + with tempfile.TemporaryDirectory() as temp_dir: + temp_zip_path = Path(temp_dir) / "test_archive.zip" + import shutil + shutil.copy2(test_zip_path, temp_zip_path) + ignore_file = Path(temp_dir) / ".ignore" + ignore_file.write_text("**/nested_dir/**") + regular_files, zip_archives = mapper.gather_file_paths(temp_dir, ignore_file=str(ignore_file)) + assert str(temp_zip_path) in zip_archives + assert isinstance(regular_files, list) + assert isinstance(zip_archives, list) + + +class TestHasherWithZip: + """Test cases for Hasher class with zip file support.""" + + def test_checksum_file_with_file_like_object(self): + """Test checksum calculation with file-like object.""" + hasher = Hasher() + test_zip_path = Path(__file__).parent / "test_archive.zip" + with zipfile.ZipFile(test_zip_path, 'r') as zip_file: + file_name = zip_file.namelist()[0] + with zip_file.open(file_name) as file_obj: + checksum = hasher.checksum_file(file_obj) + assert isinstance(checksum, str) + assert len(checksum) > 0 + + def test_checksum_file_with_zip_file_path(self): + """Test checksum calculation with zip file path.""" + hasher = Hasher() + test_zip_path = Path(__file__).parent / "test_archive.zip" + checksum = hasher.checksum_file(str(test_zip_path)) + assert isinstance(checksum, str) + assert len(checksum) > 0 + + +def test_integration_zip_support(): + """Integration test for zip support functionality.""" + from sumbuddy import get_checksums + import tempfile + import csv + test_zip_path = Path(__file__).parent / "test_archive.zip" + with tempfile.TemporaryDirectory() as temp_dir: + temp_zip_path = Path(temp_dir) / "test_archive.zip" + import shutil + shutil.copy2(test_zip_path, temp_zip_path) + output_file = Path(temp_dir) / "checksums.csv" + get_checksums(temp_dir, output_file) + assert output_file.exists() + with open(output_file, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) >= 3 + zip_rows = [r for r in rows if r['filename'] == 'test_archive.zip'] + assert len(zip_rows) == 1 + zip_content_rows = [r for r in rows if 'test_archive.zip/' in r['filepath']] + assert len(zip_content_rows) == 2 + for row in rows: + assert row['md5'] and len(row['md5']) > 0 \ No newline at end of file diff --git a/tests/test_archive.zip b/tests/test_archive.zip new file mode 100644 index 0000000..d25a8f5 Binary files /dev/null and b/tests/test_archive.zip differ diff --git a/tests/test_getChecksums.py b/tests/test_getChecksums.py index ec659ff..e2f18be 100644 --- a/tests/test_getChecksums.py +++ b/tests/test_getChecksums.py @@ -41,7 +41,7 @@ def test_get_checksums_single_file_to_stdout(self, mock_checksum, mock_open, moc @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_to_file(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): get_checksums(self.input_path, self.output_filepath, ignore_file=None, include_hidden=False, algorithm=self.algorithm) @@ -55,7 +55,7 @@ def test_get_checksums_to_file(self, mock_checksum, mock_gather, mock_open, mock @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_to_stdout(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): output_stream = StringIO() @@ -70,7 +70,7 @@ def test_get_checksums_to_stdout(self, mock_checksum, mock_gather, mock_open, mo @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_with_ignore_file(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): get_checksums(self.input_path, output_filepath=None, ignore_file=self.ignore_file, include_hidden=False, algorithm=self.algorithm) @@ -79,7 +79,7 @@ def test_get_checksums_with_ignore_file(self, mock_checksum, mock_gather, mock_o @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt', '.hidden_file']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt', '.hidden_file'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_include_hidden(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=True, algorithm=self.algorithm) @@ -88,7 +88,7 @@ def test_get_checksums_include_hidden(self, mock_checksum, mock_gather, mock_ope @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) @patch('sumbuddy.Hasher.checksum_file', side_effect=lambda x, **kwargs: 'dummychecksum') def test_get_checksums_different_algorithm(self, mock_checksum, mock_gather, mock_open, mock_exists, mock_abspath): algorithm = 'sha256' @@ -106,7 +106,7 @@ def test_get_checksums_different_algorithm(self, mock_checksum, mock_gather, moc @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=False) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=[]) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=([], [])) def test_get_checksums_empty_directory(self, mock_gather, mock_open, mock_exists, mock_abspath): output_stream = StringIO() with patch('sys.stdout', new=output_stream): @@ -118,7 +118,7 @@ def test_get_checksums_empty_directory(self, mock_gather, mock_open, mock_exists @patch('os.path.abspath', side_effect=lambda x: x) @patch('os.path.exists', return_value=True) @patch('builtins.open', new_callable=mock_open) - @patch('sumbuddy.Mapper.gather_file_paths', return_value=['file1.txt', 'file2.txt']) + @patch('sumbuddy.Mapper.gather_file_paths', return_value=(['file1.txt', 'file2.txt'], [])) def test_get_checksums_invalid_algorithm(self, mock_gather, mock_open, mock_exists, mock_abspath): with self.assertRaises(ValueError): get_checksums(self.input_path, output_filepath=None, ignore_file=None, include_hidden=False, algorithm='invalid_alg') diff --git a/tests/test_mapper.py b/tests/test_mapper.py index 4d9baf6..6352bd6 100644 --- a/tests/test_mapper.py +++ b/tests/test_mapper.py @@ -35,11 +35,11 @@ def test_gather_file_paths(self): with open(os.path.join(subdir_path, '.hidden.txt'), 'w') as file: file.write('Some content') - file_paths = mapper.gather_file_paths(temp_dir) - self.assertEqual(len(file_paths), 3) - self.assertIn(os.path.join(temp_dir, 'file1.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'file2.txt'), file_paths) - self.assertIn(os.path.join(subdir_path, 'file3.txt'), file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir) + self.assertEqual(len(regular_files), 3) + self.assertIn(os.path.join(temp_dir, 'file1.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'file2.txt'), regular_files) + self.assertIn(os.path.join(subdir_path, 'file3.txt'), regular_files) # Create ignore file and test with it, if we ignore the .txt files, we will # only have the ignore file in the list of file paths. @@ -47,26 +47,26 @@ def test_gather_file_paths(self): with open(ignore_file_path, 'w') as ignore_file: ignore_file.write("*.txt") - file_paths = mapper.gather_file_paths(temp_dir, ignore_file=ignore_file_path) - self.assertEqual(len(file_paths), 1) - self.assertIn(os.path.join(temp_dir, 'ignore_file'), file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir, ignore_file=ignore_file_path) + self.assertEqual(len(regular_files), 1) + self.assertIn(os.path.join(temp_dir, 'ignore_file'), regular_files) # Test including hidden files - file_paths = mapper.gather_file_paths(temp_dir, include_hidden=True) - self.assertEqual(len(file_paths), 6) - self.assertIn(os.path.join(temp_dir, 'file1.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'file2.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'ignore_file'), file_paths) - self.assertIn(os.path.join(temp_dir, '.hidden.txt'), file_paths) - self.assertIn(os.path.join(subdir_path, 'file3.txt'), file_paths) - self.assertIn(os.path.join(subdir_path, '.hidden.txt'), file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir, include_hidden=True) + self.assertEqual(len(regular_files), 6) + self.assertIn(os.path.join(temp_dir, 'file1.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'file2.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'ignore_file'), regular_files) + self.assertIn(os.path.join(temp_dir, '.hidden.txt'), regular_files) + self.assertIn(os.path.join(subdir_path, 'file3.txt'), regular_files) + self.assertIn(os.path.join(subdir_path, '.hidden.txt'), regular_files) - file_paths = mapper.gather_file_paths(temp_dir) - self.assertEqual(len(file_paths), 4) - self.assertIn(os.path.join(temp_dir, 'file1.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'file2.txt'), file_paths) - self.assertIn(os.path.join(temp_dir, 'ignore_file'), file_paths) - self.assertIn(os.path.join(subdir_path, 'file3.txt'), file_paths) + regular_files, zip_archives = mapper.gather_file_paths(temp_dir) + self.assertEqual(len(regular_files), 4) + self.assertIn(os.path.join(temp_dir, 'file1.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'file2.txt'), regular_files) + self.assertIn(os.path.join(temp_dir, 'ignore_file'), regular_files) + self.assertIn(os.path.join(subdir_path, 'file3.txt'), regular_files) def test_gather_file_paths_empty(self): mapper = Mapper()