Skip to content

Commit c0351e9

Browse files
authored
feat: refactor storage submodule for improved readability and maintainability (#99)
Enhance the `storage` submodule by improving code structure and adding utility functions for path resolution and dependency checks. Update tests to ensure functionality remains intact.
1 parent c9e5425 commit c0351e9

File tree

12 files changed

+1056
-471
lines changed

12 files changed

+1056
-471
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ target/
2020
.claude/
2121
.cursor/
2222
.zed/
23-
23+
.gemini
24+
.coverage*
2425
# files
2526
**/*.so
2627
**/*.sqlite

pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ test = [
105105
"requests",
106106
"anyio",
107107
"coverage>=7.6.1",
108+
"covdefaults",
108109
"pytest>=8.0.0",
109110
"pytest-asyncio>=0.23.8",
110111
"pytest-cov>=5.0.0",
@@ -168,6 +169,14 @@ include = [
168169
"sqlspec/utils/type_guards.py", # Type guard utilities
169170
"sqlspec/utils/fixtures.py", # File fixture loading
170171
"sqlspec/utils/data_transformation.py", # Data transformation utilities
172+
173+
# === STORAGE LAYER ===
174+
"sqlspec/storage/_utils.py",
175+
"sqlspec/storage/registry.py",
176+
"sqlspec/storage/backends/base.py",
177+
"sqlspec/storage/backends/obstore.py",
178+
"sqlspec/storage/backends/fsspec.py",
179+
"sqlspec/storage/backends/local.py",
171180
]
172181
mypy-args = [
173182
"--ignore-missing-imports",

sqlspec/storage/_utils.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""Shared utilities for storage backends."""
2+
3+
from typing import TYPE_CHECKING
4+
5+
from sqlspec.exceptions import MissingDependencyError
6+
from sqlspec.typing import PYARROW_INSTALLED
7+
8+
if TYPE_CHECKING:
9+
from pathlib import Path
10+
11+
# Public names of this module (what ``from ... import *`` exposes).
__all__ = ("ensure_pyarrow", "resolve_storage_path")
12+
13+
14+
def ensure_pyarrow() -> None:
    """Guard Arrow operations behind the PyArrow availability flag.

    Returns silently when ``PYARROW_INSTALLED`` is truthy.

    Raises:
        MissingDependencyError: If ``PYARROW_INSTALLED`` is falsy.
    """
    if PYARROW_INSTALLED:
        return
    raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
22+
23+
24+
def resolve_storage_path(
    path: "str | Path", base_path: str = "", protocol: str = "file", strip_file_scheme: bool = True
) -> str:
    """Resolve path relative to base_path with protocol-specific handling.

    For the ``file`` protocol:

    * An absolute path located under ``base_path`` is returned as
      ``base_path`` (trailing slashes removed) joined with the remainder of
      the path.
    * Any other absolute path (no ``base_path``, or outside of it) is
      returned with its leading slashes removed.
    * A relative path is joined onto ``base_path`` when one is given and
      returned unchanged otherwise.

    For every other protocol the path (leading slashes removed) is joined
    onto ``base_path`` (trailing slashes removed); without a ``base_path``
    the path is returned unchanged.

    Args:
        path: Path to resolve (may include file:// scheme).
        base_path: Base path to prepend if path is relative.
        protocol: Storage protocol (file, s3, gs, etc.).
        strip_file_scheme: Whether to strip file:// prefix.

    Returns:
        Resolved path string suitable for the storage backend.

    Examples:
        The examples use POSIX-style paths.

        >>> resolve_storage_path("/data/file.txt", protocol="file")
        'data/file.txt'

        >>> resolve_storage_path(
        ...     "file.txt", base_path="/base", protocol="file"
        ... )
        '/base/file.txt'

        >>> resolve_storage_path(
        ...     "file:///data/file.txt", strip_file_scheme=True
        ... )
        'data/file.txt'

        >>> resolve_storage_path(
        ...     "/data/subdir/file.txt",
        ...     base_path="/data",
        ...     protocol="file",
        ... )
        '/data/subdir/file.txt'

        >>> resolve_storage_path(
        ...     "/key/object", base_path="bucket/", protocol="s3"
        ... )
        'bucket/key/object'
    """
    # Imported locally: at module level ``Path`` is only available to type checkers.
    from pathlib import Path as PathlibPath

    path_str = str(path)
    if strip_file_scheme:
        # ``removeprefix`` leaves the string untouched when the scheme is absent.
        path_str = path_str.removeprefix("file://")

    # Cloud storage protocols (s3, gs, etc.): plain string join with base_path.
    if protocol != "file":
        if not base_path:
            return path_str
        return f"{base_path.rstrip('/')}/{path_str.lstrip('/')}"

    path_obj = PathlibPath(path_str)

    # Relative local path: prepend base_path when there is one.
    if not path_obj.is_absolute():
        return f"{base_path.rstrip('/')}/{path_str}" if base_path else path_str

    if base_path:
        try:
            relative = path_obj.relative_to(PathlibPath(base_path))
        except ValueError:
            # Path is outside base_path; fall through to the plain strip below.
            pass
        else:
            # Return joined path for FSSpec-style backends.
            return f"{base_path.rstrip('/')}/{relative}"

    # Absolute path with no usable base_path: strip the leading slashes.
    return path_str.lstrip("/")

0 commit comments

Comments
 (0)