|
4 | 4 | from typing import Literal |
5 | 5 | from uuid import uuid4 |
6 | 6 |
|
7 | | -import fsspec # type: ignore |
8 | 7 | import xarray as xr |
9 | 8 | import zarr |
10 | 9 | from fsspec.implementations.local import LocalFileSystem # type: ignore |
11 | 10 |
|
12 | 11 | from reformatters.common.config import Config |
13 | | -from reformatters.common.fsspec import fsspec_apply |
14 | 12 | from reformatters.common.logging import get_logger |
| 13 | +from reformatters.common.retry import retry |
15 | 14 |
|
16 | 15 | logger = get_logger(__name__) |
17 | 16 |
|
@@ -113,15 +112,11 @@ def copy_data_var( |
113 | 112 | f"Copying data var chunks to primary store ({primary_store}) for {relative_dir}." |
114 | 113 | ) |
115 | 114 |
|
116 | | - fs, path = _get_fs_and_path(primary_store) |
117 | | - fs.auto_mkdir = True |
118 | | - |
119 | | - # We want to support local and s3fs filesystems. fsspec local filesystem is sync, |
120 | | - # but our s3fs from zarr.storage.FsspecStore is async and here we work around it. |
121 | | - # The AsyncFileSystem wrapper on LocalFilesystem raises NotImplementedError when _put is called. |
122 | | - source = f"{tmp_store / relative_dir}/" |
123 | | - dest = f"{path}/{relative_dir}" |
124 | | - fsspec_apply(fs, "put", source, dest, recursive=True, auto_mkdir=True) |
| 115 | + for file in tmp_store.glob(f"{relative_dir}**/*"): |
| 116 | + if not file.is_file(): |
| 117 | + continue |
| 118 | + key = str(file.relative_to(tmp_store)) |
| 119 | + sync_to_store(primary_store, key, file.read_bytes()) |
125 | 120 |
|
126 | 121 | if track_progress_callback is not None: |
127 | 122 | track_progress_callback() |
@@ -151,25 +146,18 @@ def copy_zarr_metadata( |
151 | 146 | metadata_files.append(tmp_store / "zarr.json") |
152 | 147 | metadata_files.extend(tmp_store.glob("*/zarr.json")) |
153 | 148 |
|
154 | | - fs, path = _get_fs_and_path(primary_store) |
155 | | - |
156 | | - # This could be partially parallelized BUT make sure to write the coords before the metadata. |
157 | 149 | for file in metadata_files: |
158 | | - relative = file.relative_to(tmp_store) |
159 | | - dest = f"{path}/{relative}" |
160 | | - fsspec_apply(fs, "put_file", file, dest) |
161 | | - |
162 | | - |
163 | | -def _get_fs_and_path( |
164 | | - store: zarr.abc.store.Store, |
165 | | -) -> tuple[fsspec.AbstractFileSystem, str]: |
166 | | - """Gross work around to allow us to make other store types quack like FsspecStore.""" |
167 | | - fs = getattr(store, "fs", None) |
168 | | - if not isinstance(fs, fsspec.AbstractFileSystem): |
169 | | - raise ValueError( |
170 | | - "primary_store must have an fs that is an instance of fsspec.AbstractFileSystem" |
171 | | - ) |
172 | | - path = getattr(store, "path", None) |
173 | | - if not isinstance(path, str): |
174 | | - raise ValueError("primary_store must have a path attribute that is a string") |
175 | | - return fs, path |
| 150 | + relative_path = str(file.relative_to(tmp_store)) |
| 151 | + sync_to_store(primary_store, relative_path, file.read_bytes()) |
| 152 | + |
| 153 | + |
def sync_to_store(store: zarr.abc.store.Store, key: str, data: bytes) -> None:
    """Write `data` to `store` under `key`, retrying on failure.

    `Store.set` is a coroutine; `zarr.core.sync.sync` runs it to completion
    on zarr's internal event loop so this function can be called from
    synchronous code. The write is retried up to 6 attempts via `retry`.
    """
    # Wrap the raw bytes in zarr's buffer type once up front; the buffer is
    # immutable, so it is safe to reuse across retry attempts.
    buffer = zarr.core.buffer.default_buffer_prototype().buffer.from_bytes(data)

    def _write() -> None:
        zarr.core.sync.sync(store.set(key, buffer))

    retry(_write, max_attempts=6)
0 commit comments