1111import sentry_sdk
1212import typer
1313import xarray as xr
14- import zarr
1514from pydantic import computed_field
1615
1716from reformatters .common import docker , template_utils , validation
2726from reformatters .common .logging import get_logger
2827from reformatters .common .pydantic import FrozenBaseModel
2928from reformatters .common .region_job import RegionJob , SourceFileCoord
29+ from reformatters .common .storage import StorageConfig , StoreFactory
3030from reformatters .common .template_config import TemplateConfig
3131from reformatters .common .types import DatetimeLike
3232from reformatters .common .zarr import (
3333 copy_zarr_metadata ,
3434 get_local_tmp_store ,
35- get_mode ,
36- get_zarr_store ,
3735)
3836
3937DATA_VAR = TypeVar ("DATA_VAR" , bound = DataVar [Any ])
4240logger = get_logger (__name__ )
4341
4442
45- class DynamicalDatasetStorageConfig (FrozenBaseModel ):
46- """Configuration for the storage of a dataset in production."""
47-
48- base_path : str
49- k8s_secret_names : Sequence [str ] = []
50-
51-
5243class DynamicalDataset (FrozenBaseModel , Generic [DATA_VAR , SOURCE_FILE_COORD ]):
5344 """Top level class managing a dataset configuration and processing."""
5445
5546 template_config : TemplateConfig [DATA_VAR ]
5647 region_job_class : type [RegionJob [DATA_VAR , SOURCE_FILE_COORD ]]
5748
58- storage_config : DynamicalDatasetStorageConfig
49+ storage_config : StorageConfig
50+
51+ @computed_field # type: ignore[prop-decorator]
52+ @property
53+ def primary_store_factory (self ) -> StoreFactory :
54+ return StoreFactory (
55+ storage_config = self .storage_config ,
56+ dataset_id = self .dataset_id ,
57+ template_config_version = self .template_config .version ,
58+ )
5959
6060 def operational_kubernetes_resources (self , image_tag : str ) -> Iterable [CronJob ]:
6161 """
@@ -127,27 +127,29 @@ def update(
127127 ) -> None :
128128 """Update an existing dataset with the latest data."""
129129 with self ._monitor (ReformatCronJob , reformat_job_name ):
130- final_store = self ._final_store ()
131130 tmp_store = self ._tmp_store ()
132131
133132 jobs , template_ds = self .region_job_class .operational_update_jobs (
134- final_store = final_store ,
133+ primary_store_factory = self . primary_store_factory ,
135134 tmp_store = tmp_store ,
136135 get_template_fn = self ._get_template ,
137136 append_dim = self .template_config .append_dim ,
138137 all_data_vars = self .template_config .data_vars ,
139138 reformat_job_name = reformat_job_name ,
140139 )
141- template_utils .write_metadata (template_ds , tmp_store , get_mode (tmp_store ))
140+ template_utils .write_metadata (template_ds , tmp_store )
141+
142142 for job in jobs :
143143 process_results = job .process ()
144144 updated_template = job .update_template_with_results (process_results )
145- template_utils . write_metadata (
146- updated_template , tmp_store , get_mode ( tmp_store )
147- )
148- copy_zarr_metadata (updated_template , tmp_store , final_store )
145+ # overwrite the tmp store metadata with updated template
146+ template_utils . write_metadata ( updated_template , tmp_store )
147+ primary_store = self . primary_store_factory . store ( )
148+ copy_zarr_metadata (updated_template , tmp_store , primary_store )
149149
150- logger .info (f"Operational update complete. Wrote to store: { final_store } " )
150+ logger .info (
151+ f"Operational update complete. Wrote to store: { self .primary_store_factory .store ()} "
152+ )
151153
152154 def backfill_kubernetes (
153155 self ,
@@ -164,15 +166,12 @@ def backfill_kubernetes(
164166 image_tag = docker_image or docker .build_and_push_image ()
165167
166168 template_ds = self ._get_template (append_dim_end )
167- final_store = self ._final_store ()
168- logger .info (f"Writing zarr metadata to { final_store } " )
169-
170- template_utils .write_metadata (template_ds , final_store , get_mode (final_store ))
169+ template_utils .write_metadata (template_ds , self .primary_store_factory )
171170
172171 num_jobs = len (
173172 self .region_job_class .get_jobs (
174173 kind = "backfill" ,
175- final_store = final_store ,
174+ primary_store_factory = self . primary_store_factory ,
176175 tmp_store = self ._tmp_store (),
177176 template_ds = template_ds ,
178177 append_dim = self .template_config .append_dim ,
@@ -259,9 +258,7 @@ def backfill_local(
259258 ) -> None :
260259 """Run dataset reformatting locally in this process."""
261260 template_ds = self ._get_template (append_dim_end )
262- final_store = self ._final_store ()
263-
264- template_utils .write_metadata (template_ds , final_store , get_mode (final_store ))
261+ template_utils .write_metadata (template_ds , self .primary_store_factory )
265262
266263 self .process_backfill_region_jobs (
267264 append_dim_end ,
@@ -273,7 +270,7 @@ def backfill_local(
273270 filter_contains = filter_contains ,
274271 filter_variable_names = filter_variable_names ,
275272 )
276- logger .info (f"Done writing to { final_store } " )
273+ logger .info (f"Done writing to { self . primary_store_factory . store () } " )
277274
278275 def process_backfill_region_jobs (
279276 self ,
@@ -291,7 +288,7 @@ def process_backfill_region_jobs(
291288
292289 region_jobs = self .region_job_class .get_jobs (
293290 kind = "backfill" ,
294- final_store = self ._final_store () ,
291+ primary_store_factory = self .primary_store_factory ,
295292 tmp_store = self ._tmp_store (),
296293 template_ds = self ._get_template (append_dim_end ),
297294 append_dim = self .template_config .append_dim ,
@@ -320,7 +317,7 @@ def validate_dataset(
320317 ) -> None :
321318 """Validate the dataset, raising an exception if it is invalid."""
322319 with self ._monitor (ValidationCronJob , reformat_job_name ):
323- store = self ._final_store ()
320+ store = self .primary_store_factory . store ()
324321 validation .validate_dataset (store , validators = self .validators ())
325322
326323 logger .info (f"Done validating { store } " )
@@ -339,13 +336,6 @@ def get_cli(
339336 app .command ("validate" )(self .validate_dataset )
340337 return app
341338
342- def _final_store (self ) -> zarr .abc .store .Store :
343- return get_zarr_store (
344- self .storage_config .base_path ,
345- self .template_config .dataset_id ,
346- self .template_config .version ,
347- )
348-
349339 def _tmp_store (self ) -> Path :
350340 return get_local_tmp_store ()
351341
0 commit comments