From d8ceff3ee5721725e51432829d58e734a470f9f1 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Wed, 16 Jul 2025 10:30:49 +0100 Subject: [PATCH 01/35] initialize-new-integration dwd icon-eu forecast. And register. --- src/reformatters/__main__.py | 2 + .../dwd/icon_eu/forecast/__init__.py | 1 + .../dwd/icon_eu/forecast/dynamical_dataset.py | 55 +++ .../dwd/icon_eu/forecast/region_job.py | 289 +++++++++++++ .../dwd/icon_eu/forecast/template_config.py | 385 ++++++++++++++++++ tests/dwd/icon_eu/forecast/__init__.py | 0 .../forecast/dynamical_dataset_test.py | 62 +++ tests/dwd/icon_eu/forecast/region_job_test.py | 37 ++ .../icon_eu/forecast/template_config_test.py | 48 +++ 9 files changed, 879 insertions(+) create mode 100644 src/reformatters/dwd/icon_eu/forecast/__init__.py create mode 100644 src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py create mode 100644 src/reformatters/dwd/icon_eu/forecast/region_job.py create mode 100644 src/reformatters/dwd/icon_eu/forecast/template_config.py create mode 100644 tests/dwd/icon_eu/forecast/__init__.py create mode 100644 tests/dwd/icon_eu/forecast/dynamical_dataset_test.py create mode 100644 tests/dwd/icon_eu/forecast/region_job_test.py create mode 100644 tests/dwd/icon_eu/forecast/template_config_test.py diff --git a/src/reformatters/__main__.py b/src/reformatters/__main__.py index d309bb6a..9428ea34 100644 --- a/src/reformatters/__main__.py +++ b/src/reformatters/__main__.py @@ -18,6 +18,7 @@ NoaaNdviCdrAnalysisDataset, ) from reformatters.contrib.uarizona.swann.analysis import UarizonaSwannAnalysisDataset +from reformatters.dwd.icon_eu.forecast import DwdIconEuForecastDataset from reformatters.example.new_dataset import initialize_new_integration from reformatters.noaa.gfs.forecast import NoaaGfsForecastDataset @@ -50,6 +51,7 @@ class UpstreamGriddedZarrsDatasetStorageConfig(DynamicalDatasetStorageConfig): storage_config=UpstreamGriddedZarrsDatasetStorageConfig() ), 
NoaaGfsForecastDataset(storage_config=SourceCoopDatasetStorageConfig()), + DwdIconEuForecastDataset(storage_config=SourceCoopDatasetStorageConfig()), ] if Config.is_sentry_enabled: diff --git a/src/reformatters/dwd/icon_eu/forecast/__init__.py b/src/reformatters/dwd/icon_eu/forecast/__init__.py new file mode 100644 index 00000000..e91d3357 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/__init__.py @@ -0,0 +1 @@ +from .dynamical_dataset import DwdIconEuForecastDataset as DwdIconEuForecastDataset diff --git a/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py b/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py new file mode 100644 index 00000000..2fb26ce1 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py @@ -0,0 +1,55 @@ +from collections.abc import Sequence + +from reformatters.common import validation +from reformatters.common.dynamical_dataset import DynamicalDataset +from reformatters.common.kubernetes import CronJob + +from .region_job import DwdIconEuForecastRegionJob, DwdIconEuForecastSourceFileCoord +from .template_config import DwdIconEuDataVar, DwdIconEuForecastTemplateConfig + + +class DwdIconEuForecastDataset( + DynamicalDataset[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord] +): + template_config: DwdIconEuForecastTemplateConfig = DwdIconEuForecastTemplateConfig() + region_job_class: type[DwdIconEuForecastRegionJob] = DwdIconEuForecastRegionJob + + def operational_kubernetes_resources(self, image_tag: str) -> Sequence[CronJob]: + """Return the kubernetes cron job definitions to operationally update and validate this dataset.""" + # operational_update_cron_job = ReformatCronJob( + # name=f"{self.dataset_id}-operational-update", + # schedule=_OPERATIONAL_CRON_SCHEDULE, + # pod_active_deadline=timedelta(minutes=30), + # image=image_tag, + # dataset_id=self.dataset_id, + # cpu="14", + # memory="30G", + # shared_memory="12G", + # ephemeral_storage="30G", + # 
secret_names=self.storage_config.k8s_secret_names, + # ) + # validation_cron_job = ValidationCronJob( + # name=f"{self.dataset_id}-validation", + # schedule=_VALIDATION_CRON_SCHEDULE, + # pod_active_deadline=timedelta(minutes=10), + # image=image_tag, + # dataset_id=self.dataset_id, + # cpu="1.3", + # memory="7G", + # secret_names=self.storage_config.k8s_secret_names, + # ) + + # return [operational_update_cron_job, validation_cron_job] + raise NotImplementedError( + f"Implement `operational_kubernetes_resources` on {self.__class__.__name__}" + ) + + def validators(self) -> Sequence[validation.DataValidator]: + """Return a sequence of DataValidators to run on this dataset.""" + # return ( + # validation.check_analysis_current_data, + # validation.check_analysis_recent_nans, + # ) + raise NotImplementedError( + f"Implement `validators` on {self.__class__.__name__}" + ) diff --git a/src/reformatters/dwd/icon_eu/forecast/region_job.py b/src/reformatters/dwd/icon_eu/forecast/region_job.py new file mode 100644 index 00000000..9626014e --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/region_job.py @@ -0,0 +1,289 @@ +from collections.abc import Callable, Mapping, Sequence +from pathlib import Path + +import xarray as xr +import zarr + +from reformatters.common.logging import get_logger +from reformatters.common.region_job import ( + CoordinateValueOrRange, + RegionJob, + SourceFileCoord, +) +from reformatters.common.types import ( + AppendDim, + ArrayFloat32, + DatetimeLike, + Dim, +) + +from .template_config import DwdIconEuDataVar + +log = get_logger(__name__) + + +class DwdIconEuForecastSourceFileCoord(SourceFileCoord): + """Coordinates of a single source file to process.""" + + def get_url(self) -> str: + raise NotImplementedError("Return the URL of the source file.") + + def out_loc( + self, + ) -> Mapping[Dim, CoordinateValueOrRange]: + """ + Returns a data array indexer which identifies the region in the output dataset + to write the data from the source 
file. The indexer is a dict from dimension + names to coordinate values or slices. + """ + # If the names of the coordinate attributes of your SourceFileCoord subclass are also all + # dimension names in the output dataset (e.g. init_time and lead_time), + # delete this implementation and use the default implementation of this method. + # + # Examples where you would override this method: + # - An analysis dataset created from forecast data: + # return {"time": self.init_time + self.lead_time} + return super().out_loc() + + +class DwdIconEuForecastRegionJob( + RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord] +): + # Optionally, limit the number of variables downloaded together. + # If set to a value less than len(data_vars), downloading, reading/recompressing, + # and uploading steps will be pipelined within a region job. + # 5 is a reasonable default if it is possible to download less than all + # variables in a single file (e.g. you have a grib index). + # Leave unset if you have to download a whole file to get one variable out + # to avoid re-downloading the same file multiple times. + # + # max_vars_per_download_group: ClassVar[int | None] = None + + # Implement this method only if different variables must be retrieved from different urls + # + # # @classmethod + # def source_groups( + # cls, + # data_vars: Sequence[DwdIconEuDataVar], + # ) -> Sequence[Sequence[DwdIconEuDataVar]]: + # """ + # Return groups of variables, where all variables in a group can be retrieved from the same source file. + # """ + # grouped = defaultdict(list) + # for data_var in data_vars: + # grouped[data_var.internal_attrs.file_type].append(data_var) + # return list(grouped.values()) + + # Implement this method only if specific post processing in this dataset + # requires data from outside the region defined by self.region, + # e.g. for deaccumulation or interpolation along append_dim in an analysis dataset. 
+ # + # def get_processing_region(self) -> slice: + # """ + # Return a slice of integer offsets into self.template_ds along self.append_dim that identifies + # the region to process. In most cases this is exactly self.region, but if additional data outside + # the region is required, for example for correct interpolation or deaccumulation, this method can + # return a modified slice (e.g. `slice(self.region.start - 1, self.region.stop + 1)`). + # """ + # return self.region + + def generate_source_file_coords( + self, + processing_region_ds: xr.Dataset, + data_var_group: Sequence[DwdIconEuDataVar], + ) -> Sequence[DwdIconEuForecastSourceFileCoord]: + """Return a sequence of coords, one for each source file required to process the data covered by processing_region_ds.""" + # return [ + # DwdIconEuForecastSourceFileCoord( + # init_time=init_time, + # lead_time=lead_time, + # ) + # for init_time, lead_time in itertools.product( + # processing_region_ds["init_time"].values, + # processing_region_ds["lead_time"].values, + # ) + # ] + raise NotImplementedError( + "Return a sequence of SourceFileCoord objects, one for each source file required to process the data covered by processing_region_ds." + ) + + def download_file(self, coord: DwdIconEuForecastSourceFileCoord) -> Path: + """Download the file for the given coordinate and return the local path.""" + # return http_download_to_disk(coord.get_url(), self.dataset_id) + raise NotImplementedError( + "Download the file for the given coordinate and return the local path." 
+ ) + + def read_data( + self, + coord: DwdIconEuForecastSourceFileCoord, + data_var: DwdIconEuDataVar, + ) -> ArrayFloat32: + """Read and return an array of data for the given variable and source file coordinate.""" + # with rasterio.open(coord.downloaded_file_path) as reader: + # TODO: make a band index based on tag matching utility function + # matching_indexes = [ + # i + # for i in range(reader.count) + # if (tags := reader.tags(i))["GRIB_ELEMENT"] + # == data_var.internal_attrs.grib_element + # and tags["GRIB_COMMENT"] == data_var.internal_attrs.grib_comment + # ] + # assert len(matching_indexes) == 1, f"Expected exactly 1 matching band, found {matching_indexes}. {data_var.internal_attrs.grib_element=}, {data_var.internal_attrs.grib_description=}, {coord.downloaded_file_path=}" # fmt: skip + # rasterio_band_index = 1 + matching_indexes[0] # rasterio is 1-indexed + # return reader.read(rasterio_band_index, dtype=np.float32) + raise NotImplementedError( + "Read and return data for the given variable and source file coordinate." + ) + + # Implement this to apply transformations to the array (e.g. deaccumulation) + # + # def apply_data_transformations( + # self, data_array: xr.DataArray, data_var: DwdIconEuDataVar + # ) -> None: + # """ + # Apply in-place data transformations to the output data array for a given data variable. + + # This method is called after reading all data for a variable into the shared-memory array, + # and before writing shards to the output store. The default implementation applies binary + # rounding to float32 arrays if `data_var.internal_attrs.keep_mantissa_bits` is set. + + # Subclasses may override this method to implement additional transformations such as + # deaccumulation, interpolation or other custom logic. All transformations should be + # performed in-place (don't copy `data_array`, it's large). + + # Parameters + # ---------- + # data_array : xr.DataArray + # The output data array to be transformed in-place. 
+ # data_var : DwdIconEuDataVar + # The data variable metadata object, which may contain transformation parameters. + # """ + # super().apply_data_transformations(data_array, data_var) + + def update_template_with_results( + self, process_results: Mapping[str, Sequence[DwdIconEuForecastSourceFileCoord]] + ) -> xr.Dataset: + """ + Update template dataset based on processing results. This method is called + during operational updates. + + Subclasses should implement this method to apply dataset-specific adjustments + based on the processing results. Examples include: + - Trimming dataset along append_dim to only include successfully processed data + - Loading existing coordinate values from final_store and updating them based on results + - Updating metadata based on what was actually processed vs what was planned + + The default implementation trims along append_dim to end at the most recent + successfully processed coordinate (timestamp). + + Parameters + ---------- + process_results : Mapping[str, Sequence[DwdIconEuForecastSourceFileCoord]] + Mapping from variable names to their source file coordinates with final processing status. + + Returns + ------- + xr.Dataset + Updated template dataset reflecting the actual processing results. 
+ """ + # The super() implementation looks like this: + # + # max_append_dim_processed = max( + # ( + # c.out_loc()[self.append_dim] # type: ignore[type-var] + # for c in chain.from_iterable(process_results.values()) + # if c.status == SourceFileStatus.Succeeded + # ), + # default=None, + # ) + # if max_append_dim_processed is None: + # # No data was processed, trim the template to stop before this job's region + # # This is using isel's exclusive slice end behavior + # return self.template_ds.isel( + # {self.append_dim: slice(None, self.region.start)} + # ) + # else: + # return self.template_ds.sel( + # {self.append_dim: slice(None, max_append_dim_processed)} + # ) + # + # If you like the above behavior, skip implementing this method. + # If you need to customize the behavior, implement this method. + + raise NotImplementedError( + "Subclasses implement update_template_with_results() with dataset-specific logic" + ) + + @classmethod + def operational_update_jobs( + cls, + final_store: zarr.abc.store.Store, + tmp_store: Path, + get_template_fn: Callable[[DatetimeLike], xr.Dataset], + append_dim: AppendDim, + all_data_vars: Sequence[DwdIconEuDataVar], + reformat_job_name: str, + ) -> tuple[ + Sequence["RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord]"], + xr.Dataset, + ]: + """ + Return the sequence of RegionJob instances necessary to update the dataset + from its current state to include the latest available data. + + Also return the template_ds, expanded along append_dim through the end of + the data to process. The dataset returned here may extend beyond the + available data at the source, in which case `update_template_with_results` + will trim the dataset to the actual data processed. + + The exact logic is dataset-specific, but it generally follows this pattern: + 1. Figure out the range of time to process: append_dim_start (inclusive) and append_dim_end (exclusive) + a. 
Read existing data from final_store to determine what's already processed + b. Optionally identify recent incomplete/non-final data for reprocessing + 2. Call get_template_fn(append_dim_end) to get the template_ds + 3. Create RegionJob instances by calling cls.get_jobs(..., filter_start=append_dim_start) + + Parameters + ---------- + final_store : zarr.abc.store.Store + The destination Zarr store to read existing data from and write updates to. + tmp_store : Path + The temporary Zarr store to write into while processing. + get_template_fn : Callable[[DatetimeLike], xr.Dataset] + Function to get the template_ds for the operational update. + append_dim : AppendDim + The dimension along which data is appended (e.g., "time"). + all_data_vars : Sequence[DwdIconEuDataVar] + Sequence of all data variable configs for this dataset. + reformat_job_name : str + The name of the reformatting job, used for progress tracking. + This is often the name of the Kubernetes job, or "local". + + Returns + ------- + Sequence[RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord]] + RegionJob instances that need processing for operational updates. + xr.Dataset + The template_ds for the operational update. 
+ """ + # existing_ds = xr.open_zarr(final_store) + # append_dim_start = existing_ds[append_dim].max() + # append_dim_end = pd.Timestamp.now() + # template_ds = get_template_fn(append_dim_end) + + # jobs = cls.get_jobs( + # kind="operational-update", + # final_store=final_store, + # tmp_store=tmp_store, + # template_ds=template_ds, + # append_dim=append_dim, + # all_data_vars=all_data_vars, + # reformat_job_name=reformat_job_name, + # filter_start=append_dim_start, + # ) + # return jobs, template_ds + + raise NotImplementedError( + "Subclasses implement operational_update_jobs() with dataset-specific logic" + ) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py new file mode 100644 index 00000000..7c90a0d8 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -0,0 +1,385 @@ +from collections.abc import Sequence +from typing import Any + +import numpy as np +import pandas as pd +import xarray as xr +from pydantic import computed_field + +from reformatters.common.config_models import ( + BaseInternalAttrs, + Coordinate, + CoordinateAttrs, # noqa: F401 + DatasetAttributes, + DataVar, + DataVarAttrs, # noqa: F401 + Encoding, # noqa: F401 + StatisticsApproximate, # noqa: F401 +) +from reformatters.common.template_config import ( + SPATIAL_REF_COORDS, # noqa: F401 + TemplateConfig, +) +from reformatters.common.types import AppendDim, Dim, Timedelta, Timestamp +from reformatters.common.zarr import ( + BLOSC_4BYTE_ZSTD_LEVEL3_SHUFFLE, # noqa: F401 + BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE, # noqa: F401 +) + + +class DwdIconEuInternalAttrs(BaseInternalAttrs): + """ + Variable specific attributes used internally to drive processing. + Not written to the dataset. + """ + + # For example, + # grib_element: str + + +class DwdIconEuDataVar(DataVar[DwdIconEuInternalAttrs]): + pass + + +class DwdIconEuForecastTemplateConfig(TemplateConfig[DwdIconEuDataVar]): + dims: tuple[Dim, ...] 
= ("init_time", "lead_time", "latitude", "longitude") + append_dim: AppendDim = "init_time" + append_dim_start: Timestamp = pd.Timestamp("2020-01-01T00:00") + append_dim_frequency: Timedelta = pd.Timedelta("6h") + + @computed_field # type: ignore[prop-decorator] + @property + def dataset_attributes(self) -> DatasetAttributes: + # return DatasetAttributes( + # dataset_id="producer-model-variant", + # dataset_version="0.1.0", + # name="Producer Model Variant", + # description="Weather data from the Model operated by Producer.", + # attribution="Producer Model Variant data processed by dynamical.org from Producer Model.", + # spatial_domain="Global", + # spatial_resolution="0.25 degrees (~20km)", + # time_domain=f"Forecasts initialized {self.append_dim_start} UTC to Present", + # time_resolution=f"Forecasts initialized every {self.append_dim_frequency.total_seconds() / 3600:.0f} hours", + # forecast_domain="Forecast lead time 0-384 hours (0-16 days) ahead", + # forecast_resolution="Forecast step 0-120 hours: hourly, 123-384 hours: 3 hourly", + # ) + raise NotImplementedError("Subclasses implement `dataset_attributes`") + + def dimension_coordinates(self) -> dict[str, Any]: + """ + Returns a dictionary of dimension names to coordinates for the dataset. + """ + # return { + # self.append_dim: self.append_dim_coordinates( + # self.append_dim_start + self.append_dim_frequency + # ), + # "lead_time": ( + # pd.timedelta_range("0h", "120h", freq="1h").union( + # pd.timedelta_range("123h", "384h", freq="3h") + # ) + # ), + # "latitude": np.flip(np.arange(-90, 90.25, 0.25)), + # "longitude": np.arange(-180, 180, 0.25), + # } + raise NotImplementedError("Subclasses implement `dimension_coordinates`") + + def derive_coordinates( + self, ds: xr.Dataset + ) -> dict[str, xr.DataArray | tuple[tuple[str, ...], np.ndarray[Any, Any]]]: + """ + Return a dictionary of non-dimension coordinates for the dataset. + Called whenever len(ds.append_dim) changes. 
+ """ + # Non-dimension coordinates are additional labels for data along + # one or more dimensions. Use them to make it easier to use and + # understand your dataset. + # return { + # "valid_time": ds["init_time"] + ds["lead_time"], + # "ingested_forecast_length": ( + # (self.append_dim,), + # np.full(ds[self.append_dim].size, np.timedelta64("NaT", "ns")), + # ), + # "spatial_ref": SPATIAL_REF_COORDS, + # } + raise NotImplementedError("Subclasses implement `derive_coordinates`") + + @computed_field # type: ignore[prop-decorator] + @property + def coords(self) -> Sequence[Coordinate]: + """Define metadata and encoding for each coordinate.""" + # dim_coords = self.dimension_coordinates() + # append_dim_coordinate_chunk_size = self.append_dim_coordinate_chunk_size() + + # return [ + # Coordinate( + # name=self.append_dim, + # encoding=Encoding( + # dtype="int64", + # fill_value=0, + # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + # calendar="proleptic_gregorian", + # units="seconds since 1970-01-01 00:00:00", + # chunks=append_dim_coordinate_chunk_size, + # shards=None, + # ), + # attrs=CoordinateAttrs( + # units="seconds since 1970-01-01 00:00:00", + # statistics_approximate=StatisticsApproximate( + # min=dim_coords[self.append_dim].min().isoformat(), max="Present" + # ), + # ), + # ), + # Coordinate( + # name="lead_time", + # encoding=Encoding( + # dtype="int64", + # fill_value=-1, + # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + # units="seconds", + # chunks=len(dim_coords["lead_time"]), + # shards=None, + # ), + # attrs=CoordinateAttrs( + # units="seconds", + # statistics_approximate=StatisticsApproximate( + # min=str(dim_coords["lead_time"].min()), + # max=str(dim_coords["lead_time"].max()), + # ), + # ), + # ), + # Coordinate( + # name="latitude", + # encoding=Encoding( + # dtype="float64", + # fill_value=np.nan, + # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + # chunks=len(dim_coords["latitude"]), + # shards=None, + # ), + # attrs=CoordinateAttrs( + 
# units="degrees_north", + # statistics_approximate=StatisticsApproximate( + # min=float(dim_coords["latitude"].min()), + # max=float(dim_coords["latitude"].max()), + # ), + # ), + # ), + # Coordinate( + # name="longitude", + # encoding=Encoding( + # dtype="float64", + # fill_value=np.nan, + # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + # chunks=len(dim_coords["longitude"]), + # shards=None, + # ), + # attrs=CoordinateAttrs( + # units="degrees_east", + # statistics_approximate=StatisticsApproximate( + # min=float(dim_coords["longitude"].min()), + # max=float(dim_coords["longitude"].max()), + # ), + # ), + # ), + # Coordinate( + # name="valid_time", + # encoding=Encoding( + # dtype="int64", + # fill_value=0, + # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + # calendar="proleptic_gregorian", + # units="seconds since 1970-01-01 00:00:00", + # chunks=( + # append_dim_coordinate_chunk_size, + # len(dim_coords["lead_time"]), + # ), + # shards=None, + # ), + # attrs=CoordinateAttrs( + # units="seconds since 1970-01-01 00:00:00", + # statistics_approximate=StatisticsApproximate( + # min=self.append_dim_start.isoformat(), + # max="Present + 16 days", + # ), + # ), + # ), + # Coordinate( + # name="ingested_forecast_length", + # encoding=Encoding( + # dtype="int64", + # fill_value=-1, + # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + # units="seconds", + # chunks=append_dim_coordinate_chunk_size, + # shards=None, + # ), + # attrs=CoordinateAttrs( + # units="seconds", + # statistics_approximate=StatisticsApproximate( + # min=str(dim_coords["lead_time"].min()), + # max=str(dim_coords["lead_time"].max()), + # ), + # ), + # ), + # Coordinate( + # name="spatial_ref", + # encoding=Encoding( + # dtype="int64", + # fill_value=0, + # chunks=(), # Scalar coordinate + # shards=None, + # ), + # attrs=CoordinateAttrs( + # units=None, + # statistics_approximate=None, + # # Derived by running `ds.rio.write_crs("+proj=longlat +a=6371229 +b=6371229 +no_defs 
+type=crs")["spatial_ref"].attrs + # crs_wkt='GEOGCS["unknown",DATUM["unknown",SPHEROID["unknown",6371229,0]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]', + # semi_major_axis=6371229.0, + # semi_minor_axis=6371229.0, + # inverse_flattening=0.0, + # reference_ellipsoid_name="unknown", + # longitude_of_prime_meridian=0.0, + # prime_meridian_name="Greenwich", + # geographic_crs_name="unknown", + # horizontal_datum_name="unknown", + # grid_mapping_name="latitude_longitude", + # spatial_ref='GEOGCS["unknown",DATUM["unknown",SPHEROID["unknown",6371229,0]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]', + # comment="This coordinate reference system matches the source data which follows WMO conventions of assuming the earth is a perfect sphere with a radius of 6,371,229m. It is similar to EPSG:4326, but EPSG:4326 uses a more accurate representation of the earth's shape.", + # ), + # ), + # ] + raise NotImplementedError("Subclasses implement `coords`") + + @computed_field # type: ignore[prop-decorator] + @property + def data_vars(self) -> Sequence[DwdIconEuDataVar]: + """Define metadata and encoding for each data variable.""" + # # Data variable chunking and sharding + # # + # # Aim for one of these roughly equivalent quantities: + # # 1-2mb chunks compressed + # # 4-8mb uncompressed + # # 4-8 million float32 values + # var_chunks: dict[Dim, int] = { + # "init_time": 1, + # "lead_time": 105, + # "latitude": 121, + # "longitude": 121, + # } + # # Aim for one of these roughly equivalent quantities: + # # 64-256MB shards compressed + # # 256-1024MB uncompressed + # # 256 million to 1 billion float32 values + # var_shards: dict[Dim, int] = { + # "init_time": 1, + # "lead_time": 105 * 2, + # "latitude": 121 * 6, + # "longitude": 121 * 6, + # } + + # 
encoding_float32_default = Encoding( + # dtype="float32", + # fill_value=np.nan, + # chunks=tuple(var_chunks[d] for d in self.dims), + # shards=tuple(var_shards[d] for d in self.dims), + # compressors=[BLOSC_4BYTE_ZSTD_LEVEL3_SHUFFLE], + # ) + + # default_keep_mantissa_bits = 7 + + # # return [ + # DwdIconEuDataVar( + # name="temperature_2m", + # encoding=encoding_float32_default, + # attrs=DataVarAttrs( + # short_name="t2m", + # long_name="2 metre temperature", + # units="C", + # step_type="instant", + # standard_name="air_temperature", + # ), + # internal_attrs=DwdIconEuInternalAttrs( + # grib_element="TMP", + # grib_comment='2[m] HTGL="Specified height level above ground"', + # grib_index_level="2 m above ground", + # index_position=580, + # keep_mantissa_bits=default_keep_mantissa_bits, + # ), + # ), + # DwdIconEuDataVar( + # name="precipitation_surface", + # encoding=encoding_float32_default, + # attrs=DataVarAttrs( + # short_name="tp", + # long_name="Total Precipitation", + # units="mm/s", + # comment="Average precipitation rate since the previous forecast step.", + # step_type="avg", + # ), + # internal_attrs=DwdIconEuInternalAttrs( + # grib_element="APCP", + # grib_comment='0[-] SFC="Ground or water surface"', + # grib_index_level="surface", + # index_position=595, + # include_lead_time_suffix=True, + # deaccumulate_to_rate=True, + # window_reset_frequency=pd.Timedelta("6h"), + # keep_mantissa_bits=default_keep_mantissa_bits, + # ), + # ), + # DwdIconEuDataVar( + # name="pressure_surface", + # encoding=encoding_float32_default, + # attrs=DataVarAttrs( + # short_name="sp", + # long_name="Surface pressure", + # units="Pa", + # step_type="instant", + # standard_name="surface_air_pressure", + # ), + # internal_attrs=DwdIconEuInternalAttrs( + # grib_element="PRES", + # grib_comment='0[-] SFC="Ground or water surface"', + # grib_index_level="surface", + # index_position=560, + # keep_mantissa_bits=10, + # ), + # ), + # DwdIconEuDataVar( + # 
name="categorical_snow_surface", + # encoding=encoding_float32_default, + # attrs=DataVarAttrs( + # short_name="csnow", + # long_name="Categorical snow", + # units="0=no; 1=yes", + # step_type="avg", + # ), + # internal_attrs=DwdIconEuInternalAttrs( + # grib_element="CSNOW", + # grib_comment='0[-] SFC="Ground or water surface"', + # grib_index_level="surface", + # index_position=604, + # window_reset_frequency=pd.Timedelta("6h"), + # keep_mantissa_bits="no-rounding", + # ), + # ), + # DwdIconEuDataVar( + # name="total_cloud_cover_atmosphere", + # encoding=encoding_float32_default, + # attrs=DataVarAttrs( + # short_name="tcc", + # long_name="Total Cloud Cover", + # units="%", + # step_type="avg", + # ), + # internal_attrs=DwdIconEuInternalAttrs( + # grib_element="TCDC", + # grib_comment='0[-] EATM="Entire Atmosphere"', + # grib_index_level="entire atmosphere", + # index_position=635, + # window_reset_frequency=pd.Timedelta("6h"), + # keep_mantissa_bits=default_keep_mantissa_bits, + # ), + # ), + # ] + raise NotImplementedError("Subclasses implement `data_vars`") diff --git a/tests/dwd/icon_eu/forecast/__init__.py b/tests/dwd/icon_eu/forecast/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py b/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py new file mode 100644 index 00000000..ccc5a53c --- /dev/null +++ b/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py @@ -0,0 +1,62 @@ +# from pathlib import Path + +# import numpy as np +# import pandas as pd +# import pytest +# import xarray as xr + +# from reformatters.common import validation +# from reformatters.dwd.icon_eu.forecast.dynamical_dataset import DwdIconEuForecastDataset + +# @pytest.mark.slow +# def test_backfill_local_and_operational_update(monkeypatch: pytest.MonkeyPatch) -> None: +# dataset = DwdIconEuForecastDataset() + +# # Local backfill reformat +# dataset.backfill_local(append_dim_end=pd.Timestamp("2000-01-02")) +# ds = 
xr.open_zarr(dataset._final_store(), chunks=None) +# assert ds.time.max() == pd.Timestamp("2000-01-01") + +# # Operational update +# monkeypatch.setattr( +# dataset.region_job_class, +# "_update_append_dim_end", +# lambda: pd.Timestamp("2000-01-03"), +# ) +# monkeypatch.setattr( +# dataset.region_job_class, +# "_update_append_dim_start", +# lambda existing_ds: pd.Timestamp(existing_ds.time.max().item()), +# ) + +# dataset.update("test-update") + +# # Check resulting dataset +# updated_ds = xr.open_zarr(dataset._final_store(), chunks=None) + +# np.testing.assert_array_equal( +# updated_ds.time, pd.date_range("1981-10-01", "1981-10-03") +# ) +# subset_ds = updated_ds.sel(latitude=48.583335, longitude=-94, method="nearest") +# np.testing.assert_array_equal( +# subset_ds["your_variable"].values, [190.0, 163.0, 135.0] +# ) + + +# def test_operational_kubernetes_resources( +# dataset: DwdIconEuForecastDataset, +# ) -> None: +# cron_jobs = dataset.operational_kubernetes_resources("test-image-tag") + +# assert len(cron_jobs) == 2 +# update_cron_job, validation_cron_job = cron_jobs +# assert update_cron_job.name == f"{dataset.dataset_id}-operational-update" +# assert validation_cron_job.name == f"{dataset.dataset_id}-validation" +# assert update_cron_job.secret_names == dataset.storage_config.k8s_secret_names +# assert validation_cron_job.secret_names == dataset.storage_config.k8s_secret_names + + +# def test_validators(dataset: DwdIconEuForecastDataset) -> None: +# validators = tuple(dataset.validators()) +# assert len(validators) == 2 +# assert all(isinstance(v, validation.DataValidator) for v in validators) diff --git a/tests/dwd/icon_eu/forecast/region_job_test.py b/tests/dwd/icon_eu/forecast/region_job_test.py new file mode 100644 index 00000000..0ab7d676 --- /dev/null +++ b/tests/dwd/icon_eu/forecast/region_job_test.py @@ -0,0 +1,37 @@ +# from unittest.mock import Mock + +# import pandas as pd + +# from reformatters.dwd.icon_eu.forecast.region_job import ( +# 
DwdIconEuForecastRegionJob, +# DwdIconEuForecastSourceFileCoord, +# ) +# from reformatters.dwd.icon_eu.forecast.template_config import DwdIconEuForecastTemplateConfig + +# def test_source_file_coord_get_url() -> None: +# coord = DwdIconEuForecastSourceFileCoord(time=pd.Timestamp("2000-01-01")) +# assert coord.get_url() == "https://example.com/data/2000-01-01.grib2" + + +# def test_region_job_generete_source_file_coords() -> None: +# template_config = DwdIconEuForecastTemplateConfig() +# template_ds = template_config.get_template(pd.Timestamp("2000-01-23")) + +# region_job = DwdIconEuForecastRegionJob( +# final_store=Mock(), +# tmp_store=Mock(), +# template_ds=template_ds, +# data_vars=[Mock(), Mock()], +# append_dim=template_config.append_dim, +# region=slice(0, 10), +# reformat_job_name="test", +# ) + +# processing_region_ds, output_region_ds = region_job._get_region_datasets() + +# source_file_coords = region_job.generate_source_file_coords( +# processing_region_ds, [Mock()] +# ) + +# assert len(source_file_coords) == ... +# assert ... diff --git a/tests/dwd/icon_eu/forecast/template_config_test.py b/tests/dwd/icon_eu/forecast/template_config_test.py new file mode 100644 index 00000000..110557d9 --- /dev/null +++ b/tests/dwd/icon_eu/forecast/template_config_test.py @@ -0,0 +1,48 @@ +# import json +# from copy import deepcopy +# from pathlib import Path + +# import pandas as pd +# import pytest + +# from reformatters.dwd.icon_eu.forecast.template_config import DwdIconEuForecastTemplateConfig + + +# def test_update_template(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: +# """ +# Ensure that `uv run main update-template` has been run and +# all changes to DwdIconEuForecastTemplateConfig are reflected in the on-disk Zarr template. 
+# """ +# template_config = DwdIconEuForecastTemplateConfig() +# with open(template_config.template_path() / "zarr.json") as f: +# existing_template = json.load(f) + +# test_template_path = tmp_path / "latest.zarr" +# monkeypatch.setattr( +# DwdIconEuForecastTemplateConfig, +# "template_path", +# lambda _self: test_template_path, +# ) + +# template_config.update_template() + +# with open(template_config.template_path() / "zarr.json") as f: +# updated_template = json.load(f) + +# assert existing_template == updated_template + + +# def test_get_template_spatial_ref() -> None: +# """Ensure the spatial reference system in the template matched our expectation.""" +# template_config = DwdIconEuForecastTemplateConfig() +# ds = template_config.get_template( +# template_config.append_dim_start + pd.Timedelta(days=10) +# ) +# original_attrs = deepcopy(ds.spatial_ref.attrs) + +# # TODO: Update to the CRS of your dataset. e.g. "EPSG:4269" +# expected_crs = None +# calculated_spatial_ref_attrs = ds.rio.write_crs(expected_crs).spatial_ref.attrs +# assert set(original_attrs) - set(calculated_spatial_ref_attrs) == {"comment"} +# original_attrs.pop("comment") +# assert original_attrs == calculated_spatial_ref_attrs From be3a19c54fdf06521ca5db09a23a2a6d9cf167dd Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Wed, 16 Jul 2025 17:27:52 +0100 Subject: [PATCH 02/35] Gemini CLI's first draft. And I've started updating. --- .../dwd/icon_eu/forecast/template_config.py | 775 +++++++++++------- 1 file changed, 459 insertions(+), 316 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 7c90a0d8..956a2ad9 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -28,13 +28,13 @@ class DwdIconEuInternalAttrs(BaseInternalAttrs): - """ - Variable specific attributes used internally to drive processing. 
+ """Variable specific attributes used internally to drive processing. + Not written to the dataset. """ - # For example, - # grib_element: str + grib_element: str + grib_description: str class DwdIconEuDataVar(DataVar[DwdIconEuInternalAttrs]): @@ -44,342 +44,485 @@ class DwdIconEuDataVar(DataVar[DwdIconEuInternalAttrs]): class DwdIconEuForecastTemplateConfig(TemplateConfig[DwdIconEuDataVar]): dims: tuple[Dim, ...] = ("init_time", "lead_time", "latitude", "longitude") append_dim: AppendDim = "init_time" - append_dim_start: Timestamp = pd.Timestamp("2020-01-01T00:00") + append_dim_start: Timestamp = pd.Timestamp("2025-08-01T00:00") append_dim_frequency: Timedelta = pd.Timedelta("6h") @computed_field # type: ignore[prop-decorator] @property def dataset_attributes(self) -> DatasetAttributes: - # return DatasetAttributes( - # dataset_id="producer-model-variant", - # dataset_version="0.1.0", - # name="Producer Model Variant", - # description="Weather data from the Model operated by Producer.", - # attribution="Producer Model Variant data processed by dynamical.org from Producer Model.", - # spatial_domain="Global", - # spatial_resolution="0.25 degrees (~20km)", - # time_domain=f"Forecasts initialized {self.append_dim_start} UTC to Present", - # time_resolution=f"Forecasts initialized every {self.append_dim_frequency.total_seconds() / 3600:.0f} hours", - # forecast_domain="Forecast lead time 0-384 hours (0-16 days) ahead", - # forecast_resolution="Forecast step 0-120 hours: hourly, 123-384 hours: 3 hourly", - # ) - raise NotImplementedError("Subclasses implement `dataset_attributes`") + return DatasetAttributes( + dataset_id="dwd-icon_eu-forecast", + dataset_version="0.1.0", + name="DWD ICON-EU Forecast", + description="High-resolution weather forecasts for Europe from the ICON-EU model operated by Deutscher Wetterdienst (DWD).", + attribution="DWD ICON-EU data processed by dynamical.org from DWD.", + spatial_domain="Europe", + spatial_resolution="0.0625 degrees (~7km)", 
+ time_domain=f"Forecasts initialized {self.append_dim_start} UTC to Present", + time_resolution=f"Forecasts initialized every {self.append_dim_frequency.total_seconds() / 3600:.0f} hours", + forecast_domain="Forecast lead time 0-120 hours (0-5 days) ahead", + forecast_resolution="Forecast step 0-78 hours: hourly, 81-120 hours: 3 hourly", + ) def dimension_coordinates(self) -> dict[str, Any]: - """ - Returns a dictionary of dimension names to coordinates for the dataset. - """ - # return { - # self.append_dim: self.append_dim_coordinates( - # self.append_dim_start + self.append_dim_frequency - # ), - # "lead_time": ( - # pd.timedelta_range("0h", "120h", freq="1h").union( - # pd.timedelta_range("123h", "384h", freq="3h") - # ) - # ), - # "latitude": np.flip(np.arange(-90, 90.25, 0.25)), - # "longitude": np.arange(-180, 180, 0.25), - # } - raise NotImplementedError("Subclasses implement `dimension_coordinates`") + """Returns a dictionary of dimension names to coordinates for the + dataset.""" + return { + self.append_dim: self.append_dim_coordinates( + self.append_dim_start + self.append_dim_frequency + ), + "lead_time": ( + pd.timedelta_range("0h", "78h", freq="1h").union( + pd.timedelta_range("81h", "120h", freq="3h") + ) + ), + # TODO: Continue checking Gemini's output from here (downwards): + "latitude": np.linspace(70.5, 29.5, 657), + "longitude": np.linspace(-23.5, 62.5, 1377), + } def derive_coordinates( self, ds: xr.Dataset ) -> dict[str, xr.DataArray | tuple[tuple[str, ...], np.ndarray[Any, Any]]]: - """ - Return a dictionary of non-dimension coordinates for the dataset. + """Return a dictionary of non-dimension coordinates for the dataset. + Called whenever len(ds.append_dim) changes. """ - # Non-dimension coordinates are additional labels for data along - # one or more dimensions. Use them to make it easier to use and - # understand your dataset. 
- # return { - # "valid_time": ds["init_time"] + ds["lead_time"], - # "ingested_forecast_length": ( - # (self.append_dim,), - # np.full(ds[self.append_dim].size, np.timedelta64("NaT", "ns")), - # ), - # "spatial_ref": SPATIAL_REF_COORDS, - # } - raise NotImplementedError("Subclasses implement `derive_coordinates`") + return { + "valid_time": ds["init_time"] + ds["lead_time"], + "ingested_forecast_length": ( + (self.append_dim,), + np.full(ds[self.append_dim].size, np.timedelta64("NaT", "ns")), + ), + "spatial_ref": SPATIAL_REF_COORDS, + } @computed_field # type: ignore[prop-decorator] @property def coords(self) -> Sequence[Coordinate]: """Define metadata and encoding for each coordinate.""" - # dim_coords = self.dimension_coordinates() - # append_dim_coordinate_chunk_size = self.append_dim_coordinate_chunk_size() + dim_coords = self.dimension_coordinates() + append_dim_coordinate_chunk_size = self.append_dim_coordinate_chunk_size() - # return [ - # Coordinate( - # name=self.append_dim, - # encoding=Encoding( - # dtype="int64", - # fill_value=0, - # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], - # calendar="proleptic_gregorian", - # units="seconds since 1970-01-01 00:00:00", - # chunks=append_dim_coordinate_chunk_size, - # shards=None, - # ), - # attrs=CoordinateAttrs( - # units="seconds since 1970-01-01 00:00:00", - # statistics_approximate=StatisticsApproximate( - # min=dim_coords[self.append_dim].min().isoformat(), max="Present" - # ), - # ), - # ), - # Coordinate( - # name="lead_time", - # encoding=Encoding( - # dtype="int64", - # fill_value=-1, - # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], - # units="seconds", - # chunks=len(dim_coords["lead_time"]), - # shards=None, - # ), - # attrs=CoordinateAttrs( - # units="seconds", - # statistics_approximate=StatisticsApproximate( - # min=str(dim_coords["lead_time"].min()), - # max=str(dim_coords["lead_time"].max()), - # ), - # ), - # ), - # Coordinate( - # name="latitude", - # encoding=Encoding( - # 
dtype="float64", - # fill_value=np.nan, - # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], - # chunks=len(dim_coords["latitude"]), - # shards=None, - # ), - # attrs=CoordinateAttrs( - # units="degrees_north", - # statistics_approximate=StatisticsApproximate( - # min=float(dim_coords["latitude"].min()), - # max=float(dim_coords["latitude"].max()), - # ), - # ), - # ), - # Coordinate( - # name="longitude", - # encoding=Encoding( - # dtype="float64", - # fill_value=np.nan, - # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], - # chunks=len(dim_coords["longitude"]), - # shards=None, - # ), - # attrs=CoordinateAttrs( - # units="degrees_east", - # statistics_approximate=StatisticsApproximate( - # min=float(dim_coords["longitude"].min()), - # max=float(dim_coords["longitude"].max()), - # ), - # ), - # ), - # Coordinate( - # name="valid_time", - # encoding=Encoding( - # dtype="int64", - # fill_value=0, - # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], - # calendar="proleptic_gregorian", - # units="seconds since 1970-01-01 00:00:00", - # chunks=( - # append_dim_coordinate_chunk_size, - # len(dim_coords["lead_time"]), - # ), - # shards=None, - # ), - # attrs=CoordinateAttrs( - # units="seconds since 1970-01-01 00:00:00", - # statistics_approximate=StatisticsApproximate( - # min=self.append_dim_start.isoformat(), - # max="Present + 16 days", - # ), - # ), - # ), - # Coordinate( - # name="ingested_forecast_length", - # encoding=Encoding( - # dtype="int64", - # fill_value=-1, - # compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], - # units="seconds", - # chunks=append_dim_coordinate_chunk_size, - # shards=None, - # ), - # attrs=CoordinateAttrs( - # units="seconds", - # statistics_approximate=StatisticsApproximate( - # min=str(dim_coords["lead_time"].min()), - # max=str(dim_coords["lead_time"].max()), - # ), - # ), - # ), - # Coordinate( - # name="spatial_ref", - # encoding=Encoding( - # dtype="int64", - # fill_value=0, - # chunks=(), # Scalar coordinate - # shards=None, - # ), - # 
attrs=CoordinateAttrs( - # units=None, - # statistics_approximate=None, - # # Deterived by running `ds.rio.write_crs("+proj=longlat +a=6371229 +b=6371229 +no_defs +type=crs")["spatial_ref"].attrs - # crs_wkt='GEOGCS["unknown",DATUM["unknown",SPHEROID["unknown",6371229,0]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]', - # semi_major_axis=6371229.0, - # semi_minor_axis=6371229.0, - # inverse_flattening=0.0, - # reference_ellipsoid_name="unknown", - # longitude_of_prime_meridian=0.0, - # prime_meridian_name="Greenwich", - # geographic_crs_name="unknown", - # horizontal_datum_name="unknown", - # grid_mapping_name="latitude_longitude", - # spatial_ref='GEOGCS["unknown",DATUM["unknown",SPHEROID["unknown",6371229,0]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]', - # comment="This coordinate reference system matches the source data which follows WMO conventions of assuming the earth is a perfect sphere with a radius of 6,371,229m. 
It is similar to EPSG:4326, but EPSG:4326 uses a more accurate representation of the earth's shape.", - # ), - # ), - # ] - raise NotImplementedError("Subclasses implement `coords`") + return [ + Coordinate( + name=self.append_dim, + encoding=Encoding( + dtype="int64", + fill_value=0, + compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + calendar="proleptic_gregorian", + units="seconds since 1970-01-01 00:00:00", + chunks=append_dim_coordinate_chunk_size, + shards=None, + ), + attrs=CoordinateAttrs( + units="seconds since 1970-01-01 00:00:00", + statistics_approximate=StatisticsApproximate( + min=dim_coords[self.append_dim].min().isoformat(), max="Present" + ), + ), + ), + Coordinate( + name="lead_time", + encoding=Encoding( + dtype="int64", + fill_value=-1, + compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + units="seconds", + chunks=len(dim_coords["lead_time"]), + shards=None, + ), + attrs=CoordinateAttrs( + units="seconds", + statistics_approximate=StatisticsApproximate( + min=str(dim_coords["lead_time"].min()), + max=str(dim_coords["lead_time"].max()), + ), + ), + ), + Coordinate( + name="latitude", + encoding=Encoding( + dtype="float64", + fill_value=np.nan, + compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + chunks=len(dim_coords["latitude"]), + shards=None, + ), + attrs=CoordinateAttrs( + units="degrees_north", + statistics_approximate=StatisticsApproximate( + min=float(dim_coords["latitude"].min()), + max=float(dim_coords["latitude"].max()), + ), + ), + ), + Coordinate( + name="longitude", + encoding=Encoding( + dtype="float64", + fill_value=np.nan, + compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + chunks=len(dim_coords["longitude"]), + shards=None, + ), + attrs=CoordinateAttrs( + units="degrees_east", + statistics_approximate=StatisticsApproximate( + min=float(dim_coords["longitude"].min()), + max=float(dim_coords["longitude"].max()), + ), + ), + ), + Coordinate( + name="valid_time", + encoding=Encoding( + dtype="int64", + fill_value=0, + 
compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + calendar="proleptic_gregorian", + units="seconds since 1970-01-01 00:00:00", + chunks=( + append_dim_coordinate_chunk_size, + len(dim_coords["lead_time"]), + ), + shards=None, + ), + attrs=CoordinateAttrs( + units="seconds since 1970-01-01 00:00:00", + statistics_approximate=StatisticsApproximate( + min=self.append_dim_start.isoformat(), + max="Present + 5 days", + ), + ), + ), + Coordinate( + name="ingested_forecast_length", + encoding=Encoding( + dtype="int64", + fill_value=-1, + compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + units="seconds", + chunks=append_dim_coordinate_chunk_size, + shards=None, + ), + attrs=CoordinateAttrs( + units="seconds", + statistics_approximate=StatisticsApproximate( + min=str(dim_coords["lead_time"].min()), + max=str(dim_coords["lead_time"].max()), + ), + ), + ), + Coordinate( + name="spatial_ref", + encoding=Encoding( + dtype="int64", + fill_value=0, + chunks=(), # Scalar coordinate + shards=None, + ), + attrs=CoordinateAttrs( + units=None, + statistics_approximate=None, + # Deterived by running `ds.rio.write_crs("+proj=longlat +a=6371229 +b=6371229 +no_defs +type=crs")["spatial_ref"].attrs + crs_wkt='GEOGCS["unknown",DATUM["unknown",SPHEROID["unknown",6371229,0]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]', + semi_major_axis=6371229.0, + semi_minor_axis=6371229.0, + inverse_flattening=0.0, + reference_ellipsoid_name="unknown", + longitude_of_prime_meridian=0.0, + prime_meridian_name="Greenwich", + geographic_crs_name="unknown", + horizontal_datum_name="unknown", + grid_mapping_name="latitude_longitude", + spatial_ref='GEOGCS["unknown",DATUM["unknown",SPHEROID["unknown",6371229,0]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]', + comment="This coordinate reference 
system matches the source data which follows WMO conventions of assuming the earth is a perfect sphere with a radius of 6,371,229m. It is similar to EPSG:4326, but EPSG:4326 uses a more accurate representation of the earth's shape.", + ), + ), + ] @computed_field # type: ignore[prop-decorator] @property def data_vars(self) -> Sequence[DwdIconEuDataVar]: """Define metadata and encoding for each data variable.""" - # # Data variable chunking and sharding - # # - # # Aim for one of these roughly equivalent quantities: - # # 1-2mb chunks compressed - # # 4-8mb uncompressed - # # 4-8 million float32 values - # var_chunks: dict[Dim, int] = { - # "init_time": 1, - # "lead_time": 105, - # "latitude": 121, - # "longitude": 121, - # } - # # Aim for one of these roughly equivalent quantities: - # # 64-256MB shards compressed - # # 256-1024MB uncompressed - # # 256 million to 1 billion float32 values - # var_shards: dict[Dim, int] = { - # "init_time": 1, - # "lead_time": 105 * 2, - # "latitude": 121 * 6, - # "longitude": 121 * 6, - # } + # Data variable chunking and sharding + # + # Aim for one of these roughly equivalent quantities: + # 1-2mb chunks compressed + # 4-8mb uncompressed + # 4-8 million float32 values + var_chunks: dict[Dim, int] = { + "init_time": 1, + "lead_time": 121, + "latitude": 73, + "longitude": 153, + } + # Aim for one of these roughly equivalent quantities: + # 64-256MB shards compressed + # 256-1024MB uncompressed + # 256 million to 1 billion float32 values + var_shards: dict[Dim, int] = { + "init_time": 1, + "lead_time": 121, + "latitude": 657, + "longitude": 1377, + } - # encoding_float32_default = Encoding( - # dtype="float32", - # fill_value=np.nan, - # chunks=tuple(var_chunks[d] for d in self.dims), - # shards=tuple(var_shards[d] for d in self.dims), - # compressors=[BLOSC_4BYTE_ZSTD_LEVEL3_SHUFFLE], - # ) + encoding_float32_default = Encoding( + dtype="float32", + fill_value=np.nan, + chunks=tuple(var_chunks[d] for d in self.dims), + 
shards=tuple(var_shards[d] for d in self.dims), + compressors=[BLOSC_4BYTE_ZSTD_LEVEL3_SHUFFLE], + ) - # default_keep_mantissa_bits = 7 + default_keep_mantissa_bits = 7 - # # return [ - # DwdIconEuDataVar( - # name="temperature_2m", - # encoding=encoding_float32_default, - # attrs=DataVarAttrs( - # short_name="t2m", - # long_name="2 metre temperature", - # units="C", - # step_type="instant", - # standard_name="air_temperature", - # ), - # internal_attrs=DwdIconEuInternalAttrs( - # grib_element="TMP", - # grib_comment='2[m] HTGL="Specified height level above ground"', - # grib_index_level="2 m above ground", - # index_position=580, - # keep_mantissa_bits=default_keep_mantissa_bits, - # ), - # ), - # DwdIconEuDataVar( - # name="precipitation_surface", - # encoding=encoding_float32_default, - # attrs=DataVarAttrs( - # short_name="tp", - # long_name="Total Precipitation", - # units="mm/s", - # comment="Average precipitation rate since the previous forecast step.", - # step_type="avg", - # ), - # internal_attrs=DwdIconEuInternalAttrs( - # grib_element="APCP", - # grib_comment='0[-] SFC="Ground or water surface"', - # grib_index_level="surface", - # index_position=595, - # include_lead_time_suffix=True, - # deaccumulate_to_rate=True, - # window_reset_frequency=pd.Timedelta("6h"), - # keep_mantissa_bits=default_keep_mantissa_bits, - # ), - # ), - # DwdIconEuDataVar( - # name="pressure_surface", - # encoding=encoding_float32_default, - # attrs=DataVarAttrs( - # short_name="sp", - # long_name="Surface pressure", - # units="Pa", - # step_type="instant", - # standard_name="surface_air_pressure", - # ), - # internal_attrs=DwdIconEuInternalAttrs( - # grib_element="PRES", - # grib_comment='0[-] SFC="Ground or water surface"', - # grib_index_level="surface", - # index_position=560, - # keep_mantissa_bits=10, - # ), - # ), - # DwdIconEuDataVar( - # name="categorical_snow_surface", - # encoding=encoding_float32_default, - # attrs=DataVarAttrs( - # short_name="csnow", - # 
long_name="Categorical snow", - # units="0=no; 1=yes", - # step_type="avg", - # ), - # internal_attrs=DwdIconEuInternalAttrs( - # grib_element="CSNOW", - # grib_comment='0[-] SFC="Ground or water surface"', - # grib_index_level="surface", - # index_position=604, - # window_reset_frequency=pd.Timedelta("6h"), - # keep_mantissa_bits="no-rounding", - # ), - # ), - # DwdIconEuDataVar( - # name="total_cloud_cover_atmosphere", - # encoding=encoding_float32_default, - # attrs=DataVarAttrs( - # short_name="tcc", - # long_name="Total Cloud Cover", - # units="%", - # step_type="avg", - # ), - # internal_attrs=DwdIconEuInternalAttrs( - # grib_element="TCDC", - # grib_comment='0[-] EATM="Entire Atmosphere"', - # grib_index_level="entire atmosphere", - # index_position=635, - # window_reset_frequency=pd.Timedelta("6h"), - # keep_mantissa_bits=default_keep_mantissa_bits, - # ), - # ), - # ] - raise NotImplementedError("Subclasses implement `data_vars`") + return [ + DwdIconEuDataVar( + name="alb_rad", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Surface albedo", + units="%", + step_type="avg", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="ALB_RAD", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="aswdifd_s", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Surface downward diffuse short-wave radiation", + units="W m**-2", + step_type="avg", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="ASWDIFD_S", + deaccumulate_to_rate=True, + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="aswdir_s", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Surface downward direct short-wave radiation", + units="W m**-2", + step_type="avg", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="ASWDIR_S", + deaccumulate_to_rate=True, + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + 
name="cape_con", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Convective Available Potential Energy", + units="J kg**-1", + step_type="instant", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="CAPE_CON", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="clch", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="High cloud cover", + units="%", + step_type="instant", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="CLCH", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="clcl", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Low cloud cover", + units="%", + step_type="instant", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="CLCL", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="clcm", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Medium cloud cover", + units="%", + step_type="instant", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="CLCM", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="clct", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Total Cloud Cover", + units="%", + step_type="avg", + standard_name="cloud_area_fraction", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="CLCT", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="h_snow", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Snow depth", + units="m", + step_type="instant", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="H_SNOW", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="pmsl", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Mean sea level pressure", + units="Pa", + step_type="instant", + 
standard_name="air_pressure_at_mean_sea_level", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="PMSL", + keep_mantissa_bits=10, + ), + ), + DwdIconEuDataVar( + name="relhum_2m", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="2m Relative Humidity", + units="%", + step_type="instant", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="RELHUM_2M", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="runoff_g", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Grid-scale runoff", + units="kg m**-2", + step_type="accum", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="RUNOFF_G", + deaccumulate_to_rate=True, + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="t_2m", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="2 metre temperature", + units="C", + step_type="instant", + standard_name="air_temperature", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="T_2M", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="tot_prec", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Total Precipitation", + units="mm/s", + comment="Average precipitation rate since the previous forecast step.", + step_type="avg", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="TOT_PREC", + deaccumulate_to_rate=True, + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="u_10m", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="10 metre U wind component", + units="m s**-1", + step_type="instant", + standard_name="eastward_wind", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="U_10M", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="v_10m", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="10 metre V wind component", + 
units="m s**-1", + step_type="instant", + standard_name="northward_wind", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="V_10M", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="vmax_10m", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="10m Wind Gust Speed", + units="m s**-1", + step_type="max", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="VMAX_10M", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + DwdIconEuDataVar( + name="w_snow", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + long_name="Water equivalent of snow depth", + units="kg m**-2", + step_type="instant", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="W_SNOW", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), + ] From 063bf2458f76f0da92cb85be591f7d8b6d55a2f2 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 21 Jul 2025 10:57:10 +0100 Subject: [PATCH 03/35] All that's left to do in template_config is check the data_vars --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 956a2ad9..2b4ee299 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -76,8 +76,7 @@ def dimension_coordinates(self) -> dict[str, Any]: pd.timedelta_range("81h", "120h", freq="3h") ) ), - # TODO: Continue checking Gemini's output from here (downwards): - "latitude": np.linspace(70.5, 29.5, 657), + "latitude": np.linspace(29.5, 70.5, 657), "longitude": np.linspace(-23.5, 62.5, 1377), } From 12baf81489f7d28d797623cfcea1bcb82ed0e666 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 21 Jul 2025 19:55:14 +0100 Subject: [PATCH 04/35] First complete draft of ICON-EU template_config.py --- 
.../dwd/icon_eu/forecast/template_config.py | 170 +++++++++--------- 1 file changed, 84 insertions(+), 86 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 2b4ee299..2898c899 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -34,7 +34,6 @@ class DwdIconEuInternalAttrs(BaseInternalAttrs): """ grib_element: str - grib_description: str class DwdIconEuDataVar(DataVar[DwdIconEuInternalAttrs]): @@ -44,7 +43,9 @@ class DwdIconEuDataVar(DataVar[DwdIconEuInternalAttrs]): class DwdIconEuForecastTemplateConfig(TemplateConfig[DwdIconEuDataVar]): dims: tuple[Dim, ...] = ("init_time", "lead_time", "latitude", "longitude") append_dim: AppendDim = "init_time" - append_dim_start: Timestamp = pd.Timestamp("2025-08-01T00:00") + append_dim_start: Timestamp = pd.Timestamp( + "2020-01-01T00:00" # The start of OCF's ICON-EU archive on Hugging Face. + ) append_dim_frequency: Timedelta = pd.Timedelta("6h") @computed_field # type: ignore[prop-decorator] @@ -71,7 +72,7 @@ def dimension_coordinates(self) -> dict[str, Any]: self.append_dim: self.append_dim_coordinates( self.append_dim_start + self.append_dim_frequency ), - "lead_time": ( + "lead_time": ( # Called "step" in the ICON-EU GRIB files. 
pd.timedelta_range("0h", "78h", freq="1h").union( pd.timedelta_range("81h", "120h", freq="3h") ) @@ -254,7 +255,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: # 4-8 million float32 values var_chunks: dict[Dim, int] = { "init_time": 1, - "lead_time": 121, + "lead_time": 120, "latitude": 73, "longitude": 153, } @@ -264,7 +265,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: # 256 million to 1 billion float32 values var_shards: dict[Dim, int] = { "init_time": 1, - "lead_time": 121, + "lead_time": 120, "latitude": 657, "longitude": 1377, } @@ -281,246 +282,243 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: return [ DwdIconEuDataVar( - name="alb_rad", + name="downward_diffuse_short_wave_radiation_flux_surface", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="Surface albedo", - units="%", + short_name="msdfswrf", # From ECMWF parameter database. + long_name="Downward diffusive short wave radiation flux at surface (mean over forecast time)", + units="W m-2", step_type="avg", + standard_name="Mean surface diffuse short-wave radiation flux", # From ECMWF. ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="ALB_RAD", + grib_element="aswdifd_s", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="aswdifd_s", + name="downward_direct_short_wave_radiation_flux_surface", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="Surface downward diffuse short-wave radiation", - units="W m**-2", + short_name="avg_sdirswrf", # From ECMWF param DB. 
+ long_name="Downward direct short wave radiation flux at surface (mean over forecast time)", + units="W m-2", step_type="avg", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="ASWDIFD_S", - deaccumulate_to_rate=True, + grib_element="aswdir_s", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="aswdir_s", + name="convective_available_potential_energy", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="Surface downward direct short-wave radiation", - units="W m**-2", - step_type="avg", - ), - internal_attrs=DwdIconEuInternalAttrs( - grib_element="ASWDIR_S", - deaccumulate_to_rate=True, - keep_mantissa_bits=default_keep_mantissa_bits, - ), - ), - DwdIconEuDataVar( - name="cape_con", - encoding=encoding_float32_default, - attrs=DataVarAttrs( - long_name="Convective Available Potential Energy", - units="J kg**-1", + short_name="cape", + long_name="Convective available potential energy", + units="J kg-1", step_type="instant", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="CAPE_CON", + grib_element="cape_con", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="clch", + name="high_cloud_cover", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="High cloud cover", + short_name="hcc", + long_name="Cloud Cover (0 - 400 hPa)", units="%", step_type="instant", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="CLCH", + grib_element="clch", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="clcl", + name="low_cloud_cover", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="Low cloud cover", + short_name="lcc", + long_name="Cloud Cover (800 hPa - Soil)", units="%", step_type="instant", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="CLCL", + grib_element="clcl", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="clcm", + name="medium_cloud_cover", encoding=encoding_float32_default, 
attrs=DataVarAttrs( - long_name="Medium cloud cover", + short_name="mcc", + long_name="Cloud Cover (400 - 800 hPa)", units="%", step_type="instant", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="CLCM", + grib_element="clcm", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="clct", + name="total_cloud_cover", encoding=encoding_float32_default, attrs=DataVarAttrs( + short_name="tcc", long_name="Total Cloud Cover", units="%", - step_type="avg", - standard_name="cloud_area_fraction", + step_type="instant", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="CLCT", + grib_element="clct", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="h_snow", + name="snow_depth", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="Snow depth", + short_name="sde", + long_name="lwe_thickness_of_surface_snow_amount", units="m", step_type="instant", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="H_SNOW", + grib_element="h_snow", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="pmsl", + name="pressure_reduced_to_msl", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="Mean sea level pressure", + short_name="pmsl", + long_name="Pressure reduced to mean sea level (MSL)", units="Pa", step_type="instant", - standard_name="air_pressure_at_mean_sea_level", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="PMSL", - keep_mantissa_bits=10, + grib_element="pmsl", + keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="relhum_2m", + name="relative_humidity", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="2m Relative Humidity", + short_name="r", + long_name="2 metre relative humidity", units="%", step_type="instant", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="RELHUM_2M", + grib_element="relhum_2m", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - 
name="runoff_g", + name="water_runoff", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="Grid-scale runoff", - units="kg m**-2", + short_name="watr", + long_name="Water Runoff", + units="kg m-2", step_type="accum", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="RUNOFF_G", - deaccumulate_to_rate=True, + grib_element="runoff_g", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="t_2m", + name="temperature_2m", encoding=encoding_float32_default, attrs=DataVarAttrs( + short_name="t2m", # ECMWF calls this "2t". NOAA & DWD use "t2m". long_name="2 metre temperature", - units="C", + units="K", step_type="instant", - standard_name="air_temperature", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="T_2M", + grib_element="t_2m", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="tot_prec", + name="total_precipitation", encoding=encoding_float32_default, attrs=DataVarAttrs( + short_name="tp", long_name="Total Precipitation", - units="mm/s", - comment="Average precipitation rate since the previous forecast step.", - step_type="avg", + units="kg m**-2", + step_type="accum", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="TOT_PREC", - deaccumulate_to_rate=True, + grib_element="tot_prec", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="u_10m", + name="wind_u_10", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="10 metre U wind component", - units="m s**-1", + short_name="u10", + long_name="10 metre U wind component (eastward)", + units="m/s", step_type="instant", standard_name="eastward_wind", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="U_10M", + grib_element="u_10m", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="v_10m", + name="wind_v_10", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="10 metre V wind component", - units="m s**-1", + short_name="v10", + 
long_name="10 metre V wind component (northward)", + units="m/s", step_type="instant", standard_name="northward_wind", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="V_10M", + grib_element="v_10m", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="vmax_10m", + name="maximum_wind_10m", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="10m Wind Gust Speed", - units="m s**-1", + short_name="i10fg", + long_name="Time-maximum instantaneous 10 metre wind gust", + units="m/s", step_type="max", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="VMAX_10M", + grib_element="vmax_10m", keep_mantissa_bits=default_keep_mantissa_bits, ), ), DwdIconEuDataVar( - name="w_snow", + name="snow_depth_water_equivalent", encoding=encoding_float32_default, attrs=DataVarAttrs( - long_name="Water equivalent of snow depth", + short_name="sd", + long_name="Snow depth water equivalent", units="kg m**-2", step_type="instant", ), internal_attrs=DwdIconEuInternalAttrs( - grib_element="W_SNOW", + grib_element="w_snow", keep_mantissa_bits=default_keep_mantissa_bits, ), ), From bc3f3a3f89adf03c953bad3044f629febb8ba413 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 22 Jul 2025 17:04:05 +0100 Subject: [PATCH 05/35] Add fields for all DataVarAttrs from ICON PDF doc. --- .../dwd/icon_eu/forecast/template_config.py | 49 ++++++++++++++++--- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 2898c899..3a50674d 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -290,6 +290,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: units="W m-2", step_type="avg", standard_name="Mean surface diffuse short-wave radiation flux", # From ECMWF. 
+ comment="Downward solar diffuse radiation flux at the surface, averaged over forecast time.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="aswdifd_s", @@ -304,6 +305,12 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Downward direct short wave radiation flux at surface (mean over forecast time)", units="W m-2", step_type="avg", + comment=( + "Downward solar direct radiation flux at the surface, averaged over forecast time." + " This quantity is not directly provided by the radiation scheme." + " It is aposteriori diagnosed from the definition of the surface net" + " shortwave radiation flux." + ), ), internal_attrs=DwdIconEuInternalAttrs( grib_element="aswdir_s", @@ -314,10 +321,11 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="convective_available_potential_energy", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="cape", + short_name="cape_con", long_name="Convective available potential energy", units="J kg-1", step_type="instant", + comment="Convective available potential energy", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="cape_con", @@ -329,9 +337,10 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="hcc", - long_name="Cloud Cover (0 - 400 hPa)", + long_name="High level clouds", units="%", step_type="instant", + comment="Cloud Cover (0 - 400 hPa)", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="clch", @@ -343,9 +352,10 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="lcc", - long_name="Cloud Cover (800 hPa - Soil)", + long_name="Low level clouds", units="%", step_type="instant", + comment="Cloud Cover (800 hPa - Soil)", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="clcl", @@ -357,9 +367,10 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="mcc", - 
long_name="Cloud Cover (400 - 800 hPa)", + long_name="Mid level clouds", units="%", step_type="instant", + comment="Cloud Cover (400 - 800 hPa)", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="clcm", @@ -374,6 +385,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Total Cloud Cover", units="%", step_type="instant", + comment="Total cloud cover", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="clct", @@ -388,6 +400,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="lwe_thickness_of_surface_snow_amount", units="m", step_type="instant", + comment="Snow depth in m. It is diagnosed from RHO_SNOW and W_SNOW according to H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="h_snow", @@ -402,6 +415,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Pressure reduced to mean sea level (MSL)", units="Pa", step_type="instant", + comment="Surface pressure reduced to MSL", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="pmsl", @@ -416,6 +430,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="2 metre relative humidity", units="%", step_type="instant", + comment="Relative humidity at 2m above ground", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="relhum_2m", @@ -423,19 +438,35 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: ), ), DwdIconEuDataVar( - name="water_runoff", + name="soil_water_runoff", encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="watr", - long_name="Water Runoff", + long_name="Soil water runoff", units="kg m-2", step_type="accum", + comment="Soil water runoff (accumulated since model start)", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="runoff_g", keep_mantissa_bits=default_keep_mantissa_bits, ), ), + DwdIconEuDataVar( + name="surface_water_runoff", + encoding=encoding_float32_default, + attrs=DataVarAttrs( + short_name="watr", + long_name="Surface water Runoff", + 
units="kg m-2", + step_type="accum", + comment="Surface water runoff from interception and snow reservoir and from limited infiltration rate. Sum over forecast.", + ), + internal_attrs=DwdIconEuInternalAttrs( + grib_element="runoff_s", + keep_mantissa_bits=default_keep_mantissa_bits, + ), + ), DwdIconEuDataVar( name="temperature_2m", encoding=encoding_float32_default, @@ -444,6 +475,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="2 metre temperature", units="K", step_type="instant", + comment="Temperature at 2m above ground. The average over all tiles of a grid point.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="t_2m", @@ -458,6 +490,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Total Precipitation", units="kg m**-2", step_type="accum", + comment="Total precipitation accumulated since model start. TOT_PREC = RAIN_GSP + SNOW_GSP + RAIN_CON + SNOW_CON.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="tot_prec", @@ -473,6 +506,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: units="m/s", step_type="instant", standard_name="eastward_wind", + comment="Zonal wind at 10m above ground", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="u_10m", @@ -488,6 +522,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: units="m/s", step_type="instant", standard_name="northward_wind", + comment="Meridional wind at 10m above ground", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="v_10m", @@ -502,6 +537,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Time-maximum instantaneous 10 metre wind gust", units="m/s", step_type="max", + comment="Maximum wind gust at 10 m above ground. It is diagnosed from the turbulence state in the atmospheric boundary layer, including a potential enhancement by the SSO parameterization over mountainous terrain. 
In the presence of deep convection, it contains an additional contribution due to convective gusts.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="vmax_10m", @@ -516,6 +552,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Snow depth water equivalent", units="kg m**-2", step_type="instant", + comment="Snow depth water equivalent in kg/m2. Set to 0 above water surfaces and snow-free land points.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="w_snow", From b26950b1de861146ad3d1c8405c43fb7e3596df3 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 22 Jul 2025 17:19:42 +0100 Subject: [PATCH 06/35] Reformat the comment strings so they are all under 100 chars long. --- .../dwd/icon_eu/forecast/template_config.py | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 3a50674d..84ee6282 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -281,6 +281,8 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: default_keep_mantissa_bits = 7 return [ + # The `comment` text is taken from the DWD Database Reference PDF: + # https://www.dwd.de/DWD/forschung/nwv/fepub/icon_database_main.pdf DwdIconEuDataVar( name="downward_diffuse_short_wave_radiation_flux_surface", encoding=encoding_float32_default, @@ -400,7 +402,10 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="lwe_thickness_of_surface_snow_amount", units="m", step_type="instant", - comment="Snow depth in m. It is diagnosed from RHO_SNOW and W_SNOW according to H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m.", + comment=( + "Snow depth in m. It is diagnosed from RHO_SNOW and W_SNOW according to" + " H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m." 
+ ), ), internal_attrs=DwdIconEuInternalAttrs( grib_element="h_snow", @@ -460,7 +465,10 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Surface water Runoff", units="kg m-2", step_type="accum", - comment="Surface water runoff from interception and snow reservoir and from limited infiltration rate. Sum over forecast.", + comment=( + "Surface water runoff from interception and snow reservoir and from" + " limited infiltration rate. Sum over forecast." + ), ), internal_attrs=DwdIconEuInternalAttrs( grib_element="runoff_s", @@ -475,7 +483,9 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="2 metre temperature", units="K", step_type="instant", - comment="Temperature at 2m above ground. The average over all tiles of a grid point.", + comment=( + "Temperature at 2m above ground, averaged over all tiles of a grid point." + ), ), internal_attrs=DwdIconEuInternalAttrs( grib_element="t_2m", @@ -490,7 +500,10 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Total Precipitation", units="kg m**-2", step_type="accum", - comment="Total precipitation accumulated since model start. TOT_PREC = RAIN_GSP + SNOW_GSP + RAIN_CON + SNOW_CON.", + comment=( + "Total precipitation accumulated since model start." + " TOT_PREC = RAIN_GSP + SNOW_GSP + RAIN_CON + SNOW_CON." + ), ), internal_attrs=DwdIconEuInternalAttrs( grib_element="tot_prec", @@ -537,7 +550,13 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Time-maximum instantaneous 10 metre wind gust", units="m/s", step_type="max", - comment="Maximum wind gust at 10 m above ground. It is diagnosed from the turbulence state in the atmospheric boundary layer, including a potential enhancement by the SSO parameterization over mountainous terrain. In the presence of deep convection, it contains an additional contribution due to convective gusts.", + comment=( + "Maximum wind gust at 10 m above ground. 
It is diagnosed from the turbulence" + " state in the atmospheric boundary layer, including a potential" + " enhancement by the SSO parameterization over mountainous terrain." + " In the presence of deep convection, it contains an additional" + " contribution due to convective gusts." + ), ), internal_attrs=DwdIconEuInternalAttrs( grib_element="vmax_10m", @@ -552,7 +571,10 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: long_name="Snow depth water equivalent", units="kg m**-2", step_type="instant", - comment="Snow depth water equivalent in kg/m2. Set to 0 above water surfaces and snow-free land points.", + comment=( + "Snow depth water equivalent in kg/m2." + " Set to 0 above water surfaces and snow-free land points." + ), ), internal_attrs=DwdIconEuInternalAttrs( grib_element="w_snow", From dce7afe5fc84e34c67ff23a57203dce6d5a40701 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 22 Jul 2025 17:25:02 +0100 Subject: [PATCH 07/35] Add a note about alb_rad not being included --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 84ee6282..3b1a616a 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -283,6 +283,12 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: return [ # The `comment` text is taken from the DWD Database Reference PDF: # https://www.dwd.de/DWD/forschung/nwv/fepub/icon_database_main.pdf + # + # We don't include `alb_rad` (shortwave broadband albedo for + # diffuse radiation) in the Zarr because, to quote the DWD + # Database Reference: "Values over snow-free land points are based + # on a monthly mean MODIS climatology." It's much more data-efficient + # to just download those monthly means from DWD. 
DwdIconEuDataVar( name="downward_diffuse_short_wave_radiation_flux_surface", encoding=encoding_float32_default, From 159b89d74976dc755194f5460802d5393d893ddd Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 22 Jul 2025 19:19:12 +0100 Subject: [PATCH 08/35] Implemented first test for template_config. Add latest.zarr --- .../zarr.json | 88 + .../zarr.json | 89 + .../zarr.json | 88 + .../latest.zarr/high_cloud_cover/zarr.json | 88 + .../latest.zarr/ingested_forecast_length/c/0 | Bin 0 -> 80 bytes .../ingested_forecast_length/zarr.json | 52 + .../templates/latest.zarr/init_time/c/0 | Bin 0 -> 79 bytes .../templates/latest.zarr/init_time/zarr.json | 53 + .../templates/latest.zarr/latitude/c/0 | Bin 0 -> 632 bytes .../templates/latest.zarr/latitude/zarr.json | 53 + .../templates/latest.zarr/lead_time/c/0 | Bin 0 -> 260 bytes .../templates/latest.zarr/lead_time/zarr.json | 52 + .../templates/latest.zarr/longitude/c/0 | Bin 0 -> 1324 bytes .../templates/latest.zarr/longitude/zarr.json | 53 + .../latest.zarr/low_cloud_cover/zarr.json | 88 + .../latest.zarr/maximum_wind_10m/zarr.json | 88 + .../latest.zarr/medium_cloud_cover/zarr.json | 88 + .../pressure_reduced_to_msl/zarr.json | 88 + .../latest.zarr/relative_humidity/zarr.json | 88 + .../latest.zarr/snow_depth/zarr.json | 88 + .../snow_depth_water_equivalent/zarr.json | 88 + .../latest.zarr/soil_water_runoff/zarr.json | 88 + .../latest.zarr/spatial_ref/zarr.json | 49 + .../surface_water_runoff/zarr.json | 88 + .../latest.zarr/temperature_2m/zarr.json | 88 + .../latest.zarr/total_cloud_cover/zarr.json | 88 + .../latest.zarr/total_precipitation/zarr.json | 88 + .../templates/latest.zarr/valid_time/c/0/0 | Bin 0 -> 5220 bytes .../latest.zarr/valid_time/zarr.json | 56 + .../templates/latest.zarr/wind_u_10/zarr.json | 89 + .../templates/latest.zarr/wind_v_10/zarr.json | 89 + .../forecast/templates/latest.zarr/zarr.json | 1978 +++++++++++++++++ .../icon_eu/forecast/template_config_test.py | 48 +- 33 files changed, 3958 
insertions(+), 23 deletions(-) create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/convective_available_potential_energy/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/c/0 create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/init_time/c/0 create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/init_time/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/latitude/c/0 create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/latitude/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/lead_time/c/0 create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/lead_time/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/longitude/c/0 create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/longitude/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/maximum_wind_10m/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json create mode 100644 
src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth_water_equivalent/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/soil_water_runoff/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/surface_water_runoff/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_precipitation/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/c/0/0 create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10/zarr.json create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/convective_available_potential_energy/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/convective_available_potential_energy/zarr.json new file mode 100644 index 00000000..a2aa9338 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/convective_available_potential_energy/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ 
+ 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Convective available potential energy", + "short_name": "cape_con", + "units": "J kg-1", + "comment": "Convective available potential energy", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json new file mode 100644 index 00000000..dd1116d4 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json @@ -0,0 +1,89 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + 
"configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Downward diffusive short wave radiation flux at surface (mean over forecast time)", + "short_name": "msdfswrf", + "standard_name": "Mean surface diffuse short-wave radiation flux", + "units": "W m-2", + "comment": "Downward solar diffuse radiation flux at the surface, averaged over forecast time.", + "step_type": "avg", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json new file mode 100644 index 00000000..1c4e11db --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + 
], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Downward direct short wave radiation flux at surface (mean over forecast time)", + "short_name": "avg_sdirswrf", + "units": "W m-2", + "comment": "Downward solar direct radiation flux at the surface, averaged over forecast time. This quantity is not directly provided by the radiation scheme. It is aposteriori diagnosed from the definition of the surface net shortwave radiation flux.", + "step_type": "avg", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json new file mode 100644 index 00000000..5ea7a1db --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": 
"little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "High level clouds", + "short_name": "hcc", + "units": "%", + "comment": "Cloud Cover (0 - 400 hPa)", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/c/0 b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/c/0 new file mode 100644 index 0000000000000000000000000000000000000000..85d71c5a90ad594544454d42c929d70ce8a4b441 GIT binary patch literal 80 zcmZQ#oXFTPfti7Ufr%jiNJ;>)84ybWvBp;YzY9PTr3?%T3=9oG)|dYZ$&3k%Vldf; V35*P(3=9Gc42%rb6D$uf0RRcK4>)DG*BnvBp;YzY9PTxj;6|C`Vuyj6%{*{W8f1m8=H2TrWmcTM#En@78J z<31nFUH8)WsQBL%w=T(v@77x=QPE}iVai(Vy39h~{I(>{cZCc^48;tb?%x&~@7s1s zQ@Li{#-&15+&+)><6feRa$yza#438O=KjxXH-b22gE<<1?)$tJWa_dDjGt?tM<=Hr znAN+#VyU{!&GvIYZh0;5`LOfYG_Mynmw$AJR~?#I`=V$4+LBWo^RshSw)8P$6ZOL? 
z%7!*goK!-h4N}^a77?^Bw`+13m2zxlEXH4hSt^ zQek-UC?do_TS}|0i?!8$;fxomLP9kxN=fY&4lnp`GQTUZyulmFeAdA51ouUzGZhl+ z9Fz~R=`daAJUPkbaj@X`R`5bq%UFbe@d8Bix+HE=_KQIKDtZLoX5iO`qe zrSP)=!Vu6P-Z1Py`cMc0U>0yBfHIIkpiZz{z-rKV;D+#&0HYAGAiprpK-*C1VE1qZ z1Ar5dA)qg?J-|!QS>R{zbpV7Ak|3Zku0Xs{%3#=V=798&0tBECupYoH&^q8q5OYAC zaL1tW&>IB+Mlf(-nvlf6?(i4}AVW}XfR_dU00000T&NHbe(;3@5E_0}u(noJ+x^N` Kb&DmcND>6PM`?oq literal 0 HcmV?d00001 diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/lead_time/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/lead_time/zarr.json new file mode 100644 index 00000000..3bc64fe5 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/lead_time/zarr.json @@ -0,0 +1,52 @@ +{ + "shape": [ + 93 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 93 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "statistics_approximate": { + "min": "0 days 00:00:00", + "max": "5 days 00:00:00" + }, + "units": "seconds" + }, + "dimension_names": [ + "lead_time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/longitude/c/0 b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/longitude/c/0 new file mode 100644 index 0000000000000000000000000000000000000000..b342b310d6c6df1c8addcc5174d4c300b0c77fb0 GIT binary patch literal 1324 zcmZQ#oXE(b&AIRK0 zErXc>8dq2tA!s54lp$xFSSc#kzS<;>FFdqDX5FP3mND7hjIS8j8GkVBo4B6wKJz|~ z8sQ&WA7USzKCr#^?>6%vx#b^j|9h}q_E3Ix%l$IW{cnZqZZExG#Y9 z1L{%+G5x_zRS+{D%v1+4`@dL_4}U4 
zx4lbi-*f)HC-iNv@|$Ys8{g|TynmZ;fA_(C-=oqu>`RC}Sk+X@@|yh`?;7{ltk*c# z@JEPmkV}v~uCpnagcL7(wH6XO@-)%6qS z-Ds4#Z_|6)#3wi>Dbv-`EW{C;xCIVza~$G(r=FWG_^WuWq`q|Y%Nogr_Sa{9T(uD{ zC;DTEbf?6;^ zZr>%EW*fICDM_zV_%-wDeh>dQ4t@Faj-^U11BowE#4fIcLwu0}4)r+17s=yLk4yYa zn^s3%+E@Kc!qyWW&t6_Qvn{jBS7+^tNq{#jo}xS^q; z?SG2O$-Ocq({4=obMoZ!EMD$uGS^p~o9f{Fc=;>eE}Jd2J16X@o+YvC&}mgC<_Qhj z&$fj2U*J6S%60aqlv%q~-tV8n9kj3Or`EG@!}!!|pHE#>>Yli%@_EUMLn$!_3@qf% zv85@!ykMZg*T4CF{P(>t`gB)`U!I;IA;W*3&1hmthJ=f#+S{KgD=)fq?yR$nN-@+7 qEQ+397<;T#@_K5_^pCrGUK{<>{Ujh1z@_@MCA9MYE;q&traS=Gb(Eq2 literal 0 HcmV?d00001 diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/longitude/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/longitude/zarr.json new file mode 100644 index 00000000..e37dfced --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/longitude/zarr.json @@ -0,0 +1,53 @@ +{ + "shape": [ + 1377 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "units": "degrees_east", + "statistics_approximate": { + "min": -23.5, + "max": 62.5 + }, + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json new file mode 100644 index 00000000..cfd8ec22 --- /dev/null +++ 
b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Low level clouds", + "short_name": "lcc", + "units": "%", + "comment": "Cloud Cover (800 hPa - Soil)", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/maximum_wind_10m/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/maximum_wind_10m/zarr.json new file mode 100644 index 00000000..9cb3accf --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/maximum_wind_10m/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + 
"configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Time-maximum instantaneous 10 metre wind gust", + "short_name": "i10fg", + "units": "m/s", + "comment": "Maximum wind gust at 10 m above ground. It is diagnosed from the turbulence state in the atmospheric boundary layer, including a potential enhancement by the SSO parameterization over mountainous terrain. In the presence of deep convection, it contains an additional contribution due to convective gusts.", + "step_type": "max", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json new file mode 100644 index 00000000..e404e227 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, 
+ "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Mid level clouds", + "short_name": "mcc", + "units": "%", + "comment": "Cloud Cover (400 - 800 hPa)", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json new file mode 100644 index 00000000..356a171e --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": 
"shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Pressure reduced to mean sea level (MSL)", + "short_name": "pmsl", + "units": "Pa", + "comment": "Surface pressure reduced to MSL", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json new file mode 100644 index 00000000..c07dd4c2 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "2 metre relative humidity", + "short_name": "r", + "units": "%", + "comment": 
"Relative humidity at 2m above ground", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json new file mode 100644 index 00000000..0ffdb29b --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "lwe_thickness_of_surface_snow_amount", + "short_name": "sde", + "units": "m", + "comment": "Snow depth in m. 
It is diagnosed from RHO_SNOW and W_SNOW according to H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m.", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth_water_equivalent/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth_water_equivalent/zarr.json new file mode 100644 index 00000000..64d7afc5 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth_water_equivalent/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Snow depth water equivalent", + "short_name": "sd", + "units": "kg m**-2", + "comment": "Snow depth water equivalent in kg/m2. 
Set to 0 above water surfaces and snow-free land points.", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/soil_water_runoff/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/soil_water_runoff/zarr.json new file mode 100644 index 00000000..e4dd3558 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/soil_water_runoff/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Soil water runoff", + "short_name": "watr", + "units": "kg m-2", + "comment": "Soil water runoff (accumulated since model start)", + "step_type": "accum", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + 
"storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json new file mode 100644 index 00000000..46717abc --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json @@ -0,0 +1,49 @@ +{ + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "comment": "This coordinate reference system matches the source data which follows WMO conventions of assuming the earth is a perfect sphere with a radius of 6,371,229m. 
It is similar to EPSG:4326, but EPSG:4326 uses a more accurate representation of the earth's shape.", + "crs_wkt": "GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"unknown\",6371229,0]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Longitude\",EAST],AXIS[\"Latitude\",NORTH]]", + "semi_major_axis": 6371229.0, + "semi_minor_axis": 6371229.0, + "inverse_flattening": 0.0, + "reference_ellipsoid_name": "unknown", + "longitude_of_prime_meridian": 0.0, + "prime_meridian_name": "Greenwich", + "geographic_crs_name": "unknown", + "horizontal_datum_name": "unknown", + "grid_mapping_name": "latitude_longitude", + "spatial_ref": "GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"unknown\",6371229,0]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Longitude\",EAST],AXIS[\"Latitude\",NORTH]]" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/surface_water_runoff/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/surface_water_runoff/zarr.json new file mode 100644 index 00000000..4870a60a --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/surface_water_runoff/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + 
"cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Surface water Runoff", + "short_name": "watr", + "units": "kg m-2", + "comment": "Surface water runoff from interception and snow reservoir and from limited infiltration rate. Sum over forecast.", + "step_type": "accum", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json new file mode 100644 index 00000000..ddded0a5 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + 
"long_name": "2 metre temperature", + "short_name": "t2m", + "units": "K", + "comment": "Temperature at 2m above ground, averaged over all tiles of a grid point.", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json new file mode 100644 index 00000000..de70ef04 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Total Cloud Cover", + "short_name": "tcc", + "units": "%", + "comment": "Total cloud cover", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + 
"longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_precipitation/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_precipitation/zarr.json new file mode 100644 index 00000000..ec05e50d --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_precipitation/zarr.json @@ -0,0 +1,88 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Total Precipitation", + "short_name": "tp", + "units": "kg m**-2", + "comment": "Total precipitation accumulated since model start. 
TOT_PREC = RAIN_GSP + SNOW_GSP + RAIN_CON + SNOW_CON.", + "step_type": "accum", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/c/0/0 b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/c/0/0 new file mode 100644 index 0000000000000000000000000000000000000000..89875d76d03e2d3d5167f45ec2f4d4593b502578 GIT binary patch literal 5220 zcmeI$|7%Tg9LMqZy=I$zJ-cQxOP3a-wcUhRtQ%Sjt!r&-G~6^_TIe=2W84m{g~YWM z5;|e)hSoJ-Lc?i^wS2iPW{uYMC84oPYi2grQ=emXxqemUo-&-?v;eLm-W%%&qI z)xFdtVwTcG5wBU~%qWo-bRvkTFp*SbqZs8lh(_q>MnB?4i=<&8N>B+8&f_k6FbGSy zNIG)h#7@+s32k_VVI)O}*suhpaN{_d@c?fjks`^Mg*9J2v^SFy148k&&{v!uY>_k19 z(1uqSMiTRA!x9Y8>a?qwBigS)EsT(g706*L^{)tQ2yT-arOv9^n(Qr(c4wFCt=jJ~$7_mjlAi9*3giZhWSz3r zy2p0de#&vddBb&Ied>9wefEX)Sj9gvFkLCIyIo$b!#^z35^T$z)t(0Zaqycp)#Aug zt5$if=G`9y&n&qw`$D16_HiS(b~G2Kq<19ls7uc1JCdLD&&pY^h)KSO=ILhRjZp(* z4vZ`q`+>118s~x(8Z|I#@ayZ)m;<8*#vJ_qQ!=t(WWmUSk%d1uC;wqTP~E1#nX7hl GPw^Aay}hmg literal 0 HcmV?d00001 diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/zarr.json new file mode 100644 index 00000000..e73a7866 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/zarr.json @@ -0,0 +1,56 @@ +{ + "shape": [ + 1, + 93 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 29200, + 93 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", 
+ "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "statistics_approximate": { + "min": "2020-01-01T00:00:00", + "max": "Present + 5 days" + }, + "units": "seconds since 1970-01-01", + "calendar": "proleptic_gregorian" + }, + "dimension_names": [ + "init_time", + "lead_time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json new file mode 100644 index 00000000..64c8303d --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json @@ -0,0 +1,89 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "10 metre U wind component (eastward)", + "short_name": "u10", + "standard_name": "eastward_wind", + "units": "m/s", + "comment": "Zonal wind at 10m above ground", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + 
"init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10/zarr.json new file mode 100644 index 00000000..e2251ed6 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10/zarr.json @@ -0,0 +1,89 @@ +{ + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "10 metre V wind component (northward)", + "short_name": "v10", + "standard_name": "northward_wind", + "units": "m/s", + "comment": "Meridional wind at 10m above ground", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json 
b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json new file mode 100644 index 00000000..4a840055 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json @@ -0,0 +1,1978 @@ +{ + "attributes": { + "dataset_id": "dwd-icon_eu-forecast", + "dataset_version": "0.1.0", + "name": "DWD ICON-EU Forecast", + "description": "High-resolution weather forecasts for Europe from the ICON-EU model operated by Deutscher Wetterdienst (DWD).", + "attribution": "DWD ICON-EU data processed by dynamical.org from DWD.", + "spatial_domain": "Europe", + "spatial_resolution": "0.0625 degrees (~7km)", + "time_domain": "Forecasts initialized 2020-01-01 00:00:00 UTC to Present", + "time_resolution": "Forecasts initialized every 6 hours", + "forecast_domain": "Forecast lead time 0-120 hours (0-5 days) ahead", + "forecast_resolution": "Forecast step 0-78 hours: hourly, 81-120 hours: 3 hourly" + }, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": { + "convective_available_potential_energy": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Convective available potential energy", + 
"short_name": "cape_con", + "units": "J kg-1", + "comment": "Convective available potential energy", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "downward_diffuse_short_wave_radiation_flux_surface": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Downward diffusive short wave radiation flux at surface (mean over forecast time)", + "short_name": "msdfswrf", + "standard_name": "Mean surface diffuse short-wave radiation flux", + "units": "W m-2", + "comment": "Downward solar diffuse radiation flux at the surface, averaged over forecast time.", + "step_type": "avg", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "downward_direct_short_wave_radiation_flux_surface": { + "shape": [ + 1, + 93, + 657, + 1377 + ], 
+ "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Downward direct short wave radiation flux at surface (mean over forecast time)", + "short_name": "avg_sdirswrf", + "units": "W m-2", + "comment": "Downward solar direct radiation flux at the surface, averaged over forecast time. This quantity is not directly provided by the radiation scheme. 
It is aposteriori diagnosed from the definition of the surface net shortwave radiation flux.", + "step_type": "avg", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "high_cloud_cover": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "High level clouds", + "short_name": "hcc", + "units": "%", + "comment": "Cloud Cover (0 - 400 hPa)", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "ingested_forecast_length": { + "shape": [ + 1 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 29200 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + 
"configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "statistics_approximate": { + "min": "0 days 00:00:00", + "max": "5 days 00:00:00" + }, + "units": "seconds" + }, + "dimension_names": [ + "init_time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "init_time": { + "shape": [ + 1 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 29200 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "statistics_approximate": { + "min": "2020-01-01T00:00:00", + "max": "Present" + }, + "units": "seconds since 1970-01-01", + "calendar": "proleptic_gregorian" + }, + "dimension_names": [ + "init_time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "latitude": { + "shape": [ + 657 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 657 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "units": "degrees_north", + "statistics_approximate": { + "min": 29.5, + "max": 70.5 + }, + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "latitude" + ], + "zarr_format": 3, + 
"node_type": "array", + "storage_transformers": [] + }, + "lead_time": { + "shape": [ + 93 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 93 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "statistics_approximate": { + "min": "0 days 00:00:00", + "max": "5 days 00:00:00" + }, + "units": "seconds" + }, + "dimension_names": [ + "lead_time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "longitude": { + "shape": [ + 1377 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "units": "degrees_east", + "statistics_approximate": { + "min": -23.5, + "max": 62.5 + }, + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "low_cloud_cover": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + 
"configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Low level clouds", + "short_name": "lcc", + "units": "%", + "comment": "Cloud Cover (800 hPa - Soil)", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "maximum_wind_10m": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Time-maximum instantaneous 10 metre wind gust", + "short_name": "i10fg", + "units": "m/s", + "comment": "Maximum wind gust at 10 m above ground. 
It is diagnosed from the turbulence state in the atmospheric boundary layer, including a potential enhancement by the SSO parameterization over mountainous terrain. In the presence of deep convection, it contains an additional contribution due to convective gusts.", + "step_type": "max", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "medium_cloud_cover": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Mid level clouds", + "short_name": "mcc", + "units": "%", + "comment": "Cloud Cover (400 - 800 hPa)", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "pressure_reduced_to_msl": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + 
"chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Pressure reduced to mean sea level (MSL)", + "short_name": "pmsl", + "units": "Pa", + "comment": "Surface pressure reduced to MSL", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "relative_humidity": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], 
+ "attributes": { + "long_name": "2 metre relative humidity", + "short_name": "r", + "units": "%", + "comment": "Relative humidity at 2m above ground", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "snow_depth": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "lwe_thickness_of_surface_snow_amount", + "short_name": "sde", + "units": "m", + "comment": "Snow depth in m. 
It is diagnosed from RHO_SNOW and W_SNOW according to H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m.", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "snow_depth_water_equivalent": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Snow depth water equivalent", + "short_name": "sd", + "units": "kg m**-2", + "comment": "Snow depth water equivalent in kg/m2. 
Set to 0 above water surfaces and snow-free land points.", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "soil_water_runoff": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Soil water runoff", + "short_name": "watr", + "units": "kg m-2", + "comment": "Soil water runoff (accumulated since model start)", + "step_type": "accum", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "spatial_ref": { + "shape": [], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" 
+ } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "comment": "This coordinate reference system matches the source data which follows WMO conventions of assuming the earth is a perfect sphere with a radius of 6,371,229m. It is similar to EPSG:4326, but EPSG:4326 uses a more accurate representation of the earth's shape.", + "crs_wkt": "GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"unknown\",6371229,0]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Longitude\",EAST],AXIS[\"Latitude\",NORTH]]", + "semi_major_axis": 6371229.0, + "semi_minor_axis": 6371229.0, + "inverse_flattening": 0.0, + "reference_ellipsoid_name": "unknown", + "longitude_of_prime_meridian": 0.0, + "prime_meridian_name": "Greenwich", + "geographic_crs_name": "unknown", + "horizontal_datum_name": "unknown", + "grid_mapping_name": "latitude_longitude", + "spatial_ref": "GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"unknown\",6371229,0]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Longitude\",EAST],AXIS[\"Latitude\",NORTH]]" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "surface_water_runoff": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + 
], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Surface water Runoff", + "short_name": "watr", + "units": "kg m-2", + "comment": "Surface water runoff from interception and snow reservoir and from limited infiltration rate. Sum over forecast.", + "step_type": "accum", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "temperature_2m": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "2 metre temperature", + "short_name": "t2m", + "units": "K", + "comment": "Temperature at 2m above ground, averaged over all tiles of a grid point.", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + 
"storage_transformers": [] + }, + "total_cloud_cover": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Total Cloud Cover", + "short_name": "tcc", + "units": "%", + "comment": "Total cloud cover", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "total_precipitation": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + 
"index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Total Precipitation", + "short_name": "tp", + "units": "kg m**-2", + "comment": "Total precipitation accumulated since model start. TOT_PREC = RAIN_GSP + SNOW_GSP + RAIN_CON + SNOW_CON.", + "step_type": "accum", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "valid_time": { + "shape": [ + 1, + 93 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 29200, + 93 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "statistics_approximate": { + "min": "2020-01-01T00:00:00", + "max": "Present + 5 days" + }, + "units": "seconds since 1970-01-01", + "calendar": "proleptic_gregorian" + }, + "dimension_names": [ + "init_time", + "lead_time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "wind_u_10": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": 
"bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "10 metre U wind component (eastward)", + "short_name": "u10", + "standard_name": "eastward_wind", + "units": "m/s", + "comment": "Zonal wind at 10m above ground", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "wind_v_10": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 120, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 120, + 73, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "10 metre V wind component (northward)", + "short_name": "v10", + "standard_name": "northward_wind", + "units": "m/s", + "comment": "Meridional wind at 10m above ground", + "step_type": "instant", + "coordinates": "ingested_forecast_length spatial_ref 
valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + } + } + }, + "node_type": "group" +} \ No newline at end of file diff --git a/tests/dwd/icon_eu/forecast/template_config_test.py b/tests/dwd/icon_eu/forecast/template_config_test.py index 110557d9..f5ee0eb3 100644 --- a/tests/dwd/icon_eu/forecast/template_config_test.py +++ b/tests/dwd/icon_eu/forecast/template_config_test.py @@ -1,35 +1,37 @@ -# import json +import json + # from copy import deepcopy -# from pathlib import Path +from pathlib import Path # import pandas as pd -# import pytest +import pytest -# from reformatters.dwd.icon_eu.forecast.template_config import DwdIconEuForecastTemplateConfig +from reformatters.dwd.icon_eu.forecast.template_config import ( + DwdIconEuForecastTemplateConfig, +) -# def test_update_template(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: -# """ -# Ensure that `uv run main update-template` has been run and -# all changes to DwdIconEuForecastTemplateConfig are reflected in the on-disk Zarr template. 
-# """ -# template_config = DwdIconEuForecastTemplateConfig() -# with open(template_config.template_path() / "zarr.json") as f: -# existing_template = json.load(f) - -# test_template_path = tmp_path / "latest.zarr" -# monkeypatch.setattr( -# DwdIconEuForecastTemplateConfig, -# "template_path", -# lambda _self: test_template_path, -# ) +def test_update_template(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + """Ensure that `uv run main update-template` has been run and + all changes to DwdIconEuForecastTemplateConfig are reflected in the on-disk + Zarr template.""" + template_config = DwdIconEuForecastTemplateConfig() + with open(template_config.template_path() / "zarr.json") as f: + existing_template = json.load(f) + + test_template_path = tmp_path / "latest.zarr" + monkeypatch.setattr( + DwdIconEuForecastTemplateConfig, + "template_path", + lambda _self: test_template_path, + ) -# template_config.update_template() + template_config.update_template() -# with open(template_config.template_path() / "zarr.json") as f: -# updated_template = json.load(f) + with open(template_config.template_path() / "zarr.json") as f: + updated_template = json.load(f) -# assert existing_template == updated_template + assert existing_template == updated_template # def test_get_template_spatial_ref() -> None: From 46fc81b4c8de041d4107470ecc29a1dd50ca90af Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Wed, 23 Jul 2025 18:01:39 +0100 Subject: [PATCH 09/35] Use the correct CRS in template_config and its test --- .../dwd/icon_eu/forecast/template_config.py | 18 ++++--- .../latest.zarr/spatial_ref/zarr.json | 12 ++--- .../forecast/templates/latest.zarr/zarr.json | 12 ++--- .../icon_eu/forecast/template_config_test.py | 51 ++++++++++++------- 4 files changed, 57 insertions(+), 36 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 3b1a616a..e3beefbf 100644 --- 
a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -226,19 +226,23 @@ def coords(self) -> Sequence[Coordinate]: attrs=CoordinateAttrs( units=None, statistics_approximate=None, - # Deterived by running `ds.rio.write_crs("+proj=longlat +a=6371229 +b=6371229 +no_defs +type=crs")["spatial_ref"].attrs - crs_wkt='GEOGCS["unknown",DATUM["unknown",SPHEROID["unknown",6371229,0]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]', + # Derived by installing xarray, cfgrib, and rioxarray, and then running: + # ds = xr.load_dataset(ICON_EU_GRIB_FILENAME_FROM_DWD, engine='cfgrib') + # from pyproj import CRS + # spherical_crs = CRS.from_wkt(WKT_STRING_EXTRACTED_FROM_ICON_EU_GRIB_BY_GDALINFO) + # ds.rio.write_crs(spherical_crs)["spatial_ref"].attrs + crs_wkt='GEOGCS["Coordinate System imported from GRIB file",DATUM["unnamed",SPHEROID["Sphere",6371229,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST]]', semi_major_axis=6371229.0, semi_minor_axis=6371229.0, inverse_flattening=0.0, - reference_ellipsoid_name="unknown", + reference_ellipsoid_name="Sphere", longitude_of_prime_meridian=0.0, prime_meridian_name="Greenwich", - geographic_crs_name="unknown", - horizontal_datum_name="unknown", + geographic_crs_name="Coordinate System imported from GRIB file", + horizontal_datum_name="unnamed", grid_mapping_name="latitude_longitude", - spatial_ref='GEOGCS["unknown",DATUM["unknown",SPHEROID["unknown",6371229,0]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Longitude",EAST],AXIS["Latitude",NORTH]]', - comment="This coordinate reference system matches the source data which follows WMO conventions of assuming the earth is a perfect sphere with a radius of 6,371,229m. 
It is similar to EPSG:4326, but EPSG:4326 uses a more accurate representation of the earth's shape.", + spatial_ref='GEOGCS["Coordinate System imported from GRIB file",DATUM["unnamed",SPHEROID["Sphere",6371229,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST]]', + comment="From the WKT string output by gdalinfo.", ), ), ] diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json index 46717abc..06abfd3d 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json @@ -30,18 +30,18 @@ } ], "attributes": { - "comment": "This coordinate reference system matches the source data which follows WMO conventions of assuming the earth is a perfect sphere with a radius of 6,371,229m. 
It is similar to EPSG:4326, but EPSG:4326 uses a more accurate representation of the earth's shape.", - "crs_wkt": "GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"unknown\",6371229,0]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Longitude\",EAST],AXIS[\"Latitude\",NORTH]]", + "comment": "From the WKT string output by gdalinfo.", + "crs_wkt": "GEOGCS[\"Coordinate System imported from GRIB file\",DATUM[\"unnamed\",SPHEROID[\"Sphere\",6371229,0]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]", "semi_major_axis": 6371229.0, "semi_minor_axis": 6371229.0, "inverse_flattening": 0.0, - "reference_ellipsoid_name": "unknown", + "reference_ellipsoid_name": "Sphere", "longitude_of_prime_meridian": 0.0, "prime_meridian_name": "Greenwich", - "geographic_crs_name": "unknown", - "horizontal_datum_name": "unknown", + "geographic_crs_name": "Coordinate System imported from GRIB file", + "horizontal_datum_name": "unnamed", "grid_mapping_name": "latitude_longitude", - "spatial_ref": "GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"unknown\",6371229,0]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Longitude\",EAST],AXIS[\"Latitude\",NORTH]]" + "spatial_ref": "GEOGCS[\"Coordinate System imported from GRIB file\",DATUM[\"unnamed\",SPHEROID[\"Sphere\",6371229,0]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]" }, "zarr_format": 3, "node_type": "array", diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json index 4a840055..b4a7f3a6 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json +++ 
b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json @@ -1369,18 +1369,18 @@ } ], "attributes": { - "comment": "This coordinate reference system matches the source data which follows WMO conventions of assuming the earth is a perfect sphere with a radius of 6,371,229m. It is similar to EPSG:4326, but EPSG:4326 uses a more accurate representation of the earth's shape.", - "crs_wkt": "GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"unknown\",6371229,0]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Longitude\",EAST],AXIS[\"Latitude\",NORTH]]", + "comment": "From the WKT string output by gdalinfo.", + "crs_wkt": "GEOGCS[\"Coordinate System imported from GRIB file\",DATUM[\"unnamed\",SPHEROID[\"Sphere\",6371229,0]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]", "semi_major_axis": 6371229.0, "semi_minor_axis": 6371229.0, "inverse_flattening": 0.0, - "reference_ellipsoid_name": "unknown", + "reference_ellipsoid_name": "Sphere", "longitude_of_prime_meridian": 0.0, "prime_meridian_name": "Greenwich", - "geographic_crs_name": "unknown", - "horizontal_datum_name": "unknown", + "geographic_crs_name": "Coordinate System imported from GRIB file", + "horizontal_datum_name": "unnamed", "grid_mapping_name": "latitude_longitude", - "spatial_ref": "GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"unknown\",6371229,0]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Longitude\",EAST],AXIS[\"Latitude\",NORTH]]" + "spatial_ref": "GEOGCS[\"Coordinate System imported from GRIB file\",DATUM[\"unnamed\",SPHEROID[\"Sphere\",6371229,0]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]" }, "zarr_format": 3, "node_type": "array", diff --git 
a/tests/dwd/icon_eu/forecast/template_config_test.py b/tests/dwd/icon_eu/forecast/template_config_test.py index f5ee0eb3..e5d85314 100644 --- a/tests/dwd/icon_eu/forecast/template_config_test.py +++ b/tests/dwd/icon_eu/forecast/template_config_test.py @@ -1,10 +1,10 @@ import json - -# from copy import deepcopy +from copy import deepcopy from pathlib import Path -# import pandas as pd +import pandas as pd import pytest +from pyproj import CRS from reformatters.dwd.icon_eu.forecast.template_config import ( DwdIconEuForecastTemplateConfig, @@ -34,17 +34,34 @@ def test_update_template(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Non assert existing_template == updated_template -# def test_get_template_spatial_ref() -> None: -# """Ensure the spatial reference system in the template matched our expectation.""" -# template_config = DwdIconEuForecastTemplateConfig() -# ds = template_config.get_template( -# template_config.append_dim_start + pd.Timedelta(days=10) -# ) -# original_attrs = deepcopy(ds.spatial_ref.attrs) - -# # TODO: Update to the CRS of your dataset. e.g. 
"EPSG:4269" -# expected_crs = None -# calculated_spatial_ref_attrs = ds.rio.write_crs(expected_crs).spatial_ref.attrs -# assert set(original_attrs) - set(calculated_spatial_ref_attrs) == {"comment"} -# original_attrs.pop("comment") -# assert original_attrs == calculated_spatial_ref_attrs +def test_get_template_spatial_ref() -> None: + """Ensure the spatial reference system in the template matched our + expectation.""" + template_config = DwdIconEuForecastTemplateConfig() + ds = template_config.get_template( + template_config.append_dim_start + pd.Timedelta(days=10) + ) + original_attrs = deepcopy(ds.spatial_ref.attrs) + + # This WKT string is extracted from the ICON-EU GRIB by gdalinfo: + expected_crs = CRS.from_wkt("""GEOGCRS["Coordinate System imported from GRIB file", + DATUM["unnamed", + ELLIPSOID["Sphere",6371229,0, + LENGTHUNIT["metre",1, + ID["EPSG",9001]]]], + PRIMEM["Greenwich",0, + ANGLEUNIT["degree",0.0174532925199433, + ID["EPSG",9122]]], + CS[ellipsoidal,2], + AXIS["latitude",north, + ORDER[1], + ANGLEUNIT["degree",0.0174532925199433, + ID["EPSG",9122]]], + AXIS["longitude",east, + ORDER[2], + ANGLEUNIT["degree",0.0174532925199433, + ID["EPSG",9122]]]]""") + calculated_spatial_ref_attrs = ds.rio.write_crs(expected_crs).spatial_ref.attrs + assert set(original_attrs) - set(calculated_spatial_ref_attrs) == {"comment"} + original_attrs.pop("comment") + assert original_attrs == calculated_spatial_ref_attrs From eb014631efab9763b1170fad2ae4b23b4ce25a32 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Thu, 24 Jul 2025 14:14:16 +0100 Subject: [PATCH 10/35] Update the DataVarAttrs with short_name and standard_name from ICON metadata. 
--- .../dwd/icon_eu/forecast/template_config.py | 57 +++++++++---------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index e3beefbf..e59bf35a 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -31,6 +31,10 @@ class DwdIconEuInternalAttrs(BaseInternalAttrs): """Variable specific attributes used internally to drive processing. Not written to the dataset. + + Attributes: + grib_element (str): The name used in ICON-EU's GRIB filename for this variable. + For example, `alb_rad` (for `surface_albedo`). """ grib_element: str @@ -227,9 +231,9 @@ def coords(self) -> Sequence[Coordinate]: units=None, statistics_approximate=None, # Derived by installing xarray, cfgrib, and rioxarray, and then running: - # ds = xr.load_dataset(ICON_EU_GRIB_FILENAME_FROM_DWD, engine='cfgrib') # from pyproj import CRS # spherical_crs = CRS.from_wkt(WKT_STRING_EXTRACTED_FROM_ICON_EU_GRIB_BY_GDALINFO) + # ds = xr.load_dataset(ICON_EU_GRIB_FILENAME_FROM_DWD, engine='cfgrib') # ds.rio.write_crs(spherical_crs)["spatial_ref"].attrs crs_wkt='GEOGCS["Coordinate System imported from GRIB file",DATUM["unnamed",SPHEROID["Sphere",6371229,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST]]', semi_major_axis=6371229.0, @@ -251,22 +255,12 @@ def coords(self) -> Sequence[Coordinate]: @property def data_vars(self) -> Sequence[DwdIconEuDataVar]: """Define metadata and encoding for each data variable.""" - # Data variable chunking and sharding - # - # Aim for one of these roughly equivalent quantities: - # 1-2mb chunks compressed - # 4-8mb uncompressed - # 4-8 million float32 values var_chunks: dict[Dim, int] = { "init_time": 1, "lead_time": 120, "latitude": 73, "longitude": 153, } - # Aim for one of these roughly 
equivalent quantities: - # 64-256MB shards compressed - # 256-1024MB uncompressed - # 256 million to 1 billion float32 values var_shards: dict[Dim, int] = { "init_time": 1, "lead_time": 120, @@ -285,7 +279,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: default_keep_mantissa_bits = 7 return [ - # The `comment` text is taken from the DWD Database Reference PDF: + # Some of the `comment` text is taken from the DWD Database Reference PDF: # https://www.dwd.de/DWD/forschung/nwv/fepub/icon_database_main.pdf # # We don't include `alb_rad` (shortwave broadband albedo for @@ -297,12 +291,11 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="downward_diffuse_short_wave_radiation_flux_surface", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="msdfswrf", # From ECMWF parameter database. + short_name="aswdifd_s", long_name="Downward diffusive short wave radiation flux at surface (mean over forecast time)", units="W m-2", step_type="avg", standard_name="Mean surface diffuse short-wave radiation flux", # From ECMWF. - comment="Downward solar diffuse radiation flux at the surface, averaged over forecast time.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="aswdifd_s", @@ -313,7 +306,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="downward_direct_short_wave_radiation_flux_surface", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="avg_sdirswrf", # From ECMWF param DB. + short_name="aswdir_s", long_name="Downward direct short wave radiation flux at surface (mean over forecast time)", units="W m-2", step_type="avg", @@ -348,11 +341,11 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="high_cloud_cover", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="hcc", + short_name="clch", long_name="High level clouds", units="%", step_type="instant", - comment="Cloud Cover (0 - 400 hPa)", + comment="Cloud Cover (0 - 400 hPa). 
Different agencies use different short_names for this same parameter: ECMWF: HCC; WMO GRIB table: HCDC.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="clch", @@ -363,11 +356,11 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="low_cloud_cover", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="lcc", + short_name="clcl", long_name="Low level clouds", units="%", step_type="instant", - comment="Cloud Cover (800 hPa - Soil)", + comment="Cloud Cover (800 hPa - Soil). Different agencies use different short_names for this same parameter: ECMWF: LCC; WMO GRIB table: LCDC.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="clcl", @@ -378,11 +371,11 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="medium_cloud_cover", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="mcc", + short_name="clcm", long_name="Mid level clouds", units="%", step_type="instant", - comment="Cloud Cover (400 - 800 hPa)", + comment="Cloud Cover (400 - 800 hPa). Different agencies use different short_names for this same parameter: ECMWF: MCC; WMO GRIB table: MCDC.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="clcm", @@ -393,11 +386,11 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="total_cloud_cover", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="tcc", + short_name="clct", long_name="Total Cloud Cover", units="%", step_type="instant", - comment="Total cloud cover", + comment="Total cloud cover. 
Different agencies use different short_names for this same parameter: ECMWF: TCC; NOAA & WMO: TCDC.", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="clct", @@ -409,7 +402,8 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="sde", - long_name="lwe_thickness_of_surface_snow_amount", + long_name="Snow depth", + standard_name="lwe_thickness_of_surface_snow_amount", units="m", step_type="instant", comment=( @@ -426,7 +420,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="pressure_reduced_to_msl", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="pmsl", + short_name="prmsl", long_name="Pressure reduced to mean sea level (MSL)", units="Pa", step_type="instant", @@ -441,11 +435,12 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: name="relative_humidity", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="r", + short_name="r2", long_name="2 metre relative humidity", units="%", step_type="instant", - comment="Relative humidity at 2m above ground", + comment="Relative humidity at 2m above ground. Other short_names used for this parameter: rh, 2r, r.", + standard_name="relative_humidity", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="relhum_2m", @@ -465,6 +460,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: internal_attrs=DwdIconEuInternalAttrs( grib_element="runoff_g", keep_mantissa_bits=default_keep_mantissa_bits, + deaccumulate_to_rate=True, ), ), DwdIconEuDataVar( @@ -483,19 +479,21 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: internal_attrs=DwdIconEuInternalAttrs( grib_element="runoff_s", keep_mantissa_bits=default_keep_mantissa_bits, + deaccumulate_to_rate=True, ), ), DwdIconEuDataVar( name="temperature_2m", encoding=encoding_float32_default, attrs=DataVarAttrs( - short_name="t2m", # ECMWF calls this "2t". NOAA & DWD use "t2m". 
+ short_name="t2m", long_name="2 metre temperature", units="K", step_type="instant", comment=( - "Temperature at 2m above ground, averaged over all tiles of a grid point." + "Temperature at 2m above ground, averaged over all tiles of a grid point. Different agencies use different short_names for this parameter: ECMWF: 2t; NOAA & DWD: t2m." ), + standard_name="air_temperature", ), internal_attrs=DwdIconEuInternalAttrs( grib_element="t_2m", @@ -518,6 +516,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: internal_attrs=DwdIconEuInternalAttrs( grib_element="tot_prec", keep_mantissa_bits=default_keep_mantissa_bits, + deaccumulate_to_rate=True, ), ), DwdIconEuDataVar( From eeddd2906c7e471447911c0fe50f974a9f4ac5d0 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Thu, 24 Jul 2025 14:15:12 +0100 Subject: [PATCH 11/35] Update latest.zarr metadata with short_name and standard_name from GRIB metadata --- .../zarr.json | 3 +- .../zarr.json | 2 +- .../latest.zarr/high_cloud_cover/zarr.json | 4 +-- .../latest.zarr/low_cloud_cover/zarr.json | 4 +-- .../latest.zarr/medium_cloud_cover/zarr.json | 4 +-- .../pressure_reduced_to_msl/zarr.json | 2 +- .../latest.zarr/relative_humidity/zarr.json | 5 +-- .../latest.zarr/snow_depth/zarr.json | 3 +- .../latest.zarr/temperature_2m/zarr.json | 3 +- .../latest.zarr/total_cloud_cover/zarr.json | 4 +-- .../forecast/templates/latest.zarr/zarr.json | 34 ++++++++++--------- 11 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json index dd1116d4..91961be2 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json +++ 
b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json @@ -69,10 +69,9 @@ ], "attributes": { "long_name": "Downward diffusive short wave radiation flux at surface (mean over forecast time)", - "short_name": "msdfswrf", + "short_name": "aswdifd_s", "standard_name": "Mean surface diffuse short-wave radiation flux", "units": "W m-2", - "comment": "Downward solar diffuse radiation flux at the surface, averaged over forecast time.", "step_type": "avg", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json index 1c4e11db..dfef84d9 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json @@ -69,7 +69,7 @@ ], "attributes": { "long_name": "Downward direct short wave radiation flux at surface (mean over forecast time)", - "short_name": "avg_sdirswrf", + "short_name": "aswdir_s", "units": "W m-2", "comment": "Downward solar direct radiation flux at the surface, averaged over forecast time. This quantity is not directly provided by the radiation scheme. 
It is aposteriori diagnosed from the definition of the surface net shortwave radiation flux.", "step_type": "avg", diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json index 5ea7a1db..09c9d227 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json @@ -69,9 +69,9 @@ ], "attributes": { "long_name": "High level clouds", - "short_name": "hcc", + "short_name": "clch", "units": "%", - "comment": "Cloud Cover (0 - 400 hPa)", + "comment": "Cloud Cover (0 - 400 hPa). Different agencies use different short_names for this same parameter: ECMWF: HCC; WMO GRIB table: HCDC.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json index cfd8ec22..7a4707dd 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json @@ -69,9 +69,9 @@ ], "attributes": { "long_name": "Low level clouds", - "short_name": "lcc", + "short_name": "clcl", "units": "%", - "comment": "Cloud Cover (800 hPa - Soil)", + "comment": "Cloud Cover (800 hPa - Soil). 
Different agencies use different short_names for this same parameter: ECMWF: LCC; WMO GRIB table: LCDC.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json index e404e227..86590ba4 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json @@ -69,9 +69,9 @@ ], "attributes": { "long_name": "Mid level clouds", - "short_name": "mcc", + "short_name": "clcm", "units": "%", - "comment": "Cloud Cover (400 - 800 hPa)", + "comment": "Cloud Cover (400 - 800 hPa). Different agencies use different short_names for this same parameter: ECMWF: MCC; WMO GRIB table: MCDC.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json index 356a171e..b4d14950 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json @@ -69,7 +69,7 @@ ], "attributes": { "long_name": "Pressure reduced to mean sea level (MSL)", - "short_name": "pmsl", + "short_name": "prmsl", "units": "Pa", "comment": "Surface pressure reduced to MSL", "step_type": "instant", diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json index c07dd4c2..821e6b2e 100644 --- 
a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json @@ -69,9 +69,10 @@ ], "attributes": { "long_name": "2 metre relative humidity", - "short_name": "r", + "short_name": "r2", + "standard_name": "relative_humidity", "units": "%", - "comment": "Relative humidity at 2m above ground", + "comment": "Relative humidity at 2m above ground. Other short_names used for this parameter: rh, 2r, r.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json index 0ffdb29b..8e1f9ea7 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json @@ -68,8 +68,9 @@ } ], "attributes": { - "long_name": "lwe_thickness_of_surface_snow_amount", + "long_name": "Snow depth", "short_name": "sde", + "standard_name": "lwe_thickness_of_surface_snow_amount", "units": "m", "comment": "Snow depth in m. 
It is diagnosed from RHO_SNOW and W_SNOW according to H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m.", "step_type": "instant", diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json index ddded0a5..6549ab24 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json @@ -70,8 +70,9 @@ "attributes": { "long_name": "2 metre temperature", "short_name": "t2m", + "standard_name": "air_temperature", "units": "K", - "comment": "Temperature at 2m above ground, averaged over all tiles of a grid point.", + "comment": "Temperature at 2m above ground, averaged over all tiles of a grid point. Different agencies use different short_names for this parameter: ECMWF: 2t; NOAA & DWD: t2m.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json index de70ef04..557f0965 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json @@ -69,9 +69,9 @@ ], "attributes": { "long_name": "Total Cloud Cover", - "short_name": "tcc", + "short_name": "clct", "units": "%", - "comment": "Total cloud cover", + "comment": "Total cloud cover. 
Different agencies use different short_names for this same parameter: ECMWF: TCC; NOAA & WMO: TCDC.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json index b4a7f3a6..01ffbec2 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json @@ -176,10 +176,9 @@ ], "attributes": { "long_name": "Downward diffusive short wave radiation flux at surface (mean over forecast time)", - "short_name": "msdfswrf", + "short_name": "aswdifd_s", "standard_name": "Mean surface diffuse short-wave radiation flux", "units": "W m-2", - "comment": "Downward solar diffuse radiation flux at the surface, averaged over forecast time.", "step_type": "avg", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" @@ -265,7 +264,7 @@ ], "attributes": { "long_name": "Downward direct short wave radiation flux at surface (mean over forecast time)", - "short_name": "avg_sdirswrf", + "short_name": "aswdir_s", "units": "W m-2", "comment": "Downward solar direct radiation flux at the surface, averaged over forecast time. This quantity is not directly provided by the radiation scheme. It is aposteriori diagnosed from the definition of the surface net shortwave radiation flux.", "step_type": "avg", @@ -353,9 +352,9 @@ ], "attributes": { "long_name": "High level clouds", - "short_name": "hcc", + "short_name": "clch", "units": "%", - "comment": "Cloud Cover (0 - 400 hPa)", + "comment": "Cloud Cover (0 - 400 hPa). 
Different agencies use different short_names for this same parameter: ECMWF: HCC; WMO GRIB table: HCDC.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" @@ -704,9 +703,9 @@ ], "attributes": { "long_name": "Low level clouds", - "short_name": "lcc", + "short_name": "clcl", "units": "%", - "comment": "Cloud Cover (800 hPa - Soil)", + "comment": "Cloud Cover (800 hPa - Soil). Different agencies use different short_names for this same parameter: ECMWF: LCC; WMO GRIB table: LCDC.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" @@ -880,9 +879,9 @@ ], "attributes": { "long_name": "Mid level clouds", - "short_name": "mcc", + "short_name": "clcm", "units": "%", - "comment": "Cloud Cover (400 - 800 hPa)", + "comment": "Cloud Cover (400 - 800 hPa). Different agencies use different short_names for this same parameter: ECMWF: MCC; WMO GRIB table: MCDC.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" @@ -968,7 +967,7 @@ ], "attributes": { "long_name": "Pressure reduced to mean sea level (MSL)", - "short_name": "pmsl", + "short_name": "prmsl", "units": "Pa", "comment": "Surface pressure reduced to MSL", "step_type": "instant", @@ -1056,9 +1055,10 @@ ], "attributes": { "long_name": "2 metre relative humidity", - "short_name": "r", + "short_name": "r2", + "standard_name": "relative_humidity", "units": "%", - "comment": "Relative humidity at 2m above ground", + "comment": "Relative humidity at 2m above ground. 
Other short_names used for this parameter: rh, 2r, r.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" @@ -1143,8 +1143,9 @@ } ], "attributes": { - "long_name": "lwe_thickness_of_surface_snow_amount", + "long_name": "Snow depth", "short_name": "sde", + "standard_name": "lwe_thickness_of_surface_snow_amount", "units": "m", "comment": "Snow depth in m. It is diagnosed from RHO_SNOW and W_SNOW according to H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m.", "step_type": "instant", @@ -1546,8 +1547,9 @@ "attributes": { "long_name": "2 metre temperature", "short_name": "t2m", + "standard_name": "air_temperature", "units": "K", - "comment": "Temperature at 2m above ground, averaged over all tiles of a grid point.", + "comment": "Temperature at 2m above ground, averaged over all tiles of a grid point. Different agencies use different short_names for this parameter: ECMWF: 2t; NOAA & DWD: t2m.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" @@ -1633,9 +1635,9 @@ ], "attributes": { "long_name": "Total Cloud Cover", - "short_name": "tcc", + "short_name": "clct", "units": "%", - "comment": "Total cloud cover", + "comment": "Total cloud cover. Different agencies use different short_names for this same parameter: ECMWF: TCC; NOAA & WMO: TCDC.", "step_type": "instant", "coordinates": "ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" From 7a7a9c89f88eb78fc45560ce02d8f7248a6fbbf1 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Thu, 24 Jul 2025 14:24:33 +0100 Subject: [PATCH 12/35] Removing dynamical_dataset.py and region_job.py from git repo. These files are currently just the output from the initialize-new-integration tool. These files will be added to a subsequent PR once I've modified them. 
--- .../dwd/icon_eu/forecast/dynamical_dataset.py | 55 ---- .../dwd/icon_eu/forecast/region_job.py | 289 ------------------ .../forecast/dynamical_dataset_test.py | 62 ---- tests/dwd/icon_eu/forecast/region_job_test.py | 37 --- 4 files changed, 443 deletions(-) delete mode 100644 src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py delete mode 100644 src/reformatters/dwd/icon_eu/forecast/region_job.py delete mode 100644 tests/dwd/icon_eu/forecast/dynamical_dataset_test.py delete mode 100644 tests/dwd/icon_eu/forecast/region_job_test.py diff --git a/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py b/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py deleted file mode 100644 index 2fb26ce1..00000000 --- a/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py +++ /dev/null @@ -1,55 +0,0 @@ -from collections.abc import Sequence - -from reformatters.common import validation -from reformatters.common.dynamical_dataset import DynamicalDataset -from reformatters.common.kubernetes import CronJob - -from .region_job import DwdIconEuForecastRegionJob, DwdIconEuForecastSourceFileCoord -from .template_config import DwdIconEuDataVar, DwdIconEuForecastTemplateConfig - - -class DwdIconEuForecastDataset( - DynamicalDataset[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord] -): - template_config: DwdIconEuForecastTemplateConfig = DwdIconEuForecastTemplateConfig() - region_job_class: type[DwdIconEuForecastRegionJob] = DwdIconEuForecastRegionJob - - def operational_kubernetes_resources(self, image_tag: str) -> Sequence[CronJob]: - """Return the kubernetes cron job definitions to operationally update and validate this dataset.""" - # operational_update_cron_job = ReformatCronJob( - # name=f"{self.dataset_id}-operational-update", - # schedule=_OPERATIONAL_CRON_SCHEDULE, - # pod_active_deadline=timedelta(minutes=30), - # image=image_tag, - # dataset_id=self.dataset_id, - # cpu="14", - # memory="30G", - # shared_memory="12G", - # ephemeral_storage="30G", - # 
secret_names=self.storage_config.k8s_secret_names, - # ) - # validation_cron_job = ValidationCronJob( - # name=f"{self.dataset_id}-validation", - # schedule=_VALIDATION_CRON_SCHEDULE, - # pod_active_deadline=timedelta(minutes=10), - # image=image_tag, - # dataset_id=self.dataset_id, - # cpu="1.3", - # memory="7G", - # secret_names=self.storage_config.k8s_secret_names, - # ) - - # return [operational_update_cron_job, validation_cron_job] - raise NotImplementedError( - f"Implement `operational_kubernetes_resources` on {self.__class__.__name__}" - ) - - def validators(self) -> Sequence[validation.DataValidator]: - """Return a sequence of DataValidators to run on this dataset.""" - # return ( - # validation.check_analysis_current_data, - # validation.check_analysis_recent_nans, - # ) - raise NotImplementedError( - f"Implement `validators` on {self.__class__.__name__}" - ) diff --git a/src/reformatters/dwd/icon_eu/forecast/region_job.py b/src/reformatters/dwd/icon_eu/forecast/region_job.py deleted file mode 100644 index 9626014e..00000000 --- a/src/reformatters/dwd/icon_eu/forecast/region_job.py +++ /dev/null @@ -1,289 +0,0 @@ -from collections.abc import Callable, Mapping, Sequence -from pathlib import Path - -import xarray as xr -import zarr - -from reformatters.common.logging import get_logger -from reformatters.common.region_job import ( - CoordinateValueOrRange, - RegionJob, - SourceFileCoord, -) -from reformatters.common.types import ( - AppendDim, - ArrayFloat32, - DatetimeLike, - Dim, -) - -from .template_config import DwdIconEuDataVar - -log = get_logger(__name__) - - -class DwdIconEuForecastSourceFileCoord(SourceFileCoord): - """Coordinates of a single source file to process.""" - - def get_url(self) -> str: - raise NotImplementedError("Return the URL of the source file.") - - def out_loc( - self, - ) -> Mapping[Dim, CoordinateValueOrRange]: - """ - Returns a data array indexer which identifies the region in the output dataset - to write the data from the 
source file. The indexer is a dict from dimension - names to coordinate values or slices. - """ - # If the names of the coordinate attributes of your SourceFileCoord subclass are also all - # dimension names in the output dataset (e.g. init_time and lead_time), - # delete this implementation and use the default implementation of this method. - # - # Examples where you would override this method: - # - An analysis dataset created from forecast data: - # return {"time": self.init_time + self.lead_time} - return super().out_loc() - - -class DwdIconEuForecastRegionJob( - RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord] -): - # Optionally, limit the number of variables downloaded together. - # If set to a value less than len(data_vars), downloading, reading/recompressing, - # and uploading steps will be pipelined within a region job. - # 5 is a reasonable default if it is possible to download less than all - # variables in a single file (e.g. you have a grib index). - # Leave unset if you have to download a whole file to get one variable out - # to avoid re-downloading the same file multiple times. - # - # max_vars_per_download_group: ClassVar[int | None] = None - - # Implement this method only if different variables must be retrieved from different urls - # - # # @classmethod - # def source_groups( - # cls, - # data_vars: Sequence[DwdIconEuDataVar], - # ) -> Sequence[Sequence[DwdIconEuDataVar]]: - # """ - # Return groups of variables, where all variables in a group can be retrieived from the same source file. - # """ - # grouped = defaultdict(list) - # for data_var in data_vars: - # grouped[data_var.internal_attrs.file_type].append(data_var) - # return list(grouped.values()) - - # Implement this method only if specific post processing in this dataset - # requires data from outside the region defined by self.region, - # e.g. for deaccumulation or interpolation along append_dim in an analysis dataset. 
- # - # def get_processing_region(self) -> slice: - # """ - # Return a slice of integer offsets into self.template_ds along self.append_dim that identifies - # the region to process. In most cases this is exactly self.region, but if additional data outside - # the region is required, for example for correct interpolation or deaccumulation, this method can - # return a modified slice (e.g. `slice(self.region.start - 1, self.region.stop + 1)`). - # """ - # return self.region - - def generate_source_file_coords( - self, - processing_region_ds: xr.Dataset, - data_var_group: Sequence[DwdIconEuDataVar], - ) -> Sequence[DwdIconEuForecastSourceFileCoord]: - """Return a sequence of coords, one for each source file required to process the data covered by processing_region_ds.""" - # return [ - # DwdIconEuForecastSourceFileCoord( - # init_time=init_time, - # lead_time=lead_time, - # ) - # for init_time, lead_time in itertools.product( - # processing_region_ds["init_time"].values, - # processing_region_ds["lead_time"].values, - # ) - # ] - raise NotImplementedError( - "Return a sequence of SourceFileCoord objects, one for each source file required to process the data covered by processing_region_ds." - ) - - def download_file(self, coord: DwdIconEuForecastSourceFileCoord) -> Path: - """Download the file for the given coordinate and return the local path.""" - # return http_download_to_disk(coord.get_url(), self.dataset_id) - raise NotImplementedError( - "Download the file for the given coordinate and return the local path." 
- ) - - def read_data( - self, - coord: DwdIconEuForecastSourceFileCoord, - data_var: DwdIconEuDataVar, - ) -> ArrayFloat32: - """Read and return an array of data for the given variable and source file coordinate.""" - # with rasterio.open(coord.downloaded_file_path) as reader: - # TODO: make a band index based on tag matching utility function - # matching_indexes = [ - # i - # for i in range(reader.count) - # if (tags := reader.tags(i))["GRIB_ELEMENT"] - # == data_var.internal_attrs.grib_element - # and tags["GRIB_COMMENT"] == data_var.internal_attrs.grib_comment - # ] - # assert len(matching_indexes) == 1, f"Expected exactly 1 matching band, found {matching_indexes}. {data_var.internal_attrs.grib_element=}, {data_var.internal_attrs.grib_description=}, {coord.downloaded_file_path=}" # fmt: skip - # rasterio_band_index = 1 + matching_indexes[0] # rasterio is 1-indexed - # return reader.read(rasterio_band_index, dtype=np.float32) - raise NotImplementedError( - "Read and return data for the given variable and source file coordinate." - ) - - # Implement this to apply transformations to the array (e.g. deaccumulation) - # - # def apply_data_transformations( - # self, data_array: xr.DataArray, data_var: DwdIconEuDataVar - # ) -> None: - # """ - # Apply in-place data transformations to the output data array for a given data variable. - - # This method is called after reading all data for a variable into the shared-memory array, - # and before writing shards to the output store. The default implementation applies binary - # rounding to float32 arrays if `data_var.internal_attrs.keep_mantissa_bits` is set. - - # Subclasses may override this method to implement additional transformations such as - # deaccumulation, interpolation or other custom logic. All transformations should be - # performed in-place (don't copy `data_array`, it's large). - - # Parameters - # ---------- - # data_array : xr.DataArray - # The output data array to be transformed in-place. 
- # data_var : DwdIconEuDataVar - # The data variable metadata object, which may contain transformation parameters. - # """ - # super().apply_data_transformations(data_array, data_var) - - def update_template_with_results( - self, process_results: Mapping[str, Sequence[DwdIconEuForecastSourceFileCoord]] - ) -> xr.Dataset: - """ - Update template dataset based on processing results. This method is called - during operational updates. - - Subclasses should implement this method to apply dataset-specific adjustments - based on the processing results. Examples include: - - Trimming dataset along append_dim to only include successfully processed data - - Loading existing coordinate values from final_store and updating them based on results - - Updating metadata based on what was actually processed vs what was planned - - The default implementation trims along append_dim to end at the most recent - successfully processed coordinate (timestamp). - - Parameters - ---------- - process_results : Mapping[str, Sequence[DwdIconEuForecastSourceFileCoord]] - Mapping from variable names to their source file coordinates with final processing status. - - Returns - ------- - xr.Dataset - Updated template dataset reflecting the actual processing results. 
- """ - # The super() implementation looks like this: - # - # max_append_dim_processed = max( - # ( - # c.out_loc()[self.append_dim] # type: ignore[type-var] - # for c in chain.from_iterable(process_results.values()) - # if c.status == SourceFileStatus.Succeeded - # ), - # default=None, - # ) - # if max_append_dim_processed is None: - # # No data was processed, trim the template to stop before this job's region - # # This is using isel's exclusive slice end behavior - # return self.template_ds.isel( - # {self.append_dim: slice(None, self.region.start)} - # ) - # else: - # return self.template_ds.sel( - # {self.append_dim: slice(None, max_append_dim_processed)} - # ) - # - # If you like the above behavior, skip implementing this method. - # If you need to customize the behavior, implement this method. - - raise NotImplementedError( - "Subclasses implement update_template_with_results() with dataset-specific logic" - ) - - @classmethod - def operational_update_jobs( - cls, - final_store: zarr.abc.store.Store, - tmp_store: Path, - get_template_fn: Callable[[DatetimeLike], xr.Dataset], - append_dim: AppendDim, - all_data_vars: Sequence[DwdIconEuDataVar], - reformat_job_name: str, - ) -> tuple[ - Sequence["RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord]"], - xr.Dataset, - ]: - """ - Return the sequence of RegionJob instances necessary to update the dataset - from its current state to include the latest available data. - - Also return the template_ds, expanded along append_dim through the end of - the data to process. The dataset returned here may extend beyond the - available data at the source, in which case `update_template_with_results` - will trim the dataset to the actual data processed. - - The exact logic is dataset-specific, but it generally follows this pattern: - 1. Figure out the range of time to process: append_dim_start (inclusive) and append_dim_end (exclusive) - a. 
Read existing data from final_store to determine what's already processed - b. Optionally identify recent incomplete/non-final data for reprocessing - 2. Call get_template_fn(append_dim_end) to get the template_ds - 3. Create RegionJob instances by calling cls.get_jobs(..., filter_start=append_dim_start) - - Parameters - ---------- - final_store : zarr.abc.store.Store - The destination Zarr store to read existing data from and write updates to. - tmp_store : zarr.abc.store.Store | Path - The temporary Zarr store to write into while processing. - get_template_fn : Callable[[DatetimeLike], xr.Dataset] - Function to get the template_ds for the operational update. - append_dim : AppendDim - The dimension along which data is appended (e.g., "time"). - all_data_vars : Sequence[DwdIconEuDataVar] - Sequence of all data variable configs for this dataset. - reformat_job_name : str - The name of the reformatting job, used for progress tracking. - This is often the name of the Kubernetes job, or "local". - - Returns - ------- - Sequence[RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord]] - RegionJob instances that need processing for operational updates. - xr.Dataset - The template_ds for the operational update. 
- """ - # existing_ds = xr.open_zarr(final_store) - # append_dim_start = existing_ds[append_dim].max() - # append_dim_end = pd.Timestamp.now() - # template_ds = get_template_fn(append_dim_end) - - # jobs = cls.get_jobs( - # kind="operational-update", - # final_store=final_store, - # tmp_store=tmp_store, - # template_ds=template_ds, - # append_dim=append_dim, - # all_data_vars=all_data_vars, - # reformat_job_name=reformat_job_name, - # filter_start=append_dim_start, - # ) - # return jobs, template_ds - - raise NotImplementedError( - "Subclasses implement operational_update_jobs() with dataset-specific logic" - ) diff --git a/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py b/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py deleted file mode 100644 index ccc5a53c..00000000 --- a/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py +++ /dev/null @@ -1,62 +0,0 @@ -# from pathlib import Path - -# import numpy as np -# import pandas as pd -# import pytest -# import xarray as xr - -# from reformatters.common import validation -# from reformatters.dwd.icon_eu.forecast.dynamical_dataset import DwdIconEuForecastDataset - -# @pytest.mark.slow -# def test_backfill_local_and_operational_update(monkeypatch: pytest.MonkeyPatch) -> None: -# dataset = DwdIconEuForecastDataset() - -# # Local backfill reformat -# dataset.backfill_local(append_dim_end=pd.Timestamp("2000-01-02")) -# ds = xr.open_zarr(dataset._final_store(), chunks=None) -# assert ds.time.max() == pd.Timestamp("2000-01-01") - -# # Operational update -# monkeypatch.setattr( -# dataset.region_job_class, -# "_update_append_dim_end", -# lambda: pd.Timestamp("2000-01-03"), -# ) -# monkeypatch.setattr( -# dataset.region_job_class, -# "_update_append_dim_start", -# lambda existing_ds: pd.Timestamp(existing_ds.time.max().item()), -# ) - -# dataset.update("test-update") - -# # Check resulting dataset -# updated_ds = xr.open_zarr(dataset._final_store(), chunks=None) - -# np.testing.assert_array_equal( -# updated_ds.time, 
pd.date_range("1981-10-01", "1981-10-03") -# ) -# subset_ds = updated_ds.sel(latitude=48.583335, longitude=-94, method="nearest") -# np.testing.assert_array_equal( -# subset_ds["your_variable"].values, [190.0, 163.0, 135.0] -# ) - - -# def test_operational_kubernetes_resources( -# dataset: DwdIconEuForecastDataset, -# ) -> None: -# cron_jobs = dataset.operational_kubernetes_resources("test-image-tag") - -# assert len(cron_jobs) == 2 -# update_cron_job, validation_cron_job = cron_jobs -# assert update_cron_job.name == f"{dataset.dataset_id}-operational-update" -# assert validation_cron_job.name == f"{dataset.dataset_id}-validation" -# assert update_cron_job.secret_names == dataset.storage_config.k8s_secret_names -# assert validation_cron_job.secret_names == dataset.storage_config.k8s_secret_names - - -# def test_validators(dataset: DwdIconEuForecastDataset) -> None: -# validators = tuple(dataset.validators()) -# assert len(validators) == 2 -# assert all(isinstance(v, validation.DataValidator) for v in validators) diff --git a/tests/dwd/icon_eu/forecast/region_job_test.py b/tests/dwd/icon_eu/forecast/region_job_test.py deleted file mode 100644 index 0ab7d676..00000000 --- a/tests/dwd/icon_eu/forecast/region_job_test.py +++ /dev/null @@ -1,37 +0,0 @@ -# from unittest.mock import Mock - -# import pandas as pd - -# from reformatters.dwd.icon_eu.forecast.region_job import ( -# DwdIconEuForecastRegionJob, -# DwdIconEuForecastSourceFileCoord, -# ) -# from reformatters.dwd.icon_eu.forecast.template_config import DwdIconEuForecastTemplateConfig - -# def test_source_file_coord_get_url() -> None: -# coord = DwdIconEuForecastSourceFileCoord(time=pd.Timestamp("2000-01-01")) -# assert coord.get_url() == "https://example.com/data/2000-01-01.grib2" - - -# def test_region_job_generete_source_file_coords() -> None: -# template_config = DwdIconEuForecastTemplateConfig() -# template_ds = template_config.get_template(pd.Timestamp("2000-01-23")) - -# region_job = 
DwdIconEuForecastRegionJob( -# final_store=Mock(), -# tmp_store=Mock(), -# template_ds=template_ds, -# data_vars=[Mock(), Mock()], -# append_dim=template_config.append_dim, -# region=slice(0, 10), -# reformat_job_name="test", -# ) - -# processing_region_ds, output_region_ds = region_job._get_region_datasets() - -# source_file_coords = region_job.generate_source_file_coords( -# processing_region_ds, [Mock()] -# ) - -# assert len(source_file_coords) == ... -# assert ... From 7035578892b38dc9d2117760bc7022045b98f040 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 15:53:43 +0100 Subject: [PATCH 13/35] Add expected_forecast_length. --- .../dwd/icon_eu/forecast/template_config.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index e59bf35a..b84f907f 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -98,6 +98,14 @@ def derive_coordinates( (self.append_dim,), np.full(ds[self.append_dim].size, np.timedelta64("NaT", "ns")), ), + "expected_forecast_length": ( + (self.append_dim,), + np.full( + ds[self.append_dim].size, + ds["lead_time"].max(), + dtype="timedelta64[ns]", + ), + ), "spatial_ref": SPATIAL_REF_COORDS, } @@ -219,6 +227,24 @@ def coords(self) -> Sequence[Coordinate]: ), ), ), + Coordinate( + name="expected_forecast_length", + encoding=Encoding( + dtype="int64", + fill_value=-1, + compressors=[BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE], + units="seconds", + chunks=append_dim_coordinate_chunk_size, + shards=None, + ), + attrs=CoordinateAttrs( + units="seconds", + statistics_approximate=StatisticsApproximate( + min=str(dim_coords["lead_time"].min()), + max=str(dim_coords["lead_time"].max()), + ), + ), + ), Coordinate( name="spatial_ref", encoding=Encoding( From ba89facd26dcb916cb0d806cadfdad5215c8f5eb Mon Sep 17 00:00:00 2001 From: Jack 
Kelly Date: Mon, 28 Jul 2025 15:56:22 +0100 Subject: [PATCH 14/35] Update append_dim_start datetime, as per Alden's review --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index b84f907f..7fe6ee29 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -48,7 +48,7 @@ class DwdIconEuForecastTemplateConfig(TemplateConfig[DwdIconEuDataVar]): dims: tuple[Dim, ...] = ("init_time", "lead_time", "latitude", "longitude") append_dim: AppendDim = "init_time" append_dim_start: Timestamp = pd.Timestamp( - "2020-01-01T00:00" # The start of OCF's ICON-EU archive on Hugging Face. + "2025-08-08T00:00" # TODO: Update this when we actual deploy operationally. ) append_dim_frequency: Timedelta = pd.Timedelta("6h") From 85ead6b57543977aa22013328b74fe732826f0e5 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 15:59:09 +0100 Subject: [PATCH 15/35] Add dwd/__init__.py and dwd/icon_eu/__init__.py as per Alden's review, in the hopes of fixing the mypy failure. mypy passes locally. 
--- src/reformatters/dwd/__init__.py | 0 src/reformatters/dwd/icon_eu/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/reformatters/dwd/__init__.py create mode 100644 src/reformatters/dwd/icon_eu/__init__.py diff --git a/src/reformatters/dwd/__init__.py b/src/reformatters/dwd/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/reformatters/dwd/icon_eu/__init__.py b/src/reformatters/dwd/icon_eu/__init__.py new file mode 100644 index 00000000..e69de29b From 307e43c8cae08fbe3d246308edc5d43bbc410e7c Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:00:41 +0100 Subject: [PATCH 16/35] Remove noqa: F401, as per Alden's review --- .../dwd/icon_eu/forecast/template_config.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 7fe6ee29..960e41b7 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -9,21 +9,21 @@ from reformatters.common.config_models import ( BaseInternalAttrs, Coordinate, - CoordinateAttrs, # noqa: F401 + CoordinateAttrs, DatasetAttributes, DataVar, - DataVarAttrs, # noqa: F401 - Encoding, # noqa: F401 - StatisticsApproximate, # noqa: F401 + DataVarAttrs, + Encoding, + StatisticsApproximate, ) from reformatters.common.template_config import ( - SPATIAL_REF_COORDS, # noqa: F401 + SPATIAL_REF_COORDS, TemplateConfig, ) from reformatters.common.types import AppendDim, Dim, Timedelta, Timestamp from reformatters.common.zarr import ( - BLOSC_4BYTE_ZSTD_LEVEL3_SHUFFLE, # noqa: F401 - BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE, # noqa: F401 + BLOSC_4BYTE_ZSTD_LEVEL3_SHUFFLE, + BLOSC_8BYTE_ZSTD_LEVEL3_SHUFFLE, ) From 96ce2f2d8a35d6696e481652e41c11fbe65c82e1 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:03:13 +0100 Subject: [PATCH 17/35] Replace 
hyphen with underscore in ICON EU dataset_id As per Alden's review Co-authored-by: Alden Keefe Sampson --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 960e41b7..c79c7c13 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -56,7 +56,7 @@ class DwdIconEuForecastTemplateConfig(TemplateConfig[DwdIconEuDataVar]): @property def dataset_attributes(self) -> DatasetAttributes: return DatasetAttributes( - dataset_id="dwd-icon_eu-forecast", + dataset_id="dwd-icon-eu-forecast", dataset_version="0.1.0", name="DWD ICON-EU Forecast", description="High-resolution weather forecasts for Europe from the ICON-EU model operated by Deutscher Wetterdienst (DWD).", From a8e0885e3bec700f4de39771e05026b8bc5e06b5 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:28:16 +0100 Subject: [PATCH 18/35] Add a comment confirming that the coords are for pixel centers --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index c79c7c13..92845e8b 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -81,6 +81,7 @@ def dimension_coordinates(self) -> dict[str, Any]: pd.timedelta_range("81h", "120h", freq="3h") ) ), + # These coordinates are for the pixel centers: "latitude": np.linspace(29.5, 70.5, 657), "longitude": np.linspace(-23.5, 62.5, 1377), } From 6fbb7225b869b29e81c2d964300a024cb5647db2 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:30:06 +0100 Subject: [PATCH 19/35] Comment that the CRS is a perfect sphere extracted from GRIB. 
As per Alden's PR review Co-authored-by: Alden Keefe Sampson --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 92845e8b..61f695b8 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -273,7 +273,7 @@ def coords(self) -> Sequence[Coordinate]: horizontal_datum_name="unnamed", grid_mapping_name="latitude_longitude", spatial_ref='GEOGCS["Coordinate System imported from GRIB file",DATUM["unnamed",SPHEROID["Sphere",6371229,0]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST]]', - comment="From the WKT string output by gdalinfo.", + comment="A perfect sphere geographic CRS with a radius of 6,371,229m, extracted from grib.", ), ), ] From 20cb35fbd721dca5a6dd704913ca56c98328d5cb Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:40:06 +0100 Subject: [PATCH 20/35] lead_time is 93 steps NOT 120 steps!\n\nBug found by Alden in PR review. 
--- src/reformatters/dwd/icon_eu/forecast/template_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 92845e8b..e8954251 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -284,13 +284,13 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: """Define metadata and encoding for each data variable.""" var_chunks: dict[Dim, int] = { "init_time": 1, - "lead_time": 120, - "latitude": 73, - "longitude": 153, + "lead_time": 93, + "latitude": 73, # 73 = 657 / 9 + "longitude": 153, # 153 = 1377 / 9 } var_shards: dict[Dim, int] = { "init_time": 1, - "lead_time": 120, + "lead_time": 93, "latitude": 657, "longitude": 1377, } From 516156bfd731579b0affc6195fd1ca70bdc85028 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:43:28 +0100 Subject: [PATCH 21/35] Change wind_v_10 to wind_v_10m Co-authored-by: Alden Keefe Sampson --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index d9ce906d..e77f1209 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -563,7 +563,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: ), ), DwdIconEuDataVar( - name="wind_v_10", + name="wind_v_10m", encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="v10", From 8d5abcf25e06686cc23270079c39218779b38340 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:43:46 +0100 Subject: [PATCH 22/35] Change relative_humidity to relative_humidity_2m Co-authored-by: Alden Keefe Sampson --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index e77f1209..0fd5f6b2 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -459,7 +459,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: ), ), DwdIconEuDataVar( - name="relative_humidity", + name="relative_humidity_2m", encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="r2", From 975edf42bd13b170f04b91c4bcfcebcd22dc1c63 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:44:12 +0100 Subject: [PATCH 23/35] Change pressure_reduced_to_msl to pressure_reduced_to_mean_sea_level Co-authored-by: Alden Keefe Sampson --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 0fd5f6b2..0f63aa4d 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -444,7 +444,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: ), ), DwdIconEuDataVar( - name="pressure_reduced_to_msl", + name="pressure_reduced_to_mean_sea_level", encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="prmsl", From 248806a12af9972c1a26660bdfc97126f6f8fb4c Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:49:02 +0100 Subject: [PATCH 24/35] Use chunks with 165 pixels in the latitude dim As per Alden's review --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index d9ce906d..4fa1b95c 100644 --- 
a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -285,7 +285,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: var_chunks: dict[Dim, int] = { "init_time": 1, "lead_time": 93, - "latitude": 73, # 73 = 657 / 9 + "latitude": 165, # 165 = 657 / 3.98... "longitude": 153, # 153 = 1377 / 9 } var_shards: dict[Dim, int] = { From e5be1a5c82448393ede8f917e2f42609bf3b5d99 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:50:21 +0100 Subject: [PATCH 25/35] Add comment about chunk sizes in MB Co-authored-by: Alden Keefe Sampson --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index fc3e9b36..d2d43200 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -282,6 +282,7 @@ def coords(self) -> Sequence[Coordinate]: @property def data_vars(self) -> Sequence[DwdIconEuDataVar]: """Define metadata and encoding for each data variable.""" + # Roughly 3.4MB uncompressed, 1.9MB compressed var_chunks: dict[Dim, int] = { "init_time": 1, "lead_time": 93, From 2a3e8933619da830756ef5bd30b70ef410640832 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:50:48 +0100 Subject: [PATCH 26/35] Add comment about size of shards in MB Co-authored-by: Alden Keefe Sampson --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index d2d43200..850735d3 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -289,6 +289,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: "latitude": 165, # 
165 = 657 / 3.98... "longitude": 153, # 153 = 1377 / 9 } + # Roughly 337MB uncompressed, 67MB compressed var_shards: dict[Dim, int] = { "init_time": 1, "lead_time": 93, From e974bfeae2c928601c8899c86f253e30304cbe12 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:57:34 +0100 Subject: [PATCH 27/35] Remove the text 'mean over forecast time'. As per Alden's review --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 850735d3..518103ea 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -321,7 +321,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="aswdifd_s", - long_name="Downward diffusive short wave radiation flux at surface (mean over forecast time)", + long_name="Downward diffusive short wave radiation flux at surface", units="W m-2", step_type="avg", standard_name="Mean surface diffuse short-wave radiation flux", # From ECMWF. 
@@ -336,7 +336,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="aswdir_s", - long_name="Downward direct short wave radiation flux at surface (mean over forecast time)", + long_name="Downward direct short wave radiation flux at surface", units="W m-2", step_type="avg", comment=( From d063a6c1aef4d112fa88591ae68fd78a1eb1d5d8 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 16:58:53 +0100 Subject: [PATCH 28/35] Replace 'total_precipitation' with 'precipitation_surface' Co-authored-by: Alden Keefe Sampson --- src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 518103ea..ee198f3b 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -530,7 +530,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: ), ), DwdIconEuDataVar( - name="total_precipitation", + name="precipitation_surface", encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="tp", From 576b0a986852eb5750404f9242c148d886e58596 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 17:31:14 +0100 Subject: [PATCH 29/35] Add unmodified files created by initialize-new-integration --- .../dwd/icon_eu/forecast/dynamical_dataset.py | 55 ++++ .../dwd/icon_eu/forecast/region_job.py | 291 ++++++++++++++++++ .../forecast/dynamical_dataset_test.py | 62 ++++ tests/dwd/icon_eu/forecast/region_job_test.py | 37 +++ 4 files changed, 445 insertions(+) create mode 100644 src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py create mode 100644 src/reformatters/dwd/icon_eu/forecast/region_job.py create mode 100644 tests/dwd/icon_eu/forecast/dynamical_dataset_test.py create mode 100644 tests/dwd/icon_eu/forecast/region_job_test.py diff --git 
a/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py b/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py new file mode 100644 index 00000000..2fb26ce1 --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/dynamical_dataset.py @@ -0,0 +1,55 @@ +from collections.abc import Sequence + +from reformatters.common import validation +from reformatters.common.dynamical_dataset import DynamicalDataset +from reformatters.common.kubernetes import CronJob + +from .region_job import DwdIconEuForecastRegionJob, DwdIconEuForecastSourceFileCoord +from .template_config import DwdIconEuDataVar, DwdIconEuForecastTemplateConfig + + +class DwdIconEuForecastDataset( + DynamicalDataset[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord] +): + template_config: DwdIconEuForecastTemplateConfig = DwdIconEuForecastTemplateConfig() + region_job_class: type[DwdIconEuForecastRegionJob] = DwdIconEuForecastRegionJob + + def operational_kubernetes_resources(self, image_tag: str) -> Sequence[CronJob]: + """Return the kubernetes cron job definitions to operationally update and validate this dataset.""" + # operational_update_cron_job = ReformatCronJob( + # name=f"{self.dataset_id}-operational-update", + # schedule=_OPERATIONAL_CRON_SCHEDULE, + # pod_active_deadline=timedelta(minutes=30), + # image=image_tag, + # dataset_id=self.dataset_id, + # cpu="14", + # memory="30G", + # shared_memory="12G", + # ephemeral_storage="30G", + # secret_names=self.storage_config.k8s_secret_names, + # ) + # validation_cron_job = ValidationCronJob( + # name=f"{self.dataset_id}-validation", + # schedule=_VALIDATION_CRON_SCHEDULE, + # pod_active_deadline=timedelta(minutes=10), + # image=image_tag, + # dataset_id=self.dataset_id, + # cpu="1.3", + # memory="7G", + # secret_names=self.storage_config.k8s_secret_names, + # ) + + # return [operational_update_cron_job, validation_cron_job] + raise NotImplementedError( + f"Implement `operational_kubernetes_resources` on {self.__class__.__name__}" + ) + + def 
validators(self) -> Sequence[validation.DataValidator]: + """Return a sequence of DataValidators to run on this dataset.""" + # return ( + # validation.check_analysis_current_data, + # validation.check_analysis_recent_nans, + # ) + raise NotImplementedError( + f"Implement `validators` on {self.__class__.__name__}" + ) diff --git a/src/reformatters/dwd/icon_eu/forecast/region_job.py b/src/reformatters/dwd/icon_eu/forecast/region_job.py new file mode 100644 index 00000000..dda6143d --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/region_job.py @@ -0,0 +1,291 @@ +from collections.abc import Callable, Mapping, Sequence +from pathlib import Path + +import xarray as xr +import zarr + +from reformatters.common.logging import get_logger +from reformatters.common.region_job import ( + CoordinateValueOrRange, + RegionJob, + SourceFileCoord, +) +from reformatters.common.types import ( + AppendDim, + ArrayFloat32, + DatetimeLike, + Dim, +) + +from .template_config import DwdIconEuDataVar + +log = get_logger(__name__) + + +class DwdIconEuForecastSourceFileCoord(SourceFileCoord): + """Coordinates of a single source file to process.""" + + def get_url(self) -> str: + raise NotImplementedError("Return the URL of the source file.") + + def out_loc( + self, + ) -> Mapping[Dim, CoordinateValueOrRange]: + """Returns a data array indexer which identifies the region in the + output dataset to write the data from the source file. + + The indexer is a dict from dimension names to coordinate values + or slices. + """ + # If the names of the coordinate attributes of your SourceFileCoord subclass are also all + # dimension names in the output dataset (e.g. init_time and lead_time), + # delete this implementation and use the default implementation of this method. 
+ # + # Examples where you would override this method: + # - An analysis dataset created from forecast data: + # return {"time": self.init_time + self.lead_time} + return super().out_loc() + + +class DwdIconEuForecastRegionJob( + RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord] +): + # Optionally, limit the number of variables downloaded together. + # If set to a value less than len(data_vars), downloading, reading/recompressing, + # and uploading steps will be pipelined within a region job. + # 5 is a reasonable default if it is possible to download less than all + # variables in a single file (e.g. you have a grib index). + # Leave unset if you have to download a whole file to get one variable out + # to avoid re-downloading the same file multiple times. + # + # max_vars_per_download_group: ClassVar[int | None] = None + + # Implement this method only if different variables must be retrieved from different urls + # + # # @classmethod + # def source_groups( + # cls, + # data_vars: Sequence[DwdIconEuDataVar], + # ) -> Sequence[Sequence[DwdIconEuDataVar]]: + # """ + # Return groups of variables, where all variables in a group can be retrieived from the same source file. + # """ + # grouped = defaultdict(list) + # for data_var in data_vars: + # grouped[data_var.internal_attrs.file_type].append(data_var) + # return list(grouped.values()) + + # Implement this method only if specific post processing in this dataset + # requires data from outside the region defined by self.region, + # e.g. for deaccumulation or interpolation along append_dim in an analysis dataset. + # + # def get_processing_region(self) -> slice: + # """ + # Return a slice of integer offsets into self.template_ds along self.append_dim that identifies + # the region to process. In most cases this is exactly self.region, but if additional data outside + # the region is required, for example for correct interpolation or deaccumulation, this method can + # return a modified slice (e.g. 
`slice(self.region.start - 1, self.region.stop + 1)`). + # """ + # return self.region + + def generate_source_file_coords( + self, + processing_region_ds: xr.Dataset, + data_var_group: Sequence[DwdIconEuDataVar], + ) -> Sequence[DwdIconEuForecastSourceFileCoord]: + """Return a sequence of coords, one for each source file required to + process the data covered by processing_region_ds.""" + # return [ + # DwdIconEuForecastSourceFileCoord( + # init_time=init_time, + # lead_time=lead_time, + # ) + # for init_time, lead_time in itertools.product( + # processing_region_ds["init_time"].values, + # processing_region_ds["lead_time"].values, + # ) + # ] + raise NotImplementedError( + "Return a sequence of SourceFileCoord objects, one for each source file required to process the data covered by processing_region_ds." + ) + + def download_file(self, coord: DwdIconEuForecastSourceFileCoord) -> Path: + """Download the file for the given coordinate and return the local + path.""" + # return http_download_to_disk(coord.get_url(), self.dataset_id) + raise NotImplementedError( + "Download the file for the given coordinate and return the local path." + ) + + def read_data( + self, + coord: DwdIconEuForecastSourceFileCoord, + data_var: DwdIconEuDataVar, + ) -> ArrayFloat32: + """Read and return an array of data for the given variable and source + file coordinate.""" + # with rasterio.open(coord.downloaded_file_path) as reader: + # TODO: make a band index based on tag matching utility function + # matching_indexes = [ + # i + # for i in range(reader.count) + # if (tags := reader.tags(i))["GRIB_ELEMENT"] + # == data_var.internal_attrs.grib_element + # and tags["GRIB_COMMENT"] == data_var.internal_attrs.grib_comment + # ] + # assert len(matching_indexes) == 1, f"Expected exactly 1 matching band, found {matching_indexes}. 
{data_var.internal_attrs.grib_element=}, {data_var.internal_attrs.grib_description=}, {coord.downloaded_file_path=}" fmt: skip + # rasterio_band_index = 1 + matching_indexes[0] # rasterio is 1-indexed + # return reader.read(rasterio_band_index, dtype=np.float32) + raise NotImplementedError( + "Read and return data for the given variable and source file coordinate." + ) + + # Implement this to apply transformations to the array (e.g. deaccumulation) + # + # def apply_data_transformations( + # self, data_array: xr.DataArray, data_var: DwdIconEuDataVar + # ) -> None: + # """ + # Apply in-place data transformations to the output data array for a given data variable. + + # This method is called after reading all data for a variable into the shared-memory array, + # and before writing shards to the output store. The default implementation applies binary + # rounding to float32 arrays if `data_var.internal_attrs.keep_mantissa_bits` is set. + + # Subclasses may override this method to implement additional transformations such as + # deaccumulation, interpolation or other custom logic. All transformations should be + # performed in-place (don't copy `data_array`, it's large). + + # Parameters + # ---------- + # data_array : xr.DataArray + # The output data array to be transformed in-place. + # data_var : DwdIconEuDataVar + # The data variable metadata object, which may contain transformation parameters. + # """ + # super().apply_data_transformations(data_array, data_var) + + def update_template_with_results( + self, process_results: Mapping[str, Sequence[DwdIconEuForecastSourceFileCoord]] + ) -> xr.Dataset: + """Update template dataset based on processing results. This method is + called during operational updates. + + Subclasses should implement this method to apply dataset-specific adjustments + based on the processing results. 
Examples include: + - Trimming dataset along append_dim to only include successfully processed data + - Loading existing coordinate values from final_store and updating them based on results + - Updating metadata based on what was actually processed vs what was planned + + The default implementation trims along append_dim to end at the most recent + successfully processed coordinate (timestamp). + + Parameters + ---------- + process_results : Mapping[str, Sequence[DwdIconEuForecastSourceFileCoord]] + Mapping from variable names to their source file coordinates with final processing status. + + Returns + ------- + xr.Dataset + Updated template dataset reflecting the actual processing results. + """ + # The super() implementation looks like this: + # + # max_append_dim_processed = max( + # ( + # c.out_loc()[self.append_dim] # type: ignore[type-var] + # for c in chain.from_iterable(process_results.values()) + # if c.status == SourceFileStatus.Succeeded + # ), + # default=None, + # ) + # if max_append_dim_processed is None: + # # No data was processed, trim the template to stop before this job's region + # # This is using isel's exclusive slice end behavior + # return self.template_ds.isel( + # {self.append_dim: slice(None, self.region.start)} + # ) + # else: + # return self.template_ds.sel( + # {self.append_dim: slice(None, max_append_dim_processed)} + # ) + # + # If you like the above behavior, skip implementing this method. + # If you need to customize the behavior, implement this method. 
+ + raise NotImplementedError( + "Subclasses implement update_template_with_results() with dataset-specific logic" + ) + + @classmethod + def operational_update_jobs( + cls, + final_store: zarr.abc.store.Store, + tmp_store: Path, + get_template_fn: Callable[[DatetimeLike], xr.Dataset], + append_dim: AppendDim, + all_data_vars: Sequence[DwdIconEuDataVar], + reformat_job_name: str, + ) -> tuple[ + Sequence["RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord]"], + xr.Dataset, + ]: + """Return the sequence of RegionJob instances necessary to update the + dataset from its current state to include the latest available data. + + Also return the template_ds, expanded along append_dim through the end of + the data to process. The dataset returned here may extend beyond the + available data at the source, in which case `update_template_with_results` + will trim the dataset to the actual data processed. + + The exact logic is dataset-specific, but it generally follows this pattern: + 1. Figure out the range of time to process: append_dim_start (inclusive) and append_dim_end (exclusive) + a. Read existing data from final_store to determine what's already processed + b. Optionally identify recent incomplete/non-final data for reprocessing + 2. Call get_template_fn(append_dim_end) to get the template_ds + 3. Create RegionJob instances by calling cls.get_jobs(..., filter_start=append_dim_start) + + Parameters + ---------- + final_store : zarr.abc.store.Store + The destination Zarr store to read existing data from and write updates to. + tmp_store : zarr.abc.store.Store | Path + The temporary Zarr store to write into while processing. + get_template_fn : Callable[[DatetimeLike], xr.Dataset] + Function to get the template_ds for the operational update. + append_dim : AppendDim + The dimension along which data is appended (e.g., "time"). + all_data_vars : Sequence[DwdIconEuDataVar] + Sequence of all data variable configs for this dataset. 
+ reformat_job_name : str + The name of the reformatting job, used for progress tracking. + This is often the name of the Kubernetes job, or "local". + + Returns + ------- + Sequence[RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord]] + RegionJob instances that need processing for operational updates. + xr.Dataset + The template_ds for the operational update. + """ + # existing_ds = xr.open_zarr(final_store) + # append_dim_start = existing_ds[append_dim].max() + # append_dim_end = pd.Timestamp.now() + # template_ds = get_template_fn(append_dim_end) + + # jobs = cls.get_jobs( + # kind="operational-update", + # final_store=final_store, + # tmp_store=tmp_store, + # template_ds=template_ds, + # append_dim=append_dim, + # all_data_vars=all_data_vars, + # reformat_job_name=reformat_job_name, + # filter_start=append_dim_start, + # ) + # return jobs, template_ds + + raise NotImplementedError( + "Subclasses implement operational_update_jobs() with dataset-specific logic" + ) diff --git a/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py b/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py new file mode 100644 index 00000000..ccc5a53c --- /dev/null +++ b/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py @@ -0,0 +1,62 @@ +# from pathlib import Path + +# import numpy as np +# import pandas as pd +# import pytest +# import xarray as xr + +# from reformatters.common import validation +# from reformatters.dwd.icon_eu.forecast.dynamical_dataset import DwdIconEuForecastDataset + +# @pytest.mark.slow +# def test_backfill_local_and_operational_update(monkeypatch: pytest.MonkeyPatch) -> None: +# dataset = DwdIconEuForecastDataset() + +# # Local backfill reformat +# dataset.backfill_local(append_dim_end=pd.Timestamp("2000-01-02")) +# ds = xr.open_zarr(dataset._final_store(), chunks=None) +# assert ds.time.max() == pd.Timestamp("2000-01-01") + +# # Operational update +# monkeypatch.setattr( +# dataset.region_job_class, +# "_update_append_dim_end", +# lambda: 
pd.Timestamp("2000-01-03"), +# ) +# monkeypatch.setattr( +# dataset.region_job_class, +# "_update_append_dim_start", +# lambda existing_ds: pd.Timestamp(existing_ds.time.max().item()), +# ) + +# dataset.update("test-update") + +# # Check resulting dataset +# updated_ds = xr.open_zarr(dataset._final_store(), chunks=None) + +# np.testing.assert_array_equal( +# updated_ds.time, pd.date_range("1981-10-01", "1981-10-03") +# ) +# subset_ds = updated_ds.sel(latitude=48.583335, longitude=-94, method="nearest") +# np.testing.assert_array_equal( +# subset_ds["your_variable"].values, [190.0, 163.0, 135.0] +# ) + + +# def test_operational_kubernetes_resources( +# dataset: DwdIconEuForecastDataset, +# ) -> None: +# cron_jobs = dataset.operational_kubernetes_resources("test-image-tag") + +# assert len(cron_jobs) == 2 +# update_cron_job, validation_cron_job = cron_jobs +# assert update_cron_job.name == f"{dataset.dataset_id}-operational-update" +# assert validation_cron_job.name == f"{dataset.dataset_id}-validation" +# assert update_cron_job.secret_names == dataset.storage_config.k8s_secret_names +# assert validation_cron_job.secret_names == dataset.storage_config.k8s_secret_names + + +# def test_validators(dataset: DwdIconEuForecastDataset) -> None: +# validators = tuple(dataset.validators()) +# assert len(validators) == 2 +# assert all(isinstance(v, validation.DataValidator) for v in validators) diff --git a/tests/dwd/icon_eu/forecast/region_job_test.py b/tests/dwd/icon_eu/forecast/region_job_test.py new file mode 100644 index 00000000..0ab7d676 --- /dev/null +++ b/tests/dwd/icon_eu/forecast/region_job_test.py @@ -0,0 +1,37 @@ +# from unittest.mock import Mock + +# import pandas as pd + +# from reformatters.dwd.icon_eu.forecast.region_job import ( +# DwdIconEuForecastRegionJob, +# DwdIconEuForecastSourceFileCoord, +# ) +# from reformatters.dwd.icon_eu.forecast.template_config import DwdIconEuForecastTemplateConfig + +# def test_source_file_coord_get_url() -> None: +# coord = 
DwdIconEuForecastSourceFileCoord(time=pd.Timestamp("2000-01-01")) +# assert coord.get_url() == "https://example.com/data/2000-01-01.grib2" + + +# def test_region_job_generete_source_file_coords() -> None: +# template_config = DwdIconEuForecastTemplateConfig() +# template_ds = template_config.get_template(pd.Timestamp("2000-01-23")) + +# region_job = DwdIconEuForecastRegionJob( +# final_store=Mock(), +# tmp_store=Mock(), +# template_ds=template_ds, +# data_vars=[Mock(), Mock()], +# append_dim=template_config.append_dim, +# region=slice(0, 10), +# reformat_job_name="test", +# ) + +# processing_region_ds, output_region_ds = region_job._get_region_datasets() + +# source_file_coords = region_job.generate_source_file_coords( +# processing_region_ds, [Mock()] +# ) + +# assert len(source_file_coords) == ... +# assert ... From 6e7c81b3be91cfad5eff589810a361f110781020 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 18:27:53 +0100 Subject: [PATCH 30/35] Update pytest to 8.4.1. Doesn't fix the issue yet. 
--- pyproject.toml | 2 +- uv.lock | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3dde4da3..7a4fcb55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dev-dependencies = [ "pandas-stubs>=2.2.2.240909", "pre-commit>=3.8.0", "pyqt6>=6.7.1", - "pytest>=8.3.4", + "pytest>=8.4.1", "ruff==0.12.1", "types-requests>=2.32.0.20240914", ] diff --git a/uv.lock b/uv.lock index 12afa4b0..83a7f1e4 100644 --- a/uv.lock +++ b/uv.lock @@ -1174,17 +1174,18 @@ wheels = [ [[package]] name = "pytest" -version = "8.3.4" +version = "8.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, + { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919, upload-time = "2024-12-01T12:54:25.98Z" } +sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083, upload-time = "2024-12-01T12:54:19.735Z" }, + { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, ] 
[[package]] @@ -1318,7 +1319,7 @@ dev = [ { name = "pandas-stubs", specifier = ">=2.2.2.240909" }, { name = "pre-commit", specifier = ">=3.8.0" }, { name = "pyqt6", specifier = ">=6.7.1" }, - { name = "pytest", specifier = ">=8.3.4" }, + { name = "pytest", specifier = ">=8.4.1" }, { name = "ruff", specifier = "==0.12.1" }, { name = "types-requests", specifier = ">=2.32.0.20240914" }, ] From 416748a8be61acf4b2bb4bf85779694645b8520b Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 18:31:31 +0100 Subject: [PATCH 31/35] Adding missing __init__.py files to test directories --- tests/dwd/__init__.py | 0 tests/dwd/icon_eu/__init__.py | 0 tests/noaa/gfs/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/dwd/__init__.py create mode 100644 tests/dwd/icon_eu/__init__.py create mode 100644 tests/noaa/gfs/__init__.py diff --git a/tests/dwd/__init__.py b/tests/dwd/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/dwd/icon_eu/__init__.py b/tests/dwd/icon_eu/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/noaa/gfs/__init__.py b/tests/noaa/gfs/__init__.py new file mode 100644 index 00000000..e69de29b From 1f79980d73feb11e9604de2eb98de517a3dbbc78 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 28 Jul 2025 18:56:53 +0100 Subject: [PATCH 32/35] Change latitude chunks to 219 so pydantic tests pass. 
And update comment about chunk sizes --- .../dwd/icon_eu/forecast/template_config.py | 4 +- .../zarr.json | 8 +- .../zarr.json | 10 +- .../zarr.json | 10 +- .../latest.zarr/expected_forecast_length/c/0 | Bin 0 -> 76 bytes .../expected_forecast_length/zarr.json | 52 +++ .../latest.zarr/high_cloud_cover/zarr.json | 8 +- .../latest.zarr/ingested_forecast_length/c/0 | Bin 80 -> 78 bytes .../ingested_forecast_length/zarr.json | 2 +- .../templates/latest.zarr/init_time/c/0 | Bin 79 -> 77 bytes .../templates/latest.zarr/init_time/zarr.json | 4 +- .../latest.zarr/low_cloud_cover/zarr.json | 8 +- .../latest.zarr/maximum_wind_10m/zarr.json | 8 +- .../latest.zarr/medium_cloud_cover/zarr.json | 8 +- .../zarr.json | 8 +- .../zarr.json | 8 +- .../zarr.json | 8 +- .../latest.zarr/snow_depth/zarr.json | 8 +- .../snow_depth_water_equivalent/zarr.json | 8 +- .../latest.zarr/soil_water_runoff/zarr.json | 8 +- .../latest.zarr/spatial_ref/zarr.json | 2 +- .../surface_water_runoff/zarr.json | 8 +- .../latest.zarr/temperature_2m/zarr.json | 8 +- .../latest.zarr/total_cloud_cover/zarr.json | 8 +- .../templates/latest.zarr/valid_time/c/0/0 | Bin 5220 -> 3986 bytes .../latest.zarr/valid_time/zarr.json | 4 +- .../templates/latest.zarr/wind_u_10/zarr.json | 8 +- .../{wind_v_10 => wind_v_10m}/zarr.json | 8 +- .../forecast/templates/latest.zarr/zarr.json | 390 ++++++++++-------- 29 files changed, 355 insertions(+), 251 deletions(-) create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/expected_forecast_length/c/0 create mode 100644 src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/expected_forecast_length/zarr.json rename src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/{total_precipitation => precipitation_surface}/zarr.json (92%) rename src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/{pressure_reduced_to_msl => pressure_reduced_to_mean_sea_level}/zarr.json (92%) rename 
src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/{relative_humidity => relative_humidity_2m}/zarr.json (92%) rename src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/{wind_v_10 => wind_v_10m}/zarr.json (92%) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index ee198f3b..6bfa9e06 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -282,11 +282,11 @@ def coords(self) -> Sequence[Coordinate]: @property def data_vars(self) -> Sequence[DwdIconEuDataVar]: """Define metadata and encoding for each data variable.""" - # Roughly 3.4MB uncompressed, 1.9MB compressed + # Roughly 4.5MB uncompressed, 2.5MB compressed var_chunks: dict[Dim, int] = { "init_time": 1, "lead_time": 93, - "latitude": 165, # 165 = 657 / 3.98... + "latitude": 219, # 219 = 657 / 3 "longitude": 153, # 153 = 1377 / 9 } # Roughly 337MB uncompressed, 67MB compressed diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/convective_available_potential_energy/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/convective_available_potential_energy/zarr.json index a2aa9338..d9a5bf40 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/convective_available_potential_energy/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/convective_available_potential_energy/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "J kg-1", "comment": "Convective available potential energy", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" 
}, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json index 91961be2..062b24d8 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_diffuse_short_wave_radiation_flux_surface/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -68,12 +68,12 @@ } ], "attributes": { - "long_name": "Downward diffusive short wave radiation flux at surface (mean over forecast time)", + "long_name": "Downward diffusive short wave radiation flux at surface", "short_name": "aswdifd_s", "standard_name": "Mean surface diffuse short-wave radiation flux", "units": "W m-2", "step_type": "avg", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json index dfef84d9..04cf52f3 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/downward_direct_short_wave_radiation_flux_surface/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 
73, + 93, + 219, 153 ], "codecs": [ @@ -68,12 +68,12 @@ } ], "attributes": { - "long_name": "Downward direct short wave radiation flux at surface (mean over forecast time)", + "long_name": "Downward direct short wave radiation flux at surface", "short_name": "aswdir_s", "units": "W m-2", "comment": "Downward solar direct radiation flux at the surface, averaged over forecast time. This quantity is not directly provided by the radiation scheme. It is aposteriori diagnosed from the definition of the surface net shortwave radiation flux.", "step_type": "avg", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/expected_forecast_length/c/0 b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/expected_forecast_length/c/0 new file mode 100644 index 0000000000000000000000000000000000000000..ceddf3fae735b13b1a9369a8bc08bf2908648e16 GIT binary patch literal 76 zcmZQ#oXD84hKYfJfr-HfNJ;>)36$2@s{eNZNFtSiL7`zf8v`T5&;OPROd>#ekX%B- R>QDv-0S2IWw2o{7BLLZa4e9^@ literal 0 HcmV?d00001 diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/expected_forecast_length/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/expected_forecast_length/zarr.json new file mode 100644 index 00000000..129dc80c --- /dev/null +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/expected_forecast_length/zarr.json @@ -0,0 +1,52 @@ +{ + "shape": [ + 1 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 21900 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": 
{ + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "statistics_approximate": { + "min": "0 days 00:00:00", + "max": "5 days 00:00:00" + }, + "units": "seconds" + }, + "dimension_names": [ + "init_time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} \ No newline at end of file diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json index 09c9d227..4fd8a560 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/high_cloud_cover/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "%", "comment": "Cloud Cover (0 - 400 hPa). 
Different agencies use different short_names for this same parameter: ECMWF: HCC; WMO GRIB table: HCDC.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/c/0 b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/c/0 index 85d71c5a90ad594544454d42c929d70ce8a4b441..45c0da26fca9a778093796c0d0aac8a9ee15e82d 100644 GIT binary patch delta 44 ycmWIW<7Hx;$e6H(iGhKEiNS9oucm|ukg2g%|8GLV>QDv-0R{#}hG-qx1V#YA5eRVr delta 46 zcmeYZ;ALW*$k;G}nSp_Ui6LMjucnk3kg2g%|L=l^35*P(3=9Gc42%rb6D$uf0RYzI B3Bv#Y diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/zarr.json index 24a7ecc6..129dc80c 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/ingested_forecast_length/zarr.json @@ -7,7 +7,7 @@ "name": "regular", "configuration": { "chunk_shape": [ - 29200 + 21900 ] } }, diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/init_time/c/0 b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/init_time/c/0 index 2747d344788b47f4a46175ee95d63742e69ae814..247632352163f6e8f7574ee1e16e5cb7eacaad15 100644 GIT binary patch delta 55 zcmebGkjh9CbeFEEJ!MK!kS|4m3(9m>EUzyK7F J){#wM1OQ>-466VD delta 57 zcmebE=VfA?$k;G}nSp_UiNSv&udM1r?l=ZUh9CbeFEEJ#MK!kS|6R~9fsrAUfkA)) LC{sPb@&FS6g;Wj< diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/init_time/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/init_time/zarr.json index 7ae6deb2..a5780852 100644 --- 
a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/init_time/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/init_time/zarr.json @@ -7,7 +7,7 @@ "name": "regular", "configuration": { "chunk_shape": [ - 29200 + 21900 ] } }, @@ -38,7 +38,7 @@ ], "attributes": { "statistics_approximate": { - "min": "2020-01-01T00:00:00", + "min": "2025-08-08T00:00:00", "max": "Present" }, "units": "seconds since 1970-01-01", diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json index 7a4707dd..f02c8a2e 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/low_cloud_cover/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "%", "comment": "Cloud Cover (800 hPa - Soil). 
Different agencies use different short_names for this same parameter: ECMWF: LCC; WMO GRIB table: LCDC.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/maximum_wind_10m/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/maximum_wind_10m/zarr.json index 9cb3accf..5533ded4 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/maximum_wind_10m/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/maximum_wind_10m/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "m/s", "comment": "Maximum wind gust at 10 m above ground. It is diagnosed from the turbulence state in the atmospheric boundary layer, including a potential enhancement by the SSO parameterization over mountainous terrain. 
In the presence of deep convection, it contains an additional contribution due to convective gusts.", "step_type": "max", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json index 86590ba4..52b5b6aa 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/medium_cloud_cover/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "%", "comment": "Cloud Cover (400 - 800 hPa). 
Different agencies use different short_names for this same parameter: ECMWF: MCC; WMO GRIB table: MCDC.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_precipitation/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/precipitation_surface/zarr.json similarity index 92% rename from src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_precipitation/zarr.json rename to src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/precipitation_surface/zarr.json index ec05e50d..619936dd 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_precipitation/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/precipitation_surface/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "kg m**-2", "comment": "Total precipitation accumulated since model start. 
TOT_PREC = RAIN_GSP + SNOW_GSP + RAIN_CON + SNOW_CON.", "step_type": "accum", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_mean_sea_level/zarr.json similarity index 92% rename from src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json rename to src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_mean_sea_level/zarr.json index b4d14950..c0eab0e5 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_msl/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/pressure_reduced_to_mean_sea_level/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "Pa", "comment": "Surface pressure reduced to MSL", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity_2m/zarr.json similarity index 92% rename from src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json rename to src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity_2m/zarr.json index 821e6b2e..9fdc3a7c 100644 --- 
a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/relative_humidity_2m/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -74,7 +74,7 @@ "units": "%", "comment": "Relative humidity at 2m above ground. Other short_names used for this parameter: rh, 2r, r.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json index 8e1f9ea7..b227939e 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -74,7 +74,7 @@ "units": "m", "comment": "Snow depth in m. 
It is diagnosed from RHO_SNOW and W_SNOW according to H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth_water_equivalent/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth_water_equivalent/zarr.json index 64d7afc5..4c6991ed 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth_water_equivalent/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/snow_depth_water_equivalent/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "kg m**-2", "comment": "Snow depth water equivalent in kg/m2. 
Set to 0 above water surfaces and snow-free land points.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/soil_water_runoff/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/soil_water_runoff/zarr.json index e4dd3558..0a070f03 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/soil_water_runoff/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/soil_water_runoff/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "kg m-2", "comment": "Soil water runoff (accumulated since model start)", "step_type": "accum", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json index 06abfd3d..aa70cba4 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/spatial_ref/zarr.json @@ -30,7 +30,7 @@ } ], "attributes": { - "comment": "From the WKT string output by gdalinfo.", + "comment": "A perfect sphere geographic CRS with a radius of 6,371,229m, extracted from grib.", "crs_wkt": "GEOGCS[\"Coordinate System imported from GRIB 
file\",DATUM[\"unnamed\",SPHEROID[\"Sphere\",6371229,0]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]", "semi_major_axis": 6371229.0, "semi_minor_axis": 6371229.0, diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/surface_water_runoff/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/surface_water_runoff/zarr.json index 4870a60a..0ea8e4da 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/surface_water_runoff/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/surface_water_runoff/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "kg m-2", "comment": "Surface water runoff from interception and snow reservoir and from limited infiltration rate. Sum over forecast.", "step_type": "accum", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json index 6549ab24..53692173 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/temperature_2m/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -74,7 +74,7 @@ "units": "K", "comment": "Temperature at 2m above ground, averaged over all tiles of a grid point. 
Different agencies use different short_names for this parameter: ECMWF: 2t; NOAA & DWD: t2m.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json index 557f0965..a789754e 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/total_cloud_cover/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -73,7 +73,7 @@ "units": "%", "comment": "Total cloud cover. Different agencies use different short_names for this same parameter: ECMWF: TCC; NOAA & WMO: TCDC.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/c/0/0 b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/c/0/0 index 89875d76d03e2d3d5167f45ec2f4d4593b502578..5eb65ca3f7514afb7fd3206a8f64544272605a9a 100644 GIT binary patch literal 3986 zcmeI#+e=hY6vy#3CvU0QASM-NhmaDn97MxzGIWs&ktxhdtWIVjQZZCmbdfPcGNdLU zHB&i6hGfWelM0DJq(CAj$r@7RNFfoHQ%Hzq(^n|y59skcd|vi`TED&aO|e|D7_E~c zqDb9bkq|}XC*o&`*sv8Ps6Zp`;RQw^AtDQrfjpGrJetu3A3h-@R3s5u*p9=fhK?tA zjW37@6Ip__D1Z|eaT8DB$2UZWi!6m5g*bsrXh#o*@Ez6&krZsiUYtT5?%){=Od)Qz z$O>#mG0vhM9`xY@eqny3NIG(H2re|?0bb%Glqit|tb_w)s6s0qp&w%iixyF_8u>Vi 
z3%G%9yg>kwb3~Gmja@i~THJyc?=XRw7?EVGM-j^5#%=Us7?X%)jZ={W4W}`|QYJr4 zo7QKVU{ko1L`(M7QU=AEs}|Zy?d6Utt#h&HJ(ejTA}$YHHlyd|=3**%tqA-8-mO#3Da}tGtRiV`gCXfw>pV^T0e4&3i%2yHPPS;B9F3 dftkU~AHnQ{|Byl4pWlaOwEWF;FmaJ)`3)Cxtfc?| literal 5220 zcmeI$|7%Tg9LMqZy=I$zJ-cQxOP3a-wcUhRtQ%Sjt!r&-G~6^_TIe=2W84m{g~YWM z5;|e)hSoJ-Lc?i^wS2iPW{uYMC84oPYi2grQ=emXxqemUo-&-?v;eLm-W%%&qI z)xFdtVwTcG5wBU~%qWo-bRvkTFp*SbqZs8lh(_q>MnB?4i=<&8N>B+8&f_k6FbGSy zNIG)h#7@+s32k_VVI)O}*suhpaN{_d@c?fjks`^Mg*9J2v^SFy148k&&{v!uY>_k19 z(1uqSMiTRA!x9Y8>a?qwBigS)EsT(g706*L^{)tQ2yT-arOv9^n(Qr(c4wFCt=jJ~$7_mjlAi9*3giZhWSz3r zy2p0de#&vddBb&Ied>9wefEX)Sj9gvFkLCIyIo$b!#^z35^T$z)t(0Zaqycp)#Aug zt5$if=G`9y&n&qw`$D16_HiS(b~G2Kq<19ls7uc1JCdLD&&pY^h)KSO=ILhRjZp(* z4vZ`q`+>118s~x(8Z|I#@ayZ)m;<8*#vJ_qQ!=t(WWmUSk%d1uC;wqTP~E1#nX7hl GPw^Aay}hmg diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/zarr.json index e73a7866..77932a03 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/valid_time/zarr.json @@ -8,7 +8,7 @@ "name": "regular", "configuration": { "chunk_shape": [ - 29200, + 21900, 93 ] } @@ -40,7 +40,7 @@ ], "attributes": { "statistics_approximate": { - "min": "2020-01-01T00:00:00", + "min": "2025-08-08T00:00:00", "max": "Present + 5 days" }, "units": "seconds since 1970-01-01", diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json index 64c8303d..7be962a3 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 
93, + 219, 153 ], "codecs": [ @@ -74,7 +74,7 @@ "units": "m/s", "comment": "Zonal wind at 10m above ground", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10m/zarr.json similarity index 92% rename from src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10/zarr.json rename to src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10m/zarr.json index e2251ed6..e98873b8 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_v_10m/zarr.json @@ -11,7 +11,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -30,8 +30,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -74,7 +74,7 @@ "units": "m/s", "comment": "Meridional wind at 10m above ground", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json index 01ffbec2..27257963 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json @@ -1,13 +1,13 @@ { "attributes": { - "dataset_id": "dwd-icon_eu-forecast", + "dataset_id": "dwd-icon-eu-forecast", "dataset_version": "0.1.0", "name": "DWD ICON-EU Forecast", "description": "High-resolution weather 
forecasts for Europe from the ICON-EU model operated by Deutscher Wetterdienst (DWD).", "attribution": "DWD ICON-EU data processed by dynamical.org from DWD.", "spatial_domain": "Europe", "spatial_resolution": "0.0625 degrees (~7km)", - "time_domain": "Forecasts initialized 2020-01-01 00:00:00 UTC to Present", + "time_domain": "Forecasts initialized 2025-08-08 00:00:00 UTC to Present", "time_resolution": "Forecasts initialized every 6 hours", "forecast_domain": "Forecast lead time 0-120 hours (0-5 days) ahead", "forecast_resolution": "Forecast step 0-78 hours: hourly, 81-120 hours: 3 hourly" @@ -30,7 +30,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -49,8 +49,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -92,7 +92,7 @@ "units": "J kg-1", "comment": "Convective available potential energy", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -118,7 +118,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -137,8 +137,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -175,12 +175,12 @@ } ], "attributes": { - "long_name": "Downward diffusive short wave radiation flux at surface (mean over forecast time)", + "long_name": "Downward diffusive short wave radiation flux at surface", "short_name": "aswdifd_s", "standard_name": "Mean surface diffuse short-wave radiation flux", "units": "W m-2", "step_type": "avg", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -206,7 +206,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -225,8 +225,8 @@ "configuration": { "chunk_shape": 
[ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -263,12 +263,12 @@ } ], "attributes": { - "long_name": "Downward direct short wave radiation flux at surface (mean over forecast time)", + "long_name": "Downward direct short wave radiation flux at surface", "short_name": "aswdir_s", "units": "W m-2", "comment": "Downward solar direct radiation flux at the surface, averaged over forecast time. This quantity is not directly provided by the radiation scheme. It is aposteriori diagnosed from the definition of the surface net shortwave radiation flux.", "step_type": "avg", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -281,6 +281,58 @@ "node_type": "array", "storage_transformers": [] }, + "expected_forecast_length": { + "shape": [ + 1 + ], + "data_type": "int64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 21900 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 8, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "attributes": { + "statistics_approximate": { + "min": "0 days 00:00:00", + "max": "5 days 00:00:00" + }, + "units": "seconds" + }, + "dimension_names": [ + "init_time" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, "high_cloud_cover": { "shape": [ 1, @@ -294,7 +346,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -313,8 +365,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -356,7 +408,7 @@ "units": "%", "comment": "Cloud Cover (0 - 400 hPa). 
Different agencies use different short_names for this same parameter: ECMWF: HCC; WMO GRIB table: HCDC.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -378,7 +430,7 @@ "name": "regular", "configuration": { "chunk_shape": [ - 29200 + 21900 ] } }, @@ -430,7 +482,7 @@ "name": "regular", "configuration": { "chunk_shape": [ - 29200 + 21900 ] } }, @@ -461,7 +513,7 @@ ], "attributes": { "statistics_approximate": { - "min": "2020-01-01T00:00:00", + "min": "2025-08-08T00:00:00", "max": "Present" }, "units": "seconds since 1970-01-01", @@ -645,7 +697,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -664,8 +716,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -707,7 +759,7 @@ "units": "%", "comment": "Cloud Cover (800 hPa - Soil). Different agencies use different short_names for this same parameter: ECMWF: LCC; WMO GRIB table: LCDC.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -733,7 +785,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -752,8 +804,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -795,7 +847,7 @@ "units": "m/s", "comment": "Maximum wind gust at 10 m above ground. It is diagnosed from the turbulence state in the atmospheric boundary layer, including a potential enhancement by the SSO parameterization over mountainous terrain. 
In the presence of deep convection, it contains an additional contribution due to convective gusts.", "step_type": "max", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -821,7 +873,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -840,8 +892,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -883,7 +935,95 @@ "units": "%", "comment": "Cloud Cover (400 - 800 hPa). Different agencies use different short_names for this same parameter: ECMWF: MCC; WMO GRIB table: MCDC.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", + "_FillValue": "AAAAAAAA+H8=" + }, + "dimension_names": [ + "init_time", + "lead_time", + "latitude", + "longitude" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "precipitation_surface": { + "shape": [ + 1, + 93, + 657, + 1377 + ], + "data_type": "float32", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 1, + 93, + 657, + 1377 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "sharding_indexed", + "configuration": { + "chunk_shape": [ + 1, + 93, + 219, + 153 + ], + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "blosc", + "configuration": { + "typesize": 4, + "cname": "zstd", + "clevel": 3, + "shuffle": "shuffle", + "blocksize": 0 + } + } + ], + "index_codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "crc32c" + } + ], + "index_location": "end" + } + } + ], + "attributes": { + "long_name": "Total Precipitation", + 
"short_name": "tp", + "units": "kg m**-2", + "comment": "Total precipitation accumulated since model start. TOT_PREC = RAIN_GSP + SNOW_GSP + RAIN_CON + SNOW_CON.", + "step_type": "accum", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -896,7 +1036,7 @@ "node_type": "array", "storage_transformers": [] }, - "pressure_reduced_to_msl": { + "pressure_reduced_to_mean_sea_level": { "shape": [ 1, 93, @@ -909,7 +1049,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -928,8 +1068,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -971,7 +1111,7 @@ "units": "Pa", "comment": "Surface pressure reduced to MSL", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -984,7 +1124,7 @@ "node_type": "array", "storage_transformers": [] }, - "relative_humidity": { + "relative_humidity_2m": { "shape": [ 1, 93, @@ -997,7 +1137,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1016,8 +1156,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1060,7 +1200,7 @@ "units": "%", "comment": "Relative humidity at 2m above ground. Other short_names used for this parameter: rh, 2r, r.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -1086,7 +1226,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1105,8 +1245,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1149,7 +1289,7 @@ "units": "m", "comment": "Snow depth in m. 
It is diagnosed from RHO_SNOW and W_SNOW according to H_SNOW = W_SNOW / RHO_SNOW and is limited to H_SNOW <= 40 m.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -1175,7 +1315,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1194,8 +1334,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1237,7 +1377,7 @@ "units": "kg m**-2", "comment": "Snow depth water equivalent in kg/m2. Set to 0 above water surfaces and snow-free land points.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -1263,7 +1403,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1282,8 +1422,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1325,7 +1465,7 @@ "units": "kg m-2", "comment": "Soil water runoff (accumulated since model start)", "step_type": "accum", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -1370,7 +1510,7 @@ } ], "attributes": { - "comment": "From the WKT string output by gdalinfo.", + "comment": "A perfect sphere geographic CRS with a radius of 6,371,229m, extracted from grib.", "crs_wkt": "GEOGCS[\"Coordinate System imported from GRIB file\",DATUM[\"unnamed\",SPHEROID[\"Sphere\",6371229,0]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]", "semi_major_axis": 6371229.0, "semi_minor_axis": 6371229.0, @@ -1400,7 
+1540,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1419,8 +1559,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1462,7 +1602,7 @@ "units": "kg m-2", "comment": "Surface water runoff from interception and snow reservoir and from limited infiltration rate. Sum over forecast.", "step_type": "accum", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -1488,7 +1628,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1507,8 +1647,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1551,7 +1691,7 @@ "units": "K", "comment": "Temperature at 2m above ground, averaged over all tiles of a grid point. Different agencies use different short_names for this parameter: ECMWF: 2t; NOAA & DWD: t2m.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -1577,7 +1717,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1596,8 +1736,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1639,95 +1779,7 @@ "units": "%", "comment": "Total cloud cover. 
Different agencies use different short_names for this same parameter: ECMWF: TCC; NOAA & WMO: TCDC.", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", - "_FillValue": "AAAAAAAA+H8=" - }, - "dimension_names": [ - "init_time", - "lead_time", - "latitude", - "longitude" - ], - "zarr_format": 3, - "node_type": "array", - "storage_transformers": [] - }, - "total_precipitation": { - "shape": [ - 1, - 93, - 657, - 1377 - ], - "data_type": "float32", - "chunk_grid": { - "name": "regular", - "configuration": { - "chunk_shape": [ - 1, - 120, - 657, - 1377 - ] - } - }, - "chunk_key_encoding": { - "name": "default", - "configuration": { - "separator": "/" - } - }, - "fill_value": 0.0, - "codecs": [ - { - "name": "sharding_indexed", - "configuration": { - "chunk_shape": [ - 1, - 120, - 73, - 153 - ], - "codecs": [ - { - "name": "bytes", - "configuration": { - "endian": "little" - } - }, - { - "name": "blosc", - "configuration": { - "typesize": 4, - "cname": "zstd", - "clevel": 3, - "shuffle": "shuffle", - "blocksize": 0 - } - } - ], - "index_codecs": [ - { - "name": "bytes", - "configuration": { - "endian": "little" - } - }, - { - "name": "crc32c" - } - ], - "index_location": "end" - } - } - ], - "attributes": { - "long_name": "Total Precipitation", - "short_name": "tp", - "units": "kg m**-2", - "comment": "Total precipitation accumulated since model start. 
TOT_PREC = RAIN_GSP + SNOW_GSP + RAIN_CON + SNOW_CON.", - "step_type": "accum", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -1750,7 +1802,7 @@ "name": "regular", "configuration": { "chunk_shape": [ - 29200, + 21900, 93 ] } @@ -1782,7 +1834,7 @@ ], "attributes": { "statistics_approximate": { - "min": "2020-01-01T00:00:00", + "min": "2025-08-08T00:00:00", "max": "Present + 5 days" }, "units": "seconds since 1970-01-01", @@ -1809,7 +1861,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1828,8 +1880,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1872,7 +1924,7 @@ "units": "m/s", "comment": "Zonal wind at 10m above ground", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ @@ -1885,7 +1937,7 @@ "node_type": "array", "storage_transformers": [] }, - "wind_v_10": { + "wind_v_10m": { "shape": [ 1, 93, @@ -1898,7 +1950,7 @@ "configuration": { "chunk_shape": [ 1, - 120, + 93, 657, 1377 ] @@ -1917,8 +1969,8 @@ "configuration": { "chunk_shape": [ 1, - 120, - 73, + 93, + 219, 153 ], "codecs": [ @@ -1961,7 +2013,7 @@ "units": "m/s", "comment": "Meridional wind at 10m above ground", "step_type": "instant", - "coordinates": "ingested_forecast_length spatial_ref valid_time", + "coordinates": "expected_forecast_length ingested_forecast_length spatial_ref valid_time", "_FillValue": "AAAAAAAA+H8=" }, "dimension_names": [ From a09e0e666ec64e65547cff48fd6ca8db27060989 Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 4 Aug 2025 18:24:49 +0100 Subject: [PATCH 33/35] Change `wind_u_10` to `wind_u_10m` Co-authored-by: Alden Keefe Sampson --- 
src/reformatters/dwd/icon_eu/forecast/template_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/template_config.py b/src/reformatters/dwd/icon_eu/forecast/template_config.py index 6bfa9e06..7d833f16 100644 --- a/src/reformatters/dwd/icon_eu/forecast/template_config.py +++ b/src/reformatters/dwd/icon_eu/forecast/template_config.py @@ -549,7 +549,7 @@ def data_vars(self) -> Sequence[DwdIconEuDataVar]: ), ), DwdIconEuDataVar( - name="wind_u_10", + name="wind_u_10m", encoding=encoding_float32_default, attrs=DataVarAttrs( short_name="u10", From 887c8f48cddb9de72d942728a5091dbcd113117a Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Mon, 4 Aug 2025 18:37:25 +0100 Subject: [PATCH 34/35] Update Zarr metadata. Tests pass. --- .../templates/latest.zarr/{wind_u_10 => wind_u_10m}/zarr.json | 0 .../dwd/icon_eu/forecast/templates/latest.zarr/zarr.json | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/{wind_u_10 => wind_u_10m}/zarr.json (100%) diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10m/zarr.json similarity index 100% rename from src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10/zarr.json rename to src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/wind_u_10m/zarr.json diff --git a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json index 27257963..f39ca724 100644 --- a/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json +++ b/src/reformatters/dwd/icon_eu/forecast/templates/latest.zarr/zarr.json @@ -1848,7 +1848,7 @@ "node_type": "array", "storage_transformers": [] }, - "wind_u_10": { + "wind_u_10m": { "shape": [ 1, 93, From 7f513adac1cc084ef307a1ee2805d39a64800244 Mon Sep 17 00:00:00 2001 
From: Jack Kelly Date: Mon, 4 Aug 2025 20:46:56 +0100 Subject: [PATCH 35/35] Re-run initialize-new-integration after merging with main branch --- .../dwd/icon_eu/forecast/region_job.py | 48 +++++++++---------- .../forecast/dynamical_dataset_test.py | 4 +- tests/dwd/icon_eu/forecast/region_job_test.py | 2 +- 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/src/reformatters/dwd/icon_eu/forecast/region_job.py b/src/reformatters/dwd/icon_eu/forecast/region_job.py index dda6143d..4e5de6b1 100644 --- a/src/reformatters/dwd/icon_eu/forecast/region_job.py +++ b/src/reformatters/dwd/icon_eu/forecast/region_job.py @@ -2,7 +2,6 @@ from pathlib import Path import xarray as xr -import zarr from reformatters.common.logging import get_logger from reformatters.common.region_job import ( @@ -10,6 +9,7 @@ RegionJob, SourceFileCoord, ) +from reformatters.common.storage import StoreFactory from reformatters.common.types import ( AppendDim, ArrayFloat32, @@ -31,11 +31,10 @@ def get_url(self) -> str: def out_loc( self, ) -> Mapping[Dim, CoordinateValueOrRange]: - """Returns a data array indexer which identifies the region in the - output dataset to write the data from the source file. - - The indexer is a dict from dimension names to coordinate values - or slices. + """ + Returns a data array indexer which identifies the region in the output dataset + to write the data from the source file. The indexer is a dict from dimension + names to coordinate values or slices. """ # If the names of the coordinate attributes of your SourceFileCoord subclass are also all # dimension names in the output dataset (e.g. 
init_time and lead_time), @@ -93,8 +92,7 @@ def generate_source_file_coords( processing_region_ds: xr.Dataset, data_var_group: Sequence[DwdIconEuDataVar], ) -> Sequence[DwdIconEuForecastSourceFileCoord]: - """Return a sequence of coords, one for each source file required to - process the data covered by processing_region_ds.""" + """Return a sequence of coords, one for each source file required to process the data covered by processing_region_ds.""" # return [ # DwdIconEuForecastSourceFileCoord( # init_time=init_time, @@ -110,8 +108,7 @@ def generate_source_file_coords( ) def download_file(self, coord: DwdIconEuForecastSourceFileCoord) -> Path: - """Download the file for the given coordinate and return the local - path.""" + """Download the file for the given coordinate and return the local path.""" # return http_download_to_disk(coord.get_url(), self.dataset_id) raise NotImplementedError( "Download the file for the given coordinate and return the local path." @@ -122,8 +119,7 @@ def read_data( coord: DwdIconEuForecastSourceFileCoord, data_var: DwdIconEuDataVar, ) -> ArrayFloat32: - """Read and return an array of data for the given variable and source - file coordinate.""" + """Read and return an array of data for the given variable and source file coordinate.""" # with rasterio.open(coord.downloaded_file_path) as reader: # TODO: make a band index based on tag matching utility function # matching_indexes = [ @@ -133,7 +129,7 @@ def read_data( # == data_var.internal_attrs.grib_element # and tags["GRIB_COMMENT"] == data_var.internal_attrs.grib_comment # ] - # assert len(matching_indexes) == 1, f"Expected exactly 1 matching band, found {matching_indexes}. {data_var.internal_attrs.grib_element=}, {data_var.internal_attrs.grib_description=}, {coord.downloaded_file_path=}" fmt: skip + # assert len(matching_indexes) == 1, f"Expected exactly 1 matching band, found {matching_indexes}. 
{data_var.internal_attrs.grib_element=}, {data_var.internal_attrs.grib_description=}, {coord.downloaded_file_path=}" # rasterio_band_index = 1 + matching_indexes[0] # rasterio is 1-indexed # return reader.read(rasterio_band_index, dtype=np.float32) raise NotImplementedError( @@ -168,13 +164,14 @@ def read_data( def update_template_with_results( self, process_results: Mapping[str, Sequence[DwdIconEuForecastSourceFileCoord]] ) -> xr.Dataset: - """Update template dataset based on processing results. This method is - called during operational updates. + """ + Update template dataset based on processing results. This method is called + during operational updates. Subclasses should implement this method to apply dataset-specific adjustments based on the processing results. Examples include: - Trimming dataset along append_dim to only include successfully processed data - - Loading existing coordinate values from final_store and updating them based on results + - Loading existing coordinate values from the primary store and updating them based on results - Updating metadata based on what was actually processed vs what was planned The default implementation trims along append_dim to end at the most recent @@ -221,7 +218,7 @@ def update_template_with_results( @classmethod def operational_update_jobs( cls, - final_store: zarr.abc.store.Store, + primary_store_factory: StoreFactory, tmp_store: Path, get_template_fn: Callable[[DatetimeLike], xr.Dataset], append_dim: AppendDim, @@ -231,8 +228,9 @@ def operational_update_jobs( Sequence["RegionJob[DwdIconEuDataVar, DwdIconEuForecastSourceFileCoord]"], xr.Dataset, ]: - """Return the sequence of RegionJob instances necessary to update the - dataset from its current state to include the latest available data. + """ + Return the sequence of RegionJob instances necessary to update the dataset + from its current state to include the latest available data. 
Also return the template_ds, expanded along append_dim through the end of the data to process. The dataset returned here may extend beyond the @@ -241,16 +239,16 @@ def operational_update_jobs( The exact logic is dataset-specific, but it generally follows this pattern: 1. Figure out the range of time to process: append_dim_start (inclusive) and append_dim_end (exclusive) - a. Read existing data from final_store to determine what's already processed + a. Read existing data from the primary store to determine what's already processed b. Optionally identify recent incomplete/non-final data for reprocessing 2. Call get_template_fn(append_dim_end) to get the template_ds 3. Create RegionJob instances by calling cls.get_jobs(..., filter_start=append_dim_start) Parameters ---------- - final_store : zarr.abc.store.Store - The destination Zarr store to read existing data from and write updates to. - tmp_store : zarr.abc.store.Store | Path + primary_store_factory : StoreFactory + The factory to get the primary store to read existing data from and write updates to. + tmp_store : Path The temporary Zarr store to write into while processing. get_template_fn : Callable[[DatetimeLike], xr.Dataset] Function to get the template_ds for the operational update. @@ -269,14 +267,14 @@ def operational_update_jobs( xr.Dataset The template_ds for the operational update. 
""" - # existing_ds = xr.open_zarr(final_store) + # existing_ds = xr.open_zarr(primary_store_factory.store()) # append_dim_start = existing_ds[append_dim].max() # append_dim_end = pd.Timestamp.now() # template_ds = get_template_fn(append_dim_end) # jobs = cls.get_jobs( # kind="operational-update", - # final_store=final_store, + # primary_store_factory=primary_store_factory, # tmp_store=tmp_store, # template_ds=template_ds, # append_dim=append_dim, diff --git a/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py b/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py index ccc5a53c..8be542d5 100644 --- a/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py +++ b/tests/dwd/icon_eu/forecast/dynamical_dataset_test.py @@ -14,7 +14,7 @@ # # Local backfill reformat # dataset.backfill_local(append_dim_end=pd.Timestamp("2000-01-02")) -# ds = xr.open_zarr(dataset._final_store(), chunks=None) +# ds = xr.open_zarr(dataset.primary_store_factory.store(), chunks=None) # assert ds.time.max() == pd.Timestamp("2000-01-01") # # Operational update @@ -32,7 +32,7 @@ # dataset.update("test-update") # # Check resulting dataset -# updated_ds = xr.open_zarr(dataset._final_store(), chunks=None) +# updated_ds = xr.open_zarr(dataset.primary_store_factory.store(), chunks=None) # np.testing.assert_array_equal( # updated_ds.time, pd.date_range("1981-10-01", "1981-10-03") diff --git a/tests/dwd/icon_eu/forecast/region_job_test.py b/tests/dwd/icon_eu/forecast/region_job_test.py index 0ab7d676..4c30df08 100644 --- a/tests/dwd/icon_eu/forecast/region_job_test.py +++ b/tests/dwd/icon_eu/forecast/region_job_test.py @@ -18,7 +18,7 @@ # template_ds = template_config.get_template(pd.Timestamp("2000-01-23")) # region_job = DwdIconEuForecastRegionJob( -# final_store=Mock(), +# primary_store_factory=Mock(), # tmp_store=Mock(), # template_ds=template_ds, # data_vars=[Mock(), Mock()],