From 6d5b68d26d9329296f6f8df2d0e9c5f8aa6e4056 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 11 Mar 2025 10:05:42 +0000 Subject: [PATCH 01/13] add Partition class --- esmf_regrid/partition.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 esmf_regrid/partition.py diff --git a/esmf_regrid/partition.py b/esmf_regrid/partition.py new file mode 100644 index 00000000..4e385d36 --- /dev/null +++ b/esmf_regrid/partition.py @@ -0,0 +1,8 @@ +"""Provides an interface for splitting up a large regridding task.""" + + + +class Partition: + def __init__(self, src, tgt, scheme, **kwargs): + + pass \ No newline at end of file From fc896984dd3a1bad5a26bd282784b857ccacebfc Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 24 Mar 2025 18:16:27 +0000 Subject: [PATCH 02/13] partial work --- esmf_regrid/{ => experimental}/partition.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename esmf_regrid/{ => experimental}/partition.py (100%) diff --git a/esmf_regrid/partition.py b/esmf_regrid/experimental/partition.py similarity index 100% rename from esmf_regrid/partition.py rename to esmf_regrid/experimental/partition.py From 67331e9657c67caafa1c1e6d9f4c6963372ff930 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 9 Apr 2025 14:24:13 +0100 Subject: [PATCH 03/13] partial work --- esmf_regrid/experimental/partition.py | 28 ++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/esmf_regrid/experimental/partition.py b/esmf_regrid/experimental/partition.py index 4e385d36..87a23805 100644 --- a/esmf_regrid/experimental/partition.py +++ b/esmf_regrid/experimental/partition.py @@ -1,8 +1,34 @@ """Provides an interface for splitting up a large regridding task.""" +from esmf_regrid.schemes import _ESMFRegridder + +class PartialRegridder(_ESMFRegridder): + def __init__(self): + pass + + ## keep track of source indices and target indices + ## share reference to full source and target class Partition: - def __init__(self, src, tgt, scheme, **kwargs): + ## hold a list of files + ## hold a collection of source indices + ## note which indices are fully loaded + def __init__(self, src, tgt, scheme, file_names, **kwargs): + self.src = src + self.tgt = tgt + self.scheme = scheme + self.file_names = file_names + + self.saved_files = [] + + @property + def unsaved_files(self): + return list(set(self.file_names) - set(self.saved_files)) + + def generate_files(self, files_to_generate=None): + pass + + def apply_regridders(self): pass \ No newline at end of file From 596e69d76176ad5c6a4d300789af209f3c0fa8c1 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 15 Apr 2025 15:57:11 +0100 Subject: [PATCH 04/13] partial work --- src/esmf_regrid/experimental/partition.py | 203 +++++++++++++++++++++- src/esmf_regrid/schemes.py | 47 ++--- 2 files changed, 223 insertions(+), 27 deletions(-) diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index 87a23805..453f7c5e 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -1,34 +1,223 @@ """Provides an interface for splitting up a large regridding task.""" +import numpy as np +from scipy import sparse +from esmf_regrid.experimental.io import load_regridder, save_regridder from esmf_regrid.schemes import _ESMFRegridder class PartialRegridder(_ESMFRegridder): - def __init__(self): - pass + def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): + self.src_slice = src_slice + self.tgt_slice = tgt_slice + self.scheme = scheme + + super().__init__( + src, + tgt, + scheme.method, + precomputed_weights=weights, + **kwargs, + ) + + def __call__(self, cube): + result = super().__call__(cube) + + # set everything outside the extent to 0 + inverse_slice = np.ones_like(result.data, dtype=bool) + inverse_slice[self.tgt_slice] = 0 + # TODO: make sure this works with lazy data + dims = self._get_cube_dims(cube) + slice_slice = [np.newaxis] * result.ndim + for dim in dims: + slice_slice[dim] = np.s_[:] + broadcast_slice = np.broadcast_to(inverse_slice[*slice_slice], result.shape) + result.data[broadcast_slice] = 0 + return result ## keep track of source indices and target indices ## share reference to full source and target + ## works the same except everything out of bounds is 0 rather than masked + class Partition: ## hold a list of files ## hold a collection of source indices + ## alternately hold a collection of chunk indices ## note which indices are fully loaded - def __init__(self, src, tgt, scheme, file_names, **kwargs): + def __init__(self, src, tgt, scheme, file_names, src_chunks, tgt_chunks=None, auto_generate=False): self.src = src self.tgt = tgt self.scheme = scheme self.file_names = file_names + # TODO: consider deriving this from self.src.lazy_data() + self.src_chunks = src_chunks + assert len(src_chunks) == len(file_names) + self.tgt_chunks = tgt_chunks + assert tgt_chunks == None # We don't handle big targets currently + + # Note: this may need to become more sophisticated when both src and tgt are large + self.file_chunk_dict = {file: chunk for file, chunk in zip(self.file_names, self.src_chunks)} + + self.neighbouring_files = self._find_neighbours() self.saved_files = [] + self.partially_saved_files = [] + # self._src_slice_indices = [] + if auto_generate: + self.generate_files(self.file_names) @property def unsaved_files(self): - return list(set(self.file_names) - set(self.saved_files)) + return list(set(self.file_names) - set(self.saved_files) - set(self.partially_saved_files)) def generate_files(self, files_to_generate=None): - pass + if files_to_generate is None: + # TODO: consider adding logic to order the files more efficiently. + files = self.partially_saved_files + self.unsaved_files + else: + assert isinstance(files_to_generate, int) + files = (self.partially_saved_files + self.unsaved_files)[:files_to_generate] + + # Do this to ensure the last regridder is saved + files.append(None) + + previous_regridders = [] + previous_files = [] + for file in files: + if file in self.partially_saved_files: + next_regridder = load_regridder(file) + elif file is None: + next_regridder = None + else: + src = self.file_chunk_dict[file] + tgt = self.tgt + next_regridder = self.scheme.regridder(src, tgt) + previous_regridders, next_regridder = self._combine_regridders(previous_regridders, previous_files, next_regridder) + if previous_regridders: + for regridder, pre_file in zip(previous_regridders, previous_files): + neighbours = self.neighbouring_files[pre_file] + file_complete = True + for neighbour in neighbours: + if neighbour in self.unsaved_files: + file_complete = False + save_regridder(regridder, pre_file) + if file_complete: + self.saved_files.append(pre_file) + # self._src_slice_indices.append(regridder.src_slice) + else: + self.partially_saved_files.append(pre_file) + + # This will need to be more sophisticated for more complex cases + previous_regridders = [next_regridder] + previous_files = [file] + + + def apply_regridders(self, cube): + # for each target chunk, iterate through each associated regridder + # for now, assume one target chunk + assert len(self.unsaved_files) == 0 + current_result = self.tgt.copy() + for file in self.saved_files: + next_regridder = load_regridder(file) + cube_slice = next_regridder.src_slice + next_result = next_regridder(cube[cube_slice]) + current_result = self._combine_results(current_result, next_result) + return current_result + + + # def _src_slices(self): + # for indices in self._src_slice_indices: + # yield self.src[indices] + + def _find_neighbours(self): + # for the simplest case, neighbours will be next to each other in the list + files = self.file_names + neighbours = {file_1: (file_0, file_2) for file_0, file_1, file_2 in zip(files[:-2], files[1:-1], files[2:])} + neighbours.update({files[0]: (files[1]), files[-1]: (files[-2])}) + return neighbours + + # def _determine_mask(self): + # assert len(self.unsaved_files) == 0 + # # TODO: firgure out a way to do this when the target is big + # tgt_mask = np.ma.zeros_like(self.tgt.data) + # # TODO: calculate this mask + # return tgt_mask + + def _combine_regridders(self, existing, pre_files, current_regridder): + # For now, combine 2, in future, more regridders may be combined + if len(existing) == 0 or current_regridder is None: + # TODO: turn these into partial regridders. + return existing, current_regridder + else: + (previous_regridder,) = existing + current_range = current_regridder.regridder.weight_matrix.max(axis=1).nonzero()[0] + previous_range = previous_regridder.regridder.weight_matrix.max(axis=1).nonzero()[0] + mutual_overlaps = np.intersect1d(current_range, previous_range) + + if mutual_overlaps.shape != (0,): + + # Calculate a slice of the current chunk which contains all the overlapping source cells. + overlaps_next = current_regridder.regridder.weight_matrix[mutual_overlaps].nonzero()[1] + h_len = current_regridder._src["grid_x"].shape[0] + v_len = current_regridder._src["grid_y"].shape[-1] + # TODO: make this more rigorous + tgt_size = self.tgt.size + buffer = (overlaps_next % h_len).max() + 1 + + # Add this slice to the previous chunk. + # TODO: make this refer to slices + # new_src_cube = iris.cube.CubeList([previous_cube, cube[:buffer]]).concatenate_cube() + # new_cubes.append(new_src_cube) + pre_file, = pre_files + src_slice = self.file_chunk_dict[pre_file] + # assumes slice has form [[x_start, x_stop], [y_start, y_stop]] + src_slice[0][1] += buffer + # TODO: make this work + new_src_cube = self.src[src_slice] + tgt_slice = None + + # Create weights for new regridder + previous_wm = previous_regridder.regridder.weight_matrix + current_wm = current_regridder.regridder.weight_matrix + right_wm = sparse.csr_array((tgt_size, buffer * v_len)) + buffer_inds = sum(np.meshgrid(np.arange(buffer), np.arange(v_len) * h_len)).flatten() + right_wm[mutual_overlaps] = current_wm[mutual_overlaps][:, buffer_inds] + new_weight_matrix = _combine_sparse(previous_wm, right_wm, v_len, h_len, buffer, tgt_size) + new_weight_matrix = sparse.csr_matrix( + new_weight_matrix) # must be matrix for ier (likely to change in future) + + # Remove weights from current regridder which have been added to previous regridder. + current_regridder.regridder.weight_matrix[mutual_overlaps] = 0 + + # Construct replacement for previous regridder with new weights and source cube. + # previous_regridder = ESMFAreaWeightedRegridder(new_src_cube, tgt_mesh, + # precomputed_weights=new_weight_matrix) + # previous_regridder.regridder.esmf_version = 0 # Must be set to allow regridder to load/save + previous_regridder = PartialRegridder(new_src_cube, self.tgt, src_slice, tgt_slice, new_weight_matrix, self.scheme) + else: + current_regridder = PartialRegridder() + # add combine code here + pass + + def _combine_results(self, existing_results, next_result): + # iterate through for each target chunk + # for now, assume one target chunk + return existing_results + next_result + + +def _combine_sparse(left, right, w, a, b, t): + result = sparse.csr_array((t, w * (a + b))) + src_indices_left = (np.arange(a)[np.newaxis, :] + ((a + b) * np.arange(w)[:, np.newaxis])).flatten() + left_im = sparse.csr_array((np.ones(a * w), (np.arange(a * w), src_indices_left)), shape=(a * w, w * (a + b))) + src_indices_right = (np.arange(b)[np.newaxis, :] + a + ((a + b) * np.arange(w)[:, np.newaxis])).flatten() + right_im = sparse.csr_array((np.ones(b * w), (np.arange(b * w), src_indices_right)), shape=(b * w, w * (a + b))) + + result_add_left = left @ left_im + result += result_add_left + + result_add_right = right @ right_im + result += result_add_right + return result - def apply_regridders(self): - pass \ No newline at end of file diff --git a/src/esmf_regrid/schemes.py b/src/esmf_regrid/schemes.py index 0e90ac93..6e8cd1c8 100644 --- a/src/esmf_regrid/schemes.py +++ b/src/esmf_regrid/schemes.py @@ -1064,6 +1064,7 @@ def __init__( the regridder is saved . """ + self._method = Constants.Method.CONSERVATIVE if not (0 <= mdtol <= 1): msg = "Value for mdtol must be in range 0 - 1, got {}." raise ValueError(msg.format(mdtol)) @@ -1217,6 +1218,7 @@ def __init__( the regridder is saved . """ + self._method = Constants.Method.BILINEAR if not (0 <= mdtol <= 1): msg = "Value for mdtol must be in range 0 - 1, got {}." raise ValueError(msg.format(mdtol)) @@ -1366,6 +1368,7 @@ def __init__( arguments are recorded as a property of this regridder and are stored when the regridder is saved . """ + self._method = Constants.Method.NEAREST self.use_src_mask = use_src_mask self.use_tgt_mask = use_tgt_mask self.tgt_location = tgt_location @@ -1543,26 +1546,7 @@ def __init__( else: self._src = GridRecord(_get_coord(src, "x"), _get_coord(src, "y")) - def __call__(self, cube): - """Regrid this :class:`~iris.cube.Cube` onto the target grid of this regridder instance. - - The given :class:`~iris.cube.Cube` must be defined with the same grid as the source - :class:`~iris.cube.Cube` used to create this :class:`_ESMFRegridder` instance. - - Parameters - ---------- - cube : :class:`iris.cube.Cube` - A :class:`~iris.cube.Cube` instance to be regridded. - - Returns - ------- - :class:`iris.cube.Cube` - A :class:`~iris.cube.Cube` defined with the horizontal dimensions of the target - and the other dimensions from this :class:`~iris.cube.Cube`. The data values of - this :class:`~iris.cube.Cube` will be converted to values on the new grid using - regridding via :mod:`esmpy` generated weights. - - """ + def _get_cube_dims(self, cube): if cube.mesh is not None: # TODO: replace temporary hack when iris issues are sorted. # Ignore differences in var_name that might be caused by saving. @@ -1606,6 +1590,29 @@ def __call__(self, cube): else: # Due to structural reasons, the order here must be reversed. dims = cube.coord_dims(new_src_x)[::-1] + return dims + + def __call__(self, cube): + """Regrid this :class:`~iris.cube.Cube` onto the target grid of this regridder instance. + + The given :class:`~iris.cube.Cube` must be defined with the same grid as the source + :class:`~iris.cube.Cube` used to create this :class:`_ESMFRegridder` instance. + + Parameters + ---------- + cube : :class:`iris.cube.Cube` + A :class:`~iris.cube.Cube` instance to be regridded. + + Returns + ------- + :class:`iris.cube.Cube` + A :class:`~iris.cube.Cube` defined with the horizontal dimensions of the target + and the other dimensions from this :class:`~iris.cube.Cube`. The data values of + this :class:`~iris.cube.Cube` will be converted to values on the new grid using + regridding via :mod:`esmpy` generated weights. + + """ + dims = self._get_cube_dims(cube) regrid_info = RegridInfo( dims=dims, From 353fc04d8ead5f806a5b193f8b28280705dc7ddd Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 30 Apr 2025 11:44:42 +0100 Subject: [PATCH 05/13] partial work --- src/esmf_regrid/experimental/_partial.py | 42 ++++++ src/esmf_regrid/experimental/io.py | 162 ++++++++++++++-------- src/esmf_regrid/experimental/partition.py | 123 ++++++++-------- 3 files changed, 212 insertions(+), 115 deletions(-) create mode 100644 src/esmf_regrid/experimental/_partial.py diff --git a/src/esmf_regrid/experimental/_partial.py b/src/esmf_regrid/experimental/_partial.py new file mode 100644 index 00000000..29fe695b --- /dev/null +++ b/src/esmf_regrid/experimental/_partial.py @@ -0,0 +1,42 @@ +"""Provides a """ + +import numpy as np + +from esmf_regrid.schemes import _ESMFRegridder +class PartialRegridder(_ESMFRegridder): + def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): + self.src_slice = src_slice # this will be tuple-like + self.tgt_slice = tgt_slice # this will be a boolean array + self.scheme = scheme + + super().__init__( + src, + tgt, + scheme.method, + precomputed_weights=weights, + **kwargs, + ) + + def __call__(self, cube): + result = super().__call__(cube) + + # set everything outside the extent to 0 + inverse_slice = np.ones_like(result.data, dtype=bool) + inverse_slice[self.tgt_slice] = 0 + # TODO: make sure this works with lazy data + dims = self._get_cube_dims(cube) + slice_slice = [np.newaxis] * result.ndim + for dim in dims: + slice_slice[dim] = np.s_[:] + broadcast_slice = np.broadcast_to(inverse_slice[*slice_slice], result.shape) + result.data[broadcast_slice] = 0 + return result + + def _get_src_slice(self, cube): + # TODO: write a method to handle cubes of different dimensionalities + return self.src_slice + + ## keep track of source indices and target indices + ## share reference to full source and target + + ## works the same except everything out of bounds is 0 rather than masked \ No newline at end of file diff --git a/src/esmf_regrid/experimental/io.py b/src/esmf_regrid/experimental/io.py index 48debe49..adec5429 100644 --- a/src/esmf_regrid/experimental/io.py +++ b/src/esmf_regrid/experimental/io.py @@ -14,9 +14,13 @@ GridToMeshESMFRegridder, MeshToGridESMFRegridder, ) +from esmf_regrid.experimental._partial import PartialRegridder from esmf_regrid.schemes import ( + ESMFAreaWeighted, ESMFAreaWeightedRegridder, + ESMFBilinear, ESMFBilinearRegridder, + ESMFNearest, ESMFNearestRegridder, GridRecord, MeshRecord, @@ -28,6 +32,7 @@ ESMFNearestRegridder, GridToMeshESMFRegridder, MeshToGridESMFRegridder, + PartialRegridder, ] _REGRIDDER_NAME_MAP = {rg_class.__name__: rg_class for rg_class in SUPPORTED_REGRIDDERS} _SOURCE_NAME = "regridder_source_field" @@ -47,6 +52,8 @@ _SOURCE_RESOLUTION = "src_resolution" _TARGET_RESOLUTION = "tgt_resolution" _ESMF_ARGS = "esmf_args" +_SRC_SLICE_NAME = "src_slice" +_TGT_SLICE_NAME = "tgt_slice" _VALID_ESMF_KWARGS = [ "pole_method", "regrid_pole_npoints", @@ -118,54 +125,39 @@ def _clean_var_names(cube): con.var_name = None -def save_regridder(rg, filename): - """Save a regridder scheme instance. - - Saves any of the regridder classes, i.e. - :class:`~esmf_regrid.experimental.unstructured_scheme.GridToMeshESMFRegridder`, - :class:`~esmf_regrid.experimental.unstructured_scheme.MeshToGridESMFRegridder`, - :class:`~esmf_regrid.schemes.ESMFAreaWeightedRegridder`, - :class:`~esmf_regrid.schemes.ESMFBilinearRegridder` or - :class:`~esmf_regrid.schemes.ESMFNearestRegridder`. - . - - Parameters - ---------- - rg : :class:`~esmf_regrid.schemes._ESMFRegridder` - The regridder instance to save. - filename : str - The file name to save to. - """ - regridder_type = rg.__class__.__name__ - - def _standard_grid_cube(grid, name): - if grid[0].ndim == 1: - shape = [coord.points.size for coord in grid] - else: - shape = grid[0].shape - data = np.zeros(shape) - cube = Cube(data, var_name=name, long_name=name) - if grid[0].ndim == 1: - cube.add_dim_coord(grid[0], 0) - cube.add_dim_coord(grid[1], 1) - else: - cube.add_aux_coord(grid[0], [0, 1]) - cube.add_aux_coord(grid[1], [0, 1]) - return cube - - def _standard_mesh_cube(mesh, location, name): - mesh_coords = mesh.to_MeshCoords(location) - data = np.zeros(mesh_coords[0].points.shape[0]) - cube = Cube(data, var_name=name, long_name=name) - for coord in mesh_coords: - cube.add_aux_coord(coord, 0) - return cube - +def _standard_grid_cube(grid, name): + if grid[0].ndim == 1: + shape = [coord.points.size for coord in grid] + else: + shape = grid[0].shape + data = np.zeros(shape) + cube = Cube(data, var_name=name, long_name=name) + if grid[0].ndim == 1: + cube.add_dim_coord(grid[0], 0) + cube.add_dim_coord(grid[1], 1) + else: + cube.add_aux_coord(grid[0], [0, 1]) + cube.add_aux_coord(grid[1], [0, 1]) + return cube + +def _standard_mesh_cube(mesh, location, name): + mesh_coords = mesh.to_MeshCoords(location) + data = np.zeros(mesh_coords[0].points.shape[0]) + cube = Cube(data, var_name=name, long_name=name) + for coord in mesh_coords: + cube.add_aux_coord(coord, 0) + return cube + +def _generate_src_tgt(regridder_type, rg, allow_partial): if regridder_type in [ "ESMFAreaWeightedRegridder", "ESMFBilinearRegridder", "ESMFNearestRegridder", + "PartialRegridder", ]: + if regridder_type == "PartialRegridder" and not allow_partial: + e_msg = "To save a PartialRegridder, `allow_partial=True` must be set." + raise ValueError(e_msg) src_grid = rg._src if isinstance(src_grid, GridRecord): src_cube = _standard_grid_cube( @@ -210,12 +202,38 @@ def _standard_mesh_cube(mesh, location, name): tgt_grid = (rg.grid_y, rg.grid_x) tgt_cube = _standard_grid_cube(tgt_grid, _TARGET_NAME) _add_mask_to_cube(rg.tgt_mask, tgt_cube, _TARGET_MASK_NAME) + else: e_msg = ( - f"Expected a regridder of type `GridToMeshESMFRegridder` or " - f"`MeshToGridESMFRegridder`, got type {regridder_type}." + f"Unexpected regridder type {regridder_type}." ) raise TypeError(e_msg) + return src_cube, tgt_cube + + + + +def save_regridder(rg, filename, allow_patrial=False): + """Save a regridder scheme instance. + + Saves any of the regridder classes, i.e. + :class:`~esmf_regrid.experimental.unstructured_scheme.GridToMeshESMFRegridder`, + :class:`~esmf_regrid.experimental.unstructured_scheme.MeshToGridESMFRegridder`, + :class:`~esmf_regrid.schemes.ESMFAreaWeightedRegridder`, + :class:`~esmf_regrid.schemes.ESMFBilinearRegridder` or + :class:`~esmf_regrid.schemes.ESMFNearestRegridder`. + . + + Parameters + ---------- + rg : :class:`~esmf_regrid.schemes._ESMFRegridder` + The regridder instance to save. + filename : str + The file name to save to. + """ + regridder_type = rg.__class__.__name__ + + src_cube, tgt_cube = _generate_src_tgt(regridder_type, rg, allow_patrial) method = str(check_method(rg.method).name) @@ -223,7 +241,7 @@ def _standard_mesh_cube(mesh, location, name): resolution = rg.resolution src_resolution = None tgt_resolution = None - elif regridder_type == "ESMFAreaWeightedRegridder": + elif method == "CONSERVATIVE": resolution = None src_resolution = rg.src_resolution tgt_resolution = rg.tgt_resolution @@ -264,6 +282,10 @@ def _standard_mesh_cube(mesh, location, name): if tgt_resolution is not None: attributes[_TARGET_RESOLUTION] = tgt_resolution + if regridder_type == "PartialRegridder": + attributes[_SRC_SLICE_NAME] = rg.src_slice # this slice is described by a tuple + _add_mask_to_cube(rg.tgt_mask, tgt_cube, _TGT_SLICE_NAME) # this slice is described by a boolean array + weights_cube = Cube(weight_data, var_name=_WEIGHTS_NAME, long_name=_WEIGHTS_NAME) row_coord = AuxCoord( weight_rows, var_name=_WEIGHTS_ROW_NAME, long_name=_WEIGHTS_ROW_NAME @@ -306,7 +328,7 @@ def _standard_mesh_cube(mesh, location, name): iris.fileformats.netcdf.save(cube_list, filename) -def load_regridder(filename): +def load_regridder(filename, allow_partial=False): """Load a regridder scheme instance. Loads any of the regridder classes, i.e. @@ -341,6 +363,10 @@ def load_regridder(filename): assert regridder_type in _REGRIDDER_NAME_MAP scheme = _REGRIDDER_NAME_MAP[regridder_type] + if regridder_type == "PartialRegridder" and not allow_partial: + e_msg = "PartialRegridder cannot be loaded without setting `allow_partial=True`." + raise ValueError(e_msg) + # Determine the regridding method, allowing for files created when # conservative regridding was the only method. method_string = weights_cube.attributes.get(_METHOD, "CONSERVATIVE") @@ -394,26 +420,46 @@ def load_regridder(filename): elif scheme is MeshToGridESMFRegridder: resolution_keyword = _TARGET_RESOLUTION kwargs = {resolution_keyword: resolution, "method": method, "mdtol": mdtol} - elif scheme is ESMFAreaWeightedRegridder: + # elif scheme is ESMFAreaWeightedRegridder: + elif method is Constants.Method.CONSERVATIVE: kwargs = { _SOURCE_RESOLUTION: src_resolution, _TARGET_RESOLUTION: tgt_resolution, "mdtol": mdtol, } - elif scheme is ESMFBilinearRegridder: + # elif scheme is ESMFBilinearRegridder: + elif method is Constants.Method.BILINEAR: kwargs = {"mdtol": mdtol} else: kwargs = {} - regridder = scheme( - src_cube, - tgt_cube, - precomputed_weights=weight_matrix, - use_src_mask=use_src_mask, - use_tgt_mask=use_tgt_mask, - esmf_args=esmf_args, - **kwargs, - ) + if scheme is PartialRegridder: + src_slice = src_cube.attributes[_SRC_SLICE_NAME] + tgt_slice = tgt_cube.coord(_TGT_SLICE_NAME).points + sub_scheme = { + Constants.Method.CONSERVATIVE: ESMFAreaWeighted, + Constants.Method.BILINEAR: ESMFBilinear, + Constants.Method.NEAREST: ESMFNearest, + }[method] + regridder = scheme( + src_cube, + tgt_cube, + src_slice, + tgt_slice, + weight_matrix, + sub_scheme, + **kwargs, + ) + else: + regridder = scheme( + src_cube, + tgt_cube, + precomputed_weights=weight_matrix, + use_src_mask=use_src_mask, + use_tgt_mask=use_tgt_mask, + esmf_args=esmf_args, + **kwargs, + ) esmf_version = weights_cube.attributes[_VERSION_ESMF] regridder.regridder.esmf_version = esmf_version diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index 453f7c5e..9d05fb55 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -4,41 +4,47 @@ from scipy import sparse from esmf_regrid.experimental.io import load_regridder, save_regridder -from esmf_regrid.schemes import _ESMFRegridder - -class PartialRegridder(_ESMFRegridder): - def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): - self.src_slice = src_slice - self.tgt_slice = tgt_slice - self.scheme = scheme - - super().__init__( - src, - tgt, - scheme.method, - precomputed_weights=weights, - **kwargs, - ) - - def __call__(self, cube): - result = super().__call__(cube) - - # set everything outside the extent to 0 - inverse_slice = np.ones_like(result.data, dtype=bool) - inverse_slice[self.tgt_slice] = 0 - # TODO: make sure this works with lazy data - dims = self._get_cube_dims(cube) - slice_slice = [np.newaxis] * result.ndim - for dim in dims: - slice_slice[dim] = np.s_[:] - broadcast_slice = np.broadcast_to(inverse_slice[*slice_slice], result.shape) - result.data[broadcast_slice] = 0 - return result - - ## keep track of source indices and target indices - ## share reference to full source and target - - ## works the same except everything out of bounds is 0 rather than masked +from esmf_regrid.experimental._partial import PartialRegridder + +# from esmf_regrid.schemes import _ESMFRegridder +# +# class PartialRegridder(_ESMFRegridder): +# def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): +# self.src_slice = src_slice +# self.tgt_slice = tgt_slice +# self.scheme = scheme +# +# super().__init__( +# src, +# tgt, +# scheme.method, +# precomputed_weights=weights, +# **kwargs, +# ) +# +# def __call__(self, cube): +# result = super().__call__(cube) +# +# # set everything outside the extent to 0 +# inverse_slice = np.ones_like(result.data, dtype=bool) +# inverse_slice[self.tgt_slice] = 0 +# # TODO: make sure this works with lazy data +# dims = self._get_cube_dims(cube) +# slice_slice = [np.newaxis] * result.ndim +# for dim in dims: +# slice_slice[dim] = np.s_[:] +# broadcast_slice = np.broadcast_to(inverse_slice[*slice_slice], result.shape) +# result.data[broadcast_slice] = 0 +# return result +# +# def _get_src_slice(self, cube): +# # TODO: write a method to handle cubes of different dimensionalities +# return self.src_slice +# +# ## keep track of source indices and target indices +# ## share reference to full source and target +# +# ## works the same except everything out of bounds is 0 rather than masked class Partition: @@ -50,12 +56,13 @@ def __init__(self, src, tgt, scheme, file_names, src_chunks, tgt_chunks=None, au self.src = src self.tgt = tgt self.scheme = scheme + # TODO: consider abstracting away the idea of files self.file_names = file_names # TODO: consider deriving this from self.src.lazy_data() self.src_chunks = src_chunks assert len(src_chunks) == len(file_names) self.tgt_chunks = tgt_chunks - assert tgt_chunks == None # We don't handle big targets currently + assert tgt_chunks is None # We don't handle big targets currently # Note: this may need to become more sophisticated when both src and tgt are large self.file_chunk_dict = {file: chunk for file, chunk in zip(self.file_names, self.src_chunks)} @@ -94,11 +101,12 @@ def generate_files(self, files_to_generate=None): src = self.file_chunk_dict[file] tgt = self.tgt next_regridder = self.scheme.regridder(src, tgt) - previous_regridders, next_regridder = self._combine_regridders(previous_regridders, previous_files, next_regridder) + previous_regridders, next_regridder = self._combine_regridders(previous_regridders, next_regridder, previous_files, file) if previous_regridders: for regridder, pre_file in zip(previous_regridders, previous_files): neighbours = self.neighbouring_files[pre_file] file_complete = True + # TODO: consider any for neighbour in neighbours: if neighbour in self.unsaved_files: file_complete = False @@ -118,19 +126,17 @@ def apply_regridders(self, cube): # for each target chunk, iterate through each associated regridder # for now, assume one target chunk assert len(self.unsaved_files) == 0 - current_result = self.tgt.copy() + # TODO: this may work better as a cube of the correct shape for more complex cases + current_result = None for file in self.saved_files: + # TODO: make sure this works well with dask next_regridder = load_regridder(file) - cube_slice = next_regridder.src_slice - next_result = next_regridder(cube[cube_slice]) + cube_slice = next_regridder._get_src_slice(cube) + next_result = next_regridder(cube[*cube_slice]) current_result = self._combine_results(current_result, next_result) return current_result - # def _src_slices(self): - # for indices in self._src_slice_indices: - # yield self.src[indices] - def _find_neighbours(self): # for the simplest case, neighbours will be next to each other in the list files = self.file_names @@ -138,18 +144,16 @@ def _find_neighbours(self): neighbours.update({files[0]: (files[1]), files[-1]: (files[-2])}) return neighbours - # def _determine_mask(self): - # assert len(self.unsaved_files) == 0 - # # TODO: firgure out a way to do this when the target is big - # tgt_mask = np.ma.zeros_like(self.tgt.data) - # # TODO: calculate this mask - # return tgt_mask - - def _combine_regridders(self, existing, pre_files, current_regridder): + def _combine_regridders(self, existing, current_regridder, pre_files, file): # For now, combine 2, in future, more regridders may be combined - if len(existing) == 0 or current_regridder is None: + + if len(existing) == 0: # TODO: turn these into partial regridders. - return existing, current_regridder + if not isinstance(current_regridder, PartialRegridder): + src_slice = self.file_chunk_dict + current_regridder = PartialRegridder() + elif current_regridder is None: + existing = [PartialRegridder()] else: (previous_regridder,) = existing current_range = current_regridder.regridder.weight_matrix.max(axis=1).nonzero()[0] @@ -176,7 +180,7 @@ def _combine_regridders(self, existing, pre_files, current_regridder): src_slice[0][1] += buffer # TODO: make this work new_src_cube = self.src[src_slice] - tgt_slice = None + tgt_slice = None # should describe all valid target indices # Create weights for new regridder previous_wm = previous_regridder.regridder.weight_matrix @@ -197,14 +201,19 @@ def _combine_regridders(self, existing, pre_files, current_regridder): # previous_regridder.regridder.esmf_version = 0 # Must be set to allow regridder to load/save previous_regridder = PartialRegridder(new_src_cube, self.tgt, src_slice, tgt_slice, new_weight_matrix, self.scheme) else: + # TODO: make this work current_regridder = PartialRegridder() # add combine code here - pass + return existing, current_regridder def _combine_results(self, existing_results, next_result): # iterate through for each target chunk # for now, assume one target chunk - return existing_results + next_result + if existing_results is None: + combined_result = next_result + else: + combined_result = existing_results + next_result + return combined_result def _combine_sparse(left, right, w, a, b, t): From 9a510e2abd1214db64213d16715f43488417a186 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 30 Apr 2025 14:57:57 +0100 Subject: [PATCH 06/13] partial work --- src/esmf_regrid/experimental/_partial.py | 52 +++++--- src/esmf_regrid/experimental/io.py | 37 +++-- src/esmf_regrid/experimental/partition.py | 126 +++++++++--------- src/esmf_regrid/schemes.py | 3 + .../experimental/io/partition/__init__.py | 1 + .../io/partition/test_PartialRegridder.py | 7 + .../io/partition/test_Partition.py | 32 +++++ .../experimental/io/test_round_tripping.py | 28 +++- 8 files changed, 191 insertions(+), 95 deletions(-) create mode 100644 src/esmf_regrid/tests/unit/experimental/io/partition/__init__.py create mode 100644 src/esmf_regrid/tests/unit/experimental/io/partition/test_PartialRegridder.py create mode 100644 src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py diff --git a/src/esmf_regrid/experimental/_partial.py b/src/esmf_regrid/experimental/_partial.py index 29fe695b..f293a365 100644 --- a/src/esmf_regrid/experimental/_partial.py +++ b/src/esmf_regrid/experimental/_partial.py @@ -1,35 +1,53 @@ -"""Provides a """ +"""Provides a regridder class compatible with Partition""" -import numpy as np +from esmf_regrid.schemes import ( + _ESMFRegridder, + # ESMFAreaWeighted, + # ESMFAreaWeightedRegridder, + # ESMFBilinear, +) -from esmf_regrid.schemes import _ESMFRegridder class PartialRegridder(_ESMFRegridder): def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): self.src_slice = src_slice # this will be tuple-like - self.tgt_slice = tgt_slice # this will be a boolean array + self.tgt_slice = tgt_slice self.scheme = scheme - super().__init__( + # kwargs = { + # "use_src_mask": scheme.use_src_mask, + # "use_tgt_mask": scheme.use_tgt_mask, + # "tgt_location": scheme.tgt_location, + # "esmf_args": scheme.esmf_args, + # } + # if isinstance(scheme, (ESMFAreaWeighted, ESMFBilinear)): + # kwargs["mdtol"] = scheme.mdtol + + + # super().__init__( + # ESMFAreaWeightedRegridder.__init__( + # src, + # tgt, + # scheme._method, + # precomputed_weights=weights, + # **kwargs, + # ) + self._regridder = scheme.regridder( src, tgt, - scheme.method, precomputed_weights=weights, - **kwargs, + # TODO: turn this back on + # **kwargs, ) + self.__dict__.update(self._regridder.__dict__) def __call__(self, cube): result = super().__call__(cube) - # set everything outside the extent to 0 - inverse_slice = np.ones_like(result.data, dtype=bool) - inverse_slice[self.tgt_slice] = 0 - # TODO: make sure this works with lazy data - dims = self._get_cube_dims(cube) - slice_slice = [np.newaxis] * result.ndim - for dim in dims: - slice_slice[dim] = np.s_[:] - broadcast_slice = np.broadcast_to(inverse_slice[*slice_slice], result.shape) - result.data[broadcast_slice] = 0 + blank_cube = cube.copy() + blank_cube.data[:] = 0 + result_mask = super().__call__(blank_cube).data.mask + result.data[result_mask] = 0 + return result def _get_src_slice(self, cube): diff --git a/src/esmf_regrid/experimental/io.py b/src/esmf_regrid/experimental/io.py index 472e3449..2153ee06 100644 --- a/src/esmf_regrid/experimental/io.py +++ b/src/esmf_regrid/experimental/io.py @@ -213,7 +213,7 @@ def _generate_src_tgt(regridder_type, rg, allow_partial): -def save_regridder(rg, filename, allow_patrial=False): +def save_regridder(rg, filename, allow_partial=False): """Save a regridder scheme instance. Saves any of the regridder classes, i.e. @@ -233,7 +233,7 @@ def save_regridder(rg, filename, allow_patrial=False): """ regridder_type = rg.__class__.__name__ - src_cube, tgt_cube = _generate_src_tgt(regridder_type, rg, allow_patrial) + src_cube, tgt_cube = _generate_src_tgt(regridder_type, rg, allow_partial) method = str(check_method(rg.method).name) @@ -282,9 +282,22 @@ def save_regridder(rg, filename, allow_patrial=False): if tgt_resolution is not None: attributes[_TARGET_RESOLUTION] = tgt_resolution + extra_cubes = [] if regridder_type == "PartialRegridder": - attributes[_SRC_SLICE_NAME] = rg.src_slice # this slice is described by a tuple - _add_mask_to_cube(rg.tgt_mask, tgt_cube, _TGT_SLICE_NAME) # this slice is described by a boolean array + src_slice = rg.src_slice # this slice is described by a tuple + if src_slice is None: + src_slice = "None" + # TODO: make this a cube + # attributes[_SRC_SLICE_NAME] = src_slice + src_slice_cube = Cube(src_slice, long_name=_SRC_SLICE_NAME, var_name=_SRC_SLICE_NAME) + tgt_slice = rg.tgt_slice # this slice is described by a tuple + if tgt_slice is None: + tgt_slice = "None" + # TODO: make this a cube + # attributes[_TGT_SLICE_NAME] = tgt_slice + tgt_slice_cube = Cube(src_slice, long_name=_TGT_SLICE_NAME, var_name=_TGT_SLICE_NAME) + extra_cubes = [src_slice_cube, tgt_slice_cube] + weights_cube = Cube(weight_data, var_name=_WEIGHTS_NAME, long_name=_WEIGHTS_NAME) row_coord = AuxCoord( @@ -320,7 +333,7 @@ def save_regridder(rg, filename, allow_patrial=False): # Save cubes while ensuring var_names do not conflict for the sake of consistency. with _managed_var_name(src_cube, tgt_cube): - cube_list = CubeList([src_cube, tgt_cube, weights_cube, weight_shape_cube]) + cube_list = CubeList([src_cube, tgt_cube, weights_cube, weight_shape_cube] + extra_cubes) for cube in cube_list: cube.attributes = attributes @@ -430,14 +443,20 @@ def load_regridder(filename, allow_partial=False): "mdtol": mdtol, } # elif scheme is ESMFBilinearRegridder: - elif method is Constants.Method.BILINEAR: + elif method is Constants.Method.BILINEAR: kwargs = {"mdtol": mdtol} else: kwargs = {} if scheme is PartialRegridder: - src_slice = src_cube.attributes[_SRC_SLICE_NAME] - tgt_slice = tgt_cube.coord(_TGT_SLICE_NAME).points + # src_slice = src_cube.attributes[_SRC_SLICE_NAME] + src_slice = cubes.extract_cube(_SRC_SLICE_NAME).data.tolist() + if src_slice == "None": + src_slice = None + # tgt_slice = tgt_cube.attributes[_TGT_SLICE_NAME] + tgt_slice = cubes.extract_cube(_TGT_SLICE_NAME).data.tolist() + if tgt_slice == "None": + tgt_slice = None sub_scheme = { Constants.Method.CONSERVATIVE: ESMFAreaWeighted, Constants.Method.BILINEAR: ESMFBilinear, @@ -449,7 +468,7 @@ def load_regridder(filename, allow_partial=False): src_slice, tgt_slice, weight_matrix, - sub_scheme, + sub_scheme(), **kwargs, ) else: diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index 9d05fb55..9ad5457d 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -6,48 +6,19 @@ from esmf_regrid.experimental.io import load_regridder, save_regridder from esmf_regrid.experimental._partial import PartialRegridder -# from esmf_regrid.schemes import _ESMFRegridder -# -# class PartialRegridder(_ESMFRegridder): -# def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): -# self.src_slice = src_slice -# self.tgt_slice = tgt_slice -# self.scheme = scheme -# -# super().__init__( -# src, -# tgt, -# scheme.method, -# precomputed_weights=weights, -# **kwargs, -# ) -# -# def __call__(self, cube): -# result = super().__call__(cube) -# -# # set everything outside the extent to 0 -# inverse_slice = np.ones_like(result.data, dtype=bool) -# inverse_slice[self.tgt_slice] = 0 -# # TODO: make sure this works with lazy data -# dims = self._get_cube_dims(cube) -# slice_slice = [np.newaxis] * result.ndim -# for dim in dims: -# slice_slice[dim] = np.s_[:] -# broadcast_slice = np.broadcast_to(inverse_slice[*slice_slice], result.shape) -# result.data[broadcast_slice] = 0 -# return result -# -# def _get_src_slice(self, cube): -# # TODO: write a method to handle cubes of different dimensionalities -# return self.src_slice -# -# ## keep track of source indices and target indices -# ## share reference to full source and target -# -# ## works the same except everything out of bounds is 0 rather than masked + +def _interpret_slice(sl): + # return [slice(s) for s in sl] + return np.s_[sl[0][0]:sl[0][1], sl[1][0]:sl[1][1]] + +# TODO: consider if nearest is appropriate with this way of doing things + class Partition: + # TODO: add a way to save the Partition object. + # TODO: make a way to find out which files have been saved from the last session. + ## hold a list of files ## hold a collection of source indices ## alternately hold a collection of chunk indices @@ -87,6 +58,9 @@ def generate_files(self, files_to_generate=None): assert isinstance(files_to_generate, int) files = (self.partially_saved_files + self.unsaved_files)[:files_to_generate] + # sort files + files = [file for file in self.file_names if file in files] + # Do this to ensure the last regridder is saved files.append(None) @@ -94,11 +68,13 @@ def generate_files(self, files_to_generate=None): previous_files = [] for file in files: if file in self.partially_saved_files: - next_regridder = load_regridder(file) + next_regridder = load_regridder(file, allow_partial=True) elif file is None: next_regridder = None else: - src = self.file_chunk_dict[file] + src_chunk = self.file_chunk_dict[file] + # src = self.src[src_chunk[0][0]:src_chunk[0][1], src_chunk[1][0]:src_chunk[1][1]] + src = self.src[*_interpret_slice(src_chunk)] tgt = self.tgt next_regridder = self.scheme.regridder(src, tgt) previous_regridders, next_regridder = self._combine_regridders(previous_regridders, next_regridder, previous_files, file) @@ -110,7 +86,7 @@ def generate_files(self, files_to_generate=None): for neighbour in neighbours: if neighbour in self.unsaved_files: file_complete = False - save_regridder(regridder, pre_file) + save_regridder(regridder, pre_file, allow_partial=True) if file_complete: self.saved_files.append(pre_file) # self._src_slice_indices.append(regridder.src_slice) @@ -122,17 +98,22 @@ def generate_files(self, files_to_generate=None): previous_files = [file] - def apply_regridders(self, cube): + def apply_regridders(self, cube, allow_incomplete=False): # for each target chunk, iterate through each associated regridder # for now, assume one target chunk - assert len(self.unsaved_files) == 0 + # TODO: figure out how to mask parts of the target not covered by any source (e.g. start out with full mask) + if not allow_incomplete: + assert len(self.unsaved_files) == 0 # TODO: this may work better as a cube of the correct shape for more complex cases current_result = None - for file in self.saved_files: + # files = self.saved_files + files = self.file_names + + for file in files: # TODO: make sure this works well with dask - next_regridder = load_regridder(file) + next_regridder = load_regridder(file, allow_partial=True) cube_slice = next_regridder._get_src_slice(cube) - next_result = next_regridder(cube[*cube_slice]) + next_result = next_regridder(cube[*_interpret_slice(cube_slice)]) current_result = self._combine_results(current_result, next_result) return current_result @@ -141,19 +122,26 @@ def _find_neighbours(self): # for the simplest case, neighbours will be next to each other in the list files = self.file_names neighbours = {file_1: (file_0, file_2) for file_0, file_1, file_2 in zip(files[:-2], files[1:-1], files[2:])} - neighbours.update({files[0]: (files[1]), files[-1]: (files[-2])}) + neighbours.update({files[0]: (files[1],), files[-1]: (files[-2],)}) return neighbours def _combine_regridders(self, existing, current_regridder, pre_files, file): # For now, combine 2, in future, more regridders may be combined if len(existing) == 0: - # TODO: turn these into partial regridders. if not isinstance(current_regridder, PartialRegridder): - src_slice = self.file_chunk_dict - current_regridder = PartialRegridder() + src_slice = self.file_chunk_dict[file] + src_cube = self.src[*_interpret_slice(src_slice)] + weights = current_regridder.regridder.weight_matrix + current_regridder = PartialRegridder(src_cube, self.tgt, src_slice, None, weights, self.scheme) elif current_regridder is None: - existing = [PartialRegridder()] + previous_regridder, = existing + if not isinstance(previous_regridder, PartialRegridder): + src_slice = self.file_chunk_dict[pre_files[0]] + src_cube = self.src[*_interpret_slice(src_slice)] + weights = previous_regridder.regridder.weight_matrix + previous_regridder = PartialRegridder(src_cube, self.tgt, src_slice, None, weights, self.scheme) + existing = [previous_regridder] else: (previous_regridder,) = existing current_range = current_regridder.regridder.weight_matrix.max(axis=1).nonzero()[0] @@ -164,22 +152,25 @@ def _combine_regridders(self, existing, current_regridder, pre_files, file): # Calculate a slice of the current chunk which contains all the overlapping source cells. overlaps_next = current_regridder.regridder.weight_matrix[mutual_overlaps].nonzero()[1] - h_len = current_regridder._src["grid_x"].shape[0] - v_len = current_regridder._src["grid_y"].shape[-1] + # h_len = current_regridder._src["grid_x"].shape[0] + h_len = current_regridder._src[0].shape[0] + # v_len = current_regridder._src["grid_y"].shape[-1] + v_len = current_regridder._src[1].shape[-1] # TODO: make this more rigorous - tgt_size = self.tgt.size - buffer = (overlaps_next % h_len).max() + 1 + # tgt_size = self.tgt.size + tgt_size = np.prod(self.tgt.shape) + buffer = (overlaps_next % v_len).max() + 1 + # buffer = (overlaps_next % h_len).max() + 1 # Add this slice to the previous chunk. - # TODO: make this refer to slices # new_src_cube = iris.cube.CubeList([previous_cube, cube[:buffer]]).concatenate_cube() # new_cubes.append(new_src_cube) pre_file, = pre_files src_slice = self.file_chunk_dict[pre_file] # assumes slice has form [[x_start, x_stop], [y_start, y_stop]] + # TODO: consider how this affects file_chunk_dict src_slice[0][1] += buffer - # TODO: make this work - new_src_cube = self.src[src_slice] + new_src_cube = self.src[*_interpret_slice(src_slice)] tgt_slice = None # should describe all valid target indices # Create weights for new regridder @@ -200,10 +191,21 @@ def _combine_regridders(self, existing, current_regridder, pre_files, file): # precomputed_weights=new_weight_matrix) # previous_regridder.regridder.esmf_version = 0 # Must be set to allow regridder to load/save previous_regridder = PartialRegridder(new_src_cube, self.tgt, src_slice, tgt_slice, new_weight_matrix, self.scheme) + existing = [previous_regridder] else: - # TODO: make this work - current_regridder = PartialRegridder() - # add combine code here + if not isinstance(current_regridder, PartialRegridder): + src_slice = self.file_chunk_dict[file] + src_cube = self.src[*_interpret_slice(src_slice)] + weights = current_regridder.regridder.weight_matrix + current_regridder = PartialRegridder(src_cube, self.tgt, src_slice, None, weights, self.scheme) + previous_regridder, = existing + if not isinstance(previous_regridder, PartialRegridder): + src_slice = self.file_chunk_dict[pre_files[0]] + src_cube = self.src[*_interpret_slice(src_slice)] + weights = previous_regridder.regridder.weight_matrix + previous_regridder = PartialRegridder(src_cube, self.tgt, src_slice, None, weights, self.scheme) + existing = [previous_regridder] + return existing, current_regridder def _combine_results(self, existing_results, next_result): diff --git a/src/esmf_regrid/schemes.py b/src/esmf_regrid/schemes.py index 77c44e79..39b61889 100644 --- a/src/esmf_regrid/schemes.py +++ b/src/esmf_regrid/schemes.py @@ -1124,6 +1124,7 @@ def regridder( use_tgt_mask=None, tgt_location="face", esmf_args=None, + precomputed_weights=None, ): """Create regridder to perform regridding from ``src_grid`` to ``tgt_grid``. @@ -1276,6 +1277,7 @@ def regridder( tgt_location=None, extrapolate_gaps=False, esmf_args=None, + precomputed_weights=None, ): """Create regridder to perform regridding from ``src_grid`` to ``tgt_grid``. @@ -1418,6 +1420,7 @@ def regridder( use_tgt_mask=None, tgt_location=None, esmf_args=None, + precomputed_weights=None, ): """Create regridder to perform regridding from ``src_grid`` to ``tgt_grid``. diff --git a/src/esmf_regrid/tests/unit/experimental/io/partition/__init__.py b/src/esmf_regrid/tests/unit/experimental/io/partition/__init__.py new file mode 100644 index 00000000..656fc3a9 --- /dev/null +++ b/src/esmf_regrid/tests/unit/experimental/io/partition/__init__.py @@ -0,0 +1 @@ +"""Unit tests for :mod:`esmf_regrid.experimental.partition`.""" diff --git a/src/esmf_regrid/tests/unit/experimental/io/partition/test_PartialRegridder.py b/src/esmf_regrid/tests/unit/experimental/io/partition/test_PartialRegridder.py new file mode 100644 index 00000000..581e26ee --- /dev/null +++ b/src/esmf_regrid/tests/unit/experimental/io/partition/test_PartialRegridder.py @@ -0,0 +1,7 @@ +"""Unit tests for :mod:`esmf_regrid.experimental._partial`.""" + +from esmf_regrid.experimental._partial import PartialRegridder + +def test_PartialRegridder(): + #TODO: write unit tests + pass diff --git a/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py b/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py new file mode 100644 index 00000000..8ac91655 --- /dev/null +++ b/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py @@ -0,0 +1,32 @@ +"""Unit tests for :mod:`esmf_regrid.experimental.partition`.""" +import numpy as np +from esmf_regrid import ESMFAreaWeighted +from esmf_regrid.experimental.partition import Partition + +from esmf_regrid.tests.unit.schemes.test__cube_to_GridInfo import ( + _curvilinear_cube, + _grid_cube, +) +from esmf_regrid.tests.unit.schemes.test__mesh_to_MeshInfo import ( + _gridlike_mesh_cube, +) + +def test_Partition(tmp_path): + src = _grid_cube(150, 500, (-180, 180), (-90, 90), circular=True) + src.data = np.arange(150*500).reshape([500, 150]) + tgt = _grid_cube(16, 36, (-180, 180), (-90, 90), circular=True) + + files = [tmp_path / f"partial_{x}.nc" for x in range(5)] + scheme = ESMFAreaWeighted(mdtol=1) + chunks = [[[0, 100], [0, 150]], [[100, 200], [0, 150]], [[200, 300], [0, 150]], [[300, 400], [0, 150]], [[400, 500], [0, 150]]] + + partition = Partition(src, tgt, scheme, files, chunks) + + partition.generate_files() + + result = partition.apply_regridders(src) + + + expected = src.regrid(tgt, scheme) + + assert result == expected diff --git a/src/esmf_regrid/tests/unit/experimental/io/test_round_tripping.py b/src/esmf_regrid/tests/unit/experimental/io/test_round_tripping.py index 6ebb1b21..c8fa526a 100644 --- a/src/esmf_regrid/tests/unit/experimental/io/test_round_tripping.py +++ b/src/esmf_regrid/tests/unit/experimental/io/test_round_tripping.py @@ -13,6 +13,7 @@ esmpy, ) from esmf_regrid.esmf_regridder import ESMF_NO_VERSION +from esmf_regrid.experimental._partial import PartialRegridder from esmf_regrid.experimental.io import load_regridder, save_regridder from esmf_regrid.experimental.unstructured_scheme import ( GridToMeshESMFRegridder, @@ -77,7 +78,13 @@ def _make_grid_to_mesh_regridder( } if regridder == GridToMeshESMFRegridder: kwargs["method"] = method - rg = regridder(src, tgt, **kwargs) + if regridder == PartialRegridder: + pre_rg = ESMFAreaWeightedRegridder(src, tgt, **kwargs) + weights = pre_rg.regridder.weight_matrix + scheme = ESMFAreaWeighted() + rg = PartialRegridder(src, tgt, None, None, weights, scheme) + else: + rg = regridder(src, tgt, **kwargs) return rg, src @@ -153,6 +160,7 @@ def _compare_ignoring_var_names(x, y): (Constants.Method.BILINEAR, GridToMeshESMFRegridder), (Constants.Method.NEAREST, GridToMeshESMFRegridder), (None, ESMFAreaWeightedRegridder), + (None, PartialRegridder), ], ) def test_grid_to_mesh_round_trip(tmp_path, method, regridder): @@ -161,8 +169,14 @@ def test_grid_to_mesh_round_trip(tmp_path, method, regridder): method=method, regridder=regridder, circular=True ) filename = tmp_path / "regridder.nc" - save_regridder(original_rg, filename) - loaded_rg = load_regridder(str(filename)) + + if regridder == PartialRegridder: + allow_partial = True + else: + allow_partial = False + + save_regridder(original_rg, filename, allow_partial=allow_partial) + loaded_rg = load_regridder(str(filename), allow_partial=allow_partial) if regridder == GridToMeshESMFRegridder: assert original_rg.location == loaded_rg.location @@ -206,8 +220,8 @@ def test_grid_to_mesh_round_trip(tmp_path, method, regridder): assert original_rg.resolution == loaded_rg.resolution original_res_rg, _ = _make_grid_to_mesh_regridder(method=method, resolution=8) res_filename = tmp_path / "regridder_res.nc" - save_regridder(original_res_rg, res_filename) - loaded_res_rg = load_regridder(str(res_filename)) + save_regridder(original_res_rg, res_filename, allow_partial=allow_partial) + loaded_res_rg = load_regridder(str(res_filename), allow_partial=allow_partial) assert original_res_rg.resolution == loaded_res_rg.resolution assert ( original_res_rg.regridder.src.resolution @@ -232,8 +246,8 @@ def test_grid_to_mesh_round_trip(tmp_path, method, regridder): method=method, regridder=regridder, circular=True ) nc_filename = tmp_path / "non_circular_regridder.nc" - save_regridder(original_nc_rg, nc_filename) - loaded_nc_rg = load_regridder(str(nc_filename)) + save_regridder(original_nc_rg, nc_filename, allow_partial=allow_partial) + loaded_nc_rg = load_regridder(str(nc_filename), allow_partial=allow_partial) if regridder == GridToMeshESMFRegridder: _compare_ignoring_var_names(original_nc_rg.grid_x, loaded_nc_rg.grid_x) _compare_ignoring_var_names(original_nc_rg.grid_y, loaded_nc_rg.grid_y) From 0d346fda5b85248cc633bed96099e7770b97a343 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 19 May 2025 08:58:36 +0100 Subject: [PATCH 07/13] initial draft --- src/esmf_regrid/experimental/_partial.py | 37 +----------------- src/esmf_regrid/experimental/partition.py | 38 +++++++++---------- .../io/partition/test_Partition.py | 4 ++ 3 files changed, 23 insertions(+), 56 deletions(-) diff --git a/src/esmf_regrid/experimental/_partial.py b/src/esmf_regrid/experimental/_partial.py index f293a365..0f398929 100644 --- a/src/esmf_regrid/experimental/_partial.py +++ b/src/esmf_regrid/experimental/_partial.py @@ -2,9 +2,6 @@ from esmf_regrid.schemes import ( _ESMFRegridder, - # ESMFAreaWeighted, - # ESMFAreaWeightedRegridder, - # ESMFBilinear, ) class PartialRegridder(_ESMFRegridder): @@ -12,25 +9,8 @@ def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): self.src_slice = src_slice # this will be tuple-like self.tgt_slice = tgt_slice self.scheme = scheme + # TODO: consider disallowing ESMFNearest (unless out of bounds can be made masked) - # kwargs = { - # "use_src_mask": scheme.use_src_mask, - # "use_tgt_mask": scheme.use_tgt_mask, - # "tgt_location": scheme.tgt_location, - # "esmf_args": scheme.esmf_args, - # } - # if isinstance(scheme, (ESMFAreaWeighted, ESMFBilinear)): - # kwargs["mdtol"] = scheme.mdtol - - - # super().__init__( - # ESMFAreaWeightedRegridder.__init__( - # src, - # tgt, - # scheme._method, - # precomputed_weights=weights, - # **kwargs, - # ) self._regridder = scheme.regridder( src, tgt, @@ -40,21 +20,6 @@ def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): ) self.__dict__.update(self._regridder.__dict__) - def __call__(self, cube): - result = super().__call__(cube) - - blank_cube = cube.copy() - blank_cube.data[:] = 0 - result_mask = super().__call__(blank_cube).data.mask - result.data[result_mask] = 0 - - return result - def _get_src_slice(self, cube): # TODO: write a method to handle cubes of different dimensionalities return self.src_slice - - ## keep track of source indices and target indices - ## share reference to full source and target - - ## works the same except everything out of bounds is 0 rather than masked \ No newline at end of file diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index 9ad5457d..bf50c238 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -23,7 +23,7 @@ class Partition: ## hold a collection of source indices ## alternately hold a collection of chunk indices ## note which indices are fully loaded - def __init__(self, src, tgt, scheme, file_names, src_chunks, tgt_chunks=None, auto_generate=False): + def __init__(self, src, tgt, scheme, file_names, src_chunks, tgt_chunks=None, auto_generate=False, saved_files=None, partially_saved=None): self.src = src self.tgt = tgt self.scheme = scheme @@ -39,16 +39,21 @@ def __init__(self, src, tgt, scheme, file_names, src_chunks, tgt_chunks=None, au self.file_chunk_dict = {file: chunk for file, chunk in zip(self.file_names, self.src_chunks)} self.neighbouring_files = self._find_neighbours() - - self.saved_files = [] - self.partially_saved_files = [] - # self._src_slice_indices = [] + if saved_files is None: + self.saved_files = [] + else: + self.saved_files = saved_files + if partially_saved is None: + self.partially_saved_files = [] + else: + self.partially_saved_files = partially_saved if auto_generate: self.generate_files(self.file_names) @property def unsaved_files(self): - return list(set(self.file_names) - set(self.saved_files) - set(self.partially_saved_files)) + files = list(set(self.file_names) - set(self.saved_files) - set(self.partially_saved_files)) + return [file for file in self.file_names if file in files] def generate_files(self, files_to_generate=None): if files_to_generate is None: @@ -73,7 +78,6 @@ def generate_files(self, files_to_generate=None): next_regridder = None else: src_chunk = self.file_chunk_dict[file] - # src = self.src[src_chunk[0][0]:src_chunk[0][1], src_chunk[1][0]:src_chunk[1][1]] src = self.src[*_interpret_slice(src_chunk)] tgt = self.tgt next_regridder = self.scheme.regridder(src, tgt) @@ -89,7 +93,6 @@ def generate_files(self, files_to_generate=None): save_regridder(regridder, pre_file, allow_partial=True) if file_complete: self.saved_files.append(pre_file) - # self._src_slice_indices.append(regridder.src_slice) else: self.partially_saved_files.append(pre_file) @@ -106,8 +109,7 @@ def apply_regridders(self, cube, allow_incomplete=False): assert len(self.unsaved_files) == 0 # TODO: this may work better as a cube of the correct shape for more complex cases current_result = None - # files = self.saved_files - files = self.file_names + files = self.saved_files for file in files: # TODO: make sure this works well with dask @@ -152,19 +154,13 @@ def _combine_regridders(self, existing, current_regridder, pre_files, file): # Calculate a slice of the current chunk which contains all the overlapping source cells. overlaps_next = current_regridder.regridder.weight_matrix[mutual_overlaps].nonzero()[1] - # h_len = current_regridder._src["grid_x"].shape[0] h_len = current_regridder._src[0].shape[0] - # v_len = current_regridder._src["grid_y"].shape[-1] v_len = current_regridder._src[1].shape[-1] # TODO: make this more rigorous - # tgt_size = self.tgt.size tgt_size = np.prod(self.tgt.shape) buffer = (overlaps_next % v_len).max() + 1 - # buffer = (overlaps_next % h_len).max() + 1 # Add this slice to the previous chunk. - # new_src_cube = iris.cube.CubeList([previous_cube, cube[:buffer]]).concatenate_cube() - # new_cubes.append(new_src_cube) pre_file, = pre_files src_slice = self.file_chunk_dict[pre_file] # assumes slice has form [[x_start, x_stop], [y_start, y_stop]] @@ -187,9 +183,6 @@ def _combine_regridders(self, existing, current_regridder, pre_files, file): current_regridder.regridder.weight_matrix[mutual_overlaps] = 0 # Construct replacement for previous regridder with new weights and source cube. - # previous_regridder = ESMFAreaWeightedRegridder(new_src_cube, tgt_mesh, - # precomputed_weights=new_weight_matrix) - # previous_regridder.regridder.esmf_version = 0 # Must be set to allow regridder to load/save previous_regridder = PartialRegridder(new_src_cube, self.tgt, src_slice, tgt_slice, new_weight_matrix, self.scheme) existing = [previous_regridder] else: @@ -214,11 +207,16 @@ def _combine_results(self, existing_results, next_result): if existing_results is None: combined_result = next_result else: - combined_result = existing_results + next_result + # combined_result = existing_results + next_result + combined_data = np.ma.filled(existing_results.data, 0) + np.ma.filled(next_result.data, 0) + combined_mask = np.ma.getmaskarray(existing_results.data) & np.ma.getmaskarray(next_result.data) + combined_result = existing_results.copy() + combined_result.data = np.ma.array(combined_data, mask=combined_mask) return combined_result def _combine_sparse(left, right, w, a, b, t): + # TODO: make this more clear result = sparse.csr_array((t, w * (a + b))) src_indices_left = (np.arange(a)[np.newaxis, :] + ((a + b) * np.arange(w)[:, np.newaxis])).flatten() left_im = sparse.csr_array((np.ones(a * w), (np.arange(a * w), src_indices_left)), shape=(a * w, w * (a + b))) diff --git a/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py b/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py index 8ac91655..5eb8ac14 100644 --- a/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py +++ b/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py @@ -19,6 +19,10 @@ def test_Partition(tmp_path): files = [tmp_path / f"partial_{x}.nc" for x in range(5)] scheme = ESMFAreaWeighted(mdtol=1) chunks = [[[0, 100], [0, 150]], [[100, 200], [0, 150]], [[200, 300], [0, 150]], [[300, 400], [0, 150]], [[400, 500], [0, 150]]] + # TODO: consider different ways we could specify this in the API: + # chunks = 5 + # chunks = (5, None) + # chunks = None # (use dask chunks) partition = Partition(src, tgt, scheme, files, chunks) From f5c09d2c21097d3c18d18fdb0d54d9cc3097648c Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 12 Jun 2025 15:14:41 +0100 Subject: [PATCH 08/13] fix tests, improve kwarg handling --- src/esmf_regrid/experimental/_partial.py | 13 ++++++------- src/esmf_regrid/experimental/io.py | 16 +++++----------- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/esmf_regrid/experimental/_partial.py b/src/esmf_regrid/experimental/_partial.py index 0f398929..a63c47c8 100644 --- a/src/esmf_regrid/experimental/_partial.py +++ b/src/esmf_regrid/experimental/_partial.py @@ -1,4 +1,4 @@ -"""Provides a regridder class compatible with Partition""" +"""Provides a regridder class compatible with Partition.""" from esmf_regrid.schemes import ( _ESMFRegridder, @@ -11,15 +11,14 @@ def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): self.scheme = scheme # TODO: consider disallowing ESMFNearest (unless out of bounds can be made masked) + # Pop duplicate kwargs. + for arg in set(kwargs.keys()).intersection(vars(self.scheme)): + kwargs.pop(arg) + self._regridder = scheme.regridder( src, tgt, precomputed_weights=weights, - # TODO: turn this back on - # **kwargs, + **kwargs, ) self.__dict__.update(self._regridder.__dict__) - - def _get_src_slice(self, cube): - # TODO: write a method to handle cubes of different dimensionalities - return self.src_slice diff --git a/src/esmf_regrid/experimental/io.py b/src/esmf_regrid/experimental/io.py index 2153ee06..9672746d 100644 --- a/src/esmf_regrid/experimental/io.py +++ b/src/esmf_regrid/experimental/io.py @@ -286,15 +286,11 @@ def save_regridder(rg, filename, allow_partial=False): if regridder_type == "PartialRegridder": src_slice = rg.src_slice # this slice is described by a tuple if src_slice is None: - src_slice = "None" - # TODO: make this a cube - # attributes[_SRC_SLICE_NAME] = src_slice + src_slice = [] src_slice_cube = Cube(src_slice, long_name=_SRC_SLICE_NAME, var_name=_SRC_SLICE_NAME) tgt_slice = rg.tgt_slice # this slice is described by a tuple if tgt_slice is None: - tgt_slice = "None" - # TODO: make this a cube - # attributes[_TGT_SLICE_NAME] = tgt_slice + tgt_slice = [] tgt_slice_cube = Cube(src_slice, long_name=_TGT_SLICE_NAME, var_name=_TGT_SLICE_NAME) extra_cubes = [src_slice_cube, tgt_slice_cube] @@ -333,7 +329,7 @@ def save_regridder(rg, filename, allow_partial=False): # Save cubes while ensuring var_names do not conflict for the sake of consistency. with _managed_var_name(src_cube, tgt_cube): - cube_list = CubeList([src_cube, tgt_cube, weights_cube, weight_shape_cube] + extra_cubes) + cube_list = CubeList([src_cube, tgt_cube, weights_cube, weight_shape_cube, *extra_cubes]) for cube in cube_list: cube.attributes = attributes @@ -449,13 +445,11 @@ def load_regridder(filename, allow_partial=False): kwargs = {} if scheme is PartialRegridder: - # src_slice = src_cube.attributes[_SRC_SLICE_NAME] src_slice = cubes.extract_cube(_SRC_SLICE_NAME).data.tolist() - if src_slice == "None": + if src_slice == []: src_slice = None - # tgt_slice = tgt_cube.attributes[_TGT_SLICE_NAME] tgt_slice = cubes.extract_cube(_TGT_SLICE_NAME).data.tolist() - if tgt_slice == "None": + if tgt_slice == []: tgt_slice = None sub_scheme = { Constants.Method.CONSERVATIVE: ESMFAreaWeighted, From 043a6f796be2077e9f56a26bf7ba8d2269738c53 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 12 Jun 2025 16:23:54 +0100 Subject: [PATCH 09/13] fix tests --- src/esmf_regrid/experimental/partition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index bf50c238..dd1b035a 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -114,7 +114,7 @@ def apply_regridders(self, cube, allow_incomplete=False): for file in files: # TODO: make sure this works well with dask next_regridder = load_regridder(file, allow_partial=True) - cube_slice = next_regridder._get_src_slice(cube) + cube_slice = next_regridder.src_slice next_result = next_regridder(cube[*_interpret_slice(cube_slice)]) current_result = self._combine_results(current_result, next_result) return current_result From 3eb6449753d3cfa144453f6dfbf985ced8b5731c Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 25 Jul 2025 13:57:23 +0100 Subject: [PATCH 10/13] alternate partition implementation --- src/esmf_regrid/esmf_regridder.py | 51 +++++++----- src/esmf_regrid/experimental/_partial.py | 17 ++++ src/esmf_regrid/experimental/partition.py | 81 ++++++++++++++++++- src/esmf_regrid/schemes.py | 3 + .../io/partition/test_Partition.py | 28 ++++++- 5 files changed, 157 insertions(+), 23 deletions(-) diff --git a/src/esmf_regrid/esmf_regridder.py b/src/esmf_regrid/esmf_regridder.py index 974a8de6..be18e20a 100644 --- a/src/esmf_regrid/esmf_regridder.py +++ b/src/esmf_regrid/esmf_regridder.py @@ -158,6 +158,35 @@ def _out_dtype(self, in_dtype): out_dtype = (np.ones(1, dtype=in_dtype) * np.ones(1, dtype=weight_dtype)).dtype return out_dtype + def _gen_weights_and_data(self, src_array): + extra_shape = src_array.shape[: -self.src.dims] + + flat_src = self.src._array_to_matrix(ma.filled(src_array, 0.0)) + flat_tgt = self.weight_matrix @ flat_src + + src_inverted_mask = self.src._array_to_matrix(~ma.getmaskarray(src_array)) + weight_sums = self.weight_matrix @ src_inverted_mask + + tgt_data = self.tgt._matrix_to_array(flat_tgt, extra_shape) + tgt_weights = self.tgt._matrix_to_array(weight_sums, extra_shape) + return tgt_weights, tgt_data + + def _regrid_from_weights_and_data(self, tgt_weights, tgt_data, norm_type=Constants.NormType.FRACAREA, mdtol=1): + # Set the minimum mdtol to be slightly higher than 0 to account for rounding + # errors. + mdtol = max(mdtol, 1e-8) + tgt_mask = tgt_weights > 1 - mdtol + masked_weight_sums = tgt_weights * tgt_mask + normalisations = np.ones_like(tgt_data) + if norm_type == Constants.NormType.FRACAREA: + normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] + elif norm_type == Constants.NormType.DSTAREA: + pass + normalisations = ma.array(normalisations, mask=np.logical_not(tgt_mask)) + + tgt_array = tgt_data * normalisations + return tgt_array + def regrid(self, src_array, norm_type=Constants.NormType.FRACAREA, mdtol=1): """Perform regridding on an array of data. @@ -195,25 +224,7 @@ def regrid(self, src_array, norm_type=Constants.NormType.FRACAREA, mdtol=1): f"got an array with shape ending in {main_shape}." ) raise ValueError(e_msg) - extra_shape = array_shape[: -self.src.dims] - extra_size = max(1, np.prod(extra_shape)) - src_inverted_mask = self.src._array_to_matrix(~ma.getmaskarray(src_array)) - weight_sums = self.weight_matrix @ src_inverted_mask - out_dtype = self._out_dtype(src_array.dtype) - # Set the minimum mdtol to be slightly higher than 0 to account for rounding - # errors. - mdtol = max(mdtol, 1e-8) - tgt_mask = weight_sums > 1 - mdtol - masked_weight_sums = weight_sums * tgt_mask - normalisations = np.ones([self.tgt.size, extra_size], dtype=out_dtype) - if norm_type == Constants.NormType.FRACAREA: - normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] - elif norm_type == Constants.NormType.DSTAREA: - pass - normalisations = ma.array(normalisations, mask=np.logical_not(tgt_mask)) - flat_src = self.src._array_to_matrix(ma.filled(src_array, 0.0)) - flat_tgt = self.weight_matrix @ flat_src - flat_tgt = flat_tgt * normalisations - tgt_array = self.tgt._matrix_to_array(flat_tgt, extra_shape) + tgt_weights, tgt_data = self._gen_weights_and_data(src_array) + tgt_array = self._regrid_from_weights_and_data(tgt_weights, tgt_data, norm_type=norm_type, mdtol=mdtol) return tgt_array diff --git a/src/esmf_regrid/experimental/_partial.py b/src/esmf_regrid/experimental/_partial.py index a63c47c8..c8067158 100644 --- a/src/esmf_regrid/experimental/_partial.py +++ b/src/esmf_regrid/experimental/_partial.py @@ -2,6 +2,7 @@ from esmf_regrid.schemes import ( _ESMFRegridder, + _create_cube, ) class PartialRegridder(_ESMFRegridder): @@ -22,3 +23,19 @@ def __init__(self, src, tgt, src_slice, tgt_slice, weights, scheme, **kwargs): **kwargs, ) self.__dict__.update(self._regridder.__dict__) + + def partial_regrid(self, src): + return self.regridder._gen_weights_and_data(src.data) + + def finish_regridding(self, src_cube, weights, data): + dims = self._get_cube_dims(src_cube) + + result_data = self.regridder._regrid_from_weights_and_data(weights, data) + result_cube = _create_cube( + result_data, + src_cube, + dims, + self._tgt, + len(self._tgt) + ) + return result_cube diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index dd1b035a..69054305 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -88,7 +88,7 @@ def generate_files(self, files_to_generate=None): file_complete = True # TODO: consider any for neighbour in neighbours: - if neighbour in self.unsaved_files: + if neighbour in self.unsaved_files and neighbour != file: file_complete = False save_regridder(regridder, pre_file, allow_partial=True) if file_complete: @@ -230,3 +230,82 @@ def _combine_sparse(left, right, w, a, b, t): result += result_add_right return result +class Partition2: + def __init__(self, src, tgt, scheme, file_names, src_chunks, tgt_chunks=None, auto_generate=False, saved_files=None, + partially_saved=None): + self.src = src + self.tgt = tgt + self.scheme = scheme + # TODO: consider abstracting away the idea of files + self.file_names = file_names + # TODO: consider deriving this from self.src.lazy_data() + self.src_chunks = src_chunks + assert len(src_chunks) == len(file_names) + self.tgt_chunks = tgt_chunks + assert tgt_chunks is None # We don't handle big targets currently + + # Note: this may need to become more sophisticated when both src and tgt are large + self.file_chunk_dict = {file: chunk for file, chunk in zip(self.file_names, self.src_chunks)} + + if saved_files is None: + self.saved_files = [] + else: + self.saved_files = saved_files + if auto_generate: + self.generate_files(self.file_names) + + @property + def unsaved_files(self): + files = set(self.file_names) - set(self.saved_files) + return [file for file in self.file_names if file in files] + + def generate_files(self, files_to_generate=None): + if files_to_generate is None: + # TODO: consider adding logic to order the files more efficiently. + files = self.unsaved_files + else: + assert isinstance(files_to_generate, int) + files = self.unsaved_files[:files_to_generate] + + for file in files: + src_chunk = self.file_chunk_dict[file] + src = self.src[*_interpret_slice(src_chunk)] + tgt = self.tgt + regridder = self.scheme.regridder(src, tgt) + src_slice = self.file_chunk_dict[file] + src_cube = self.src[*_interpret_slice(src_slice)] + print(src_cube) + weights = regridder.regridder.weight_matrix + regridder = PartialRegridder(src_cube, self.tgt, src_slice, None, weights, self.scheme) + # TODO: make partial? + save_regridder(regridder, file, allow_partial=True) + self.saved_files.append(file) + + def apply_regridders(self, cube, allow_incomplete=False): + # for each target chunk, iterate through each associated regridder + # for now, assume one target chunk + # TODO: figure out how to mask parts of the target not covered by any source (e.g. start out with full mask) + if not allow_incomplete: + assert len(self.unsaved_files) == 0 + # TODO: this may work better as a cube of the correct shape for more complex cases + current_result = None + current_weights = None + files = self.saved_files + + for file, chunk in zip(self.file_names, self.src_chunks): + if file in files: + next_regridder = load_regridder(file, allow_partial=True) + cube_chunk = cube[*_interpret_slice(chunk)] + next_weights, next_result = next_regridder.partial_regrid(cube_chunk) + if current_weights is None: + current_weights = next_weights + else: + current_weights += next_weights + if current_result is None: + current_result = next_result + else: + current_result += next_result + + non_zero_weights = current_weights > 0 + current_result[non_zero_weights] /= current_weights[non_zero_weights] + return next_regridder.finish_regridding(cube_chunk, current_weights, current_result) diff --git a/src/esmf_regrid/schemes.py b/src/esmf_regrid/schemes.py index 39b61889..4ee27136 100644 --- a/src/esmf_regrid/schemes.py +++ b/src/esmf_regrid/schemes.py @@ -1193,6 +1193,7 @@ def regridder( use_tgt_mask=use_tgt_mask, tgt_location="face", esmf_args=esmf_args, + precomputed_weights=precomputed_weights, ) @@ -1340,6 +1341,7 @@ def regridder( use_tgt_mask=use_tgt_mask, tgt_location=tgt_location, esmf_args=esmf_args, + precomputed_weights=precomputed_weights, ) @@ -1474,6 +1476,7 @@ def regridder( use_tgt_mask=use_tgt_mask, tgt_location=tgt_location, esmf_args=esmf_args, + precomputed_weights=precomputed_weights, ) diff --git a/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py b/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py index 5eb8ac14..09454d7d 100644 --- a/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py +++ b/src/esmf_regrid/tests/unit/experimental/io/partition/test_Partition.py @@ -1,7 +1,7 @@ """Unit tests for :mod:`esmf_regrid.experimental.partition`.""" import numpy as np from esmf_regrid import ESMFAreaWeighted -from esmf_regrid.experimental.partition import Partition +from esmf_regrid.experimental.partition import Partition, Partition2 from esmf_regrid.tests.unit.schemes.test__cube_to_GridInfo import ( _curvilinear_cube, @@ -30,7 +30,31 @@ def test_Partition(tmp_path): result = partition.apply_regridders(src) - expected = src.regrid(tgt, scheme) assert result == expected + assert np.array_equal(result.data, expected.data) + +def test_Partition2(tmp_path): + src = _grid_cube(150, 500, (-180, 180), (-90, 90), circular=True) + src.data = np.arange(150*500).reshape([500, 150]) + tgt = _grid_cube(16, 36, (-180, 180), (-90, 90), circular=True) + + files = [tmp_path / f"partial_{x}.nc" for x in range(5)] + scheme = ESMFAreaWeighted(mdtol=1) + chunks = [[[0, 100], [0, 150]], [[100, 200], [0, 150]], [[200, 300], [0, 150]], [[300, 400], [0, 150]], [[400, 500], [0, 150]]] + # TODO: consider different ways we could specify this in the API: + # chunks = 5 + # chunks = (5, None) + # chunks = None # (use dask chunks) + + partition = Partition2(src, tgt, scheme, files, chunks) + + partition.generate_files() + + result = partition.apply_regridders(src) + + + expected = src.regrid(tgt, scheme) + assert np.allclose(result.data, expected.data) + assert result == expected From 333bc18080702b6e58988c4e0809d88c7a08c377 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 29 Jul 2025 20:32:28 +0100 Subject: [PATCH 11/13] fix original implementation --- src/esmf_regrid/experimental/partition.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index 69054305..474a02fc 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -172,10 +172,10 @@ def _combine_regridders(self, existing, current_regridder, pre_files, file): # Create weights for new regridder previous_wm = previous_regridder.regridder.weight_matrix current_wm = current_regridder.regridder.weight_matrix - right_wm = sparse.csr_array((tgt_size, buffer * v_len)) - buffer_inds = sum(np.meshgrid(np.arange(buffer), np.arange(v_len) * h_len)).flatten() + right_wm = sparse.csr_array((tgt_size, buffer * h_len)) + buffer_inds = sum(np.meshgrid(np.arange(buffer), np.arange(h_len) * v_len)).flatten() right_wm[mutual_overlaps] = current_wm[mutual_overlaps][:, buffer_inds] - new_weight_matrix = _combine_sparse(previous_wm, right_wm, v_len, h_len, buffer, tgt_size) + new_weight_matrix = _combine_sparse(previous_wm, right_wm, h_len, v_len, buffer, tgt_size) new_weight_matrix = sparse.csr_matrix( new_weight_matrix) # must be matrix for ier (likely to change in future) From d5e637dfeb3757a68f2c3736451cc00863834dc3 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 29 Jul 2025 20:52:18 +0100 Subject: [PATCH 12/13] fix Partition2 --- src/esmf_regrid/experimental/partition.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index 474a02fc..eb20a14c 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -306,6 +306,4 @@ def apply_regridders(self, cube, allow_incomplete=False): else: current_result += next_result - non_zero_weights = current_weights > 0 - current_result[non_zero_weights] /= current_weights[non_zero_weights] return next_regridder.finish_regridding(cube_chunk, current_weights, current_result) From b7fc2ede3a28c20901c35bd6363fceb43e387094 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 29 Jul 2025 20:55:43 +0100 Subject: [PATCH 13/13] fix Partition2 --- src/esmf_regrid/experimental/partition.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/esmf_regrid/experimental/partition.py b/src/esmf_regrid/experimental/partition.py index eb20a14c..8dd6ba3e 100644 --- a/src/esmf_regrid/experimental/partition.py +++ b/src/esmf_regrid/experimental/partition.py @@ -274,7 +274,6 @@ def generate_files(self, files_to_generate=None): regridder = self.scheme.regridder(src, tgt) src_slice = self.file_chunk_dict[file] src_cube = self.src[*_interpret_slice(src_slice)] - print(src_cube) weights = regridder.regridder.weight_matrix regridder = PartialRegridder(src_cube, self.tgt, src_slice, None, weights, self.scheme) # TODO: make partial?