grass.tools: Add NumPy arrays IO to Tools (#5878)

wenzeslaus · web-flow · commit 063dea1a969a · 2025-09-05T09:37:06.000-04:00
This adds NumPy arrays as inputs and outputs to tools when called through the Tools object. My focus with this PR was to create a good API which can be used in various contexts and is useful as is. However, the specifics of the implementation, especially the low performance compared to native data, are secondary issues for me in this addition as long as there is no performance hit for the cases when NumPy arrays are not used, which is the case. Even with the performance hits, it works great as a replacement for explicit grass.script.array conversions (same code, just in the background) and in tests (replacing custom test asserts and data conversions). While the interface for inputs is clear (the array with data), the interface for outputs was a pick among many choices (type used as a flag over strings, booleans, empty objects, flags). Strict adherence to NumPy universal functions was left out as well as control over the actual output array type (a generic array is documented; grass.script.array.array is used now). The NumPy import dependency is optional so that the imports and Tools objects work without NumPy installed. While the tests would fail, the GRASS build should work without NumPy as of now. This combines well with the dynamic return value with control over consistency implemented in #6278 as the arrays are one of the possible return types, but can also be made part of a consistent return type. This lends itself to a single array, a tuple of arrays, or an object with named arrays as possible return types. Overall, this is building on top of the Tools class addition in #2923. The big picture is also discussed in #5830.
diff --git a/lib/gis/parser_md_python.c b/lib/gis/parser_md_python.c
@@ -114,10 +114,20 @@ void print_python_option(FILE *file, const struct Option *opt,
     char prompt_description[KEYLENGTH];
     if (opt->gisprompt) {
         G__split_gisprompt(opt->gisprompt, age, element, prompt_description);
-        if (tools_api && !opt->multiple && opt->type == TYPE_STRING &&
-            G_strncasecmp("old", age, 3) == 0 &&
-            G_strncasecmp("file", element, 4) == 0) {
-            type = "str | io.StringIO";
+        if (tools_api && !opt->multiple && opt->type == TYPE_STRING) {
+            if (G_strncasecmp("old", age, 3) == 0 &&
+                G_strncasecmp("file", element, 4) == 0) {
+                type = "str | io.StringIO";
+            }
+            if (G_strncasecmp("old", age, 3) == 0 &&
+                G_strncasecmp("cell", element, 4) == 0) {
+                type = "str | np.ndarray";
+            }
+            if (G_strncasecmp("new", age, 3) == 0 &&
+                G_strncasecmp("cell", element, 4) == 0) {
+                type = "str | type(np.ndarray) | type(np.array) | "
+                       "type(gs.array.array)";
+            }
         }
     }
 
@@ -564,13 +574,46 @@ void G__md_print_python_long_version(FILE *file, const char *indent,
         return;
 
     fprintf(file, "\n%sReturns:\n\n", indent);
+
+    bool outputs_arrays = false;
+    char age[KEYLENGTH];
+    char element[KEYLENGTH];
+    char prompt_description[KEYLENGTH];
+    if (st->n_opts) {
+        opt = &st->first_option;
+        while (opt != NULL) {
+            if (opt->gisprompt) {
+                G__split_gisprompt(opt->gisprompt, age, element,
+                                   prompt_description);
+                if (tools_api && !opt->multiple && opt->type == TYPE_STRING) {
+                    if (G_strncasecmp("new", age, 3) == 0 &&
+                        G_strncasecmp("cell", element, 4) == 0) {
+                        outputs_arrays = true;
+                    }
+                }
+            }
+            opt = opt->next_opt;
+        }
+    }
+
     fprintf(file, "%s**result** : ", indent);
     fprintf(file, "grass.tools.support.ToolResult");
+    if (outputs_arrays) {
+        fprintf(file, " | np.ndarray | tuple[np.ndarray]");
+    }
     fprintf(file, " | None");
     fprintf(file, MD_NEWLINE);
     fprintf(file, "\n%s", indent);
     fprintf(file, "If the tool produces text as standard output, a "
                   "*ToolResult* object will be returned. "
                   "Otherwise, `None` will be returned.");
+    if (outputs_arrays) {
+        fprintf(file, " If an array type (e.g., *np.ndarray*) is used for one "
+                      "of the raster outputs, "
+                      "the result will be an array and will have the shape "
+                      "corresponding to the computational region. "
+                      "If an array type is used for more than one raster "
+                      "output, the result will be a tuple of arrays.");
+    }
     fprintf(file, "\n");
 }
diff --git a/python/grass/benchmark/runners.py b/python/grass/benchmark/runners.py
@@ -173,7 +173,7 @@ def benchmark_resolutions(module, resolutions, label, repeat=5, nprocs=None):
         region = gs.region()
         n_cells.append(region["cells"])
         print("\u2500" * term_size.columns)
-        print(f"Benchmark with {resolution} resolution...\n")
+        print(f"Benchmark with resolution {resolution}...\n")
         time_sum = 0
         measured_times = []
         for _ in range(repeat):
diff --git a/python/grass/tools/benchmark/benchmark_grass_tools_numpy.py b/python/grass/tools/benchmark/benchmark_grass_tools_numpy.py
@@ -0,0 +1,121 @@
+import time
+import numpy as np
+
+
+from grass.tools import Tools
+from grass.benchmark import (
+    num_cells_plot,
+    benchmark_resolutions,
+    load_results,
+    save_results,
+)
+
+
+class TimeMeasurer:
+    """Minimal stopwatch used as the base class of the benchmarks in this file.
+
+    Subclasses call start() and stop() around the code they measure and
+    the measured duration is then available through the *time* property.
+    """
+
+    def __init__(self):
+        # Both values stay None until start() and stop() are called.
+        self._start = None
+        self._time = None
+
+    def start(self):
+        """Store the current time.perf_counter() value as the start."""
+        self._start = time.perf_counter()
+
+    def stop(self):
+        """Store the time elapsed since the last start() call."""
+        now = time.perf_counter()
+        self._time = now - self._start
+
+    @property
+    def time(self):
+        """Duration stored by the last stop() call (None before that)."""
+        return self._time
+
+
+class PlainNumPyBenchmark(TimeMeasurer):
+    """Baseline benchmark which evaluates the test expression with NumPy only."""
+
+    def run(self):
+        """Time the expression on arrays with the rows and cols of the region.
+
+        Only the evaluation of the expression is inside the timed interval;
+        creating the input arrays and printing the result are not.
+        """
+        tools = Tools()
+        region = tools.g_region(flags="p", format="json")
+        shape = (region["rows"], region["cols"])
+        a = np.full(shape, 1)
+        b = np.full(shape, 1)
+
+        self.start()
+        # Keep this in sync with the r.mapcalc expressions used by the other
+        # benchmarks in this file: the three square roots are multiplied.
+        c = 2 * np.sqrt(a + b) * np.sqrt(a) * np.sqrt(b) + a / 2
+        self.stop()
+
+        # Print the sum and the number of cells of the result.
+        print(c.sum())
+        print(c.size)
+
+        del a
+        del b
+        del c
+
+
+class PlainGRASSBenchmark(TimeMeasurer):
+    """Benchmark which evaluates the test expression with r_mapcalc on rasters."""
+
+    def run(self):
+        """Create rasters a and b, then time only the r_mapcalc call creating c."""
+        tools = Tools(overwrite=True)
+        # The input rasters are created outside of the timed interval.
+        for setup in ("a = 1", "b = 1"):
+            tools.r_mapcalc(expression=setup)
+
+        formula = "c = 2 * sqrt(a + b) * sqrt(a) * sqrt(b) + a / 2"
+        self.start()
+        tools.r_mapcalc(expression=formula)
+        self.stop()
+
+        # Print the sum and the number of cells reported by r_univar.
+        stats = tools.r_univar(map="c", format="json")
+        for key in ("sum", "cells"):
+            print(stats[key])
+
+
+class NumPyGRASSBenchmark(TimeMeasurer):
+    """Benchmark of a tool call with NumPy arrays as inputs and as the output."""
+
+    def run(self):
+        """Time one r_mapcalc_simple call which takes arrays and returns one."""
+        tools = Tools()
+        region = tools.g_region(flags="p", format="json")
+        shape = (region["rows"], region["cols"])
+        a = np.full(shape, 1)
+        b = np.full(shape, 1)
+
+        formula = "2* sqrt(A + B) * sqrt(A) * sqrt(B) + A / 2"
+        self.start()
+        c = tools.r_mapcalc_simple(expression=formula, a=a, b=b, output=np.array)
+        self.stop()
+
+        # The resulting array is passed directly to r_univar as its input.
+        stats = tools.r_univar(map=c, format="json")
+        print(stats["sum"])
+        print(stats["cells"])
+
+        del a, b, c
+
+
+def main():
+    """Run the three benchmarks, save and reload the results, and plot them."""
+    # Settings shared by all benchmark_resolutions calls.
+    common = {"resolutions": [5, 2, 1, 0.5], "repeat": 10}
+    benchmarks = (
+        (PlainNumPyBenchmark, "NumPy"),
+        (PlainGRASSBenchmark, "GRASS"),
+        (NumPyGRASSBenchmark, "NumPy GRASS"),
+    )
+    results = [
+        benchmark_resolutions(module=benchmark_class(), label=label, **common)
+        for benchmark_class, label in benchmarks
+    ]
+    print(results)
+    # Pass the results through save_results and load_results before plotting.
+    results = load_results(save_results(results))
+    print(results)
+    plot_file = "test_res_plot.png"
+    num_cells_plot(results.results, filename=plot_file)
+    print(plot_file)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/grass/tools/session_tools.py b/python/grass/tools/session_tools.py
@@ -38,7 +38,7 @@ class Tools:
 
     >>> from grass.tools import Tools
     >>> tools = Tools(session=session)
-    >>> tools.g_region(rows=100, cols=100)  # doctest: +ELLIPSIS
+    >>> tools.g_region(rows=100, cols=100)
     >>> tools.r_random_surface(output="surface", seed=42)
 
     For tools outputting JSON, the results can be accessed directly:
@@ -71,6 +71,50 @@ class Tools:
     of strings as parameters (*run_cmd* and *call_cmd*).
     When a tool is run using the function corresponding to its name, the *run* function
     is used in the background.
+
+    Raster inputs and outputs can be NumPy arrays:
+
+    >>> import numpy as np
+    >>> tools.g_region(rows=2, cols=3)
+    >>> slope = tools.r_slope_aspect(elevation=np.ones((2, 3)), slope=np.ndarray)
+    >>> tools.r_grow(
+    ...     input=np.array([[1, np.nan, np.nan], [np.nan, np.nan, np.nan]]),
+    ...     radius=1.5,
+    ...     output=np.ndarray,
+    ... )
+    array([[1., 1., 0.],
+           [1., 1., 0.]])
+
+    The input array's shape and the computational region rows and columns need to
+    match. The output array's shape is determined by the computational region.
+
+    When multiple outputs are returned, they are returned as a tuple:
+
+    >>> (slope, aspect) = tools.r_slope_aspect(
+    ...     elevation=np.ones((2, 3)), slope=np.array, aspect=np.array
+    ... )
+
+    To access the arrays by name, e.g., with a high number of output arrays,
+    the standard result object can be requested with *consistent_return_value*:
+
+    >>> tools = Tools(session=session, consistent_return_value=True)
+    >>> result = tools.r_slope_aspect(
+    ...     elevation=np.ones((2, 3)), slope=np.array, aspect=np.array
+    ... )
+
+    The result object then includes the arrays under the *arrays* attribute
+    where they can be accessed as attributes by names corresponding to the
+    output parameter names:
+
+    >>> slope = result.arrays.slope
+    >>> aspect = result.arrays.aspect
+
+    Using `consistent_return_value=True` is also advantageous to obtain both arrays
+    and text outputs from the tool as the result object has the same
+    attributes and functionality as without arrays:
+
+    >>> result.text
+    ''
     """
 
     def __init__(
@@ -127,6 +171,8 @@ def __init__(
         *text* attributes of the result object will evaluate to `False`). This is
         advantageous when examining the *stdout* or *text* attributes directly, or
         when using the *returncode* attribute in combination with `errors="ignore"`.
+        Additionally, this can be used to obtain both NumPy arrays and text outputs
+        from a tool call.
 
         If *env* or other *Popen* arguments are provided to one of the tool running
         functions, the constructor parameters except *errors* are ignored.
@@ -214,13 +260,39 @@ def run(self, tool_name_: str, /, **kwargs):
         # Get a fixed env parameter at at the beginning of each execution,
         # but repeat it every time in case the referenced environment is modified.
         args, popen_options = gs.popen_args_command(tool_name_, **kwargs)
+
+        # Compute the environment for subprocesses and store it for later use.
+        if "env" not in popen_options:
+            popen_options["env"] = self._modified_env_if_needed()
+
+        object_parameter_handler.translate_objects_to_data(
+            kwargs, env=popen_options["env"]
+        )
+
         # We approximate original kwargs with the possibly-modified kwargs.
-        return self.run_cmd(
+        result = self.run_cmd(
             args,
             tool_kwargs=kwargs,
             input=object_parameter_handler.stdin,
             **popen_options,
         )
+        use_objects = object_parameter_handler.translate_data_to_objects(
+            kwargs, env=popen_options["env"]
+        )
+        if use_objects:
+            if self._consistent_return_value:
+                result.set_arrays(object_parameter_handler.all_array_results)
+            else:
+                result = object_parameter_handler.result
+
+        if object_parameter_handler.temporary_rasters:
+            self.call(
+                "g.remove",
+                type="raster",
+                name=object_parameter_handler.temporary_rasters,
+                flags="f",
+            )
+        return result
 
     def run_cmd(
         self,
diff --git a/python/grass/tools/support.py b/python/grass/tools/support.py
diff --git a/python/grass/tools/tests/grass_tools_session_tools_numpy_test.py b/python/grass/tools/tests/grass_tools_session_tools_numpy_test.py