Skip to content

Commit 063dea1

Browse files
authored
grass.tools: Add NumPy arrays IO to Tools (#5878)
This is adding NumPy array as input and output to tools when called through the Tools object. My focus with this PR was to create a good API which can be used in various contexts and is useful as is. However, the specifics of the implementation, especially low performance comparing to native data, are secondary issues for me in this addition as long as there is no performance hit for the cases when NumPy arrays are not used which is the case. Even with the performance hits, it works great as a replacement of explicit grass.script.array conversions (same code, just in the background) and in tests (replacing custom tests asserts, and data conversions). While the interface for inputs is clear (the array with data), the interface for outputs was a pick among many choices (type used as a flag over strings, booleans, empty objects, flags). Strict adherence to NumPy universal function was left out as well as control over the actual output array type (a generic array is documented; grass.script.array.array is used now). The NumPy import dependency is optional so that the imports and Tools objects work without NumPy installed. While the tests would fail, GRASS build should work without NumPy as of now. This combines well with the dynamic return value with control over consistency implemented in #6278 as the arrays are one of the possible return types, but can be also made as part of a consistent return type. This lends itself to single array, tuple of arrays, or object with named arrays as possible return types. Overall, this is building on top of Tools class addition in #2923. The big picture is also discussed in #5830.
1 parent 502ab0f commit 063dea1

File tree

6 files changed

+456
-9
lines changed

6 files changed

+456
-9
lines changed

lib/gis/parser_md_python.c

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,20 @@ void print_python_option(FILE *file, const struct Option *opt,
114114
char prompt_description[KEYLENGTH];
115115
if (opt->gisprompt) {
116116
G__split_gisprompt(opt->gisprompt, age, element, prompt_description);
117-
if (tools_api && !opt->multiple && opt->type == TYPE_STRING &&
118-
G_strncasecmp("old", age, 3) == 0 &&
119-
G_strncasecmp("file", element, 4) == 0) {
120-
type = "str | io.StringIO";
117+
if (tools_api && !opt->multiple && opt->type == TYPE_STRING) {
118+
if (G_strncasecmp("old", age, 3) == 0 &&
119+
G_strncasecmp("file", element, 4) == 0) {
120+
type = "str | io.StringIO";
121+
}
122+
if (G_strncasecmp("old", age, 3) == 0 &&
123+
G_strncasecmp("cell", element, 4) == 0) {
124+
type = "str | np.ndarray";
125+
}
126+
if (G_strncasecmp("new", age, 3) == 0 &&
127+
G_strncasecmp("cell", element, 4) == 0) {
128+
type = "str | type(np.ndarray) | type(np.array) | "
129+
"type(gs.array.array)";
130+
}
121131
}
122132
}
123133

@@ -564,13 +574,46 @@ void G__md_print_python_long_version(FILE *file, const char *indent,
564574
return;
565575

566576
fprintf(file, "\n%sReturns:\n\n", indent);
577+
578+
bool outputs_arrays = false;
579+
char age[KEYLENGTH];
580+
char element[KEYLENGTH];
581+
char prompt_description[KEYLENGTH];
582+
if (st->n_opts) {
583+
opt = &st->first_option;
584+
while (opt != NULL) {
585+
if (opt->gisprompt) {
586+
G__split_gisprompt(opt->gisprompt, age, element,
587+
prompt_description);
588+
if (tools_api && !opt->multiple && opt->type == TYPE_STRING) {
589+
if (G_strncasecmp("new", age, 3) == 0 &&
590+
G_strncasecmp("cell", element, 4) == 0) {
591+
outputs_arrays = true;
592+
}
593+
}
594+
}
595+
opt = opt->next_opt;
596+
}
597+
}
598+
567599
fprintf(file, "%s**result** : ", indent);
568600
fprintf(file, "grass.tools.support.ToolResult");
601+
if (outputs_arrays) {
602+
fprintf(file, " | np.ndarray | tuple[np.ndarray]");
603+
}
569604
fprintf(file, " | None");
570605
fprintf(file, MD_NEWLINE);
571606
fprintf(file, "\n%s", indent);
572607
fprintf(file, "If the tool produces text as standard output, a "
573608
"*ToolResult* object will be returned. "
574609
"Otherwise, `None` will be returned.");
610+
if (outputs_arrays) {
611+
fprintf(file, " If an array type (e.g., *np.ndarray*) is used for one "
612+
"of the raster outputs, "
613+
"the result will be an array and will have the shape "
614+
"corresponding to the computational region. "
615+
"If an array type is used for more than one raster "
616+
"output, the result will be a tuple of arrays.");
617+
}
575618
fprintf(file, "\n");
576619
}

python/grass/benchmark/runners.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def benchmark_resolutions(module, resolutions, label, repeat=5, nprocs=None):
173173
region = gs.region()
174174
n_cells.append(region["cells"])
175175
print("\u2500" * term_size.columns)
176-
print(f"Benchmark with {resolution} resolution...\n")
176+
print(f"Benchmark with resolution {resolution}...\n")
177177
time_sum = 0
178178
measured_times = []
179179
for _ in range(repeat):
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import time
2+
import numpy as np
3+
4+
5+
from grass.tools import Tools
6+
from grass.benchmark import (
7+
num_cells_plot,
8+
benchmark_resolutions,
9+
load_results,
10+
save_results,
11+
)
12+
13+
14+
class TimeMeasurer:
15+
def __init__(self):
16+
self._time = None
17+
self._start = None
18+
19+
@property
20+
def time(self):
21+
return self._time
22+
23+
def start(self):
24+
self._start = time.perf_counter()
25+
26+
def stop(self):
27+
self._time = time.perf_counter() - self._start
28+
29+
30+
class PlainNumPyBenchmark(TimeMeasurer):
31+
def run(self):
32+
tools = Tools()
33+
region = tools.g_region(flags="p", format="json")
34+
a = np.full((region["rows"], region["cols"]), 1)
35+
b = np.full((region["rows"], region["cols"]), 1)
36+
37+
self.start()
38+
c = 2 * np.sqrt(a + b) * np.sqrt(a) + np.sqrt(b) + a / 2
39+
self.stop()
40+
41+
print(c.sum())
42+
print(c.size)
43+
44+
del a
45+
del b
46+
del c
47+
48+
49+
class PlainGRASSBenchmark(TimeMeasurer):
50+
def run(self):
51+
tools = Tools(overwrite=True)
52+
tools.r_mapcalc(expression="a = 1")
53+
tools.r_mapcalc(expression="b = 1")
54+
55+
self.start()
56+
tools.r_mapcalc(expression="c = 2 * sqrt(a + b) * sqrt(a) * sqrt(b) + a / 2")
57+
self.stop()
58+
59+
c_stats = tools.r_univar(map="c", format="json")
60+
print(c_stats["sum"])
61+
print(c_stats["cells"])
62+
63+
64+
class NumPyGRASSBenchmark(TimeMeasurer):
65+
def run(self):
66+
tools = Tools()
67+
region = tools.g_region(flags="p", format="json")
68+
a = np.full((region["rows"], region["cols"]), 1)
69+
b = np.full((region["rows"], region["cols"]), 1)
70+
71+
self.start()
72+
c = tools.r_mapcalc_simple(
73+
expression="2* sqrt(A + B) * sqrt(A) * sqrt(B) + A / 2",
74+
a=a,
75+
b=b,
76+
output=np.array,
77+
)
78+
self.stop()
79+
80+
c_stats = tools.r_univar(map=c, format="json")
81+
print(c_stats["sum"])
82+
print(c_stats["cells"])
83+
84+
del a
85+
del b
86+
del c
87+
88+
89+
def main():
90+
resolutions = [5, 2, 1, 0.5]
91+
repeat = 10
92+
results = [
93+
benchmark_resolutions(
94+
module=PlainNumPyBenchmark(),
95+
label="NumPy",
96+
resolutions=resolutions,
97+
repeat=repeat,
98+
),
99+
benchmark_resolutions(
100+
module=PlainGRASSBenchmark(),
101+
label="GRASS",
102+
resolutions=resolutions,
103+
repeat=repeat,
104+
),
105+
benchmark_resolutions(
106+
module=NumPyGRASSBenchmark(),
107+
label="NumPy GRASS",
108+
resolutions=resolutions,
109+
repeat=repeat,
110+
),
111+
]
112+
print(results)
113+
results = load_results(save_results(results))
114+
print(results)
115+
plot_file = "test_res_plot.png"
116+
num_cells_plot(results.results, filename=plot_file)
117+
print(plot_file)
118+
119+
120+
if __name__ == "__main__":
121+
main()

python/grass/tools/session_tools.py

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class Tools:
3838
3939
>>> from grass.tools import Tools
4040
>>> tools = Tools(session=session)
41-
>>> tools.g_region(rows=100, cols=100) # doctest: +ELLIPSIS
41+
>>> tools.g_region(rows=100, cols=100)
4242
>>> tools.r_random_surface(output="surface", seed=42)
4343
4444
For tools outputting JSON, the results can be accessed directly:
@@ -71,6 +71,50 @@ class Tools:
7171
of strings as parameters (*run_cmd* and *call_cmd*).
7272
When a tool is run using the function corresponding to its name, the *run* function
7373
is used in the background.
74+
75+
Raster input and outputs can be NumPy arrays:
76+
77+
>>> import numpy as np
78+
>>> tools.g_region(rows=2, cols=3)
79+
>>> slope = tools.r_slope_aspect(elevation=np.ones((2, 3)), slope=np.ndarray)
80+
>>> tools.r_grow(
81+
... input=np.array([[1, np.nan, np.nan], [np.nan, np.nan, np.nan]]),
82+
... radius=1.5,
83+
... output=np.ndarray,
84+
... )
85+
array([[1., 1., 0.],
86+
[1., 1., 0.]])
87+
88+
The input array's shape and the computational region rows and columns need to
89+
match. The output array's shape is determined by the computational region.
90+
91+
When multiple outputs are returned, they are returned as a tuple:
92+
93+
>>> (slope, aspect) = tools.r_slope_aspect(
94+
... elevation=np.ones((2, 3)), slope=np.array, aspect=np.array
95+
... )
96+
97+
To access the arrays by name, e.g., with a high number of output arrays,
98+
the standard result object can be requested with *consistent_return_value*:
99+
100+
>>> tools = Tools(session=session, consistent_return_value=True)
101+
>>> result = tools.r_slope_aspect(
102+
... elevation=np.ones((2, 3)), slope=np.array, aspect=np.array
103+
... )
104+
105+
The result object than includes the arrays under the *arrays* attribute
106+
where they can be accessed as attributes by names corresponding to the
107+
output parameter names:
108+
109+
>>> slope = result.arrays.slope
110+
>>> aspect = result.arrays.aspect
111+
112+
Using `consistent_return_value=True` is also advantageous to obtain both arrays
113+
and text outputs from the tool as the result object has the same
114+
attributes and functionality as without arrays:
115+
116+
>>> result.text
117+
''
74118
"""
75119

76120
def __init__(
@@ -127,6 +171,8 @@ def __init__(
127171
*text* attributes of the result object will evaluate to `False`). This is
128172
advantageous when examining the *stdout* or *text* attributes directly, or
129173
when using the *returncode* attribute in combination with `errors="ignore"`.
174+
Additionally, this can be used to obtain both NumPy arrays and text outputs
175+
from a tool call.
130176
131177
If *env* or other *Popen* arguments are provided to one of the tool running
132178
functions, the constructor parameters except *errors* are ignored.
@@ -214,13 +260,39 @@ def run(self, tool_name_: str, /, **kwargs):
214260
# Get a fixed env parameter at at the beginning of each execution,
215261
# but repeat it every time in case the referenced environment is modified.
216262
args, popen_options = gs.popen_args_command(tool_name_, **kwargs)
263+
264+
# Compute the environment for subprocesses and store it for later use.
265+
if "env" not in popen_options:
266+
popen_options["env"] = self._modified_env_if_needed()
267+
268+
object_parameter_handler.translate_objects_to_data(
269+
kwargs, env=popen_options["env"]
270+
)
271+
217272
# We approximate original kwargs with the possibly-modified kwargs.
218-
return self.run_cmd(
273+
result = self.run_cmd(
219274
args,
220275
tool_kwargs=kwargs,
221276
input=object_parameter_handler.stdin,
222277
**popen_options,
223278
)
279+
use_objects = object_parameter_handler.translate_data_to_objects(
280+
kwargs, env=popen_options["env"]
281+
)
282+
if use_objects:
283+
if self._consistent_return_value:
284+
result.set_arrays(object_parameter_handler.all_array_results)
285+
else:
286+
result = object_parameter_handler.result
287+
288+
if object_parameter_handler.temporary_rasters:
289+
self.call(
290+
"g.remove",
291+
type="raster",
292+
name=object_parameter_handler.temporary_rasters,
293+
flags="f",
294+
)
295+
return result
224296

225297
def run_cmd(
226298
self,

0 commit comments

Comments
 (0)