Skip to content

Commit 864d879

Browse files
committed
Adding c8y_tk.analytics package and to_numpy, to_series and to_data_frame functions.
1 parent e3fb464 commit 864d879

File tree

3 files changed

+585
-0
lines changed

3 files changed

+585
-0
lines changed

c8y_tk/analytics/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Copyright (c) 2025 Cumulocity GmbH
2+
3+
from c8y_tk.analytics._wrappers import to_data_frame, to_numpy, to_series

c8y_tk/analytics/_wrappers.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# Copyright (c) 2025 Cumulocity GmbH
2+
3+
from __future__ import annotations
4+
5+
import re
6+
7+
import numpy as np
8+
import pandas as pd
9+
10+
from c8y_api.model import Series
11+
12+
13+
def encode(n):
    """Return a column name with problematic characters replaced.

    Every blank, backslash, dot, plus and hyphen in `n` is substituted
    with an underscore; all other characters are kept as they are.
    """
    special_characters = re.compile(r'[ \\.+-]')
    return special_characters.sub('_', n)
16+
17+
18+
def to_numpy(data: Series, series: str | list[str] = None, value: str = None, timestamps: bool | str = None):
    """Build a NumPy array from a Cumulocity Series object.

    This function extracts the min and/or max values of one or multiple
    series defined within the Series object.
    The result is either a 1-dimensional array if only a single series
    and value is extracted or a 2-dimensional array if either multiple
    series and/or multiple values are extracted.

    The array's 'columns' are ordered as defined via the `series` argument
    or as defined in the source Series object. If both min and max values
    are extracted, they will be grouped adjacent to each other in the result.

    If the `timestamps` argument is set, the result is a tuple of two NumPy
    arrays; the first holding the data, the second the isolated timestamps
    as 1-dimensional array.

    Args:
        data (Series): A c8y_api Series object
        series (str|list): A series' name or a collection of series names;
            If omitted, all available series are extracted.
        value (str): The value (min/max) to extract; If omitted, both min
            and max will be extracted.
        timestamps (bool|str): Whether to extract the series timestamps;
            If set, this function returns a tuple (data, timestamps).
            The argument is forwarded unchanged to `data.collect`; with
            'datetime' the collected timestamps are additionally converted
            via `pandas.to_datetime`. Other modes (True, 'epoch') are
            returned as provided by `data.collect`.

    Returns:
        A NumPy array or a 2-tuple of NumPy arrays if timestamps are included.
    """
    collected = data.collect(series=series, value=value, timestamps=timestamps)

    # handle empty result separately
    if not collected:
        return (np.empty(0), np.empty(0)) if timestamps else np.empty(0)

    # Split the leading timestamp off every record if requested. The
    # extracted values are kept in their own variable so that `timestamps`
    # still holds the requested mode when it is checked further below.
    timestamp_values = None
    if timestamps:
        timestamp_values = [x[0] for x in collected]
        collected = [x[1:] for x in collected]

    # if there are multiple series and both min/max values are collected,
    # we need to flatten these min/max tuples; timestamps are separate
    array = np.array(collected)
    if array.ndim > 1:
        if array.ndim == 2 and array.shape[1] == 1:
            # a single value per record -> plain 1-dimensional array
            array = array.reshape(len(collected))
        else:
            array = array.reshape(len(collected), -1)

    if timestamp_values is None:
        return array

    # timestamps cannot be part of the result as they may have a different
    # data type (string or datetime), hence we return a tuple in this case
    if timestamps == 'datetime':
        timestamp_values = pd.to_datetime(timestamp_values)
    return array, np.array(timestamp_values)
77+
78+
79+
def to_data_frame(data: Series, series: str | list[str] = None, value: str = None,
                  timestamps: bool | str = None):
    """Build a Pandas DataFrame from a Cumulocity Series object.

    Args:
        data (Series): A c8y_api Series object
        series (str|list): A series' name or a collection of series names;
            If omitted, all available series are extracted. The series names
            will be used as column names (special characters will be replaced)
        value (str): The value (min/max) to extract; If omitted, both min
            and max will be extracted and the column names will be suffixed
            accordingly.
        timestamps (bool|str): Whether to extract the series timestamps as
            index; the argument is forwarded unchanged to `to_numpy`.

    Returns:
        A Pandas DataFrame object; it has no rows (but all columns) if
        there is no data.
    """

    def assemble_column_names():
        names = series if not isinstance(series, str) else [series]
        encoded_names = [encode(n) for n in names]
        # we don't append min/max suffixes if there is only one column
        if isinstance(value, str):
            return encoded_names
        return [f'{n}_{v}' for n in encoded_names for v in ['min', 'max']]

    # default to all series defined within the source object
    if not series:
        series = [s.series for s in data.specs]

    columns = assemble_column_names()

    if timestamps:
        array, array_ts = to_numpy(data, series=series, value=value, timestamps=timestamps)
    else:
        array, array_ts = to_numpy(data, series=series, value=value), None

    # An empty result is always 1-dimensional, which pandas rejects as soon
    # as more than one column name is given; give it the shape the column
    # names imply (zero rows) instead.
    if array.size == 0:
        array = array.reshape(0, len(columns))

    return pd.DataFrame(data=array, columns=columns, index=array_ts)
118+
119+
120+
def to_series(data: Series, series: str = None, value: str = 'min', timestamps: bool | str = None):
    """Build a Pandas Series from a Cumulocity Series object.

    The resulting Pandas Series is named after the (encoded) series name.

    Args:
        data (Series): A c8y_api Series object
        series (str): A series' name; can be left blank if `data` holds only
            the values of one series
        value (str): The value (min/max) to extract; defaults to 'min'
        timestamps (bool|str): Whether to extract the series' timestamps as
            index; the argument is forwarded unchanged to `to_numpy`.

    Returns:
        A Pandas Series object.

    Raises:
        ValueError: If no series name is given and `data` defines more
            than one series.
    """
    # without an explicit name the source must define exactly one series
    if not series:
        candidates = [spec.series for spec in data.specs]
        if len(data.specs) > 1:
            raise ValueError(f"Multiple potential series found ({', '.join(candidates)}).")
        series = candidates[0]

    index = None
    if timestamps:
        values, index = to_numpy(data, series=series, value=value, timestamps=timestamps)
    else:
        values = to_numpy(data, series=series, value=value)

    return pd.Series(values, index=index, name=encode(series))

0 commit comments

Comments
 (0)