@@ -158,21 +158,34 @@ def load_turbine_data_from_oedb(schema="supply", table="wind_turbine_library"):
158
158
159
159
160
160
def store_turbine_data_from_oedb (
161
- schema = "supply" , table = "wind_turbine_library"
161
+ schema = "supply" , table = "wind_turbine_library" , threshold = 0.2
162
162
):
163
163
r"""
164
164
Loads turbine library from the OpenEnergy database (oedb).
165
165
166
166
Turbine data is saved to csv files ('oedb_power_curves.csv',
167
167
'oedb_power_coefficient_curves.csv' and 'oedb_nominal_power') for offline
168
168
usage of the windpowerlib. If the files already exist they are overwritten.
169
+ In case the turbine library on the oedb contains too many faulty turbines,
170
+ the already existing files are not overwritten. The accepted percentage of faulty
171
+ turbines can be set through the parameter `threshold`.
169
172
170
173
Parameters
171
174
----------
172
175
schema : str
173
176
Database schema of the turbine library.
174
177
table : str
175
178
Table name of the turbine library.
179
+ threshold : float
180
+ In case there are turbines in the turbine library with faulty data (e.g.
181
+ duplicate wind speed entries in the power (coefficient) curve data), the
182
+ threshold defines the share of accepted faulty turbine data up to which the
183
+ existing turbine data is overwritten by the newly downloaded data.
184
+ For example, a threshold of 0.1 means that more than 10% of the
185
+ turbines would need to have invalid data in order to discard the downloaded
186
+ data. This is to make sure that in the rare case the oedb data is too buggy,
187
+ the turbine data that is by default provided with the windpowerlib is not
188
+ overwritten by poor data.
176
189
177
190
Returns
178
191
-------
@@ -182,11 +195,40 @@ def store_turbine_data_from_oedb(
182
195
183
196
"""
184
197
turbine_data = fetch_turbine_data_from_oedb (schema = schema , table = table )
185
- # standard file name for saving data
186
- filename = os .path .join (os .path .dirname (__file__ ), "oedb" , "{0}.csv" )
198
+ turbine_data = _process_and_save_oedb_data (
199
+ turbine_data , threshold = threshold
200
+ )
201
+ check_turbine_data (
202
+ filename = os .path .join (os .path .dirname (__file__ ), "oedb" , "{0}.csv" )
203
+ )
204
+ return turbine_data
205
+
206
+
207
+ def _process_and_save_oedb_data (turbine_data , threshold = 0.2 ):
208
+ """
209
+ Helper function to extract power (coefficient) curve data from the turbine library.
210
+
211
+ Parameters
212
+ -----------
213
+ turbine_data : :pandas:`pandas.DataFrame<frame>`
214
+ Raw turbine data downloaded from the oedb with
215
+ :func:`fetch_turbine_data_from_oedb`.
216
+ threshold : float
217
+ See parameter `threshold` in :func:`store_turbine_data_from_oedb`
218
+ for more information.
219
+
220
+ Returns
221
+ --------
222
+ :pandas:`pandas.DataFrame<frame>`
223
+ Turbine data of different turbines such as 'manufacturer',
224
+ 'turbine_type', 'nominal_power'.
187
225
188
- # get all power (coefficient) curves and save them to file
189
- for curve_type in ["power_curve" , "power_coefficient_curve" ]:
226
+ """
227
+ curve_types = ["power_curve" , "power_coefficient_curve" ]
228
+ # get all power (coefficient) curves
229
+ curve_dict = {}
230
+ broken_turbines_dict = {}
231
+ for curve_type in curve_types :
190
232
broken_turbine_data = []
191
233
curves_df = pd .DataFrame (columns = ["wind_speed" ])
192
234
for index in turbine_data .index :
@@ -222,67 +264,82 @@ def store_turbine_data_from_oedb(
222
264
curves_df = pd .merge (
223
265
left = curves_df , right = df , how = "outer" , on = "wind_speed"
224
266
)
267
+ else :
268
+ broken_turbine_data .append (
269
+ turbine_data .loc [index , "turbine_type" ])
225
270
except :
226
271
broken_turbine_data .append (turbine_data .loc [index , "turbine_type" ])
227
-
228
- # warning in case of broken turbine data
229
- if len (broken_turbine_data ) > 0 :
230
- issue_link = ("https://github.com/OpenEnergyPlatform/data-preprocessing"
231
- "/issues/28" )
232
- # in case only some data is faulty, only give out warning
233
- if len (broken_turbine_data ) < 0.2 * len (turbine_data ):
234
- logging .warning (
235
- f"The turbine library data contains faulty { curve_type } s. The "
236
- f"{ curve_type } data can therefore not be loaded for the following "
237
- f"turbines: { broken_turbine_data } . "
238
- f"Please report this in the following issue, in case it hasn't "
239
- f"already been reported: { issue_link } "
240
- )
241
- save_turbine_data = True
272
+ curve_dict [curve_type ] = curves_df
273
+ broken_turbines_dict [curve_type ] = broken_turbine_data
274
+
275
+ # check if there are faulty turbines and if so, raise warning
276
+ # if there are too many, don't save downloaded data to disk but keep existing data
277
+ if any (len (_ ) > 0 for _ in broken_turbines_dict .values ()):
278
+ issue_link = ("https://github.com/OpenEnergyPlatform/data-preprocessing"
279
+ "/issues/28" )
280
+ # in case only some data is faulty, only give out warning
281
+ if all (len (_ ) < threshold * len (turbine_data )
282
+ for _ in broken_turbines_dict .values ()):
283
+ save_turbine_data = True
284
+ for curve_type in curve_types :
285
+ if len (broken_turbines_dict [curve_type ]) > 0 :
286
+ logging .warning (
287
+ f"The turbine library data contains faulty { curve_type } s. The "
288
+ f"{ curve_type } data can therefore not be loaded for the "
289
+ f"following turbines: { broken_turbine_data } . "
290
+ f"Please report this in the following issue, in case it hasn't "
291
+ f"already been reported: { issue_link } "
292
+ )
242
293
# set has_power_(coefficient)_curve to False for faulty turbines
243
- for turb in broken_turbine_data :
294
+ for turb in broken_turbines_dict [ curve_type ] :
244
295
ind = turbine_data [turbine_data .turbine_type == turb ].index [0 ]
245
296
col = ("has_power_curve" if curve_type == "power_curve"
246
297
else "has_cp_curve" )
247
298
turbine_data .at [ind , col ] = False
248
- # in case most data is faulty, do not store downloaded data
249
- else :
250
- logging .warning (
251
- f"The turbine library data contains too many faulty { curve_type } s,"
252
- f"wherefore { curve_type } data is not loaded from the oedb. "
253
- f"Please report this in the following issue, in case it hasn't "
254
- f"already been reported: { issue_link } "
255
- )
256
- save_turbine_data = False
299
+ # in case most data is faulty, do not store downloaded data
257
300
else :
258
- save_turbine_data = True
259
-
260
- if save_turbine_data :
261
- curves_df = curves_df .set_index ("wind_speed" ).sort_index ().transpose ()
301
+ logging .warning (
302
+ f"The turbine library data contains too many faulty turbine datasets "
303
+ f"wherefore it is not loaded from the oedb. "
304
+ f"In case you want to circumvent this behaviour, you can specify a "
305
+ f"higher tolerance through the parameter 'threshold'."
306
+ f"Please report this in the following issue, in case it hasn't "
307
+ f"already been reported: { issue_link } "
308
+ )
309
+ save_turbine_data = False
310
+ else :
311
+ save_turbine_data = True
312
+
313
+ if save_turbine_data :
314
+ # standard file name for saving data
315
+ filename = os .path .join (os .path .dirname (__file__ ), "oedb" , "{0}.csv" )
316
+ # save curve data to csv
317
+ for curve_type in curve_types :
318
+ curves_df = curve_dict [curve_type ].set_index (
319
+ "wind_speed" ).sort_index ().transpose ()
262
320
# power curve values in W
263
321
if curve_type == "power_curve" :
264
322
curves_df *= 1000
265
323
curves_df .index .name = "turbine_type"
266
324
curves_df .sort_index (inplace = True )
267
325
curves_df .to_csv (filename .format ("{}s" .format (curve_type )))
268
326
269
- # get turbine data and save to file (excl. curves)
270
- turbine_data_df = turbine_data .drop (
271
- [
272
- "power_curve_wind_speeds" ,
273
- "power_curve_values" ,
274
- "power_coefficient_curve_wind_speeds" ,
275
- "power_coefficient_curve_values" ,
276
- "thrust_coefficient_curve_wind_speeds" ,
277
- "thrust_coefficient_curve_values" ,
278
- ],
279
- axis = 1 ,
280
- ).set_index ("turbine_type" )
281
- # nominal power in W
282
- turbine_data_df ["nominal_power" ] *= 1000
283
- turbine_data_df .sort_index (inplace = True )
284
- turbine_data_df .to_csv (filename .format ("turbine_data" ))
285
- check_turbine_data (filename )
327
+ # save turbine data to file (excl. curves)
328
+ turbine_data_df = turbine_data .drop (
329
+ [
330
+ "power_curve_wind_speeds" ,
331
+ "power_curve_values" ,
332
+ "power_coefficient_curve_wind_speeds" ,
333
+ "power_coefficient_curve_values" ,
334
+ "thrust_coefficient_curve_wind_speeds" ,
335
+ "thrust_coefficient_curve_values" ,
336
+ ],
337
+ axis = 1 ,
338
+ ).set_index ("turbine_type" )
339
+ # nominal power in W
340
+ turbine_data_df ["nominal_power" ] *= 1000
341
+ turbine_data_df .sort_index (inplace = True )
342
+ turbine_data_df .to_csv (filename .format ("turbine_data" ))
286
343
return turbine_data
287
344
288
345
0 commit comments