@@ -45,23 +45,42 @@ def get_metadata_from_database() -> pd.DataFrame:
4545 return pv_systems_df
4646
4747
48- def get_pv_power_from_database (history_duration : timedelta ) -> pd .DataFrame :
48+ def get_pv_power_from_database (
49+ history_duration : timedelta , interpolate_minutes : int , load_extra_minutes : int
50+ ) -> pd .DataFrame :
4951 """
5052 Get pv power from database
5153
52- Returns: pandas data frame with the following columns
53- - pv systems indexes
54+ Args:
55+ history_duration: a timedelta of how many minutes to load in the past
56+ interpolate_minutes: how many minutes we should interpolate the data forward for
57+ load_extra_minutes: the extra minutes we should load, in order to load more data.
58+ This is because some data from a site lags significantly behind 'now'
59+
60+ Returns: pandas data frame whose columns are the pv systems indexes.
5461 The index is the datetime
5562
5663 """
5764
65+ logger .info ("Loading PV data from database" )
66+ logger .debug (f"{ history_duration = } { interpolate_minutes = } { load_extra_minutes = } " )
67+
68+ extra_duration = timedelta (minutes = load_extra_minutes )
69+ now = datetime .now (tz = timezone .utc )
70+ start_utc = now - history_duration
71+ start_utc_extra = start_utc - extra_duration
72+
73+ # create empty dataframe with 5 mins periods
74+ empty_df = pd .DataFrame (index = pd .date_range (start = start_utc_extra , end = now , freq = "5T" ))
75+
5876 # make database connection
5977 url = os .getenv ("DB_URL_PV" )
6078 db_connection = DatabaseConnection (url = url , base = Base_PV )
6179
6280 with db_connection .get_session () as session :
63- start_utc = datetime .now (tz = timezone .utc ) - history_duration
64- pv_yields : List [PVYieldSQL ] = get_pv_yield (session = session , start_utc = start_utc )
81+ pv_yields : List [PVYieldSQL ] = get_pv_yield (session = session , start_utc = start_utc_extra )
82+
83+ logger .debug (f"Found { len (pv_yields )} PV yields from the database" )
6584
6685 pv_yields_df = pd .DataFrame (
6786 [(PVYield .from_orm (pv_yield )).__dict__ for pv_yield in pv_yields ]
@@ -72,9 +91,10 @@ def get_pv_power_from_database(history_duration: timedelta) -> pd.DataFrame:
7291 else :
7392 logger .debug (f"Found { len (pv_yields_df )} pv yields" )
7493
75- # get the system id from 'pv_system_id=xxxx provider=.....'
76- print (pv_yields_df .columns )
77- print (pv_yields_df ["pv_system" ])
94+ if len (pv_yields_df ) == 0 :
95+ return pv_yields_df
96+
97+ # get the system id from 'pv_system_id=xxxx provider=.....'
7898 pv_yields_df ["pv_system_id" ] = (
7999 pv_yields_df ["pv_system" ].astype (str ).str .split (" " ).str [0 ].str .split ("=" ).str [- 1 ]
80100 )
@@ -90,7 +110,16 @@ def get_pv_power_from_database(history_duration: timedelta) -> pd.DataFrame:
90110
91111 pv_yields_df .columns = encode_label (pv_yields_df .columns , label = "pvoutput" )
92112
93- # interpolate in between, maximum 30 mins
94- pv_yields_df .interpolate (limit = 3 , limit_area = "inside" , inplace = True )
113+ # interpolate forward, maximum 'interpolate_minutes' mins
114+ # note data is in 5 minutes chunks
115+ pv_yields_df = empty_df .join (pv_yields_df )
116+ limit = int (interpolate_minutes / 5 )
117+ if limit > 0 :
118+ pv_yields_df .interpolate (limit = limit , inplace = True )
119+
120+ # filter out the extra minutes loaded
121+ logger .debug (f"{ len (pv_yields_df )} of datetimes before filter on { start_utc } " )
122+ pv_yields_df = pv_yields_df [pv_yields_df .index >= start_utc ]
123+ logger .debug (f"{ len (pv_yields_df )} of datetimes after filter on { start_utc } " )
95124
96125 return pv_yields_df
0 commit comments