@@ -45,23 +45,40 @@ def get_metadata_from_database() -> pd.DataFrame:
4545 return pv_systems_df
4646
4747
48- def get_pv_power_from_database (history_duration : timedelta ) -> pd .DataFrame :
48+ def get_pv_power_from_database (
49+ history_duration : timedelta , interpolate_minutes : int = 30 , load_extra_minutes : int = 60
50+ ) -> pd .DataFrame :
4951 """
5052 Get pv power from database
5153
54+ :param history_duration: a timedelta of how many minutes to load in the past
55+ :param interpolate_minutes: how many minutes we should interpolate the data forward for
56+ :param load_extra_minutes: the extra minutes we should load, in order to load more data.
57+ This is because some data from a site lags significantly behind 'now'
58+
5259 Returns: pandas data frame with the following columns
5360 - pv systems indexes
5461 The index is the datetime
5562
5663 """
5764
65+ logger .info ("Loading PV data from database" )
66+ logger .debug (f"{ history_duration = } { interpolate_minutes = } { load_extra_minutes = } " )
67+
68+ extra_duration = timedelta (minutes = load_extra_minutes )
69+ now = datetime .now (tz = timezone .utc )
70+ start_utc = now - history_duration
71+ start_utc_extra = start_utc - extra_duration
72+
73+ # create empty dataframe with 5 mins periods
74+ empty_df = pd .DataFrame (index = pd .date_range (start = start_utc_extra , end = now , freq = "5T" ))
75+
5876 # make database connection
5977 url = os .getenv ("DB_URL_PV" )
6078 db_connection = DatabaseConnection (url = url , base = Base_PV )
6179
6280 with db_connection .get_session () as session :
63- start_utc = datetime .now (tz = timezone .utc ) - history_duration
64- pv_yields : List [PVYieldSQL ] = get_pv_yield (session = session , start_utc = start_utc )
81+ pv_yields : List [PVYieldSQL ] = get_pv_yield (session = session , start_utc = start_utc_extra )
6582
6683 pv_yields_df = pd .DataFrame (
6784 [(PVYield .from_orm (pv_yield )).__dict__ for pv_yield in pv_yields ]
@@ -72,9 +89,10 @@ def get_pv_power_from_database(history_duration: timedelta) -> pd.DataFrame:
7289 else :
7390 logger .debug (f"Found { len (pv_yields_df )} pv yields" )
7491
75- # get the system id from 'pv_system_id=xxxx provider=.....'
76- print (pv_yields_df .columns )
77- print (pv_yields_df ["pv_system" ])
92+ if len (pv_yields_df ) == 0 :
93+ return pv_yields_df
94+
95+ # get the system id from 'pv_system_id=xxxx provider=.....'
7896 pv_yields_df ["pv_system_id" ] = (
7997 pv_yields_df ["pv_system" ].astype (str ).str .split (" " ).str [0 ].str .split ("=" ).str [- 1 ]
8098 )
@@ -90,7 +108,14 @@ def get_pv_power_from_database(history_duration: timedelta) -> pd.DataFrame:
90108
91109 pv_yields_df .columns = encode_label (pv_yields_df .columns , label = "pvoutput" )
92110
93- # interpolate in between, maximum 30 mins
94- pv_yields_df .interpolate (limit = 3 , limit_area = "inside" , inplace = True )
111+ # interpolate in between, maximum 'interpolate_minutes' mins
112+ # note data is in 5 minutes chunks
113+ pv_yields_df = empty_df .join (pv_yields_df )
114+ limit = int (interpolate_minutes / 5 )
115+ if limit > 0 :
116+ pv_yields_df .interpolate (limit = limit , inplace = True )
117+
118+ # filter out the extra minutes loaded
119+ pv_yields_df = pv_yields_df [pv_yields_df .index >= start_utc ]
95120
96121 return pv_yields_df
0 commit comments