22import logging
33import os
44import threading
5- from datetime import datetime , timezone
5+ from datetime import datetime , timezone , timedelta
66from typing import List , Optional , Any , Dict , Tuple
77
8- import pandas as pd
98from flask import Flask
109
1110from investing_algorithm_framework .app .algorithm import Algorithm
1918 AppMode , BacktestDateRange , DATABASE_DIRECTORY_NAME , DataSource , \
2019 BACKTESTING_INITIAL_AMOUNT , SNAPSHOT_INTERVAL , Backtest , DataError , \
2120 PortfolioConfiguration , SnapshotInterval , DataType , combine_backtests , \
22- PortfolioProvider , OrderExecutor , ImproperlyConfigured , \
21+ PortfolioProvider , OrderExecutor , ImproperlyConfigured , TimeFrame , \
2322 DataProvider , INDEX_DATETIME , tqdm , BacktestPermutationTest , \
2423 LAST_SNAPSHOT_DATETIME , BACKTESTING_FLAG , generate_backtest_summary_metrics
2524from investing_algorithm_framework .infrastructure import setup_sqlalchemy , \
@@ -794,8 +793,9 @@ def get_market_credentials(self) -> List[MarketCredential]:
794793 def check_data_completeness (
795794 self ,
796795 strategies : List [TradingStrategy ],
797- backtest_date_range : BacktestDateRange
798- ) -> None :
796+ backtest_date_range : BacktestDateRange ,
797+ show_progress : bool = True
798+ ) -> Tuple [bool , Dict [str , Any ]]:
799799 """
800800 Function to check the data completeness for a set of strategies
801801 over a given backtest date range. This method checks if all data
@@ -807,18 +807,23 @@ def check_data_completeness(
807807 to check data completeness for.
808808 backtest_date_range (BacktestDateRange): The date range to
809809 check data completeness for.
810+ show_progress (bool): Whether to show a progress bar when
811+ checking data completeness.
810812 Returns:
811- None
813+ Tuple[bool, Dict[str, Any]]: A tuple containing a boolean
814+ indicating if the data is complete and a dictionary
815+ with information about missing data for each data source.
812816 """
813817 data_sources = []
818+ missing_data_info = {}
814819
815820 for strategy in strategies :
816821 data_sources .extend (strategy .data_sources )
817822
818823 self .initialize_data_sources_backtest (
819824 data_sources ,
820825 backtest_date_range ,
821- show_progress = True
826+ show_progress = show_progress
822827 )
823828 data_provider_service = self .container .data_provider_service ()
824829
@@ -827,37 +832,60 @@ def check_data_completeness(
827832 for data_source in strategy .data_sources :
828833
829834 if DataType .OHLCV .equals (data_source .data_type ):
830- df = data_provider_service .get_ohlcv_data (
831- symbol = data_source .symbol ,
832- start_date = backtest_date_range .start_date ,
833- end_date = backtest_date_range .end_date ,
834- pandas = True ,
835- add_pandas_index = False ,
836- add_datetime_column = True ,
837- time_frame = data_source .time_frame
838- )
839- df = df .copy ()
840- df ['Datetime' ] = pd .to_datetime (df ['Datetime' ])
841- df = df .sort_values ('Datetime' )\
842- .tail (data_source .window_size )
843- start = df ['Datetime' ].iloc [0 ]
844- end = df ['Datetime' ].iloc [- 1 ]
845- freq = pd .to_timedelta (data_source .time_frame .value )
846- expected = pd .date_range (start , end , freq = freq )
847- actual = df ['Datetime' ]
848- missing = expected .difference (actual )
849-
850- # Calculate the percentage completeness
851- completeness = len (actual ) / len (expected ) * 100
852-
853- if completeness < 100 :
835+ required_start_date = backtest_date_range .start_date - \
836+ timedelta (
837+ minutes = TimeFrame .from_value (
838+ data_source .time_frame
839+ ).amount_of_minutes * data_source .window_size
840+ )
841+ number_of_required_data_points = \
842+ data_source .get_number_of_required_data_points (
843+ backtest_date_range .start_date ,
844+ backtest_date_range .end_date
845+ )
846+
847+ try :
848+ data_provider = data_provider_service .get (data_source )
849+ number_of_available_data_points = \
850+ data_provider .get_number_of_data_points (
851+ backtest_date_range .start_date ,
852+ backtest_date_range .end_date
853+ )
854+
855+ missing_dates = \
856+ data_provider .get_missing_data_dates (
857+ required_start_date ,
858+ backtest_date_range .end_date
859+ )
860+ if number_of_available_data_points > 0 :
861+ missing_data_info [data_source .identifier ] = {
862+ "data_source_id" : data_source .identifier ,
863+ "completeness_percentage" : (
864+ (
865+ number_of_available_data_points /
866+ number_of_required_data_points
867+ ) * 100
868+ ),
869+ "missing_data_points" : len (
870+ missing_dates
871+ ),
872+ "missing_dates" : missing_dates ,
873+ "data_source_file_path" :
874+ data_provider .get_data_source_file_path ()
875+ }
876+
877+ except Exception as e :
854878 raise DataError (
855- f"Data completeness for data source "
879+ f"Error getting data provider for data source "
856880 f"{ data_source .identifier } "
857- f"({ data_source .symbol } ) is { completeness :.2f} % "
858- f"complete. Missing data points: { len (missing )} "
881+ f"({ data_source .symbol } ): { str (e )} "
859882 )
860883
884+ if len (missing_data_info .keys ()) > 0 :
885+ return False , missing_data_info
886+
887+ return True , missing_data_info
888+
861889 def run_vector_backtests (
862890 self ,
863891 initial_amount ,
@@ -1071,13 +1099,10 @@ def run_vector_backtest(
10711099 progress bar when initializing data sources.
10721100 market (str): The market to use for the backtest. This is used
10731101 to create a portfolio configuration if no portfolio
1074- configuration is found for the strategy. If not provided,
1075- the first portfolio configuration found will be used.
1102+ configuration is provided in the strategy.
10761103 trading_symbol (str): The trading symbol to use for the backtest.
10771104 This is used to create a portfolio configuration if no
1078- portfolio configuration is found for the strategy. If not
1079- provided, the first trading symbol found in the portfolio
1080- configuration will be used.
1105+ portfolio configuration is provided in the strategy.
10811106 initial_amount (float): The initial amount to start the
10821107 backtest with. This will be the amount of trading currency
10831108 that the portfolio will start with. If not provided,
@@ -1181,7 +1206,7 @@ def run_backtests(
11811206 backtest_date_ranges (List[BacktestDateRange]): List of date ranges
11821207 initial_amount (float): The initial amount to start the
11831208 backtest with. This will be the amount of trading currency
1184- that the portfolio will start with.
1209+ that the backtest portfolio will start with.
11851210 snapshot_interval (SnapshotInterval): The snapshot interval to use
11861211 for the backtest. This is used to determine how often the
11871212 portfolio snapshot should be taken during the backtest.
@@ -1406,11 +1431,11 @@ def run_permutation_test(
14061431 the risk-free rate from the US Treasury website.
14071432 market (str): The market to use for the backtest. This is used
14081433 to create a portfolio configuration if no portfolio
1409- configuration is found for the strategy. If not provided,
1434+ configuration is provided in the strategy. If not provided,
14101435 the first portfolio configuration found will be used.
14111436 trading_symbol (str): The trading symbol to use for the backtest.
14121437 This is used to create a portfolio configuration if no
1413- portfolio configuration is found for the strategy. If not
1438+ portfolio configuration is provided in the strategy. If not
14141439 provided, the first trading symbol found in the portfolio
14151440 configuration will be used.
14161441
0 commit comments