-
Notifications
You must be signed in to change notification settings - Fork 5k
feat: add comprehensive short selling support to qlib backtest framework #1986
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
JakobWong
wants to merge
16
commits into
microsoft:main
Choose a base branch
from
JakobWong:feature/shortable-trading-support
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
c6a9ae3
feat: add comprehensive short selling support to qlib backtest framework
2d88fc8
feat: enhance shortable trading system with Long-Short strategies
4fc0a22
fix: Apply Black formatting to resolve PR build failures
aaf7e8b
fix(backtest/data): align account/exchange freq to executor; robust c…
5ee5282
style: apply Black formatting to maintain code consistency
80c3ce9
feat: enhance LongShortTopKStrategy with improved long-only mode and …
8fdb7bc
feat: add configurable long_share parameter to LongShortTopKStrategy
82b8f8c
fix: improve floating point precision and add debug capabilities
eaacff4
fix: resolve circular import issues and move examples to project root
2011549
feat: add CryptoPortAnaRecord for crypto-specific portfolio analysis
f11018c
feat: add intelligent benchmark selection for multi-data source compa…
83c8b1a
refactor: convert Chinese comments to English across all files
5a16295
style: apply linter fixes across codebase
3f09cc4
style: apply comprehensive linter fixes and code improvements
eea20ff
style: final linter fixes and code formatting improvements
41e7936
config: restore default region to REG_CN; examples: set crypto region…
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import os | ||
import pandas as pd | ||
import qlib | ||
from qlib.data import D | ||
from qlib.constant import REG_CRYPTO | ||
|
||
from qlib.backtest.shortable_backtest import ShortableExecutor, ShortableAccount | ||
from qlib.backtest.shortable_exchange import ShortableExchange | ||
from qlib.backtest.decision import OrderDir | ||
from qlib.contrib.strategy.signal_strategy import LongShortTopKStrategy | ||
from qlib.backtest.utils import CommonInfrastructure | ||
|
||
|
||
def main():
    """Run a daily long-short TopK backtest on crypto perp data and print the metrics.

    The script initialises qlib against a local crypto provider, builds a
    ShortableExchange / ShortableAccount / ShortableExecutor trio, derives a
    one-day momentum score from close prices, and then steps the executor with
    decisions from LongShortTopKStrategy until the executor reports it is finished.
    """
    data_dir = os.path.expanduser("~/.qlib/qlib_data/crypto_data_perp")
    qlib.init(provider_uri=data_dir, region=REG_CRYPTO, kernels=1)

    start, end = pd.Timestamp("2021-07-11"), pd.Timestamp("2021-08-10")

    # Universe: keep at most the first 20 instruments listed for the window
    listed = D.list_instruments(D.instruments("all"), start_time=start, end_time=end, freq="day", as_list=True)
    codes = listed[:20]
    if not codes:
        print("No instruments.")
        return

    # Exchange: settings collected in one mapping, then expanded into the constructor
    exchange_settings = {
        "freq": "day",
        "start_time": start,
        "end_time": end,
        "codes": codes,
        "deal_price": "$close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 0.0,
        "impact_cost": 0.0,
        "limit_threshold": None,
    }
    ex = ShortableExchange(**exchange_settings)

    # Account and executor
    account = ShortableAccount(benchmark_config={"benchmark": None})
    exe = ShortableExecutor(
        time_per_step="day",
        generate_portfolio_metrics=True,
        trade_exchange=ex,
        region="crypto",
        verbose=False,
        account=account,
    )
    # The same infrastructure object is handed to the executor here and to the strategy below
    common_infra = CommonInfrastructure(trade_account=account, trade_exchange=ex)
    exe.reset(common_infra=common_infra, start_time=start, end_time=end)

    # Momentum signal for the whole period: close / previous close - 1 per instrument
    feat = D.features(codes, ["$close"], start, end, freq="day", disk_cache=True)
    if feat is None or feat.empty:
        print("No features to build signal.")
        return
    close = feat.sort_index()["$close"]
    previous_close = close.groupby(level="instrument").shift(1)
    # Series keeps the feature frame's MultiIndex; rows without a previous close are dropped
    signal_series = (close / previous_close - 1.0).rename("score").dropna()

    # Strategy (TopK-aligned, long-short)
    strat = LongShortTopKStrategy(
        topk_long=3,
        topk_short=3,
        n_drop_long=1,
        n_drop_short=1,
        only_tradable=False,
        forbid_all_trade_at_limit=True,
        signal=signal_series,
        trade_exchange=ex,
    )
    # Bind the strategy to the executor's level infra and the shared common infra
    strat.reset(level_infra=exe.get_level_infra(), common_infra=common_infra)

    # Step until the executor's calendar is exhausted
    while not exe.finished():
        exe.execute(strat.generate_trade_decision())

    # Output metrics
    df, meta = exe.trade_account.get_portfolio_metrics()
    print("Portfolio metrics meta:", meta)
    tail = df.tail() if hasattr(df, "tail") else df
    print("Portfolio df tail:\n", tail)
|
||
|
||
# Run the backtest demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import os | ||
import pandas as pd | ||
import qlib | ||
from qlib.data import D | ||
from qlib.constant import REG_CRYPTO | ||
from qlib.backtest.decision import OrderDir | ||
from qlib.backtest.shortable_exchange import ShortableExchange | ||
|
||
|
||
def main():
    """Print order-sizing diagnostics for a one-day long/short selection.

    Ranks up to 10 instruments by their latest one-step close-to-close return,
    takes the top 3 as long candidates and the bottom 3 as short candidates,
    and for each one prints the deal price, factor, trade unit, tradability
    and the raw / rounded share amount implied by a 50%/50% split of a fixed
    notional equity. Nothing is traded; the exchange is only queried.
    """
    provider = os.path.expanduser("~/.qlib/qlib_data/crypto_data_perp")
    qlib.init(provider_uri=provider, region=REG_CRYPTO, kernels=1)

    start = pd.Timestamp("2021-07-11")
    end = pd.Timestamp("2021-08-10")
    day = pd.Timestamp("2021-08-10")

    inst_conf = D.instruments("all")
    codes = D.list_instruments(inst_conf, start_time=start, end_time=end, freq="day", as_list=True)[:10]
    if not codes:
        # Nothing to inspect; bail out before building an exchange on an empty universe
        print("No instruments.")
        return

    ex = ShortableExchange(
        freq="day",
        start_time=start,
        end_time=end,
        codes=codes,
        deal_price="$close",
        open_cost=0.0005,
        close_cost=0.0015,
        min_cost=0.0,
        impact_cost=0.0,
        limit_threshold=None,
    )

    feat = D.features(codes, ["$close"], day - pd.Timedelta(days=10), day, freq="day", disk_cache=True)
    if feat is None or feat.empty:
        print("No features to build signal.")
        return
    # last() and iloc[-2] below are positional, so make the row order explicit
    feat = feat.sort_index()
    g = feat.groupby("instrument")["$close"]
    last = g.last()
    # Second-to-last close per instrument; instruments with fewer than two rows
    # yield NaN (removed by dropna below) instead of raising IndexError
    prev = g.apply(lambda s: s.iloc[-2] if len(s) >= 2 else float("nan"))
    sig = (last / prev - 1.0).dropna().sort_values(ascending=False)

    # NOTE: with fewer than 6 scored instruments, head(3) and tail(3) overlap
    longs = sig.head(3).index.tolist()
    shorts = sig.tail(3).index.tolist()

    equity = 1_000_000.0
    # max(..., 1) avoids division by zero when a leg is empty
    long_weight = 0.5 / max(len(longs), 1)
    short_weight = -0.5 / max(len(shorts), 1)

    print("day:", day.date())
    for leg, lst, w, dir_ in [
        ("LONG", longs, long_weight, OrderDir.BUY),
        ("SHORT", shorts, short_weight, OrderDir.SELL),
    ]:
        print(f"\n{leg} candidates:")
        for code in lst:
            # Best-effort diagnostics: report a failing instrument and keep going
            try:
                px = ex.get_deal_price(code, day, day, dir_)
                fac = ex.get_factor(code, day, day)
                unit = ex.get_amount_of_trade_unit(fac, code, day, day)
                tradable = ex.is_stock_tradable(code, day, day, dir_)
                raw = (w * equity) / px if px else 0.0
                rounded = ex.round_amount_by_trade_unit(abs(raw), fac) if px else 0.0
                if dir_ == OrderDir.SELL:
                    # Rounding is done on the absolute amount; restore the short sign
                    rounded = -rounded
                print(
                    code,
                    {
                        "price": px,
                        "factor": fac,
                        "unit": unit,
                        "tradable": tradable,
                        "raw_shares": raw,
                        "rounded": rounded,
                    },
                )
            except Exception as e:
                print(code, "error:", e)
|
||
|
||
# Run the diagnostics only when this file is executed as a script.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
""" | ||
Long-Short workflow by code (Crypto Perp). | ||
|
||
This script mirrors `workflow_by_code_longshort.py` but switches to a crypto futures | ||
dataset/provider and sets the benchmark to BTCUSDT. Other parts are kept the same. | ||
""" | ||
# pylint: disable=C0301 | ||
|
||
import sys | ||
import multiprocessing as mp | ||
import os | ||
import qlib | ||
from qlib.utils import init_instance_by_config, flatten_dict | ||
from qlib.constant import REG_CRYPTO | ||
|
||
|
||
# Script entry point: train an LGBModel on Alpha158 features from the crypto perp
# provider, record signals and signal analysis, then backtest LongShortTopKStrategy
# with ShortableExecutor / ShortableExchange. Set FAST_DEBUG=1 to shrink the run to
# the most recent 45 calendar entries.
if __name__ == "__main__":
    # Windows compatibility: spawn mode needs freeze_support and avoid heavy top-level imports
    if sys.platform.startswith("win"):
        mp.freeze_support()
    # Emulate Windows spawn on POSIX if needed
    if os.environ.get("WINDOWS_SPAWN_TEST") == "1" and not sys.platform.startswith("win"):
        try:
            mp.set_start_method("spawn", force=True)
        except RuntimeError:
            # Best effort only: keep the current start method if it cannot be changed
            pass
    # Lazy imports to avoid circular import issues on Windows spawn mode
    from qlib.workflow import R
    from qlib.workflow.record_temp import SignalRecord, SigAnaRecord
    from qlib.data import D

    # Initialize with crypto perp data provider (ensure this path exists in your env)
    PROVIDER_URI = "~/.qlib/qlib_data/crypto_data_perp"
    # Use crypto-specific region to align trading rules/calendars with provider data
    qlib.init(provider_uri=PROVIDER_URI, region=REG_CRYPTO, kernels=1)

    # Auto-select benchmark by data source: cn_data -> SH000300; crypto -> BTCUSDT
    # Fallback: if path not resolvable, default to SH000300 for safety
    try:
        from qlib.config import C

        data_roots = {k: str(C.dpm.get_data_uri(k)) for k in C.dpm.provider_uri.keys()}
        DATA_ROOTS_STR = " ".join(data_roots.values()).lower()
        IS_CN = "cn_data" in DATA_ROOTS_STR
        BENCHMARK_AUTO = "SH000300" if IS_CN else "BTCUSDT"
    except Exception:  # pylint: disable=W0718
        BENCHMARK_AUTO = "SH000300"

    # Dataset & model
    data_handler_config = {
        "start_time": "2019-01-02",
        "end_time": "2025-08-07",
        "fit_start_time": "2019-01-02",
        "fit_end_time": "2022-12-19",
        "instruments": "all",
        "label": ["Ref($close, -2) / Ref($close, -1) - 1"],
    }

    DEBUG_FAST = os.environ.get("FAST_DEBUG") == "1"
    # Debug segment bounds; they stay None unless the tiny window below can be derived
    VALID_START_DT = VALID_END_DT = TEST_START_DT = TEST_END_DT = None
    # True only when FAST_DEBUG is on AND the calendar is long enough for the window
    DEBUG_WINDOW_READY = False
    if DEBUG_FAST:
        # Use the latest available calendar to auto-derive a tiny, non-empty window
        cal = D.calendar(freq="day", future=False)
        if len(cal) >= 45:
            DEBUG_WINDOW_READY = True
            # last 45 calendar entries: 21 fit, 10 valid, 14 test
            fit_start_dt, fit_end_dt = cal[-45], cal[-25]
            # Store the bounds under the names the segment override reads, so the
            # debug valid/test segments are real dates rather than None
            VALID_START_DT, VALID_END_DT = cal[-24], cal[-15]
            TEST_START_DT, TEST_END_DT = cal[-14], cal[-1]
            data_handler_config.update(
                {
                    "fit_start_time": fit_start_dt,
                    "fit_end_time": fit_end_dt,
                    "start_time": fit_start_dt,
                    "end_time": TEST_END_DT,
                }
            )

    dataset_config = {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            "segments": {
                # train uses fit window; split the rest to valid/test roughly
                "train": (data_handler_config["fit_start_time"], data_handler_config["fit_end_time"]),
                "valid": ("2022-12-20", "2023-12-31"),
                "test": ("2024-01-01", data_handler_config["end_time"]),
            },
        },
    }

    if DEBUG_WINDOW_READY:
        # Replace the fixed-date segments with the calendar-derived debug window
        dataset_config["kwargs"]["segments"] = {
            "train": (data_handler_config["fit_start_time"], data_handler_config["fit_end_time"]),
            "valid": (VALID_START_DT, VALID_END_DT),
            "test": (TEST_START_DT, TEST_END_DT),
        }

    model_config = {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    }

    if DEBUG_FAST:
        model_config["kwargs"].update({"num_threads": 2, "num_boost_round": 10})

    model = init_instance_by_config(model_config)
    dataset = init_instance_by_config(dataset_config)

    # Prefer contrib's crypto version; fallback to default PortAnaRecord (no external local dependency)
    try:
        from qlib.contrib.workflow.crypto_record_temp import CryptoPortAnaRecord as PortAnaRecord  # type: ignore

        print("Using contrib's crypto version of CryptoPortAnaRecord as PortAnaRecord")
    except Exception:  # pylint: disable=W0718
        from qlib.workflow.record_temp import PortAnaRecord

        print("Using default version of PortAnaRecord")

    # Align backtest time to test segment
    test_start, test_end = dataset_config["kwargs"]["segments"]["test"]

    # Strategy params (shrink for fast validation)
    TOPK_L, TOPK_S, DROP_L, DROP_S = 20, 20, 10, 10
    if DEBUG_FAST:
        TOPK_L = TOPK_S = 5
        DROP_L = DROP_S = 1

    port_analysis_config = {
        "executor": {
            "class": "ShortableExecutor",
            "module_path": "qlib.backtest.shortable_backtest",
            "kwargs": {
                "time_per_step": "day",
                "generate_portfolio_metrics": True,
            },
        },
        "strategy": {
            "class": "LongShortTopKStrategy",
            "module_path": "qlib.contrib.strategy.signal_strategy",
            "kwargs": {
                "signal": (model, dataset),
                "topk_long": TOPK_L,
                "topk_short": TOPK_S,
                "n_drop_long": DROP_L,
                "n_drop_short": DROP_S,
                "hold_thresh": 3,
                "only_tradable": True,
                "forbid_all_trade_at_limit": False,
            },
        },
        "backtest": {
            "start_time": test_start,
            "end_time": test_end,
            "account": 100000000,
            "benchmark": BENCHMARK_AUTO,
            "exchange_kwargs": {
                "exchange": {
                    "class": "ShortableExchange",
                    "module_path": "qlib.backtest.shortable_exchange",
                },
                "freq": "day",
                # Crypto has no daily price limit; set to 0.0 to avoid false limit locks
                # NOTE(review): confirm ShortableExchange treats 0.0 as "no limit" rather
                # than as a zero-width limit band - TODO confirm
                "limit_threshold": 0.0,
                "deal_price": "close",
                "open_cost": 0.0002,
                "close_cost": 0.0005,
                "min_cost": 0,
            },
        },
    }

    # Preview prepared data
    example_df = dataset.prepare("train")
    print(example_df.head())

    # Start experiment
    with R.start(experiment_name="workflow_longshort_crypto"):
        R.log_params(**flatten_dict({"model": model_config, "dataset": dataset_config}))
        model.fit(dataset)
        R.save_objects(**{"params.pkl": model})

        # Prediction
        recorder = R.get_recorder()
        sr = SignalRecord(model, dataset, recorder)
        sr.generate()

        # Signal Analysis
        sar = SigAnaRecord(recorder)
        sar.generate()

        # Backtest with long-short strategy (Crypto metrics)
        par = PortAnaRecord(recorder, port_analysis_config, "day")
        par.generate()
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this file necessary?