diff --git a/.github/run-benchmark-as-lit-jobs.py b/.github/run-benchmark-as-lit-jobs.py index 5b9de62bd1..f5e89405e1 100644 --- a/.github/run-benchmark-as-lit-jobs.py +++ b/.github/run-benchmark-as-lit-jobs.py @@ -16,7 +16,7 @@ def main(gh_run_id: str = ""): print("Uploading package and benchmark script...") s.upload_folder("dist", remote_path="dist") pkg_path = glob.glob("dist/*.whl")[0] - s.upload_file("examples/coverage/requirements.txt", remote_path="benchmarks/requirements.txt") + s.upload_file("examples/quickstart/benchmarks/requirements.txt", remote_path="benchmarks/requirements.txt") s.upload_file("thunder/benchmarks/benchmark_hf.py", remote_path="benchmarks/benchmark_hf.py") print("Starting studio...") diff --git a/examples/quickstart/benchmarks/requirements.txt b/examples/quickstart/benchmarks/requirements.txt new file mode 100644 index 0000000000..5f16f94900 --- /dev/null +++ b/examples/quickstart/benchmarks/requirements.txt @@ -0,0 +1,5 @@ +transformers==4.52.4 +numpy<2.0 +torch==2.7.1 +nvfuser-cu128-torch27 +nvidia-cudnn-frontend diff --git a/thunder/benchmarks/benchmark_hf.py b/thunder/benchmarks/benchmark_hf.py index 68bd56dc81..a84391b956 100644 --- a/thunder/benchmarks/benchmark_hf.py +++ b/thunder/benchmarks/benchmark_hf.py @@ -5,17 +5,10 @@ import thunder from thunder.dev_utils.benchmark import benchmark_n -from thunder.recipes.base import BaseRecipe +from thunder.recipes.hf_transformers import HFTransformers from torch.profiler import profile, record_function, ProfilerActivity - -class DebugRecipe(BaseRecipe): - def setup_config(self): - config = super().setup_config() - return config - - device = "cuda" model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" @@ -28,8 +21,8 @@ def setup_config(self): plugins_str = "" if plugins is None else plugins if isinstance(plugins, str) else "-".join(plugins) # Define recipes -nvfuser_recipe = DebugRecipe() -torchcompile_recipe = DebugRecipe(fuser="torch.compile") +nvfuser_recipe = HFTransformers() +torchcompile_recipe = HFTransformers(fuser="torch.compile") recipes = [nvfuser_recipe, torchcompile_recipe] @@ -68,6 +61,8 @@ def run_and_profile(tag: str, fn, model, inp, compiled_models: dict[str, torch.n with open(profile_path, "w") as f: f.write(thunder_prof.key_averages().table(sort_by="cpu_time_total")) + print(timings) + with open(root / f"{tag}_timings_{plugins_str}.txt", "w") as f: f.write("\n".join(timings)) diff --git a/thunder/recipes/hf_transformers.py b/thunder/recipes/hf_transformers.py index 0dd3f1bc31..2440beaa41 100644 --- a/thunder/recipes/hf_transformers.py +++ b/thunder/recipes/hf_transformers.py @@ -181,6 +181,7 @@ class HFTransformers(BaseRecipe): Args: show_progress (bool, optional): Forwarded to :class:`BaseRecipe`. + fuser (str, optional): which fuser to use, default NvFuser. interpreter (str, optional): Thunder interpreter to use. plugins (Iterable | None, optional): Extra Thunder plugins. """ @@ -188,10 +189,11 @@ class HFTransformers(BaseRecipe): def __init__( self, show_progress=False, + fuser="nvfuser", interpreter="thunder.jit", plugins=None, ): - super().__init__(show_progress=show_progress, interpreter=interpreter, plugins=plugins) + super().__init__(show_progress=show_progress, fuser=fuser, interpreter=interpreter, plugins=plugins) # for kv-cache inplace ops self.inplace_index_copy_transform = InplaceIndexCopyTransform()