Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "truss"
version = "0.11.12"
version = "0.11.13.rc1"
description = "A seamless bridge from model development to model delivery"
authors = [
{ name = "Pankaj Gupta", email = "no-reply@baseten.co" },
Expand Down
4 changes: 3 additions & 1 deletion truss/base/trt_llm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class TrussTRTLLMQuantizationType(str, Enum):
FP8_KV = "fp8_kv"
FP4 = "fp4"
FP4_KV = "fp4_kv"
FP4_MLP_ONLY = "fp4_mlp_only"


class TrussTRTLLMPluginConfiguration(PydanticTrTBaseModel):
Expand Down Expand Up @@ -713,7 +714,8 @@ def trt_llm_common_validation(config: "TrussConfig"):
"accelerators or newer (CUDA_COMPUTE>=89)"
)
elif trt_llm_config.build.quantization_type in [
TrussTRTLLMQuantizationType.FP4
TrussTRTLLMQuantizationType.FP4,
TrussTRTLLMQuantizationType.FP4_MLP_ONLY
] and config.resources.accelerator.accelerator in [
truss_config.Accelerator.H100,
truss_config.Accelerator.L4,
Expand Down
Loading