diff --git a/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h b/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h index 075e61b3c..e6a25fdc3 100644 --- a/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h +++ b/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h @@ -20,6 +20,7 @@ #include "src/fastertransformer/utils/cuda_fp8_utils.h" #include "src/fastertransformer/utils/cuda_type_utils.cuh" #include +#include using namespace fastertransformer; diff --git a/src/fastertransformer/utils/cublasMMWrapper.h b/src/fastertransformer/utils/cublasMMWrapper.h index 0c7ecbffa..c656f8098 100644 --- a/src/fastertransformer/utils/cublasMMWrapper.h +++ b/src/fastertransformer/utils/cublasMMWrapper.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/src/fastertransformer/utils/cuda_fp8_utils.cu b/src/fastertransformer/utils/cuda_fp8_utils.cu index f5e1d2465..618c5d8c0 100644 --- a/src/fastertransformer/utils/cuda_fp8_utils.cu +++ b/src/fastertransformer/utils/cuda_fp8_utils.cu @@ -16,6 +16,9 @@ #include "cuda_fp8_utils.h" +#include +#include + namespace fastertransformer { #ifdef ENABLE_FP8