Skip to content
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ option(onnxruntime_EXTERNAL_TRANSFORMER_SRC_PATH "Path to external transformer s
option(onnxruntime_ENABLE_CUDA_PROFILING "Enable CUDA kernel profiling" OFF)

# cpuinfo is required on Windows (the Windows Env calls cpuinfo_initialize()),
# so the option is user-configurable only on other platforms (default ON) and is
# forced ON when targeting Windows.
# cmake_dependent_option(<opt> <doc> <default> <depends> <force>) exposes <opt>
# to the user only while <depends> is true; otherwise it is set to <force>.
cmake_dependent_option(onnxruntime_ENABLE_CPUINFO "Enable cpuinfo" ON "NOT WIN32" ON)

# ATen fallback support
option(onnxruntime_ENABLE_ATEN "Enable ATen fallback" OFF)
Expand Down
3 changes: 3 additions & 0 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,9 @@ endif()
foreach(mlas_target ${ONNXRUNTIME_MLAS_LIBS})
target_include_directories(${mlas_target} PRIVATE ${MLAS_INC_DIR} ${MLAS_SRC_DIR})
onnxruntime_add_include_to_target(${mlas_target} ${GSL_TARGET})
if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
onnxruntime_add_include_to_target(${mlas_target} cpuinfo::cpuinfo)
endif()

target_compile_definitions(${mlas_target} PRIVATE ${mlas_private_compile_definitions})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include <math.h>

#include "endian.h"
#include "core/common/endian.h"
#if defined(CUDA_VERSION) && CUDA_VERSION >= 11000
#include "cuda_bf16.h"
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#if !defined(DISABLE_FLOAT8_TYPES)

#include "endian.h"
#include "core/common/endian.h"

#if defined(__CUDACC__)
// Needed for CUDA_VERSION check below
Expand Down
6 changes: 3 additions & 3 deletions include/onnxruntime/core/framework/data_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
#include <gsl/gsl>
#include "core/common/common.h"
#include "core/common/exceptions.h"
#include "core/framework/endian.h"
#include "core/framework/float8.h"
#include "core/framework/float16.h"
#include "core/common/endian.h"
#include "core/common/float8.h"
#include "core/common/float16.h"
#include "core/framework/int4.h"
#include "core/framework/float4.h"
#include "core/graph/onnx_protobuf.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#endif

#include "core/framework/float4.h"
#include "core/framework/float8.h"
#include "core/framework/float16.h"
#include "core/common/float8.h"
#include "core/common/float16.h"
#include "core/framework/int4.h"

namespace onnxruntime {
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/cpu/moe/moe_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include "core/providers/cpu/math/gemm_helper.h"
#include "core/util/math_cpuonly.h"
#include "core/mlas/inc/mlas.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/framework/allocator.h"
#include "core/platform/threadpool.h"
#include "core/common/narrow.h"
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/cpu/moe/moe_quantization_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include "contrib_ops/cpu/moe/moe_quantization_cpu.h"
#include "core/framework/allocator.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/mlas/inc/mlas.h"
#include "core/mlas/inc/mlas_q4.h"
#include "core/platform/threadpool.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <vector>

#include "core/common/safeint.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/platform/threadpool.h"
#include <iostream>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "core/common/common.h"
#include "core/common/narrow.h"
#include "core/common/safeint.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/framework/int4.h"
#include "core/framework/op_kernel.h"
#include "core/platform/threadpool.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <type_traits>

#include "core/common/common.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/providers/common.h"
#include "core/platform/threadpool.h"

Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/cpu/utils/console_dumper.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <string>
#include <iostream>
#include "core/framework/ort_value.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "contrib_ops/cpu/utils/debug_macros.h"

namespace onnxruntime {
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/cuda/quantization/matmul_nbits.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <cstdint>

#include "core/common/status.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/providers/cpu/math/matmul_helper.h"
#include "core/providers/cuda/cuda_type_conversion.h"
#include "contrib_ops/cuda/utils/dump_cuda_tensor.h"
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/rocm/bert/gemm_fast_gelu_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include "contrib_ops/rocm/bert/gemm_fast_gelu_common.h"
#include "core/common/status.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"

namespace onnxruntime {
namespace contrib {
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/rocm/math/gemm_float8.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the MIT License.

#include "core/common/common.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/providers/rocm/rocm_kernel.h"
#include "contrib_ops/rocm/math/gemm_float8_ck.cuh"

Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/rocm/math/gemm_float8_ck.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#endif

#if !defined(DISABLE_FLOAT8_TYPES)
#include "core/framework/float8.h"
#include "core/common/float8.h"
#endif
#include "core/providers/rocm/tunable/gemm_common.h"

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/framework/element_type_lists.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#include "boost/mp11.hpp"

#include "core/common/type_list.h"
#include "core/framework/float8.h"
#include "core/framework/float16.h"
#include "core/common/float8.h"
#include "core/common/float16.h"
#include "core/framework/int4.h"
#include "core/framework/float4.h"

Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/endian_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <cassert>
#include <cstring>

#include "core/framework/endian.h"
#include "core/common/endian.h"

namespace onnxruntime {
namespace utils {
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/murmurhash3.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

/* Modifications Copyright (c) Microsoft. */

#include "core/framework/endian.h"
#include "core/common/endian.h"

#include "core/util/force_inline.h"

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/graph/contrib_ops/onnx_function_util.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "core/graph/contrib_ops/onnx_function_util.h"
#include "core/util/math.h"
#include "core/framework/float8.h"
#include "core/framework/float16.h"
#include "core/common/float8.h"
#include "core/common/float16.h"

namespace ONNX_NAMESPACE {

Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/mlas/inc/mlas.h
Original file line number Diff line number Diff line change
Expand Up @@ -1865,13 +1865,13 @@ MlasHalfGemmConvertPackB(
void* PackedB
);

#if defined(__aarch64__) && defined(__linux__)

/**
* @brief Whether current CPU supports Bfloat16(bf16) acceleration.
*/
bool MLASCALL
MlasBf16AccelerationSupported();

#if defined(__aarch64__) && defined(__linux__)
/**
* @brief Interface for bf16 gemm post processors.
*
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/mlas/lib/mlasi.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ MLAS_FORCEINLINE void
#include "core/common/cpuid_info.h"
using MLAS_CPUIDINFO = onnxruntime::CPUIDInfo;

#include "core/framework/float16.h"
#include "core/common/float16.h"

#else // BUILD_MLAS_NO_ONNXRUNTIME

Expand Down
20 changes: 20 additions & 0 deletions onnxruntime/core/mlas/lib/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ Module Name:

#include <thread>
#include <mutex>
#if defined(MLAS_TARGET_AMD64_IX86)
#include <cpuinfo.h>
#endif


#if defined(MLAS_TARGET_POWER)
#if defined(__linux__)
Expand Down Expand Up @@ -781,6 +785,22 @@ Return Value:
#endif
}

/*++

Routine Description:

    Reports whether the current CPU offers bfloat16 (bf16) acceleration.

    - ARM64 Linux: answered by the NEON BF16 flag of MLAS_CPUIDINFO.
    - x86/x64: answered by cpuinfo (AVX512-BF16 or AMX-BF16).
    - Any other target: always false.

Return Value:

    true if bf16 acceleration is available, false otherwise.

--*/
bool MLASCALL
MlasBf16AccelerationSupported()
{
#if defined(MLAS_TARGET_ARM64) && defined(__linux__)
    const bool Bf16Supported = MLAS_CPUIDINFO::GetCPUIDInfo().HasArmNeon_BF16();
#elif defined(MLAS_TARGET_AMD64_IX86)
    // cpuinfo is expected to have been initialized earlier by the platform Env
    // singleton, so only the feature flags are read here. If that initialization
    // failed, the queries below are expected to report false.
    const bool Bf16Supported = cpuinfo_has_x86_avx512bf16() || cpuinfo_has_x86_amx_bf16();
#else
    const bool Bf16Supported = false;
#endif
    return Bf16Supported;
}


#ifdef MLAS_TARGET_AMD64_IX86

bool
Expand Down
10 changes: 0 additions & 10 deletions onnxruntime/core/mlas/lib/sbgemm_kernel_neon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,6 @@ struct MLAS_SBGEMM_KERNEL_NEON {
static constexpr MLAS_SBGEMM_STRIDES Strides{128, 128, 256}; // M:N:K
};

// Reports whether bf16 acceleration is available: on ARM64 this is the NEON BF16
// flag from MLAS_CPUIDINFO; on every other target it is always false.
bool MLASCALL
MlasBf16AccelerationSupported()
{
#if !defined(MLAS_TARGET_ARM64)
    return false;
#else
    return MLAS_CPUIDINFO::GetCPUIDInfo().HasArmNeon_BF16();
#endif
}

/*
This routine converts fp32 to bf16 and copies elements from the source
matrix to the destination packed buffer.
Expand Down
8 changes: 8 additions & 0 deletions onnxruntime/core/platform/windows/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -866,11 +866,19 @@
InitializeCpuInfo();
}


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

/*
Discover all cores in a Windows system.
Note - every "id" here, whether it is a group id, core id, or logical processor id, starts from 0.
*/
void WindowsEnv::InitializeCpuInfo() {
// Initialize cpuinfo once on Windows similar to PosixEnv constructor.
Comment on lines 873 to +874
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a macro here to check whether cpuinfo is supported?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added code to force the library to be available on Windows.

(void)cpuinfo_initialize(); //Ignore the error if it failed to initialize
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
(void)cpuinfo_initialize(); //Ignore the error if it failed to initialize
(void)cpuinfo_initialize(); // Ignore the error if it failed to initialize

// TODO: we should also call cpuinfo_deinitialize()

Check warning on line 877 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2] Raw Output: onnxruntime/core/platform/windows/env.cc:877: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
// TODO: the cpuinfo_initialize() function also gets called when creating ort thread pool, it would be better to

Check warning on line 878 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2] Raw Output: onnxruntime/core/platform/windows/env.cc:878: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
// put them in one place.
// TODO: test how it works in ARM64EC.

Check warning on line 880 in onnxruntime/core/platform/windows/env.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2] Raw Output: onnxruntime/core/platform/windows/env.cc:880: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]

DWORD returnLength = 0;
GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &returnLength);
auto last_error = GetLastError();
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/platform/windows/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ limitations under the License.
#include "core/platform/windows/telemetry.h"
#include "core/common/inlined_containers.h"
#include <Windows.h>
#if defined(CPUINFO_SUPPORTED)
#include <cpuinfo.h>
#endif

namespace onnxruntime {

Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/cann/cann_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include "core/providers/shared_library/provider_api.h"
#include "core/providers/cann/cann_call.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"

namespace onnxruntime {
namespace cann {
Expand Down
7 changes: 5 additions & 2 deletions onnxruntime/core/providers/cpu/cpu_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -689,10 +689,12 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain,
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, string, Expand);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, Gemm);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double, Gemm);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, BFloat16, Gemm);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, MatMul);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double, MatMul);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int32_t, MatMul);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int64_t, MatMul);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, BFloat16, MatMul);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Min);
class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Max);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, Mean);
Expand Down Expand Up @@ -2426,13 +2428,14 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
MatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int32_t,
MatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int64_t,
MatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int64_t, MatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, BFloat16, MatMul)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Min)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Max)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, Mean)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, Gemm)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double, Gemm)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, BFloat16, Gemm)>,
BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, Sign)>,
BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 18, Size)>,
BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, Sum)>,
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/cpu/fp16/fp16_activations.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#pragma once

#include "core/mlas/inc/mlas.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/providers/cpu/activation/activations.h"

#ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/cpu/fp16/fp16_conv.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED

#include "core/common/safeint.h"
#include "core/framework/float16.h"
#include "core/common/float16.h"
#include "core/framework/op_kernel.h"
#include "core/providers/cpu/nn/conv_attributes.h"

Expand Down
Loading
Loading