From 6d6dc0f3da286f1e24da6a640b8114caaca461f1 Mon Sep 17 00:00:00 2001 From: Jonathan Clohessy Date: Tue, 14 Oct 2025 10:01:23 +0100 Subject: [PATCH 1/3] Fix microbenchmark build failure with C++20 Signed-off-by: Jonathan Clohessy --- onnxruntime/test/onnx/microbenchmark/tptest.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/onnx/microbenchmark/tptest.cc b/onnxruntime/test/onnx/microbenchmark/tptest.cc index e51b12454294f..fd173ba3c6f9f 100644 --- a/onnxruntime/test/onnx/microbenchmark/tptest.cc +++ b/onnxruntime/test/onnx/microbenchmark/tptest.cc @@ -35,7 +35,7 @@ BENCHMARK(BM_CreateThreadPool) #pragma GCC optimize("O0") #endif void SimpleForLoop(ptrdiff_t first, ptrdiff_t last) { - size_t sum = 0; + [[maybe_unused]] size_t sum = 0; for (; first != last; ++first) { ++sum; } @@ -102,7 +102,8 @@ static void BM_ThreadPoolSimpleParallelFor(benchmark::State& state) { for (auto _ : state) { for (int j = 0; j < 100; j++) { ThreadPool::TrySimpleParallelFor(tp.get(), len, [&](size_t) { - for (volatile size_t x = 0; x < body; x++) { + for (size_t x = 0; x < body; x++) { + benchmark::DoNotOptimize(x); } }); } From c95575f9b51b170cd8e75611dd18fc6e49047a77 Mon Sep 17 00:00:00 2001 From: Jonathan Clohessy Date: Thu, 23 Oct 2025 12:44:50 +0100 Subject: [PATCH 2/3] Update mlasi.h include path and add DoNotOptimize for sum Signed-off-by: Jonathan Clohessy --- onnxruntime/core/mlas/lib/kleidiai/mlasi_kleidiai.h | 2 +- onnxruntime/test/onnx/microbenchmark/tptest.cc | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/mlas/lib/kleidiai/mlasi_kleidiai.h b/onnxruntime/core/mlas/lib/kleidiai/mlasi_kleidiai.h index 2e9c4574fd057..cd68a9d61a680 100644 --- a/onnxruntime/core/mlas/lib/kleidiai/mlasi_kleidiai.h +++ b/onnxruntime/core/mlas/lib/kleidiai/mlasi_kleidiai.h @@ -6,7 +6,7 @@ #pragma once -#include "mlasi.h" +#include "../mlasi.h" // Fix to ensure compatibility with MSVC build #if defined(_MSC_VER) diff --git 
a/onnxruntime/test/onnx/microbenchmark/tptest.cc b/onnxruntime/test/onnx/microbenchmark/tptest.cc index fd173ba3c6f9f..b4a19fb61b65c 100644 --- a/onnxruntime/test/onnx/microbenchmark/tptest.cc +++ b/onnxruntime/test/onnx/microbenchmark/tptest.cc @@ -35,7 +35,8 @@ BENCHMARK(BM_CreateThreadPool) #pragma GCC optimize("O0") #endif void SimpleForLoop(ptrdiff_t first, ptrdiff_t last) { - [[maybe_unused]] size_t sum = 0; + size_t sum = 0; + benchmark::DoNotOptimize(sum); for (; first != last; ++first) { ++sum; } From 9d782a31ac315a04b2e7ea00a8e441d6a494035e Mon Sep 17 00:00:00 2001 From: Jonathan Clohessy Date: Fri, 24 Oct 2025 11:20:30 +0100 Subject: [PATCH 3/3] Move DoNotOptimize inside the loop Signed-off-by: Jonathan Clohessy --- onnxruntime/test/onnx/microbenchmark/tptest.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxruntime/test/onnx/microbenchmark/tptest.cc b/onnxruntime/test/onnx/microbenchmark/tptest.cc index b4a19fb61b65c..1c377f1a5fa96 100644 --- a/onnxruntime/test/onnx/microbenchmark/tptest.cc +++ b/onnxruntime/test/onnx/microbenchmark/tptest.cc @@ -36,9 +36,8 @@ BENCHMARK(BM_CreateThreadPool) #endif void SimpleForLoop(ptrdiff_t first, ptrdiff_t last) { size_t sum = 0; - benchmark::DoNotOptimize(sum); for (; first != last; ++first) { - ++sum; + benchmark::DoNotOptimize(++sum); } } #ifdef _WIN32