1717#include < cstddef>
1818#include < type_traits>
1919
20- #define OV_THREAD_TBB 0
21- #define OV_THREAD_OMP 1
22- #define OV_THREAD_SEQ 2
23- #define OV_THREAD_TBB_AUTO 3
24-
25- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
20+ #define OV_THREAD_TBB 0
21+ #define OV_THREAD_OMP 1
22+ #define OV_THREAD_SEQ 2
23+ #define OV_THREAD_TBB_AUTO 3
24+ #define OV_THREAD_TBB_ADAPTIVE 4
25+
26+ #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_ADAPTIVE)
27+ # define OV_THREAD_USE_TBB 1
2628# ifndef NOMINMAX
2729# define NOMINMAX
2830# endif
@@ -66,7 +68,7 @@ inline int parallel_get_env_threads() {
// No-op in this configuration: the `levels` argument is accepted but unused and the function returns immediately.
// NOTE(review): this appears to sit inside the TBB branch of the OV_THREAD selection — confirm against the full file.
6668inline void parallel_set_max_nested_levels (int levels) {
6769 return ;
6870}
69- # if OV_THREAD == OV_THREAD_TBB
71+ # if ( OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_ADAPTIVE)
7072# define PARTITIONING , tbb::static_partitioner()
7173
7274// TBB versions older than 2018u1 have no static_partitioner argument for
@@ -81,6 +83,7 @@ inline void parallel_set_max_nested_levels(int levels) {
8183# define PARTITIONING
8284# endif
8385#elif OV_THREAD == OV_THREAD_OMP
86+ # define OV_THREAD_USE_TBB 0
8487# include < omp.h>
8588# if !defined(_OPENMP)
8689# error Undefined OpenMP version.
@@ -162,6 +165,7 @@ inline int parallel_get_nested_level() {
162165}
163166
164167#elif OV_THREAD == OV_THREAD_SEQ
168+ # define OV_THREAD_USE_TBB 0
165169# include < algorithm>
166170inline int parallel_get_env_threads () {
167171 return 1 ;
@@ -231,7 +235,7 @@ namespace ov {
231235
232236template <typename F>
233237void parallel_nt (int nthr, const F& func) {
234- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
238+ #if OV_THREAD_USE_TBB
235239 if (nthr == 0 )
236240 nthr = parallel_get_max_threads ();
237241 if (nthr == 1 ) {
@@ -279,7 +283,7 @@ void parallel_nt_static(int nthr, const F& func) {
279283
280284 if (nthr == 0 )
281285 nthr = parallel_get_max_threads ();
282- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
286+ #if OV_THREAD_USE_TBB
283287 tbb::parallel_for (
284288 0 ,
285289 nthr,
@@ -305,7 +309,7 @@ void parallel_nt_static(int nthr, const F& func) {
305309
306310template <typename I, typename F>
307311void parallel_sort (I begin, I end, const F& comparator) {
308- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
312+ #if OV_THREAD_USE_TBB
309313 tbb::parallel_sort (begin, end, comparator);
310314#elif OV_THREAD == OV_THREAD_OMP
311315 // TODO: propose OpenMP version
@@ -317,7 +321,7 @@ void parallel_sort(I begin, I end, const F& comparator) {
317321
318322template <typename T0, typename R, typename F>
319323R parallel_sum (const T0& D0, const R& input, const F& func) {
320- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
324+ #if OV_THREAD_USE_TBB
321325 return _TBB_REDUCE_FUNC (
322326 tbb::blocked_range<T0>(0 , D0),
323327 input,
@@ -351,7 +355,7 @@ R parallel_sum(const T0& D0, const R& input, const F& func) {
351355
352356template <typename T0, typename T1, typename R, typename F>
353357R parallel_sum2d (const T0& D0, const T1& D1, const R& input, const F& func) {
354- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
358+ #if OV_THREAD_USE_TBB
355359 return _TBB_REDUCE_FUNC (
356360 tbb::blocked_range2d<T0, T1>(0 , D0, 0 , D1),
357361 input,
@@ -391,7 +395,7 @@ R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) {
391395}
392396template <typename T0, typename T1, typename T2, typename R, typename F>
393397R parallel_sum3d (const T0& D0, const T1& D1, const T2& D2, const R& input, const F& func) {
394- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
398+ #if OV_THREAD_USE_TBB
395399 return _TBB_REDUCE_FUNC (
396400 tbb::blocked_range3d<T0, T1, T2>(0 , D0, 0 , D1, 0 , D2),
397401 input,
@@ -524,7 +528,7 @@ void parallel_for(const T0& D0, const F& func) {
524528 if (D0 == T0 (0 )) {
525529 return ;
526530 }
527- #if OV_THREAD == OV_THREAD_TBB
531+ #if ( OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_ADAPTIVE)
528532 auto work_amount = static_cast <size_t >(D0);
529533 int nthr = parallel_get_max_threads ();
530534 if (static_cast <size_t >(nthr) > work_amount)
@@ -590,7 +594,7 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
590594 if (D0 == T0 (0 ) || D1 == T1 (0 )) {
591595 return ;
592596 }
593- #if OV_THREAD == OV_THREAD_TBB
597+ #if ( OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_ADAPTIVE)
594598 auto work_amount = static_cast <size_t >(D0 * D1);
595599 int nthr = parallel_get_max_threads ();
596600 if (static_cast <size_t >(nthr) > work_amount)
@@ -636,7 +640,7 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
636640
637641template <typename T0, typename T1, typename F>
638642void parallel_for2d_dynamic (const T0& D0, const T1& D1, const F& func) {
639- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
643+ #if OV_THREAD_USE_TBB
640644 tbb::parallel_for (tbb::blocked_range2d<T0, T1>(0 , D0, 0 , D1), [=](const tbb::blocked_range2d<T0, T1>& r) {
641645 for (T0 d0 = r.rows ().begin (); d0 < r.rows ().end (); d0++) {
642646 for (T1 d1 = r.cols ().begin (); d1 < r.cols ().end (); d1++) {
@@ -674,7 +678,7 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
674678 if (D0 == T0 (0 ) || D1 == T1 (0 ) || D2 == T2 (0 )) {
675679 return ;
676680 }
677- #if OV_THREAD == OV_THREAD_TBB
681+ #if ( OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_ADAPTIVE)
678682 auto work_amount = static_cast <size_t >(D0 * D1 * D2);
679683 int nthr = parallel_get_max_threads ();
680684 if (static_cast <size_t >(nthr) > work_amount)
@@ -720,7 +724,7 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
720724
721725template <typename T0, typename T1, typename T2, typename F>
722726void parallel_for3d_dynamic (const T0& D0, const T1& D1, const T2& D2, const F& func) {
723- #if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
727+ #if OV_THREAD_USE_TBB
724728 tbb::parallel_for (tbb::blocked_range3d<T0, T1, T2>(0 , D0, 0 , D1, 0 , D2),
725729 [=](const tbb::blocked_range3d<T0, T1, T2>& r) {
726730 for (T0 d0 = r.pages ().begin (); d0 < r.pages ().end (); d0++) {
@@ -762,7 +766,7 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
762766 if (D0 == T0 (0 ) || D1 == T1 (0 ) || D2 == T2 (0 ) || D3 == T3 (0 )) {
763767 return ;
764768 }
765- #if OV_THREAD == OV_THREAD_TBB
769+ #if ( OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_ADAPTIVE)
766770 auto work_amount = static_cast <size_t >(D0 * D1 * D2 * D3);
767771 int nthr = parallel_get_max_threads ();
768772 if (static_cast <size_t >(nthr) > work_amount)
@@ -838,7 +842,7 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
838842 if (D0 == T0 (0 ) || D1 == T1 (0 ) || D2 == T2 (0 ) || D3 == T3 (0 ) || D4 == T4 (0 )) {
839843 return ;
840844 }
841- #if OV_THREAD == OV_THREAD_TBB
845+ #if ( OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_ADAPTIVE)
842846 auto work_amount = static_cast <size_t >(D0 * D1 * D2 * D3 * D4);
843847 int nthr = parallel_get_max_threads ();
844848 if (static_cast <size_t >(nthr) > work_amount)
@@ -916,7 +920,7 @@ void parallel_for6d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
916920 if (D0 == T0 (0 ) || D1 == T1 (0 ) || D2 == T2 (0 ) || D3 == T3 (0 ) || D4 == T4 (0 ) || D5 == T5 (0 )) {
917921 return ;
918922 }
919- #if OV_THREAD == OV_THREAD_TBB
923+ #if ( OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_ADAPTIVE)
920924 auto work_amount = static_cast <size_t >(D0 * D1 * D2 * D3 * D4 * D5);
921925 int nthr = parallel_get_max_threads ();
922926 if (static_cast <size_t >(nthr) > work_amount)
0 commit comments