@@ -42,6 +42,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4242#ifdef USE_SVE
4343#ifdef DOT_KERNEL_SVE
4444#include DOT_KERNEL_SVE
45+ #elif defined(A64FX )
46+ #include "dot_kernel_sve_v8.c"
4547#else
4648#include "dot_kernel_sve.c"
4749#endif
@@ -82,14 +84,43 @@ static inline int get_dot_optimal_nthreads_neoversev1(BLASLONG N, int ncpu) {
8284}
8385#endif
8486
#if defined(DYNAMIC_ARCH) || defined(A64FX)
/*
 * Thread-count heuristic for the dot kernel on A64FX.
 *
 * N    - problem size the thresholds below are compared against.
 * ncpu - number of threads the caller has available.
 *
 * Returns 1 for the smallest sizes, ncpu for the largest, and otherwise
 * the smaller of ncpu and a per-size-band cap. A separate threshold table
 * is used when DOUBLE is defined.
 */
static inline int get_dot_optimal_nthreads_a64fx(BLASLONG N, int ncpu) {
  int cap; /* upper bound on threads for the size band N falls into */

#ifdef DOUBLE
  if (N <= 11000L) {
    return 1; /* smallest band: fixed at one thread, independent of ncpu */
  } else if (N <= 20000L) {
    cap = 2;
  } else if (N <= 35000L) {
    cap = 4;
  } else if (N <= 50000L) {
    cap = 6;
  } else if (N <= 440000L) {
    cap = 8;
  } else if (N <= 880000L) {
    cap = 16;
  } else if (N <= 1020000L) {
    cap = 24;
  } else {
    return ncpu; /* largest band: use every available thread */
  }
#else
  if (N <= 22000L) {
    return 1; /* smallest band: fixed at one thread, independent of ncpu */
  } else if (N <= 39000L) {
    cap = 2;
  } else if (N <= 79000L) {
    cap = 4;
  } else if (N <= 120000L) {
    cap = 6;
  } else if (N <= 1020000L) {
    cap = 8;
  } else {
    return ncpu; /* largest band: use every available thread */
  }
#endif

  /* Never request more threads than the caller has available. */
  return MIN(ncpu, cap);
}
#endif
108+
85109static inline int get_dot_optimal_nthreads (BLASLONG n ) {
86110 int ncpu = num_cpu_avail (1 );
87111
88- #if defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
112+ #if defined(A64FX ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
113+ return get_dot_optimal_nthreads_a64fx (n , ncpu );
114+ #elif defined(NEOVERSEV1 ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
89115 return get_dot_optimal_nthreads_neoversev1 (n , ncpu );
90116#elif defined(DYNAMIC_ARCH ) && !defined(COMPLEX ) && !defined(BFLOAT16 )
91- if (strcmp (gotoblas_corename (), "neoversev1" ) == 0 ) {
92- return get_dot_optimal_nthreads_neoversev1 (n , ncpu );
117+ {
118+ const char * core = gotoblas_corename ();
119+ if (strcmp (core , "a64fx" ) == 0 ) {
120+ return get_dot_optimal_nthreads_a64fx (n , ncpu );
121+ } else if (strcmp (core , "neoversev1" ) == 0 ) {
122+ return get_dot_optimal_nthreads_neoversev1 (n , ncpu );
123+ }
93124 }
94125#endif
95126
0 commit comments