Skip to content

Commit 5805e88

Browse files
pedrocloboRKSimon
andauthored
[Headers][X86] Allow AVX512 reduction intrinsics to be used in constexpr (#152363)
Closes #152324. Part of #30794. This PR adds `constexpr` support for the following AVX512 integer reduction intrinsics: - `_mm512_reduce_add_epi32` - `_mm512_reduce_add_epi64` - `_mm512_reduce_mul_epi32` - `_mm512_reduce_mul_epi64` - `_mm512_reduce_and_epi32` - `_mm512_reduce_and_epi64` - `_mm512_reduce_or_epi32` - `_mm512_reduce_or_epi64` - `_mm512_reduce_max_epi32` - `_mm512_reduce_max_epi64` - `_mm512_reduce_min_epi32` - `_mm512_reduce_min_epi64` - `_mm512_reduce_max_epu32` - `_mm512_reduce_max_epu64` - `_mm512_reduce_min_epu32` - `_mm512_reduce_min_epu64` --------- Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
1 parent 95c32bf commit 5805e88

File tree

3 files changed

+38
-16
lines changed

3 files changed

+38
-16
lines changed

clang/lib/Headers/avx512fintrin.h

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9337,19 +9337,23 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
93379337
* This takes log2(n) steps where n is the number of elements in the vector.
93389338
*/
93399339

9340-
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
9340+
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9341+
_mm512_reduce_add_epi64(__m512i __W) {
93419342
return __builtin_reduce_add((__v8di)__W);
93429343
}
93439344

9344-
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
9345+
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9346+
_mm512_reduce_mul_epi64(__m512i __W) {
93459347
return __builtin_reduce_mul((__v8di)__W);
93469348
}
93479349

9348-
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
9350+
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9351+
_mm512_reduce_and_epi64(__m512i __W) {
93499352
return __builtin_reduce_and((__v8di)__W);
93509353
}
93519354

9352-
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
9355+
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
9356+
_mm512_reduce_or_epi64(__m512i __W) {
93539357
return __builtin_reduce_or((__v8di)__W);
93549358
}
93559359

@@ -9400,22 +9404,22 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
94009404
return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
94019405
}
94029406

9403-
static __inline__ int __DEFAULT_FN_ATTRS512
9407+
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
94049408
_mm512_reduce_add_epi32(__m512i __W) {
94059409
return __builtin_reduce_add((__v16si)__W);
94069410
}
94079411

9408-
static __inline__ int __DEFAULT_FN_ATTRS512
9412+
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
94099413
_mm512_reduce_mul_epi32(__m512i __W) {
94109414
return __builtin_reduce_mul((__v16si)__W);
94119415
}
94129416

9413-
static __inline__ int __DEFAULT_FN_ATTRS512
9417+
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
94149418
_mm512_reduce_and_epi32(__m512i __W) {
94159419
return __builtin_reduce_and((__v16si)__W);
94169420
}
94179421

9418-
static __inline__ int __DEFAULT_FN_ATTRS512
9422+
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
94199423
_mm512_reduce_or_epi32(__m512i __W) {
94209424
return __builtin_reduce_or((__v16si)__W);
94219425
}
@@ -9466,22 +9470,22 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
94669470
return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
94679471
}
94689472

9469-
static __inline__ long long __DEFAULT_FN_ATTRS512
9473+
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
94709474
_mm512_reduce_max_epi64(__m512i __V) {
94719475
return __builtin_reduce_max((__v8di)__V);
94729476
}
94739477

9474-
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9478+
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
94759479
_mm512_reduce_max_epu64(__m512i __V) {
94769480
return __builtin_reduce_max((__v8du)__V);
94779481
}
94789482

9479-
static __inline__ long long __DEFAULT_FN_ATTRS512
9483+
static __inline__ long long __DEFAULT_FN_ATTRS512_CONSTEXPR
94809484
_mm512_reduce_min_epi64(__m512i __V) {
94819485
return __builtin_reduce_min((__v8di)__V);
94829486
}
94839487

9484-
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9488+
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512_CONSTEXPR
94859489
_mm512_reduce_min_epu64(__m512i __V) {
94869490
return __builtin_reduce_min((__v8du)__V);
94879491
}
@@ -9509,22 +9513,22 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
95099513
__V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);
95109514
return __builtin_reduce_min((__v8du)__V);
95119515
}
9512-
static __inline__ int __DEFAULT_FN_ATTRS512
9516+
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
95139517
_mm512_reduce_max_epi32(__m512i __V) {
95149518
return __builtin_reduce_max((__v16si)__V);
95159519
}
95169520

9517-
static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9521+
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
95189522
_mm512_reduce_max_epu32(__m512i __V) {
95199523
return __builtin_reduce_max((__v16su)__V);
95209524
}
95219525

9522-
static __inline__ int __DEFAULT_FN_ATTRS512
9526+
static __inline__ int __DEFAULT_FN_ATTRS512_CONSTEXPR
95239527
_mm512_reduce_min_epi32(__m512i __V) {
95249528
return __builtin_reduce_min((__v16si)__V);
95259529
}
95269530

9527-
static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9531+
static __inline__ unsigned int __DEFAULT_FN_ATTRS512_CONSTEXPR
95289532
_mm512_reduce_min_epu32(__m512i __V) {
95299533
return __builtin_reduce_min((__v16su)__V);
95309534
}

clang/test/CodeGen/X86/avx512-reduceIntrin.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,35 @@
11
// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
22

33
#include <immintrin.h>
4+
#include "builtin_test_helpers.h"
45

56
long long test_mm512_reduce_add_epi64(__m512i __W){
67
// CHECK-LABEL: @test_mm512_reduce_add_epi64(
78
// CHECK: call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %{{.*}})
89
return _mm512_reduce_add_epi64(__W);
910
}
11+
TEST_CONSTEXPR(_mm512_reduce_add_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == -4);
1012

1113
long long test_mm512_reduce_mul_epi64(__m512i __W){
1214
// CHECK-LABEL: @test_mm512_reduce_mul_epi64(
1315
// CHECK: call i64 @llvm.vector.reduce.mul.v8i64(<8 x i64> %{{.*}})
1416
return _mm512_reduce_mul_epi64(__W);
1517
}
18+
TEST_CONSTEXPR(_mm512_reduce_mul_epi64((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}) == 40320);
1619

1720
long long test_mm512_reduce_or_epi64(__m512i __W){
1821
// CHECK-LABEL: @test_mm512_reduce_or_epi64(
1922
// CHECK: call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %{{.*}})
2023
return _mm512_reduce_or_epi64(__W);
2124
}
25+
TEST_CONSTEXPR(_mm512_reduce_or_epi64((__m512i)(__v8di){0x100, 0x200, 0x400, 0x800, 0, 0, 0, 0}) == 0xF00);
2226

2327
long long test_mm512_reduce_and_epi64(__m512i __W){
2428
// CHECK-LABEL: @test_mm512_reduce_and_epi64(
2529
// CHECK: call i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %{{.*}})
2630
return _mm512_reduce_and_epi64(__W);
2731
}
32+
TEST_CONSTEXPR(_mm512_reduce_and_epi64((__m512i)(__v8di){0xFFFF, 0xFF00, 0x00FF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFF00, 0x00FF}) == 0x0000);
2833

2934
long long test_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W){
3035
// CHECK-LABEL: @test_mm512_mask_reduce_add_epi64(
@@ -59,23 +64,27 @@ int test_mm512_reduce_add_epi32(__m512i __W){
5964
// CHECK: call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %{{.*}})
6065
return _mm512_reduce_add_epi32(__W);
6166
}
67+
TEST_CONSTEXPR(_mm512_reduce_add_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == -8);
6268

6369
int test_mm512_reduce_mul_epi32(__m512i __W){
6470
// CHECK-LABEL: @test_mm512_reduce_mul_epi32(
6571
// CHECK: call i32 @llvm.vector.reduce.mul.v16i32(<16 x i32> %{{.*}})
6672
return _mm512_reduce_mul_epi32(__W);
6773
}
74+
TEST_CONSTEXPR(_mm512_reduce_mul_epi32((__m512i)(__v16si){1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 1, 1, -3, 1, 1}) == -36);
6875

6976
int test_mm512_reduce_or_epi32(__m512i __W){
7077
// CHECK: call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %{{.*}})
7178
return _mm512_reduce_or_epi32(__W);
7279
}
80+
TEST_CONSTEXPR(_mm512_reduce_or_epi32((__m512i)(__v16si){0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0, 0, 0, 0, 0, 0, 0, 0}) == 0xFF);
7381

7482
int test_mm512_reduce_and_epi32(__m512i __W){
7583
// CHECK-LABEL: @test_mm512_reduce_and_epi32(
7684
// CHECK: call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %{{.*}})
7785
return _mm512_reduce_and_epi32(__W);
7886
}
87+
TEST_CONSTEXPR(_mm512_reduce_and_epi32((__m512i)(__v16si){0xFF, 0xF0, 0x0F, 0xFF, 0xFF, 0xFF, 0xF0, 0x0F, 0xFF, 0xFF, 0xFF, 0xFF, 0xF0, 0xF0, 0x0F, 0x0F}) == 0x00);
7988

8089
int test_mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W){
8190
// CHECK-LABEL: @test_mm512_mask_reduce_add_epi32(

clang/test/CodeGen/X86/avx512-reduceMinMaxIntrin.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
11
// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
22

33
#include <immintrin.h>
4+
#include "builtin_test_helpers.h"
45

56
long long test_mm512_reduce_max_epi64(__m512i __W){
67
// CHECK-LABEL: @test_mm512_reduce_max_epi64(
78
// CHECK: call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> %{{.*}})
89
return _mm512_reduce_max_epi64(__W);
910
}
11+
TEST_CONSTEXPR(_mm512_reduce_max_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == 3);
1012

1113
unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
1214
// CHECK-LABEL: @test_mm512_reduce_max_epu64(
1315
// CHECK: call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> %{{.*}})
1416
return _mm512_reduce_max_epu64(__W);
1517
}
18+
TEST_CONSTEXPR(_mm512_reduce_max_epu64((__m512i)(__v8du){0, 1, 2, 3, 4, 5, 6, 7}) == 7);
1619

1720
double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
1821
// CHECK-LABEL: @test_mm512_reduce_max_pd(
@@ -27,12 +30,14 @@ long long test_mm512_reduce_min_epi64(__m512i __W){
2730
// CHECK: call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> %{{.*}})
2831
return _mm512_reduce_min_epi64(__W);
2932
}
33+
TEST_CONSTEXPR(_mm512_reduce_min_epi64((__m512i)(__v8di){-4, -3, -2, -1, 0, 1, 2, 3}) == -4);
3034

3135
unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
3236
// CHECK-LABEL: @test_mm512_reduce_min_epu64(
3337
// CHECK: call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> %{{.*}})
3438
return _mm512_reduce_min_epu64(__W);
3539
}
40+
TEST_CONSTEXPR(_mm512_reduce_min_epu64((__m512i)(__v8du){0, 1, 2, 3, 4, 5, 6, 7}) == 0);
3641

3742
double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
3843
// CHECK-LABEL: @test_mm512_reduce_min_pd(
@@ -89,12 +94,14 @@ int test_mm512_reduce_max_epi32(__m512i __W){
8994
// CHECK: call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> %{{.*}})
9095
return _mm512_reduce_max_epi32(__W);
9196
}
97+
TEST_CONSTEXPR(_mm512_reduce_max_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == 7);
9298

9399
unsigned int test_mm512_reduce_max_epu32(__m512i __W){
94100
// CHECK-LABEL: @test_mm512_reduce_max_epu32(
95101
// CHECK: call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %{{.*}})
96102
return _mm512_reduce_max_epu32(__W);
97103
}
104+
TEST_CONSTEXPR(_mm512_reduce_max_epu32((__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) == 15);
98105

99106
float test_mm512_reduce_max_ps(__m512 __W){
100107
// CHECK-LABEL: @test_mm512_reduce_max_ps(
@@ -107,12 +114,14 @@ int test_mm512_reduce_min_epi32(__m512i __W){
107114
// CHECK: call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> %{{.*}})
108115
return _mm512_reduce_min_epi32(__W);
109116
}
117+
TEST_CONSTEXPR(_mm512_reduce_min_epi32((__m512i)(__v16si){-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7}) == -8);
110118

111119
unsigned int test_mm512_reduce_min_epu32(__m512i __W){
112120
// CHECK-LABEL: @test_mm512_reduce_min_epu32(
113121
// CHECK: call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> %{{.*}})
114122
return _mm512_reduce_min_epu32(__W);
115123
}
124+
TEST_CONSTEXPR(_mm512_reduce_min_epu32((__m512i)(__v16su){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}) == 0);
116125

117126
float test_mm512_reduce_min_ps(__m512 __W){
118127
// CHECK-LABEL: @test_mm512_reduce_min_ps(

0 commit comments

Comments
 (0)