File tree Expand file tree Collapse file tree 1 file changed +22
-0
lines changed
common/cuda_hip/components Expand file tree Collapse file tree 1 file changed +22
-0
lines changed Original file line number Diff line number Diff line change @@ -163,6 +163,28 @@ GKO_BIND_ATOMIC_ADD(__half);
163
163
GKO_BIND_ATOMIC_ADD (__nv_bfloat16);
164
164
#endif // !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))
165
165
166
+
167
+ #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700
168
+ #define GKO_BIND_UNSUPPORT_ATOMIC_ADD (ValueType ) \
169
+ __forceinline__ __device__ ValueType atomic_add ( \
170
+ ValueType* __restrict__ addr, ValueType val) \
171
+ { \
172
+ asm (" trap;" ); \
173
+ return val; \
174
+ }
175
+
176
+ // CUDA only provides the __CUDA_ARCH__ on the device code, so we can not
177
+ // have compile time guard from host side. Providing an unsupported
178
+ // implementation for atomic operation. This is only for the compilation purpose
179
+ // and the implementation should not rely on this to throw an error.
180
+ GKO_BIND_UNSUPPORT_ATOMIC_ADD (__half);
181
+ // compute capability 7.x and higher already supported 16-bit atomicCAS, so
182
+ // __nv_bfloat16 can also rely on it before compute capability 8.x.
183
+ GKO_BIND_UNSUPPORT_ATOMIC_ADD (__nv_bfloat16);
184
+
185
+ #undef GKO_BIND_UNSUPPORT_ATOMIC_ADD
186
+ #endif // defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700
187
+
166
188
#if !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600))
167
189
// CUDA supports 32-bit __half2 floating-point atomicAdd on
168
190
// devices of compute capability 6.x and higher. note: The atomicity of the
You can’t perform that action at this time.
0 commit comments