Skip to content

Commit 47d2b13

Browse files
committed
add unsupported atomic add
1 parent 3f9ff8a commit 47d2b13

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

common/cuda_hip/components/atomic.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,28 @@ GKO_BIND_ATOMIC_ADD(__half);
163163
GKO_BIND_ATOMIC_ADD(__nv_bfloat16);
164164
#endif // !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))
165165

166+
167+
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700
168+
#define GKO_BIND_UNSUPPORT_ATOMIC_ADD(ValueType) \
169+
__forceinline__ __device__ ValueType atomic_add( \
170+
ValueType* __restrict__ addr, ValueType val) \
171+
{ \
172+
asm("trap;"); \
173+
return val; \
174+
}
175+
176+
// CUDA only provides the __CUDA_ARCH__ on the device code, so we can not
177+
// have compile time guard from host side. Providing an unsupported
178+
// implementation for atomic operation. This is only for the compilation purpose
179+
// and the implementation should not rely on this to throw an error.
180+
GKO_BIND_UNSUPPORT_ATOMIC_ADD(__half);
181+
// compute capability 7.x and higher already supported 16-bit atomicCAS, so
182+
// __nv_bfloat16 can also rely on it before compute capability 8.x.
183+
GKO_BIND_UNSUPPORT_ATOMIC_ADD(__nv_bfloat16);
184+
185+
#undef GKO_BIND_UNSUPPORT_ATOMIC_ADD
186+
#endif // defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700
187+
166188
#if !(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600))
167189
// CUDA supports 32-bit __half2 floating-point atomicAdd on
168190
// devices of compute capability 6.x and higher. note: The atomicity of the

0 commit comments

Comments
 (0)