@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if GOOGLE_CUDA  // || TENSORFLOW_USE_ROCM
 
 #define EIGEN_USE_GPU
 
@@ -52,7 +52,7 @@ __global__ void DynamicStitchKernelV2(const int32 slice_size,
                                       const int32* input_indices,
                                       T** input_ptrs,
                                       T* output) {
-  CUDA_1D_KERNEL_LOOP(output_index, output_size) {
+  GPU_1D_KERNEL_LOOP(output_index, output_size) {
     const int32 slice_id = output_index / slice_size;
     const int32 slice_offset = output_index % slice_size;
     const int32 input_index = input_indices[slice_id];
@@ -65,7 +65,7 @@ __global__ void DynamicStitchKernelV2(const int32 slice_size,
 __global__ void InitializeIndicesFlatWork(int32* indices_flat_work,
                                           const int32 flat_work_size,
                                           const int32 val) {
-  CUDA_1D_KERNEL_LOOP(output_index, flat_work_size) {
+  GPU_1D_KERNEL_LOOP(output_index, flat_work_size) {
     indices_flat_work[output_index] = val;
   }
 }
@@ -80,7 +80,7 @@ __global__ void DynamicStitchPrepKernel(const int32* indices_flat,
                                         const int32 slice_size,
                                         const int32 output_size) {
 
-  CUDA_1D_KERNEL_LOOP(output_index, output_size) {
+  GPU_1D_KERNEL_LOOP(output_index, output_size) {
     // for indices
     indices_flat_work[indices_flat[output_index]] = output_index;
     // find the partition id
@@ -123,7 +123,7 @@ void DynamicStitchGPUImplV2(const Eigen::GpuDevice& gpu_device,
                             Tensor* input_ptrs,
                             T* output) {
   const int32 output_size = first_dim_size * slice_size;
-  auto config = GetCudaLaunchConfig(output_size, gpu_device);
+  auto config = GetGpuLaunchConfig(output_size, gpu_device);
 
   DynamicStitchKernelV2<T>
       <<<config.block_count, config.thread_per_block, 0, gpu_device.stream()>>>(
@@ -146,13 +146,13 @@ void DynamicStitchGPUPrep(const Eigen::GpuDevice& gpu_device,
                           const int32 first_dim_size) {
 
   // initialize indices_flat_work by -1
-  auto config = GetCudaLaunchConfig(first_dim_size, gpu_device);
+  auto config = GetGpuLaunchConfig(first_dim_size, gpu_device);
   InitializeIndicesFlatWork
       <<<config.block_count, config.thread_per_block, 0, gpu_device.stream()>>>(
           indices_flat_work->flat<int32>().data(),
           first_dim_size, -1);
 
-  config = GetCudaLaunchConfig(data_elements_size, gpu_device);
+  config = GetGpuLaunchConfig(data_elements_size, gpu_device);
   DynamicStitchPrepKernel<T>
       <<<config.block_count, config.thread_per_block, 0, gpu_device.stream()>>>(
           indices_flat->flat<int32>().data(),
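
The renamed helpers (GPU_1D_KERNEL_LOOP, GetGpuLaunchConfig) are TensorFlow's GPU-generic wrappers around the usual grid-stride-loop and 1-D launch-configuration idiom. Below is a minimal, self-contained CUDA sketch of that idiom, assuming nothing about TensorFlow's actual definitions; SKETCH_1D_KERNEL_LOOP, FillKernel, and the hand-rolled block count are hypothetical names used only for illustration.

// Minimal sketch of the grid-stride-loop + launch-config idiom used above.
// Not TensorFlow's implementation; names here are illustrative only.
#include <cstdio>
#include <cuda_runtime.h>

// Grid-stride loop: each thread starts at its global index and strides by
// the total thread count, so one launch covers any n.
#define SKETCH_1D_KERNEL_LOOP(i, n)                              \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n);  \
       i += blockDim.x * gridDim.x)

__global__ void FillKernel(int* out, int n, int val) {
  SKETCH_1D_KERNEL_LOOP(i, n) { out[i] = val; }
}

int main() {
  const int n = 1 << 20;
  int* d_out = nullptr;
  cudaMalloc(&d_out, n * sizeof(int));

  // Rough analogue of GetGpuLaunchConfig: a fixed block size and enough
  // blocks to cover n; the grid-stride loop handles any remainder.
  const int thread_per_block = 256;
  const int block_count = (n + thread_per_block - 1) / thread_per_block;

  // Mirrors the -1 initialization done by InitializeIndicesFlatWork above.
  FillKernel<<<block_count, thread_per_block, 0, /*stream=*/0>>>(d_out, n, -1);
  cudaDeviceSynchronize();

  int first = 0;
  cudaMemcpy(&first, d_out, sizeof(int), cudaMemcpyDeviceToHost);
  printf("out[0] = %d\n", first);  // expected: -1
  cudaFree(d_out);
  return 0;
}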