@@ -123,7 +123,8 @@ class IndexSelectDim0GPUOp
       const bool skip_indices_sorting_fwd) {
     TENSORS_ON_SAME_CUDA_GPU_IF_NOT_OPTIONAL(input, indices);
     // Expect a 1D index tensor
-    TORCH_CHECK(indices.dim() == 1, "Index tensor must be 1D")
+    TORCH_CHECK(
+        indices.dim() == 1, "Index tensor must be 1D, but got ", indices.dim());
 
     Tensor sorted_indices, orig_indices;
     if (skip_indices_sorting_fwd) {
@@ -149,7 +150,10 @@ class IndexSelectDim0GPUOp
   static torch::autograd::variable_list backward(
       torch::autograd::AutogradContext* ctx,
       torch::autograd::variable_list grad_outputs) {
-    TORCH_CHECK(grad_outputs.size() == 1);
+    TORCH_CHECK(
+        grad_outputs.size() == 1,
+        "The size of grad_outputs should be 1, but got ",
+        grad_outputs.size());
     TENSOR_ON_CUDA_GPU(grad_outputs[0]);
 
     bool skip_indices_sorting_fwd =
@@ -237,7 +241,8 @@ static torch::autograd::variable_list group_index_select_dim0_forward_impl_gpu(
       at::TensorOptions().dtype(at::kByte).pinned_memory(true));
 
   // Ensure that args_tensor is contiguous
-  TORCH_CHECK(args_tensor.is_contiguous());
+  TORCH_CHECK(
+      args_tensor.is_contiguous(), "Tensor args_tensor must be contiguous.");
 
   // Initialize raw pointers to point to Tensor args_tensor
   int64_t* input_ptrs = nullptr;
@@ -288,7 +293,14 @@ static torch::autograd::variable_list group_index_select_dim0_forward_impl_gpu(
     // Verify that all input tensors have the same number of dimensions
     TORCH_CHECK(
         input_dim == input.dim(),
-        "All inputs in group_index_select must have the same number of dimensions");
+        "All inputs in group_index_select must have the same number of dimensions. Expect ",
+        input_dim,
+        " but got group ",
+        i,
+        " with ",
+        input.dim(),
+        ". Group size is ",
+        group_size);
 
     // Verify that all tensors are on the same GPU
     TENSORS_ON_SAME_CUDA_GPU_IF_NOT_OPTIONAL(input, indices);
@@ -298,7 +310,14 @@ static torch::autograd::variable_list group_index_select_dim0_forward_impl_gpu(
     // Verify that all input tensors have the same shape[0]
     TORCH_CHECK(
         num_output_rows == num_output_rows_,
-        "The number of indices to be selected must be the same for the entire group");
+        "The number of indices to be selected must be the same for the entire group of ",
+        group_size,
+        ". Expect indices size to be ",
+        num_output_rows,
+        ", but got group ",
+        i,
+        " with indices size of ",
+        num_output_rows_);
     const auto input_reshaped_ = input.reshape({input.size(0), -1});
 
     // Number of columns can be different
@@ -314,7 +333,7 @@ static torch::autograd::variable_list group_index_select_dim0_forward_impl_gpu(
     input_shape[0] = num_output_rows_;
     Tensor output = at::empty(input_shape, input.options());
     // Ensure that the allocated output is contiguous
-    TORCH_CHECK(output.is_contiguous())
+    TORCH_CHECK(output.is_contiguous(), "output tensor must be contiguous.");
     output_group.push_back(output);
 
     // Store input and indices contigs to keep them alive during the kernel
@@ -360,7 +379,8 @@ static torch::autograd::variable_list group_index_select_dim0_forward_impl_gpu(
   auto saved_data_t = at::empty(
       {sizeof(saved_data) / sizeof(int64_t)},
       at::TensorOptions().dtype(at::kLong));
-  TORCH_CHECK(saved_data_t.is_contiguous());
+  TORCH_CHECK(
+      saved_data_t.is_contiguous(), "Tensor saved_data_t must be contiguous.");
   memcpy(saved_data_t.data_ptr<int64_t>(), saved_data, sizeof(saved_data));
 
   group_index_select_or_add_cuda(
@@ -389,7 +409,10 @@ static torch::autograd::variable_list group_index_select_dim0_forward_impl_gpu(
 static torch::autograd::variable_list group_index_select_dim0_backward_impl_gpu(
     at::TensorList all_inputs,
     c10::SymIntArrayRef output_shape_group_ref) {
-  TORCH_CHECK(all_inputs.size() > 2);
+  TORCH_CHECK(
+      all_inputs.size() > 2,
+      "all_inputs size must be larger than 2, but got ",
+      all_inputs.size());
 
   // all_input size = group_size * 2 (from grads, indices)
   // + 1 args_tensor + 1 saved_data + 1 first input
@@ -412,11 +435,18 @@ static torch::autograd::variable_list group_index_select_dim0_backward_impl_gpu(
       all_inputs.cbegin() + group_size, all_inputs.cbegin() + 2 * group_size);
 
   // Retrieve saved data
-  TORCH_CHECK(saved_data.device() == at::kCPU);
-  TORCH_CHECK(saved_data.is_contiguous());
+  TORCH_CHECK(
+      saved_data.device() == at::kCPU, "Tensor saved_data must be on CPU.");
+  TORCH_CHECK(
+      saved_data.is_contiguous(), "Tensor saved_data must be contiguous.");
   int64_t* saved_data_ptr = saved_data.data_ptr<int64_t>();
   // Check that the size is the same
-  TORCH_CHECK(saved_data_ptr[0] == group_size);
+  TORCH_CHECK(
+      saved_data_ptr[0] == group_size,
+      "The size of saved_data[0] must match group_size. Expect ",
+      group_size,
+      " but got ",
+      saved_data_ptr[0]);
   const bool use_var_cols = saved_data_ptr[1];
   int64_t* warp_offsets_group = reinterpret_cast<int64_t*>(saved_data_ptr[2]);
   int32_t* num_cols_group = reinterpret_cast<int32_t*>(saved_data_ptr[3]);
@@ -448,7 +478,8 @@ static torch::autograd::variable_list group_index_select_dim0_backward_impl_gpu(
       {group_size * 3},
       at::TensorOptions().dtype(at::kLong).pinned_memory(true));
   // Ensure that args_tensor is contiguous
-  TORCH_CHECK(args_tensor.is_contiguous());
+  TORCH_CHECK(
+      args_tensor.is_contiguous(), "Tensor args_tensor must be contiguous.");
   int64_t* grad_output_ptrs = args_tensor.data_ptr<int64_t>();
   int64_t* grad_input_ptrs = args_tensor.data_ptr<int64_t>() + group_size;
   int64_t* indices_ptrs = args_tensor.data_ptr<int64_t>() + 2 * group_size;
@@ -485,20 +516,33 @@ static torch::autograd::variable_list group_index_select_dim0_backward_impl_gpu(
   // Allocate a big tensor to avoid calling many small elementwise kernels
   const auto group_grad_input =
       at::zeros({group_grad_input_numel}, fwd_input.options());
-  TORCH_CHECK(group_grad_input.is_contiguous());
+  TORCH_CHECK(
+      group_grad_input.is_contiguous(),
+      "Tensor group_grad_input must be contiguous.");
 
   // Split to output_group
   auto output_group = group_grad_input.split(grad_input_numels, 0);
 
-  TORCH_CHECK(output_group.size() == static_cast<size_t>(group_size));
+  TORCH_CHECK(
+      output_group.size() == static_cast<size_t>(group_size),
+      "output_group size must be ",
+      group_size,
+      " but got ",
+      output_group.size());
 
   // Reshape grad inputs and obtain their pointers
   for (int i = 0; i < group_size; i++) {
     const auto grad_input_shape = std::vector<int64_t>(
         output_shape_group.begin() + i * output_dim,
         output_shape_group.begin() + (i + 1) * output_dim);
     output_group[i] = output_group[i].reshape(grad_input_shape);
-    TORCH_CHECK(output_group[i].is_contiguous());
+    TORCH_CHECK(
+        output_group[i].is_contiguous(),
+        "Tensor output_group ",
+        i,
+        " of ",
+        group_size,
+        " must be contiguous.");
     grad_input_ptrs[i] = reinterpret_cast<int64_t>(output_group[i].data_ptr());
 
     // 2) Add group_size gradients for inputs
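
Every hunk in this diff applies the same pattern: bare TORCH_CHECK(cond) assertions are extended with trailing message arguments so that a failure reports the offending value rather than only the stringified condition. Below is a minimal standalone sketch of how that style behaves, assuming libtorch is available and linked; the file name, tensor, and message are illustrative and not part of the diff.

```cpp
// check_message_demo.cpp -- hypothetical example, not part of the diff.
#include <iostream>

#include <torch/torch.h>

int main() {
  // A 2-D index tensor, chosen so the dimensionality check below fails.
  const auto indices = torch::zeros({2, 3}, torch::kLong);
  try {
    // TORCH_CHECK(cond, args...) concatenates the trailing arguments into the
    // thrown c10::Error message when cond is false; this is the style the
    // diff adopts for its checks.
    TORCH_CHECK(
        indices.dim() == 1, "Index tensor must be 1D, but got ", indices.dim());
  } catch (const c10::Error& e) {
    // The error text begins with "Index tensor must be 1D, but got 2".
    std::cout << e.what() << std::endl;
  }
  return 0;
}
```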