// Validate arguments.
TORCH_CHECK(x.is_cuda(), "x must reside on CUDA device");
TORCH_CHECK(b.numel() == 0 || (b.dtype() == x.dtype() && b.device() == x.device()), "b must have the same dtype and device as x");
TORCH_CHECK(xref.numel() == 0 || (xref.sizes() == x.sizes() && xref.dtype() == x.dtype() && xref.device() == x.device()), "xref must have the same shape, dtype, and device as x");
TORCH_CHECK(yref.numel() == 0 || (yref.sizes() == x.sizes() && yref.dtype() == x.dtype() && yref.device() == x.device()), "yref must have the same shape, dtype, and device as x");
TORCH_CHECK(dy.numel() == 0 || (dy.sizes() == x.sizes() && dy.dtype() == x.dtype() && dy.device() == x.device()), "dy must have the same shape, dtype, and device as x");
TORCH_CHECK(x.numel() <= INT_MAX, "x is too large");
TORCH_CHECK(b.dim() == 1, "b must have rank 1");
TORCH_CHECK(b.numel() == 0 || (dim >= 0 && dim < x.dim()), "dim is out of bounds");
TORCH_CHECK(b.numel() == 0 || b.numel() == x.size(dim), "b has wrong number of elements");
TORCH_CHECK(grad >= 0, "grad must be non-negative");
// Validate layout.
TORCH_CHECK(x.is_non_overlapping_and_dense(), "x must be non-overlapping and dense");
TORCH_CHECK(b.is_contiguous(), "b must be contiguous");
TORCH_CHECK(xref.numel() == 0 || has_same_layout(xref, x), "xref must have the same layout as x");
TORCH_CHECK(yref.numel() == 0 || has_same_layout(yref, x), "yref must have the same layout as x");
TORCH_CHECK(dy.numel() == 0 || has_same_layout(dy, x), "dy must have the same layout as x");
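// For reference, a plausible sketch of the has_same_layout() helper used above
// (an assumption about its behavior, not the actual definition, which would live
// elsewhere in this file): same rank, same sizes, and same strides for every
// dimension whose extent is at least 2, since the strides of size-0/1 dimensions
// never affect addressing.
//
//     static bool has_same_layout(torch::Tensor x, torch::Tensor y)
//     {
//         if (x.dim() != y.dim())
//             return false;
//         for (int64_t i = 0; i < x.dim(); i++)
//         {
//             if (x.size(i) != y.size(i))
//                 return false;
//             if (x.size(i) >= 2 && x.stride(i) != y.stride(i))
//                 return false;
//         }
//         return true;
//     }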