#define TORCH_ASSERT_ONLY_METHOD_OPERATORS #include #include #include #include #include #ifndef AT_PER_OPERATOR_HEADERS #include #else #include #include #include #include #endif namespace at::native { template class Op> std::vector foreach_binary_op( TensorList tensors, const Tensor& scalar, const Scalar& alpha = 1) { TORCH_CHECK( scalar.dim() == 0 && scalar.numel() == 1, "scalar tensor expected to be 0 dim but it has ", scalar.dim(), " dimensions and ", scalar.numel(), " elements."); TORCH_CHECK( tensors[0].device() == scalar.device(), "scalar tensor expected to be on ", tensors[0].device(), " but is on ", scalar.device()); std::vector> tensor_lists; std::vector vec_res; vec_res.reserve(tensors.size()); for (const auto& t : tensors) { vec_res.emplace_back(at::native::empty_like(t)); } tensor_lists.emplace_back(tensors.vec()); tensor_lists.emplace_back(std::move(vec_res)); using opmath_t = at::opmath_type; multi_tensor_apply<2>( tensor_lists, BinaryOpScalarTensorFunctor< T, /* depth */ 2, /* r_args_depth */ 1, /* res_arg_index */ 1>(), Op(), scalar.data_ptr(), alpha.to()); return tensor_lists[1]; } template class Op> void foreach_binary_op_( TensorList tensors, const Tensor& scalar, const Scalar& alpha = 1) { TORCH_CHECK( scalar.dim() == 0 && scalar.numel() == 1, "scalar tensor expected to be 0 dim but has ", scalar.dim(), " dimensions and ", scalar.numel(), " elements."); TORCH_CHECK( tensors[0].device() == scalar.device(), "scalar tensor is expected to be on ", tensors[0].device(), " but is on ", scalar.device()); std::vector> tensor_lists; tensor_lists.emplace_back(tensors.vec()); using opmath_t = at::opmath_type; multi_tensor_apply<1>( tensor_lists, BinaryOpScalarTensorFunctor< T, /* depth */ 1, /* r_args_depth */ 1, /* res_arg_index */ 0>(), Op(), scalar.data_ptr(), alpha.to()); increment_version(tensors); } // TODO(crcrpar): Nest dispatch by looking up `scalar.scalar_type` for better // coverage? 
// Generates the pair of entry points
//   foreach_tensor_<NAME>_tensor_kernel_cuda_   (returns void)
//   foreach_tensor_<NAME>_tensor_kernel_cuda    (returns a std::vector)
// for a binary op taking a tensor list and a tensor `scalar`.
//
// Both generated functions dispatch in the same order:
//   1. If `scalar` lives on a CPU device, forward to the matching
//      foreach_tensor_<NAME>_scalar_kernel_cuda[_] with `scalar.item()`.
//   2. Run `check_foreach_api_restrictions(tensors)`.
//   3. Unless `can_use_fast_route(..., {}, DIVISION_OP)` holds AND
//      `tensors[0]` has the same scalar type as `scalar`, forward to
//      foreach_tensor_<NAME>_tensor_kernel_slow[_].
//   4. Otherwise call FUNCTION##_ (first function) or FUNCTION (second).
//
// Macro parameters:
//   FUNCTION    - name of the fast-path implementation; `FUNCTION##_` is
//                 used by the first generated function.
//   NAME        - pasted into the generated and forwarded-to function names.
//   OP          - not referenced in the macro body as it appears here.
//   DIVISION_OP - forwarded as the third argument of `can_use_fast_route`.
//
// NOTE(review): angle-bracket argument lists (e.g. on `std::vector`,
// `ArrayRef`) appear to be missing in this copy -- confirm against the
// canonical file.
#define FOREACH_BINARY_OP_SCALAR_TENSOR(FUNCTION, NAME, OP, DIVISION_OP)     \
  void foreach_tensor_##NAME##_tensor_kernel_cuda_(                          \
      TensorList tensors, const Tensor& scalar) {                            \
    if (scalar.device().type() == DeviceType::CPU) {                         \
      return at::native::foreach_tensor_##NAME##_scalar_kernel_cuda_(        \
          tensors, scalar.item());                                           \
    }                                                                        \
    check_foreach_api_restrictions(tensors);                                 \
    if (!(can_use_fast_route(                                                \
              ArrayRef{tensors}, {}, DIVISION_OP) &&                         \
          tensors[0].scalar_type() == scalar.scalar_type())) {               \
      return at::native::foreach_tensor_##NAME##_tensor_kernel_slow_(        \
          tensors, scalar);                                                  \
    }                                                                        \
                                                                             \
    FUNCTION##_(tensors, scalar);                                            \
  }                                                                          \
                                                                             \
  std::vector foreach_tensor_##NAME##_tensor_kernel_cuda(                    \
      TensorList tensors, const Tensor& scalar) {                            \
    if (scalar.device().type() == DeviceType::CPU) {                         \
      return at::native::foreach_tensor_##NAME##_scalar_kernel_cuda(         \
          tensors, scalar.item());                                           \
    }                                                                        \
    check_foreach_api_restrictions(tensors);                                 \
    if (!(can_use_fast_route(                                                \
              ArrayRef{tensors}, {}, DIVISION_OP) &&                         \
          tensors[0].scalar_type() == scalar.scalar_type())) {               \
      return at::native::foreach_tensor_##NAME##_tensor_kernel_slow(         \
          tensors, scalar);                                                  \
    }                                                                        \
                                                                             \
    return FUNCTION(tensors, scalar);                                        \
  }

// Same idea as FOREACH_BINARY_OP_SCALAR_TENSOR, for ops that also take an
// `alpha` Scalar. Generates
//   foreach_tensor_<NAME>_tensor_kernel_cuda_   (returns void)
//   foreach_tensor_<NAME>_tensor_kernel_cuda    (returns a std::vector)
//
// Differences visible in the body:
//   - there is no CPU-device shortcut for `scalar`;
//   - `alpha` is passed to `can_use_fast_route` and forwarded to both the
//     slow kernel and FUNCTION / FUNCTION##_.
// The OP parameter is not referenced in the macro body as it appears here.
#define FOREACH_BINARY_OP_SCALAR_TENSOR_ALPHA(FUNCTION, NAME, OP)            \
  void foreach_tensor_##NAME##_tensor_kernel_cuda_(                          \
      TensorList tensors, const Tensor& scalar, const Scalar& alpha) {       \
    check_foreach_api_restrictions(tensors);                                 \
    if (!(can_use_fast_route(ArrayRef{tensors}, alpha) &&                    \
          tensors[0].scalar_type() == scalar.scalar_type())) {               \
      return at::native::foreach_tensor_##NAME##_tensor_kernel_slow_(        \
          tensors, scalar, alpha);                                           \
    }                                                                        \
                                                                             \
    FUNCTION##_(tensors, scalar, alpha);                                     \
  }                                                                          \
                                                                             \
  std::vector foreach_tensor_##NAME##_tensor_kernel_cuda(                    \
      TensorList tensors, const Tensor& scalar, const Scalar& alpha) {       \
    check_foreach_api_restrictions(tensors);                                 \
    if (!(can_use_fast_route(ArrayRef{tensors}, alpha) &&                    \
          tensors[0].scalar_type() == scalar.scalar_type())) {               \
      return at::native::foreach_tensor_##NAME##_tensor_kernel_slow(         \
          tensors, scalar, alpha);                                           \
    }                                                                        \
                                                                             \
    return FUNCTION(tensors, scalar, alpha);                                 \
  }

template