#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
// NOTE(review): the header names of the #include directives in this file are
// missing from this copy; restore them from version control before building.
#include
#include
#include

#ifndef AT_PER_OPERATOR_HEADERS
#include
#include
#else
#include
#include
#include
#endif

namespace at::native {
namespace {

using namespace mps;

// Source of the Metal pipeline states used below: the bundled shader library by
// default, or whatever the header included in the JIT branch provides.
#ifndef PYTORCH_JIT_COMPILE_SHADERS
static auto& lib = MetalShaderLibrary::getBundledLibrary();
#else
#include
#endif

/// Writes a rescaled copy of `self` into `out`.
///
/// Steps:
///   1. Compute the p-norm of `self` over every dimension except `dim`
///      (keepdim=true, so the result broadcasts against `self`).
///   2. Run the "renorm_<metal type>" kernel with one thread per norm element;
///      it receives the norms, an uninitialized `factor` tensor and `maxnorm`.
///      Presumably it fills `factor` with the per-slice scale that caps each
///      norm at `maxnorm` -- confirm against the Metal shader source.
///   3. out = self * factor.
///
/// @param self     input tensor
/// @param p        norm order, read as a double
/// @param dim      dimension that is kept (not reduced); wrapped against the
///                 rank of `self` by c10::maybe_wrap_dim
/// @param maxnorm  scalar forwarded to the kernel
/// @param out      destination tensor; written through a const_cast
void renorm_out_mps(const Tensor& self, const Scalar& p, int64_t dim, const Scalar& maxnorm, const Tensor& out) {
  auto self_sizes = self.sizes();
  dim = c10::maybe_wrap_dim(dim, self_sizes.size());

  // Reduce over all dimensions except `dim`: start from [0, rank) and drop `dim`.
  DimVector reduce_dims(self_sizes.size());
  std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
  reduce_dims.erase(reduce_dims.begin() + dim);

  Tensor norm = at::linalg_vector_norm(self, p.toDouble(), reduce_dims, /*keepdim=*/true);
  // Same shape as `norm`; filled by the kernel dispatched below.
  auto factor = at::empty(norm.sizes(), self.options());

  std::string key = "renorm_" + scalarToMetalTypeString(self);
  MPSStream* mpsStream = getCurrentMPSStream();
  id computeEncoder = mpsStream->commandEncoder();
  id renormPSO = lib.getPipelineStateForFunc(key);

  dispatch_sync(mpsStream->queue(), ^() {
    @autoreleasepool {
      // this function call is a no-op if MPSProfiler is not enabled
      getMPSProfiler().beginProfileKernel(renormPSO, key, {norm});

      [computeEncoder setComputePipelineState:renormPSO];
      // NOTE(review): maxnorm is passed as a 32-bit float on the assumption that
      // the kernel declares it that way -- confirm against the shader signature.
      mtl_setArgs(computeEncoder, norm, factor, maxnorm.to<float>());
      mtl_dispatch1DJob(computeEncoder, renormPSO, norm.numel());

      getMPSProfiler().endProfileKernel(renormPSO);
    }
  });

  // The structured-kernel interface hands `out` over as const; at::mul_outf
  // needs a mutable reference to write the result.
  at::mul_outf(self, factor, const_cast<Tensor&>(out));
}

} // namespace

/// Structured-kernel entry point for renorm on MPS; forwards all arguments
/// unchanged to the file-local renorm_out_mps helper above.
TORCH_IMPL_FUNC(renorm_out_mps)
(const Tensor& self, const Scalar& p, int64_t dim, const Scalar& maxnorm, const Tensor& out) {
  renorm_out_mps(self, p, dim, maxnorm, out);
}

} // namespace at::native