// clang-format off
#pragma once
#include <ATen/Context.h>
#include <ATen/DeviceGuard.h>
#include <ATen/TensorUtils.h>
#include <ATen/TracerMode.h>
#include <ATen/core/Generator.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <c10/core/Scalar.h>
#include <c10/core/Storage.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/Deprecated.h>
#include <c10/util/Optional.h>
// @generated by torchgen/gen_executorch.py from Functions.h

#include <ATen/Functions.h>
#include "CustomOpsNativeFunctions.h"

namespace torch {
namespace executor {


namespace aten {

// Thin wrapper: passes every argument except `context` to at::add_outf, in the
// same order, and returns the reference that call produces.
// aten::add.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & add_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & other,
    const at::Scalar & alpha,
    at::Tensor & out) {
    at::Tensor & result = at::add_outf(self, other, alpha, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::baddbmm_outf, in
// the same order, and returns the reference that call produces.
// aten::baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & baddbmm_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & batch1,
    const at::Tensor & batch2,
    const at::Scalar & beta,
    const at::Scalar & alpha,
    at::Tensor & out) {
    at::Tensor & result = at::baddbmm_outf(self, batch1, batch2, beta, alpha, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::bmm_outf, in the
// same order, and returns the reference that call produces.
// aten::bmm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & bmm_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & mat2,
    at::Tensor & out) {
    at::Tensor & result = at::bmm_outf(self, mat2, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::cat_outf, in the
// same order, and returns the reference that call produces.
// aten::cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & cat_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::ITensorListRef & tensors,
    int64_t dim,
    at::Tensor & out) {
    at::Tensor & result = at::cat_outf(tensors, dim, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::clamp_outf, in
// the same order, and returns the reference that call produces.
// aten::clamp.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & clamp_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const ::std::optional<at::Scalar> & min,
    const ::std::optional<at::Scalar> & max,
    at::Tensor & out) {
    at::Tensor & result = at::clamp_outf(self, min, max, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::cumsum_outf, in
// the same order, and returns the reference that call produces.
// aten::cumsum.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & cumsum_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim,
    ::std::optional<at::ScalarType> dtype,
    at::Tensor & out) {
    at::Tensor & result = at::cumsum_outf(self, dim, dtype, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::div_outf, in the
// same order, and returns the reference that call produces.
// aten::div.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & div_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & other,
    at::Tensor & out) {
    at::Tensor & result = at::div_outf(self, other, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::exp_outf, in the
// same order, and returns the reference that call produces.
// aten::exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & exp_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::Tensor & out) {
    at::Tensor & result = at::exp_outf(self, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::floor_divide_outf, in the same order, and returns the reference that
// call produces.
// aten::floor_divide.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & floor_divide_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & other,
    at::Tensor & out) {
    at::Tensor & result = at::floor_divide_outf(self, other, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::index_outf, in
// the same order, and returns the reference that call produces.
// aten::index.Tensor_out(Tensor self, Tensor?[] indices, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & index_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const c10::List<::std::optional<at::Tensor>> & indices,
    at::Tensor & out) {
    at::Tensor & result = at::index_outf(self, indices, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::mean_outf, in
// the same order, and returns the reference that call produces.
// aten::mean.out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & mean_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::OptionalIntArrayRef dim,
    bool keepdim,
    ::std::optional<at::ScalarType> dtype,
    at::Tensor & out) {
    at::Tensor & result = at::mean_outf(self, dim, keepdim, dtype, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::mm_outf, in the
// same order, and returns the reference that call produces.
// aten::mm.out(Tensor self, Tensor mat2, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & mm_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & mat2,
    at::Tensor & out) {
    at::Tensor & result = at::mm_outf(self, mat2, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::mul_outf, in the
// same order, and returns the reference that call produces.
// aten::mul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & mul_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & other,
    at::Tensor & out) {
    at::Tensor & result = at::mul_outf(self, other, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::native_batch_norm_outf, in the same order, and returns the tuple of
// references that call produces.
// aten::native_batch_norm.out(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, *, Tensor(a!) out, Tensor(b!) save_mean, Tensor(c!) save_invstd) -> (Tensor(a!), Tensor(b!), Tensor(c!))
TORCH_API inline ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> native_batch_norm_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & input,
    const ::std::optional<at::Tensor> & weight,
    const ::std::optional<at::Tensor> & bias,
    const ::std::optional<at::Tensor> & running_mean,
    const ::std::optional<at::Tensor> & running_var,
    bool training,
    double momentum,
    double eps,
    at::Tensor & out,
    at::Tensor & save_mean,
    at::Tensor & save_invstd) {
    ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> outputs =
        at::native_batch_norm_outf(input, weight, bias, running_mean, running_var, training, momentum, eps, out, save_mean, save_invstd);
    return outputs;
}


// Thin wrapper: passes every argument except `context` to at::round_outf, in
// the same order, and returns the reference that call produces.
// aten::round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & round_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::Tensor & out) {
    at::Tensor & result = at::round_outf(self, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::gelu_outf, in
// the same order, and returns the reference that call produces.
// aten::gelu.out(Tensor self, *, str approximate='none', Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & gelu_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    c10::string_view approximate,
    at::Tensor & out) {
    at::Tensor & result = at::gelu_outf(self, approximate, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::sigmoid_outf, in
// the same order, and returns the reference that call produces.
// aten::sigmoid.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & sigmoid_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::Tensor & out) {
    at::Tensor & result = at::sigmoid_outf(self, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::logit_outf, in
// the same order, and returns the reference that call produces.
// aten::logit.out(Tensor self, float? eps=None, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & logit_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    ::std::optional<double> eps,
    at::Tensor & out) {
    at::Tensor & result = at::logit_outf(self, eps, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::_softmax_outf,
// in the same order, and returns the reference that call produces.
// aten::_softmax.out(Tensor self, int dim, bool half_to_float, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & _softmax_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim,
    bool half_to_float,
    at::Tensor & out) {
    at::Tensor & result = at::_softmax_outf(self, dim, half_to_float, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::stack_outf, in
// the same order, and returns the reference that call produces.
// aten::stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & stack_outf(
    torch::executor::KernelRuntimeContext & context,
    at::TensorList tensors,
    int64_t dim,
    at::Tensor & out) {
    at::Tensor & result = at::stack_outf(tensors, dim, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::sum_outf, in the
// same order, and returns the reference that call produces.
// aten::sum.IntList_out(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & sum_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::OptionalIntArrayRef dim,
    bool keepdim,
    ::std::optional<at::ScalarType> dtype,
    at::Tensor & out) {
    at::Tensor & result = at::sum_outf(self, dim, keepdim, dtype, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::tanh_outf, in
// the same order, and returns the reference that call produces.
// aten::tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & tanh_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::Tensor & out) {
    at::Tensor & result = at::tanh_outf(self, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::sub_outf, in the
// same order, and returns the reference that call produces.
// aten::sub.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & sub_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & other,
    const at::Scalar & alpha,
    at::Tensor & out) {
    at::Tensor & result = at::sub_outf(self, other, alpha, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::addmm_outf, in
// the same order, and returns the reference that call produces.
// aten::addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & addmm_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & mat1,
    const at::Tensor & mat2,
    const at::Scalar & beta,
    const at::Scalar & alpha,
    at::Tensor & out) {
    at::Tensor & result = at::addmm_outf(self, mat1, mat2, beta, alpha, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::bitwise_and_outf, in the same order, and returns the reference that call
// produces.
// aten::bitwise_and.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & bitwise_and_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & other,
    at::Tensor & out) {
    at::Tensor & result = at::bitwise_and_outf(self, other, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::ne_outf, in the
// same order, and returns the reference that call produces.
// aten::ne.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & ne_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Scalar & other,
    at::Tensor & out) {
    at::Tensor & result = at::ne_outf(self, other, out);
    return result;
}


// Scalar overload. Thin wrapper: passes every argument except `context` to
// at::eq_outf, in the same order, and returns the reference that call produces.
// aten::eq.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & eq_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Scalar & other,
    at::Tensor & out) {
    at::Tensor & result = at::eq_outf(self, other, out);
    return result;
}


// Tensor overload. Thin wrapper: passes every argument except `context` to
// at::eq_outf, in the same order, and returns the reference that call produces.
// aten::eq.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & eq_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & other,
    at::Tensor & out) {
    at::Tensor & result = at::eq_outf(self, other, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::gt_outf, in the
// same order, and returns the reference that call produces.
// aten::gt.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & gt_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Scalar & other,
    at::Tensor & out) {
    at::Tensor & result = at::gt_outf(self, other, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::index_select_outf, in the same order, and returns the reference that
// call produces.
// aten::index_select.out(Tensor self, int dim, Tensor index, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & index_select_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim,
    const at::Tensor & index,
    at::Tensor & out) {
    at::Tensor & result = at::index_select_outf(self, dim, index, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::nonzero_outf, in
// the same order, and returns the reference that call produces.
// aten::nonzero.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & nonzero_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::Tensor & out) {
    at::Tensor & result = at::nonzero_outf(self, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::remainder_outf,
// in the same order, and returns the reference that call produces.
// aten::remainder.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & remainder_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Scalar & other,
    at::Tensor & out) {
    at::Tensor & result = at::remainder_outf(self, other, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::max_outf, in the
// same order, and returns the reference that call produces.
// aten::max.unary_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & max_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::Tensor & out) {
    at::Tensor & result = at::max_outf(self, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::minimum_outf, in
// the same order, and returns the reference that call produces.
// aten::minimum.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & minimum_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & other,
    at::Tensor & out) {
    at::Tensor & result = at::minimum_outf(self, other, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::sort_outf, in
// the same order, and returns the tuple of references that call produces.
// aten::sort.values(Tensor self, int dim=-1, bool descending=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
TORCH_API inline ::std::tuple<at::Tensor &,at::Tensor &> sort_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim,
    bool descending,
    at::Tensor & values,
    at::Tensor & indices) {
    ::std::tuple<at::Tensor &,at::Tensor &> outputs =
        at::sort_outf(self, dim, descending, values, indices);
    return outputs;
}


// Thin wrapper: passes every argument except `context` to at::topk_outf, in
// the same order, and returns the tuple of references that call produces.
// aten::topk.values(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
TORCH_API inline ::std::tuple<at::Tensor &,at::Tensor &> topk_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t k,
    int64_t dim,
    bool largest,
    bool sorted,
    at::Tensor & values,
    at::Tensor & indices) {
    ::std::tuple<at::Tensor &,at::Tensor &> outputs =
        at::topk_outf(self, k, dim, largest, sorted, values, indices);
    return outputs;
}


// Thin wrapper: passes every argument except `context` to at::leaky_relu_outf,
// in the same order, and returns the reference that call produces.
// aten::leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & leaky_relu_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Scalar & negative_slope,
    at::Tensor & out) {
    at::Tensor & result = at::leaky_relu_outf(self, negative_slope, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::softplus_outf,
// in the same order, and returns the reference that call produces.
// aten::softplus.out(Tensor self, Scalar beta=1, Scalar threshold=20, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & softplus_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Scalar & beta,
    const at::Scalar & threshold,
    at::Tensor & out) {
    at::Tensor & result = at::softplus_outf(self, beta, threshold, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::avg_pool2d_outf,
// in the same order, and returns the reference that call produces.
// aten::avg_pool2d.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & avg_pool2d_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef kernel_size,
    at::IntArrayRef stride,
    at::IntArrayRef padding,
    bool ceil_mode,
    bool count_include_pad,
    ::std::optional<int64_t> divisor_override,
    at::Tensor & out) {
    at::Tensor & result = at::avg_pool2d_outf(self, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::max_pool2d_with_indices_outf, in the same order, and returns the tuple
// of references that call produces.
// aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!))
TORCH_API inline ::std::tuple<at::Tensor &,at::Tensor &> max_pool2d_with_indices_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef kernel_size,
    at::IntArrayRef stride,
    at::IntArrayRef padding,
    at::IntArrayRef dilation,
    bool ceil_mode,
    at::Tensor & out,
    at::Tensor & indices) {
    ::std::tuple<at::Tensor &,at::Tensor &> outputs =
        at::max_pool2d_with_indices_outf(self, kernel_size, stride, padding, dilation, ceil_mode, out, indices);
    return outputs;
}


// Overload taking an explicit output size and per-axis optional scales.
// Thin wrapper: passes every argument except `context` to
// at::upsample_nearest2d_outf, in the same order, and returns the reference
// that call produces.
// aten::upsample_nearest2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & upsample_nearest2d_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef output_size,
    ::std::optional<double> scales_h,
    ::std::optional<double> scales_w,
    at::Tensor & out) {
    at::Tensor & result = at::upsample_nearest2d_outf(self, output_size, scales_h, scales_w, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::linalg_inv_ex_outf, in the same order, and returns the tuple of
// references that call produces.
// aten::linalg_inv_ex.inverse(Tensor A, *, bool check_errors=False, Tensor(a!) inverse, Tensor(b!) info) -> (Tensor(a!) inverse, Tensor(b!) info)
TORCH_API inline ::std::tuple<at::Tensor &,at::Tensor &> linalg_inv_ex_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & A,
    bool check_errors,
    at::Tensor & inverse,
    at::Tensor & info) {
    ::std::tuple<at::Tensor &,at::Tensor &> outputs =
        at::linalg_inv_ex_outf(A, check_errors, inverse, info);
    return outputs;
}


// Thin wrapper: passes every argument except `context` to
// at::unbind_copy_outf, in the same order. Nothing is returned.
// aten::unbind_copy.int_out(Tensor self, int dim=0, *, Tensor(a!)[] out) -> ()
TORCH_API inline void unbind_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim,
    at::TensorList out) {
    at::unbind_copy_outf(self, dim, out);
}


// Thin wrapper: passes every argument except `context` to
// at::split_copy_outf, in the same order. Nothing is returned.
// aten::split_copy.Tensor_out(Tensor self, SymInt split_size, int dim=0, *, Tensor(a!)[] out) -> ()
TORCH_API inline void split_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t split_size,
    int64_t dim,
    at::TensorList out) {
    at::split_copy_outf(self, split_size, dim, out);
}


// Thin wrapper: passes every argument except `context` to
// at::split_with_sizes_copy_outf, in the same order. Nothing is returned.
// aten::split_with_sizes_copy.out(Tensor self, SymInt[] split_sizes, int dim=0, *, Tensor(a!)[] out) -> ()
TORCH_API inline void split_with_sizes_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef split_sizes,
    int64_t dim,
    at::TensorList out) {
    at::split_with_sizes_copy_outf(self, split_sizes, dim, out);
}


// Thin wrapper: passes every argument except `context` to
// at::constant_pad_nd_outf, in the same order, and returns the reference that
// call produces.
// aten::constant_pad_nd.out(Tensor self, SymInt[] pad, Scalar value=0, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & constant_pad_nd_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef pad,
    const at::Scalar & value,
    at::Tensor & out) {
    at::Tensor & result = at::constant_pad_nd_outf(self, pad, value, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::convolution_outf, in the same order, and returns the reference that call
// produces.
// aten::convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & convolution_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & input,
    const at::Tensor & weight,
    const ::std::optional<at::Tensor> & bias,
    at::IntArrayRef stride,
    at::IntArrayRef padding,
    at::IntArrayRef dilation,
    bool transposed,
    at::IntArrayRef output_padding,
    int64_t groups,
    at::Tensor & out) {
    at::Tensor & result = at::convolution_outf(input, weight, bias, stride, padding, dilation, transposed, output_padding, groups, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::embedding_outf,
// in the same order, and returns the reference that call produces.
// aten::embedding.out(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & embedding_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & weight,
    const at::Tensor & indices,
    int64_t padding_idx,
    bool scale_grad_by_freq,
    bool sparse,
    at::Tensor & out) {
    at::Tensor & result = at::embedding_outf(weight, indices, padding_idx, scale_grad_by_freq, sparse, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::grid_sampler_2d_outf, in the same order, and returns the reference that
// call produces.
// aten::grid_sampler_2d.out(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & grid_sampler_2d_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & input,
    const at::Tensor & grid,
    int64_t interpolation_mode,
    int64_t padding_mode,
    bool align_corners,
    at::Tensor & out) {
    at::Tensor & result = at::grid_sampler_2d_outf(input, grid, interpolation_mode, padding_mode, align_corners, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::index_put_outf,
// in the same order, and returns the reference that call produces.
// aten::index_put.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & index_put_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const c10::List<::std::optional<at::Tensor>> & indices,
    const at::Tensor & values,
    bool accumulate,
    at::Tensor & out) {
    at::Tensor & result = at::index_put_outf(self, indices, values, accumulate, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::native_layer_norm_outf, in the same order, and returns the tuple of
// references that call produces.
// aten::native_layer_norm.out(Tensor input, SymInt[] normalized_shape, Tensor? weight, Tensor? bias, float eps, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))
TORCH_API inline ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> native_layer_norm_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & input,
    at::IntArrayRef normalized_shape,
    const ::std::optional<at::Tensor> & weight,
    const ::std::optional<at::Tensor> & bias,
    double eps,
    at::Tensor & out0,
    at::Tensor & out1,
    at::Tensor & out2) {
    ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> outputs =
        at::native_layer_norm_outf(input, normalized_shape, weight, bias, eps, out0, out1, out2);
    return outputs;
}


// Thin wrapper: passes every argument except `context` to
// at::pixel_shuffle_outf, in the same order, and returns the reference that
// call produces.
// aten::pixel_shuffle.out(Tensor self, int upscale_factor, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & pixel_shuffle_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t upscale_factor,
    at::Tensor & out) {
    at::Tensor & result = at::pixel_shuffle_outf(self, upscale_factor, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::repeat_outf, in
// the same order, and returns the reference that call produces.
// aten::repeat.out(Tensor self, SymInt[] repeats, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & repeat_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef repeats,
    at::Tensor & out) {
    at::Tensor & result = at::repeat_outf(self, repeats, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::relu_outf, in
// the same order, and returns the reference that call produces.
// aten::relu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & relu_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::Tensor & out) {
    at::Tensor & result = at::relu_outf(self, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::unsafe_split_outf, in the same order. Nothing is returned.
// aten::unsafe_split.Tensor_out(Tensor self, SymInt split_size, int dim=0, *, Tensor(a!)[] out) -> ()
TORCH_API inline void unsafe_split_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t split_size,
    int64_t dim,
    at::TensorList out) {
    at::unsafe_split_outf(self, split_size, dim, out);
}


// Thin wrapper: passes every argument except `context` to at::_unique2_outf,
// in the same order, and returns the tuple of references that call produces.
// aten::_unique2.out(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))
TORCH_API inline ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> _unique2_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    bool sorted,
    bool return_inverse,
    bool return_counts,
    at::Tensor & out0,
    at::Tensor & out1,
    at::Tensor & out2) {
    ::std::tuple<at::Tensor &,at::Tensor &,at::Tensor &> outputs =
        at::_unique2_outf(self, sorted, return_inverse, return_counts, out0, out1, out2);
    return outputs;
}


// Thin wrapper: passes every argument except `context` to at::zeros_like_outf,
// in the same order, and returns the reference that call produces.
// aten::zeros_like.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & zeros_like_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    ::std::optional<at::MemoryFormat> memory_format,
    at::Tensor & out) {
    at::Tensor & result = at::zeros_like_outf(self, memory_format, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::clone_outf, in
// the same order, and returns the reference that call produces.
// aten::clone.out(Tensor self, *, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & clone_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    ::std::optional<at::MemoryFormat> memory_format,
    at::Tensor & out) {
    at::Tensor & result = at::clone_outf(self, memory_format, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::rsub_outf, in
// the same order, and returns the reference that call produces.
// aten::rsub.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & rsub_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Scalar & other,
    const at::Scalar & alpha,
    at::Tensor & out) {
    at::Tensor & result = at::rsub_outf(self, other, alpha, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams_outf, in the
// same order, and returns the tuple of references that call produces.
// aten::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams.out(Tensor self, Tensor scale, Tensor zero_point, Tensor fake_quant_enabled, int quant_min, int quant_max, *, Tensor(a!) out0, Tensor(b!) out1) -> (Tensor(a!), Tensor(b!))
TORCH_API inline ::std::tuple<at::Tensor &,at::Tensor &> _fake_quantize_per_tensor_affine_cachemask_tensor_qparams_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & scale,
    const at::Tensor & zero_point,
    const at::Tensor & fake_quant_enabled,
    int64_t quant_min,
    int64_t quant_max,
    at::Tensor & out0,
    at::Tensor & out1) {
    ::std::tuple<at::Tensor &,at::Tensor &> outputs =
        at::_fake_quantize_per_tensor_affine_cachemask_tensor_qparams_outf(self, scale, zero_point, fake_quant_enabled, quant_min, quant_max, out0, out1);
    return outputs;
}


// Thin wrapper: passes every argument except `context` to at::_to_copy_outf,
// in the same order, and returns the reference that call produces.
// aten::_to_copy.out(Tensor self, *, bool non_blocking=False, MemoryFormat? memory_format=None, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & _to_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    bool non_blocking,
    ::std::optional<at::MemoryFormat> memory_format,
    at::Tensor & out) {
    at::Tensor & result = at::_to_copy_outf(self, non_blocking, memory_format, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::masked_fill_outf, in the same order, and returns the reference that call
// produces.
// aten::masked_fill.Scalar_out(Tensor self, Tensor mask, Scalar value, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & masked_fill_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    const at::Tensor & mask,
    const at::Scalar & value,
    at::Tensor & out) {
    at::Tensor & result = at::masked_fill_outf(self, mask, value, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::upsample_bilinear2d_outf, in the same order, and returns the reference
// that call produces.
// aten::upsample_bilinear2d.vec_out(Tensor input, SymInt[]? output_size, bool align_corners, float[]? scale_factors, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & upsample_bilinear2d_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & input,
    at::OptionalIntArrayRef output_size,
    bool align_corners,
    ::std::optional<at::ArrayRef<double>> scale_factors,
    at::Tensor & out) {
    at::Tensor & result = at::upsample_bilinear2d_outf(input, output_size, align_corners, scale_factors, out);
    return result;
}


// Overload taking an optional output size and an optional scale-factor array.
// Thin wrapper: passes every argument except `context` to
// at::upsample_nearest2d_outf, in the same order, and returns the reference
// that call produces.
// aten::upsample_nearest2d.vec_out(Tensor input, SymInt[]? output_size, float[]? scale_factors, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & upsample_nearest2d_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & input,
    at::OptionalIntArrayRef output_size,
    ::std::optional<at::ArrayRef<double>> scale_factors,
    at::Tensor & out) {
    at::Tensor & result = at::upsample_nearest2d_outf(input, output_size, scale_factors, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::expand_copy_outf, in the same order, and returns the reference that call
// produces.
// aten::expand_copy.out(Tensor self, SymInt[] size, *, bool implicit=False, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & expand_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef size,
    bool implicit,
    at::Tensor & out) {
    at::Tensor & result = at::expand_copy_outf(self, size, implicit, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::permute_copy_outf, in the same order, and returns the reference that
// call produces.
// aten::permute_copy.out(Tensor self, int[] dims, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & permute_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef dims,
    at::Tensor & out) {
    at::Tensor & result = at::permute_copy_outf(self, dims, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::_reshape_alias_copy_outf, in the same order, and returns the reference
// that call produces.
// aten::_reshape_alias_copy.out(Tensor self, SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & _reshape_alias_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef size,
    at::IntArrayRef stride,
    at::Tensor & out) {
    at::Tensor & result = at::_reshape_alias_copy_outf(self, size, stride, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::select_copy_outf, in the same order, and returns the reference that call
// produces.
// aten::select_copy.int_out(Tensor self, int dim, SymInt index, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & select_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim,
    int64_t index,
    at::Tensor & out) {
    at::Tensor & result = at::select_copy_outf(self, dim, index, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::detach_copy_outf, in the same order, and returns the reference that call
// produces.
// aten::detach_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & detach_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::Tensor & out) {
    at::Tensor & result = at::detach_copy_outf(self, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::slice_copy_outf, in the same order, and returns the reference that call
// produces.
// aten::slice_copy.Tensor_out(Tensor self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & slice_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim,
    ::std::optional<int64_t> start,
    ::std::optional<int64_t> end,
    int64_t step,
    at::Tensor & out) {
    at::Tensor & result = at::slice_copy_outf(self, dim, start, end, step, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::transpose_copy_outf, in the same order, and returns the reference that
// call produces.
// aten::transpose_copy.int_out(Tensor self, int dim0, int dim1, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & transpose_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim0,
    int64_t dim1,
    at::Tensor & out) {
    at::Tensor & result = at::transpose_copy_outf(self, dim0, dim1, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to
// at::unsqueeze_copy_outf, in the same order, and returns the reference that
// call produces.
// aten::unsqueeze_copy.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & unsqueeze_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    int64_t dim,
    at::Tensor & out) {
    at::Tensor & result = at::unsqueeze_copy_outf(self, dim, out);
    return result;
}


// Thin wrapper: passes every argument except `context` to at::view_copy_outf,
// in the same order, and returns the reference that call produces.
// aten::view_copy.out(Tensor self, SymInt[] size, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & view_copy_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & self,
    at::IntArrayRef size,
    at::Tensor & out) {
    at::Tensor & result = at::view_copy_outf(self, size, out);
    return result;
}

} // namespace aten
        
namespace custom {

// Thin wrapper for the custom op: passes every argument except `context` to
// ::custom::native::add_3_out (global-namespace `custom`, not this enclosing
// torch::executor::custom), in the same order, and returns the reference that
// call produces.
// custom::add_3.out(Tensor a, Tensor b, Tensor c, *, Tensor(a!) out) -> Tensor(a!)
TORCH_API inline at::Tensor & add_3_outf(
    torch::executor::KernelRuntimeContext & context,
    const at::Tensor & a,
    const at::Tensor & b,
    const at::Tensor & c,
    at::Tensor & out) {
    at::Tensor & result = ::custom::native::add_3_out(a, b, c, out);
    return result;
}

} // namespace custom

} // namespace executor
} // namespace torch