#pragma once

// NOTE(review): in this copy of the file the #include targets below, and the
// template arguments of every std::vector in this header, are missing (the
// angle-bracketed text appears to have been stripped). Restore them from the
// canonical header before building -- TODO confirm.
#include
#include
#include
#include
#include
#include
#include

namespace torch::jit::fuser {

// Abstract base for a fused kernel. It stores the kernel's name, its code
// string, the input/output/chunk/concat descriptors and a has_random flag,
// all fixed at construction, and leaves the actual launch (launch_raw) and
// the backend query (backend) to derived classes.
struct FusedKernel {
  // Presumably disables copy construction and copy assignment (going by the
  // macro's name; its definition is not part of this file).
  AT_DISALLOW_COPY_AND_ASSIGN(FusedKernel);

  // Takes every argument by value and moves the strings and vectors into the
  // matching const members; has_random is copied as-is.
  FusedKernel(
      std::string name,
      std::string code,
      std::vector input_desc,
      std::vector output_desc,
      std::vector chunk_desc,
      std::vector concat_desc,
      bool has_random)
      : name_(std::move(name)),
        code_(std::move(code)),
        input_desc_(std::move(input_desc)),
        output_desc_(std::move(output_desc)),
        chunk_desc_(std::move(chunk_desc)),
        concat_desc_(std::move(concat_desc)),
        has_random_(has_random) {}

  // Virtual so that derived kernels are destroyed correctly through a
  // FusedKernel pointer.
  virtual ~FusedKernel() = default;

  // arguments is a list of pointers to the arguments for the compiled CUDA/CPU
  // code.
  // The format of arguments is suitable for directly passing to a call to
  // cuLaunchKernel as the kernel arguments.
  // Currently the first argument is a pointer to numel (for passing to
  // CUDA code), and the remainder are pointers to the TensorInfo structs
  // that compiled code uses to load Tensor data.
  // launch_with_tensors handles packing at::Tensors into this arguments array.
  // CPU code uses the same convention so that launch_with_tensors can be
  // shared.
  virtual void launch_raw(const uint32_t numel, std::vector& arguments)
      const = 0;

  // Backend this kernel belongs to; supplied by each derived class.
  virtual at::Backend backend() const = 0;

  // Getters: read-only access to the values captured by the constructor.
  // All but hasRandom() return a const reference to the stored member.
  const std::string& name() const {
    return name_;
  }
  const std::string& code() const {
    return code_;
  }
  const std::vector& inputDesc() const {
    return input_desc_;
  }
  const std::vector& outputDesc() const {
    return output_desc_;
  }
  const std::vector& chunkDesc() const {
    return chunk_desc_;
  }
  const std::vector& concatDesc() const {
    return concat_desc_;
  }
  bool hasRandom() const {
    return has_random_;
  }

 protected:
  // All members are const (initialized only by the constructor) and protected
  // so derived classes can read them directly; hence the lint suppressions.

  // Kernel name as passed to the constructor.
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  const std::string name_;
  // Code string as passed to the constructor (presumably the generated kernel
  // source -- TODO confirm).
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  const std::string code_;
  // One descriptor per kernel input (per the "same size as input_desc" note on
  // chunk_desc_ below).
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  const std::vector input_desc_;
  // One descriptor per kernel output (per the "same size as output_desc" note
  // on concat_desc_ below).
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  const std::vector output_desc_;

  // same size as input_desc, describes whether an
  // input should be broken into subtensors (chunks)
  // to be consumed by the fusion group
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  const std::vector chunk_desc_;

  // same size as output_desc, describes whether
  // an output is actually a concatenation of
  // many subtensors that the fusion group produces
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  const std::vector concat_desc_;

  // Flag supplied at construction (presumably whether the kernel uses random
  // number generation -- TODO confirm against the code that builds kernels).
  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
  const bool has_random_;
};

} // namespace torch::jit::fuser