// Copyright 2022 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #pragma once #include #include "xnnpack/common.h" #ifdef __cplusplus extern "C" { #endif // When parallelizing GEMMs, try to tile the computation such that we have at // least this many tiles per thread. #define XNN_GEMM_TILES_PER_THREAD 5 // Computes the largest `nc`, the largest multiple of `nr` such that there are // at least five tiles per thread (if `num_threads > 1`). size_t xnn_gemm_best_nc(size_t num_groups, size_t m, size_t n, size_t mr, size_t nr, size_t num_threads); // The total tile size needed to cover kernel_size. XNN_INTERNAL size_t xnn_dwconv_multipass_tile_size( size_t kernel_size, size_t first_pass_tile, size_t middle_pass_tile, size_t last_pass_tile); // The total count of weights (in bytes) needed for multipass dwconv. size_t xnn_dwconv_multipass_weights_size( size_t tile_size, size_t channels, size_t channel_tile, size_t channel_subtile, size_t channel_round, size_t bias_element_size, size_t log2_filter_element_size, size_t extra_weights_byte); // Calculate the number of bytes read. size_t xnn_dwconv_multipass_bytes_read( size_t kernel_size, size_t first_pass_tile, size_t middle_pass_tile, size_t last_pass_tile, size_t channels, size_t channel_tile, size_t channel_subtile, size_t channel_round, size_t log2_input_size, size_t log2_filter_size, size_t bias_element_size, size_t log2_accumulator_size); // Calculate the number of bytes written. size_t xnn_dwconv_multipass_bytes_written( size_t kernel_size, size_t first_pass_tile, size_t middle_pass_tile, size_t last_pass_tile, size_t channels, size_t channel_round, size_t log2_accumulator_size, size_t log2_output_size); #ifdef __cplusplus } #endif