// Copyright 2019 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #pragma once #include "xnnpack.h" namespace models { struct QD8AttentionWeights { std::vector query_data; std::vector query_scale; std::vector key_data; std::vector key_scale; std::vector value_data; std::vector value_scale; std::vector post_proj_data; std::vector post_proj_scale; }; xnn_subgraph_t FP32Attention(size_t b, size_t t, size_t h, size_t n, size_t s); xnn_subgraph_t FP32MobileNetV1(); xnn_subgraph_t FP32MobileNetV2(); xnn_subgraph_t FP32MobileNetV3Large(); xnn_subgraph_t FP32MobileNetV3Small(); xnn_subgraph_t QD8Attention(size_t batch_size, size_t seq_len, size_t embedding_dim, size_t num_heads, size_t head_dim, QD8AttentionWeights &weights); xnn_subgraph_t QS8MobileNetV2(); } // namespace models