// Copyright 2020 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. #include #include #include #include #include #include #include #include #include "xnnpack.h" #include "xnnpack/allocation-type.h" #include "xnnpack/common.h" #include "xnnpack/config-types.h" #include "xnnpack/config.h" #include "xnnpack/datatype.h" #include "xnnpack/log.h" #include "xnnpack/math.h" #include "xnnpack/packq.h" #include "xnnpack/params.h" #include "xnnpack/subgraph.h" static void set_allocation_type(struct xnn_value* value) { if (value->data != NULL) { value->allocation_type = xnn_allocation_type_static; } else if ((value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) != 0) { value->allocation_type = xnn_allocation_type_external; } else if ((value->flags & XNN_VALUE_FLAG_PERSISTENT) != 0) { value->allocation_type = xnn_allocation_type_persistent; } else { value->allocation_type = xnn_allocation_type_workspace; } } static void set_shape(struct xnn_value* value, size_t num_dims, const size_t* dims) { value->shape.num_dims = num_dims; if (num_dims != 0) { memcpy(value->shape.dim, dims, num_dims * sizeof(size_t)); } } static enum xnn_status check_zero_point( enum xnn_datatype datatype, int32_t zero_point) { switch (datatype) { case xnn_datatype_qcint4: case xnn_datatype_qbint4: if (zero_point < 0 || zero_point > 15) { xnn_log_error( "failed to create Quantized Dense Tensor value: invalid zero point %" PRId32" outside the [0, 15] range", zero_point); return xnn_status_invalid_parameter; } break; case xnn_datatype_qcint8: case xnn_datatype_qint8: if ((int32_t) (int8_t) zero_point != zero_point) { xnn_log_error( "failed to create Quantized Dense Tensor value: invalid zero point %" PRId32" outside the [-128, 127] range", zero_point); return xnn_status_invalid_parameter; } break; case xnn_datatype_quint8: if ((int32_t) (uint8_t) zero_point != zero_point) { xnn_log_error( "failed to create Quantized Dense Tensor value: invalid zero point %" PRId32" outside the [0, 255] range", zero_point); return xnn_status_invalid_parameter; } break; case xnn_datatype_qcint32: case xnn_datatype_qint32: if (zero_point != 0) { xnn_log_error( "failed to create Quantized Dense Tensor value: invalid non-zero zero point %" PRId32, zero_point); return xnn_status_invalid_parameter; } break; default: xnn_log_error("failed to create Quantized Dense Tensor value: unsupported datatype %s (%d)", xnn_datatype_to_string(datatype), datatype); return xnn_status_unsupported_parameter; } return xnn_status_success; } enum xnn_status xnn_define_tensor_value( xnn_subgraph_t subgraph, enum xnn_datatype datatype, size_t num_dims, const size_t* dims, const void* data, uint32_t external_id, uint32_t flags, uint32_t* id_out) { if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { xnn_log_error("failed to create Dense Tensor value: XNNPACK is not initialized"); return xnn_status_uninitialized; } if (external_id != XNN_INVALID_VALUE_ID && external_id >= subgraph->external_value_ids) { xnn_log_error( "failed to create Dense Tensor value: " "external ID %" PRIu32 " exceeds the number of reserved external IDs in subgraph (%" PRIu32 ")", external_id, subgraph->external_value_ids); return xnn_status_invalid_parameter; } if (num_dims > XNN_MAX_TENSOR_DIMS) { xnn_log_error("failed to create Dense Tensor value: num of dimensions exceeds XNNPACK limit (%d)", XNN_MAX_TENSOR_DIMS); return xnn_status_unsupported_parameter; } switch (datatype) { case xnn_datatype_fp32: case xnn_datatype_fp16: case xnn_datatype_bf16: case xnn_datatype_int32: case xnn_datatype_pfp32: // TODO: Does this really belong here? break; default: xnn_log_error("failed to create Dense Tensor value: unsupported datatype %s (%d)", xnn_datatype_to_string(datatype), datatype); return xnn_status_unsupported_parameter; } struct xnn_value* value = subgraph->values + external_id; if (external_id == XNN_INVALID_VALUE_ID) { value = xnn_subgraph_new_internal_value(subgraph); if (value == NULL) { return xnn_status_out_of_memory; } } value->type = xnn_value_type_dense_tensor; value->datatype = datatype; set_shape(value, num_dims, dims); value->size = xnn_tensor_get_size_by_id(subgraph, value->id); value->flags = flags; value->data = (void*) (uintptr_t) data; set_allocation_type(value); *id_out = value->id; return xnn_status_success; } enum xnn_status xnn_define_quantized_tensor_value( xnn_subgraph_t subgraph, enum xnn_datatype datatype, int32_t zero_point, float scale, size_t num_dims, const size_t* dims, const void* data, uint32_t external_id, uint32_t flags, uint32_t* id_out) { if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { xnn_log_error("failed to create Quantized Dense Tensor value: XNNPACK is not initialized"); return xnn_status_uninitialized; } if (external_id != XNN_INVALID_VALUE_ID && external_id >= subgraph->external_value_ids) { xnn_log_error( "failed to create Quantized Dense Tensor value: " "external ID %" PRIu32 " exceeds the number of reserved external IDs in subgraph (%" PRIu32 ")", external_id, subgraph->external_value_ids); return xnn_status_invalid_parameter; } enum xnn_status status = xnn_validate_quantized_tensor( datatype, zero_point, scale, num_dims, dims); if (status != xnn_status_success) { return status; } struct xnn_value* value = subgraph->values + external_id; if (external_id == XNN_INVALID_VALUE_ID) { value = xnn_subgraph_new_internal_value(subgraph); if (value == NULL) { return xnn_status_out_of_memory; } } value->type = xnn_value_type_dense_tensor; value->datatype = datatype; value->quantization.zero_point = zero_point; value->quantization.scale = scale; set_shape(value, num_dims, dims); value->size = xnn_tensor_get_size_by_id(subgraph, value->id); value->flags = flags; value->data = (void*) (uintptr_t) data; set_allocation_type(value); *id_out = value->id; return xnn_status_success; } enum xnn_status xnn_define_dynamically_quantized_tensor_value( xnn_subgraph_t subgraph, enum xnn_datatype datatype, size_t num_dims, size_t num_nonbatch_dims, const size_t* dims, uint32_t external_id, uint32_t flags, uint32_t* id_out) { if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { xnn_log_error("failed to create Dynamically Quantized Dense Tensor value: XNNPACK is not initialized"); return xnn_status_uninitialized; } if (external_id != XNN_INVALID_VALUE_ID && external_id >= subgraph->external_value_ids) { xnn_log_error( "failed to create Dynamically Quantized Dense Tensor value: " "external ID %" PRIu32 " exceeds the number of reserved external IDs in subgraph (%" PRIu32 ")", external_id, subgraph->external_value_ids); return xnn_status_invalid_parameter; } if (num_dims > XNN_MAX_TENSOR_DIMS) { xnn_log_error( "failed to create Dynamically Quantized Dense Tensor value: num of dimensions exceeds XNNPACK limit (%d)", XNN_MAX_TENSOR_DIMS); return xnn_status_unsupported_parameter; } if (num_nonbatch_dims > num_dims) { xnn_log_error( "failed to create Dynamically Quantized Dense Tensor value: " "non batch dimensions %zu is greater than number of dimensions %zu", num_nonbatch_dims, num_dims); return xnn_status_invalid_parameter; } switch (datatype) { case xnn_datatype_qdint8: case xnn_datatype_qpint8: break; default: xnn_log_error("failed to create Dynamically Quantized Dense Tensor value: unsupported datatype %s (%d)", xnn_datatype_to_string(datatype), datatype); return xnn_status_unsupported_parameter; } if ((flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) != 0) { xnn_log_error( "failed to create Dynamically Quantized Dense Tensor value: " "external dynamically quantized tensors are not supported."); return xnn_status_unsupported_parameter; } struct xnn_value* value = subgraph->values + external_id; if (external_id == XNN_INVALID_VALUE_ID) { value = xnn_subgraph_new_internal_value(subgraph); if (value == NULL) { return xnn_status_out_of_memory; } } value->type = xnn_value_type_dense_tensor; value->datatype = datatype; value->quantization.num_nonbatch_dims = num_nonbatch_dims; set_shape(value, num_dims, dims); value->size = xnn_tensor_get_size_by_id(subgraph, value->id); value->quantization.dynamic_params_size = xnn_tensor_get_dynamic_quant_param_size(value); value->flags = flags; value->data = NULL; set_allocation_type(value); *id_out = value->id; return xnn_status_success; } enum xnn_status xnn_define_channelwise_quantized_tensor_value( xnn_subgraph_t subgraph, enum xnn_datatype datatype, const float* scale, size_t num_dims, size_t channel_dim, const size_t* dims, const void* data, uint32_t external_id, uint32_t flags, uint32_t* id_out) { return xnn_define_channelwise_quantized_tensor_value_v2( subgraph, datatype, /*zero_point=*/0, scale, num_dims, channel_dim, dims, data, external_id, flags, id_out); } enum xnn_status xnn_validate_quantized_tensor( enum xnn_datatype datatype, int32_t zero_point, float scale, size_t num_dims, const size_t* dims) { if (num_dims > XNN_MAX_TENSOR_DIMS) { xnn_log_error( "failed to create Quantized Dense Tensor value: num of dimensions exceeds XNNPACK limit (%d)", XNN_MAX_TENSOR_DIMS); return xnn_status_unsupported_parameter; } enum xnn_status status = check_zero_point(datatype, zero_point); if (status != xnn_status_success) { return status; } if (scale <= 0.0f || !isnormal(scale)) { xnn_log_error( "failed to create Quantized Dense Tensor value with %.7g scale: scale must be finite, normalized, and positive", scale); return xnn_status_invalid_parameter; } return xnn_status_success; } enum xnn_status xnn_validate_channelwise_quantized_tensor( enum xnn_datatype datatype, int32_t zero_point, const float* scale, size_t num_dims, size_t channel_dim, const size_t* dims) { if (num_dims == 0) { xnn_log_error( "failed to create Channelwise Quantized Dense Tensor value: no channel dimension exists"); return xnn_status_invalid_parameter; } if (num_dims > XNN_MAX_TENSOR_DIMS) { xnn_log_error( "failed to create Channelwise Quantized Dense Tensor value: num of dimensions exceeds XNNPACK limit (%d)", XNN_MAX_TENSOR_DIMS); return xnn_status_unsupported_parameter; } if (channel_dim >= num_dims) { xnn_log_error( "failed to create Channelwise Quantized Dense Tensor value: " "channel dimension index %zu is out of range for %zu-dimensional tensor", channel_dim, num_dims); return xnn_status_invalid_parameter; } enum xnn_status status = check_zero_point(datatype, zero_point); if (status != xnn_status_success) { return status; } switch (datatype) { case xnn_datatype_qcint4: case xnn_datatype_qcint8: case xnn_datatype_qcint32: break; default: xnn_log_error("failed to create Channelwise Quantized Dense Tensor value: unsupported datatype %s (%d)", xnn_datatype_to_string(datatype), datatype); return xnn_status_unsupported_parameter; } const size_t channels = dims[channel_dim]; for (size_t channel = 0; channel < channels; channel++) { if (scale[channel] <= 0.0f || !isnormal(scale[channel])) { xnn_log_error( "failed to create Channelwise Quantized Dense Tensor value with %.7g scale in channel #%zu: " "scale must be finite, normalized, and positive", scale[channel], channel); return xnn_status_invalid_parameter; } } return xnn_status_success; } enum xnn_status xnn_define_channelwise_quantized_tensor_value_v2( xnn_subgraph_t subgraph, enum xnn_datatype datatype, int32_t zero_point, const float* scale, size_t num_dims, size_t channel_dim, const size_t* dims, const void* data, uint32_t external_id, uint32_t flags, uint32_t* id_out) { if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { xnn_log_error("failed to create Channelwise Quantized Dense Tensor value: XNNPACK is not initialized"); return xnn_status_uninitialized; } if (external_id != XNN_INVALID_VALUE_ID && external_id >= subgraph->external_value_ids) { xnn_log_error( "failed to create Channelwise Quantized Dense Tensor value: " "external ID %" PRIu32 " exceeds the number of reserved external IDs in subgraph (%" PRIu32 ")", external_id, subgraph->external_value_ids); return xnn_status_invalid_parameter; } enum xnn_status status = xnn_validate_channelwise_quantized_tensor( datatype, zero_point, scale, num_dims, channel_dim, dims); if (status != xnn_status_success) { return status; } struct xnn_value* value = subgraph->values + external_id; if (external_id == XNN_INVALID_VALUE_ID) { value = xnn_subgraph_new_internal_value(subgraph); if (value == NULL) { return xnn_status_out_of_memory; } } value->type = xnn_value_type_dense_tensor; value->datatype = datatype; value->quantization.zero_point = zero_point; value->quantization.channelwise_scale = scale; value->quantization.channel_dimension = channel_dim; set_shape(value, num_dims, dims); value->size = xnn_tensor_get_size_by_id(subgraph, value->id); value->flags = flags; value->data = (void*) (uintptr_t) data; set_allocation_type(value); *id_out = value->id; return xnn_status_success; } enum xnn_status xnn_define_blockwise_quantized_tensor_value( xnn_subgraph_t subgraph, enum xnn_datatype datatype, int32_t zero_point, const uint16_t* scale, size_t num_dims, size_t channel_dim, size_t block_size, const size_t* dims, const void* data, uint32_t external_id, uint32_t flags, uint32_t* id_out) { if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) { xnn_log_error("failed to create Blockwise Quantized Dense Tensor value: XNNPACK is not initialized"); return xnn_status_uninitialized; } if (external_id != XNN_INVALID_VALUE_ID && external_id >= subgraph->external_value_ids) { xnn_log_error( "failed to create Blockwise Quantized Dense Tensor value: " "external ID %" PRIu32 " exceeds the number of reserved external IDs in subgraph (%" PRIu32 ")", external_id, subgraph->external_value_ids); return xnn_status_invalid_parameter; } if (num_dims == 0) { xnn_log_error( "failed to create Blockwise Quantized Dense Tensor value: no channel dimension exists"); return xnn_status_invalid_parameter; } if (num_dims > XNN_MAX_TENSOR_DIMS) { xnn_log_error( "failed to create Blockwise Quantized Dense Tensor value: num of dimensions exceeds XNNPACK limit (%d)", XNN_MAX_TENSOR_DIMS); return xnn_status_unsupported_parameter; } if (channel_dim >= num_dims) { xnn_log_error( "failed to create Blockwise Quantized Dense Tensor value: " "channel dimension index %zu is out of range for %zu-dimensional tensor", channel_dim, num_dims); return xnn_status_invalid_parameter; } if (block_size <= 0) { xnn_log_error( "failed to create Blockwise Quantized Dense Tensor value: " "block size is invalid. Got %zu\n", block_size); } enum xnn_status status = check_zero_point(datatype, zero_point); if (status != xnn_status_success) { return status; } switch (datatype) { case xnn_datatype_qbint4: break; default: xnn_log_error("failed to create Blockwise Quantized Dense Tensor value: unsupported datatype %s (%d)", xnn_datatype_to_string(datatype), datatype); return xnn_status_unsupported_parameter; } const size_t block_count = dims[0] * dims[1] / block_size; for (size_t block = 0; block < block_count; block++) { float float_scale = math_cvt_fp32_bf16(scale[block]); if (float_scale <= 0.0f || !isnormal(float_scale)) { xnn_log_error( "failed to create Blockwise Quantized Dense Tensor value with %.7g scale in block #%zu: " "scale must be finite, normalized, and positive", float_scale, block); return xnn_status_invalid_parameter; } } struct xnn_value* value = subgraph->values + external_id; if (external_id == XNN_INVALID_VALUE_ID) { value = xnn_subgraph_new_internal_value(subgraph); if (value == NULL) { return xnn_status_out_of_memory; } } value->type = xnn_value_type_dense_tensor; value->datatype = datatype; value->quantization.zero_point = zero_point; value->quantization.blockwise_scale = (const xnn_bfloat16*) scale; value->quantization.channel_dimension_blockwise = channel_dim; value->quantization.block_size = block_size; set_shape(value, num_dims, dims); value->size = xnn_tensor_get_size_by_id(subgraph, value->id); value->flags = flags; value->data = (void*) (uintptr_t) data; set_allocation_type(value); *id_out = value->id; return xnn_status_success; } size_t xnn_shape_multiply_all_dims( const struct xnn_shape shape[restrict XNN_MIN_ELEMENTS(1)]) { size_t batch_size = 1; for (size_t i = 0; i < shape->num_dims; i++) { batch_size *= shape->dim[i]; } return batch_size; } size_t xnn_shape_multiply_batch_dims( const struct xnn_shape shape[restrict XNN_MIN_ELEMENTS(1)], size_t num_nonbatch_dims) { size_t batch_size = 1; for (size_t i = 0; i + num_nonbatch_dims < shape->num_dims; i++) { batch_size *= shape->dim[i]; } return batch_size; } size_t xnn_shape_multiply_non_channel_dims( const struct xnn_shape shape[restrict XNN_MIN_ELEMENTS(1)]) { size_t batch_size = 1; for (size_t i = 0; i + 1 < shape->num_dims; i++) { batch_size *= shape->dim[i]; } return batch_size; } size_t xnn_shape_multiply_leading_dims( const struct xnn_shape shape[restrict XNN_MIN_ELEMENTS(1)], size_t num_leading_dims) { size_t batch_size = 1; for (size_t i = 0; i < num_leading_dims; i++) { batch_size *= shape->dim[i]; } return batch_size; } size_t xnn_shape_multiply_trailing_dims( const struct xnn_shape shape[1], size_t start_dim) { size_t product = 1; for (size_t i = start_dim; i < shape->num_dims; i++) { product *= shape->dim[i]; } return product; } size_t xnn_tensor_get_size(const struct xnn_value* value) { assert(value->type == xnn_value_type_dense_tensor); assert(value->datatype != xnn_datatype_invalid); // Special handling for packed quantized types. if (value->datatype == xnn_datatype_qpint8) { const size_t m = xnn_shape_multiply_batch_dims(&value->shape, 1); const size_t k = value->shape.dim[value->shape.num_dims - 1]; return xnn_x8_packq_f32qp8_gemm_packed_size(m, k); } uint64_t size_bits = xnn_datatype_size_bits(value->datatype); size_bits *= xnn_shape_multiply_all_dims(&value->shape); // Round size up to the nearest byte. // TODO: We should not be using this helper for non-byte-addressable types, // perhaps we should just assert here. return round_up_po2(size_bits, 8) >> 3; } // Return size of the dynamic quantization params in this value size_t xnn_tensor_get_dynamic_quant_param_size(const struct xnn_value* value) { switch (value->datatype) { case xnn_datatype_qdint8: case xnn_datatype_qduint8: { const size_t batch_dims_size = xnn_shape_multiply_batch_dims( &value->shape, value->quantization.num_nonbatch_dims); return batch_dims_size * sizeof(struct xnn_quantization_params); } case xnn_datatype_qpint8: return 0; default: XNN_UNREACHABLE; } } size_t xnn_tensor_get_size_by_id(xnn_subgraph_t subgraph, uint32_t value_id) { assert(value_id < subgraph->num_values); const struct xnn_value* value = subgraph->values + value_id; return xnn_tensor_get_size(value); }