Skip to content

Commit

Permalink
Merge pull request #45 from tum-ei-eda/sync
Browse files Browse the repository at this point in the history
Complete Sync with CMSIS-NN
  • Loading branch information
ParkerJones567 authored Jun 24, 2024
2 parents 0021d3f + 3db1e13 commit 76a5065
Show file tree
Hide file tree
Showing 8 changed files with 2,212 additions and 6 deletions.
1 change: 1 addition & 0 deletions Include/CMSIS/NN/Include/arm_nnfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ extern "C" {
#define arm_convolve_wrapper_s16_get_buffer_size_dsp muriscv_nn_convolve_wrapper_s16_get_buffer_size_dsp
#define arm_convolve_wrapper_s16_get_buffer_size_mve muriscv_nn_convolve_wrapper_s16_get_buffer_size_mve
#define arm_convolve_s4 muriscv_nn_convolve_s4
#define arm_convolve_even_s4 muriscv_nn_convolve_even_s4
#define arm_convolve_s8 muriscv_nn_convolve_s8
#define arm_convolve_s4_get_buffer_size muriscv_nn_convolve_s4_get_buffer_size
#define arm_convolve_s8_get_buffer_size muriscv_nn_convolve_s8_get_buffer_size
Expand Down
46 changes: 44 additions & 2 deletions Include/muriscv_nn_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
* Title: muriscv_nn_functions.h
* Description: Public header file for MURISCV NN Library
*
* $Date: 23 April 2024
* $Revision: V.16.0.0
* $Date: 04 Jun 2024
* $Revision: V.16.1.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -361,6 +361,48 @@ muriscv_nn_status muriscv_nn_convolve_s4(const muriscv_nn_context *ctx,
const int32_t *bias_data,
const muriscv_nn_dims *output_dims,
int8_t *output_data);

/**
* @brief Basic s4 convolution function with a requirement of even number of kernels.
* @param[in, out] ctx Function context that contains the additional buffer if required by the function.
* muriscv_nn_convolve_s4_get_buffer_size will return the buffer_size if required.
* The caller is expected to clear the buffer, if applicable, for security reasons.
* @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
* Range of conv_params->input_offset : [-127, 128]
* Range of conv_params->output_offset : [-128, 127]
* @param[in] quant_params Per-channel quantization info.
* It contains the multiplier and shift values to be applied to each output channel
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] input_data Input (activation) data pointer. Data type: int8
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
* spatial filter dimensions. Note the product HK * WK * C_IN must be even.
* @param[in] filter_data Packed filter data pointer. Data type: int8 packed with 2x int4
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int32
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int8
*
* @return The function returns <code>MURISCV_NN_SUCCESS</code> if successful or
* <code>MURISCV_NN_ARG_ERROR</code> if incorrect arguments or
* <code>MURISCV_NN_NO_IMPL_ERROR</code> if not for MVE
*
* @details
* 1. Supported framework: TensorFlow Lite micro
* 2. Additional memory is required for optimization. Refer to argument 'ctx' for details.
*
* @note NOTE(review): the implementation in muriscv_nn_convolve_even_s4.c is currently a stub that ignores
* its arguments and always returns <code>MURISCV_NN_NO_IMPL_ERROR</code>; update this note once an
* optimized path is provided.
*
*/
muriscv_nn_status muriscv_nn_convolve_even_s4(const muriscv_nn_context *ctx,
const muriscv_nn_conv_params *conv_params,
const muriscv_nn_per_channel_quant_params *quant_params,
const muriscv_nn_dims *input_dims,
const int8_t *input_data,
const muriscv_nn_dims *filter_dims,
const int8_t *filter_data,
const muriscv_nn_dims *bias_dims,
const int32_t *bias_data,
const muriscv_nn_dims *output_dims,
int8_t *output_data);

/**
* @brief Basic s8 convolution function
* @param[in, out] ctx Function context that contains the additional buffer if required by the function.
Expand Down
53 changes: 51 additions & 2 deletions Include/muriscv_nn_support_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
* Title: muriscv_nn_support_functions.h
* Description: Public header file of support functions for MURISCV NN Library
*
* $Date: 30 April 2024
* $Revision: V.22.0.0
* $Date: 27 May 2024
* $Revision: V.22.1.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -604,6 +604,55 @@ muriscv_nn_status muriscv_nn_mat_mult_nt_t_s4(const int8_t *lhs,
const int32_t activation_max,
const int32_t lhs_cols_offset);

/**
* @brief General Matrix-multiplication function with per-channel requantization.
* This function assumes:
* - LHS input matrix NOT transposed (nt)
* - RHS input matrix transposed (t)
* - RHS is int8 packed with 2x int4
* - LHS is int8
* - The number of LHS/RHS input columns must be even
* - LHS must be interleaved. Compare to muriscv_nn_mat_mult_nt_t_s4 where LHS is not interleaved.
*
* @note This operation also performs the broadcast bias addition before the requantization
*
* @param[in] lhs Pointer to the LHS input matrix
* @param[in] rhs Pointer to the RHS input matrix
* @param[in] bias Pointer to the bias vector. The length of this vector is equal to the number of
* output columns (or RHS input rows)
* @param[out] dst Pointer to the output matrix with "m" rows and "n" columns
* @param[in] dst_multipliers Pointer to the multipliers vector needed for the per-channel requantization.
* The length of this vector is equal to the number of output columns (or RHS input
* rows)
* @param[in] dst_shifts Pointer to the shifts vector needed for the per-channel requantization. The length
* of this vector is equal to the number of output columns (or RHS input rows)
* @param[in] lhs_rows Number of LHS input rows
* @param[in] rhs_rows Number of RHS input rows
* @param[in] rhs_cols Number of LHS/RHS input columns. Note this must be even.
* @param[in] lhs_offset Offset to be applied to the LHS input value
* @param[in] dst_offset Offset to be applied to the output result
* @param[in] activation_min Minimum value to clamp down the output. Range : int8
* @param[in] activation_max Maximum value to clamp up the output. Range : int8
* @param[in] lhs_cols_offset Column offset between subsequent lhs_rows
*
* @return The function returns <code>MURISCV_NN_SUCCESS</code>
*
*/
muriscv_nn_status muriscv_nn_mat_mult_nt_interleaved_t_even_s4(const int8_t *lhs,
const int8_t *rhs,
const int32_t *bias,
int8_t *dst,
const int32_t *dst_multipliers,
const int32_t *dst_shifts,
const int32_t lhs_rows,
const int32_t rhs_rows,
const int32_t rhs_cols,
const int32_t lhs_offset,
const int32_t dst_offset,
const int32_t activation_min,
const int32_t activation_max,
const int32_t lhs_cols_offset);

/**
* @brief General Matrix-multiplication function with per-channel requantization.
* This function assumes:
Expand Down
1 change: 1 addition & 0 deletions Integration/tvm/setup_tvm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ BUILDS=(mlf mlf_vext mlf_pext)
echo "Download and install TVM sources."
python3 -m venv .venv
source .venv/bin/activate
pip install numpy==1.26.4
pip install apache-tvm
pip install -r requirements.txt
pip install typing-extensions
Expand Down
3 changes: 2 additions & 1 deletion Source/ConvolutionFunctions/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,5 @@ target_sources(${MURISCVNN_LIB} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_convolve_wrapper_s4.c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_mat_mult_kernel_row_offset_s8_s16.c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_mat_mult_kernel_s16.c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_convolve_1_x_n_s4.c)
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_convolve_1_x_n_s4.c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_convolve_even_s4.c)
231 changes: 231 additions & 0 deletions Source/ConvolutionFunctions/muriscv_nn_convolve_even_s4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
// Modifications copyright (C) 2024 Chair of Electronic Design Automation, TUM
/*
* SPDX-FileCopyrightText: Copyright 2023-2024 Arm Limited and/or its affiliates <[email protected]>
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: muriscv_nn_convolve_even_s4.c
* Description: s8 version of convolution using symmetric quantization with 4 bit weights.
*
* $Date: 05 Jun 2024
* $Revision: V.1.0.0
*
* Target : Arm(R) M-Profile Architecture
*
* -------------------------------------------------------------------- */

#include "muriscv_nn_functions.h"
#include "muriscv_nn_support_functions.h"

/**
* @ingroup Public
*/

/**
* @addtogroup NNConv
* @{
*/

/*
 * Basic s8 convolution function with int4 packed RHS (weights) and even RHS columns.
 *
 * Refer to the header file for the full description of the parameters.
 *
 * Current state of this port: NOT IMPLEMENTED.
 *   - Every argument is ignored; neither the context buffer nor output_data is touched.
 *   - The function unconditionally returns MURISCV_NN_NO_IMPL_ERROR.
 *
 * The optimized path that was previously kept here as commented-out code relied on
 * Arm MVE intrinsics (vld2q_s8 / vstrbq_s8) behind a USE_VEXT guard, so it could not
 * be enabled on RISC-V as written. It has been dropped from the source in favour of
 * this note and remains available in version control.
 *
 * TODO: provide a RISC-V vector implementation (im2col, interleave, then
 *       muriscv_nn_mat_mult_nt_interleaved_t_even_s4) and return MURISCV_NN_SUCCESS.
 *
 * Note: packed_filter_data is declared as filter_data in the public header.
 */
muriscv_nn_status muriscv_nn_convolve_even_s4(const muriscv_nn_context *ctx,
                                              const muriscv_nn_conv_params *conv_params,
                                              const muriscv_nn_per_channel_quant_params *quant_params,
                                              const muriscv_nn_dims *input_dims,
                                              const int8_t *input_data,
                                              const muriscv_nn_dims *filter_dims,
                                              const int8_t *packed_filter_data,
                                              const muriscv_nn_dims *bias_dims,
                                              const int32_t *bias_data,
                                              const muriscv_nn_dims *output_dims,
                                              int8_t *output_data)
{
    /* Silence unused-parameter warnings: the stub consumes none of its arguments. */
    (void)ctx;
    (void)conv_params;
    (void)quant_params;
    (void)input_dims;
    (void)input_data;
    (void)filter_dims;
    (void)packed_filter_data;
    (void)bias_dims;
    (void)bias_data;
    (void)output_dims;
    (void)output_data;

    /* No implementation is available for any target yet. */
    return MURISCV_NN_NO_IMPL_ERROR;
}

/**
 * @} end of NNConv group
 */
3 changes: 2 additions & 1 deletion Source/NNSupportFunctions/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,5 @@ target_sources(${MURISCVNN_LIB} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_d
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_vec_mat_mul_result_acc_s16.c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_lstm_step_s16.c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_mat_mul_core_1x_s4.c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_depthwise_conv_nt_t_s4.c)
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_depthwise_conv_nt_t_s4.c
${CMAKE_CURRENT_SOURCE_DIR}/muriscv_nn_mat_mult_nt_interleaved_t_even_s4.c)
Loading

0 comments on commit 76a5065

Please sign in to comment.