Skip to content

Commit

Permalink
Merge branch 'main' of github.com:tum-ei-eda/mlonmcu into main
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilippvK committed Feb 6, 2024
2 parents 3d51c80 + 9d3caef commit ca8c015
Show file tree
Hide file tree
Showing 10 changed files with 1,642 additions and 180 deletions.
1,331 changes: 1,331 additions & 0 deletions ipynb/Examples/TVM-UMA/TVM-UMA.ipynb

Large diffs are not rendered by default.

129 changes: 129 additions & 0 deletions ipynb/Examples/TVM-UMA/environment.yml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
---
# The MLONMCU_HOME is filled in automatically when creating the environment
home: "{{ home_dir }}"
logging:
level: DEBUG
to_file: false
rotate: false
cleanup:
auto: true
keep: 50
# Default locations for certain directories can be changed here
# Non-absolute paths will always be treated relative to the MLONMCU_HOME
paths:
# Where the dependencies are downloaded and installed
deps: deps
# If logging to file is used keep logs in this directory
logs: logs
# Location where reports and artifacts are written to
results: results
# Directory where custom extensions can be integrated
plugins: plugins
# Directory for intermediate build products, should be located on a large enough drive
temp: temp
# A collection of models which will be used to look for models
# The paths will be checked in the order defined here stopping at the first match
# Non-existent paths will be skipped without throwing an error
models:
- "{{ home_dir }}/models"
- "{{ config_dir }}/models"
# The default clone URLs and refs of the used repositories are defined here
repos:
tvm:
url: "https://github.com/tum-ei-eda/tvm.git"
ref: patch_uma_tvmc
etiss:
url: "https://github.com/tum-ei-eda/etiss.git"
ref: 739573d5f12798074bc7d375a46c005f23f59f67
mlif:
url: "https://github.com/tum-ei-eda/mlonmcu-sw.git"
ref: 1bfcf3c9b90dd532011011386f3a08ad20573c09
etiss_accelerator_plugins:
url: "https://github.com/tum-ei-eda/etiss-accelerator-plugins.git"
ref: main
# Here all supported frameworks with their specific features are defined
# Optionally disable unwanted or incompatible backends or features here
# The configured defaults are used if no backend was specified in the command line options
frameworks:
default: tvm
tvm:
enabled: true
backends:
default: tvmaot
tvmrt:
enabled: true
features:
autotuned: true
tvmaot:
enabled: true
features:
unpacked_api: true
usmp: true
autotuned: true
uma_backends: true
tvmaotplus:
enabled: true
features:
autotuned: true
uma_backends: true
tvmllvm:
enabled: true
features:
autotuned: true
features:
cmsisnnbyoc: false
muriscvnnbyoc: false
# Some frontends are experimental and therefore disabled here
# Features like packing are only available in certain environments
frontends:
tflite:
enabled: true
features:
validate: true
relay:
enabled: true
features:
relayviz: true
onnx:
enabled: true
# TODO: saved_model (TF->TFLITE), ipynb (IPYNB->?)
# Some targets/platforms support multiple toolchains
toolchains:
gcc: true
llvm: true
# Platforms extend the number of supported targets
platforms:
mlif:
enabled: true
features:
debug: true
validate: true
benchmark: true
microtvm:
enabled: true
features: []
# List of supported targets in the environment
targets:
default: etiss
etiss:
enabled: true
features:
gdbserver: true
etissdbg: true
trace: true
log_instrs: true
vanilla_accelerator: true
# vext: true
# pext: true
host_x86:
enabled: true
features:
gdbserver: true
postprocesses:
use: []
vars:
allow_extensions: false
# tvm.make_tool: "ninja"
runs_per_stage: true
riscv_gcc.dl_url: "https://syncandshare.lrz.de/dl/fiWBtDLWz17RBc1Yd4VDW7/GCC/default/2023.11.27/Ubuntu/20.04/rv32gc_ilp32d.tar.xz"
flags: []
Binary file added ipynb/Examples/TVM-UMA/qnn_model.tflite
Binary file not shown.
11 changes: 11 additions & 0 deletions ipynb/Examples/TVM-UMA/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
-r ../../requirements.txt
attrs
cloudpickle
decorator
numpy
psutil
scipy
synr
tornado
typing_extensions
tflite
Binary file added ipynb/Examples/TVM-UMA/umatest.tflite
Binary file not shown.
3 changes: 2 additions & 1 deletion requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ tox==3.14.0
pytest==6.2.5
pytest-console-scripts==1.2.1
coverage
black>=22.8.0
black>=24.1.1
pylint
twine
black[jupyter]
Original file line number Diff line number Diff line change
Expand Up @@ -25,64 +25,64 @@ extern "C"
*
*/

/*
 * Register block of the quantized vanilla accelerator.
 *
 * Every field is 32 bits wide, so the field order below fixes the register
 * offsets (0x00 .. 0x34). The driver overlays this layout on the peripheral
 * base address 0x70002000.
 */
struct regs
{
    /* Buffer addresses (pointers narrowed to 32 bits by the driver). */
    uint32_t ifmap;   /* 0x00: input feature map buffer */
    uint32_t weights; /* 0x04: weight buffer */
    uint32_t bias;    /* 0x08: bias buffer */
    uint32_t result;  /* 0x0c: output buffer */

    /* Convolution dimensions. */
    int32_t oc;       /* 0x10: output channels */
    int32_t iw;       /* 0x14: input width */
    int32_t ih;       /* 0x18: input height */
    int32_t ic;       /* 0x1c: input channels */
    int32_t kh;       /* 0x20: kernel height */
    int32_t kw;       /* 0x24: kernel width */

    /* Quantization parameters. */
    int32_t i_zp;     /* 0x28: presumably the input zero point -- TODO confirm */
    int32_t k_zp;     /* 0x2c: presumably the kernel zero point -- TODO confirm */

    /* Handshake registers. */
    uint32_t control; /* 0x30: the driver writes 1 here to issue the start signal */
    uint32_t status;  /* 0x34: the driver polls bit 0 until it becomes 1 */
};

typedef struct regs regs_t;
// typedef struct regs
// {
// uint32_t ifmap;
// uint32_t weights;
// uint32_t bias;
// uint32_t result;
// int32_t oc;
// int32_t iw;
// int32_t ih;
// int32_t ic;
// int32_t kh;
// int32_t kw;
// int32_t i_zp;
// int32_t k_zp;
// uint32_t control;
// uint32_t status;
// } regs_t;

/*
 * Driver for the quantized vanilla accelerator conv2d (NCHW).
 *
 * Programs the memory-mapped register block of QVanillaAcceleratorT (the
 * variant with timing, base address 0x70002000), issues the start signal and
 * busy-waits until bit 0 of the status register is set.
 * For QVanillaAccelerator (w/o timing) replace this file with the contents of
 * conv2dnchw1.cc or interchange the names.
 *
 * q_vanilla_accelerator_0_i0  input feature map buffer (written to register 0x00)
 * q_vanilla_accelerator_0_i1  weight buffer            (written to register 0x04)
 * bias_data                   bias buffer              (written to register 0x08)
 * compute                     output buffer            (written to register 0x0c)
 * oc, iw, ih, ic, kh, kw      convolution dimensions   (registers 0x10 .. 0x24)
 * i_zp, k_zp                  written to registers 0x28 / 0x2c
 *
 * Returns 0 once the accelerator reports completion. There is no timeout: the
 * call spins forever if the status bit never becomes 1.
 *
 * All register accesses are 32-bit volatile accesses, so the compiler can
 * neither drop the stores nor move them past the status poll, and a buffer
 * address never spills into the neighbouring register.
 */
int32_t q_vanilla_accelerator_conv2dnchw(int8_t* q_vanilla_accelerator_0_i0, int8_t* q_vanilla_accelerator_0_i1, int32_t* bias_data, int32_t* compute,
                                         int32_t oc, int32_t iw, int32_t ih, int32_t ic, int32_t kh, int32_t kw, int32_t i_zp, int32_t k_zp) {
  /* Word indices of the registers relative to the base address. */
  enum {
    QVA_REG_IFMAP = 0,    /* 0x00 */
    QVA_REG_WEIGHTS = 1,  /* 0x04 */
    QVA_REG_BIAS = 2,     /* 0x08 */
    QVA_REG_RESULT = 3,   /* 0x0c */
    QVA_REG_OC = 4,       /* 0x10 */
    QVA_REG_IW = 5,       /* 0x14 */
    QVA_REG_IH = 6,       /* 0x18 */
    QVA_REG_IC = 7,       /* 0x1c */
    QVA_REG_KH = 8,       /* 0x20 */
    QVA_REG_KW = 9,       /* 0x24 */
    QVA_REG_I_ZP = 10,    /* 0x28 */
    QVA_REG_K_ZP = 11,    /* 0x2c */
    QVA_REG_CONTROL = 12, /* 0x30 */
    QVA_REG_STATUS = 13   /* 0x34 */
  };

  /* QVanillaAcceleratorT (with timing) base address. */
  volatile uint32_t* const regs = (volatile uint32_t*)(uintptr_t)0x70002000u;

  /* Buffer addresses; the registers are 32 bits wide. */
  regs[QVA_REG_IFMAP] = (uint32_t)(uintptr_t)q_vanilla_accelerator_0_i0;
  regs[QVA_REG_WEIGHTS] = (uint32_t)(uintptr_t)q_vanilla_accelerator_0_i1;
  regs[QVA_REG_BIAS] = (uint32_t)(uintptr_t)bias_data;
  regs[QVA_REG_RESULT] = (uint32_t)(uintptr_t)compute;

  /* Dimensions and quantization parameters (bit pattern preserved). */
  regs[QVA_REG_OC] = (uint32_t)oc;
  regs[QVA_REG_IW] = (uint32_t)iw;
  regs[QVA_REG_IH] = (uint32_t)ih;
  regs[QVA_REG_IC] = (uint32_t)ic;
  regs[QVA_REG_KH] = (uint32_t)kh;
  regs[QVA_REG_KW] = (uint32_t)kw;
  regs[QVA_REG_I_ZP] = (uint32_t)i_zp;
  regs[QVA_REG_K_ZP] = (uint32_t)k_zp;

  /* Issue the start signal; this must be the last configuration write. */
  regs[QVA_REG_CONTROL] = 0x00000001u;

  /* Busy-wait until the accelerator sets the ready bit (bit 0 of status). */
  while ((regs[QVA_REG_STATUS] & 0x1u) == 0u) {
    /* spin */
  }

  return 0;
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,25 @@

/*
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
*/
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>


// TODO(mjklaiber): leverage pragma import_c in the future
#ifdef __cplusplus
extern "C"
#endif
Expand All @@ -21,40 +37,61 @@ extern "C"
* \return error code
*
*/

/*
 * Register block of the (float) vanilla accelerator.
 *
 * Every field is 32 bits wide, so the field order below fixes the register
 * offsets (0x00 .. 0x24). It is meant to be overlaid on the peripheral base
 * address (0x70001000 for VanillaAccelerator).
 */
struct regs
{
    /* Buffer addresses (pointers narrowed to 32 bits by the driver). */
    uint32_t ifmap;   /* 0x00: input feature map buffer */
    uint32_t weights; /* 0x04: weight buffer */
    uint32_t result;  /* 0x08: output buffer */

    /* Convolution dimensions. */
    uint32_t oc;      /* 0x0c: output channels */
    uint32_t iw;      /* 0x10: input width */
    uint32_t ih;      /* 0x14: input height */
    uint32_t ic;      /* 0x18: input channels */
    uint32_t kh;      /* 0x1c: kernel height */
    uint32_t kw;      /* 0x20: kernel width */

    /* Written last with the value 1 to start the operation. */
    uint32_t control; /* 0x24 */
};

typedef struct regs regs_t;

/*
 * Software reference implementation of the vanilla accelerator conv2d (NCHW).
 *
 * Computes a stride-1 2D convolution with zero padding of kh/2 rows and kw/2
 * columns on each side, so every output plane has the same ih x iw size as the
 * input planes.
 *
 * ifmap    input,   indexed as [ic][ih][iw]
 * weights  kernels, indexed as [oc][ic][kh][kw]
 * result   output,  indexed as [oc][ih][iw]; every element is overwritten
 *
 * Returns 0 on success, -1 if the temporary padded buffer cannot be allocated.
 *
 * This is only example code. A real hardware accelerator would call device
 * specific malloc/free functions.
 */
int32_t
vanilla_accelerator_conv2dnchw(float* ifmap, float* weights, float* result, int32_t oc, int32_t iw, int32_t ih, int32_t ic,
                               int32_t kh, int32_t kw) {
  /* Padding on the low side, and first padded index past the input data. */
  const int kw_low = kw / 2;
  const int kh_low = kh / 2;
  const int kw_high = iw + kw_low;
  const int kh_high = ih + kh_low;

  const int padded_iw = iw + 2 * kw_low;
  const int padded_ih = ih + 2 * kh_low;
  const int padded_plane = padded_iw * padded_ih;

  /*
   * Element count kept from the original code: ic padded planes plus one
   * spare plane and one spare row. Only the first ic planes are indexed below.
   * The cast on malloc stays because the file may be built as C++.
   */
  const size_t pad_count =
      (size_t)ic * (size_t)padded_plane + (size_t)padded_plane + (size_t)padded_iw;
  float* pad_temp = (float*)malloc(pad_count * sizeof(float));
  if (pad_temp == NULL) {
    return -1;
  }

  /* Copy the input into the centre of the padded buffer, zero the border. */
  for (int c = 0; c < ic; ++c) {
    for (int y = 0; y < padded_ih; ++y) {
      for (int x = 0; x < padded_iw; ++x) {
        const int inside =
            (kh_low <= y) && (y < kh_high) && (kw_low <= x) && (x < kw_high);
        pad_temp[c * padded_plane + y * padded_iw + x] =
            inside ? ifmap[c * iw * ih + (y - kh_low) * iw + (x - kw_low)] : 0.0f;
      }
    }
  }

  /*
   * Accumulate each output element in a local variable, in the same
   * (ic, kh, kw) order as before, and store it once. An empty reduction
   * (ic, kh or kw equal to 0) now yields 0 instead of leaving the output
   * element unwritten.
   */
  for (int o = 0; o < oc; ++o) {
    for (int y = 0; y < ih; ++y) {
      for (int x = 0; x < iw; ++x) {
        float acc = 0.0f;
        for (int c = 0; c < ic; ++c) {
          for (int ky = 0; ky < kh; ++ky) {
            for (int kx = 0; kx < kw; ++kx) {
              acc = acc + pad_temp[c * padded_plane + (y + ky) * padded_iw + x + kx] *
                              weights[o * ic * kh * kw + c * kh * kw + ky * kw + kx];
            }
          }
        }
        result[o * iw * ih + y * iw + x] = acc;
      }
    }
  }

  free(pad_temp);
  return 0;
}
Loading

0 comments on commit ca8c015

Please sign in to comment.