Merge branch 'main' of github.com:tum-ei-eda/mlonmcu into main

tum-ei-eda · Feb 6, 2024 · ca8c015 · ca8c015
2 parents 3d51c80 + 9d3caef
commit ca8c015
Show file tree

Hide file tree

Showing 10 changed files with 1,642 additions and 180 deletions.
diff --git a/ipynb/Examples/TVM-UMA/TVM-UMA.ipynb b/ipynb/Examples/TVM-UMA/TVM-UMA.ipynb
diff --git a/ipynb/Examples/TVM-UMA/environment.yml.j2 b/ipynb/Examples/TVM-UMA/environment.yml.j2
@@ -0,0 +1,129 @@
+---
+# The MLONMCU_HOME is filled in automatically when creating the environment
+home: "{{ home_dir }}"
+logging:
+  level: DEBUG
+  to_file: false
+  rotate: false
+cleanup:
+  auto: true
+  keep: 50
+# Default locations for certain directories can be changed here
+# Non-absolute paths will always be treated as relative to MLONMCU_HOME
+paths:
+  # Where the dependencies are downloaded and installed
+  deps: deps
+  # If logging to file is used keep logs in this directory
+  logs: logs
+  # Location where reports and artifacts are written to
+  results: results
+  # Directory where custom extensions can be integrated
+  plugins: plugins
+  # Directory for intermediate build products, should be located on a large enough drive
+  temp: temp
+  # A list of directories which will be searched for models
+  # The paths will be checked in the order defined here stopping at the first match
+  # Non-existent paths will be skipped without throwing an error
+  models:
+    - "{{ home_dir }}/models"
+    - "{{ config_dir }}/models"
+# The default clone URLs and refs of the repositories are defined here
+repos:
+  tvm:
+    url: "https://github.com/tum-ei-eda/tvm.git"
+    ref: patch_uma_tvmc
+  etiss:
+    url: "https://github.com/tum-ei-eda/etiss.git"
+    ref: 739573d5f12798074bc7d375a46c005f23f59f67
+  mlif:
+    url: "https://github.com/tum-ei-eda/mlonmcu-sw.git"
+    ref: 1bfcf3c9b90dd532011011386f3a08ad20573c09
+  etiss_accelerator_plugins:
+    url: "https://github.com/tum-ei-eda/etiss-accelerator-plugins.git"
+    ref: main
+# Here all supported frameworks with their specific features are defined
+# Optionally disable unwanted or incompatible backends or features here
+# The configured defaults are used if no backend was specified in the command line options
+frameworks:
+  default: tvm
+  tvm:
+    enabled: true
+    backends:
+      default: tvmaot
+      tvmrt:
+        enabled: true
+        features:
+          autotuned: true
+      tvmaot:
+        enabled: true
+        features:
+          unpacked_api: true
+          usmp: true
+          autotuned: true
+          uma_backends: true
+      tvmaotplus:
+        enabled: true
+        features:
+          autotuned: true
+          uma_backends: true
+      tvmllvm:
+        enabled: true
+        features:
+          autotuned: true
+    features:
+      cmsisnnbyoc: false
+      muriscvnnbyoc: false
+# Some frontends are experimental and therefore disabled here
+# Features like packing are only available in certain environments
+frontends:
+  tflite:
+    enabled: true
+    features:
+      validate: true
+  relay:
+    enabled: true
+    features:
+      relayviz: true
+  onnx:
+    enabled: true
+  # TODO: saved_model (TF->TFLITE), ipynb (IPYNB->?)
+# Some targets/platforms support multiple toolchains
+toolchains:
+  gcc: true
+  llvm: true
+# Platforms extend the number of supported targets
+platforms:
+  mlif:
+    enabled: true
+    features:
+      debug: true
+      validate: true
+      benchmark: true
+  microtvm:
+    enabled: true
+    features: []
+# List of supported targets in the environment
+targets:
+  default: etiss
+  etiss:
+    enabled: true
+    features:
+      gdbserver: true
+      etissdbg: true
+      trace: true
+      log_instrs: true
+      vanilla_accelerator: true
+      # vext: true
+      # pext: true
+  host_x86:
+    enabled: true
+    features:
+      gdbserver: true
+postprocesses:
+  use: []
+vars:
+  allow_extensions: false
+  # tvm.make_tool: "ninja"
+  runs_per_stage: true
+  riscv_gcc.dl_url: "https://syncandshare.lrz.de/dl/fiWBtDLWz17RBc1Yd4VDW7/GCC/default/2023.11.27/Ubuntu/20.04/rv32gc_ilp32d.tar.xz"
+flags: []
diff --git a/ipynb/Examples/TVM-UMA/qnn_model.tflite b/ipynb/Examples/TVM-UMA/qnn_model.tflite
diff --git a/ipynb/Examples/TVM-UMA/requirements.txt b/ipynb/Examples/TVM-UMA/requirements.txt
@@ -0,0 +1,11 @@
+-r ../../requirements.txt
+attrs
+cloudpickle
+decorator
+numpy
+psutil
+scipy
+synr
+tornado
+typing_extensions
+tflite
diff --git a/ipynb/Examples/TVM-UMA/umatest.tflite b/ipynb/Examples/TVM-UMA/umatest.tflite
diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -8,6 +8,7 @@ tox==3.14.0
 pytest==6.2.5
 pytest-console-scripts==1.2.1
 coverage
-black>=22.8.0
+black>=24.1.1
 pylint
 twine
+black[jupyter]
diff --git a/resources/frameworks/tvm/tvmc_extension/QVanilla_Accelerator/conv2dnchw.cc b/resources/frameworks/tvm/tvmc_extension/QVanilla_Accelerator/conv2dnchw.cc
@@ -25,64 +25,64 @@ extern "C"
      *
      */
 
-typedef struct regs
-        {
-            uint32_t ifmap;
-            uint32_t weights;
-            uint32_t bias;
-            uint32_t result;
-            int32_t oc;
-            int32_t iw;
-            int32_t ih;
-            int32_t ic;
-            int32_t kh;
-            int32_t kw;
-            int32_t i_zp;
-            int32_t k_zp;
-            uint32_t control;
-            uint32_t status;
-        } regs_t;
+// typedef struct regs
+//         {
+//             uint32_t ifmap;
+//             uint32_t weights;
+//             uint32_t bias;
+//             uint32_t result;
+//             int32_t oc;
+//             int32_t iw;
+//             int32_t ih;
+//             int32_t ic;
+//             int32_t kh;
+//             int32_t kw;
+//             int32_t i_zp;
+//             int32_t k_zp;
+//             uint32_t control;
+//             uint32_t status;
+//         } regs_t;
 
 int32_t q_vanilla_accelerator_conv2dnchw(int8_t* q_vanilla_accelerator_0_i0, int8_t* q_vanilla_accelerator_0_i1, int32_t* bias_data, int32_t* compute,
                                       int32_t oc, int32_t iw, int32_t ih, int32_t ic, int32_t kh, int32_t kw, int32_t i_zp, int32_t k_zp) {
 
 
 
+
    // QVanillaAcceleratorT (with timing) base_adr = 0x70002000,
    // for QVanillaAccelerator (w/o timing) replace this file with conv2dnchw1.cc contents or interchange the names!
-  regs_t *p_regs = (regs_t *)0x70002000;
-
-  p_regs->ifmap   = (uint32_t) q_vanilla_accelerator_0_i0;
-	p_regs->weights = (uint32_t) q_vanilla_accelerator_0_i1;
-	p_regs->bias    = (uint32_t) bias_data;
-	p_regs->result  = (uint32_t) compute;
-
-	p_regs->oc = oc;
-	p_regs->iw = iw;
-	p_regs->ih = ih;
-	p_regs->ic = ic;
-	p_regs->kh = kh;
-	p_regs->kw = kw;
-	p_regs->i_zp = i_zp;
-	p_regs->k_zp = k_zp;
-	p_regs->control = 1;  //issue start signal
-
-  volatile uint32_t * status_reg = (int32_t*) 0x70000034;
-
-  volatile int32_t ready = 0;
-
-
-  while (!ready) {
-
-    ready = 0x1 & (p_regs->status);
-
-    // printf("ready = %d\n", ready);
-
-  }
-
-  printf("staus: completed (driver)\n");
-
-
 
-  return 0;
+   *(uint32_t**)0x70002000 = (uint32_t*)q_vanilla_accelerator_0_i0;
+   *(uint32_t**)0x70002004 = (uint32_t*)q_vanilla_accelerator_0_i1;
+   *(uint32_t**)0x70002008 = (uint32_t*)bias_data;
+   *(uint32_t**)0x7000200c = (uint32_t*)compute;
+
+   *(int32_t*)0x70002010 = oc;
+   *(int32_t*)0x70002014 = iw;
+   *(int32_t*)0x70002018 = ih;
+   *(int32_t*)0x7000201c = ic;
+   *(int32_t*)0x70002020 = kh;
+   *(int32_t*)0x70002024 = kw;
+   *(int32_t*)0x70002028 = i_zp;
+   *(int32_t*)0x7000202c = k_zp;
+
+   //issue start signal
+   // printf("issue start ...\n");
+   *(uint32_t*)0x70002030 = 0x00000001;
+
+
+   volatile uint32_t* status_reg = (uint32_t*) 0x70002034;
+
+   volatile uint32_t ready = 0;
+
+   while (!ready) {
+
+     ready = 0x1 & (*status_reg);
+
+     // printf("ready = %d\n", ready);
+
+   }
+
+   // printf("status: completed (driver)\n");
+   return 0;
 }
diff --git a/resources/frameworks/tvm/tvmc_extension/Vanilla_Accelerator/conv2dnchw.cc b/resources/frameworks/tvm/tvmc_extension/Vanilla_Accelerator/conv2dnchw.cc
@@ -1,9 +1,25 @@
-
+/*
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+*/
 #include <stdlib.h>
 #include <stdint.h>
-#include <stdio.h>
-
 
+// TODO(mjklaiber): leverage pragma import_c in the future
 #ifdef __cplusplus
 extern "C"
 #endif
@@ -21,40 +37,61 @@ extern "C"
      * \return error code
      *
      */
-
-typedef struct regs
-        {
-            uint32_t ifmap;
-            uint32_t weights;
-            uint32_t result;
-            uint32_t oc;
-            uint32_t iw;
-            uint32_t ih;
-            uint32_t ic;
-            uint32_t kh;
-            uint32_t kw;
-            uint32_t control;
-        } regs_t;
-
-int32_t vanilla_accelerator_conv2dnchw(float* ifmap, float* weights, float* result, int32_t oc, int32_t iw, int32_t ih, int32_t ic,
+    int32_t
+    vanilla_accelerator_conv2dnchw(float* ifmap, float* weights, float* result, int32_t oc, int32_t iw, int32_t ih, int32_t ic,
                         int32_t kh, int32_t kw) {
 
-  //VanillaAccelerator base_adr: 0x70001000
-  regs_t *p_regs  = (regs_t *)0x70001000;  // set the base address of the peripheral, that would come form some hw ip header.
-	p_regs->ifmap   = (uint32_t) ifmap;
-	p_regs->weights = (uint32_t) weights;
-	p_regs->result  = (uint32_t) result;
+  int kw_low = kw / 2;
+  int kh_low = kh / 2;
+  int kw_high = iw + kw / 2;
+  int kh_high = ih + kh / 2;
 
-	p_regs->oc = oc;
-	p_regs->iw = iw;
-	p_regs->ih = ih;
-	p_regs->ic = ic;
-	p_regs->kh = kh;
-	p_regs->kw = kw;
-	p_regs->control = 1;  // last command, to start the operation
+  int padded_iw = iw + 2 * kw_low;
+  int padded_ih = ih + 2 * kh_low;
 
+  // This is only example code. A real hardware accelerator would call a device specific malloc
+  // function.
+  float* pad_temp = (float*)malloc(
+      (((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(float));
 
+  if (pad_temp == NULL) {
+    return -1;
+  }
 
+  for (int i1 = 0; i1 < ic; ++i1) {
+    for (int i2 = 0; i2 < padded_ih; ++i2) {
+      for (int i3 = 0; i3 < padded_iw; ++i3) {
+        ((float*)pad_temp)[(((i1 * padded_iw * padded_ih) + (i2 * padded_iw)) + i3)] =
+            (((((kh_low <= i2) && (i2 < kh_high)) && (kw_low <= i3)) && (i3 < kw_high))
+                 ? ifmap[((((i1 * iw * ih) + ((i2 - kh_low) * iw)) + i3 - kw_low))]
+                 : 0.000000e+00f);
+      }
+    }
+  }
+  for (int i11 = 0; i11 < oc; ++i11) {
+    for (int i21 = 0; i21 < ih; ++i21) {
+      for (int i31 = 0; i31 < iw; ++i31) {
+        for (int i4 = 0; i4 < ic; ++i4) {
+          for (int i5 = 0; i5 < kh; ++i5) {
+            for (int i6 = 0; i6 < kw; ++i6) {
+              int cse_var_1 = (((i11 * iw * ih) + (i21 * iw)) + i31);
+              if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) {
+                result[cse_var_1] = 0.000000e+00f;
+              }
+              result[cse_var_1] =
+                  (result[cse_var_1] +
+                   (((float*)
+                         pad_temp)[i4 * padded_iw * padded_ih + (i21 + i5) * padded_iw + i31 + i6] *
+                    weights[((((i11 * ic * kh * kw) + (i4 * kh * kw)) + (i5 * kw)) + i6)]));
+            }
+          }
+        }
+      }
+    }
+  }
 
+  // This is only example code. A real hardware accelerator would call a device specific free
+  // function.
+  free(pad_temp);
   return 0;
 }