From 062d455a05f777c86e6867a213d95a1dd251b3d4 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Mon, 1 Oct 2018 06:13:51 +0800 Subject: [PATCH 01/10] [onni] Refactor Interpreter. --- tools/onni/CMakeLists.txt | 2 +- tools/onni/InterpreterPass.cpp | 131 +++++++++++++++++++++++++++++++++ tools/onni/InterpreterPass.h | 46 ++++++++++++ tools/onni/Makefile.am | 3 +- tools/onni/ONNIApp.cpp | 94 ++--------------------- tools/onni/onnc-runtime.h | 83 --------------------- 6 files changed, 186 insertions(+), 173 deletions(-) create mode 100644 tools/onni/InterpreterPass.cpp create mode 100644 tools/onni/InterpreterPass.h delete mode 100644 tools/onni/onnc-runtime.h diff --git a/tools/onni/CMakeLists.txt b/tools/onni/CMakeLists.txt index c2ac954da..539f14bf0 100644 --- a/tools/onni/CMakeLists.txt +++ b/tools/onni/CMakeLists.txt @@ -1,7 +1,7 @@ include_directories(${ONNC_INCLUDE_DIRS}) -add_executable(onni main.cpp ONNIApp.cpp ONNIConfig.cpp Interpreter.cpp) +add_executable(onni main.cpp ONNIApp.cpp ONNIConfig.cpp Interpreter.cpp InterpreterPass.cpp) target_link_libraries(onni libonnc) install(TARGETS onni diff --git a/tools/onni/InterpreterPass.cpp b/tools/onni/InterpreterPass.cpp new file mode 100644 index 000000000..565244552 --- /dev/null +++ b/tools/onni/InterpreterPass.cpp @@ -0,0 +1,131 @@ +//===- InterpreterPass.cpp ------------------------------------------===// +// +// The ONNC Project +// +// See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "InterpreterPass.h" + +#include "Interpreter.h" + +#include +#include +#include +#include + +#include +#include + +#define restrict __restrict__ +extern "C" { +#include +} +#undef restrict + +using namespace onnc; + +//===----------------------------------------------------------------------===// +// InterpreterPass +//===----------------------------------------------------------------------===// +InterpreterPass::InterpreterPass(TargetBackend *pBackend, + char *pInputMem, + unsigned int pVerbose) + : ModulePass(ID), + m_pBackend(pBackend), m_pInputMem(pInputMem), m_Verbose(pVerbose) { +} + +Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) +{ + // TODO: Refactor + void *runtime_context = ONNC_RUNTIME_init_runtime(); + + Interpreter interpreter(runtime_context); + + // XXX: Use onnc-runtime to handle memory + char *heap = NULL; + + // XXX: Use Pass or something to get internal memory size + uint64_t max_size = 0; + for (ComputeOperand *co : pModule.getComputeOperands()) { + if (ComputeMemOperand *mem = dyn_cast(co)) { + Value *v = co->getValue(); + if (mem->isInput()) { + // XXX: Multiple inputs + interpreter.m_ATable[v] = m_pInputMem; + } else if (mem->isWeight()) { + // XXX + FloatTensor *t = static_cast(v); + interpreter.m_ATable[v] = t->getValues().data(); + } else { + max_size = std::max(max_size, static_cast(mem->start()) + mem->length()); + } + } + } + + // TODO: aligned_alloc after c++17 + // XXX: Refactor + // TODO: posix_memalign(&interpreter.m_mem, backend->......, max_size) + int fail = posix_memalign(reinterpret_cast(&heap), 16, max_size); + assert((!fail) && "posix_memalign failed!"); + + // Fixup memory address + for (ComputeOperand *co : pModule.getComputeOperands()) { + if (ComputeMemOperand *mem = dyn_cast(co)) { + if (mem->isOutput() || mem->isInternal()) { + interpreter.m_ATable[co->getValue()] = heap + mem->start(); + } + } + } + + for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + if (m_Verbose > 0) { + cm.print(outs()); + outs() << 
std::endl; + } + + cm.accept(interpreter); + } + + // Hack for that: Due to the wrong ComputeOperand design, + // there is no output ComputeOperand. + // So that I have to use the OutputOperator's input tensor to + // get the real output. + for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + if (OutputOperator *out = dyn_cast(&cm)) { + for (int i = 0; i < out->getNumOfInputs(); ++i) { + Value *v = out->getInput(i); + float *output = static_cast(interpreter.m_ATable[v]); + + Tensor *t = static_cast(v); + size_t size = 1; + for (auto i: t->getDimensions()) { + size *= i; + } + outs() << '['; + for (size_t i = 0; i < size; ++i) { + outs() << std::fixed << output[i] << ", "; + } + outs() << ']' << std::endl; + } + } + } + + ONNC_RUNTIME_shutdown_runtime(runtime_context); + + // TODO: write output to file + free(heap); + + return Pass::kModuleNoChanged; +} + +//===----------------------------------------------------------------------===// +// Factory method +//===----------------------------------------------------------------------===// +char InterpreterPass::ID = 0; + +InterpreterPass *onnc::CreateInterpreterPass(TargetBackend *pBackend, + char *pInputMem, + unsigned int pVerbose) { + return new InterpreterPass(pBackend, pInputMem, pVerbose); +} diff --git a/tools/onni/InterpreterPass.h b/tools/onni/InterpreterPass.h new file mode 100644 index 000000000..da8b22034 --- /dev/null +++ b/tools/onni/InterpreterPass.h @@ -0,0 +1,46 @@ +//===- InterpreterPass.h -------------------------------------------------===// +// +// The ONNC Project +// +// See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef ONNC_INTERPRETER_PASS_H +#define ONNC_INTERPRETER_PASS_H +#include + +namespace onnc { + +class TargetBackend; + +// XXX: Experimental + +/** \class InterpreterPass + * \brief Run interpreter. 
+ */ +class InterpreterPass : public ModulePass +{ +public: + static char ID; + +public: + InterpreterPass(TargetBackend *pBackend, + char *pInputMem, + unsigned int pVerbose); + + ReturnType runOnModule(Module& pModule) override; + +private: + TargetBackend *m_pBackend; + char *m_pInputMem; + unsigned int m_Verbose; +}; + +// XXX: Experimental +InterpreterPass *CreateInterpreterPass(TargetBackend *pBackend, + char *pInputMem, + unsigned int pVerbose); + +} // namespace of onnc + +#endif diff --git a/tools/onni/Makefile.am b/tools/onni/Makefile.am index 7bdd3caae..b240a1261 100644 --- a/tools/onni/Makefile.am +++ b/tools/onni/Makefile.am @@ -24,7 +24,8 @@ onni_LDADD = @LIBONNC_LIBS@ @SKYPAT_LIBS@ nodist_onni_SOURCES = main.cpp \ ONNIApp.cpp \ ONNIConfig.cpp \ - Interpreter.cpp + Interpreter.cpp \ + InterpreterPass.cpp if HAVE_PTHREADS onni_LDADD += -lpthread diff --git a/tools/onni/ONNIApp.cpp b/tools/onni/ONNIApp.cpp index fbdfbd4f9..3af8715d9 100644 --- a/tools/onni/ONNIApp.cpp +++ b/tools/onni/ONNIApp.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// #include "ONNIApp.h" -#include "Interpreter.h" +#include "InterpreterPass.h" + #include #include #include @@ -15,21 +16,12 @@ #include #include #include -#include -#include #include #include #include #include -#include #include #include -#include -#include - -extern "C" { -#include "onnc-runtime.h" -} using namespace onnc; @@ -73,17 +65,8 @@ int ONNIApp::run() backend->addTensorSel(pm); backend->addMemAlloc(pm); - pm.run(module); - - // TODO: Refactor - void *runtime_context = ONNC_RUNTIME_init_runtime(); - - Interpreter interpreter(runtime_context); - - // XXX: Use onnc-runtime to handle memory - char *input_mem = NULL; - char *heap = NULL; // FIXME: Use onnc-runtime to handle input + char *input_mem = NULL; { xTensorProto tensor; std::ifstream input_fin(options().input().native()); @@ -92,75 +75,10 @@ int ONNIApp::run() input_mem = new char[raw_data_str.size()]; memcpy(input_mem, raw_data_str.data(), raw_data_str.size()); } + pm.add(CreateInterpreterPass(backend, input_mem, options().verbose())); - // XXX: Refactor - uint64_t max_size = 0; - for (ComputeOperand *co : module.getComputeOperands()) { - if (ComputeMemOperand *mem = dyn_cast(co)) { - Value *v = co->getValue(); - if (mem->isInput()) { - // XXX: Multiple inputs - interpreter.m_ATable[v] = input_mem; - } else if (mem->isWeight()) { - // XXX - FloatTensor *t = static_cast(v); - interpreter.m_ATable[v] = t->getValues().data(); - } else { - max_size = std::max(max_size, static_cast(mem->start()) + mem->length()); - } - } - } - - // TODO: aligned_alloc after c++17 - // XXX: Refactor - // TODO: posix_memalign(&interpreter.m_mem, backend->......, max_size) - int fail = posix_memalign(reinterpret_cast(&heap), 16, max_size); - assert((!fail) && "posix_memalign failed!"); - - // Fixup memory address - for (ComputeOperand *co : module.getComputeOperands()) { - if (ComputeMemOperand *mem = dyn_cast(co)) { - if (mem->isOutput() || mem->isInternal()) { - interpreter.m_ATable[co->getValue()] = heap + mem->start(); - } - } - } - - for (ComputeOperator &cm : *module.getRootComputeGraph()) { - if (options().verbose() > 0) { - cm.print(outs()); - outs() << std::endl; - } - cm.accept(interpreter); - } - - // Hack for that: Due to the wrong ComputeOperand design, - // there is no output ComputeOperand. - // So that I have to use the OutputOperator's input tensor to - // get the real output. 
- for (ComputeOperator &cm : *module.getRootComputeGraph()) { - if (OutputOperator *out = dyn_cast(&cm)) { - for (int i = 0; i < out->getNumOfInputs(); ++i) { - Value *v = out->getInput(i); - float *output = static_cast(interpreter.m_ATable[v]); - - Tensor *t = static_cast(v); - size_t size = 1; - for (auto i: t->getDimensions()) { - size *= i; - } - outs() << '['; - for (size_t i = 0; i < size; ++i) { - outs() << std::fixed << output[i] << ", "; - } - outs() << ']' << std::endl; - } - } - } - - ONNC_RUNTIME_shutdown_runtime(runtime_context); - - // TODO: write output to file + pm.run(module); + delete input_mem; return EXIT_SUCCESS; } diff --git a/tools/onni/onnc-runtime.h b/tools/onni/onnc-runtime.h deleted file mode 100644 index 25fc2c5de..000000000 --- a/tools/onni/onnc-runtime.h +++ /dev/null @@ -1,83 +0,0 @@ -#pragma once - -// internal -#include - -typedef struct ONNC_RUNTIME_Context { - void *input_context; - void *weight_context; - void *output_context; - void **mem; /* Deprecated */ - size_t mem_i; /* Deprecated */ -} Context; - - -typedef struct ONNC_RUNTIME_Tensor_offset TensorOffset; -typedef struct ONNC_RUNTIME_Tensor_offset_table TensorOffsetTable; - - -#include -#include - -struct ONNC_RUNTIME_Tensor_offset { - uint64_t offset; /* Tensor offset */ - uint64_t size; /* Size of tensor in bytes */ -}; - -#define ONNC_RUNTIME_TENSOR_FILE_MAGIC ".TSR" - -struct ONNC_RUNTIME_Tensor_offset_table { - uint8_t magic[8]; /* Tensor File magic number. */ - uint64_t number_of_tensors; - struct ONNC_RUNTIME_Tensor_offset tensor_offsets[]; -}; - -/** - * ONNC generated entry point. - * @param context The ONNC Runtime Context. - */ -void model_main(void *context); - -/** - * Initialize runtime. - * @deprecated - * @return The ONNC Runtime Context, should be passed to every ONNC Runtime functions. - */ -void *ONNC_RUNTIME_init_runtime() { - Context *context = (Context *)calloc(1 , sizeof(Context)); - // XXX: design! - context->mem = (void **)calloc(2048 , sizeof(void *)); - context->mem_i = 0; - - return context; -} - -/** - * Shutdown runtime. - * @deprecated - * @param onnc_runtime_context The ONNC Runtime Context. - * @return True if shutdown successfully. False if something wrong. - */ -bool ONNC_RUNTIME_shutdown_runtime(void *onnc_runtime_context) { - if (onnc_runtime_context == NULL) { - return true; - } - - Context *context = (Context *)onnc_runtime_context; - for (size_t i = 0; i < context->mem_i; ++i) { - free(context->mem[i]); - } - - free(context); - return true; -} - -/** - * Get tensor address from tensor table. - * @param tensor_table The tensor table start address. - * @param index Tensor index. - * @return The memory address of the TensorTable[index]. - */ -void *ONNC_RUNTIME_load_from_tensor_table(void *tensor_table, uint32_t index) { - return NULL; -} From 28b3ae3f6727fa98793681eea6d12bd0f5fd6361 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Mon, 1 Oct 2018 10:28:27 +0800 Subject: [PATCH 02/10] [onni] Refactor & add dry-run flag. 
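
Interpretation moves out of ONNIApp::run() into a new InterpreterPass,
so it is scheduled through the PassManager like the other backend
passes. A --dry-run option is added: when it is set, the pass only
computes and reports the internal memory requirement and skips input
loading and execution. The wiring in ONNIApp.cpp, as added by this
patch, is:

    pm.add(CreateInterpreterPass(backend, input_mem,
                                 options().verbose(), options().dryRun()));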
--- tools/onni/Interpreter.h | 6 +-- tools/onni/InterpreterPass.cpp | 97 +++++++++++++++++++++------------- tools/onni/InterpreterPass.h | 11 +++- tools/onni/ONNIApp.cpp | 8 ++- tools/onni/ONNIConfig.h | 5 ++ tools/onni/main.cpp | 13 ++++- 6 files changed, 92 insertions(+), 48 deletions(-) diff --git a/tools/onni/Interpreter.h b/tools/onni/Interpreter.h index 25aa94626..df3629f86 100644 --- a/tools/onni/Interpreter.h +++ b/tools/onni/Interpreter.h @@ -20,11 +20,10 @@ namespace onnc { class Interpreter : public ComputeVisitor { public: - Interpreter(void *pContext) : m_pContext(pContext) {}; - // XXX typedef std::unordered_map AddressTable; AddressTable m_ATable; + void *m_pContext; virtual void visit(Abs& pAbs); virtual void visit(Acos& pAcos); @@ -139,9 +138,6 @@ class Interpreter : public ComputeVisitor virtual void visit(Scale& pScale); virtual void visit(ScaledTanh& pScaledTanh); virtual void visit(ThresholdedRelu& pThresholdedRelu); - -private: - void *m_pContext; }; } // namespace of onnc diff --git a/tools/onni/InterpreterPass.cpp b/tools/onni/InterpreterPass.cpp index 565244552..3d78f2219 100644 --- a/tools/onni/InterpreterPass.cpp +++ b/tools/onni/InterpreterPass.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -30,63 +31,87 @@ using namespace onnc; //===----------------------------------------------------------------------===// InterpreterPass::InterpreterPass(TargetBackend *pBackend, char *pInputMem, - unsigned int pVerbose) + unsigned int pVerbose, + bool pIsDryRun) : ModulePass(ID), - m_pBackend(pBackend), m_pInputMem(pInputMem), m_Verbose(pVerbose) { + m_pBackend(pBackend), m_pInputMem(pInputMem), + m_Verbose(pVerbose), m_DryRun(pIsDryRun) { } Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) { - // TODO: Refactor - void *runtime_context = ONNC_RUNTIME_init_runtime(); - - Interpreter interpreter(runtime_context); - - // XXX: Use onnc-runtime to handle memory - char *heap = NULL; - // XXX: Use Pass or something to get internal memory size - uint64_t max_size = 0; + uint64_t internal_memory_size = 0; for (ComputeOperand *co : pModule.getComputeOperands()) { if (ComputeMemOperand *mem = dyn_cast(co)) { Value *v = co->getValue(); if (mem->isInput()) { // XXX: Multiple inputs - interpreter.m_ATable[v] = m_pInputMem; + m_Interpreter.m_ATable[v] = m_pInputMem; } else if (mem->isWeight()) { // XXX FloatTensor *t = static_cast(v); - interpreter.m_ATable[v] = t->getValues().data(); + m_Interpreter.m_ATable[v] = t->getValues().data(); } else { - max_size = std::max(max_size, static_cast(mem->start()) + mem->length()); + internal_memory_size = + std::max(internal_memory_size, + static_cast(mem->start()) + mem->length()); } } } + if (m_Verbose >= 1) { + errs() << "internal memory: " << internal_memory_size << std::endl; + } - // TODO: aligned_alloc after c++17 - // XXX: Refactor - // TODO: posix_memalign(&interpreter.m_mem, backend->......, max_size) - int fail = posix_memalign(reinterpret_cast(&heap), 16, max_size); - assert((!fail) && "posix_memalign failed!"); - - // Fixup memory address - for (ComputeOperand *co : pModule.getComputeOperands()) { - if (ComputeMemOperand *mem = dyn_cast(co)) { - if (mem->isOutput() || mem->isInternal()) { - interpreter.m_ATable[co->getValue()] = heap + mem->start(); + if (!m_DryRun) { + // XXX: Use onnc-runtime to handle memory + char *heap = NULL; + + // TODO: aligned_alloc after c++17 + // XXX: Refactor into interpreter + // TODO: posix_memalign(&interpreter.m_mem, + // backend->......, + // 
internal_memory_size) + int fail = posix_memalign(reinterpret_cast(&heap), + 16, + internal_memory_size); + assert((!fail) && "posix_memalign failed!"); + + // Fixup memory address + for (ComputeOperand *co : pModule.getComputeOperands()) { + if (ComputeMemOperand *mem = dyn_cast(co)) { + if (mem->isOutput() || mem->isInternal()) { + m_Interpreter.m_ATable[co->getValue()] = heap + mem->start(); + } } } + + Pass::ReturnType r = runInterpreter(pModule); + + // TODO: (use runtime) write output to file + free(heap); + + return r; + } else { + return Pass::kModuleNoChanged; } +} + +Pass::ReturnType InterpreterPass::runInterpreter(Module &pModule) +{ + // TODO: Refactor into Interpreter + m_Interpreter.m_pContext = ONNC_RUNTIME_init_runtime(); for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { - if (m_Verbose > 0) { - cm.print(outs()); - outs() << std::endl; - } - cm.accept(interpreter); + if (m_Verbose >= 2) cm.print(outs()); + + cm.accept(m_Interpreter); + + if (m_Verbose >= 2) outs() << std::endl; } + // Hack for that: Due to the wrong ComputeOperand design, // there is no output ComputeOperand. // So that I have to use the OutputOperator's input tensor to @@ -95,7 +120,7 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) if (OutputOperator *out = dyn_cast(&cm)) { for (int i = 0; i < out->getNumOfInputs(); ++i) { Value *v = out->getInput(i); - float *output = static_cast(interpreter.m_ATable[v]); + float *output = static_cast(m_Interpreter.m_ATable[v]); Tensor *t = static_cast(v); size_t size = 1; @@ -111,10 +136,7 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) } } - ONNC_RUNTIME_shutdown_runtime(runtime_context); - - // TODO: write output to file - free(heap); + ONNC_RUNTIME_shutdown_runtime(m_Interpreter.m_pContext); return Pass::kModuleNoChanged; } @@ -126,6 +148,7 @@ char InterpreterPass::ID = 0; InterpreterPass *onnc::CreateInterpreterPass(TargetBackend *pBackend, char *pInputMem, - unsigned int pVerbose) { - return new InterpreterPass(pBackend, pInputMem, pVerbose); + unsigned int pVerbose, + bool pIsDryRun) { + return new InterpreterPass(pBackend, pInputMem, pVerbose, pIsDryRun); } diff --git a/tools/onni/InterpreterPass.h b/tools/onni/InterpreterPass.h index da8b22034..ab36f19ac 100644 --- a/tools/onni/InterpreterPass.h +++ b/tools/onni/InterpreterPass.h @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #ifndef ONNC_INTERPRETER_PASS_H #define ONNC_INTERPRETER_PASS_H +#include "Interpreter.h" #include namespace onnc { @@ -26,20 +27,26 @@ class InterpreterPass : public ModulePass public: InterpreterPass(TargetBackend *pBackend, char *pInputMem, - unsigned int pVerbose); + unsigned int pVerbose, + bool pIsDryRun); ReturnType runOnModule(Module& pModule) override; private: + ReturnType runInterpreter(Module& pModule); + TargetBackend *m_pBackend; char *m_pInputMem; unsigned int m_Verbose; + bool m_DryRun; + Interpreter m_Interpreter; }; // XXX: Experimental InterpreterPass *CreateInterpreterPass(TargetBackend *pBackend, char *pInputMem, - unsigned int pVerbose); + unsigned int pVerbose, + bool pIsDryRun); } // namespace of onnc diff --git a/tools/onni/ONNIApp.cpp b/tools/onni/ONNIApp.cpp index 3af8715d9..bd4f88720 100644 --- a/tools/onni/ONNIApp.cpp +++ b/tools/onni/ONNIApp.cpp @@ -64,10 +64,13 @@ int ONNIApp::run() TargetBackend* backend = target->createBackend(options().target()); backend->addTensorSel(pm); backend->addMemAlloc(pm); + if (options().verbose() >= 3) { + // TODO: Add statistics + } 
// FIXME: Use onnc-runtime to handle input char *input_mem = NULL; - { + if (!options().dryRun()) { xTensorProto tensor; std::ifstream input_fin(options().input().native()); tensor.ParseFromIstream(&input_fin); @@ -75,7 +78,8 @@ int ONNIApp::run() input_mem = new char[raw_data_str.size()]; memcpy(input_mem, raw_data_str.data(), raw_data_str.size()); } - pm.add(CreateInterpreterPass(backend, input_mem, options().verbose())); + pm.add(CreateInterpreterPass(backend, input_mem, + options().verbose(), options().dryRun())); pm.run(module); diff --git a/tools/onni/ONNIConfig.h b/tools/onni/ONNIConfig.h index 211e4f633..7bb83449e 100644 --- a/tools/onni/ONNIConfig.h +++ b/tools/onni/ONNIConfig.h @@ -63,6 +63,10 @@ class ONNIConfig unsigned int verbose() const { return m_Verbose; } + void setDryRun(bool pIsDryRun) { m_DryRun = pIsDryRun; } + + bool dryRun() const { return m_DryRun; } + private: onnc::Path m_Model; onnc::Path m_Input; @@ -71,6 +75,7 @@ class ONNIConfig std::string m_Arch; onnc::TargetOptions m_TargetOptions; unsigned int m_Verbose; + bool m_DryRun; }; #endif diff --git a/tools/onni/main.cpp b/tools/onni/main.cpp index 391f563c8..580de5aa7 100644 --- a/tools/onni/main.cpp +++ b/tools/onni/main.cpp @@ -18,7 +18,7 @@ static AboutData g_About("onni", "onni", "0.1.0", AboutLicense::kPrivate, - "ONNI is the interpreter of ONNC"); + "[Experimental] ONNI is the interpreter of ONNC"); static cl::opt OptModel("model", cl::kPositional, cl::kOptional, cl::kValueRequired, @@ -62,9 +62,15 @@ OptQuiet("quiet", cl::kLong, cl::kOptional, cl::kValueDisallowed, cl::desc("Set verbose level to 0."), cl::about(g_About)); +static cl::opt +OptDryRun("dry-run", cl::kLong, cl::kOptional, cl::kValueDisallowed, + cl::init(false), + cl::desc("Do not do the inference, just print statistics."), + cl::about(g_About)); + static cl::opt OptQuadruple("mquadruple", cl::kShort, cl::kOptional, cl::kValueRequired, cl::desc("target quadruple"), cl::about(g_About)); - + static cl::opt OptMArch("march", cl::kShort, cl::kOptional, cl::kValueRequired, cl::desc("target architecture"), cl::about(g_About)); @@ -87,6 +93,9 @@ int main(int pArgc, char* pArgv[]) if (OptQuiet) onni.options().setVerbose(0); + // --dry-run + onni.options().setDryRun(OptDryRun); + // --help if (OptHelp) { g_About.print(outs(), ONNIConfig::kNormal < onni.options().verbose()); From a169304a3b3f3b7c6c8b83901dc612e5e1135dfe Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Mon, 1 Oct 2018 10:28:58 +0800 Subject: [PATCH 03/10] [onni] Print weight memory. Add verbose level 3. --- tools/onni/InterpreterPass.cpp | 56 +++++++++++++++++++++++++++++++--- tools/onni/ONNIApp.cpp | 2 +- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/tools/onni/InterpreterPass.cpp b/tools/onni/InterpreterPass.cpp index 3d78f2219..2a83e7f22 100644 --- a/tools/onni/InterpreterPass.cpp +++ b/tools/onni/InterpreterPass.cpp @@ -24,6 +24,33 @@ extern "C" { } #undef restrict +// TODO: ====== REMOVE THIS AFTER REWRITE Support/Timer.h ====== +#include +#if defined(HAVE_SYS_TIMES_H) +#include +#endif +#if defined(HAVE_SYS_TIME_H) && defined(ENABLE_GETTIMEOFDAY) +#include +#endif +namespace { + onnc::Timer::Interval ns() { +#if defined(HAVE_CLOCK_GETTIME) && defined(ENABLE_CLOCK_GETTIME) + struct timespec ts; + int r = clock_gettime(CLOCK_MONOTONIC, &ts); + return r == -1 ? -1 : ts.tv_sec * 1000000000LL + ts.tv_nsec; +#elif defined(HAVE_GETTIMEOFDAY) && defined(ENABLE_GETTIMEOFDAY) + struct timeval tv; + int r = gettimeofday(&tv, NULL); + return r == -1 ? 
-1 : tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000LL); +#else + struct tms tm; + clock_t r = times(&tm); + return r == -1 ? -1 : r * 1000000000LL / g_ClkTick; +#endif + } +} +// ========= REMOVE THIS AFTER REWRITE Support/Timer.h ========= + using namespace onnc; //===----------------------------------------------------------------------===// @@ -41,6 +68,7 @@ InterpreterPass::InterpreterPass(TargetBackend *pBackend, Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) { // XXX: Use Pass or something to get internal memory size + uint64_t weight_memory_size = 0; uint64_t internal_memory_size = 0; for (ComputeOperand *co : pModule.getComputeOperands()) { if (ComputeMemOperand *mem = dyn_cast(co)) { @@ -52,6 +80,8 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) // XXX FloatTensor *t = static_cast(v); m_Interpreter.m_ATable[v] = t->getValues().data(); + weight_memory_size += + t->getValues().size() * sizeof(FloatTensor::ValueList::value_type); } else { internal_memory_size = std::max(internal_memory_size, @@ -60,7 +90,8 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) } } if (m_Verbose >= 1) { - errs() << "internal memory: " << internal_memory_size << std::endl; + outs() << "[v1] weight memory: " << weight_memory_size << std::endl; + outs() << "[v1] internal memory: " << internal_memory_size << std::endl; } if (!m_DryRun) { @@ -102,13 +133,30 @@ Pass::ReturnType InterpreterPass::runInterpreter(Module &pModule) // TODO: Refactor into Interpreter m_Interpreter.m_pContext = ONNC_RUNTIME_init_runtime(); + Timer::Interval total; + // TODO: Timer can not nested. Should rewrite it. + if (m_Verbose >= 1) total = ::ns(); for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + if (m_Verbose >= 2) { + outs() << "[v2] "; + cm.print(outs()); + outs() << std::endl; + } - if (m_Verbose >= 2) cm.print(outs()); + Timer timer; + if (m_Verbose >= 3) timer.start(); cm.accept(m_Interpreter); - - if (m_Verbose >= 2) outs() << std::endl; + if (m_Verbose >= 3) { + timer.stop(); + outs() << "[v3] " << cm.name() + << " runs in " << timer.interval() << ' ' << timer.unit() + << std::endl; + } + } + if (m_Verbose >= 1) { + total = ns() - total; + outs() << "[v1] total inference time: " << total << " ns" << std::endl; } diff --git a/tools/onni/ONNIApp.cpp b/tools/onni/ONNIApp.cpp index bd4f88720..69f5e8150 100644 --- a/tools/onni/ONNIApp.cpp +++ b/tools/onni/ONNIApp.cpp @@ -65,7 +65,7 @@ int ONNIApp::run() backend->addTensorSel(pm); backend->addMemAlloc(pm); if (options().verbose() >= 3) { - // TODO: Add statistics + // TODO: Add statistics pass } // FIXME: Use onnc-runtime to handle input From 1bc71c8f4e2e58617c8bec7981c5d58c8ce3b6a2 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Mon, 1 Oct 2018 11:53:16 +0800 Subject: [PATCH 04/10] [onni] Add CountOperatorsPass. 
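
CountOperatorsPass walks the root compute graph, tallies operators by
name, and prints an "Operator | Count" table plus a total, each line
carrying a caller-supplied prefix. ONNIApp schedules it when the
verbose level is at least 3, as added by this patch:

    if (options().verbose() >= 3) {
      pm.add(CreateCountOperatorsPass("[v3] "));
    }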
--- tools/onni/CMakeLists.txt | 2 +- tools/onni/CountOperatorsPass.cpp | 70 +++++++++++++++++++++++++++++++ tools/onni/CountOperatorsPass.h | 43 +++++++++++++++++++ tools/onni/InterpreterPass.cpp | 2 +- tools/onni/InterpreterPass.h | 2 +- tools/onni/Makefile.am | 1 + tools/onni/ONNIApp.cpp | 4 +- 7 files changed, 120 insertions(+), 4 deletions(-) create mode 100644 tools/onni/CountOperatorsPass.cpp create mode 100644 tools/onni/CountOperatorsPass.h diff --git a/tools/onni/CMakeLists.txt b/tools/onni/CMakeLists.txt index 539f14bf0..8aeda1909 100644 --- a/tools/onni/CMakeLists.txt +++ b/tools/onni/CMakeLists.txt @@ -1,7 +1,7 @@ include_directories(${ONNC_INCLUDE_DIRS}) -add_executable(onni main.cpp ONNIApp.cpp ONNIConfig.cpp Interpreter.cpp InterpreterPass.cpp) +add_executable(onni main.cpp ONNIApp.cpp ONNIConfig.cpp Interpreter.cpp InterpreterPass.cpp CountOperatorsPass.cpp) target_link_libraries(onni libonnc) install(TARGETS onni diff --git a/tools/onni/CountOperatorsPass.cpp b/tools/onni/CountOperatorsPass.cpp new file mode 100644 index 000000000..65286ca8e --- /dev/null +++ b/tools/onni/CountOperatorsPass.cpp @@ -0,0 +1,70 @@ +//===- CountOperatorsPass.cpp ---------------------------------------------===// +// +// The ONNC Project +// +// See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "CountOperatorsPass.h" + +#include +#include +#include + +#include +#include +#include + +using namespace onnc; + +//===----------------------------------------------------------------------===// +// CountOperatorsPass +//===----------------------------------------------------------------------===// +Pass::ReturnType CountOperatorsPass::runOnModule(Module &pModule) +{ + std::unordered_map count; + size_t op_len = 8; + uint64_t total = 0; + + for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + onnc::StringRef name = cm.name(); + count[name] += 1; + op_len = std::max(op_len, name.size()); + ++total; + } + + const std::string sep{" |"}; + size_t count_len = (total > 99999) ? 10 : 5; + + count_len += 1; + + outs() << m_Prefix << std::setw(op_len) << "Operator" << sep + << std::setw(count_len) << "Count" << std::endl; + outs() << m_Prefix + << std::setfill('-') + << std::setw(op_len) << '-' << "-+" << std::setw(count_len) << '-' + << std::setfill(' ') + << std::endl; + for (auto c : count) { + outs() << m_Prefix << std::setw(op_len) << c.first << sep + << std::setw(count_len) << c.second << std::endl; + } + outs() << m_Prefix + << std::setfill('-') + << std::setw(op_len) << '-' << "-+" << std::setw(count_len) << '-' + << std::setfill(' ') + << std::endl; + outs() << m_Prefix << std::setw(op_len) << "Total" << sep + << std::setw(count_len) << total << std::endl; + + return Pass::kModuleNoChanged; +} + +//===----------------------------------------------------------------------===// +// Factory method +//===----------------------------------------------------------------------===// +char CountOperatorsPass::ID = 0; + +CountOperatorsPass *onnc::CreateCountOperatorsPass(const std::string &pPrefix) { + return new CountOperatorsPass(pPrefix); +} diff --git a/tools/onni/CountOperatorsPass.h b/tools/onni/CountOperatorsPass.h new file mode 100644 index 000000000..d2c630364 --- /dev/null +++ b/tools/onni/CountOperatorsPass.h @@ -0,0 +1,43 @@ +//===- CountOperatorsPass.h ------------------------------------------------===// +// +// The ONNC Project +// +// See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef ONNC_COUNT_OPERATORS_PASS_H +#define ONNC_COUNT_OPERATORS_PASS_H +#include + +#include + +namespace onnc { + +class TargetBackend; + +// XXX: Experimental + +/** \class CountOperatorsPass + * \brief Count & print Operators count statistics + */ +class CountOperatorsPass : public ModulePass +{ +public: + static char ID; + +public: + CountOperatorsPass(const std::string &pPrefix) + : ModulePass(ID), m_Prefix(pPrefix) {} + + ReturnType runOnModule(Module& pModule) override; + +private: + std::string m_Prefix; +}; + +// XXX: Experimental +CountOperatorsPass *CreateCountOperatorsPass(const std::string &pPrefix); + +} // namespace of onnc + +#endif diff --git a/tools/onni/InterpreterPass.cpp b/tools/onni/InterpreterPass.cpp index 2a83e7f22..c322b14ed 100644 --- a/tools/onni/InterpreterPass.cpp +++ b/tools/onni/InterpreterPass.cpp @@ -1,4 +1,4 @@ -//===- InterpreterPass.cpp ------------------------------------------===// +//===- InterpreterPass.cpp ------------------------------------------------===// // // The ONNC Project // diff --git a/tools/onni/InterpreterPass.h b/tools/onni/InterpreterPass.h index ab36f19ac..741776d90 100644 --- a/tools/onni/InterpreterPass.h +++ b/tools/onni/InterpreterPass.h @@ -1,4 +1,4 @@ -//===- InterpreterPass.h -------------------------------------------------===// +//===- InterpreterPass.h --------------------------------------------------===// // // The ONNC Project // diff --git a/tools/onni/Makefile.am b/tools/onni/Makefile.am index b240a1261..e82a33153 100644 --- a/tools/onni/Makefile.am +++ b/tools/onni/Makefile.am @@ -22,6 +22,7 @@ onni_LDFLAGS = @LIBONNC_LDFLAGS@ onni_LDADD = @LIBONNC_LIBS@ @SKYPAT_LIBS@ nodist_onni_SOURCES = main.cpp \ + CountOperatorsPass.cpp \ ONNIApp.cpp \ ONNIConfig.cpp \ Interpreter.cpp \ diff --git a/tools/onni/ONNIApp.cpp b/tools/onni/ONNIApp.cpp index 69f5e8150..fb393c820 100644 --- a/tools/onni/ONNIApp.cpp +++ b/tools/onni/ONNIApp.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// #include "ONNIApp.h" + +#include "CountOperatorsPass.h" #include "InterpreterPass.h" #include @@ -65,7 +67,7 @@ int ONNIApp::run() backend->addTensorSel(pm); backend->addMemAlloc(pm); if (options().verbose() >= 3) { - // TODO: Add statistics pass + pm.add(CreateCountOperatorsPass("[v3] ")); } // FIXME: Use onnc-runtime to handle input From 44ee5192a5009c53a89d49af73e6fe60ce6e2f10 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Mon, 1 Oct 2018 12:21:02 +0800 Subject: [PATCH 05/10] [onni] Add OnnxOptPass. 
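
OnnxOptPass works on the ONNX IR: it runs shape inference, exports the
module to a model proto, applies a fixed list of onnx optimizer passes
(constant extraction, conv/bn and transpose fusions, elimination of
no-op identity/pad/transpose nodes and unused initializers), and
re-imports the result. It is off by default and only scheduled when
the new --onnx-opt flag is given:

    if (options().onnxOpt()) {
      pm.add(CreateOnnxOptPass());
    }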
--- tools/onni/CMakeLists.txt | 3 ++- tools/onni/Makefile.am | 3 ++- tools/onni/ONNIApp.cpp | 6 +++++ tools/onni/ONNIConfig.cpp | 2 +- tools/onni/ONNIConfig.h | 7 ++++- tools/onni/OnnxOptPass.cpp | 55 ++++++++++++++++++++++++++++++++++++++ tools/onni/OnnxOptPass.h | 36 +++++++++++++++++++++++++ tools/onni/main.cpp | 10 +++++++ 8 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 tools/onni/OnnxOptPass.cpp create mode 100644 tools/onni/OnnxOptPass.h diff --git a/tools/onni/CMakeLists.txt b/tools/onni/CMakeLists.txt index 8aeda1909..77018912e 100644 --- a/tools/onni/CMakeLists.txt +++ b/tools/onni/CMakeLists.txt @@ -1,7 +1,8 @@ include_directories(${ONNC_INCLUDE_DIRS}) -add_executable(onni main.cpp ONNIApp.cpp ONNIConfig.cpp Interpreter.cpp InterpreterPass.cpp CountOperatorsPass.cpp) +add_executable(onni main.cpp ONNIApp.cpp ONNIConfig.cpp Interpreter.cpp + InterpreterPass.cpp CountOperatorsPass.cpp OnnxOptPass.cpp) target_link_libraries(onni libonnc) install(TARGETS onni diff --git a/tools/onni/Makefile.am b/tools/onni/Makefile.am index e82a33153..d91b31af1 100644 --- a/tools/onni/Makefile.am +++ b/tools/onni/Makefile.am @@ -26,7 +26,8 @@ nodist_onni_SOURCES = main.cpp \ ONNIApp.cpp \ ONNIConfig.cpp \ Interpreter.cpp \ - InterpreterPass.cpp + InterpreterPass.cpp \ + OnnxOptPass.cpp if HAVE_PTHREADS onni_LDADD += -lpthread diff --git a/tools/onni/ONNIApp.cpp b/tools/onni/ONNIApp.cpp index fb393c820..09f3ddebc 100644 --- a/tools/onni/ONNIApp.cpp +++ b/tools/onni/ONNIApp.cpp @@ -9,6 +9,7 @@ #include "CountOperatorsPass.h" #include "InterpreterPass.h" +#include "OnnxOptPass.h" #include #include @@ -63,6 +64,11 @@ int ONNIApp::run() } PassManager pm; + + if (options().onnxOpt()) { + pm.add(CreateOnnxOptPass()); + } + TargetBackend* backend = target->createBackend(options().target()); backend->addTensorSel(pm); backend->addMemAlloc(pm); diff --git a/tools/onni/ONNIConfig.cpp b/tools/onni/ONNIConfig.cpp index 52e217d54..3cb2717ac 100644 --- a/tools/onni/ONNIConfig.cpp +++ b/tools/onni/ONNIConfig.cpp @@ -15,7 +15,7 @@ using namespace onnc; ONNIConfig::ONNIConfig() : m_Model(), m_Input(), m_Output(), m_Quadruple(), m_Arch(), m_TargetOptions(), - m_Verbose() { + m_Verbose(), m_DryRun(), m_OnnxOpt() { } ONNIConfig::~ONNIConfig() diff --git a/tools/onni/ONNIConfig.h b/tools/onni/ONNIConfig.h index 7bb83449e..ab11ee02b 100644 --- a/tools/onni/ONNIConfig.h +++ b/tools/onni/ONNIConfig.h @@ -63,10 +63,14 @@ class ONNIConfig unsigned int verbose() const { return m_Verbose; } - void setDryRun(bool pIsDryRun) { m_DryRun = pIsDryRun; } + void setDryRun(bool pDryRun) { m_DryRun = pDryRun; } bool dryRun() const { return m_DryRun; } + void setOnnxOpt(bool pIsOnnxOpt) { m_OnnxOpt = pIsOnnxOpt; } + + bool onnxOpt() const { return m_OnnxOpt; } + private: onnc::Path m_Model; onnc::Path m_Input; @@ -76,6 +80,7 @@ class ONNIConfig onnc::TargetOptions m_TargetOptions; unsigned int m_Verbose; bool m_DryRun; + bool m_OnnxOpt; }; #endif diff --git a/tools/onni/OnnxOptPass.cpp b/tools/onni/OnnxOptPass.cpp new file mode 100644 index 000000000..5a298b967 --- /dev/null +++ b/tools/onni/OnnxOptPass.cpp @@ -0,0 +1,55 @@ +//===- OnnxOptPass.cpp ----------------------------------------------------===// +// +// The ONNC Project +// +// See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#include "OnnxOptPass.h" + +#include +#include +#include + +#include + +#include + + + +using namespace onnc; + +//===----------------------------------------------------------------------===// +// OnnxOptPass +//===----------------------------------------------------------------------===// +Pass::ReturnType OnnxOptPass::runOnModule(Module &pModule) +{ + onnxInferShape(pModule); + + xProto mp; + onnc::ExportModelProto(mp, pModule); + mp = onnx::optimization::Optimize(mp, { + "extract_constant_to_initializer", + "fuse_add_bias_into_conv", + "fuse_bn_into_conv", + "fuse_consecutive_squeezes", + "fuse_consecutive_transposes", + "fuse_transpose_into_gemm", + "eliminate_identity", + "eliminate_nop_pad", + "eliminate_nop_transpose", + "eliminate_unused_initializer" + }); + pModule.delegate(xImportModelProto(mp)); + + return Pass::kModuleChanged; +} + +//===----------------------------------------------------------------------===// +// Factory method +//===----------------------------------------------------------------------===// +char OnnxOptPass::ID = 0; + +OnnxOptPass *onnc::CreateOnnxOptPass() { + return new OnnxOptPass(); +} diff --git a/tools/onni/OnnxOptPass.h b/tools/onni/OnnxOptPass.h new file mode 100644 index 000000000..56d576d38 --- /dev/null +++ b/tools/onni/OnnxOptPass.h @@ -0,0 +1,36 @@ +//===- OnnxOptPass.h ------------------------------------------------------===// +// +// The ONNC Project +// +// See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef ONNC_ONNX_OPT_PASS_H +#define ONNC_ONNX_OPT_PASS_H +#include + +namespace onnc { + +class TargetBackend; + +// XXX: Experimental + +/** \class OnnxOptPass + * \brief [ONNX IR] Call ONNX optimizer + */ +class OnnxOptPass : public ModulePass +{ +public: + static char ID; + +public: + OnnxOptPass() : ModulePass(ID) {} + + ReturnType runOnModule(Module& pModule) override; +}; + +OnnxOptPass *CreateOnnxOptPass(); + +} // namespace of onnc + +#endif diff --git a/tools/onni/main.cpp b/tools/onni/main.cpp index 580de5aa7..96e5a429a 100644 --- a/tools/onni/main.cpp +++ b/tools/onni/main.cpp @@ -68,6 +68,13 @@ OptDryRun("dry-run", cl::kLong, cl::kOptional, cl::kValueDisallowed, cl::desc("Do not do the inference, just print statistics."), cl::about(g_About)); +// TODO: General way to enable passes +static cl::opt +OptOnnxOpt("onnx-opt", cl::kLong, cl::kOptional, cl::kValueDisallowed, + cl::init(false), + cl::desc("Enable onnx optimizer"), + cl::about(g_About)); + static cl::opt OptQuadruple("mquadruple", cl::kShort, cl::kOptional, cl::kValueRequired, cl::desc("target quadruple"), cl::about(g_About)); @@ -96,6 +103,9 @@ int main(int pArgc, char* pArgv[]) // --dry-run onni.options().setDryRun(OptDryRun); + // --onnx-optimizer + onni.options().setOnnxOpt(OptOnnxOpt); + // --help if (OptHelp) { g_About.print(outs(), ONNIConfig::kNormal < onni.options().verbose()); From 6850f4af98c74e87aa738c55471c4a04bb8d0699 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Mon, 1 Oct 2018 14:52:28 +0800 Subject: [PATCH 06/10] [onni] Add memory statistic, verbose level 4. 
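
While sizing internal memory, the pass now also records each value's
start offset and length and, at the new verbose level 4, dumps them as
a small layout table (value, offset, end, size). The level-2 operator
dump moves out of the execution loop, so it is printed even on a dry
run. For reference, "internal memory" is the high-water mark over all
operands that are neither inputs nor weights; the accumulation in the
pass is essentially:

    internal_memory_size =
        std::max(internal_memory_size,
                 static_cast<uint64_t>(mem->start()) + mem->length());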
--- tools/onni/InterpreterPass.cpp | 80 +++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 11 deletions(-) diff --git a/tools/onni/InterpreterPass.cpp b/tools/onni/InterpreterPass.cpp index c322b14ed..20aaaf4e9 100644 --- a/tools/onni/InterpreterPass.cpp +++ b/tools/onni/InterpreterPass.cpp @@ -10,13 +10,17 @@ #include "Interpreter.h" #include +#include +#include #include #include #include #include -#include #include +#include +#include +#include #define restrict __restrict__ extern "C" { @@ -67,6 +71,9 @@ InterpreterPass::InterpreterPass(TargetBackend *pBackend, Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) { + std::unordered_map mem_start; + std::unordered_map mem_length; + // XXX: Use Pass or something to get internal memory size uint64_t weight_memory_size = 0; uint64_t internal_memory_size = 0; @@ -83,6 +90,8 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) weight_memory_size += t->getValues().size() * sizeof(FloatTensor::ValueList::value_type); } else { + mem_start[co->getValue()] = mem->start(); + mem_length[co->getValue()] = mem->length(); internal_memory_size = std::max(internal_memory_size, static_cast(mem->start()) + mem->length()); @@ -94,6 +103,60 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) outs() << "[v1] internal memory: " << internal_memory_size << std::endl; } + if (m_Verbose >= 2) { + for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + outs() << "[v2] "; + cm.print(outs()); + outs() << std::endl; + } + } + + if (m_Verbose >= 4) { + std::ios old_state(nullptr); + old_state.copyfmt(outs()); + + // TODO: Refactor this. We need a table printer. + size_t val_len = 8; + for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + if (dyn_cast(&cm)) continue; + if (dyn_cast(&cm)) continue; + if (dyn_cast(&cm)) continue; + for (int i = 0; i < cm.getNumOfOutputs(); ++i) { + val_len = std::max(val_len, cm.getOutput(i)->getName().size()); + } + } + const std::string sep{" | "}; + size_t ptr_len = 8; + outs() << "[v4] " << std::setw(val_len) << "Value" << sep + << std::setw(ptr_len) << "offset" << " " + << std::setw(ptr_len) << "end" << " " + << std::setw(ptr_len) << "size" << std::endl; + outs() << "[v4] " + << std::setfill('-') + << std::setw(val_len) << '-' << "-+" + << std::setw(ptr_len * 3 + 2 * 2 + 3) << '-' + << std::setfill(' ') + << std::endl; + for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + if (dyn_cast(&cm)) continue; + if (dyn_cast(&cm)) continue; + if (dyn_cast(&cm)) continue; + for (int i = 0; i < cm.getNumOfOutputs(); ++i) { + Value *v = cm.getOutput(i); + outs() << "[v4] " << std::setw(val_len) << v->getName() << sep + << std::internal << std::hex << std::setfill('0') + << "0x" << std::setw(ptr_len) << mem_start[v] << ' ' + << "0x" << std::setw(ptr_len) << mem_start[v] + mem_length[v] << ' ' + << std::right << std::dec << std::setfill(' ') + << std::setw(ptr_len) << mem_length[v] << ' ' + << std::endl; + } + } + + outs().copyfmt(old_state); + } + + if (!m_DryRun) { // XXX: Use onnc-runtime to handle memory char *heap = NULL; @@ -137,21 +200,16 @@ Pass::ReturnType InterpreterPass::runInterpreter(Module &pModule) // TODO: Timer can not nested. Should rewrite it. 
if (m_Verbose >= 1) total = ::ns(); for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { - if (m_Verbose >= 2) { - outs() << "[v2] "; - cm.print(outs()); - outs() << std::endl; - } - Timer timer; - if (m_Verbose >= 3) timer.start(); + if (m_Verbose >= 3) { + outs() << "[v3] " << cm.name() << " runs in "; + timer.start(); + } cm.accept(m_Interpreter); if (m_Verbose >= 3) { timer.stop(); - outs() << "[v3] " << cm.name() - << " runs in " << timer.interval() << ' ' << timer.unit() - << std::endl; + outs() << timer.interval() << ' ' << timer.unit() << std::endl; } } if (m_Verbose >= 1) { From 0895d5510880759db61a9ea39f6a34e18d94eb20 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Mon, 1 Oct 2018 15:22:28 +0800 Subject: [PATCH 07/10] [X86] No need to allocate input memory. --- lib/Target/X86/X86RemoveWeightFromLiveIntervals.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Target/X86/X86RemoveWeightFromLiveIntervals.cpp b/lib/Target/X86/X86RemoveWeightFromLiveIntervals.cpp index 5c566cff2..a0d9f89e6 100644 --- a/lib/Target/X86/X86RemoveWeightFromLiveIntervals.cpp +++ b/lib/Target/X86/X86RemoveWeightFromLiveIntervals.cpp @@ -9,6 +9,7 @@ #include #include #include +#include using namespace onnc; @@ -24,7 +25,7 @@ Pass::ReturnType X86RemoveWeightFromLiveIntervals::runOnModule(Module& pModule) // TODO: check if Define is ComputeOperator before casting. ComputeOperator* op = static_cast(v->getDefine()); // If the value is weight, remove it from liveness table. - if (isa(op)) + if (isa(op) || isa(op)) liveIntrvlPass->removeLiveInterval(v); } From 7a078dd9f5578667b9fefcab31885b396287ea78 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Mon, 1 Oct 2018 17:15:19 +0800 Subject: [PATCH 08/10] [onni] Do not show I/O/W operator. --- tools/onni/CountOperatorsPass.cpp | 6 ++++++ tools/onni/InterpreterPass.cpp | 3 +++ 2 files changed, 9 insertions(+) diff --git a/tools/onni/CountOperatorsPass.cpp b/tools/onni/CountOperatorsPass.cpp index 65286ca8e..3281aa304 100644 --- a/tools/onni/CountOperatorsPass.cpp +++ b/tools/onni/CountOperatorsPass.cpp @@ -8,6 +8,9 @@ #include "CountOperatorsPass.h" #include +#include +#include +#include #include #include @@ -27,6 +30,9 @@ Pass::ReturnType CountOperatorsPass::runOnModule(Module &pModule) uint64_t total = 0; for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + if (dyn_cast(&cm)) continue; + if (dyn_cast(&cm)) continue; + if (dyn_cast(&cm)) continue; onnc::StringRef name = cm.name(); count[name] += 1; op_len = std::max(op_len, name.size()); diff --git a/tools/onni/InterpreterPass.cpp b/tools/onni/InterpreterPass.cpp index 20aaaf4e9..bd0708b6e 100644 --- a/tools/onni/InterpreterPass.cpp +++ b/tools/onni/InterpreterPass.cpp @@ -105,6 +105,9 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) if (m_Verbose >= 2) { for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { + if (dyn_cast(&cm)) continue; + if (dyn_cast(&cm)) continue; + if (dyn_cast(&cm)) continue; outs() << "[v2] "; cm.print(outs()); outs() << std::endl; From 1d56ccd723957e4f5df86ccd1822febb7fdff209 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Tue, 2 Oct 2018 15:48:43 +0800 Subject: [PATCH 09/10] [onni] Code cleanup. 
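
The verbose-level-4 layout table is now rendered into a local
std::ostringstream and written out in one shot, so the shared output
stream's format flags no longer have to be saved and restored by hand;
the column headers now read "[offset" and ")end" to make the half-open
interval explicit, and the row that ends exactly at the top of
internal memory is marked with '*'. ONNIApp additionally runs the
backend's tensor scheduling pass between tensor selection and memory
allocation. A minimal, self-contained sketch of the formatting pattern
(standard iostreams stand in for onnc's outs(); the value printed is
illustrative):

    #include <iomanip>
    #include <iostream>
    #include <sstream>

    int main() {
      // Format into a local stream; std::cout's flags stay untouched.
      std::ostringstream os;
      os << std::hex << std::setfill('0')
         << "0x" << std::setw(8) << 0x4b00u << '\n';
      std::cout << os.str();  // single write of the finished table
    }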
--- tools/onni/InterpreterPass.cpp | 44 +++++++++++++++++----------------- tools/onni/ONNIApp.cpp | 1 + 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/tools/onni/InterpreterPass.cpp b/tools/onni/InterpreterPass.cpp index bd0708b6e..e6625d22c 100644 --- a/tools/onni/InterpreterPass.cpp +++ b/tools/onni/InterpreterPass.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #define restrict __restrict__ @@ -115,9 +116,7 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) } if (m_Verbose >= 4) { - std::ios old_state(nullptr); - old_state.copyfmt(outs()); - + std::ostringstream os; // TODO: Refactor this. We need a table printer. size_t val_len = 8; for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { @@ -130,33 +129,34 @@ Pass::ReturnType InterpreterPass::runOnModule(Module &pModule) } const std::string sep{" | "}; size_t ptr_len = 8; - outs() << "[v4] " << std::setw(val_len) << "Value" << sep - << std::setw(ptr_len) << "offset" << " " - << std::setw(ptr_len) << "end" << " " - << std::setw(ptr_len) << "size" << std::endl; - outs() << "[v4] " - << std::setfill('-') - << std::setw(val_len) << '-' << "-+" - << std::setw(ptr_len * 3 + 2 * 2 + 3) << '-' - << std::setfill(' ') - << std::endl; + os << "[v4] " << std::setw(val_len) << "Value" << sep + << std::setw(ptr_len) << "[offset" << " " + << std::setw(ptr_len) << ")end" << " " + << std::setw(ptr_len) << "size" << std::endl; + os << "[v4] " + << std::setfill('-') + << std::setw(val_len) << '-' << "-+" + << std::setw(ptr_len * 3 + 2 * 2 + 3) << '-' + << std::setfill(' ') + << std::endl; for (ComputeOperator &cm : *pModule.getRootComputeGraph()) { if (dyn_cast(&cm)) continue; if (dyn_cast(&cm)) continue; if (dyn_cast(&cm)) continue; for (int i = 0; i < cm.getNumOfOutputs(); ++i) { Value *v = cm.getOutput(i); - outs() << "[v4] " << std::setw(val_len) << v->getName() << sep - << std::internal << std::hex << std::setfill('0') - << "0x" << std::setw(ptr_len) << mem_start[v] << ' ' - << "0x" << std::setw(ptr_len) << mem_start[v] + mem_length[v] << ' ' - << std::right << std::dec << std::setfill(' ') - << std::setw(ptr_len) << mem_length[v] << ' ' - << std::endl; + uint64_t mem_end = mem_start[v] + mem_length[v]; + os << "[v4] " << std::setw(val_len) << v->getName() << sep + << std::internal << std::hex << std::setfill('0') + << "0x" << std::setw(ptr_len) << mem_start[v] + << (mem_end == internal_memory_size ? '*' : ' ') + << "0x" << std::setw(ptr_len) << mem_end << ' ' + << std::right << std::dec << std::setfill(' ') + << std::setw(ptr_len) << mem_length[v] << ' ' + << std::endl; } } - - outs().copyfmt(old_state); + outs() << os.str(); } diff --git a/tools/onni/ONNIApp.cpp b/tools/onni/ONNIApp.cpp index 09f3ddebc..391a02abb 100644 --- a/tools/onni/ONNIApp.cpp +++ b/tools/onni/ONNIApp.cpp @@ -71,6 +71,7 @@ int ONNIApp::run() TargetBackend* backend = target->createBackend(options().target()); backend->addTensorSel(pm); + backend->addTensorSched(pm); backend->addMemAlloc(pm); if (options().verbose() >= 3) { pm.add(CreateCountOperatorsPass("[v3] ")); From e57426c290d62aeb1ae660e1948b48584a126f66 Mon Sep 17 00:00:00 2001 From: TDYa127 Date: Tue, 2 Oct 2018 23:46:22 +0800 Subject: [PATCH 10/10] Fix autotools build. 
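
onni now takes ONNC_RUNTIME_init_runtime/ONNC_RUNTIME_shutdown_runtime
from libonnc's Runtime sources instead of a header local to the tool,
so Runtime/onnc-runtime.c has to be listed in lib/Makefile.am as well;
otherwise the autotools build cannot resolve those runtime symbols
when linking onni.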
--- lib/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Makefile.am b/lib/Makefile.am index b5399558e..599ac26b3 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -192,6 +192,7 @@ ONNC_SOURCES = \ Option/Option.cpp \ Option/OptionPool.cpp \ Option/OptParser.cpp \ + Runtime/onnc-runtime.c \ Runtime/operator/abs.c \ Runtime/operator/acos.c \ Runtime/operator/add.c \