Skip to content

Commit

Permalink
Merge pull request #298 from ucb-bar/dev
Browse files Browse the repository at this point in the history
v0.7.1 Release

This is an incremental release:

* Support for rectangular convolutions
* New Timeloop configurations included in repo
* Area improvements for runtime-configurable dataflows
* Faster build and elaboration times
  • Loading branch information
hngenc authored May 22, 2023
2 parents 6f57972 + 2efd84a commit be2e9f2
Show file tree
Hide file tree
Showing 60 changed files with 493 additions and 260 deletions.
2 changes: 2 additions & 0 deletions .github/scripts/defaults.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ LOCAL_SIM_DIR=$LOCAL_CHIPYARD_DIR/sims/verilator
LOCAL_VERILATOR_DIR=$HOME/verilator-install
LOCAL_CONDA=/opt/conda/

CICONFIG=chipyard.config.WithNoDebug_GemminiRocketConfig

echo "::set-output name=LOCAL_WORK_DIR::$LOCAL_WORK_DIR"
echo "::set-output name=LOCAL_CHECKOUT_DIR::$LOCAL_CHECKOUT_DIR"
echo "::set-output name=LOCAL_RISCV_DIR::$LOCAL_RISCV_DIR"
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/do-rtl-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ source env.sh

cd $LOCAL_SIM_DIR
make -C $LOCAL_SIM_DIR clean
make -j$LOCAL_MAKE_NPROC -C $LOCAL_SIM_DIR VERILATOR_OPT_FLAGS="-O0 -OG" JAVA_OPTS="-Xmx2500M -Xss8M" SBT_OPTS="-Dsbt.ivy.home=$LOCAL_CHIPYARD_DIR/.ivy2 -Dsbt.supershell=false -Dsbt.global.base=$LOCAL_CHIPYARD_DIR/.sbt -Dsbt.boot.directory=$LOCAL_CHIPYARD_DIR/.sbt/boot" CONFIG=GemminiRocketConfig
make -j$LOCAL_MAKE_NPROC -C $LOCAL_SIM_DIR VERILATOR_OPT_FLAGS="-O0 -OG" JAVA_OPTS="-Xmx2500M -Xss8M" SBT_OPTS="-Dsbt.ivy.home=$LOCAL_CHIPYARD_DIR/.ivy2 -Dsbt.supershell=false -Dsbt.global.base=$LOCAL_CHIPYARD_DIR/.sbt -Dsbt.boot.directory=$LOCAL_CHIPYARD_DIR/.sbt/boot" CONFIG=$CICONFIG

9 changes: 4 additions & 5 deletions .github/scripts/install-gemmini.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,16 @@ cd $LOCAL_CHIPYARD_DIR
git fetch
git checkout $(cat $LOCAL_CHECKOUT_DIR/CHIPYARD.hash)

./build-setup.sh esp-tools
./build-setup.sh riscv-tools -f -s 6 -s 7 -s 8 -s 9

source env.sh

cd toolchains/esp-tools/riscv-isa-sim/build
git checkout $(cat $LOCAL_CHECKOUT_DIR/SPIKE.hash)
make && make install

cd $LOCAL_CHECKOUT_DIR
chown -R $(whoami) .
git config --global --add safe.directory $LOCAL_CHECKOUT_DIR
git config --global --add safe.directory '*'

cd $LOCAL_CHECKOUT_DIR
git submodule update --init --recursive software/gemmini-rocc-tests
rm -rf $LOCAL_CHIPYARD_DIR/generators/gemmini/* $LOCAL_CHIPYARD_DIR/generators/gemmini/.git*
mv -f $LOCAL_CHECKOUT_DIR/* $LOCAL_CHECKOUT_DIR/.git* $LOCAL_CHIPYARD_DIR/generators/gemmini/
Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/run-tests-rtl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ cd $LOCAL_CHIPYARD_DIR/generators/gemmini/software/gemmini-rocc-tests
CFLAGS=-DFAST ./build.sh

cd build
make test-baremetal-bareMetalC RUNNER="'make -C $LOCAL_CHIPYARD_DIR/sims/verilator/ CONFIG=GemminiRocketConfig run-binary-hex BINARY='"
make test-baremetal-bareMetalC RUNNER="'make -C $LOCAL_CHIPYARD_DIR/sims/verilator/ CONFIG=$CICONFIG run-binary-hex BINARY='"

11 changes: 11 additions & 0 deletions .github/scripts/run-tests-spike.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,17 @@ source $SCRIPT_DIR/enable-conda.sh
cd $LOCAL_CHIPYARD_DIR
source env.sh

cd $LOCAL_CHECKOUT_DIR
chown -R $(whoami) .
git config --global --add safe.directory $LOCAL_CHECKOUT_DIR
git config --global --add safe.directory '*'

cd $LOCAL_CHECKOUT_DIR
# Remove the stale libgemmini.so that Chipyard installed, then build and install the version submoduled in this repo
rm -rf $RISCV/lib/libgemmini.so
git submodule update --init software/libgemmini
make -C software/libgemmini install

cd $LOCAL_CHIPYARD_DIR/generators/gemmini/software/gemmini-rocc-tests
./build.sh

Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "software/onnxruntime-riscv"]
path = software/onnxruntime-riscv
url = https://github.com/pranav-prakash/onnxruntime-riscv.git
[submodule "software/libgemmini"]
path = software/libgemmini
url = https://github.com/ucb-bar/libgemmini.git
2 changes: 1 addition & 1 deletion CHIPYARD.hash
Original file line number Diff line number Diff line change
@@ -1 +1 @@
004297b6a8c01be1b2110c4cf4f9393ae1ff8805
569917e2f30616f85a841d16a92914ae98ad7184
21 changes: 8 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,17 @@ Run these steps to install Chipyard and Spike (make sure to checkout the correct
```shell
git clone https://github.com/ucb-bar/chipyard.git
cd chipyard
git checkout 1.8.1
./build-setup.sh esp-tools
git checkout 1.9.1
./build-setup.sh riscv-tools

source env.sh

cd generators/gemmini
git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*"
git fetch && git checkout v0.7.0
git fetch && git checkout v0.7.1
git submodule update --init --recursive

SPIKE_HASH=$(cat SPIKE.hash)

cd -
cd toolchains/esp-tools/riscv-isa-sim/build
git fetch && git checkout $SPIKE_HASH
make && make install
make -C software/libgemmini install

# The final step is only necessary if you want to run MIDAS simulations with
# realistic DRAM models
Expand Down Expand Up @@ -368,9 +363,8 @@ Afterwards, the test binaries will be found in `software/gemmini-rocc-tests/buil
Binaries whose names end in `-baremetal` are meant to be run in a bare-metal environment, while binaries whose names end in `-linux` are meant to run in a Linux environment.
You can run the tests either on a cycle-accurate RTL simulator, or on a (much faster) functional ISA simulator called Spike.

We use a special fork of Spike, found [here](https://github.com/ucb-bar/esp-isa-sim), which has support for Gemmini instructions.
(You can find the required commit hash in `SPIKE.hash`).
If you are using Chipyard, you can easily build Spike by running `./scripts/build-toolchains.sh esp-tools` from Chipyard's root directory.
We use a Spike extension, [libgemmini](https://github.com/ucb-bar/libgemmini), which adds support for Gemmini instructions.
If you are using Chipyard, you can build Spike by running `./scripts/build-toolchains.sh riscv-tools` from Chipyard's root directory, and then install the Gemmini extension by running `make -C software/libgemmini install` from the Gemmini directory.
Then, to run the `mvin_mvout` test, which simply moves a matrix into Gemmini's scratchpad before moving it back out into main memory, run the following commands:

```shell
Expand Down Expand Up @@ -502,8 +496,9 @@ This limitation may be lifted in the future.
- `rs1[1:0]` must be `01`
- `rs1[2]` is 0 if `mvin`s to the accumulator are of type `accType`, and 1 if they are `inputType`
- `rs1[4:3]` is 0 if the stride is being set for `mvin`, 1 if the stride is being set for `mvin2`, and 2 if the stride is being set for `mvin3`
- `rs1[31:16]` is the scratchpad-memory stride (also called the "private-memory stride" above)
- `rs1[63:32]` is the "scale" by which to multiply data as it's being moved in to the scratchpad. This is ignored if Gemmini isn't configured to have the ability to scale values during `mvin`s.
- `rs2` = the stride in bytes
- `rs2` is the main-memory stride in bytes
- `funct` = 0

**Action:** stride <= rs2; scale <= rs1[63:32]
Expand Down
1 change: 0 additions & 1 deletion SPIKE.hash

This file was deleted.

6 changes: 3 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ name := "gemmini"

version := "3.1.0"

scalaVersion := "2.12.10"
scalaVersion := "2.13.10"

libraryDependencies ++= Seq(
"edu.berkeley.cs" %% "chisel3" % "3.4.+",
"edu.berkeley.cs" %% "chisel3" % "3.5.6",
"edu.berkeley.cs" %% "rocketchip" % "1.2.+",
"edu.berkeley.cs" %% "chisel-iotesters" % "1.5.+",
"edu.berkeley.cs" %% "chisel-iotesters" % "2.5.6",
"org.scalanlp" %% "breeze" % "1.1")

resolvers ++= Seq(
Expand Down
64 changes: 64 additions & 0 deletions modeling/timeloop/arch/arch_default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# A Timeloop-compatible YAML definition of an architecture similar to Gemmini
# WARNING: Not correlated with Gemmini RTL
#
# Levels described below, outermost to innermost:
#   System  (DRAM)
#     Chip  (Scratchpad)
#       PECols[0..15]  (Accumulator)
#         PERows[0..15]  (Registers, MACC)
architecture:
version: 0.3

subtree:
- name: System
local:
# Main memory; declared at the System level, i.e. outside the Chip subtree.
- name: DRAM
class: DRAM
attributes:
instances: 1
word-bits: 8
block_size: 64
shared_bandwidth: 8

subtree:
- name: Chip
attributes:
technology: 40nm
local:
# Single shared scratchpad of 8-bit words.
# Arithmetic check: depth 16384 rows x (width 128 / word-bits 8) words per row = 262144 entries.
- name: Scratchpad
class: SRAM
attributes:
entries: 262144
depth: 16384
width: 128
instances: 1
meshX: 1
word-bits: 8
n_rdwr_ports: 2
n_banks: 4
subtree:
- name: PECols[0..15] # only the K dim can be parallelized across Accumulator columns
local:
# One accumulator SRAM per PE column (16 instances), each holding 32-bit words,
# one word per row (width 32 / word-bits 32).
- name: Accumulator
class: SRAM
attributes:
entries: 1024 # acc size / pe_dim = 16384/16
depth: 1024
width: 32
instances: 16
word-bits: 32
network-word-bits: 16
n_rdwr_ports: 2
n_banks: 2
subtree:
- name: PERows[0..15]
local:
# Per-PE storage: a single 8-bit entry each; 256 instances = 16 PECols x 16 PERows.
- name: Registers
class: SRAM
attributes:
depth: 1
width: 8
entries: 1
instances: 256
word-bits: 8
n_rdwr_ports: 2
n_banks: 1
# Compute unit with 8-bit datawidth (class `intmac`; presumably an integer
# multiply-accumulate -- confirm against the Timeloop/Accelergy component library).
- name: MACC
class: intmac
attributes:
datawidth: 8
word-bits: 8
50 changes: 50 additions & 0 deletions modeling/timeloop/mapspace/mapspace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Timeloop mapspace constraints for a Gemmini-like architecture.
# Each entry targets one storage level by name (Registers, Accumulator, Scratchpad, DRAM)
# and has a `type` of bypass, temporal, or spatial.
mapspace_constraints:
# ---- Bypass constraints: which data spaces each level keeps vs. bypasses ----
# Registers keep only Weights.
- target: Registers
type: bypass
bypass:
- Outputs
- Inputs
keep:
- Weights
# Accumulator keeps only Outputs.
- target: Accumulator
type: bypass
bypass:
- Weights
- Inputs
keep:
- Outputs
# Scratchpad keeps Inputs and Weights; Outputs bypass it.
- target: Scratchpad
type: bypass
keep:
- Inputs
- Weights
bypass:
- Outputs
# DRAM keeps all three data spaces; nothing bypasses it.
- target: DRAM
type: bypass
keep:
- Weights
- Inputs
- Outputs
bypass: []
# ---- Loop-factor and permutation constraints ----
# Registers: every temporal factor is fixed to 1.
- target: Registers
type: temporal
factors: R=1 S=1 P=1 Q=1 C=1 K=1 N=1
permutation: PQRSCKN
# Accumulator (spatial): only C may be greater than 1, capped at 16; all other factors are 1.
- target: Accumulator
type: spatial
factors: R=1 S=1 P=1 Q=1 C<=16 K=1 N=1
permutation: QKC
# Accumulator (temporal): only the permutation is constrained; factors are left unspecified here.
- target: Accumulator
type: temporal
permutation: QPNCSRK
# Scratchpad (spatial): only K may be greater than 1, capped at 16; all other factors are 1.
- target: Scratchpad
type: spatial
factors: R=1 S=1 P=1 Q=1 N=1 C=1 K<=16
# Scratchpad (temporal): every temporal factor is fixed to 1.
- target: Scratchpad
type: temporal
factors: R=1 S=1 P=1 Q=1 C=1 K=1 N=1
# DRAM (temporal): only the permutation is constrained; factors are left unspecified here.
- target: DRAM
type: temporal
permutation: CSRKQPN

6 changes: 3 additions & 3 deletions scripts/build-spike.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ echo Generating new gemmini_params.h file...
make verilog CONFIG=CustomGemminiSoCConfig &> build.log

cd -
cp software/gemmini-rocc-tests/include/gemmini_params.h ../../toolchains/esp-tools/riscv-isa-sim/gemmini/gemmini_params.h
cd ../../toolchains/esp-tools/riscv-isa-sim/build
make && make install
cp software/gemmini-rocc-tests/include/gemmini_params.h software/libgemmini/gemmini_params.h
make -C software/libgemmini clean
make -C software/libgemmini install
14 changes: 12 additions & 2 deletions scripts/run-midas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,17 @@ if [ ! -f ./${simulator}${DEBUG} ]; then
echo 'Did you run `./scripts/build-midas.sh`?'
fi

sim_args="+vcs+initreg+0 +vcs+initmem+0 +fesvr-step-size=128 +mm_relaxFunctionalModel_0=0 +mm_openPagePolicy_0=1 +mm_backendLatency_0=2 +mm_dramTimings_tAL_0=0 +mm_dramTimings_tCAS_0=14 +mm_dramTimings_tCMD_0=1 +mm_dramTimings_tCWD_0=10 +mm_dramTimings_tCCD_0=4 +mm_dramTimings_tFAW_0=25 +mm_dramTimings_tRAS_0=33 +mm_dramTimings_tREFI_0=7800 +mm_dramTimings_tRC_0=47 +mm_dramTimings_tRCD_0=14 +mm_dramTimings_tRFC_0=160 +mm_dramTimings_tRRD_0=8 +mm_dramTimings_tRP_0=14 +mm_dramTimings_tRTP_0=8 +mm_dramTimings_tRTRS_0=2 +mm_dramTimings_tWR_0=15 +mm_dramTimings_tWTR_0=8 +mm_rowAddr_offset_0=18 +mm_rowAddr_mask_0=65535 +mm_rankAddr_offset_0=16 +mm_rankAddr_mask_0=3 +mm_bankAddr_offset_0=13 +mm_bankAddr_mask_0=7 +shmemportname0=0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +macaddr0=00:00:00:00:00:02 +niclog0=niclog0 +linklatency0=6405 +netbw0=100 +netburst0=8 +nic-loopback0 +tracefile=TRACEFILE +blkdev-in-mem0=128 +blkdev-log0=blkdev-log0 +autocounter-readrate=1000 +autocounter-filename=AUTOCOUNTERFILE +max-cycles=100000000 +dramsim +dramsim_ini_dir=/home/eecs/hngenc/chip/generators/testchipip/src/main/resources/dramsim2_ini"

# Append DRAM-model-specific simulator arguments to $sim_args.
# $dram_model is quoted so an empty/unset value or one containing whitespace
# compares as a plain string instead of breaking the `[` test, and `=` is used
# because it is the portable string-equality operator for `[`.
# NOTE(review): $dram_model and the initial $sim_args are set outside this span.

# Both FR-FCFS variants take the scheduler-window and transaction-queue settings.
if [ "$dram_model" = "DDR3FRFCFS" ] || [ "$dram_model" = "DDR3FRFCFSLLC4MB" ]; then
    sim_args="$sim_args +mm_schedulerWindowSize_0=8 +mm_transactionQueueDepth_0=8"
fi

# Only the LLC variant additionally takes the +mm_llc_* settings.
if [ "$dram_model" = "DDR3FRFCFSLLC4MB" ]; then
    sim_args="$sim_args +mm_llc_wayBits_0=3 +mm_llc_setBits_0=12 +mm_llc_blockBits_0=7 +mm_llc_activeMSHRs_0=8"
fi

./${simulator}${DEBUG} ${PK} ${full_binary_path} ${waveform_flag} \
+vcs+initreg+0 +vcs+initmem+0 +fesvr-step-size=128 +mm_relaxFunctionalModel_0=0 +mm_openPagePolicy_0=1 +mm_backendLatency_0=2 +mm_schedulerWindowSize_0=8 +mm_transactionQueueDepth_0=8 +mm_dramTimings_tAL_0=0 +mm_dramTimings_tCAS_0=14 +mm_dramTimings_tCMD_0=1 +mm_dramTimings_tCWD_0=10 +mm_dramTimings_tCCD_0=4 +mm_dramTimings_tFAW_0=25 +mm_dramTimings_tRAS_0=33 +mm_dramTimings_tREFI_0=7800 +mm_dramTimings_tRC_0=47 +mm_dramTimings_tRCD_0=14 +mm_dramTimings_tRFC_0=160 +mm_dramTimings_tRRD_0=8 +mm_dramTimings_tRP_0=14 +mm_dramTimings_tRTP_0=8 +mm_dramTimings_tRTRS_0=2 +mm_dramTimings_tWR_0=15 +mm_dramTimings_tWTR_0=8 +mm_rowAddr_offset_0=18 +mm_rowAddr_mask_0=65535 +mm_rankAddr_offset_0=16 +mm_rankAddr_mask_0=3 +mm_bankAddr_offset_0=13 +mm_bankAddr_mask_0=7 +mm_llc_wayBits_0=3 +mm_llc_setBits_0=12 +mm_llc_blockBits_0=7 +mm_llc_activeMSHRs_0=8 +shmemportname0=0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +macaddr0=00:00:00:00:00:02 +niclog0=niclog0 +linklatency0=6405 +netbw0=100 +netburst0=8 +nic-loopback0 +tracefile=TRACEFILE +blkdev-in-mem0=128 +blkdev-log0=blkdev-log0 +autocounter-readrate=1000 +autocounter-filename=AUTOCOUNTERFILE +max-cycles=100000000 \
+dramsim +dramsim_ini_dir=/home/eecs/hngenc/chip/generators/testchipip/src/main/resources/dramsim2_ini \
$sim_args \
2>/dev/null

4 changes: 2 additions & 2 deletions software/gemmini-ort.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,6 @@
],
"overlay": "../onnxruntime-riscv/systolic_runner/imagenet_runner",
"rootfs-size": "16GiB",
"run": "run-ort.sh"
"run": "run-ort.sh",
"spike-args": "--extension=gemmini"
}

10 changes: 10 additions & 0 deletions software/gemmini-smoke.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"name" : "gemmini-smoke",
"workdir" : ".",
"base" : "br-base.json",
"overlay" : "overlay",
"host-init" : "host-init.sh",
"command": "/root/run-test-smoke.sh",
"rootfs-size" : "16GiB",
"spike-args" : "--extension=gemmini"
}
3 changes: 2 additions & 1 deletion software/gemmini-tests-full.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
"base" : "br-base.json",
"overlay" : "overlay",
"host-init" : "host-init.sh",
"command": "/root/run-tests-full.sh"
"command": "/root/run-tests-full.sh",
"spike-args": "--extension=gemmini"
}
3 changes: 2 additions & 1 deletion software/gemmini-tests-interactive.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
"base" : "br-base.json",
"overlay" : "overlay",
"host-init" : "host-init.sh",
"rootfs-size" : "16GiB"
"rootfs-size" : "16GiB",
"spike-args": "--extension=gemmini"
}
3 changes: 2 additions & 1 deletion software/gemmini-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
"overlay" : "overlay",
"host-init" : "host-init.sh",
"command": "/root/run-tests.sh",
"rootfs-size" : "16GiB"
"rootfs-size" : "16GiB",
"spike-args" : "--extension=gemmini"
}
1 change: 1 addition & 0 deletions software/libgemmini
Submodule libgemmini added at 4be220
9 changes: 9 additions & 0 deletions software/overlay/root/run-test-smoke.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Smoke test: run the mobilenet binary once, show its captured output, then power off.

# Results file, relative to the current working directory.
results=test_output.txt

# Start a fresh results file (truncates anything left from a previous run).
echo "*****************TEST RESULTS*************" > "$results"

# The banner goes to the console only; the benchmark's stdout is appended to the results file.
echo "========mobilenet========="
/root/imagenet/mobilenet-linux >> "$results"

# Print everything collected, then force a power-off.
cat "$results"
poweroff -f
6 changes: 3 additions & 3 deletions src/main/scala/gemmini/AccumulatorMem.scala
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ class AccumulatorMem[T <: Data, U <: Data](
val wmask = Mux1H(w_q_head.asBools, w_q.map(_.mask))
val waddr = Mux1H(w_q_head.asBools, w_q.map(_.addr))
when (wen) {
w_q_head := (w_q_head << 1).asUInt() | w_q_head(nEntries-1)
w_q_head := (w_q_head << 1).asUInt | w_q_head(nEntries-1)
for (i <- 0 until nEntries) {
when (w_q_head(i)) {
w_q(i).valid := false.B
Expand All @@ -243,7 +243,7 @@ class AccumulatorMem[T <: Data, U <: Data](
when (w_q_push) {
assert(!w_q_full || wen, "we ran out of acc-sub-bank write q entries")

w_q_tail := (w_q_tail << 1).asUInt() | w_q_tail(nEntries-1)
w_q_tail := (w_q_tail << 1).asUInt | w_q_tail(nEntries-1)
for (i <- 0 until nEntries) {
when (w_q_tail(i)) {
w_q(i).valid := true.B
Expand Down Expand Up @@ -334,7 +334,7 @@ class AccumulatorMem[T <: Data, U <: Data](
io.write.ready := !block_write_req &&
!pipelined_writes.map(r => r.valid && r.bits.addr === io.write.bits.addr && io.write.bits.acc).reduce(_||_)

when (reset.asBool()) {
when (reset.asBool) {
pipelined_writes.foreach(_.valid := false.B)
}

Expand Down
Loading

0 comments on commit be2e9f2

Please sign in to comment.