diff --git a/CHIPYARD.hash b/CHIPYARD.hash index cba05d74..f41949c3 100644 --- a/CHIPYARD.hash +++ b/CHIPYARD.hash @@ -1 +1 @@ -ec1b075658fb92a624151536dd1de76bad94f51f +117624d8eea27bafd613eec09e9b9b3e31239e08 diff --git a/README.md b/README.md index 515264b8..5a595622 100644 --- a/README.md +++ b/README.md @@ -32,19 +32,19 @@ Run these steps to install Chipyard and Spike (make sure to checkout the correct ```shell git clone https://github.com/ucb-bar/chipyard.git cd chipyard -git checkout ec1b075658fb92a624151536dd1de76bad94f51f +git checkout 117624d8eea27bafd613eec09e9b9b3e31239e08 ./scripts/init-submodules-no-riscv-tools.sh ./scripts/build-toolchains.sh esp-tools source env.sh cd generators/gemmini -git fetch && git checkout v0.6.2 +git fetch && git checkout v0.6.3 git submodule update cd - cd toolchains/esp-tools/riscv-isa-sim/build -git fetch && git checkout 79486d67f99fa739c8c1d5916c9b74d0417b53c4 +git fetch && git checkout 090e82c473fd28b4eb2011ffcd771ead6076faab make && make install ``` diff --git a/SPIKE.hash b/SPIKE.hash index d0b80362..f08ac921 100644 --- a/SPIKE.hash +++ b/SPIKE.hash @@ -1 +1 @@ -79486d67f99fa739c8c1d5916c9b74d0417b53c4 +090e82c473fd28b4eb2011ffcd771ead6076faab diff --git a/software/gemmini-rocc-tests b/software/gemmini-rocc-tests index 96165453..e326e7c4 160000 --- a/software/gemmini-rocc-tests +++ b/software/gemmini-rocc-tests @@ -1 +1 @@ -Subproject commit 96165453758f047432a94431d4779f9f331b5e5a +Subproject commit e326e7c43457ff08669fe88edcaa395d846474d8 diff --git a/src/main/scala/gemmini/AccumulatorMem.scala b/src/main/scala/gemmini/AccumulatorMem.scala index 8f3fbaf5..18b53624 100644 --- a/src/main/scala/gemmini/AccumulatorMem.scala +++ b/src/main/scala/gemmini/AccumulatorMem.scala @@ -14,7 +14,6 @@ class AccumulatorReadReq[T <: Data](n: Int, shift_width: Int, scale_t: T) extend val fromDMA = Bool() - override def cloneType: this.type = new AccumulatorReadReq(n, shift_width, scale_t.cloneType).asInstanceOf[this.type] } class AccumulatorReadResp[T <: Data: Arithmetic, U <: Data](fullDataType: Vec[Vec[T]], scale_t: U, shift_width: Int) extends Bundle { @@ -24,14 +23,12 @@ class AccumulatorReadResp[T <: Data: Arithmetic, U <: Data](fullDataType: Vec[Ve val relu6_shift = UInt(shift_width.W) val act = UInt(2.W) // TODO magic number val acc_bank_id = UInt(2.W) // TODO don't hardcode - override def cloneType: this.type = new AccumulatorReadResp(fullDataType.cloneType, scale_t, shift_width).asInstanceOf[this.type] } class AccumulatorReadIO[T <: Data: Arithmetic, U <: Data](n: Int, shift_width: Int, fullDataType: Vec[Vec[T]], scale_t: U) extends Bundle { val req = Decoupled(new AccumulatorReadReq[U](n, shift_width, scale_t)) val resp = Flipped(Decoupled(new AccumulatorReadResp[T, U](fullDataType, scale_t, shift_width))) - override def cloneType: this.type = new AccumulatorReadIO(n, shift_width, fullDataType.cloneType, scale_t.cloneType).asInstanceOf[this.type] } class AccumulatorWriteReq[T <: Data: Arithmetic](n: Int, t: Vec[Vec[T]]) extends Bundle { @@ -41,7 +38,6 @@ class AccumulatorWriteReq[T <: Data: Arithmetic](n: Int, t: Vec[Vec[T]]) extends val mask = Vec(t.getWidth / 8, Bool()) // TODO Use aligned_to here // val current_waddr = Flipped(Valid(UInt(log2Ceil(n).W))) // This is the raddr that is being fed into the SRAM right now - override def cloneType: this.type = new AccumulatorWriteReq(n, t).asInstanceOf[this.type] } @@ -60,7 +56,6 @@ class AccumulatorMemIO [T <: Data: Arithmetic, U <: Data](n: Int, t: Vec[Vec[T]] val sum = Input(t.cloneType) } - override def cloneType: this.type = new AccumulatorMemIO(n, t, scale_t, acc_sub_banks, use_shared_ext_mem).asInstanceOf[this.type] } class AccPipe[T <: Data : Arithmetic](latency: Int, t: T)(implicit ev: Arithmetic[T]) extends Module { @@ -117,7 +112,7 @@ class AccumulatorMem[T <: Data, U <: Data]( val pipelined_writes = Reg(Vec(acc_latency, Valid(new AccumulatorWriteReq(n, t)))) val oldest_pipelined_write = pipelined_writes(acc_latency-1) - pipelined_writes(0).valid := io.write.fire() + pipelined_writes(0).valid := io.write.fire pipelined_writes(0).bits := io.write.bits for (i <- 1 until acc_latency) { pipelined_writes(i) := pipelined_writes(i-1) @@ -148,8 +143,8 @@ class AccumulatorMem[T <: Data, U <: Data]( mem.io.mask := oldest_pipelined_write.bits.mask rdata_for_adder := mem.io.rdata rdata_for_read_resp := mem.io.rdata - mem.io.raddr := Mux(io.write.fire() && io.write.bits.acc, io.write.bits.addr, io.read.req.bits.addr) - mem.io.ren := io.read.req.fire() || (io.write.fire() && io.write.bits.acc) + mem.io.raddr := Mux(io.write.fire && io.write.bits.acc, io.write.bits.addr, io.read.req.bits.addr) + mem.io.ren := io.read.req.fire || (io.write.fire && io.write.bits.acc) } else { val rmw_req = Wire(Decoupled(UInt())) rmw_req.valid := io.write.valid && io.write.bits.acc @@ -203,14 +198,13 @@ class AccumulatorMem[T <: Data, U <: Data]( val data = Vec(mask_len, mask_elem) val mask = Vec(mask_len, Bool()) val addr = UInt(log2Ceil(n/acc_sub_banks).W) - override def cloneType: this.type = new W_Q_Entry(mask_len, mask_elem).asInstanceOf[this.type] } val w_q = Reg(Vec(nEntries, new W_Q_Entry(mask_len, mask_elem))) for (e <- w_q) { when (e.valid) { assert(!( - io.write.fire() && io.write.bits.acc && + io.write.fire && io.write.bits.acc && isThisBank(io.write.bits.addr) && getBankIdx(io.write.bits.addr) === e.addr && ((io.write.bits.mask.asUInt & e.mask.asUInt) =/= 0.U) ), "you cannot accumulate to an AccumulatorMem address until previous writes to that address have completed") @@ -276,7 +270,7 @@ class AccumulatorMem[T <: Data, U <: Data]( // 1. incoming reads for RMW // 2. writes from RMW // 3. incoming reads - when (rmw_req.fire() && isThisBank(rmw_req.bits)) { + when (rmw_req.fire && isThisBank(rmw_req.bits)) { ren := true.B when (isThisBank(only_read_req.bits)) { only_read_req.ready := false.B @@ -287,7 +281,7 @@ class AccumulatorMem[T <: Data, U <: Data]( only_read_req.ready := false.B } } .otherwise { - ren := isThisBank(only_read_req.bits) && only_read_req.fire() + ren := isThisBank(only_read_req.bits) && only_read_req.fire raddr := getBankIdx(only_read_req.bits) } @@ -304,7 +298,7 @@ class AccumulatorMem[T <: Data, U <: Data]( q.io.enq.bits.act := RegNext(io.read.req.bits.act) q.io.enq.bits.fromDMA := RegNext(io.read.req.bits.fromDMA) q.io.enq.bits.acc_bank_id := DontCare - q.io.enq.valid := RegNext(io.read.req.fire()) + q.io.enq.valid := RegNext(io.read.req.fire) val p = q.io.deq @@ -317,7 +311,7 @@ class AccumulatorMem[T <: Data, U <: Data]( io.read.resp.valid := p.valid p.ready := io.read.resp.ready - val q_will_be_empty = (q.io.count +& q.io.enq.fire()) - q.io.deq.fire() === 0.U + val q_will_be_empty = (q.io.count +& q.io.enq.fire) - q.io.deq.fire === 0.U io.read.req.ready := q_will_be_empty && ( // Make sure we aren't accumulating, which would take over both ports !(io.write.valid && io.write.bits.acc) && @@ -333,5 +327,5 @@ class AccumulatorMem[T <: Data, U <: Data]( } // assert(!(io.read.req.valid && io.write.en && io.write.acc), "reading and accumulating simultaneously is not supported") - assert(!(io.read.req.fire() && io.write.fire() && io.read.req.bits.addr === io.write.bits.addr), "reading from and writing to same address is not supported") + assert(!(io.read.req.fire && io.write.fire && io.read.req.bits.addr === io.write.bits.addr), "reading from and writing to same address is not supported") } diff --git a/src/main/scala/gemmini/AccumulatorScale.scala b/src/main/scala/gemmini/AccumulatorScale.scala index 5e4997f8..2d23af1d 100644 --- a/src/main/scala/gemmini/AccumulatorScale.scala +++ b/src/main/scala/gemmini/AccumulatorScale.scala @@ -8,7 +8,6 @@ import Util._ class AccumulatorReadRespWithFullData[T <: Data: Arithmetic, U <: Data](fullDataType: Vec[Vec[T]], scale_t: U, shift_width: Int) extends Bundle { val resp = new AccumulatorReadResp(fullDataType, scale_t, shift_width) val full_data = fullDataType.cloneType - override def cloneType: this.type = new AccumulatorReadRespWithFullData(fullDataType.cloneType, scale_t, shift_width).asInstanceOf[this.type] } @@ -17,7 +16,6 @@ class AccumulatorScaleResp[T <: Data: Arithmetic](fullDataType: Vec[Vec[T]], rDa val data = rDataType.cloneType val acc_bank_id = UInt(2.W) val fromDMA = Bool() - override def cloneType: this.type = new AccumulatorScaleResp(fullDataType, rDataType).asInstanceOf[this.type] } class AccumulatorScaleIO[T <: Data: Arithmetic, U <: Data]( @@ -26,8 +24,6 @@ class AccumulatorScaleIO[T <: Data: Arithmetic, U <: Data]( ) extends Bundle { val in = Flipped(Decoupled(new AccumulatorReadResp[T,U](fullDataType, scale_t, shift_width))) val out = Decoupled(new AccumulatorScaleResp[T](fullDataType, rDataType)) - override def cloneType: this.type = new AccumulatorScaleIO(fullDataType, scale_t, - shift_width, rDataType).asInstanceOf[this.type] } class AccScaleDataWithIndex[T <: Data: Arithmetic, U <: Data](t: T, u: U) extends Bundle { @@ -40,7 +36,6 @@ class AccScaleDataWithIndex[T <: Data: Arithmetic, U <: Data](t: T, u: U) extend val full_data = t.cloneType val id = UInt(2.W) // TODO hardcoded val index = UInt() - override def cloneType: this.type = new AccScaleDataWithIndex(t, u).asInstanceOf[this.type] } class AccScalePipe[T <: Data : Arithmetic, U <: Data](t: T, rDataType: Vec[Vec[T]], scale_func: (T, U) => T, scale_t: U, latency: Int, has_nonlinear_activations: Boolean)(implicit ev: Arithmetic[T]) extends Module { @@ -123,7 +118,7 @@ class AccumulatorScale[T <: Data: Arithmetic, U <: Data]( val tail_oh = RegInit(1.U(nEntries.W)) out.valid := Mux1H(head_oh.asBools, (regs zip completed_masks).map({case (r, c) => r.valid && c.reduce(_&&_)})) out.bits := Mux1H(head_oh.asBools, out_regs) - when (out.fire()) { + when (out.fire) { for (i <- 0 until nEntries) { when (head_oh(i)) { regs(i).valid := false.B @@ -132,8 +127,8 @@ class AccumulatorScale[T <: Data: Arithmetic, U <: Data]( head_oh := (head_oh << 1) | head_oh(nEntries-1) } - io.in.ready := !Mux1H(tail_oh.asBools, regs.map(_.valid)) || (tail_oh === head_oh && out.fire()) - when (io.in.fire()) { + io.in.ready := !Mux1H(tail_oh.asBools, regs.map(_.valid)) || (tail_oh === head_oh && out.fire) + when (io.in.fire) { for (i <- 0 until nEntries) { when (tail_oh(i)) { regs(i).valid := true.B @@ -160,7 +155,7 @@ class AccumulatorScale[T <: Data: Arithmetic, U <: Data]( input.bits.relu6_shift := regs(i).bits.relu6_shift input.bits.id := i.U input.bits.index := w.U - when (input.fire()) { + when (input.fire) { fired_masks(i)(w) := true.B } } @@ -185,7 +180,7 @@ class AccumulatorScale[T <: Data: Arithmetic, U <: Data]( if ((j*width+w) % num_scale_units == i) { val id0 = w % io.in.bits.data(0).size val id1 = w / io.in.bits.data(0).size - when (pipe_out.fire() && pipe_out.bits.id === j.U && pipe_out.bits.index === w.U) { + when (pipe_out.fire && pipe_out.bits.id === j.U && pipe_out.bits.index === w.U) { out_regs(j).data (id1)(id0) := pipe_out.bits.data out_regs(j).full_data(id1)(id0) := pipe_out.bits.full_data completed_masks(j)(w) := true.B diff --git a/src/main/scala/gemmini/BeatMerger.scala b/src/main/scala/gemmini/BeatMerger.scala index ac7b284f..a845327b 100644 --- a/src/main/scala/gemmini/BeatMerger.scala +++ b/src/main/scala/gemmini/BeatMerger.scala @@ -61,7 +61,7 @@ class BeatMerger[U <: Data](beatBits: Int, maxShift: Int, spadWidth: Int, accWid io.req.ready := !req.valid - io.in.ready := io.req.fire() || (req.valid && bytesRead =/= (1.U << req.bits.lg_len_req).asUInt()) + io.in.ready := io.req.fire || (req.valid && bytesRead =/= (1.U << req.bits.lg_len_req).asUInt()) io.out.valid := req.valid && usefulBytesRead > bytesSent && (usefulBytesRead - bytesSent >= rowBytes || usefulBytesRead === req.bits.bytes_to_read) @@ -92,7 +92,7 @@ class BeatMerger[U <: Data](beatBits: Int, maxShift: Int, spadWidth: Int, accWid req.pop() } - when (io.out.fire()) { + when (io.out.fire) { bytesSent := bytesSent_next when (last_sending && bytesRead === (1.U << req.bits.lg_len_req).asUInt()) { @@ -101,18 +101,18 @@ class BeatMerger[U <: Data](beatBits: Int, maxShift: Int, spadWidth: Int, accWid } } - when (io.req.fire()) { + when (io.req.fire) { req.push(io.req.bits) bytesRead := 0.U bytesSent := 0.U } - when (io.in.fire()) { - val current_bytesRead = Mux(io.req.fire(), 0.U, bytesRead) - val current_bytesDiscarded = Mux(io.req.fire(), 0.U, bytesDiscarded) - val current_usefulBytesRead = Mux(io.req.fire(), 0.U, usefulBytesRead) - val current_shift = Mux(io.req.fire(), io.req.bits.shift, req.bits.shift) - val current_lg_len_req = Mux(io.req.fire(), io.req.bits.lg_len_req, req.bits.lg_len_req) + when (io.in.fire) { + val current_bytesRead = Mux(io.req.fire, 0.U, bytesRead) + val current_bytesDiscarded = Mux(io.req.fire, 0.U, bytesDiscarded) + val current_usefulBytesRead = Mux(io.req.fire, 0.U, usefulBytesRead) + val current_shift = Mux(io.req.fire, io.req.bits.shift, req.bits.shift) + val current_lg_len_req = Mux(io.req.fire, io.req.bits.lg_len_req, req.bits.lg_len_req) val current_len_req = (1.U << current_lg_len_req).asUInt() when (current_shift - current_bytesDiscarded <= beatBytes.U /* && @@ -127,7 +127,7 @@ class BeatMerger[U <: Data](beatBits: Int, maxShift: Int, spadWidth: Int, accWid bytesRead := satAdd(current_bytesRead, beatBytes.U, current_len_req) - when (!io.req.fire() && bytesSent === req.bits.bytes_to_read && last_reading) { + when (!io.req.fire && bytesSent === req.bits.bytes_to_read && last_reading) { req.pop() } } diff --git a/src/main/scala/gemmini/CmdFSM.scala b/src/main/scala/gemmini/CmdFSM.scala index 5a3b51f9..8ee3a696 100644 --- a/src/main/scala/gemmini/CmdFSM.scala +++ b/src/main/scala/gemmini/CmdFSM.scala @@ -64,7 +64,7 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data] status := DontCare //========================================================================== - // Combinational Output Defaults + // Combinational Output Defaults //========================================================================== io.cmd.ready := false.B io.tiler.valid := false.B @@ -90,7 +90,7 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data] io.busy := (state === s_EX_PENDING) //========================================================================== - // FSM + // FSM //========================================================================== def reset_and_listen(): Unit = { // Reset all data-validity @@ -109,13 +109,13 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data] // Wait for tiling/ execution to complete, // let any further commands queue up io.tiler.valid := true.B - when (io.tiler.fire()) { + when (io.tiler.fire) { state := s_LISTENING } }.elsewhen (state === s_ERROR) { // In s_ERROR state - only update based on RESET commands io.cmd.ready := true.B - when (io.cmd.fire()) { + when (io.cmd.fire) { val cmd = io.cmd.bits val funct = cmd.inst.funct when (funct === RESET) { @@ -124,7 +124,7 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data] } }.otherwise { // s_LISTENING State io.cmd.ready := true.B - when (io.cmd.fire()) { + when (io.cmd.fire) { val cmd = io.cmd.bits val funct = cmd.inst.funct val rs1 = cmd.rs1 @@ -143,7 +143,7 @@ class CmdFSM[T <: Data: Arithmetic, U <: Data, V <: Data] // Signal to the Tiler, and move to our EXEC state // FIXME: check all valid io.tiler.valid := true.B - when (io.tiler.fire()) { + when (io.tiler.fire) { state := s_LISTENING }.otherwise { state := s_EX_PENDING diff --git a/src/main/scala/gemmini/Controller.scala b/src/main/scala/gemmini/Controller.scala index 74f23b4c..d1019ef1 100644 --- a/src/main/scala/gemmini/Controller.scala +++ b/src/main/scala/gemmini/Controller.scala @@ -18,7 +18,6 @@ class GemminiCmd(rob_entries: Int)(implicit p: Parameters) extends Bundle { val cmd = new RoCCCommand val rob_id = UDValid(UInt(log2Up(rob_entries).W)) - override def cloneType: this.type = new GemminiCmd(rob_entries).asInstanceOf[this.type] } class Gemmini[T <: Data : Arithmetic, U <: Data, V <: Data](val config: GemminiArrayConfig[T, U, V]) @@ -389,7 +388,7 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] .otherwise { reservation_station.io.alloc.valid := true.B - when(reservation_station.io.alloc.fire()) { + when(reservation_station.io.alloc.fire) { // compressed_cmd.ready := true.B unrolled_cmd.ready := true.B } @@ -414,5 +413,5 @@ class GemminiModule[T <: Data: Arithmetic, U <: Data, V <: Data] //========================================================================= // Performance Counters Access //========================================================================= - + } diff --git a/src/main/scala/gemmini/CounterFile.scala b/src/main/scala/gemmini/CounterFile.scala index 9f0482f7..35f50c20 100644 --- a/src/main/scala/gemmini/CounterFile.scala +++ b/src/main/scala/gemmini/CounterFile.scala @@ -155,7 +155,7 @@ class CounterIO(nPerfCounter: Int, counterWidth: Int) extends Bundle { // A simple counter file. Every counter is incremented when the corresponding event signal is high on rising edge. // There are two type of counters: Built-in counters and external counters. External counters have their value -// stored in other modules and can incremented by arbitary values. +// stored in other modules and can incremented by arbitary values. class CounterFile(nPerfCounter: Int, counterWidth: Int) extends Module { val io = IO(new CounterIO(nPerfCounter, counterWidth)) @@ -182,8 +182,8 @@ class CounterFile(nPerfCounter: Int, counterWidth: Int) extends Module } // Snapshot: In case a sequence of access instructions get interrupted (i.e. preempted by OS), it is possible // to take a snapshot when reading counter value by setting a bit in the instruction. All subsequent readings - // return the values from the snapshot until it is cleared by a instruction with "clear" bit marked. - // When the snapshot bit is set, the normal counters are still being incremented. + // return the values from the snapshot until it is cleared by a instruction with "clear" bit marked. + // When the snapshot bit is set, the normal counters are still being incremented. when (io.snapshot_reset) { snapshot_enable := false.B } .elsewhen (io.snapshot) { @@ -227,7 +227,7 @@ class CounterController(nPerfCounter: Int, counterWidth: Int)(implicit p: Parame val module = Module(new CounterFile(nPerfCounter: Int, counterWidth: Int)) module.io.event_io <> io.event_io - + val out_reg = Reg(io.out.bits.cloneType) val out_valid_reg = RegInit(false.B) @@ -243,16 +243,16 @@ class CounterController(nPerfCounter: Int, counterWidth: Int)(implicit p: Parame io.in.ready := !out_valid_reg module.io.addr := io.in.bits.rs1(nCounterIndexBit + 3, 4) - module.io.counter_reset := io.in.bits.rs1(0) & io.in.fire() - module.io.snapshot_reset := io.in.bits.rs1(1) & io.in.fire() - module.io.snapshot := io.in.bits.rs1(2) & io.in.fire() - module.io.config_address.valid := io.in.bits.rs1(3) & io.in.fire() + module.io.counter_reset := io.in.bits.rs1(0) & io.in.fire + module.io.snapshot_reset := io.in.bits.rs1(1) & io.in.fire + module.io.snapshot := io.in.bits.rs1(2) & io.in.fire + module.io.config_address.valid := io.in.bits.rs1(3) & io.in.fire module.io.config_address.bits := io.in.bits.rs1(17, 12) module.io.external := io.in.bits.rs1(31) - when (io.out.fire()) { + when (io.out.fire) { out_valid_reg := false.B - } .elsewhen (io.in.fire()) { + } .elsewhen (io.in.fire) { out_valid_reg := true.B out_reg.rd := io.in.bits.inst.rd out_reg.data := 0.U @@ -264,4 +264,4 @@ class CounterController(nPerfCounter: Int, counterWidth: Int)(implicit p: Parame } else { io <> DontCare } -} \ No newline at end of file +} diff --git a/src/main/scala/gemmini/DMA.scala b/src/main/scala/gemmini/DMA.scala index 5952be5b..9761228f 100644 --- a/src/main/scala/gemmini/DMA.scala +++ b/src/main/scala/gemmini/DMA.scala @@ -31,7 +31,6 @@ class StreamReadRequest[U <: Data](spad_rows: Int, acc_rows: Int, mvin_scale_t_b val block_stride = UInt(16.W) // TODO magic number val cmd_id = UInt(8.W) // TODO magic number - override def cloneType: StreamReadRequest.this.type = new StreamReadRequest(spad_rows, acc_rows, mvin_scale_t_bits).asInstanceOf[this.type] } class StreamReadResponse[U <: Data](spadWidth: Int, accWidth: Int, spad_rows: Int, acc_rows: Int, aligned_to: Int, mvin_scale_t_bits: Int) @@ -50,7 +49,6 @@ class StreamReadResponse[U <: Data](spadWidth: Int, accWidth: Int, spad_rows: In val bytes_read = UInt(8.W) // TODO magic number val cmd_id = UInt(8.W) // TODO magic number - override def cloneType: StreamReadResponse.this.type = new StreamReadResponse(spadWidth, accWidth, spad_rows, acc_rows, aligned_to, mvin_scale_t_bits).asInstanceOf[this.type] } class StreamReader[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T, U, V], nXacts: Int, beatBits: Int, maxBytes: Int, spadWidth: Int, accWidth: Int, aligned_to: Int, @@ -83,8 +81,8 @@ class StreamReader[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T core.module.io.flush := io.flush xactTracker.io.alloc <> core.module.io.reserve - xactTracker.io.peek.xactid := RegEnableThru(core.module.io.beatData.bits.xactid, beatPacker.io.req.fire()) - xactTracker.io.peek.pop := beatPacker.io.in.fire() && core.module.io.beatData.bits.last + xactTracker.io.peek.xactid := RegEnableThru(core.module.io.beatData.bits.xactid, beatPacker.io.req.fire) + xactTracker.io.peek.pop := beatPacker.io.in.fire && core.module.io.beatData.bits.last core.module.io.beatData.ready := beatPacker.io.in.ready beatPacker.io.req.valid := core.module.io.beatData.valid @@ -101,12 +99,12 @@ class StreamReader[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T io.resp.bits.is_acc := beatPacker.io.out.bits.is_acc io.resp.bits.accumulate := beatPacker.io.out.bits.accumulate io.resp.bits.has_acc_bitwidth := beatPacker.io.out.bits.has_acc_bitwidth - io.resp.bits.scale := RegEnable(xactTracker.io.peek.entry.scale, beatPacker.io.req.fire()) - io.resp.bits.repeats := RegEnable(xactTracker.io.peek.entry.repeats, beatPacker.io.req.fire()) - io.resp.bits.pixel_repeats := RegEnable(xactTracker.io.peek.entry.pixel_repeats, beatPacker.io.req.fire()) - io.resp.bits.len := RegEnable(xactTracker.io.peek.entry.len, beatPacker.io.req.fire()) - io.resp.bits.cmd_id := RegEnable(xactTracker.io.peek.entry.cmd_id, beatPacker.io.req.fire()) - io.resp.bits.bytes_read := RegEnable(xactTracker.io.peek.entry.bytes_to_read, beatPacker.io.req.fire()) + io.resp.bits.scale := RegEnable(xactTracker.io.peek.entry.scale, beatPacker.io.req.fire) + io.resp.bits.repeats := RegEnable(xactTracker.io.peek.entry.repeats, beatPacker.io.req.fire) + io.resp.bits.pixel_repeats := RegEnable(xactTracker.io.peek.entry.pixel_repeats, beatPacker.io.req.fire) + io.resp.bits.len := RegEnable(xactTracker.io.peek.entry.len, beatPacker.io.req.fire) + io.resp.bits.cmd_id := RegEnable(xactTracker.io.peek.entry.cmd_id, beatPacker.io.req.fire) + io.resp.bits.bytes_read := RegEnable(xactTracker.io.peek.entry.bytes_to_read, beatPacker.io.req.fire) io.resp.bits.last := beatPacker.io.out.bits.last io.counter.collect(core.module.io.counter) @@ -270,7 +268,7 @@ class StreamReaderCore[T <: Data, U <: Data, V <: Data](config: GemminiArrayConf if (bytesRequested.getWidth >= log2Up(spadWidthBytes+1)) bytesRequested / spadWidthBytes.U else 0.U) io.reserve.entry.spad_row_offset := Mux(req.has_acc_bitwidth, bytesRequested % accWidthBytes.U, bytesRequested % spadWidthBytes.U) - when (untranslated_a.fire()) { + when (untranslated_a.fire) { val next_vaddr = req.vaddr + read_bytes_read // send_size val new_page = next_vaddr(pgIdxBits-1, 0) === 0.U req.vaddr := next_vaddr @@ -295,7 +293,7 @@ class StreamReaderCore[T <: Data, U <: Data, V <: Data](config: GemminiArrayConf // TODO the size data is already returned from TileLink, so there's no need for us to store it in the XactTracker ourselves // Accepting requests to kick-start the state machine - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits bytesRequested := 0.U @@ -312,7 +310,7 @@ class StreamReaderCore[T <: Data, U <: Data, V <: Data](config: GemminiArrayConf val total_bytes_read = RegInit(0.U(CounterExternal.EXTERNAL_WIDTH.W)) when (io.counter.external_reset) { total_bytes_read := 0.U - }.elsewhen (tl.d.fire()) { + }.elsewhen (tl.d.fire) { total_bytes_read := total_bytes_read + (1.U << tl.d.bits.size) } @@ -390,7 +388,7 @@ class StreamWriter[T <: Data: Arithmetic](nXacts: Int, beatBits: Int, maxBytes: val xactBusy_fire = WireInit(false.B) val xactBusy_add = Mux(xactBusy_fire, (1.U << xactId).asUInt(), 0.U) - val xactBusy_remove = ~Mux(tl.d.fire(), (1.U << tl.d.bits.source).asUInt(), 0.U) + val xactBusy_remove = ~Mux(tl.d.fire, (1.U << tl.d.bits.source).asUInt(), 0.U) xactBusy := (xactBusy | xactBusy_add) & xactBusy_remove.asUInt() val state_machine_ready_for_req = WireInit(state === s_idle) @@ -502,7 +500,7 @@ class StreamWriter[T <: Data: Arithmetic](nXacts: Int, beatBits: Int, maxBytes: } val untranslated_a = Wire(Decoupled(new TLBundleAWithInfo)) - xactBusy_fire := untranslated_a.fire() && state === s_writing_new_block + xactBusy_fire := untranslated_a.fire && state === s_writing_new_block untranslated_a.valid := (state === s_writing_new_block || state === s_writing_beats) && !xactBusy.andR() untranslated_a.bits.tl_a := Mux(write_full, putFull, putPartial) untranslated_a.bits.vaddr := write_vaddr @@ -521,7 +519,7 @@ class StreamWriter[T <: Data: Arithmetic](nXacts: Int, beatBits: Int, maxBytes: val tlb_q = Module(new Queue(new TLBundleAWithInfo, 1, pipe=true)) tlb_q.io.enq <> tlb_arb.io.out - io.tlb.req.valid := tlb_q.io.deq.fire() + io.tlb.req.valid := tlb_q.io.deq.fire io.tlb.req.bits.tlb_req.vaddr := tlb_q.io.deq.bits.vaddr io.tlb.req.bits.tlb_req.passthrough := false.B io.tlb.req.bits.tlb_req.size := 0.U // send_size @@ -543,11 +541,11 @@ class StreamWriter[T <: Data: Arithmetic](nXacts: Int, beatBits: Int, maxBytes: tl.a.valid := translate_q.io.deq.valid && !io.tlb.resp.miss tl.a.bits := translate_q.io.deq.bits.tl_a - tl.a.bits.address := RegEnableThru(io.tlb.resp.paddr, RegNext(io.tlb.req.fire())) + tl.a.bits.address := RegEnableThru(io.tlb.resp.paddr, RegNext(io.tlb.req.fire)) tl.d.ready := xactBusy.orR() - when (untranslated_a.fire()) { + when (untranslated_a.fire) { when (state === s_writing_new_block) { beatsLeft := write_beats - 1.U @@ -584,7 +582,7 @@ class StreamWriter[T <: Data: Arithmetic](nXacts: Int, beatBits: Int, maxBytes: } // Accepting requests to kick-start the state machine - when (io.req.fire()) { + when (io.req.fire) { val pooled = { val cols = dataWidth / inputType.getWidth val v1 = io.req.bits.data.asTypeOf(Vec(cols, inputType)) @@ -615,7 +613,7 @@ class StreamWriter[T <: Data: Arithmetic](nXacts: Int, beatBits: Int, maxBytes: // External counters val total_bytes_sent = RegInit(0.U(CounterExternal.EXTERNAL_WIDTH.W)) - when (tl.d.fire()) { + when (tl.d.fire) { total_bytes_sent := total_bytes_sent + (1.U << tl.d.bits.size) } diff --git a/src/main/scala/gemmini/DMACommandTracker.scala b/src/main/scala/gemmini/DMACommandTracker.scala index a687e918..3390cbdf 100644 --- a/src/main/scala/gemmini/DMACommandTracker.scala +++ b/src/main/scala/gemmini/DMACommandTracker.scala @@ -21,7 +21,6 @@ class DMACommandTracker[T <: Data](val nCmds: Int, val maxBytes: Int, tag_t: => val bytes_to_read = Input(UInt(log2Up(maxBytes+1).W)) val cmd_id = Output(cmd_id_t.cloneType) - override def cloneType: this.type = new BitsT(tag_t.cloneType, cmd_id_t.cloneType).asInstanceOf[this.type] } val bits = new BitsT(tag_t.cloneType, cmd_id_t.cloneType) @@ -34,7 +33,6 @@ class DMACommandTracker[T <: Data](val nCmds: Int, val maxBytes: Int, tag_t: => val bytes_read = UInt(log2Up(maxBytes+1).W) val cmd_id = cmd_id_t.cloneType - override def cloneType: this.type = new RequestReturnedT(cmd_id_t.cloneType).asInstanceOf[this.type] } val request_returned = Flipped(Valid(new RequestReturnedT(cmd_id_t.cloneType))) @@ -43,7 +41,6 @@ class DMACommandTracker[T <: Data](val nCmds: Int, val maxBytes: Int, tag_t: => val cmd_id = cmd_id_t.cloneType val tag = tag_t.cloneType - override def cloneType: this.type = new CmdCompletedT(cmd_id_t.cloneType, tag_t.cloneType).asInstanceOf[this.type] } val cmd_completed = Decoupled(new CmdCompletedT(cmd_id_t.cloneType, tag_t.cloneType)) @@ -85,7 +82,7 @@ class DMACommandTracker[T <: Data](val nCmds: Int, val maxBytes: Int, tag_t: => cmds(next_empty_alloc).bytes_left := io.alloc.bits.bytes_to_read } - when (io.request_returned.fire()) { + when (io.request_returned.fire) { val cmd_id = io.request_returned.bits.cmd_id cmds(cmd_id).bytes_left := cmds(cmd_id).bytes_left - io.request_returned.bits.bytes_read @@ -93,7 +90,7 @@ class DMACommandTracker[T <: Data](val nCmds: Int, val maxBytes: Int, tag_t: => assert(cmds(cmd_id).bytes_left >= io.request_returned.bits.bytes_read) } - when (io.cmd_completed.fire()) { + when (io.cmd_completed.fire) { cmds(io.cmd_completed.bits.cmd_id).valid := false.B } diff --git a/src/main/scala/gemmini/ExecuteController.scala b/src/main/scala/gemmini/ExecuteController.scala index 6891c09b..de85a31b 100644 --- a/src/main/scala/gemmini/ExecuteController.scala +++ b/src/main/scala/gemmini/ExecuteController.scala @@ -682,7 +682,7 @@ class ExecuteController[T <: Data, U <: Data, V <: Data](xLen: Int, tagWidth: In } } is(flush) { - when(mesh.io.req.fire()) { + when(mesh.io.req.fire) { control_state := flushing } } @@ -810,9 +810,9 @@ class ExecuteController[T <: Data, U <: Data, V <: Data](xLen: Int, tagWidth: In val accReadValid = VecInit(io.acc.read_resp.map(bank => ex_read_from_acc.B && bank.valid && !bank.bits.fromDMA)) val im2ColValid = io.im2col.resp.valid - mesh_cntl_signals_q.io.deq.ready := (!cntl.a_fire || mesh.io.a.fire() || !mesh.io.a.ready) && - (!cntl.b_fire || mesh.io.b.fire() || !mesh.io.b.ready) && - (!cntl.d_fire || mesh.io.d.fire() || !mesh.io.d.ready) && + mesh_cntl_signals_q.io.deq.ready := (!cntl.a_fire || mesh.io.a.fire || !mesh.io.a.ready) && + (!cntl.b_fire || mesh.io.b.fire || !mesh.io.b.ready) && + (!cntl.d_fire || mesh.io.d.fire || !mesh.io.d.ready) && (!cntl.first || mesh.io.req.ready) val dataA_valid = cntl.a_garbage || cntl.a_unpadded_cols === 0.U || Mux(cntl.im2colling, im2ColValid, Mux(cntl.a_read_from_acc, accReadValid(cntl.a_bank_acc), readValid(cntl.a_bank))) @@ -840,8 +840,8 @@ class ExecuteController[T <: Data, U <: Data, V <: Data](xLen: Int, tagWidth: In val dataD = VecInit(dataD_unpadded.asTypeOf(Vec(block_size, inputType)).zipWithIndex.map { case (d, i) => Mux(i.U < cntl.d_unpadded_cols, d, inputType.zero)}) // Pop responses off the scratchpad io ports - when (mesh_cntl_signals_q.io.deq.fire()) { - when (cntl.a_fire && mesh.io.a.fire() && !cntl.a_garbage && cntl.a_unpadded_cols > 0.U && !cntl.im2colling) { + when (mesh_cntl_signals_q.io.deq.fire) { + when (cntl.a_fire && mesh.io.a.fire && !cntl.a_garbage && cntl.a_unpadded_cols > 0.U && !cntl.im2colling) { when (cntl.a_read_from_acc) { io.acc.read_resp(cntl.a_bank_acc).ready := !io.acc.read_resp(cntl.a_bank_acc).bits.fromDMA }.otherwise { @@ -849,7 +849,7 @@ class ExecuteController[T <: Data, U <: Data, V <: Data](xLen: Int, tagWidth: In } } - when (cntl.b_fire && mesh.io.b.fire() && !cntl.b_garbage && !cntl.accumulate_zeros && cntl.b_unpadded_cols > 0.U) { + when (cntl.b_fire && mesh.io.b.fire && !cntl.b_garbage && !cntl.accumulate_zeros && cntl.b_unpadded_cols > 0.U) { when (cntl.b_read_from_acc) { io.acc.read_resp(cntl.b_bank_acc).ready := !io.acc.read_resp(cntl.b_bank_acc).bits.fromDMA }.otherwise { @@ -857,7 +857,7 @@ class ExecuteController[T <: Data, U <: Data, V <: Data](xLen: Int, tagWidth: In } } - when (cntl.d_fire && mesh.io.d.fire() && !cntl.d_garbage && !cntl.preload_zeros && cntl.d_unpadded_cols > 0.U) { + when (cntl.d_fire && mesh.io.d.fire && !cntl.d_garbage && !cntl.preload_zeros && cntl.d_unpadded_cols > 0.U) { when (cntl.d_read_from_acc) { io.acc.read_resp(cntl.d_bank_acc).ready := !io.acc.read_resp(cntl.d_bank_acc).bits.fromDMA }.otherwise { @@ -882,7 +882,7 @@ class ExecuteController[T <: Data, U <: Data, V <: Data](xLen: Int, tagWidth: In mesh.io.b.bits := dataB.asTypeOf(Vec(meshColumns, Vec(tileColumns, inputType))) mesh.io.d.bits := dataD.asTypeOf(Vec(meshColumns, Vec(tileColumns, inputType))) - mesh.io.req.valid := mesh_cntl_signals_q.io.deq.fire() && (cntl.a_fire || cntl.b_fire || cntl.d_fire) + mesh.io.req.valid := mesh_cntl_signals_q.io.deq.fire && (cntl.a_fire || cntl.b_fire || cntl.d_fire) mesh.io.req.bits.tag.addr := cntl.c_addr @@ -970,7 +970,7 @@ class ExecuteController[T <: Data, U <: Data, V <: Data](xLen: Int, tagWidth: In //val complete_lock = RegInit(false.B) //Seah: added for WS accumulator - when(mesh.io.resp.fire() && mesh.io.resp.bits.tag.rob_id.valid) { + when(mesh.io.resp.fire && mesh.io.resp.bits.tag.rob_id.valid) { output_counter := wrappingAdd(output_counter, 1.U, w_total_output_rows) val last = mesh.io.resp.bits.last @@ -1005,29 +1005,29 @@ class ExecuteController[T <: Data, U <: Data, V <: Data](xLen: Int, tagWidth: In // Performance counter CounterEventIO.init(io.counter) io.counter.connectEventSignal(CounterEvent.EXE_ACTIVE_CYCLE, control_state === compute) - io.counter.connectEventSignal(CounterEvent.EXE_FLUSH_CYCLE, + io.counter.connectEventSignal(CounterEvent.EXE_FLUSH_CYCLE, control_state === flushing || control_state === flush) - io.counter.connectEventSignal(CounterEvent.EXE_CONTROL_Q_BLOCK_CYCLE, + io.counter.connectEventSignal(CounterEvent.EXE_CONTROL_Q_BLOCK_CYCLE, !mesh_cntl_signals_q.io.enq.ready && mesh_cntl_signals_q.io.enq.valid) - io.counter.connectEventSignal(CounterEvent.EXE_PRELOAD_HAZ_CYCLE, + io.counter.connectEventSignal(CounterEvent.EXE_PRELOAD_HAZ_CYCLE, cmd.valid(0) && DoPreloads(0) && cmd.valid(1) && raw_hazard_pre) - io.counter.connectEventSignal(CounterEvent.EXE_OVERLAP_HAZ_CYCLE, + io.counter.connectEventSignal(CounterEvent.EXE_OVERLAP_HAZ_CYCLE, cmd.valid(0) && DoPreloads(1) && cmd.valid(1) && DoComputes(0) && cmd.valid(2) && raw_hazard_mulpre) io.counter.connectEventSignal(CounterEvent.A_GARBAGE_CYCLES, cntl.a_garbage) io.counter.connectEventSignal(CounterEvent.B_GARBAGE_CYCLES, cntl.b_garbage) io.counter.connectEventSignal(CounterEvent.D_GARBAGE_CYCLES, cntl.d_garbage) - io.counter.connectEventSignal(CounterEvent.ACC_A_WAIT_CYCLE, - !(!cntl.a_fire || mesh.io.a.fire() || !mesh.io.a.ready) && cntl.a_read_from_acc && !cntl.im2colling) - io.counter.connectEventSignal(CounterEvent.ACC_B_WAIT_CYCLE, - !(!cntl.b_fire || mesh.io.b.fire() || !mesh.io.b.ready) && cntl.b_read_from_acc) - io.counter.connectEventSignal(CounterEvent.ACC_D_WAIT_CYCLE, - !(!cntl.d_fire || mesh.io.d.fire() || !mesh.io.d.ready) && cntl.d_read_from_acc) - io.counter.connectEventSignal(CounterEvent.SCRATCHPAD_A_WAIT_CYCLE, - !(!cntl.a_fire || mesh.io.a.fire() || !mesh.io.a.ready) && !cntl.a_read_from_acc && !cntl.im2colling) - io.counter.connectEventSignal(CounterEvent.SCRATCHPAD_B_WAIT_CYCLE, - !(!cntl.b_fire || mesh.io.b.fire() || !mesh.io.b.ready) && !cntl.b_read_from_acc) - io.counter.connectEventSignal(CounterEvent.SCRATCHPAD_D_WAIT_CYCLE, - !(!cntl.d_fire || mesh.io.d.fire() || !mesh.io.d.ready) && !cntl.d_read_from_acc) + io.counter.connectEventSignal(CounterEvent.ACC_A_WAIT_CYCLE, + !(!cntl.a_fire || mesh.io.a.fire || !mesh.io.a.ready) && cntl.a_read_from_acc && !cntl.im2colling) + io.counter.connectEventSignal(CounterEvent.ACC_B_WAIT_CYCLE, + !(!cntl.b_fire || mesh.io.b.fire || !mesh.io.b.ready) && cntl.b_read_from_acc) + io.counter.connectEventSignal(CounterEvent.ACC_D_WAIT_CYCLE, + !(!cntl.d_fire || mesh.io.d.fire || !mesh.io.d.ready) && cntl.d_read_from_acc) + io.counter.connectEventSignal(CounterEvent.SCRATCHPAD_A_WAIT_CYCLE, + !(!cntl.a_fire || mesh.io.a.fire || !mesh.io.a.ready) && !cntl.a_read_from_acc && !cntl.im2colling) + io.counter.connectEventSignal(CounterEvent.SCRATCHPAD_B_WAIT_CYCLE, + !(!cntl.b_fire || mesh.io.b.fire || !mesh.io.b.ready) && !cntl.b_read_from_acc) + io.counter.connectEventSignal(CounterEvent.SCRATCHPAD_D_WAIT_CYCLE, + !(!cntl.d_fire || mesh.io.d.fire || !mesh.io.d.ready) && !cntl.d_read_from_acc) if (use_firesim_simulation_counters) { val ex_flush_cycle = control_state === flushing || control_state === flush diff --git a/src/main/scala/gemmini/FrontendTLB.scala b/src/main/scala/gemmini/FrontendTLB.scala index bc028ee9..6e7168e9 100644 --- a/src/main/scala/gemmini/FrontendTLB.scala +++ b/src/main/scala/gemmini/FrontendTLB.scala @@ -54,25 +54,27 @@ class DecoupledTLB(entries: Int, maxSize: Int, use_firesim_simulation_counters: tlb.io.sfence.bits.rs2 := false.B tlb.io.sfence.bits.addr := DontCare tlb.io.sfence.bits.asid := DontCare + tlb.io.sfence.bits.hv := false.B + tlb.io.sfence.bits.hg := false.B io.ptw <> tlb.io.ptw tlb.io.ptw.status := io.req.bits.status val exception = io.req.valid && Mux(io.req.bits.tlb_req.cmd === M_XRD, tlb.io.resp.pf.ld || tlb.io.resp.ae.ld, tlb.io.resp.pf.st || tlb.io.resp.ae.st) when (exception) { interrupt := true.B } - when (interrupt && tlb.io.sfence.fire()) { + when (interrupt && tlb.io.sfence.fire) { interrupt := false.B } assert(!io.exp.flush_retry || !io.exp.flush_skip, "TLB: flushing with both retry and skip at same time") CounterEventIO.init(io.counter) - io.counter.connectEventSignal(CounterEvent.DMA_TLB_HIT_REQ, io.req.fire() && !tlb.io.resp.miss) - io.counter.connectEventSignal(CounterEvent.DMA_TLB_TOTAL_REQ, io.req.fire()) + io.counter.connectEventSignal(CounterEvent.DMA_TLB_HIT_REQ, io.req.fire && !tlb.io.resp.miss) + io.counter.connectEventSignal(CounterEvent.DMA_TLB_TOTAL_REQ, io.req.fire) io.counter.connectEventSignal(CounterEvent.DMA_TLB_MISS_CYCLE, tlb.io.resp.miss) if (use_firesim_simulation_counters) { - PerfCounter(io.req.fire() && !tlb.io.resp.miss, "tlb_hits", "total number of tlb hits") - PerfCounter(io.req.fire(), "tlb_reqs", "total number of tlb reqs") + PerfCounter(io.req.fire && !tlb.io.resp.miss, "tlb_hits", "total number of tlb hits") + PerfCounter(io.req.fire, "tlb_reqs", "total number of tlb reqs") PerfCounter(tlb.io.resp.miss, "tlb_miss_cycles", "total number of cycles where the tlb is resolving a miss") } } @@ -123,7 +125,7 @@ class FrontendTLB(nClients: Int, entries: Int, maxSize: Int, use_tlb_register_fi val tlb = if (use_shared_tlb) tlbs.head else tlbs(i) val tlbReq = if (use_shared_tlb) tlbArbOpt.get.io.in(i).bits else tlb.io.req.bits val tlbReqValid = if (use_shared_tlb) tlbArbOpt.get.io.in(i).valid else tlb.io.req.valid - val tlbReqFire = if (use_shared_tlb) tlbArbOpt.get.io.in(i).fire() else tlb.io.req.fire() + val tlbReqFire = if (use_shared_tlb) tlbArbOpt.get.io.in(i).fire else tlb.io.req.fire tlbReqValid := RegNext(client.req.valid && !l0_tlb_hit) tlbReq := RegNext(client.req.bits) diff --git a/src/main/scala/gemmini/GemminiISA.scala b/src/main/scala/gemmini/GemminiISA.scala index 0b28316d..9cb15ac9 100644 --- a/src/main/scala/gemmini/GemminiISA.scala +++ b/src/main/scala/gemmini/GemminiISA.scala @@ -71,9 +71,6 @@ object GemminiISA { val num_cols = UInt(mvin_cols_bits.W) val _spacer0 = UInt((MVIN_RS2_ADDR_WIDTH - local_addr_t.getWidth).W) val local_addr = local_addr_t.cloneType - - override def cloneType: MvinRs2.this.type = - (new MvinRs2(mvin_rows_bits, mvin_cols_bits, local_addr_t)).asInstanceOf[this.type] } val MVOUT_RS2_ADDR_WIDTH = 32 @@ -87,9 +84,6 @@ object GemminiISA { val num_cols = UInt(mvout_cols_bits.W) val _spacer0 = UInt((MVOUT_RS2_ADDR_WIDTH - local_addr_t.getWidth).W) val local_addr = local_addr_t.cloneType - - override def cloneType: MvoutRs2.this.type = - (new MvoutRs2(mvout_rows_bits, mvout_cols_bits, local_addr_t)).asInstanceOf[this.type] } val CONFIG_MVIN_RS1_UNUSED_WIDTH = 2 @@ -111,9 +105,6 @@ object GemminiISA { val state_id = UInt(CONFIG_MVIN_RS1_STATE_ID_WIDTH.W) val shrink = UInt(CONFIG_MVIN_RS1_SHRINK_WIDTH.W) val _unused = UInt(CONFIG_MVIN_RS1_UNUSED_WIDTH.W) - - override def cloneType: ConfigMvinRs1.this.type = - (new ConfigMvinRs1(scale_bits, stride_bits, pixel_repeat_bits)).asInstanceOf[this.type] } val CONFIG_MVOUT_RS1_UNUSED_WIDTH = 2 @@ -142,8 +133,6 @@ object GemminiISA { val pool_stride = UInt(CONFIG_MVOUT_RS1_MAX_POOLING_STRIDE_WIDTH.W) val activation = UInt(CONFIG_MVOUT_RS1_ACTIVATION_WIDTH.W) val _unused = UInt(CONFIG_MVOUT_RS1_UNUSED_WIDTH.W) - - override def cloneType: ConfigMvoutRs1.this.type = (new ConfigMvoutRs1).asInstanceOf[this.type] } val CONFIG_MVOUT_RS2_ACC_SCALE_WIDTH = 32 @@ -154,9 +143,6 @@ object GemminiISA { val acc_scale = UInt(acc_scale_bits.W) val _spacer0 = UInt((CONFIG_MVOUT_RS2_STRIDE_WIDTH - stride_bits).W) val stride = UInt(stride_bits.W) - - override def cloneType: ConfigMvoutRs2.this.type = - (new ConfigMvoutRs2(acc_scale_bits, stride_bits)).asInstanceOf[this.type] } val CONFIG_EX_RS1_CMD_TYPE_WIDTH = 2 @@ -182,9 +168,6 @@ object GemminiISA { val activation = UInt(CONFIG_EX_RS1_ACTIVATION_WIDTH.W) val dataflow = UInt(CONFIG_EX_RS1_DATAFLOW_WIDTH.W) val cmd_type = UInt(CONFIG_EX_RS1_CMD_TYPE_WIDTH.W) - - override def cloneType: ConfigExRs1.this.type = - (new ConfigExRs1(acc_scale_bits)).asInstanceOf[this.type] } val CONFIG_EX_RS2_IN_SHIFT_WIDTH = 32 @@ -195,8 +178,6 @@ object GemminiISA { val c_stride = UInt(CONFIG_EX_RS2_C_STRIDE_WIDTH.W) val relu6_shift = UInt(CONFIG_EX_RS2_RELU6_SHIFT_WIDTH.W) val in_shift = UInt(CONFIG_EX_RS2_IN_SHIFT_WIDTH.W) - - override def cloneType: ConfigExRs2.this.type = (new ConfigExRs2).asInstanceOf[this.type] } val PRELOAD_RS_ADDR_WIDTH = 32 @@ -210,9 +191,6 @@ object GemminiISA { val num_cols = UInt(preload_cols_bits.W) val _spacer0 = UInt((PRELOAD_RS_ADDR_WIDTH - local_addr_t.getWidth).W) val local_addr = local_addr_t.cloneType - - override def cloneType: PreloadRs.this.type = - (new PreloadRs(preload_rows_bits, preload_cols_bits, local_addr_t)).asInstanceOf[this.type] } val COMPUTED_RS_ADDR_WIDTH = 32 @@ -226,9 +204,6 @@ object GemminiISA { val num_cols = UInt(compute_cols_bits.W) val _spacer0 = UInt((COMPUTED_RS_ADDR_WIDTH - local_addr_t.getWidth).W) val local_addr = local_addr_t.cloneType - - override def cloneType: ComputeRs.this.type = - (new ComputeRs(compute_rows_bits, compute_cols_bits, local_addr_t)).asInstanceOf[this.type] } } diff --git a/src/main/scala/gemmini/Im2Col.scala b/src/main/scala/gemmini/Im2Col.scala index 2c7f8cbf..a317902b 100644 --- a/src/main/scala/gemmini/Im2Col.scala +++ b/src/main/scala/gemmini/Im2Col.scala @@ -24,7 +24,6 @@ class Im2ColReadReq[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[ val weight_triple_bank = Bool() val start_inputting = Bool() //start_inputting_a - override def cloneType: Im2ColReadReq.this.type = new Im2ColReadReq(config).asInstanceOf[this.type] } @@ -38,7 +37,6 @@ class Im2ColReadResp[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig //added for sync val im2col_delay = Bool() - override def cloneType: Im2ColReadResp.this.type = new Im2ColReadResp(config).asInstanceOf[this.type] } diff --git a/src/main/scala/gemmini/InstructionCompression.scala b/src/main/scala/gemmini/InstructionCompression.scala index 38f373e5..96bc77ee 100644 --- a/src/main/scala/gemmini/InstructionCompression.scala +++ b/src/main/scala/gemmini/InstructionCompression.scala @@ -25,17 +25,17 @@ class InstCompressor(implicit p: Parameters) extends Module { fused_cmd.rs1 := Cat(buf(0).bits.rs1(31, 0), buf(1).bits.rs1(31, 0)) fused_cmd.rs2 := Cat(buf(0).bits.rs2(31, 0), buf(1).bits.rs2(31, 0)) - io.in.ready := !buf(0).valid || (buf(0).valid && is_preload && !buf(1).valid) || io.out.fire() + io.in.ready := !buf(0).valid || (buf(0).valid && is_preload && !buf(1).valid) || io.out.fire io.out.valid := (buf(0).valid && !is_preload) || (buf(0).valid && is_preload && buf(1).valid) io.out.bits := Mux(is_preload, fused_cmd, buf(0).bits) io.busy := buf(0).valid - when (io.out.fire()) { + when (io.out.fire) { buf.foreach(_.pop()) } - when (io.in.fire()) { + when (io.in.fire) { val waddr = Mux(buf(0).valid && is_preload && !buf(1).valid, 1.U, 0.U) buf(waddr).push(io.in.bits) } @@ -62,11 +62,11 @@ class InstDecompressor(rob_entries: Int)(implicit p: Parameters) extends Module unfused_cmd.cmd.rs1 := Mux(pushed_preload, cmd.rs1(31, 0), cmd.rs1(63, 32)) unfused_cmd.cmd.rs2 := Mux(pushed_preload, cmd.rs2(31, 0), cmd.rs2(63, 32)) - io.in.ready := !buf.valid || (io.out.fire() && !(is_compute && !pushed_preload)) + io.in.ready := !buf.valid || (io.out.fire && !(is_compute && !pushed_preload)) io.out.valid := buf.valid io.out.bits := Mux(is_compute, unfused_cmd, buf.bits) - when (io.out.fire()) { + when (io.out.fire) { when (is_compute && !pushed_preload) { pushed_preload := true.B }.otherwise { @@ -74,7 +74,7 @@ class InstDecompressor(rob_entries: Int)(implicit p: Parameters) extends Module } } - when (io.in.fire()) { + when (io.in.fire) { buf.push(io.in.bits) pushed_preload := false.B } diff --git a/src/main/scala/gemmini/LoadController.scala b/src/main/scala/gemmini/LoadController.scala index 49d7b409..2ebee2ca 100644 --- a/src/main/scala/gemmini/LoadController.scala +++ b/src/main/scala/gemmini/LoadController.scala @@ -116,7 +116,7 @@ class LoadController[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig Mux(io.dma.req.bits.has_acc_bitwidth, cols * actual_rows_read * config.accType.getWidth.U, cols * actual_rows_read * config.inputType.getWidth.U) / 8.U cmd_tracker.io.alloc.bits.tag.rob_id := cmd.bits.rob_id.bits - cmd_tracker.io.request_returned.valid := io.dma.resp.fire() // TODO use a bundle connect + cmd_tracker.io.request_returned.valid := io.dma.resp.fire // TODO use a bundle connect cmd_tracker.io.request_returned.bits.cmd_id := io.dma.resp.bits.cmd_id // TODO use a bundle connect cmd_tracker.io.request_returned.bits.bytes_read := io.dma.resp.bits.bytesRead cmd_tracker.io.cmd_completed.ready := io.completed.ready @@ -130,7 +130,7 @@ class LoadController[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig io.busy := cmd.valid || cmd_tracker.io.busy // Row counter - when (io.dma.req.fire()) { + when (io.dma.req.fire) { row_counter := wrappingAdd(row_counter, 1.U, actual_rows_read) assert(block_stride >= rows) @@ -150,19 +150,19 @@ class LoadController[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig } .elsewhen(DoLoad && cmd_tracker.io.alloc.fire()) { - control_state := Mux(io.dma.req.fire(), sending_rows, waiting_for_dma_req_ready) + control_state := Mux(io.dma.req.fire, sending_rows, waiting_for_dma_req_ready) } } } is (waiting_for_dma_req_ready) { - when (io.dma.req.fire()) { + when (io.dma.req.fire) { control_state := sending_rows } } is (sending_rows) { - val last_row = row_counter === 0.U || (row_counter === actual_rows_read-1.U && io.dma.req.fire()) + val last_row = row_counter === 0.U || (row_counter === actual_rows_read-1.U && io.dma.req.fire) when (last_row) { control_state := waiting_for_command diff --git a/src/main/scala/gemmini/LocalAddr.scala b/src/main/scala/gemmini/LocalAddr.scala index d6e4f309..92e46ffc 100644 --- a/src/main/scala/gemmini/LocalAddr.scala +++ b/src/main/scala/gemmini/LocalAddr.scala @@ -94,7 +94,6 @@ class LocalAddr(sp_banks: Int, sp_bank_entries: Int, acc_banks: Int, acc_bank_en data := ~(0.U(maxAddrBits.W)) } - override def cloneType: LocalAddr.this.type = new LocalAddr(sp_banks, sp_bank_entries, acc_banks, acc_bank_entries).asInstanceOf[this.type] } object LocalAddr { diff --git a/src/main/scala/gemmini/LoopConv.scala b/src/main/scala/gemmini/LoopConv.scala index cfb9bd8d..a50cc9ac 100644 --- a/src/main/scala/gemmini/LoopConv.scala +++ b/src/main/scala/gemmini/LoopConv.scala @@ -182,7 +182,7 @@ class LoopConvLdBias(block_size: Int, coreMaxAddrBits: Int, large_iterator_bitwi // Sending outputs when (skip) { state := idle - }.elsewhen(command_p.io.in.fire()) { + }.elsewhen(command_p.io.in.fire) { when (state === config) { state := ld }.otherwise { @@ -202,7 +202,7 @@ class LoopConvLdBias(block_size: Int, coreMaxAddrBits: Int, large_iterator_bitwi } // Accepting requests - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := config b := 0.U @@ -353,7 +353,7 @@ class LoopConvLdInput(block_size: Int, coreMaxAddrBits: Int, large_iterator_bitw } // Sending outputs - when(command_p.io.in.fire()) { + when(command_p.io.in.fire) { when (state === config) { state := ld }.otherwise { @@ -379,7 +379,7 @@ class LoopConvLdInput(block_size: Int, coreMaxAddrBits: Int, large_iterator_bitw } // Accepting requests - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := config b := 0.S @@ -527,7 +527,7 @@ class LoopConvLdWeight(block_size: Int, coreMaxAddrBits: Int, large_iterator_bit } // Sending outputs - when(command_p.io.in.fire()) { + when(command_p.io.in.fire) { when (state === config) { state := ld }.otherwise { @@ -550,7 +550,7 @@ class LoopConvLdWeight(block_size: Int, coreMaxAddrBits: Int, large_iterator_bit } // Accepting requests - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := config kch := 0.U @@ -759,12 +759,12 @@ class LoopConvExecute(block_size: Int, large_iterator_bitwidth: Int, small_itera } // Updating "new_weights" - when (state === comp && command_p.io.in.fire()) { + when (state === comp && command_p.io.in.fire) { new_weights := false.B } // Sending outputs - when (command_p.io.in.fire() || skip_iteration) { + when (command_p.io.in.fire || skip_iteration) { when (state === config) { state := pre }.elsewhen (state === pre) { @@ -804,7 +804,7 @@ class LoopConvExecute(block_size: Int, large_iterator_bitwidth: Int, small_itera } // Accepting requests - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := Mux(io.req.bits.trans_input_3120, config, pre) @@ -998,7 +998,7 @@ class LoopConvSt(block_size: Int, coreMaxAddrBits: Int, large_iterator_bitwidth: // Sending outputs when (skip) { state := idle - }.elsewhen(command_p.io.in.fire()) { + }.elsewhen(command_p.io.in.fire) { when (req.no_pool) { val next_och = floorAdd(och, block_size.U, ochs) val next_ocol = floorAdd(ocol, block_size.U, ocols, next_och === 0.U) @@ -1029,7 +1029,7 @@ class LoopConvSt(block_size: Int, coreMaxAddrBits: Int, large_iterator_bitwidth: } // Accepting requests - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := Mux(io.req.bits.no_pool, st, pre_pool_config) @@ -1345,7 +1345,7 @@ class LoopConv (block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: I ld_bias.io.req.valid := !loop_requesting_ld_bias.ld_bias_started && loop_requesting_ld_bias.configured - when (ld_bias.io.req.fire()) { + when (ld_bias.io.req.fire) { loop_requesting_ld_bias.running := true.B loop_requesting_ld_bias.ld_bias_started := true.B @@ -1370,7 +1370,7 @@ class LoopConv (block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: I ld_input.io.req.valid := !loop_requesting_ld_input.ld_input_started && loop_requesting_ld_input.configured - when (ld_input.io.req.fire()) { + when (ld_input.io.req.fire) { loop_requesting_ld_input.running := true.B loop_requesting_ld_input.ld_input_started := true.B } @@ -1388,7 +1388,7 @@ class LoopConv (block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: I ld_weights.io.req.valid := !loop_requesting_ld_weights.ld_weights_started && loop_requesting_ld_weights.configured - when (ld_weights.io.req.fire()) { + when (ld_weights.io.req.fire) { loop_requesting_ld_weights.running := true.B loop_requesting_ld_weights.ld_weights_started := true.B } @@ -1412,7 +1412,7 @@ class LoopConv (block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: I ex.io.req.valid := !loop_requesting_ex.ex_started && loop_requesting_ex.ld_bias_started && loop_requesting_ex.ld_input_started && loop_requesting_ex.ld_weights_started && loop_requesting_ex.configured - when (ex.io.req.fire()) { + when (ex.io.req.fire) { loop_requesting_ex.running := true.B loop_requesting_ex.ex_started := true.B @@ -1435,7 +1435,7 @@ class LoopConv (block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: I st.io.req.valid := !loop_requesting_st.st_started && loop_requesting_st.ex_started && loop_requesting_st.configured - when (st.io.req.fire()) { + when (st.io.req.fire) { loop_requesting_st.running := true.B loop_requesting_st.st_started := true.B diff --git a/src/main/scala/gemmini/LoopMatmul.scala b/src/main/scala/gemmini/LoopMatmul.scala index 4841e2be..80ece4fc 100644 --- a/src/main/scala/gemmini/LoopMatmul.scala +++ b/src/main/scala/gemmini/LoopMatmul.scala @@ -90,7 +90,7 @@ class LoopMatmulLdA(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth: In io.loop_id := req.loop_id - when (io.cmd.fire()) { + when (io.cmd.fire) { // The order here is k, j, i val i_blocks = Mux(req.transpose, max_blocks, 1.U) val k_blocks = Mux(req.transpose, 1.U, max_blocks) @@ -106,7 +106,7 @@ class LoopMatmulLdA(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth: In } } - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := ld i := 0.U @@ -198,7 +198,7 @@ class LoopMatmulLdB(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth: In io.loop_id := req.loop_id - when (io.cmd.fire()) { + when (io.cmd.fire) { // The order here is k, j, i val j_blocks = Mux(req.transpose, 1.U, max_blocks) val k_blocks = Mux(req.transpose, max_blocks, 1.U) @@ -214,7 +214,7 @@ class LoopMatmulLdB(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth: In } } - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := ld j := 0.U @@ -296,7 +296,7 @@ class LoopMatmulLdD(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth: In when (req.dram_addr === 0.U) { state := idle - }.elsewhen (io.cmd.fire()) { + }.elsewhen (io.cmd.fire) { // The order here is k, j, i val next_i = floorAdd(i, 1.U, req.max_i) val next_j = floorAdd(j, max_blocks, req.max_j, next_i === 0.U) @@ -309,7 +309,7 @@ class LoopMatmulLdD(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth: In } } - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := ld j := 0.U @@ -450,7 +450,7 @@ class LoopMatmulExecute(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth io.loop_id := req.loop_id - when (io.cmd.fire()) { + when (io.cmd.fire) { when (state === pre) { state := comp }.otherwise { @@ -466,7 +466,7 @@ class LoopMatmulExecute(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth } } - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := pre j := 0.U @@ -566,7 +566,7 @@ class LoopMatmulStC(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth: In when (req.dram_addr === 0.U) { state := idle - }.elsewhen (io.cmd.fire()) { + }.elsewhen (io.cmd.fire) { // The order here is k, j, i val next_i = floorAdd(i, 1.U, req.max_i) val next_j = floorAdd(j, max_blocks, req.max_j, next_i === 0.U) @@ -579,7 +579,7 @@ class LoopMatmulStC(block_size: Int, coreMaxAddrBits: Int, iterator_bitwidth: In } } - when (io.req.fire()) { + when (io.req.fire) { req := io.req.bits state := st j := 0.U @@ -827,7 +827,7 @@ class LoopMatmul(block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: ldA.io.req.valid := !loop_requesting_ldA.lda_started && loop_requesting_ldA.configured - when (ldA.io.req.fire()) { + when (ldA.io.req.fire) { loop_requesting_ldA.running := true.B loop_requesting_ldA.lda_started := true.B } @@ -846,7 +846,7 @@ class LoopMatmul(block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: ldB.io.req.valid := !loop_requesting_ldB.ldb_started && loop_requesting_ldB.configured - when (ldB.io.req.fire()) { + when (ldB.io.req.fire) { loop_requesting_ldB.running := true.B loop_requesting_ldB.ldb_started := true.B } @@ -870,7 +870,7 @@ class LoopMatmul(block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: ex.io.req.valid := !loop_requesting_ex.ex_started && loop_requesting_ex.lda_started && loop_requesting_ex.ldb_started && loop_requesting_ex.ldd_started && loop_requesting_ex.configured - when (ex.io.req.fire()) { + when (ex.io.req.fire) { loop_requesting_ex.running := true.B loop_requesting_ex.ex_started := true.B @@ -893,7 +893,7 @@ class LoopMatmul(block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: ldD.io.req.valid := !loop_requesting_ldD.ldd_started && loop_requesting_ldD.configured - when (ldD.io.req.fire()) { + when (ldD.io.req.fire) { loop_requesting_ldD.running := true.B loop_requesting_ldD.ldd_started := true.B @@ -917,7 +917,7 @@ class LoopMatmul(block_size: Int, coreMaxAddrBits: Int, rob_size: Int, max_lds: stC.io.req.valid := !loop_requesting_st.st_started && loop_requesting_st.ex_started && loop_requesting_st.configured - when (stC.io.req.fire()) { + when (stC.io.req.fire) { loop_requesting_st.running := true.B loop_requesting_st.st_started := true.B diff --git a/src/main/scala/gemmini/LoopUnroller.scala b/src/main/scala/gemmini/LoopUnroller.scala index 47c33f68..02ac7d71 100644 --- a/src/main/scala/gemmini/LoopUnroller.scala +++ b/src/main/scala/gemmini/LoopUnroller.scala @@ -84,11 +84,11 @@ class LoopUnroller(block_size: Int)(implicit p: Parameters) extends Module { when (cmd.valid) { when (is_loop && (state === idle || state === preload)) { - when (io.out.fire()) { + when (io.out.fire) { state := compute } }.elsewhen(is_loop && state === compute) { - when (io.out.fire()) { + when (io.out.fire) { increment() state := Mux(last_iteration, idle, preload) cmd.ready := last_iteration diff --git a/src/main/scala/gemmini/MeshWithDelays.scala b/src/main/scala/gemmini/MeshWithDelays.scala index db40debf..edd28cf6 100644 --- a/src/main/scala/gemmini/MeshWithDelays.scala +++ b/src/main/scala/gemmini/MeshWithDelays.scala @@ -14,7 +14,6 @@ class MeshWithDelaysReq[T <: Data: Arithmetic, TagT <: TagQueueTag with Data](ac val tag = tagType val flush = UInt(2.W) // TODO magic number - override def cloneType: MeshWithDelaysReq.this.type = new MeshWithDelaysReq(accType, tagType, block_size).asInstanceOf[this.type] } class MeshWithDelaysResp[T <: Data: Arithmetic, TagT <: TagQueueTag with Data](outType: T, meshCols: Int, tileCols: Int, block_size: Int, tagType: TagT) extends Bundle { @@ -23,7 +22,6 @@ class MeshWithDelaysResp[T <: Data: Arithmetic, TagT <: TagQueueTag with Data](o val tag = tagType val last = Bool() - override def cloneType: MeshWithDelaysResp.this.type = new MeshWithDelaysResp(outType, meshCols, tileCols, block_size, tagType).asInstanceOf[this.type] } // TODO Add io.out.ready back in. Before it was removed, it didn't work when banking, and it seemed to assume that SRAM outputs stay steady when ren is low @@ -99,9 +97,9 @@ class MeshWithDelays[T <: Data: Arithmetic, U <: TagQueueTag with Data] val total_fires = req.bits.total_rows val fire_counter = RegInit(0.U(log2Up(block_size).W)) - val a_buf = RegEnable(io.a.bits, io.a.fire()) - val b_buf = RegEnable(io.b.bits, io.b.fire()) - val d_buf = RegEnable(io.d.bits, io.d.fire()) + val a_buf = RegEnable(io.a.bits, io.a.fire) + val b_buf = RegEnable(io.b.bits, io.b.fire) + val d_buf = RegEnable(io.d.bits, io.d.fire) val a_written = RegInit(false.B) val b_written = RegInit(false.B) @@ -113,7 +111,7 @@ class MeshWithDelays[T <: Data: Arithmetic, U <: TagQueueTag with Data] val last_fire = fire_counter === total_fires - 1.U && input_next_row_into_spatial_array - when (io.req.fire()) { + when (io.req.fire) { req.push(io.req.bits) in_prop := io.req.bits.pe_control.propagate ^ in_prop matmul_id := wrappingAdd(matmul_id, 1.U, max_simultaneous_matmuls) @@ -130,15 +128,15 @@ class MeshWithDelays[T <: Data: Arithmetic, U <: TagQueueTag with Data] fire_counter := wrappingAdd(fire_counter, 1.U, total_fires) } - when (io.a.fire()) { + when (io.a.fire) { a_written := true.B } - when (io.b.fire()) { + when (io.b.fire) { b_written := true.B } - when (io.d.fire()) { + when (io.d.fire) { d_written := true.B } @@ -216,14 +214,13 @@ class MeshWithDelays[T <: Data: Arithmetic, U <: TagQueueTag with Data] tag.make_this_garbage() } - override def cloneType: TagWithIdAndTotalRows.this.type = (new TagWithIdAndTotalRows).asInstanceOf[this.type] } val matmul_id_of_output = wrappingAdd(matmul_id, Mux(io.req.bits.pe_control.dataflow === Dataflow.OS.id.U, 3.U, 2.U), max_simultaneous_matmuls) val matmul_id_of_current = wrappingAdd(matmul_id, 1.U, max_simultaneous_matmuls) val tagq = Module(new TagQueue(new TagWithIdAndTotalRows, tagqlen)) - tagq.io.enq.valid := io.req.fire() && io.req.bits.flush === 0.U + tagq.io.enq.valid := io.req.fire && io.req.bits.flush === 0.U tagq.io.enq.bits.tag := io.req.bits.tag tagq.io.enq.bits.total_rows := DontCare tagq.io.enq.bits.id := matmul_id_of_output @@ -240,7 +237,7 @@ class MeshWithDelays[T <: Data: Arithmetic, U <: TagQueueTag with Data] tagq.io.deq.ready := io.resp.valid && io.resp.bits.last && out_matmul_id === tagq.io.deq.bits.id val total_rows_q = Module(new Queue(new TagWithIdAndTotalRows, tagqlen)) - total_rows_q.io.enq.valid := io.req.fire() && io.req.bits.flush === 0.U + total_rows_q.io.enq.valid := io.req.fire && io.req.bits.flush === 0.U total_rows_q.io.enq.bits.tag := DontCare total_rows_q.io.enq.bits.total_rows := io.req.bits.total_rows total_rows_q.io.enq.bits.id := matmul_id_of_current @@ -257,5 +254,5 @@ class MeshWithDelays[T <: Data: Arithmetic, U <: TagQueueTag with Data] req.valid := false.B } - assert(!(io.req.fire() && !tagq.io.enq.ready && io.req.bits.flush === 0.U)) + assert(!(io.req.fire && !tagq.io.enq.ready && io.req.bits.flush === 0.U)) } diff --git a/src/main/scala/gemmini/MultiHeadedQueue.scala b/src/main/scala/gemmini/MultiHeadedQueue.scala index c029f6f5..79900dfa 100644 --- a/src/main/scala/gemmini/MultiHeadedQueue.scala +++ b/src/main/scala/gemmini/MultiHeadedQueue.scala @@ -33,7 +33,7 @@ class MultiHeadedQueue[T <: Data](gen: T, entries: Int, heads: Int, maxpop: Int } // Pushing - when (io.enq.fire()) { + when (io.enq.fire) { regs(waddr) := io.enq.bits waddr := wrappingAdd(waddr, 1.U, entries) len := len + 1.U @@ -42,7 +42,7 @@ class MultiHeadedQueue[T <: Data](gen: T, entries: Int, heads: Int, maxpop: Int // Popping when(io.deq.pop > 0.U) { raddr := wrappingAdd(raddr, io.deq.pop, entries) - len := len - io.deq.pop + io.enq.fire() + len := len - io.deq.pop + io.enq.fire } assert(io.deq.pop <= len && io.deq.pop <= heads.U && io.deq.pop <= maxpop.U) diff --git a/src/main/scala/gemmini/MultiTailedQueue.scala b/src/main/scala/gemmini/MultiTailedQueue.scala index ea16728b..7a0bb3d7 100644 --- a/src/main/scala/gemmini/MultiTailedQueue.scala +++ b/src/main/scala/gemmini/MultiTailedQueue.scala @@ -4,7 +4,7 @@ import chisel3._ import chisel3.util._ import Util._ -class MultiTailedQueue[T <: Data](gen: T, entries: Int, maxpush: Int) +class MultiTailedQueue[T <: Data](gen: T, entries: Int, maxpush: Int) extends Module { val io = IO(new Bundle { val enq = new Bundle { @@ -36,10 +36,10 @@ class MultiTailedQueue[T <: Data](gen: T, entries: Int, maxpush: Int) // pop interface io.deq.bits := regs(raddr) io.deq.valid := (avail < entries.U) - raddr := wrappingAdd(raddr, io.deq.fire(), entries) + raddr := wrappingAdd(raddr, io.deq.fire, entries) // countgth calc - avail := avail - io.enq.push + io.deq.fire() + avail := avail - io.enq.push + io.deq.fire } object MultiTailedQueue { diff --git a/src/main/scala/gemmini/PE.scala b/src/main/scala/gemmini/PE.scala index e10318a3..5f7205bd 100644 --- a/src/main/scala/gemmini/PE.scala +++ b/src/main/scala/gemmini/PE.scala @@ -9,7 +9,6 @@ class PEControl[T <: Data : Arithmetic](accType: T) extends Bundle { val propagate = UInt(1.W) // Which register should be propagated (and which should be accumulated)? val shift = UInt(log2Up(accType.getWidth).W) // TODO this isn't correct for Floats - override def cloneType: PEControl.this.type = new PEControl(accType).asInstanceOf[this.type] } // TODO update documentation diff --git a/src/main/scala/gemmini/Pipeline.scala b/src/main/scala/gemmini/Pipeline.scala index 323686d1..0aeafd18 100644 --- a/src/main/scala/gemmini/Pipeline.scala +++ b/src/main/scala/gemmini/Pipeline.scala @@ -42,7 +42,7 @@ class Pipeline[T <: Data] (gen: T, latency: Int)(comb: Seq[T => T] = Seq.fill(la } } // When the pipeline stage behind you is valid then become true - when(io.in.fire()) { + when(io.in.fire) { valids.head := true.B } (valids.tail, valids.init).zipped.foreach { case (v2, v1) => @@ -52,7 +52,7 @@ class Pipeline[T <: Data] (gen: T, latency: Int)(comb: Seq[T => T] = Seq.fill(la } // Stages - when(io.in.fire()) { + when(io.in.fire) { stages.head := comb.head(io.in.bits) } io.out.bits := comb.last(stages.last) diff --git a/src/main/scala/gemmini/PixelRepeater.scala b/src/main/scala/gemmini/PixelRepeater.scala index e0eb4fd7..ddab4422 100644 --- a/src/main/scala/gemmini/PixelRepeater.scala +++ b/src/main/scala/gemmini/PixelRepeater.scala @@ -15,8 +15,6 @@ class PixelRepeaterReq[T <: Data, Tag <: Data](t: T, laddr_t: LocalAddr, block_c val tag: Tag = tag_t.cloneType assert(block_cols <= 255, "len must be longer") - - override def cloneType: PixelRepeaterReq.this.type = new PixelRepeaterReq(t, laddr_t, block_cols, aligned_to, tag_t).asInstanceOf[this.type] } class PixelRepeaterResp[T <: Data, Tag <: Data](t: T, laddr_t: LocalAddr, block_cols: Int, aligned_to: Int, tag_t: Tag) extends Bundle { @@ -25,8 +23,6 @@ class PixelRepeaterResp[T <: Data, Tag <: Data](t: T, laddr_t: LocalAddr, block_ val laddr: LocalAddr = laddr_t.cloneType val last: Bool = Bool() val tag: Tag = tag_t.cloneType - - override def cloneType: PixelRepeaterResp.this.type = new PixelRepeaterResp(t, laddr_t, block_cols, aligned_to, tag_t).asInstanceOf[this.type] } class PixelRepeater[T <: Data, Tag <: Data](t: T, laddr_t: LocalAddr, block_cols: Int, aligned_to: Int, tag_t: Tag, passthrough: Boolean) extends Module { @@ -75,7 +71,7 @@ class PixelRepeater[T <: Data, Tag <: Data](t: T, laddr_t: LocalAddr, block_cols io.resp.valid := req.valid && !underflow - when(io.resp.fire() || underflow) { + when(io.resp.fire || underflow) { req.bits.pixel_repeats := req.bits.pixel_repeats - 1.U when(req.bits.pixel_repeats === 0.U) { @@ -83,12 +79,12 @@ class PixelRepeater[T <: Data, Tag <: Data](t: T, laddr_t: LocalAddr, block_cols } } - when(io.req.fire()) { + when(io.req.fire) { req.push(io.req.bits) req.bits.pixel_repeats := io.req.bits.pixel_repeats - 1.U } - when(reset.toBool()) { + when(reset.asBool()) { req.pop() } } diff --git a/src/main/scala/gemmini/ReservationStation.scala b/src/main/scala/gemmini/ReservationStation.scala index 7135969f..e8c6ed26 100644 --- a/src/main/scala/gemmini/ReservationStation.scala +++ b/src/main/scala/gemmini/ReservationStation.scala @@ -21,7 +21,6 @@ class ReservationStationIssue[T <: Data](cmd_t: T, rob_entries: Int) extends Bun def fire(dummy: Int=0) = valid && ready - override def cloneType: this.type = new ReservationStationIssue(cmd_t, rob_entries).asInstanceOf[this.type] } // TODO we don't need to store the full command in here. We should be able to release the command directly into the relevant controller and only store the associated metadata in the ROB. This would reduce the size considerably @@ -70,7 +69,7 @@ class ReservationStation[T <: Data : Arithmetic, U <: Data, V <: Data](config: G } val instructions_allocated = RegInit(0.U(32.W)) - when (io.alloc.fire()) { + when (io.alloc.fire) { instructions_allocated := instructions_allocated + 1.U } dontTouch(instructions_allocated) @@ -131,7 +130,7 @@ class ReservationStation[T <: Data : Arithmetic, U <: Data, V <: Data](config: G val new_partial_allocs = Wire(Vec(reservation_station_partial_entries, Bool())) new_partial_allocs.foreach(_ := false.B) val new_entry_oh = new_full_allocs ++ new_partial_allocs - val alloc_fire = io.alloc.fire() + val alloc_fire = io.alloc.fire val raws_probe = WireInit(0.U(rob_entries.W)) val waws_probe = WireInit(0.U(rob_entries.W)) @@ -367,7 +366,7 @@ class ReservationStation[T <: Data : Arithmetic, U <: Data, V <: Data](config: G new_full_allocs(full_alloc_id) := true.B } - when (io.alloc.fire()) { + when (io.alloc.fire) { when (new_entry.is_config && new_entry.q === exq && !is_im2col) { a_stride := new_entry.cmd.rs1(31, 16) // TODO magic numbers // TODO this needs to be kept in sync with ExecuteController.scala c_stride := new_entry.cmd.rs2(63, 48) // TODO magic numbers // TODO this needs to be kept in sync with ExecuteController.scala @@ -420,7 +419,7 @@ class ReservationStation[T <: Data : Arithmetic, U <: Data, V <: Data](config: G } // Mark entries as completed once they've returned - when (io.completed.fire()) { + when (io.completed.fire) { entries.foreach(_.bits.deps(io.completed.bits) := false.B) for ((e, i) <- entries.zipWithIndex) { @@ -461,7 +460,7 @@ class ReservationStation[T <: Data : Arithmetic, U <: Data, V <: Data](config: G val cycles_since_issue = RegInit(0.U(16.W)) - when (io.issue.ld.fire() || io.issue.st.fire() || io.issue.ex.fire() || !io.busy || io.completed.fire()) { + when (io.issue.ld.fire() || io.issue.st.fire() || io.issue.ex.fire() || !io.busy || io.completed.fire) { cycles_since_issue := 0.U }.elsewhen(io.busy) { cycles_since_issue := cycles_since_issue + 1.U diff --git a/src/main/scala/gemmini/Scratchpad.scala b/src/main/scala/gemmini/Scratchpad.scala index 2b97d5a9..bf3be036 100644 --- a/src/main/scala/gemmini/Scratchpad.scala +++ b/src/main/scala/gemmini/Scratchpad.scala @@ -24,7 +24,6 @@ class ScratchpadMemReadRequest[U <: Data](local_addr_t: LocalAddr, scale_t_bits: val cmd_id = UInt(8.W) // TODO don't use a magic number here val status = new MStatus - override def cloneType: this.type = new ScratchpadMemReadRequest(local_addr_t, scale_t_bits).asInstanceOf[this.type] } class ScratchpadMemWriteRequest(local_addr_t: LocalAddr, scale_t_bits: Int) @@ -45,7 +44,6 @@ class ScratchpadMemWriteRequest(local_addr_t: LocalAddr, scale_t_bits: Int) val pool_en = Bool() val store_en = Bool() - override def cloneType: this.type = new ScratchpadMemWriteRequest(local_addr_t, scale_t_bits).asInstanceOf[this.type] } class ScratchpadMemWriteResponse extends Bundle { @@ -61,7 +59,6 @@ class ScratchpadReadMemIO[U <: Data](local_addr_t: LocalAddr, scale_t_bits: Int) val req = Decoupled(new ScratchpadMemReadRequest(local_addr_t, scale_t_bits)) val resp = Flipped(Valid(new ScratchpadMemReadResponse)) - override def cloneType: this.type = new ScratchpadReadMemIO(local_addr_t, scale_t_bits).asInstanceOf[this.type] } class ScratchpadWriteMemIO(local_addr_t: LocalAddr, scale_t_bits: Int) @@ -69,7 +66,6 @@ class ScratchpadWriteMemIO(local_addr_t: LocalAddr, scale_t_bits: Int) val req = Decoupled(new ScratchpadMemWriteRequest(local_addr_t, scale_t_bits)) val resp = Flipped(Valid(new ScratchpadMemWriteResponse)) - override def cloneType: this.type = new ScratchpadWriteMemIO(local_addr_t, scale_t_bits).asInstanceOf[this.type] } class ScratchpadReadReq(val n: Int) extends Bundle { @@ -142,7 +138,7 @@ class ScratchpadBank(n: Int, w: Int, aligned_to: Int, single_ported: Boolean, us } val raddr = io.read.req.bits.addr - val ren = io.read.req.fire() + val ren = io.read.req.fire val rdata = if (single_ported) { assert(!(ren && io.write.en)) read(raddr, ren && !io.write.en).asUInt() @@ -158,7 +154,7 @@ class ScratchpadBank(n: Int, w: Int, aligned_to: Int, single_ported: Boolean, us q.io.enq.bits.data := rdata q.io.enq.bits.fromDMA := RegNext(fromDMA) - val q_will_be_empty = (q.io.count +& q.io.enq.fire()) - q.io.deq.fire() === 0.U + val q_will_be_empty = (q.io.count +& q.io.enq.fire) - q.io.deq.fire === 0.U io.read.req.ready := q_will_be_empty && !singleport_busy_with_write io.read.resp <> q.io.deq @@ -292,7 +288,7 @@ class Scratchpad[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T, io.dma.write.resp.valid := false.B io.dma.write.resp.bits.cmd_id := write_dispatch_q.bits.cmd_id - when (write_dispatch_q.bits.laddr.is_garbage() && write_dispatch_q.fire()) { + when (write_dispatch_q.bits.laddr.is_garbage() && write_dispatch_q.fire) { io.dma.write.resp.valid := true.B } @@ -397,9 +393,9 @@ class Scratchpad[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T, reader.module.io.resp.ready := Mux(reader.module.io.resp.bits.is_acc && reader.module.io.resp.bits.has_acc_bitwidth, mvin_scale_acc_in.ready, mvin_scale_in.ready) - val mvin_scale_finished = mvin_scale_pixel_repeater.io.resp.fire() && mvin_scale_pixel_repeater.io.resp.bits.last - val mvin_scale_acc_finished = mvin_scale_acc_out.fire() && mvin_scale_acc_out.bits.last - val zero_writer_finished = zero_writer_pixel_repeater.io.resp.fire() && zero_writer_pixel_repeater.io.resp.bits.last + val mvin_scale_finished = mvin_scale_pixel_repeater.io.resp.fire && mvin_scale_pixel_repeater.io.resp.bits.last + val mvin_scale_acc_finished = mvin_scale_acc_out.fire && mvin_scale_acc_out.bits.last + val zero_writer_finished = zero_writer_pixel_repeater.io.resp.fire && zero_writer_pixel_repeater.io.resp.bits.last val zero_writer_bytes_read = Mux(zero_writer_pixel_repeater.io.resp.bits.laddr.is_acc_addr, zero_writer_pixel_repeater.io.resp.bits.tag.cols * (accType.getWidth / 8).U, @@ -460,7 +456,7 @@ class Scratchpad[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T, bio.read.req.bits.addr := write_dispatch_q.bits.laddr.sp_row() bio.read.req.bits.fromDMA := true.B - when (bio.read.req.fire()) { + when (bio.read.req.fire) { write_dispatch_q.ready := true.B write_scale_q.io.enq.valid := true.B @@ -485,7 +481,7 @@ class Scratchpad[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T, dma_read_pipe.ready := writer.module.io.req.ready && !write_issue_q.io.deq.bits.laddr.is_acc_addr && write_issue_q.io.deq.bits.laddr.sp_bank() === i.U && // I believe we don't need to check that write_issue_q is valid here, because if the SRAM's resp is valid, then that means that the write_issue_q's deq should also be valid !write_issue_q.io.deq.bits.laddr.is_garbage() - when (dma_read_pipe.fire()) { + when (dma_read_pipe.fire) { writeData.valid := true.B writeData.bits := dma_read_pipe.bits.data } @@ -630,7 +626,7 @@ class Scratchpad[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T, bio.read.req.bits.scale := write_dispatch_q.bits.acc_scale.asTypeOf(bio.read.req.bits.scale) bio.read.req.bits.fromDMA := true.B - when (bio.read.req.fire()) { + when (bio.read.req.fire) { write_dispatch_q.ready := true.B write_scale_q.io.enq.valid := true.B @@ -701,10 +697,10 @@ class Scratchpad[T <: Data, U <: Data, V <: Data](config: GemminiArrayConfig[T, val consecutive_write_block = RegInit(false.B) if (acc_singleported) { val consecutive_write_sub_bank = RegInit(0.U((1 max log2Ceil(acc_sub_banks)).W)) - when (bio.write.fire() && bio.write.bits.acc && + when (bio.write.fire && bio.write.bits.acc && (bio.write.bits.addr(log2Ceil(acc_sub_banks)-1,0) === consecutive_write_sub_bank)) { consecutive_write_block := true.B - } .elsewhen (bio.write.fire() && bio.write.bits.acc) { + } .elsewhen (bio.write.fire && bio.write.bits.acc) { consecutive_write_block := false.B consecutive_write_sub_bank := bio.write.bits.addr(log2Ceil(acc_sub_banks)-1,0) } .otherwise { diff --git a/src/main/scala/gemmini/SharedExtMem.scala b/src/main/scala/gemmini/SharedExtMem.scala index 9d0e1802..f3acdd2e 100644 --- a/src/main/scala/gemmini/SharedExtMem.scala +++ b/src/main/scala/gemmini/SharedExtMem.scala @@ -20,7 +20,6 @@ class ExtMemIO extends Bundle { class ExtSpadMemIO(sp_banks: Int, acc_banks: Int, acc_sub_banks: Int) extends Bundle { val spad = Vec(sp_banks, new ExtMemIO) val acc = Vec(acc_banks, Vec(acc_sub_banks, new ExtMemIO)) - override def cloneType: this.type = new ExtSpadMemIO(sp_banks, acc_banks, acc_sub_banks).asInstanceOf[this.type] } diff --git a/src/main/scala/gemmini/StoreController.scala b/src/main/scala/gemmini/StoreController.scala index 28de72c3..e2c82dd5 100644 --- a/src/main/scala/gemmini/StoreController.scala +++ b/src/main/scala/gemmini/StoreController.scala @@ -156,7 +156,7 @@ class StoreController[T <: Data : Arithmetic, U <: Data, V <: Data](config: Gemm cmd_tracker.io.alloc.bits.bytes_to_read := Mux(!pooling_is_enabled, Mux(mvout_1d_enabled, mvout_1d_rows, rows*blocks), pool_total_rows) // TODO do we have to add upad and lpad to this? cmd_tracker.io.alloc.bits.tag.rob_id := cmd.bits.rob_id.bits - cmd_tracker.io.request_returned.valid := io.dma.resp.fire() // TODO use a bundle connect + cmd_tracker.io.request_returned.valid := io.dma.resp.fire // TODO use a bundle connect cmd_tracker.io.request_returned.bits.cmd_id := io.dma.resp.bits.cmd_id // TODO use a bundle connect cmd_tracker.io.request_returned.bits.bytes_read := 1.U cmd_tracker.io.cmd_completed.ready := io.completed.ready @@ -170,7 +170,7 @@ class StoreController[T <: Data : Arithmetic, U <: Data, V <: Data](config: Gemm io.busy := cmd.valid || cmd_tracker.io.busy // Row counter - when (io.dma.req.fire()) { + when (io.dma.req.fire) { when (!pooling_is_enabled) { //where does rows come from? //row_counter := wrappingAdd(row_counter, 1.U, rows) @@ -223,20 +223,20 @@ class StoreController[T <: Data : Arithmetic, U <: Data, V <: Data](config: Gemm } .elsewhen(DoStore && cmd_tracker.io.alloc.fire()) { val next_state = Mux(pooling_is_enabled, pooling, sending_rows) - control_state := Mux(io.dma.req.fire(), next_state, waiting_for_dma_req_ready) + control_state := Mux(io.dma.req.fire, next_state, waiting_for_dma_req_ready) } } } is (waiting_for_dma_req_ready) { - when (io.dma.req.fire()) { + when (io.dma.req.fire) { control_state := Mux(pooling_is_enabled, pooling, sending_rows) } } is (sending_rows) { - val last_block = block_counter === blocks - 1.U && io.dma.req.fire() - val last_row = Mux(mvout_1d_enabled, row_counter === mvout_1d_rows - 1.U, row_counter === rows - 1.U) && io.dma.req.fire() + val last_block = block_counter === blocks - 1.U && io.dma.req.fire + val last_row = Mux(mvout_1d_enabled, row_counter === mvout_1d_rows - 1.U, row_counter === rows - 1.U) && io.dma.req.fire //normal mvout: row, 1D mvout: orows*ocols val only_one_dma_req = block_counter === 0.U && row_counter === 0.U // This is a special case when only one DMA request is made @@ -251,7 +251,7 @@ class StoreController[T <: Data : Arithmetic, U <: Data, V <: Data](config: Gemm // TODO Is it really possible for all the counters to be 0 here? val last_row = (porow_counter === 0.U && pocol_counter === 0.U && wrow_counter === 0.U && wcol_counter === 0.U) || (porow_counter === pool_porows - 1.U && pocol_counter === pool_pocols - 1.U && - wrow_counter === pool_size - 1.U && wcol_counter === pool_size - 1.U && io.dma.req.fire()) + wrow_counter === pool_size - 1.U && wcol_counter === pool_size - 1.U && io.dma.req.fire) when (last_row) { control_state := waiting_for_command diff --git a/src/main/scala/gemmini/SyncMem.scala b/src/main/scala/gemmini/SyncMem.scala index 799e45c5..43200015 100644 --- a/src/main/scala/gemmini/SyncMem.scala +++ b/src/main/scala/gemmini/SyncMem.scala @@ -10,7 +10,6 @@ class SinglePortedSyncMemIO[T <: Data](n: Int, t: T) extends Bundle { val wen = Input(Bool()) val ren = Input(Bool()) - override def cloneType = (new SinglePortedSyncMemIO(n, t)).asInstanceOf[this.type] } class SinglePortSyncMem[T <: Data](n: Int, t: T) extends Module { diff --git a/src/main/scala/gemmini/TagQueue.scala b/src/main/scala/gemmini/TagQueue.scala index 57b9cf4e..9a6464c3 100644 --- a/src/main/scala/gemmini/TagQueue.scala +++ b/src/main/scala/gemmini/TagQueue.scala @@ -28,19 +28,19 @@ class TagQueue[T <: Data with TagQueueTag](t: T, entries: Int) extends Module { io.deq.bits := regs(raddr) io.all := regs - when (io.enq.fire()) { + when (io.enq.fire) { regs(waddr) := io.enq.bits waddr := wrappingAdd(waddr, 1.U, entries) } - when (io.deq.fire()) { + when (io.deq.fire) { regs(raddr).make_this_garbage() raddr := wrappingAdd(raddr, 1.U, entries) } - when (io.enq.fire() && !io.deq.fire()) { + when (io.enq.fire && !io.deq.fire) { len := len + 1.U - }.elsewhen(!io.enq.fire() && io.deq.fire()) { + }.elsewhen(!io.enq.fire && io.deq.fire) { len := len - 1.U } diff --git a/src/main/scala/gemmini/TilerController.scala b/src/main/scala/gemmini/TilerController.scala index 87ebff98..f3275790 100644 --- a/src/main/scala/gemmini/TilerController.scala +++ b/src/main/scala/gemmini/TilerController.scala @@ -26,8 +26,6 @@ class TilerCmd(OTYPE_BITS_IDX: Int) val repeating_bias = Bool() val status = new MStatus - override def cloneType: this.type = - (new TilerCmd(OTYPE_BITS_IDX)).asInstanceOf[this.type] } diff --git a/src/main/scala/gemmini/TilerFSM.scala b/src/main/scala/gemmini/TilerFSM.scala index b60a5991..db400f96 100644 --- a/src/main/scala/gemmini/TilerFSM.scala +++ b/src/main/scala/gemmini/TilerFSM.scala @@ -107,7 +107,7 @@ class TilerFSM[T <: Data : Arithmetic, U <: Data, V <: Data] // combinational calculation of optimal output-groups. this is updated at // the s_IDLE -> s_RESET_OUTPUT_GROUP state transition //------------------------------------------------------------------------ - val g_OG_DIM_SELECT = OG_HEIGHT_MAP.zipWithIndex.map{ case(h,i) => + val g_OG_DIM_SELECT = OG_HEIGHT_MAP.zipWithIndex.map{ case(h,i) => val w = TOTAL_ACC_TILES/h if (h < w) WireDefault(g_TILE_ROW_END < h.U) else if(h > w) WireDefault(g_TILE_COL_END < w.U) @@ -198,9 +198,9 @@ class TilerFSM[T <: Data : Arithmetic, U <: Data, V <: Data] // continuous assigns (only added in the switch-cases that call this!) def update_tile_dims(dummy: Int = 0) = { - gbl_item_rows := Mux(gbl_tile_row_n === g_TILE_ROW_END, + gbl_item_rows := Mux(gbl_tile_row_n === g_TILE_ROW_END, g_LAST_M_ITEMS, DIM.U) - gbl_item_cols := Mux(gbl_tile_col_n === g_TILE_COL_END, + gbl_item_cols := Mux(gbl_tile_col_n === g_TILE_COL_END, g_LAST_N_ITEMS, DIM.U) loop2_k_item_dims := Mux(loop2_k_tile_col_n === g_K_TILE_COL_END, g_LAST_K_ITEMS, DIM.U) @@ -246,7 +246,7 @@ class TilerFSM[T <: Data : Arithmetic, U <: Data, V <: Data] g_LAST_N_ITEMS := Mux(cmd.n(LOG2_DIM-1,0).orR,cmd.n(LOG2_DIM-1,0),DIM.U) g_LAST_K_ITEMS := Mux(cmd.k(LOG2_DIM-1,0).orR,cmd.k(LOG2_DIM-1,0),DIM.U) - g_TILE_ROW_END := (cmd.m >> LOG2_DIM) + cmd.m(LOG2_DIM-1,0).orR - 1.U + g_TILE_ROW_END := (cmd.m >> LOG2_DIM) + cmd.m(LOG2_DIM-1,0).orR - 1.U g_TILE_COL_END := (cmd.n >> LOG2_DIM) + cmd.n(LOG2_DIM-1,0).orR - 1.U g_K_TILE_COL_END := (cmd.k >> LOG2_DIM) + cmd.k(LOG2_DIM-1,0).orR - 1.U @@ -256,7 +256,7 @@ class TilerFSM[T <: Data : Arithmetic, U <: Data, V <: Data] // issue gemmini commands // NOTE: the "h10000".U(17) is because a_addr_stride was added to ExecuteController - when(io.cmd_in.fire()) { + when(io.cmd_in.fire) { sched.push := 2.U sched.bits(0).inst.funct := CONFIG_CMD sched.bits(0).rs1 := (g_ACC_OUT_RSHIFT << 32) | @@ -639,7 +639,7 @@ class TilerFSM[T <: Data : Arithmetic, U <: Data, V <: Data] val l_did_row_incr = WireDefault(false.B) val l_did_col_incr = WireDefault(false.B) - when (gbl_tile_col === g_TILE_COL_END && + when (gbl_tile_col === g_TILE_COL_END && gbl_tile_row === g_TILE_ROW_END) { // update next state state := s_IDLE @@ -658,7 +658,7 @@ class TilerFSM[T <: Data : Arithmetic, U <: Data, V <: Data] update_tile_dims() l_did_col_incr := true.B } - + // reset global state that resets for each new output-group gbl_CD_acc_row_addr := 0.U @@ -672,11 +672,11 @@ class TilerFSM[T <: Data : Arithmetic, U <: Data, V <: Data] loop1_tile_col_start := l_tile_col_start loop1_tile_col_end := l_tile_col_end - + loop1_tile_row_start := l_tile_row_start loop1_tile_row_end := l_tile_row_end - - + + // update all derived pointers to matrices in memory when(l_did_row_incr) { loop1_A_mem_addr := g_A_MEM_ADDR + (l_tile_row_start * @@ -693,7 +693,7 @@ class TilerFSM[T <: Data : Arithmetic, U <: Data, V <: Data] loop1_A_mem_addr := loop1_A_mem_addr + 0.U loop1_B_mem_addr := loop1_B_mem_addr + g_I_BYTE_COLS_PER_GROUP loop1_C_mem_addr := loop1_C_mem_addr + g_I_BYTE_COLS_PER_GROUP - loop1_D_mem_addr := loop1_D_mem_addr + + loop1_D_mem_addr := loop1_D_mem_addr + Mux(!g_HAS_BIAS, 0.U, g_O_BYTE_COLS_PER_GROUP) } diff --git a/src/main/scala/gemmini/TilerScheduler.scala b/src/main/scala/gemmini/TilerScheduler.scala index a1225ed7..c09ff949 100644 --- a/src/main/scala/gemmini/TilerScheduler.scala +++ b/src/main/scala/gemmini/TilerScheduler.scala @@ -48,9 +48,9 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] val is_acc = Bool() val start = UInt(30.W) // TODO magic number val end = UInt(30.W) // TODO magic number - def overlaps(other: SPRange) = valid && other.valid && + def overlaps(other: SPRange) = valid && other.valid && (is_acc === other.is_acc) && - (start < other.end) && + (start < other.end) && (end > other.start) } @@ -93,14 +93,14 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] val new_entry = Wire(new Entry) new_entry := DontCare - val new_entry_id = MuxCase((ROB_ENTRIES-1).U, entries.zipWithIndex.map { + val new_entry_id = MuxCase((ROB_ENTRIES-1).U, entries.zipWithIndex.map { case (e, i) => !e.valid -> i.U }) - val alloc_fire = io.cmd_in.fire() + val alloc_fire = io.cmd_in.fire - when (io.cmd_in.fire()) { + when (io.cmd_in.fire) { val cmd = io.cmd_in.bits val funct = cmd.inst.funct - val funct_is_compute = funct === COMPUTE_AND_STAY_CMD || + val funct_is_compute = funct === COMPUTE_AND_STAY_CMD || funct === COMPUTE_AND_FLIP_CMD val funct_is_compute_preload = funct === COMPUTE_AND_FLIP_CMD val config_cmd_type = cmd.rs1(1,0) // TODO magic numbers @@ -121,22 +121,22 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] new_entry.op2.valid := funct_is_compute || funct === STORE_CMD new_entry.op2.is_acc := cmd.rs2(31) new_entry.op2.start := cmd.rs2(29,0) - new_entry.op2.end := cmd.rs2(29,0) + + new_entry.op2.end := cmd.rs2(29,0) + Mux(funct_is_compute, DIM.U, mvin_mvout_rows) new_entry.dst.valid := funct === PRELOAD_CMD || funct === LOAD_CMD new_entry.dst.is_acc := cmd.rs2(31) new_entry.dst.start := cmd.rs2(29,0) - new_entry.dst.end := cmd.rs2(29,0) + - Mux(funct === PRELOAD_CMD, DIM.U, + new_entry.dst.end := cmd.rs2(29,0) + + Mux(funct === PRELOAD_CMD, DIM.U, mvin_mvout_rows) - val is_load = (funct === LOAD_CMD) || + val is_load = (funct === LOAD_CMD) || (funct === CONFIG_CMD && config_cmd_type === CONFIG_LOAD) - val is_store = (funct === STORE_CMD) || + val is_store = (funct === STORE_CMD) || (funct === CONFIG_CMD && config_cmd_type === CONFIG_STORE) val is_exec = funct === PRELOAD_CMD || - funct_is_compute || + funct_is_compute || (funct === CONFIG_CMD && config_cmd_type === CONFIG_EX) val is_preload = funct === PRELOAD_CMD @@ -153,22 +153,22 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] when(new_entry.is_config) { when (new_entry.is_load) { printf( - "cycle[%d], entry[%d], accept[%d], config_mvin[stride=%x]\n", - debug_cycle, new_entry_id, cmd_id.value, + "cycle[%d], entry[%d], accept[%d], config_mvin[stride=%x]\n", + debug_cycle, new_entry_id, cmd_id.value, new_entry.cmd.rs2) } .elsewhen (new_entry.is_store) { printf( - "cycle[%d], entry[%d], accept[%d], config_mvout[stride=%x]\n", - debug_cycle, new_entry_id, cmd_id.value, + "cycle[%d], entry[%d], accept[%d], config_mvout[stride=%x]\n", + debug_cycle, new_entry_id, cmd_id.value, new_entry.cmd.rs2) } .otherwise { assert(new_entry.is_exec) printf( "cycle[%d], entry[%d], accept[%d], " + - "config_ex[matmul_rshift=%x, acc_rshift=%x, relu6_lshift=%x]\n", - debug_cycle, new_entry_id, cmd_id.value, + "config_ex[matmul_rshift=%x, acc_rshift=%x, relu6_lshift=%x]\n", + debug_cycle, new_entry_id, cmd_id.value, cmd.rs1(63,32), cmd.rs2(31,0), cmd.rs2(63,32)) } } @@ -176,20 +176,20 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] printf( "cycle[%d], entry[%d], accept[%d], " + "mvin[dram=%x, spad=%x, rows=%x, cols=%x]\n", - debug_cycle, new_entry_id, cmd_id.value, + debug_cycle, new_entry_id, cmd_id.value, cmd.rs1, cmd.rs2(31,0), cmd.rs2(63,48), cmd.rs2(47,32)) } .elsewhen (new_entry.is_store) { printf( - "cycle[%d], entry[%d], accept[%d], " + + "cycle[%d], entry[%d], accept[%d], " + "mvout[dram=%x, spad=%x, rows=%x, cols=%x]\n", - debug_cycle, new_entry_id, cmd_id.value, + debug_cycle, new_entry_id, cmd_id.value, cmd.rs1, cmd.rs2(31,0), cmd.rs2(63,48), cmd.rs2(47,32)) } .elsewhen (new_entry.is_preload) { printf( "cycle[%d], entry[%d], accept[%d], preload[B=%x, C=%x]\n", - debug_cycle, new_entry_id, cmd_id.value, + debug_cycle, new_entry_id, cmd_id.value, cmd.rs1(31,0), cmd.rs2(31,0)) } .otherwise { @@ -197,13 +197,13 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] when (funct_is_compute_preload) { printf( "cycle[%d], entry[%d], accept[%d], ex.pre[A=%x, D=%x]\n", - debug_cycle, new_entry_id, cmd_id.value, + debug_cycle, new_entry_id, cmd_id.value, cmd.rs1(31,0), cmd.rs2(31,0)) } .otherwise { printf( "cycle[%d], entry[%d], accept[%d], ex.acc[A=%x, D=%x]\n", - debug_cycle, new_entry_id, cmd_id.value, + debug_cycle, new_entry_id, cmd_id.value, cmd.rs1(31,0), cmd.rs2(31,0)) } } @@ -228,12 +228,12 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] )} // We search for all entries which write to an address that we write to - val waws = entries.map { e => e.valid && + val waws = entries.map { e => e.valid && new_entry.dst.overlaps(e.bits.dst) } - val older_in_same_q = entries.map { e => e.valid && - e.bits.q === new_entry.q && + val older_in_same_q = entries.map { e => e.valid && + e.bits.q === new_entry.q && !e.bits.issued } @@ -247,11 +247,11 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] (new_entry.q === exq && new_entry.is_config) } - new_entry.deps := (Cat(raws) | - Cat(wars) | - Cat(waws) | + new_entry.deps := (Cat(raws) | + Cat(wars) | + Cat(waws) | Cat(older_in_same_q) | - Cat(is_st_and_must_wait_for_prior_ex_config) | + Cat(is_st_and_must_wait_for_prior_ex_config) | Cat(is_ex_config_and_must_wait_for_prior_st) ).asBools().reverse @@ -264,20 +264,20 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] } // Issue commands which are ready to be issued - Seq((ldq, io.issue.load), - (stq, io.issue.store), + Seq((ldq, io.issue.load), + (stq, io.issue.store), (exq, io.issue.exec)).foreach { case (q, io) => - val issue_id = MuxCase((ROB_ENTRIES-1).U, entries.zipWithIndex.map { - case (e, i) => (e.valid && e.bits.ready() && + val issue_id = MuxCase((ROB_ENTRIES-1).U, entries.zipWithIndex.map { + case (e, i) => (e.valid && e.bits.ready() && !e.bits.issued && e.bits.q === q) -> i.U }) - io.valid := entries.map(e => e.valid && e.bits.ready() && !e.bits.issued + io.valid := entries.map(e => e.valid && e.bits.ready() && !e.bits.issued && e.bits.q === q).reduce(_ || _) io.bits.cmd := entries(issue_id).bits.cmd io.bits.rob_id.push(issue_id) // ssteff: added for debug - when(io.fire()) { + when(io.fire) { //====================================================================== // debug //====================================================================== @@ -287,7 +287,7 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] "cycle[%d], entry[%d], issue[%d], config_mvin\n", debug_cycle, issue_id, entries(issue_id).bits.cmd_id) printf( - "cycle[%d], entry[%d], final[%d], config_mvin\n", + "cycle[%d], entry[%d], final[%d], config_mvin\n", debug_cycle, issue_id, entries(issue_id).bits.cmd_id) } .elsewhen (entries(issue_id).bits.is_store) { @@ -295,7 +295,7 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] "cycle[%d], entry[%d], issue[%d], config_mvout\n", debug_cycle, issue_id, entries(issue_id).bits.cmd_id) printf( - "cycle[%d], entry[%d], final[%d], config_mvout\n", + "cycle[%d], entry[%d], final[%d], config_mvout\n", debug_cycle, issue_id, entries(issue_id).bits.cmd_id) } .otherwise { @@ -330,7 +330,7 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] entries(issue_id).bits.issued := true.B - // Clear out all the dependency bits for instructions which + // Clear out all the dependency bits for instructions which // depend on the same queue entries.zipWithIndex.foreach { case (e, i) => val is_same_q = Mux(alloc_fire && new_entry_id === i.U, @@ -347,7 +347,7 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] } // Mark entries as completed once they've returned - when (io.completed.fire()) { + when (io.completed.fire) { //====================================================================== // debug //====================================================================== @@ -356,32 +356,32 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] assert(entries(io.completed.bits).bits.is_exec) printf( "cycle[%d], entry[%d], final[%d], config_ex\n", - debug_cycle, io.completed.bits, + debug_cycle, io.completed.bits, entries(io.completed.bits).bits.cmd_id) } .elsewhen (entries(io.completed.bits).bits.is_load) { printf( "cycle[%d], entry[%d], final[%d], mvin\n", - debug_cycle, io.completed.bits, + debug_cycle, io.completed.bits, entries(io.completed.bits).bits.cmd_id) } .elsewhen (entries(io.completed.bits).bits.is_store) { printf( "cycle[%d], entry[%d], final[%d], mvout\n", - debug_cycle, io.completed.bits, + debug_cycle, io.completed.bits, entries(io.completed.bits).bits.cmd_id) } .elsewhen (entries(io.completed.bits).bits.is_preload) { printf( "cycle[%d], entry[%d], final[%d], preload\n", - debug_cycle, io.completed.bits, + debug_cycle, io.completed.bits, entries(io.completed.bits).bits.cmd_id) } .otherwise { assert(entries(io.completed.bits).bits.is_exec) printf( "cycle[%d], entry[%d], final[%d], ex\n", - debug_cycle, io.completed.bits, + debug_cycle, io.completed.bits, entries(io.completed.bits).bits.cmd_id) } //====================================================================== @@ -393,14 +393,14 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] } val util = PopCount(entries.map(e => e.valid)) - val util_ld_q_unissued = PopCount(entries.map(e => e.valid && - !e.bits.issued && + val util_ld_q_unissued = PopCount(entries.map(e => e.valid && + !e.bits.issued && e.bits.q === ldq)) - val util_st_q_unissued = PopCount(entries.map(e => e.valid && - !e.bits.issued && + val util_st_q_unissued = PopCount(entries.map(e => e.valid && + !e.bits.issued && e.bits.q === stq)) - val util_ex_q_unissued = PopCount(entries.map(e => e.valid && - !e.bits.issued && + val util_ex_q_unissued = PopCount(entries.map(e => e.valid && + !e.bits.issued && e.bits.q === exq)) val util_ld_q = PopCount(entries.map(e => e.valid && e.bits.q === ldq)) val util_st_q = PopCount(entries.map(e => e.valid && e.bits.q === stq)) @@ -417,9 +417,9 @@ class TilerScheduler[T <: Data: Arithmetic, U <: Data, V <: Data] val cycles_since_issue = RegInit(0.U(32.W)) - when (io.issue.load.fire() || - io.issue.store.fire() || - io.issue.exec.fire() || + when (io.issue.load.fire || + io.issue.store.fire || + io.issue.exec.fire || !io.busy) { cycles_since_issue := 0.U } .elsewhen (io.busy) { diff --git a/src/main/scala/gemmini/TransposePreloadUnroller.scala b/src/main/scala/gemmini/TransposePreloadUnroller.scala index 0bac0e5b..fb3ef127 100644 --- a/src/main/scala/gemmini/TransposePreloadUnroller.scala +++ b/src/main/scala/gemmini/TransposePreloadUnroller.scala @@ -65,9 +65,9 @@ class TransposePreloadUnroller[T <: Data, U <: Data, V <: Data](config: GemminiA (state === second_preload) -> second_preload_cmd, )) - q.pop := Mux(io.out.fire() && !(first_preload && unroll_preload) && state =/= first_compute, 1.U, 0.U) + q.pop := Mux(io.out.fire && !(first_preload && unroll_preload) && state =/= first_compute, 1.U, 0.U) - when (io.out.fire()) { + when (io.out.fire) { when (is_config) { val set_only_strides = cmds(0).cmd.rs1(7) when (!set_only_strides) { diff --git a/src/main/scala/gemmini/Transposer.scala b/src/main/scala/gemmini/Transposer.scala index 1abbd840..23fc5365 100644 --- a/src/main/scala/gemmini/Transposer.scala +++ b/src/main/scala/gemmini/Transposer.scala @@ -19,8 +19,8 @@ class PipelinedTransposer[T <: Data](val dim: Int, val dataType: T) extends Tran val regArrayT = regArray.transpose val sMoveUp :: sMoveLeft :: Nil = Enum(2) val state = RegInit(sMoveUp) - val leftCounter = RegInit(0.U(log2Ceil(dim+1).W)) //(io.inRow.fire() && state === sMoveLeft, dim+1) - val upCounter = RegInit(0.U(log2Ceil(dim+1).W)) //Counter(io.inRow.fire() && state === sMoveUp, dim+1) + val leftCounter = RegInit(0.U(log2Ceil(dim+1).W)) //(io.inRow.fire && state === sMoveLeft, dim+1) + val upCounter = RegInit(0.U(log2Ceil(dim+1).W)) //Counter(io.inRow.fire && state === sMoveUp, dim+1) io.outCol.valid := 0.U io.inRow.ready := 0.U @@ -28,14 +28,14 @@ class PipelinedTransposer[T <: Data](val dim: Int, val dataType: T) extends Tran is(sMoveUp) { io.inRow.ready := upCounter <= dim.U io.outCol.valid := leftCounter > 0.U - when(io.inRow.fire()) { + when(io.inRow.fire) { upCounter := upCounter + 1.U } when(upCounter === (dim-1).U) { state := sMoveLeft leftCounter := 0.U } - when(io.outCol.fire()) { + when(io.outCol.fire) { leftCounter := leftCounter - 1.U } } @@ -45,11 +45,11 @@ class PipelinedTransposer[T <: Data](val dim: Int, val dataType: T) extends Tran when(leftCounter === (dim-1).U) { state := sMoveUp } - when(io.inRow.fire()) { + when(io.inRow.fire) { leftCounter := leftCounter + 1.U upCounter := 0.U } - when(io.outCol.fire()) { + when(io.outCol.fire) { upCounter := upCounter - 1.U } } @@ -131,7 +131,7 @@ class AlwaysOutTransposer[T <: Data](val dim: Int, val dataType: T) extends Tran // Wire up global signals pes.flatten.foreach(_.io.dir := dir) - pes.flatten.foreach(_.io.en := io.inRow.fire()) + pes.flatten.foreach(_.io.en := io.inRow.fire) io.outCol.valid := true.B io.inRow.ready := true.B @@ -141,11 +141,11 @@ class AlwaysOutTransposer[T <: Data](val dim: Int, val dataType: T) extends Tran io.outCol.bits := Mux(dir === LEFT_DIR, left_out, up_out) - when (io.inRow.fire()) { + when (io.inRow.fire) { counter := wrappingAdd(counter, 1.U, dim) } - when (counter === (dim-1).U && io.inRow.fire()) { + when (counter === (dim-1).U && io.inRow.fire) { dir := ~dir } } @@ -155,7 +155,7 @@ class NaiveTransposer[T <: Data](val dim: Int, val dataType: T) extends Transpos val regArrayT = regArray.transpose // state = 0 => filling regArray row-wise, state = 1 => draining regArray column-wise val state = RegInit(0.U(1.W)) - val countInc = io.inRow.fire() || io.outCol.fire() + val countInc = io.inRow.fire || io.outCol.fire val (countValue, countWrap) = Counter(countInc, dim) io.inRow.ready := state === 0.U @@ -163,7 +163,7 @@ class NaiveTransposer[T <: Data](val dim: Int, val dataType: T) extends Transpos for (i <- 0 until dim) { for (j <- 0 until dim) { - when(countValue === i.U && io.inRow.fire()) { + when(countValue === i.U && io.inRow.fire) { regArray(i)(j) := io.inRow.bits(j) } } @@ -178,13 +178,13 @@ class NaiveTransposer[T <: Data](val dim: Int, val dataType: T) extends Transpos } } - when (io.inRow.fire() && countWrap) { + when (io.inRow.fire && countWrap) { state := 1.U } - when (io.outCol.fire() && countWrap) { + when (io.outCol.fire && countWrap) { state := 0.U } - assert(!(state === 0.U) || !io.outCol.fire()) - assert(!(state === 1.U) || !io.inRow.fire()) + assert(!(state === 0.U) || !io.outCol.fire) + assert(!(state === 1.U) || !io.inRow.fire) } diff --git a/src/main/scala/gemmini/Util.scala b/src/main/scala/gemmini/Util.scala index 907c4ad2..51dc1377 100644 --- a/src/main/scala/gemmini/Util.scala +++ b/src/main/scala/gemmini/Util.scala @@ -140,7 +140,6 @@ object Util { bits } - override def cloneType: this.type = new UDValid(t.cloneType).asInstanceOf[this.type] } object UDValid { diff --git a/src/main/scala/gemmini/VectorScalarMultiplier.scala b/src/main/scala/gemmini/VectorScalarMultiplier.scala index 271bf290..2311b381 100644 --- a/src/main/scala/gemmini/VectorScalarMultiplier.scala +++ b/src/main/scala/gemmini/VectorScalarMultiplier.scala @@ -13,7 +13,6 @@ class VectorScalarMultiplierReq[T <: Data, U <: Data, Tag <: Data](block_cols: I val last: Bool = Bool() val tag: Tag = tag_t.cloneType - override def cloneType: VectorScalarMultiplierReq.this.type = new VectorScalarMultiplierReq(block_cols, t, u, tag_t).asInstanceOf[this.type] } class VectorScalarMultiplierResp[T <: Data, Tag <: Data](block_cols: Int, t: T, tag_t: Tag) extends Bundle { @@ -22,7 +21,6 @@ class VectorScalarMultiplierResp[T <: Data, Tag <: Data](block_cols: Int, t: T, val last: Bool = Bool() val tag: Tag = tag_t.cloneType - override def cloneType: VectorScalarMultiplierResp.this.type = new VectorScalarMultiplierResp(block_cols, t, tag_t).asInstanceOf[this.type] } class DataWithIndex[T <: Data, U <: Data](t: T, u: U) extends Bundle { @@ -30,7 +28,6 @@ class DataWithIndex[T <: Data, U <: Data](t: T, u: U) extends Bundle { val scale = u.cloneType val id = UInt(2.W) // TODO hardcoded val index = UInt() - override def cloneType: DataWithIndex.this.type = new DataWithIndex(t, u).asInstanceOf[this.type] } class ScalePipe[T <: Data, U <: Data](t: T, mvin_scale_args: ScaleArguments[T, U]) extends Module { @@ -69,7 +66,7 @@ class VectorScalarMultiplier[T <: Data, U <: Data, Tag <: Data]( val in_fire = WireInit(false.B) io.req.ready := !in.valid || (in.bits.repeats === 0.U && in_fire) - when (io.req.fire()) { + when (io.req.fire) { in.valid := io.req.valid in.bits := io.req.bits } .elsewhen (in_fire) { @@ -88,7 +85,7 @@ class VectorScalarMultiplier[T <: Data, U <: Data, Tag <: Data]( latency )()) io.resp <> pipe.io.out - in_fire := pipe.io.in.fire() + in_fire := pipe.io.in.fire pipe.io.in.valid := in.valid pipe.io.in.bits.tag := in.bits.tag @@ -111,7 +108,7 @@ class VectorScalarMultiplier[T <: Data, U <: Data, Tag <: Data]( io.resp.valid := Mux1H(head_oh.asBools, (regs zip completed_masks).map({case (r,c) => r.valid && c.reduce(_&&_)})) io.resp.bits := Mux1H(head_oh.asBools, out_regs) - when (io.resp.fire()) { + when (io.resp.fire) { for (i <- 0 until nEntries) { when (head_oh(i)) { regs(i).valid := false.B @@ -153,7 +150,7 @@ class VectorScalarMultiplier[T <: Data, U <: Data, Tag <: Data]( input.bits.scale := regs(i).bits.scale.asTypeOf(u) input.bits.id := i.U input.bits.index := w.U - when (input.fire()) { + when (input.fire) { fired_masks(i)(w) := true.B } } @@ -176,7 +173,7 @@ class VectorScalarMultiplier[T <: Data, U <: Data, Tag <: Data]( for (j <- 0 until nEntries) { for (w <- 0 until width) { if ((j*width+w) % num_scale_units == i) { - when (pipe_out.fire() && pipe_out.bits.id === j.U && pipe_out.bits.index === w.U) { + when (pipe_out.fire && pipe_out.bits.id === j.U && pipe_out.bits.index === w.U) { out_regs(j).out(w) := pipe_out.bits.data completed_masks(j)(w) := true.B } diff --git a/src/main/scala/gemmini/WeightedArbiter.scala b/src/main/scala/gemmini/WeightedArbiter.scala index a27decde..90a37273 100644 --- a/src/main/scala/gemmini/WeightedArbiter.scala +++ b/src/main/scala/gemmini/WeightedArbiter.scala @@ -61,7 +61,7 @@ class WeightedArbiter[T <: Data](t: T, maxWeightA: Int, staticWeightAEnabled: Bo } } - when (io.out.fire()) { + when (io.out.fire) { when (A_chosen) { count := satAdd(count, 1.U, weightA + 1.U) }.elsewhen(B_chosen) { diff --git a/src/main/scala/gemmini/XactTracker.scala b/src/main/scala/gemmini/XactTracker.scala index 84821d4e..277626a1 100644 --- a/src/main/scala/gemmini/XactTracker.scala +++ b/src/main/scala/gemmini/XactTracker.scala @@ -23,7 +23,6 @@ class XactTrackerEntry[U <: Data](maxShift: Int, spadWidth: Int, accWidth: Int, val bytes_to_read = UInt(log2Up(maxReqBytes+1).W) val cmd_id = UInt(log2Up(nCmds).W) - override def cloneType: XactTrackerEntry.this.type = new XactTrackerEntry(maxShift, spadWidth, accWidth, spadRows, accRows, maxReqBytes, mvin_scale_t_bits, nCmds).asInstanceOf[this.type] } class XactTrackerAllocIO[U <: Data](nXacts: Int, maxShift: Int, spadWidth: Int, accWidth :Int, @@ -36,7 +35,6 @@ class XactTrackerAllocIO[U <: Data](nXacts: Int, maxShift: Int, spadWidth: Int, def fire(dummy: Int = 0) = valid && ready - override def cloneType: XactTrackerAllocIO.this.type = new XactTrackerAllocIO(nXacts, maxShift, spadWidth, accWidth, spadRows, accRows, maxReqBytes, mvin_scale_t_bits, nCmds).asInstanceOf[this.type] } class XactTrackerPeekIO[U <: Data](val nXacts: Int, val maxShift: Int, val spadWidth: Int, val accWidth: Int, diff --git a/src/main/scala/gemmini/ZeroWriter.scala b/src/main/scala/gemmini/ZeroWriter.scala index 5a30aad5..a5c10abe 100644 --- a/src/main/scala/gemmini/ZeroWriter.scala +++ b/src/main/scala/gemmini/ZeroWriter.scala @@ -11,7 +11,6 @@ class ZeroWriterReq[Tag <: Data](laddr_t: LocalAddr, max_cols: Int, tag_t: Tag) val block_stride = UInt(16.W) // TODO magic number val tag = tag_t - override def cloneType: ZeroWriterReq.this.type = new ZeroWriterReq(laddr_t.cloneType, max_cols, tag_t.cloneType).asInstanceOf[this.type] } class ZeroWriterResp[Tag <: Data](laddr_t: LocalAddr, block_cols: Int, tag_t: Tag) extends Bundle { @@ -20,7 +19,6 @@ class ZeroWriterResp[Tag <: Data](laddr_t: LocalAddr, block_cols: Int, tag_t: Ta val last = Bool() val tag = tag_t - override def cloneType: ZeroWriterResp.this.type = new ZeroWriterResp(laddr_t, block_cols, tag_t.cloneType).asInstanceOf[this.type] } class ZeroWriter[T <: Data, U <: Data, V <: Data, Tag <: Data](config: GemminiArrayConfig[T, U, V], tag_t: Tag) @@ -47,7 +45,7 @@ class ZeroWriter[T <: Data, U <: Data, V <: Data, Tag <: Data](config: GemminiAr io.resp.bits.last := col_counter +& block_cols.U >= req.bits.cols io.resp.bits.tag := req.bits.tag - when (io.resp.fire()) { + when (io.resp.fire) { val next_col_counter = floorAdd(col_counter, block_cols.U, req.bits.cols) col_counter := next_col_counter @@ -58,7 +56,7 @@ class ZeroWriter[T <: Data, U <: Data, V <: Data, Tag <: Data](config: GemminiAr } } - when (io.req.fire()) { + when (io.req.fire) { req.push(io.req.bits) col_counter := 0.U