Skip to content

Commit

Permalink
[vanadis] Implement the missing Zicntr instructions.
Browse files Browse the repository at this point in the history
Chapter 8 in
https://github.com/riscv/riscv-isa-manual/releases/download/riscv-isa-release-f797123-2024-06-27/riscv-unprivileged.pdf
details the RISC-V counter interface.

There are 3 bespoke counters detailed in the Zicntr extension in 8.1, and up to
29 more user-programmable counters detailed in the Zihpm extension in 8.2. This
commit provides space for all 32 counters in the register file, but only
currently implements the three Zicntr counters.

The implementation consists of a few changes.

1. I have extended the register file structure with 32, 64 bit counters, and
   `increment` and `get` members in order to update and read those counters.
2. I have added a new instruction, `Zicntr::VanadisReadCounterInstruction` in
   order to read those counters. This instruction can be tailored for any of the
   3 `Zicntr`s (implemented as tag types in `inst/zicntr.h`), XLEN=64 or 32, and
   with or without the `[H]` extension.
3. I have added decoding to `decoder/vriscv64decoder.h` to handle `RDCYCLE`,
   `RDTIME`, and `RDINSTRET`. Because the `[H]` and XLEN=32 versions are only
   available in `riscv32`, the `riscv64` decoder simply injects a decoding
   failure if those occur.
4. I have extended the core's `tick` to update the three `Zicntr`s.
5. I have added a small misc test example to demonstrate usage.

I have not implemented any functionality for programming the 29 `Zihpm`
counters, nor have I added any special decoding to read these registers.

I have decided that the `Zicntr` instructions will be processed by the
arithmetic functional unit. I don't know if this is appropriate, but it should
manage any register port contention properly and seemed like the most expedient
solution.

I have removed the `cycle_count` and the `getCycleCount` from the decoder base
class, as it is no longer being used. The current cycle is still passed to the
decoder `tick()` even though it is _also_ still unused.
  • Loading branch information
ldalessa committed Jun 29, 2024
1 parent d6244da commit f0eea4a
Show file tree
Hide file tree
Showing 12 changed files with 192 additions and 17 deletions.
2 changes: 0 additions & 2 deletions src/sst/elements/vanadis/decoder/vdecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,6 @@ class VanadisDecoder : public SST::SubComponent
// Stores new_tls as this decoder's thread-local-storage pointer (tls_ptr).
void setThreadLocalStoragePointer(uint64_t new_tls) { tls_ptr = new_tls; }

// Returns the thread-local-storage pointer currently held in tls_ptr.
uint64_t getThreadLocalStoragePointer() const { return tls_ptr; }
uint64_t getCycleCount() const { return cycle_count; }

// VanadisCircularQueue<VanadisInstruction*>* getDecodedQueue() { return
// decoded_q; }
Expand All @@ -242,7 +241,6 @@ class VanadisDecoder : public SST::SubComponent
uint32_t core;

uint64_t tls_ptr;
uint64_t cycle_count;

bool wantDelegatedLoad;
VanadisCircularQueue<VanadisInstruction*>* thread_rob;
Expand Down
4 changes: 1 addition & 3 deletions src/sst/elements/vanadis/decoder/vmipsdecoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,11 @@ class VanadisMIPSDecoder : public VanadisDecoder
stat_decode_cop1_eq = registerStatistic<uint64_t>("ins_decode_cop1_eq", "1");
}

virtual void tick(SST::Output* output, uint64_t cycle)
virtual void tick(SST::Output* output, uint64_t)
{
output->verbose(CALL_INFO, 16, VANADIS_DBG_DECODER_FLG, "-> Decode step for thr: %" PRIu32 "\n", hw_thr);
output->verbose(CALL_INFO, 16, VANADIS_DBG_DECODER_FLG, "---> Max decodes per cycle: %" PRIu16 "\n", max_decodes_per_cycle);

cycle_count = cycle;

ins_loader->printStatus(output);

uint16_t decodes_performed = 0;
Expand Down
33 changes: 22 additions & 11 deletions src/sst/elements/vanadis/decoder/vriscv64decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,13 @@ class VanadisRISCV64Decoder : public VanadisDecoder
}


void tick(SST::Output* output, uint64_t cycle) override
void tick(SST::Output* output, uint64_t) override
{
if(output->getVerboseLevel() >= 16) {
output->verbose(CALL_INFO, 16, 0, "-> Decode step for thr: %" PRIu32 "\n", hw_thr);
output->verbose(CALL_INFO, 16, 0, "---> Max decodes per cycle: %" PRIu16 "\n", max_decodes_per_cycle);
}

cycle_count = cycle;

for ( uint16_t i = 0; i < max_decodes_per_cycle; ++i ) {
if ( ! thread_rob->full() ) {
if ( ins_loader->hasBundleAt(ip) ) {
Expand Down Expand Up @@ -1107,16 +1105,29 @@ class VanadisRISCV64Decoder : public VanadisDecoder
} break;
default:
{
using namespace Zicntr;
uint64_t csrNum = uimm64 & 0xfff;
switch ( csrNum ) {
case 0xc00:
{
if ( 0 == rs1 ) {
auto thread_call = std::bind(&VanadisRISCV64Decoder::getCycleCount, this);
bundle->addInstruction( new VanadisSetRegisterByCallInstruction<int64_t>( ins_address, hw_thr, options, rd, thread_call));
decode_fault = false;
}
} break;
case 0xc00: // RDCYCLE
bundle->addInstruction( new VanadisReadCounterInstruction( CYCLE, ins_address, hw_thr, options, rd ) );
decode_fault = 0 != rs1;
break;

case 0xc01: // RDTIME
bundle->addInstruction( new VanadisReadCounterInstruction( TIME, ins_address, hw_thr, options, rd ) );
decode_fault = 0 != rs1;
break;

case 0xc02: // RDINSTRET
bundle->addInstruction( new VanadisReadCounterInstruction( INSTRET, ins_address, hw_thr, options, rd ) );
decode_fault = 0 != rs1;
break;

case 0xc80: // RDCYCLEH
case 0xc81: // RDTIMEH
case 0xc82: // RDINSTRETH
output->verbose( CALL_INFO, 16, 0, "riscv64 does not support Zicntr [H] suffix" );
break;
}

} break;
Expand Down
16 changes: 16 additions & 0 deletions src/sst/elements/vanadis/inst/regfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class VanadisRegisterFile
// Resets the register file: zeroes the integer and floating-point register
// storage and clears every performance counter.
void init( ) {
    std::memset(int_reg_storage, 0, (int_reg_width * count_int_regs));
    std::memset(fp_reg_storage, 0, (fp_reg_width * count_fp_regs));

    // std::fill_n takes an element count, not a byte count. sizeof(counters)
    // alone is the array size in bytes, which would write far past the end of
    // the array, so divide by the element size to get the number of counters.
    std::fill_n(counters, sizeof(counters) / sizeof(counters[0]), 0);
}

~VanadisRegisterFile()
Expand Down Expand Up @@ -212,6 +213,18 @@ class VanadisRegisterFile
}
}

// Adds n (default 1) to performance counter i.
// i must be a valid index into the counters array; this is checked by
// assertion only, so release builds perform no bounds check.
void incrementCounter(int i, uint64_t n = 1)
{
    // Bound by the number of elements: sizeof(counters) on its own is the size
    // in bytes and would let out-of-range indices through the check.
    assert(0 <= i and i < static_cast<int>(sizeof(counters) / sizeof(counters[0])));
    counters[i] += n;
}

// Returns the current value of performance counter i.
// i must be a valid index into the counters array; this is checked by
// assertion only, so release builds perform no bounds check.
uint64_t getCounter(int i) const
{
    // Bound by the number of elements: sizeof(counters) on its own is the size
    // in bytes and would let out-of-range indices through the check.
    assert(0 <= i and i < static_cast<int>(sizeof(counters) / sizeof(counters[0])));
    return counters[i];
}

private:
char* getIntReg(const uint16_t reg)
{
Expand Down Expand Up @@ -263,6 +276,9 @@ class VanadisRegisterFile
VanadisFPRegisterMode fp_reg_mode;
const uint32_t fp_reg_width;
const uint32_t int_reg_width;

// Counters from chapter 8 of https://github.com/riscv/riscv-isa-manual/releases/download/riscv-isa-release-f797123-2024-06-27/riscv-unprivileged.pdf
uint64_t counters[32];
};

} // namespace Vanadis
Expand Down
4 changes: 4 additions & 0 deletions src/sst/elements/vanadis/inst/vinstall.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,8 @@
#include "inst/vfpclass.h"
#include "inst/vmin.h"

// Zicntr
#include "inst/vzicntr.h"
#include "inst/vzicntr_readcounter.h"

#endif
17 changes: 17 additions & 0 deletions src/sst/elements/vanadis/inst/vzicntr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

#ifndef _H_VANADIS_ZICNTR
#define _H_VANADIS_ZICNTR

#include <cstdint>
#include <type_traits>
#include <utility>

namespace SST::Vanadis
{
// Tags to be used with the regfile and VanadisReadCounterInstruction.
// Each tag is a distinct type whose value converts to the counter's index, so
// it can be passed both where a counter index is expected and where the
// counter must be known at compile time (as a template argument).
namespace Zicntr {
// Explicit {} initializers: a constexpr object needs an initializer, and
// relying on implicit default-initialization is not accepted by every compiler.
inline constexpr std::integral_constant<uint32_t, 0> CYCLE{};
inline constexpr std::integral_constant<uint32_t, 1> TIME{};
inline constexpr std::integral_constant<uint32_t, 2> INSTRET{};
}
} // namespace SST::Vanadis

#endif
91 changes: 91 additions & 0 deletions src/sst/elements/vanadis/inst/vzicntr_readcounter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@

#ifndef _H_VANADIS_ZICNTR_READ_COUNTER
#define _H_VANADIS_ZICNTR_READ_COUNTER

#include "inst/vinst.h"
#include "inst/vzicntr.h"

namespace SST::Vanadis
{
namespace Zicntr
{
// Instruction that copies one register-file counter into an integer register.
//
// Template parameters:
//   id   - index of the counter to read (one of the Zicntr tags; must be < 3).
//   XLEN - register width the instruction is built for, 32 or 64.
//   H    - when true, read the upper 32 bits of the counter; only allowed
//          together with XLEN == 32.
template <uint32_t id, size_t XLEN = 64, bool H = false>
class VanadisReadCounterInstruction : public VanadisInstruction
{
    static_assert( id < 3 );
    static_assert( XLEN == 64 or XLEN == 32 );
    static_assert( XLEN != 64 or H == false );

public:
    // The leading tag argument carries no data; it only lets the counter id be
    // deduced from the call. dest is the ISA integer register to write.
    VanadisReadCounterInstruction(
        const std::integral_constant<uint32_t, id>,
        const uint64_t addr,
        const uint32_t hw_thr,
        const VanadisDecoderOptions* isa_opts,
        const uint16_t dest)
        : VanadisInstruction(addr, hw_thr, isa_opts, 0, 1, 0, 1, 0, 0, 0, 0)
    {
        isa_int_regs_out[0] = dest;
    }

    // Returns a heap-allocated copy of this instruction.
    VanadisReadCounterInstruction* clone() override
    {
        return new VanadisReadCounterInstruction(*this);
    }

    // Counter reads are issued to the integer arithmetic functional unit.
    VanadisFunctionalUnitType getInstFuncType() const override
    {
        return INST_INT_ARITH; // Is this appropriate?
    }

    // Mnemonic for this instantiation; the "H" suffix marks the upper-half read.
    const char* getInstCode() const override
    {
        // id < 3 is enforced above, so the final branch can only be INSTRET.
        if constexpr ( id == Zicntr::CYCLE ) {
            return H ? "RDCYCLEH" : "RDCYCLE";
        }
        else if constexpr ( id == Zicntr::TIME ) {
            return H ? "RDTIMEH" : "RDTIME";
        }
        else {
            return H ? "RDINSTRETH" : "RDINSTRET";
        }
    }

    // Writes "<mnemonic> <isa reg> (phys: <phys reg>)" into buffer.
    void printToBuffer(char* const buffer, const size_t buffer_size) override
    {
        const char* const mnemonic = getInstCode();
        snprintf(
            buffer, buffer_size,
            "%s %5" PRIu16 " (phys: %5" PRIu16 ")",
            mnemonic, isa_int_regs_out[0], phys_int_regs_out[0]);
    }

    // Reads counter `id` from regFile and stores the requested portion of it
    // in the physical destination register, then marks the instruction executed.
    void execute(SST::Output* const output, VanadisRegisterFile* const regFile) override
    {
#ifdef VANADIS_BUILD_DEBUG
        if(output->getVerboseLevel() >= 16) {
            output->verbose(
                CALL_INFO, 16, 0,
                "Execute: 0x%" PRI_ADDR " %s phys: out=%" PRIu16 ", isa: out=%" PRIu16 "\n",
                getInstructionAddress(), getInstCode(), phys_int_regs_out[0], isa_int_regs_out[0]);
        }
#endif

        const uint64_t full_count = regFile->getCounter(id);
        const auto     target_reg = phys_int_regs_out[0];

        if constexpr ( XLEN == 64 ) {
            // 64-bit target: the whole counter fits in one register.
            regFile->setIntReg(target_reg, full_count);
        }
        else if constexpr ( H ) {
            // 32-bit target, [H] form: upper half of the counter.
            const uint32_t upper_half = full_count >> 32;
            regFile->setIntReg(target_reg, upper_half);
        }
        else {
            // 32-bit target, plain form: lower half of the counter.
            const uint32_t lower_half = full_count & 0x00000000'FFFFFFFF;
            regFile->setIntReg(target_reg, lower_half);
        }

        markExecuted();
    }
};
} // namespace Zicntr
} // namespace SST::Vanadis

#endif
Binary file added src/sst/elements/vanadis/sst-vanadis-tracediff
Binary file not shown.
1 change: 1 addition & 0 deletions src/sst/elements/vanadis/tests/basic_vanadis.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#exe = "uname"
#exe = "mem-test"
#exe = "checkpoint"
#exe = "zicntr"

physMemSize = "4GiB"

Expand Down
Binary file not shown.
29 changes: 29 additions & 0 deletions src/sst/elements/vanadis/tests/small/misc/zicntr/zicntr.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Executes the RISC-V `rdcycle` instruction and returns the value it produced. */
/* NOTE(review): assumes a 64-bit (RV64) target so one read yields the full
   counter - confirm if this test is ever built for RV32. */
uint64_t read_cycles() {
uint64_t cycles;
/* volatile keeps the compiler from dropping or merging the counter read. */
asm volatile ("rdcycle %0" : "=r" (cycles));
return cycles;
}

/* Executes the RISC-V `rdtime` instruction and returns the value it produced. */
/* NOTE(review): assumes a 64-bit (RV64) target so one read yields the full
   counter - confirm if this test is ever built for RV32. */
uint64_t read_time() {
uint64_t time;
/* volatile keeps the compiler from dropping or merging the counter read. */
asm volatile ("rdtime %0" : "=r" (time));
return time;
}

/* Executes the RISC-V `rdinstret` instruction and returns the value it produced. */
/* NOTE(review): assumes a 64-bit (RV64) target so one read yields the full
   counter - confirm if this test is ever built for RV32. */
uint64_t read_instructions() {
uint64_t instructions;
/* volatile keeps the compiler from dropping or merging the counter read. */
asm volatile ("rdinstret %0" : "=r" (instructions));
return instructions;
}

/* Samples each of the three counters once and prints them on a single line. */
int main()
{
    /* Read order is cycle, then time, then retired instructions. */
    const uint64_t cycle_count   = read_cycles();
    const uint64_t time_count    = read_time();
    const uint64_t instret_count = read_instructions();

    printf("cycles: %" PRIu64 " time: %" PRIu64 " instructions: %" PRIu64 "\n",
           cycle_count, time_count, instret_count);
    return 0;
}
12 changes: 11 additions & 1 deletion src/sst/elements/vanadis/vanadis.cc
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,11 @@ VANADIS_COMPONENT::performRetire(int rob_num, VanadisCircularQueue<VanadisInstru

ins_retired_this_cycle++;

// Concurrent RDINSTRET operations in Execute will see this
// instruction included in its count. I'm not sure if that's proper,
// but it seems like a reasonable option.
register_files.at(rob_num)->incrementCounter(Zicntr::INSTRET);

if ( perform_delay_cleanup ) {

VanadisInstruction* delay_ins = rob->pop();
Expand Down Expand Up @@ -1299,7 +1304,7 @@ VANADIS_COMPONENT::tick(SST::Cycle_t cycle)
if ( cnt ) {
auto thr = m_curRetireHwThread;
rc[thr] = performRetire(thr, rob[thr], cycle);

++m_curRetireHwThread;
m_curRetireHwThread %= hw_threads;
cnt = hw_threads;
Expand Down Expand Up @@ -1417,6 +1422,11 @@ VANADIS_COMPONENT::tick(SST::Cycle_t cycle)
#endif

current_cycle++;
for (VanadisRegisterFile* reg : register_files) {
assert(reg);
reg->incrementCounter(Zicntr::CYCLE);
reg->incrementCounter(Zicntr::TIME);
}

uint64_t used_phys_int = 0;
uint64_t used_phys_fp = 0;
Expand Down

0 comments on commit f0eea4a

Please sign in to comment.