Skip to content

Commit

Permalink
Added 1 in 1 out support and updated passthrough_kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
jackl-xilinx committed Feb 22, 2025
1 parent 9214068 commit 036f53c
Show file tree
Hide file tree
Showing 8 changed files with 279 additions and 148 deletions.
8 changes: 5 additions & 3 deletions programming_examples/basic/passthrough_kernel/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ else()
set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
endif ()

set(PASSTHROUGH_SIZE 4096 CACHE STRING "size")
set(IN1_SIZE 4096 CACHE STRING "in1 buffer size")
set(OUT_SIZE 4096 CACHE STRING "out buffer size")
set(TARGET_NAME test CACHE STRING "Target to be built")

SET (ProjectName ${TARGET_NAME})
Expand All @@ -46,8 +47,9 @@ add_executable(${currentTarget}
)

target_compile_definitions(${currentTarget} PUBLIC
PASSTHROUGH_SIZE=${PASSTHROUGH_SIZE}
DISABLE_ABI_CHECK=1
IN1_SIZE=${IN1_SIZE}
OUT_SIZE=${OUT_SIZE}
DISABLE_ABI_CHECK=1
)

target_include_directories (${currentTarget} PUBLIC
Expand Down
17 changes: 8 additions & 9 deletions programming_examples/basic/passthrough_kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ include ${srcdir}/../../makefile-common
device = npu
targetname = passthrough_kernel
VPATH := ${srcdir}/../../../aie_kernels/generic
data_size = 4096
in1_size = 4096 # in bytes
out_size = 4096 # in bytes, should always be equal to in1_size
trace_size = 8192
PASSTHROUGH_SIZE = ${data_size}

# data_size names every build artifact (final_<size>.xclbin, insts_<size>.txt, ...).
# Expand in1_size here: a bare `in1_size` is just the literal word, not its value.
# strip removes the whitespace that the trailing inline comment on the in1_size
# definition leaves in the value, so the generated file names contain no spaces.
data_size = $(strip ${in1_size})
aie_py_src=${targetname}.py
use_alt?=0

Expand All @@ -32,11 +34,11 @@ all: build/final_${data_size}.xclbin

build/aie2_lineBased_8b_${data_size}.mlir: ${srcdir}/${aie_py_src}
mkdir -p ${@D}
python3 $< ${device} ${data_size} 0 > $@
python3 $< ${device} ${in1_size} ${out_size} 0 > $@

build/aie2_trace_lineBased_8b_${data_size}.mlir: ${srcdir}/${aie_py_src}
mkdir -p ${@D}
python3 $< ${device} ${data_size} ${trace_size} > $@
python3 $< ${device} ${in1_size} ${out_size} ${trace_size} > $@

build/passThrough.cc.o: passThrough.cc
mkdir -p ${@D}
Expand Down Expand Up @@ -74,7 +76,7 @@ endif
${targetname}_${data_size}.exe: ${srcdir}/test.cpp
rm -rf _build
mkdir -p _build
cd _build && ${powershell} cmake ${srcdir} -DTARGET_NAME=${targetname} -DPASSTHROUGH_SIZE=${data_size}
cd _build && ${powershell} cmake ${srcdir} -DTARGET_NAME=${targetname} -DIN1_SIZE=${in1_size} -DOUT_SIZE=${out_size}
cd _build && ${powershell} cmake --build . --config Release
ifeq "${powershell}" "powershell.exe"
cp _build/${targetname}.exe $@
Expand All @@ -85,19 +87,16 @@ endif
run: ${targetname}_${data_size}.exe build/final_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} ./$< -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE

#run-g: ${targetname}.exe build/final_${data_size}.xclbin build/insts.txt
# ${powershell} ./$< -x build/final_${data_size}.xclbin -i build/insts.txt -k MLIR_AIE -t ${trace_size}

run_py: build/final_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} python3 ${srcdir}/test.py -s ${data_size} -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE
${powershell} python3 ${srcdir}/test.py -s ${data_size} -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -i1s ${in1_size} -os ${out_size}

trace: ${targetname}_${data_size}.exe build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} ./$< -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size}
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie2_trace_lineBased_8b_${data_size}.mlir --colshift 1 > trace_passthrough_kernel.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_passthrough_kernel.json

trace_py: build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} python3 ${srcdir}/test.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -s ${data_size}
${powershell} python3 ${srcdir}/test.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -i1s ${in1_size} -os ${out_size}
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie2_trace_lineBased_8b_${data_size}.mlir --colshift 1 > trace_passthrough_kernel.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_passthrough_kernel.json

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
import aie.utils.trace as trace_utils


def passthroughKernel(dev, vector_size, trace_size):
N = vector_size
def passthroughKernel(dev, in1_size, out_size, trace_size):
N = in1_size
lineWidthInBytes = N // 4 # chop input in 4 sub-tensors

@device(dev)
Expand Down Expand Up @@ -83,20 +83,24 @@ def sequence(inTensor, outTensor, notUsed):


try:
if (len(sys.argv) < 4):
raise ValueError("[ERROR] Need at least 4 arguments (dev, in1_size, out_size)")

device_name = str(sys.argv[1])
if device_name == "npu":
dev = AIEDevice.npu1_1col
elif device_name == "npu2":
dev = AIEDevice.npu2
else:
raise ValueError("[ERROR] Device name {} is unknown".format(sys.argv[1]))
vector_size = int(sys.argv[2])
if vector_size % 64 != 0 or vector_size < 512:
print("Vector size must be a multiple of 64 and greater than or equal to 512")
in1_size = int(sys.argv[2])
if in1_size % 64 != 0 or in1_size < 512:
print("In1 buffer size must be a multiple of 64 and greater than or equal to 512")
raise ValueError
trace_size = 0 if (len(sys.argv) != 4) else int(sys.argv[3])
out_size = int(sys.argv[3])
trace_size = 0 if (len(sys.argv) != 5) else int(sys.argv[4])
except ValueError:
print("Argument has inappropriate value")
with mlir_mod_ctx() as ctx:
passthroughKernel(dev, vector_size, trace_size)
passthroughKernel(dev, in1_size, out_size, trace_size)
print(ctx.module)
139 changes: 47 additions & 92 deletions programming_examples/basic/passthrough_kernel/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,114 +8,69 @@
//
//===----------------------------------------------------------------------===//

#include "xrt_test_wrapper.h"
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>

#include "test_utils.h"
#include "xrt/xrt_bo.h"
//*****************************************************************************
// Modify this section to customize buffer datatypes, initialization functions,
// and verify function. The other place to reconfigure your design is the
// Makefile.
//*****************************************************************************

#ifndef DATATYPES_USING_DEFINED
#define DATATYPES_USING_DEFINED
// ------------------------------------------------------
// Configure this to match your buffer data type
// ------------------------------------------------------
using DATATYPE = std::uint8_t;
using DATATYPE_IN1 = std::uint8_t;
using DATATYPE_OUT = std::uint8_t;
#endif

namespace po = boost::program_options;

int main(int argc, const char *argv[]) {

// Program arguments parsing
po::options_description desc("Allowed options");
po::variables_map vm;
test_utils::add_default_options(desc);

test_utils::parse_options(argc, argv, desc, vm);
int verbosity = vm["verbosity"].as<int>();
int trace_size = vm["trace_sz"].as<int>();

std::cout << std::endl << "Running...";

// Load instruction sequence
std::vector<uint32_t> instr_v =
test_utils::load_instr_sequence(vm["instr"].as<std::string>());

if (verbosity >= 1)
std::cout << "Sequence instr count: " << instr_v.size() << "\n";

// Start the XRT context and load the kernel
xrt::device device;
xrt::kernel kernel;

test_utils::init_xrt_load_kernel(device, kernel, verbosity,
vm["xclbin"].as<std::string>(),
vm["kernel"].as<std::string>());

// set up the buffer objects
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int),
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
auto bo_inA = xrt::bo(device, PASSTHROUGH_SIZE * sizeof(DATATYPE),
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
auto bo_out =
xrt::bo(device, PASSTHROUGH_SIZE * sizeof(DATATYPE) + trace_size,
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));

if (verbosity >= 1)
std::cout << "Writing data into buffer objects.\n";

// Copy instruction stream to xrt buffer object
void *bufInstr = bo_instr.map<void *>();
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int));
// Fill input buffer 1 with an ascending ramp: element idx receives the value
// idx, implicitly converted from int to DATATYPE_IN1 on assignment.
//   bufIn1 - destination buffer, must hold at least SIZE elements
//   SIZE   - number of elements to write
void initialize_bufIn1(DATATYPE_IN1 *bufIn1, int SIZE) {
  int idx = 0;
  while (idx < SIZE) {
    bufIn1[idx] = idx;
    ++idx;
  }
}

// Initialize buffer bo_inA
DATATYPE *bufInA = bo_inA.map<DATATYPE *>();
for (int i = 0; i < PASSTHROUGH_SIZE; i++)
bufInA[i] = i;
// Clear the output buffer by zeroing its first SIZE bytes.
// memset counts bytes, so SIZE is treated as a byte count here.
// NOTE(review): confirm whether the caller passes a byte size or an element
// count; the two differ once DATATYPE_OUT is wider than one byte.
void initialize_bufOut(DATATYPE_OUT *bufOut, int SIZE) {
  const int zeroByte = 0;
  memset(bufOut, zeroByte, SIZE);
}

// Zero out buffer bo_out
DATATYPE *bufOut = bo_out.map<DATATYPE *>();
memset(bufOut, 0, PASSTHROUGH_SIZE * sizeof(DATATYPE) + trace_size);
// Functional correctness verifier
int verify_passthrough_kernel(DATATYPE_IN1 *bufIn1,
DATATYPE_OUT *bufOut, int SIZE, int verbosity) {
int errors = 0;

// sync host to device memories
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_out.sync(XCL_BO_SYNC_BO_TO_DEVICE);
for (int i = 0; i < SIZE; i++) {
int32_t ref = bufIn1[i];
int32_t test = bufOut[i];
if (test != ref) {
if (verbosity >= 1)
std::cout << "Error in output " << test << " != " << ref << std::endl;
errors++;
} else {
if (verbosity >= 1)
std::cout << "Correct output " << test << " == " << ref << std::endl;
}
}
return errors;
}

// Execute the kernel and wait to finish
if (verbosity >= 1)
std::cout << "Running Kernel.\n";
unsigned int opcode = 3;
auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_out);
run.wait();
//*****************************************************************************
// Should not need to modify below section
//*****************************************************************************

// Sync device to host memories
bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
int main(int argc, const char *argv[]) {

// Compare out to in
int errors = 0;
for (int i = 0; i < PASSTHROUGH_SIZE; i++) {
if (bufOut[i] != bufInA[i])
errors++;
}
constexpr int IN1_VOLUME = IN1_SIZE / sizeof(DATATYPE_IN1);
constexpr int OUT_VOLUME = OUT_SIZE / sizeof(DATATYPE_OUT);

if (trace_size > 0) {
test_utils::write_out_trace(((char *)bufOut) +
(PASSTHROUGH_SIZE * sizeof(DATATYPE)),
trace_size, vm["trace_file"].as<std::string>());
}
args myargs = parse_args(argc, argv);

// Print Pass/Fail result of our test
if (!errors) {
std::cout << std::endl << "PASS!" << std::endl << std::endl;
return 0;
} else {
std::cout << std::endl
<< errors << " mismatches." << std::endl
<< std::endl;
std::cout << std::endl << "fail." << std::endl << std::endl;
return 1;
}
int res = xrt_test_run<DATATYPE_IN1, DATATYPE_OUT,
initialize_bufIn1,
initialize_bufOut, verify_passthrough_kernel>(
IN1_VOLUME, OUT_VOLUME, myargs);
return res;
}
75 changes: 43 additions & 32 deletions programming_examples/basic/passthrough_kernel/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,50 +7,61 @@
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
import numpy as np
import sys
from aie.utils.xrt import setup_aie, execute as execute_on_aie
import aie.utils.xrt as xrt_utils
import aie.utils.test as test_utils


def main(opts):
print("Running...\n")
in1_size = int(opts.in1_size) # in bytes
in2_size = int(opts.in2_size) # in bytes
out_size = int(opts.out_size) # in bytes

# --------------------------------------------------------------------------
# ----- Edit your data types -----------------------------------------------
# --------------------------------------------------------------------------

in1_dtype = np.uint8
out_dtype = in1_dtype

# --------------------------------------------------------------------------

in1_volume = in1_size // np.dtype(in1_dtype).itemsize
out_volume = out_size // np.dtype(out_dtype).itemsize

# --------------------------------------------------------------------------
# ----- Edit your data init and reference data here ------------------------
# --------------------------------------------------------------------------

data_size = int(opts.size)
dtype = np.uint8
# check buffer sizes
assert out_size == in1_size

app = setup_aie(
opts.xclbin,
opts.instr,
data_size,
dtype,
# Initialize data
in1_data = np.arange(0, in1_volume, dtype=in1_dtype)
out_data = np.zeros([out_volume], dtype=out_dtype)

# Define reference data
ref = in1_data

# --------------------------------------------------------------------------

print("Running...\n")
res = xrt_utils.xrt_test_run(
in1_dtype,
None,
out_dtype,
in1_data,
None,
out_data,
in1_volume,
None,
data_size,
dtype,
out_volume,
ref,
opts,
)
input = np.arange(1, data_size + 1, dtype=dtype)
aie_output = execute_on_aie(app, input)

# Copy output results and verify they are correct
errors = 0
if opts.verify:
if opts.verbosity >= 1:
print("Verifying results ...")
e = np.equal(input, aie_output)
errors = np.size(e) - np.count_nonzero(e)

if not errors:
print("\nPASS!\n")
sys.exit(0)
else:
print("\nError count: ", errors)
print("\nFailed.\n")
sys.exit(1)
sys.exit(res)


if __name__ == "__main__":
p = test_utils.create_default_argparser()
p.add_argument(
"-s", "--size", required=True, dest="size", help="Passthrough kernel size"
)
opts = p.parse_args(sys.argv[1:])
main(opts)
1 change: 0 additions & 1 deletion programming_examples/basic/vector_scalar_mul/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ VPATH := ${srcdir}/../../../aie_kernels/aie2

device = npu
targetname = vector_scalar_mul
# in1_size = 4096
in1_size = 8192 # in bytes
in2_size = 4 # in bytes, should always be 4 (1x int32)
out_size = 8192 # in bytes, should always be equal to in1_size
Expand Down
4 changes: 0 additions & 4 deletions programming_examples/basic/vector_scalar_mul/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,6 @@ int verify_vector_scalar_mul(DATATYPE_IN1 *bufIn1, DATATYPE_IN2 *bufIn2,

int main(int argc, const char *argv[]) {

// constexpr int IN1_VOLUME = VECTORSCALARMUL_SIZE; // 1024; define via
// Makefile constexpr int IN2_VOLUME = 1; constexpr int OUT_VOLUME =
// IN1_VOLUME; // define via Makefile

constexpr int IN1_VOLUME = IN1_SIZE / sizeof(DATATYPE_IN1);
constexpr int IN2_VOLUME = IN2_SIZE / sizeof(DATATYPE_IN2);
constexpr int OUT_VOLUME = OUT_SIZE / sizeof(DATATYPE_OUT);
Expand Down
Loading

0 comments on commit 036f53c

Please sign in to comment.