Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor host test code and Makefile to help prevent buffer mismatch hangs #2059

Merged
merged 7 commits into from
Feb 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions programming_examples/basic/vector_scalar_mul/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ else()
set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
endif()

# Vector size; forwarded to the test executable as the VECTORSCALARMUL_SIZE
# compile definition.
set(VECTORSCALARMUL_SIZE 4096 CACHE STRING "vector size")
# Buffer sizes; forwarded to the test executable as the IN1_SIZE, IN2_SIZE and
# OUT_SIZE compile definitions. As cache entries they can be overridden with
# -D<NAME>=<value> at configure time.
# NOTE(review): the accompanying Makefile documents these values as byte
# counts -- confirm any override uses the same unit.
set(IN1_SIZE 8192 CACHE STRING "in1 buffer size")
set(IN2_SIZE 4 CACHE STRING "in2 buffer size")
set(OUT_SIZE 8192 CACHE STRING "out buffer size")
# Name of the target to build; also used as the project name.
set(TARGET_NAME test CACHE STRING "Target to be built")

SET (ProjectName ${TARGET_NAME})
Expand All @@ -51,7 +53,9 @@ add_executable(${currentTarget}
)

# Pass the configured size values (plus DISABLE_ABI_CHECK=1) to the target's
# sources as preprocessor definitions.
target_compile_definitions(${currentTarget} PUBLIC
VECTORSCALARMUL_SIZE=${VECTORSCALARMUL_SIZE}
IN1_SIZE=${IN1_SIZE}
IN2_SIZE=${IN2_SIZE}
OUT_SIZE=${OUT_SIZE}
DISABLE_ABI_CHECK=1
)

Expand Down
25 changes: 15 additions & 10 deletions programming_examples/basic/vector_scalar_mul/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ VPATH := ${srcdir}/../../../aie_kernels/aie2

device = npu
targetname = vector_scalar_mul
data_size = 4096
# in1_size = 4096
# Buffer sizes, all in bytes. The comments sit on their own lines on purpose:
# make keeps any whitespace between a value and a trailing `#` comment as part
# of the variable's value, which would leak a space into every expansion.
in1_size = 8192
# Should always be 4 (1x int32).
in2_size = 4
# Should always be equal to in1_size.
out_size = 8192
trace_size = 8192
CHESS ?= false

# Size tag used in generated artifact names. Expand in1_size so the tag is the
# numeric size rather than the literal text "in1_size".
data_size = ${in1_size}
aie_py_src=${targetname}.py
use_alt?=0

Expand All @@ -45,11 +49,11 @@ endif

build/aie_${data_size}.mlir: ${srcdir}/${aie_py_src}
mkdir -p ${@D}
python3 $< ${device} ${data_size} 0 > $@
python3 $< ${device} ${in1_size} ${in2_size} ${out_size} 0 > $@

build/aie_trace_${data_size}.mlir: ${srcdir}/${aie_py_src}
mkdir -p ${@D}
python3 $< ${device} ${data_size} ${trace_size} > $@
python3 $< ${device} ${in1_size} ${in2_size} ${out_size} ${trace_size} > $@

#build/insts_${data_size}.txt: build/final_${data_size}.xclbin
build/final_${data_size}.xclbin: build/aie_${data_size}.mlir build/scale.o
Expand Down Expand Up @@ -87,7 +91,7 @@ endif
${targetname}_${data_size}.exe: ${srcdir}/test.cpp
rm -rf _build
mkdir -p _build
cd _build && ${powershell} cmake ${srcdir} -DTARGET_NAME=${targetname}_${data_size} -DVECTORSCALARMUL_SIZE=${data_size}
cd _build && ${powershell} cmake ${srcdir} -DTARGET_NAME=${targetname}_${data_size} -DIN1_SIZE=${in1_size} -DIN2_SIZE=${in2_size} -DOUT_SIZE=${out_size}
cd _build && ${powershell} cmake --build . --config Release
ifeq "${powershell}" "powershell.exe"
cp _build/${targetname}_${data_size}.exe $@
Expand All @@ -99,17 +103,18 @@ run: ${targetname}_${data_size}.exe build/final_${data_size}.xclbin build/insts_
${powershell} ./$< -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE

run_py: build/final_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} python3 ${srcdir}/test.py -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -s ${data_size}
${powershell} python3 ${srcdir}/test.py -x build/final_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -i1s ${in1_size} -i2s ${in2_size} -os ${out_size}

trace: ${targetname}_${data_size}.exe build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} ./$< -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size}
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vs.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_vs.json
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vector_scalar_mul.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_vector_scalar_mul.json

trace_py: build/final_trace_${data_size}.xclbin build/insts_${data_size}.txt
${powershell} python3 ${srcdir}/test.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -s ${data_size}
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vs.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_vs.json
#${powershell} python3 ${srcdir}/test_orig.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -i1s ${in1_size} -i2s ${in2_size} -os ${out_size} --size 4096
${powershell} python3 ${srcdir}/test.py -x build/final_trace_${data_size}.xclbin -i build/insts_${data_size}.txt -k MLIR_AIE -t ${trace_size} -i1s ${in1_size} -i2s ${in2_size} -os ${out_size}
${srcdir}/../../utils/parse_trace.py --filename trace.txt --mlir build/aie_trace_${data_size}.mlir --colshift 1 > trace_vector_scalar_mul.json
${srcdir}/../../utils/get_trace_summary.py --filename trace_vector_scalar_mul.json


clean_trace:
Expand Down
160 changes: 50 additions & 110 deletions programming_examples/basic/vector_scalar_mul/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,115 +8,48 @@
//
//===----------------------------------------------------------------------===//

#include "xrt_test_wrapper.h"
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>

#include "test_utils.h"
#include "xrt/xrt_bo.h"
//*****************************************************************************
// Modify this section to customize buffer datatypes, initialization functions,
// and verify function. The other place to reconfigure your design is the
// Makefile.
//*****************************************************************************

#ifndef DATATYPES_USING_DEFINED
#define DATATYPES_USING_DEFINED
// ------------------------------------------------------
// Configure this to match your buffer data type
// ------------------------------------------------------
// using DATATYPE = std::uint8_t;
// using DATATYPE = std::uint32_t;
using DATATYPE = std::uint16_t;
using DATATYPE_IN1 = std::uint16_t;
using DATATYPE_IN2 = std::int32_t;
using DATATYPE_OUT = std::uint16_t;
#endif

const int scaleFactor = 3;

namespace po = boost::program_options;

int main(int argc, const char *argv[]) {

// Program arguments parsing
po::options_description desc("Allowed options");
po::variables_map vm;
test_utils::add_default_options(desc);

test_utils::parse_options(argc, argv, desc, vm);
int verbosity = vm["verbosity"].as<int>();
int trace_size = vm["trace_sz"].as<int>();

constexpr bool VERIFY = true;
constexpr int IN_VOLUME = VECTORSCALARMUL_SIZE;
constexpr int OUT_VOLUME = IN_VOLUME;

int IN_SIZE = IN_VOLUME * sizeof(DATATYPE);
int OUT_SIZE = OUT_VOLUME * sizeof(DATATYPE) + trace_size;

// Load instruction sequence
std::vector<uint32_t> instr_v =
test_utils::load_instr_sequence(vm["instr"].as<std::string>());

if (verbosity >= 1)
std::cout << "Sequence instr count: " << instr_v.size() << "\n";

// Start the XRT context and load the kernel
xrt::device device;
xrt::kernel kernel;

test_utils::init_xrt_load_kernel(device, kernel, verbosity,
vm["xclbin"].as<std::string>(),
vm["kernel"].as<std::string>());

// set up the buffer objects
auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int),
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1));
auto bo_inA =
xrt::bo(device, IN_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
auto bo_inFactor = xrt::bo(device, 1 * sizeof(int32_t),
XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));
auto bo_outC =
xrt::bo(device, OUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5));

if (verbosity >= 1)
std::cout << "Writing data into buffer objects.\n";

// Copy instruction stream to xrt buffer object
void *bufInstr = bo_instr.map<void *>();
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int));

// Initialize buffer bo_inA
DATATYPE *bufInA = bo_inA.map<DATATYPE *>();
for (int i = 0; i < IN_VOLUME; i++)
bufInA[i] = i + 1;

// Initialize buffer bo_inFactor
int32_t *bufInFactor = bo_inFactor.map<int32_t *>();
*bufInFactor = (DATATYPE)scaleFactor;

// Zero out buffer bo_outC
DATATYPE *bufOut = bo_outC.map<DATATYPE *>();
memset(bufOut, 0, OUT_SIZE);

// sync host to device memories
bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_inFactor.sync(XCL_BO_SYNC_BO_TO_DEVICE);
bo_outC.sync(XCL_BO_SYNC_BO_TO_DEVICE);
// Fill input buffer 1 with a ramp: the element at index i receives i + 1
// (converted to DATATYPE_IN1). SIZE is the number of elements written.
void initialize_bufIn1(DATATYPE_IN1 *bufIn1, int SIZE) {
  int idx = 0;
  while (idx < SIZE) {
    bufIn1[idx] = idx + 1;
    ++idx;
  }
}

// Execute the kernel and wait to finish
if (verbosity >= 1)
std::cout << "Running Kernel.\n";
unsigned int opcode = 3;
auto run =
kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_inFactor, bo_outC);
run.wait();
// Initialize Input buffer 2
// Stores the scale factor (3) in the first element of bufIn2; no other
// element is written. SIZE is accepted but not read by this function.
void initialize_bufIn2(DATATYPE_IN2 *bufIn2, int SIZE) {
bufIn2[0] = 3; // scaleFactor
}

// Sync device to host memories
bo_outC.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
// Initialize Output buffer
// Zeroes the first SIZE bytes of bufOut (memset counts bytes, not elements).
// NOTE(review): if the caller passes an element count rather than a byte
// count, only part of the buffer is cleared -- confirm against the caller in
// xrt_test_wrapper.h.
void initialize_bufOut(DATATYPE_OUT *bufOut, int SIZE) {
memset(bufOut, 0, SIZE);
}

// Compare out to golden
// Functional correctness verifier
int verify_vector_scalar_mul(DATATYPE_IN1 *bufIn1, DATATYPE_IN2 *bufIn2,
DATATYPE_OUT *bufOut, int SIZE, int verbosity) {
int errors = 0;
if (verbosity >= 1) {
std::cout << "Verifying results ..." << std::endl;
}
for (uint32_t i = 0; i < IN_VOLUME; i++) {
int32_t ref = bufInA[i] * scaleFactor;

for (int i = 0; i < SIZE; i++) {
int32_t ref = bufIn1[i] * bufIn2[0];
int32_t test = bufOut[i];
if (test != ref) {
if (verbosity >= 1)
Expand All @@ -127,21 +60,28 @@ int main(int argc, const char *argv[]) {
std::cout << "Correct output " << test << " == " << ref << std::endl;
}
}
return errors;
}

if (trace_size > 0) {
test_utils::write_out_trace(((char *)bufOut) + IN_SIZE, trace_size,
vm["trace_file"].as<std::string>());
}
//*****************************************************************************
// Should not need to modify below section
//*****************************************************************************

// Print Pass/Fail result of our test
if (!errors) {
std::cout << std::endl << "PASS!" << std::endl << std::endl;
return 0;
} else {
std::cout << std::endl
<< errors << " mismatches." << std::endl
<< std::endl;
std::cout << std::endl << "fail." << std::endl << std::endl;
return 1;
}
int main(int argc, const char *argv[]) {

// constexpr int IN1_VOLUME = VECTORSCALARMUL_SIZE; // 1024; define via
// Makefile constexpr int IN2_VOLUME = 1; constexpr int OUT_VOLUME =
// IN1_VOLUME; // define via Makefile

constexpr int IN1_VOLUME = IN1_SIZE / sizeof(DATATYPE_IN1);
constexpr int IN2_VOLUME = IN2_SIZE / sizeof(DATATYPE_IN2);
constexpr int OUT_VOLUME = OUT_SIZE / sizeof(DATATYPE_OUT);

args myargs = parse_args(argc, argv);

int res = xrt_test_run<DATATYPE_IN1, DATATYPE_IN2, DATATYPE_OUT,
initialize_bufIn1, initialize_bufIn2,
initialize_bufOut, verify_vector_scalar_mul>(
IN1_VOLUME, IN2_VOLUME, OUT_VOLUME, myargs);
return res;
}
102 changes: 45 additions & 57 deletions programming_examples/basic/vector_scalar_mul/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,79 +7,67 @@
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
import numpy as np
import sys
import time
from aie.utils.xrt import setup_aie, write_out_trace, execute
import aie.utils.xrt as xrt_utils
import aie.utils.test as test_utils


def main(opts):
print("Running...\n")
in1_size = int(opts.in1_size) # in bytes
in2_size = int(opts.in2_size) # in bytes
out_size = int(opts.out_size) # in bytes

data_size = int(opts.size)
vector_dtype = np.int16
scalar_dtype = np.int32
scale_factor = 3
size_out = data_size * 2
print("output buffer size: " + str(size_out))
# --------------------------------------------------------------------------
# ----- Edit your data types -----------------------------------------------
# --------------------------------------------------------------------------

enable_trace = opts.trace_size > 0
in1_dtype = np.int16
in2_dtype = np.int32
out_dtype = in1_dtype

app = setup_aie(
opts.xclbin,
opts.instr,
data_size,
vector_dtype,
1,
scalar_dtype,
data_size,
vector_dtype,
enable_trace=enable_trace,
trace_size=opts.trace_size,
)
input_vector = np.arange(1, data_size + 1, dtype=vector_dtype)
input_factor = np.array([3], dtype=scalar_dtype)
# aie_output = execute_on_aie(app, input_vector, input_factor)
# --------------------------------------------------------------------------

in1_volume = in1_size // np.dtype(in1_dtype).itemsize
in2_volume = in2_size // np.dtype(in2_dtype).itemsize
out_volume = out_size // np.dtype(out_dtype).itemsize

start = time.time_ns()
full_output = execute(app, input_vector, input_factor)
stop = time.time_ns()
npu_time = stop - start
print("npu_time: ", npu_time)
# --------------------------------------------------------------------------
# ----- Edit your data init and reference data here ------------------------
# --------------------------------------------------------------------------

# aie_output = full_output[:size_out].view(np.int8)
# aie_output = full_output[:size_out].view(np.uint8)
aie_output = full_output[:size_out].view(np.int16)
if enable_trace:
trace_buffer = full_output[size_out:].view(np.uint32)
# check buffer sizes
assert in2_size == 4
assert out_size == in1_size

ref = np.arange(1, data_size + 1, dtype=vector_dtype) * scale_factor
scale_factor = 3

# Initialize data
in1_data = np.arange(1, in1_volume + 1, dtype=in1_dtype)
in2_data = np.array([scale_factor], dtype=in2_dtype)
out_data = np.zeros([out_volume], dtype=out_dtype)

if enable_trace:
# trace_buffer = full_output[3920:]
print("trace_buffer shape: ", trace_buffer.shape)
print("trace_buffer dtype: ", trace_buffer.dtype)
# write_out_trace(trace_buffer, str(opts.trace_file))
write_out_trace(trace_buffer, "trace.txt")
# Define reference data
ref = np.arange(1, in1_volume + 1, dtype=out_dtype) * scale_factor

# Copy output results and verify they are correct
errors = 0
if opts.verify:
if opts.verbosity >= 1:
print("Verifying results ...")
e = np.equal(ref, aie_output)
errors = np.size(e) - np.count_nonzero(e)
# --------------------------------------------------------------------------

if not errors:
print("\nPASS!\n")
sys.exit(0)
else:
print("\nError count: ", errors)
print("\nFailed.\n")
sys.exit(1)
print("Running...\n")
res = xrt_utils.xrt_test_run(
in1_dtype,
in2_dtype,
out_dtype,
in1_data,
in2_data,
out_data,
in1_volume,
in2_volume,
out_volume,
ref,
opts,
)
sys.exit(res)


if __name__ == "__main__":
p = test_utils.create_default_argparser()
p.add_argument("-s", "--size", required=True, dest="size", help="Vector size")
opts = p.parse_args(sys.argv[1:])
main(opts)
Loading
Loading