From 27b47f54500d9f01985af2448fd208a7052e314d Mon Sep 17 00:00:00 2001 From: Patrick Damme Date: Mon, 22 Apr 2024 19:37:23 +0200 Subject: [PATCH] [DOC] Initial docs on kernel extensions. - A very initial version of the documentation of implementing/building/using a custom kernel extension, as part of the user docs. - All files displayed in these docs are also in scripts/examples/extensions/myKernels/ for easy use. --- doc/Extensions.md | 225 ++++++++++++++++++ doc/README.md | 1 + .../examples/extensions/myKernels/.gitignore | 2 + .../examples/extensions/myKernels/Makefile | 22 ++ .../examples/extensions/myKernels/demo.daphne | 20 ++ .../extensions/myKernels/demoSIMD.daphne | 17 ++ .../extensions/myKernels/demoSeq.daphne | 17 ++ .../extensions/myKernels/myKernels.cpp | 66 +++++ .../extensions/myKernels/myKernels.json | 18 ++ 9 files changed, 388 insertions(+) create mode 100644 doc/Extensions.md create mode 100644 scripts/examples/extensions/myKernels/.gitignore create mode 100644 scripts/examples/extensions/myKernels/Makefile create mode 100644 scripts/examples/extensions/myKernels/demo.daphne create mode 100644 scripts/examples/extensions/myKernels/demoSIMD.daphne create mode 100644 scripts/examples/extensions/myKernels/demoSeq.daphne create mode 100644 scripts/examples/extensions/myKernels/myKernels.cpp create mode 100644 scripts/examples/extensions/myKernels/myKernels.json diff --git a/doc/Extensions.md b/doc/Extensions.md new file mode 100644 index 000000000..f9f049db6 --- /dev/null +++ b/doc/Extensions.md @@ -0,0 +1,225 @@ + + +# Custom Extensions to DAPHNE + +DAPHNE will be extensible in various respects. +Users will be able to add their own kernels, data types, value types, compiler passes, runtime schedulers, etc. without changing the DAPHNE source code itself. + +So far, DAPHNE has initial support for adding custom kernels. + +## Custom Kernel Extensions + +Users can add their own custom kernels (physical operators) to DAPHNE following a three-step approach: + +1. The extension is implemented as a stand-alone code base. +2. The extension is compiled as a shared library. +3. The extension is used in a DaphneDSL script or via DAPHNE's Python API. + +Since this feature is still in an early stage, we only mention the most important points here rather than providing a full reference of what's supported. + +Furthermore, we include a running example of adding two custom kernels for the summation of a dense matrix of single-precision floating-point values. +We are interested in two variants: one sequential implementation for the CPU and one implementation that uses SIMD instructions from Intel's AVX (256-bit vector registers) on the CPU. +All files shown below can be found in `/scripts/examples/extensions/myKernels/`. + +### Step 1: Implementing a Kernel Extension + +A kernel extension consists at least of the following: + +- A *C++ source file*, which includes some essential DAPHNE headers and defines one or multiple kernel functions. + The kernel functions have to follow a certain interface **(*)** and have `extern "C"` linkage. + Within the kernel functions, extension developers have a lot of freedom. + Nevertheless, we also plan to provide some best practices and helpers to make extension development more productive. +- A *kernel catalog JSON file*, which provides some essential information on the kernels provided in the extension, such that DAPHNE knows how to use them. + This information includes: the mnemonic of the DaphneIR operation **(*)**, the name of the kernel function, the list of result/argument types, the backend (e.g. CPU or a specific hardware accelerator), and the path to the shared library of the extension (relative to this JSON file). +- To build the extension, it is recommendable (but not required) to include a Makefile or similar as well. + +**(*)** *We will add a concrete list of DaphneIR operations for which custom kernels can be added later. +This list will be understandable by DAPHNE users, and will contain the operations' mnemonics, arguments, results, as well as expected C++ kernel function interfaces. +In the meantime, developers familiar with DAPHNE internals can already find references of the DaphneIR operations in `src/ir/daphneir/DaphneOps.td` and a reference of the kernel interfaces in `build/runtime/local/kernels/kernels.cpp` (generated during the DAPHNE build).* + +*Running example:* + +C++ source file `myKernels.cpp`: +```c++ +#include + +#include // for the SIMD-enabled kernel +#include +#include + +class DaphneContext; + +extern "C" { + // Custom sequential sum-kernel. + void mySumSeq( + float * res, + const DenseMatrix * arg, + DaphneContext * ctx + ) { + std::cerr << "hello from mySumSeq()" << std::endl; + const float * valuesArg = arg->getValues(); + *res = 0; + for(size_t r = 0; r < arg->getNumRows(); r++) { + for(size_t c = 0; c < arg->getNumCols(); c++) + *res += valuesArg[c]; + valuesArg += arg->getRowSkip(); + } + } + + // Custom SIMD-enabled sum-kernel. + void mySumSIMD( + float * res, + const DenseMatrix * arg, + DaphneContext * ctx + ) { + std::cerr << "hello from mySumSIMD()" << std::endl; + + // Validation. + const size_t numCells = arg->getNumRows() * arg->getNumCols(); + if(numCells % 8) + throw std::runtime_error( + "for simplicity, the number of cells must be " + "a multiple of 8" + ); + if(arg->getNumCols() != arg->getRowSkip()) + throw std::runtime_error( + "for simplicity, the argument must not be " + "a column segment of another matrix" + ); + + // SIMD accumulation (8x f32). + const float * valuesArg = arg->getValues(); + __m256 acc = _mm256_setzero_ps(); + for(size_t i = 0; i < numCells / 8; i++) { + acc = _mm256_add_ps(acc, _mm256_loadu_ps(valuesArg)); + valuesArg += 8; + } + + // Summation of accumulator elements. + *res = + (reinterpret_cast(&acc))[0] + + (reinterpret_cast(&acc))[1] + + (reinterpret_cast(&acc))[2] + + (reinterpret_cast(&acc))[3] + + (reinterpret_cast(&acc))[4] + + (reinterpret_cast(&acc))[5] + + (reinterpret_cast(&acc))[6] + + (reinterpret_cast(&acc))[7]; + } +} +``` + +Kernel catalog file `myKernels.json`: +```json +[ + { + "opMnemonic": "sumAll", + "kernelFuncName": "mySumSeq", + "resTypes": ["float"], + "argTypes": ["DenseMatrix"], + "backend": "CPP", + "libPath": "libMyKernels.so" + }, + { + "opMnemonic": "sumAll", + "kernelFuncName": "mySumSIMD", + "resTypes": ["float"], + "argTypes": ["DenseMatrix"], + "backend": "CPP", + "libPath": "libMyKernels.so" + } +] +``` + +`Makefile`: +```make +libMyKernels.so: myKernels.o + g++ -shared myKernels.o -o libMyKernels.so + +myKernels.o: myKernels.cpp + g++ -c -fPIC myKernels.cpp -I../../../../src/ -std=c++17 -O3 -mavx2 -o myKernels.o + +clean: + rm -rf myKernels.o libMyKernels.so +``` + +### Step 2: Building a Kernel Extension + +The kernel extension must be built as a shared library. +Additional details will follow. + +*Running example:* + +Given the Makefile above, the extension is built by simply running `make` in the extension's directory, which produces the shared library `libMyKernels.so`: + +```bash +make +``` + +### Step 3: Using a Kernel Extension + +The kernels in a kernel extension can be used either automatically by DAPHNE or manually by the user. +Automatic use is currently restricted to the selection of the kernel based on result/argument data/value types, but in the future we plan to support custom cost models as well. +Besides that, the manual employment of custom kernels is very useful for experimentation, e.g., to see the impact of the kernel in the context of a larger integrated data analysis pipeline. +To this end, DaphneDSL [compiler hints](/doc/DaphneDSL/LanguageRef.md#compiler-hints) tell DAPHNE to use a specific kernel, even though DAPHNE's optimizing compiler may not choose the kernel, otherwise. + +*Running example:* + +A minimal example using a summation on a matrix of single-precision floating-point values could look as follows: + +`demo.daphne`: +```R +# Create a matrix of random f32 values in [0, 1] (400 MiB). +X = rand(10^4, 10^4, as.f32(0), as.f32(1), 1, 12345); +# Calculate the sum over the matrix. +s = sum(X); +# Print the sum. +print(s); +``` + +We execute this script from the DAPHNE root directory by: +```bash +bin/daphne scripts/examples/extensions/myKernels/demo.daphne +``` + +In order to manually use our custom sequential `sum`-kernel, we add the DAPHNE compiler hint `::mySumSeq` to the script: + +`demoSeq.daphne`: +```R +X = rand(10^4, 10^4, as.f32(0), as.f32(1), 1, 12345); +s = sum::mySumSeq(X); +print(s); +``` + +We execute this script with the following command, whereby the argument `--kernel-ext` specified the kernel catalog JSON file of the extension to use: +```bash +bin/daphne --kernel-ext scripts/examples/extensions/myKernels/myKernels.json scripts/examples/extensions/myKernels/demoSeq.daphne +``` + +Alternatively, we can try our custom SIMD-enabled `sum`-kernel by adapting the compiler hint accordingly: + +`demoSIMD.daphne`: +```R +X = rand(10^4, 10^4, as.f32(0), as.f32(1), 1, 12345); +s = sum::mySumSIMD(X); +print(s); +``` + +We execute this script by: +```bash +bin/daphne --kernel-ext scripts/examples/extensions/myKernels/myKernels.json scripts/examples/extensions/myKernels/demoSIMD.daphne +``` \ No newline at end of file diff --git a/doc/README.md b/doc/README.md index 7cdd0c069..7e4fe140c 100644 --- a/doc/README.md +++ b/doc/README.md @@ -17,6 +17,7 @@ - [A Few Early Example Algorithms in DaphneDSL](/scripts/algorithms/) - [FileMetaData Format (reading and writing data)](/doc/FileMetaDataFormat.md) - [Profiling DAPHNE using PAPI](/doc/Profiling.md) +- [Custom Extensions to DAPHNE](/doc/Extensions.md) ## Developer Documentation diff --git a/scripts/examples/extensions/myKernels/.gitignore b/scripts/examples/extensions/myKernels/.gitignore new file mode 100644 index 000000000..1ca678074 --- /dev/null +++ b/scripts/examples/extensions/myKernels/.gitignore @@ -0,0 +1,2 @@ +myKernels.o +libMyKernels.so \ No newline at end of file diff --git a/scripts/examples/extensions/myKernels/Makefile b/scripts/examples/extensions/myKernels/Makefile new file mode 100644 index 000000000..cf11ec404 --- /dev/null +++ b/scripts/examples/extensions/myKernels/Makefile @@ -0,0 +1,22 @@ +# Copyright 2024 The DAPHNE Consortium +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +libMyKernels.so: myKernels.o + g++ -shared myKernels.o -o libMyKernels.so + +myKernels.o: myKernels.cpp + g++ -c -fPIC myKernels.cpp -I../../../../src/ -std=c++17 -O3 -mavx2 -o myKernels.o + +clean: + rm -rf myKernels.o libMyKernels.so \ No newline at end of file diff --git a/scripts/examples/extensions/myKernels/demo.daphne b/scripts/examples/extensions/myKernels/demo.daphne new file mode 100644 index 000000000..25739fb38 --- /dev/null +++ b/scripts/examples/extensions/myKernels/demo.daphne @@ -0,0 +1,20 @@ +# Copyright 2024 The DAPHNE Consortium +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Create a matrix of random f32 values in [0, 1] (400 MiB). +X = rand(10^4, 10^4, as.f32(0), as.f32(1), 1, 12345); +# Calculate the sum over the matrix. +s = sum(X); +# Print the sum. +print(s); \ No newline at end of file diff --git a/scripts/examples/extensions/myKernels/demoSIMD.daphne b/scripts/examples/extensions/myKernels/demoSIMD.daphne new file mode 100644 index 000000000..e90768b1f --- /dev/null +++ b/scripts/examples/extensions/myKernels/demoSIMD.daphne @@ -0,0 +1,17 @@ +# Copyright 2024 The DAPHNE Consortium +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +X = rand(10^4, 10^4, as.f32(0), as.f32(1), 1, 12345); +s = sum::mySumSIMD(X); +print(s); \ No newline at end of file diff --git a/scripts/examples/extensions/myKernels/demoSeq.daphne b/scripts/examples/extensions/myKernels/demoSeq.daphne new file mode 100644 index 000000000..949c70aad --- /dev/null +++ b/scripts/examples/extensions/myKernels/demoSeq.daphne @@ -0,0 +1,17 @@ +# Copyright 2024 The DAPHNE Consortium +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +X = rand(10^4, 10^4, as.f32(0), as.f32(1), 1, 12345); +s = sum::mySumSeq(X); +print(s); \ No newline at end of file diff --git a/scripts/examples/extensions/myKernels/myKernels.cpp b/scripts/examples/extensions/myKernels/myKernels.cpp new file mode 100644 index 000000000..a03bbc0b7 --- /dev/null +++ b/scripts/examples/extensions/myKernels/myKernels.cpp @@ -0,0 +1,66 @@ +#include + +#include // for the SIMD-enabled kernel +#include +#include + +class DaphneContext; + +extern "C" { + // Custom sequential sum-kernel. + void mySumSeq( + float * res, + const DenseMatrix * arg, + DaphneContext * ctx + ) { + std::cerr << "hello from mySumSeq()" << std::endl; + const float * valuesArg = arg->getValues(); + *res = 0; + for(size_t r = 0; r < arg->getNumRows(); r++) { + for(size_t c = 0; c < arg->getNumCols(); c++) + *res += valuesArg[c]; + valuesArg += arg->getRowSkip(); + } + } + + // Custom SIMD-enabled sum-kernel. + void mySumSIMD( + float * res, + const DenseMatrix * arg, + DaphneContext * ctx + ) { + std::cerr << "hello from mySumSIMD()" << std::endl; + + // Validation. + const size_t numCells = arg->getNumRows() * arg->getNumCols(); + if(numCells % 8) + throw std::runtime_error( + "for simplicity, the number of cells must be " + "a multiple of 8" + ); + if(arg->getNumCols() != arg->getRowSkip()) + throw std::runtime_error( + "for simplicity, the argument must not be " + "a column segment of another matrix" + ); + + // SIMD accumulation (8x f32). + const float * valuesArg = arg->getValues(); + __m256 acc = _mm256_setzero_ps(); + for(size_t i = 0; i < numCells / 8; i++) { + acc = _mm256_add_ps(acc, _mm256_loadu_ps(valuesArg)); + valuesArg += 8; + } + + // Summation of accumulator elements. + *res = + (reinterpret_cast(&acc))[0] + + (reinterpret_cast(&acc))[1] + + (reinterpret_cast(&acc))[2] + + (reinterpret_cast(&acc))[3] + + (reinterpret_cast(&acc))[4] + + (reinterpret_cast(&acc))[5] + + (reinterpret_cast(&acc))[6] + + (reinterpret_cast(&acc))[7]; + } +} \ No newline at end of file diff --git a/scripts/examples/extensions/myKernels/myKernels.json b/scripts/examples/extensions/myKernels/myKernels.json new file mode 100644 index 000000000..6fc2fcd51 --- /dev/null +++ b/scripts/examples/extensions/myKernels/myKernels.json @@ -0,0 +1,18 @@ +[ + { + "opMnemonic": "sumAll", + "kernelFuncName": "mySumSeq", + "resTypes": ["float"], + "argTypes": ["DenseMatrix"], + "backend": "CPP", + "libPath": "libMyKernels.so" + }, + { + "opMnemonic": "sumAll", + "kernelFuncName": "mySumSIMD", + "resTypes": ["float"], + "argTypes": ["DenseMatrix"], + "backend": "CPP", + "libPath": "libMyKernels.so" + } +] \ No newline at end of file