-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduces experimental kernels folder
This commit introduces the `experimental/` folder for kernels. This folder will contain alternative, experimental implementations of our kernels. These implementations use our kernel catalog feature. While being experimental, the kernels are not tightly coupled to DAPHNE, e.g., they are not used by default without specifying the required kernel hint to call the kernel, and they are not tested as part of our test suite. Once an experimental kernel is to be moved to our default kernels, passing all the tests is required. Making kernels executable when running DAPHNE while not yet being part of our kernel library helps the development when improving kernels. - Dependencies can be used while being experimental without having to worry about making them a requirement for all DAPHNE users - Prototyping and development speed is increased, as only the kernel has to be recompiled; this is drastically faster than having to compile the `libAllKernels.so` each time - Provides a playground for developers to try out alternative implementations - Makes it easy to benchmark and compare different implementations of the same kernel by using different kernel hints The `gemv/` folder contains a simple example of such an alternative implementation. It uses AVX2 instructions to implement the SpMV kernel, making it unsuitable to be the default kernel implementation as it requires hardware-specific instructions. Additionally, it uses the LIKWID library to benchmark CPU performance counters (similar to PAPI). Without bringing these dependencies to all DAPHNE users, one can already test the kernel, compare it with the default, and run benchmarks with DAPHNE using this kernel. For more information on kernel extensions see https://daphne-eu.github.io/daphne/Extensions/.
- Loading branch information
1 parent
73bf668
commit bfcd803
Showing
4 changed files
with
181 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright 2024 The DAPHNE Consortium | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Builds the experimental gemv kernel from gemv.cpp into the shared library
# gemv.so, independently of the main DAPHNE build.
#
# Targets:
#   all   (default) build $(TARGET)
#   asm   emit annotated assembly (gemv.s) for inspecting the generated code
#   clean remove objects, assembly listings and shared libraries

CXX = g++
# -DLIKWID_PERFMON selects the real LIKWID marker macros in gemv.cpp instead of
# its no-op fallbacks. -march=native ties the binary to the build host's CPU.
CXXFLAGS = -DLIKWID_PERFMON -ggdb3 -fPIC -fno-omit-frame-pointer -O3 -march=native -fopenmp -std=c++17
ASMFLAGS = -fverbose-asm -S
# Libraries are only needed by the link step, so they live in their own variable.
LDLIBS = -llikwid

# NOTE(review): the relative path presumably points at the DAPHNE source root so
# that <runtime/local/...> includes resolve -- confirm if this folder is moved.
INCLUDES = -I/usr/local/include/ -I../../../../../

SRCS = gemv.cpp
OBJS = $(SRCS:.cpp=.o)
TARGET = gemv.so

# None of these targets produce a file of the same name.
.PHONY: all asm clean

all: $(TARGET)
	@echo " ==> Built target $(TARGET)"

$(TARGET): $(OBJS)
	@echo " ==> COMPILING $@"
	$(CXX) $(CXXFLAGS) $(INCLUDES) -shared -o $@ $(OBJS) $(LDLIBS)

# Only the first source is translated ($<), which matches the single-file SRCS.
asm: $(SRCS)
	@echo " ==> COMPILING $@"
	$(CXX) $(ASMFLAGS) $(CXXFLAGS) $(INCLUDES) -o $(SRCS:.cpp=.s) $<

.cpp.o:
	@echo " ==> COMPILING $@"
	$(CXX) -c $(CXXFLAGS) $(INCLUDES) -o $@ $<

clean:
	@echo "==> CLEANING"
	$(RM) *.o *.s *.so
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#include <immintrin.h> | ||
#include <runtime/local/datastructures/CSRMatrix.h> | ||
#include <runtime/local/datastructures/DenseMatrix.h> | ||
#include <unistd.h> | ||
|
||
#include "runtime/local/datastructures/DataObjectFactory.h" | ||
|
||
#include <iostream> | ||
#include <stdexcept> | ||
|
||
#ifdef LIKWID_PERFMON | ||
#include <likwid-marker.h> | ||
#else | ||
#define LIKWID_MARKER_INIT | ||
#define LIKWID_MARKER_THREADINIT | ||
#define LIKWID_MARKER_SWITCH | ||
#define LIKWID_MARKER_REGISTER(regionTag) | ||
#define LIKWID_MARKER_START(regionTag) | ||
#define LIKWID_MARKER_STOP(regionTag) | ||
#define LIKWID_MARKER_CLOSE | ||
#define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) | ||
#endif | ||
|
||
class DaphneContext; | ||
|
||
// Reduces the four double lanes of `v` to one scalar.
// The lanes are combined as (v[0] + v[2]) + (v[1] + v[3]).
inline double hsum_double_avx2(__m256d v) {
    // Split the 256-bit register into its two 128-bit halves.
    const __m128d upper_pair = _mm256_extractf128_pd(v, 1);
    const __m128d lower_pair = _mm256_castpd256_pd128(v);
    // Lane-wise add: {v[0] + v[2], v[1] + v[3]}.
    const __m128d pair_sums = _mm_add_pd(lower_pair, upper_pair);
    // Broadcast the upper lane so it can be added to the lower one.
    const __m128d upper_lane = _mm_unpackhi_pd(pair_sums, pair_sums);
    const __m128d total = _mm_add_sd(pair_sums, upper_lane);
    return _mm_cvtsd_f64(total);
}
|
||
extern "C" { | ||
|
||
void spmv_simd_parallel_omp(DenseMatrix<double> *&res, | ||
const CSRMatrix<double> *lhs, | ||
const DenseMatrix<double> *rhs, bool transa, | ||
bool transb, DaphneContext *ctx) { | ||
LIKWID_MARKER_INIT; | ||
const size_t nr_lhs = lhs->getNumRows(); | ||
[[maybe_unused]] const size_t nc_lhs = lhs->getNumCols(); | ||
|
||
[[maybe_unused]] const size_t nr_rhs = rhs->getNumRows(); | ||
const size_t nc_rhs = rhs->getNumCols(); | ||
|
||
if (nc_lhs != nr_rhs) { | ||
throw std::runtime_error( | ||
"Gemv - #cols of mat and #rows of vec must be the same"); | ||
} | ||
|
||
if (res == nullptr) | ||
res = DataObjectFactory::create<DenseMatrix<double>>(nr_lhs, nc_rhs, | ||
false); | ||
|
||
const auto *valuesRhs = rhs->getValues(); | ||
auto *valuesRes = res->getValues(); | ||
memset(valuesRes, double(0), sizeof(double) * nr_lhs * nc_rhs); | ||
|
||
auto *row_offsets = lhs->getRowOffsets(); | ||
auto *values = lhs->getValues(); | ||
auto *col_idx = lhs->getColIdxs(); | ||
|
||
#pragma omp parallel | ||
{ | ||
LIKWID_MARKER_START("spmv_simd_parallel_omp"); | ||
#pragma omp for | ||
for (size_t row = 0; row < nr_lhs; ++row) { | ||
double row_sum = 0; | ||
// Initialize [4 x double] row-accumulator | ||
__m256d row_acc = _mm256_setzero_pd(); | ||
// Iterate over non-zero elements in row | ||
auto values_in_row = row_offsets[row + 1] - row_offsets[row]; | ||
int rounds = values_in_row / 4; | ||
for (int i = 0; i < rounds; ++i) { | ||
int idx = row_offsets[row] + i * 4; | ||
// Load doubles from LHS matrix | ||
__m256d mat_v = _mm256_loadu_pd(&values[idx]); | ||
// Load RHS column indices | ||
__m256i col_idxs = | ||
_mm256_loadu_si256((const __m256i *)&col_idx[idx]); | ||
// Gather values from RHS vector | ||
__m256d vec_v = _mm256_i64gather_pd(valuesRhs, col_idxs, 8); | ||
// Multiply and add to accumulator | ||
row_acc = _mm256_fmadd_pd(mat_v, vec_v, row_acc); | ||
} | ||
// Horizontal sum of accumulator | ||
row_sum = hsum_double_avx2(row_acc); | ||
// Handle remaining elements | ||
for (auto i = row_offsets[row] + rounds * 4; | ||
i < row_offsets[row + 1]; ++i) { | ||
row_sum += values[i] * valuesRhs[col_idx[i]]; | ||
} | ||
// Store result | ||
valuesRes[row] = row_sum; | ||
} | ||
|
||
LIKWID_MARKER_STOP("spmv_simd_parallel_omp"); | ||
} | ||
LIKWID_MARKER_CLOSE; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
[ | ||
{ | ||
"opMnemonic": "gemv", | ||
"kernelFuncName": "spmv_simd_parallel_omp", | ||
"resTypes": ["DenseMatrix<double>"], | ||
"argTypes": ["CSRMatrix<double>", "DenseMatrix<double>"], | ||
"backend": "CPP", | ||
"libPath": "gemv.so" | ||
} | ||
] |