diff --git a/src/openmp/ompt_target_daxpy.c b/src/openmp/ompt_target_daxpy.c new file mode 100644 index 00000000..4854079e --- /dev/null +++ b/src/openmp/ompt_target_daxpy.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include +#include + +void daxpy( double * __restrict__ a, double * __restrict__ b, + double scalar, int num_elements ) { + +#pragma omp target teams distribute parallel for simd map(tofrom:a[0:num_elements]) map(to:b[0:num_elements]) + for (size_t j=0; j 0.000001 ) { + num_errors++; + } + } + +#pragma omp target exit data map(release:c[0:num_elements]) +#pragma omp target exit data map(release:a[0:num_elements]) +#pragma omp target exit data map(release:b[0:num_elements]) + + free(a); + free(b); + free(c); + + if(num_errors == 0) printf( "Success!\n" ); + + assert(num_errors == 0); + + return 0; +} \ No newline at end of file diff --git a/src/openmp/ompt_target_matmult.c b/src/openmp/ompt_target_matmult.c new file mode 100644 index 00000000..76cbaf2f --- /dev/null +++ b/src/openmp/ompt_target_matmult.c @@ -0,0 +1,162 @@ +/****************************************************************************** +* OpenMP Example - Matrix Multiply - C Version +* Demonstrates a matrix multiply using OpenMP. 
+* +* Modified from here: +* https://computing.llnl.gov/tutorials/openMP/samples/C/omp_mm.c +* +* For PAPI_FP_INS, the exclusive count for the event: +* for (null) [OpenMP location: file:matmult.c ] +* should be 2E+06 / Number of Threads +******************************************************************************/ +#include +#include + +#ifndef MATRIX_SIZE +#define MATRIX_SIZE 4096 +#endif + +#define MAX_ITERATIONS 3 +#define NRA MATRIX_SIZE /* number of rows in matrix A */ +#define NCA MATRIX_SIZE /* number of columns in matrix A */ +#define NCB MATRIX_SIZE /* number of columns in matrix B */ + +#define elem(_m,_i,_j) (_m[((_i)*NRA) + (_j)]) + +/* Allocates a rows*cols array of double with malloc and issues a target enter data map(alloc:...) for it; returns the malloc result (NULL on failure). */ +double* allocateMatrix(int rows, int cols) { + double *matrix = (double*)malloc(sizeof(*matrix) * rows * cols); /* element size, not sizeof(double*) */ + #pragma omp target enter data map(alloc:matrix[0:rows*cols]) + return matrix; +} + +void initialize(double *matrix, int rows, int cols) { + int i,j; +#pragma omp parallel private(i,j) shared(matrix) + { + //set_num_threads(); + /*** Initialize matrices ***/ +#pragma omp for nowait + for (i=0; i +#include +#include +#include +#include + +#define ARRAY_SIZE (1024*1024*512) +#define ITERATIONS 350 + +int run_cpu( int argc, char** argv ) { + printf( "The total memory allocated is %7.3lf MB.\n", + 2.0*sizeof(double)*ARRAY_SIZE/1024/1024 ); + + double* a = NULL; + double* b = NULL; + int num_errors = 0; + double time = 0; + double start_time = 0; + double scalar = 8.0; + int iterations = ITERATIONS; + double iteration_time[ITERATIONS]; + + a = (double *) malloc( sizeof(double)*ARRAY_SIZE ); + b = (double *) malloc( sizeof(double)*ARRAY_SIZE ); + + // initialize on the host +#pragma omp parallel for +for (size_t j=0; j 0.000001 ) { + num_errors++; + } + } + + free(a); + free(b); + + if( num_errors == 0 ) printf( "Success!\n" ); + + assert(num_errors == 0); + + return 0; +} + +int run_gpu( int argc, char** argv ) +{ + printf( "The total memory allocated is %7.3lf MB.\n", + 
2.0*sizeof(double)*ARRAY_SIZE/1024/1024 ); + + double* a = NULL; + double* b = NULL; + int num_errors = 0; + double time = 0; + double start_time = 0; + double scalar = 8.0; + int iterations = ITERATIONS; + double iteration_time[ITERATIONS]; + + a = (double *) malloc( sizeof(double)*ARRAY_SIZE ); + b = (double *) malloc( sizeof(double)*ARRAY_SIZE ); + + // initialize on the host +#pragma omp parallel for +for (size_t j=0; j 0.000001 ) { + num_errors++; + } + } + /* Release the target mappings of a and b before the host buffers are freed. */ +#pragma omp target exit data map(release:a[0:ARRAY_SIZE]) +#pragma omp target exit data map(release:b[0:ARRAY_SIZE]) + + free(a); + free(b); + + if( num_errors == 0 ) printf( "Success!\n" ); + + assert(num_errors == 0); + + return 0; +} + + /* Entry point: calls run_cpu and then run_gpu with the program arguments; their return values are discarded. */ +int main( int argc, char** argv ) { + run_cpu(argc, argv); + run_gpu(argc, argv); +} \ No newline at end of file