Skip to content

Commit

Permalink
plots
Browse files Browse the repository at this point in the history
  • Loading branch information
romnn committed Nov 14, 2023
1 parent 0579a31 commit 0ff9984
Show file tree
Hide file tree
Showing 23 changed files with 284 additions and 152 deletions.
398 changes: 281 additions & 117 deletions gpucachesim/microbench/pchase.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions gpucachesim/remote/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,8 @@ def submit_pchase(
slurm_script += "export {}={}\n".format(k, v)
slurm_script += "{} {}\n".format(executable, " ".join(args))

print([str(executable)] + args)

# upload slurm script
self.upload_data(data=StringIO(slurm_script), remote_path=remote_slurm_job_path)

Expand Down
Binary file modified plot/A4000/cache_line_size-l1data-native.pdf
Binary file not shown.
Binary file modified plot/A4000/cache_line_size-l2-native.pdf
Binary file not shown.
Binary file modified plot/A4000/cache_set_mapping-l1data-native-cc86-random.pdf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added plot/A4000/cache_sets.l1data.native (copy).pdf
Binary file not shown.
Binary file modified plot/A4000/cache_size-l1data-native.pdf
Binary file not shown.
Binary file modified plot/A4000/cache_size-l2-native.pdf
Binary file not shown.
Binary file modified plot/A4000/l2_prefetch_size-l2-native.pdf
Binary file not shown.
Binary file modified plot/A4000/latency_distribution-l1data-native.pdf
Binary file not shown.
Binary file modified plot/GTX980/cache_size-l1data-native.pdf
Binary file not shown.
Binary file modified plot/GTX980/cache_size-l2-native.pdf
Binary file not shown.
Binary file not shown.
Binary file modified plot/TitanX-Pascal/cache_line_size-l2-native.pdf
Binary file not shown.
Binary file modified plot/TitanX-Pascal/cache_size-l1data-native.pdf
Binary file not shown.
Binary file modified plot/TitanX-Pascal/cache_size-l2-native.pdf
Binary file not shown.
Binary file modified plot/TitanX-Pascal/l2_prefetch_size-l2-native.pdf
Binary file not shown.
Binary file modified plot/cache_set_mapping-l1data-native-random.pdf
Binary file not shown.
Binary file modified plot/cache_size-l2-native.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion test-apps/microbenches/chxw/pchase.cu
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ int parametric_measure_global(unsigned int *h_a, unsigned int *d_a, memory mem,
for (size_t k = 0; k < iter_size; k++) {
unsigned int index = indexof(h_a, N, h_index[k]);
assert(index == (N + h_index[k] - stride) % N);
if (k == 0) {
if (k == 0 && warmup_iterations < 1) {
assert(index == 0);
}
unsigned int latency = (int)h_latency[k] - (int)clock_overhead;
Expand Down
34 changes: 0 additions & 34 deletions test-apps/microbenches/chxw/random_set_mapping.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,41 +6,10 @@
#include <stdint.h>
#include <stdio.h>
#include <unordered_set>
// #include <random>

#include "common.hpp"
#include "cuda_runtime.h"

// __global__ __noinline__ void
// global_latency_l1_random_set_mapping_host_mapped(
// unsigned int *array, int array_length, unsigned int *latency,
// unsigned int *index, int iter_size, size_t warmup_iterations,
// unsigned int overflow_index) {
// unsigned int start_time, end_time;
// volatile uint32_t j = 0;
//
// for (int k = (int)warmup_iterations * -iter_size; k < iter_size; k++) {
// if (k >= 0 && j == 0) {
// // overflow the cache now
// index[k] = array[array_length + overflow_index];
// }
// if (k >= 0) {
// start_time = clock();
// j = array[j];
// index[k] = j;
// end_time = clock();
//
// latency[k] = end_time - start_time;
// } else {
// j = array[j];
// }
// }
//
// // store to avoid caching in readonly?
// array[array_length] = j;
// array[array_length + 1] = array[j];
// }

__global__ __noinline__ void global_latency_l1_random_set_mapping_host_mapped(
unsigned int *array, int array_length, unsigned int *latency,
unsigned int *index, int iter_size, size_t warmup_iterations,
Expand All @@ -51,10 +20,8 @@ __global__ __noinline__ void global_latency_l1_random_set_mapping_host_mapped(
volatile uint32_t j = start_j;

for (int k = (int)warmup_iterations * -iter_size; k < iter_size; k++) {
// if (k >= 0 && j == 0) {
if (k == round_size) {
// overflow the cache now
// index[k] = array[array_length + overflow_index];
index[k] = array[(array_length + overflow_index) % (2 * array_length)];
}
if (k >= 0) {
Expand Down Expand Up @@ -84,7 +51,6 @@ __global__ __noinline__ void global_latency_l2_random_set_mapping_host_mapped(
volatile uint32_t j = start_j;

for (int k = (int)warmup_iterations * -iter_size; k < iter_size; k++) {
// if (k >= 0 && j == 0) {
if (k == round_size) {
// overflow the cache now
index[k] = array[(array_length + overflow_index) % (2 * array_length)];
Expand Down

0 comments on commit 0ff9984

Please sign in to comment.