Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for POTRF #317

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions examples/potrf/potrf.h
Original file line number Diff line number Diff line change
Expand Up @@ -680,16 +680,22 @@ namespace potrf {
auto keymap3 = [&](const Key3& key) { return A.rank_of(key[0], key[1]); };

/**
* Device map hints: we try to keep tiles on one row on the same device to minimize
* data movement between devices. This provides hints for load-balancing up front
* and avoids movement of the TRSM result to GEMM tasks.
Set a 2D block-cyclic device map: devices are arranged in a gp x gq grid.
*/
auto devmap1 = [&](const Key1& key) { return (key[0] / A.P()) % ttg::device::num_devices(); };
/* Arrange the available devices in a gp x gq grid with gp * gq <= num_devices.
 * If no device is reported, fall back to a 1 x 1 grid so that the modulo
 * operations in the mapper never divide by zero. */
const int num_devices = ttg::device::num_devices();
const int gp = (num_devices > 0) ? static_cast<int>(std::sqrt(num_devices)) : 1;
const int gq = (num_devices > 0) ? (num_devices / gp) : 1;
/* Map an index i (one component of a task key) to a device id in [0, gp*gq):
 * the grid row is derived from i / A.P() and the grid column from i / A.Q(). */
auto mapper = [&A, gp, gq](int i) {
  const auto grid_row = (i / A.P()) % gp;
  const auto grid_col = (i / A.Q()) % gq;
  return (grid_row * gq) + grid_col;
};

auto devmap2a = [&](const Key2& key) { return (key[0] / A.P()) % ttg::device::num_devices(); };
auto devmap2b = [&](const Key2& key) { return (key[1] / A.P()) % ttg::device::num_devices(); };
auto devmap1 = [=](const Key1& key) { return mapper(key[0]); };

auto devmap3 = [&](const Key3& key) { return (key[0] / A.P()) % ttg::device::num_devices(); };
auto devmap2a = [=](const Key2& key) { return mapper(key[0]); };
auto devmap2b = [=](const Key2& key) { return mapper(key[1]); };

auto devmap3 = [=](const Key3& key) { return mapper(key[0]); };

ttg::Edge<Key1, MatrixTile<T>> syrk_potrf("syrk_potrf"), disp_potrf("disp_potrf");

Expand Down
23 changes: 20 additions & 3 deletions examples/potrf/testing_dpotrf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ int main(int argc, char **argv)
char *opt = nullptr;
int ret = EXIT_SUCCESS;
int niter = 3;
bool print_dot = false;

if( (opt = getCmdOption(argv+1, argv+argc, "-N")) != nullptr ) {
N = M = atoi(opt);
Expand All @@ -58,6 +59,10 @@ int main(int argc, char **argv)
niter = atoi(opt);
}

/* whether to print the TTG graph in dot format (enabled with -dot) */
print_dot = cmdOptionExists(argv+1, argv+argc, "-dot");


bool check = !cmdOptionExists(argv+1, argv+argc, "-x");
bool cow_hint = !cmdOptionExists(argv+1, argv+argc, "-w");

Expand Down Expand Up @@ -103,6 +108,16 @@ int main(int argc, char **argv)
dcA.mat = parsec_data_allocate((size_t)dcA.super.nb_local_tiles *
(size_t)dcA.super.bsiz *
(size_t)parsec_datadist_getsizeoftype(dcA.super.mtype));

/* would be nice to have proper abstractions for this */
/* Register the matrix's data collection with every device that exposes a
 * memory_register hook. The collection handle is the innermost `super` of dcA. */
parsec_data_collection_t *o = &(dcA.super.super);
/* NOTE(review): iteration starts at device 1; presumably device 0 is the
 * host/CPU device and needs no registration -- confirm against PaRSEC. */
for (int devid = 1; devid < parsec_nb_devices; ++devid) {
auto* device = parsec_mca_device_get(devid);
/* NOTE(review): the check is on the device's hook, but the call goes through
 * the collection's register_memory pointer; `device` and `o->register_memory`
 * are both assumed non-null here -- confirm. */
if (device->memory_register) {
o->register_memory(o, device); // TODO: check device IDs
}
}

parsec_data_collection_set_key((parsec_data_collection_t*)&dcA, (char*)"Matrix A");

if(!check) {
Expand Down Expand Up @@ -139,9 +154,11 @@ int main(int argc, char **argv)
TTGUNUSED(connected);

if (world.rank() == 0) {
std::cout << "==== begin dot ====\n";
std::cout << ttg::Dot()(init_tt.get()) << std::endl;
std::cout << "==== end dot ====\n";
if (print_dot) {
std::cout << "==== begin dot ====\n";
std::cout << ttg::Dot()(init_tt.get()) << std::endl;
std::cout << "==== end dot ====\n";
}
beg = std::chrono::high_resolution_clock::now();
}

Expand Down
Loading