
Merge pull request #11 from rohany/update-summa
legion/summaMM: update summa generation and driver code
rohany authored Jun 6, 2021
2 parents 7f780f3 + f91752b commit 8a3ea89
Showing 2 changed files with 35 additions and 20 deletions.
41 changes: 26 additions & 15 deletions legion/summaMM/main.cpp
@@ -4,31 +4,49 @@

using namespace Legion;

-typedef int32_t valType;
+typedef double valType;

// Defined by the generated TACO code.
void registerTacoTasks();
-LogicalPartition placeLegionA(Context ctx, Runtime* runtime, LogicalRegion a);
-LogicalPartition placeLegionB(Context ctx, Runtime* runtime, LogicalRegion b);
-LogicalPartition placeLegionC(Context ctx, Runtime* runtime, LogicalRegion c);
+LogicalPartition placeLegionA(Context ctx, Runtime* runtime, LogicalRegion a, int gx, int gy);
+LogicalPartition placeLegionB(Context ctx, Runtime* runtime, LogicalRegion b, int gx, int gy);
+LogicalPartition placeLegionC(Context ctx, Runtime* runtime, LogicalRegion c, int gx, int gy);
void computeLegion(Context ctx, Runtime* runtime, LogicalRegion a, LogicalRegion b, LogicalRegion c, LogicalPartition aPartition);

void top_level_task(const Task* task, const std::vector<PhysicalRegion>& regions, Context ctx, Runtime* runtime) {
  // Create the regions.
  auto args = runtime->get_input_args();
  int n = -1;
+  int gx = -1;
+  int gy = -1;
  // Parse input args.
  for (int i = 1; i < args.argc; i++) {
    if (strcmp(args.argv[i], "-n") == 0) {
      n = atoi(args.argv[++i]);
      continue;
    }
+    if (strcmp(args.argv[i], "-gx") == 0) {
+      gx = atoi(args.argv[++i]);
+      continue;
+    }
+    if (strcmp(args.argv[i], "-gy") == 0) {
+      gy = atoi(args.argv[++i]);
+      continue;
+    }
    // TODO (rohany): Add a flag to do the validation or not.
  }
  if (n == -1) {
    std::cout << "Please provide an input matrix size with -n." << std::endl;
    return;
  }
+  if (gx == -1) {
+    std::cout << "Please provide a grid x size with -gx." << std::endl;
+    return;
+  }
+  if (gy == -1) {
+    std::cout << "Please provide a grid y size with -gy." << std::endl;
+    return;
+  }

  auto fspace = runtime->create_field_space(ctx);
  allocate_tensor_fields<valType>(ctx, runtime, fspace);
@@ -39,21 +57,14 @@ void top_level_task(const Task* task, const std::vector<PhysicalRegion>& regions
  tacoFill<valType>(ctx, runtime, A, 0); tacoFill<valType>(ctx, runtime, B, 1); tacoFill<valType>(ctx, runtime, C, 1);

  // Place the tensors.
-  auto part = placeLegionA(ctx, runtime, A);
-  placeLegionB(ctx, runtime, B);
-  placeLegionC(ctx, runtime, C);
+  auto part = placeLegionA(ctx, runtime, A, gx, gy);
+  placeLegionB(ctx, runtime, B, gx, gy);
+  placeLegionC(ctx, runtime, C, gx, gy);

  // Compute on the tensors.
  benchmark([&]() { computeLegion(ctx, runtime, A, B, C, part); });

-  auto a_reg = getRegionToWrite(ctx, runtime, A, A);
-  FieldAccessor<READ_WRITE,valType,2,coord_t, Realm::AffineAccessor<valType, 2, coord_t>> a_rw(a_reg, FID_VAL);
-  for (int i = 0; i < n; i++) {
-    for (int j = 0; j < n; j++) {
-      assert(a_rw[Point<2>(i, j)] == n);
-    }
-  }
-  runtime->unmap_region(ctx, a_reg);
+  tacoValidate<valType>(ctx, runtime, A, valType(n));
}

TACO_MAIN(valType)
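With this change the driver takes the processor grid dimensions on the command line in addition to the matrix size. A hypothetical invocation (the binary name is assumed; use whatever the summaMM build target actually produces) that multiplies two 8192x8192 matrices over a 4x4 grid:

  ./summaMM -n 8192 -gx 4 -gy 4

Since B and C are filled with ones, every entry of the result A should equal n, which is what tacoValidate checks.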
14 changes: 9 additions & 5 deletions test/tests-distributed.cpp
@@ -119,16 +119,18 @@ TEST(distributed, basicComputeOnto) {

TEST(distributed, summaMM) {
  int dim = 10;
-  Tensor<int> a("a", {dim, dim}, Format{Dense, Dense});
-  Tensor<int> b("b", {dim, dim}, Format{Dense, Dense});
-  Tensor<int> c("c", {dim, dim}, Format{Dense, Dense});
+  Tensor<double> a("a", {dim, dim}, Format{Dense, Dense});
+  Tensor<double> b("b", {dim, dim}, Format{Dense, Dense});
+  Tensor<double> c("c", {dim, dim}, Format{Dense, Dense});

IndexVar i("i"), j("j"), in("in"), jn("jn"), il("il"), jl("jl"), k("k"), ki("ki"), ko("ko");

a(i, j) = b(i, k) * c(k, j);

  // Place each tensor onto a processor grid.
-  auto grid = Grid(2, 2);
+  auto gx = ir::Var::make("gridX", Int32, false, false, true);
+  auto gy = ir::Var::make("gridY", Int32, false, false, true);
+  auto grid = Grid(gx, gy);
  auto placement = GridPlacement({0, 1});
  auto placeA = a.partition(grid).place(grid, placement);
  auto placeB = b.partition(grid).place(grid, placement);
@@ -137,13 +139,15 @@ TEST(distributed, summaMM) {
  auto placeBLowered = lower(placeB, "placeLegionB", false, true);
  auto placeCLowered = lower(placeC, "placeLegionC", false, true);

+  std::shared_ptr<LeafCallInterface> gemm = std::make_shared<GEMM>();
  auto stmt = a.getAssignment().concretize();
  stmt = stmt
    .distributeOnto({i, j}, {in, jn}, {il, jl}, a(i, j))
-    .split(k, ko, ki, 256)
+    .split(k, ko, ki, 512)
    .reorder({ko, il, jl})
    .pushCommUnder(b(i, k), ko)
    .pushCommUnder(c(k, j), ko)
+    .swapLeafKernel(il, gemm)
    ;

  auto lowered = lower(stmt, "computeLegion", false, true);
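The scheduling changes are the substance of this commit: the k split doubles from 256 to 512, and swapLeafKernel(il, gemm) replaces the leaf loop nest under il with a call to a GEMM leaf kernel. That swap also explains the move of valType from int32_t to double, since BLAS GEMM routines operate on floating-point data. A minimal sketch of what the swap does at the leaf, assuming row-major tiles with hypothetical dimensions tileM, tileN, tileK, and using cblas_dgemm as a stand-in for whatever kernel the generated code actually emits:

  // Hypothetical leaf of the SUMMA schedule before swapLeafKernel: a plain
  // loop nest computing A += B * C over one tileM x tileN block of A.
  for (int il = 0; il < tileM; il++)
    for (int jl = 0; jl < tileN; jl++)
      for (int ki = 0; ki < tileK; ki++)
        a[il * tileN + jl] += b[il * tileK + ki] * c[ki * tileN + jl];

  // After swapLeafKernel(il, gemm), the same block update is issued as one
  // BLAS call (requires #include <cblas.h>; an assumed stand-in, not the
  // actual generated code): A += 1.0 * B * C.
  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
              tileM, tileN, tileK,
              /* alpha */ 1.0, b, tileK, c, tileN,
              /* beta */ 1.0, a, tileN);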
