Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Analysis] Use verify-diagnostics for print-based tests (NFC) #5970

Merged
merged 2 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 51 additions & 62 deletions test/Analysis/test-alias.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: triton-opt %s --mlir-disable-threading -test-print-alias -split-input-file 2>&1 | FileCheck %s
// RUN: triton-opt %s -mlir-disable-threading -test-print-alias -verify-diagnostics -o /dev/null

#AL = #ttg.blocked<{sizePerThread = [1, 4], threadsPerWarp = [4, 8], warpsPerCTA = [4, 1], order = [1, 0]}>
#BL = #ttg.blocked<{sizePerThread = [1, 4], threadsPerWarp = [1, 32], warpsPerCTA = [4, 1], order = [1, 0]}>
Expand All @@ -11,8 +11,6 @@

module attributes {"ttg.num-warps" = 4 : i32, "ttg.target" = "cuda:80"} {

// CHECK-LABEL: matmul_loop
// CHECK-NOT: ->
// There shouldn't be any aliasing with the dot op encoding.
tt.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
%a_ptr_init = tt.splat %A : !tt.ptr<f16> -> tensor<128x32x!tt.ptr<f16>, #AL>
Expand All @@ -38,47 +36,42 @@ tt.func @matmul_loop(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>,
tt.return
}

// Alias test: a single uninitialized shared-memory allocation.
// The annotation on the allocation expects the analysis to report that the
// allocated buffer (%0) aliases only itself.
// NOTE(review): this is a diff rendering, so the FileCheck-style comment lines
// appear to be the removed form and the remark annotations their replacement.
// CHECK-LABEL: alloc
tt.func @alloc(%A : !tt.ptr<f16>) {
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%cst2 = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
tt.return
}

// Alias test: a shared-memory allocation initialized from a constant tensor.
// The constant itself carries no annotation; only the allocation is expected
// to be reported, aliasing itself (%0).
// NOTE(review): this is a diff rendering, so the FileCheck-style comment lines
// appear to be the removed form and the remark annotations their replacement.
// CHECK-LABEL: alloc_init
tt.func @alloc_init(%A : !tt.ptr<f16>) {
%cst0 = arith.constant dense<0.000000e+00> : tensor<16x16xf16, #AL>
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%cst1 = ttg.local_alloc %cst0 : (tensor<16x16xf16, #AL>) -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory>
tt.return
}

// Alias test: a transposed view of an allocation.
// The allocation (%0) is expected to alias itself, and the result of the
// transpose (%1) is expected to be reported as aliasing that same buffer (%0).
// NOTE(review): this is a diff rendering, so the FileCheck-style comment lines
// appear to be the removed form and the remark annotations their replacement.
// CHECK-LABEL: trans
tt.func @trans(%A : !tt.ptr<f16>) {
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%tensor = ttg.local_alloc : () -> !ttg.memdesc<16x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK: %1 -> %0
// expected-remark @below {{%1 -> %0}}
%b = ttg.memdesc_trans %tensor {order=array<i32: 1,0>} : !ttg.memdesc<16x32xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<32x16xf16, #A_SHARED_T, #ttg.shared_memory, mutable>
tt.return
}

// Alias test: a subview taken from a freshly allocated buffer.
// The allocation (%0) is expected to alias itself, and the subview result (%1)
// is expected to be reported as aliasing the allocation (%0).
// The function argument %A is not used in the body.
// NOTE(review): this is a diff rendering, so the FileCheck-style comment lines
// appear to be the removed form and the remark annotations their replacement.
// CHECK-LABEL: subview
tt.func @subview(%A : !ttg.memdesc<1x16x16xf16, #A_SHARED, #ttg.shared_memory>) {
%index = arith.constant 0 : i32
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%a = ttg.local_alloc : () -> !ttg.memdesc<1x16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %1 -> %0
// expected-remark @below {{%1 -> %0}}
%cst1 = ttg.memdesc_subview %a[%index, %index, %index] : !ttg.memdesc<1x16x16xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
tt.return
}

// CHECK-LABEL: if_alias
tt.func @if_alias(%i1 : i1) {
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%a = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK: %1 -> %1
// expected-remark @below {{%1 -> %1}}
%b = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %2 -> %0,%1
// expected-remark @below {{%2 -> %0,%1}}
%cst2 = scf.if %i1 -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable> {
scf.yield %a : !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
} else {
Expand All @@ -87,46 +80,44 @@ tt.func @if_alias(%i1 : i1) {
tt.return
}

// Alias test: three allocations threaded through an scf.for as iter_args.
// The loop body yields (%b_shared, %a_shared, %a_shared), i.e. the carried
// values are permuted on every iteration. Expected reports on the loop:
//   - each region argument (%arg6, %arg7, %arg8) aliases its own initial buffer;
//   - loop results #0 and #1 may alias either %0 or %1;
//   - loop result #2 may alias any of %0, %1, %2.
// All six annotations are stacked above the scf.for and refer to that single op.
// NOTE(review): this is a diff rendering, so the FileCheck-style comment lines
// appear to be the removed form and the remark annotations their replacement.
// CHECK-LABEL: for
tt.func @for(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>) {
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%a = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK: %1 -> %1
// expected-remark @below {{%1 -> %1}}
%b = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK: %2 -> %2
// expected-remark @below {{%2 -> %2}}
%c = ttg.local_alloc : () -> !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %arg6 -> %0
// CHECK-NEXT: %arg7 -> %1
// CHECK-NEXT: %arg8 -> %2
// CHECK-NEXT: %3#0 -> %0,%1
// CHECK-NEXT: %3#1 -> %0,%1
// CHECK-NEXT: %3#2 -> %0,%1,%2
// expected-remark @below {{%arg6 -> %0}}
// expected-remark @below {{%arg7 -> %1}}
// expected-remark @below {{%arg8 -> %2}}
// expected-remark @below {{%3#0 -> %0,%1}}
// expected-remark @below {{%3#1 -> %0,%1}}
// expected-remark @below {{%3#2 -> %0,%1,%2}}
%a_shared, %b_shared, %c_shared = scf.for %iv = %lb to %ub step %step iter_args(%a_shared = %a, %b_shared = %b, %c_shared = %c) ->
(!ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>) {
scf.yield %b_shared, %a_shared, %a_shared : !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<16x16xf16, #A_SHARED, #ttg.shared_memory, mutable>
}
tt.return
}

// CHECK-LABEL: for_if
tt.func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%a_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %1 -> %1
// expected-remark @below {{%1 -> %1}}
%b_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %2 -> %2
// expected-remark @below {{%2 -> %2}}
%c_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %arg7 -> %0
// CHECK-NEXT: %arg8 -> %1
// CHECK-NEXT: %arg9 -> %2
// CHECK-NEXT: %3#0 -> %0,%1
// CHECK-NEXT: %3#1 -> %0,%1
// CHECK-NEXT: %3#2 -> %0,%1,%2
// expected-remark @below {{%arg7 -> %0}}
// expected-remark @below {{%arg8 -> %1}}
// expected-remark @below {{%arg9 -> %2}}
// expected-remark @below {{%3#0 -> %0,%1}}
// expected-remark @below {{%3#1 -> %0,%1}}
// expected-remark @below {{%3#2 -> %0,%1,%2}}
%a_shared, %b_shared, %c_shared = scf.for %iv = %lb to %ub step %step iter_args(%a_shared = %a_shared_init, %b_shared = %b_shared_init, %c_shared = %c_shared_init) ->
(!ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>) {
scf.if %i1 {
%index = arith.constant 8 : i32
// CHECK-NEXT: %4 -> %0,%1
// expected-remark @below {{%4 -> %0,%1}}
%cst0 = ttg.memdesc_subview %a_shared[%index, %index] : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<32xf16, #A_SHARED, #ttg.shared_memory, mutable>
scf.yield
}
Expand All @@ -135,32 +126,31 @@ tt.func @for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B :
tt.return
}

// CHECK-LABEL: for_for_if
tt.func @for_for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>, %B : !tt.ptr<f16>, %i1 : i1) {
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%a_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %1 -> %1
// expected-remark @below {{%1 -> %1}}
%b_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %2 -> %2
// expected-remark @below {{%2 -> %2}}
%c_shared_init = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %arg7 -> %0
// CHECK-NEXT: %arg8 -> %1
// CHECK-NEXT: %arg9 -> %2
// CHECK-NEXT: %3#0 -> %0
// CHECK-NEXT: %3#1 -> %1
// CHECK-NEXT: %3#2 -> %2,%6,%6
// expected-remark @below {{%arg7 -> %0}}
// expected-remark @below {{%arg8 -> %1}}
// expected-remark @below {{%arg9 -> %2}}
// expected-remark @below {{%3#0 -> %0}}
// expected-remark @below {{%3#1 -> %1}}
// expected-remark @below {{%3#2 -> %2,%6,%6}}
%a_shared, %b_shared, %c_shared = scf.for %iv = %lb to %ub step %step iter_args(%a_shared = %a_shared_init, %b_shared = %b_shared_init, %c_shared = %c_shared_init) ->
(!ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>) {
// CHECK-NEXT: %arg11 -> %2,%6,%6
// CHECK-NEXT: %4 -> %2,%6,%6
// expected-remark @below {{%arg11 -> %2,%6,%6}}
// expected-remark @below {{%4 -> %2,%6,%6}}
%c_shared_next = scf.for %jv = %lb to %ub step %step iter_args(%c_shared_next = %c_shared) -> (!ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>) {
// CHECK-NEXT: %5 -> %6,%6
// expected-remark @below {{%5 -> %6,%6}}
%c_shared_next_next = scf.if %i1 -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable> {
// CHECK-NEXT: %6 -> %6
// expected-remark @below {{%6 -> %6}}
%cst0 = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
scf.yield %cst0 : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
} else {
// CHECK-NEXT: %6 -> %6
// expected-remark @below {{%6 -> %6}}
%cst0 = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
scf.yield %cst0 : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
}
Expand All @@ -171,32 +161,31 @@ tt.func @for_for_if(%lb : index, %ub : index, %step : index, %A : !tt.ptr<f16>,
tt.return
}

// Alias test: a loop written with unstructured control flow (cf.br /
// cf.cond_br) instead of scf.for. Three buffers are allocated, passed to ^bb1
// as block arguments, and rotated (%4, %2, %3) on the back edge from ^bb2.
// Expected reports:
//   - each allocation aliases itself, and the first subview aliases %0;
//   - three values (printed as %5, %6, %7) may each alias any of %0, %1, %3;
//   - the final subview in ^bb3 aliases %0.
// NOTE(review): the numbers in the expected text do not match the SSA names
// written in this function (e.g. %10 vs. %9), presumably because the printer
// renumbers values -- confirm against the pass output.
// NOTE(review): in this diff rendering the old FileCheck-style lines for
// %5/%6/%7 sit before the first cf.br, while the replacement annotations are
// attached to the cf.cond_br in ^bb1.
// CHECK-LABEL: cf_for
tt.func @cf_for(%arg0: index, %arg1: index, %arg2: index, %arg3: !tt.ptr<f16>, %arg4: !tt.ptr<f16>) {
%idx = arith.constant 0 : i32
// CHECK: %0 -> %0
// expected-remark @below {{%0 -> %0}}
%cst = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %1 -> %1
// expected-remark @below {{%1 -> %1}}
%cst_0 = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %2 -> %0
// expected-remark @below {{%2 -> %0}}
%0 = ttg.memdesc_subview %cst[%idx, %idx] : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
gpu.barrier
// CHECK-NEXT: %3 -> %3
// expected-remark @below {{%3 -> %3}}
%cst_1 = ttg.local_alloc : () -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
// CHECK-NEXT: %5 -> %0,%1,%3
// CHECK-NEXT: %6 -> %0,%1,%3
// CHECK-NEXT: %7 -> %0,%1,%3
cf.br ^bb1(%arg0, %cst, %cst_0, %cst_1 : index, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>)
^bb1(%1: index, %2: !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, %3: !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, %4: !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>): // 2 preds: ^bb0, ^bb2
%5 = arith.cmpi slt, %1, %arg1 : index
// expected-remark @below {{%5 -> %0,%1,%3}}
// expected-remark @below {{%6 -> %0,%1,%3}}
// expected-remark @below {{%7 -> %0,%1,%3}}
cf.cond_br %5, ^bb2, ^bb3
^bb2: // pred: ^bb1
gpu.barrier
%8 = arith.addi %1, %arg2 : index
cf.br ^bb1(%8, %4, %2, %3 : index, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>, !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>)
^bb3: // pred: ^bb1
gpu.barrier
// CHECK-NEXT: %10 -> %0
// expected-remark @below {{%10 -> %0}}
%9 = ttg.memdesc_subview %0[%idx, %idx] : !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable> -> !ttg.memdesc<128x32xf16, #A_SHARED, #ttg.shared_memory, mutable>
tt.return
}
Expand Down
Loading
Loading