-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Auto-Upgrade some nvvm.annotations to attributes #119261
[NVPTX] Auto-Upgrade some nvvm.annotations to attributes #119261
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) ChangesFull diff: https://github.com/llvm/llvm-project/pull/119261.diff 6 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index 693365161330f5..bb2e4ad48b51d8 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -39,6 +39,7 @@ set(NVPTXCodeGen_sources
NVVMReflect.cpp
NVPTXProxyRegErasure.cpp
NVPTXCtorDtorLowering.cpp
+ NVVMUpgradeAnnotations.cpp
)
add_llvm_target(NVPTXCodeGen
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index ca915cd3f3732f..53418148be3615 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -52,6 +52,7 @@ FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable,
bool NoTrapAfterNoreturn);
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
+ModulePass *createNVVMUpgradeAnnotationsPass();
struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
@@ -74,6 +75,10 @@ struct NVPTXCopyByValArgsPass : PassInfoMixin<NVPTXCopyByValArgsPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+struct NVVMUpgradeAnnotationsPass : PassInfoMixin<NVVMUpgradeAnnotationsPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
namespace NVPTX {
enum DrvInterface {
NVCL,
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a5c5e9420ee737..b4fd36625adc9c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -254,6 +254,8 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
+ PM.addPass(NVVMUpgradeAnnotationsPass());
+
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// Note: NVVMIntrRangePass was causing numerical discrepancies at one
@@ -349,6 +351,8 @@ void NVPTXPassConfig::addIRPasses() {
AAR.addAAResult(WrapperPass->getResult());
}));
+ addPass(createNVVMUpgradeAnnotationsPass());
+
// NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 98bffd92a087b6..04e83576cbf958 100644
--- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -311,11 +311,16 @@ std::optional<unsigned> getMaxNReg(const Function &F) {
}
bool isKernelFunction(const Function &F) {
+ if (F.getCallingConv() == CallingConv::PTX_Kernel)
+ return true;
+
+ if (F.hasFnAttribute("nvvm.kernel"))
+ return true;
+
if (const auto X = findOneNVVMAnnotation(&F, "kernel"))
return (*X == 1);
- // There is no NVVM metadata, check the calling convention
- return F.getCallingConv() == CallingConv::PTX_Kernel;
+ return false;
}
MaybeAlign getAlign(const Function &F, unsigned Index) {
diff --git a/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
new file mode 100644
index 00000000000000..ca550434835a2c
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVVMUpgradeAnnotations.cpp
@@ -0,0 +1,130 @@
+//===- NVVMUpgradeAnnotations.cpp - Upgrade NVVM Annotations --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces deprecated metadata in nvvm.annotation with a more modern
+// IR representation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include <cstdint>
+
+#define DEBUG_TYPE "nvvm-upgrade-annotations"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVVMUpgradeAnnotationsLegacyPassPass(PassRegistry &);
+} // namespace llvm
+
+namespace {
+
+class NVVMUpgradeAnnotationsLegacyPass : public ModulePass {
+public:
+ static char ID;
+ NVVMUpgradeAnnotationsLegacyPass() : ModulePass(ID) {
+ initializeNVVMUpgradeAnnotationsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) override;
+};
+} // namespace
+
+char NVVMUpgradeAnnotationsLegacyPass::ID = 0;
+
+bool static autoUpgradeAnnotation(Function *F, StringRef K, const Metadata *V) {
+ if (K == "kernel") {
+ assert(mdconst::extract<ConstantInt>(V)->getZExtValue() == 1);
+ F->addFnAttr("nvvm.kernel");
+ return true;
+ }
+ if (K == "align") {
+ const uint64_t AlignBits = mdconst::extract<ConstantInt>(V)->getZExtValue();
+ const unsigned Idx = (AlignBits >> 16);
+ const Align StackAlign = Align(AlignBits & 0xFFFF);
+ // TODO: Skip adding the stackalign attribute for returns, for now.
+ if (!Idx)
+ return false;
+ F->addAttributeAtIndex(
+ Idx, Attribute::getWithStackAlignment(F->getContext(), StackAlign));
+ return true;
+ }
+
+ return false;
+}
+
+// Iterate over nvvm.annotations rewriting them as appropiate.
+void static upgradeNVAnnotations(Module &M) {
+ NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
+ if (!NamedMD)
+ return;
+
+ SmallVector<MDNode *, 8> NewNodes;
+ SmallSet<const MDNode *, 8> SeenNodes;
+ for (MDNode *MD : NamedMD->operands()) {
+ if (SeenNodes.contains(MD))
+ continue;
+ SeenNodes.insert(MD);
+
+ Function *F = mdconst::dyn_extract_or_null<Function>(MD->getOperand(0));
+ if (!F)
+ continue;
+
+ assert(MD && "Invalid MDNode for annotation");
+ assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
+
+ SmallVector<Metadata *, 8> NewOperands;
+ // start index = 1, to skip the global variable key
+ // increment = 2, to skip the value for each property-value pairs
+ for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
+ MDString *K = cast<MDString>(MD->getOperand(j));
+ const MDOperand &V = MD->getOperand(j + 1);
+ bool Upgraded = autoUpgradeAnnotation(F, K->getString(), V);
+ if (!Upgraded)
+ NewOperands.append({K, V});
+ }
+
+ if (!NewOperands.empty()) {
+ NewOperands.insert(NewOperands.begin(), MD->getOperand(0));
+ NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
+ }
+ }
+
+ NamedMD->clearOperands();
+ for (MDNode *N : NewNodes)
+ NamedMD->addOperand(N);
+}
+
+PreservedAnalyses NVVMUpgradeAnnotationsPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ upgradeNVAnnotations(M);
+ return PreservedAnalyses::all();
+}
+
+bool NVVMUpgradeAnnotationsLegacyPass::runOnModule(Module &M) {
+ upgradeNVAnnotations(M);
+ return false;
+}
+
+INITIALIZE_PASS(NVVMUpgradeAnnotationsLegacyPass, DEBUG_TYPE,
+ "NVVMUpgradeAnnotations", false, false)
+
+ModulePass *llvm::createNVVMUpgradeAnnotationsPass() {
+ return new NVVMUpgradeAnnotationsLegacyPass();
+}
diff --git a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
new file mode 100644
index 00000000000000..68dc2353858cb3
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt < %s -mtriple=nvptx64-unknown-unknown -O0 -S | FileCheck %s
+
+define i32 @foo(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i32 alignstack(8) [[A:%.*]], i32 alignstack(16) [[B:%.*]]) {
+; CHECK-NEXT: ret i32 0
+;
+ ret i32 0
+}
+
+define i32 @bar(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @bar(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: ret i32 0
+;
+ ret i32 0
+}
+
+!nvvm.annotations = !{!0, !1, !2}
+
+!0 = !{ptr @foo, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010}
+!1 = !{null, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020008}
+!2 = !{ptr @bar, !"kernel", i32 1}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "nvvm.kernel" }
+;.
+; CHECK: [[META0:![0-9]+]] = !{ptr @foo, !"align", i32 8}
+;.
|
@Artem-B, This functionality is currently implemented as a pass that we add at the beginning of llc and opt. Do you think it would make more sense to put in AutoUpgrade? |
Maybe. I've mostly used autoupgrade for intrinsics, but this kind of change may fit there, too. Give it a try. Upgrading on load would probably be better than relying on a bespoke pass -- then we can rely on function attributes as the ground truth, instead of having to look at both, because the special pass may not have run yet. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Few nits. LGTM overall, except for the "kernel/nvvm.kernel" distinction question.
;. | ||
; CGSCC: attributes #[[ATTR0]] = { "llvm.assume"="ompx_aligned_barrier" } | ||
; CGSCC: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind } | ||
; CGSCC: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } | ||
; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } | ||
; CGSCC: attributes #[[ATTR4]] = { "kernel" } | ||
; CGSCC: attributes #[[ATTR5]] = { nosync memory(none) } | ||
; CGSCC: attributes #[[ATTR4]] = { "kernel" "nvvm.kernel" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need a distinction between "kernel" and "nvvm.kernel" ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately, I think we do. "kernel" is really more like "OpenMP kernel" and the semantics for this do not seem to be a perfect match for "nvvm.kernel". For example, @multiple_blocks_functions_non_kernel_effects_2
in this test has "kernel" but is not an nvvm kernel. I'm very unfamiliar with the OpenMP semantics so I thought keeping it separate would be the safest approach, it also may be clearest to have a common "nvvm.*" prefix for all attributes currently represented as nvvm.annotations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought OpenMP just used nvvm.kernel
as well? As far as I was aware it just controlled whether or not the function got .entry
in the PTX.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The problem is that OpenMP seems to need to be able to draw a distinction between OpenMP kernels and nvvm kernels. For example here it seems like OpenMP only wants to look at "kernel" not "nvvm.kernel". As a result it seems like these attributes cannot be easily unified.
llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Lines 5932 to 5938 in c835b48
// We are only interested in OpenMP target regions. Others, such as kernels | |
// generated by CUDA but linked together, are not interesting to this pass. | |
if (isOpenMPKernel(*KernelFn)) { | |
++NumOpenMPTargetRegionKernels; | |
Kernels.insert(KernelFn); | |
} else | |
++NumNonOpenMPTargetRegionKernels; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, as the comment pointed out. Only OpenMP target regions are marked as kernel
. Nothing else, though I'm not really sure if that is a good name. Lol.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Honestly, us putting
nvvm.kernel
on everything should be fixed.
+1
It is likely because the OpenMPGPUCodeGen for the AMDGPU part shares the same code as NVPTX and even doesn't bother to emit the attribute based on actual target. 😮💨
¯_(ツ)_/¯
Is there any way around this?
That should not be worked around. To have both is wrong at the first place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does PTX support calling a kernel entry point as an ordinary function from another function? I've never understood how PTX doesn't set an IR calling convention for the entries.
AMDGPU also does have multiple types of entry points for graphics, but even if we didn't the kernels would have a separate calling convention for callable functions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's an error at the PTX level so the backend just lets it happen and waits until ptxas
blows up AFAICT.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah that really should just be a separate calling convention
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, fair enough. I'll start switch us over to a calling convention in #120806
b57629a
to
416d908
Compare
// not support stackalign attribute for this. | ||
if (Index == 0) { | ||
std::vector<unsigned> Vs; | ||
if (findAllNVVMAnnotation(&F, "align", Vs)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Offtopic: I think if findAllNVVMAnnotation()
returned ArrayRef
it would work much nicer than copying data into a temp array. Bonus points for making it plural.
if (Index == 0) {
for (unsigned V : findAllNVVMAnnotation())
do stuff;
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yea, I agree the NVVM annotation APIs could be cleaned up significantly, hopefully this work will remove the need for them altogether though.
416d908
to
6e1b7af
Compare
Now that #122320 is landed, all in-tree frontends and tests use the @Artem-B please take another look when you have a moment. |
885d162
to
7f126e1
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/157/builds/18648 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/8137 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/129/builds/13869 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/13758 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/10992 Here is the relevant piece of the build log for the reference
|
With this change, we run into a failure with this cuda call: https://github.com/openxla/xla/blob/main/xla/stream_executor/cuda/cuda_executor.cc#L222 Apparently we don't find the function anymore. I checked the emitted ptx, and the difference is here: before:
after:
Also we are missing the alignment information, for example .align 128 @Artem-B maybe you have an idea what we may need to do for XLA to make it work again? |
Ok, I found a fix with the help of @d0k, it seems now we need to also set the calling convention of the function to llvm::CallingConv::PTX_Kernel, as the legacy annotations are not accepted anymore (the change to |
Yep this seems like the appropriate solution to me. You should be able to also remove any code adding the As a heads up, I hope to deprecate and remove nearly all |
Add a new AutoUpgrade function to convert some legacy nvvm.annotations metadata to function level attributes. These attributes are quicker to look-up so improve compile time and are more idiomatic than using metadata which should not include required information that changes the meaning of the program. Currently supported annotations are: - !"kernel" -> ptx_kernel calling convention - !"align" -> alignstack parameter attributes (return not yet supported)
…erm/restore-nvmm-annotations [NVPTX] Auto-Upgrade some nvvm.annotations to attributes (llvm#119261)
Hi, we run into a failure in downstream project (i.e., IREE) with the change. I'm not an expert of this area, but I'd like to see how to fix our problem properly. Without the test, we are generating something like:
With the patch, the
I can provide more artifacts if it helps, thanks in advance! |
Hi @hanhanW, my guess here is that your downstream project contains a front-end that is still generating IR that designates kernels via the If my assumption is correct, to fix this you should update your frontend to mark kernels via the calling convention. (Similar to this change for clang: https://github.com/llvm/llvm-project/pull/120806/files#diff-148cd424d4a2056106448d04727450e281ca1bf03ac68d5db2e8dff59d397090R261)
|
Thank you, this is very helpful! I have a local fix, which generates the right code! E.g., replacing the below line with your suggestion. 🙏🏻 |
Add a new AutoUpgrade function to convert some legacy nvvm.annotations metadata to function level attributes. These attributes are quicker to look-up so improve compile time and are more idiomatic than using metadata which should not include required information that changes the meaning of the program.
Currently supported annotations are: