Skip to content

Commit

Permalink
Merge branch 'main' into users/rampitec/01-16-_amdgpu_add_test_for_va…
Browse files Browse the repository at this point in the history
…lu_hoisiting_from_wwm_region._nfc
  • Loading branch information
rampitec authored Jan 17, 2025
2 parents 7501423 + 21704a6 commit 9c7987f
Show file tree
Hide file tree
Showing 55 changed files with 4,234 additions and 851 deletions.
4 changes: 2 additions & 2 deletions bolt/lib/Passes/Inliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,13 +310,13 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
if (MIB.isPseudo(Inst))
continue;

MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());

// Fix branch target. Strictly speaking, we don't have to do this as
// targets of direct branches will be fixed later and don't matter
// in the CFG state. However, disassembly may look misleading, and
// hence we do the fixing.
if (MIB.isBranch(Inst)) {
if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
assert(!MIB.isIndirectBranch(Inst) &&
"unexpected indirect branch in callee");
const BinaryBasicBlock *TargetBB =
Expand Down
30 changes: 30 additions & 0 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;

/// Returns AArch64::SP as the stack pointer register for this target.
MCPhysReg getStackPointer() const override { return AArch64::SP; }

/// Always returns false: this builder never classifies \p Inst as a push.
bool isPush(const MCInst &Inst) const override { return false; }

/// Always returns false: this builder never classifies \p Inst as a pop.
bool isPop(const MCInst &Inst) const override { return false; }

/// Builds a call to \p Target in \p Inst by delegating to createDirectCall.
/// NOTE(review): the trailing `false` presumably selects "not a tail call" --
/// confirm against the createDirectCall signature.
void createCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
createDirectCall(Inst, Target, Ctx, false);
}

/// Rewrites a tail-call branch into the corresponding call, in place.
///
/// AArch64::B becomes AArch64::BL and AArch64::BR becomes AArch64::BLR. For
/// any other opcode \p Inst is left untouched and false is returned. On
/// success the kTailCall annotation is removed, clearOffset() is applied to
/// the instruction, and true is returned.
bool convertTailCallToCall(MCInst &Inst) override {
  int CallOpcode;
  const auto BranchOpcode = Inst.getOpcode();
  if (BranchOpcode == AArch64::B) {
    CallOpcode = AArch64::BL;
  } else if (BranchOpcode == AArch64::BR) {
    CallOpcode = AArch64::BLR;
  } else {
    // Not a branch form this conversion knows about.
    return false;
  }

  Inst.setOpcode(CallOpcode);
  removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
  clearOffset(Inst);
  return true;
}

bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
CompFuncTy Comp) const override {
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
Expand Down
42 changes: 42 additions & 0 deletions bolt/test/AArch64/inline-small-function-1.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
## This test checks that inlining is properly handled by BOLT on AArch64.

# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=_Z3barP1A \
# RUN: %t.exe -o %t.bolt | FileCheck %s

# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
# CHECK: Binary Function "_Z3barP1A" after inlining {
# CHECK-NOT: bl _Z3fooP1A
# CHECK: ldr x8, [x0]
# CHECK-NEXT: ldr w0, [x8]

.text
## Small leaf callee: loads a 64-bit value from [x0] into x8, then loads a
## 32-bit value from [x8] into w0, and returns.
.globl _Z3fooP1A
.type _Z3fooP1A,@function
_Z3fooP1A:
ldr x8, [x0]
ldr w0, [x8]
ret
.size _Z3fooP1A, .-_Z3fooP1A

## Caller: saves x29/x30 on the stack, calls _Z3fooP1A, multiplies the result
## in w0 by itself, then restores x29/x30 and returns.
.globl _Z3barP1A
.type _Z3barP1A,@function
_Z3barP1A:
stp x29, x30, [sp, #-16]!
mov x29, sp
bl _Z3fooP1A
mul w0, w0, w0
ldp x29, x30, [sp], #16
ret
.size _Z3barP1A, .-_Z3barP1A

## Minimal entry point: sets w0 to zero and returns.
.globl main
.p2align 2
.type main,@function
main:
mov w0, wzr
ret
.size main, .-main
48 changes: 48 additions & 0 deletions bolt/test/AArch64/inline-small-function-2.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
## This test checks that inlining is properly handled by BOLT on AArch64 when
## the callee ends with an indirect branch (`br`).

# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=test \
# RUN: %t.exe -o %t.bolt | FileCheck %s

#CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
#CHECK: Binary Function "test" after inlining {
#CHECK-NOT: bl indirect
#CHECK: add w0, w1, w0
#CHECK-NEXT: blr x2

.text
## Callee: adds w1 to w0, then branches to the address held in x2 rather than
## executing a ret.
.globl indirect
.type indirect,@function
indirect:
add w0, w1, w0
br x2
.size indirect, .-indirect

## Caller: saves x29/x30 and x20/x19, keeps copies of the incoming w1 and w0 in
## w19 and w20, and calls `indirect`. Afterwards it returns w19 + w20 when the
## call left zero in w0, and 1 otherwise (csinc with wzr).
.globl test
.type test,@function
test:
stp x29, x30, [sp, #-32]!
stp x20, x19, [sp, #16]
mov x29, sp
mov w19, w1
mov w20, w0
bl indirect
add w8, w19, w20
cmp w0, #0
csinc w0, w8, wzr, eq
ldp x20, x19, [sp, #16]
ldp x29, x30, [sp], #32
ret
.size test, .-test

## Minimal entry point: sets w0 to zero and returns.
.globl main
.type main,@function
main:
mov w0, wzr
ret
.size main, .-main


3 changes: 3 additions & 0 deletions clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ class PCHContainerGenerator : public ASTConsumer {
if (!TD->isCompleteDefinition())
return true;

if (D->hasAttr<NoDebugAttr>())
return true;

QualType QualTy = Ctx.getTypeDeclType(D);
if (!QualTy.isNull() && CanRepresent(QualTy.getTypePtr()))
DI.getOrCreateStandaloneType(QualTy, D->getLocation());
Expand Down
14 changes: 14 additions & 0 deletions clang/test/Modules/gmodules-nodebug.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// REQUIRES: asserts

// RUN: %clang_cc1 -std=c++23 -x c++-header -emit-pch -fmodule-format=obj \
// RUN: -o %t.pch %s \
// RUN: -mllvm -debug-only=pchcontainer &>%t-pch.ll
// RUN: cat %t-pch.ll | FileCheck %s

// Alias template marked [[gnu::nodebug]]; the negative FileCheck directive at
// the end of this file expects its name to be absent from the captured output.
template<class...>
using __void_t [[gnu::nodebug]] = void;

// Uses the nodebug alias as a return type so the alias is referenced.
__void_t<> func() {}

// CHECK: !DICompileUnit
// CHECK-NOT: __void_t
5 changes: 3 additions & 2 deletions flang/test/Lower/module_use.f90
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
! RUN: bbc -emit-fir %S/module_definition.f90
! RUN: bbc -emit-fir %s -o - | FileCheck %s
! RUN: rm -fr %t && mkdir -p %t
! RUN: bbc -emit-fir -module %t %S/module_definition.f90
! RUN: bbc -emit-fir -J %t %s -o - | FileCheck %s

! Test use of module data not defined in this file.
! The modules are defined in module_definition.f90
Expand Down
1 change: 0 additions & 1 deletion lld/COFF/COFFLinkerContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ class COFFLinkerContext : public CommonLinkerContext {
std::vector<ObjFile *> objFileInstances;
std::map<std::string, PDBInputFile *> pdbInputFileInstances;
std::vector<ImportFile *> importFileInstances;
std::vector<BitcodeFile *> bitcodeFileInstances;

MergeChunk *mergeChunkInstances[Log2MaxSectionAlignment + 1] = {};

Expand Down
41 changes: 22 additions & 19 deletions lld/COFF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ void LinkerDriver::addFile(InputFile *file) {
<< " linked in after "
"doing LTO compilation.";
}
ctx.bitcodeFileInstances.push_back(f);
f->symtab.bitcodeFileInstances.push_back(f);
} else if (auto *f = dyn_cast<ImportFile>(file)) {
ctx.importFileInstances.push_back(f);
}
Expand Down Expand Up @@ -285,7 +285,7 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
addFile(make<ArchiveFile>(ctx, mbref));
break;
case file_magic::bitcode:
addFile(make<BitcodeFile>(ctx, mbref, "", 0, lazy));
addFile(BitcodeFile::create(ctx, mbref, "", 0, lazy));
break;
case file_magic::coff_object:
case file_magic::coff_import_library:
Expand Down Expand Up @@ -374,8 +374,8 @@ void LinkerDriver::addArchiveBuffer(MemoryBufferRef mb, StringRef symName,
if (magic == file_magic::coff_object) {
obj = ObjFile::create(ctx, mb);
} else if (magic == file_magic::bitcode) {
obj =
make<BitcodeFile>(ctx, mb, parentName, offsetInArchive, /*lazy=*/false);
obj = BitcodeFile::create(ctx, mb, parentName, offsetInArchive,
/*lazy=*/false);
} else if (magic == file_magic::coff_cl_gl_object) {
Err(ctx) << mb.getBufferIdentifier()
<< ": is not a native COFF file. Recompile without /GL?";
Expand Down Expand Up @@ -2571,19 +2571,19 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
}
}

// If any inputs are bitcode files, the LTO code generator may create
// references to library functions that are not explicit in the bitcode
// file's symbol table. If any of those library functions are defined in a
// bitcode file in an archive member, we need to arrange to use LTO to
// compile those archive members by adding them to the link beforehand.
if (!ctx.bitcodeFileInstances.empty()) {
llvm::Triple TT(
ctx.bitcodeFileInstances.front()->obj->getTargetTriple());
for (auto *s : lto::LTO::getRuntimeLibcallSymbols(TT))
ctx.symtab.addLibcall(s);
}

ctx.forEachSymtab([&](SymbolTable &symtab) {
// If any inputs are bitcode files, the LTO code generator may create
// references to library functions that are not explicit in the bitcode
// file's symbol table. If any of those library functions are defined in
// a bitcode file in an archive member, we need to arrange to use LTO to
// compile those archive members by adding them to the link beforehand.
if (!symtab.bitcodeFileInstances.empty()) {
llvm::Triple TT(
symtab.bitcodeFileInstances.front()->obj->getTargetTriple());
for (auto *s : lto::LTO::getRuntimeLibcallSymbols(TT))
symtab.addLibcall(s);
}

// Windows specific -- if __load_config_used can be resolved, resolve
// it.
if (symtab.findUnderscore("_load_config_used"))
Expand Down Expand Up @@ -2639,8 +2639,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// If we are going to do codegen for link-time optimization, check for
// unresolvable symbols first, so we don't spend time generating code that
// will fail to link anyway.
if (!ctx.bitcodeFileInstances.empty() && !config->forceUnresolved)
ctx.symtab.reportUnresolvable();
if (!config->forceUnresolved)
ctx.forEachSymtab([](SymbolTable &symtab) {
if (!symtab.bitcodeFileInstances.empty())
symtab.reportUnresolvable();
});
if (errorCount())
return;

Expand All @@ -2655,7 +2658,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
// link those files (unless -thinlto-index-only was given, in which case we
// resolve symbols and write indices, but don't generate native code or link).
ltoCompilationDone = true;
ctx.symtab.compileBitcodeFiles();
ctx.forEachSymtab([](SymbolTable &symtab) { symtab.compileBitcodeFiles(); });

if (Defined *d =
dyn_cast_or_null<Defined>(ctx.symtab.findUnderscore("_tls_used")))
Expand Down
19 changes: 13 additions & 6 deletions lld/COFF/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1229,10 +1229,15 @@ void ImportFile::parse() {
}
}

BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
StringRef archiveName, uint64_t offsetInArchive,
bool lazy)
: InputFile(ctx.symtab, BitcodeKind, mb, lazy) {
/// Constructs a bitcode input file registered against \p symtab.
/// The lto::InputFile in \p o is exchanged with this file's `obj` member via
/// swap, so after construction \p o holds whatever `obj` held beforehand.
BitcodeFile::BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
std::unique_ptr<lto::InputFile> &o, bool lazy)
: InputFile(symtab, BitcodeKind, mb, lazy) {
obj.swap(o);
}

BitcodeFile *BitcodeFile::create(COFFLinkerContext &ctx, MemoryBufferRef mb,
StringRef archiveName,
uint64_t offsetInArchive, bool lazy) {
std::string path = mb.getBufferIdentifier().str();
if (ctx.config.thinLTOIndexOnly)
path = replaceThinLTOSuffix(mb.getBufferIdentifier(),
Expand All @@ -1252,7 +1257,9 @@ BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
sys::path::filename(path) +
utostr(offsetInArchive)));

obj = check(lto::InputFile::create(mbref));
std::unique_ptr<lto::InputFile> obj = check(lto::InputFile::create(mbref));
return make<BitcodeFile>(ctx.getSymtab(getMachineType(obj.get())), mb, obj,
lazy);
}

BitcodeFile::~BitcodeFile() = default;
Expand Down Expand Up @@ -1329,7 +1336,7 @@ void BitcodeFile::parseLazy() {
}
}

MachineTypes BitcodeFile::getMachineType() const {
MachineTypes BitcodeFile::getMachineType(const llvm::lto::InputFile *obj) {
Triple t(obj->getTargetTriple());
switch (t.getArch()) {
case Triple::x86_64:
Expand Down
14 changes: 10 additions & 4 deletions lld/COFF/InputFiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,13 +386,19 @@ class ImportFile : public InputFile {
// Used for LTO.
class BitcodeFile : public InputFile {
public:
explicit BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
StringRef archiveName, uint64_t offsetInArchive,
bool lazy);
explicit BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
std::unique_ptr<llvm::lto::InputFile> &obj, bool lazy);
~BitcodeFile();

static BitcodeFile *create(COFFLinkerContext &ctx, MemoryBufferRef mb,
StringRef archiveName, uint64_t offsetInArchive,
bool lazy);
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
ArrayRef<Symbol *> getSymbols() { return symbols; }
MachineTypes getMachineType() const override;
// Machine type of this file, obtained by passing its `obj` to the static
// getMachineType overload.
MachineTypes getMachineType() const override {
return getMachineType(obj.get());
}
static MachineTypes getMachineType(const llvm::lto::InputFile *obj);
void parseLazy();
std::unique_ptr<llvm::lto::InputFile> obj;

Expand Down
17 changes: 8 additions & 9 deletions lld/COFF/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,8 @@ bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) {
/// defined symbol imported" diagnostic for symbols in localImports.
/// objFiles and, when needBitcodeFiles is set, bitcodeFileInstances are used
/// to report where undefined symbols are referenced.
static void reportProblemSymbols(
COFFLinkerContext &ctx, const SmallPtrSetImpl<Symbol *> &undefs,
void SymbolTable::reportProblemSymbols(
const SmallPtrSetImpl<Symbol *> &undefs,
const DenseMap<Symbol *, Symbol *> *localImports, bool needBitcodeFiles) {
// Return early if there is nothing to report (which should be
// the common case).
Expand Down Expand Up @@ -392,7 +392,7 @@ static void reportProblemSymbols(
processFile(file, file->getSymbols());

if (needBitcodeFiles)
for (BitcodeFile *file : ctx.bitcodeFileInstances)
for (BitcodeFile *file : bitcodeFileInstances)
processFile(file, file->getSymbols());

for (const UndefinedDiag &undefDiag : undefDiags)
Expand Down Expand Up @@ -423,8 +423,7 @@ void SymbolTable::reportUnresolvable() {
undefs.insert(sym);
}

reportProblemSymbols(ctx, undefs,
/* localImports */ nullptr, true);
reportProblemSymbols(undefs, /*localImports=*/nullptr, true);
}

bool SymbolTable::resolveRemainingUndefines() {
Expand Down Expand Up @@ -506,8 +505,8 @@ bool SymbolTable::resolveRemainingUndefines() {
}

reportProblemSymbols(
ctx, undefs,
ctx.config.warnLocallyDefinedImported ? &localImports : nullptr, false);
undefs, ctx.config.warnLocallyDefinedImported ? &localImports : nullptr,
false);
return foundLazy;
}

Expand Down Expand Up @@ -1124,13 +1123,13 @@ Symbol *SymbolTable::addUndefined(StringRef name) {
}

void SymbolTable::compileBitcodeFiles() {
if (ctx.bitcodeFileInstances.empty())
if (bitcodeFileInstances.empty())
return;

llvm::TimeTraceScope timeScope("Compile bitcode");
ScopedTimer t(ctx.ltoTimer);
lto.reset(new BitcodeCompiler(ctx));
for (BitcodeFile *f : ctx.bitcodeFileInstances)
for (BitcodeFile *f : bitcodeFileInstances)
lto->add(*f);
for (InputFile *newObj : lto->compile()) {
ObjFile *obj = cast<ObjFile>(newObj);
Expand Down
Loading

0 comments on commit 9c7987f

Please sign in to comment.