forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathllvm-cpufeatures.cpp
156 lines (130 loc) · 4.32 KB
/
llvm-cpufeatures.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// This file is a part of Julia. License is MIT: https://julialang.org/license
// Lower intrinsics that expose subtarget information to the language. This makes it
// possible to write code that changes behavior based on, e.g., the availability of
// specific CPU features.
//
// The following intrinsics are supported:
// - julia.cpu.have_fma.$typ: returns 1 if the platform supports hardware-accelerated FMA.
//
// Some of these intrinsics are overloaded, i.e., they are suffixed with a type name.
// To extend support, make sure codegen (in intrinsics.cpp) knows how to emit them.
//
// XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig
// instead of using the global target machine?
#include "llvm-version.h"
#include "passes.h"
#include <llvm/ADT/Statistic.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/PassManager.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/Support/Debug.h>
#include "julia.h"
#include "jitlayers.h"
// Name used by LLVM's debug/statistics macros to label output from this file.
#define DEBUG_TYPE "cpufeatures"
using namespace llvm;
// Counters recording how each have_fma query was resolved; incremented in lowerHaveFMA().
STATISTIC(LoweredWithFMA, "Number of have_fma's that were lowered to true");
STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false");
// JIT instance defined elsewhere; have_fma() reads its target feature string when the
// calling function carries no "target-features" attribute.
extern JuliaOJIT *jl_ExecutionEngine;
// Reports whether FMA is available on this platform unconditionally, i.e. without
// looking at per-function target features (no multiversioning needed).
//
// The queried type is the part of the intrinsic's name that follows
// "julia.cpu.have_fma.". When built with _CPU_AARCH64_ defined the answer is definite:
// true for "f32" and "f64", false for any other suffix. In every other build an empty
// Optional is returned, meaning "cannot be answered unconditionally".
Optional<bool> always_have_fma(Function &intr) {
    const size_t prefix_len = strlen("julia.cpu.have_fma.");
    StringRef suffix = intr.getName().substr(prefix_len);
#if defined(_CPU_AARCH64_)
    const bool is_fma_type = (suffix == "f32") || (suffix == "f64");
    return is_fma_type;
#else
    // The suffix is only inspected on AArch64; silence the unused-variable warning.
    (void)suffix;
    return Optional<bool>();
#endif
}
// Decide whether `caller` may assume hardware FMA for the type named by `intr`.
//
// `intr` is a `julia.cpu.have_fma.$typ` function; `$typ` is read from its name.
// If always_have_fma() gives a definite answer, that answer is returned. Otherwise the
// comma-separated feature list is taken from the caller's "target-features" attribute,
// or from jl_ExecutionEngine->getTargetFeatureString() when the attribute is absent,
// and scanned for a feature that provides FMA for the requested type:
//   - _CPU_ARM_ builds: "+vfp4" covers f32 and f64, "+vfp4sp" covers f32 only;
//   - all other builds: "+fma" or "+fma4" covers f32 and f64.
// Returns false when no listed feature covers the type.
bool have_fma(Function &intr, Function &caller) {
    auto unconditional = always_have_fma(intr);
    if (unconditional.hasValue())
        return unconditional.getValue();

    StringRef typ = intr.getName().substr(strlen("julia.cpu.have_fma."));
    const bool is_f32 = typ == "f32";
    const bool is_f64 = typ == "f64";

    Attribute FSAttr = caller.getFnAttribute("target-features");
    StringRef FS =
        FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();

    SmallVector<StringRef, 6> Features;
    FS.split(Features, ',');
    for (StringRef Feature : Features) {
        // Only a positive match ends the scan. A feature that does not cover the
        // requested type (e.g. "+vfp4sp" for f64) must not hide a later one that
        // does (e.g. "+vfp4"), so the result is independent of feature order.
#if defined _CPU_ARM_
        if (Feature == "+vfp4" && (is_f32 || is_f64))
            return true;
        if (Feature == "+vfp4sp" && is_f32)
            return true;
#else
        if ((Feature == "+fma" || Feature == "+fma4") && (is_f32 || is_f64))
            return true;
#endif
    }
    return false;
}
// Rewrite the uses of one have_fma call `I` to a constant of the call's own type:
// 1 if have_fma() reports support for `caller`, 0 otherwise. The matching statistic
// counter is incremented. The call instruction itself is not removed here.
void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) {
    const bool supported = have_fma(intr, caller);
    if (supported)
        ++LoweredWithFMA;
    else
        ++LoweredWithoutFMA;
    I->replaceAllUsesWith(ConstantInt::get(I->getType(), supported ? 1 : 0));
}
// Lower every call to a function whose name starts with "julia.cpu.have_fma." in `M`
// to a constant, then delete the calls.
// Returns true if at least one call was lowered (the module changed), false otherwise.
bool lowerCPUFeatures(Module &M)
{
    // Calls whose uses have been rewritten; they are erased only after the walk so
    // that the use lists being iterated are not modified underneath the loop.
    SmallVector<Instruction*,6> Lowered;

    for (auto &Fn: M.functions()) {
        if (!Fn.getName().startswith("julia.cpu.have_fma."))
            continue;
        for (Use &FnUse: Fn.uses()) {
            // cast<> assumes that every user of the intrinsic is a call instruction.
            CallInst *Call = cast<CallInst>(FnUse.getUser());
            Function *Caller = Call->getParent()->getParent();
            lowerHaveFMA(Fn, *Caller, Call);
            Lowered.push_back(Call);
        }
    }

    if (Lowered.empty())
        return false;

    for (auto Call: Lowered)
        Call->eraseFromParent();
    assert(!verifyModule(M));
    return true;
}
// New-pass-manager entry point: runs lowerCPUFeatures() on `M`.
// Reports all analyses preserved when nothing changed; otherwise reports only the
// CFG analyses set as preserved (the lowering replaces and erases call instructions).
PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM)
{
    const bool changed = lowerCPUFeatures(M);
    if (!changed)
        return PreservedAnalyses::all();
    return PreservedAnalyses::allInSet<CFGAnalyses>();
}
namespace {
struct CPUFeaturesLegacy : public ModulePass {
static char ID;
CPUFeaturesLegacy() : ModulePass(ID) {};
bool runOnModule(Module &M)
{
return lowerCPUFeatures(M);
}
};
char CPUFeaturesLegacy::ID = 0;
static RegisterPass<CPUFeaturesLegacy>
Y("CPUFeatures",
"Lower calls to CPU feature testing intrinsics.",
false,
false);
}
// Factory for the legacy CPU-features pass. Returns a newly heap-allocated
// CPUFeaturesLegacy; releasing it is left to the caller (in this file it is handed
// straight to a legacy pass manager via add()).
Pass *createCPUFeaturesPass()
{
    Pass *LegacyPass = new CPUFeaturesLegacy();
    return LegacyPass;
}
// C-linkage entry point: appends a freshly created CPU-features pass to the
// pass manager wrapped by `PM`.
extern "C" JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM)
{
    auto *Manager = unwrap(PM);
    Manager->add(createCPUFeaturesPass());
}