Skip to content

Commit

Permalink
PR #11424: [XLA:GPU] Add SPIRV-LLVM-Translator and translation pass
Browse files Browse the repository at this point in the history
Imported from GitHub PR #11424

This is a sub-PR of #9042 that adds the SPIRV-LLVM-Translator dependency and the corresponding LLVM-to-SPIR-V translation pass.
Copybara import of the project:

--
6d8ce76 by Sheng, Yang <[email protected]>:

Add SPIRV-LLVM-Translator and translation pass

update comments and SPIRV-LLVM-Translator commit

Merging this change closes #11424

COPYBARA_INTEGRATE_REVIEW=#11424 from Intel-tensorflow:yang/llvm-spirv 6d8ce76
PiperOrigin-RevId: 661186664
  • Loading branch information
ShengYang1 authored and sergachev committed Aug 12, 2024
1 parent 0d52bbe commit 8ea8e2d
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 1 deletion.
6 changes: 6 additions & 0 deletions xla/service/gpu/llvm_gpu_backend/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ load(
"@local_config_rocm//rocm:build_defs.bzl",
"if_rocm_is_configured",
)
load(
"@local_config_sycl//sycl:build_defs.bzl",
"if_sycl_is_configured",
)
load(
"@tsl//tsl/platform/default:cuda_build_defs.bzl",
"if_cuda_is_configured",
Expand Down Expand Up @@ -88,6 +92,8 @@ cc_library(
"@local_config_rocm//rocm:rocm_headers",
"@llvm-project//llvm:AMDGPUCodeGen",
"@llvm-project//llvm:AMDGPUAsmParser",
]) + if_sycl_is_configured([
"@spirv_llvm_translator//:spirv_llvm_translator",
]),
)

Expand Down
99 changes: 98 additions & 1 deletion xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ limitations under the License.
#include "xla/stream_executor/cuda/cuda_asm_compiler.h"
#endif

#if TENSORFLOW_USE_SYCL
#include "LLVMSPIRVLib.h"
#include "LLVMSPIRVOpts.h"
#endif // TENSORFLOW_USE_SYCL

namespace xla {
namespace gpu {
namespace {
Expand Down Expand Up @@ -418,7 +423,9 @@ absl::Status LinkAndOptimizeModule(
llvm::CGSCCAnalysisManager cgam;
llvm::ModuleAnalysisManager mam;

fam.registerPass([&] { return target_machine->getTargetIRAnalysis(); });
if (target_machine) {
fam.registerPass([&] { return target_machine->getTargetIRAnalysis(); });
}

llvm::PipelineTuningOptions pto;
pto.SLPVectorization = true;
Expand Down Expand Up @@ -1137,5 +1144,95 @@ absl::StatusOr<std::vector<uint8_t>> CompileToHsaco(

} // namespace amdgpu

namespace {

// Returns the llvm::TargetMachine used for SPIR lowering. SPIR-V emission
// goes through the SPIRV-LLVM-Translator library rather than an LLVM
// codegen backend, so there is no TargetMachine to construct and this
// deliberately returns nullptr. LinkAndOptimizeModule tolerates a null
// target machine (it skips registering the target IR analysis).
std::unique_ptr<llvm::TargetMachine> SPIRGetTargetMachine(
    llvm::Triple target_triple, se::GpuComputeCapability gpu_version,
    const DebugOptions& debug_options) {
  return nullptr;
}

// Links target-specific device bitcode into `module`. Unlike the CUDA
// (libdevice) and ROCm (device libs) backends, SPIR has no vendor bitcode
// library to link, so this is a no-op that always succeeds; it exists only
// to satisfy the linker-callback signature expected by
// LinkAndOptimizeModule.
absl::Status SPIRTargetModuleLinker(
    llvm::Module* module, se::GpuComputeCapability gpu_version,
    const DebugOptions& debug_options,
    const std::string& device_bitcode_dir_path) {
  return absl::OkStatus();
}

// Serializes `module` to a SPIR-V binary using the SPIRV-LLVM-Translator.
// Returns the raw SPIR-V bytes as a string, or an Internal error if the
// translation fails. Only functional when built with SYCL support
// (TENSORFLOW_USE_SYCL); otherwise returns Unimplemented.
absl::StatusOr<std::string> EmitModuleToSpir(
    llvm::Module* module, se::GpuComputeCapability gpu_version,
    const DebugOptions& debug_options) {
#if TENSORFLOW_USE_SYCL
  // Target the highest SPIR-V version the translator supports and enable
  // all extensions; the downstream consumer is expected to reject anything
  // it cannot handle.
  SPIRV::TranslatorOpts::ExtensionsStatusMap ExtensionsStatus;
  SPIRV::TranslatorOpts opts(SPIRV::VersionNumber::MaximumVersion,
                             ExtensionsStatus);
  opts.enableAllExtensions();  // enable all SPIR-V extensions first

  std::ostringstream oss;
  std::string err;
  bool success = llvm::writeSpirv(module, opts, oss, err);
  if (!success) {
    return xla::Internal("Failed to convert LLVM module to SPIR-V: %s", err);
  }
  return oss.str();
#else
  // This path is reached only when XLA was built WITHOUT SYCL support, so
  // say that explicitly instead of the misleading "Not implemented for
  // SYCL".
  return absl::UnimplementedError(
      "EmitModuleToSpir is only available when built with SYCL support");
#endif
}

// One-time LLVM initialization for the SPIR backend. Must be called exactly
// once before compilation (CompileToSpir guards this with absl::call_once).
void SPIRBackendInit(const DebugOptions& debug_options) {
  // Tune the SLP vectorizer: disable horizontal reductions and pin the
  // vector register size to 64 bits. Presumably mirrors the flag choices of
  // the other GPU backends in this file -- confirm against
  // NVPTXBackendInit/AMDGPUBackendInit.
  FeedLLVMWithFlags({
      "-slp-vectorize-hor=false",
      "-slp-min-reg-size=64",
      "-slp-max-reg-size=64",
  });

  // Forward any user-supplied backend flags (e.g. from
  // --xla_backend_extra_options) to LLVM's command-line machinery.
  llvm_ir::InitializeLLVMCommandLineOptions(
      debug_options.xla_backend_extra_options());

  // Register the LLVM passes used later by LinkAndOptimizeModule.
  llvm::PassRegistry* registry = llvm::PassRegistry::getPassRegistry();
  InitializePasses(registry);
}

} // namespace

namespace spir {

// Optimizes `module` and lowers it to a SPIR-V binary, returned as raw
// bytes. Returns an empty vector for an empty module, or an error if
// optimization or SPIR-V emission fails.
absl::StatusOr<std::vector<uint8_t>> CompileToSpir(
    llvm::Module* module, se::GpuComputeCapability gpu_version,
    const DebugOptions& debug_options) {
  // SPIR has no device bitcode library; the empty path is threaded through
  // to SPIRTargetModuleLinker, which ignores it.
  std::string libdevice_dir_path;
  // Perform the one-time LLVM setup (flags, pass registry) on first use.
  static absl::once_flag backend_init_flag;
  absl::call_once(backend_init_flag, SPIRBackendInit, debug_options);

  std::string spir;
  {
    XLA_SCOPED_LOGGING_TIMER("Compile module " + module->getName().str());

    // If the module has no functions or globals, there's nothing to compile.
    if (module->empty() && module->global_empty()) {
      VLOG(2) << "Module '" << module->getName().str()
              << "' is empty. Skipping compilation.";
      return std::vector<uint8_t>();
    }

    llvm::Triple default_target_triple("spir64-unknown-unknown");
    // Always null for SPIR (see SPIRGetTargetMachine); LinkAndOptimizeModule
    // handles a null target machine.
    std::unique_ptr<llvm::TargetMachine> target_machine =
        SPIRGetTargetMachine(default_target_triple, gpu_version, debug_options);

    TF_RETURN_IF_ERROR(LinkAndOptimizeModule(
        module, gpu_version, debug_options, libdevice_dir_path,
        SPIRTargetModuleLinker, default_target_triple, target_machine.get(),
        kDefaultInlineThreshold));

    // Lower optimized LLVM module to SPIR.
    TF_ASSIGN_OR_RETURN(spir,
                        EmitModuleToSpir(module, gpu_version, debug_options));
  }
  return std::vector<uint8_t>(spir.begin(), spir.end());
}

} // namespace spir

} // namespace gpu
} // namespace xla
7 changes: 7 additions & 0 deletions xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ absl::StatusOr<std::vector<uint8_t>> CompileToHsaco(
const std::string& module_config_cache_key);
} // namespace amdgpu

namespace spir {
// Optimizes `module` and compiles it to a SPIR-V binary, returning the raw
// SPIR-V bytes (not the module itself). Requires a build with SYCL support;
// otherwise the emission step reports Unimplemented.
absl::StatusOr<std::vector<uint8_t>> CompileToSpir(
    llvm::Module* module, se::GpuComputeCapability gpu_version,
    const DebugOptions& debug_options);
} // namespace spir

} // namespace gpu
} // namespace xla

Expand Down

0 comments on commit 8ea8e2d

Please sign in to comment.