Skip to content

Commit

Permalink
Auto merge of #133250 - DianQK:embed-bitcode-pgo, r=nikic
Browse files Browse the repository at this point in the history
The embedded bitcode should always be prepared for LTO/ThinLTO

Fixes #115344. Fixes #117220.

There are currently two methods for generating bitcode that used for LTO. One method involves using `-C linker-plugin-lto` to emit object files as bitcode, which is the typical setting used by cargo. The other method is through `-C embed-bitcode=yes`.

When using with `-C embed-bitcode=yes -C lto=no`, we run a complete non-LTO LLVM pipeline to obtain bitcode, then the bitcode is used for LTO. We run the Call Graph Profile Pass twice on the same module.

This PR is doing something similar to LLVM's `buildFatLTODefaultPipeline`, obtaining the bitcode for embedding after running `buildThinLTOPreLinkDefaultPipeline`.

r? nikic
  • Loading branch information
bors committed Mar 1, 2025
2 parents 30508fa + a897cc0 commit cd3b56a
Show file tree
Hide file tree
Showing 20 changed files with 294 additions and 101 deletions.
9 changes: 4 additions & 5 deletions compiler/rustc_codegen_gcc/src/back/lto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -632,17 +632,16 @@ pub unsafe fn optimize_thin_module(
Arc::new(SyncContext::new(context))
}
};
let module = ModuleCodegen {
module_llvm: GccContext {
let module = ModuleCodegen::new_regular(
thin_module.name().to_string(),
GccContext {
context,
should_combine_object_files,
// TODO(antoyo): use the correct relocation model here.
relocation_model: RelocModel::Pic,
temp_dir: None,
},
name: thin_module.name().to_string(),
kind: ModuleKind::Regular,
};
);
/*{
let target = &*module.module_llvm.tm;
let llmod = module.module_llvm.llmod();
Expand Down
11 changes: 5 additions & 6 deletions compiler/rustc_codegen_gcc/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ use std::sync::Arc;
use std::time::Instant;

use gccjit::{CType, Context, FunctionType, GlobalKind};
use rustc_codegen_ssa::ModuleCodegen;
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::DebugInfoCodegenMethods;
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
use rustc_middle::dep_graph;
use rustc_middle::mir::mono::Linkage;
#[cfg(feature = "master")]
Expand Down Expand Up @@ -237,16 +237,15 @@ pub fn compile_codegen_unit(
}
}

ModuleCodegen {
name: cgu_name.to_string(),
module_llvm: GccContext {
ModuleCodegen::new_regular(
cgu_name.to_string(),
GccContext {
context: Arc::new(SyncContext::new(context)),
relocation_model: tcx.sess.relocation_model(),
should_combine_object_files: false,
temp_dir: None,
},
kind: ModuleKind::Regular,
}
)
}

(module, cost)
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_gcc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ impl WriteBackendMethods for GccCodegenBackend {
unsafe fn optimize(
_cgcx: &CodegenContext<Self>,
_dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<Self::Module>,
module: &mut ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
module.module_llvm.context.set_optimization_level(to_gcc_opt_level(config.opt_level));
Expand Down
27 changes: 15 additions & 12 deletions compiler/rustc_codegen_llvm/src/back/lto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::collections::BTreeMap;
use std::ffi::{CStr, CString};
use std::fs::File;
use std::path::Path;
use std::ptr::NonNull;
use std::sync::Arc;
use std::{io, iter, slice};

Expand Down Expand Up @@ -305,11 +306,8 @@ fn fat_lto(
assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
let (buffer, name) = serialized_modules.remove(0);
info!("no in-memory regular modules to choose from, parsing {:?}", name);
ModuleCodegen {
module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?,
name: name.into_string().unwrap(),
kind: ModuleKind::Regular,
}
let llvm_module = ModuleLlvm::parse(cgcx, &name, buffer.data(), dcx)?;
ModuleCodegen::new_regular(name.into_string().unwrap(), llvm_module)
}
};
{
Expand Down Expand Up @@ -655,14 +653,14 @@ pub(crate) fn run_pass_manager(
}

unsafe {
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
}

if cfg!(llvm_enzyme) && enable_ad {
let opt_stage = llvm::OptStage::FatLTO;
let stage = write::AutodiffStage::PostAD;
unsafe {
write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?;
}

// This is the final IR, so people should be able to inspect the optimized autodiff output.
Expand Down Expand Up @@ -729,6 +727,11 @@ impl ThinBuffer {
ThinBuffer(buffer)
}
}

pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
let mut ptr = NonNull::new(ptr).unwrap();
ThinBuffer(unsafe { ptr.as_mut() })
}
}

impl ThinBufferMethods for ThinBuffer {
Expand Down Expand Up @@ -772,11 +775,11 @@ pub(crate) unsafe fn optimize_thin_module(
// crates but for locally codegened modules we may be able to reuse
// that LLVM Context and Module.
let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx)?;
let mut module = ModuleCodegen {
module_llvm,
name: thin_module.name().to_string(),
kind: ModuleKind::Regular,
};
let mut module = ModuleCodegen::new_regular(thin_module.name(), module_llvm);
// Given that the newly created module lacks a thinlto buffer for embedding, we need to re-add it here.
if cgcx.config(ModuleKind::Regular).embed_bitcode() {
module.thin_lto_buffer = Some(thin_module.data().to_vec());
}
{
let target = &*module.module_llvm.tm;
let llmod = module.module_llvm.llmod();
Expand Down
126 changes: 83 additions & 43 deletions compiler/rustc_codegen_llvm/src/back/write.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::ffi::{CStr, CString};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::ptr::null_mut;
use std::sync::Arc;
use std::{fs, slice, str};

Expand All @@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
TargetMachineFactoryFn,
};
use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
Expand Down Expand Up @@ -551,6 +552,7 @@ pub(crate) unsafe fn llvm_optimize(
cgcx: &CodegenContext<LlvmCodegenBackend>,
dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<ModuleLlvm>,
thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>,
config: &ModuleConfig,
opt_level: config::OptLevel,
opt_stage: llvm::OptStage,
Expand Down Expand Up @@ -584,7 +586,17 @@ pub(crate) unsafe fn llvm_optimize(
vectorize_loop = config.vectorize_loop;
}
trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme);
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
if thin_lto_buffer.is_some() {
assert!(
matches!(
opt_stage,
llvm::OptStage::PreLinkNoLTO
| llvm::OptStage::PreLinkFatLTO
| llvm::OptStage::PreLinkThinLTO
),
"the bitcode for LTO can only be obtained at the pre-link stage"
);
}
let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config);
Expand Down Expand Up @@ -644,7 +656,9 @@ pub(crate) unsafe fn llvm_optimize(
config.no_prepopulate_passes,
config.verify_llvm_ir,
config.lint_llvm_ir,
using_thin_buffers,
thin_lto_buffer,
config.emit_thin_lto,
config.emit_thin_lto_summary,
config.merge_functions,
unroll_loops,
vectorize_slp,
Expand Down Expand Up @@ -675,7 +689,7 @@ pub(crate) unsafe fn llvm_optimize(
pub(crate) unsafe fn optimize(
cgcx: &CodegenContext<LlvmCodegenBackend>,
dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<ModuleLlvm>,
module: &mut ModuleCodegen<ModuleLlvm>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_optimize", &*module.name);
Expand Down Expand Up @@ -705,9 +719,53 @@ pub(crate) unsafe fn optimize(
// Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
return unsafe {
llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
// The embedded bitcode is used to run LTO/ThinLTO.
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
// this point.
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
|| config.emit_thin_lto_summary
{
Some(null_mut())
} else {
None
};
unsafe {
llvm_optimize(
cgcx,
dcx,
module,
thin_lto_buffer.as_mut(),
config,
opt_level,
opt_stage,
autodiff_stage,
)
}?;
if let Some(thin_lto_buffer) = thin_lto_buffer {
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
module.thin_lto_buffer = Some(thin_lto_buffer.data().to_vec());
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin_lto_buffer.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}
}
}
Ok(())
}
Expand Down Expand Up @@ -760,59 +818,41 @@ pub(crate) unsafe fn codegen(
// otherwise requested.

let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
let bc_summary_out =
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);

if config.bitcode_needed() {
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
let data = thin.data();

if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
bitcode_filename.to_string_lossy(),
data.len() as u64,
);
}

if config.emit_thin_lto_summary
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
{
let summary_data = thin.thin_link_data();
cgcx.prof.artifact_size(
"llvm_bitcode_summary",
thin_link_bitcode_filename.to_string_lossy(),
summary_data.len() as u64,
);

let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_emit_bitcode_summary",
&*module.name,
);
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
}
}

if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
let thin = {
let _timer = cgcx.prof.generic_activity_with_arg(
"LLVM_module_codegen_make_bitcode",
&*module.name,
);
ThinBuffer::new(llmod, config.emit_thin_lto, false)
};
let data = thin.data();
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
if let Some(bitcode_filename) = bc_out.file_name() {
cgcx.prof.artifact_size(
"llvm_bitcode",
bitcode_filename.to_string_lossy(),
data.len() as u64,
);
}
if let Err(err) = fs::write(&bc_out, data) {
dcx.emit_err(WriteBytecode { path: &bc_out, err });
}
}

if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
if config.embed_bitcode() && module.kind == ModuleKind::Regular {
let _timer = cgcx
.prof
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
let thin_bc =
module.thin_lto_buffer.as_deref().expect("cannot find embedded bitcode");
unsafe {
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &thin_bc);
}
}
}
Expand Down
8 changes: 2 additions & 6 deletions compiler/rustc_codegen_llvm/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
use std::time::Instant;

use rustc_codegen_ssa::ModuleCodegen;
use rustc_codegen_ssa::base::maybe_create_entry_wrapper;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_middle::dep_graph;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
Expand Down Expand Up @@ -133,11 +133,7 @@ pub(crate) fn compile_codegen_unit(
}
}

ModuleCodegen {
name: cgu_name.to_string(),
module_llvm: llvm_module,
kind: ModuleKind::Regular,
}
ModuleCodegen::new_regular(cgu_name.to_string(), llvm_module)
}

(module, cost)
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_llvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
unsafe fn optimize(
cgcx: &CodegenContext<Self>,
dcx: DiagCtxtHandle<'_>,
module: &ModuleCodegen<Self::Module>,
module: &mut ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
unsafe { back::write::optimize(cgcx, dcx, module, config) }
Expand Down
4 changes: 3 additions & 1 deletion compiler/rustc_codegen_llvm/src/llvm/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2425,7 +2425,9 @@ unsafe extern "C" {
NoPrepopulatePasses: bool,
VerifyIR: bool,
LintIR: bool,
UseThinLTOBuffers: bool,
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
EmitThinLTO: bool,
EmitThinLTOSummary: bool,
MergeFunctions: bool,
UnrollLoops: bool,
SLPVectorize: bool,
Expand Down
Loading

0 comments on commit cd3b56a

Please sign in to comment.