From 0b87af9d4f7c6faa9e89496609f016dc3e3977e1 Mon Sep 17 00:00:00 2001
From: Mrmaxmeier <Mrmaxmeier@gmail.com>
Date: Sat, 27 Apr 2024 23:14:36 +0200
Subject: [PATCH 1/3] Add `-Z embed-source=yes` to embed source code in DWARF
 debug info

---
 .../src/debuginfo/metadata.rs                 |  9 +++++++
 compiler/rustc_codegen_llvm/src/llvm/ffi.rs   |  2 ++
 compiler/rustc_interface/src/tests.rs         |  1 +
 .../rustc_llvm/llvm-wrapper/RustWrapper.cpp   |  9 +++++--
 compiler/rustc_session/messages.ftl           |  6 +++++
 compiler/rustc_session/src/errors.rs          | 14 +++++++++++
 compiler/rustc_session/src/options.rs         |  2 ++
 compiler/rustc_session/src/session.rs         | 25 +++++++++++++++++--
 .../src/compiler-flags/embed-source.md        | 12 +++++++++
 9 files changed, 76 insertions(+), 4 deletions(-)
 create mode 100644 src/doc/unstable-book/src/compiler-flags/embed-source.md

diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index ad63858861261..701ea62b21a7d 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -629,6 +629,9 @@ pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) ->
         };
         let hash_value = hex_encode(source_file.src_hash.hash_bytes());
 
+        let source =
+            cx.sess().opts.unstable_opts.embed_source.then_some(()).and(source_file.src.as_ref());
+
         unsafe {
             llvm::LLVMRustDIBuilderCreateFile(
                 DIB(cx),
@@ -639,6 +642,8 @@ pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) ->
                 hash_kind,
                 hash_value.as_ptr().cast(),
                 hash_value.len(),
+                source.map_or(ptr::null(), |x| x.as_ptr().cast()),
+                source.map_or(0, |x| x.len()),
             )
         }
     }
@@ -659,6 +664,8 @@ pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
             llvm::ChecksumKind::None,
             hash_value.as_ptr().cast(),
             hash_value.len(),
+            ptr::null(),
+            0,
         )
     })
 }
@@ -943,6 +950,8 @@ pub fn build_compile_unit_di_node<'ll, 'tcx>(
             llvm::ChecksumKind::None,
             ptr::null(),
             0,
+            ptr::null(),
+            0,
         );
 
         let unit_metadata = llvm::LLVMRustDIBuilderCreateCompileUnit(
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index c8e0e075eeabc..faa675b66c8a1 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -1853,6 +1853,8 @@ extern "C" {
         CSKind: ChecksumKind,
         Checksum: *const c_char,
         ChecksumLen: size_t,
+        Source: *const c_char,
+        SourceLen: size_t,
     ) -> &'a DIFile;
 
     pub fn LLVMRustDIBuilderCreateSubroutineType<'a>(
diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs
index ce3b2f77f210a..c4704e38ce6fa 100644
--- a/compiler/rustc_interface/src/tests.rs
+++ b/compiler/rustc_interface/src/tests.rs
@@ -773,6 +773,7 @@ fn test_unstable_options_tracking_hash() {
     tracked!(direct_access_external_data, Some(true));
     tracked!(dual_proc_macros, true);
     tracked!(dwarf_version, Some(5));
+    tracked!(embed_source, true);
     tracked!(emit_thin_lto, false);
     tracked!(export_executable_symbols, true);
     tracked!(fewer_names, Some(true));
diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
index 4cdd8af1008c0..6e700c31e6763 100644
--- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp
@@ -901,14 +901,19 @@ extern "C" LLVMMetadataRef
 LLVMRustDIBuilderCreateFile(LLVMRustDIBuilderRef Builder, const char *Filename,
                             size_t FilenameLen, const char *Directory,
                             size_t DirectoryLen, LLVMRustChecksumKind CSKind,
-                            const char *Checksum, size_t ChecksumLen) {
+                            const char *Checksum, size_t ChecksumLen,
+                            const char *Source, size_t SourceLen) {
 
   std::optional<DIFile::ChecksumKind> llvmCSKind = fromRust(CSKind);
   std::optional<DIFile::ChecksumInfo<StringRef>> CSInfo{};
   if (llvmCSKind)
     CSInfo.emplace(*llvmCSKind, StringRef{Checksum, ChecksumLen});
+  std::optional<StringRef> oSource{};
+  if (Source)
+    oSource = StringRef(Source, SourceLen);
   return wrap(Builder->createFile(StringRef(Filename, FilenameLen),
-                                  StringRef(Directory, DirectoryLen), CSInfo));
+                                  StringRef(Directory, DirectoryLen), CSInfo,
+                                  oSource));
 }
 
 extern "C" LLVMMetadataRef
diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl
index b84280a3ccf3f..afd5360c81194 100644
--- a/compiler/rustc_session/messages.ftl
+++ b/compiler/rustc_session/messages.ftl
@@ -14,6 +14,12 @@ session_crate_name_empty = crate name must not be empty
 
 session_crate_name_invalid = crate names cannot start with a `-`, but `{$s}` has a leading hyphen
 
+session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at least `-Z dwarf-version=5` but DWARF version is {$dwarf_version}
+
+session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled
+
+session_embed_source_requires_llvm_backend = `-Zembed-source=y` is only supported on the LLVM codegen backend
+
 session_expr_parentheses_needed = parentheses are required to parse this as an expression
 
 session_failed_to_create_profiler = failed to create profiler: {$err}
diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs
index 5cc54a5855bbe..f708109b87a0c 100644
--- a/compiler/rustc_session/src/errors.rs
+++ b/compiler/rustc_session/src/errors.rs
@@ -165,6 +165,20 @@ pub(crate) struct UnsupportedDwarfVersion {
     pub(crate) dwarf_version: u32,
 }
 
+#[derive(Diagnostic)]
+#[diag(session_embed_source_insufficient_dwarf_version)]
+pub(crate) struct EmbedSourceInsufficientDwarfVersion {
+    pub(crate) dwarf_version: u32,
+}
+
+#[derive(Diagnostic)]
+#[diag(session_embed_source_requires_debug_info)]
+pub(crate) struct EmbedSourceRequiresDebugInfo;
+
+#[derive(Diagnostic)]
+#[diag(session_embed_source_requires_llvm_backend)]
+pub(crate) struct EmbedSourceRequiresLLVMBackend;
+
 #[derive(Diagnostic)]
 #[diag(session_target_stack_protector_not_supported)]
 pub(crate) struct StackProtectorNotSupportedForTarget<'a> {
diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs
index bf54aae1cfeb0..13aac6669fe4f 100644
--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@@ -1708,6 +1708,8 @@ options! {
         them only if an error has not been emitted"),
     ehcont_guard: bool = (false, parse_bool, [TRACKED],
         "generate Windows EHCont Guard tables"),
+    embed_source: bool = (false, parse_bool, [TRACKED],
+        "embed source text in DWARF debug sections (default: no)"),
     emit_stack_sizes: bool = (false, parse_bool, [UNTRACKED],
         "emit a section containing stack size metadata (default: no)"),
     emit_thin_lto: bool = (true, parse_bool, [TRACKED],
diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs
index be67baf57f6dc..634f3684b51aa 100644
--- a/compiler/rustc_session/src/session.rs
+++ b/compiler/rustc_session/src/session.rs
@@ -37,8 +37,9 @@ use rustc_target::spec::{
 use crate::code_stats::CodeStats;
 pub use crate::code_stats::{DataTypeKind, FieldInfo, FieldKind, SizeKind, VariantInfo};
 use crate::config::{
-    self, CoverageLevel, CrateType, ErrorOutputType, FunctionReturn, Input, InstrumentCoverage,
-    OptLevel, OutFileName, OutputType, RemapPathScopeComponents, SwitchWithOptPath,
+    self, CoverageLevel, CrateType, DebugInfo, ErrorOutputType, FunctionReturn, Input,
+    InstrumentCoverage, OptLevel, OutFileName, OutputType, RemapPathScopeComponents,
+    SwitchWithOptPath,
 };
 use crate::parse::{add_feature_diagnostics, ParseSess};
 use crate::search_paths::{PathKind, SearchPath};
@@ -1300,6 +1301,26 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
             .emit_err(errors::SplitDebugInfoUnstablePlatform { debuginfo: sess.split_debuginfo() });
     }
 
+    if sess.opts.unstable_opts.embed_source {
+        let dwarf_version =
+            sess.opts.unstable_opts.dwarf_version.unwrap_or(sess.target.default_dwarf_version);
+
+        let uses_llvm_backend =
+            matches!(sess.opts.unstable_opts.codegen_backend.as_deref(), None | Some("llvm"));
+
+        if dwarf_version < 5 {
+            sess.dcx().emit_warn(errors::EmbedSourceInsufficientDwarfVersion { dwarf_version });
+        }
+
+        if sess.opts.debuginfo == DebugInfo::None {
+            sess.dcx().emit_warn(errors::EmbedSourceRequiresDebugInfo);
+        }
+
+        if !uses_llvm_backend {
+            sess.dcx().emit_warn(errors::EmbedSourceRequiresLLVMBackend);
+        }
+    }
+
     if sess.opts.unstable_opts.instrument_xray.is_some() && !sess.target.options.supports_xray {
         sess.dcx().emit_err(errors::InstrumentationNotSupported { us: "XRay".to_string() });
     }
diff --git a/src/doc/unstable-book/src/compiler-flags/embed-source.md b/src/doc/unstable-book/src/compiler-flags/embed-source.md
new file mode 100644
index 0000000000000..01a11e3779712
--- /dev/null
+++ b/src/doc/unstable-book/src/compiler-flags/embed-source.md
@@ -0,0 +1,12 @@
+# `embed-source`
+
+This flag controls whether the compiler embeds the program source code text into
+the object debug information section. It takes one of the following values:
+
+* `y`, `yes`, `on` or `true`: put source code in debug info.
+* `n`, `no`, `off`, `false` or no value: omit source code from debug info (the default).
+
+This flag is ignored in configurations that don't emit DWARF debug information
+and on non-LLVM backends. `-Z embed-source` requires DWARF version 5 or later:
+pass `-Z dwarf-version=5` to select the compiler's DWARF target version and `-g`
+to enable debug info generation.

From 608901b9c07d7d2f3e2803378c4f0cc07c61bc36 Mon Sep 17 00:00:00 2001
From: Mrmaxmeier <Mrmaxmeier@gmail.com>
Date: Tue, 16 Jul 2024 20:50:28 +0200
Subject: [PATCH 2/3] Add run-make test for -Zembed-source=yes

---
 tests/run-make/embed-source-dwarf/main.rs  |  2 +
 tests/run-make/embed-source-dwarf/rmake.rs | 70 ++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 tests/run-make/embed-source-dwarf/main.rs
 create mode 100644 tests/run-make/embed-source-dwarf/rmake.rs

diff --git a/tests/run-make/embed-source-dwarf/main.rs b/tests/run-make/embed-source-dwarf/main.rs
new file mode 100644
index 0000000000000..c80af84f41415
--- /dev/null
+++ b/tests/run-make/embed-source-dwarf/main.rs
@@ -0,0 +1,2 @@
+// hello
+fn main() {}
diff --git a/tests/run-make/embed-source-dwarf/rmake.rs b/tests/run-make/embed-source-dwarf/rmake.rs
new file mode 100644
index 0000000000000..06d550121b0de
--- /dev/null
+++ b/tests/run-make/embed-source-dwarf/rmake.rs
@@ -0,0 +1,70 @@
+//@ ignore-windows
+//@ ignore-apple
+
+// LLVM 17's embed-source implementation requires that source code is attached
+// for all files in the output DWARF debug info. This restriction was lifted in
+// LLVM 18 (87e22bdd2bd6d77d782f9d64b3e3ae5bdcd5080d).
+//@ min-llvm-version: 18
+
+// This test should be replaced with one in tests/debuginfo once we can easily
+// tell via GDB or LLDB if debuginfo contains source code. Cheap tricks in LLDB
+// like setting an invalid source map path don't appear to work; maybe this'll
+// become easier once GDB supports DWARFv6?
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::rc::Rc;
+
+use gimli::{AttributeValue, EndianRcSlice, Reader, RunTimeEndian};
+use object::{Object, ObjectSection};
+use run_make_support::{gimli, object, rfs, rustc};
+// Compiles main.rs with `-Zembed-source=yes` and checks the DWARF line programs for its text.
+fn main() {
+    let output = PathBuf::from("embed-source-main");
+    rustc()
+        .input("main.rs")
+        .output(&output)
+        .arg("-g")
+        .arg("-Zembed-source=yes")
+        .arg("-Zdwarf-version=5")
+        .run();
+    let output = rfs::read(output);
+    let obj = object::File::parse(output.as_slice()).unwrap();
+    let endian = if obj.is_little_endian() { RunTimeEndian::Little } else { RunTimeEndian::Big };
+    let dwarf = gimli::Dwarf::load(|section| -> Result<_, ()> {
+        let data = obj.section_by_name(section.name()).map(|s| s.uncompressed_data().unwrap());
+        Ok(EndianRcSlice::new(Rc::from(data.unwrap_or_default().as_ref()), endian))
+    })
+    .unwrap();
+    // Maps each file path to the non-empty source text found for it.
+    let mut sources = HashMap::new();
+    // Walk the file entries of every unit's line program header.
+    let mut iter = dwarf.units();
+    while let Some(header) = iter.next().unwrap() {
+        let unit = dwarf.unit(header).unwrap();
+        let unit = unit.unit_ref(&dwarf);
+
+        if let Some(program) = &unit.line_program {
+            let header = program.header();
+            for file in header.file_names() {
+                if let Some(source) = file.source() {
+                    let path = unit
+                        .attr_string(file.path_name())
+                        .unwrap()
+                        .to_string_lossy()
+                        .unwrap()
+                        .to_string();
+                    let source =
+                        unit.attr_string(source).unwrap().to_string_lossy().unwrap().to_string();
+                    if !source.is_empty() {
+                        sources.insert(path, source);
+                    }
+                }
+            }
+        }
+    }
+    // Exactly one file (main.rs) must have non-empty source, equal to the test input's text.
+    dbg!(&sources);
+    assert_eq!(sources.len(), 1);
+    assert_eq!(sources.get("main.rs").unwrap(), "// hello\nfn main() {}\n");
+}

From 6899f5a8e12986ee16e028f1597963d0de668aca Mon Sep 17 00:00:00 2001
From: Mrmaxmeier <Mrmaxmeier@gmail.com>
Date: Tue, 6 Aug 2024 20:31:12 +0200
Subject: [PATCH 3/3] -Zembed-source: Don't try to warn about incompatible
 codegen backends

---
 compiler/rustc_session/messages.ftl   | 2 --
 compiler/rustc_session/src/errors.rs  | 4 ----
 compiler/rustc_session/src/session.rs | 7 -------
 3 files changed, 13 deletions(-)

diff --git a/compiler/rustc_session/messages.ftl b/compiler/rustc_session/messages.ftl
index afd5360c81194..01c371ee49884 100644
--- a/compiler/rustc_session/messages.ftl
+++ b/compiler/rustc_session/messages.ftl
@@ -18,8 +18,6 @@ session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at
 
 session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled
 
-session_embed_source_requires_llvm_backend = `-Zembed-source=y` is only supported on the LLVM codegen backend
-
 session_expr_parentheses_needed = parentheses are required to parse this as an expression
 
 session_failed_to_create_profiler = failed to create profiler: {$err}
diff --git a/compiler/rustc_session/src/errors.rs b/compiler/rustc_session/src/errors.rs
index f708109b87a0c..15bbd4ff7bf4b 100644
--- a/compiler/rustc_session/src/errors.rs
+++ b/compiler/rustc_session/src/errors.rs
@@ -175,10 +175,6 @@ pub(crate) struct EmbedSourceInsufficientDwarfVersion {
 #[diag(session_embed_source_requires_debug_info)]
 pub(crate) struct EmbedSourceRequiresDebugInfo;
 
-#[derive(Diagnostic)]
-#[diag(session_embed_source_requires_llvm_backend)]
-pub(crate) struct EmbedSourceRequiresLLVMBackend;
-
 #[derive(Diagnostic)]
 #[diag(session_target_stack_protector_not_supported)]
 pub(crate) struct StackProtectorNotSupportedForTarget<'a> {
diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs
index 634f3684b51aa..e2ef144e732a4 100644
--- a/compiler/rustc_session/src/session.rs
+++ b/compiler/rustc_session/src/session.rs
@@ -1305,9 +1305,6 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
         let dwarf_version =
             sess.opts.unstable_opts.dwarf_version.unwrap_or(sess.target.default_dwarf_version);
 
-        let uses_llvm_backend =
-            matches!(sess.opts.unstable_opts.codegen_backend.as_deref(), None | Some("llvm"));
-
         if dwarf_version < 5 {
             sess.dcx().emit_warn(errors::EmbedSourceInsufficientDwarfVersion { dwarf_version });
         }
@@ -1315,10 +1312,6 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
         if sess.opts.debuginfo == DebugInfo::None {
             sess.dcx().emit_warn(errors::EmbedSourceRequiresDebugInfo);
         }
-
-        if !uses_llvm_backend {
-            sess.dcx().emit_warn(errors::EmbedSourceRequiresLLVMBackend);
-        }
     }
 
     if sess.opts.unstable_opts.instrument_xray.is_some() && !sess.target.options.supports_xray {