Rollup of 6 pull requests #134296

Merged
merged 14 commits on Dec 14, 2024
67 changes: 32 additions & 35 deletions compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -75,10 +75,10 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {

// Encode all filenames referenced by coverage mappings in this CGU.
let filenames_buffer = global_file_table.make_filenames_buffer(tcx);

let filenames_size = filenames_buffer.len();
let filenames_val = cx.const_bytes(&filenames_buffer);
let filenames_ref = llvm_cov::hash_bytes(&filenames_buffer);
// The `llvm-cov` tool uses this hash to associate each covfun record with
// its corresponding filenames table, since the final binary will typically
// contain multiple covmap records from different compilation units.
let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);

let mut unused_function_names = Vec::new();

@@ -101,7 +101,7 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
for covfun in &covfun_records {
unused_function_names.extend(covfun.mangled_function_name_if_unused());

covfun::generate_covfun_record(cx, filenames_ref, covfun)
covfun::generate_covfun_record(cx, filenames_hash, covfun)
}

// For unused functions, we need to take their mangled names and store them
@@ -126,7 +126,7 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
// Generate the coverage map header, which contains the filenames used by
// this CGU's coverage mappings, and store it in a well-known global.
// (This is skipped if we returned early due to having no covfun records.)
generate_covmap_record(cx, covmap_version, filenames_size, filenames_val);
generate_covmap_record(cx, covmap_version, &filenames_buffer);
}
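As the new comment above notes, `llvm-cov` matches each covfun record to its filenames table through this hash. A minimal illustrative sketch of that association (hypothetical reader-side types, not rustc or LLVM code):

```rust
use std::collections::HashMap;

// Hypothetical types for illustration; the real layout is defined by LLVM's
// coverage mapping format, not by these structs.
struct CovmapRecord {
    filenames_hash: u64,    // hash of the encoded filenames buffer
    filenames: Vec<String>, // decoded filenames table for one CGU
}

struct CovfunRecord {
    filenames_hash: u64, // must equal the hash of the covmap record it belongs to
    mangled_name: String,
}

// A binary typically contains covmap records from several CGUs, so a tool
// resolves each covfun record's filenames by hash rather than by position.
fn filenames_for<'a>(
    covmaps: &'a HashMap<u64, CovmapRecord>,
    covfun: &CovfunRecord,
) -> Option<&'a [String]> {
    covmaps
        .get(&covfun.filenames_hash)
        .map(|covmap| covmap.filenames.as_slice())
}
```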

/// Maps "global" (per-CGU) file ID numbers to their underlying filenames.
@@ -225,38 +225,35 @@ fn span_file_name(tcx: TyCtxt<'_>, span: Span) -> Symbol {
/// Generates the contents of the covmap record for this CGU, which mostly
/// consists of a header and a list of filenames. The record is then stored
/// as a global variable in the `__llvm_covmap` section.
fn generate_covmap_record<'ll>(
cx: &CodegenCx<'ll, '_>,
version: u32,
filenames_size: usize,
filenames_val: &'ll llvm::Value,
) {
debug!("cov map: filenames_size = {}, 0-based version = {}", filenames_size, version);

// Create the coverage data header (Note, fields 0 and 2 are now always zero,
// as of `llvm::coverage::CovMapVersion::Version4`.)
let zero_was_n_records_val = cx.const_u32(0);
let filenames_size_val = cx.const_u32(filenames_size as u32);
let zero_was_coverage_size_val = cx.const_u32(0);
let version_val = cx.const_u32(version);
let cov_data_header_val = cx.const_struct(
&[zero_was_n_records_val, filenames_size_val, zero_was_coverage_size_val, version_val],
/*packed=*/ false,
fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_buffer: &[u8]) {
// A covmap record consists of four target-endian u32 values, followed by
// the encoded filenames table. Two of the header fields are unused in
// modern versions of the LLVM coverage mapping format, and are always 0.
// <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation>
// See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp`.
let covmap_header = cx.const_struct(
&[
cx.const_u32(0), // (unused)
cx.const_u32(filenames_buffer.len() as u32),
cx.const_u32(0), // (unused)
cx.const_u32(version),
],
/* packed */ false,
);

// Create the complete LLVM coverage data value to add to the LLVM IR
let covmap_data =
cx.const_struct(&[cov_data_header_val, filenames_val], /*packed=*/ false);

let llglobal = llvm::add_global(cx.llmod, cx.val_ty(covmap_data), &llvm_cov::covmap_var_name());
llvm::set_initializer(llglobal, covmap_data);
llvm::set_global_constant(llglobal, true);
llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage);
llvm::set_section(llglobal, &llvm_cov::covmap_section_name(cx.llmod));
let covmap_record = cx
.const_struct(&[covmap_header, cx.const_bytes(filenames_buffer)], /* packed */ false);

let covmap_global =
llvm::add_global(cx.llmod, cx.val_ty(covmap_record), &llvm_cov::covmap_var_name());
llvm::set_initializer(covmap_global, covmap_record);
llvm::set_global_constant(covmap_global, true);
llvm::set_linkage(covmap_global, llvm::Linkage::PrivateLinkage);
llvm::set_section(covmap_global, &llvm_cov::covmap_section_name(cx.llmod));
// LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
// <https://llvm.org/docs/CoverageMappingFormat.html>
llvm::set_alignment(llglobal, Align::EIGHT);
cx.add_used_global(llglobal);
llvm::set_alignment(covmap_global, Align::EIGHT);

cx.add_used_global(covmap_global);
}

/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
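For reference, the shape that `generate_covmap_record` builds out of LLVM constants can be pictured as a flat byte encoding. This is only an illustrative sketch that assumes a little-endian target; the real record is emitted as an LLVM constant struct, not serialized by hand:

```rust
// Sketch of the covmap record layout: four target-endian u32 header fields
// followed by the encoded filenames table. Fields 0 and 2 are unused in
// modern versions of the coverage mapping format and are always zero.
fn encode_covmap_record(version: u32, filenames_buffer: &[u8]) -> Vec<u8> {
    let mut out = Vec::new();
    out.extend_from_slice(&0u32.to_le_bytes()); // unused (formerly the record count)
    out.extend_from_slice(&(filenames_buffer.len() as u32).to_le_bytes());
    out.extend_from_slice(&0u32.to_le_bytes()); // unused (formerly the coverage size)
    out.extend_from_slice(&version.to_le_bytes());
    out.extend_from_slice(filenames_buffer);
    out // the emitted global is also 8-byte aligned, per the format docs
}
```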
59 changes: 29 additions & 30 deletions compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -136,7 +136,7 @@ fn fill_region_tables<'tcx>(
/// as a global variable in the `__llvm_covfun` section.
pub(crate) fn generate_covfun_record<'tcx>(
cx: &CodegenCx<'_, 'tcx>,
filenames_ref: u64,
filenames_hash: u64,
covfun: &CovfunRecord<'tcx>,
) {
let &CovfunRecord {
@@ -155,46 +155,45 @@ pub(crate) fn generate_covfun_record<'tcx>(
regions,
);

// Concatenate the encoded coverage mappings
let coverage_mapping_size = coverage_mapping_buffer.len();
let coverage_mapping_val = cx.const_bytes(&coverage_mapping_buffer);

// A covfun record consists of four target-endian integers, followed by the
// encoded mapping data in bytes. Note that the length field is 32 bits.
// <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation>
// See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp` and
// `COVMAP_V3` in `src/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc`.
let func_name_hash = llvm_cov::hash_bytes(mangled_function_name.as_bytes());
let func_name_hash_val = cx.const_u64(func_name_hash);
let coverage_mapping_size_val = cx.const_u32(coverage_mapping_size as u32);
let source_hash_val = cx.const_u64(source_hash);
let filenames_ref_val = cx.const_u64(filenames_ref);
let func_record_val = cx.const_struct(
let covfun_record = cx.const_struct(
&[
func_name_hash_val,
coverage_mapping_size_val,
source_hash_val,
filenames_ref_val,
coverage_mapping_val,
cx.const_u64(func_name_hash),
cx.const_u32(coverage_mapping_buffer.len() as u32),
cx.const_u64(source_hash),
cx.const_u64(filenames_hash),
cx.const_bytes(&coverage_mapping_buffer),
],
/*packed=*/ true,
// This struct needs to be packed, so that the 32-bit length field
// doesn't have unexpected padding.
true,
);

// Choose a variable name to hold this function's covfun data.
// Functions that are used have a suffix ("u") to distinguish them from
// unused copies of the same function (from different CGUs), so that if a
// linker sees both it won't discard the used copy's data.
let func_record_var_name =
CString::new(format!("__covrec_{:X}{}", func_name_hash, if is_used { "u" } else { "" }))
.unwrap();
debug!("function record var name: {:?}", func_record_var_name);

let llglobal = llvm::add_global(cx.llmod, cx.val_ty(func_record_val), &func_record_var_name);
llvm::set_initializer(llglobal, func_record_val);
llvm::set_global_constant(llglobal, true);
llvm::set_linkage(llglobal, llvm::Linkage::LinkOnceODRLinkage);
llvm::set_visibility(llglobal, llvm::Visibility::Hidden);
llvm::set_section(llglobal, cx.covfun_section_name());
let u = if is_used { "u" } else { "" };
let covfun_var_name = CString::new(format!("__covrec_{func_name_hash:X}{u}")).unwrap();
debug!("function record var name: {covfun_var_name:?}");

let covfun_global = llvm::add_global(cx.llmod, cx.val_ty(covfun_record), &covfun_var_name);
llvm::set_initializer(covfun_global, covfun_record);
llvm::set_global_constant(covfun_global, true);
llvm::set_linkage(covfun_global, llvm::Linkage::LinkOnceODRLinkage);
llvm::set_visibility(covfun_global, llvm::Visibility::Hidden);
llvm::set_section(covfun_global, cx.covfun_section_name());
// LLVM's coverage mapping format specifies 8-byte alignment for items in this section.
// <https://llvm.org/docs/CoverageMappingFormat.html>
llvm::set_alignment(llglobal, Align::EIGHT);
llvm::set_alignment(covfun_global, Align::EIGHT);
if cx.target_spec().supports_comdat() {
llvm::set_comdat(cx.llmod, llglobal, &func_record_var_name);
llvm::set_comdat(cx.llmod, covfun_global, &covfun_var_name);
}
cx.add_used_global(llglobal);

cx.add_used_global(covfun_global);
}
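The packed struct assembled above can likewise be sketched as a flat byte layout (an illustration assuming a little-endian target; rustc emits a packed LLVM constant rather than serializing bytes):

```rust
// Sketch of the covfun record layout: a packed header (u64, u32, u64, u64)
// followed by the encoded coverage mapping data. Packing matters because the
// 32-bit length field must not be followed by implicit padding.
fn encode_covfun_record(
    func_name_hash: u64,
    source_hash: u64,
    filenames_hash: u64,
    coverage_mapping: &[u8],
) -> Vec<u8> {
    let mut out = Vec::new();
    out.extend_from_slice(&func_name_hash.to_le_bytes());
    out.extend_from_slice(&(coverage_mapping.len() as u32).to_le_bytes()); // 32-bit length
    out.extend_from_slice(&source_hash.to_le_bytes());
    out.extend_from_slice(&filenames_hash.to_le_bytes());
    out.extend_from_slice(coverage_mapping);
    out
}
```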
40 changes: 23 additions & 17 deletions compiler/rustc_parse/src/lexer/mod.rs
@@ -69,24 +69,30 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
token: Token::dummy(),
diag_info: TokenTreeDiagInfo::default(),
};
let (_open_spacing, stream, res) = lexer.lex_token_trees(/* is_delimited */ false);
let unmatched_delims = lexer.diag_info.unmatched_delims;

if res.is_ok() && unmatched_delims.is_empty() {
Ok(stream)
} else {
// Return error if there are unmatched delimiters or unclosed delimiters.
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
// because the delimiter mismatch is more likely to be the root cause of error
let mut buffer: Vec<_> = unmatched_delims
.into_iter()
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
.collect();
if let Err(errs) = res {
// Add unclosing delimiter or diff marker errors
buffer.extend(errs);
let res = lexer.lex_token_trees(/* is_delimited */ false);

let mut unmatched_delims: Vec<_> = lexer
.diag_info
.unmatched_delims
.into_iter()
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
.collect();

match res {
Ok((_open_spacing, stream)) => {
if unmatched_delims.is_empty() {
Ok(stream)
} else {
// Return error if there are unmatched delimiters or unclosed delimiters.
Err(unmatched_delims)
}
}
Err(errs) => {
// We emit delimiter-mismatch errors first, then the unclosed-delimiter
// errors, because a delimiter mismatch is more likely to be the root cause.
unmatched_delims.extend(errs);
Err(unmatched_delims)
}
Err(buffer)
}
}

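The shape of this refactor — folding a trailing `Result<(), E>` out of a tuple return into a single `Result` — can be shown with a small standalone sketch (simplified types, not the actual lexer API):

```rust
// Before: success values and a separate error result travel together,
// and every caller has to remember to check the `Result` field.
fn lex_old(input: &str) -> (usize, Vec<char>, Result<(), Vec<String>>) {
    (0, input.chars().collect(), Ok(()))
}

// After: the return type says directly that you either get the values
// or a list of diagnostics, so callers can match once or use `?`.
fn lex_new(input: &str) -> Result<(usize, Vec<char>), Vec<String>> {
    Ok((0, input.chars().collect()))
}

fn caller(input: &str) -> Result<Vec<char>, Vec<String>> {
    let (_spacing, stream) = lex_new(input)?; // errors propagate directly
    Ok(stream)
}
```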
94 changes: 14 additions & 80 deletions compiler/rustc_parse/src/lexer/tokentrees.rs
@@ -1,20 +1,18 @@
use rustc_ast::token::{self, Delimiter, Token};
use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
use rustc_ast_pretty::pprust::token_to_string;
use rustc_errors::{Applicability, Diag};
use rustc_span::symbol::kw;
use rustc_errors::Diag;

use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level};
use super::{Lexer, UnmatchedDelim};
use crate::Parser;

impl<'psess, 'src> Lexer<'psess, 'src> {
// Lex into a token stream. The `Spacing` in the result is that of the
// opening delimiter.
pub(super) fn lex_token_trees(
&mut self,
is_delimited: bool,
) -> (Spacing, TokenStream, Result<(), Vec<Diag<'psess>>>) {
) -> Result<(Spacing, TokenStream), Vec<Diag<'psess>>> {
// Move past the opening delimiter.
let open_spacing = self.bump_minimal();

@@ -27,25 +27,25 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
buf.push(match self.lex_token_tree_open_delim(delim) {
Ok(val) => val,
Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)),
Err(errs) => return Err(errs),
})
}
token::CloseDelim(delim) => {
// Invisible delimiters cannot occur here because `TokenTreesReader` parses
// code directly from strings, with no macro expansion involved.
debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
return (
open_spacing,
TokenStream::new(buf),
if is_delimited { Ok(()) } else { Err(vec![self.close_delim_err(delim)]) },
);
return if is_delimited {
Ok((open_spacing, TokenStream::new(buf)))
} else {
Err(vec![self.close_delim_err(delim)])
};
}
token::Eof => {
return (
open_spacing,
TokenStream::new(buf),
if is_delimited { Err(vec![self.eof_err()]) } else { Ok(()) },
);
return if is_delimited {
Err(vec![self.eof_err()])
} else {
Ok((open_spacing, TokenStream::new(buf)))
};
}
_ => {
// Get the next normal token.
@@ -107,10 +105,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
// Lex the token trees within the delimiters.
// We stop at any delimiter so we can try to recover if the user
// uses an incorrect delimiter.
let (open_spacing, tts, res) = self.lex_token_trees(/* is_delimited */ true);
if let Err(errs) = res {
return Err(self.unclosed_delim_err(tts, errs));
}
let (open_spacing, tts) = self.lex_token_trees(/* is_delimited */ true)?;

// Expand to cover the entire delimited token tree.
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
@@ -247,67 +242,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
this_spacing
}

fn unclosed_delim_err(
&mut self,
tts: TokenStream,
mut errs: Vec<Diag<'psess>>,
) -> Vec<Diag<'psess>> {
// If there are unclosed delims, see if there are diff markers and if so, point them
// out instead of complaining about the unclosed delims.
let mut parser = Parser::new(self.psess, tts, None);
let mut diff_errs = vec![];
// Suggest removing a `{` we think appears in an `if`/`while` condition.
// We want to suggest removing a `{` only if we think we're in an `if`/`while` condition,
// but we have no way of tracking this in the lexer itself, so we piggyback on the parser.
let mut in_cond = false;
while parser.token != token::Eof {
if let Err(diff_err) = parser.err_vcs_conflict_marker() {
diff_errs.push(diff_err);
} else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) {
in_cond = true;
} else if matches!(
parser.token.kind,
token::CloseDelim(Delimiter::Brace) | token::FatArrow
) {
// End of the `if`/`while` body, or the end of a `match` guard.
in_cond = false;
} else if in_cond && parser.token == token::OpenDelim(Delimiter::Brace) {
// Store the `&&` and `let` to use their spans later when creating the diagnostic
let maybe_andand = parser.look_ahead(1, |t| t.clone());
let maybe_let = parser.look_ahead(2, |t| t.clone());
if maybe_andand == token::OpenDelim(Delimiter::Brace) {
// This might be the beginning of the `if`/`while` body (i.e., the end of the
// condition).
in_cond = false;
} else if maybe_andand == token::AndAnd && maybe_let.is_keyword(kw::Let) {
let mut err = parser.dcx().struct_span_err(
parser.token.span,
"found a `{` in the middle of a let-chain",
);
err.span_suggestion(
parser.token.span,
"consider removing this brace to parse the `let` as part of the same chain",
"",
Applicability::MachineApplicable,
);
err.span_label(
maybe_andand.span.to(maybe_let.span),
"you might have meant to continue the let-chain here",
);
errs.push(err);
}
}
parser.bump();
}
if !diff_errs.is_empty() {
for err in errs {
err.cancel();
}
return diff_errs;
}
errs
}

fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
// An unexpected closing delimiter (i.e., there is no matching opening delimiter).
let token_str = token_to_string(&self.token);
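The restructured early returns in `lex_token_trees` encode a simple rule: a closing delimiter ends a delimited stream but is an error at the top level, while end of input is the reverse. A standalone sketch of that rule (simplified token handling, not the real lexer):

```rust
// Simplified illustration: `is_delimited` decides whether a stray `)` or an
// early end of input counts as success or as an error, mirroring the early
// returns in the new code above.
fn lex(tokens: &[&str], is_delimited: bool) -> Result<Vec<String>, Vec<String>> {
    let mut buf = Vec::new();
    for &tok in tokens {
        match tok {
            ")" => {
                // A closing delimiter terminates a delimited stream, but has no
                // matching opener at the top level.
                return if is_delimited {
                    Ok(buf)
                } else {
                    Err(vec!["unexpected closing delimiter".to_string()])
                };
            }
            other => buf.push(other.to_string()),
        }
    }
    // End of input: fine at the top level, an unclosed delimiter otherwise.
    if is_delimited {
        Err(vec!["unclosed delimiter".to_string()])
    } else {
        Ok(buf)
    }
}
```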