From b94950d1befe2e2f776b8b5165d7e41e7d441ba9 Mon Sep 17 00:00:00 2001
From: Rich Kadel <richkadel@google.com>
Date: Sun, 2 May 2021 02:28:38 -0700
Subject: [PATCH] Update coverage docs

---
 src/llvm-coverage-instrumentation.md | 192 ++++++++++++++-------------
 1 file changed, 98 insertions(+), 94 deletions(-)
diff --git a/src/llvm-coverage-instrumentation.md b/src/llvm-coverage-instrumentation.md
index 8a044849f5..7c722936d6 100644
--- a/src/llvm-coverage-instrumentation.md
+++ b/src/llvm-coverage-instrumentation.md
@@ -28,11 +28,11 @@ them), and generate various reports for analysis, for example:
 <br/>
 
 Detailed instructions and examples are documented in the
-[Rust Unstable Book (under _source-based-code-coverage_)][unstable-book-sbcc].
+[Rust Unstable Book (under _compiler-flags/instrument-coverage_)][unstable-book-instrument-coverage].
 
 [llvm-instrprof-increment]: https://llvm.org/docs/LangRef.html#llvm-instrprof-increment-intrinsic
-[Coverage Map]: https://llvm.org/docs/CoverageMappingFormat.html
-[unstable-book-sbcc]: https://doc.rust-lang.org/nightly/unstable-book/compiler-flags/source-based-code-coverage.html
+[coverage map]: https://llvm.org/docs/CoverageMappingFormat.html
+[unstable-book-instrument-coverage]: https://doc.rust-lang.org/nightly/unstable-book/compiler-flags/instrument-coverage.html
 
 ## Rust symbol mangling
 
@@ -82,7 +82,7 @@ a span of code ([`CodeRegion`][code-region]). It counts the number of times a
 branch is executed, and also specifies the exact location of that code span in
 the Rust source code.
 
-Note that many of these `Coverage` statements will *not* be converted into
+Note that many of these `Coverage` statements will _not_ be converted into
 physical counters (or any other executable instructions) in the final binary.
 Some of them will be (see `CoverageKind::`[`Counter`][counter-coverage-kind]),
 but other counters can be computed on the fly, when generating a coverage
@@ -111,7 +111,7 @@ fn some_func(flag: bool) {
 In this example, four contiguous code regions are counted while only
 incrementing two counters.
 
-CFG analysis is used to not only determine *where* the branches are, for
+CFG analysis is used to not only determine _where_ the branches are, for
 conditional expressions like `if`, `else`, `match`, and `loop`, but also to
 determine where expressions can be used in place of physical counters.
 
@@ -150,50 +150,53 @@ MIR `Statement` into some backend-specific action or instruction.
         match statement.kind {
             ...
             mir::StatementKind::Coverage(box ref coverage) => {
-                self.codegen_coverage(&mut bx, coverage.clone());
+                self.codegen_coverage(&mut bx, coverage.clone(), statement.source_info.scope);
                 bx
             }
 ```
 
-
 `codegen_coverage()` handles each `CoverageKind` as follows:
 
-* For all `CoverageKind`s, Coverage data (counter ID, expression equation
+- For all `CoverageKind`s, Coverage data (counter ID, expression equation
   and ID, and code regions) are passed to the backend's `Builder`, to
   populate data structures that will be used to generate the crate's
   "Coverage Map". (See the [`FunctionCoverage`][function-coverage] `struct`.)
-* For `CoverageKind::Counter`s, an instruction is injected in the backend
+- For `CoverageKind::Counter`s, an instruction is injected in the backend
   IR to increment the physical counter, by calling the `BuilderMethod`
   [`instrprof_increment()`][instrprof-increment].
 
 ```rust
-    pub fn codegen_coverage(&self, bx: &mut Bx, coverage: Coverage) {
+    pub fn codegen_coverage(&self, bx: &mut Bx, coverage: Coverage, scope: SourceScope) {
+        ...
+        let instance = ... // the scoped instance (current or inlined function)
         let Coverage { kind, code_region } = coverage;
         match kind {
             CoverageKind::Counter { function_source_hash, id } => {
-                if let Some(code_region) = code_region {
-                    bx.add_coverage_counter(self.instance, id, code_region);
-                }
+                ...
+                bx.add_coverage_counter(instance, id, code_region);
                 ...
                 bx.instrprof_increment(fn_name, hash, num_counters, index);
             }
             CoverageKind::Expression { id, lhs, op, rhs } => {
-                bx.add_coverage_counter_expression(self.instance, id, lhs, op, rhs, code_region);
+                bx.add_coverage_counter_expression(instance, id, lhs, op, rhs, code_region);
             }
             CoverageKind::Unreachable => {
-                ...
+                bx.add_coverage_unreachable(
+                    instance,
+                    code_region.expect(...
 ```
-_code snippet trimmed for brevity_
+
+_code snippet abbreviated for brevity_
 
 > The function name `instrprof_increment()` is taken from the LLVM intrinsic
-call of the same name ([`llvm.instrprof.increment`][llvm-instrprof-increment]),
-and uses the same arguments and types; but note that, up to and through this
-stage (even though modeled after LLVM's implementation for code coverage
-instrumentation), the data and instructions are not strictly LLVM-specific.
+> call of the same name ([`llvm.instrprof.increment`][llvm-instrprof-increment]),
+> and uses the same arguments and types; but note that, up to and through this
+> stage (even though modeled after LLVM's implementation for code coverage
+> instrumentation), the data and instructions are not strictly LLVM-specific.
 >
 > But since LLVM is the only Rust-supported backend with the tooling to
-process this form of coverage instrumentation, the backend for `Coverage`
-statements is only implemented for LLVM, at this time.
+> process this form of coverage instrumentation, the backend for `Coverage`
+> statements is only implemented for LLVM, at this time.
 
 [backend-lowering-mir]: backend/lowering-mir.md
 [codegen-statement]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/mir/struct.FunctionCx.html#method.codegen_statement
@@ -221,25 +224,28 @@ properly-configured variables in LLVM IR, according to very specific
 details of the [_LLVM Coverage Mapping Format_][coverage-mapping-format]
 (Version 4).[^llvm-and-covmap-versions]
 
-[^llvm-and-covmap-versions]: The Rust compiler (as of <!-- date: 2021-01 -->
-January 2021) supports _LLVM Coverage Mapping Format_ Version 4 (the most
-up-to-date version of the format, at the time of this writing) for improved
-compatibility with other LLVM-based compilers (like _Clang_), and to take
-advantage of some format optimizations. Version 4 was introduced in _LLVM 11_,
-which is currently the default LLVM version for Rust. Note that the Rust
-compiler optionally supports some earlier LLVM versions, prior to _LLVM 11_. If
-`rustc` is configured to use an incompatible version of LLVM, compiling with `-Z
-instrument-coverage` will generate an error message.
+[^llvm-and-covmap-versions]:
+    The Rust compiler (as of <!-- date: 2021-01 -->
+    January 2021) supports _LLVM Coverage Mapping Format_ Version 4 (the most
+    up-to-date version of the format, at the time of this writing) for improved
+    compatibility with other LLVM-based compilers (like _Clang_), and to take
+    advantage of some format optimizations. Version 4 was introduced in _LLVM 11_,
+    which is currently the default LLVM version for Rust. Note that the Rust
+    compiler optionally supports some earlier LLVM versions, prior to _LLVM 11_. If
+    `rustc` is configured to use an incompatible version of LLVM, compiling with `-Z instrument-coverage` will generate an error message.
 
 ```rust
 pub fn finalize<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) {
+    ...
+    if !tcx.sess.instrument_coverage_except_unused_functions() {
+        add_unused_functions(cx);
+    }
+
     let mut function_coverage_map = match cx.coverage_context() {
         Some(ctx) => ctx.take_function_coverage_map(),
         None => return,
     };
     ...
-    add_unreachable_coverage(tcx, &mut function_coverage_map);
-
     let mut mapgen = CoverageMapGenerator::new();
 
     for (instance, function_coverage) in function_coverage_map {
@@ -248,31 +254,30 @@ pub fn finalize<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) {
             mapgen.write_coverage_mapping(expressions, counter_regions, coverage_mapping_buffer);
         });
 ```
+
 _code snippet trimmed for brevity_
 
-One notable step, performed by `mapgen::finalize()` before processing the
-`Instance`s and their `FunctionCoverage`s, is the call to
-[`add_unreachable_functions()`][add-unreachable-coverage].
+One notable first step performed by `mapgen::finalize()` is the call to
+[`add_unused_functions()`][add-unused-functions].
 
 When finalizing the coverage map, `FunctionCoverage` only has the `CodeRegion`s and counters for
 the functions that went through codegen; such as public functions and "used" functions
-(functions referenced by other "used" or public items). Any other functions (considered unused
-or "Unreachable") were still parsed and processed through the MIR stage.
+(functions referenced by other "used" or public items). Any other functions (considered unused)
+were still parsed and processed through the MIR stage.
 
-The set of unreachable functions is computed via the set difference of all MIR
+The set of unused functions is computed via the set difference of all MIR
 `DefId`s (`tcx` query `mir_keys`) minus the codegenned `DefId`s
-(`tcx` query `collect_and_partition_mono_items`). `add_unreachable_functions()`
-computes the set of unreachable functions, queries the `tcx` for the
-previously-computed `CodeRegions`, for each unreachable MIR, and adds those code
-regions to one of the non-generic codegenned functions (non-generic avoids
-potentially injecting the unreachable coverage multiple times for multiple
-instantiations).
+(`tcx` query `codegened_and_inlined_items`). `add_unused_functions()`
+computes the set of unused functions and then, for each unused MIR, queries
+the `tcx` for the previously-computed `CodeRegion`s, synthesizes an
+LLVM function (with no internal statements, since it will not be called),
+and adds a new `FunctionCoverage` with `Unreachable` code regions.
 
 [compile-codegen-unit]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_llvm/base/fn.compile_codegen_unit.html
 [coverageinfo-finalize]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_llvm/context/struct.CodegenCx.html#method.coverageinfo_finalize
 [mapgen-finalize]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_llvm/coverageinfo/mapgen/fn.finalize.html
 [coverage-mapping-format]: https://llvm.org/docs/CoverageMappingFormat.html
-[add-unreachable-coverage]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_llvm/coverageinfo/mapgen/fn.add_unreachable_coverage.html
+[add-unused-functions]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_llvm/coverageinfo/mapgen/fn.add_unused_functions.html
 
 ## Testing LLVM Coverage
 
@@ -280,15 +285,10 @@ Coverage instrumentation in the MIR is validated by a `mir-opt` test:
 [`instrument-coverage`][mir-opt-test].
 
 More complete testing of end-to-end coverage instrumentation and reports are done
-in the `run-make-fulldeps` tests, with sample Rust programs (to be instrumented)
-in the [`coverage`][coverage-test-samples] directory, and the actual tests and expected
+in the `run-make` tests, with sample Rust programs (to be instrumented) in the
+[`coverage`][coverage-test-samples] directory, and the actual tests and expected
 results in [`coverage-reports`].
 
-In addition to testing the final result, two intermediate results are also validated
-to catch potential regression errors early: Minimum `CoverageSpan`s computed during
-the `InstrumentCoverage` MIR pass are saved in `mir_dump` [Spanview][spanview-debugging]
-files and compared to expected results in [`coverage-spanview`].
-
 Finally, the [`coverage-llvmir`] test compares compiles a simple Rust program with
 `-Z instrument-coverage` and compares the compiled program's LLVM IR to expected
 LLVM IR instructions and structured data for a coverage-enabled program, including
@@ -296,18 +296,19 @@ various checks for Coverage Map-related metadata and the LLVM intrinsic calls to
 increment the runtime counters.
 
 Expected results for both the `mir-opt` tests and the `coverage*` tests under
-`run-make-fulldeps` can be refreshed by running:
+`run-make` can be refreshed by running:
 
 ```shell
-$ ./x.py test src/test/<test-type> --blessed
+$ ./x.py test mir-opt --bless
+$ ./x.py test src/test/run-make/coverage --bless
 ```
 
 [mir-opt-test]: https://github.com/rust-lang/rust/blob/master/src/test/mir-opt/instrument_coverage.rs
-[coverage-test-samples]: https://github.com/rust-lang/rust/tree/master/src/test/run-make-fulldeps/coverage
-[`coverage-reports`]: https://github.com/rust-lang/rust/tree/master/src/test/run-make-fulldeps/coverage-reports
-[`coverage-spanview`]: https://github.com/rust-lang/rust/tree/master/src/test/run-make-fulldeps/coverage-spanview
+[coverage-test-samples]: https://github.com/rust-lang/rust/tree/master/src/test/run-make/coverage
+[`coverage-reports`]: https://github.com/rust-lang/rust/tree/master/src/test/run-make/coverage-reports
+[`coverage-spanview`]: https://github.com/rust-lang/rust/tree/master/src/test/run-make/coverage-spanview
 [spanview-debugging]: compiler-debugging.md#viewing-spanview-output
-[`coverage-llvmir`]: https://github.com/rust-lang/rust/tree/master/src/test/run-make-fulldeps/coverage-llvmir
+[`coverage-llvmir`]: https://github.com/rust-lang/rust/tree/master/src/test/run-make/coverage-llvmir
 
 ## Implementation Details of the `InstrumentCoverage` MIR Pass
 
@@ -352,11 +353,12 @@ with the following steps:
    - `inject_intermediate_expression()`, called for each intermediate expression
      returned from `make_bcb_counters()`
 
-[^intermediate-expressions]: Intermediate expressions are sometimes required
-because `Expression`s are limited to binary additions or subtractions. For
-example, `A + (B - C)` might represent an `Expression` count computed from three
-other counters, `A`, `B`, and `C`, but computing that value requires an
-intermediate expression for `B - C`.
+[^intermediate-expressions]:
+    Intermediate expressions are sometimes required
+    because `Expression`s are limited to binary additions or subtractions. For
+    example, `A + (B - C)` might represent an `Expression` count computed from three
+    other counters, `A`, `B`, and `C`, but computing that value requires an
+    intermediate expression for `B - C`.
 
 [instrumentor]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir/transform/coverage/struct.Instrumentor.html
 [coverage-graph]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir/transform/coverage/graph/struct.CoverageGraph.html
@@ -396,20 +398,21 @@ contrast with the [`SimplifyCfg`][simplify-cfg] MIR pass, this step does
 not alter the MIR itself, because the `CoverageGraph` aggressively simplifies
 the CFG, and ignores nodes that are not relevant to coverage. For example:
 
-  * The BCB CFG ignores (excludes) branches considered not relevant
-    to the current coverage solution. It excludes unwind-related code[^78544]
-    that is injected by the Rust compiler but has no physical source
-    code to count, which allows a `Call`-terminated BasicBlock
-    to be merged with its successor, within a single BCB.
-  * A `Goto`-terminated `BasicBlock` can be merged with its successor
-    ***as long as*** it has the only incoming edge to the successor `BasicBlock`.
-  * Some BasicBlock terminators support Rust-specific concerns--like borrow-checking--that are
-    not relevant to coverage analysis. `FalseUnwind`, for example, can be treated the same as
-    a `Goto` (potentially merged with its successor into the same BCB).
-
-[^78544]: (Note, however, that Issue [#78544][rust-lang/rust#78544] considers
-providing future support for coverage of programs that intentionally
-`panic`, as an option, with some non-trivial cost.)
+- The BCB CFG ignores (excludes) branches considered not relevant
+  to the current coverage solution. It excludes unwind-related code[^78544]
+  that is injected by the Rust compiler but has no physical source
+  code to count, which allows a `Call`-terminated `BasicBlock`
+  to be merged with its successor, within a single BCB.
+- A `Goto`-terminated `BasicBlock` can be merged with its successor
+  **_as long as_** it has the only incoming edge to the successor `BasicBlock`.
+- Some `BasicBlock` terminators support Rust-specific concerns--like borrow-checking--that are
+  not relevant to coverage analysis. `FalseUnwind`, for example, can be treated the same as
+  a `Goto` (potentially merged with its successor into the same BCB).
+
+[^78544]:
+    (Note, however, that Issue [#78544][rust-lang/rust#78544] considers
+    providing future support for coverage of programs that intentionally
+    `panic`, as an option, with some non-trivial cost.)
 
 The BCB CFG is critical to simplifying the coverage analysis by ensuring graph path-based
 queries (`is_dominated_by()`, `predecessors`, `successors`, etc.) have branch (control flow)
@@ -418,10 +421,11 @@ significance.
 To visualize the `CoverageGraph`, you can generate a _graphviz_ `*.dot`
 file with the following `rustc` flags:[^graphviz-dark-mode]
 
-[^graphviz-dark-mode]: This image also applies `-Z graphviz-dark-mode`, to
-produce a Graphviz document with "dark mode" styling. If you use a dark mode or
-theme in your development environment, you will probably want to use this
-option so you can review the graphviz output without straining your vision.
+[^graphviz-dark-mode]:
+    This image also applies `-Z graphviz-dark-mode`, to
+    produce a Graphviz document with "dark mode" styling. If you use a dark mode or
+    theme in your development environment, you will probably want to use this
+    option so you can review the graphviz output without straining your vision.
 
 ```shell
 $ rustc -Z instrument-coverage -Z dump-mir=InstrumentCoverage \
@@ -448,19 +452,19 @@ directional edges (the arrows) leading from each node to its `successors()`.
 The nodes contain information in sections:
 
 1. The gray header has a label showing the BCB ID (or _index_ for looking up
-its `BasicCoverageBlockData`).
+   its `BasicCoverageBlockData`).
 2. The first content section shows the assigned `Counter` or `Expression` for
-each contiguous section of code. (There may be more than one `Expression`
-incremented by the same `Counter` for discontiguous sections of code representing
-the same sequential actions.) Note the code is represented by the line and
-column ranges (for example: `52:28-52:33`, representing the original source
-line 52, for columns 28-33). These are followed by the MIR `Statement` or
-`Terminator` represented by that source range. (How these coverage regions
-are determined is discussed in the following section.)
+   each contiguous section of code. (There may be more than one `Expression`
+   incremented by the same `Counter` for discontiguous sections of code representing
+   the same sequential actions.) Note the code is represented by the line and
+   column ranges (for example: `52:28-52:33`, representing the original source
+   line 52, for columns 28-33). These are followed by the MIR `Statement` or
+   `Terminator` represented by that source range. (How these coverage regions
+   are determined is discussed in the following section.)
 3. The final section(s) show the MIR `BasicBlock`s (by ID/index and its
-`TerminatorKind`) contained in this BCB. The last BCB is separated out because
-its `successors()` determine the edges leading out of the BCB, and into
-the `leading_bb()` (first `BasicBlock`) of each successor BCB.
+   `TerminatorKind`) contained in this BCB. The last `BasicBlock` is separated out because
+   its `successors()` determine the edges leading out of the BCB, and into
+   the `leading_bb()` (first `BasicBlock`) of each successor BCB.
 
 Note, to find the `BasicCoverageBlock` from a final BCB `Terminator`'s
 successor `BasicBlock`, there is an index and helper
@@ -572,7 +576,7 @@ incoming edges. Given the following graph, for example, the count for
 
 In this situation, BCB node `B` may require an edge counter for its
 "edge from A", and that edge might be computed from an `Expression`,
-`Counter(A) - Counter(C)`. But an expression for the BCB _node_  `B`
+`Counter(A) - Counter(C)`. But an expression for the BCB _node_ `B`
 would be the sum of all incoming edges:
 
 ```text