From 153ccbb7a97a1c7025f731ca4fcee0b109ea36dd Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Mon, 27 Jan 2025 10:28:00 +0000 Subject: [PATCH] Don't give up on a location the first time compiling fails. This comment in the code was partly wrong: ``` // FIXME: We will endless retry tracing this // [HotLocation]. There should be a `Counting` // state. ``` In fact, we wouldn't endlessly retry: the `HotLocation` would get stuck in the `Compiling` state. Still, the end effect was the same as the comment suggested! This commit allows `HotLocation`s to transition back to the `Counting` state so that we can retry. That requires allowing `HotLocations` to also be in the `Counting` state: in other words, we can "count" both with and without a `HotLocation`. --- tests/c/early_return1.c | 9 ++-- ykrt/src/location.rs | 105 ++++++++++++++++++++++++++-------------- ykrt/src/mt.rs | 62 ++++++++++++++++++++++-- 3 files changed, 131 insertions(+), 45 deletions(-) diff --git a/tests/c/early_return1.c b/tests/c/early_return1.c index 2e7346ea4..309c79a16 100644 --- a/tests/c/early_return1.c +++ b/tests/c/early_return1.c @@ -5,8 +5,9 @@ // stderr: // yk-jit-event: start-tracing // early return -// 5 +// 6 // yk-jit-event: tracing-aborted +// 5 // 4 // yk-jit-event: start-tracing // 3 @@ -37,10 +38,10 @@ void loop(YkMT *mt, YkLocation *loc, int i) { NOOPT_VAL(i); while (i > 0) { yk_mt_control_point(mt, loc); - if (i == 6) { + if (i == 7) { loop(mt, loc, i - 1); i--; - } else if (i == 5) { + } else if (i == 6) { fprintf(stderr, "early return\n"); return; } @@ -57,7 +58,7 @@ int main(int argc, char **argv) { YkLocation loc = yk_location_new(); NOOPT_VAL(loc); - loop(mt, &loc, 6); + loop(mt, &loc, 7); fprintf(stderr, "exit\n"); yk_location_drop(loc); yk_mt_shutdown(mt); diff --git a/ykrt/src/location.rs b/ykrt/src/location.rs index 9e8cc0866..ff4ba48cd 100644 --- a/ykrt/src/location.rs +++ b/ykrt/src/location.rs @@ -42,48 +42,71 @@ pub struct Location { /// A Location is a state machine which operates as follows (where Counting is the start state): /// /// ```text - /// ┌──────────────┐ - /// │ │─────────────┐ - /// reprofile │ Counting │ │ - /// ┌──────────▶│ │◀────────────┘ - /// │ └──────────────┘ increment - /// │ │ count - /// │ │ start tracing - /// │ ▼ - /// │ ┌──────────────┐ - /// │ │ │ incomplete ┌─────────────┐ - /// │ │ Tracing │────────────▶│ DontTrace │ - /// │ │ │ └─────────────┘ - /// │ └──────────────┘ - /// │ │ start compiling trace - /// │ │ in thread - /// │ ▼ - /// │ ┌──────────────┐ ┌───────────┐ - /// │ │ Compiling │────────────▶│ Dropped │ - /// │ └──────────────┘ └───────────┘ - /// │ │ - /// │ │ trace compiled - /// │ ▼ - /// │ ┌──────────────┐ - /// └───────────│ Compiled │◀────────────┐ - /// └──────────────┘ │ - /// │ │ - /// │ guard failed │ - /// ▼ │ - /// ┌──────────────┐ │ - /// │ SideTracing │─────────────┘ - /// └──────────────┘ + /// │ + /// │ + /// ▼ + /// ┌─────────────────────────────────────────────────────────────────────────────────────┐ increment count + /// │ │ ──────────────────┐ + /// │ Counting │ │ + /// │ │ ◀─────────────────┘ + /// └─────────────────────────────────────────────────────────────────────────────────────┘ + /// │ ▲ ▲ + /// │ start tracing │ failed below threshold │ failed below threshold + /// ▼ │ │ + /// ┌───────────┐ failed above threshold ┌───────────────────────────────┐ │ │ + /// │ DontTrace │ ◀──────────────────────── │ Tracing │ ─┘ │ + /// └───────────┘ └───────────────────────────────┘ │ + /// ▲ │ │ + /// │ │ │ + /// │ ▼ │ + /// │ failed above threshold ┌───────────────────────────────┐ │ + /// └──────────────────────────────────── │ Compiling │ ────────────────────────────┘ + /// └───────────────────────────────┘ + /// │ + /// │ + /// ▼ + /// ┌───────────────────────────────┐ + /// │ Compiled │ ◀┐ + /// └───────────────────────────────┘ │ + /// │ │ + /// │ guard failed above threshold │ sidetracing completed + /// ▼ │ + /// ┌───────────────────────────────┐ │ + /// │ SideTracing │ ─┘ + /// └───────────────────────────────┘ + /// ``` + /// + /// This diagram was created with [this tool](https://dot-to-ascii.ggerganov.com/) using this + /// GraphViz input: + /// + /// ```text + /// digraph { + /// init [label="", shape=point]; + /// init -> Counting; + /// Counting -> Counting [label="increment count"]; + /// Counting -> Tracing [label="start tracing"]; + /// Tracing -> Compiling; + /// Tracing -> Counting [label="failed below threshold"]; + /// Tracing -> DontTrace [label="failed above threshold"]; + /// Compiling -> Compiled; + /// Compiling -> Counting [label="failed below threshold"]; + /// Compiling -> DontTrace [label="failed above threshold"]; + /// Compiled -> SideTracing [label="guard failed above threshold"]; + /// SideTracing -> Compiled [label="sidetracing completed"]; + /// } /// ``` /// /// We hope that a Location soon reaches the `Compiled` state (aka "the happy state") and stays /// there. However, many Locations will not be used frequently enough to reach such a state, so /// we don't want to waste resources on them. /// - /// We therefore encode a Location as a tagged integer: in the initial (Counting) state, no - /// memory is allocated; if the location is used frequently enough it becomes hot, memory - /// is allocated for it, and a pointer stored instead of an integer. Note that once memory for a - /// hot location is allocated, it can only be (scheduled for) deallocation when a Location - /// is dropped, as the Location may have handed out `&` references to that allocated memory. + /// We therefore encode a Location as a tagged integer: when initialised, no memory is + /// allocated; if the location is used frequently enough it becomes hot, memory is allocated + /// for it, and a pointer stored instead of an integer. Note that once memory for a hot + /// location is allocated, it can only be (scheduled for) deallocation when a Location is + /// dropped, as the Location may have handed out `&` references to that allocated memory. That + /// means that the `Counting` state is encoded in two separate ways: both with and without + /// allocated memory. /// /// The layout of a Location is as follows: bit 0 = ; bits 1.. = payload. In the `STATE_NOT_HOT` state, the payload is an integer; in a `STATE_HOT` @@ -100,7 +123,11 @@ impl Location { } } - /// If `self` is in the `Counting` state, increment and return its count, or `None` otherwise. + /// If `self` is: + /// 1. in the `Counting` state and + /// 2. has not had a `HotLocation` allocated for it + /// + /// then increment and return its count, or `None` otherwise. pub(crate) fn inc_count(&self) -> Option { let x = self.inner.load(Ordering::Relaxed); if x & STATE_NOT_HOT != 0 { @@ -243,6 +270,9 @@ pub(crate) enum HotLocationKind { /// A trace for this HotLocation is being compiled in another trace. When compilation is /// complete, the compiling thread will update the state of this HotLocation. Compiling, + /// Because of a failure in compiling / tracing, we have reentered the `Counting` state. This + /// can be seen as a way of implementing back-off in the face of errors. + Counting(HotThreshold), /// This HotLocation has encountered problems (e.g. traces which are too long) and shouldn't be /// traced again. DontTrace, @@ -265,6 +295,7 @@ pub(crate) enum HotLocationKind { /// When a [HotLocation] has failed to compile a valid trace, should the [HotLocation] be tried /// again or not? +#[derive(Debug)] pub(crate) enum TraceFailed { KeepTrying, DontTrace, diff --git a/ykrt/src/mt.rs b/ykrt/src/mt.rs index a32c2f03f..5833bc047 100644 --- a/ykrt/src/mt.rs +++ b/ykrt/src/mt.rs @@ -295,6 +295,8 @@ impl MT { debug_assert_matches!(hl.kind, HotLocationKind::Compiling); if let TraceFailed::DontTrace = hl.tracecompilation_error(&mt) { hl.kind = HotLocationKind::DontTrace; + } else { + hl.kind = HotLocationKind::Counting(0); } match e { CompilationError::General(e) | CompilationError::LimitExceeded(e) => { @@ -629,6 +631,19 @@ impl MT { } } HotLocationKind::Compiling => TransitionControlPoint::NoAction, + HotLocationKind::Counting(c) => { + if is_tracing { + // This thread is tracing something, so bail out as quickly as possible + TransitionControlPoint::NoAction + } else if c < self.hot_threshold() { + lk.kind = HotLocationKind::Counting(c + 1); + TransitionControlPoint::NoAction + } else { + let hl = loc.hot_location_arc_clone().unwrap(); + lk.kind = HotLocationKind::Tracing; + TransitionControlPoint::StartTracing(hl) + } + } HotLocationKind::Tracing => { let hl = loc.hot_location_arc_clone().unwrap(); match &*mtt.tstate.borrow() { @@ -668,9 +683,12 @@ impl MT { // instead abort tracing, and hope we can start // at a more propitious point in the future. self.stats.trace_recorded_err(); - // FIXME: We will endless retry tracing this - // [HotLocation]. There should be a `Counting` - // state. + match lk.tracecompilation_error(self) { + TraceFailed::KeepTrying => { + lk.kind = HotLocationKind::Counting(0) + } + TraceFailed::DontTrace => todo!(), + } TransitionControlPoint::AbortTracing } } @@ -1135,7 +1153,7 @@ mod tests { compile::{CompiledTraceTestingBasicTransitions, CompiledTraceTestingMinimal}, trace::TraceRecorderError, }; - use std::{hint::black_box, ptr}; + use std::{assert_matches::assert_matches, hint::black_box, ptr}; use test::bench::Bencher; // We only implement enough of the equality function for the tests we have. @@ -1448,6 +1466,42 @@ mod tests { )); } + #[test] + fn locations_can_fail_multiple_times() { + // Test that a location can fail tracing/compiling multiple times before we give up. + + let hot_thrsh = 5; + let mt = MT::new().unwrap(); + mt.set_hot_threshold(hot_thrsh); + let loc = Location::new(); + for i in 0..mt.hot_threshold() { + assert_eq!( + mt.transition_control_point(&loc, ptr::null_mut()), + TransitionControlPoint::NoAction + ); + assert_eq!(loc.count(), Some(i + 1)); + } + expect_start_tracing(&mt, &loc); + expect_stop_tracing(&mt, &loc); + + for _ in 0..mt.trace_failure_threshold() { + assert_matches!( + loc.hot_location() + .unwrap() + .lock() + .tracecompilation_error(&mt), + TraceFailed::KeepTrying + ); + } + assert_matches!( + loc.hot_location() + .unwrap() + .lock() + .tracecompilation_error(&mt), + TraceFailed::DontTrace + ); + } + #[test] fn dont_trace_two_locations_simultaneously_in_one_thread() { // A thread can only trace one Location at a time: if, having started tracing, it