Skip to content

Commit

Permalink
Ignore lockorder violation on same callsite with different construction
Browse files Browse the repository at this point in the history
As long as the lock order on such locks is still valid, we should allow
them regardless of whether they were constructed at the same location or
not. Note that we can only really enforce this if we require one lock
call per line, or if we have access to symbol columns (as we do on Linux
and macOS). We opt for a smaller patch by relying on the latter.

This was previously triggered by some recent test changes to
`test_manager_serialize_deserialize_inconsistent_monitor`. When the
test ends and a node is dropped, causing us to persist each node's
`ChannelManager`, we'd detect a possible lockorder violation deadlock
across three different `Mutex` instances that are held at the same
location when serializing our `per_peer_states` in
`ChannelManager::write`.

The presumed lockorder violation happens because the first `Mutex` held
shares the same construction location with the third one, while the
second `Mutex` has a different construction location. When we hold the
second one, we consider the first as a dependency, and then consider the
second as a dependency when holding the third, causing a circular
dependency (since the third shares the same construction location as the
first).

As the inline comment suggests, though, this isn't a lockorder
violation that could actually result in a deadlock, since we are under
a dependent write lock which no one else can have access to.
  • Loading branch information
wpaulino committed Mar 23, 2023
1 parent 04ee948 commit 4b79c1c
Showing 1 changed file with 43 additions and 26 deletions.
69 changes: 43 additions & 26 deletions lightning/src/sync/debug_sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,24 +74,31 @@ struct LockDep {
_lockdep_trace: Backtrace,
}

// Locates the earliest frame preceding a `debug_sync` frame in the call stack. This ensures we can
// properly detect a lock's construction and acquiral callsites, since the latter may contain
// multiple `debug_sync` frames.
#[cfg(feature = "backtrace")]
fn locate_call_symbol(backtrace: &Backtrace) -> (String, Option<u32>) {
	// Find the earliest `debug_sync` frame (or one that is in our tests) and use the frame
	// preceding it as the callsite. Note that the first few frames may be in the `backtrace`
	// crate, so we have to ignore those.
	let sync_mutex_constr_regex = regex::Regex::new(r"lightning.*debug_sync").unwrap();
	let mut found_debug_sync = false;
	// Track the most recent non-`debug_sync` symbol seen immediately after a run of
	// `debug_sync` frames; the last such symbol is the real caller.
	let mut symbol_after_latest_debug_sync = None;
	for frame in backtrace.frames().iter() {
		for symbol in frame.symbols().iter() {
			// Symbol names may be unavailable (e.g. stripped binaries); skip those frames.
			if let Some(symbol_name) = symbol.name().map(|name| name.as_str()).flatten() {
				if !sync_mutex_constr_regex.is_match(symbol_name) {
					if found_debug_sync {
						symbol_after_latest_debug_sync = Some(symbol);
						found_debug_sync = false;
					}
				} else { found_debug_sync = true; }
			}
		}
	}
	let symbol = symbol_after_latest_debug_sync.expect("Couldn't find lock call symbol");
	// Return "file:line" plus the column (if available — Windows backtraces lack columns).
	(format!("{}:{}", symbol.filename().unwrap().display(), symbol.lineno().unwrap()), symbol.colno())
}

impl LockMetadata {
Expand All @@ -108,13 +115,13 @@ impl LockMetadata {
#[cfg(feature = "backtrace")]
{
let (lock_constr_location, lock_constr_colno) =
get_construction_location(&res._lock_construction_bt);
locate_call_symbol(&res._lock_construction_bt);
LOCKS_INIT.call_once(|| { unsafe { LOCKS = Some(StdMutex::new(HashMap::new())); } });
let mut locks = unsafe { LOCKS.as_ref() }.unwrap().lock().unwrap();
match locks.entry(lock_constr_location) {
hash_map::Entry::Occupied(e) => {
assert_eq!(lock_constr_colno,
get_construction_location(&e.get()._lock_construction_bt).1,
locate_call_symbol(&e.get()._lock_construction_bt).1,
"Because Windows doesn't support column number results in backtraces, we cannot construct two mutexes on the same line or we risk lockorder detection false positives.");
return Arc::clone(e.get())
},
Expand All @@ -138,23 +145,33 @@ impl LockMetadata {
#[cfg(feature = "backtrace")]
debug_assert!(_double_lock_self_allowed,
"Tried to acquire a lock while it was held!\nLock constructed at {}",
get_construction_location(&this._lock_construction_bt).0);
locate_call_symbol(&this._lock_construction_bt).0);
#[cfg(not(feature = "backtrace"))]
panic!("Tried to acquire a lock while it was held!");
}
}
for (_locked_idx, locked) in held.borrow().iter() {
for (locked_dep_idx, _locked_dep) in locked.locked_before.lock().unwrap().iter() {
if *locked_dep_idx == this.lock_idx && *locked_dep_idx != locked.lock_idx {
#[cfg(feature = "backtrace")]
panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\nLock being taken constructed at: {} ({}):\n{:?}\nLock constructed at: {} ({})\n{:?}\n\nLock dep created at:\n{:?}\n\n",
get_construction_location(&this._lock_construction_bt).0,
this.lock_idx, this._lock_construction_bt,
get_construction_location(&locked._lock_construction_bt).0,
locked.lock_idx, locked._lock_construction_bt,
_locked_dep._lockdep_trace);
#[cfg(not(feature = "backtrace"))]
panic!("Tried to violate existing lockorder. Build with the backtrace feature for more info.");
let is_dep_this_lock = *locked_dep_idx == this.lock_idx;
let has_same_construction = *locked_dep_idx == locked.lock_idx;
if is_dep_this_lock && !has_same_construction {
#[allow(unused_mut, unused_assignments)]
let mut has_same_callsite = false;
#[cfg(feature = "backtrace")] {
has_same_callsite = locate_call_symbol(&_locked_dep._lockdep_trace) ==
locate_call_symbol(&Backtrace::new());
}
if !has_same_callsite {
#[cfg(feature = "backtrace")]
panic!("Tried to violate existing lockorder.\nMutex that should be locked after the current lock was created at the following backtrace.\nNote that to get a backtrace for the lockorder violation, you should set RUST_BACKTRACE=1\nLock being taken constructed at: {} ({}):\n{:?}\nLock constructed at: {} ({})\n{:?}\n\nLock dep created at:\n{:?}\n\n",
locate_call_symbol(&this._lock_construction_bt).0,
this.lock_idx, this._lock_construction_bt,
locate_call_symbol(&locked._lock_construction_bt).0,
locked.lock_idx, locked._lock_construction_bt,
_locked_dep._lockdep_trace);
#[cfg(not(feature = "backtrace"))]
panic!("Tried to violate existing lockorder. Build with the backtrace feature for more info.");
}
}
}
// Insert any already-held locks in our locked-before set.
Expand Down

0 comments on commit 4b79c1c

Please sign in to comment.