Skip to content

Commit

Permalink
Merge pull request #3203 from autonomys/rayon-panic-message
Browse files (browse the repository at this point in the history)
Temporarily log panic messages inside rayon CUDA plotter
  • Loading branch information
teor2345 authored Nov 5, 2024
2 parents b9fe73e + badbfb4 commit 6754e05
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 6 deletions.
22 changes: 20 additions & 2 deletions crates/subspace-farmer/src/plotter/gpu/cuda.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,8 @@
use crate::plotter::gpu::GpuRecordsEncoder;
use async_lock::Mutex as AsyncMutex;
use parking_lot::Mutex;
use rayon::{ThreadPool, ThreadPoolBuildError, ThreadPoolBuilder};
use rayon::{current_thread_index, ThreadPool, ThreadPoolBuildError, ThreadPoolBuilder};
use std::process::exit;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use subspace_core_primitives::pieces::{PieceOffset, Record};
Expand Down Expand Up @@ -93,8 +94,25 @@ impl CudaRecordsEncoder {
global_mutex: Arc<AsyncMutex<()>>,
) -> Result<Self, ThreadPoolBuildError> {
let id = cuda_device.id();
let thread_name = move |thread_index| format!("cuda-{id}.{thread_index}");
// TODO: remove this panic handler when rayon logs panic_info
// https://github.com/rayon-rs/rayon/issues/1208
let panic_handler = move |panic_info| {
if let Some(index) = current_thread_index() {
eprintln!("panic on thread {}: {:?}", thread_name(index), panic_info);
} else {
// We want to guarantee exit, rather than panicking in a panic handler.
eprintln!(
"rayon panic handler called on non-rayon thread: {:?}",
panic_info
);
}
exit(1);
};

let thread_pool = ThreadPoolBuilder::new()
.thread_name(move |thread_index| format!("cuda-{id}.{thread_index}"))
.thread_name(thread_name)
.panic_handler(panic_handler)
// Make sure there is overlap between records, so GPU is almost always busy
.num_threads(2)
.build()?;
Expand Down
28 changes: 24 additions & 4 deletions crates/subspace-farmer/src/utils.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,11 +8,14 @@ use crate::thread_pool_manager::{PlottingThreadPoolManager, PlottingThreadPoolPa
use futures::channel::oneshot;
use futures::channel::oneshot::Canceled;
use futures::future::Either;
use rayon::{ThreadBuilder, ThreadPool, ThreadPoolBuildError, ThreadPoolBuilder};
use rayon::{
current_thread_index, ThreadBuilder, ThreadPool, ThreadPoolBuildError, ThreadPoolBuilder,
};
use std::future::Future;
use std::num::NonZeroUsize;
use std::ops::Deref;
use std::pin::{pin, Pin};
use std::process::exit;
use std::task::{Context, Poll};
use std::{fmt, io, iter, thread};
use thread_priority::{set_current_thread_priority, ThreadPriority};
Expand Down Expand Up @@ -488,11 +491,28 @@ fn create_plotting_thread_pool_manager_thread_pool_pair(
cpu_core_set: CpuCoreSet,
thread_priority: Option<ThreadPriority>,
) -> Result<ThreadPool, ThreadPoolBuildError> {
let thread_name =
move |thread_index| format!("{thread_prefix}-{thread_pool_index}.{thread_index}");
// TODO: remove this panic handler when rayon logs panic_info
// https://github.com/rayon-rs/rayon/issues/1208
// (we'll lose the thread name, because it's not stored within rayon's WorkerThread)
let panic_handler = move |panic_info| {
if let Some(index) = current_thread_index() {
eprintln!("panic on thread {}: {:?}", thread_name(index), panic_info);
} else {
// We want to guarantee exit, rather than panicking in a panic handler.
eprintln!(
"rayon panic handler called on non-rayon thread: {:?}",
panic_info
);
}
exit(1);
};

ThreadPoolBuilder::new()
.thread_name(move |thread_index| {
format!("{thread_prefix}-{thread_pool_index}.{thread_index}")
})
.thread_name(thread_name)
.num_threads(cpu_core_set.cpu_cores().len())
.panic_handler(panic_handler)
.spawn_handler({
let handle = Handle::current();

Expand Down

0 comments on commit 6754e05

Please sign in to comment.