From 4c38a18f3c86f5e6ed9ed292c02633f77db1a259 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 9 May 2023 14:37:12 -0500 Subject: [PATCH] Refactor Wasmtime's profiling support (#6361) * Reorganize profiling-related code This commit is a bit of reorganization around profiling-related code in Wasmtime with the aim of eventually simplifying it a bit more. The changes here are: * All exposed agents are removed and instead only constructor functions returning trait objects are now exposed. * All `*_disabled.rs` files and modules are removed in favor of a function that returns a result (less trait impls). * All `*_linux.rs` files were renamed to just `*.rs`. (less files in play) * The `pid` and `tid` arguments were removed as they were only used by the jitdump profiler and now that manages it internally. * Registering an entire ELF image is now part of the trait rather than buried within the trampoline code in Wasmtime. * Remove DWARF support from jitdump In general Wasmtime's support for DWARF is not great so this is rarely used and at least in my experience this hasn't been necessary to get good information from perf. This commit removes the processing here which while probably useful is probably not necessary and otherwise makes the jitdump agent a bit of an odd-one-out among the other agents. * Remove now no-longer-needed `dbg_image` argument * Only grab the jitdump lock once-per-module Refactor slightly to account for this. * Fill in the `tid` argument to jitdump This has been the same as `self.pid` for quite some time but with `rustix` it's pretty easy to get access to the current thread id. * Merge module/trampoline registration for profilers Add a second argument to registration of an entire module for custom names to get functions named correctly, and otherwise profilers now only need to look at individual functions. 
* Fixup vtune support * Delete no-longer-needed accessors Closes #6328 * Remove unused import * Fix a portability issue with u32-vs-i32 --- Cargo.lock | 1 + crates/jit/Cargo.toml | 3 + crates/jit/src/instantiate.rs | 44 +- crates/jit/src/lib.rs | 3 +- crates/jit/src/profiling.rs | 116 +++-- crates/jit/src/profiling/jitdump.rs | 66 +++ crates/jit/src/profiling/jitdump_disabled.rs | 32 -- crates/jit/src/profiling/jitdump_linux.rs | 444 ------------------- crates/jit/src/profiling/perfmap.rs | 47 ++ crates/jit/src/profiling/perfmap_disabled.rs | 28 -- crates/jit/src/profiling/perfmap_linux.rs | 104 ----- crates/jit/src/profiling/vtune.rs | 95 +--- crates/jit/src/profiling/vtune_disabled.rs | 32 -- crates/wasmtime/src/config.rs | 10 +- crates/wasmtime/src/engine.rs | 2 +- crates/wasmtime/src/trampoline/func.rs | 42 +- 16 files changed, 224 insertions(+), 845 deletions(-) create mode 100644 crates/jit/src/profiling/jitdump.rs delete mode 100644 crates/jit/src/profiling/jitdump_disabled.rs delete mode 100644 crates/jit/src/profiling/jitdump_linux.rs create mode 100644 crates/jit/src/profiling/perfmap.rs delete mode 100644 crates/jit/src/profiling/perfmap_disabled.rs delete mode 100644 crates/jit/src/profiling/perfmap_linux.rs delete mode 100644 crates/jit/src/profiling/vtune_disabled.rs diff --git a/Cargo.lock b/Cargo.lock index ef4f6472bd94..fd0449d75e1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4067,6 +4067,7 @@ dependencies = [ "log", "object", "rustc-demangle", + "rustix", "serde", "target-lexicon", "wasmtime-environ", diff --git a/crates/jit/Cargo.toml b/crates/jit/Cargo.toml index b795d0b32e78..90a11f87e55a 100644 --- a/crates/jit/Cargo.toml +++ b/crates/jit/Cargo.toml @@ -27,6 +27,9 @@ cpp_demangle = "0.3.2" log = { workspace = true } wasmtime-jit-icache-coherence = { workspace = true } +[target.'cfg(target_os = "linux")'.dependencies] +rustix = { workspace = true, features = ['thread'] } + [target.'cfg(target_os = "windows")'.dependencies.windows-sys] 
workspace = true features = [ diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index b5f0daf82bf9..c108f9b97f8d 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -5,7 +5,7 @@ use crate::code_memory::CodeMemory; use crate::debug::create_gdbjit_image; -use crate::ProfilingAgent; +use crate::profiling::ProfilingAgent; use anyhow::{bail, Context, Error, Result}; use object::write::{Object, SectionId, StandardSegment, WritableBuffer}; use object::SectionKind; @@ -471,17 +471,21 @@ impl CompiledModule { } fn register_debug_and_profiling(&mut self, profiler: &dyn ProfilingAgent) -> Result<()> { - // Register GDB JIT images; initialize profiler and load the wasm module. if self.meta.native_debug_info_present { let text = self.text(); let bytes = create_gdbjit_image(self.mmap().to_vec(), (text.as_ptr(), text.len())) .context("failed to create jit image for gdb")?; - profiler.module_load(self, Some(&bytes)); let reg = GdbJitImageRegistration::register(bytes); self.dbg_jit_registration = Some(reg); - } else { - profiler.module_load(self, None); } + profiler.register_module(&self.code_memory, &|addr| { + let (idx, _) = self.func_by_text_offset(addr)?; + let idx = self.module.func_index(idx); + let name = self.func_name(idx)?; + let mut demangled = String::new(); + crate::demangling::demangle_function_name(&mut demangled, name).unwrap(); + Some(demangled) + }); Ok(()) } @@ -564,16 +568,6 @@ impl CompiledModule { Some(&self.text()[loc.start as usize..][..loc.length as usize]) } - /// Returns an iterator over all array-to-Wasm trampolines defined within - /// this module, providing both their index and their in-memory body. - pub fn array_to_wasm_trampolines( - &self, - ) -> impl ExactSizeIterator + '_ { - self.funcs - .keys() - .map(move |i| (i, self.array_to_wasm_trampoline(i).unwrap())) - } - /// Get the native-to-Wasm trampoline for the function `index` points to. 
/// /// If the function `index` points to does not escape, then `None` is @@ -586,16 +580,6 @@ impl CompiledModule { Some(&self.text()[loc.start as usize..][..loc.length as usize]) } - /// Returns an iterator over all native-to-Wasm trampolines defined within - /// this module, providing both their index and their in-memory body. - pub fn native_to_wasm_trampolines( - &self, - ) -> impl ExactSizeIterator + '_ { - self.funcs - .keys() - .map(move |i| (i, self.native_to_wasm_trampoline(i).unwrap())) - } - /// Get the Wasm-to-native trampoline for the given signature. /// /// These trampolines are used for filling in @@ -610,16 +594,6 @@ impl CompiledModule { &self.text()[loc.start as usize..][..loc.length as usize] } - /// Returns an iterator over all native-to-Wasm trampolines defined within - /// this module, providing both their index and their in-memory body. - pub fn wasm_to_native_trampolines( - &self, - ) -> impl ExactSizeIterator + '_ { - self.wasm_to_native_trampolines - .iter() - .map(move |(i, _)| (*i, self.wasm_to_native_trampoline(*i))) - } - /// Returns the stack map information for all functions defined in this /// module. /// diff --git a/crates/jit/src/lib.rs b/crates/jit/src/lib.rs index 11f48d7ff0b6..cd0284e460e0 100644 --- a/crates/jit/src/lib.rs +++ b/crates/jit/src/lib.rs @@ -24,7 +24,7 @@ mod code_memory; mod debug; mod demangling; mod instantiate; -mod profiling; +pub mod profiling; mod unwind; pub use crate::code_memory::CodeMemory; @@ -33,7 +33,6 @@ pub use crate::instantiate::{ SymbolizeContext, }; pub use demangling::*; -pub use profiling::*; /// Version number of this crate. 
pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/crates/jit/src/profiling.rs b/crates/jit/src/profiling.rs index e158a58c9299..22f53f521fbe 100644 --- a/crates/jit/src/profiling.rs +++ b/crates/jit/src/profiling.rs @@ -1,23 +1,32 @@ -use crate::{demangling::demangle_function_name_or_index, CompiledModule}; -use wasmtime_environ::{DefinedFuncIndex, EntityRef}; +#![allow(missing_docs)] + +use crate::CodeMemory; +#[allow(unused_imports)] +use anyhow::{bail, Result}; cfg_if::cfg_if! { if #[cfg(all(feature = "jitdump", target_os = "linux"))] { - #[path = "profiling/jitdump_linux.rs"] mod jitdump; + pub use jitdump::new as new_jitdump; } else { - #[path = "profiling/jitdump_disabled.rs"] - mod jitdump; + pub fn new_jitdump() -> Result> { + if cfg!(feature = "jitdump") { + bail!("jitdump is not supported on this platform"); + } else { + bail!("jitdump support disabled at compile time"); + } + } } } cfg_if::cfg_if! { if #[cfg(target_os = "linux")] { - #[path = "profiling/perfmap_linux.rs"] mod perfmap; + pub use perfmap::new as new_perfmap; } else { - #[path = "profiling/perfmap_disabled.rs"] - mod perfmap; + pub fn new_perfmap() -> Result> { + bail!("perfmap support not supported on this platform"); + } } } @@ -25,50 +34,75 @@ cfg_if::cfg_if! { // Note: VTune support is disabled on windows mingw because the ittapi crate doesn't compile // there; see also https://github.com/bytecodealliance/wasmtime/pull/4003 for rationale. 
if #[cfg(all(feature = "vtune", target_arch = "x86_64", not(all(target_os = "windows", target_env = "gnu"))))] { - #[path = "profiling/vtune.rs"] mod vtune; + pub use vtune::new as new_vtune; } else { - #[path = "profiling/vtune_disabled.rs"] - mod vtune; + pub fn new_vtune() -> Result> { + if cfg!(feature = "vtune") { + bail!("VTune is not supported on this platform."); + } else { + bail!("VTune support disabled at compile time."); + } + } } } -pub use jitdump::JitDumpAgent; -pub use perfmap::PerfMapAgent; -pub use vtune::VTuneAgent; - /// Common interface for profiling tools. pub trait ProfilingAgent: Send + Sync + 'static { - /// Notify the profiler of a new module loaded into memory - fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>); + fn register_function(&self, name: &str, addr: *const u8, size: usize); - /// Notify the profiler about a single dynamically-generated trampoline (for host function) - /// that is being loaded now.` - fn load_single_trampoline(&self, name: &str, addr: *const u8, size: usize, pid: u32, tid: u32); -} + fn register_module(&self, code: &CodeMemory, custom_name: &dyn Fn(usize) -> Option) { + use object::{File, Object as _, ObjectSection, ObjectSymbol, SectionKind, SymbolKind}; -/// Default agent for unsupported profiling build. 
-#[derive(Debug, Default, Clone, Copy)] -pub struct NullProfilerAgent; + let image = match File::parse(&code.mmap()[..]) { + Ok(image) => image, + Err(_) => return, + }; -impl ProfilingAgent for NullProfilerAgent { - fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {} - fn load_single_trampoline( - &self, - _name: &str, - _addr: *const u8, - _size: usize, - _pid: u32, - _tid: u32, - ) { + let text_base = match image.sections().find(|s| s.kind() == SectionKind::Text) { + Some(section) => match section.data() { + Ok(data) => data.as_ptr() as usize, + Err(_) => return, + }, + None => return, + }; + + for sym in image.symbols() { + if !sym.is_definition() { + continue; + } + if sym.kind() != SymbolKind::Text { + continue; + } + let address = sym.address(); + let size = sym.size(); + if address == 0 || size == 0 { + continue; + } + if let Ok(name) = sym.name() { + let addr = text_base + address as usize; + let owned; + let name = match custom_name(address as usize) { + Some(name) => { + owned = name; + &owned + } + None => name, + }; + self.register_function(name, addr as *const u8, size as usize); + } + } } } -#[allow(dead_code)] -fn debug_name(module: &CompiledModule, index: DefinedFuncIndex) -> String { - let index = module.module().func_index(index); - let mut debug_name = String::new(); - demangle_function_name_or_index(&mut debug_name, module.func_name(index), index.index()) - .unwrap(); - debug_name +pub fn new_null() -> Box { + Box::new(NullProfilerAgent) +} + +#[derive(Debug, Default, Clone, Copy)] +struct NullProfilerAgent; + +impl ProfilingAgent for NullProfilerAgent { + fn register_function(&self, _name: &str, _addr: *const u8, _size: usize) {} + fn register_module(&self, _code: &CodeMemory, _custom_name: &dyn Fn(usize) -> Option) {} } diff --git a/crates/jit/src/profiling/jitdump.rs b/crates/jit/src/profiling/jitdump.rs new file mode 100644 index 000000000000..f91191761993 --- /dev/null +++ b/crates/jit/src/profiling/jitdump.rs @@ 
-0,0 +1,66 @@ +//! Support for jitdump files which can be used by perf for profiling jitted code. +//! Spec definitions for the output format is as described here: +//! +//! +//! Usage Example: +//! Record +//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm +//! Combine +//! sudo perf inject -v -j -i perf.data -o perf.jit.data +//! Report +//! sudo perf report -i perf.jit.data -F+period,srcline +//! Note: For descriptive results, the WASM file being executed should contain dwarf debug data + +use crate::profiling::ProfilingAgent; +use anyhow::Result; +use std::process; +use std::sync::Mutex; +use target_lexicon::Architecture; +use wasmtime_jit_debug::perf_jitdump::*; + +use object::elf; + +/// Interface for driving the creation of jitdump files +struct JitDumpAgent { + pid: u32, +} + +/// Process-wide JIT dump file. Perf only accepts a unique file per process, in the injection step. +static JITDUMP_FILE: Mutex> = Mutex::new(None); + +/// Intialize a JitDumpAgent and write out the header. 
+pub fn new() -> Result> { + let mut jitdump_file = JITDUMP_FILE.lock().unwrap(); + + if jitdump_file.is_none() { + let filename = format!("./jit-{}.dump", process::id()); + let e_machine = match target_lexicon::HOST.architecture { + Architecture::X86_64 => elf::EM_X86_64 as u32, + Architecture::X86_32(_) => elf::EM_386 as u32, + Architecture::Arm(_) => elf::EM_ARM as u32, + Architecture::Aarch64(_) => elf::EM_AARCH64 as u32, + Architecture::S390x => elf::EM_S390 as u32, + _ => unimplemented!("unrecognized architecture"), + }; + *jitdump_file = Some(JitDumpFile::new(filename, e_machine)?); + } + + Ok(Box::new(JitDumpAgent { + pid: std::process::id(), + })) +} + +impl ProfilingAgent for JitDumpAgent { + fn register_function(&self, name: &str, addr: *const u8, size: usize) { + let mut jitdump_file = JITDUMP_FILE.lock().unwrap(); + let jitdump_file = jitdump_file.as_mut().unwrap(); + let timestamp = jitdump_file.get_time_stamp(); + #[allow(trivial_numeric_casts)] + let tid = rustix::thread::gettid().as_raw_nonzero().get() as u32; + if let Err(err) = + jitdump_file.dump_code_load_record(&name, addr, size, timestamp, self.pid, tid) + { + println!("Jitdump: write_code_load_failed_record failed: {:?}\n", err); + } + } +} diff --git a/crates/jit/src/profiling/jitdump_disabled.rs b/crates/jit/src/profiling/jitdump_disabled.rs deleted file mode 100644 index 16dd501640c8..000000000000 --- a/crates/jit/src/profiling/jitdump_disabled.rs +++ /dev/null @@ -1,32 +0,0 @@ -use crate::{CompiledModule, ProfilingAgent}; -use anyhow::{bail, Result}; - -/// Interface for driving the creation of jitdump files -#[derive(Debug)] -pub struct JitDumpAgent { - _private: (), -} - -impl JitDumpAgent { - /// Intialize a dummy JitDumpAgent that will fail upon instantiation. 
- pub fn new() -> Result { - if cfg!(feature = "jitdump") { - bail!("jitdump is not supported on this platform"); - } else { - bail!("jitdump support disabled at compile time"); - } - } -} - -impl ProfilingAgent for JitDumpAgent { - fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {} - fn load_single_trampoline( - &self, - _name: &str, - _addr: *const u8, - _size: usize, - __pid: u32, - _tid: u32, - ) { - } -} diff --git a/crates/jit/src/profiling/jitdump_linux.rs b/crates/jit/src/profiling/jitdump_linux.rs deleted file mode 100644 index cce66016c2d9..000000000000 --- a/crates/jit/src/profiling/jitdump_linux.rs +++ /dev/null @@ -1,444 +0,0 @@ -//! Support for jitdump files which can be used by perf for profiling jitted code. -//! Spec definitions for the output format is as described here: -//! -//! -//! Usage Example: -//! Record -//! sudo perf record -k 1 -e instructions:u target/debug/wasmtime -g --profile=jitdump test.wasm -//! Combine -//! sudo perf inject -v -j -i perf.data -o perf.jit.data -//! Report -//! sudo perf report -i perf.jit.data -F+period,srcline -//! Note: For descriptive results, the WASM file being executed should contain dwarf debug data - -use crate::{CompiledModule, ProfilingAgent}; -use anyhow::Result; -use object::{Object, ObjectSection}; -use std::sync::Mutex; -use std::{borrow, mem, process}; -use target_lexicon::Architecture; -use wasmtime_environ::EntityRef; -use wasmtime_jit_debug::perf_jitdump::*; - -use object::elf; - -/// Interface for driving the creation of jitdump files -pub struct JitDumpAgent { - /// Flag for experimenting with dumping code load record - /// after each function (true) or after each module. This - /// flag is currently set to true. - dump_funcs: bool, -} - -/// Process-wide JIT dump file. Perf only accepts a unique file per process, in the injection step. -static JITDUMP_FILE: Mutex> = Mutex::new(None); - -impl JitDumpAgent { - /// Intialize a JitDumpAgent and write out the header. 
- pub fn new() -> Result { - let mut jitdump_file = JITDUMP_FILE.lock().unwrap(); - - if jitdump_file.is_none() { - let filename = format!("./jit-{}.dump", process::id()); - let e_machine = match target_lexicon::HOST.architecture { - Architecture::X86_64 => elf::EM_X86_64 as u32, - Architecture::X86_32(_) => elf::EM_386 as u32, - Architecture::Arm(_) => elf::EM_ARM as u32, - Architecture::Aarch64(_) => elf::EM_AARCH64 as u32, - Architecture::S390x => elf::EM_S390 as u32, - _ => unimplemented!("unrecognized architecture"), - }; - *jitdump_file = Some(JitDumpFile::new(filename, e_machine)?); - } - - Ok(JitDumpAgent { dump_funcs: true }) - } -} - -impl ProfilingAgent for JitDumpAgent { - /// Sent when a method is compiled and loaded into memory by the VM. - fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>) { - let mut jitdump_file = JITDUMP_FILE.lock().unwrap(); - let jitdump_file = jitdump_file.as_mut().unwrap(); - - let pid = process::id(); - let tid = pid; // ThreadId does appear to track underlying thread. Using PID. - - for (idx, func) in module.finished_functions() { - let addr = func.as_ptr(); - let len = func.len(); - if let Some(img) = &dbg_image { - if let Err(err) = - self.dump_from_debug_image(jitdump_file, img, "wasm", addr, len, pid, tid) - { - println!( - "Jitdump: module_load failed dumping from debug image: {:?}\n", - err - ); - } - } else { - let timestamp = jitdump_file.get_time_stamp(); - let name = super::debug_name(module, idx); - if let Err(err) = - jitdump_file.dump_code_load_record(&name, addr, len, timestamp, pid, tid) - { - println!("Jitdump: write_code_load_failed_record failed: {:?}\n", err); - } - } - } - - // Note: these are the trampolines into exported functions. 
- for (name, body) in module - .array_to_wasm_trampolines() - .map(|(i, body)| { - ( - format!("wasm::array_to_wasm_trampoline[{}]", i.index()), - body, - ) - }) - .chain(module.native_to_wasm_trampolines().map(|(i, body)| { - ( - format!("wasm::native_to_wasm_trampoline[{}]", i.index()), - body, - ) - })) - .chain(module.wasm_to_native_trampolines().map(|(i, body)| { - ( - format!("wasm::wasm_to_native_trampolines[{}]", i.index()), - body, - ) - })) - { - let addr = body.as_ptr(); - let len = body.len(); - let timestamp = jitdump_file.get_time_stamp(); - if let Err(err) = - jitdump_file.dump_code_load_record(&name, addr, len, timestamp, pid, tid) - { - println!("Jitdump: write_code_load_failed_record failed: {:?}\n", err); - } - } - } - - fn load_single_trampoline(&self, name: &str, addr: *const u8, size: usize, pid: u32, tid: u32) { - let mut jitdump_file = JITDUMP_FILE.lock().unwrap(); - let jitdump_file = jitdump_file.as_mut().unwrap(); - - let timestamp = jitdump_file.get_time_stamp(); - if let Err(err) = jitdump_file.dump_code_load_record(&name, addr, size, timestamp, pid, tid) - { - println!("Jitdump: write_code_load_failed_record failed: {:?}\n", err); - } - } -} - -impl JitDumpAgent { - /// Attempts to dump debuginfo data structures, adding method and line level - /// for the jitted function. 
- pub fn dump_from_debug_image( - &self, - jitdump_file: &mut JitDumpFile, - dbg_image: &[u8], - module_name: &str, - addr: *const u8, - len: usize, - pid: u32, - tid: u32, - ) -> Result<()> { - let file = object::File::parse(dbg_image).unwrap(); - let endian = if file.is_little_endian() { - gimli::RunTimeEndian::Little - } else { - gimli::RunTimeEndian::Big - }; - - let load_section = |id: gimli::SectionId| -> Result> { - if let Some(section) = file.section_by_name(id.name()) { - Ok(section.data()?.into()) - } else { - Ok((&[] as &[u8]).into()) - } - }; - - let dwarf_cow = gimli::Dwarf::load(&load_section)?; - let borrow_section: &dyn for<'a> Fn( - &'a borrow::Cow<[u8]>, - ) - -> gimli::EndianSlice<'a, gimli::RunTimeEndian> = - &|section| gimli::EndianSlice::new(&*section, endian); - - let dwarf = dwarf_cow.borrow(&borrow_section); - - let mut iter = dwarf.units(); - while let Some(header) = iter.next()? { - let unit = match dwarf.unit(header) { - Ok(unit) => unit, - Err(_err) => { - return Ok(()); - } - }; - self.dump_entries(jitdump_file, unit, &dwarf, module_name, addr, len, pid, tid)?; - // TODO: Temp exit to avoid duplicate addresses being covered by only - // processing the top unit - break; - } - if !self.dump_funcs { - let timestamp = jitdump_file.get_time_stamp(); - if let Err(err) = - jitdump_file.dump_code_load_record(module_name, addr, len, timestamp, pid, tid) - { - println!("Jitdump: write_code_load_failed_record failed: {:?}\n", err); - } - } - Ok(()) - } - - fn dump_entries( - &self, - jitdump_file: &mut JitDumpFile, - unit: gimli::Unit, - dwarf: &gimli::Dwarf, - module_name: &str, - addr: *const u8, - len: usize, - pid: u32, - tid: u32, - ) -> Result<()> { - let mut depth = 0; - let mut entries = unit.entries(); - while let Some((delta_depth, entry)) = entries.next_dfs()? 
{ - if self.dump_funcs { - let record_header = RecordHeader { - id: RecordId::JitCodeLoad as u32, - record_size: 0, - timestamp: 0, - }; - - let mut clr = CodeLoadRecord { - header: record_header, - pid, - tid, - virtual_address: 0, - address: 0, - size: 0, - index: 0, - }; - let mut clr_name: String = String::from(module_name); - let mut get_debug_entry = false; - depth += delta_depth; - assert!(depth >= 0); - - if entry.tag() == gimli::constants::DW_TAG_subprogram { - get_debug_entry = true; - - let mut attrs = entry.attrs(); - while let Some(attr) = attrs.next()? { - if let Some(n) = attr.name().static_string() { - if n == "DW_AT_low_pc" { - clr.address = match attr.value() { - gimli::AttributeValue::Addr(address) => address, - _ => 0, - }; - clr.virtual_address = clr.address; - } else if n == "DW_AT_high_pc" { - clr.size = match attr.value() { - gimli::AttributeValue::Udata(data) => data, - _ => 0, - }; - } else if n == "DW_AT_name" { - clr_name = match attr.value() { - gimli::AttributeValue::DebugStrRef(offset) => { - if let Ok(s) = dwarf.debug_str.get_str(offset) { - clr_name.push_str("::"); - clr_name.push_str(&s.to_string_lossy()?); - clr_name - } else { - clr_name.push_str("::"); - clr_name.push_str("?"); - clr_name - } - } - _ => { - clr_name.push_str("??"); - clr_name - } - }; - } - } - } - } - if get_debug_entry { - // TODO: Temp check to make sure well only formed data is processed. - if clr.address == 0 { - continue; - } - // TODO: Temp check to make sure well only formed data is processed. - if clr_name == "?" 
{ - continue; - } - if clr.address == 0 || clr.size == 0 { - clr.address = addr as u64; - clr.virtual_address = addr as u64; - clr.size = len as u64; - } - clr.header.record_size = mem::size_of::() as u32 - + (clr_name.len() + 1) as u32 - + clr.size as u32; - clr.index = jitdump_file.next_code_index(); - self.dump_debug_info(jitdump_file, &unit, &dwarf, clr.address, clr.size, None)?; - - clr.header.timestamp = jitdump_file.get_time_stamp(); - - unsafe { - let code_buffer: &[u8] = - std::slice::from_raw_parts(clr.address as *const u8, clr.size as usize); - let _ = jitdump_file.write_code_load_record(&clr_name, clr, code_buffer); - } - } - } else { - let mut func_name: String = String::from("?"); - let mut func_addr = 0; - let mut func_size = 0; - - let mut get_debug_entry = false; - depth += delta_depth; - assert!(depth >= 0); - if entry.tag() == gimli::constants::DW_TAG_subprogram { - get_debug_entry = true; - - let mut attrs = entry.attrs(); - while let Some(attr) = attrs.next()? { - if let Some(n) = attr.name().static_string() { - if n == "DW_AT_low_pc" { - func_addr = match attr.value() { - gimli::AttributeValue::Addr(address) => address, - _ => 0, - }; - } else if n == "DW_AT_high_pc" { - func_size = match attr.value() { - gimli::AttributeValue::Udata(data) => data, - _ => 0, - }; - } else if n == "DW_AT_name" { - func_name = match attr.value() { - gimli::AttributeValue::DebugStrRef(offset) => { - if let Ok(s) = dwarf.debug_str.get_str(offset) { - func_name.clear(); - func_name.push_str(&s.to_string_lossy()?); - func_name - } else { - func_name.push_str("?"); - func_name - } - } - _ => { - func_name.push_str("??"); - func_name - } - }; - } - } - } - } - if get_debug_entry { - // TODO: Temp check to make sure well only formed data is processed. - if func_addr == 0 { - continue; - } - // TODO: Temp check to make sure well only formed data is processed. - if func_name == "?" 
{ - continue; - } - self.dump_debug_info( - jitdump_file, - &unit, - &dwarf, - func_addr, - func_size, - Some(func_name.as_str()), - )?; - } - } - } - Ok(()) - } - - fn dump_debug_info( - &self, - jitdump_file: &mut JitDumpFile, - unit: &gimli::Unit, - dwarf: &gimli::Dwarf, - address: u64, - size: u64, - file_suffix: Option<&str>, - ) -> Result<()> { - let timestamp = jitdump_file.get_time_stamp(); - if let Some(program) = unit.line_program.clone() { - let mut debug_info_record = DebugInfoRecord { - header: RecordHeader { - id: RecordId::JitCodeDebugInfo as u32, - record_size: 0, - timestamp, - }, - address, - count: 0, - }; - - let mut debug_entries = Vec::new(); - let mut debug_entries_total_filenames_len = 0; - let mut rows = program.rows(); - while let Some((header, row)) = rows.next_row()? { - let row_file_index = row.file_index() - 1; - let myfile = dwarf - .attr_string( - &unit, - header.file_names()[row_file_index as usize].path_name(), - ) - .unwrap(); - let filename = myfile.to_string_lossy()?; - let line = row.line().map(|nonzero| nonzero.get()).unwrap_or(0); - let column = match row.column() { - gimli::ColumnType::Column(column) => column.get(), - gimli::ColumnType::LeftEdge => 0, - }; - - if (row.address() < address) || (row.address() > (address + size)) { - continue; - } - let mut debug_entry = DebugEntry { - address: row.address(), - line: line as u32, - discriminator: column as u32, - filename: filename.to_string(), - }; - - if let Some(suffix) = file_suffix { - debug_entry.filename.push_str("::"); - debug_entry.filename.push_str(suffix); - } - - debug_entries_total_filenames_len += debug_entry.filename.len() + 1; - debug_entries.push(debug_entry); - } - - debug_info_record.count = debug_entries.len() as u64; - - let debug_entries_size = (debug_info_record.count - * (mem::size_of::() as u64 - mem::size_of::() as u64)) - + debug_entries_total_filenames_len as u64; - debug_info_record.header.record_size = - mem::size_of::() as u32 + debug_entries_size 
as u32; - - let _ = jitdump_file.write_debug_info_record(debug_info_record); - let _ = jitdump_file.write_debug_info_entries(debug_entries); - } - Ok(()) - } -} - -trait Reader: gimli::Reader + Send + Sync {} - -impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where - Endian: gimli::Endianity + Send + Sync -{ -} diff --git a/crates/jit/src/profiling/perfmap.rs b/crates/jit/src/profiling/perfmap.rs new file mode 100644 index 000000000000..3b9e0134de51 --- /dev/null +++ b/crates/jit/src/profiling/perfmap.rs @@ -0,0 +1,47 @@ +use crate::profiling::ProfilingAgent; +use anyhow::Result; +use std::io::{self, BufWriter, Write}; +use std::process; +use std::{fs::File, sync::Mutex}; + +/// Process-wide perf map file. Perf only reads a unique file per process. +static PERFMAP_FILE: Mutex>> = Mutex::new(None); + +/// Interface for driving the creation of jitdump files +struct PerfMapAgent; + +/// Intialize a JitDumpAgent and write out the header. +pub fn new() -> Result> { + let mut file = PERFMAP_FILE.lock().unwrap(); + if file.is_none() { + let filename = format!("/tmp/perf-{}.map", process::id()); + *file = Some(BufWriter::new(File::create(filename)?)); + } + Ok(Box::new(PerfMapAgent)) +} + +impl PerfMapAgent { + fn make_line( + writer: &mut dyn Write, + name: &str, + addr: *const u8, + len: usize, + ) -> io::Result<()> { + // Format is documented here: https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt + // Try our best to sanitize the name, since wasm allows for any utf8 string in there. 
+ let sanitized_name = name.replace('\n', "_").replace('\r', "_"); + write!(writer, "{:x} {:x} {}\n", addr as usize, len, sanitized_name)?; + writer.flush()?; + Ok(()) + } +} + +impl ProfilingAgent for PerfMapAgent { + fn register_function(&self, name: &str, addr: *const u8, size: usize) { + let mut file = PERFMAP_FILE.lock().unwrap(); + let file = file.as_mut().unwrap(); + if let Err(err) = Self::make_line(file, name, addr, size) { + eprintln!("Error when writing import trampoline info to the perf map file: {err}"); + } + } +} diff --git a/crates/jit/src/profiling/perfmap_disabled.rs b/crates/jit/src/profiling/perfmap_disabled.rs deleted file mode 100644 index 07c50ca0ea04..000000000000 --- a/crates/jit/src/profiling/perfmap_disabled.rs +++ /dev/null @@ -1,28 +0,0 @@ -use crate::{CompiledModule, ProfilingAgent}; -use anyhow::{bail, Result}; - -/// Interface for driving the creation of jitdump files -#[derive(Debug)] -pub struct PerfMapAgent { - _private: (), -} - -impl PerfMapAgent { - /// Intialize a dummy PerfMapAgent that will fail upon instantiation. - pub fn new() -> Result { - bail!("perfmap support not supported on this platform"); - } -} - -impl ProfilingAgent for PerfMapAgent { - fn module_load(&self, _module: &CompiledModule, _dbg_image: Option<&[u8]>) {} - fn load_single_trampoline( - &self, - _name: &str, - _addr: *const u8, - _size: usize, - __pid: u32, - _tid: u32, - ) { - } -} diff --git a/crates/jit/src/profiling/perfmap_linux.rs b/crates/jit/src/profiling/perfmap_linux.rs deleted file mode 100644 index b4b032574851..000000000000 --- a/crates/jit/src/profiling/perfmap_linux.rs +++ /dev/null @@ -1,104 +0,0 @@ -use crate::{CompiledModule, ProfilingAgent}; -use anyhow::Result; -use std::io::{self, BufWriter, Write}; -use std::process; -use std::{fs::File, sync::Mutex}; -use wasmtime_environ::EntityRef as _; - -/// Process-wide perf map file. Perf only reads a unique file per process. 
-static PERFMAP_FILE: Mutex> = Mutex::new(None); - -/// Interface for driving the creation of jitdump files -pub struct PerfMapAgent; - -impl PerfMapAgent { - /// Intialize a JitDumpAgent and write out the header. - pub fn new() -> Result { - let mut file = PERFMAP_FILE.lock().unwrap(); - if file.is_none() { - let filename = format!("/tmp/perf-{}.map", process::id()); - *file = Some(File::create(filename)?); - } - Ok(PerfMapAgent) - } - - fn make_line( - writer: &mut dyn Write, - name: &str, - addr: *const u8, - len: usize, - ) -> io::Result<()> { - // Format is documented here: https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt - // Try our best to sanitize the name, since wasm allows for any utf8 string in there. - let sanitized_name = name.replace('\n', "_").replace('\r', "_"); - write!(writer, "{:x} {:x} {}\n", addr as usize, len, sanitized_name)?; - Ok(()) - } -} - -impl ProfilingAgent for PerfMapAgent { - /// Sent when a method is compiled and loaded into memory by the VM. - fn module_load(&self, module: &CompiledModule, _dbg_image: Option<&[u8]>) { - let mut file = PERFMAP_FILE.lock().unwrap(); - let file = file.as_mut().unwrap(); - let mut file = BufWriter::new(file); - - for (idx, func) in module.finished_functions() { - let addr = func.as_ptr(); - let len = func.len(); - let name = super::debug_name(module, idx); - if let Err(err) = Self::make_line(&mut file, &name, addr, len) { - eprintln!("Error when writing function info to the perf map file: {err}"); - return; - } - } - - // Note: these are the trampolines into exported functions. 
- for (name, body) in module - .array_to_wasm_trampolines() - .map(|(i, body)| { - ( - format!("wasm::array_to_wasm_trampoline[{}]", i.index()), - body, - ) - }) - .chain(module.native_to_wasm_trampolines().map(|(i, body)| { - ( - format!("wasm::native_to_wasm_trampoline[{}]", i.index()), - body, - ) - })) - .chain(module.wasm_to_native_trampolines().map(|(i, body)| { - ( - format!("wasm::wasm_to_native_trampolines[{}]", i.index()), - body, - ) - })) - { - let (addr, len) = (body.as_ptr(), body.len()); - if let Err(err) = Self::make_line(&mut file, &name, addr, len) { - eprintln!("Error when writing export trampoline info to the perf map file: {err}"); - return; - } - } - - if let Err(err) = file.flush() { - eprintln!("Error when flushing the perf map file buffer: {err}"); - } - } - - fn load_single_trampoline( - &self, - name: &str, - addr: *const u8, - size: usize, - _pid: u32, - _tid: u32, - ) { - let mut file = PERFMAP_FILE.lock().unwrap(); - let file = file.as_mut().unwrap(); - if let Err(err) = Self::make_line(file, name, addr, size) { - eprintln!("Error when writing import trampoline info to the perf map file: {err}"); - } - } -} diff --git a/crates/jit/src/profiling/vtune.rs b/crates/jit/src/profiling/vtune.rs index ca9546ca989d..ca8bd1f1c1e2 100644 --- a/crates/jit/src/profiling/vtune.rs +++ b/crates/jit/src/profiling/vtune.rs @@ -12,14 +12,13 @@ //! installed](https://www.intel.com/content/www/us/en/developer/tools/oneapi/vtune-profiler.html#standalone) //! for this to work. -use crate::{CompiledModule, ProfilingAgent}; +use crate::profiling::ProfilingAgent; use anyhow::Result; use ittapi::jit::MethodLoadBuilder; -use std::sync::{atomic, Mutex}; -use wasmtime_environ::EntityRef; +use std::sync::Mutex; /// Interface for driving the ittapi for VTune support -pub struct VTuneAgent { +struct VTuneAgent { // Note that we use a mutex internally to serialize state updates since multiple threads may be // sharing this agent. 
state: Mutex<State>, @@ -31,15 +30,13 @@ struct State { vtune: ittapi::jit::Jit, } -impl VTuneAgent { - /// Initialize a VTuneAgent. - pub fn new() -> Result<Self> { - Ok(VTuneAgent { - state: Mutex::new(State { - vtune: Default::default(), - }), - }) - } +/// Initialize a VTuneAgent. +pub fn new() -> Result<Box<dyn ProfilingAgent>> { + Ok(Box::new(VTuneAgent { + state: Mutex::new(State { + vtune: Default::default(), + }), + })) } impl Drop for VTuneAgent { @@ -68,80 +65,16 @@ impl State { } impl ProfilingAgent for VTuneAgent { - fn module_load(&self, module: &CompiledModule, dbg_image: Option<&[u8]>) { - self.state.lock().unwrap().module_load(module, dbg_image); - } - fn load_single_trampoline(&self, name: &str, addr: *const u8, size: usize, pid: u32, tid: u32) { + fn register_function(&self, name: &str, addr: *const u8, size: usize) { self.state .lock() .unwrap() - .load_single_trampoline(name, addr, size, pid, tid); + .register_function(name, addr, size); } } impl State { - fn module_load(&mut self, module: &CompiledModule, _dbg_image: Option<&[u8]>) { - // Global counter for module ids. - static MODULE_ID: atomic::AtomicUsize = atomic::AtomicUsize::new(0); - let global_module_id = MODULE_ID.fetch_add(1, atomic::Ordering::SeqCst); - - let module_name = module - .module() - .name - .as_ref() - .cloned() - .unwrap_or_else(|| format!("wasm_module_{}", global_module_id)); - - for (idx, func) in module.finished_functions() { - let addr = func.as_ptr(); - let len = func.len(); - let method_name = super::debug_name(module, idx); - log::trace!( - "new function {:?}::{:?} @ {:?}\n", - module_name, - method_name, - addr - ); - self.notify_code(&module_name, &method_name, addr, len); - } - - // Note: these are the trampolines into exported functions.
- for (name, body) in module - .array_to_wasm_trampolines() - .map(|(i, body)| { - ( - format!("wasm::array_to_wasm_trampoline[{}]", i.index()), - body, - ) - }) - .chain(module.native_to_wasm_trampolines().map(|(i, body)| { - ( - format!("wasm::native_to_wasm_trampoline[{}]", i.index()), - body, - ) - })) - .chain(module.wasm_to_native_trampolines().map(|(i, body)| { - ( - format!("wasm::wasm_to_native_trampolines[{}]", i.index()), - body, - ) - })) - { - let addr = body.as_ptr(); - let len = body.len(); - log::trace!("new trampoline `{}` @ {:?}\n", name, addr); - self.notify_code(&module_name, &name, addr, len); - } - } - - fn load_single_trampoline( - &mut self, - name: &str, - addr: *const u8, - size: usize, - _pid: u32, - _tid: u32, - ) { - self.notify_code("wasm trampoline for Func::new", name, addr, size); + fn register_function(&mut self, name: &str, addr: *const u8, size: usize) { + self.notify_code("wasmtime", name, addr, size); } } diff --git a/crates/jit/src/profiling/vtune_disabled.rs b/crates/jit/src/profiling/vtune_disabled.rs deleted file mode 100644 index 34c68712761d..000000000000 --- a/crates/jit/src/profiling/vtune_disabled.rs +++ /dev/null @@ -1,32 +0,0 @@ -use crate::ProfilingAgent; -use anyhow::{bail, Result}; - -/// Interface for driving vtune support -#[derive(Debug)] -pub struct VTuneAgent { - _private: (), -} - -impl VTuneAgent { - /// Intialize a dummy VTuneAgent that will fail upon instantiation. 
- pub fn new() -> Result<Self> { - if cfg!(feature = "vtune") { - bail!("VTune is not supported on this platform."); - } else { - bail!("VTune support disabled at compile time."); - } - } -} - -impl ProfilingAgent for VTuneAgent { - fn module_load(&self, _module: &crate::CompiledModule, _dbg_image: Option<&[u8]>) {} - fn load_single_trampoline( - &self, - _name: &str, - _addr: *const u8, - _size: usize, - __pid: u32, - _tid: u32, - ) { - } -} diff --git a/crates/wasmtime/src/config.rs b/crates/wasmtime/src/config.rs index 4beeb71ab206..d68afb9961fe 100644 --- a/crates/wasmtime/src/config.rs +++ b/crates/wasmtime/src/config.rs @@ -13,7 +13,7 @@ use wasmparser::WasmFeatures; #[cfg(feature = "cache")] use wasmtime_cache::CacheConfig; use wasmtime_environ::Tunables; -use wasmtime_jit::{JitDumpAgent, NullProfilerAgent, PerfMapAgent, ProfilingAgent, VTuneAgent}; +use wasmtime_jit::profiling::{self, ProfilingAgent}; use wasmtime_runtime::{InstanceAllocator, OnDemandInstanceAllocator, RuntimeMemoryCreator}; pub use wasmtime_environ::CacheStore; @@ -1513,10 +1513,10 @@ impl Config { pub(crate) fn build_profiler(&self) -> Result<Box<dyn ProfilingAgent>> { Ok(match self.profiling_strategy { - ProfilingStrategy::PerfMap => Box::new(PerfMapAgent::new()?) as Box<dyn ProfilingAgent>, - ProfilingStrategy::JitDump => Box::new(JitDumpAgent::new()?) as Box<dyn ProfilingAgent>, - ProfilingStrategy::VTune => Box::new(VTuneAgent::new()?)
as Box<dyn ProfilingAgent>, - ProfilingStrategy::None => Box::new(NullProfilerAgent), + ProfilingStrategy::PerfMap => profiling::new_perfmap()?, + ProfilingStrategy::JitDump => profiling::new_jitdump()?, + ProfilingStrategy::VTune => profiling::new_vtune()?, + ProfilingStrategy::None => profiling::new_null(), }) } diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 054ea2521ae7..e0e9578a45f6 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -13,7 +13,7 @@ use std::sync::Arc; use wasmtime_cache::CacheConfig; use wasmtime_environ::obj; use wasmtime_environ::{FlagValue, ObjectKind}; -use wasmtime_jit::{CodeMemory, ProfilingAgent}; +use wasmtime_jit::{profiling::ProfilingAgent, CodeMemory}; use wasmtime_runtime::{debug_builtins, CompiledModuleIdAllocator, InstanceAllocator, MmapVec}; mod serialization; diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs index e719186d89d9..6d8432eddae0 100644 --- a/crates/wasmtime/src/trampoline/func.rs +++ b/crates/wasmtime/src/trampoline/func.rs @@ -4,7 +4,7 @@ use crate::{Engine, FuncType, ValRaw}; use anyhow::Result; use std::panic::{self, AssertUnwindSafe}; use std::ptr::NonNull; -use wasmtime_jit::{CodeMemory, ProfilingAgent}; +use wasmtime_jit::CodeMemory; use wasmtime_runtime::{ StoreBox, VMArrayCallHostFuncContext, VMContext, VMFuncRef, VMOpaqueContext, }; @@ -72,44 +72,6 @@ unsafe extern "C" fn array_call_shim( } } -#[cfg(compiler)] -fn register_trampolines(profiler: &dyn ProfilingAgent, code: &CodeMemory) { - use object::{File, Object as _, ObjectSection, ObjectSymbol, SectionKind, SymbolKind}; - let pid = std::process::id(); - let tid = pid; - - let image = match File::parse(&code.mmap()[..]) { - Ok(image) => image, - Err(_) => return, - }; - - let text_base = match image.sections().find(|s| s.kind() == SectionKind::Text) { - Some(section) => match section.data() { - Ok(data) => data.as_ptr() as usize, - Err(_) => return, - }, - None =>
return, - }; - - for sym in image.symbols() { - if !sym.is_definition() { - continue; - } - if sym.kind() != SymbolKind::Text { - continue; - } - let address = sym.address(); - let size = sym.size(); - if address == 0 || size == 0 { - continue; - } - if let Ok(name) = sym.name() { - let addr = text_base + address as usize; - profiler.load_single_trampoline(name, addr as *const u8, size as usize, pid, tid); - } - } -} - #[cfg(compiler)] pub fn create_array_call_function( ft: &FuncType, @@ -139,7 +101,7 @@ where let mut code_memory = CodeMemory::new(obj)?; code_memory.publish()?; - register_trampolines(engine.profiler(), &code_memory); + engine.profiler().register_module(&code_memory, &|_| None); // Extract the host/wasm trampolines from the results of compilation since // we know their start/length.