From f487112ae8737c33293da67ab3cc81cffd2e7c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Tue, 2 May 2023 18:45:58 -0400 Subject: [PATCH] winch: Implement new trampolines This change is a follow-up to https://github.com/bytecodealliance/wasmtime/pull/6262, in which the new trampolines, described [here](https://github.com/bytecodealliance/rfcs/blob/main/accepted/tail-calls.md#new-trampolines-and-vmcallercheckedanyfunc-changes), were introduced to Wasmtime. This change focuses on the `array-to-wasm`, `native-to-wasm` and `wasm-to-native` trampolines to restore Winch's working state prior to the introduction of the new trampolines. It's worth noting that the new approach for trampolines makes it easier to support the `TypedFunc` API in Winch. Prior to the introduction of the new trampolines, it was not obvious how to approach it. This change also introduces a pinned register that will hold the `VMContext` pointer, which is loaded in the `*-to-wasm` trampolines; the `VMContext` register is a prerequisite to this change to support the `wasm-to-native` trampolines. Lastly, with the introduction of the `VMContext` register and the `wasm-to-native` trampolines, this change also introduces support for calling function imports, which is a variation of the already existing calls to locally defined functions. 
--- crates/winch/src/compiler.rs | 57 +++++- tests/all/winch.rs | 132 +++++++------ winch/codegen/src/abi/mod.rs | 8 + winch/codegen/src/codegen/call.rs | 28 ++- winch/codegen/src/codegen/env.rs | 13 ++ winch/codegen/src/codegen/mod.rs | 59 ++++-- winch/codegen/src/frame/mod.rs | 14 +- winch/codegen/src/isa/aarch64/abi.rs | 8 + winch/codegen/src/isa/aarch64/masm.rs | 2 +- winch/codegen/src/isa/aarch64/mod.rs | 10 +- winch/codegen/src/isa/mod.rs | 21 +- winch/codegen/src/isa/x64/abi.rs | 8 + winch/codegen/src/isa/x64/address.rs | 2 +- winch/codegen/src/isa/x64/masm.rs | 2 +- winch/codegen/src/isa/x64/mod.rs | 30 ++- winch/codegen/src/isa/x64/regs.rs | 22 ++- winch/codegen/src/lib.rs | 2 + winch/codegen/src/masm.rs | 6 +- winch/codegen/src/regalloc.rs | 11 +- winch/codegen/src/stack.rs | 5 + winch/codegen/src/trampoline.rs | 272 +++++++++++++++++++++----- winch/environ/src/lib.rs | 28 ++- winch/src/compile.rs | 10 - 23 files changed, 583 insertions(+), 167 deletions(-) diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index 4f29ba73f703..eca72db202f7 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -8,7 +8,7 @@ use wasmtime_environ::{ CompileError, DefinedFuncIndex, FilePos, FuncIndex, FunctionBodyData, FunctionLoc, ModuleTranslation, ModuleTypes, PrimaryMap, Tunables, WasmFunctionInfo, }; -use winch_codegen::TargetIsa; +use winch_codegen::{TargetIsa, TrampolineKind}; use winch_environ::FuncEnv; pub(crate) struct Compiler { @@ -92,8 +92,22 @@ impl wasmtime_environ::Compiler for Compiler { types: &ModuleTypes, index: DefinedFuncIndex, ) -> Result, CompileError> { - let _ = (translation, types, index); - todo!() + let index = translation.module.func_index(index); + let sig = translation.module.functions[index].signature; + let ty = &types[sig]; + let wasm_ty = wasmparser::FuncType::new( + ty.params().iter().copied().map(Into::into), + ty.returns().iter().copied().map(Into::into), + ); + let env = 
FuncEnv::new(&translation.module, translation.get_types(), &self.isa); + let buffer = self + .isa + .compile_trampoline(&wasm_ty, index.as_u32(), &env, TrampolineKind::ArrayToWasm) + .map_err(|e| CompileError::Codegen(format!("{:?}", e)))?; + let compiled_function = + CompiledFunction::new(buffer, CompiledFuncEnv {}, self.isa.function_alignment()); + + Ok(Box::new(compiled_function)) } fn compile_native_to_wasm_trampoline( @@ -102,8 +116,24 @@ impl wasmtime_environ::Compiler for Compiler { types: &ModuleTypes, index: DefinedFuncIndex, ) -> Result, CompileError> { - let _ = (translation, types, index); - todo!() + let index = translation.module.func_index(index); + let sig = translation.module.functions[index].signature; + let ty = &types[sig]; + let wasm_ty = wasmparser::FuncType::new( + ty.params().iter().copied().map(Into::into), + ty.returns().iter().copied().map(Into::into), + ); + + let env = FuncEnv::new(&translation.module, translation.get_types(), &self.isa); + let buffer = self + .isa + .compile_trampoline(&wasm_ty, index.as_u32(), &env, TrampolineKind::NativeToWasm) + .map_err(|e| CompileError::Codegen(format!("{:?}", e)))?; + + let compiled_function = + CompiledFunction::new(buffer, CompiledFuncEnv {}, self.isa.function_alignment()); + + Ok(Box::new(compiled_function)) } fn compile_wasm_to_native_trampoline( @@ -111,8 +141,21 @@ impl wasmtime_environ::Compiler for Compiler { translation: &ModuleTranslation<'_>, wasm_func_ty: &wasmtime_environ::WasmFuncType, ) -> Result, CompileError> { - let _ = (translation, wasm_func_ty); - todo!() + let wasm_ty = wasmparser::FuncType::new( + wasm_func_ty.params().iter().copied().map(Into::into), + wasm_func_ty.returns().iter().copied().map(Into::into), + ); + + let env = FuncEnv::new(&translation.module, translation.get_types(), &self.isa); + let buffer = self + .isa + .compile_trampoline(&wasm_ty, 1u32, &env, TrampolineKind::WasmToNative) + .map_err(|e| CompileError::Codegen(format!("{:?}", e)))?; + + let 
compiled_function = + CompiledFunction::new(buffer, CompiledFuncEnv {}, self.isa.function_alignment()); + + Ok(Box::new(compiled_function)) } fn append_code( diff --git a/tests/all/winch.rs b/tests/all/winch.rs index 5d993accfdae..c78d4c63960d 100644 --- a/tests/all/winch.rs +++ b/tests/all/winch.rs @@ -1,58 +1,13 @@ use anyhow::Result; use wasmtime::*; -#[test] -#[ignore] -fn compiles_with_winch() -> Result<()> { - let mut c = Config::new(); - - c.strategy(Strategy::Winch); - - let engine = Engine::new(&c)?; - - // Winch only supports a very basic function signature for now while it's being developed. - let test_mod = r#" +const MODULE: &'static str = r#" (module + (import "" "" (func $add (param i32 i32) (result i32))) (func $test (result i32) (i32.const 42) ) - (export "test" (func $test)) - ) - "#; - let mut store = Store::new(&engine, ()); - - let module = Module::new(&engine, test_mod)?; - - let instance = Instance::new(&mut store, &module, &[])?; - - let f = instance - .get_func(&mut store, "test") - .ok_or(anyhow::anyhow!("test function not found"))?; - - let mut returns = vec![Val::null(); 1]; - - // Winch doesn't support calling typed functions at the moment. - f.call(&mut store, &[], &mut returns)?; - - assert_eq!(returns.len(), 1); - assert_eq!(returns[0].unwrap_i32(), 42); - - Ok(()) -} - -#[test] -#[ignore] -fn compiles_with_winch_stack_arguments() -> Result<()> { - let mut c = Config::new(); - - c.strategy(Strategy::Winch); - - let engine = Engine::new(&c)?; - - // Winch only supports a very basic function signature for now while it's being developed. 
- let test_mod = r#" - (module (func $sum10 (param $arg_1 i32) (param $arg_2 i32) (param $arg_3 i32) (param $arg_4 i32) (param $arg_5 i32) (param $arg_6 i32) (param $arg_7 i32) (param $arg_8 i32) (param $arg_9 i32) (param $arg_10 i32) (result i32) local.get $arg_1 local.get $arg_2 @@ -73,30 +28,95 @@ fn compiles_with_winch_stack_arguments() -> Result<()> { i32.add local.get $arg_10 i32.add) + + (func $call_add (param i32 i32) (result i32) + (local.get 0) + (local.get 1) + (call $add)) + + (export "42" (func $test)) (export "sum10" (func $sum10)) + (export "call_add" (func $call_add)) ) "#; +fn add_fn(store: impl AsContextMut) -> Func { + Func::wrap(store, |a: i32, b: i32| a + b) +} + +#[test] +fn array_to_wasm() -> Result<()> { + let mut c = Config::new(); + c.strategy(Strategy::Winch); + let engine = Engine::new(&c)?; let mut store = Store::new(&engine, ()); + let module = Module::new(&engine, MODULE)?; - let module = Module::new(&engine, test_mod)?; + let add_fn = add_fn(store.as_context_mut()); + let instance = Instance::new(&mut store, &module, &[add_fn.into()])?; + + let constant = instance + .get_func(&mut store, "42") + .ok_or(anyhow::anyhow!("test function not found"))?; + let mut returns = vec![Val::null(); 1]; + constant.call(&mut store, &[], &mut returns)?; - let instance = Instance::new(&mut store, &module, &[])?; + assert_eq!(returns.len(), 1); + assert_eq!(returns[0].unwrap_i32(), 42); - let f = instance + let sum = instance .get_func(&mut store, "sum10") .ok_or(anyhow::anyhow!("sum10 function not found"))?; - let mut returns = vec![Val::null(); 1]; - - // create a new Val array with ten 1s let args = vec![Val::I32(1); 10]; - - // Winch doesn't support calling typed functions at the moment. 
- f.call(&mut store, &args, &mut returns)?; + sum.call(&mut store, &args, &mut returns)?; assert_eq!(returns.len(), 1); assert_eq!(returns[0].unwrap_i32(), 10); Ok(()) } + +#[test] +fn native_to_wasm() -> Result<()> { + let mut c = Config::new(); + c.strategy(Strategy::Winch); + let engine = Engine::new(&c)?; + let mut store = Store::new(&engine, ()); + let module = Module::new(&engine, MODULE)?; + + let add_fn = add_fn(store.as_context_mut()); + let instance = Instance::new(&mut store, &module, &[add_fn.into()])?; + + let f = instance.get_typed_func::<(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32), i32>( + &mut store, "sum10", + )?; + + let args = (1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let result = f.call(&mut store, args)?; + + assert_eq!(result, 10); + + Ok(()) +} + +#[test] +fn wasm_to_native() -> Result<()> { + let mut c = Config::new(); + c.strategy(Strategy::Winch); + let engine = Engine::new(&c)?; + let mut store = Store::new(&engine, ()); + let module = Module::new(&engine, MODULE)?; + + let add_fn = add_fn(store.as_context_mut()); + let instance = Instance::new(&mut store, &module, &[add_fn.into()])?; + + let f = instance.get_typed_func::<(i32, i32), i32>(&mut store, "call_add")?; + + let args = (41, 1); + let result = f.call(&mut store, args)?; + + assert_eq!(result, 42); + + Ok(()) +} diff --git a/winch/codegen/src/abi/mod.rs b/winch/codegen/src/abi/mod.rs index 89a9c2860b60..87e95b9101c2 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -33,6 +33,7 @@ //! | | //! | | //! | Stack slots | +//! | + `VMContext` slot | //! | + dynamic space | //! | | //! | | @@ -78,6 +79,13 @@ pub(crate) trait ABI { /// Returns the designated scratch register. fn scratch_reg() -> Reg; + /// Returns the frame pointer register. + fn fp_reg() -> Reg; + + /// Returns the pinned register used to hold + /// the `VMContext`. + fn vmctx_reg() -> Reg; + /// Returns the callee-saved registers for the given /// calling convention. 
fn callee_saved_regs(call_conv: &CallingConvention) -> SmallVec<[Reg; 9]>; diff --git a/winch/codegen/src/codegen/call.rs b/winch/codegen/src/codegen/call.rs index 89e0d69492c3..b09a76d3ff34 100644 --- a/winch/codegen/src/codegen/call.rs +++ b/winch/codegen/src/codegen/call.rs @@ -152,16 +152,38 @@ impl<'a> FnCall<'a> { } } - /// Emit the function call. - pub fn emit( + /// Emit a direct function call, to a locally defined function. + pub fn direct( &self, masm: &mut M, context: &mut CodeGenContext, callee: u32, ) { + self.pre_call::(masm, context); + masm.call(CalleeKind::Direct(callee)); + self.post_call::(masm, context); + } + + /// Emit an indirect function call, using a raw address. + pub fn indirect( + &self, + masm: &mut M, + context: &mut CodeGenContext, + addr: M::Address, + ) { + self.pre_call::(masm, context); + let scratch = ::scratch_reg(); + masm.load(addr, scratch, OperandSize::S64); + masm.call(CalleeKind::Indirect(scratch)); + self.post_call::(masm, context); + } + + fn pre_call(&self, masm: &mut M, context: &mut CodeGenContext) { masm.reserve_stack(self.arg_stack_space); self.assign_args(context, masm, ::scratch_reg()); - masm.call(CalleeKind::Direct(callee)); + } + + fn post_call(&self, masm: &mut M, context: &mut CodeGenContext) { masm.free_stack(self.total_stack_space); context.drop_last(self.abi_sig.params.len()); // The stack pointer at the end of the function call diff --git a/winch/codegen/src/codegen/env.rs b/winch/codegen/src/codegen/env.rs index f05ac47fc00d..2453d4709718 100644 --- a/winch/codegen/src/codegen/env.rs +++ b/winch/codegen/src/codegen/env.rs @@ -5,6 +5,19 @@ use wasmparser::FuncType; pub trait FuncEnv { /// Get the callee information from a given function index. fn callee_from_index(&self, index: u32) -> Callee; + + /// Returns the offset to the `VMFuncRef::native_call` field. + /// See crates/runtime/src/vmcontext/vm_host_func_context.rs + /// for more details. 
+ fn vmnative_call_host_func_native_call(&self) -> u8; + + /// Returns the offset to the `wasm_call` field in `*const VMFunctionBody`. + /// See crates/environ/src/vmoffsets.rs for more details. + fn vmctx_vmfunction_import_wasm_call(&self, index: u32) -> u32; + + /// Returns the offset to the `vmctx` field in `*const VMFunctionBody`. + /// See crates/environ/src/vmoffsets.rs for more details. + fn vmctx_vmfunction_import_vmctx(&self, index: u32) -> u32; } /// Metadata about a function callee. Use by the code generation diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index c3c5303f6b32..25d1f1e777bf 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -1,11 +1,14 @@ use crate::{ abi::{ABISig, ABI}, masm::{MacroAssembler, OperandSize}, + stack::Val, CallingConvention, }; use anyhow::Result; use call::FnCall; -use wasmparser::{BinaryReader, FuncValidator, ValType, ValidatorResources, VisitOperator}; +use wasmparser::{ + BinaryReader, FuncType, FuncValidator, ValType, ValidatorResources, VisitOperator, +}; mod context; pub(crate) use context::*; @@ -89,6 +92,12 @@ where &mut self.context.regalloc, ); + // Save the vmctx pointer to its local slot in case we need to reload it + // at any point. + let vmctx_addr = self.masm.local_address(&self.context.frame.vmctx_slot); + self.masm + .store(::vmctx_reg().into(), vmctx_addr, OperandSize::S64); + while !body.eof() { let offset = body.original_position(); body.visit_operator(&mut ValidateThenVisit(validator.visitor(offset), self))??; @@ -123,14 +132,46 @@ where /// Emit a direct function call. pub fn emit_call(&mut self, index: u32) { let callee = self.env.callee_from_index(index); - if callee.import { - // TODO: Only locally defined functions for now. 
- unreachable!() - } + let (sig, callee_addr): (ABISig, Option<::Address>) = if callee.import + { + let mut params = vec![ValType::I64, ValType::I64]; + params.extend_from_slice(&callee.ty.params()); + // TODO Remove to_owned. + let sig = FuncType::new(params, callee.ty.results().to_owned()); + + let caller_vmctx = ::vmctx_reg(); + let callee_vmctx = self.context.any_gpr(self.masm); + let callee_vmctx_offset = self.env.vmctx_vmfunction_import_vmctx(index); + let callee_vmctx_addr = self.masm.address_at_reg(caller_vmctx, callee_vmctx_offset); + // TODO Remove harcoded operand size. + self.masm + .load(callee_vmctx_addr, callee_vmctx, OperandSize::S64); + + let callee_body_offset = self.env.vmctx_vmfunction_import_wasm_call(index); + let callee_addr = self.masm.address_at_reg(caller_vmctx, callee_body_offset); + + // Put the callee / caller vmctx at the start of the + // range of the stack so that they are used as first + // and second arguments. + let stack = &mut self.context.stack; + let location = stack.len() - (sig.params().len() - 2); + stack.insert(location as usize, Val::reg(caller_vmctx)); + stack.insert(location as usize, Val::reg(callee_vmctx)); + ( + self.abi.sig(&sig, &CallingConvention::Default), + Some(callee_addr), + ) + } else { + (self.abi.sig(&callee.ty, &CallingConvention::Default), None) + }; - let sig = self.abi.sig(&callee.ty, &CallingConvention::Default); let fncall = FnCall::new(self.abi, &sig, &mut self.context, self.masm); - fncall.emit::(self.masm, &mut self.context, index); + + if let Some(addr) = callee_addr { + fncall.indirect::(self.masm, &mut self.context, addr); + } else { + fncall.direct::(self.masm, &mut self.context, index); + } } /// Emit the usual function end instruction sequence. 
@@ -141,10 +182,6 @@ where } fn spill_register_arguments(&mut self) { - // TODO - // Revisit this once the implicit VMContext argument is introduced; - // when that happens the mapping between local slots and abi args - // is not going to be symmetric. self.sig .params .iter() diff --git a/winch/codegen/src/frame/mod.rs b/winch/codegen/src/frame/mod.rs index 5e59bf68cfc9..8054a2989faa 100644 --- a/winch/codegen/src/frame/mod.rs +++ b/winch/codegen/src/frame/mod.rs @@ -36,7 +36,7 @@ impl DefinedLocals { validator: &mut FuncValidator, ) -> Result { let mut next_stack = 0; - // The first 32 bits of a WASM binary function describe the number of locals + // The first 32 bits of a WASM binary function describe the number of locals. let local_count = reader.read_var_u32()?; let mut slots: Locals = Default::default(); @@ -74,6 +74,9 @@ pub(crate) struct Frame { /// Locals get calculated when allocating a frame and are readonly /// through the function compilation lifetime. pub locals: Locals, + + /// The offset to the slot containing the `VMContext`. 
+ pub vmctx_slot: LocalSlot, } impl Frame { @@ -90,14 +93,15 @@ impl Frame { .map(|l| LocalSlot::new(l.ty, l.offset + defined_locals_start)), ); - let locals_size = align_to( - defined_locals_start + defined_locals.stack_size, - abi.stack_align().into(), - ); + let vmctx_slots_size = ::word_bytes(); + let vmctx_offset = defined_locals_start + defined_locals.stack_size + vmctx_slots_size; + + let locals_size = align_to(vmctx_offset, abi.stack_align().into()); Ok(Self { locals, locals_size, + vmctx_slot: LocalSlot::i64(vmctx_offset), defined_locals_range: DefinedLocalsRange( defined_locals_start..defined_locals.stack_size, ), diff --git a/winch/codegen/src/isa/aarch64/abi.rs b/winch/codegen/src/isa/aarch64/abi.rs index 3644ecc7956b..8c451cd32450 100644 --- a/winch/codegen/src/isa/aarch64/abi.rs +++ b/winch/codegen/src/isa/aarch64/abi.rs @@ -87,6 +87,14 @@ impl ABI for Aarch64ABI { todo!() } + fn fp_reg() -> Reg { + regs::fp() + } + + fn vmctx_reg() -> Reg { + todo!() + } + fn callee_saved_regs(_call_conv: &CallingConvention) -> SmallVec<[Reg; 9]> { regs::callee_saved() } diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index d28c1bb267c5..0c01cbcbe015 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -195,7 +195,7 @@ impl Masm for MacroAssembler { self.sp_offset } - fn address_from_reg(&self, reg: Reg, offset: u32) -> Self::Address { + fn address_at_reg(&self, reg: Reg, offset: u32) -> Self::Address { Address::offset(reg, offset as i64) } } diff --git a/winch/codegen/src/isa/aarch64/mod.rs b/winch/codegen/src/isa/aarch64/mod.rs index c7ca90b3d582..fe367750728b 100644 --- a/winch/codegen/src/isa/aarch64/mod.rs +++ b/winch/codegen/src/isa/aarch64/mod.rs @@ -8,7 +8,7 @@ use crate::{ regalloc::RegAlloc, regset::RegSet, stack::Stack, - FuncEnv, + FuncEnv, TrampolineKind, }; use anyhow::Result; use cranelift_codegen::settings::{self, Flags}; @@ -116,7 +116,13 @@ impl TargetIsa for 
Aarch64 { 32 } - fn host_to_wasm_trampoline(&self, _ty: &FuncType) -> Result> { + fn compile_trampoline( + &self, + _ty: &FuncType, + _index: u32, + _func_env: &dyn FuncEnv, + _kind: TrampolineKind, + ) -> Result> { todo!() } } diff --git a/winch/codegen/src/isa/mod.rs b/winch/codegen/src/isa/mod.rs index f117e740c57a..2dc4f2147357 100644 --- a/winch/codegen/src/isa/mod.rs +++ b/winch/codegen/src/isa/mod.rs @@ -10,7 +10,7 @@ use std::{ use target_lexicon::{Architecture, Triple}; use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources}; -use crate::FuncEnv; +use crate::{FuncEnv, TrampolineKind}; #[cfg(feature = "x64")] pub(crate) mod x64; @@ -186,8 +186,23 @@ pub trait TargetIsa: Send + Sync { /// See `cranelift_codegen::isa::TargetIsa::function_alignment`. fn function_alignment(&self) -> u32; - /// Generate a trampoline that can be used to call a wasm function from wasmtime. - fn host_to_wasm_trampoline(&self, ty: &FuncType) -> Result>; + /// Compile a trampoline kind. + /// + /// This function, internally dispatches to the right trampoline to emit + /// depending on the `kind` paramter. + fn compile_trampoline( + &self, + ty: &FuncType, + index: u32, + func_env: &dyn FuncEnv, + kind: TrampolineKind, + ) -> Result>; + + /// Returns the pointer width of the ISA in bytes. 
+ fn pointer_bytes(&self) -> u8 { + let width = self.triple().pointer_width().unwrap(); + width.bytes() + } } impl Debug for &dyn TargetIsa { diff --git a/winch/codegen/src/isa/x64/abi.rs b/winch/codegen/src/isa/x64/abi.rs index 367100d77a9f..7f6553ca4a33 100644 --- a/winch/codegen/src/isa/x64/abi.rs +++ b/winch/codegen/src/isa/x64/abi.rs @@ -121,6 +121,14 @@ impl ABI for X64ABI { regs::scratch() } + fn fp_reg() -> Reg { + regs::rbp() + } + + fn vmctx_reg() -> Reg { + regs::vmctx() + } + fn callee_saved_regs(call_conv: &CallingConvention) -> SmallVec<[Reg; 9]> { regs::callee_saved(call_conv) } diff --git a/winch/codegen/src/isa/x64/address.rs b/winch/codegen/src/isa/x64/address.rs index 4a3e26ebc341..adaf902a4c4a 100644 --- a/winch/codegen/src/isa/x64/address.rs +++ b/winch/codegen/src/isa/x64/address.rs @@ -10,7 +10,7 @@ pub(crate) enum Address { } impl Address { - /// Create an offset + /// Create an offset. pub fn offset(base: Reg, offset: u32) -> Self { Self::Offset { base, offset } } diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 0dab09a20220..7ba450ca06e9 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -237,7 +237,7 @@ impl Masm for MacroAssembler { self.asm.finalize() } - fn address_from_reg(&self, reg: Reg, offset: u32) -> Self::Address { + fn address_at_reg(&self, reg: Reg, offset: u32) -> Self::Address { Address::offset(reg, offset) } } diff --git a/winch/codegen/src/isa/x64/mod.rs b/winch/codegen/src/isa/x64/mod.rs index 0dd3867ef5b5..aca853c47e54 100644 --- a/winch/codegen/src/isa/x64/mod.rs +++ b/winch/codegen/src/isa/x64/mod.rs @@ -8,12 +8,12 @@ use crate::isa::{x64::masm::MacroAssembler as X64Masm, CallingConvention}; use crate::masm::MacroAssembler; use crate::regalloc::RegAlloc; use crate::stack::Stack; -use crate::trampoline::Trampoline; use crate::FuncEnv; use crate::{ isa::{Builder, TargetIsa}, regset::RegSet, }; +use crate::{Trampoline, TrampolineKind}; use 
anyhow::Result; use cranelift_codegen::settings::{self, Flags}; use cranelift_codegen::{isa::x64::settings as x64_settings, Final, MachBufferFinalized}; @@ -120,15 +120,33 @@ impl TargetIsa for X64 { 16 } - fn host_to_wasm_trampoline(&self, ty: &FuncType) -> Result> { + fn compile_trampoline( + &self, + ty: &FuncType, + func_index: u32, + func_env: &dyn FuncEnv, + kind: TrampolineKind, + ) -> Result> { + use TrampolineKind::*; + let abi = abi::X64ABI::default(); let mut masm = X64Masm::new(self.shared_flags.clone(), self.isa_flags.clone()); let call_conv = self.wasmtime_call_conv(); - let mut trampoline = - Trampoline::new(&mut masm, &abi, regs::scratch(), regs::argv(), &call_conv); - - trampoline.emit_host_to_wasm(ty); + let mut trampoline = Trampoline::new( + &mut masm, + &abi, + func_env, + regs::scratch(), + regs::argv(), + &call_conv, + ); + + match kind { + ArrayToWasm => trampoline.emit_array_to_wasm(ty, func_index)?, + NativeToWasm => trampoline.emit_native_to_wasm(ty.clone(), func_index)?, + WasmToNative => trampoline.emit_wasm_to_native(&ty)?, + } Ok(masm.finalize()) } diff --git a/winch/codegen/src/isa/x64/regs.rs b/winch/codegen/src/isa/x64/regs.rs index 5d43b58d3728..23f5228c1f45 100644 --- a/winch/codegen/src/isa/x64/regs.rs +++ b/winch/codegen/src/isa/x64/regs.rs @@ -51,18 +51,25 @@ pub(crate) fn r9() -> Reg { pub(crate) fn r10() -> Reg { gpr(ENC_R10) } -pub(crate) fn r11() -> Reg { - gpr(ENC_R11) -} pub(crate) fn r12() -> Reg { gpr(ENC_R12) } pub(crate) fn r13() -> Reg { gpr(ENC_R13) } +/// Used as a pinned register to hold +/// the `VMContext`. +/// Non-allocatable in Winch's default +/// ABI, and callee-saved in SystemV and +/// Fastcall. pub(crate) fn r14() -> Reg { gpr(ENC_R14) } + +pub(crate) fn vmctx() -> Reg { + r14() +} + pub(crate) fn rbx() -> Reg { gpr(ENC_RBX) } @@ -78,6 +85,13 @@ pub(crate) fn rbp() -> Reg { gpr(ENC_RBP) } +/// Used as the scratch register. +/// Non-allocatable in Winch's default +/// ABI. 
+pub(crate) fn r11() -> Reg { + gpr(ENC_R11) +} + pub(crate) fn scratch() -> Reg { r11() } @@ -151,7 +165,7 @@ pub(crate) fn xmm15() -> Reg { const GPR: u32 = 16; const ALLOCATABLE_GPR: u32 = (1 << GPR) - 1; -const NON_ALLOCATABLE_GPR: u32 = (1 << ENC_RBP) | (1 << ENC_RSP) | (1 << ENC_R11); +const NON_ALLOCATABLE_GPR: u32 = (1 << ENC_RBP) | (1 << ENC_RSP) | (1 << ENC_R11) | (1 << ENC_R14); /// Bitmask to represent the available general purpose registers. pub(crate) const ALL_GPR: u32 = ALLOCATABLE_GPR & !NON_ALLOCATABLE_GPR; diff --git a/winch/codegen/src/lib.rs b/winch/codegen/src/lib.rs index f01bd63a400f..744e9062ef75 100644 --- a/winch/codegen/src/lib.rs +++ b/winch/codegen/src/lib.rs @@ -17,4 +17,6 @@ mod regalloc; mod regset; mod stack; mod trampoline; +pub use trampoline::TrampolineKind; +use trampoline::*; mod visitor; diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 31e968478850..0f652e0f3045 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -39,6 +39,7 @@ pub(crate) enum RegImm { Imm(i64), } +#[derive(Clone)] pub(crate) enum CalleeKind { /// A function call to a raw address. Indirect(Reg), @@ -109,8 +110,9 @@ pub(crate) trait MacroAssembler { /// current position of the stack pointer (e.g. [sp + offset]. fn address_at_sp(&self, offset: u32) -> Self::Address; - /// Construct an address that is relative to the given register. - fn address_from_reg(&self, reg: Reg, offset: u32) -> Self::Address; + /// Construct an address that is absolute to the current position + /// of the given register. + fn address_at_reg(&self, reg: Reg, offset: u32) -> Self::Address; /// Emit a function call to either a local or external function. 
fn call(&mut self, callee: CalleeKind); diff --git a/winch/codegen/src/regalloc.rs b/winch/codegen/src/regalloc.rs index 7640561d7133..d09620826cf7 100644 --- a/winch/codegen/src/regalloc.rs +++ b/winch/codegen/src/regalloc.rs @@ -46,6 +46,12 @@ impl RegAlloc { where F: FnMut(&mut RegAlloc), { + // If the scratch register is explicitly requested + // just return it, it's usage should never cause spills. + if named == self.scratch { + return named; + } + self.regset.gpr(named).unwrap_or_else(|| { spill(self); self.regset @@ -56,6 +62,9 @@ impl RegAlloc { /// Mark a particular general purpose register as available. pub fn free_gpr(&mut self, reg: Reg) { - self.regset.free_gpr(reg); + // Never mark the designated scratch register as allocatable. + if reg != self.scratch { + self.regset.free_gpr(reg); + } } } diff --git a/winch/codegen/src/stack.rs b/winch/codegen/src/stack.rs index d563d91e8412..4d817f895455 100644 --- a/winch/codegen/src/stack.rs +++ b/winch/codegen/src/stack.rs @@ -117,6 +117,11 @@ impl Stack { } } + /// Insert a new value at the specified index. + pub fn insert(&mut self, at: usize, val: Val) { + self.inner.insert(at, val); + } + /// Get the length of the stack. pub fn len(&self) -> usize { self.inner.len() diff --git a/winch/codegen/src/trampoline.rs b/winch/codegen/src/trampoline.rs index 3d579efe195f..1ff76428bd1f 100644 --- a/winch/codegen/src/trampoline.rs +++ b/winch/codegen/src/trampoline.rs @@ -1,14 +1,23 @@ use crate::{ - abi::{align_to, calculate_frame_adjustment, ABIArg, ABIResult, ABI}, + abi::{align_to, calculate_frame_adjustment, ABIArg, ABIParams, ABIResult, ABISig, ABI}, isa::CallingConvention, masm::{CalleeKind, MacroAssembler, OperandSize, RegImm}, reg::Reg, + FuncEnv, }; +use anyhow::{anyhow, Result}; use smallvec::SmallVec; use std::mem; use wasmparser::{FuncType, ValType}; -/// A trampoline to provide interopt between different calling +/// The supported trampoline kinds. 
+pub enum TrampolineKind { + ArrayToWasm, + NativeToWasm, + WasmToNative, +} + +/// A trampoline to provide interoperability between different calling /// conventions. pub(crate) struct Trampoline<'a, A, M> where @@ -26,12 +35,15 @@ where /// callee, so it can only be used after the callee-saved /// registers are on the stack. alloc_scratch_reg: Reg, - /// Registers to be saved as part of the trampoline's prologue and epilogue. + /// Registers to be saved as part of the trampoline's prologue + /// and to be restored as part of the trampoline's epilogue. callee_saved_regs: SmallVec<[Reg; 9]>, /// The calling convention used by the trampoline, /// which is the Wasmtime variant of the system ABI's /// calling convention. call_conv: &'a CallingConvention, + /// A reference to the function compilation environment. + func_env: &'a dyn FuncEnv, } impl<'a, A, M> Trampoline<'a, A, M> @@ -43,6 +55,7 @@ where pub fn new( masm: &'a mut M, abi: &'a A, + func_env: &'a dyn FuncEnv, scratch_reg: Reg, alloc_scratch_reg: Reg, call_conv: &'a CallingConvention, @@ -54,17 +67,11 @@ where alloc_scratch_reg, callee_saved_regs: ::callee_saved_regs(call_conv), call_conv, + func_env, } } - /// Emit the host to wasm trampoline. - pub fn emit_host_to_wasm(&mut self, ty: &FuncType) { - // The host to wasm trampoline is currently hard coded (see vmcontext.rs - // in the wasmtime-runtime crate, `VMArrayCallFunction`). The first two - // parameters are VMContexts (not used at this time). The third - // parameter is the function pointer to call. The fourth parameter is - // an address to storage space for both the return value and the - // arguments to the function. 
+ pub fn emit_array_to_wasm(&mut self, ty: &FuncType, callee_index: u32) -> Result<()> { let trampoline_ty = FuncType::new( vec![ValType::I64, ValType::I64, ValType::I64, ValType::I64], vec![], @@ -79,14 +86,17 @@ where let callee_sig = self.abi.sig(ty, &CallingConvention::Default); - let val_ptr = if let ABIArg::Reg { reg, ty: _ty } = &trampoline_sig.params[3] { - Ok(RegImm::reg(*reg)) - } else { - Err(anyhow::anyhow!("Expected the val ptr to be in a register")) - } - .unwrap(); + let val_ptr = &trampoline_sig.params[2] + .get_reg() + .map(RegImm::reg) + .ok_or_else(|| anyhow!("Expected value pointer to be in a register"))?; - self.prologue(); + // Get the VM context pointer. + let vmctx_ptr = Self::vmctx(&trampoline_sig.params)?; + self.prologue_with_callee_saved(); + // Move the VM context pointer to the designated, pinned register. + self.masm + .mov(vmctx_ptr, ::vmctx_reg().into(), OperandSize::S64); let mut trampoline_arg_offsets: [u32; 4] = [0; 4]; @@ -101,28 +111,12 @@ where } }); - let val_ptr_offset = trampoline_arg_offsets[3]; - let func_ptr_offset = trampoline_arg_offsets[2]; + let val_ptr_offset = trampoline_arg_offsets[2]; self.masm - .mov(val_ptr, RegImm::reg(self.scratch_reg), OperandSize::S64); - - // How much we need to adjust the stack pointer by to account - // for the alignment required by the ISA. - let delta = calculate_frame_adjustment( - self.masm.sp_offset(), - self.abi.arg_base_offset() as u32, - self.abi.call_stack_align() as u32, - ); - - // The total amount of stack space we need to reserve for the - // arguments. - let total_arg_stack_space = align_to( - callee_sig.stack_bytes + delta, - self.abi.call_stack_align() as u32, - ); + .mov(*val_ptr, RegImm::reg(self.scratch_reg), OperandSize::S64); - self.masm.reserve_stack(total_arg_stack_space); + let total_arg_stack_space = self.reserve_stack(&callee_sig); // The max size a value can be when reading from the params // memory location. 
@@ -133,13 +127,13 @@ where match param { ABIArg::Reg { reg, ty } => self.masm.load( - self.masm.address_from_reg(self.scratch_reg, value_offset), + self.masm.address_at_reg(self.scratch_reg, value_offset), *reg, (*ty).into(), ), ABIArg::Stack { offset, ty } => { self.masm.load( - self.masm.address_from_reg(self.scratch_reg, value_offset), + self.masm.address_at_reg(self.scratch_reg, value_offset), self.alloc_scratch_reg, (*ty).into(), ); @@ -152,16 +146,8 @@ where } }); - // Move the function pointer from it's stack location into a - // scratch register. - self.masm.load( - self.masm.address_from_sp(func_ptr_offset), - self.scratch_reg, - OperandSize::S64, - ); - // Call the function that was passed into the trampoline. - self.masm.call(CalleeKind::Indirect(self.scratch_reg)); + self.masm.call(CalleeKind::Direct(callee_index)); self.masm.free_stack(total_arg_stack_space); @@ -178,23 +164,198 @@ where let ABIResult::Reg { reg, ty } = &callee_sig.result; self.masm.store( RegImm::reg(*reg), - self.masm.address_from_reg(self.scratch_reg, 0), + self.masm.address_at_reg(self.scratch_reg, 0), (*ty).unwrap().into(), ); - self.epilogue(trampoline_arg_size); + self.epilogue_with_callee_saved_restore(trampoline_arg_size); + + Ok(()) + } + + pub fn emit_native_to_wasm(&mut self, ty: FuncType, callee_index: u32) -> Result<()> { + let native_sig = self.native_sig(&ty); + let wasm_sig = self.wasm_sig(&ty); + let vmctx_ptr = Self::vmctx(&native_sig.params)?; + + self.prologue_with_callee_saved(); + // Move the VM context pointer to the designated pinned register. + self.masm + .mov(vmctx_ptr, ::vmctx_reg().into(), OperandSize::S64); + // Spill the register arguments in the native function + // signature. + let (native_sig_stack_offsets, spill_size) = self.spill(&native_sig.params); + let total_arg_stack_space = self.reserve_stack(&wasm_sig); + + // Assign the arguments. 
+ wasm_sig.params.iter().enumerate().for_each(|(i, param)| { + let native_index = i + 2; + let native_param = &native_sig.params[native_index]; + let param_slot = match native_param { + ABIArg::Reg { .. } => self + .masm + .address_from_sp(native_sig_stack_offsets[native_index]), + ABIArg::Stack { offset, .. } => self.masm.address_at_reg( + ::fp_reg(), + *offset + (self.abi.arg_base_offset() as u32), + ), + }; + + match param { + ABIArg::Reg { reg, ty } => self.masm.load(param_slot, *reg, (*ty).into()), + ABIArg::Stack { offset, ty } => { + let size = (*ty).into(); + let store_addr = self.masm.address_at_sp(*offset); + self.masm.load(param_slot, self.scratch_reg, size); + self.masm.store(self.scratch_reg.into(), store_addr, size); + } + } + }); + + self.masm.call(CalleeKind::Direct(callee_index)); + self.masm.free_stack(total_arg_stack_space); + self.epilogue_with_callee_saved_restore(spill_size); + + Ok(()) + } + + pub fn emit_wasm_to_native(&mut self, ty: &FuncType) -> Result<()> { + let mut params = Self::callee_and_caller_vmctx(); + params.extend_from_slice(ty.params()); + + let callee_vmctx_size: OperandSize = params[0].into(); + + let func_ty = FuncType::new(params, ty.results().to_owned()); + let wasm_sig = self.wasm_sig(&func_ty); + let native_sig = self.native_sig(ty); + + self.prologue(); + + let (wasm_sig_stack_offsets, spill_size) = self.spill(&wasm_sig.params); + let total_arg_stack_space = self.reserve_stack(&wasm_sig); + + // TODO Ensure save fp and pc for fast stack walking. + let callee_vmctx_offset = wasm_sig_stack_offsets[0]; + let callee_offset = self.func_env.vmnative_call_host_func_native_call(); + + // Load the callee vm context to the scratch register. + let vmctx_offset_addr = self.masm.address_from_sp(callee_vmctx_offset); + self.masm + .load(vmctx_offset_addr, self.alloc_scratch_reg, callee_vmctx_size); + + // Assign the arguments. 
+ native_sig.params.iter().enumerate().for_each(|(i, param)| { + let wasm_param = &wasm_sig.params[i]; + let slot = match wasm_param { + ABIArg::Reg { .. } => self.masm.address_from_sp(wasm_sig_stack_offsets[i]), + ABIArg::Stack { offset, .. } => self.masm.address_at_reg( + ::fp_reg(), + *offset + (self.abi.arg_base_offset() as u32), + ), + }; + + match param { + ABIArg::Reg { reg, ty } => self.masm.load(slot, *reg, (*ty).into()), + ABIArg::Stack { offset, ty } => { + let size = (*ty).into(); + let store_addr = self.masm.address_at_sp(*offset); + self.masm.load(slot, self.scratch_reg, size); + self.masm.store(self.scratch_reg.into(), store_addr, size); + } + } + }); + + // Load the callee address into the other scratch register. + let callee_addr = self + .masm + .address_at_reg(self.alloc_scratch_reg, callee_offset.into()); + self.masm + .load(callee_addr, self.scratch_reg, OperandSize::S64); + + self.masm.call(CalleeKind::Indirect(self.scratch_reg)); + self.masm.free_stack(total_arg_stack_space); + self.epilogue(spill_size); + + Ok(()) + } + + fn callee_and_caller_vmctx() -> Vec { + vec![ValType::I64, ValType::I64] + } + + fn native_sig(&self, ty: &FuncType) -> ABISig { + let mut params = Self::callee_and_caller_vmctx(); + params.extend_from_slice(ty.params()); + let native_type = FuncType::new(params, ty.results().to_owned()); + + self.abi.sig(&native_type, self.call_conv) + } + + fn wasm_sig(&self, ty: &FuncType) -> ABISig { + self.abi.sig(ty, &CallingConvention::Default) + } + + fn vmctx(params: &ABIParams) -> Result { + params[0] + .get_reg() + .map(RegImm::reg) + .ok_or_else(|| anyhow!("Expected vm context pointer to be in a register")) + } + + fn spill(&mut self, params: &ABIParams) -> (SmallVec<[u32; 6]>, u32) { + let mut offsets = SmallVec::new(); + let mut spilled = 0; + params.iter().for_each(|param| { + if let Some(reg) = param.get_reg() { + let offset = self.masm.push(reg); + offsets.push(offset); + spilled += 1; + } + }); + + // The stack size for 
the spill, calculated + // from the number of spilled registers times + // the size of each push (8 bytes). + let size = spilled * ::word_bytes(); + + (offsets, size) + } + + pub fn reserve_stack(&mut self, callee_sig: &ABISig) -> u32 { + // How much we need to adjust the stack pointer by to account + // for the alignment required by the ISA. + let delta = calculate_frame_adjustment( + self.masm.sp_offset(), + self.abi.arg_base_offset() as u32, + self.abi.call_stack_align() as u32, + ); + + // The total amount of stack space we need to reserve for the + // arguments. + let total_arg_stack_space = align_to( + callee_sig.stack_bytes + delta, + self.abi.call_stack_align() as u32, + ); + + self.masm.reserve_stack(total_arg_stack_space); + total_arg_stack_space } /// The trampoline's prologue. fn prologue(&mut self) { self.masm.prologue(); + } + + fn prologue_with_callee_saved(&mut self) { + self.masm.prologue(); // Save any callee-saved registers. for r in &self.callee_saved_regs { self.masm.push(*r); } } - /// The trampoline's epilogue. - fn epilogue(&mut self, arg_size: u32) { + // TODO: Can the arg_size be removed as part of the masm epilogue? + // It should be possible if it's treated as the local size? + fn epilogue_with_callee_saved_restore(&mut self, arg_size: u32) { // Free the stack space allocated by pushing the trampoline arguments. self.masm.free_stack(arg_size); // Restore the callee-saved registers. @@ -203,4 +364,11 @@ where } self.masm.epilogue(0); } + + /// The trampoline's epilogue. + fn epilogue(&mut self, arg_size: u32) { + // Free the stack space allocated by pushing the trampoline arguments. + self.masm.free_stack(arg_size); + self.masm.epilogue(0); + } } diff --git a/winch/environ/src/lib.rs b/winch/environ/src/lib.rs index 66f9e4c4712f..043ba0b4fc8d 100644 --- a/winch/environ/src/lib.rs +++ b/winch/environ/src/lib.rs @@ -4,7 +4,7 @@ //! `winch_codegen::FuncEnv` trait.
use wasmparser::types::Types; -use wasmtime_environ::{FuncIndex, Module}; +use wasmtime_environ::{FuncIndex, Module, PtrSize, VMOffsets}; use winch_codegen::{self, Callee, TargetIsa}; /// Function environment containing module and runtime specific @@ -16,6 +16,8 @@ pub struct FuncEnv<'a> { pub types: &'a Types, /// The current ISA. pub isa: &'a Box, + /// VM offsets accessed by generated code. + offsets: VMOffsets, } impl<'a> winch_codegen::FuncEnv for FuncEnv<'a> { @@ -31,11 +33,33 @@ impl<'a> winch_codegen::FuncEnv for FuncEnv<'a> { index, } } + + fn vmnative_call_host_func_native_call(&self) -> u8 { + let pointer_size = self.isa.triple().pointer_width().unwrap().bytes(); + pointer_size.vmnative_call_host_func_context_func_ref() + + pointer_size.vm_func_ref_native_call() + } + + fn vmctx_vmfunction_import_wasm_call(&self, index: u32) -> u32 { + let func_index = FuncIndex::from_u32(index); + self.offsets.vmctx_vmfunction_import_wasm_call(func_index) + } + + fn vmctx_vmfunction_import_vmctx(&self, index: u32) -> u32 { + let func_index = FuncIndex::from_u32(index); + self.offsets.vmctx_vmfunction_import_vmctx(func_index) + } } impl<'a> FuncEnv<'a> { /// Create a new function environment. pub fn new(module: &'a Module, types: &'a Types, isa: &'a Box) -> Self { - Self { module, types, isa } + let offsets = VMOffsets::new(isa.pointer_bytes(), module); + Self { + module, + types, + isa, + offsets, + } } } diff --git a/winch/src/compile.rs b/winch/src/compile.rs index 8ad774a05867..7c0e29a0dc5b 100644 --- a/winch/src/compile.rs +++ b/winch/src/compile.rs @@ -68,15 +68,5 @@ fn compile(env: &FuncEnv, f: (DefinedFuncIndex, FunctionBodyData<'_>)) -> Result .iter() .for_each(|s| println!("{}", s)); - let buffer = env - .isa - .host_to_wasm_trampoline(sig) - .expect("Couldn't compile trampoline"); - - println!("Disassembly for trampoline: {}", index.as_u32()); - disasm(buffer.data(), env.isa)? - .iter() - .for_each(|s| println!("{}", s)); - Ok(()) }