From cf4e2012cd522c44e6406510bafe6a7b08f2af0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Tue, 2 May 2023 18:45:58 -0400 Subject: [PATCH] winch: Implement new trampolines

This change is a follow-up to https://github.com/bytecodealliance/wasmtime/pull/6262, in which the new trampolines, described [here](https://github.com/bytecodealliance/rfcs/blob/main/accepted/tail-calls.md#new-trampolines-and-vmcallercheckedanyfunc-changes), were introduced to Wasmtime. This change focuses on the `array-to-wasm`, `native-to-wasm`, and `wasm-to-native` trampolines, restoring Winch to the working state it was in prior to the introduction of the new trampolines. It's worth noting that the new approach to trampolines makes it easier to support the `TypedFunc` API in Winch; prior to their introduction, it was not obvious how to approach it.

This change also introduces a pinned register that holds the `VMContext` pointer, which is loaded in the `*-to-wasm` trampolines; the `VMContext` register is a prerequisite for supporting the `wasm-to-native` trampolines.

Lastly, with the introduction of the `VMContext` register and the `wasm-to-native` trampolines, this change also introduces support for calling function imports, which is a variation of the already existing calls to locally defined functions.

The other notable piece of this change, aside from the trampolines, is `winch-codegen`'s new dependency on `wasmtime-environ`. Winch is so closely tied to the concepts exposed by the wasmtime crates that it makes sense to tie them together, even though the separation provides some advantages, like easier testing in some cases. In the long run there will probably be less need to test Winch in isolation; instead we'd rely more on integration-style tests, which require all of Wasmtime's pieces anyway (fuzzing, spec tests, etc.).

This change doesn't update the existing implementation of `winch_codegen::FuncEnv`, but the intention is to update that part after this change.
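To illustrate what this enables end-to-end, here is a condensed sketch mirroring the tests added in `tests/all/winch.rs` below; the module text and names are illustrative, not part of the diff:

```rust
use wasmtime::*;

fn main() -> anyhow::Result<()> {
    // Compile with Winch instead of Cranelift.
    let mut config = Config::new();
    config.strategy(Strategy::Winch);
    let engine = Engine::new(&config)?;
    let mut store = Store::new(&engine, ());

    // `call_add` forwards its arguments to an imported host function,
    // exercising the new wasm-to-native trampoline.
    let module = Module::new(
        &engine,
        r#"(module
             (import "" "" (func $add (param i32 i32) (result i32)))
             (func (export "call_add") (param i32 i32) (result i32)
               (call $add (local.get 0) (local.get 1))))"#,
    )?;
    let add = Func::wrap(&mut store, |a: i32, b: i32| a + b);
    let instance = Instance::new(&mut store, &module, &[add.into()])?;

    // The typed path goes through the native-to-wasm trampoline, which
    // Winch previously couldn't support; the untyped `Func::call` path
    // goes through the array-to-wasm trampoline instead.
    let call_add = instance.get_typed_func::<(i32, i32), i32>(&mut store, "call_add")?;
    assert_eq!(call_add.call(&mut store, (41, 1))?, 42);
    Ok(())
}
```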
prtest:full --- Cargo.lock | 1 + crates/winch/src/compiler.rs | 63 +++- tests/all/winch.rs | 132 +++++---- winch/codegen/Cargo.toml | 1 + winch/codegen/src/abi/mod.rs | 8 + winch/codegen/src/codegen/call.rs | 75 +++-- winch/codegen/src/codegen/mod.rs | 71 ++++- winch/codegen/src/frame/mod.rs | 14 +- winch/codegen/src/isa/aarch64/abi.rs | 8 + winch/codegen/src/isa/aarch64/masm.rs | 10 +- winch/codegen/src/isa/aarch64/mod.rs | 12 +- winch/codegen/src/isa/mod.rs | 22 +- winch/codegen/src/isa/x64/abi.rs | 8 + winch/codegen/src/isa/x64/address.rs | 2 +- winch/codegen/src/isa/x64/masm.rs | 22 +- winch/codegen/src/isa/x64/mod.rs | 32 ++- winch/codegen/src/isa/x64/regs.rs | 22 +- winch/codegen/src/lib.rs | 2 + winch/codegen/src/masm.rs | 16 +- winch/codegen/src/regalloc.rs | 11 +- winch/codegen/src/stack.rs | 5 + winch/codegen/src/trampoline.rs | 400 ++++++++++++++++++++------ winch/codegen/src/visitor.rs | 3 +- winch/environ/src/lib.rs | 8 +- winch/src/compile.rs | 10 - 25 files changed, 702 insertions(+), 256 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e74675d28e9..ca81bb5a3f07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4378,6 +4378,7 @@ dependencies = [ "smallvec", "target-lexicon", "wasmparser", + "wasmtime-environ", ] [[package]] diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index 4f29ba73f703..41c386edc8c3 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -6,9 +6,9 @@ use wasmparser::FuncValidatorAllocations; use wasmtime_cranelift_shared::{CompiledFunction, ModuleTextBuilder}; use wasmtime_environ::{ CompileError, DefinedFuncIndex, FilePos, FuncIndex, FunctionBodyData, FunctionLoc, - ModuleTranslation, ModuleTypes, PrimaryMap, Tunables, WasmFunctionInfo, + ModuleTranslation, ModuleTypes, PrimaryMap, Tunables, VMOffsets, WasmFunctionInfo, }; -use winch_codegen::TargetIsa; +use winch_codegen::{TargetIsa, TrampolineKind}; use winch_environ::FuncEnv; pub(crate) struct Compiler { @@ -67,10 +67,11 @@ impl wasmtime_environ::Compiler for Compiler { .unwrap(), ); let mut validator = validator.into_validator(self.take_allocations()); - let env = FuncEnv::new(&translation.module, translation.get_types(), &self.isa); + let vmoffsets = VMOffsets::new(self.isa.pointer_bytes(), &translation.module); + let env = FuncEnv::new(&translation.module, translation.get_types()); let buffer = self .isa - .compile_function(&sig, &body, &env, &mut validator) + .compile_function(&sig, &body, &vmoffsets, &env, &mut validator) .map_err(|e| CompileError::Codegen(format!("{e:?}"))); self.save_allocations(validator.into_allocations()); let buffer = buffer?; @@ -92,8 +93,21 @@ impl wasmtime_environ::Compiler for Compiler { types: &ModuleTypes, index: DefinedFuncIndex, ) -> Result, CompileError> { - let _ = (translation, types, index); - todo!() + let func_index = translation.module.func_index(index); + let sig = translation.module.functions[func_index].signature; + let ty = &types[sig]; + let wasm_ty = wasmparser::FuncType::new( + ty.params().iter().copied().map(Into::into), + ty.returns().iter().copied().map(Into::into), + ); + let buffer = self + .isa + .compile_trampoline(&wasm_ty, TrampolineKind::ArrayToWasm(func_index)) + .map_err(|e| CompileError::Codegen(format!("{:?}", e)))?; + let compiled_function = + CompiledFunction::new(buffer, CompiledFuncEnv {}, self.isa.function_alignment()); + + Ok(Box::new(compiled_function)) } fn compile_native_to_wasm_trampoline( @@ -102,17 +116,44 @@ impl wasmtime_environ::Compiler for Compiler { types: 
&ModuleTypes, index: DefinedFuncIndex, ) -> Result<Box<dyn Any + Send>, CompileError> { - let _ = (translation, types, index); - todo!() + let func_index = translation.module.func_index(index); + let sig = translation.module.functions[func_index].signature; + let ty = &types[sig]; + let wasm_ty = wasmparser::FuncType::new( + ty.params().iter().copied().map(Into::into), + ty.returns().iter().copied().map(Into::into), + ); + + let buffer = self + .isa + .compile_trampoline(&wasm_ty, TrampolineKind::NativeToWasm(func_index)) + .map_err(|e| CompileError::Codegen(format!("{:?}", e)))?; + + let compiled_function = + CompiledFunction::new(buffer, CompiledFuncEnv {}, self.isa.function_alignment()); + + Ok(Box::new(compiled_function)) } fn compile_wasm_to_native_trampoline( &self, - translation: &ModuleTranslation<'_>, + _translation: &ModuleTranslation<'_>, wasm_func_ty: &wasmtime_environ::WasmFuncType, ) -> Result<Box<dyn Any + Send>, CompileError> { - let _ = (translation, wasm_func_ty); - todo!() + let wasm_ty = wasmparser::FuncType::new( + wasm_func_ty.params().iter().copied().map(Into::into), + wasm_func_ty.returns().iter().copied().map(Into::into), + ); + + let buffer = self + .isa + .compile_trampoline(&wasm_ty, TrampolineKind::WasmToNative) + .map_err(|e| CompileError::Codegen(format!("{:?}", e)))?; + + let compiled_function = + CompiledFunction::new(buffer, CompiledFuncEnv {}, self.isa.function_alignment()); + + Ok(Box::new(compiled_function)) } fn append_code( diff --git a/tests/all/winch.rs b/tests/all/winch.rs index 5d993accfdae..c78d4c63960d 100644 --- a/tests/all/winch.rs +++ b/tests/all/winch.rs @@ -1,58 +1,13 @@ use anyhow::Result; use wasmtime::*; -#[test] -#[ignore] -fn compiles_with_winch() -> Result<()> { - let mut c = Config::new(); - - c.strategy(Strategy::Winch); - - let engine = Engine::new(&c)?; - - // Winch only supports a very basic function signature for now while it's being developed. - let test_mod = r#" +const MODULE: &'static str = r#" (module + (import "" "" (func $add (param i32 i32) (result i32))) (func $test (result i32) (i32.const 42) ) - (export "test" (func $test)) - ) - "#; - let mut store = Store::new(&engine, ()); - - let module = Module::new(&engine, test_mod)?; - - let instance = Instance::new(&mut store, &module, &[])?; - - let f = instance - .get_func(&mut store, "test") - .ok_or(anyhow::anyhow!("test function not found"))?; - - let mut returns = vec![Val::null(); 1]; - - // Winch doesn't support calling typed functions at the moment. - f.call(&mut store, &[], &mut returns)?; - - assert_eq!(returns.len(), 1); - assert_eq!(returns[0].unwrap_i32(), 42); - - Ok(()) -} - -#[test] -#[ignore] -fn compiles_with_winch_stack_arguments() -> Result<()> { - let mut c = Config::new(); - - c.strategy(Strategy::Winch); - - let engine = Engine::new(&c)?; - - // Winch only supports a very basic function signature for now while it's being developed.
- let test_mod = r#" - (module (func $sum10 (param $arg_1 i32) (param $arg_2 i32) (param $arg_3 i32) (param $arg_4 i32) (param $arg_5 i32) (param $arg_6 i32) (param $arg_7 i32) (param $arg_8 i32) (param $arg_9 i32) (param $arg_10 i32) (result i32) local.get $arg_1 local.get $arg_2 @@ -73,30 +28,95 @@ fn compiles_with_winch_stack_arguments() -> Result<()> { i32.add local.get $arg_10 i32.add) + + (func $call_add (param i32 i32) (result i32) + (local.get 0) + (local.get 1) + (call $add)) + + (export "42" (func $test)) (export "sum10" (func $sum10)) + (export "call_add" (func $call_add)) ) "#; +fn add_fn(store: impl AsContextMut) -> Func { + Func::wrap(store, |a: i32, b: i32| a + b) +} + +#[test] +fn array_to_wasm() -> Result<()> { + let mut c = Config::new(); + c.strategy(Strategy::Winch); + let engine = Engine::new(&c)?; let mut store = Store::new(&engine, ()); + let module = Module::new(&engine, MODULE)?; - let module = Module::new(&engine, test_mod)?; + let add_fn = add_fn(store.as_context_mut()); + let instance = Instance::new(&mut store, &module, &[add_fn.into()])?; + + let constant = instance + .get_func(&mut store, "42") + .ok_or(anyhow::anyhow!("test function not found"))?; + let mut returns = vec![Val::null(); 1]; + constant.call(&mut store, &[], &mut returns)?; - let instance = Instance::new(&mut store, &module, &[])?; + assert_eq!(returns.len(), 1); + assert_eq!(returns[0].unwrap_i32(), 42); - let f = instance + let sum = instance .get_func(&mut store, "sum10") .ok_or(anyhow::anyhow!("sum10 function not found"))?; - let mut returns = vec![Val::null(); 1]; - - // create a new Val array with ten 1s let args = vec![Val::I32(1); 10]; - - // Winch doesn't support calling typed functions at the moment. - f.call(&mut store, &args, &mut returns)?; + sum.call(&mut store, &args, &mut returns)?; assert_eq!(returns.len(), 1); assert_eq!(returns[0].unwrap_i32(), 10); Ok(()) } + +#[test] +fn native_to_wasm() -> Result<()> { + let mut c = Config::new(); + c.strategy(Strategy::Winch); + let engine = Engine::new(&c)?; + let mut store = Store::new(&engine, ()); + let module = Module::new(&engine, MODULE)?; + + let add_fn = add_fn(store.as_context_mut()); + let instance = Instance::new(&mut store, &module, &[add_fn.into()])?; + + let f = instance.get_typed_func::<(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32), i32>( + &mut store, "sum10", + )?; + + let args = (1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let result = f.call(&mut store, args)?; + + assert_eq!(result, 10); + + Ok(()) +} + +#[test] +fn wasm_to_native() -> Result<()> { + let mut c = Config::new(); + c.strategy(Strategy::Winch); + let engine = Engine::new(&c)?; + let mut store = Store::new(&engine, ()); + let module = Module::new(&engine, MODULE)?; + + let add_fn = add_fn(store.as_context_mut()); + let instance = Instance::new(&mut store, &module, &[add_fn.into()])?; + + let f = instance.get_typed_func::<(i32, i32), i32>(&mut store, "call_add")?; + + let args = (41, 1); + let result = f.call(&mut store, args)?; + + assert_eq!(result, 42); + + Ok(()) +} diff --git a/winch/codegen/Cargo.toml b/winch/codegen/Cargo.toml index 6ac3309b2881..6e1b2c4cbc65 100644 --- a/winch/codegen/Cargo.toml +++ b/winch/codegen/Cargo.toml @@ -19,6 +19,7 @@ target-lexicon = { workspace = true, features = ["std"] } cranelift-codegen = { workspace = true } regalloc2 = "0.7.0" gimli = { workspace = true } +wasmtime-environ = { workspace = true } [features] x64 = ["cranelift-codegen/x86"] diff --git a/winch/codegen/src/abi/mod.rs b/winch/codegen/src/abi/mod.rs index 
89a9c2860b60..87e95b9101c2 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -33,6 +33,7 @@ //! | | //! | | //! | Stack slots | +//! | + `VMContext` slot | //! | + dynamic space | //! | | //! | | @@ -78,6 +79,13 @@ pub(crate) trait ABI { /// Returns the designated scratch register. fn scratch_reg() -> Reg; + /// Returns the frame pointer register. + fn fp_reg() -> Reg; + + /// Returns the pinned register used to hold + /// the `VMContext`. + fn vmctx_reg() -> Reg; + /// Returns the callee-saved registers for the given /// calling convention. fn callee_saved_regs(call_conv: &CallingConvention) -> SmallVec<[Reg; 9]>; diff --git a/winch/codegen/src/codegen/call.rs b/winch/codegen/src/codegen/call.rs index 89e0d69492c3..02ce6b08945e 100644 --- a/winch/codegen/src/codegen/call.rs +++ b/winch/codegen/src/codegen/call.rs @@ -2,23 +2,21 @@ //! calling convention, see [ABI]. use super::CodeGenContext; use crate::{ - abi::{align_to, calculate_frame_adjustment, ABIArg, ABIResult, ABISig, ABI}, + abi::{ABIArg, ABIResult, ABISig, ABI}, masm::{CalleeKind, MacroAssembler, OperandSize}, reg::Reg, stack::Val, }; +use wasmtime_environ::FuncIndex; /// All the information needed to emit a function call. pub(crate) struct FnCall<'a> { - /// The total stack space in bytes used by the function call. - /// This amount includes the sum of: + /// The stack space consumed by the function call; that is, + /// the sum of: /// - /// 1. The amount of stack space that needs to be explicitly - /// allocated at the callsite for callee arguments that - /// go in the stack, plus any alignment. - /// 2. The amount of stack space created by saving any live + /// 1. The amount of stack space created by saving any live /// registers at the callsite. - /// 3. The amount of space used by any memory entries in the value + /// 2. The amount of space used by any memory entries in the value /// stack present at the callsite, that will be used as /// arguments for the function call. Any memory values in the /// value stack that are needed as part of the function @@ -33,17 +31,17 @@ pub(crate) struct FnCall<'a> { /// assigned); it's more efficient to track the space needed by /// those memory values and reclaim it at once. /// - /// The machine stack state that this amount is capturing, is the following: + /// The machine stack throughout the function call is as follows: /// ┌──────────────────────────────────────────────────┐ /// │ │ - /// │ │ + /// │ 1 │ /// │ Stack space created by any previous spills │ /// │ from the value stack; and which memory values │ /// │ are used as function arguments. │ /// │ │ /// ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like: /// │ │ [ Reg | Reg | Mem(offset) | Mem(offset) ] - /// │ │ + /// │ 2 │ /// │ Stack space created by saving │ /// │ any live registers at the callsite. │ /// │ │ @@ -60,7 +58,7 @@ /// │ │ /// └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call /// - total_stack_space: u32, + call_stack_space: u32, /// The total stack space needed for the callee arguments on the /// stack, including any adjustments to the function's frame and /// aligned to the required ABI alignment. @@ -87,7 +85,6 @@ impl<'a> FnCall<'a> { /// having saved any live registers, so that we can account for /// any pushes generated by register spilling.
pub fn new( - abi: &A, callee_sig: &'a ABISig, context: &mut CodeGenContext, masm: &mut M, @@ -135,34 +132,52 @@ impl<'a> FnCall<'a> { } }; - let delta = calculate_frame_adjustment( - masm.sp_offset(), - abi.arg_base_offset() as u32, - abi.call_stack_align() as u32, - ); - - let arg_stack_space = align_to(arg_stack_space + delta, abi.call_stack_align() as u32); Self { abi_sig: &callee_sig, arg_stack_space, - total_stack_space: (spilled_regs * ::word_bytes()) - + (memory_values * ::word_bytes()) - + arg_stack_space, + call_stack_space: (spilled_regs * ::word_bytes()) + + (memory_values * ::word_bytes()), sp_offset_at_callsite, } } - /// Emit the function call. - pub fn emit( + /// Emit a direct function call, to a locally defined function. + pub fn direct( + &self, + masm: &mut M, + context: &mut CodeGenContext, + callee: FuncIndex, + ) { + let reserved_stack = masm.call(16, 16, self.arg_stack_space, |masm| { + self.assign_args(context, masm, ::scratch_reg()); + CalleeKind::Direct(callee.as_u32()) + }); + self.post_call::(masm, context, reserved_stack); + } + + /// Emit an indirect function call, using a raw address. + pub fn indirect( + &self, + masm: &mut M, + context: &mut CodeGenContext, + addr: M::Address, + ) { + let reserved_stack = masm.call(16, 16, self.arg_stack_space, |masm| { + let scratch = ::scratch_reg(); + self.assign_args(context, masm, scratch); + masm.load(addr, scratch, OperandSize::S64); + CalleeKind::Indirect(scratch) + }); + self.post_call::(masm, context, reserved_stack); + } + + fn post_call( &self, masm: &mut M, context: &mut CodeGenContext, - callee: u32, + size: u32, ) { - masm.reserve_stack(self.arg_stack_space); - self.assign_args(context, masm, ::scratch_reg()); - masm.call(CalleeKind::Direct(callee)); - masm.free_stack(self.total_stack_space); + masm.free_stack(self.call_stack_space + size); context.drop_last(self.abi_sig.params.len()); // The stack pointer at the end of the function call // cannot be less than what it was when starting the diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index c3c5303f6b32..fc76a3ea26e2 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -1,11 +1,15 @@ use crate::{ abi::{ABISig, ABI}, masm::{MacroAssembler, OperandSize}, + stack::Val, CallingConvention, }; use anyhow::Result; use call::FnCall; -use wasmparser::{BinaryReader, FuncValidator, ValType, ValidatorResources, VisitOperator}; +use wasmparser::{ + BinaryReader, FuncType, FuncValidator, ValType, ValidatorResources, VisitOperator, +}; +use wasmtime_environ::{FuncIndex, VMOffsets}; mod context; pub(crate) use context::*; @@ -33,6 +37,9 @@ where /// A reference to the current ABI. pub abi: &'a A, + + /// Offsets used with the VM context pointer. + vmoffsets: &'a VMOffsets, } impl<'a, A, M> CodeGen<'a, A, M> @@ -46,6 +53,7 @@ where context: CodeGenContext<'a>, env: &'a dyn FuncEnv, sig: ABISig, + vmoffsets: &'a VMOffsets, ) -> Self { Self { sig, @@ -53,6 +61,7 @@ where masm, abi, env, + vmoffsets, } } @@ -89,6 +98,12 @@ where &mut self.context.regalloc, ); + // Save the vmctx pointer to its local slot in case we need to reload it + // at any point. + let vmctx_addr = self.masm.local_address(&self.context.frame.vmctx_slot); + self.masm + .store(::vmctx_reg().into(), vmctx_addr, OperandSize::S64); + while !body.eof() { let offset = body.original_position(); body.visit_operator(&mut ValidateThenVisit(validator.visitor(offset), self))??; @@ -121,16 +136,48 @@ where } /// Emit a direct function call. 
- pub fn emit_call(&mut self, index: u32) { - let callee = self.env.callee_from_index(index); - if callee.import { - // TODO: Only locally defined functions for now. - unreachable!() + pub fn emit_call(&mut self, index: FuncIndex) { + let callee = self.env.callee_from_index(index.as_u32()); + let (sig, callee_addr): (ABISig, Option<<M as MacroAssembler>::Address>) = if callee.import + { + let mut params = vec![ValType::I64, ValType::I64]; + params.extend_from_slice(&callee.ty.params()); + // TODO: Remove to_owned. + let sig = FuncType::new(params, callee.ty.results().to_owned()); + + let caller_vmctx = <A as ABI>::vmctx_reg(); + let callee_vmctx = self.context.any_gpr(self.masm); + let callee_vmctx_offset = self.vmoffsets.vmctx_vmfunction_import_vmctx(index); + let callee_vmctx_addr = self.masm.address_at_reg(caller_vmctx, callee_vmctx_offset); + // TODO: Remove hardcoded operand size. + self.masm + .load(callee_vmctx_addr, callee_vmctx, OperandSize::S64); + + let callee_body_offset = self.vmoffsets.vmctx_vmfunction_import_wasm_call(index); + let callee_addr = self.masm.address_at_reg(caller_vmctx, callee_body_offset); + + // Put the callee / caller vmctx at the start of the + // range of the stack so that they are used as the first + // and second arguments. + let stack = &mut self.context.stack; + let location = stack.len() - (sig.params().len() - 2); + stack.insert(location as usize, Val::reg(caller_vmctx)); + stack.insert(location as usize, Val::reg(callee_vmctx)); + ( + self.abi.sig(&sig, &CallingConvention::Default), + Some(callee_addr), + ) + } else { + (self.abi.sig(&callee.ty, &CallingConvention::Default), None) + }; + + let fncall = FnCall::new::(&sig, &mut self.context, self.masm); + + if let Some(addr) = callee_addr { + fncall.indirect::(self.masm, &mut self.context, addr); + } else { + fncall.direct::(self.masm, &mut self.context, index); } - - let sig = self.abi.sig(&callee.ty, &CallingConvention::Default); - let fncall = FnCall::new(self.abi, &sig, &mut self.context, self.masm); - fncall.emit::(self.masm, &mut self.context, index); } /// Emit the usual function end instruction sequence. @@ -141,10 +188,6 @@ where } fn spill_register_arguments(&mut self) { - // TODO - // Revisit this once the implicit VMContext argument is introduced; - // when that happens the mapping between local slots and abi args - // is not going to be symmetric. self.sig .params .iter() diff --git a/winch/codegen/src/frame/mod.rs b/winch/codegen/src/frame/mod.rs index 5e59bf68cfc9..8054a2989faa 100644 --- a/winch/codegen/src/frame/mod.rs +++ b/winch/codegen/src/frame/mod.rs @@ -36,7 +36,7 @@ impl DefinedLocals { validator: &mut FuncValidator<ValidatorResources>, ) -> Result<Self> { let mut next_stack = 0; - // The first 32 bits of a WASM binary function describe the number of locals + // The first 32 bits of a WASM binary function describe the number of locals. let local_count = reader.read_var_u32()?; let mut slots: Locals = Default::default(); @@ -74,6 +74,9 @@ pub(crate) struct Frame { /// Locals get calculated when allocating a frame and are readonly /// through the function compilation lifetime. pub locals: Locals, + + /// The offset to the slot containing the `VMContext`.
+ pub vmctx_slot: LocalSlot, } impl Frame { @@ -90,14 +93,15 @@ impl Frame { .map(|l| LocalSlot::new(l.ty, l.offset + defined_locals_start)), ); - let locals_size = align_to( - defined_locals_start + defined_locals.stack_size, - abi.stack_align().into(), - ); + let vmctx_slots_size = ::word_bytes(); + let vmctx_offset = defined_locals_start + defined_locals.stack_size + vmctx_slots_size; + + let locals_size = align_to(vmctx_offset, abi.stack_align().into()); Ok(Self { locals, locals_size, + vmctx_slot: LocalSlot::i64(vmctx_offset), defined_locals_range: DefinedLocalsRange( defined_locals_start..defined_locals.stack_size, ), diff --git a/winch/codegen/src/isa/aarch64/abi.rs b/winch/codegen/src/isa/aarch64/abi.rs index 3644ecc7956b..8c451cd32450 100644 --- a/winch/codegen/src/isa/aarch64/abi.rs +++ b/winch/codegen/src/isa/aarch64/abi.rs @@ -87,6 +87,14 @@ impl ABI for Aarch64ABI { todo!() } + fn fp_reg() -> Reg { + regs::fp() + } + + fn vmctx_reg() -> Reg { + todo!() + } + fn callee_saved_regs(_call_conv: &CallingConvention) -> SmallVec<[Reg; 9]> { regs::callee_saved() } diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index d28c1bb267c5..47df97d7b57d 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -136,7 +136,13 @@ impl Masm for MacroAssembler { self.asm.str(src, dst, size); } - fn call(&mut self, _callee: CalleeKind) { + fn call( + &mut self, + _alignment: u32, + _addend: u32, + _stack_args_size: u32, + _load_callee: impl FnMut(&mut Self) -> CalleeKind, + ) -> u32 { todo!() } @@ -195,7 +201,7 @@ impl Masm for MacroAssembler { self.sp_offset } - fn address_from_reg(&self, reg: Reg, offset: u32) -> Self::Address { + fn address_at_reg(&self, reg: Reg, offset: u32) -> Self::Address { Address::offset(reg, offset as i64) } } diff --git a/winch/codegen/src/isa/aarch64/mod.rs b/winch/codegen/src/isa/aarch64/mod.rs index c7ca90b3d582..2cbbe7513bae 100644 --- a/winch/codegen/src/isa/aarch64/mod.rs +++ b/winch/codegen/src/isa/aarch64/mod.rs @@ -8,7 +8,7 @@ use crate::{ regalloc::RegAlloc, regset::RegSet, stack::Stack, - FuncEnv, + FuncEnv, TrampolineKind, }; use anyhow::Result; use cranelift_codegen::settings::{self, Flags}; @@ -17,6 +17,7 @@ use cranelift_codegen::{MachTextSectionBuilder, TextSectionBuilder}; use masm::MacroAssembler as Aarch64Masm; use target_lexicon::Triple; use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources}; +use wasmtime_environ::VMOffsets; mod abi; mod address; @@ -85,6 +86,7 @@ impl TargetIsa for Aarch64 { &self, sig: &FuncType, body: &FunctionBody, + vmoffsets: &VMOffsets, env: &dyn FuncEnv, validator: &mut FuncValidator, ) -> Result> { @@ -99,7 +101,7 @@ impl TargetIsa for Aarch64 { // TODO: Add floating point bitmask let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), scratch()); let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig); + let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig, vmoffsets); codegen.emit(&mut body, validator)?; Ok(masm.finalize()) @@ -116,7 +118,11 @@ impl TargetIsa for Aarch64 { 32 } - fn host_to_wasm_trampoline(&self, _ty: &FuncType) -> Result> { + fn compile_trampoline( + &self, + _ty: &FuncType, + _kind: TrampolineKind, + ) -> Result> { todo!() } } diff --git a/winch/codegen/src/isa/mod.rs b/winch/codegen/src/isa/mod.rs index f117e740c57a..396e5d1205d1 100644 --- a/winch/codegen/src/isa/mod.rs +++ 
b/winch/codegen/src/isa/mod.rs @@ -1,3 +1,4 @@ +use crate::{FuncEnv, TrampolineKind}; use anyhow::{anyhow, Result}; use core::fmt::Formatter; use cranelift_codegen::isa::{CallConv, IsaBuilder}; @@ -9,8 +10,7 @@ use std::{ }; use target_lexicon::{Architecture, Triple}; use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources}; - -use crate::FuncEnv; +use wasmtime_environ::VMOffsets; #[cfg(feature = "x64")] pub(crate) mod x64; @@ -149,6 +149,7 @@ pub trait TargetIsa: Send + Sync { &self, sig: &FuncType, body: &FunctionBody, + vmoffsets: &VMOffsets<u8>, env: &dyn FuncEnv, validator: &mut FuncValidator<ValidatorResources>, ) -> Result<MachBufferFinalized<Final>>; @@ -186,8 +187,21 @@ pub trait TargetIsa: Send + Sync { /// See `cranelift_codegen::isa::TargetIsa::function_alignment`. fn function_alignment(&self) -> u32; - /// Generate a trampoline that can be used to call a wasm function from wasmtime. - fn host_to_wasm_trampoline(&self, ty: &FuncType) -> Result<MachBufferFinalized<Final>>; + /// Compile the given trampoline kind. + /// + /// This function internally dispatches to the right trampoline to emit, + /// depending on the `kind` parameter. + fn compile_trampoline( + &self, + ty: &FuncType, + kind: TrampolineKind, + ) -> Result<MachBufferFinalized<Final>>; + + /// Returns the pointer width of the ISA in bytes. + fn pointer_bytes(&self) -> u8 { + let width = self.triple().pointer_width().unwrap(); + width.bytes() + } } impl Debug for &dyn TargetIsa { diff --git a/winch/codegen/src/isa/x64/abi.rs b/winch/codegen/src/isa/x64/abi.rs index 367100d77a9f..7f6553ca4a33 100644 --- a/winch/codegen/src/isa/x64/abi.rs +++ b/winch/codegen/src/isa/x64/abi.rs @@ -121,6 +121,14 @@ impl ABI for X64ABI { regs::scratch() } + fn fp_reg() -> Reg { + regs::rbp() + } + + fn vmctx_reg() -> Reg { + regs::vmctx() + } + fn callee_saved_regs(call_conv: &CallingConvention) -> SmallVec<[Reg; 9]> { regs::callee_saved(call_conv) } diff --git a/winch/codegen/src/isa/x64/address.rs b/winch/codegen/src/isa/x64/address.rs index 4a3e26ebc341..adaf902a4c4a 100644 --- a/winch/codegen/src/isa/x64/address.rs +++ b/winch/codegen/src/isa/x64/address.rs @@ -10,7 +10,7 @@ pub(crate) enum Address { } impl Address { - /// Create an offset + /// Create an offset.
pub fn offset(base: Reg, offset: u32) -> Self { Self::Offset { base, offset } } diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 0dab09a20220..66af86a15713 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -4,7 +4,11 @@ use super::{ regs::{self, rbp, rsp}, }; use crate::masm::{DivKind, MacroAssembler as Masm, OperandSize, RegImm, RemKind}; -use crate::{abi::LocalSlot, codegen::CodeGenContext, stack::Val}; +use crate::{ + abi::{align_to, calculate_frame_adjustment, LocalSlot}, + codegen::CodeGenContext, + stack::Val, +}; use crate::{isa::reg::Reg, masm::CalleeKind}; use cranelift_codegen::{isa::x64::settings as x64_settings, settings, Final, MachBufferFinalized}; @@ -114,8 +118,20 @@ impl Masm for MacroAssembler { self.decrement_sp(8); } - fn call(&mut self, callee: CalleeKind) { + fn call( + &mut self, + alignment: u32, + addend: u32, + stack_args_size: u32, + mut load_callee: impl FnMut(&mut Self) -> CalleeKind, + ) -> u32 { + let delta = calculate_frame_adjustment(self.sp_offset(), addend, alignment); + let aligned_args_size = align_to(stack_args_size, alignment); + let total_stack = delta + aligned_args_size; + self.reserve_stack(total_stack); + let callee = load_callee(self); self.asm.call(callee); + total_stack } fn load(&mut self, src: Address, dst: Reg, size: OperandSize) { @@ -237,7 +253,7 @@ impl Masm for MacroAssembler { self.asm.finalize() } - fn address_from_reg(&self, reg: Reg, offset: u32) -> Self::Address { + fn address_at_reg(&self, reg: Reg, offset: u32) -> Self::Address { Address::offset(reg, offset) } } diff --git a/winch/codegen/src/isa/x64/mod.rs b/winch/codegen/src/isa/x64/mod.rs index 0dd3867ef5b5..0fd3e7b8c7b6 100644 --- a/winch/codegen/src/isa/x64/mod.rs +++ b/winch/codegen/src/isa/x64/mod.rs @@ -8,18 +8,19 @@ use crate::isa::{x64::masm::MacroAssembler as X64Masm, CallingConvention}; use crate::masm::MacroAssembler; use crate::regalloc::RegAlloc; use crate::stack::Stack; -use crate::trampoline::Trampoline; use crate::FuncEnv; use crate::{ isa::{Builder, TargetIsa}, regset::RegSet, }; +use crate::{Trampoline, TrampolineKind}; use anyhow::Result; use cranelift_codegen::settings::{self, Flags}; use cranelift_codegen::{isa::x64::settings as x64_settings, Final, MachBufferFinalized}; use cranelift_codegen::{MachTextSectionBuilder, TextSectionBuilder}; use target_lexicon::Triple; use wasmparser::{FuncType, FuncValidator, FunctionBody, ValidatorResources}; +use wasmtime_environ::VMOffsets; use self::regs::ALL_GPR; @@ -90,6 +91,7 @@ impl TargetIsa for X64 { &self, sig: &FuncType, body: &FunctionBody, + vmoffsets: &VMOffsets, env: &dyn FuncEnv, validator: &mut FuncValidator, ) -> Result> { @@ -104,7 +106,7 @@ impl TargetIsa for X64 { // TODO Add in floating point bitmask let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), regs::scratch()); let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig); + let mut codegen = CodeGen::new(&mut masm, &abi, codegen_context, env, abi_sig, vmoffsets); codegen.emit(&mut body, validator)?; @@ -120,15 +122,31 @@ impl TargetIsa for X64 { 16 } - fn host_to_wasm_trampoline(&self, ty: &FuncType) -> Result> { + fn compile_trampoline( + &self, + ty: &FuncType, + kind: TrampolineKind, + ) -> Result> { + use TrampolineKind::*; + let abi = abi::X64ABI::default(); let mut masm = X64Masm::new(self.shared_flags.clone(), self.isa_flags.clone()); let call_conv = 
self.wasmtime_call_conv(); - let mut trampoline = - Trampoline::new(&mut masm, &abi, regs::scratch(), regs::argv(), &call_conv); - - trampoline.emit_host_to_wasm(ty); + let mut trampoline = Trampoline::new( + &mut masm, + &abi, + regs::scratch(), + regs::argv(), + &call_conv, + self.pointer_bytes(), + ); + + match kind { + ArrayToWasm(idx) => trampoline.emit_array_to_wasm(ty, idx)?, + NativeToWasm(idx) => trampoline.emit_native_to_wasm(ty, idx)?, + WasmToNative => trampoline.emit_wasm_to_native(ty)?, + } Ok(masm.finalize()) } diff --git a/winch/codegen/src/isa/x64/regs.rs b/winch/codegen/src/isa/x64/regs.rs index 5d43b58d3728..23f5228c1f45 100644 --- a/winch/codegen/src/isa/x64/regs.rs +++ b/winch/codegen/src/isa/x64/regs.rs @@ -51,18 +51,25 @@ pub(crate) fn r9() -> Reg { pub(crate) fn r10() -> Reg { gpr(ENC_R10) } -pub(crate) fn r11() -> Reg { - gpr(ENC_R11) -} pub(crate) fn r12() -> Reg { gpr(ENC_R12) } pub(crate) fn r13() -> Reg { gpr(ENC_R13) } +/// Used as a pinned register to hold +/// the `VMContext`. +/// Non-allocatable in Winch's default +/// ABI, and callee-saved in SystemV and +/// Fastcall. pub(crate) fn r14() -> Reg { gpr(ENC_R14) } + +pub(crate) fn vmctx() -> Reg { + r14() +} + pub(crate) fn rbx() -> Reg { gpr(ENC_RBX) } @@ -78,6 +85,13 @@ pub(crate) fn rbp() -> Reg { gpr(ENC_RBP) } +/// Used as the scratch register. +/// Non-allocatable in Winch's default +/// ABI. +pub(crate) fn r11() -> Reg { + gpr(ENC_R11) +} + pub(crate) fn scratch() -> Reg { r11() } @@ -151,7 +165,7 @@ pub(crate) fn xmm15() -> Reg { const GPR: u32 = 16; const ALLOCATABLE_GPR: u32 = (1 << GPR) - 1; -const NON_ALLOCATABLE_GPR: u32 = (1 << ENC_RBP) | (1 << ENC_RSP) | (1 << ENC_R11); +const NON_ALLOCATABLE_GPR: u32 = (1 << ENC_RBP) | (1 << ENC_RSP) | (1 << ENC_R11) | (1 << ENC_R14); /// Bitmask to represent the available general purpose registers. pub(crate) const ALL_GPR: u32 = ALLOCATABLE_GPR & !NON_ALLOCATABLE_GPR; diff --git a/winch/codegen/src/lib.rs b/winch/codegen/src/lib.rs index f01bd63a400f..744e9062ef75 100644 --- a/winch/codegen/src/lib.rs +++ b/winch/codegen/src/lib.rs @@ -17,4 +17,6 @@ mod regalloc; mod regset; mod stack; mod trampoline; +pub use trampoline::TrampolineKind; +use trampoline::*; mod visitor; diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index 31e968478850..f713bf3fc8d6 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -39,6 +39,7 @@ pub(crate) enum RegImm { Imm(i64), } +#[derive(Clone)] pub(crate) enum CalleeKind { /// A function call to a raw address. Indirect(Reg), @@ -83,7 +84,7 @@ impl From<Reg> for RegImm { pub(crate) trait MacroAssembler { /// The addressing mode. - type Address; + type Address: Copy; /// Emit the function prologue. fn prologue(&mut self); @@ -109,11 +110,18 @@ pub(crate) trait MacroAssembler { /// current position of the stack pointer (e.g. [sp + offset]). fn address_at_sp(&self, offset: u32) -> Self::Address; - /// Construct an address that is relative to the given register. - fn address_from_reg(&self, reg: Reg, offset: u32) -> Self::Address; + /// Construct an address from the value of the given register + /// plus the specified offset. + fn address_at_reg(&self, reg: Reg, offset: u32) -> Self::Address; /// Emit a function call to either a local or external function. - fn call(&mut self, callee: CalleeKind); + fn call( + &mut self, + alignment: u32, + addend: u32, + stack_args_size: u32, + f: impl FnMut(&mut Self) -> CalleeKind, + ) -> u32; /// Get stack pointer offset.
fn sp_offset(&self) -> u32; diff --git a/winch/codegen/src/regalloc.rs b/winch/codegen/src/regalloc.rs index 7640561d7133..d09620826cf7 100644 --- a/winch/codegen/src/regalloc.rs +++ b/winch/codegen/src/regalloc.rs @@ -46,6 +46,12 @@ impl RegAlloc { where F: FnMut(&mut RegAlloc), { + // If the scratch register is explicitly requested, + // just return it; its usage should never cause spills. + if named == self.scratch { + return named; + } + self.regset.gpr(named).unwrap_or_else(|| { spill(self); self.regset @@ -56,6 +62,9 @@ impl RegAlloc { /// Mark a particular general purpose register as available. pub fn free_gpr(&mut self, reg: Reg) { - self.regset.free_gpr(reg); + // Never mark the designated scratch register as allocatable. + if reg != self.scratch { + self.regset.free_gpr(reg); + } } } diff --git a/winch/codegen/src/stack.rs b/winch/codegen/src/stack.rs index d563d91e8412..4d817f895455 100644 --- a/winch/codegen/src/stack.rs +++ b/winch/codegen/src/stack.rs @@ -117,6 +117,11 @@ impl Stack { } } + /// Insert a new value at the specified index. + pub fn insert(&mut self, at: usize, val: Val) { + self.inner.insert(at, val); + } + /// Get the length of the stack. pub fn len(&self) -> usize { self.inner.len() } diff --git a/winch/codegen/src/trampoline.rs b/winch/codegen/src/trampoline.rs index 3d579efe195f..b35d4ca84551 100644 --- a/winch/codegen/src/trampoline.rs +++ b/winch/codegen/src/trampoline.rs @@ -1,15 +1,41 @@ +//! Trampoline implementation for Winch. +//! +//! This module contains all the necessary pieces to emit the various +//! trampolines required by Wasmtime to call JIT code. // // TODO // // * Remove the usage of hardcoded operand sizes (`OperandSize::S64`) when // loading/storing the VM context pointer. The real value of the operand size // and VM context type should be derived from the ABI's pointer size. This is // going to be relevant once 32-bit architectures are supported. // // * Save the fp and pc for fast stack walking. use crate::{ - abi::{align_to, calculate_frame_adjustment, ABIArg, ABIResult, ABI}, + abi::{ABIArg, ABIParams, ABIResult, ABISig, ABI}, isa::CallingConvention, masm::{CalleeKind, MacroAssembler, OperandSize, RegImm}, reg::Reg, }; +use anyhow::{anyhow, Result}; use smallvec::SmallVec; use std::mem; use wasmparser::{FuncType, ValType}; +use wasmtime_environ::{FuncIndex, PtrSize}; -/// A trampoline to provide interopt between different calling -/// conventions. +/// The supported trampoline kinds. +/// See https://github.com/bytecodealliance/rfcs/blob/main/accepted/tail-calls.md#new-trampolines-and-vmcallercheckedanyfunc-changes +/// for more details. +pub enum TrampolineKind { + /// Calling from native to Wasm, using the array calling convention. + ArrayToWasm(FuncIndex), + /// Calling from native to Wasm. + NativeToWasm(FuncIndex), + /// Calling from Wasm to native. + WasmToNative, } + +/// The main trampoline abstraction. pub(crate) struct Trampoline<'a, A, M> where A: ABI, M: MacroAssembler, { /// The macro assembler. masm: &'a mut M, /// The ABI. abi: &'a A, /// The main scratch register for the current architecture. It is /// not allocatable for the callee. scratch_reg: Reg, /// A second scratch register. This register is allocatable for the /// callee, so it can only be used after the callee-saved /// registers are on the stack. alloc_scratch_reg: Reg, - /// Registers to be saved as part of the trampoline's prologue and epilogue. + /// Registers to be saved as part of the trampoline's prologue + /// and to be restored as part of the trampoline's epilogue. callee_saved_regs: SmallVec<[Reg; 9]>, /// The calling convention used by the trampoline, /// which is the Wasmtime variant of the system ABI's /// calling convention.
call_conv: &'a CallingConvention, + /// The pointer size of the current ISA. + pointer_size: u8, } impl<'a, A, M> Trampoline<'a, A, M> @@ -46,6 +75,7 @@ where scratch_reg: Reg, alloc_scratch_reg: Reg, call_conv: &'a CallingConvention, + pointer_size: u8, ) -> Self { Self { masm, @@ -54,147 +84,320 @@ where alloc_scratch_reg, callee_saved_regs: ::callee_saved_regs(call_conv), call_conv, + pointer_size, } } - /// Emit the host to wasm trampoline. - pub fn emit_host_to_wasm(&mut self, ty: &FuncType) { - // The host to wasm trampoline is currently hard coded (see vmcontext.rs - // in the wasmtime-runtime crate, `VMArrayCallFunction`). The first two - // parameters are VMContexts (not used at this time). The third - // parameter is the function pointer to call. The fourth parameter is - // an address to storage space for both the return value and the - // arguments to the function. - let trampoline_ty = FuncType::new( + /// Emit an array-to-wasm trampoline. + pub fn emit_array_to_wasm(&mut self, ty: &FuncType, callee_index: FuncIndex) -> Result<()> { + let native_ty = FuncType::new( vec![ValType::I64, ValType::I64, ValType::I64, ValType::I64], vec![], ); - let trampoline_sig = self.abi.sig(&trampoline_ty, self.call_conv); + let native_sig = self.native_sig(&native_ty); + let wasm_sig = self.wasm_sig(ty); - // Hard-coding the size in bytes of the trampoline arguments - // since it's static, based on the current signature we should - // always have 4 arguments, each of which is 8 bytes. - let trampoline_arg_size = 32; + let val_ptr = &native_sig.params[2] + .get_reg() + .map(RegImm::reg) + .ok_or_else(|| anyhow!("Expected value pointer to be in a register"))?; - let callee_sig = self.abi.sig(ty, &CallingConvention::Default); + self.prologue_with_callee_saved(); - let val_ptr = if let ABIArg::Reg { reg, ty: _ty } = &trampoline_sig.params[3] { - Ok(RegImm::reg(*reg)) - } else { - Err(anyhow::anyhow!("Expected the val ptr to be in a register")) - } - .unwrap(); + // Get the VM context pointer and move it to the designated pinned + // register. + let vmctx_ptr = Self::vmctx(&native_sig.params)?; + self.masm + .mov(vmctx_ptr, ::vmctx_reg().into(), OperandSize::S64); - self.prologue(); + let (offsets, spill_size) = self.spill(&native_sig.params); - let mut trampoline_arg_offsets: [u32; 4] = [0; 4]; + let val_ptr_offset = offsets[2]; - trampoline_sig - .params - .iter() - .enumerate() - .for_each(|(i, param)| { - if let ABIArg::Reg { reg, ty: _ty } = param { - let offset = self.masm.push(*reg); - trampoline_arg_offsets[i] = offset; - } - }); + // Call the function that was passed into the trampoline. + let allocated_stack = self.masm.call( + self.abi.call_stack_align().into(), + self.abi.arg_base_offset().into(), + wasm_sig.stack_bytes, + |masm| { + masm.mov(*val_ptr, self.scratch_reg.into(), OperandSize::S64); + Self::assign_args_from_array( + masm, + &wasm_sig, + self.scratch_reg, + self.alloc_scratch_reg, + ); + CalleeKind::Direct(callee_index.as_u32()) + }, + ); + + self.masm.free_stack(allocated_stack); + + // Move the val ptr back into the scratch register so we can + // load the return values. + self.masm.load( + self.masm.address_from_sp(val_ptr_offset), + self.scratch_reg, + OperandSize::S64, + ); + + // Move the return values into the value ptr. We are only + // supporting a single return value at this time. 
+ let ABIResult::Reg { reg, ty } = &wasm_sig.result; + if let Some(ty) = ty { + self.masm.store( + RegImm::reg(*reg), + self.masm.address_at_reg(self.scratch_reg, 0), + (*ty).into(), + ); + } + + self.epilogue_with_callee_saved_restore(spill_size); + Ok(()) + } - let val_ptr_offset = trampoline_arg_offsets[3]; - let func_ptr_offset = trampoline_arg_offsets[2]; + /// Emit a native-to-wasm trampoline. + pub fn emit_native_to_wasm(&mut self, ty: &FuncType, callee_index: FuncIndex) -> Result<()> { + let native_sig = self.native_sig(&ty); + let wasm_sig = self.wasm_sig(&ty); + let vmctx_ptr = Self::vmctx(&native_sig.params)?; + self.prologue_with_callee_saved(); + // Move the VM context pointer to the designated pinned register. self.masm - .mov(val_ptr, RegImm::reg(self.scratch_reg), OperandSize::S64); - - // How much we need to adjust the stack pointer by to account - // for the alignment required by the ISA. - let delta = calculate_frame_adjustment( - self.masm.sp_offset(), - self.abi.arg_base_offset() as u32, - self.abi.call_stack_align() as u32, + .mov(vmctx_ptr, <A as ABI>::vmctx_reg().into(), OperandSize::S64); + + let (offsets, spill_size) = self.spill(&native_sig.params); + + let reserved_stack = self.masm.call( + self.abi.call_stack_align().into(), + self.abi.arg_base_offset().into(), + wasm_sig.stack_bytes, + |masm| { + Self::assign_args( + masm, + &wasm_sig.params, + &native_sig.params[2..], + &offsets[2..], + self.scratch_reg, + self.abi.arg_base_offset().into(), + ); + CalleeKind::Direct(callee_index.as_u32()) + }, ); - // The total amount of stack space we need to reserve for the - // arguments. - let total_arg_stack_space = align_to( - callee_sig.stack_bytes + delta, - self.abi.call_stack_align() as u32, + self.masm.free_stack(reserved_stack); + self.epilogue_with_callee_saved_restore(spill_size); + + Ok(()) + } + + /// Emit a wasm-to-native trampoline. + pub fn emit_wasm_to_native(&mut self, ty: &FuncType) -> Result<()> { + let mut params = Self::callee_and_caller_vmctx(); + params.extend_from_slice(ty.params()); + + let func_ty = FuncType::new(params, ty.results().to_owned()); + let wasm_sig = self.wasm_sig(&func_ty); + let native_sig = self.native_sig(ty); + + self.prologue(); + let (offsets, spill_size) = self.spill(&wasm_sig.params); + + let reserved_stack = self.masm.call( + self.abi.call_stack_align().into(), + self.abi.arg_base_offset().into(), + native_sig.stack_bytes, + |masm| { + // Move the VM context into one of the scratch registers. + let vmctx = Self::vmctx(&wasm_sig.params).unwrap(); + masm.mov( + vmctx.into(), + self.alloc_scratch_reg.into(), + OperandSize::S64, + ); + + Self::assign_args( + masm, + &native_sig.params, + &wasm_sig.params, + &offsets, + self.scratch_reg, + self.abi.arg_base_offset().into(), + ); + + let body_offset = self.pointer_size.vmnative_call_host_func_context_func_ref() + + self.pointer_size.vm_func_ref_native_call(); + let callee_addr = masm.address_at_reg(self.alloc_scratch_reg, body_offset.into()); + masm.load(callee_addr, self.scratch_reg, OperandSize::S64); + + CalleeKind::Indirect(self.scratch_reg) + }, ); - self.masm.reserve_stack(total_arg_stack_space); + self.masm.free_stack(reserved_stack); + self.epilogue(spill_size); + Ok(()) + } + + /// Perform argument assignment, translating between + /// caller and callee calling conventions.
+ fn assign_args( + masm: &mut M, + callee_params: &[ABIArg], + caller_params: &[ABIArg], + caller_stack_offsets: &[u32], + scratch: Reg, + arg_base_offset: u32, + ) { + assert!(callee_params.len() == caller_params.len()); + let fp = <A as ABI>::fp_reg(); + let mut offset_index = 0; + + callee_params + .iter() + .zip(caller_params) + .for_each( + |(callee_param, caller_param)| match (callee_param, caller_param) { + (ABIArg::Reg { ty, reg: dst }, ABIArg::Reg { .. }) => { + let offset = caller_stack_offsets[offset_index]; + let addr = masm.address_from_sp(offset); + masm.load(addr, *dst, (*ty).into()); + offset_index += 1; + } + + (ABIArg::Stack { ty, offset }, ABIArg::Reg { .. }) => { + let spill_offset = caller_stack_offsets[offset_index]; + let addr = masm.address_from_sp(spill_offset); + masm.load(addr, scratch, (*ty).into()); + + let arg_addr = masm.address_at_sp(*offset); + masm.store(scratch.into(), arg_addr, (*ty).into()); + offset_index += 1; + } + + (ABIArg::Reg { ty, reg: dst }, ABIArg::Stack { ty: _, offset }) => { + let addr = masm.address_at_reg(fp, arg_base_offset + offset); + masm.load(addr, *dst, (*ty).into()); + } + + ( + ABIArg::Stack { + ty, + offset: callee_offset, + }, + ABIArg::Stack { + offset: caller_offset, + .. + }, + ) => { + let addr = masm.address_at_reg(fp, arg_base_offset + caller_offset); + masm.load(addr, scratch, (*ty).into()); + + let arg_addr = masm.address_at_sp(*callee_offset); + masm.store(scratch.into(), arg_addr, (*ty).into()); + } + }, + ) + } + + /// Get the types of the callee and caller VM contexts. + fn callee_and_caller_vmctx() -> Vec<ValType> { + vec![ValType::I64, ValType::I64] + } + + /// Returns a signature using the system's calling convention. + fn native_sig(&self, ty: &FuncType) -> ABISig { + let mut params = Self::callee_and_caller_vmctx(); + params.extend_from_slice(ty.params()); + let native_type = FuncType::new(params, ty.results().to_owned()); + + self.abi.sig(&native_type, self.call_conv) + } + + /// Returns a signature using Winch's default calling convention. + fn wasm_sig(&self, ty: &FuncType) -> ABISig { + self.abi.sig(ty, &CallingConvention::Default) + } + + /// Returns the register containing the VM context pointer. + fn vmctx(params: &ABIParams) -> Result<RegImm> { + params[0] + .get_reg() + .map(RegImm::reg) + .ok_or_else(|| anyhow!("Expected vm context pointer to be in a register")) + } + + /// Performs a spill of the register params. + fn spill(&mut self, params: &ABIParams) -> (SmallVec<[u32; 6]>, u32) { + let mut offsets = SmallVec::new(); + let mut spilled = 0; + params.iter().for_each(|param| { + if let Some(reg) = param.get_reg() { + let offset = self.masm.push(reg); + offsets.push(offset); + spilled += 1; + } + }); + + // The stack size for the spill, calculated + // from the number of spilled registers times + // the size of each push (8 bytes). + let size = spilled * <A as ABI>::word_bytes(); + + (offsets, size) + } + + /// Assigns arguments for the callee, loading them from a register. + fn assign_args_from_array(masm: &mut M, callee_sig: &ABISig, values_reg: Reg, scratch: Reg) { // The max size a value can be when reading from the params // memory location.
let value_size = mem::size_of::<u128>(); - callee_sig.params.iter().enumerate().for_each(|(i, param)| { let value_offset = (i * value_size) as u32; match param { - ABIArg::Reg { reg, ty } => self.masm.load( - self.masm.address_from_reg(self.scratch_reg, value_offset), + ABIArg::Reg { reg, ty } => masm.load( + masm.address_at_reg(values_reg, value_offset), *reg, (*ty).into(), ), ABIArg::Stack { offset, ty } => { - self.masm.load( - self.masm.address_from_reg(self.scratch_reg, value_offset), - self.alloc_scratch_reg, + masm.load( + masm.address_at_reg(values_reg, value_offset), + scratch, (*ty).into(), ); - self.masm.store( - RegImm::reg(self.alloc_scratch_reg), - self.masm.address_at_sp(*offset), + masm.store( + RegImm::reg(scratch), + masm.address_at_sp(*offset), (*ty).into(), ); } } }); - - // Move the function pointer from it's stack location into a - // scratch register. - self.masm.load( - self.masm.address_from_sp(func_ptr_offset), - self.scratch_reg, - OperandSize::S64, - ); - - // Call the function that was passed into the trampoline. - self.masm.call(CalleeKind::Indirect(self.scratch_reg)); - - self.masm.free_stack(total_arg_stack_space); - - // Move the val ptr back into the scratch register so we can - // load the return values. - self.masm.load( - self.masm.address_from_sp(val_ptr_offset), - self.scratch_reg, - OperandSize::S64, - ); - - // Move the return values into the value ptr. We are only - // supporting a single return value at this time. - let ABIResult::Reg { reg, ty } = &callee_sig.result; - self.masm.store( - RegImm::reg(*reg), - self.masm.address_from_reg(self.scratch_reg, 0), - (*ty).unwrap().into(), - ); - self.epilogue(trampoline_arg_size); } /// The trampoline's prologue. fn prologue(&mut self) { self.masm.prologue(); + } + + /// Similar to [Trampoline::prologue], but saves + /// callee-saved registers. + fn prologue_with_callee_saved(&mut self) { + self.masm.prologue(); // Save any callee-saved registers. for r in &self.callee_saved_regs { self.masm.push(*r); } } - /// The trampoline's epilogue. - fn epilogue(&mut self, arg_size: u32) { + /// Similar to [Trampoline::epilogue], but restores + /// callee-saved registers. + fn epilogue_with_callee_saved_restore(&mut self, arg_size: u32) { // Free the stack space allocated by pushing the trampoline arguments. self.masm.free_stack(arg_size); // Restore the callee-saved registers. @@ -203,4 +406,11 @@ where } self.masm.epilogue(0); } + + /// The trampoline's epilogue. + fn epilogue(&mut self, arg_size: u32) { + // Free the stack space allocated by pushing the trampoline arguments. + self.masm.free_stack(arg_size); + self.masm.epilogue(0); + } } diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index ac872e46052e..f5ce67c54e02 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -10,6 +10,7 @@ use crate::masm::{DivKind, MacroAssembler, OperandSize, RegImm, RemKind}; use crate::stack::Val; use wasmparser::ValType; use wasmparser::VisitOperator; +use wasmtime_environ::FuncIndex; /// A macro to define unsupported WebAssembly operators.
/// @@ -198,7 +199,7 @@ where } fn visit_call(&mut self, index: u32) { - self.emit_call(index); + self.emit_call(FuncIndex::from_u32(index)); } wasmparser::for_each_operator!(def_unsupported); diff --git a/winch/environ/src/lib.rs b/winch/environ/src/lib.rs index 66f9e4c4712f..81848603f457 100644 --- a/winch/environ/src/lib.rs +++ b/winch/environ/src/lib.rs @@ -5,7 +5,7 @@ use wasmparser::types::Types; use wasmtime_environ::{FuncIndex, Module}; -use winch_codegen::{self, Callee, TargetIsa}; +use winch_codegen::{self, Callee}; /// Function environment containing module and runtime specific /// information. @@ -14,8 +14,6 @@ pub struct FuncEnv<'a> { pub module: &'a Module, /// Type information about a module, once it has been validated. pub types: &'a Types, - /// The current ISA. - pub isa: &'a Box, } impl<'a> winch_codegen::FuncEnv for FuncEnv<'a> { @@ -35,7 +33,7 @@ impl<'a> winch_codegen::FuncEnv for FuncEnv<'a> { impl<'a> FuncEnv<'a> { /// Create a new function environment. - pub fn new(module: &'a Module, types: &'a Types, isa: &'a Box) -> Self { - Self { module, types, isa } + pub fn new(module: &'a Module, types: &'a Types) -> Self { + Self { module, types } } } diff --git a/winch/src/compile.rs b/winch/src/compile.rs index 8ad774a05867..7c0e29a0dc5b 100644 --- a/winch/src/compile.rs +++ b/winch/src/compile.rs @@ -68,15 +68,5 @@ fn compile(env: &FuncEnv, f: (DefinedFuncIndex, FunctionBodyData<'_>)) -> Result .iter() .for_each(|s| println!("{}", s)); - let buffer = env - .isa - .host_to_wasm_trampoline(sig) - .expect("Couldn't compile trampoline"); - - println!("Disassembly for trampoline: {}", index.as_u32()); - disasm(buffer.data(), env.isa)? - .iter() - .for_each(|s| println!("{}", s)); - Ok(()) }
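--

A note on the import-call path: the `emit_call` changes above reduce an imported-function call to two loads off the caller's `VMContext` followed by an indirect call. The following pseudo-Rust sketch is illustrative only; the `VMOffsets` accessors are the real ones used in the diff, but the helper itself is hypothetical and not part of this change:

```rust
use wasmtime_environ::{FuncIndex, VMOffsets};

/// Hypothetical helper mirroring the two loads emitted by
/// `CodeGen::emit_call` for an imported function. `caller_vmctx` is the
/// value held in the pinned vmctx register (r14 on x64).
unsafe fn resolve_import(
    caller_vmctx: *const u8,
    offsets: &VMOffsets<u8>,
    index: FuncIndex,
) -> (*const u8, *const u8) {
    // Load the callee's `VMContext` from the `VMFunctionImport` entry.
    let callee_vmctx = caller_vmctx
        .add(offsets.vmctx_vmfunction_import_vmctx(index) as usize)
        .cast::<*const u8>()
        .read();
    // Load the callee's Wasm entry point from the same entry.
    let callee_body = caller_vmctx
        .add(offsets.vmctx_vmfunction_import_wasm_call(index) as usize)
        .cast::<*const u8>()
        .read();
    (callee_vmctx, callee_body)
}
```

The emitted machine code then inserts both vmctx pointers at the right positions in the value stack so they become the first two arguments of the call.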