From 43be0cb07058afe4c5ab9af1fd349b38ec38232c Mon Sep 17 00:00:00 2001 From: CensoredUsername Date: Fri, 26 Aug 2016 16:38:59 +0200 Subject: [PATCH] Fix prefix calculation for AUTO_NO32 ops, fix encoding data for call reg, change functionality of the Pointer! macros in dynasmrt, add machinery to extract the list of supported instructions from dynasm's internal encoding data and write a tutorial --- build_docs.sh | 3 + doc/.gitignore | 1 + doc/insref/Cargo.toml | 10 +++ doc/insref/src/main.rs | 37 ++++++++++ doc/tutorial.md | 152 ++++++++++++++++++++++++++++++++++++++++- plugin/src/compiler.rs | 4 +- plugin/src/x64data.rs | 8 ++- runtime/src/lib.rs | 10 +-- testing/src/main.rs | 4 +- 9 files changed, 218 insertions(+), 11 deletions(-) create mode 100644 doc/.gitignore create mode 100644 doc/insref/Cargo.toml create mode 100644 doc/insref/src/main.rs diff --git a/build_docs.sh b/build_docs.sh index b2ec6c4867..0fc041f157 100644 --- a/build_docs.sh +++ b/build_docs.sh @@ -17,6 +17,9 @@ mkdir ./build_docs/language mkdir ./build_docs/plugin mkdir ./build_docs/runtime +# create instruction reference markdown file +(cd doc/insref && cargo run > ../instructionref.md) + # build plugin docs for f in ./doc/*.md; do rustdoc $f -o ./build_docs/language --markdown-no-toc --html-before-content=./doc/pre.html --html-after-content=./doc/post.html --markdown-css=./formatting.css diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 0000000000..09d66b51d3 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1 @@ +instructionref.md diff --git a/doc/insref/Cargo.toml b/doc/insref/Cargo.toml new file mode 100644 index 0000000000..ca2e23cd91 --- /dev/null +++ b/doc/insref/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "insref" +version = "0.0.1" +authors = ["CensoredUsername "] + +[dependencies] +itertools = "0.4.*" + +[dependencies.dynasm] +path = "../../plugin" \ No newline at end of file diff --git a/doc/insref/src/main.rs b/doc/insref/src/main.rs new file mode 100644 index 
0000000000..56369edbd7 --- /dev/null +++ b/doc/insref/src/main.rs @@ -0,0 +1,37 @@ +extern crate itertools; + +// we generate this list directly from dynasm's internals +#[allow(plugin_as_library)] +extern crate dynasm; + +use dynasm::debug; +use dynasm::x64data; + +use std::io::{self, Write}; +use itertools::Itertools; + +fn main() { + let stdout = io::stdout(); + let mut stdout = stdout.lock(); + stdout.write_all(b"% Instruction Reference\n\n").unwrap(); + + let mut mnemnonics: Vec<_> = x64data::mnemnonics().cloned().collect(); + mnemnonics.sort(); + + for mnemnonic in mnemnonics { + let data = x64data::get_mnemnonic_data(mnemnonic).unwrap(); + let mut formats = data.into_iter() + .map(|x| debug::format_opdata(mnemnonic, x)) + .flatten() + .map(|x| x.replace(">>> ", "")) + .collect::>(); + formats.sort(); + + stdout.write_all(b"### ").unwrap(); + stdout.write_all(mnemnonic.as_bytes()).unwrap(); + stdout.write_all(b"\n```\n").unwrap(); + + stdout.write_all(formats.join("\n").as_bytes()).unwrap(); + stdout.write_all(b"\n```\n").unwrap(); + } +} diff --git a/doc/tutorial.md b/doc/tutorial.md index 9245fa1a4b..5b809c72b8 100644 --- a/doc/tutorial.md +++ b/doc/tutorial.md @@ -1,3 +1,153 @@ % Tutorial -Coming soon. \ No newline at end of file +# Introduction + +Dynasm-rs is a library and syntax extension for assembling code at runtime. 
For the first part of the tutorial we will be examining the following example program that assembles a simple function at runtime: + +``` +#![feature(plugin)] +#![plugin(dynasm)] + +#[macro_use] +extern crate dynasmrt; + +use dynasmrt::DynasmApi; + +use std::{io, slice, mem}; +use std::io::Write; + +fn main() { + let mut ops = dynasmrt::Assembler::new(); + let string = "Hello World!"; + + dynasm!(ops + ; ->hello: + ; .bytes string.as_bytes() + ); + + let hello = ops.offset(); + dynasm!(ops + ; lea rcx, [->hello] + ; xor edx, edx + ; mov dl, BYTE string.len() as _ + ; mov rax, QWORD print as _ + ; sub rsp, BYTE 0x28 + ; call rax + ; add rsp, BYTE 0x28 + ; ret + ); + + let buf = ops.finalize().unwrap(); + + let hello_fn: extern "win64" fn() -> bool = unsafe { + mem::transmute(buf.ptr(hello)) + }; + + assert!( + hello_fn() + ); +} + +pub extern "win64" fn print(buffer: *const u8, length: u64) -> bool { + io::stdout().write_all(unsafe { + slice::from_raw_parts(buffer, length as usize) + }).is_ok() +} +``` + +We will now examine this code snippet piece by piece. + +``` +#![feature(plugin)] +#![plugin(dynasm)] +``` +To use the dynasm! procedural macro, first the dynasm plugin has to be loaded. As plugins are currently unstable, the plugin feature first needs to be enabled. This currently requires a nightly version of rustc. + +``` +#[macro_use] +extern crate dynasmrt; + +use dynasmrt::DynasmApi; +``` +We then link to the dynasm runtime crate. Although they are not used here, it also contains various utility macros which we load here. +Furthermore, the `DynasmApi` trait is loaded. This trait defines the interface used by the `dynasm!` procedural macro to produce assembled code. + +``` +let mut ops = dynasmrt::Assembler::new(); +``` +Of course, the machine code that will be generated will need to live somewhere. 
`dynasmrt::Assembler` is a struct that implements the `DynasmApi` trait, provides storage for the generated machine code, handles memory permissions and provides various utilities for dynamically assembling code. It even allows assembling code in one thread while several other threads execute said code. For this example though, we will use it in the simplest use case, just assembling everything in advance and then executing it. + +``` +dynasm!(ops + ; ->hello: + ; .bytes string.as_bytes() +); +``` +The first invocation of the `dynasm!` macro shows off two features of dynasm. The first line defines a global label `hello` which can be referenced later, while the second line contains an assembler directive. Assembler directives allow the assembler to perform tasks that do not involve instruction assembling like, in this case, inserting a string into the executable buffer. + +``` +let hello = ops.offset(); +``` +This utility function returns a value indicating the position of the current end of the machine code buffer. It can later be used to obtain a pointer to this position in the generated machine code. + + +``` +dynasm!(ops + ; lea rcx, [->hello] + ; xor edx, edx + ; mov dl, BYTE string.len() as _ + ; mov rax, QWORD print as _ + ; sub rsp, BYTE 0x28 + ; call rax + ; add rsp, BYTE 0x28 + ; ret +); +``` +The second invocation of the `dynasm!` macro contains the definition of a small function. It performs the following tasks: + +``` +; lea rcx, [->hello] +``` +First, the address of the global label `->hello` is loaded using the load effective address instruction and a label memory reference. + +``` +; xor edx, edx +; mov dl, BYTE string.len() as _ +``` +Then the length of the string is loaded. Here the `BYTE` prefix determines the size of the immediate in the second instruction. The `as _` cast is necessary to coerce the size of the length down to the `i8` type expected of an immediate. Dynasm-rs tries to avoid performing implicit casts as this tends to hide errors. 
+ +``` +; mov rax, QWORD print as _ +; sub rsp, BYTE 0x28 +; call rax +; add rsp, BYTE 0x28 +``` +Here, a call is made from the dynamically assembled code to the Rust `print` function. Note the `QWORD` size prefix, which is necessary to determine the appropriate form of the `mov` instruction to encode, as `dynasm!` does not analyze the immediate expression at runtime. As this example uses the `"win64"` calling convention, the stack pointer needs to be manipulated too. (Note: the `"win64"` calling convention is used as it is currently impossible to use the `"sysv64"` calling convention on all platforms.) + +``` +; ret +``` +And finally the assembled function returns, returning the return value from the `print` function in `rax` back to the calling Rust code. + +``` +let buf = ops.finalize().unwrap(); +``` +With the assembly completed, we now finalize the `dynasmrt::Assembler`, which will resolve all labels previously used and move the data into a `dynasmrt::ExecutableBuffer`. This struct, which dereferences to a `&[u8]`, wraps a buffer of readable and executable memory. + +``` +let hello_fn: extern "win64" fn() -> bool = unsafe { + mem::transmute(buf.ptr(hello)) +}; +``` +We can now get a pointer to the executable memory using the `dynasmrt::ExecutableBuffer::ptr` method, using the value obtained earlier from `ops.offset()`. We can then transmute this pointer into a function. + +``` +assert!( + hello_fn() +); +``` +And finally we can call this function, asserting that it returns true to ensure that it managed to print the encoded message! + +# Advanced usage + +Coming soon. 
diff --git a/plugin/src/compiler.rs b/plugin/src/compiler.rs index e5db7d0c9f..bbb756d564 100644 --- a/plugin/src/compiler.rs +++ b/plugin/src/compiler.rs @@ -223,9 +223,9 @@ fn compile_op(ecx: &ExtCtxt, buffer: &mut StmtBuffer, op: Ident, prefixes: Vec hash_map::Keys<'static, &'static str, &'static [Opdata]> { + OPMAP.keys() +} + // workaround until bitflags can be used in const const VEX_OP : u32 = flags::flag_bits(flags::VEX_OP); const XOP_OP : u32 = flags::flag_bits(flags::XOP_OP); @@ -151,7 +155,7 @@ Ops!(OPMAP; b"v*ib", [0x0F, 0xBA ], 5, AUTO_SIZE | LOCK; ] "bzhi" = [ b"r*v*r*", [ 2, 0xF5 ], X, AUTO_REXW | VEX_OP; ] "call" = [ b"o*", [0xE8 ], X, AUTO_SIZE; - b"r*", [0xFF ], 2, AUTO_SIZE; + b"r*", [0xFF ], 2, AUTO_NO32; ] "cbw" = [ b"", [0x98 ], X, WORD_SIZE; ] "cwde" = [ b"", [0x98 ], X; ] "cdqe" = [ b"", [0x98 ], X, WITH_REXW; diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 6067a229e2..6fad41cf9f 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -14,18 +14,18 @@ use memmap::{Mmap, Protection}; /// this allows it to be used as an easy shorthand for passing pointers as dynasm immediate arguments. #[macro_export] macro_rules! Pointer { - ($e:expr) => {&$e as *const _ as _}; + ($e:expr) => {$e as *const _ as _}; } /// Preforms the same action as the Pointer! macro, but casts to a *mut pointer. #[macro_export] macro_rules! MutPointer { - ($e:expr) => {&mut $e as *mut _ as _}; + ($e:expr) => {$e as *mut _ as _}; } /// This trait represents the interface that must be implemented to allow /// the dynasm preprocessor to assemble into a datastructure. 
-pub trait DynAsmApi<'a> : Extend + Extend<&'a u8> { +pub trait DynasmApi<'a> : Extend + Extend<&'a u8> { /// Report the current offset into the assembling target fn offset(&self) -> usize; /// Push a byte into the assembling target @@ -146,7 +146,7 @@ impl<'a> Extend<&'a u8> for Assembler { } } -impl<'a> DynAsmApi<'a> for Assembler { +impl<'a> DynasmApi<'a> for Assembler { #[inline] fn offset(&self) -> usize { self.ops.len() + self.asmoffset @@ -381,7 +381,7 @@ impl Executor { /// A structure wrapping some executable memory. It dereferences into a &[u8] slice. impl ExecutableBuffer { /// Obtain a pointer into the executable memory from an offset into it. - /// When an offset returned from DynAsmApi::offset is used, the resulting pointer + /// When an offset returned from DynasmApi::offset is used, the resulting pointer /// will point to the start of the first instruction after the offset call, /// which can then be jumped or called to divert control flow into the executable /// buffer. Note that if this buffer is accessed through an Executor, these pointers diff --git a/testing/src/main.rs b/testing/src/main.rs index 40ac16e7eb..e8d2be3b0e 100644 --- a/testing/src/main.rs +++ b/testing/src/main.rs @@ -3,7 +3,7 @@ #[macro_use] extern crate dynasmrt; -use dynasmrt::DynAsmApi; +use dynasmrt::DynasmApi; macro_rules! test { () => (mov rax, rbx) @@ -126,7 +126,9 @@ fn main() { bar: u32 } let mut test_array = [Test {foo: 1, bar: 2}, Test {foo: 3, bar: 4}, Test {foo: 5, bar: 6}]; + let mut test_array = &mut test_array; let mut test_single = Test {foo: 7, bar: 8}; + let mut test_single = &mut test_single; dynasm!(ops ; mov rax, AWORD MutPointer!(test_array) ; mov ebx, 2