From f30ce1fe9717aaef65d16b5b30498f8cabeb632b Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald <fitzgen@gmail.com>
Date: Wed, 3 Jun 2020 09:21:34 -0700
Subject: [PATCH 1/4] externref: implement stack map-based garbage collection

For host VM code, we use plain reference counting, where cloning increments
the reference count, and dropping decrements it. We can avoid many of the
on-stack increment/decrement operations that typically plague the
performance of reference counting via Rust's ownership and borrowing system.
Moving a `VMExternRef` avoids mutating its reference count, and borrowing it
either avoids the reference count increment or delays it until if/when the
`VMExternRef` is cloned.

When passing a `VMExternRef` into compiled Wasm code, we don't want to do
reference count mutations for every compiled `local.{get,set}`, nor for
every function call. Therefore, we use a variation of **deferred reference
counting**, where we only mutate reference counts when storing
`VMExternRef`s somewhere that outlives the activation: into a global or
table. Simultaneously, we over-approximate the set of `VMExternRef`s that
are inside Wasm function activations. Periodically, we walk the stack at GC
safe points, and use stack map information to precisely identify the set of
`VMExternRef`s inside Wasm activations. Then we take the difference between
this precise set and our over-approximation, and decrement the reference
count for each of the `VMExternRef`s that are in our over-approximation but
not in the precise set. Finally, the over-approximation is replaced with the
precise set.

The `VMExternRefActivationsTable` implements the over-approximized set of
`VMExternRef`s referenced by Wasm activations. Calling a Wasm function and
passing it a `VMExternRef` moves the `VMExternRef` into the table, and the
compiled Wasm function logically "borrows" the `VMExternRef` from the
table. Similarly, `global.get` and `table.get` operations clone the gotten
`VMExternRef` into the `VMExternRefActivationsTable` and then "borrow" the
reference out of the table.

When a `VMExternRef` is returned to host code from a Wasm function, the host
increments the reference count (because the reference is logically
"borrowed" from the `VMExternRefActivationsTable` and the reference count
from the table will be dropped at the next GC).

For more general information on deferred reference counting, see *An
Examination of Deferred Reference Counting and Cycle Detection* by Quinane:
https://openresearch-repository.anu.edu.au/bitstream/1885/42030/2/hon-thesis.pdf

cc #929

Fixes #1804
---
 Cargo.lock                                    | 277 ++++---
 Cargo.toml                                    |   4 +
 cranelift/codegen/src/binemit/stackmap.rs     |   2 +-
 crates/environ/src/cache.rs                   |   6 +-
 crates/environ/src/compilation.rs             |  16 +
 crates/environ/src/cranelift.rs               |  33 +-
 crates/environ/src/data_structures.rs         |   1 +
 crates/environ/src/func_environ.rs            |   7 -
 crates/environ/src/lib.rs                     |   2 +-
 crates/environ/src/lightbeam.rs               |   2 +
 crates/environ/src/module_environ.rs          |   7 -
 crates/environ/src/vmoffsets.rs               |  31 +-
 crates/jit/src/code_memory.rs                 |   2 +-
 crates/jit/src/compiler.rs                    |  51 +-
 crates/jit/src/instantiate.rs                 |  19 +-
 crates/runtime/Cargo.toml                     |   1 +
 crates/runtime/src/externref.rs               | 735 +++++++++++++++++-
 crates/runtime/src/instance.rs                |  32 +
 crates/runtime/src/lib.rs                     |   2 +-
 crates/runtime/src/mmap.rs                    |   2 +-
 crates/wasmtime/src/func.rs                   |   8 +-
 crates/wasmtime/src/instance.rs               |  33 +-
 crates/wasmtime/src/module.rs                 |  38 +
 crates/wasmtime/src/ref.rs                    |   5 +
 crates/wasmtime/src/runtime.rs                |  32 +-
 .../wasmtime/src/trampoline/create_handle.rs  |   2 +
 crates/wasmtime/src/trampoline/func.rs        |  15 +-
 crates/wasmtime/src/types.rs                  |   2 +
 crates/wasmtime/src/values.rs                 |  23 +-
 src/obj.rs                                    |  31 +-
 tests/all/gc.rs                               | 228 ++++++
 tests/all/main.rs                             |   1 +
 32 files changed, 1415 insertions(+), 235 deletions(-)
 mode change 100644 => 100755 crates/wasmtime/src/ref.rs
 create mode 100644 tests/all/gc.rs

diff --git a/Cargo.lock b/Cargo.lock
index 0298e2d046c5..fcfb241d34f9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,5 +1,20 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+[[package]]
+name = "addr2line"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a49806b9dadc843c61e7c97e72490ad7f7220ae249012fbda9ad0609457c0543"
+dependencies = [
+ "gimli",
+]
+
+[[package]]
+name = "adler32"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
+
 [[package]]
 name = "ahash"
 version = "0.2.18"
@@ -35,9 +50,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.31"
+version = "1.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
+checksum = "d9a60d744a80c30fcb657dfe2c1b22bcb3e814c1a1e3674f32bf5820b570fbff"
 
 [[package]]
 name = "arbitrary"
@@ -85,26 +100,17 @@ checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
 
 [[package]]
 name = "backtrace"
-version = "0.3.46"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1e692897359247cc6bb902933361652380af0f1b7651ae5c5013407f30e109e"
+version = "0.3.48"
+source = "git+https://github.com/rust-lang/backtrace-rs.git#05319f46dd4126a4d4a3b2bbf8c378c4cf9fd902"
 dependencies = [
- "backtrace-sys",
+ "addr2line",
  "cfg-if",
  "libc",
+ "miniz_oxide",
+ "object 0.19.0",
  "rustc-demangle",
 ]
 
-[[package]]
-name = "backtrace-sys"
-version = "0.1.37"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18fbebbe1c9d1f383a9cc7e8ccdb471b91c8d024ee9c2ca5b5346121fe8b4399"
-dependencies = [
- "cc",
- "libc",
-]
-
 [[package]]
 name = "base64"
 version = "0.11.0"
@@ -113,9 +119,9 @@ checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
 
 [[package]]
 name = "base64"
-version = "0.12.1"
+version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53d1ccbaf7d9ec9537465a97bf19edc1a4e158ecb49fc16178202238c569cc42"
+checksum = "7d5ca2cd0adc3f48f9e9ea5a6bbdf9ccc0bfade884847e484d452414c7ccffb3"
 
 [[package]]
 name = "binaryen"
@@ -150,18 +156,18 @@ dependencies = [
 
 [[package]]
 name = "bit-set"
-version = "0.5.2"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de"
+checksum = "e84c238982c4b1e1ee668d136c510c67a13465279c0cb367ea6baf6310620a80"
 dependencies = [
  "bit-vec",
 ]
 
 [[package]]
 name = "bit-vec"
-version = "0.6.2"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0dc55f2d8a1a85650ac47858bb001b4c0dd73d79e3c455a842925e68d29cd3"
+checksum = "f59bbe95d4e52a6398ec21238d31577f2b28a9d86807f06ca59d191d8440d0bb"
 
 [[package]]
 name = "bitflags"
@@ -203,9 +209,9 @@ dependencies = [
 
 [[package]]
 name = "bumpalo"
-version = "3.4.0"
+version = "3.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820"
+checksum = "5356f1d23ee24a1f785a56d1d1a5f0fd5b0f6a0c0fb2412ce11da71649ab78f6"
 
 [[package]]
 name = "byte-tools"
@@ -239,9 +245,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.0.54"
+version = "1.0.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7bbb73db36c1246e9034e307d0fba23f9a2e251faa47ade70c1bd252220c8311"
+checksum = "c3d87b23d6a92cd03af510a5ade527033f6aa6fa92161e2d5863a907d4c5e31d"
 dependencies = [
  "jobserver",
 ]
@@ -265,9 +271,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "2.33.1"
+version = "2.33.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
+checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
 dependencies = [
  "ansi_term",
  "atty",
@@ -289,18 +295,18 @@ dependencies = [
 
 [[package]]
 name = "cmake"
-version = "0.1.44"
+version = "0.1.42"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e56268c17a6248366d66d4a47a3381369d068cce8409bb1716ed77ea32163bb"
+checksum = "81fb25b677f8bf1eb325017cb6bb8452f87969db0fedb4f757b297bee78a7c62"
 dependencies = [
  "cc",
 ]
 
 [[package]]
 name = "console"
-version = "0.11.3"
+version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c0994e656bba7b922d8dd1245db90672ffb701e684e45be58f20719d69abc5a"
+checksum = "dea0f3e2e8d7dba335e913b97f9e1992c86c4399d54f8be1d31c8727d0652064"
 dependencies = [
  "encode_unicode",
  "lazy_static",
@@ -493,7 +499,7 @@ dependencies = [
  "anyhow",
  "cranelift-codegen",
  "cranelift-module",
- "object",
+ "object 0.18.0",
  "target-lexicon",
 ]
 
@@ -633,9 +639,9 @@ dependencies = [
 
 [[package]]
 name = "crossbeam-queue"
-version = "0.2.2"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab6bffe714b6bb07e42f201352c34f51fefd355ace793f9e638ebd52d23f98d2"
+checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db"
 dependencies = [
  "cfg-if",
  "crossbeam-utils",
@@ -674,9 +680,9 @@ dependencies = [
 
 [[package]]
 name = "derive_more"
-version = "0.99.7"
+version = "0.99.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2127768764f1556535c01b5326ef94bd60ff08dcfbdc544d53e69ed155610f5d"
+checksum = "e2323f3f47db9a0e77ce7a300605d8d2098597fc451ed1a97bb1f6411bb550a7"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -685,9 +691,9 @@ dependencies = [
 
 [[package]]
 name = "derive_utils"
-version = "0.10.0"
+version = "0.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3df5480412da86cdf5d6b7f3b682422c84359ff7399aa658df1d15ee83244b1d"
+checksum = "114aa287358087a616096186f3de19525d8083f83d437dc6b583f895316b02e6"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -777,6 +783,19 @@ version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 
+[[package]]
+name = "env_logger"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]
+
 [[package]]
 name = "env_logger"
 version = "0.7.1"
@@ -848,11 +867,11 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
 
 [[package]]
 name = "file-per-thread-logger"
-version = "0.1.3"
+version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b3937f028664bd0e13df401ba49a4567ccda587420365823242977f06609ed1"
+checksum = "8505b75b31ef7285168dd237c4a7db3c1f3e0927e7d314e670bc98e854272fe9"
 dependencies = [
- "env_logger",
+ "env_logger 0.6.2",
  "log",
 ]
 
@@ -868,9 +887,9 @@ dependencies = [
 
 [[package]]
 name = "filetime"
-version = "0.2.10"
+version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "affc17579b132fc2461adf7c575cc6e8b134ebca52c51f5411388965227dc695"
+checksum = "f59efc38004c988e4201d11d263b8171f49a2e7ec0bdbb71773433f271504a5e"
 dependencies = [
  "cfg-if",
  "libc",
@@ -880,15 +899,15 @@ dependencies = [
 
 [[package]]
 name = "fnv"
-version = "1.0.7"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+checksum = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
 
 [[package]]
 name = "fst"
-version = "0.4.4"
+version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7293de202dbfe786c0b3fe6110a027836c5438ed06db7b715c9955ff4bfea51"
+checksum = "81f9cac32c1741cdf6b66be7dcf0d9c7f25ccf12f8aa84c16cfa31f9f14513b3"
 
 [[package]]
 name = "fuchsia-cprng"
@@ -981,9 +1000,9 @@ dependencies = [
 
 [[package]]
 name = "hermit-abi"
-version = "0.1.13"
+version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71"
+checksum = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"
 dependencies = [
  "libc",
 ]
@@ -999,9 +1018,9 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "1.4.0"
+version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c398b2b113b55809ceb9ee3e753fcbac793f1956663f3c36549c1346015c2afe"
+checksum = "076f042c5b7b98f31d205f1249267e12a6518c1481e9dae9764af19b707d2292"
 dependencies = [
  "autocfg 1.0.0",
 ]
@@ -1020,9 +1039,9 @@ dependencies = [
 
 [[package]]
 name = "iter-enum"
-version = "0.2.4"
+version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2cdea9771bec3d95893f6c665a4fcd477af7858446a46bc2772f560534eee43b"
+checksum = "58107b833f974ca7bc9f98e032881967613dc214b6ab7ececb45b1ab9f0e7008"
 dependencies = [
  "derive_utils",
  "quote",
@@ -1038,15 +1057,6 @@ dependencies = [
  "either",
 ]
 
-[[package]]
-name = "itertools"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
-dependencies = [
- "either",
-]
-
 [[package]]
 name = "itoa"
 version = "0.4.5"
@@ -1085,9 +1095,9 @@ checksum = "3576a87f2ba00f6f106fdfcd16db1d698d648a26ad8e0573cad8537c3c362d2a"
 
 [[package]]
 name = "libc"
-version = "0.2.71"
+version = "0.2.70"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
+checksum = "3baa92041a6fec78c687fa0cc2b3fae8884f743d672cf551bed1d6dac6988d0f"
 
 [[package]]
 name = "libfuzzer-sys"
@@ -1110,7 +1120,7 @@ dependencies = [
  "dynasm",
  "dynasmrt",
  "iter-enum",
- "itertools 0.8.2",
+ "itertools",
  "lazy_static",
  "memoffset",
  "more-asserts",
@@ -1134,9 +1144,9 @@ dependencies = [
 
 [[package]]
 name = "mach"
-version = "0.3.2"
+version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa"
+checksum = "86dd2487cdfea56def77b88438a2c915fb45113c5319bfe7e14306ca4cd0b0e1"
 dependencies = [
  "libc",
 ]
@@ -1181,6 +1191,15 @@ dependencies = [
  "autocfg 1.0.0",
 ]
 
+[[package]]
+name = "miniz_oxide"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "791daaae1ed6889560f8c4359194f56648355540573244a5448a83ba1ecc7435"
+dependencies = [
+ "adler32",
+]
+
 [[package]]
 name = "more-asserts"
 version = "0.2.1"
@@ -1232,11 +1251,16 @@ dependencies = [
  "indexmap",
 ]
 
+[[package]]
+name = "object"
+version = "0.19.0"
+source = "git+https://github.com/gimli-rs/object.git#2ad508810e5470f5bcb7ede8d866c8b1eda4c4d3"
+
 [[package]]
 name = "once_cell"
-version = "1.4.0"
+version = "1.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d"
+checksum = "b1c601810575c99596d4afc46f78a678c80105117c379eb3650cf99b8a21ce5b"
 
 [[package]]
 name = "opaque-debug"
@@ -1246,9 +1270,9 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
 
 [[package]]
 name = "os_pipe"
-version = "0.9.2"
+version = "0.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb233f06c2307e1f5ce2ecad9f8121cffbbee2c95428f44ea85222e460d0d213"
+checksum = "db4d06355a7090ce852965b2d08e11426c315438462638c6d721448d0b47aa22"
 dependencies = [
  "libc",
  "winapi",
@@ -1288,7 +1312,7 @@ version = "0.2.0"
 dependencies = [
  "arbitrary",
  "bincode",
- "env_logger",
+ "env_logger 0.7.1",
  "fst",
  "log",
  "peepmatic",
@@ -1327,7 +1351,7 @@ dependencies = [
 name = "peepmatic-test"
 version = "0.2.0"
 dependencies = [
- "env_logger",
+ "env_logger 0.7.1",
  "log",
  "peepmatic",
  "peepmatic-runtime",
@@ -1341,9 +1365,9 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
 
 [[package]]
 name = "ppv-lite86"
-version = "0.2.8"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea"
+checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
 
 [[package]]
 name = "pretty_env_logger"
@@ -1351,7 +1375,7 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d"
 dependencies = [
- "env_logger",
+ "env_logger 0.7.1",
  "log",
 ]
 
@@ -1383,15 +1407,15 @@ dependencies = [
 
 [[package]]
 name = "proc-macro-hack"
-version = "0.5.16"
+version = "0.5.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e0456befd48169b9f13ef0f0ad46d492cf9d2dbb918bcf38e01eed4ce3ec5e4"
+checksum = "0d659fe7c6d27f25e9d80a1a094c223f5246f6a6596453e09d7229bf42750b63"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.18"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa"
+checksum = "df246d292ff63439fea9bc8c0a270bed0e390d5ebd4db4ba15aba81111b5abe3"
 dependencies = [
  "unicode-xid",
 ]
@@ -1428,7 +1452,7 @@ version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a44883e74aa97ad63db83c4bf8ca490f02b2fc02f92575e720c8551e843c945f"
 dependencies = [
- "env_logger",
+ "env_logger 0.7.1",
  "log",
  "rand 0.7.3",
  "rand_core 0.5.1",
@@ -1675,9 +1699,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.3.9"
+version = "1.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
+checksum = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -1697,15 +1721,15 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.6.18"
+version = "0.6.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
+checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
 
 [[package]]
 name = "region"
-version = "2.2.0"
+version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877e54ea2adcd70d80e9179344c97f93ef0dffd6b03e1f4529e6e83ab2fa9ae0"
+checksum = "448e868c6e4cfddfa49b6a72c95906c04e8547465e9536575b95c70a4044f856"
 dependencies = [
  "bitflags",
  "libc",
@@ -1776,9 +1800,9 @@ dependencies = [
 
 [[package]]
 name = "ryu"
-version = "1.0.5"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
+checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1"
 
 [[package]]
 name = "same-file"
@@ -1806,9 +1830,9 @@ dependencies = [
 
 [[package]]
 name = "scroll_derive"
-version = "0.10.2"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e367622f934864ffa1c704ba2b82280aab856e3d8213c84c5720257eb34b15b9"
+checksum = "f8584eea9b9ff42825b46faf46a8c24d2cff13ec152fa2a50df788b87c07ee28"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1832,18 +1856,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 
 [[package]]
 name = "serde"
-version = "1.0.111"
+version = "1.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9124df5b40cbd380080b2cc6ab894c040a3070d995f5c9dc77e18c34a8ae37d"
+checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.111"
+version = "1.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f2c3ac8e6ca1e9c80b8be1023940162bf81ae3cffbb1809474152f2ce1eb250"
+checksum = "9e549e3abf4fb8621bd1609f11dfc9f5e50320802273b12f3811a67e6716ea6c"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1852,9 +1876,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.53"
+version = "1.0.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "993948e75b189211a9b31a7528f950c6adc21f9720b6438ff80a7fa2f864cea2"
+checksum = "a7894c8ed05b7a3a279aeb79025fdec1d3158080b75b98a08faf2806bb799edd"
 dependencies = [
  "itoa",
  "ryu",
@@ -1863,9 +1887,9 @@ dependencies = [
 
 [[package]]
 name = "sha2"
-version = "0.8.2"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a256f46ea78a0c0d9ff00077504903ac881a1dafdc20da66545699e7776b3e69"
+checksum = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0"
 dependencies = [
  "block-buffer",
  "digest",
@@ -1941,9 +1965,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "1.0.30"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93a56fabc59dce20fe48b6c832cc249c713e7ed88fa28b0ee0a3bfcaae5fe4e2"
+checksum = "410a7488c0a728c7ceb4ad59b9567eb4053d02e8cc7f5c0e0eeeb39518369213"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2046,18 +2070,18 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.19"
+version = "1.0.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b13f926965ad00595dd129fa12823b04bbf866e9085ab0a5f2b05b850fbfc344"
+checksum = "d12a1dae4add0f0d568eebc7bf142f145ba1aa2544cafb195c76f0f409091b60"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.19"
+version = "1.0.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "893582086c2f98cde18f906265a65b5030a074b1046c674ae898be6519a7f479"
+checksum = "3f34e0c1caaa462fd840ec6b768946ea1e7842620d94fe29d5b847138f521269"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2215,15 +2239,15 @@ dependencies = [
 
 [[package]]
 name = "vec_map"
-version = "0.8.2"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
+checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
 
 [[package]]
 name = "version_check"
-version = "0.9.2"
+version = "0.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
+checksum = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce"
 
 [[package]]
 name = "wait-timeout"
@@ -2321,7 +2345,7 @@ name = "wasmtime-c-api"
 version = "0.18.0"
 dependencies = [
  "anyhow",
- "env_logger",
+ "env_logger 0.7.1",
  "once_cell",
  "wasi-common",
  "wasmtime",
@@ -2343,13 +2367,14 @@ name = "wasmtime-cli"
 version = "0.18.0"
 dependencies = [
  "anyhow",
- "env_logger",
+ "env_logger 0.7.1",
  "file-per-thread-logger",
  "filecheck",
  "humantime",
  "libc",
+ "log",
  "more-asserts",
- "object",
+ "object 0.18.0",
  "pretty_env_logger",
  "rayon",
  "structopt",
@@ -2376,7 +2401,7 @@ dependencies = [
  "anyhow",
  "gimli",
  "more-asserts",
- "object",
+ "object 0.18.0",
  "target-lexicon",
  "thiserror",
  "wasmparser 0.57.0",
@@ -2388,7 +2413,7 @@ name = "wasmtime-environ"
 version = "0.18.0"
 dependencies = [
  "anyhow",
- "base64 0.12.1",
+ "base64 0.12.0",
  "bincode",
  "cranelift-codegen",
  "cranelift-entity",
@@ -2436,7 +2461,7 @@ dependencies = [
  "anyhow",
  "arbitrary",
  "binaryen",
- "env_logger",
+ "env_logger 0.7.1",
  "log",
  "rayon",
  "wasmparser 0.57.0",
@@ -2477,7 +2502,7 @@ version = "0.18.0"
 dependencies = [
  "anyhow",
  "more-asserts",
- "object",
+ "object 0.18.0",
  "wasmtime-environ",
 ]
 
@@ -2491,7 +2516,7 @@ dependencies = [
  "ittapi-rs",
  "lazy_static",
  "libc",
- "object",
+ "object 0.18.0",
  "scroll",
  "serde",
  "target-lexicon",
@@ -2509,6 +2534,7 @@ dependencies = [
  "indexmap",
  "lazy_static",
  "libc",
+ "log",
  "memoffset",
  "more-asserts",
  "region",
@@ -2652,7 +2678,7 @@ dependencies = [
 name = "wiggle-test"
 version = "0.18.0"
 dependencies = [
- "env_logger",
+ "env_logger 0.7.1",
  "proptest",
  "thiserror",
  "tracing",
@@ -2746,18 +2772,18 @@ dependencies = [
 
 [[package]]
 name = "zstd"
-version = "0.5.2+zstd.1.4.5"
+version = "0.5.1+zstd.1.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "644352b10ce7f333d6e0af85bd4f5322dc449416dc1211c6308e95bca8923db4"
+checksum = "5c5d978b793ae64375b80baf652919b148f6a496ac8802922d9999f5a553194f"
 dependencies = [
  "zstd-safe",
 ]
 
 [[package]]
 name = "zstd-safe"
-version = "2.0.4+zstd.1.4.5"
+version = "2.0.3+zstd.1.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7113c0c9aed2c55181f2d9f5b0a36e7d2c0183b11c058ab40b35987479efe4d7"
+checksum = "bee25eac9753cfedd48133fa1736cbd23b774e253d89badbeac7d12b23848d3f"
 dependencies = [
  "libc",
  "zstd-sys",
@@ -2765,12 +2791,11 @@ dependencies = [
 
 [[package]]
 name = "zstd-sys"
-version = "1.4.16+zstd.1.4.5"
+version = "1.4.15+zstd.1.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c442965efc45353be5a9b9969c9b0872fff6828c7e06d118dda2cb2d0bb11d5a"
+checksum = "89719b034dc22d240d5b407fb0a3fe6d29952c181cff9a9f95c0bd40b4f8f7d8"
 dependencies = [
  "cc",
  "glob",
- "itertools 0.9.0",
  "libc",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index b95970a3384e..f62003074897 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,6 +38,7 @@ pretty_env_logger = "0.4.0"
 file-per-thread-logger = "0.1.1"
 wat = "1.0.18"
 libc = "0.2.60"
+log = "0.4.8"
 rayon = "1.2.1"
 humantime = "1.3.0"
 
@@ -86,3 +87,6 @@ maintenance = { status = "actively-developed" }
 [[test]]
 name = "host_segfault"
 harness = false
+
+[patch.crates-io]
+backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git" }
diff --git a/cranelift/codegen/src/binemit/stackmap.rs b/cranelift/codegen/src/binemit/stackmap.rs
index 4db9b929293c..45716dcae0e6 100644
--- a/cranelift/codegen/src/binemit/stackmap.rs
+++ b/cranelift/codegen/src/binemit/stackmap.rs
@@ -103,7 +103,7 @@ impl Stackmap {
 
         // Refer to the doc comment for `Stackmap` above to understand the
         // bitmap representation used here.
-        let map_size = (dbg!(info.frame_size) + dbg!(info.inbound_args_size)) as usize;
+        let map_size = (info.frame_size + info.inbound_args_size) as usize;
         let word_size = isa.pointer_bytes() as usize;
         let num_words = map_size / word_size;
 
diff --git a/crates/environ/src/cache.rs b/crates/environ/src/cache.rs
index 290a54033f88..5b1788216cb9 100644
--- a/crates/environ/src/cache.rs
+++ b/crates/environ/src/cache.rs
@@ -1,5 +1,5 @@
 use crate::address_map::{ModuleAddressMap, ValueLabelsRanges};
-use crate::compilation::{Compilation, Relocations, Traps};
+use crate::compilation::{Compilation, Relocations, StackMaps, Traps};
 use cranelift_codegen::ir;
 use cranelift_entity::PrimaryMap;
 use cranelift_wasm::DefinedFuncIndex;
@@ -35,6 +35,7 @@ pub struct ModuleCacheData {
     value_ranges: ValueLabelsRanges,
     stack_slots: PrimaryMap<DefinedFuncIndex, ir::StackSlots>,
     traps: Traps,
+    stack_maps: StackMaps,
 }
 
 /// A type alias over the module cache data as a tuple.
@@ -45,6 +46,7 @@ pub type ModuleCacheDataTupleType = (
     ValueLabelsRanges,
     PrimaryMap<DefinedFuncIndex, ir::StackSlots>,
     Traps,
+    StackMaps,
 );
 
 struct Sha256Hasher(Sha256);
@@ -204,6 +206,7 @@ impl ModuleCacheData {
             value_ranges: data.3,
             stack_slots: data.4,
             traps: data.5,
+            stack_maps: data.6,
         }
     }
 
@@ -215,6 +218,7 @@ impl ModuleCacheData {
             self.value_ranges,
             self.stack_slots,
             self.traps,
+            self.stack_maps,
         )
     }
 }
diff --git a/crates/environ/src/compilation.rs b/crates/environ/src/compilation.rs
index 3382d89ae421..b08e82fd43df 100644
--- a/crates/environ/src/compilation.rs
+++ b/crates/environ/src/compilation.rs
@@ -144,6 +144,22 @@ pub struct TrapInformation {
 /// Information about traps associated with the functions where the traps are placed.
 pub type Traps = PrimaryMap<DefinedFuncIndex, Vec<TrapInformation>>;
 
+/// The offset within a function of a GC safepoint, and its associated stack
+/// map.
+#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
+pub struct StackMapInformation {
+    /// The offset of the GC safepoint within the function's native code. It is
+    /// relative to the beginning of the function.
+    pub code_offset: binemit::CodeOffset,
+
+    /// The stack map for identifying live GC refs at the GC safepoint.
+    pub stack_map: binemit::Stackmap,
+}
+
+/// Information about GC safepoints and their associated stack maps within each
+/// function.
+pub type StackMaps = PrimaryMap<DefinedFuncIndex, Vec<StackMapInformation>>;
+
 /// An error while compiling WebAssembly to machine code.
 #[derive(Error, Debug)]
 pub enum CompileError {
diff --git a/crates/environ/src/cranelift.rs b/crates/environ/src/cranelift.rs
index d7d0146a6e7c..d261200848dc 100644
--- a/crates/environ/src/cranelift.rs
+++ b/crates/environ/src/cranelift.rs
@@ -88,7 +88,8 @@
 use crate::address_map::{FunctionAddressMap, InstructionAddressMap};
 use crate::cache::{ModuleCacheDataTupleType, ModuleCacheEntry};
 use crate::compilation::{
-    Compilation, CompileError, CompiledFunction, Relocation, RelocationTarget, TrapInformation,
+    Compilation, CompileError, CompiledFunction, Relocation, RelocationTarget, StackMapInformation,
+    TrapInformation,
 };
 use crate::func_environ::{get_func_name, FuncEnvironment};
 use crate::{CacheConfig, FunctionBodyData, ModuleLocal, ModuleTranslation, Tunables};
@@ -204,6 +205,27 @@ impl binemit::TrapSink for TrapSink {
     }
 }
 
+#[derive(Default)]
+struct StackMapSink {
+    infos: Vec<StackMapInformation>,
+}
+
+impl binemit::StackmapSink for StackMapSink {
+    fn add_stackmap(&mut self, code_offset: binemit::CodeOffset, stack_map: binemit::Stackmap) {
+        self.infos.push(StackMapInformation {
+            code_offset,
+            stack_map,
+        });
+    }
+}
+
+impl StackMapSink {
+    fn finish(mut self) -> Vec<StackMapInformation> {
+        self.infos.sort_by_key(|info| info.code_offset);
+        self.infos
+    }
+}
+
 fn get_function_address_map<'data>(
     context: &Context,
     data: &FunctionBodyData<'data>,
@@ -294,6 +316,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
     let mut value_ranges = PrimaryMap::with_capacity(env.function_body_inputs.len());
     let mut stack_slots = PrimaryMap::with_capacity(env.function_body_inputs.len());
     let mut traps = PrimaryMap::with_capacity(env.function_body_inputs.len());
+    let mut stack_maps = PrimaryMap::with_capacity(env.function_body_inputs.len());
 
     env.function_body_inputs
         .into_iter()
@@ -354,14 +377,14 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
             let mut code_buf: Vec<u8> = Vec::new();
             let mut reloc_sink = RelocSink::new(func_index);
             let mut trap_sink = TrapSink::new();
-            let mut stackmap_sink = binemit::NullStackmapSink {};
+            let mut stack_map_sink = StackMapSink::default();
             context
                 .compile_and_emit(
                     isa,
                     &mut code_buf,
                     &mut reloc_sink,
                     &mut trap_sink,
-                    &mut stackmap_sink,
+                    &mut stack_map_sink,
                 )
                 .map_err(|error| {
                     CompileError::Codegen(pretty_error(&context.func, Some(isa), error))
@@ -391,6 +414,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
                 context.func.stack_slots,
                 trap_sink.traps,
                 unwind_info,
+                stack_map_sink.finish(),
             ))
         })
         .collect::<Result<Vec<_>, CompileError>>()?
@@ -405,6 +429,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
                 sss,
                 function_traps,
                 unwind_info,
+                stack_map,
             )| {
                 functions.push(CompiledFunction {
                     body: function,
@@ -416,6 +441,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
                 value_ranges.push(ranges.unwrap_or_default());
                 stack_slots.push(sss);
                 traps.push(function_traps);
+                stack_maps.push(stack_map);
             },
         );
 
@@ -428,6 +454,7 @@ fn compile(env: CompileEnv<'_>) -> Result<ModuleCacheDataTupleType, CompileError
         value_ranges,
         stack_slots,
         traps,
+        stack_maps,
     ))
 }
 
diff --git a/crates/environ/src/data_structures.rs b/crates/environ/src/data_structures.rs
index 30d00d1d0e19..c9aaa5c9206c 100644
--- a/crates/environ/src/data_structures.rs
+++ b/crates/environ/src/data_structures.rs
@@ -1,6 +1,7 @@
 #![doc(hidden)]
 
 pub mod ir {
+    pub use cranelift_codegen::binemit::Stackmap;
     pub use cranelift_codegen::ir::{
         types, AbiParam, ArgumentPurpose, Signature, SourceLoc, StackSlots, TrapCode, Type,
         ValueLabel, ValueLoc,
diff --git a/crates/environ/src/func_environ.rs b/crates/environ/src/func_environ.rs
index 52e4fbb64ae9..f2645828ec25 100644
--- a/crates/environ/src/func_environ.rs
+++ b/crates/environ/src/func_environ.rs
@@ -658,13 +658,6 @@ impl<'module_environment> TargetEnvironment for FuncEnvironment<'module_environm
     fn target_config(&self) -> TargetFrontendConfig {
         self.target_config
     }
-
-    fn reference_type(&self) -> ir::Type {
-        // For now, the only reference types we support are `externref`, which
-        // don't require tracing GC and stack maps. So we just use the target's
-        // pointer type. This will have to change once we move to tracing GC.
-        self.pointer_type()
-    }
 }
 
 impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'module_environment> {
diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs
index f059795f9d82..fa32e116e39d 100644
--- a/crates/environ/src/lib.rs
+++ b/crates/environ/src/lib.rs
@@ -47,7 +47,7 @@ pub use crate::cache::create_new_config as cache_create_new_config;
 pub use crate::cache::CacheConfig;
 pub use crate::compilation::{
     Compilation, CompileError, CompiledFunction, Compiler, Relocation, RelocationTarget,
-    Relocations, TrapInformation, Traps,
+    Relocations, StackMapInformation, StackMaps, TrapInformation, Traps,
 };
 pub use crate::cranelift::Cranelift;
 pub use crate::data_structures::*;
diff --git a/crates/environ/src/lightbeam.rs b/crates/environ/src/lightbeam.rs
index f4a32ec5ddc1..49fda2f8a023 100644
--- a/crates/environ/src/lightbeam.rs
+++ b/crates/environ/src/lightbeam.rs
@@ -34,6 +34,7 @@ impl crate::compilation::Compiler for Lightbeam {
         );
         let mut relocations = PrimaryMap::with_capacity(translation.function_body_inputs.len());
         let mut traps = PrimaryMap::with_capacity(translation.function_body_inputs.len());
+        let stack_maps = PrimaryMap::with_capacity(translation.function_body_inputs.len());
 
         let mut codegen_session: CodeGenSession<_> = CodeGenSession::new(
             translation.function_body_inputs.len() as u32,
@@ -81,6 +82,7 @@ impl crate::compilation::Compiler for Lightbeam {
             ValueLabelsRanges::new(),
             PrimaryMap::new(),
             traps,
+            stack_maps,
         ))
     }
 }
diff --git a/crates/environ/src/module_environ.rs b/crates/environ/src/module_environ.rs
index 755321511947..4876c3c2ee26 100644
--- a/crates/environ/src/module_environ.rs
+++ b/crates/environ/src/module_environ.rs
@@ -92,13 +92,6 @@ impl<'data> TargetEnvironment for ModuleEnvironment<'data> {
     fn target_config(&self) -> TargetFrontendConfig {
         self.result.target_config
     }
-
-    fn reference_type(&self) -> ir::Type {
-        // For now, the only reference types we support are `externref`, which
-        // don't require tracing GC and stack maps. So we just use the target's
-        // pointer type. This will have to change once we move to tracing GC.
-        self.pointer_type()
-    }
 }
 
 /// This trait is useful for `translate_module` because it tells how to translate
diff --git a/crates/environ/src/vmoffsets.rs b/crates/environ/src/vmoffsets.rs
index 940792557a38..0d7f08073c06 100644
--- a/crates/environ/src/vmoffsets.rs
+++ b/crates/environ/src/vmoffsets.rs
@@ -5,6 +5,8 @@
 //
 // struct VMContext {
 //      interrupts: *const VMInterrupts,
+//      externref_activations_table: *mut VMExternRefActivationsTable,
+//      stack_map_registry: *mut StackMapRegistry,
 //      signature_ids: [VMSharedSignatureIndex; module.num_signature_ids],
 //      imported_functions: [VMFunctionImport; module.num_imported_functions],
 //      imported_tables: [VMTableImport; module.num_imported_tables],
@@ -286,9 +288,23 @@ impl VMOffsets {
         0
     }
 
+    /// The offset of the `VMExternRefActivationsTable` member.
+    pub fn vmctx_externref_activations_table(&self) -> u32 {
+        self.vmctx_interrupts()
+            .checked_add(u32::from(self.pointer_size))
+            .unwrap()
+    }
+
+    /// The offset of the `*mut StackMapRegistry` member.
+    pub fn vmctx_stack_map_registry(&self) -> u32 {
+        self.vmctx_externref_activations_table()
+            .checked_add(u32::from(self.pointer_size))
+            .unwrap()
+    }
+
     /// The offset of the `signature_ids` array.
     pub fn vmctx_signature_ids_begin(&self) -> u32 {
-        self.vmctx_interrupts()
+        self.vmctx_stack_map_registry()
             .checked_add(u32::from(self.pointer_size))
             .unwrap()
     }
@@ -591,6 +607,19 @@ impl VMOffsets {
     }
 }
 
+/// Offsets for `VMExternRefActivationsTable`.
+impl VMOffsets {
+    /// Return the offset for `VMExternRefActivationsTable::next`.
+    pub fn vm_extern_ref_activation_table_next(&self) -> u32 {
+        0
+    }
+
+    /// Return the offset for `VMExternRefActivationsTable::end`.
+    pub fn vm_extern_ref_activation_table_end(&self) -> u32 {
+        self.pointer_size.into()
+    }
+}
+
 /// Target specific type for shared signature index.
 #[derive(Debug, Copy, Clone)]
 pub struct TargetSharedSignatureIndex(u32);
diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs
index b7e0a3851f38..e9dc25fc90e5 100644
--- a/crates/jit/src/code_memory.rs
+++ b/crates/jit/src/code_memory.rs
@@ -134,7 +134,7 @@ impl CodeMemory {
 
             if !m.is_empty() {
                 unsafe {
-                    region::protect(m.as_mut_ptr(), m.len(), region::Protection::READ_EXECUTE)
+                    region::protect(m.as_mut_ptr(), m.len(), region::Protection::ReadExecute)
                 }
                 .expect("unable to make memory readonly and executable");
             }
diff --git a/crates/jit/src/compiler.rs b/crates/jit/src/compiler.rs
index a53656e99f65..04af38ed7a15 100644
--- a/crates/jit/src/compiler.rs
+++ b/crates/jit/src/compiler.rs
@@ -15,7 +15,7 @@ use wasmtime_environ::wasm::{DefinedFuncIndex, DefinedMemoryIndex, MemoryIndex,
 use wasmtime_environ::{
     CacheConfig, CompileError, CompiledFunction, Compiler as _C, Module, ModuleAddressMap,
     ModuleMemoryOffset, ModuleTranslation, ModuleVmctxInfo, Relocation, RelocationTarget,
-    Relocations, Traps, Tunables, VMOffsets, ValueLabelsRanges,
+    Relocations, StackMaps, Traps, Tunables, VMOffsets, ValueLabelsRanges,
 };
 use wasmtime_runtime::{InstantiationError, VMFunctionBody, VMTrampoline};
 
@@ -138,6 +138,7 @@ pub struct Compilation {
     pub jt_offsets: PrimaryMap<DefinedFuncIndex, ir::JumpTableOffsets>,
     pub dwarf_sections: Vec<DwarfSection>,
     pub traps: Traps,
+    pub stack_maps: StackMaps,
     pub address_transform: ModuleAddressMap,
 }
 
@@ -165,27 +166,34 @@ impl Compiler {
     ) -> Result<Compilation, SetupError> {
         let mut code_memory = CodeMemory::new();
 
-        let (compilation, relocations, address_transform, value_ranges, stack_slots, traps) =
-            match self.strategy {
-                // For now, interpret `Auto` as `Cranelift` since that's the most stable
-                // implementation.
-                CompilationStrategy::Auto | CompilationStrategy::Cranelift => {
-                    wasmtime_environ::cranelift::Cranelift::compile_module(
-                        translation,
-                        &*self.isa,
-                        &self.cache_config,
-                    )
-                }
-                #[cfg(feature = "lightbeam")]
-                CompilationStrategy::Lightbeam => {
-                    wasmtime_environ::lightbeam::Lightbeam::compile_module(
-                        translation,
-                        &*self.isa,
-                        &self.cache_config,
-                    )
-                }
+        let (
+            compilation,
+            relocations,
+            address_transform,
+            value_ranges,
+            stack_slots,
+            traps,
+            stack_maps,
+        ) = match self.strategy {
+            // For now, interpret `Auto` as `Cranelift` since that's the most stable
+            // implementation.
+            CompilationStrategy::Auto | CompilationStrategy::Cranelift => {
+                wasmtime_environ::cranelift::Cranelift::compile_module(
+                    translation,
+                    &*self.isa,
+                    &self.cache_config,
+                )
+            }
+            #[cfg(feature = "lightbeam")]
+            CompilationStrategy::Lightbeam => {
+                wasmtime_environ::lightbeam::Lightbeam::compile_module(
+                    translation,
+                    &*self.isa,
+                    &self.cache_config,
+                )
             }
-            .map_err(SetupError::Compile)?;
+        }
+        .map_err(SetupError::Compile)?;
 
         let dwarf_sections = if debug_data.is_some() && !compilation.is_empty() {
             transform_dwarf_data(
@@ -239,6 +247,7 @@ impl Compiler {
             jt_offsets,
             dwarf_sections,
             traps,
+            stack_maps,
             address_transform,
         })
     }
diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs
index 12152c708461..7180a9efe891 100644
--- a/crates/jit/src/instantiate.rs
+++ b/crates/jit/src/instantiate.rs
@@ -10,6 +10,7 @@ use crate::link::link_module;
 use crate::resolver::Resolver;
 use std::any::Any;
 use std::collections::HashMap;
+use std::rc::Rc;
 use std::sync::Arc;
 use thiserror::Error;
 use wasmtime_debug::{read_debuginfo, write_debugsections_image, DwarfSection};
@@ -18,13 +19,13 @@ use wasmtime_environ::isa::TargetIsa;
 use wasmtime_environ::wasm::{DefinedFuncIndex, SignatureIndex};
 use wasmtime_environ::{
     CompileError, DataInitializer, DataInitializerLocation, Module, ModuleAddressMap,
-    ModuleEnvironment, ModuleTranslation, Traps,
+    ModuleEnvironment, ModuleTranslation, StackMaps, Traps,
 };
 use wasmtime_profiling::ProfilingAgent;
 use wasmtime_runtime::VMInterrupts;
 use wasmtime_runtime::{
     GdbJitImageRegistration, InstanceHandle, InstantiationError, RuntimeMemoryCreator,
-    SignatureRegistry, VMFunctionBody, VMTrampoline,
+    SignatureRegistry, StackMapRegistry, VMExternRefActivationsTable, VMFunctionBody, VMTrampoline,
 };
 
 /// An error condition while setting up a wasm instance, be it validation,
@@ -69,6 +70,7 @@ pub struct CompiledModule {
     trampolines: PrimaryMap<SignatureIndex, VMTrampoline>,
     data_initializers: Box<[OwnedDataInitializer]>,
     traps: Traps,
+    stack_maps: StackMaps,
     address_transform: ModuleAddressMap,
 }
 
@@ -99,6 +101,7 @@ impl CompiledModule {
             jt_offsets,
             dwarf_sections,
             traps,
+            stack_maps,
             address_transform,
         } = compiler.compile(&translation, debug_data)?;
 
@@ -149,6 +152,7 @@ impl CompiledModule {
             trampolines,
             data_initializers,
             traps,
+            stack_maps,
             address_transform,
         })
     }
@@ -169,6 +173,8 @@ impl CompiledModule {
         mem_creator: Option<&dyn RuntimeMemoryCreator>,
         interrupts: Arc<VMInterrupts>,
         host_state: Box<dyn Any>,
+        externref_activations_table: Rc<VMExternRefActivationsTable>,
+        stack_map_registry: Arc<StackMapRegistry>,
     ) -> Result<InstanceHandle, InstantiationError> {
         // Compute indices into the shared signature table.
         let signatures = {
@@ -200,6 +206,8 @@ impl CompiledModule {
             signatures.into_boxed_slice(),
             host_state,
             interrupts,
+            externref_activations_table,
+            stack_map_registry,
         )
     }
 
@@ -229,11 +237,16 @@ impl CompiledModule {
         &self.finished_functions.0
     }
 
-    /// Returns the a map for all traps in this module.
+    /// Returns the map for all traps in this module.
     pub fn traps(&self) -> &Traps {
         &self.traps
     }
 
+    /// Returns the map for each of this module's stack maps.
+    pub fn stack_maps(&self) -> &StackMaps {
+        &self.stack_maps
+    }
+
     /// Returns a map of compiled addresses back to original bytecode offsets.
     pub fn address_transform(&self) -> &ModuleAddressMap {
         &self.address_transform
diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml
index b862d25899c8..91de66aae482 100644
--- a/crates/runtime/Cargo.toml
+++ b/crates/runtime/Cargo.toml
@@ -15,6 +15,7 @@ edition = "2018"
 wasmtime-environ = { path = "../environ", version = "0.18.0" }
 region = "2.0.0"
 libc = { version = "0.2.70", default-features = false }
+log = "0.4.8"
 memoffset = "0.5.3"
 indexmap = "1.0.2"
 thiserror = "1.0.4"
diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs
index 8e8aeecfeaa1..dba09a801446 100644
--- a/crates/runtime/src/externref.rs
+++ b/crates/runtime/src/externref.rs
@@ -58,29 +58,59 @@
 //! need a ton of excess padding between the `VMExternData` and the value for
 //! values with large alignment.
 //!
-//! ## Reference Counting Protocol and Wasm Functions
+//! ## Reference Counting, Wasm Functions, and Garbage Collection
 //!
-//! Currently, `VMExternRef`s passed into compiled Wasm functions have move
-//! semantics: the host code gives up ownership and does not decrement the
-//! reference count. Similarly, `VMExternRef`s returned from compiled Wasm
-//! functions also have move semantics: host code takes ownership and the
-//! reference count is not incremented.
+//! For host VM code, we use plain reference counting, where cloning increments
+//! the reference count, and dropping decrements it. We can avoid many of the
+//! on-stack increment/decrement operations that typically plague the
+//! performance of reference counting via Rust's ownership and borrowing system.
+//! Moving a `VMExternRef` avoids mutating its reference count, and borrowing it
+//! either avoids the reference count increment or delays it until if/when the
+//! `VMExternRef` is cloned.
 //!
-//! This works well when a reference is passed into Wasm and then passed back
-//! out again. However, if a reference is passed into Wasm, but not passed back
-//! out, then the reference is leaked. This is only a temporary state, and
-//! follow up work will leverage stack maps to fix this issue. Follow
-//! https://github.com/bytecodealliance/wasmtime/issues/929 to keep an eye on
-//! this.
+//! When passing a `VMExternRef` into compiled Wasm code, we don't want to do
+//! reference count mutations for every compiled `local.{get,set}`, nor for
+//! every function call. Therefore, we use a variation of **deferred reference
+//! counting**, where we only mutate reference counts when storing
+//! `VMExternRef`s somewhere that outlives the activation: into a global or
+//! table. Simultaneously, we over-approximate the set of `VMExternRef`s that
+//! are inside Wasm function activations. Periodically, we walk the stack at GC
+//! safe points, and use stack map information to precisely identify the set of
+//! `VMExternRef`s inside Wasm activations. Then we take the difference between
+//! this precise set and our over-approximation, and decrement the reference
+//! count for each of the `VMExternRef`s that are in our over-approximation but
+//! not in the precise set. Finally, the over-approximation is replaced with the
+//! precise set.
+//!
+//! The `VMExternRefActivationsTable` implements the over-approximized set of
+//! `VMExternRef`s referenced by Wasm activations. Calling a Wasm function and
+//! passing it a `VMExternRef` moves the `VMExternRef` into the table, and the
+//! compiled Wasm function logically "borrows" the `VMExternRef` from the
+//! table. Similarly, `global.get` and `table.get` operations clone the gotten
+//! `VMExternRef` into the `VMExternRefActivationsTable` and then "borrow" the
+//! reference out of the table.
+//!
+//! When a `VMExternRef` is returned to host code from a Wasm function, the host
+//! increments the reference count (because the reference is logically
+//! "borrowed" from the `VMExternRefActivationsTable` and the reference count
+//! from the table will be dropped at the next GC).
+//!
+//! For more general information on deferred reference counting, see *An
+//! Examination of Deferred Reference Counting and Cycle Detection* by Quinane:
+//! https://openresearch-repository.anu.edu.au/bitstream/1885/42030/2/hon-thesis.pdf
 
 use std::alloc::Layout;
 use std::any::Any;
-use std::cell::UnsafeCell;
+use std::cell::{Cell, RefCell, UnsafeCell};
 use std::cmp::Ordering;
+use std::collections::BTreeMap;
+use std::collections::HashSet;
 use std::hash::Hasher;
 use std::mem;
 use std::ops::Deref;
 use std::ptr::{self, NonNull};
+use std::sync::{Arc, RwLock};
+use wasmtime_environ::{ir::Stackmap, StackMapInformation};
 
 /// An external reference to some opaque data.
 ///
@@ -307,35 +337,73 @@ impl VMExternRef {
         }
     }
 
+    // /// Turn this `VMExternRef` into a raw, untyped pointer.
+    // ///
+    // /// This forgets `self` and does *not* decrement the reference count on the
+    // /// pointed-to data.
+    // ///
+    // /// This `VMExternRef` may be recovered with `VMExternRef::from_raw`.
+    // pub fn into_raw(self) -> *mut u8 {
+    //     let ptr = self.as_raw();
+    //     mem::forget(self);
+    //     ptr
+    // }
+
+    // /// Recreate a `VMExternRef` from a pointer returned from a previous call to
+    // /// `VMExternRef::into_raw`.
+    // ///
+    // /// # Safety
+    // ///
+    // /// Wildly unsafe to use with anything other than the result of a previous
+    // /// `into_raw` call!
+    // ///
+    // /// This method does *not* increment the reference count on the pointed-to
+    // /// data, so `from_raw` must be called at most *once* on the result of a
+    // /// previous `into_raw` call. (Ideally, every `into_raw` is later followed
+    // /// by a `from_raw`, but it is technically memory safe to never call
+    // /// `from_raw` after `into_raw`: it will leak the pointed-to value, which is
+    // /// memory safe).
+    // pub unsafe fn from_raw(ptr: *mut u8) -> Self {
+    //     debug_assert!(!ptr.is_null());
+    //     VMExternRef(NonNull::new_unchecked(ptr).cast())
+    // }
+
     /// Turn this `VMExternRef` into a raw, untyped pointer.
     ///
-    /// This forgets `self` and does *not* decrement the reference count on the
-    /// pointed-to data.
+    /// Unlike `into_raw`, this does not consume and forget `self`. It is *not*
+    /// safe to use `from_raw` on pointers returned from this method; only use
+    /// `clone_from_raw`!
     ///
-    /// This `VMExternRef` may be recovered with `VMExternRef::from_raw`.
-    pub fn into_raw(self) -> *mut u8 {
+    ///  Nor does this method increment the reference count. You must ensure
+    ///  that `self` (or some other clone of `self`) stays alive until
+    ///  `clone_from_raw` is called.
+    pub fn as_raw(&self) -> *mut u8 {
         let ptr = self.0.cast::<u8>().as_ptr();
         mem::forget(self);
         ptr
     }
 
-    /// Create a `VMExternRef` from a pointer returned from a previous call to
-    /// `VMExternRef::into_raw`.
+    /// Recreate a `VMExternRef` from a pointer returned from a previous call to
+    /// `VMExternRef::as_raw`.
     ///
     /// # Safety
     ///
     /// Wildly unsafe to use with anything other than the result of a previous
-    /// `into_raw` call!
+    /// `as_raw` call!
     ///
-    /// This method does *not* increment the reference count on the pointed-to
-    /// data, so `from_raw` must be called at most *once* on the result of a
-    /// previous `into_raw` call. (Ideally, every `into_raw` is later followed
-    /// by a `from_raw`, but it is technically memory safe to never call
-    /// `from_raw` after `into_raw`: it will leak the pointed-to value, which is
-    /// memory safe).
-    pub unsafe fn from_raw(ptr: *mut u8) -> Self {
+    /// Additionally, it is your responsibility to ensure that this raw
+    /// `VMExternRef`'s reference count has not dropped to zero. Failure to do
+    /// so will result in use after free!
+    pub unsafe fn clone_from_raw(ptr: *mut u8) -> Self {
         debug_assert!(!ptr.is_null());
-        VMExternRef(NonNull::new_unchecked(ptr).cast())
+        let x = VMExternRef(NonNull::new_unchecked(ptr).cast());
+        x.extern_data().increment_ref_count();
+        x
+    }
+
+    /// Get the reference count for this `VMExternRef`.
+    pub fn get_reference_count(&self) -> usize {
+        self.extern_data().get_ref_count()
     }
 
     #[inline]
@@ -393,6 +461,556 @@ impl Deref for VMExternRef {
     }
 }
 
+type TableElem = UnsafeCell<Option<VMExternRef>>;
+
+/// A table that over-approximizes the set of `VMExternRef`s that any Wasm
+/// activation on this thread is currently using.
+///
+/// Under the covers, this is a simple bump allocator that allows duplicate
+/// entries. Deduplication happens at GC time.
+#[repr(C)]
+pub struct VMExternRefActivationsTable {
+    /// Bump-allocation finger within the current chunk.
+    ///
+    /// NB: this is an `UnsafeCell` because it is read from and written to by
+    /// compiled Wasm code.
+    next: UnsafeCell<NonNull<TableElem>>,
+
+    /// Pointer to just after the current chunk.
+    ///
+    /// This is *not* within the current chunk and therefore is not a valid
+    /// place to insert a reference!
+    ///
+    /// This is only updated from host code.
+    end: Cell<NonNull<TableElem>>,
+
+    /// The chunks within which we are bump allocating.
+    ///
+    /// This is only updated from host code.
+    chunks: RefCell<Vec<Box<[TableElem]>>>,
+
+    /// The precise set of on-stack, inside-Wasm GC roots that we discover via
+    /// walking the stack and interpreting stack maps.
+    ///
+    /// That is, this is the precise set that the bump allocation table is
+    /// over-approximating.
+    ///
+    /// This is *only* used inside the `gc` function, and is empty otherwise. It
+    /// is just part of this struct so that we can reuse the allocation, rather
+    /// than create a new hash set every GC.
+    precise_stack_roots: RefCell<HashSet<NonNull<VMExternData>>>,
+}
+
+impl VMExternRefActivationsTable {
+    const INITIAL_CHUNK_SIZE: usize = 4096 / mem::size_of::<usize>();
+
+    /// Create a new `VMExternRefActivationsTable`.
+    pub fn new() -> Self {
+        let chunk = Self::new_chunk(Self::INITIAL_CHUNK_SIZE);
+        let next = chunk.as_ptr() as *mut TableElem;
+        let end = unsafe { next.add(chunk.len()) };
+
+        VMExternRefActivationsTable {
+            next: UnsafeCell::new(NonNull::new(next).unwrap()),
+            end: Cell::new(NonNull::new(end).unwrap()),
+            chunks: RefCell::new(vec![chunk]),
+            precise_stack_roots: RefCell::new(HashSet::with_capacity(Self::INITIAL_CHUNK_SIZE)),
+        }
+    }
+
+    fn new_chunk(size: usize) -> Box<[UnsafeCell<Option<VMExternRef>>]> {
+        assert!(size >= Self::INITIAL_CHUNK_SIZE);
+        let mut chunk = Vec::with_capacity(size);
+        for _ in 0..size {
+            chunk.push(UnsafeCell::new(None));
+        }
+        chunk.into_boxed_slice()
+    }
+
+    /// Try and insert a `VMExternRef` into this table.
+    ///
+    /// This is a fast path that only succeeds when the current chunk has the
+    /// capacity for the requested insertion.
+    ///
+    /// If the insertion fails, then the `VMExternRef` is given back. Callers
+    /// may attempt a GC to free up space and try again, or may call
+    /// `insert_slow_path` to allocate a new bump chunk for this insertion.
+    #[inline]
+    pub fn try_insert(&self, externref: VMExternRef) -> Result<(), VMExternRef> {
+        unsafe {
+            let next = *self.next.get();
+            let end = self.end.get();
+            if next == end {
+                return Err(externref);
+            }
+
+            debug_assert!((*next.as_ref().get()).is_none());
+            ptr::write(next.as_ptr(), UnsafeCell::new(Some(externref)));
+            let next = NonNull::new_unchecked(next.as_ptr().add(1));
+            debug_assert!(next <= end);
+            *self.next.get() = next;
+            Ok(())
+        }
+    }
+
+    /// This is a slow path for inserting a reference into the table when the
+    /// current bump chunk is full.
+    ///
+    /// This method is infallible, and will allocate an additional bump chunk if
+    /// necessary.
+    #[inline(never)]
+    pub fn insert_slow_path(&self, externref: VMExternRef) {
+        let externref = match self.try_insert(externref) {
+            Ok(()) => return,
+            Err(x) => x,
+        };
+
+        {
+            let mut chunks = self.chunks.borrow_mut();
+
+            let new_size = chunks.last().unwrap().len() * 2;
+            let new_chunk = Self::new_chunk(new_size);
+
+            unsafe {
+                let next = new_chunk.as_ptr() as *mut TableElem;
+                debug_assert!(!next.is_null());
+                *self.next.get() = NonNull::new_unchecked(next);
+
+                let end = next.add(new_chunk.len());
+                debug_assert!(!end.is_null());
+                self.end.set(NonNull::new_unchecked(end));
+            }
+
+            chunks.push(new_chunk);
+        }
+
+        self.try_insert(externref)
+            .expect("insertion should always succeed after we allocate a new chunk");
+    }
+
+    fn num_filled_in_last_chunk(&self, chunks: &[Box<[TableElem]>]) -> usize {
+        let last_chunk = chunks.last().unwrap();
+        let next = unsafe { *self.next.get() };
+        let end = self.end.get();
+        let num_unused_in_last_chunk =
+            ((end.as_ptr() as usize) - (next.as_ptr() as usize)) / mem::size_of::<usize>();
+        last_chunk.len().saturating_sub(num_unused_in_last_chunk)
+    }
+
+    fn elements(&self, mut f: impl FnMut(&VMExternRef)) {
+        // Every chunk except the last one is full, so we can simply iterate
+        // over all of their elements.
+        let chunks = self.chunks.borrow();
+        for chunk in chunks.iter().take(chunks.len() - 1) {
+            for elem in chunk.iter() {
+                if let Some(elem) = unsafe { &*elem.get() } {
+                    f(elem);
+                }
+            }
+        }
+
+        // The last chunk is not all the way full, so we only iterate over its
+        // full parts.
+        let num_filled_in_last_chunk = self.num_filled_in_last_chunk(&chunks);
+        for elem in chunks.last().unwrap().iter().take(num_filled_in_last_chunk) {
+            if let Some(elem) = unsafe { &*elem.get() } {
+                f(elem);
+            }
+        }
+    }
+
+    fn insert_precise_stack_root(&self, root: NonNull<VMExternData>) {
+        let mut precise_stack_roots = self.precise_stack_roots.borrow_mut();
+        if precise_stack_roots.insert(root) {
+            // If this root was not already in the set, then we need to
+            // increment its reference count, so that it doesn't get freed in
+            // `reset` when we're overwriting old bump allocation table entries
+            // with new ones.
+            unsafe {
+                root.as_ref().increment_ref_count();
+            }
+        }
+    }
+
+    /// Refill the bump allocation table with our precise stack roots, and sweep
+    /// away everything else.
+    fn reset(&self) {
+        let mut chunks = self.chunks.borrow_mut();
+
+        let mut precise_roots = self.precise_stack_roots.borrow_mut();
+        if precise_roots.is_empty() {
+            // Get rid of all but our first bump chunk, and set our `next` and
+            // `end` bump allocation fingers into it.
+            unsafe {
+                let chunk = chunks.first().unwrap();
+
+                let next = chunk.as_ptr() as *mut TableElem;
+                debug_assert!(!next.is_null());
+                *self.next.get() = NonNull::new_unchecked(next);
+
+                let end = next.add(chunk.len());
+                debug_assert!(!end.is_null());
+                self.end.set(NonNull::new_unchecked(end));
+            }
+            chunks.truncate(1);
+        } else {
+            // Drain our precise stack roots into the bump allocation table.
+            //
+            // This overwrites old entries, which drops them and decrements their
+            // reference counts. Safety relies on the reference count increment in
+            // `insert_precise_stack_root` to avoid over-eagerly dropping references
+            // that are in `self.precise_stack_roots` but haven't been inserted into
+            // the bump allocation table yet.
+            let mut precise_roots = precise_roots.drain();
+            'outer: for (chunk_index, chunk) in chunks.iter().enumerate() {
+                for (slot_index, slot) in chunk.iter().enumerate() {
+                    if let Some(root) = precise_roots.next() {
+                        unsafe {
+                            // NB: there is no reference count increment here
+                            // because everything in `self.precise_stack_roots`
+                            // already had its reference count incremented for us,
+                            // and this is logically a move out from there, rather
+                            // than a clone.
+                            *slot.get() = Some(VMExternRef(root));
+                        }
+                    } else {
+                        // We've inserted all of our precise, on-stack roots back
+                        // into the bump allocation table. Update our `next` and
+                        // `end` bump pointer members for the new current chunk, and
+                        // free any excess chunks.
+                        let start = chunk.as_ptr() as *mut TableElem;
+                        unsafe {
+                            let next = start.add(slot_index + 1);
+                            debug_assert!(!next.is_null());
+                            *self.next.get() = NonNull::new_unchecked(next);
+
+                            let end = start.add(chunk.len());
+                            debug_assert!(!end.is_null());
+                            self.end.set(NonNull::new_unchecked(end));
+                        }
+                        chunks.truncate(chunk_index + 1);
+                        break 'outer;
+                    }
+                }
+            }
+
+            debug_assert!(
+                precise_roots.next().is_none(),
+                "should always have enough capacity in the bump allocations table \
+                 to hold all of our precise, on-stack roots"
+            );
+        }
+
+        // Finally, sweep away excess capacity within our new last/current
+        // chunk, so that old, no-longer-live roots get dropped.
+        let num_filled_in_last_chunk = self.num_filled_in_last_chunk(&chunks);
+        for slot in chunks.last().unwrap().iter().skip(num_filled_in_last_chunk) {
+            unsafe {
+                *slot.get() = None;
+            }
+        }
+    }
+}
+
+/// A registry of stack maps for currently active Wasm modules.
+#[derive(Default)]
+pub struct StackMapRegistry {
+    inner: RwLock<StackMapRegistryInner>,
+}
+
+#[derive(Default)]
+struct StackMapRegistryInner {
+    /// A map from the highest pc in a module, to its stack maps.
+    ///
+    /// For details, see the comment above `GlobalFrameInfo::ranges`.
+    ranges: BTreeMap<usize, ModuleStackMaps>,
+}
+
+#[derive(Debug)]
+struct ModuleStackMaps {
+    /// The range of PCs that this module covers. Different modules should
+    /// always have distinct ranges.
+    range: std::ops::Range<usize>,
+
+    /// A map from a PC in this module (that is a GC safepoint) to its
+    /// associated stack map.
+    pc_to_stack_map: Vec<(usize, Arc<Stackmap>)>,
+}
+
+impl StackMapRegistry {
+    /// Register the stack maps for a given module.
+    ///
+    /// The stack maps should be given as an iterator over a function's PC range
+    /// in memory (that is, where the JIT actually allocated and emitted the
+    /// function's code at), and the stack maps and code offsets within that
+    /// range for each of its GC safepoints.
+    ///
+    /// The return value is an RAII registration for the stack maps. The
+    /// registration should not be dropped until its associated module is
+    /// dropped. Dropping the registration will unregister its stack
+    /// maps.
+    ///
+    /// # Safety
+    ///
+    /// Dropping the returned registration before the module is dropped, or when
+    /// there are still active frames from the module on the stack, means we
+    /// will no longer be able to find GC roots for the module's frames anymore,
+    /// which could lead to freeing still-in-use objects and use-after-free!
+    pub unsafe fn register_stack_maps<'a>(
+        self: &Arc<Self>,
+        stack_maps: impl IntoIterator<Item = (std::ops::Range<usize>, &'a [StackMapInformation])>,
+    ) -> Option<StackMapRegistration> {
+        let mut min = usize::max_value();
+        let mut max = 0;
+        let mut pc_to_stack_map = vec![];
+
+        for (range, infos) in stack_maps {
+            let len = range.end - range.start;
+
+            min = std::cmp::min(min, range.start);
+            max = std::cmp::max(max, range.end);
+
+            for info in infos {
+                assert!((info.code_offset as usize) < len);
+                pc_to_stack_map.push((
+                    range.start + (info.code_offset as usize),
+                    Arc::new(info.stack_map.clone()),
+                ));
+            }
+        }
+
+        if pc_to_stack_map.is_empty() {
+            // Nothing to register.
+            return None;
+        }
+
+        let module_stack_maps = ModuleStackMaps {
+            range: min..max,
+            pc_to_stack_map,
+        };
+
+        let mut inner = self.inner.write().unwrap();
+
+        // Assert that this chunk of ranges doesn't collide with any other known
+        // chunks.
+        if let Some((_, prev)) = inner.ranges.range(max..).next() {
+            assert!(prev.range.start > max);
+        }
+        if let Some((prev_end, _)) = inner.ranges.range(..=min).next_back() {
+            assert!(*prev_end < min);
+        }
+
+        let old = inner.ranges.insert(max, module_stack_maps);
+        assert!(old.is_none());
+
+        Some(StackMapRegistration {
+            key: max,
+            registry: self.clone(),
+        })
+    }
+
+    /// Lookup the stack map for the given PC, if any.
+    pub fn lookup_stack_map(&self, pc: usize) -> Option<Arc<Stackmap>> {
+        let inner = self.inner.read().unwrap();
+        let stack_maps = inner.module_stack_maps(pc)?;
+
+        // Do a binary search to find the stack map for the given PC.
+        //
+        // Because GC safepoints are technically only associated with a single
+        // PC, we should ideally only care about `Ok(index)` values returned
+        // from the binary search. However, safepoints are inserted right before
+        // calls, and there are two things that can disturb the PC/offset
+        // associated with the safepoint versus the PC we actually use to query
+        // for the stack map:
+        //
+        // 1. The `backtrace` crate gives us the PC in a frame that will be
+        //    *returned to*, and where execution will continue from, rather than
+        //    the PC of the call we are currently at. So we would need to
+        //    disassemble one instruction backwards to query the actual PC for
+        //    the stack map.
+        //
+        //    TODO: One thing we *could* do to make this a little less error
+        //    prone, would be to assert/check that the nearest GC safepoint
+        //    found is within `max_encoded_size(any kind of call instruction)`
+        //    our queried PC for the target architecture.
+        //
+        // 2. Cranelift's stack maps only handle the stack, not
+        //    registers. However, some references that are arguments to a call
+        //    may need to be in registers. In these cases, what Cranelift will
+        //    do is:
+        //
+        //      a. spill all the live references,
+        //      b. insert a GC safepoint for those references,
+        //      c. reload the references into registers, and finally
+        //      d. make the call.
+        //
+        //    Step (c) adds drift between the GC safepoint and the location of
+        //    the call, which is where we actually walk the stack frame and
+        //    collect its live references.
+        //
+        //    Luckily, the spill stack slots for the live references are still
+        //    up to date, so we can still find all the on-stack roots.
+        //    Furthermore, we do not have a moving GC, so we don't need to worry
+        //    whether the following code will reuse the references in registers
+        //    (which would not have been updated to point to the moved objects)
+        //    or reload from the stack slots (which would have been updated to
+        //    point to the moved objects).
+        let index = match stack_maps
+            .pc_to_stack_map
+            .binary_search_by_key(&pc, |(pc, _stack_map)| *pc)
+        {
+            // Exact hit.
+            Ok(i) => i,
+
+            Err(n) => {
+                // `Err(0)` means that the associated stack map would have been
+                // the first element in the array if this pc had an associated
+                // stack map, but this pc does not have an associated stack
+                // map. That doesn't make sense since every call and trap inside
+                // Wasm is a GC safepoint and should have a stack map, and the
+                // only way to have Wasm frames under this native frame is if we
+                // are at a call or a trap.
+                debug_assert!(n != 0);
+
+                n - 1
+            }
+        };
+
+        let stack_map = stack_maps.pc_to_stack_map[index].1.clone();
+        Some(stack_map)
+    }
+}
+
+impl StackMapRegistryInner {
+    fn module_stack_maps(&self, pc: usize) -> Option<&ModuleStackMaps> {
+        let (end, stack_maps) = self.ranges.range(pc..).next()?;
+        if pc < stack_maps.range.start || *end < pc {
+            None
+        } else {
+            Some(stack_maps)
+        }
+    }
+}
+
+/// The registration for a module's stack maps.
+///
+/// Unsafe to drop earlier than its module is dropped. See
+/// `StackMapRegistry::register_stack_maps` for details.
+pub struct StackMapRegistration {
+    key: usize,
+    registry: Arc<StackMapRegistry>,
+}
+
+impl Drop for StackMapRegistration {
+    fn drop(&mut self) {
+        if let Ok(mut inner) = self.registry.inner.write() {
+            inner.ranges.remove(&self.key);
+        }
+    }
+}
+
+#[cfg(debug_assertions)]
+#[derive(Debug, Default)]
+struct DebugOnly<T> {
+    inner: T,
+}
+
+#[cfg(debug_assertions)]
+impl<T> std::ops::Deref for DebugOnly<T> {
+    type Target = T;
+
+    fn deref(&self) -> &T {
+        &self.inner
+    }
+}
+
+#[cfg(debug_assertions)]
+impl<T> std::ops::DerefMut for DebugOnly<T> {
+    fn deref_mut(&mut self) -> &mut T {
+        &mut self.inner
+    }
+}
+
+#[cfg(not(debug_assertions))]
+#[derive(Debug, Default)]
+struct DebugOnly<T> {
+    _phantom: PhantomData<T>,
+}
+
+#[cfg(not(debug_assertions))]
+impl<T> std::ops::Deref for DebugOnly<T> {
+    type Target = T;
+
+    fn deref(&self) -> &T {
+        panic!("only deref `DebugOnly` inside `debug_assert!`s")
+    }
+}
+
+#[cfg(not(debug_assertions))]
+impl<T> std::ops::DerefMut for DebugOnly<T> {
+    fn deref_mut(&mut self) -> &mut T {
+        panic!("only deref `DebugOnly` inside `debug_assert!`s")
+    }
+}
+
+/// Perform garbage collection of `VMExternRef`s.
+pub fn gc(
+    stack_maps_registry: &StackMapRegistry,
+    externref_activations_table: &VMExternRefActivationsTable,
+) {
+    log::debug!("start GC");
+
+    debug_assert!({
+        // This set is only non-empty within this function. It is built up when
+        // walking the stack and interpreting stack maps, and then drained back
+        // into the activations table's bump-allocated space at the end.
+        let precise_stack_roots = externref_activations_table.precise_stack_roots.borrow();
+        precise_stack_roots.is_empty()
+    });
+
+    let mut activations_table_set: DebugOnly<HashSet<_>> = Default::default();
+    if cfg!(debug_assertions) {
+        externref_activations_table.elements(|elem| {
+            activations_table_set.insert(elem.as_raw() as *mut VMExternData);
+        });
+    }
+
+    backtrace::trace(|frame| {
+        let pc = frame.ip() as usize;
+
+        if let Some(stack_map) = stack_maps_registry.lookup_stack_map(pc) {
+            let ptr_to_frame = frame.sp() as usize;
+
+            for i in 0..(stack_map.mapped_words() as usize) {
+                if stack_map.get_bit(i) {
+                    // Stack maps have one bit per word in the frame, and the
+                    // zero^th bit is the *lowest* addressed word in the frame,
+                    // i.e. the closest to the SP. So to get the `i`^th word in
+                    // this frame, we add `i * sizeof(word)` to the
+                    // lowest-addressed word within this frame.
+                    let ptr_to_ref = ptr_to_frame + i * mem::size_of::<usize>();
+
+                    let r = unsafe { std::ptr::read(ptr_to_ref as *const *mut VMExternData) };
+                    debug_assert!(
+                        r.is_null() || activations_table_set.contains(&r),
+                        "every on-stack externref inside a Wasm frame should \
+                         have an entry in the VMExternRefActivationsTable"
+                    );
+                    if let Some(r) = NonNull::new(r) {
+                        externref_activations_table.insert_precise_stack_root(r);
+                    }
+                }
+            }
+        }
+
+        // Keep walking the stack.
+        true
+    });
+
+    externref_activations_table.reset();
+    log::debug!("end GC");
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -434,4 +1052,65 @@ mod tests {
             actual_offset.try_into().unwrap(),
         );
     }
+
+    #[test]
+    fn table_next_is_at_correct_offset() {
+        let table = VMExternRefActivationsTable::new();
+
+        let table_ptr = &table as *const _;
+        let next_ptr = &table.next as *const _;
+
+        let actual_offset = (next_ptr as usize) - (table_ptr as usize);
+
+        let offsets = wasmtime_environ::VMOffsets {
+            pointer_size: 8,
+            num_signature_ids: 0,
+            num_imported_functions: 0,
+            num_imported_tables: 0,
+            num_imported_memories: 0,
+            num_imported_globals: 0,
+            num_defined_tables: 0,
+            num_defined_memories: 0,
+            num_defined_globals: 0,
+        };
+        assert_eq!(offsets.vm_extern_ref_activation_table_next(), actual_offset);
+    }
+
+    #[test]
+    fn table_end_is_at_correct_offset() {
+        let table = VMExternRefActivationsTable::new();
+
+        let table_ptr = &table as *const _;
+        let end_ptr = &table.end as *const _;
+
+        let actual_offset = (end_ptr as usize) - (table_ptr as usize);
+
+        let offsets = wasmtime_environ::VMOffsets {
+            pointer_size: 8,
+            num_signature_ids: 0,
+            num_imported_functions: 0,
+            num_imported_tables: 0,
+            num_imported_memories: 0,
+            num_imported_globals: 0,
+            num_defined_tables: 0,
+            num_defined_memories: 0,
+            num_defined_globals: 0,
+        };
+        assert_eq!(offsets.vm_extern_ref_activation_table_end(), actual_offset);
+    }
+
+    fn assert_is_send<T: Send>() {}
+    fn assert_is_sync<T: Send>() {}
+
+    #[test]
+    fn stack_map_registry_is_send_sync() {
+        assert_is_send::<StackMapRegistry>();
+        assert_is_sync::<StackMapRegistry>();
+    }
+
+    #[test]
+    fn stack_map_registration_is_send_sync() {
+        assert_is_send::<StackMapRegistration>();
+        assert_is_sync::<StackMapRegistration>();
+    }
 }
diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs
index 8642da2aef45..026c100b1b37 100644
--- a/crates/runtime/src/instance.rs
+++ b/crates/runtime/src/instance.rs
@@ -3,6 +3,7 @@
 //! `InstanceHandle` is a reference-counting handle for an `Instance`.
 
 use crate::export::Export;
+use crate::externref::{StackMapRegistry, VMExternRefActivationsTable};
 use crate::imports::Imports;
 use crate::memory::{DefaultMemoryCreator, RuntimeLinearMemory, RuntimeMemoryCreator};
 use crate::table::Table;
@@ -20,6 +21,7 @@ use std::any::Any;
 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::convert::TryFrom;
+use std::rc::Rc;
 use std::sync::Arc;
 use std::{mem, ptr, slice};
 use thiserror::Error;
@@ -72,6 +74,19 @@ pub(crate) struct Instance {
     /// interrupted.
     pub(crate) interrupts: Arc<VMInterrupts>,
 
+    /// A handle to the (over-approximized) set of `externref`s that Wasm code
+    /// is using.
+    ///
+    /// The `vmctx` also holds a raw pointer to the table and relies on this
+    /// member to keep it alive.
+    pub(crate) externref_activations_table: Rc<VMExternRefActivationsTable>,
+
+    /// A handle to the stack map registry for this thread.
+    ///
+    /// The `vmctx` also holds a raw pointer to the registry and relies on this
+    /// member to keep it alive.
+    pub(crate) stack_map_registry: Arc<StackMapRegistry>,
+
     /// Additional context used by compiled wasm code. This field is last, and
     /// represents a dynamically-sized array that extends beyond the nominal
     /// end of the struct (similar to a flexible array member).
@@ -238,6 +253,16 @@ impl Instance {
         unsafe { self.vmctx_plus_offset(self.offsets.vmctx_interrupts()) }
     }
 
+    /// Return a pointer to the `VMExternRefActivationsTable`.
+    pub fn externref_activations_table(&self) -> *mut *mut VMExternRefActivationsTable {
+        unsafe { self.vmctx_plus_offset(self.offsets.vmctx_externref_activations_table()) }
+    }
+
+    /// Return a pointer to the `StackMapRegistry`.
+    pub fn stack_map_registry(&self) -> *mut *mut StackMapRegistry {
+        unsafe { self.vmctx_plus_offset(self.offsets.vmctx_stack_map_registry()) }
+    }
+
     /// Return a reference to the vmctx used by compiled wasm code.
     pub fn vmctx(&self) -> &VMContext {
         &self.vmctx
@@ -784,6 +809,8 @@ impl InstanceHandle {
         vmshared_signatures: BoxedSlice<SignatureIndex, VMSharedSignatureIndex>,
         host_state: Box<dyn Any>,
         interrupts: Arc<VMInterrupts>,
+        externref_activations_table: Rc<VMExternRefActivationsTable>,
+        stack_map_registry: Arc<StackMapRegistry>,
     ) -> Result<Self, InstantiationError> {
         let tables = create_tables(&module);
         let memories = create_memories(&module, mem_creator.unwrap_or(&DefaultMemoryCreator {}))?;
@@ -819,6 +846,8 @@ impl InstanceHandle {
                 trampolines,
                 host_state,
                 interrupts,
+                externref_activations_table,
+                stack_map_registry,
                 vmctx: VMContext {},
             };
             let layout = instance.alloc_layout();
@@ -878,6 +907,9 @@ impl InstanceHandle {
             VMBuiltinFunctionsArray::initialized(),
         );
         *instance.interrupts() = &*instance.interrupts;
+        *instance.externref_activations_table() =
+            &*instance.externref_activations_table as *const _ as *mut _;
+        *instance.stack_map_registry() = &*instance.stack_map_registry as *const _ as *mut _;
 
         // Perform infallible initialization in this constructor, while fallible
         // initialization is deferred to the `initialize` method.
diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs
index 6f2896054a5f..f523206e902e 100644
--- a/crates/runtime/src/lib.rs
+++ b/crates/runtime/src/lib.rs
@@ -37,7 +37,7 @@ pub mod debug_builtins;
 pub mod libcalls;
 
 pub use crate::export::*;
-pub use crate::externref::VMExternRef;
+pub use crate::externref::*;
 pub use crate::imports::Imports;
 pub use crate::instance::{InstanceHandle, InstantiationError, LinkError};
 pub use crate::jit_int::GdbJitImageRegistration;
diff --git a/crates/runtime/src/mmap.rs b/crates/runtime/src/mmap.rs
index 483ec5be0963..8cba107404ef 100644
--- a/crates/runtime/src/mmap.rs
+++ b/crates/runtime/src/mmap.rs
@@ -182,7 +182,7 @@ impl Mmap {
 
         // Commit the accessible size.
         let ptr = self.ptr as *const u8;
-        unsafe { region::protect(ptr.add(start), len, region::Protection::READ_WRITE) }
+        unsafe { region::protect(ptr.add(start), len, region::Protection::ReadWrite) }
             .map_err(|e| e.to_string())
     }
 
diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs
index 119c87ac8f73..c8551d2aad58 100644
--- a/crates/wasmtime/src/func.rs
+++ b/crates/wasmtime/src/func.rs
@@ -265,14 +265,14 @@ impl Func {
             // values produced are correct. There could be a bug in `func` that
             // produces the wrong number or wrong types of values, and we need
             // to catch that here.
-            for (i, (ret, ty)) in returns.iter_mut().zip(ty_clone.results()).enumerate() {
+            for (i, (ret, ty)) in returns.into_iter().zip(ty_clone.results()).enumerate() {
                 if ret.ty() != *ty {
                     return Err(Trap::new(
                         "function attempted to return an incompatible value",
                     ));
                 }
                 unsafe {
-                    ret.write_value_to(values_vec.add(i));
+                    ret.write_value_to(&store, values_vec.add(i));
                 }
             }
             Ok(())
@@ -535,7 +535,7 @@ impl Func {
 
         // Store the argument values into `values_vec`.
         let param_tys = my_ty.params().iter();
-        for ((arg, slot), ty) in params.iter().zip(&mut values_vec).zip(param_tys) {
+        for ((arg, slot), ty) in params.iter().cloned().zip(&mut values_vec).zip(param_tys) {
             if arg.ty() != *ty {
                 bail!(
                     "argument type mismatch: found {} but expected {}",
@@ -547,7 +547,7 @@ impl Func {
                 bail!("cross-`Store` values are not currently supported");
             }
             unsafe {
-                arg.write_value_to(slot);
+                arg.write_value_to(&self.instance.store, slot);
             }
         }
 
diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs
index ed39a70d4d72..0e15dcdc97dc 100644
--- a/crates/wasmtime/src/instance.rs
+++ b/crates/wasmtime/src/instance.rs
@@ -3,9 +3,13 @@ use crate::{Engine, Export, Extern, Func, Global, Memory, Module, Store, Table,
 use anyhow::{bail, Error, Result};
 use std::any::Any;
 use std::mem;
+use std::rc::Rc;
+use std::sync::Arc;
 use wasmtime_environ::EntityIndex;
 use wasmtime_jit::{CompiledModule, Resolver};
-use wasmtime_runtime::{InstantiationError, VMContext, VMFunctionBody};
+use wasmtime_runtime::{
+    InstantiationError, StackMapRegistry, VMContext, VMExternRefActivationsTable, VMFunctionBody,
+};
 
 struct SimpleResolver<'a> {
     imports: &'a [Extern],
@@ -24,6 +28,8 @@ fn instantiate(
     compiled_module: &CompiledModule,
     imports: &[Extern],
     host: Box<dyn Any>,
+    externref_activations_table: Rc<VMExternRefActivationsTable>,
+    stack_map_registry: Arc<StackMapRegistry>,
 ) -> Result<StoreInstanceHandle, Error> {
     // For now we have a restriction that the `Store` that we're working
     // with is the same for everything involved here.
@@ -50,6 +56,8 @@ fn instantiate(
             config.memory_creator.as_ref().map(|a| a as _),
             store.interrupts().clone(),
             host,
+            externref_activations_table,
+            stack_map_registry,
         )?;
 
         // After we've created the `InstanceHandle` we still need to run
@@ -183,10 +191,27 @@ impl Instance {
             bail!("cross-`Engine` instantiation is not currently supported");
         }
 
-        let info = module.register_frame_info();
-        store.register_jit_code(module.compiled_module().jit_code_ranges());
+        let host_info = Box::new({
+            let frame_info_registration = module.register_frame_info();
+            store.register_jit_code(module.compiled_module().jit_code_ranges());
 
-        let handle = instantiate(store, module.compiled_module(), imports, Box::new(info))?;
+            // We need to make sure that we keep this alive as long as the instance
+            // is alive, or else we could miss GC roots, reclaim objects too early,
+            // and get user-after-frees.
+            let stack_map_registration =
+                unsafe { module.register_stack_maps(&*store.stack_map_registry()) };
+
+            (frame_info_registration, stack_map_registration)
+        });
+
+        let handle = instantiate(
+            store,
+            module.compiled_module(),
+            imports,
+            host_info,
+            store.externref_activations_table().clone(),
+            store.stack_map_registry().clone(),
+        )?;
 
         Ok(Instance {
             handle,
diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs
index 391c3b63de10..c79e4970ba3f 100644
--- a/crates/wasmtime/src/module.rs
+++ b/crates/wasmtime/src/module.rs
@@ -6,6 +6,7 @@ use std::path::Path;
 use std::sync::{Arc, Mutex};
 use wasmparser::validate;
 use wasmtime_jit::CompiledModule;
+use wasmtime_runtime::{StackMapRegistration, StackMapRegistry};
 
 /// A compiled WebAssembly module, ready to be instantiated.
 ///
@@ -80,6 +81,7 @@ pub struct Module {
     engine: Engine,
     compiled: Arc<CompiledModule>,
     frame_info_registration: Arc<Mutex<Option<Option<Arc<GlobalFrameInfoRegistration>>>>>,
+    stack_map_registration: Arc<Mutex<Option<Option<Arc<StackMapRegistration>>>>>,
 }
 
 impl Module {
@@ -307,6 +309,7 @@ impl Module {
             engine: engine.clone(),
             compiled: Arc::new(compiled),
             frame_info_registration: Arc::new(Mutex::new(None)),
+            stack_map_registration: Arc::new(Mutex::new(None)),
         })
     }
 
@@ -534,6 +537,41 @@ impl Module {
         *info = Some(ret.clone());
         return ret;
     }
+
+    /// Register this module's stack maps.
+    ///
+    /// # Safety
+    ///
+    /// The same as `wasmtime_runtime::StackMapRegistry::register_stack_maps`.
+    pub(crate) unsafe fn register_stack_maps(
+        &self,
+        registry: &Arc<StackMapRegistry>,
+    ) -> Option<Arc<StackMapRegistration>> {
+        let mut registration = self.stack_map_registration.lock().unwrap();
+        if let Some(registration) = &*registration {
+            return registration.clone();
+        }
+
+        let module = &self.compiled;
+        let ret = registry
+            .register_stack_maps(
+                module
+                    .finished_functions()
+                    .values()
+                    .zip(module.stack_maps().values())
+                    .map(|(func, stack_maps)| {
+                        let ptr = (**func).as_ptr();
+                        let len = (**func).len();
+                        let start = ptr as usize;
+                        let end = ptr as usize + len;
+                        let range = start..end;
+                        (range, &stack_maps[..])
+                    }),
+            )
+            .map(Arc::new);
+        *registration = Some(ret.clone());
+        ret
+    }
 }
 
 fn _assert_send_sync() {
diff --git a/crates/wasmtime/src/ref.rs b/crates/wasmtime/src/ref.rs
old mode 100644
new mode 100755
index 3f87dbd8a256..6bb8141494d7
--- a/crates/wasmtime/src/ref.rs
+++ b/crates/wasmtime/src/ref.rs
@@ -36,6 +36,11 @@ impl ExternRef {
         &*self.inner
     }
 
+    /// Get the reference count for this `ExternRef`.
+    pub fn get_reference_count(&self) -> usize {
+        self.inner.get_reference_count()
+    }
+
     /// Does this `ExternRef` point to the same inner value as `other`?0
     ///
     /// This is *only* pointer equality, and does *not* run any inner value's
diff --git a/crates/wasmtime/src/runtime.rs b/crates/wasmtime/src/runtime.rs
index f6eb0e97e4ff..31bf83bcc14d 100644
--- a/crates/wasmtime/src/runtime.rs
+++ b/crates/wasmtime/src/runtime.rs
@@ -19,7 +19,8 @@ use wasmtime_jit::{native, CompilationStrategy, Compiler};
 use wasmtime_profiling::{JitDumpAgent, NullProfilerAgent, ProfilingAgent, VTuneAgent};
 use wasmtime_runtime::{
     debug_builtins, InstanceHandle, RuntimeMemoryCreator, SignalHandler, SignatureRegistry,
-    VMExternRef, VMInterrupts, VMSharedSignatureIndex,
+    StackMapRegistry, VMExternRef, VMExternRefActivationsTable, VMInterrupts,
+    VMSharedSignatureIndex,
 };
 
 // Runtime Environment
@@ -194,10 +195,15 @@ impl Config {
         self.validating_config
             .operator_config
             .enable_reference_types = enable;
-        // The reference types proposal depends on the bulk memory proposal
+        self.flags
+            .set("enable_safepoints", if enable { "true" } else { "false" })
+            .unwrap();
+
+        // The reference types proposal depends on the bulk memory proposal.
         if enable {
             self.wasm_bulk_memory(true);
         }
+
         self
     }
 
@@ -724,6 +730,7 @@ pub struct Engine {
 struct EngineInner {
     config: Config,
     compiler: Compiler,
+    stack_map_registry: Arc<StackMapRegistry>,
 }
 
 impl Engine {
@@ -735,6 +742,7 @@ impl Engine {
             inner: Arc::new(EngineInner {
                 config: config.clone(),
                 compiler: config.build_compiler(),
+                stack_map_registry: Arc::new(StackMapRegistry::default()),
             }),
         }
     }
@@ -792,6 +800,8 @@ pub(crate) struct StoreInner {
     signal_handler: RefCell<Option<Box<SignalHandler<'static>>>>,
     jit_code_ranges: RefCell<Vec<(usize, usize)>>,
     host_info: RefCell<HashMap<HostInfoKey, Rc<RefCell<dyn Any>>>>,
+    externref_activations_table: Rc<VMExternRefActivationsTable>,
+    stack_map_registry: Arc<StackMapRegistry>,
 }
 
 struct HostInfoKey(VMExternRef);
@@ -832,6 +842,8 @@ impl Store {
                 signal_handler: RefCell::new(None),
                 jit_code_ranges: RefCell::new(Vec::new()),
                 host_info: RefCell::new(HashMap::new()),
+                externref_activations_table: Rc::new(VMExternRefActivationsTable::new()),
+                stack_map_registry: engine.inner.stack_map_registry.clone(),
             }),
         }
     }
@@ -1074,6 +1086,22 @@ impl Store {
             bail!("interrupts aren't enabled for this `Store`")
         }
     }
+
+    pub(crate) fn externref_activations_table(&self) -> &Rc<VMExternRefActivationsTable> {
+        &self.inner.externref_activations_table
+    }
+
+    pub(crate) fn stack_map_registry(&self) -> &Arc<StackMapRegistry> {
+        &self.inner.engine.inner.stack_map_registry
+    }
+
+    /// Perform garbage collection of `ExternRef`s.
+    pub fn gc(&self) {
+        wasmtime_runtime::gc(
+            &*self.inner.stack_map_registry,
+            &*self.inner.externref_activations_table,
+        );
+    }
 }
 
 impl Default for Store {
diff --git a/crates/wasmtime/src/trampoline/create_handle.rs b/crates/wasmtime/src/trampoline/create_handle.rs
index 6d2f691cc69f..5a3eca831b7a 100644
--- a/crates/wasmtime/src/trampoline/create_handle.rs
+++ b/crates/wasmtime/src/trampoline/create_handle.rs
@@ -46,6 +46,8 @@ pub(crate) fn create_handle(
             signatures.into_boxed_slice(),
             state,
             store.interrupts().clone(),
+            store.externref_activations_table().clone(),
+            store.stack_map_registry().clone(),
         )?;
         Ok(store.add_instance(handle))
     }
diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs
index b4a614357b13..6ccc700c2431 100644
--- a/crates/wasmtime/src/trampoline/func.rs
+++ b/crates/wasmtime/src/trampoline/func.rs
@@ -2,7 +2,7 @@
 
 use super::create_handle::create_handle;
 use crate::trampoline::StoreInstanceHandle;
-use crate::{FuncType, Store, Trap};
+use crate::{FuncType, Store, Trap, ValType};
 use anyhow::{bail, Result};
 use std::any::Any;
 use std::cmp;
@@ -11,7 +11,9 @@ use std::mem;
 use std::panic::{self, AssertUnwindSafe};
 use wasmtime_environ::entity::PrimaryMap;
 use wasmtime_environ::isa::TargetIsa;
-use wasmtime_environ::{ir, settings, CompiledFunction, EntityIndex, Module};
+use wasmtime_environ::{
+    ir, settings, settings::Configurable, CompiledFunction, EntityIndex, Module,
+};
 use wasmtime_jit::trampoline::ir::{
     ExternalName, Function, InstBuilder, MemFlags, StackSlotData, StackSlotKind,
 };
@@ -210,7 +212,14 @@ pub fn create_handle_with_function(
 ) -> Result<(StoreInstanceHandle, VMTrampoline)> {
     let isa = {
         let isa_builder = native::builder();
-        let flag_builder = settings::builder();
+        let mut flag_builder = settings::builder();
+
+        if ft.params().iter().any(|p| *p == ValType::ExternRef)
+            || ft.results().iter().any(|r| *r == ValType::ExternRef)
+        {
+            flag_builder.set("enable_safepoints", "true").unwrap();
+        }
+
         isa_builder.finish(settings::Flags::new(flag_builder))
     };
 
diff --git a/crates/wasmtime/src/types.rs b/crates/wasmtime/src/types.rs
index 75c67ae18806..8fa8b79b345a 100644
--- a/crates/wasmtime/src/types.rs
+++ b/crates/wasmtime/src/types.rs
@@ -106,6 +106,7 @@ impl ValType {
             ValType::F32 => Some(ir::types::F32),
             ValType::F64 => Some(ir::types::F64),
             ValType::V128 => Some(ir::types::I8X16),
+            ValType::ExternRef => Some(ir::types::R64),
             _ => None,
         }
     }
@@ -117,6 +118,7 @@ impl ValType {
             ir::types::F32 => Some(ValType::F32),
             ir::types::F64 => Some(ValType::F64),
             ir::types::I8X16 => Some(ValType::V128),
+            ir::types::R64 => Some(ValType::ExternRef),
             _ => None,
         }
     }
diff --git a/crates/wasmtime/src/values.rs b/crates/wasmtime/src/values.rs
index 9c598cd51382..2eb9b8bc7ef2 100644
--- a/crates/wasmtime/src/values.rs
+++ b/crates/wasmtime/src/values.rs
@@ -79,15 +79,22 @@ impl Val {
         }
     }
 
-    pub(crate) unsafe fn write_value_to(&self, p: *mut u128) {
+    pub(crate) unsafe fn write_value_to(self, store: &Store, p: *mut u128) {
         match self {
-            Val::I32(i) => ptr::write(p as *mut i32, *i),
-            Val::I64(i) => ptr::write(p as *mut i64, *i),
-            Val::F32(u) => ptr::write(p as *mut u32, *u),
-            Val::F64(u) => ptr::write(p as *mut u64, *u),
-            Val::V128(b) => ptr::write(p as *mut u128, *b),
+            Val::I32(i) => ptr::write(p as *mut i32, i),
+            Val::I64(i) => ptr::write(p as *mut i64, i),
+            Val::F32(u) => ptr::write(p as *mut u32, u),
+            Val::F64(u) => ptr::write(p as *mut u64, u),
+            Val::V128(b) => ptr::write(p as *mut u128, b),
             Val::ExternRef(None) => ptr::write(p, 0),
-            Val::ExternRef(Some(x)) => ptr::write(p as *mut *mut u8, x.inner.clone().into_raw()),
+            Val::ExternRef(Some(x)) => {
+                let externref_ptr = x.inner.as_raw();
+                if let Err(inner) = store.externref_activations_table().try_insert(x.inner) {
+                    store.gc();
+                    store.externref_activations_table().insert_slow_path(inner);
+                }
+                ptr::write(p as *mut *mut u8, externref_ptr)
+            }
             _ => unimplemented!("Val::write_value_to"),
         }
     }
@@ -105,7 +112,7 @@ impl Val {
                     Val::ExternRef(None)
                 } else {
                     Val::ExternRef(Some(ExternRef {
-                        inner: VMExternRef::from_raw(raw),
+                        inner: VMExternRef::clone_from_raw(raw),
                         store: store.weak(),
                     }))
                 }
diff --git a/src/obj.rs b/src/obj.rs
index 72cb4618952e..63929f619dae 100644
--- a/src/obj.rs
+++ b/src/obj.rs
@@ -99,19 +99,26 @@ pub fn compile_to_obj(
         .translate(wasm)
         .context("failed to translate module")?;
 
-    // TODO: use the traps information
-    let (compilation, relocations, address_transform, value_ranges, stack_slots, _traps) =
-        match strategy {
-            Strategy::Auto | Strategy::Cranelift => {
-                Cranelift::compile_module(&translation, &*isa, cache_config)
-            }
-            #[cfg(feature = "lightbeam")]
-            Strategy::Lightbeam => Lightbeam::compile_module(&translation, &*isa, cache_config),
-            #[cfg(not(feature = "lightbeam"))]
-            Strategy::Lightbeam => bail!("lightbeam support not enabled"),
-            other => bail!("unsupported compilation strategy {:?}", other),
+    // TODO: use the traps and stack maps information.
+    let (
+        compilation,
+        relocations,
+        address_transform,
+        value_ranges,
+        stack_slots,
+        _traps,
+        _stack_maps,
+    ) = match strategy {
+        Strategy::Auto | Strategy::Cranelift => {
+            Cranelift::compile_module(&translation, &*isa, cache_config)
         }
-        .context("failed to compile module")?;
+        #[cfg(feature = "lightbeam")]
+        Strategy::Lightbeam => Lightbeam::compile_module(&translation, &*isa, cache_config),
+        #[cfg(not(feature = "lightbeam"))]
+        Strategy::Lightbeam => bail!("lightbeam support not enabled"),
+        other => bail!("unsupported compilation strategy {:?}", other),
+    }
+    .context("failed to compile module")?;
 
     if compilation.is_empty() {
         bail!("no functions were found/compiled");
diff --git a/tests/all/gc.rs b/tests/all/gc.rs
new file mode 100644
index 000000000000..cd6bae369134
--- /dev/null
+++ b/tests/all/gc.rs
@@ -0,0 +1,228 @@
+use std::cell::Cell;
+use std::rc::Rc;
+use wasmtime::*;
+
+fn ref_types_module(source: &str) -> anyhow::Result<(Store, Module)> {
+    let _ = env_logger::try_init();
+
+    let mut config = Config::new();
+    config.wasm_reference_types(true);
+
+    let engine = Engine::new(&config);
+    let store = Store::new(&engine);
+
+    let module = Module::new(&engine, source)?;
+
+    Ok((store, module))
+}
+
+#[test]
+fn smoke_test_gc() -> anyhow::Result<()> {
+    let (store, module) = ref_types_module(
+        r#"
+            (module
+                (import "" "" (func $do_gc))
+                (func $recursive (export "func") (param i32 externref) (result externref)
+                    local.get 0
+                    i32.eqz
+                    if (result externref)
+                        call $do_gc
+                        local.get 1
+                    else
+                        local.get 0
+                        i32.const 1
+                        i32.sub
+                        local.get 1
+                        call $recursive
+                    end
+                )
+            )
+        "#,
+    )?;
+
+    let do_gc = Func::wrap(&store, {
+        let store = store.clone();
+        move || {
+            // Do a GC with `externref`s on the stack in Wasm frames.
+            store.gc();
+        }
+    });
+    let instance = Instance::new(&store, &module, &[do_gc.into()])?;
+    let func = instance.get_func("func").unwrap();
+
+    let inner_dropped = Rc::new(Cell::new(false));
+    let r = ExternRef::new(&store, SetFlagOnDrop(inner_dropped.clone()));
+    {
+        let args = [Val::I32(5), Val::ExternRef(Some(r.clone()))];
+        func.call(&args)?;
+    }
+
+    // Still held alive by the `VMExternRefActivationsTable` (potentially in
+    // multiple slots within the table) and by this `r` local.
+    assert!(r.get_reference_count() >= 2);
+
+    // Doing a GC should see that there aren't any `externref`s on the stack in
+    // Wasm frames anymore.
+    store.gc();
+    assert_eq!(r.get_reference_count(), 1);
+
+    // Dropping `r` should drop the inner `SetFlagOnDrop` value.
+    drop(r);
+    assert!(inner_dropped.get());
+
+    return Ok(());
+
+    struct SetFlagOnDrop(Rc<Cell<bool>>);
+
+    impl Drop for SetFlagOnDrop {
+        fn drop(&mut self) {
+            self.0.set(true);
+        }
+    }
+}
+
+#[test]
+fn wasm_dropping_refs() -> anyhow::Result<()> {
+    let (store, module) = ref_types_module(
+        r#"
+            (module
+                (func (export "drop_ref") (param externref)
+                    nop
+                )
+            )
+        "#,
+    )?;
+
+    let instance = Instance::new(&store, &module, &[])?;
+    let drop_ref = instance.get_func("drop_ref").unwrap();
+
+    let num_refs_dropped = Rc::new(Cell::new(0));
+
+    // NB: 4096 is greater than the initial `VMExternRefActivationsTable`
+    // capacity, so this will trigger at least one GC.
+    for _ in 0..4096 {
+        let r = ExternRef::new(&store, CountDrops(num_refs_dropped.clone()));
+        let args = [Val::ExternRef(Some(r))];
+        drop_ref.call(&args)?;
+    }
+
+    assert!(num_refs_dropped.get() > 0);
+
+    // And after doing a final GC, all the refs should have been dropped.
+    store.gc();
+    assert_eq!(num_refs_dropped.get(), 4096);
+
+    return Ok(());
+
+    struct CountDrops(Rc<Cell<usize>>);
+
+    impl Drop for CountDrops {
+        fn drop(&mut self) {
+            self.0.set(self.0.get() + 1);
+        }
+    }
+}
+
+#[test]
+fn many_live_refs() -> anyhow::Result<()> {
+    let mut wat = r#"
+        (module
+            ;; Make new `externref`s.
+            (import "" "make_ref" (func $make_ref (result externref)))
+
+            ;; Observe an `externref` so it is kept live.
+            (import "" "observe_ref" (func $observe_ref (param externref)))
+
+            (func (export "many_live_refs")
+    "#
+    .to_string();
+
+    // This is more than the initial `VMExternRefActivationsTable` capacity, so
+    // it will need to allocate additional bump chunks.
+    const NUM_LIVE_REFS: usize = 1024;
+
+    // Push `externref`s onto the stack.
+    for _ in 0..NUM_LIVE_REFS {
+        wat.push_str("(call $make_ref)\n");
+    }
+
+    // Pop `externref`s from the stack. Because we pass each of them to a
+    // function call here, they are all live references for the duration of
+    // their lifetimes.
+    for _ in 0..NUM_LIVE_REFS {
+        wat.push_str("(call $observe_ref)\n");
+    }
+
+    wat.push_str(
+        "
+            ) ;; func
+        ) ;; module
+        ",
+    );
+
+    let (store, module) = ref_types_module(&wat)?;
+
+    let live_refs = Rc::new(Cell::new(0));
+
+    let make_ref = Func::new(
+        &store,
+        FuncType::new(
+            vec![].into_boxed_slice(),
+            vec![ValType::ExternRef].into_boxed_slice(),
+        ),
+        {
+            let store = store.clone();
+            let live_refs = live_refs.clone();
+            move |_caller, _params, results| {
+                results[0] = Val::ExternRef(Some(ExternRef::new(
+                    &store,
+                    CountLiveRefs::new(live_refs.clone()),
+                )));
+                Ok(())
+            }
+        },
+    );
+
+    let observe_ref = Func::new(
+        &store,
+        FuncType::new(
+            vec![ValType::ExternRef].into_boxed_slice(),
+            vec![].into_boxed_slice(),
+        ),
+        |_caller, params, _results| {
+            let r = params[0].externref().unwrap().unwrap();
+            let r = r.data().downcast_ref::<CountLiveRefs>().unwrap();
+            assert!(r.live_refs.get() > 0);
+            Ok(())
+        },
+    );
+
+    let instance = Instance::new(&store, &module, &[make_ref.into(), observe_ref.into()])?;
+    let many_live_refs = instance.get_func("many_live_refs").unwrap();
+
+    many_live_refs.call(&[])?;
+
+    store.gc();
+    assert_eq!(live_refs.get(), 0);
+
+    return Ok(());
+
+    struct CountLiveRefs {
+        live_refs: Rc<Cell<usize>>,
+    }
+
+    impl CountLiveRefs {
+        fn new(live_refs: Rc<Cell<usize>>) -> Self {
+            let live = live_refs.get();
+            live_refs.set(live + 1);
+            Self { live_refs }
+        }
+    }
+
+    impl Drop for CountLiveRefs {
+        fn drop(&mut self) {
+            let live = self.live_refs.get();
+            self.live_refs.set(live - 1);
+        }
+    }
+}
diff --git a/tests/all/main.rs b/tests/all/main.rs
index e7409c21d2d2..5d8db9ffe2d4 100644
--- a/tests/all/main.rs
+++ b/tests/all/main.rs
@@ -4,6 +4,7 @@ mod debug;
 mod externals;
 mod func;
 mod fuzzing;
+mod gc;
 mod globals;
 mod iloop;
 mod import_calling_export;

From 618c278e419f58fd1a5df0edfcb332402fdd9e16 Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald <fitzgen@gmail.com>
Date: Thu, 11 Jun 2020 14:29:30 -0700
Subject: [PATCH 2/4] externref: implement a canary for GC stack walking

This allows us to detect when stack walking has failed to walk the whole stack,
and we are potentially missing on-stack roots, and therefore it would be unsafe
to do a GC because we could free objects too early, leading to use-after-free.
When we detect this scenario, we skip the GC.
---
 crates/runtime/src/externref.rs | 170 +++++++++++++++++++++++++++++---
 crates/wasmtime/src/func.rs     |  39 +++++---
 crates/wasmtime/src/runtime.rs  |  12 ++-
 3 files changed, 195 insertions(+), 26 deletions(-)

diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs
index dba09a801446..92d7105a8a55 100644
--- a/crates/runtime/src/externref.rs
+++ b/crates/runtime/src/externref.rs
@@ -499,6 +499,14 @@ pub struct VMExternRefActivationsTable {
     /// is just part of this struct so that we can reuse the allocation, rather
     /// than create a new hash set every GC.
     precise_stack_roots: RefCell<HashSet<NonNull<VMExternData>>>,
+
+    /// A pointer to a `u8` on the youngest host stack frame before we called
+    /// into Wasm for the first time. When walking the stack in garbage
+    /// collection, if we don't find this frame, then we failed to walk every
+    /// Wasm stack frame, which means we failed to find all on-stack,
+    /// inside-a-Wasm-frame roots, and doing a GC could lead to freeing one of
+    /// those missed roots, and use after free.
+    stack_canary: Cell<Option<NonNull<u8>>>,
 }
 
 impl VMExternRefActivationsTable {
@@ -515,6 +523,7 @@ impl VMExternRefActivationsTable {
             end: Cell::new(NonNull::new(end).unwrap()),
             chunks: RefCell::new(vec![chunk]),
             precise_stack_roots: RefCell::new(HashSet::with_capacity(Self::INITIAL_CHUNK_SIZE)),
+            stack_canary: Cell::new(None),
         }
     }
 
@@ -710,6 +719,73 @@ impl VMExternRefActivationsTable {
             }
         }
     }
+
+    /// Set the stack canary around a call into Wasm.
+    ///
+    /// The return value should not be dropped until after the Wasm call has
+    /// returned.
+    ///
+    /// While this method is always safe to call (or not call), it is unsafe to
+    /// call the `wasmtime_runtime::gc` function unless this method is called at
+    /// the proper times and its return value properly outlives its Wasm call.
+    ///
+    /// For `gc` to be safe, this is only *strictly required* to surround the
+    /// oldest host-->Wasm stack frame transition on this thread, but repeatedly
+    /// calling it is idempotent and cheap, so it is recommended to call this
+    /// for every host-->Wasm call.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use wasmtime_runtime::*;
+    ///
+    /// #let get_table_from_somewhere = || unimplemented!();
+    /// let table: &VMExternRefActivationsTable = get_table_from_somewhere();
+    ///
+    /// // Set the canary before a Wasm call. The canary should always be a
+    /// // local on the stack.
+    /// let canary = 0;
+    /// let auto_reset_canary = table.set_stack_canary(&canary);
+    ///
+    /// // Do the call into Wasm.
+    /// #let call_into_wasm = || unimplemented!();
+    /// call_into_wasm();
+    ///
+    /// // Only drop the value returned by `set_stack_canary` after the Wasm
+    /// // call has returned.
+    /// drop(auto_reset_canary);
+    /// ```
+    pub fn set_stack_canary<'a>(&'a self, canary: &u8) -> impl Drop + 'a {
+        let should_reset = if self.stack_canary.get().is_none() {
+            let canary = canary as *const u8 as *mut u8;
+            self.stack_canary.set(Some(unsafe {
+                debug_assert!(!canary.is_null());
+                NonNull::new_unchecked(canary)
+            }));
+            true
+        } else {
+            false
+        };
+
+        return AutoResetCanary {
+            table: self,
+            should_reset,
+        };
+
+        struct AutoResetCanary<'a> {
+            table: &'a VMExternRefActivationsTable,
+            should_reset: bool,
+        }
+
+        impl Drop for AutoResetCanary<'_> {
+            fn drop(&mut self) {
+                if self.should_reset {
+                    debug_assert!(self.table.stack_canary.get().is_some());
+                    self.table.stack_canary.set(None);
+                }
+            }
+        }
+    }
 }
 
 /// A registry of stack maps for currently active Wasm modules.
@@ -954,7 +1030,14 @@ impl<T> std::ops::DerefMut for DebugOnly<T> {
 }
 
 /// Perform garbage collection of `VMExternRef`s.
-pub fn gc(
+///
+/// # Unsafety
+///
+/// You must have called `VMExternRefActivationsTable::set_stack_canary` for at
+/// least the oldest host-->Wasm stack frame transition on this thread's stack
+/// (it is idempotent to call it more than once) and keep its return value alive
+/// across the duration of that host-->Wasm call.
+pub unsafe fn gc(
     stack_maps_registry: &StackMapRegistry,
     externref_activations_table: &VMExternRefActivationsTable,
 ) {
@@ -963,11 +1046,57 @@ pub fn gc(
     debug_assert!({
         // This set is only non-empty within this function. It is built up when
         // walking the stack and interpreting stack maps, and then drained back
-        // into the activations table's bump-allocated space at the end.
+        // into the activations table's bump-allocated space at the
+        // end. Therefore, it should always be empty upon entering this
+        // function.
         let precise_stack_roots = externref_activations_table.precise_stack_roots.borrow();
         precise_stack_roots.is_empty()
     });
 
+    // Whenever we call into Wasm from host code for the first time, we set a
+    // stack canary. When we return to that host code, we unset the stack
+    // canary. If there is *not* a stack canary, then there must be zero Wasm
+    // frames on the stack. Therefore, we can simply reset the table without
+    // walking the stack.
+    let stack_canary = match externref_activations_table.stack_canary.get() {
+        None => {
+            if cfg!(debug_assertions) {
+                // Assert that there aren't any Wasm frames on the stack.
+                backtrace::trace(|frame| {
+                    let stack_map = stack_maps_registry.lookup_stack_map(frame.ip() as usize);
+                    assert!(stack_map.is_none());
+                    true
+                });
+            }
+            externref_activations_table.reset();
+            log::debug!("end GC");
+            return;
+        }
+        Some(canary) => canary.as_ptr() as usize,
+    };
+
+    // There is a stack canary, so there must be Wasm frames on the stack. The
+    // rest of this function consists of:
+    //
+    // * walking the stack,
+    //
+    // * finding the precise set of roots inside Wasm frames via our stack maps,
+    //   and
+    //
+    // * resetting our bump-allocated table's over-approximation to the
+    //   newly-discovered precise set.
+
+    // The SP of the previous frame we processed.
+    let mut last_sp = None;
+
+    // Whether we have found our stack canary or not yet.
+    let mut found_canary = false;
+
+    // The `activations_table_set` is used for `debug_assert!`s checking that
+    // every reference we read out from the stack via stack maps is actually in
+    // the table. If that weren't true, than either we forgot to insert a
+    // reference in the table when passing it into Wasm (a bug) or we are
+    // reading invalid references from the stack (another bug).
     let mut activations_table_set: DebugOnly<HashSet<_>> = Default::default();
     if cfg!(debug_assertions) {
         externref_activations_table.elements(|elem| {
@@ -977,20 +1106,18 @@ pub fn gc(
 
     backtrace::trace(|frame| {
         let pc = frame.ip() as usize;
+        let sp = frame.sp() as usize;
 
         if let Some(stack_map) = stack_maps_registry.lookup_stack_map(pc) {
-            let ptr_to_frame = frame.sp() as usize;
-
             for i in 0..(stack_map.mapped_words() as usize) {
                 if stack_map.get_bit(i) {
                     // Stack maps have one bit per word in the frame, and the
                     // zero^th bit is the *lowest* addressed word in the frame,
                     // i.e. the closest to the SP. So to get the `i`^th word in
-                    // this frame, we add `i * sizeof(word)` to the
-                    // lowest-addressed word within this frame.
-                    let ptr_to_ref = ptr_to_frame + i * mem::size_of::<usize>();
+                    // this frame, we add `i * sizeof(word)` to the SP.
+                    let ptr_to_ref = sp + i * mem::size_of::<usize>();
 
-                    let r = unsafe { std::ptr::read(ptr_to_ref as *const *mut VMExternData) };
+                    let r = std::ptr::read(ptr_to_ref as *const *mut VMExternData);
                     debug_assert!(
                         r.is_null() || activations_table_set.contains(&r),
                         "every on-stack externref inside a Wasm frame should \
@@ -1003,11 +1130,32 @@ pub fn gc(
             }
         }
 
-        // Keep walking the stack.
-        true
+        if let Some(last_sp) = last_sp {
+            // We've found the stack canary when we walk over the frame that it
+            // is contained within.
+            found_canary |= last_sp <= stack_canary && stack_canary <= sp;
+        }
+        last_sp = Some(sp);
+
+        // Keep walking the stack until we've found the canary, which is the
+        // oldest frame before we ever called into Wasm. We can stop once we've
+        // found it because there won't be any more Wasm frames, and therefore
+        // there won't be anymore on-stack, inside-a-Wasm-frame roots.
+        !found_canary
     });
 
-    externref_activations_table.reset();
+    // Only reset the table if we found the stack canary, and therefore know
+    // that we discovered all the on-stack, inside-a-Wasm-frame roots. If we did
+    // *not* find the stack canary, then `libunwind` failed to walk the whole
+    // stack, and we might be missing roots. Reseting the table would free those
+    // missing roots while they are still in use, leading to use-after-free.
+    if found_canary {
+        externref_activations_table.reset();
+    } else {
+        let mut roots = externref_activations_table.precise_stack_roots.borrow_mut();
+        roots.clear();
+    }
+
     log::debug!("end GC");
 }
 
diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs
index c8551d2aad58..9fb58c743dfa 100644
--- a/crates/wasmtime/src/func.rs
+++ b/crates/wasmtime/src/func.rs
@@ -194,9 +194,17 @@ macro_rules! getters {
                     >(export.address);
                     let mut ret = None;
                     $(let $args = $args.into_abi();)*
-                    catch_traps(export.vmctx, &instance.store, || {
-                        ret = Some(fnptr(export.vmctx, ptr::null_mut(), $($args,)*));
-                    })?;
+
+                    {
+                        let canary = 0;
+                        let _auto_reset = instance
+                            .store
+                            .externref_activations_table()
+                            .set_stack_canary(&canary);
+                        catch_traps(export.vmctx, &instance.store, || {
+                            ret = Some(fnptr(export.vmctx, ptr::null_mut(), $($args,)*));
+                        })?;
+                    }
 
                     Ok(ret.unwrap())
                 }
@@ -552,14 +560,23 @@ impl Func {
         }
 
         // Call the trampoline.
-        catch_traps(self.export.vmctx, &self.instance.store, || unsafe {
-            (self.trampoline)(
-                self.export.vmctx,
-                ptr::null_mut(),
-                self.export.address,
-                values_vec.as_mut_ptr(),
-            )
-        })?;
+        {
+            let canary = 0;
+            let _auto_reset = self
+                .instance
+                .store
+                .externref_activations_table()
+                .set_stack_canary(&canary);
+
+            catch_traps(self.export.vmctx, &self.instance.store, || unsafe {
+                (self.trampoline)(
+                    self.export.vmctx,
+                    ptr::null_mut(),
+                    self.export.address,
+                    values_vec.as_mut_ptr(),
+                )
+            })?;
+        }
 
         // Load the return values out of `values_vec`.
         let mut results = Vec::with_capacity(my_ty.results().len());
diff --git a/crates/wasmtime/src/runtime.rs b/crates/wasmtime/src/runtime.rs
index 31bf83bcc14d..6e3f95b5ff86 100644
--- a/crates/wasmtime/src/runtime.rs
+++ b/crates/wasmtime/src/runtime.rs
@@ -1097,10 +1097,14 @@ impl Store {
 
     /// Perform garbage collection of `ExternRef`s.
     pub fn gc(&self) {
-        wasmtime_runtime::gc(
-            &*self.inner.stack_map_registry,
-            &*self.inner.externref_activations_table,
-        );
+        // For this crate's API, we ensure that `set_stack_canary` invariants
+        // are upheld for all host-->Wasm calls.
+        unsafe {
+            wasmtime_runtime::gc(
+                &*self.inner.stack_map_registry,
+                &*self.inner.externref_activations_table,
+            );
+        }
     }
 }
 

From 7e167cae10e94c73b4dafc209fd15160d857136b Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald <fitzgen@gmail.com>
Date: Fri, 12 Jun 2020 17:22:54 -0700
Subject: [PATCH 3/4] externref: Address review feedback

---
 Cargo.lock                                    | 265 +++++-----
 Cargo.toml                                    |   4 +-
 crates/environ/src/cache/tests.rs             |   1 +
 crates/environ/src/cranelift.rs               |   2 +-
 crates/jit/src/code_memory.rs                 |   2 +-
 crates/jit/src/instantiate.rs                 |   5 +-
 crates/runtime/Cargo.toml                     |   2 +-
 crates/runtime/src/externref.rs               | 489 +++++++-----------
 crates/runtime/src/instance.rs                |  32 +-
 crates/runtime/src/mmap.rs                    |   2 +-
 crates/wasmtime/src/func.rs                   |  45 +-
 crates/wasmtime/src/instance.rs               |  33 +-
 crates/wasmtime/src/module.rs                 |  38 --
 crates/wasmtime/src/ref.rs                    |   6 +-
 crates/wasmtime/src/runtime.rs                |  41 +-
 .../wasmtime/src/trampoline/create_handle.rs  |   4 +-
 crates/wasmtime/src/trampoline/func.rs        |  19 +-
 crates/wasmtime/src/types.rs                  |   6 +
 crates/wasmtime/src/values.rs                 |   7 +-
 tests/all/gc.rs                               |   4 +-
 20 files changed, 420 insertions(+), 587 deletions(-)
 mode change 100755 => 100644 crates/wasmtime/src/ref.rs

diff --git a/Cargo.lock b/Cargo.lock
index fcfb241d34f9..5e4f6c155214 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11,9 +11,9 @@ dependencies = [
 
 [[package]]
 name = "adler32"
-version = "1.0.4"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
+checksum = "567b077b825e468cc974f0020d4082ee6e03132512f207ef1a02fd5d00d1f32d"
 
 [[package]]
 name = "ahash"
@@ -50,9 +50,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.28"
+version = "1.0.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9a60d744a80c30fcb657dfe2c1b22bcb3e814c1a1e3674f32bf5820b570fbff"
+checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
 
 [[package]]
 name = "arbitrary"
@@ -100,14 +100,15 @@ checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
 
 [[package]]
 name = "backtrace"
-version = "0.3.48"
-source = "git+https://github.com/rust-lang/backtrace-rs.git#05319f46dd4126a4d4a3b2bbf8c378c4cf9fd902"
+version = "0.3.49"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05100821de9e028f12ae3d189176b41ee198341eb8f369956407fea2f5cc666c"
 dependencies = [
  "addr2line",
  "cfg-if",
  "libc",
  "miniz_oxide",
- "object 0.19.0",
+ "object 0.20.0",
  "rustc-demangle",
 ]
 
@@ -119,9 +120,9 @@ checksum = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
 
 [[package]]
 name = "base64"
-version = "0.12.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d5ca2cd0adc3f48f9e9ea5a6bbdf9ccc0bfade884847e484d452414c7ccffb3"
+checksum = "53d1ccbaf7d9ec9537465a97bf19edc1a4e158ecb49fc16178202238c569cc42"
 
 [[package]]
 name = "binaryen"
@@ -156,18 +157,18 @@ dependencies = [
 
 [[package]]
 name = "bit-set"
-version = "0.5.1"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e84c238982c4b1e1ee668d136c510c67a13465279c0cb367ea6baf6310620a80"
+checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de"
 dependencies = [
  "bit-vec",
 ]
 
 [[package]]
 name = "bit-vec"
-version = "0.5.1"
+version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f59bbe95d4e52a6398ec21238d31577f2b28a9d86807f06ca59d191d8440d0bb"
+checksum = "5f0dc55f2d8a1a85650ac47858bb001b4c0dd73d79e3c455a842925e68d29cd3"
 
 [[package]]
 name = "bitflags"
@@ -209,9 +210,9 @@ dependencies = [
 
 [[package]]
 name = "bumpalo"
-version = "3.3.0"
+version = "3.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5356f1d23ee24a1f785a56d1d1a5f0fd5b0f6a0c0fb2412ce11da71649ab78f6"
+checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820"
 
 [[package]]
 name = "byte-tools"
@@ -245,9 +246,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.0.52"
+version = "1.0.54"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3d87b23d6a92cd03af510a5ade527033f6aa6fa92161e2d5863a907d4c5e31d"
+checksum = "7bbb73db36c1246e9034e307d0fba23f9a2e251faa47ade70c1bd252220c8311"
 dependencies = [
  "jobserver",
 ]
@@ -271,9 +272,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "2.33.0"
+version = "2.33.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
+checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
 dependencies = [
  "ansi_term",
  "atty",
@@ -295,18 +296,18 @@ dependencies = [
 
 [[package]]
 name = "cmake"
-version = "0.1.42"
+version = "0.1.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81fb25b677f8bf1eb325017cb6bb8452f87969db0fedb4f757b297bee78a7c62"
+checksum = "0e56268c17a6248366d66d4a47a3381369d068cce8409bb1716ed77ea32163bb"
 dependencies = [
  "cc",
 ]
 
 [[package]]
 name = "console"
-version = "0.11.2"
+version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dea0f3e2e8d7dba335e913b97f9e1992c86c4399d54f8be1d31c8727d0652064"
+checksum = "8c0994e656bba7b922d8dd1245db90672ffb701e684e45be58f20719d69abc5a"
 dependencies = [
  "encode_unicode",
  "lazy_static",
@@ -499,7 +500,7 @@ dependencies = [
  "anyhow",
  "cranelift-codegen",
  "cranelift-module",
- "object 0.18.0",
+ "object 0.19.0",
  "target-lexicon",
 ]
 
@@ -639,12 +640,13 @@ dependencies = [
 
 [[package]]
 name = "crossbeam-queue"
-version = "0.2.1"
+version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db"
+checksum = "774ba60a54c213d409d5353bda12d49cd68d14e45036a285234c8d6f91f92570"
 dependencies = [
  "cfg-if",
  "crossbeam-utils",
+ "maybe-uninit",
 ]
 
 [[package]]
@@ -680,9 +682,9 @@ dependencies = [
 
 [[package]]
 name = "derive_more"
-version = "0.99.5"
+version = "0.99.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2323f3f47db9a0e77ce7a300605d8d2098597fc451ed1a97bb1f6411bb550a7"
+checksum = "2127768764f1556535c01b5326ef94bd60ff08dcfbdc544d53e69ed155610f5d"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -691,9 +693,9 @@ dependencies = [
 
 [[package]]
 name = "derive_utils"
-version = "0.9.1"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "114aa287358087a616096186f3de19525d8083f83d437dc6b583f895316b02e6"
+checksum = "3df5480412da86cdf5d6b7f3b682422c84359ff7399aa658df1d15ee83244b1d"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -783,19 +785,6 @@ version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 
-[[package]]
-name = "env_logger"
-version = "0.6.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3"
-dependencies = [
- "atty",
- "humantime",
- "log",
- "regex",
- "termcolor",
-]
-
 [[package]]
 name = "env_logger"
 version = "0.7.1"
@@ -867,11 +856,11 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
 
 [[package]]
 name = "file-per-thread-logger"
-version = "0.1.2"
+version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8505b75b31ef7285168dd237c4a7db3c1f3e0927e7d314e670bc98e854272fe9"
+checksum = "8b3937f028664bd0e13df401ba49a4567ccda587420365823242977f06609ed1"
 dependencies = [
- "env_logger 0.6.2",
+ "env_logger",
  "log",
 ]
 
@@ -887,9 +876,9 @@ dependencies = [
 
 [[package]]
 name = "filetime"
-version = "0.2.9"
+version = "0.2.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f59efc38004c988e4201d11d263b8171f49a2e7ec0bdbb71773433f271504a5e"
+checksum = "affc17579b132fc2461adf7c575cc6e8b134ebca52c51f5411388965227dc695"
 dependencies = [
  "cfg-if",
  "libc",
@@ -899,15 +888,15 @@ dependencies = [
 
 [[package]]
 name = "fnv"
-version = "1.0.6"
+version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
 
 [[package]]
 name = "fst"
-version = "0.4.3"
+version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81f9cac32c1741cdf6b66be7dcf0d9c7f25ccf12f8aa84c16cfa31f9f14513b3"
+checksum = "a7293de202dbfe786c0b3fe6110a027836c5438ed06db7b715c9955ff4bfea51"
 
 [[package]]
 name = "fuchsia-cprng"
@@ -1000,9 +989,9 @@ dependencies = [
 
 [[package]]
 name = "hermit-abi"
-version = "0.1.12"
+version = "0.1.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61565ff7aaace3525556587bd2dc31d4a07071957be715e63ce7b1eccf51a8f4"
+checksum = "b9586eedd4ce6b3c498bc3b4dd92fc9f11166aa908a914071953768066c67909"
 dependencies = [
  "libc",
 ]
@@ -1018,9 +1007,9 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "1.3.2"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "076f042c5b7b98f31d205f1249267e12a6518c1481e9dae9764af19b707d2292"
+checksum = "c398b2b113b55809ceb9ee3e753fcbac793f1956663f3c36549c1346015c2afe"
 dependencies = [
  "autocfg 1.0.0",
 ]
@@ -1039,9 +1028,9 @@ dependencies = [
 
 [[package]]
 name = "iter-enum"
-version = "0.2.3"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "58107b833f974ca7bc9f98e032881967613dc214b6ab7ececb45b1ab9f0e7008"
+checksum = "2cdea9771bec3d95893f6c665a4fcd477af7858446a46bc2772f560534eee43b"
 dependencies = [
  "derive_utils",
  "quote",
@@ -1057,6 +1046,15 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "0.4.5"
@@ -1095,9 +1093,9 @@ checksum = "3576a87f2ba00f6f106fdfcd16db1d698d648a26ad8e0573cad8537c3c362d2a"
 
 [[package]]
 name = "libc"
-version = "0.2.70"
+version = "0.2.71"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3baa92041a6fec78c687fa0cc2b3fae8884f743d672cf551bed1d6dac6988d0f"
+checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
 
 [[package]]
 name = "libfuzzer-sys"
@@ -1120,7 +1118,7 @@ dependencies = [
  "dynasm",
  "dynasmrt",
  "iter-enum",
- "itertools",
+ "itertools 0.8.2",
  "lazy_static",
  "memoffset",
  "more-asserts",
@@ -1144,9 +1142,9 @@ dependencies = [
 
 [[package]]
 name = "mach"
-version = "0.2.3"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86dd2487cdfea56def77b88438a2c915fb45113c5319bfe7e14306ca4cd0b0e1"
+checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa"
 dependencies = [
  "libc",
 ]
@@ -1208,9 +1206,9 @@ checksum = "0debeb9fcf88823ea64d64e4a815ab1643f33127d995978e099942ce38f25238"
 
 [[package]]
 name = "num-integer"
-version = "0.1.42"
+version = "0.1.43"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f6ea62e9d81a77cd3ee9a2a5b9b609447857f3d358704331e4ef39eb247fcba"
+checksum = "8d59457e662d541ba17869cf51cf177c0b5f0cbf476c66bdc90bf1edac4f875b"
 dependencies = [
  "autocfg 1.0.0",
  "num-traits",
@@ -1218,9 +1216,9 @@ dependencies = [
 
 [[package]]
 name = "num-traits"
-version = "0.2.11"
+version = "0.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096"
+checksum = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611"
 dependencies = [
  "autocfg 1.0.0",
 ]
@@ -1253,14 +1251,15 @@ dependencies = [
 
 [[package]]
 name = "object"
-version = "0.19.0"
-source = "git+https://github.com/gimli-rs/object.git#2ad508810e5470f5bcb7ede8d866c8b1eda4c4d3"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ab52be62400ca80aa00285d25253d7f7c437b7375c4de678f5405d3afe82ca5"
 
 [[package]]
 name = "once_cell"
-version = "1.3.1"
+version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b1c601810575c99596d4afc46f78a678c80105117c379eb3650cf99b8a21ce5b"
+checksum = "0b631f7e854af39a1739f401cf34a8a013dfe09eac4fa4dba91e9768bd28168d"
 
 [[package]]
 name = "opaque-debug"
@@ -1270,9 +1269,9 @@ checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
 
 [[package]]
 name = "os_pipe"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db4d06355a7090ce852965b2d08e11426c315438462638c6d721448d0b47aa22"
+checksum = "fb233f06c2307e1f5ce2ecad9f8121cffbbee2c95428f44ea85222e460d0d213"
 dependencies = [
  "libc",
  "winapi",
@@ -1312,7 +1311,7 @@ version = "0.2.0"
 dependencies = [
  "arbitrary",
  "bincode",
- "env_logger 0.7.1",
+ "env_logger",
  "fst",
  "log",
  "peepmatic",
@@ -1351,7 +1350,7 @@ dependencies = [
 name = "peepmatic-test"
 version = "0.2.0"
 dependencies = [
- "env_logger 0.7.1",
+ "env_logger",
  "log",
  "peepmatic",
  "peepmatic-runtime",
@@ -1365,9 +1364,9 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
 
 [[package]]
 name = "ppv-lite86"
-version = "0.2.6"
+version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
+checksum = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea"
 
 [[package]]
 name = "pretty_env_logger"
@@ -1375,7 +1374,7 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d"
 dependencies = [
- "env_logger 0.7.1",
+ "env_logger",
  "log",
 ]
 
@@ -1407,15 +1406,15 @@ dependencies = [
 
 [[package]]
 name = "proc-macro-hack"
-version = "0.5.15"
+version = "0.5.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d659fe7c6d27f25e9d80a1a094c223f5246f6a6596453e09d7229bf42750b63"
+checksum = "7e0456befd48169b9f13ef0f0ad46d492cf9d2dbb918bcf38e01eed4ce3ec5e4"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.10"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df246d292ff63439fea9bc8c0a270bed0e390d5ebd4db4ba15aba81111b5abe3"
+checksum = "beae6331a816b1f65d04c45b078fd8e6c93e8071771f41b8163255bbd8d7c8fa"
 dependencies = [
  "unicode-xid",
 ]
@@ -1452,7 +1451,7 @@ version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a44883e74aa97ad63db83c4bf8ca490f02b2fc02f92575e720c8551e843c945f"
 dependencies = [
- "env_logger 0.7.1",
+ "env_logger",
  "log",
  "rand 0.7.3",
  "rand_core 0.5.1",
@@ -1637,10 +1636,11 @@ dependencies = [
 
 [[package]]
 name = "rayon"
-version = "1.3.0"
+version = "1.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098"
+checksum = "62f02856753d04e03e26929f820d0a0a337ebe71f849801eea335d464b349080"
 dependencies = [
+ "autocfg 1.0.0",
  "crossbeam-deque",
  "either",
  "rayon-core",
@@ -1648,9 +1648,9 @@ dependencies = [
 
 [[package]]
 name = "rayon-core"
-version = "1.7.0"
+version = "1.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9"
+checksum = "e92e15d89083484e11353891f1af602cc661426deb9564c298b270c726973280"
 dependencies = [
  "crossbeam-deque",
  "crossbeam-queue",
@@ -1699,9 +1699,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.3.7"
+version = "1.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6020f034922e3194c711b82a627453881bc4682166cabb07134a10c26ba7692"
+checksum = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -1721,15 +1721,15 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.6.17"
+version = "0.6.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae"
+checksum = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
 
 [[package]]
 name = "region"
-version = "2.1.2"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "448e868c6e4cfddfa49b6a72c95906c04e8547465e9536575b95c70a4044f856"
+checksum = "877e54ea2adcd70d80e9179344c97f93ef0dffd6b03e1f4529e6e83ab2fa9ae0"
 dependencies = [
  "bitflags",
  "libc",
@@ -1739,9 +1739,9 @@ dependencies = [
 
 [[package]]
 name = "remove_dir_all"
-version = "0.5.2"
+version = "0.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e"
+checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
 dependencies = [
  "winapi",
 ]
@@ -1800,9 +1800,9 @@ dependencies = [
 
 [[package]]
 name = "ryu"
-version = "1.0.4"
+version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1"
+checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
 
 [[package]]
 name = "same-file"
@@ -1830,9 +1830,9 @@ dependencies = [
 
 [[package]]
 name = "scroll_derive"
-version = "0.10.1"
+version = "0.10.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8584eea9b9ff42825b46faf46a8c24d2cff13ec152fa2a50df788b87c07ee28"
+checksum = "e367622f934864ffa1c704ba2b82280aab856e3d8213c84c5720257eb34b15b9"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1856,18 +1856,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 
 [[package]]
 name = "serde"
-version = "1.0.106"
+version = "1.0.112"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399"
+checksum = "736aac72d1eafe8e5962d1d1c3d99b0df526015ba40915cb3c49d042e92ec243"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.106"
+version = "1.0.112"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e549e3abf4fb8621bd1609f11dfc9f5e50320802273b12f3811a67e6716ea6c"
+checksum = "bf0343ce212ac0d3d6afd9391ac8e9c9efe06b533c8d33f660f6390cc4093f57"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1876,9 +1876,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.52"
+version = "1.0.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7894c8ed05b7a3a279aeb79025fdec1d3158080b75b98a08faf2806bb799edd"
+checksum = "ec2c5d7e739bc07a3e73381a39d61fdb5f671c60c1df26a130690665803d8226"
 dependencies = [
  "itoa",
  "ryu",
@@ -1887,9 +1887,9 @@ dependencies = [
 
 [[package]]
 name = "sha2"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0"
+checksum = "a256f46ea78a0c0d9ff00077504903ac881a1dafdc20da66545699e7776b3e69"
 dependencies = [
  "block-buffer",
  "digest",
@@ -1965,9 +1965,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "1.0.18"
+version = "1.0.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "410a7488c0a728c7ceb4ad59b9567eb4053d02e8cc7f5c0e0eeeb39518369213"
+checksum = "b5304cfdf27365b7585c25d4af91b35016ed21ef88f17ced89c7093b43dba8b6"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2070,18 +2070,18 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.16"
+version = "1.0.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d12a1dae4add0f0d568eebc7bf142f145ba1aa2544cafb195c76f0f409091b60"
+checksum = "b13f926965ad00595dd129fa12823b04bbf866e9085ab0a5f2b05b850fbfc344"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.16"
+version = "1.0.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f34e0c1caaa462fd840ec6b768946ea1e7842620d94fe29d5b847138f521269"
+checksum = "893582086c2f98cde18f906265a65b5030a074b1046c674ae898be6519a7f479"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2239,15 +2239,15 @@ dependencies = [
 
 [[package]]
 name = "vec_map"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
+checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
 
 [[package]]
 name = "version_check"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce"
+checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
 
 [[package]]
 name = "wait-timeout"
@@ -2345,7 +2345,7 @@ name = "wasmtime-c-api"
 version = "0.18.0"
 dependencies = [
  "anyhow",
- "env_logger 0.7.1",
+ "env_logger",
  "once_cell",
  "wasi-common",
  "wasmtime",
@@ -2367,14 +2367,14 @@ name = "wasmtime-cli"
 version = "0.18.0"
 dependencies = [
  "anyhow",
- "env_logger 0.7.1",
+ "env_logger",
  "file-per-thread-logger",
  "filecheck",
  "humantime",
  "libc",
  "log",
  "more-asserts",
- "object 0.18.0",
+ "object 0.19.0",
  "pretty_env_logger",
  "rayon",
  "structopt",
@@ -2401,7 +2401,7 @@ dependencies = [
  "anyhow",
  "gimli",
  "more-asserts",
- "object 0.18.0",
+ "object 0.19.0",
  "target-lexicon",
  "thiserror",
  "wasmparser 0.57.0",
@@ -2413,7 +2413,7 @@ name = "wasmtime-environ"
 version = "0.18.0"
 dependencies = [
  "anyhow",
- "base64 0.12.0",
+ "base64 0.12.1",
  "bincode",
  "cranelift-codegen",
  "cranelift-entity",
@@ -2461,7 +2461,7 @@ dependencies = [
  "anyhow",
  "arbitrary",
  "binaryen",
- "env_logger 0.7.1",
+ "env_logger",
  "log",
  "rayon",
  "wasmparser 0.57.0",
@@ -2502,7 +2502,7 @@ version = "0.18.0"
 dependencies = [
  "anyhow",
  "more-asserts",
- "object 0.18.0",
+ "object 0.19.0",
  "wasmtime-environ",
 ]
 
@@ -2516,7 +2516,7 @@ dependencies = [
  "ittapi-rs",
  "lazy_static",
  "libc",
- "object 0.18.0",
+ "object 0.19.0",
  "scroll",
  "serde",
  "target-lexicon",
@@ -2678,7 +2678,7 @@ dependencies = [
 name = "wiggle-test"
 version = "0.18.0"
 dependencies = [
- "env_logger 0.7.1",
+ "env_logger",
  "proptest",
  "thiserror",
  "tracing",
@@ -2772,18 +2772,18 @@ dependencies = [
 
 [[package]]
 name = "zstd"
-version = "0.5.1+zstd.1.4.4"
+version = "0.5.3+zstd.1.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c5d978b793ae64375b80baf652919b148f6a496ac8802922d9999f5a553194f"
+checksum = "01b32eaf771efa709e8308605bbf9319bf485dc1503179ec0469b611937c0cd8"
 dependencies = [
  "zstd-safe",
 ]
 
 [[package]]
 name = "zstd-safe"
-version = "2.0.3+zstd.1.4.4"
+version = "2.0.5+zstd.1.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bee25eac9753cfedd48133fa1736cbd23b774e253d89badbeac7d12b23848d3f"
+checksum = "1cfb642e0d27f64729a639c52db457e0ae906e7bc6f5fe8f5c453230400f1055"
 dependencies = [
  "libc",
  "zstd-sys",
@@ -2791,11 +2791,12 @@ dependencies = [
 
 [[package]]
 name = "zstd-sys"
-version = "1.4.15+zstd.1.4.4"
+version = "1.4.17+zstd.1.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89719b034dc22d240d5b407fb0a3fe6d29952c181cff9a9f95c0bd40b4f8f7d8"
+checksum = "b89249644df056b522696b1bb9e7c18c87e8ffa3e2f0dc3b0155875d6498f01b"
 dependencies = [
  "cc",
  "glob",
+ "itertools 0.9.0",
  "libc",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index f62003074897..d7b22090f4be 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -88,5 +88,5 @@ maintenance = { status = "actively-developed" }
 name = "host_segfault"
 harness = false
 
-[patch.crates-io]
-backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git" }
+[profile.dev.package.backtrace]
+debug = false # FIXME(#1813)
diff --git a/crates/environ/src/cache/tests.rs b/crates/environ/src/cache/tests.rs
index d454371178e0..35dc6c613e36 100644
--- a/crates/environ/src/cache/tests.rs
+++ b/crates/environ/src/cache/tests.rs
@@ -100,5 +100,6 @@ fn new_module_cache_data() -> Result<ModuleCacheDataTupleType, ()> {
         PrimaryMap::new(),
         PrimaryMap::new(),
         PrimaryMap::new(),
+        PrimaryMap::new(),
     ))
 }
diff --git a/crates/environ/src/cranelift.rs b/crates/environ/src/cranelift.rs
index d261200848dc..89a7dfd6f6fb 100644
--- a/crates/environ/src/cranelift.rs
+++ b/crates/environ/src/cranelift.rs
@@ -221,7 +221,7 @@ impl binemit::StackmapSink for StackMapSink {
 
 impl StackMapSink {
     fn finish(mut self) -> Vec<StackMapInformation> {
-        self.infos.sort_by_key(|info| info.code_offset);
+        self.infos.sort_unstable_by_key(|info| info.code_offset);
         self.infos
     }
 }
diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs
index e9dc25fc90e5..b7e0a3851f38 100644
--- a/crates/jit/src/code_memory.rs
+++ b/crates/jit/src/code_memory.rs
@@ -134,7 +134,7 @@ impl CodeMemory {
 
             if !m.is_empty() {
                 unsafe {
-                    region::protect(m.as_mut_ptr(), m.len(), region::Protection::ReadExecute)
+                    region::protect(m.as_mut_ptr(), m.len(), region::Protection::READ_EXECUTE)
                 }
                 .expect("unable to make memory readonly and executable");
             }
diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs
index 7180a9efe891..97b0ef05e12a 100644
--- a/crates/jit/src/instantiate.rs
+++ b/crates/jit/src/instantiate.rs
@@ -10,7 +10,6 @@ use crate::link::link_module;
 use crate::resolver::Resolver;
 use std::any::Any;
 use std::collections::HashMap;
-use std::rc::Rc;
 use std::sync::Arc;
 use thiserror::Error;
 use wasmtime_debug::{read_debuginfo, write_debugsections_image, DwarfSection};
@@ -173,8 +172,8 @@ impl CompiledModule {
         mem_creator: Option<&dyn RuntimeMemoryCreator>,
         interrupts: Arc<VMInterrupts>,
         host_state: Box<dyn Any>,
-        externref_activations_table: Rc<VMExternRefActivationsTable>,
-        stack_map_registry: Arc<StackMapRegistry>,
+        externref_activations_table: *mut VMExternRefActivationsTable,
+        stack_map_registry: *mut StackMapRegistry,
     ) -> Result<InstanceHandle, InstantiationError> {
         // Compute indices into the shared signature table.
         let signatures = {
diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml
index 91de66aae482..9ed855d2f2ca 100644
--- a/crates/runtime/Cargo.toml
+++ b/crates/runtime/Cargo.toml
@@ -21,7 +21,7 @@ indexmap = "1.0.2"
 thiserror = "1.0.4"
 more-asserts = "0.2.1"
 cfg-if = "0.1.9"
-backtrace = "0.3.42"
+backtrace = "0.3.49"
 lazy_static = "1.3.0"
 
 [target.'cfg(target_os = "windows")'.dependencies]
diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs
index 92d7105a8a55..04c2bddbeb6c 100644
--- a/crates/runtime/src/externref.rs
+++ b/crates/runtime/src/externref.rs
@@ -105,11 +105,11 @@ use std::cell::{Cell, RefCell, UnsafeCell};
 use std::cmp::Ordering;
 use std::collections::BTreeMap;
 use std::collections::HashSet;
-use std::hash::Hasher;
+use std::hash::{Hash, Hasher};
 use std::mem;
 use std::ops::Deref;
 use std::ptr::{self, NonNull};
-use std::sync::{Arc, RwLock};
+use std::rc::Rc;
 use wasmtime_environ::{ir::Stackmap, StackMapInformation};
 
 /// An external reference to some opaque data.
@@ -337,37 +337,6 @@ impl VMExternRef {
         }
     }
 
-    // /// Turn this `VMExternRef` into a raw, untyped pointer.
-    // ///
-    // /// This forgets `self` and does *not* decrement the reference count on the
-    // /// pointed-to data.
-    // ///
-    // /// This `VMExternRef` may be recovered with `VMExternRef::from_raw`.
-    // pub fn into_raw(self) -> *mut u8 {
-    //     let ptr = self.as_raw();
-    //     mem::forget(self);
-    //     ptr
-    // }
-
-    // /// Recreate a `VMExternRef` from a pointer returned from a previous call to
-    // /// `VMExternRef::into_raw`.
-    // ///
-    // /// # Safety
-    // ///
-    // /// Wildly unsafe to use with anything other than the result of a previous
-    // /// `into_raw` call!
-    // ///
-    // /// This method does *not* increment the reference count on the pointed-to
-    // /// data, so `from_raw` must be called at most *once* on the result of a
-    // /// previous `into_raw` call. (Ideally, every `into_raw` is later followed
-    // /// by a `from_raw`, but it is technically memory safe to never call
-    // /// `from_raw` after `into_raw`: it will leak the pointed-to value, which is
-    // /// memory safe).
-    // pub unsafe fn from_raw(ptr: *mut u8) -> Self {
-    //     debug_assert!(!ptr.is_null());
-    //     VMExternRef(NonNull::new_unchecked(ptr).cast())
-    // }
-
     /// Turn this `VMExternRef` into a raw, untyped pointer.
     ///
     /// Unlike `into_raw`, this does not consume and forget `self`. It is *not*
@@ -379,7 +348,6 @@ impl VMExternRef {
     ///  `clone_from_raw` is called.
     pub fn as_raw(&self) -> *mut u8 {
         let ptr = self.0.cast::<u8>().as_ptr();
-        mem::forget(self);
         ptr
     }
 
@@ -401,8 +369,8 @@ impl VMExternRef {
         x
     }
 
-    /// Get the reference count for this `VMExternRef`.
-    pub fn get_reference_count(&self) -> usize {
+    /// Get the strong reference count for this `VMExternRef`.
+    pub fn strong_count(&self) -> usize {
         self.extern_data().get_ref_count()
     }
 
@@ -461,6 +429,31 @@ impl Deref for VMExternRef {
     }
 }
 
+/// A wrapper around a `VMExternRef` that implements `Eq` and `Hash` with
+/// pointer semantics.
+///
+/// We use this so that we can morally put `VMExternRef`s inside of `HashSet`s
+/// even though they don't implement `Eq` and `Hash` to avoid foot guns.
+#[derive(Clone)]
+struct VMExternRefWithTraits(VMExternRef);
+
+impl Hash for VMExternRefWithTraits {
+    fn hash<H>(&self, hasher: &mut H)
+    where
+        H: Hasher,
+    {
+        VMExternRef::hash(&self.0, hasher)
+    }
+}
+
+impl PartialEq for VMExternRefWithTraits {
+    fn eq(&self, other: &Self) -> bool {
+        VMExternRef::eq(&self.0, &other.0)
+    }
+}
+
+impl Eq for VMExternRefWithTraits {}
+
 type TableElem = UnsafeCell<Option<VMExternRef>>;
 
 /// A table that over-approximizes the set of `VMExternRef`s that any Wasm
@@ -470,35 +463,37 @@ type TableElem = UnsafeCell<Option<VMExternRef>>;
 /// entries. Deduplication happens at GC time.
 #[repr(C)]
 pub struct VMExternRefActivationsTable {
-    /// Bump-allocation finger within the current chunk.
+    /// Bump-allocation finger within the `chunk`.
     ///
-    /// NB: this is an `UnsafeCell` because it is read from and written to by
-    /// compiled Wasm code.
+    /// NB: this is an `UnsafeCell` because it is written to by compiled Wasm
+    /// code.
     next: UnsafeCell<NonNull<TableElem>>,
 
-    /// Pointer to just after the current chunk.
+    /// Pointer to just after the `chunk`.
     ///
     /// This is *not* within the current chunk and therefore is not a valid
     /// place to insert a reference!
-    ///
-    /// This is only updated from host code.
-    end: Cell<NonNull<TableElem>>,
+    end: NonNull<TableElem>,
 
-    /// The chunks within which we are bump allocating.
+    /// Bump allocation chunk that stores fast-path insertions.
+    chunk: Box<[TableElem]>,
+
+    /// When unioned with `chunk`, this is an over-approximation of the GC roots
+    /// on the stack, inside Wasm frames.
     ///
-    /// This is only updated from host code.
-    chunks: RefCell<Vec<Box<[TableElem]>>>,
+    /// This is used by slow-path insertion, and when a GC cycle finishes, is
+    /// re-initialized to the just-discovered precise set of stack roots (which
+    /// immediately becomes an over-approximation again as soon as Wasm runs and
+    /// potentially drops references).
+    over_approximated_stack_roots: RefCell<HashSet<VMExternRefWithTraits>>,
 
     /// The precise set of on-stack, inside-Wasm GC roots that we discover via
     /// walking the stack and interpreting stack maps.
     ///
-    /// That is, this is the precise set that the bump allocation table is
-    /// over-approximating.
-    ///
     /// This is *only* used inside the `gc` function, and is empty otherwise. It
     /// is just part of this struct so that we can reuse the allocation, rather
     /// than create a new hash set every GC.
-    precise_stack_roots: RefCell<HashSet<NonNull<VMExternData>>>,
+    precise_stack_roots: RefCell<HashSet<VMExternRefWithTraits>>,
 
     /// A pointer to a `u8` on the youngest host stack frame before we called
     /// into Wasm for the first time. When walking the stack in garbage
@@ -510,119 +505,107 @@ pub struct VMExternRefActivationsTable {
 }
 
 impl VMExternRefActivationsTable {
-    const INITIAL_CHUNK_SIZE: usize = 4096 / mem::size_of::<usize>();
+    const CHUNK_SIZE: usize = 4096 / mem::size_of::<usize>();
 
     /// Create a new `VMExternRefActivationsTable`.
     pub fn new() -> Self {
-        let chunk = Self::new_chunk(Self::INITIAL_CHUNK_SIZE);
+        let chunk = Self::new_chunk(Self::CHUNK_SIZE);
         let next = chunk.as_ptr() as *mut TableElem;
         let end = unsafe { next.add(chunk.len()) };
 
         VMExternRefActivationsTable {
             next: UnsafeCell::new(NonNull::new(next).unwrap()),
-            end: Cell::new(NonNull::new(end).unwrap()),
-            chunks: RefCell::new(vec![chunk]),
-            precise_stack_roots: RefCell::new(HashSet::with_capacity(Self::INITIAL_CHUNK_SIZE)),
+            end: NonNull::new(end).unwrap(),
+            chunk,
+            over_approximated_stack_roots: RefCell::new(HashSet::with_capacity(Self::CHUNK_SIZE)),
+            precise_stack_roots: RefCell::new(HashSet::with_capacity(Self::CHUNK_SIZE)),
             stack_canary: Cell::new(None),
         }
     }
 
     fn new_chunk(size: usize) -> Box<[UnsafeCell<Option<VMExternRef>>]> {
-        assert!(size >= Self::INITIAL_CHUNK_SIZE);
-        let mut chunk = Vec::with_capacity(size);
-        for _ in 0..size {
-            chunk.push(UnsafeCell::new(None));
-        }
-        chunk.into_boxed_slice()
+        assert!(size >= Self::CHUNK_SIZE);
+        (0..size).map(|_| UnsafeCell::new(None)).collect()
     }
 
     /// Try and insert a `VMExternRef` into this table.
     ///
-    /// This is a fast path that only succeeds when the current chunk has the
+    /// This is a fast path that only succeeds when the bump chunk has the
     /// capacity for the requested insertion.
     ///
     /// If the insertion fails, then the `VMExternRef` is given back. Callers
     /// may attempt a GC to free up space and try again, or may call
-    /// `insert_slow_path` to allocate a new bump chunk for this insertion.
+    /// `insert_slow_path` to infallibly insert the reference (potentially
+    /// allocating additional space in the table to hold it).
     #[inline]
     pub fn try_insert(&self, externref: VMExternRef) -> Result<(), VMExternRef> {
         unsafe {
             let next = *self.next.get();
-            let end = self.end.get();
-            if next == end {
+            if next == self.end {
                 return Err(externref);
             }
 
             debug_assert!((*next.as_ref().get()).is_none());
             ptr::write(next.as_ptr(), UnsafeCell::new(Some(externref)));
+
             let next = NonNull::new_unchecked(next.as_ptr().add(1));
-            debug_assert!(next <= end);
+            debug_assert!(next <= self.end);
             *self.next.get() = next;
+
             Ok(())
         }
     }
 
-    /// This is a slow path for inserting a reference into the table when the
-    /// current bump chunk is full.
+    /// Insert a reference into the table, falling back on a GC to clear up
+    /// space if the table is already full.
     ///
-    /// This method is infallible, and will allocate an additional bump chunk if
-    /// necessary.
-    #[inline(never)]
-    pub fn insert_slow_path(&self, externref: VMExternRef) {
-        let externref = match self.try_insert(externref) {
-            Ok(()) => return,
-            Err(x) => x,
-        };
-
-        {
-            let mut chunks = self.chunks.borrow_mut();
-
-            let new_size = chunks.last().unwrap().len() * 2;
-            let new_chunk = Self::new_chunk(new_size);
-
-            unsafe {
-                let next = new_chunk.as_ptr() as *mut TableElem;
-                debug_assert!(!next.is_null());
-                *self.next.get() = NonNull::new_unchecked(next);
-
-                let end = next.add(new_chunk.len());
-                debug_assert!(!end.is_null());
-                self.end.set(NonNull::new_unchecked(end));
-            }
-
-            chunks.push(new_chunk);
+    /// # Unsafety
+    ///
+    /// The same as `gc`.
+    #[inline]
+    pub unsafe fn insert_with_gc(
+        &self,
+        externref: VMExternRef,
+        stack_maps_registry: &StackMapRegistry,
+    ) {
+        if let Err(externref) = self.try_insert(externref) {
+            self.gc_and_insert_slow(externref, stack_maps_registry);
         }
+    }
 
-        self.try_insert(externref)
-            .expect("insertion should always succeed after we allocate a new chunk");
+    #[inline(never)]
+    unsafe fn gc_and_insert_slow(
+        &self,
+        externref: VMExternRef,
+        stack_maps_registry: &StackMapRegistry,
+    ) {
+        gc(stack_maps_registry, self);
+
+        // Might as well insert right into the hash set, rather than the bump
+        // chunk, since we are already on a slow path and we get de-duplication
+        // this way.
+        let mut roots = self.over_approximated_stack_roots.borrow_mut();
+        roots.insert(VMExternRefWithTraits(externref));
     }
 
-    fn num_filled_in_last_chunk(&self, chunks: &[Box<[TableElem]>]) -> usize {
-        let last_chunk = chunks.last().unwrap();
+    fn num_filled_in_bump_chunk(&self) -> usize {
         let next = unsafe { *self.next.get() };
-        let end = self.end.get();
-        let num_unused_in_last_chunk =
-            ((end.as_ptr() as usize) - (next.as_ptr() as usize)) / mem::size_of::<usize>();
-        last_chunk.len().saturating_sub(num_unused_in_last_chunk)
+        let bytes_unused = (self.end.as_ptr() as usize) - (next.as_ptr() as usize);
+        let slots_unused = bytes_unused / mem::size_of::<TableElem>();
+        self.chunk.len().saturating_sub(slots_unused)
     }
 
     fn elements(&self, mut f: impl FnMut(&VMExternRef)) {
-        // Every chunk except the last one is full, so we can simply iterate
-        // over all of their elements.
-        let chunks = self.chunks.borrow();
-        for chunk in chunks.iter().take(chunks.len() - 1) {
-            for elem in chunk.iter() {
-                if let Some(elem) = unsafe { &*elem.get() } {
-                    f(elem);
-                }
-            }
+        let roots = self.over_approximated_stack_roots.borrow();
+        for elem in roots.iter() {
+            f(&elem.0);
         }
 
-        // The last chunk is not all the way full, so we only iterate over its
-        // full parts.
-        let num_filled_in_last_chunk = self.num_filled_in_last_chunk(&chunks);
-        for elem in chunks.last().unwrap().iter().take(num_filled_in_last_chunk) {
-            if let Some(elem) = unsafe { &*elem.get() } {
+        // The bump chunk is not all the way full, so we only iterate over its
+        // filled-in slots.
+        let num_filled = self.num_filled_in_bump_chunk();
+        for slot in self.chunk.iter().take(num_filled) {
+            if let Some(elem) = unsafe { &*slot.get() } {
                 f(elem);
             }
         }
@@ -630,94 +613,43 @@ impl VMExternRefActivationsTable {
 
     fn insert_precise_stack_root(&self, root: NonNull<VMExternData>) {
         let mut precise_stack_roots = self.precise_stack_roots.borrow_mut();
-        if precise_stack_roots.insert(root) {
-            // If this root was not already in the set, then we need to
-            // increment its reference count, so that it doesn't get freed in
-            // `reset` when we're overwriting old bump allocation table entries
-            // with new ones.
-            unsafe {
-                root.as_ref().increment_ref_count();
-            }
-        }
+        let root = unsafe { VMExternRef::clone_from_raw(root.as_ptr() as *mut _) };
+        precise_stack_roots.insert(VMExternRefWithTraits(root));
     }
 
-    /// Refill the bump allocation table with our precise stack roots, and sweep
-    /// away everything else.
-    fn reset(&self) {
-        let mut chunks = self.chunks.borrow_mut();
-
-        let mut precise_roots = self.precise_stack_roots.borrow_mut();
-        if precise_roots.is_empty() {
-            // Get rid of all but our first bump chunk, and set our `next` and
-            // `end` bump allocation fingers into it.
+    /// Sweep the bump allocation table after we've discovered our precise stack
+    /// roots.
+    fn sweep(&self) {
+        // Sweep our bump chunk.
+        let num_filled = self.num_filled_in_bump_chunk();
+        for slot in self.chunk.iter().take(num_filled) {
             unsafe {
-                let chunk = chunks.first().unwrap();
-
-                let next = chunk.as_ptr() as *mut TableElem;
-                debug_assert!(!next.is_null());
-                *self.next.get() = NonNull::new_unchecked(next);
-
-                let end = next.add(chunk.len());
-                debug_assert!(!end.is_null());
-                self.end.set(NonNull::new_unchecked(end));
-            }
-            chunks.truncate(1);
-        } else {
-            // Drain our precise stack roots into the bump allocation table.
-            //
-            // This overwrites old entries, which drops them and decrements their
-            // reference counts. Safety relies on the reference count increment in
-            // `insert_precise_stack_root` to avoid over-eagerly dropping references
-            // that are in `self.precise_stack_roots` but haven't been inserted into
-            // the bump allocation table yet.
-            let mut precise_roots = precise_roots.drain();
-            'outer: for (chunk_index, chunk) in chunks.iter().enumerate() {
-                for (slot_index, slot) in chunk.iter().enumerate() {
-                    if let Some(root) = precise_roots.next() {
-                        unsafe {
-                            // NB: there is no reference count increment here
-                            // because everything in `self.precise_stack_roots`
-                            // already had its reference count incremented for us,
-                            // and this is logically a move out from there, rather
-                            // than a clone.
-                            *slot.get() = Some(VMExternRef(root));
-                        }
-                    } else {
-                        // We've inserted all of our precise, on-stack roots back
-                        // into the bump allocation table. Update our `next` and
-                        // `end` bump pointer members for the new current chunk, and
-                        // free any excess chunks.
-                        let start = chunk.as_ptr() as *mut TableElem;
-                        unsafe {
-                            let next = start.add(slot_index + 1);
-                            debug_assert!(!next.is_null());
-                            *self.next.get() = NonNull::new_unchecked(next);
-
-                            let end = start.add(chunk.len());
-                            debug_assert!(!end.is_null());
-                            self.end.set(NonNull::new_unchecked(end));
-                        }
-                        chunks.truncate(chunk_index + 1);
-                        break 'outer;
-                    }
-                }
+                *slot.get() = None;
             }
-
-            debug_assert!(
-                precise_roots.next().is_none(),
-                "should always have enough capacity in the bump allocations table \
-                 to hold all of our precise, on-stack roots"
-            );
         }
+        debug_assert!(
+            self.chunk
+                .iter()
+                .all(|slot| unsafe { (*slot.get()).as_ref().is_none() }),
+            "after sweeping the bump chunk, all slots should be `None`"
+        );
 
-        // Finally, sweep away excess capacity within our new last/current
-        // chunk, so that old, no-longer-live roots get dropped.
-        let num_filled_in_last_chunk = self.num_filled_in_last_chunk(&chunks);
-        for slot in chunks.last().unwrap().iter().skip(num_filled_in_last_chunk) {
-            unsafe {
-                *slot.get() = None;
-            }
+        // Reset our `next` bump allocation finger.
+        unsafe {
+            let next = self.chunk.as_ptr() as *mut TableElem;
+            debug_assert!(!next.is_null());
+            *self.next.get() = NonNull::new_unchecked(next);
         }
+
+        // The current `precise_roots` becomes our new over-appoximated set for
+        // the next GC cycle.
+        let mut precise_roots = self.precise_stack_roots.borrow_mut();
+        let mut over_approximated = self.over_approximated_stack_roots.borrow_mut();
+        mem::swap(&mut *precise_roots, &mut *over_approximated);
+
+        // And finally, the new `precise_roots` should be cleared and remain
+        // empty until the next GC cycle.
+        precise_roots.clear();
     }
 
     /// Set the stack canary around a call into Wasm.
@@ -739,7 +671,7 @@ impl VMExternRefActivationsTable {
     /// ```no_run
     /// use wasmtime_runtime::*;
     ///
-    /// #let get_table_from_somewhere = || unimplemented!();
+    /// # let get_table_from_somewhere = || unimplemented!();
     /// let table: &VMExternRefActivationsTable = get_table_from_somewhere();
     ///
     /// // Set the canary before a Wasm call. The canary should always be a
@@ -748,7 +680,7 @@ impl VMExternRefActivationsTable {
     /// let auto_reset_canary = table.set_stack_canary(&canary);
     ///
     /// // Do the call into Wasm.
-    /// #let call_into_wasm = || unimplemented!();
+    /// # let call_into_wasm = || unimplemented!();
     /// call_into_wasm();
     ///
     /// // Only drop the value returned by `set_stack_canary` after the Wasm
@@ -791,7 +723,7 @@ impl VMExternRefActivationsTable {
 /// A registry of stack maps for currently active Wasm modules.
 #[derive(Default)]
 pub struct StackMapRegistry {
-    inner: RwLock<StackMapRegistryInner>,
+    inner: RefCell<StackMapRegistryInner>,
 }
 
 #[derive(Default)]
@@ -804,13 +736,13 @@ struct StackMapRegistryInner {
 
 #[derive(Debug)]
 struct ModuleStackMaps {
-    /// The range of PCs that this module covers. Different modules should
-    /// always have distinct ranges.
+    /// The range of PCs that this module covers. Different modules must always
+    /// have distinct ranges.
     range: std::ops::Range<usize>,
 
     /// A map from a PC in this module (that is a GC safepoint) to its
     /// associated stack map.
-    pc_to_stack_map: Vec<(usize, Arc<Stackmap>)>,
+    pc_to_stack_map: Vec<(usize, Rc<Stackmap>)>,
 }
 
 impl StackMapRegistry {
@@ -820,22 +752,10 @@ impl StackMapRegistry {
     /// in memory (that is, where the JIT actually allocated and emitted the
     /// function's code at), and the stack maps and code offsets within that
     /// range for each of its GC safepoints.
-    ///
-    /// The return value is an RAII registration for the stack maps. The
-    /// registration should not be dropped until its associated module is
-    /// dropped. Dropping the registration will unregister its stack
-    /// maps.
-    ///
-    /// # Safety
-    ///
-    /// Dropping the returned registration before the module is dropped, or when
-    /// there are still active frames from the module on the stack, means we
-    /// will no longer be able to find GC roots for the module's frames anymore,
-    /// which could lead to freeing still-in-use objects and use-after-free!
-    pub unsafe fn register_stack_maps<'a>(
-        self: &Arc<Self>,
+    pub fn register_stack_maps<'a>(
+        &self,
         stack_maps: impl IntoIterator<Item = (std::ops::Range<usize>, &'a [StackMapInformation])>,
-    ) -> Option<StackMapRegistration> {
+    ) {
         let mut min = usize::max_value();
         let mut max = 0;
         let mut pc_to_stack_map = vec![];
@@ -850,14 +770,14 @@ impl StackMapRegistry {
                 assert!((info.code_offset as usize) < len);
                 pc_to_stack_map.push((
                     range.start + (info.code_offset as usize),
-                    Arc::new(info.stack_map.clone()),
+                    Rc::new(info.stack_map.clone()),
                 ));
             }
         }
 
         if pc_to_stack_map.is_empty() {
             // Nothing to register.
-            return None;
+            return;
         }
 
         let module_stack_maps = ModuleStackMaps {
@@ -865,7 +785,17 @@ impl StackMapRegistry {
             pc_to_stack_map,
         };
 
-        let mut inner = self.inner.write().unwrap();
+        let mut inner = self.inner.borrow_mut();
+
+        // Check if we've already registered this module.
+        if let Some(existing_module) = inner.ranges.get(&max) {
+            assert_eq!(existing_module.range, module_stack_maps.range);
+            debug_assert_eq!(
+                existing_module.pc_to_stack_map,
+                module_stack_maps.pc_to_stack_map,
+            );
+            return;
+        }
 
         // Assert that this chunk of ranges doesn't collide with any other known
         // chunks.
@@ -878,16 +808,11 @@ impl StackMapRegistry {
 
         let old = inner.ranges.insert(max, module_stack_maps);
         assert!(old.is_none());
-
-        Some(StackMapRegistration {
-            key: max,
-            registry: self.clone(),
-        })
     }
 
     /// Lookup the stack map for the given PC, if any.
-    pub fn lookup_stack_map(&self, pc: usize) -> Option<Arc<Stackmap>> {
-        let inner = self.inner.read().unwrap();
+    pub fn lookup_stack_map(&self, pc: usize) -> Option<Rc<Stackmap>> {
+        let inner = self.inner.borrow();
         let stack_maps = inner.module_stack_maps(pc)?;
 
         // Do a binary search to find the stack map for the given PC.
@@ -968,64 +893,36 @@ impl StackMapRegistryInner {
     }
 }
 
-/// The registration for a module's stack maps.
-///
-/// Unsafe to drop earlier than its module is dropped. See
-/// `StackMapRegistry::register_stack_maps` for details.
-pub struct StackMapRegistration {
-    key: usize,
-    registry: Arc<StackMapRegistry>,
-}
-
-impl Drop for StackMapRegistration {
-    fn drop(&mut self) {
-        if let Ok(mut inner) = self.registry.inner.write() {
-            inner.ranges.remove(&self.key);
-        }
-    }
-}
-
-#[cfg(debug_assertions)]
 #[derive(Debug, Default)]
 struct DebugOnly<T> {
     inner: T,
 }
 
-#[cfg(debug_assertions)]
-impl<T> std::ops::Deref for DebugOnly<T> {
-    type Target = T;
-
-    fn deref(&self) -> &T {
-        &self.inner
-    }
-}
-
-#[cfg(debug_assertions)]
-impl<T> std::ops::DerefMut for DebugOnly<T> {
-    fn deref_mut(&mut self) -> &mut T {
-        &mut self.inner
-    }
-}
-
-#[cfg(not(debug_assertions))]
-#[derive(Debug, Default)]
-struct DebugOnly<T> {
-    _phantom: PhantomData<T>,
-}
-
-#[cfg(not(debug_assertions))]
 impl<T> std::ops::Deref for DebugOnly<T> {
     type Target = T;
 
     fn deref(&self) -> &T {
-        panic!("only deref `DebugOnly` inside `debug_assert!`s")
+        if cfg!(debug_assertions) {
+            &self.inner
+        } else {
+            panic!(
+                "only deref `DebugOnly` when `cfg(debug_assertions)` or \
+                 inside a `debug_assert!(..)`"
+            )
+        }
     }
 }
 
-#[cfg(not(debug_assertions))]
 impl<T> std::ops::DerefMut for DebugOnly<T> {
     fn deref_mut(&mut self) -> &mut T {
-        panic!("only deref `DebugOnly` inside `debug_assert!`s")
+        if cfg!(debug_assertions) {
+            &mut self.inner
+        } else {
+            panic!(
+                "only deref `DebugOnly` when `cfg(debug_assertions)` or \
+                 inside a `debug_assert!(..)`"
+            )
+        }
     }
 }
 
@@ -1037,6 +934,9 @@ impl<T> std::ops::DerefMut for DebugOnly<T> {
 /// least the oldest host-->Wasm stack frame transition on this thread's stack
 /// (it is idempotent to call it more than once) and keep its return value alive
 /// across the duration of that host-->Wasm call.
+///
+/// Additionally, you must have registered the stack maps for every Wasm module
+/// that has frames on the stack with the given `stack_maps_registry`.
 pub unsafe fn gc(
     stack_maps_registry: &StackMapRegistry,
     externref_activations_table: &VMExternRefActivationsTable,
@@ -1068,7 +968,7 @@ pub unsafe fn gc(
                     true
                 });
             }
-            externref_activations_table.reset();
+            externref_activations_table.sweep();
             log::debug!("end GC");
             return;
         }
@@ -1086,7 +986,7 @@ pub unsafe fn gc(
     // * resetting our bump-allocated table's over-approximation to the
     //   newly-discovered precise set.
 
-    // The SP of the previous frame we processed.
+    // The SP of the previous (younger) frame we processed.
     let mut last_sp = None;
 
     // Whether we have found our stack canary or not yet.
@@ -1109,6 +1009,8 @@ pub unsafe fn gc(
         let sp = frame.sp() as usize;
 
         if let Some(stack_map) = stack_maps_registry.lookup_stack_map(pc) {
+            debug_assert!(sp != 0, "we should always get a valid SP for Wasm frames");
+
             for i in 0..(stack_map.mapped_words() as usize) {
                 if stack_map.get_bit(i) {
                     // Stack maps have one bit per word in the frame, and the
@@ -1144,14 +1046,16 @@ pub unsafe fn gc(
         !found_canary
     });
 
-    // Only reset the table if we found the stack canary, and therefore know
-    // that we discovered all the on-stack, inside-a-Wasm-frame roots. If we did
-    // *not* find the stack canary, then `libunwind` failed to walk the whole
-    // stack, and we might be missing roots. Reseting the table would free those
-    // missing roots while they are still in use, leading to use-after-free.
+    // Only sweep and reset the table if we found the stack canary, and
+    // therefore know that we discovered all the on-stack, inside-a-Wasm-frame
+    // roots. If we did *not* find the stack canary, then `libunwind` failed to
+    // walk the whole stack, and we might be missing roots. Reseting the table
+    // would free those missing roots while they are still in use, leading to
+    // use-after-free.
     if found_canary {
-        externref_activations_table.reset();
+        externref_activations_table.sweep();
     } else {
+        log::warn!("did not find stack canary; skipping GC sweep");
         let mut roots = externref_activations_table.precise_stack_roots.borrow_mut();
         roots.clear();
     }
@@ -1221,7 +1125,10 @@ mod tests {
             num_defined_memories: 0,
             num_defined_globals: 0,
         };
-        assert_eq!(offsets.vm_extern_ref_activation_table_next(), actual_offset);
+        assert_eq!(
+            offsets.vm_extern_ref_activation_table_next() as usize,
+            actual_offset
+        );
     }
 
     #[test]
@@ -1244,21 +1151,9 @@ mod tests {
             num_defined_memories: 0,
             num_defined_globals: 0,
         };
-        assert_eq!(offsets.vm_extern_ref_activation_table_end(), actual_offset);
-    }
-
-    fn assert_is_send<T: Send>() {}
-    fn assert_is_sync<T: Send>() {}
-
-    #[test]
-    fn stack_map_registry_is_send_sync() {
-        assert_is_send::<StackMapRegistry>();
-        assert_is_sync::<StackMapRegistry>();
-    }
-
-    #[test]
-    fn stack_map_registration_is_send_sync() {
-        assert_is_send::<StackMapRegistration>();
-        assert_is_sync::<StackMapRegistration>();
+        assert_eq!(
+            offsets.vm_extern_ref_activation_table_end() as usize,
+            actual_offset
+        );
     }
 }
diff --git a/crates/runtime/src/instance.rs b/crates/runtime/src/instance.rs
index 026c100b1b37..4179276e85e8 100644
--- a/crates/runtime/src/instance.rs
+++ b/crates/runtime/src/instance.rs
@@ -21,7 +21,6 @@ use std::any::Any;
 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::convert::TryFrom;
-use std::rc::Rc;
 use std::sync::Arc;
 use std::{mem, ptr, slice};
 use thiserror::Error;
@@ -74,19 +73,6 @@ pub(crate) struct Instance {
     /// interrupted.
     pub(crate) interrupts: Arc<VMInterrupts>,
 
-    /// A handle to the (over-approximized) set of `externref`s that Wasm code
-    /// is using.
-    ///
-    /// The `vmctx` also holds a raw pointer to the table and relies on this
-    /// member to keep it alive.
-    pub(crate) externref_activations_table: Rc<VMExternRefActivationsTable>,
-
-    /// A handle to the stack map registry for this thread.
-    ///
-    /// The `vmctx` also holds a raw pointer to the registry and relies on this
-    /// member to keep it alive.
-    pub(crate) stack_map_registry: Arc<StackMapRegistry>,
-
     /// Additional context used by compiled wasm code. This field is last, and
     /// represents a dynamically-sized array that extends beyond the nominal
     /// end of the struct (similar to a flexible array member).
@@ -799,6 +785,10 @@ impl InstanceHandle {
     /// internally if you'd like to do so. If possible it's recommended to use
     /// the `wasmtime` crate API rather than this type since that is vetted for
     /// safety.
+    ///
+    /// It is your responsibility to ensure that the given raw
+    /// `externref_activations_table` and `stack_map_registry` outlive this
+    /// instance.
     pub unsafe fn new(
         module: Arc<Module>,
         code: Arc<dyn Any>,
@@ -809,9 +799,12 @@ impl InstanceHandle {
         vmshared_signatures: BoxedSlice<SignatureIndex, VMSharedSignatureIndex>,
         host_state: Box<dyn Any>,
         interrupts: Arc<VMInterrupts>,
-        externref_activations_table: Rc<VMExternRefActivationsTable>,
-        stack_map_registry: Arc<StackMapRegistry>,
+        externref_activations_table: *mut VMExternRefActivationsTable,
+        stack_map_registry: *mut StackMapRegistry,
     ) -> Result<Self, InstantiationError> {
+        debug_assert!(!externref_activations_table.is_null());
+        debug_assert!(!stack_map_registry.is_null());
+
         let tables = create_tables(&module);
         let memories = create_memories(&module, mem_creator.unwrap_or(&DefaultMemoryCreator {}))?;
 
@@ -846,8 +839,6 @@ impl InstanceHandle {
                 trampolines,
                 host_state,
                 interrupts,
-                externref_activations_table,
-                stack_map_registry,
                 vmctx: VMContext {},
             };
             let layout = instance.alloc_layout();
@@ -907,9 +898,8 @@ impl InstanceHandle {
             VMBuiltinFunctionsArray::initialized(),
         );
         *instance.interrupts() = &*instance.interrupts;
-        *instance.externref_activations_table() =
-            &*instance.externref_activations_table as *const _ as *mut _;
-        *instance.stack_map_registry() = &*instance.stack_map_registry as *const _ as *mut _;
+        *instance.externref_activations_table() = externref_activations_table;
+        *instance.stack_map_registry() = stack_map_registry;
 
         // Perform infallible initialization in this constructor, while fallible
         // initialization is deferred to the `initialize` method.
diff --git a/crates/runtime/src/mmap.rs b/crates/runtime/src/mmap.rs
index 8cba107404ef..483ec5be0963 100644
--- a/crates/runtime/src/mmap.rs
+++ b/crates/runtime/src/mmap.rs
@@ -182,7 +182,7 @@ impl Mmap {
 
         // Commit the accessible size.
         let ptr = self.ptr as *const u8;
-        unsafe { region::protect(ptr.add(start), len, region::Protection::ReadWrite) }
+        unsafe { region::protect(ptr.add(start), len, region::Protection::READ_WRITE) }
             .map_err(|e| e.to_string())
     }
 
diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs
index 9fb58c743dfa..53c7e5ab01d7 100644
--- a/crates/wasmtime/src/func.rs
+++ b/crates/wasmtime/src/func.rs
@@ -195,16 +195,9 @@ macro_rules! getters {
                     let mut ret = None;
                     $(let $args = $args.into_abi();)*
 
-                    {
-                        let canary = 0;
-                        let _auto_reset = instance
-                            .store
-                            .externref_activations_table()
-                            .set_stack_canary(&canary);
-                        catch_traps(export.vmctx, &instance.store, || {
-                            ret = Some(fnptr(export.vmctx, ptr::null_mut(), $($args,)*));
-                        })?;
-                    }
+                    invoke_wasm_and_catch_traps(export.vmctx, &instance.store, || {
+                        ret = Some(fnptr(export.vmctx, ptr::null_mut(), $($args,)*));
+                    })?;
 
                     Ok(ret.unwrap())
                 }
@@ -560,23 +553,14 @@ impl Func {
         }
 
         // Call the trampoline.
-        {
-            let canary = 0;
-            let _auto_reset = self
-                .instance
-                .store
-                .externref_activations_table()
-                .set_stack_canary(&canary);
-
-            catch_traps(self.export.vmctx, &self.instance.store, || unsafe {
-                (self.trampoline)(
-                    self.export.vmctx,
-                    ptr::null_mut(),
-                    self.export.address,
-                    values_vec.as_mut_ptr(),
-                )
-            })?;
-        }
+        invoke_wasm_and_catch_traps(self.export.vmctx, &self.instance.store, || unsafe {
+            (self.trampoline)(
+                self.export.vmctx,
+                ptr::null_mut(),
+                self.export.address,
+                values_vec.as_mut_ptr(),
+            )
+        })?;
 
         // Load the return values out of `values_vec`.
         let mut results = Vec::with_capacity(my_ty.results().len());
@@ -746,13 +730,18 @@ impl fmt::Debug for Func {
     }
 }
 
-pub(crate) fn catch_traps(
+pub(crate) fn invoke_wasm_and_catch_traps(
     vmctx: *mut VMContext,
     store: &Store,
     closure: impl FnMut(),
 ) -> Result<(), Trap> {
     let signalhandler = store.signal_handler();
     unsafe {
+        let canary = 0;
+        let _auto_reset_canary = store
+            .externref_activations_table()
+            .set_stack_canary(&canary);
+
         wasmtime_runtime::catch_traps(
             vmctx,
             store.engine().config().max_wasm_stack,
diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs
index 0e15dcdc97dc..1477c0a0ba59 100644
--- a/crates/wasmtime/src/instance.rs
+++ b/crates/wasmtime/src/instance.rs
@@ -3,13 +3,9 @@ use crate::{Engine, Export, Extern, Func, Global, Memory, Module, Store, Table,
 use anyhow::{bail, Error, Result};
 use std::any::Any;
 use std::mem;
-use std::rc::Rc;
-use std::sync::Arc;
 use wasmtime_environ::EntityIndex;
 use wasmtime_jit::{CompiledModule, Resolver};
-use wasmtime_runtime::{
-    InstantiationError, StackMapRegistry, VMContext, VMExternRefActivationsTable, VMFunctionBody,
-};
+use wasmtime_runtime::{InstantiationError, VMContext, VMFunctionBody};
 
 struct SimpleResolver<'a> {
     imports: &'a [Extern],
@@ -28,8 +24,6 @@ fn instantiate(
     compiled_module: &CompiledModule,
     imports: &[Extern],
     host: Box<dyn Any>,
-    externref_activations_table: Rc<VMExternRefActivationsTable>,
-    stack_map_registry: Arc<StackMapRegistry>,
 ) -> Result<StoreInstanceHandle, Error> {
     // For now we have a restriction that the `Store` that we're working
     // with is the same for everything involved here.
@@ -56,8 +50,8 @@ fn instantiate(
             config.memory_creator.as_ref().map(|a| a as _),
             store.interrupts().clone(),
             host,
-            externref_activations_table,
-            stack_map_registry,
+            &*store.externref_activations_table() as *const _ as *mut _,
+            &*store.stack_map_registry() as *const _ as *mut _,
         )?;
 
         // After we've created the `InstanceHandle` we still need to run
@@ -97,7 +91,7 @@ fn instantiate(
         };
         let vmctx_ptr = instance.handle.vmctx_ptr();
         unsafe {
-            super::func::catch_traps(vmctx_ptr, store, || {
+            super::func::invoke_wasm_and_catch_traps(vmctx_ptr, store, || {
                 mem::transmute::<
                     *const VMFunctionBody,
                     unsafe extern "C" fn(*mut VMContext, *mut VMContext),
@@ -194,24 +188,11 @@ impl Instance {
         let host_info = Box::new({
             let frame_info_registration = module.register_frame_info();
             store.register_jit_code(module.compiled_module().jit_code_ranges());
-
-            // We need to make sure that we keep this alive as long as the instance
-            // is alive, or else we could miss GC roots, reclaim objects too early,
-            // and get user-after-frees.
-            let stack_map_registration =
-                unsafe { module.register_stack_maps(&*store.stack_map_registry()) };
-
-            (frame_info_registration, stack_map_registration)
+            store.register_stack_maps(&module);
+            frame_info_registration
         });
 
-        let handle = instantiate(
-            store,
-            module.compiled_module(),
-            imports,
-            host_info,
-            store.externref_activations_table().clone(),
-            store.stack_map_registry().clone(),
-        )?;
+        let handle = instantiate(store, module.compiled_module(), imports, host_info)?;
 
         Ok(Instance {
             handle,
diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs
index c79e4970ba3f..391c3b63de10 100644
--- a/crates/wasmtime/src/module.rs
+++ b/crates/wasmtime/src/module.rs
@@ -6,7 +6,6 @@ use std::path::Path;
 use std::sync::{Arc, Mutex};
 use wasmparser::validate;
 use wasmtime_jit::CompiledModule;
-use wasmtime_runtime::{StackMapRegistration, StackMapRegistry};
 
 /// A compiled WebAssembly module, ready to be instantiated.
 ///
@@ -81,7 +80,6 @@ pub struct Module {
     engine: Engine,
     compiled: Arc<CompiledModule>,
     frame_info_registration: Arc<Mutex<Option<Option<Arc<GlobalFrameInfoRegistration>>>>>,
-    stack_map_registration: Arc<Mutex<Option<Option<Arc<StackMapRegistration>>>>>,
 }
 
 impl Module {
@@ -309,7 +307,6 @@ impl Module {
             engine: engine.clone(),
             compiled: Arc::new(compiled),
             frame_info_registration: Arc::new(Mutex::new(None)),
-            stack_map_registration: Arc::new(Mutex::new(None)),
         })
     }
 
@@ -537,41 +534,6 @@ impl Module {
         *info = Some(ret.clone());
         return ret;
     }
-
-    /// Register this module's stack maps.
-    ///
-    /// # Safety
-    ///
-    /// The same as `wasmtime_runtime::StackMapRegistry::register_stack_maps`.
-    pub(crate) unsafe fn register_stack_maps(
-        &self,
-        registry: &Arc<StackMapRegistry>,
-    ) -> Option<Arc<StackMapRegistration>> {
-        let mut registration = self.stack_map_registration.lock().unwrap();
-        if let Some(registration) = &*registration {
-            return registration.clone();
-        }
-
-        let module = &self.compiled;
-        let ret = registry
-            .register_stack_maps(
-                module
-                    .finished_functions()
-                    .values()
-                    .zip(module.stack_maps().values())
-                    .map(|(func, stack_maps)| {
-                        let ptr = (**func).as_ptr();
-                        let len = (**func).len();
-                        let start = ptr as usize;
-                        let end = ptr as usize + len;
-                        let range = start..end;
-                        (range, &stack_maps[..])
-                    }),
-            )
-            .map(Arc::new);
-        *registration = Some(ret.clone());
-        ret
-    }
 }
 
 fn _assert_send_sync() {
diff --git a/crates/wasmtime/src/ref.rs b/crates/wasmtime/src/ref.rs
old mode 100755
new mode 100644
index 6bb8141494d7..988c1590093d
--- a/crates/wasmtime/src/ref.rs
+++ b/crates/wasmtime/src/ref.rs
@@ -36,9 +36,9 @@ impl ExternRef {
         &*self.inner
     }
 
-    /// Get the reference count for this `ExternRef`.
-    pub fn get_reference_count(&self) -> usize {
-        self.inner.get_reference_count()
+    /// Get the strong reference count for this `ExternRef`.
+    pub fn strong_count(&self) -> usize {
+        self.inner.strong_count()
     }
 
     /// Does this `ExternRef` point to the same inner value as `other`?0
diff --git a/crates/wasmtime/src/runtime.rs b/crates/wasmtime/src/runtime.rs
index 6e3f95b5ff86..f57fdc8a2935 100644
--- a/crates/wasmtime/src/runtime.rs
+++ b/crates/wasmtime/src/runtime.rs
@@ -1,6 +1,7 @@
 use crate::externals::MemoryCreator;
 use crate::r#ref::ExternRef;
 use crate::trampoline::{MemoryCreatorProxy, StoreInstanceHandle};
+use crate::Module;
 use anyhow::{bail, Result};
 use std::any::Any;
 use std::cell::RefCell;
@@ -14,7 +15,7 @@ use std::rc::{Rc, Weak};
 use std::sync::Arc;
 use wasmparser::{OperatorValidatorConfig, ValidatingParserConfig};
 use wasmtime_environ::settings::{self, Configurable};
-use wasmtime_environ::{ir, wasm, CacheConfig, Tunables};
+use wasmtime_environ::{ir, isa::TargetIsa, wasm, CacheConfig, Tunables};
 use wasmtime_jit::{native, CompilationStrategy, Compiler};
 use wasmtime_profiling::{JitDumpAgent, NullProfilerAgent, ProfilingAgent, VTuneAgent};
 use wasmtime_runtime::{
@@ -195,6 +196,7 @@ impl Config {
         self.validating_config
             .operator_config
             .enable_reference_types = enable;
+
         self.flags
             .set("enable_safepoints", if enable { "true" } else { "false" })
             .unwrap();
@@ -597,8 +599,12 @@ impl Config {
         self
     }
 
+    pub(crate) fn target_isa(&self) -> Box<dyn TargetIsa> {
+        native::builder().finish(settings::Flags::new(self.flags.clone()))
+    }
+
     fn build_compiler(&self) -> Compiler {
-        let isa = native::builder().finish(settings::Flags::new(self.flags.clone()));
+        let isa = self.target_isa();
         Compiler::new(
             isa,
             self.strategy,
@@ -730,7 +736,6 @@ pub struct Engine {
 struct EngineInner {
     config: Config,
     compiler: Compiler,
-    stack_map_registry: Arc<StackMapRegistry>,
 }
 
 impl Engine {
@@ -742,7 +747,6 @@ impl Engine {
             inner: Arc::new(EngineInner {
                 config: config.clone(),
                 compiler: config.build_compiler(),
-                stack_map_registry: Arc::new(StackMapRegistry::default()),
             }),
         }
     }
@@ -801,7 +805,7 @@ pub(crate) struct StoreInner {
     jit_code_ranges: RefCell<Vec<(usize, usize)>>,
     host_info: RefCell<HashMap<HostInfoKey, Rc<RefCell<dyn Any>>>>,
     externref_activations_table: Rc<VMExternRefActivationsTable>,
-    stack_map_registry: Arc<StackMapRegistry>,
+    stack_map_registry: Rc<StackMapRegistry>,
 }
 
 struct HostInfoKey(VMExternRef);
@@ -843,7 +847,7 @@ impl Store {
                 jit_code_ranges: RefCell::new(Vec::new()),
                 host_info: RefCell::new(HashMap::new()),
                 externref_activations_table: Rc::new(VMExternRefActivationsTable::new()),
-                stack_map_registry: engine.inner.stack_map_registry.clone(),
+                stack_map_registry: Rc::new(StackMapRegistry::default()),
             }),
         }
     }
@@ -916,6 +920,24 @@ impl Store {
         }
     }
 
+    pub(crate) fn register_stack_maps(&self, module: &Module) {
+        let module = &module.compiled_module();
+        self.stack_map_registry().register_stack_maps(
+            module
+                .finished_functions()
+                .values()
+                .zip(module.stack_maps().values())
+                .map(|(func, stack_maps)| unsafe {
+                    let ptr = (**func).as_ptr();
+                    let len = (**func).len();
+                    let start = ptr as usize;
+                    let end = ptr as usize + len;
+                    let range = start..end;
+                    (range, &stack_maps[..])
+                }),
+        );
+    }
+
     pub(crate) unsafe fn add_instance(&self, handle: InstanceHandle) -> StoreInstanceHandle {
         self.inner.instances.borrow_mut().push(handle.clone());
         StoreInstanceHandle {
@@ -1091,14 +1113,15 @@ impl Store {
         &self.inner.externref_activations_table
     }
 
-    pub(crate) fn stack_map_registry(&self) -> &Arc<StackMapRegistry> {
-        &self.inner.engine.inner.stack_map_registry
+    pub(crate) fn stack_map_registry(&self) -> &Rc<StackMapRegistry> {
+        &self.inner.stack_map_registry
     }
 
     /// Perform garbage collection of `ExternRef`s.
     pub fn gc(&self) {
         // For this crate's API, we ensure that `set_stack_canary` invariants
-        // are upheld for all host-->Wasm calls.
+        // are upheld for all host-->Wasm calls, and we register every module
+        // used with this store in `self.inner.stack_map_registry`.
         unsafe {
             wasmtime_runtime::gc(
                 &*self.inner.stack_map_registry,
diff --git a/crates/wasmtime/src/trampoline/create_handle.rs b/crates/wasmtime/src/trampoline/create_handle.rs
index 5a3eca831b7a..12faef120e10 100644
--- a/crates/wasmtime/src/trampoline/create_handle.rs
+++ b/crates/wasmtime/src/trampoline/create_handle.rs
@@ -46,8 +46,8 @@ pub(crate) fn create_handle(
             signatures.into_boxed_slice(),
             state,
             store.interrupts().clone(),
-            store.externref_activations_table().clone(),
-            store.stack_map_registry().clone(),
+            &*store.externref_activations_table() as *const _ as *mut _,
+            &*store.stack_map_registry() as *const _ as *mut _,
         )?;
         Ok(store.add_instance(handle))
     }
diff --git a/crates/wasmtime/src/trampoline/func.rs b/crates/wasmtime/src/trampoline/func.rs
index 6ccc700c2431..fd095e6d150a 100644
--- a/crates/wasmtime/src/trampoline/func.rs
+++ b/crates/wasmtime/src/trampoline/func.rs
@@ -2,7 +2,7 @@
 
 use super::create_handle::create_handle;
 use crate::trampoline::StoreInstanceHandle;
-use crate::{FuncType, Store, Trap, ValType};
+use crate::{FuncType, Store, Trap};
 use anyhow::{bail, Result};
 use std::any::Any;
 use std::cmp;
@@ -11,9 +11,7 @@ use std::mem;
 use std::panic::{self, AssertUnwindSafe};
 use wasmtime_environ::entity::PrimaryMap;
 use wasmtime_environ::isa::TargetIsa;
-use wasmtime_environ::{
-    ir, settings, settings::Configurable, CompiledFunction, EntityIndex, Module,
-};
+use wasmtime_environ::{ir, settings, CompiledFunction, EntityIndex, Module};
 use wasmtime_jit::trampoline::ir::{
     ExternalName, Function, InstBuilder, MemFlags, StackSlotData, StackSlotKind,
 };
@@ -210,18 +208,7 @@ pub fn create_handle_with_function(
     func: Box<dyn Fn(*mut VMContext, *mut u128) -> Result<(), Trap>>,
     store: &Store,
 ) -> Result<(StoreInstanceHandle, VMTrampoline)> {
-    let isa = {
-        let isa_builder = native::builder();
-        let mut flag_builder = settings::builder();
-
-        if ft.params().iter().any(|p| *p == ValType::ExternRef)
-            || ft.results().iter().any(|r| *r == ValType::ExternRef)
-        {
-            flag_builder.set("enable_safepoints", "true").unwrap();
-        }
-
-        isa_builder.finish(settings::Flags::new(flag_builder))
-    };
+    let isa = store.engine().config().target_isa();
 
     let pointer_type = isa.pointer_type();
     let sig = match ft.get_wasmtime_signature(pointer_type) {
diff --git a/crates/wasmtime/src/types.rs b/crates/wasmtime/src/types.rs
index 8fa8b79b345a..96f5ce9f3f39 100644
--- a/crates/wasmtime/src/types.rs
+++ b/crates/wasmtime/src/types.rs
@@ -106,7 +106,10 @@ impl ValType {
             ValType::F32 => Some(ir::types::F32),
             ValType::F64 => Some(ir::types::F64),
             ValType::V128 => Some(ir::types::I8X16),
+            #[cfg(target_pointer_width = "64")]
             ValType::ExternRef => Some(ir::types::R64),
+            #[cfg(target_pointer_width = "32")]
+            ValType::ExternRef => Some(ir::types::R32),
             _ => None,
         }
     }
@@ -118,7 +121,10 @@ impl ValType {
             ir::types::F32 => Some(ValType::F32),
             ir::types::F64 => Some(ValType::F64),
             ir::types::I8X16 => Some(ValType::V128),
+            #[cfg(target_pointer_width = "64")]
             ir::types::R64 => Some(ValType::ExternRef),
+            #[cfg(target_pointer_width = "32")]
+            ir::types::R32 => Some(ValType::ExternRef),
             _ => None,
         }
     }
diff --git a/crates/wasmtime/src/values.rs b/crates/wasmtime/src/values.rs
index 2eb9b8bc7ef2..7885e26c5ea2 100644
--- a/crates/wasmtime/src/values.rs
+++ b/crates/wasmtime/src/values.rs
@@ -89,10 +89,9 @@ impl Val {
             Val::ExternRef(None) => ptr::write(p, 0),
             Val::ExternRef(Some(x)) => {
                 let externref_ptr = x.inner.as_raw();
-                if let Err(inner) = store.externref_activations_table().try_insert(x.inner) {
-                    store.gc();
-                    store.externref_activations_table().insert_slow_path(inner);
-                }
+                store
+                    .externref_activations_table()
+                    .insert_with_gc(x.inner, store.stack_map_registry());
                 ptr::write(p as *mut *mut u8, externref_ptr)
             }
             _ => unimplemented!("Val::write_value_to"),
diff --git a/tests/all/gc.rs b/tests/all/gc.rs
index cd6bae369134..e6950ccbbe68 100644
--- a/tests/all/gc.rs
+++ b/tests/all/gc.rs
@@ -59,12 +59,12 @@ fn smoke_test_gc() -> anyhow::Result<()> {
 
     // Still held alive by the `VMExternRefActivationsTable` (potentially in
     // multiple slots within the table) and by this `r` local.
-    assert!(r.get_reference_count() >= 2);
+    assert!(r.strong_count() >= 2);
 
     // Doing a GC should see that there aren't any `externref`s on the stack in
     // Wasm frames anymore.
     store.gc();
-    assert_eq!(r.get_reference_count(), 1);
+    assert_eq!(r.strong_count(), 1);
 
     // Dropping `r` should drop the inner `SetFlagOnDrop` value.
     drop(r);

From 683dc1538540324ddbe2e833830bb5ac33a4827e Mon Sep 17 00:00:00 2001
From: Nick Fitzgerald <fitzgen@gmail.com>
Date: Mon, 15 Jun 2020 16:05:51 -0700
Subject: [PATCH 4/4] Only run reference types tests on x86_64

Cranelift does not support reference types on other targets.
---
 build.rs          | 9 +++++++--
 tests/all/main.rs | 5 ++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/build.rs b/build.rs
index 2cbb4b3811fe..40b4385b5ed9 100644
--- a/build.rs
+++ b/build.rs
@@ -209,9 +209,14 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             // testsuite repo.
             ("simd", "simd_const") => return true,
 
+            ("reference_types", "table_copy_on_imported_tables")
+            | ("reference_types", "externref_id_function") => {
+                // Ignore if this isn't x64, because Cranelift only supports
+                // reference types on x64.
+                return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64";
+            }
+
             // Still working on implementing these. See #929.
-            ("reference_types", "table_copy_on_imported_tables") => return false,
-            ("reference_types", "externref_id_function") => return false,
             ("reference_types", _) => return true,
 
             _ => {}
diff --git a/tests/all/main.rs b/tests/all/main.rs
index 5d8db9ffe2d4..a664807f5023 100644
--- a/tests/all/main.rs
+++ b/tests/all/main.rs
@@ -4,7 +4,6 @@ mod debug;
 mod externals;
 mod func;
 mod fuzzing;
-mod gc;
 mod globals;
 mod iloop;
 mod import_calling_export;
@@ -19,3 +18,7 @@ mod table;
 mod traps;
 mod use_after_drop;
 mod wast;
+
+// Cranelift only supports reference types on x64.
+#[cfg(target_arch = "x86_64")]
+mod gc;