diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index 28478b342fea..c75f6fe863da 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1066,21 +1066,24 @@ pub fn translate_operator(
             let heap = state.get_heap(builder.func, memarg.memory, environ)?;
             let timeout = state.pop1(); // 64 (fixed)
             let expected = state.pop1(); // 32 or 64 (per the `Ixx` in `IxxAtomicWait`)
-            let (_flags, addr) = prepare_atomic_addr(
-                memarg,
-                u8::try_from(implied_ty.bytes()).unwrap(),
-                builder,
-                state,
-                environ,
-            )?;
             assert!(builder.func.dfg.value_type(expected) == implied_ty);
+            let addr = state.pop1();
+            let effective_addr = if memarg.offset == 0 {
+                addr
+            } else {
+                let index_type = builder.func.heaps[heap].index_type;
+                let offset = builder.ins().iconst(index_type, memarg.offset as i64);
+                builder
+                    .ins()
+                    .uadd_overflow_trap(addr, offset, ir::TrapCode::HeapOutOfBounds)
+            };
             // `fn translate_atomic_wait` can inspect the type of `expected` to figure out what
             // code it needs to generate, if it wants.
             let res = environ.translate_atomic_wait(
                 builder.cursor(),
                 heap_index,
                 heap,
-                addr,
+                effective_addr,
                 expected,
                 timeout,
             )?;
@@ -1090,12 +1093,23 @@
             let heap_index = MemoryIndex::from_u32(memarg.memory);
             let heap = state.get_heap(builder.func, memarg.memory, environ)?;
             let count = state.pop1(); // 32 (fixed)
-
-            // `memory.atomic.notify` is defined to have an access size of 4
-            // bytes in the spec, even though it doesn't necessarily access memory.
-            let (_flags, addr) = prepare_atomic_addr(memarg, 4, builder, state, environ)?;
-            let res =
-                environ.translate_atomic_notify(builder.cursor(), heap_index, heap, addr, count)?;
+            let addr = state.pop1();
+            let effective_addr = if memarg.offset == 0 {
+                addr
+            } else {
+                let index_type = builder.func.heaps[heap].index_type;
+                let offset = builder.ins().iconst(index_type, memarg.offset as i64);
+                builder
+                    .ins()
+                    .uadd_overflow_trap(addr, offset, ir::TrapCode::HeapOutOfBounds)
+            };
+            let res = environ.translate_atomic_notify(
+                builder.cursor(),
+                heap_index,
+                heap,
+                effective_addr,
+                count,
+            )?;
             state.push1(res);
         }
         Operator::I32AtomicLoad { memarg } => {
@@ -2324,13 +2338,12 @@ fn prepare_addr(
     Ok((flags, addr))
 }
 
-fn prepare_atomic_addr<FE: FuncEnvironment + ?Sized>(
+fn align_atomic_addr(
     memarg: &MemArg,
     loaded_bytes: u8,
     builder: &mut FunctionBuilder,
     state: &mut FuncTranslationState,
-    environ: &mut FE,
-) -> WasmResult<(MemFlags, Value)> {
+) {
     // Atomic addresses must all be aligned correctly, and for now we check
     // alignment before we check out-of-bounds-ness. The order of this check may
     // need to be updated depending on the outcome of the official threads
@@ -2358,7 +2371,16 @@ fn prepare_atomic_addr(
         let f = builder.ins().icmp_imm(IntCC::NotEqual, misalignment, 0);
         builder.ins().trapnz(f, ir::TrapCode::HeapMisaligned);
     }
+}
 
+fn prepare_atomic_addr<FE: FuncEnvironment + ?Sized>(
+    memarg: &MemArg,
+    loaded_bytes: u8,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<(MemFlags, Value)> {
+    align_atomic_addr(memarg, loaded_bytes, builder, state);
     prepare_addr(memarg, loaded_bytes, builder, state, environ)
 }
 
diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs
index 7094a6173df1..518f524c3c7d 100644
--- a/crates/cranelift/src/func_environ.rs
+++ b/crates/cranelift/src/func_environ.rs
@@ -1981,6 +1981,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m
         expected: ir::Value,
         timeout: ir::Value,
     ) -> WasmResult<ir::Value> {
+        let addr = self.cast_memory_index_to_i64(&mut pos, addr, memory_index);
         let implied_ty = pos.func.dfg.value_type(expected);
         let (func_sig, memory_index, func_idx) =
             self.get_memory_atomic_wait(&mut pos.func, memory_index, implied_ty);
@@ -2006,6 +2007,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m
         addr: ir::Value,
         count: ir::Value,
     ) -> WasmResult<ir::Value> {
+        let addr = self.cast_memory_index_to_i64(&mut pos, addr, memory_index);
         let func_sig = self
             .builtin_function_signatures
             .memory_atomic_notify(&mut pos.func);
diff --git a/crates/environ/src/builtin.rs b/crates/environ/src/builtin.rs
index 4df985cabd31..fed56aa3a150 100644
--- a/crates/environ/src/builtin.rs
+++ b/crates/environ/src/builtin.rs
@@ -42,11 +42,11 @@ macro_rules! foreach_builtin_function {
             /// Returns an index for Wasm's `global.get` instruction for `externref`s.
             externref_global_set(vmctx: vmctx, global: i32, val: reference);
             /// Returns an index for wasm's `memory.atomic.notify` instruction.
-            memory_atomic_notify(vmctx: vmctx, memory: i32, addr: pointer, count: i32) -> i32;
+            memory_atomic_notify(vmctx: vmctx, memory: i32, addr: i64, count: i32) -> i32;
             /// Returns an index for wasm's `memory.atomic.wait32` instruction.
-            memory_atomic_wait32(vmctx: vmctx, memory: i32, addr: pointer, expected: i32, timeout: i64) -> i32;
+            memory_atomic_wait32(vmctx: vmctx, memory: i32, addr: i64, expected: i32, timeout: i64) -> i32;
             /// Returns an index for wasm's `memory.atomic.wait64` instruction.
-            memory_atomic_wait64(vmctx: vmctx, memory: i32, addr: pointer, expected: i64, timeout: i64) -> i32;
+            memory_atomic_wait64(vmctx: vmctx, memory: i32, addr: i64, expected: i64, timeout: i64) -> i32;
             /// Invoked when fuel has run out while executing a function.
             out_of_gas(vmctx: vmctx);
             /// Invoked when we reach a new epoch.
diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs
index 78ae3b551b5c..6a7fb956eeae 100644
--- a/crates/runtime/src/libcalls.rs
+++ b/crates/runtime/src/libcalls.rs
@@ -434,17 +434,12 @@ unsafe fn externref_global_set(vmctx: *mut VMContext, index: u32, externref: *mu
 unsafe fn memory_atomic_notify(
     vmctx: *mut VMContext,
     memory_index: u32,
-    addr: *mut u8,
+    addr: u64,
     _count: u32,
 ) -> Result {
-    let addr = addr as usize;
     let memory = MemoryIndex::from_u32(memory_index);
     let instance = (*vmctx).instance();
-    // this should never overflow since addr + 4 either hits a guard page
-    // or it's been validated to be in-bounds already. Double-check for now
-    // just to be sure.
-    let addr_to_check = addr.checked_add(4).unwrap();
-    validate_atomic_addr(instance, memory, addr_to_check)?;
+    validate_atomic_addr(instance, memory, addr, 4, 4)?;
     Err(
         anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_notify) unsupported",)
             .into(),
@@ -455,17 +450,13 @@
 unsafe fn memory_atomic_wait32(
     vmctx: *mut VMContext,
     memory_index: u32,
-    addr: *mut u8,
+    addr: u64,
     _expected: u32,
     _timeout: u64,
 ) -> Result {
-    let addr = addr as usize;
     let memory = MemoryIndex::from_u32(memory_index);
     let instance = (*vmctx).instance();
-    // see wasmtime_memory_atomic_notify for why this shouldn't overflow
-    // but we still double-check
-    let addr_to_check = addr.checked_add(4).unwrap();
-    validate_atomic_addr(instance, memory, addr_to_check)?;
+    validate_atomic_addr(instance, memory, addr, 4, 4)?;
     Err(
         anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_wait32) unsupported",)
             .into(),
@@ -476,40 +467,47 @@
 unsafe fn memory_atomic_wait64(
     vmctx: *mut VMContext,
     memory_index: u32,
-    addr: *mut u8,
+    addr: u64,
     _expected: u64,
     _timeout: u64,
 ) -> Result {
-    let addr = addr as usize;
     let memory = MemoryIndex::from_u32(memory_index);
     let instance = (*vmctx).instance();
-    // see wasmtime_memory_atomic_notify for why this shouldn't overflow
-    // but we still double-check
-    let addr_to_check = addr.checked_add(8).unwrap();
-    validate_atomic_addr(instance, memory, addr_to_check)?;
+    validate_atomic_addr(instance, memory, addr, 8, 8)?;
     Err(
         anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_wait64) unsupported",)
             .into(),
     )
 }
 
-/// For atomic operations we still check the actual address despite this also
-/// being checked via the `heap_addr` instruction in cranelift. The reason for
-/// that is because the `heap_addr` instruction can defer to a later segfault to
-/// actually recognize the out-of-bounds whereas once we're running Rust code
-/// here we don't want to segfault.
-///
-/// In the situations where bounds checks were elided in JIT code (because oob
-/// would then be later guaranteed to segfault) this manual check is here
-/// so we don't segfault from Rust.
+macro_rules! ensure {
+    ($cond:expr, $trap:expr) => {
+        if !($cond) {
+            return Err($trap);
+        }
+    };
+}
+
+/// In the configurations where bounds checks were elided in JIT code (because
+/// we are using static memories with virtual memory guard pages) this manual
+/// check is here so we don't segfault from Rust. For other configurations,
+/// these checks are required anyways.
 unsafe fn validate_atomic_addr(
     instance: &Instance,
     memory: MemoryIndex,
-    addr: usize,
+    addr: u64,
+    access_size: u64,
+    access_alignment: u64,
 ) -> Result<(), TrapCode> {
-    if addr > instance.get_memory(memory).current_length() {
-        return Err(TrapCode::HeapOutOfBounds);
-    }
+    debug_assert!(access_alignment.is_power_of_two());
+    ensure!(addr % access_alignment == 0, TrapCode::HeapMisaligned);
+
+    let length = u64::try_from(instance.get_memory(memory).current_length()).unwrap();
+    ensure!(
+        addr.saturating_add(access_size) < length,
+        TrapCode::HeapOutOfBounds
+    );
+
     Ok(())
 }
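
For reference, the checking that this patch concentrates in `validate_atomic_addr` amounts to two guards: the atomic access must be naturally aligned, and its end address, computed with saturating arithmetic so it cannot wrap, must stay inside the current memory length. A minimal standalone sketch of those guards follows; the `check_atomic_access` name, the string errors, and the plain `memory_len` parameter (standing in for `instance.get_memory(memory).current_length()`) are illustrative, not Wasmtime's actual API.

// Illustrative sketch only: mirrors the alignment and bounds guards that the
// patch adds to `validate_atomic_addr`, outside of Wasmtime's `Instance` and
// `TrapCode` types.
fn check_atomic_access(
    addr: u64,
    access_size: u64,
    access_alignment: u64,
    memory_len: u64,
) -> Result<(), &'static str> {
    assert!(access_alignment.is_power_of_two());
    // Misaligned atomics trap regardless of whether the address is in bounds.
    if addr % access_alignment != 0 {
        return Err("heap misaligned");
    }
    // Saturating addition keeps `addr + access_size` from wrapping, so an
    // overflowing end address is reported as out of bounds (same condition
    // as the `ensure!` in the patch).
    if addr.saturating_add(access_size) >= memory_len {
        return Err("heap out of bounds");
    }
    Ok(())
}

fn main() {
    let memory_len = 0x10000; // a 64 KiB linear memory
    assert!(check_atomic_access(0x100, 4, 4, memory_len).is_ok());
    assert!(check_atomic_access(0x101, 4, 4, memory_len).is_err()); // misaligned
    assert!(check_atomic_access(u64::MAX - 2, 4, 4, memory_len).is_err()); // would wrap
}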