diff --git a/compiler/rustc_target/src/callconv/x86_win64.rs b/compiler/rustc_target/src/callconv/x86_win64.rs
index 83d94cb11bafd..59f911f7d5a5d 100644
--- a/compiler/rustc_target/src/callconv/x86_win64.rs
+++ b/compiler/rustc_target/src/callconv/x86_win64.rs
@@ -1,4 +1,4 @@
-use rustc_abi::{BackendRepr, Float, Primitive};
+use rustc_abi::{BackendRepr, Float, Integer, Primitive, RegKind, Size};
 
 use crate::abi::call::{ArgAbi, FnAbi, Reg};
 use crate::spec::HasTargetSpec;
@@ -6,7 +6,7 @@ use crate::spec::HasTargetSpec;
 // Win64 ABI: https://docs.microsoft.com/en-us/cpp/build/parameter-passing
 
 pub(crate) fn compute_abi_info<Ty>(cx: &impl HasTargetSpec, fn_abi: &mut FnAbi<'_, Ty>) {
-    let fixup = |a: &mut ArgAbi<'_, Ty>| {
+    let fixup = |a: &mut ArgAbi<'_, Ty>, is_ret: bool| {
         match a.layout.backend_repr {
             BackendRepr::Uninhabited | BackendRepr::Memory { sized: false } => {}
             BackendRepr::ScalarPair(..) | BackendRepr::Memory { sized: true } => {
@@ -23,11 +23,16 @@ pub(crate) fn compute_abi_info<Ty>(cx: &impl HasTargetSpec, fn_abi: &mut FnAbi<'
                 // (probably what clang calls "illegal vectors").
             }
             BackendRepr::Scalar(scalar) => {
-                // Match what LLVM does for `f128` so that `compiler-builtins` builtins match up
-                // with what LLVM expects.
-                if a.layout.size.bytes() > 8
+                if is_ret && matches!(scalar.primitive(), Primitive::Int(Integer::I128, _)) {
+                    // `i128` is returned in xmm0 by Clang and GCC.
+                    // FIXME(#134288): This may change for the `-msvc` targets in the future.
+                    let reg = Reg { kind: RegKind::Vector, size: Size::from_bits(128) };
+                    a.cast_to(reg);
+                } else if a.layout.size.bytes() > 8
                     && !matches!(scalar.primitive(), Primitive::Float(Float::F128))
                 {
+                    // Match what LLVM does for `f128` so that `compiler-builtins` builtins match up
+                    // with what LLVM expects.
                     a.make_indirect();
                 } else {
                     a.extend_integer_width_to(32);
@@ -37,8 +42,9 @@ pub(crate) fn compute_abi_info<Ty>(cx: &impl HasTargetSpec, fn_abi: &mut FnAbi<'
     };
 
     if !fn_abi.ret.is_ignore() {
-        fixup(&mut fn_abi.ret);
+        fixup(&mut fn_abi.ret, true);
     }
+
     for arg in fn_abi.args.iter_mut() {
         if arg.is_ignore() {
             // x86_64-pc-windows-gnu doesn't ignore ZSTs.
@@ -50,6 +56,6 @@ pub(crate) fn compute_abi_info<Ty>(cx: &impl HasTargetSpec, fn_abi: &mut FnAbi<'
             }
             continue;
         }
-        fixup(arg);
+        fixup(arg, false);
     }
 }
diff --git a/tests/auxiliary/minicore.rs b/tests/auxiliary/minicore.rs
index c4317752920f0..2379d3adf1b4c 100644
--- a/tests/auxiliary/minicore.rs
+++ b/tests/auxiliary/minicore.rs
@@ -40,8 +40,11 @@ impl<T: ?Sized> LegacyReceiver for &mut T {}
 pub trait Copy: Sized {}
 
 impl_marker_trait!(
-    Copy => [ bool, char, isize, usize, i8, i16, i32, i64, u8, u16, u32, u64, f32, f64 ]
+    Copy => [
+        bool, char, isize, usize, i8, i16, i32, i64, i128, u8, u16, u32, u64, u128, f32, f64
+    ]
 );
+
 impl<'a, T: ?Sized> Copy for &'a T {}
 impl<T: ?Sized> Copy for *const T {}
 impl<T: ?Sized> Copy for *mut T {}
diff --git a/tests/codegen/i128-x86-callconv.rs b/tests/codegen/i128-x86-callconv.rs
new file mode 100644
index 0000000000000..ee33239be3a7a
--- /dev/null
+++ b/tests/codegen/i128-x86-callconv.rs
@@ -0,0 +1,76 @@
+//! Verify that Rust implements the expected calling convention for `i128`/`u128`.
+
+//@ revisions: MSVC MINGW
+//@ add-core-stubs
+//@ [MSVC] needs-llvm-components: x86
+//@ [MINGW] needs-llvm-components: x86
+//@ [MSVC] compile-flags: --target x86_64-pc-windows-msvc
+//@ [MINGW] compile-flags: --target x86_64-pc-windows-gnu
+//@ [MSVC] filecheck-flags: --check-prefix=WIN
+//@ [MINGW] filecheck-flags: --check-prefix=WIN
+
+#![crate_type = "lib"]
+#![no_std]
+#![no_core]
+#![feature(no_core, lang_items)]
+
+extern crate minicore;
+
+extern "C" {
+    fn extern_call(arg0: i128);
+    fn extern_ret() -> i128;
+}
+
+#[no_mangle]
+pub extern "C" fn pass(_arg0: u32, arg1: i128) {
+    // CHECK-LABEL: @pass(
+    // i128 is passed indirectly on Windows: the caller spills the value to a stack
+    // allocation and passes a pointer to that allocation.
+    // WIN-SAME: %_arg0, ptr{{.*}} %arg1)
+    // WIN: [[PASS:%[_0-9]+]] = alloca [16 x i8], align 16
+    // WIN: [[LOADED:%[_0-9]+]] = load i128, ptr %arg1
+    // WIN: store i128 [[LOADED]], ptr [[PASS]]
+    // WIN: call void @extern_call
+    unsafe { extern_call(arg1) };
+}
+
+// Check that we produce the correct return ABI
+#[no_mangle]
+pub extern "C" fn ret(_arg0: u32, arg1: i128) -> i128 {
+    // CHECK-LABEL: @ret(
+    // i128 is returned in xmm0 on Windows
+    // FIXME(#134288): This may change for the `-msvc` targets in the future.
+    // WIN-SAME: i32{{.*}} %_arg0, ptr{{.*}} %arg1)
+    // WIN: [[LOADED:%[_0-9]+]] = load <16 x i8>, ptr %arg1
+    // WIN-NEXT: ret <16 x i8> [[LOADED]]
+    arg1
+}
+
+// Check that we consume the correct return ABI
+#[no_mangle]
+pub extern "C" fn forward(dst: *mut i128) {
+    // CHECK-LABEL: @forward
+    // WIN-SAME: ptr{{.*}} %dst)
+    // WIN: [[RETURNED:%[_0-9]+]] = tail call <16 x i8> @extern_ret()
+    // WIN: store <16 x i8> [[RETURNED]], ptr %dst
+    // WIN: ret void
+    unsafe { *dst = extern_ret() };
+}
+
+#[repr(C)]
+struct RetAggregate {
+    a: i32,
+    b: i128,
+}
+
+#[no_mangle]
+pub extern "C" fn ret_aggregate(_arg0: u32, arg1: i128) -> RetAggregate {
+    // CHECK-LABEL: @ret_aggregate(
+    // Aggregates should also be returned indirectly
+    // WIN-SAME: ptr{{.*}}sret([32 x i8]){{.*}}[[RET:%[_0-9]+]], i32{{.*}}%_arg0, ptr{{.*}}%arg1)
+    // WIN: [[LOADED:%[_0-9]+]] = load i128, ptr %arg1
+    // WIN: [[GEP:%[_0-9]+]] = getelementptr{{.*}}, ptr [[RET]]
+    // WIN: store i128 [[LOADED]], ptr [[GEP]]
+    // WIN: ret void
+    RetAggregate { a: 1, b: arg1 }
+}