From a916788ab460f0fad4865235a697ff82d6747e57 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand <ulrich.weigand@de.ibm.com> Date: Tue, 16 Aug 2022 21:39:42 +0200 Subject: [PATCH] Fix mis-aligned access issues with s390x (#4702) This fixes two problems: minimum symbol alignment for the LARL instruction, and alignment requirements for LRL/LGRL etc. The first problem is that the LARL instruction used to load a symbol address (PC relative) requires that the target symbol is at least 2-byte aligned. This is always guaranteed for code symbols (all instructions must be 2-aligned anyway), but not necessarily for data symbols. Other s390x compilers fix this problem by ensuring that all global symbols are always emitted with a minimum 2-byte alignment. This patch introduces an equivalent mechanism for cranelift: - Add a symbol_alignment routine to TargetIsa, similar to the existing code_section_alignment routine. - Respect symbol_alignment as minimum alignment for all symbols emitted in the object backend (code and data). The second problem is that PC-relative instructions that directly *access* data (like LRL/LGRL, STRL/STGRL etc.) not only have the 2-byte requirement like LARL, but actually require that their memory operand is *naturally* aligned (i.e. alignment is at least the size of the access). This property (natural alignment for memory accesses) is supposed to be provided by the "aligned" flag in MemFlags; however, this is not implemented correctly at the moment. To fix this, this patch: - Only emits PC-relative memory access instructions if the "aligned" flag is set in the associated MemFlags. - Fixes a bug in emit_small_memory_copy and emit_small_memset which currently set the aligned flag unconditionally, ignoring the actual alignment info passed by their caller. Tested with wasmtime and cg_clif. --- cranelift/codegen/src/isa/mod.rs | 9 + cranelift/codegen/src/isa/s390x/inst/emit.rs | 88 +++++-- cranelift/codegen/src/isa/s390x/inst/mod.rs | 226 ++++++++++++++---- .../isa/s390x/atomic_load-little.clif | 6 +- .../filetests/isa/s390x/atomic_load.clif | 6 +- .../isa/s390x/atomic_store-little.clif | 6 +- .../filetests/isa/s390x/atomic_store.clif | 6 +- .../filetests/filetests/isa/s390x/icmp.clif | 24 +- .../filetests/isa/s390x/load-little.clif | 18 +- .../filetests/filetests/isa/s390x/load.clif | 18 +- .../filetests/isa/s390x/store-little.clif | 12 +- .../filetests/filetests/isa/s390x/store.clif | 12 +- cranelift/frontend/src/frontend.rs | 8 +- cranelift/object/src/backend.rs | 16 +- 14 files changed, 323 insertions(+), 132 deletions(-) diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index f3eb30a7cc01..b9496a14a414 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -312,6 +312,15 @@ impl<'a> dyn TargetIsa + 'a { } } + /// Returns the minimum symbol alignment for this ISA. + pub fn symbol_alignment(&self) -> u64 { + match self.triple().architecture { + // All symbols need to be aligned to at least 2 on s390x. + Architecture::S390x => 2, + _ => 1, + } + } + /// Get the pointer type of this ISA. pub fn pointer_type(&self) -> ir::Type { ir::Type::int(self.pointer_bits() as u16).unwrap() diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index bbbfb1f318e2..2877610cf893 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -11,6 +11,20 @@ use crate::trace; use core::convert::TryFrom; use regalloc2::Allocation; +/// Type(s) of memory instructions available for mem_finalize. +pub struct MemInstType { + /// True if 12-bit unsigned displacement is supported. + pub have_d12: bool, + /// True if 20-bit signed displacement is supported. + pub have_d20: bool, + /// True if PC-relative addressing is supported (memory access). + pub have_pcrel: bool, + /// True if PC-relative addressing is supported (load address). + pub have_unaligned_pcrel: bool, + /// True if an index register is supported. + pub have_index: bool, +} + /// Memory addressing mode finalization: convert "special" modes (e.g., /// generic arbitrary stack offset) into real addressing modes, possibly by /// emitting some helper instructions that come immediately before the use @@ -18,10 +32,7 @@ use regalloc2::Allocation; pub fn mem_finalize( mem: &MemArg, state: &EmitState, - have_d12: bool, - have_d20: bool, - have_pcrel: bool, - have_index: bool, + mi: MemInstType, ) -> (SmallVec<[Inst; 4]>, MemArg) { let mut insts = SmallVec::new(); @@ -70,9 +81,10 @@ pub fn mem_finalize( // If this addressing mode cannot be handled by the instruction, use load-address. let need_load_address = match &mem { - &MemArg::Label { .. } | &MemArg::Symbol { .. } if !have_pcrel => true, - &MemArg::BXD20 { .. } if !have_d20 => true, - &MemArg::BXD12 { index, .. } | &MemArg::BXD20 { index, .. } if !have_index => { + &MemArg::Label { .. } | &MemArg::Symbol { .. } if !mi.have_pcrel => true, + &MemArg::Symbol { flags, .. } if !mi.have_unaligned_pcrel && !flags.aligned() => true, + &MemArg::BXD20 { .. } if !mi.have_d20 => true, + &MemArg::BXD12 { index, .. } | &MemArg::BXD20 { index, .. } if !mi.have_index => { index != zero_reg() } _ => false, @@ -93,8 +105,8 @@ pub fn mem_finalize( index, disp, flags, - } if !have_d12 => { - assert!(have_d20); + } if !mi.have_d12 => { + assert!(mi.have_d20); MemArg::BXD20 { base, index, @@ -122,10 +134,13 @@ pub fn mem_emit( let (mem_insts, mem) = mem_finalize( mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - opcode_ril.is_some(), - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: opcode_ril.is_some(), + have_unaligned_pcrel: opcode_ril.is_some() && !add_trap, + have_index: true, + }, ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); @@ -190,10 +205,13 @@ pub fn mem_rs_emit( let (mem_insts, mem) = mem_finalize( mem, state, - opcode_rs.is_some(), - opcode_rsy.is_some(), - false, - false, + MemInstType { + have_d12: opcode_rs.is_some(), + have_d20: opcode_rsy.is_some(), + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); @@ -236,7 +254,17 @@ pub fn mem_imm8_emit( emit_info: &EmitInfo, state: &mut EmitState, ) { - let (mem_insts, mem) = mem_finalize(mem, state, true, true, false, false); + let (mem_insts, mem) = mem_finalize( + mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); } @@ -274,7 +302,17 @@ pub fn mem_imm16_emit( emit_info: &EmitInfo, state: &mut EmitState, ) { - let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, false); + let (mem_insts, mem) = mem_finalize( + mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); } @@ -336,7 +374,17 @@ pub fn mem_vrx_emit( emit_info: &EmitInfo, state: &mut EmitState, ) { - let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true); + let (mem_insts, mem) = mem_finalize( + mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); } diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index c557cfe2021a..36667baf52ad 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -1170,15 +1170,8 @@ impl MachInst for Inst { //============================================================================= // Pretty-printing of instructions. -fn mem_finalize_for_show( - mem: &MemArg, - state: &EmitState, - have_d12: bool, - have_d20: bool, - have_pcrel: bool, - have_index: bool, -) -> (String, MemArg) { - let (mem_insts, mem) = mem_finalize(mem, state, have_d12, have_d20, have_pcrel, have_index); +fn mem_finalize_for_show(mem: &MemArg, state: &EmitState, mi: MemInstType) -> (String, MemArg) { + let (mem_insts, mem) = mem_finalize(mem, state, mi); let mut mem_str = mem_insts .into_iter() .map(|inst| { @@ -1342,10 +1335,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - false, - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, @@ -1602,10 +1598,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - opcode_ril.is_some(), - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: opcode_ril.is_some(), + have_unaligned_pcrel: false, + have_index: true, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, @@ -1706,7 +1705,17 @@ impl Inst { let rd = pretty_print_reg(rd.to_reg(), allocs); let rn = pretty_print_reg(rn, allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: false, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, op, rd, rn, mem) } @@ -1723,10 +1732,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rs.is_some(), - opcode_rsy.is_some(), - false, - false, + MemInstType { + have_d12: opcode_rs.is_some(), + have_d20: opcode_rsy.is_some(), + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rs, @@ -1777,10 +1789,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - opcode_ril.is_some(), - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: opcode_ril.is_some(), + have_unaligned_pcrel: false, + have_index: true, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, @@ -1814,10 +1829,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - opcode_ril.is_some(), - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: opcode_ril.is_some(), + have_unaligned_pcrel: false, + have_index: true, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, @@ -1831,7 +1849,17 @@ impl Inst { } &Inst::StoreImm8 { imm, ref mem } => { let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let op = match &mem { &MemArg::BXD12 { .. } => "mvi", &MemArg::BXD20 { .. } => "mviy", @@ -1845,7 +1873,17 @@ impl Inst { | &Inst::StoreImm32SExt16 { imm, ref mem } | &Inst::StoreImm64SExt16 { imm, ref mem } => { let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: false, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let op = match self { &Inst::StoreImm16 { .. } => "mvhhi", &Inst::StoreImm32SExt16 { .. } => "mvhi", @@ -1874,7 +1912,17 @@ impl Inst { } &Inst::LoadMultiple64 { rt, rt2, ref mem } => { let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: false, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let rt = pretty_print_reg(rt.to_reg(), &mut empty_allocs); let rt2 = pretty_print_reg(rt2.to_reg(), &mut empty_allocs); let mem = mem.pretty_print_default(); @@ -1882,7 +1930,17 @@ impl Inst { } &Inst::StoreMultiple64 { rt, rt2, ref mem } => { let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: false, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let rt = pretty_print_reg(rt, &mut empty_allocs); let rt2 = pretty_print_reg(rt2, &mut empty_allocs); let mem = mem.pretty_print_default(); @@ -2561,7 +2619,17 @@ impl Inst { let rd = pretty_print_reg(rd.to_reg(), allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, opcode, rd, mem) } @@ -2587,7 +2655,17 @@ impl Inst { let rd = pretty_print_reg(rd, allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, opcode, rd, mem) } @@ -2606,7 +2684,17 @@ impl Inst { let rd = pretty_print_reg(rd.to_reg(), allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, opcode, rd, mem) } @@ -2734,8 +2822,17 @@ impl Inst { let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); let mem = mem.with_allocs(allocs); if lane_imm == 0 && rd_fpr.is_some() && opcode_rx.is_some() { - let (mem_str, mem) = - mem_finalize_for_show(&mem, state, true, true, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, @@ -2744,8 +2841,17 @@ impl Inst { let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem) } else { - let (mem_str, mem) = - mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, opcode_vrx, rd, mem, lane_imm) } @@ -2776,8 +2882,17 @@ impl Inst { let (rd, rd_fpr) = pretty_print_fpr(rd, allocs); let mem = mem.with_allocs(allocs); if lane_imm == 0 && rd_fpr.is_some() && opcode_rx.is_some() { - let (mem_str, mem) = - mem_finalize_for_show(&mem, state, true, true, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, @@ -2786,8 +2901,17 @@ impl Inst { let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem) } else { - let (mem_str, mem) = - mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, opcode_vrx, rd, mem, lane_imm,) } @@ -3017,7 +3141,17 @@ impl Inst { &Inst::LoadAddr { rd, ref mem } => { let rd = pretty_print_reg(rd.to_reg(), allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, true, true, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: true, + have_unaligned_pcrel: true, + have_index: true, + }, + ); let op = match &mem { &MemArg::BXD12 { .. } => "la", &MemArg::BXD20 { .. } => "lay", diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif index fa493bcdd0e6..539fee2c536f 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif @@ -15,7 +15,7 @@ function %atomic_load_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i64 little v0 + v1 = atomic_load.i64 aligned little v0 return v1 } @@ -37,7 +37,7 @@ function %atomic_load_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i32 little v0 + v1 = atomic_load.i32 aligned little v0 return v1 } @@ -59,7 +59,7 @@ function %atomic_load_i16_sym() -> i16 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i16 little v0 + v1 = atomic_load.i16 aligned little v0 return v1 } diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif index 673577633bb7..dfec456e9816 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif @@ -15,7 +15,7 @@ function %atomic_load_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i64 v0 + v1 = atomic_load.i64 aligned v0 return v1 } @@ -37,7 +37,7 @@ function %atomic_load_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i32 v0 + v1 = atomic_load.i32 aligned v0 return v1 } @@ -59,7 +59,7 @@ function %atomic_load_i16_sym() -> i16 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i16 v0 + v1 = atomic_load.i16 aligned v0 return v1 } diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif index 1f83d1e81ec6..8e9fd78c9a6a 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif @@ -16,7 +16,7 @@ function %atomic_store_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - atomic_store.i64 little v0, v1 + atomic_store.i64 aligned little v0, v1 return } @@ -53,7 +53,7 @@ function %atomic_store_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - atomic_store.i32 little v0, v1 + atomic_store.i32 aligned little v0, v1 return } @@ -90,7 +90,7 @@ function %atomic_store_i16_sym(i16) { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - atomic_store.i16 little v0, v1 + atomic_store.i16 aligned little v0, v1 return } diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_store.clif b/cranelift/filetests/filetests/isa/s390x/atomic_store.clif index f536779be3e1..9c2944745c2b 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_store.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_store.clif @@ -16,7 +16,7 @@ function %atomic_store_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - atomic_store.i64 v0, v1 + atomic_store.i64 aligned v0, v1 return } @@ -52,7 +52,7 @@ function %atomic_store_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - atomic_store.i32 v0, v1 + atomic_store.i32 aligned v0, v1 return } @@ -88,7 +88,7 @@ function %atomic_store_i16_sym(i16) { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - atomic_store.i16 v0, v1 + atomic_store.i16 aligned v0, v1 return } diff --git a/cranelift/filetests/filetests/isa/s390x/icmp.clif b/cranelift/filetests/filetests/isa/s390x/icmp.clif index 6d1c2b0ce178..66572b233a52 100644 --- a/cranelift/filetests/filetests/isa/s390x/icmp.clif +++ b/cranelift/filetests/filetests/isa/s390x/icmp.clif @@ -69,7 +69,7 @@ function %icmp_slt_i64_sym(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = load.i64 v1 + v2 = load.i64 aligned v1 v3 = icmp.i64 slt v0, v2 return v3 } @@ -97,7 +97,7 @@ function %icmp_slt_i64_sym_ext16(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = sload16.i64 v1 + v2 = sload16.i64 aligned v1 v3 = icmp.i64 slt v0, v2 return v3 } @@ -125,7 +125,7 @@ function %icmp_slt_i64_sym_ext32(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = sload32.i64 v1 + v2 = sload32.i64 aligned v1 v3 = icmp.i64 slt v0, v2 return v3 } @@ -204,7 +204,7 @@ function %icmp_slt_i32_sym(i32) -> b1 { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - v2 = load.i32 v1 + v2 = load.i32 aligned v1 v3 = icmp.i32 slt v0, v2 return v3 } @@ -245,7 +245,7 @@ function %icmp_slt_i32_sym_ext16(i32) -> b1 { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - v2 = sload16.i32 v1 + v2 = sload16.i32 aligned v1 v3 = icmp.i32 slt v0, v2 return v3 } @@ -303,7 +303,7 @@ function %icmp_slt_i16_sym(i16) -> b1 { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - v2 = load.i16 v1 + v2 = load.i16 aligned v1 v3 = icmp.i16 slt v0, v2 return v3 } @@ -415,7 +415,7 @@ function %icmp_ult_i64_sym(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = load.i64 v1 + v2 = load.i64 aligned v1 v3 = icmp.i64 ult v0, v2 return v3 } @@ -443,7 +443,7 @@ function %icmp_ult_i64_sym_ext32(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = uload32.i64 v1 + v2 = uload32.i64 aligned v1 v3 = icmp.i64 ult v0, v2 return v3 } @@ -472,7 +472,7 @@ function %icmp_ult_i64_sym_ext16(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = uload16.i64 v1 + v2 = uload16.i64 aligned v1 v3 = icmp.i64 ult v0, v2 return v3 } @@ -538,7 +538,7 @@ function %icmp_ult_i32_sym(i32) -> b1 { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - v2 = load.i32 v1 + v2 = load.i32 aligned v1 v3 = icmp.i32 ult v0, v2 return v3 } @@ -567,7 +567,7 @@ function %icmp_ult_i32_sym_ext16(i32) -> b1 { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - v2 = uload16.i32 v1 + v2 = uload16.i32 aligned v1 v3 = icmp.i32 ult v0, v2 return v3 } @@ -627,7 +627,7 @@ function %icmp_ult_i16_mem(i16) -> b1 { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - v2 = load.i16 v1 + v2 = load.i16 aligned v1 v3 = icmp.i16 ult v0, v2 return v3 } diff --git a/cranelift/filetests/filetests/isa/s390x/load-little.clif b/cranelift/filetests/filetests/isa/s390x/load-little.clif index 876e929f773f..329f87cd3066 100644 --- a/cranelift/filetests/filetests/isa/s390x/load-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/load-little.clif @@ -15,7 +15,7 @@ function %load_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i64 little v0 + v1 = load.i64 aligned little v0 return v1 } @@ -58,7 +58,7 @@ function %uload16_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload16.i64 little v0 + v1 = uload16.i64 aligned little v0 return v1 } @@ -82,7 +82,7 @@ function %sload16_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload16.i64 little v0 + v1 = sload16.i64 aligned little v0 return v1 } @@ -106,7 +106,7 @@ function %uload32_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload32.i64 little v0 + v1 = uload32.i64 aligned little v0 return v1 } @@ -130,7 +130,7 @@ function %sload32_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload32.i64 little v0 + v1 = sload32.i64 aligned little v0 return v1 } @@ -153,7 +153,7 @@ function %load_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i32 little v0 + v1 = load.i32 aligned little v0 return v1 } @@ -196,7 +196,7 @@ function %uload16_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload16.i32 little v0 + v1 = uload16.i32 aligned little v0 return v1 } @@ -220,7 +220,7 @@ function %sload16_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload16.i32 little v0 + v1 = sload16.i32 aligned little v0 return v1 } @@ -243,7 +243,7 @@ function %load_i16_sym() -> i16 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i16 little v0 + v1 = load.i16 aligned little v0 return v1 } diff --git a/cranelift/filetests/filetests/isa/s390x/load.clif b/cranelift/filetests/filetests/isa/s390x/load.clif index 1d0a4a10c738..8e53baae36af 100644 --- a/cranelift/filetests/filetests/isa/s390x/load.clif +++ b/cranelift/filetests/filetests/isa/s390x/load.clif @@ -15,7 +15,7 @@ function %load_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i64 v0 + v1 = load.i64 aligned v0 return v1 } @@ -57,7 +57,7 @@ function %uload16_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload16.i64 v0 + v1 = uload16.i64 aligned v0 return v1 } @@ -79,7 +79,7 @@ function %sload16_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload16.i64 v0 + v1 = sload16.i64 aligned v0 return v1 } @@ -101,7 +101,7 @@ function %uload32_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload32.i64 v0 + v1 = uload32.i64 aligned v0 return v1 } @@ -123,7 +123,7 @@ function %sload32_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload32.i64 v0 + v1 = sload32.i64 aligned v0 return v1 } @@ -145,7 +145,7 @@ function %load_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i32 v0 + v1 = load.i32 aligned v0 return v1 } @@ -197,7 +197,7 @@ function %uload16_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload16.i32 v0 + v1 = uload16.i32 aligned v0 return v1 } @@ -229,7 +229,7 @@ function %sload16_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload16.i32 v0 + v1 = sload16.i32 aligned v0 return v1 } @@ -251,7 +251,7 @@ function %load_i16_sym() -> i16 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i16 v0 + v1 = load.i16 aligned v0 return v1 } diff --git a/cranelift/filetests/filetests/isa/s390x/store-little.clif b/cranelift/filetests/filetests/isa/s390x/store-little.clif index 79b172ff7261..1ceebec3d263 100644 --- a/cranelift/filetests/filetests/isa/s390x/store-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/store-little.clif @@ -15,7 +15,7 @@ function %store_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - store.i64 little v0, v1 + store.i64 aligned little v0, v1 return } @@ -70,7 +70,7 @@ function %istore16_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - istore16.i64 little v0, v1 + istore16.i64 aligned little v0, v1 return } @@ -103,7 +103,7 @@ function %istore32_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - istore32.i64 little v0, v1 + istore32.i64 aligned little v0, v1 return } @@ -137,7 +137,7 @@ function %store_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - store.i32 little v0, v1 + store.i32 aligned little v0, v1 return } @@ -192,7 +192,7 @@ function %istore16_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - istore16.i32 little v0, v1 + istore16.i32 aligned little v0, v1 return } @@ -225,7 +225,7 @@ function %store_i16_sym(i16) { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - store.i16 little v0, v1 + store.i16 aligned little v0, v1 return } diff --git a/cranelift/filetests/filetests/isa/s390x/store.clif b/cranelift/filetests/filetests/isa/s390x/store.clif index b0cea254e4a7..15c09b76b748 100644 --- a/cranelift/filetests/filetests/isa/s390x/store.clif +++ b/cranelift/filetests/filetests/isa/s390x/store.clif @@ -15,7 +15,7 @@ function %store_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - store.i64 v0, v1 + store.i64 aligned v0, v1 return } @@ -69,7 +69,7 @@ function %istore16_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - istore16.i64 v0, v1 + istore16.i64 aligned v0, v1 return } @@ -102,7 +102,7 @@ function %istore32_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - istore32.i64 v0, v1 + istore32.i64 aligned v0, v1 return } @@ -135,7 +135,7 @@ function %store_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - store.i32 v0, v1 + store.i32 aligned v0, v1 return } @@ -199,7 +199,7 @@ function %istore16_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - istore16.i32 v0, v1 + istore16.i32 aligned v0, v1 return } @@ -232,7 +232,7 @@ function %store_i16_sym(i16) { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - store.i16 v0, v1 + store.i16 aligned v0, v1 return } diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index 7a20d5c6e5e9..af2240ff6e69 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -816,7 +816,9 @@ impl<'a> FunctionBuilder<'a> { return; } - flags.set_aligned(); + if u64::from(src_align) >= access_size && u64::from(dest_align) >= access_size { + flags.set_aligned(); + } // Load all of the memory first. This is necessary in case `dest` overlaps. // It can also improve performance a bit. @@ -903,7 +905,9 @@ impl<'a> FunctionBuilder<'a> { let size = self.ins().iconst(config.pointer_type(), size as i64); self.call_memset(config, buffer, ch, size); } else { - flags.set_aligned(); + if u64::from(buffer_align) >= access_size { + flags.set_aligned(); + } let ch = u64::from(ch); let raw_value = if int_type == types::I64 { diff --git a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs index 6f7a4e0af140..845b279986d6 100644 --- a/cranelift/object/src/backend.rs +++ b/cranelift/object/src/backend.rs @@ -348,22 +348,18 @@ impl Module for ObjectModule { } *defined = true; + let align = std::cmp::max(self.function_alignment, self.isa.symbol_alignment()); let (section, offset) = if self.per_function_section { let symbol_name = self.object.symbol(symbol).name.clone(); - let (section, offset) = self.object.add_subsection( - StandardSection::Text, - &symbol_name, - bytes, - self.function_alignment, - ); + let (section, offset) = + self.object + .add_subsection(StandardSection::Text, &symbol_name, bytes, align); self.object.symbol_mut(symbol).section = SymbolSection::Section(section); self.object.symbol_mut(symbol).value = offset; (section, offset) } else { let section = self.object.section_id(StandardSection::Text); - let offset = - self.object - .add_symbol_data(symbol, section, bytes, self.function_alignment); + let offset = self.object.add_symbol_data(symbol, section, bytes, align); (section, offset) }; @@ -452,7 +448,7 @@ impl Module for ObjectModule { ) }; - let align = align.unwrap_or(1); + let align = std::cmp::max(align.unwrap_or(1), self.isa.symbol_alignment()); let offset = match *init { Init::Uninitialized => { panic!("data is not initialized yet");