diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 5904f51b209e..3626bc8e46e0 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -308,6 +308,15 @@ impl<'a> dyn TargetIsa + 'a { } } + /// Returns the minimum symbol alignment for this ISA. + pub fn symbol_alignment(&self) -> u64 { + match self.triple().architecture { + // All symbols need to be aligned to at least 2 on s390x. + Architecture::S390x => 2, + _ => 1, + } + } + /// Get the pointer type of this ISA. pub fn pointer_type(&self) -> ir::Type { ir::Type::int(self.pointer_bits() as u16).unwrap() diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index 62f3736fe10c..0e7e65352915 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -11,6 +11,20 @@ use crate::trace; use core::convert::TryFrom; use regalloc2::Allocation; +/// Type(s) of memory instructions available for mem_finalize. +pub struct MemInstType { + /// True if 12-bit unsigned displacement is supported. + pub have_d12: bool, + /// True if 20-bit signed displacement is supported. + pub have_d20: bool, + /// True if PC-relative addressing is supported (memory access). + pub have_pcrel: bool, + /// True if PC-relative addressing is supported (load address). + pub have_unaligned_pcrel: bool, + /// True if an index register is supported. + pub have_index: bool, +} + /// Memory addressing mode finalization: convert "special" modes (e.g., /// generic arbitrary stack offset) into real addressing modes, possibly by /// emitting some helper instructions that come immediately before the use @@ -18,10 +32,7 @@ use regalloc2::Allocation; pub fn mem_finalize( mem: &MemArg, state: &EmitState, - have_d12: bool, - have_d20: bool, - have_pcrel: bool, - have_index: bool, + mi: MemInstType, ) -> (SmallVec<[Inst; 4]>, MemArg) { let mut insts = SmallVec::new(); @@ -70,9 +81,10 @@ pub fn mem_finalize( // If this addressing mode cannot be handled by the instruction, use load-address. let need_load_address = match &mem { - &MemArg::Label { .. } | &MemArg::Symbol { .. } if !have_pcrel => true, - &MemArg::BXD20 { .. } if !have_d20 => true, - &MemArg::BXD12 { index, .. } | &MemArg::BXD20 { index, .. } if !have_index => { + &MemArg::Label { .. } | &MemArg::Symbol { .. } if !mi.have_pcrel => true, + &MemArg::Symbol { flags, .. } if !mi.have_unaligned_pcrel && !flags.aligned() => true, + &MemArg::BXD20 { .. } if !mi.have_d20 => true, + &MemArg::BXD12 { index, .. } | &MemArg::BXD20 { index, .. } if !mi.have_index => { index != zero_reg() } _ => false, @@ -93,8 +105,8 @@ pub fn mem_finalize( index, disp, flags, - } if !have_d12 => { - assert!(have_d20); + } if !mi.have_d12 => { + assert!(mi.have_d20); MemArg::BXD20 { base, index, @@ -122,10 +134,13 @@ pub fn mem_emit( let (mem_insts, mem) = mem_finalize( mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - opcode_ril.is_some(), - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: opcode_ril.is_some(), + have_unaligned_pcrel: opcode_ril.is_some() && !add_trap, + have_index: true, + }, ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); @@ -190,10 +205,13 @@ pub fn mem_rs_emit( let (mem_insts, mem) = mem_finalize( mem, state, - opcode_rs.is_some(), - opcode_rsy.is_some(), - false, - false, + MemInstType { + have_d12: opcode_rs.is_some(), + have_d20: opcode_rsy.is_some(), + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); @@ -236,7 +254,17 @@ pub fn mem_imm8_emit( emit_info: &EmitInfo, state: &mut EmitState, ) { - let (mem_insts, mem) = mem_finalize(mem, state, true, true, false, false); + let (mem_insts, mem) = mem_finalize( + mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); } @@ -274,7 +302,17 @@ pub fn mem_imm16_emit( emit_info: &EmitInfo, state: &mut EmitState, ) { - let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, false); + let (mem_insts, mem) = mem_finalize( + mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); } @@ -336,7 +374,17 @@ pub fn mem_vrx_emit( emit_info: &EmitInfo, state: &mut EmitState, ) { - let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true); + let (mem_insts, mem) = mem_finalize( + mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); } diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 60e148cd8db8..48d1cfbe66dc 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -1168,15 +1168,8 @@ impl MachInst for Inst { //============================================================================= // Pretty-printing of instructions. -fn mem_finalize_for_show( - mem: &MemArg, - state: &EmitState, - have_d12: bool, - have_d20: bool, - have_pcrel: bool, - have_index: bool, -) -> (String, MemArg) { - let (mem_insts, mem) = mem_finalize(mem, state, have_d12, have_d20, have_pcrel, have_index); +fn mem_finalize_for_show(mem: &MemArg, state: &EmitState, mi: MemInstType) -> (String, MemArg) { + let (mem_insts, mem) = mem_finalize(mem, state, mi); let mut mem_str = mem_insts .into_iter() .map(|inst| { @@ -1340,10 +1333,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - false, - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, @@ -1600,10 +1596,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - opcode_ril.is_some(), - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: opcode_ril.is_some(), + have_unaligned_pcrel: false, + have_index: true, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, @@ -1704,7 +1703,17 @@ impl Inst { let rd = pretty_print_reg(rd.to_reg(), allocs); let rn = pretty_print_reg(rn, allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: false, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, op, rd, rn, mem) } @@ -1721,10 +1730,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rs.is_some(), - opcode_rsy.is_some(), - false, - false, + MemInstType { + have_d12: opcode_rs.is_some(), + have_d20: opcode_rsy.is_some(), + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rs, @@ -1775,10 +1787,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - opcode_ril.is_some(), - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: opcode_ril.is_some(), + have_unaligned_pcrel: false, + have_index: true, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, @@ -1812,10 +1827,13 @@ impl Inst { let (mem_str, mem) = mem_finalize_for_show( &mem, state, - opcode_rx.is_some(), - opcode_rxy.is_some(), - opcode_ril.is_some(), - true, + MemInstType { + have_d12: opcode_rx.is_some(), + have_d20: opcode_rxy.is_some(), + have_pcrel: opcode_ril.is_some(), + have_unaligned_pcrel: false, + have_index: true, + }, ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, @@ -1829,7 +1847,17 @@ impl Inst { } &Inst::StoreImm8 { imm, ref mem } => { let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let op = match &mem { &MemArg::BXD12 { .. } => "mvi", &MemArg::BXD20 { .. } => "mviy", @@ -1843,7 +1871,17 @@ impl Inst { | &Inst::StoreImm32SExt16 { imm, ref mem } | &Inst::StoreImm64SExt16 { imm, ref mem } => { let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: false, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let op = match self { &Inst::StoreImm16 { .. } => "mvhhi", &Inst::StoreImm32SExt16 { .. } => "mvhi", @@ -1872,7 +1910,17 @@ impl Inst { } &Inst::LoadMultiple64 { rt, rt2, ref mem } => { let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: false, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let rt = pretty_print_reg(rt.to_reg(), &mut empty_allocs); let rt2 = pretty_print_reg(rt2.to_reg(), &mut empty_allocs); let mem = mem.pretty_print_default(); @@ -1880,7 +1928,17 @@ impl Inst { } &Inst::StoreMultiple64 { rt, rt2, ref mem } => { let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: false, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: false, + }, + ); let rt = pretty_print_reg(rt, &mut empty_allocs); let rt2 = pretty_print_reg(rt2, &mut empty_allocs); let mem = mem.pretty_print_default(); @@ -2559,7 +2617,17 @@ impl Inst { let rd = pretty_print_reg(rd.to_reg(), allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, opcode, rd, mem) } @@ -2585,7 +2653,17 @@ impl Inst { let rd = pretty_print_reg(rd, allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, opcode, rd, mem) } @@ -2604,7 +2682,17 @@ impl Inst { let rd = pretty_print_reg(rd.to_reg(), allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, opcode, rd, mem) } @@ -2732,8 +2820,17 @@ impl Inst { let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs); let mem = mem.with_allocs(allocs); if lane_imm == 0 && rd_fpr.is_some() && opcode_rx.is_some() { - let (mem_str, mem) = - mem_finalize_for_show(&mem, state, true, true, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, @@ -2742,8 +2839,17 @@ impl Inst { let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem) } else { - let (mem_str, mem) = - mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, opcode_vrx, rd, mem, lane_imm) } @@ -2774,8 +2880,17 @@ impl Inst { let (rd, rd_fpr) = pretty_print_fpr(rd, allocs); let mem = mem.with_allocs(allocs); if lane_imm == 0 && rd_fpr.is_some() && opcode_rx.is_some() { - let (mem_str, mem) = - mem_finalize_for_show(&mem, state, true, true, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, @@ -2784,8 +2899,17 @@ impl Inst { let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem) } else { - let (mem_str, mem) = - mem_finalize_for_show(&mem, state, true, false, false, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: false, + have_pcrel: false, + have_unaligned_pcrel: false, + have_index: true, + }, + ); let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, opcode_vrx, rd, mem, lane_imm,) } @@ -3011,7 +3135,17 @@ impl Inst { &Inst::LoadAddr { rd, ref mem } => { let rd = pretty_print_reg(rd.to_reg(), allocs); let mem = mem.with_allocs(allocs); - let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, true, true, true); + let (mem_str, mem) = mem_finalize_for_show( + &mem, + state, + MemInstType { + have_d12: true, + have_d20: true, + have_pcrel: true, + have_unaligned_pcrel: true, + have_index: true, + }, + ); let op = match &mem { &MemArg::BXD12 { .. } => "la", &MemArg::BXD20 { .. } => "lay", diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif index fa493bcdd0e6..539fee2c536f 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif @@ -15,7 +15,7 @@ function %atomic_load_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i64 little v0 + v1 = atomic_load.i64 aligned little v0 return v1 } @@ -37,7 +37,7 @@ function %atomic_load_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i32 little v0 + v1 = atomic_load.i32 aligned little v0 return v1 } @@ -59,7 +59,7 @@ function %atomic_load_i16_sym() -> i16 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i16 little v0 + v1 = atomic_load.i16 aligned little v0 return v1 } diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif index 673577633bb7..dfec456e9816 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif @@ -15,7 +15,7 @@ function %atomic_load_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i64 v0 + v1 = atomic_load.i64 aligned v0 return v1 } @@ -37,7 +37,7 @@ function %atomic_load_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i32 v0 + v1 = atomic_load.i32 aligned v0 return v1 } @@ -59,7 +59,7 @@ function %atomic_load_i16_sym() -> i16 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = atomic_load.i16 v0 + v1 = atomic_load.i16 aligned v0 return v1 } diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif index 1f83d1e81ec6..8e9fd78c9a6a 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif @@ -16,7 +16,7 @@ function %atomic_store_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - atomic_store.i64 little v0, v1 + atomic_store.i64 aligned little v0, v1 return } @@ -53,7 +53,7 @@ function %atomic_store_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - atomic_store.i32 little v0, v1 + atomic_store.i32 aligned little v0, v1 return } @@ -90,7 +90,7 @@ function %atomic_store_i16_sym(i16) { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - atomic_store.i16 little v0, v1 + atomic_store.i16 aligned little v0, v1 return } diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_store.clif b/cranelift/filetests/filetests/isa/s390x/atomic_store.clif index f536779be3e1..9c2944745c2b 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_store.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_store.clif @@ -16,7 +16,7 @@ function %atomic_store_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - atomic_store.i64 v0, v1 + atomic_store.i64 aligned v0, v1 return } @@ -52,7 +52,7 @@ function %atomic_store_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - atomic_store.i32 v0, v1 + atomic_store.i32 aligned v0, v1 return } @@ -88,7 +88,7 @@ function %atomic_store_i16_sym(i16) { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - atomic_store.i16 v0, v1 + atomic_store.i16 aligned v0, v1 return } diff --git a/cranelift/filetests/filetests/isa/s390x/icmp.clif b/cranelift/filetests/filetests/isa/s390x/icmp.clif index 6d1c2b0ce178..66572b233a52 100644 --- a/cranelift/filetests/filetests/isa/s390x/icmp.clif +++ b/cranelift/filetests/filetests/isa/s390x/icmp.clif @@ -69,7 +69,7 @@ function %icmp_slt_i64_sym(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = load.i64 v1 + v2 = load.i64 aligned v1 v3 = icmp.i64 slt v0, v2 return v3 } @@ -97,7 +97,7 @@ function %icmp_slt_i64_sym_ext16(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = sload16.i64 v1 + v2 = sload16.i64 aligned v1 v3 = icmp.i64 slt v0, v2 return v3 } @@ -125,7 +125,7 @@ function %icmp_slt_i64_sym_ext32(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = sload32.i64 v1 + v2 = sload32.i64 aligned v1 v3 = icmp.i64 slt v0, v2 return v3 } @@ -204,7 +204,7 @@ function %icmp_slt_i32_sym(i32) -> b1 { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - v2 = load.i32 v1 + v2 = load.i32 aligned v1 v3 = icmp.i32 slt v0, v2 return v3 } @@ -245,7 +245,7 @@ function %icmp_slt_i32_sym_ext16(i32) -> b1 { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - v2 = sload16.i32 v1 + v2 = sload16.i32 aligned v1 v3 = icmp.i32 slt v0, v2 return v3 } @@ -303,7 +303,7 @@ function %icmp_slt_i16_sym(i16) -> b1 { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - v2 = load.i16 v1 + v2 = load.i16 aligned v1 v3 = icmp.i16 slt v0, v2 return v3 } @@ -415,7 +415,7 @@ function %icmp_ult_i64_sym(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = load.i64 v1 + v2 = load.i64 aligned v1 v3 = icmp.i64 ult v0, v2 return v3 } @@ -443,7 +443,7 @@ function %icmp_ult_i64_sym_ext32(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = uload32.i64 v1 + v2 = uload32.i64 aligned v1 v3 = icmp.i64 ult v0, v2 return v3 } @@ -472,7 +472,7 @@ function %icmp_ult_i64_sym_ext16(i64) -> b1 { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - v2 = uload16.i64 v1 + v2 = uload16.i64 aligned v1 v3 = icmp.i64 ult v0, v2 return v3 } @@ -538,7 +538,7 @@ function %icmp_ult_i32_sym(i32) -> b1 { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - v2 = load.i32 v1 + v2 = load.i32 aligned v1 v3 = icmp.i32 ult v0, v2 return v3 } @@ -567,7 +567,7 @@ function %icmp_ult_i32_sym_ext16(i32) -> b1 { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - v2 = uload16.i32 v1 + v2 = uload16.i32 aligned v1 v3 = icmp.i32 ult v0, v2 return v3 } @@ -627,7 +627,7 @@ function %icmp_ult_i16_mem(i16) -> b1 { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - v2 = load.i16 v1 + v2 = load.i16 aligned v1 v3 = icmp.i16 ult v0, v2 return v3 } diff --git a/cranelift/filetests/filetests/isa/s390x/load-little.clif b/cranelift/filetests/filetests/isa/s390x/load-little.clif index 876e929f773f..329f87cd3066 100644 --- a/cranelift/filetests/filetests/isa/s390x/load-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/load-little.clif @@ -15,7 +15,7 @@ function %load_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i64 little v0 + v1 = load.i64 aligned little v0 return v1 } @@ -58,7 +58,7 @@ function %uload16_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload16.i64 little v0 + v1 = uload16.i64 aligned little v0 return v1 } @@ -82,7 +82,7 @@ function %sload16_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload16.i64 little v0 + v1 = sload16.i64 aligned little v0 return v1 } @@ -106,7 +106,7 @@ function %uload32_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload32.i64 little v0 + v1 = uload32.i64 aligned little v0 return v1 } @@ -130,7 +130,7 @@ function %sload32_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload32.i64 little v0 + v1 = sload32.i64 aligned little v0 return v1 } @@ -153,7 +153,7 @@ function %load_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i32 little v0 + v1 = load.i32 aligned little v0 return v1 } @@ -196,7 +196,7 @@ function %uload16_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload16.i32 little v0 + v1 = uload16.i32 aligned little v0 return v1 } @@ -220,7 +220,7 @@ function %sload16_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload16.i32 little v0 + v1 = sload16.i32 aligned little v0 return v1 } @@ -243,7 +243,7 @@ function %load_i16_sym() -> i16 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i16 little v0 + v1 = load.i16 aligned little v0 return v1 } diff --git a/cranelift/filetests/filetests/isa/s390x/load.clif b/cranelift/filetests/filetests/isa/s390x/load.clif index 1d0a4a10c738..8e53baae36af 100644 --- a/cranelift/filetests/filetests/isa/s390x/load.clif +++ b/cranelift/filetests/filetests/isa/s390x/load.clif @@ -15,7 +15,7 @@ function %load_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i64 v0 + v1 = load.i64 aligned v0 return v1 } @@ -57,7 +57,7 @@ function %uload16_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload16.i64 v0 + v1 = uload16.i64 aligned v0 return v1 } @@ -79,7 +79,7 @@ function %sload16_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload16.i64 v0 + v1 = sload16.i64 aligned v0 return v1 } @@ -101,7 +101,7 @@ function %uload32_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload32.i64 v0 + v1 = uload32.i64 aligned v0 return v1 } @@ -123,7 +123,7 @@ function %sload32_i64_sym() -> i64 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload32.i64 v0 + v1 = sload32.i64 aligned v0 return v1 } @@ -145,7 +145,7 @@ function %load_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i32 v0 + v1 = load.i32 aligned v0 return v1 } @@ -197,7 +197,7 @@ function %uload16_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = uload16.i32 v0 + v1 = uload16.i32 aligned v0 return v1 } @@ -229,7 +229,7 @@ function %sload16_i32_sym() -> i32 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = sload16.i32 v0 + v1 = sload16.i32 aligned v0 return v1 } @@ -251,7 +251,7 @@ function %load_i16_sym() -> i16 { gv0 = symbol colocated %sym block0: v0 = symbol_value.i64 gv0 - v1 = load.i16 v0 + v1 = load.i16 aligned v0 return v1 } diff --git a/cranelift/filetests/filetests/isa/s390x/store-little.clif b/cranelift/filetests/filetests/isa/s390x/store-little.clif index 79b172ff7261..1ceebec3d263 100644 --- a/cranelift/filetests/filetests/isa/s390x/store-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/store-little.clif @@ -15,7 +15,7 @@ function %store_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - store.i64 little v0, v1 + store.i64 aligned little v0, v1 return } @@ -70,7 +70,7 @@ function %istore16_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - istore16.i64 little v0, v1 + istore16.i64 aligned little v0, v1 return } @@ -103,7 +103,7 @@ function %istore32_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - istore32.i64 little v0, v1 + istore32.i64 aligned little v0, v1 return } @@ -137,7 +137,7 @@ function %store_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - store.i32 little v0, v1 + store.i32 aligned little v0, v1 return } @@ -192,7 +192,7 @@ function %istore16_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - istore16.i32 little v0, v1 + istore16.i32 aligned little v0, v1 return } @@ -225,7 +225,7 @@ function %store_i16_sym(i16) { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - store.i16 little v0, v1 + store.i16 aligned little v0, v1 return } diff --git a/cranelift/filetests/filetests/isa/s390x/store.clif b/cranelift/filetests/filetests/isa/s390x/store.clif index b0cea254e4a7..15c09b76b748 100644 --- a/cranelift/filetests/filetests/isa/s390x/store.clif +++ b/cranelift/filetests/filetests/isa/s390x/store.clif @@ -15,7 +15,7 @@ function %store_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - store.i64 v0, v1 + store.i64 aligned v0, v1 return } @@ -69,7 +69,7 @@ function %istore16_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - istore16.i64 v0, v1 + istore16.i64 aligned v0, v1 return } @@ -102,7 +102,7 @@ function %istore32_i64_sym(i64) { gv0 = symbol colocated %sym block0(v0: i64): v1 = symbol_value.i64 gv0 - istore32.i64 v0, v1 + istore32.i64 aligned v0, v1 return } @@ -135,7 +135,7 @@ function %store_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - store.i32 v0, v1 + store.i32 aligned v0, v1 return } @@ -199,7 +199,7 @@ function %istore16_i32_sym(i32) { gv0 = symbol colocated %sym block0(v0: i32): v1 = symbol_value.i64 gv0 - istore16.i32 v0, v1 + istore16.i32 aligned v0, v1 return } @@ -232,7 +232,7 @@ function %store_i16_sym(i16) { gv0 = symbol colocated %sym block0(v0: i16): v1 = symbol_value.i64 gv0 - store.i16 v0, v1 + store.i16 aligned v0, v1 return } diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index 644c38e729b3..21255aa86a1d 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -810,7 +810,9 @@ impl<'a> FunctionBuilder<'a> { return; } - flags.set_aligned(); + if u64::from(src_align) >= access_size && u64::from(dest_align) >= access_size { + flags.set_aligned(); + } // Load all of the memory first. This is necessary in case `dest` overlaps. // It can also improve performance a bit. @@ -897,7 +899,9 @@ impl<'a> FunctionBuilder<'a> { let size = self.ins().iconst(config.pointer_type(), size as i64); self.call_memset(config, buffer, ch, size); } else { - flags.set_aligned(); + if u64::from(buffer_align) >= access_size { + flags.set_aligned(); + } let ch = u64::from(ch); let raw_value = if int_type == types::I64 { diff --git a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs index 88cca41bbe64..33ff8d9982d8 100644 --- a/cranelift/object/src/backend.rs +++ b/cranelift/object/src/backend.rs @@ -342,22 +342,18 @@ impl Module for ObjectModule { } *defined = true; + let align = std::cmp::max(self.function_alignment, self.isa.symbol_alignment()); let (section, offset) = if self.per_function_section { let symbol_name = self.object.symbol(symbol).name.clone(); - let (section, offset) = self.object.add_subsection( - StandardSection::Text, - &symbol_name, - bytes, - self.function_alignment, - ); + let (section, offset) = + self.object + .add_subsection(StandardSection::Text, &symbol_name, bytes, align); self.object.symbol_mut(symbol).section = SymbolSection::Section(section); self.object.symbol_mut(symbol).value = offset; (section, offset) } else { let section = self.object.section_id(StandardSection::Text); - let offset = - self.object - .add_symbol_data(symbol, section, bytes, self.function_alignment); + let offset = self.object.add_symbol_data(symbol, section, bytes, align); (section, offset) }; @@ -446,7 +442,7 @@ impl Module for ObjectModule { ) }; - let align = align.unwrap_or(1); + let align = std::cmp::max(align.unwrap_or(1), self.isa.symbol_alignment()); let offset = match *init { Init::Uninitialized => { panic!("data is not initialized yet");