Skip to content

Commit

Permalink
aarch64: Avoid a copy in the extract_vector lowering (#6252)
Browse files Browse the repository at this point in the history
* Avoid a move in the aarch64 lowering of extract_vector

* Update tests
  • Loading branch information
elliottt authored Apr 20, 2023
1 parent b667f5f commit 7ad2fe3
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 195 deletions.
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@

;;; Rules for `extract_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (extract_vector x 0))
-(value_reg (fpu_move_128 (put_in_reg x))))
+(value_reg (put_in_reg x)))

;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
180 changes: 90 additions & 90 deletions cranelift/filetests/filetests/isa/aarch64/dynamic-simd-narrow.clif
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ block0(v0: i16):

; VCode:
; block0:
-; dup v3.4h, w0
-; mov v3.d[1], v3.d[1], v3.d[0]
-; sqxtn v0.8b, v3.8h
+; dup v2.4h, w0
+; mov v2.d[1], v2.d[1], v2.d[0]
+; sqxtn v0.8b, v2.8h
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v3.4h, w0
-; mov v3.d[1], v3.d[0]
-; sqxtn v0.8b, v3.8h
+; dup v2.4h, w0
+; mov v2.d[1], v2.d[0]
+; sqxtn v0.8b, v2.8h
; ret

function %snarrow_i16x8(i16) -> i8x16 {
Expand All @@ -43,16 +43,16 @@ block0(v0: i16):

; VCode:
; block0:
-; dup v5.8h, w0
-; sqxtn v0.8b, v5.8h
-; sqxtn2 v0.16b, v0.16b, v5.8h
+; dup v4.8h, w0
+; sqxtn v0.8b, v4.8h
+; sqxtn2 v0.16b, v0.16b, v4.8h
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.8h, w0
-; sqxtn v0.8b, v5.8h
-; sqxtn2 v0.16b, v5.8h
+; dup v4.8h, w0
+; sqxtn v0.8b, v4.8h
+; sqxtn2 v0.16b, v4.8h
; ret

function %snarrow_i32x2(i32) -> i16x4 {
Expand All @@ -70,16 +70,16 @@ block0(v0: i32):

; VCode:
; block0:
-; dup v3.2s, w0
-; mov v3.d[1], v3.d[1], v3.d[0]
-; sqxtn v0.4h, v3.4s
+; dup v2.2s, w0
+; mov v2.d[1], v2.d[1], v2.d[0]
+; sqxtn v0.4h, v2.4s
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v3.2s, w0
-; mov v3.d[1], v3.d[0]
-; sqxtn v0.4h, v3.4s
+; dup v2.2s, w0
+; mov v2.d[1], v2.d[0]
+; sqxtn v0.4h, v2.4s
; ret

function %snarrow_i32x4(i32) -> i16x8 {
Expand All @@ -97,16 +97,16 @@ block0(v0: i32):

; VCode:
; block0:
-; dup v5.4s, w0
-; sqxtn v0.4h, v5.4s
-; sqxtn2 v0.8h, v0.8h, v5.4s
+; dup v4.4s, w0
+; sqxtn v0.4h, v4.4s
+; sqxtn2 v0.8h, v0.8h, v4.4s
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.4s, w0
-; sqxtn v0.4h, v5.4s
-; sqxtn2 v0.8h, v5.4s
+; dup v4.4s, w0
+; sqxtn v0.4h, v4.4s
+; sqxtn2 v0.8h, v4.4s
; ret

function %snarrow_i64x2(i64) -> i32x4 {
Expand All @@ -124,16 +124,16 @@ block0(v0: i64):

; VCode:
; block0:
-; dup v5.2d, x0
-; sqxtn v0.2s, v5.2d
-; sqxtn2 v0.4s, v0.4s, v5.2d
+; dup v4.2d, x0
+; sqxtn v0.2s, v4.2d
+; sqxtn2 v0.4s, v0.4s, v4.2d
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.2d, x0
-; sqxtn v0.2s, v5.2d
-; sqxtn2 v0.4s, v5.2d
+; dup v4.2d, x0
+; sqxtn v0.2s, v4.2d
+; sqxtn2 v0.4s, v4.2d
; ret

function %unarrow_i16x4(i16) -> i8x8 {
Expand All @@ -151,16 +151,16 @@ block0(v0: i16):

; VCode:
; block0:
-; dup v3.4h, w0
-; mov v3.d[1], v3.d[1], v3.d[0]
-; sqxtun v0.8b, v3.8h
+; dup v2.4h, w0
+; mov v2.d[1], v2.d[1], v2.d[0]
+; sqxtun v0.8b, v2.8h
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v3.4h, w0
-; mov v3.d[1], v3.d[0]
-; sqxtun v0.8b, v3.8h
+; dup v2.4h, w0
+; mov v2.d[1], v2.d[0]
+; sqxtun v0.8b, v2.8h
; ret

function %unarrow_i16x8(i16) -> i8x16 {
Expand All @@ -178,16 +178,16 @@ block0(v0: i16):

; VCode:
; block0:
-; dup v5.8h, w0
-; sqxtun v0.8b, v5.8h
-; sqxtun2 v0.16b, v0.16b, v5.8h
+; dup v4.8h, w0
+; sqxtun v0.8b, v4.8h
+; sqxtun2 v0.16b, v0.16b, v4.8h
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.8h, w0
-; sqxtun v0.8b, v5.8h
-; sqxtun2 v0.16b, v5.8h
+; dup v4.8h, w0
+; sqxtun v0.8b, v4.8h
+; sqxtun2 v0.16b, v4.8h
; ret

function %unarrow_i32x2(i32) -> i16x4 {
Expand All @@ -205,16 +205,16 @@ block0(v0: i32):

; VCode:
; block0:
-; dup v3.2s, w0
-; mov v3.d[1], v3.d[1], v3.d[0]
-; sqxtun v0.4h, v3.4s
+; dup v2.2s, w0
+; mov v2.d[1], v2.d[1], v2.d[0]
+; sqxtun v0.4h, v2.4s
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v3.2s, w0
-; mov v3.d[1], v3.d[0]
-; sqxtun v0.4h, v3.4s
+; dup v2.2s, w0
+; mov v2.d[1], v2.d[0]
+; sqxtun v0.4h, v2.4s
; ret

function %unarrow_i32x4(i32) -> i16x8 {
Expand All @@ -232,16 +232,16 @@ block0(v0: i32):

; VCode:
; block0:
-; dup v5.4s, w0
-; sqxtun v0.4h, v5.4s
-; sqxtun2 v0.8h, v0.8h, v5.4s
+; dup v4.4s, w0
+; sqxtun v0.4h, v4.4s
+; sqxtun2 v0.8h, v0.8h, v4.4s
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.4s, w0
-; sqxtun v0.4h, v5.4s
-; sqxtun2 v0.8h, v5.4s
+; dup v4.4s, w0
+; sqxtun v0.4h, v4.4s
+; sqxtun2 v0.8h, v4.4s
; ret

function %unarrow_i64x2(i64) -> i32x4 {
Expand All @@ -259,16 +259,16 @@ block0(v0: i64):

; VCode:
; block0:
-; dup v5.2d, x0
-; sqxtun v0.2s, v5.2d
-; sqxtun2 v0.4s, v0.4s, v5.2d
+; dup v4.2d, x0
+; sqxtun v0.2s, v4.2d
+; sqxtun2 v0.4s, v0.4s, v4.2d
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.2d, x0
-; sqxtun v0.2s, v5.2d
-; sqxtun2 v0.4s, v5.2d
+; dup v4.2d, x0
+; sqxtun v0.2s, v4.2d
+; sqxtun2 v0.4s, v4.2d
; ret

function %uunarrow_i16x4(i16) -> i8x8 {
Expand All @@ -286,16 +286,16 @@ block0(v0: i16):

; VCode:
; block0:
-; dup v3.4h, w0
-; mov v3.d[1], v3.d[1], v3.d[0]
-; uqxtn v0.8b, v3.8h
+; dup v2.4h, w0
+; mov v2.d[1], v2.d[1], v2.d[0]
+; uqxtn v0.8b, v2.8h
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v3.4h, w0
-; mov v3.d[1], v3.d[0]
-; uqxtn v0.8b, v3.8h
+; dup v2.4h, w0
+; mov v2.d[1], v2.d[0]
+; uqxtn v0.8b, v2.8h
; ret

function %uunarrow_i16x8(i16) -> i8x16 {
Expand All @@ -313,16 +313,16 @@ block0(v0: i16):

; VCode:
; block0:
-; dup v5.8h, w0
-; uqxtn v0.8b, v5.8h
-; uqxtn2 v0.16b, v0.16b, v5.8h
+; dup v4.8h, w0
+; uqxtn v0.8b, v4.8h
+; uqxtn2 v0.16b, v0.16b, v4.8h
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.8h, w0
-; uqxtn v0.8b, v5.8h
-; uqxtn2 v0.16b, v5.8h
+; dup v4.8h, w0
+; uqxtn v0.8b, v4.8h
+; uqxtn2 v0.16b, v4.8h
; ret

function %uunarrow_i32x2(i32) -> i16x4 {
Expand All @@ -340,16 +340,16 @@ block0(v0: i32):

; VCode:
; block0:
-; dup v3.2s, w0
-; mov v3.d[1], v3.d[1], v3.d[0]
-; uqxtn v0.4h, v3.4s
+; dup v2.2s, w0
+; mov v2.d[1], v2.d[1], v2.d[0]
+; uqxtn v0.4h, v2.4s
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v3.2s, w0
-; mov v3.d[1], v3.d[0]
-; uqxtn v0.4h, v3.4s
+; dup v2.2s, w0
+; mov v2.d[1], v2.d[0]
+; uqxtn v0.4h, v2.4s
; ret

function %uunarrow_i32x4(i32) -> i16x8 {
Expand All @@ -367,16 +367,16 @@ block0(v0: i32):

; VCode:
; block0:
-; dup v5.4s, w0
-; uqxtn v0.4h, v5.4s
-; uqxtn2 v0.8h, v0.8h, v5.4s
+; dup v4.4s, w0
+; uqxtn v0.4h, v4.4s
+; uqxtn2 v0.8h, v0.8h, v4.4s
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.4s, w0
-; uqxtn v0.4h, v5.4s
-; uqxtn2 v0.8h, v5.4s
+; dup v4.4s, w0
+; uqxtn v0.4h, v4.4s
+; uqxtn2 v0.8h, v4.4s
; ret

function %uunarrow_i64x2(i64) -> i32x4 {
Expand All @@ -394,15 +394,15 @@ block0(v0: i64):

; VCode:
; block0:
-; dup v5.2d, x0
-; uqxtn v0.2s, v5.2d
-; uqxtn2 v0.4s, v0.4s, v5.2d
+; dup v4.2d, x0
+; uqxtn v0.2s, v4.2d
+; uqxtn2 v0.4s, v0.4s, v4.2d
; ret
;
; Disassembled:
; block0: ; offset 0x0
-; dup v5.2d, x0
-; uqxtn v0.2s, v5.2d
-; uqxtn2 v0.4s, v5.2d
+; dup v4.2d, x0
+; uqxtn v0.2s, v4.2d
+; uqxtn2 v0.4s, v4.2d
; ret

Loading

0 comments on commit 7ad2fe3

Please sign in to comment.