-
Notifications
You must be signed in to change notification settings - Fork 5.1k
[RISC-V] Move comparison transformations from codegen to lowering #118270
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…value when the value is relocatable
…ended like mulw does
8a04fbc
to
1e95bf9
Compare
@risc-vv /run |
RISC-V pull_request-CLR-QEMU: 9096 / 9127 (99.66%)
report.xml, report.md, failures.xml, testclr_details.tar.zst RISC-V pull_request-CLR-VF2: 9097 / 9127 (99.67%)
report.xml, report.md, failures.xml, testclr_details.tar.zst RISC-V pull_request-FX-QEMU: 0 / 1 (0.00%)
report.xml, report.md, failures.xml, testclr_details.tar.zst RISC-V pull_request-FX-VF2: 0 / 56 (0.00%)
report.xml, report.md, failures.xml, testclr_details.tar.zst Build information and commandsGIT: |
The improvements come mostly from eliminating sign-extensions. The very few regressions come mostly from temp registers being allocated by the JIT instead of using reserved registers (in the case of branches), which sometimes results in additional moves. Ideally we don't want "reserved" registers and would rather have all of them available to the JIT, so it'll stay this way. Diffs are based on 297,430 contexts (92,567 MinOpts, 204,863 FullOpts). MISSED contexts: 6 (0.00%) Overall (-143,812 bytes)
MinOpts (-39,656 bytes)
FullOpts (-104,156 bytes)
Example diffslinux.riscv64.Checked.1.mch-12 (-21.43%) : 155054.dasm - System.Text.UnicodeUtility:IsValidCodePoint(uint):bool (FullOpts)@@ -20,20 +20,17 @@ G_M5817_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M5817_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
+ sext.w a0, a0
lui a1, 0xD1FFAB1E
- addiw a1, a1, 0xD1FFAB1E
- sext.w ra, a0
- sext.w a2, a1
- sltu a0, a2, ra
- xori a0, a0, 1
- ;; size=24 bbWeight=1 PerfScore 4.00
+ sltu a0, a0, a1
+ ;; size=12 bbWeight=1 PerfScore 2.00
G_M5817_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 56, prolog size 16, PerfScore 20.50, instruction count 13, allocated bytes for code 56 (MethodHash=99a6e946) for method System.Text.UnicodeUtility:IsValidCodePoint(uint):bool (FullOpts)
+; Total bytes of code 44, prolog size 16, PerfScore 18.50, instruction count 11, allocated bytes for code 44 (MethodHash=99a6e946) for method System.Text.UnicodeUtility:IsValidCodePoint(uint):bool (FullOpts)
; ============================================================
Unwind Info:
@@ -44,7 +41,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 14 (0x0000e) Actual length = 56 (0x000038)
+ Function Length : 11 (0x0000b) Actual length = 44 (0x00002c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) -36 (-16.98%) : 150727.dasm - System.MemoryExtensions:g__IndexOfAnyExceptDefaultComparer|61_0[double](System.ReadOnlySpan`1[double],double,double,double):int (FullOpts)@@ -42,8 +42,8 @@ G_M910_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {},
G_M910_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
; byrRegs +[a0]
sext.w a2, zero
- sext.w t6, a1
- bge zero, t6, G_M910_IG08
+ sext.w a3, a1
+ bge zero, a3, G_M910_IG08
;; size=12 bbWeight=1 PerfScore 4.50
G_M910_IG03: ; bbWeight=3.96, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
slli.uw a3, a2, 3
@@ -52,46 +52,37 @@ G_M910_IG03: ; bbWeight=3.96, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byr
fld ft4, 0xD1FFAB1E(t6)
fmv.d ft5, ft4
feq.d a3, fa0, ft5
- sext.w t6, a3
- ; byrRegs -[t6]
- bnez t6, G_M910_IG07
- ;; size=28 bbWeight=3.96 PerfScore 43.56
+ bnez a3, G_M910_IG07
+ ;; size=24 bbWeight=3.96 PerfScore 41.58
G_M910_IG04: ; bbWeight=1.98, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
+ ; byrRegs -[t6]
feq.d a3, fa0, fa0
- sext.w t6, a3
- bnez t6, G_M910_IG05
+ bnez a3, G_M910_IG05
feq.d a3, ft5, ft5
- sext.w t6, a3
- beqz t6, G_M910_IG07
- ;; size=24 bbWeight=1.98 PerfScore 27.72
+ beqz a3, G_M910_IG07
+ ;; size=16 bbWeight=1.98 PerfScore 25.74
G_M910_IG05: ; bbWeight=1.98, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
fmv.d ft5, ft4
feq.d a3, fa1, ft5
- sext.w t6, a3
- bnez t6, G_M910_IG07
+ bnez a3, G_M910_IG07
feq.d a3, fa1, fa1
- sext.w t6, a3
- bnez t6, G_M910_IG06
+ bnez a3, G_M910_IG06
feq.d a3, ft5, ft5
- sext.w t6, a3
- beqz t6, G_M910_IG07
- ;; size=40 bbWeight=1.98 PerfScore 43.56
+ beqz a3, G_M910_IG07
+ ;; size=28 bbWeight=1.98 PerfScore 40.59
G_M910_IG06: ; bbWeight=1.98, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
feq.d a3, fa2, ft4
- sext.w t6, a3
- bnez t6, G_M910_IG07
+ bnez a3, G_M910_IG07
feq.d a3, fa2, fa2
- sext.w t6, a3
- bnez t6, G_M910_IG10
+ bnez a3, G_M910_IG10
feq.d a3, ft4, ft4
- sext.w t6, a3
- bnez t6, G_M910_IG10
- ;; size=36 bbWeight=1.98 PerfScore 41.58
+ bnez a3, G_M910_IG10
+ ;; size=24 bbWeight=1.98 PerfScore 38.61
G_M910_IG07: ; bbWeight=3.96, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
addiw a2, a2, 0xD1FFAB1E
- sext.w ra, a2
- sext.w t6, a1
- blt ra, t6, G_M910_IG03
+ sext.w a3, a2
+ sext.w a4, a1
+ blt a3, a4, G_M910_IG03
;; size=16 bbWeight=3.96 PerfScore 19.80
G_M910_IG08: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[a0]
@@ -111,7 +102,7 @@ G_M910_IG11: ; bbWeight=0.50, epilog, nogc, extend
addi sp, sp, 16
ret ;; size=16 bbWeight=0.50 PerfScore 3.75
-; Total bytes of code 212, prolog size 16, PerfScore 197.72, instruction count 53, allocated bytes for code 212 (MethodHash=e706fc71) for method System.MemoryExtensions:<IndexOfAnyExcept>g__IndexOfAnyExceptDefaultComparer|61_0[double](System.ReadOnlySpan`1[double],double,double,double):int (FullOpts)
+; Total bytes of code 176, prolog size 16, PerfScore 187.82, instruction count 44, allocated bytes for code 176 (MethodHash=e706fc71) for method System.MemoryExtensions:<IndexOfAnyExcept>g__IndexOfAnyExceptDefaultComparer|61_0[double](System.ReadOnlySpan`1[double],double,double,double):int (FullOpts)
; ============================================================
Unwind Info:
@@ -122,7 +113,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 53 (0x00035) Actual length = 212 (0x0000d4)
+ Function Length : 44 (0x0002c) Actual length = 176 (0x0000b0)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) -32 (-14.81%) : 150820.dasm - System.MemoryExtensions:g__IndexOfAnyDefaultComparer|108_0[double](System.ReadOnlySpan`1[double],double,double,double):int (FullOpts)@@ -42,8 +42,8 @@ G_M4144_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {}
G_M4144_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
; byrRegs +[a0]
sext.w a2, zero
- sext.w t6, a1
- bge zero, t6, G_M4144_IG10
+ sext.w a3, a1
+ bge zero, a3, G_M4144_IG10
;; size=12 bbWeight=1 PerfScore 4.50
G_M4144_IG03: ; bbWeight=3.96, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
slli.uw a3, a2, 3
@@ -52,51 +52,43 @@ G_M4144_IG03: ; bbWeight=3.96, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, by
fld ft4, 0xD1FFAB1E(t6)
fmv.d ft5, ft4
feq.d a3, fa0, ft5
- sext.w t6, a3
- ; byrRegs -[t6]
- bnez t6, G_M4144_IG12
+ bnez a3, G_M4144_IG12
feq.d a3, fa0, fa0
- sext.w t6, a3
- bnez t6, G_M4144_IG05
- ;; size=40 bbWeight=3.96 PerfScore 71.28
+ bnez a3, G_M4144_IG05
+ ;; size=32 bbWeight=3.96 PerfScore 67.32
G_M4144_IG04: ; bbWeight=1.98, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
+ ; byrRegs -[t6]
feq.d a3, ft5, ft5
- sext.w t6, a3
- beqz t6, G_M4144_IG12
- ;; size=12 bbWeight=1.98 PerfScore 13.86
+ beqz a3, G_M4144_IG12
+ ;; size=8 bbWeight=1.98 PerfScore 12.87
G_M4144_IG05: ; bbWeight=3.96, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
fmv.d ft5, ft4
feq.d a3, fa1, ft5
- sext.w t6, a3
- bnez t6, G_M4144_IG12
+ bnez a3, G_M4144_IG12
feq.d a3, fa1, fa1
- sext.w t6, a3
- bnez t6, G_M4144_IG07
- ;; size=28 bbWeight=3.96 PerfScore 59.40
+ bnez a3, G_M4144_IG07
+ ;; size=20 bbWeight=3.96 PerfScore 55.44
G_M4144_IG06: ; bbWeight=1.98, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
feq.d a3, ft5, ft5
- sext.w t6, a3
- beqz t6, G_M4144_IG12
- ;; size=12 bbWeight=1.98 PerfScore 13.86
+ beqz a3, G_M4144_IG12
+ ;; size=8 bbWeight=1.98 PerfScore 12.87
G_M4144_IG07: ; bbWeight=3.96, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
feq.d a3, fa2, ft4
- sext.w t6, a3
- bnez t6, G_M4144_IG12
+ bnez a3, G_M4144_IG12
feq.d a3, fa2, fa2
- sext.w t6, a3
- bnez t6, G_M4144_IG09
- ;; size=24 bbWeight=3.96 PerfScore 55.44
+ bnez a3, G_M4144_IG09
+ ;; size=16 bbWeight=3.96 PerfScore 51.48
G_M4144_IG08: ; bbWeight=1.98, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
feq.d a3, ft4, ft4
xori a3, a3, 1
- sext.w t6, a3
- bnez t6, G_M4144_IG12
+ sext.w a3, a3
+ bnez a3, G_M4144_IG12
;; size=16 bbWeight=1.98 PerfScore 14.85
G_M4144_IG09: ; bbWeight=3.96, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
addiw a2, a2, 0xD1FFAB1E
- sext.w ra, a2
- sext.w t6, a1
- blt ra, t6, G_M4144_IG03
+ sext.w a3, a2
+ sext.w a4, a1
+ blt a3, a4, G_M4144_IG03
;; size=16 bbWeight=3.96 PerfScore 19.80
G_M4144_IG10: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[a0]
@@ -116,7 +108,7 @@ G_M4144_IG13: ; bbWeight=0.50, epilog, nogc, extend
addi sp, sp, 16
ret ;; size=16 bbWeight=0.50 PerfScore 3.75
-; Total bytes of code 216, prolog size 16, PerfScore 269.99, instruction count 54, allocated bytes for code 216 (MethodHash=0a5befcf) for method System.MemoryExtensions:<IndexOfAny>g__IndexOfAnyDefaultComparer|108_0[double](System.ReadOnlySpan`1[double],double,double,double):int (FullOpts)
+; Total bytes of code 184, prolog size 16, PerfScore 256.13, instruction count 46, allocated bytes for code 184 (MethodHash=0a5befcf) for method System.MemoryExtensions:<IndexOfAny>g__IndexOfAnyDefaultComparer|108_0[double](System.ReadOnlySpan`1[double],double,double,double):int (FullOpts)
; ============================================================
Unwind Info:
@@ -127,7 +119,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 54 (0x00036) Actual length = 216 (0x0000d8)
+ Function Length : 46 (0x0002e) Actual length = 184 (0x0000b8)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) +8 (+8.00%) : 139078.dasm - CompareTestUint:GtMinus1(System.ValueTuple`2[uint,float]):bool (MinOpts)@@ -28,8 +28,8 @@ G_M34490_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
addi t6, t6, 0xD1FFAB1E
slli t6, t6, 2
lw a0, 0xD1FFAB1E(t6)
- sext.w t6, a0
- beqz t6, G_M34490_IG04
+ sext.w a0, a0
+ beqz a0, G_M34490_IG04
;; size=32 bbWeight=1 PerfScore 11.00
G_M34490_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
lui a0, 0xD1FFAB1E
@@ -41,15 +41,17 @@ G_M34490_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byr
;; size=20 bbWeight=0.50 PerfScore 3.50
G_M34490_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
lw a0, -8(fp)
- mv a0, zero
- ;; size=8 bbWeight=1 PerfScore 2.50
+ sext.w a0, a0
+ addi a1, zero, 0xD1FFAB1E
+ sltu a0, a1, a0
+ ;; size=16 bbWeight=1 PerfScore 3.50
G_M34490_IG05: ; bbWeight=1, epilog, nogc, extend
ld ra, 24(sp)
ld fp, 16(sp)
addi sp, sp, 32
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 100, prolog size 24, PerfScore 41.50, instruction count 18, allocated bytes for code 100 (MethodHash=10007945) for method CompareTestUint:GtMinus1(System.ValueTuple`2[uint,float]):bool (MinOpts)
+; Total bytes of code 108, prolog size 24, PerfScore 42.50, instruction count 20, allocated bytes for code 108 (MethodHash=10007945) for method CompareTestUint:GtMinus1(System.ValueTuple`2[uint,float]):bool (MinOpts)
; ============================================================
Unwind Info:
@@ -60,7 +62,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 25 (0x00019) Actual length = 100 (0x000064)
+ Function Length : 27 (0x0001b) Actual length = 108 (0x00006c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) +4 (+6.25%) : 261580.dasm - Microsoft.Diagnostics.Tracing.EventPipeEventMetaDataHeader:ClearMemory(ptr,int) (FullOpts)@@ -23,24 +23,27 @@ G_M46340_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M46340_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- sext.w t6, a1
- bge zero, t6, G_M46340_IG04
- ;; size=8 bbWeight=1 PerfScore 4.00
-G_M46340_IG03: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
+ sext.w a2, a1
+ bge zero, a2, G_M46340_IG05
+ j G_M46340_IG04
+ ;; size=12 bbWeight=1 PerfScore 5.50
+G_M46340_IG03: ; bbWeight=2, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
+ mv a0, a2
+ ;; size=4 bbWeight=2 PerfScore 1.00
+G_M46340_IG04: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
addi a2, a0, 0xD1FFAB1E
sb zero, 0xD1FFAB1E(a0)
addiw a1, a1, 0xD1FFAB1E
- mv a0, a2
- sext.w t6, a1
- blt zero, t6, G_M46340_IG03
- ;; size=24 bbWeight=4 PerfScore 38.00
-G_M46340_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, epilog, nogc
+ sext.w a0, a1
+ blt zero, a0, G_M46340_IG03
+ ;; size=20 bbWeight=4 PerfScore 36.00
+G_M46340_IG05: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, epilog, nogc
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 64, prolog size 16, PerfScore 58.50, instruction count 16, allocated bytes for code 64 (MethodHash=dae14afb) for method Microsoft.Diagnostics.Tracing.EventPipeEventMetaDataHeader:ClearMemory(ptr,int) (FullOpts)
+; Total bytes of code 68, prolog size 16, PerfScore 59.00, instruction count 17, allocated bytes for code 68 (MethodHash=dae14afb) for method Microsoft.Diagnostics.Tracing.EventPipeEventMetaDataHeader:ClearMemory(ptr,int) (FullOpts)
; ============================================================
Unwind Info:
@@ -51,7 +54,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 16 (0x00010) Actual length = 64 (0x000040)
+ Function Length : 17 (0x00011) Actual length = 68 (0x000044)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) +4 (+5.00%) : 261717.dasm - Microsoft.Diagnostics.Tracing.TraceEvent:CopyBlob(nint,nint,int) (FullOpts)@@ -29,27 +29,30 @@ G_M63283_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=16 bbWeight=1 PerfScore 9.00
G_M63283_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
sraiw a2, a2, 2
- sext.w t6, a2
- bge zero, t6, G_M63283_IG04
- ;; size=12 bbWeight=1 PerfScore 4.50
-G_M63283_IG03: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
+ sext.w a3, a2
+ bge zero, a3, G_M63283_IG05
+ j G_M63283_IG04
+ ;; size=16 bbWeight=1 PerfScore 6.00
+G_M63283_IG03: ; bbWeight=2, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
+ mv a0, a4
+ ;; size=4 bbWeight=2 PerfScore 1.00
+G_M63283_IG04: ; bbWeight=4, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
addi a3, a1, 0xD1FFAB1E
addi a4, a0, 0xD1FFAB1E
lw a0, 0xD1FFAB1E(a0)
sw a0, 0xD1FFAB1E(a1)
addiw a2, a2, 0xD1FFAB1E
- mv a0, a4
+ sext.w a0, a2
mv a1, a3
- sext.w t6, a2
- blt zero, t6, G_M63283_IG03
- ;; size=36 bbWeight=4 PerfScore 50.00
-G_M63283_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, epilog, nogc
+ blt zero, a0, G_M63283_IG03
+ ;; size=32 bbWeight=4 PerfScore 48.00
+G_M63283_IG05: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, epilog, nogc
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 80, prolog size 16, PerfScore 71.00, instruction count 20, allocated bytes for code 80 (MethodHash=3f6508cc) for method Microsoft.Diagnostics.Tracing.TraceEvent:CopyBlob(nint,nint,int) (FullOpts)
+; Total bytes of code 84, prolog size 16, PerfScore 71.50, instruction count 21, allocated bytes for code 84 (MethodHash=3f6508cc) for method Microsoft.Diagnostics.Tracing.TraceEvent:CopyBlob(nint,nint,int) (FullOpts)
; ============================================================
Unwind Info:
@@ -60,7 +63,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 20 (0x00014) Actual length = 80 (0x000050)
+ Function Length : 21 (0x00015) Actual length = 84 (0x000054)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e) DetailsSize improvements/regressions per collection
PerfScore improvements/regressions per collection
Context information
jit-analyze output |
(Build linux-riscv64 Debug CoreCLR_Bootstrapped) failure looks related, I'll look into it. |
//------------------------------------------------------------------------ | ||
// SignExtendIfNecessary: inserts a 32-bit sign extension unless the argument is full-register or is known to be | ||
// implemented with a sign-extending instruction. | ||
// | ||
// Arguments: | ||
// arg - the argument to sign-extend | ||
// | ||
void Lowering::SignExtendIfNecessary(GenTree** arg) | ||
{ | ||
assert(varTypeUsesIntReg(*arg)); | ||
if (!genActualTypeIsInt(*arg)) | ||
return; | ||
|
||
if ((*arg)->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_MOD, GT_UMOD, GT_DIV, GT_UDIV, GT_CNS_INT)) | ||
return; | ||
|
||
if ((*arg)->OperIsShiftOrRotate() || (*arg)->OperIsCmpCompare() || (*arg)->OperIsAtomicOp()) | ||
return; | ||
|
||
*arg = comp->gtNewCastNode(TYP_I_IMPL, *arg, false, TYP_I_IMPL); | ||
BlockRange().InsertAfter((*arg)->gtGetOp1(), *arg); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is more of a temporary solution to get rid of obviously unnecessary sign-extensions.
We need to identify the nodes with undefined extension bits (some function arguments) and the nodes which pass through the extensions (bit operations; these should mark which extension is passed through). The remaining bulk of operations should leave the result properly extended, so the solution should probably be opt-in: "don't sign-extend unless the source node has an undefined extension".
I briefly looked at Xarch's emitter::IsRedundantMov
approach, which looks at the past instructions, and it seems too many unnecessary sign-extension nodes would be generated if it were applied to RISC-V.
Codegen was not a good place for these transformations: it resulted in contorted logic and complicated register allocation. Every type of comparison benefits (branch, floating-point, and integer), but this is especially true for non-branch integer comparisons -- only less-than is available, so any other comparison must be achieved with additional operations, which are much easier to insert or transform at node level.
Inserting sign-extensions (and recognizing when it is necessary) for comparands is also easier at node level. This PR introduces only simple local checks, more comprehensive sign-extension elimination will be attempted in subsequent PRs.
Part of #84834, cc @dotnet/samsung