|
;; Converts a `Value` and a static offset into an `Amode` for x64, attempting
;; to be as fancy as possible with offsets/registers/shifts/etc to make maximal
;; use of the x64 addressing modes.
;;
;; This is a bit subtle unfortunately due to a few constraints. This function
;; was originally written recursively but that can lead to stack overflow
;; for certain inputs due to the recursion being defined by user-controlled
;; input. This means that nowadays this function is not recursive and has a
;; specific structure to handle that.
;;
;; Additionally currently in CLIF all loads/stores have an `Offset32` immediate
;; to go with them, but the wasm lowering to CLIF doesn't use this meaning that
;; it's frequently 0. Additionally mid-end optimizations do not fold `iconst`
;; values into this `Offset32`, meaning that it's left up to backends to hunt
;; for constants for good codegen. That means that one important aspect of this
;; function is that it searches for constants to fold into the `Offset32` to
;; avoid unnecessary instructions.
;;
;; Note, though, that the "optimal addressing modes" are only guaranteed to be
;; generated if egraph-based optimizations have run. For example this will only
;; attempt to find one constant as opposed to many, and that'll only happen
;; with constant folding from optimizations.
;;
;; Finally there are two primary entry points for this function. One is this
;; function here, `to_amode`, and another is `to_amode_add`. The latter is used
;; by the lowering of `iadd` in the x64 backend to use the `lea` instruction
;; where the input is two `Value` operands instead of just one. Most of the
;; logic here is then deferred through `to_amode_add`.
;;
;; In the future if mid-end optimizations fold constants into `Offset32` then
;; this in theory can "simply" delegate to the `amode_imm_reg` helper, and
;; below can delegate to `amode_imm_reg_reg_shift`, or something like that.
(decl to_amode (MemFlags Value Offset32) Amode)

;; Fallback case: no `iadd` to pick apart, so hand the single base value to
;; `amode_imm_reg` which performs the final shift-related pattern matches.
(rule 0 (to_amode flags base offset)
      (amode_imm_reg flags base offset))
;; If the address is the addition of two things then delegate to the
;; `to_amode_add` helper, which hunts for foldable constants in `x`/`y`.
(rule 1 (to_amode flags (iadd x y) offset)
      (to_amode_add flags x y offset))

;; Same as `to_amode`, except that the base address is computed via the addition
;; of the two `Value` arguments provided.
;;
;; The primary purpose of this is to hunt for constants within the two `Value`
;; operands provided. Failing that this will defer to `amode_imm_reg` or
;; `amode_imm_reg_reg_shift` which is the final step in amode lowering and
;; performs final pattern matches related to shifts to see if that can be
;; peeled out into the amode.
;;
;; In other words this function's job is to find constants and then defer to
;; `amode_imm_reg*`.
;;
;; Note that these rules deliberately do not recurse: each rule peels at most
;; one constant and then hands off to a non-recursive helper, so amode lowering
;; is bounded regardless of user-controlled input depth.
(decl to_amode_add (MemFlags Value Value Offset32) Amode)

;; Fallback: no constant found, let the final helper look for shifts.
(rule 0 (to_amode_add flags x y offset)
      (amode_imm_reg_reg_shift flags x y offset))
;; One of the two operands is itself a constant that fits in the `Offset32`:
;; fold it in (infallibly, via `s32_add_fallible`) and drop to a single base.
(rule 1 (to_amode_add flags x (iconst (simm32 c)) offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg flags x sum))
(rule 2 (to_amode_add flags (iconst (simm32 c)) x offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg flags x sum))
;; One operand is an `iadd` with a constant on either side: fold the constant
;; into the offset and keep both remaining values as base + index.
(rule 3 (to_amode_add flags (iadd x (iconst (simm32 c))) y offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg_reg_shift flags x y sum))
(rule 4 (to_amode_add flags (iadd (iconst (simm32 c)) x) y offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg_reg_shift flags x y sum))
(rule 5 (to_amode_add flags x (iadd y (iconst (simm32 c))) offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg_reg_shift flags x y sum))
(rule 6 (to_amode_add flags x (iadd (iconst (simm32 c)) y) offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg_reg_shift flags x y sum))


;; Final cases of amode lowering. Does not hunt for constants and only attempts
;; to pattern match add-of-shifts to generate fancier `ImmRegRegShift` modes,
;; otherwise falls back on `ImmReg`.
(decl amode_imm_reg (MemFlags Value Offset32) Amode)
(rule 0 (amode_imm_reg flags base offset)
      (Amode.ImmReg offset base flags))
(rule 1 (amode_imm_reg flags (iadd x y) offset)
      (amode_imm_reg_reg_shift flags x y offset))

;; Like `amode_imm_reg` but with a base and an index value. If either operand
;; is a left-shift by a small constant (<= 3, since x64 scales are 1/2/4/8)
;; the shift is folded directly into the addressing mode.
(decl amode_imm_reg_reg_shift (MemFlags Value Value Offset32) Amode)
(rule 0 (amode_imm_reg_reg_shift flags x y offset)
      (Amode.ImmRegRegShift offset x y 0 flags)) ;; 0 == y<<0 == "no shift"
(rule 1 (amode_imm_reg_reg_shift flags x (ishl y (iconst (uimm8 shift))) offset)
      (if (u32_lteq (u8_as_u32 shift) 3))
      (Amode.ImmRegRegShift offset x y shift flags))
(rule 2 (amode_imm_reg_reg_shift flags (ishl y (iconst (uimm8 shift))) x offset)
      (if (u32_lteq (u8_as_u32 shift) 3))
      (Amode.ImmRegRegShift offset x y shift flags))
1078 | 1115 |
|
1079 | 1116 | ;; Offsetting an Amode. Used when we need to do consecutive
|
1080 | 1117 | ;; loads/stores to adjacent addresses.
|
|
0 commit comments