|
;; Converts a `Value` and a static offset into an `Amode` for x64, attempting
;; to be as fancy as possible with offsets/registers/shifts/etc to make maximal
;; use of the x64 addressing modes.
;;
;; This is a bit subtle unfortunately due to a few constraints. This function
;; was originally written recursively but that can lead to stack overflow
;; for certain inputs due to the recursion being defined by user-controlled
;; input. This means that nowadays this function is not recursive and has a
;; specific structure to handle that.
;;
;; Additionally currently in CLIF all loads/stores have an `Offset32` immediate
;; to go with them, but the wasm lowering to CLIF doesn't use this meaning that
;; it's frequently 0. Additionally mid-end optimizations do not fold `iconst`
;; values into this `Offset32`, meaning that it's left up to backends to hunt
;; for constants for good codegen. That means that one important aspect of this
;; function is that it searches for constants to fold into the `Offset32` to
;; avoid unnecessary instructions.
;;
;; Note, though, that the "optimal addressing modes" are only guaranteed to be
;; generated if egraph-based optimizations have run. For example this will only
;; attempt to find one constant as opposed to many, and that'll only happen
;; with constant folding from optimizations.
;;
;; Finally there are two primary entry points for this function. One is this
;; function here, `to_amode`, and another is `to_amode_add`. The latter is used
;; by the lowering of `iadd` in the x64 backend to use the `lea` instruction
;; where the input is two `Value` operands instead of just one. Most of the
;; logic here is then deferred through `to_amode_add`.
;;
;; In the future if mid-end optimizations fold constants into `Offset32` then
;; this in theory can "simply" delegate to the `amode_imm_reg` helper, and
;; below can delegate to `amode_imm_reg_reg_shift`, or something like that.
(decl to_amode (MemFlags Value Offset32) Amode)

;; Fallback case: no `iadd` to pick apart, so hand the single base value to
;; `amode_imm_reg` which performs the final shift-related pattern matches.
(rule 0 (to_amode flags base offset)
      (amode_imm_reg flags base offset))
;; If the address is the addition of two things then delegate to the
;; `to_amode_add` helper, which hunts for foldable constants in `x`/`y`.
(rule 1 (to_amode flags (iadd x y) offset)
      (to_amode_add flags x y offset))

;; Same as `to_amode`, except that the base address is computed via the addition
;; of the two `Value` arguments provided.
;;
;; The primary purpose of this is to hunt for constants within the two `Value`
;; operands provided. Failing that this will defer to `amode_imm_reg` or
;; `amode_imm_reg_reg_shift` which is the final step in amode lowering and
;; performs final pattern matches related to shifts to see if that can be
;; peeled out into the amode.
;;
;; In other words this function's job is to find constants and then defer to
;; `amode_imm_reg*`.
;;
;; Note that these rules deliberately do not recurse: each rule peels at most
;; one constant and then hands off to a non-recursive helper, so amode lowering
;; is bounded regardless of user-controlled input depth.
(decl to_amode_add (MemFlags Value Value Offset32) Amode)

;; Fallback: no constant found, let the final helper look for shifts.
(rule 0 (to_amode_add flags x y offset)
      (amode_imm_reg_reg_shift flags x y offset))
;; One of the two operands is itself a constant that fits in the `Offset32`:
;; fold it in (infallibly, via `s32_add_fallible`) and drop to a single base.
(rule 1 (to_amode_add flags x (iconst (simm32 c)) offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg flags x sum))
(rule 2 (to_amode_add flags (iconst (simm32 c)) x offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg flags x sum))
;; One operand is an `iadd` with a constant on either side: fold the constant
;; into the offset and keep both remaining values as base + index.
(rule 3 (to_amode_add flags (iadd x (iconst (simm32 c))) y offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg_reg_shift flags x y sum))
(rule 4 (to_amode_add flags (iadd (iconst (simm32 c)) x) y offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg_reg_shift flags x y sum))
(rule 5 (to_amode_add flags x (iadd y (iconst (simm32 c))) offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg_reg_shift flags x y sum))
(rule 6 (to_amode_add flags x (iadd (iconst (simm32 c)) y) offset)
      (if-let sum (s32_add_fallible offset c))
      (amode_imm_reg_reg_shift flags x y sum))


;; Final cases of amode lowering. Does not hunt for constants and only attempts
;; to pattern match add-of-shifts to generate fancier `ImmRegRegShift` modes,
;; otherwise falls back on `ImmReg`.
(decl amode_imm_reg (MemFlags Value Offset32) Amode)
(rule 0 (amode_imm_reg flags base offset)
      (Amode.ImmReg offset base flags))
(rule 1 (amode_imm_reg flags (iadd x y) offset)
      (amode_imm_reg_reg_shift flags x y offset))

;; Like `amode_imm_reg` but with a base and an index value. If either operand
;; is a left-shift by a small constant (<= 3, since x64 scales are 1/2/4/8)
;; the shift is folded directly into the addressing mode.
(decl amode_imm_reg_reg_shift (MemFlags Value Value Offset32) Amode)
(rule 0 (amode_imm_reg_reg_shift flags x y offset)
      (Amode.ImmRegRegShift offset x y 0 flags)) ;; 0 == y<<0 == "no shift"
(rule 1 (amode_imm_reg_reg_shift flags x (ishl y (iconst (uimm8 shift))) offset)
      (if (u32_lteq (u8_as_u32 shift) 3))
      (Amode.ImmRegRegShift offset x y shift flags))
(rule 2 (amode_imm_reg_reg_shift flags (ishl y (iconst (uimm8 shift))) x offset)
      (if (u32_lteq (u8_as_u32 shift) 3))
      (Amode.ImmRegRegShift offset x y shift flags))
1078 | 1115 |
|
1079 | 1116 | ;; Offsetting an Amode. Used when we need to do consecutive
|
1080 | 1117 | ;; loads/stores to adjacent addresses.
|
|
0 commit comments