
Commit d4a3058

shelkesagar29 and Copybara Bot authored
Move internal changes (#380)
This PR moves the following internal changes to the OSS repo.

## [plan][transforms] Fix an issue in the shape materialization pass

This change fixes an issue in the `SimplifyExtractOfReshape` pattern of the shape materialization pass. The pattern was being applied even when the reshape op's operand is dynamically shaped. However, with a dynamic operand, mapping the extract index onto the reshape operand does not work. With this change, we return failure if the reshape op's operand is dynamic. An MLIR test is added for the scenario in which the pattern should return failure.

## [tensorrt] Fix incorrect handling of dynamic shape in `tensorrt-broadcast-elimination`

Fixes an issue where `tensorrt-broadcast-elimination` would improperly handle dynamically shaped tensors when attempting to reshape them. In certain cases (when more than one dynamic dimension is present), the target shape must be explicitly computed in the IR and a dynamic reshape must be created.

Co-authored-by: Copybara Bot <[email protected]>
1 parent 64c780f · commit d4a3058

File tree: 12 files changed, +138 −10 lines

mlir-tensorrt/compiler/lib/Dialect/Plan/Transforms/MaterializeShapeCalculations.cpp

Lines changed: 3 additions & 0 deletions

@@ -361,6 +361,9 @@ struct SimplifyExtractOfReshape : public OpRewritePattern<tensor::ExtractOp> {
     if (!reshapeOp)
       return failure();
 
+    if (!reshapeOp.getOperand().getType().hasStaticShape())
+      return failure();
+
     std::optional<SmallVector<int64_t>> coords =
         getConstantIntValues(getAsOpFoldResult(op.getIndices()));
     if (!coords)
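Why the dynamic-operand guard is needed: folding `tensor.extract` through a reshape means remapping the extract coordinates from the result shape onto the operand shape, which requires row-major strides for both shapes, and a dynamic extent makes those strides unknowable at pattern-application time. Below is a minimal standalone sketch of that remapping, not the pass's actual helper; the function name `mapExtractIndexThroughReshape` and the plain `std::vector` interface are invented for illustration.

```cpp
#include <cstdint>
#include <optional>
#include <vector>

// Stand-in for a dynamic ('?') extent, mirroring ShapedType::kDynamic.
constexpr int64_t kDynamic = -1;

// Map coordinates of an element in the reshape result back to coordinates in
// the reshape operand: linearize against the result shape, then delinearize
// against the operand shape (both row-major). If either shape has a dynamic
// extent the strides are unknown, so we bail out -- the same reason
// SimplifyExtractOfReshape now returns failure() for dynamic operands.
std::optional<std::vector<int64_t>>
mapExtractIndexThroughReshape(const std::vector<int64_t> &resultShape,
                              const std::vector<int64_t> &operandShape,
                              const std::vector<int64_t> &resultCoords) {
  for (int64_t d : resultShape)
    if (d == kDynamic)
      return std::nullopt;
  for (int64_t d : operandShape)
    if (d == kDynamic)
      return std::nullopt;

  // Linearize the coordinates in the result shape.
  int64_t linear = 0;
  for (size_t i = 0; i < resultShape.size(); ++i)
    linear = linear * resultShape[i] + resultCoords[i];

  // Delinearize the offset into the operand shape.
  std::vector<int64_t> operandCoords(operandShape.size());
  for (size_t i = operandShape.size(); i-- > 0;) {
    operandCoords[i] = linear % operandShape[i];
    linear /= operandShape[i];
  }
  return operandCoords;
}
```

For the `tensor<1x6x4xf32>` result in the new negative test, extracting at `[0, 1, 2]` linearizes to offset 6; if the operand's dynamic dimension happened to be 2 at runtime (shape `1x2x3x4`), that offset would delinearize to `[0, 0, 1, 2]`. Since that extent is unknown when the pattern runs, the sketch (like the pattern) gives up instead.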

mlir-tensorrt/tensorrt/lib/TensorRT/Transforms/BroadcastElimination.cpp

Lines changed: 64 additions & 2 deletions

@@ -120,6 +120,65 @@ struct PushDownBroadcastReduceRankOp : public OpRewritePattern<CollapseRankOp> {
 };
 } // namespace
 
+static Value expandRank(RewriterBase &rewriter, Location loc,
+                        TypedValue<RankedTensorType> input,
+                        ArrayRef<int64_t> reorderedBroadcastDims,
+                        RankedTensorType resultType) {
+  RankedTensorType inputType = input.getType();
+  // For <= 1 dynamic dims, no need to do dynamic reshape.
+  if (input.getType().getNumDynamicDims() <= 1) {
+    SmallVector<int64_t> staticShape(resultType.getRank());
+
+    unsigned inputIdx = 0;
+    for (unsigned i = 0, e = staticShape.size(); i < e; i++) {
+      if (inputIdx < reorderedBroadcastDims.size() &&
+          i == reorderedBroadcastDims[inputIdx]) {
+        staticShape[i] = inputType.getDimSize(inputIdx++);
+        continue;
+      }
+      staticShape[i] = 1;
+    }
+    return rewriter.create<ReshapeOp>(loc, resultType.clone(staticShape),
+                                      input);
+  }
+
+  // Otherwise, we need to do dynamic reshape.
+  auto shape = rewriter.create<tensorrt::ShapeOp>(loc, input);
+  SmallVector<Value> shapeComponents(resultType.getRank());
+  SmallVector<int64_t> staticShape(resultType.getRank());
+  unsigned inputIdx = 0;
+  for (unsigned i = 0, e = shapeComponents.size(); i < e; i++) {
+    if (inputIdx < reorderedBroadcastDims.size() &&
+        i == reorderedBroadcastDims[inputIdx]) {
+      if (!inputType.isDynamicDim(inputIdx)) {
+        staticShape[i] = inputType.getDimSize(inputIdx);
+        shapeComponents[i] = rewriter.create<tensorrt::ConstantOp>(
+            loc, rewriter.getI32TensorAttr(
+                     {static_cast<int32_t>(inputType.getDimSize(inputIdx++))}));
+        continue;
+      }
+      shapeComponents[i] = rewriter.create<tensorrt::SliceOp>(
+          loc, shape,
+          /*offset=*/ArrayRef<int32_t>{static_cast<int32_t>(inputIdx++)},
+          ArrayRef<int32_t>{1}, ArrayRef<int32_t>{1});
+      staticShape[i] = ShapedType::kDynamic;
+      continue;
+    }
+    staticShape[i] = 1;
+    shapeComponents[i] = rewriter.create<tensorrt::ConstantOp>(
+        loc, rewriter.getI32TensorAttr(
+                 {static_cast<int32_t>(inputType.getDimSize(1))}));
+  }
+  auto newShape = rewriter.create<tensorrt::ConcatenationOp>(
+      loc,
+      RankedTensorType::get(static_cast<int64_t>(shapeComponents.size()),
+                            rewriter.getI32Type()),
+      shapeComponents, /*axis=*/0);
+
+  return rewriter.create<ReshapeOp>(loc, resultType.clone(staticShape), input,
+                                    newShape);
+}
+
 namespace {
 /// Create transpose + expand_rank on the input of a `tensorrt.broadcast` so
 /// that the result has the same rank as the `tensorrt.broadcast` result and the
@@ -157,8 +216,9 @@ struct SimplifyBroadcast : public OpRewritePattern<BroadcastOp> {
       }
       expandedShape[i] = 1;
     }
-    Value expanded = rewriter.create<ExpandRankOp>(
-        loc, resultType.clone(expandedShape), transposeOp);
+
+    Value expanded = expandRank(rewriter, loc, transposeOp,
+                                reorderedBroadcastDims, resultType);
     rewriter.replaceOpWithNewOp<BroadcastOp>(
         op, op.getType(), expanded, op.getShape(),
         llvm::to_vector(llvm::seq<int64_t>(0, resultType.getRank())));
@@ -341,6 +401,8 @@ class BroadcastEliminationPass
     patterns.add<SimplifyBroadcast, ElementwiseAbsorbBroadcast,
                  PushDownBroadcastReduceRankOp, SelectAbsorbBroadcast,
                  MatMulAbsorbBroadcast>(&getContext());
+    tensorrt::ReshapeOp::getCanonicalizationPatterns(patterns,
+                                                     patterns.getContext());
     if (failed(applyPatternsAndFoldGreedily(getOperation(),
                                             std::move(patterns)))) {
       emitError(getOperation()->getLoc())
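The core decision in the new `expandRank` helper is whether a plain static reshape is enough, or whether the target shape has to be materialized in the IR with `tensorrt.shape`, `tensorrt.slice`, and `tensorrt.concatenation` and fed to a dynamic reshape. Per the code's own comment, the static form is used when the (transposed) input has at most one dynamic dimension; with two or more, the extents must be assembled at runtime. The sketch below mirrors only that planning step outside of MLIR; the names `ExpandRankPlan` and `planExpandRank` and the use of `-1` for a dynamic extent are conventions invented for this illustration, not part of the pass.

```cpp
#include <cstdint>
#include <vector>

// Stand-in for a dynamic ('?') extent, mirroring ShapedType::kDynamic.
constexpr int64_t kDynamic = -1;

struct ExpandRankPlan {
  std::vector<int64_t> expandedShape;   // target shape, kDynamic where unknown
  std::vector<bool> needsRuntimeExtent; // positions read via shape + slice
  bool needsDynamicReshape = false;     // true when > 1 input dim is dynamic
};

// Plan the rank expansion: place input dimension j at position
// reorderedBroadcastDims[j] of the result and fill every other position
// with 1. Only when the input has more than one dynamic extent does the
// reshape need an explicit, runtime-computed shape operand.
ExpandRankPlan planExpandRank(const std::vector<int64_t> &inputShape,
                              const std::vector<int64_t> &reorderedBroadcastDims,
                              int64_t resultRank) {
  ExpandRankPlan plan;
  plan.expandedShape.assign(resultRank, 1);
  plan.needsRuntimeExtent.assign(resultRank, false);

  int64_t numDynamic = 0;
  for (int64_t d : inputShape)
    numDynamic += (d == kDynamic);
  plan.needsDynamicReshape = numDynamic > 1;

  size_t inputIdx = 0;
  for (int64_t i = 0; i < resultRank; ++i) {
    if (inputIdx < reorderedBroadcastDims.size() &&
        i == reorderedBroadcastDims[inputIdx]) {
      plan.expandedShape[i] = inputShape[inputIdx];
      plan.needsRuntimeExtent[i] =
          plan.needsDynamicReshape && inputShape[inputIdx] == kDynamic;
      ++inputIdx;
    }
  }
  return plan;
}
```

For the second regression test added below, the transposed input is `tensor<?x1x?xf16>` with reordered broadcast dims `{1, 2, 3}` and result rank 4: the plan is an expanded shape of `1x?x1x?` with runtime extents at positions 1 and 3 and `needsDynamicReshape` set, which matches the `tensorrt.shape`/`tensorrt.slice`/`tensorrt.concatenation`/`tensorrt.reshape` sequence the test checks for.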

mlir-tensorrt/tensorrt/test/Dialect/TensorRT/broadcast-elimination.mlir

Lines changed: 39 additions & 0 deletions

@@ -236,3 +236,42 @@ func.func @broadcast_elim_matmul_vector(%arg0: tensor<?x?x128xf32>, %arg1: tenso
 // CHECK: return %[[v0]] : tensor<?x?x100xf32>
 
 
+// -----
+
+func.func @broadcast_dynamic_expand_shape_regression(%arg0: tensor<?x?x1x1xi1>, %arg1: tensor<?x1xf16>, %arg2: tensor<?x?x256x256xf16>, %arg3: tensor<4xi32>) -> tensor<?x?x256x256xf16> {
+  %0 = tensorrt.broadcast %arg0 broadcast_dims<0, 1, 2, 3> shape(%arg3 : tensor<4xi32>) : tensor<?x?x1x1xi1> to tensor<?x?x256x256xi1>
+  %1 = tensorrt.broadcast %arg1 broadcast_dims<2, 3> shape(%arg3 : tensor<4xi32>) : tensor<?x1xf16> to tensor<?x?x256x256xf16>
+  %2 = tensorrt.select ins(%0, %arg2, %1 : tensor<?x?x256x256xi1>, tensor<?x?x256x256xf16>, tensor<?x?x256x256xf16>)
+       -> tensor<?x?x256x256xf16>
+  return %2 : tensor<?x?x256x256xf16>
+}
+
+// CHECK-LABEL: func.func @broadcast_dynamic_expand_shape_regression
+// CHECK-SAME: (%[[arg0:.+]]: tensor<?x?x1x1xi1>, %[[arg1:.+]]: tensor<?x1xf16>, %[[arg2:.+]]: tensor<?x?x256x256xf16>, %[[arg3:.+]]: tensor<4xi32>) -> tensor<?x?x256x256xf16> {
+// CHECK: %[[v0:.+]] = tensorrt.reshape %[[arg1]] : tensor<?x1xf16> to tensor<1x1x?x1xf16>
+// CHECK: %[[v1:.+]] = tensorrt.select ins(%[[arg0]], %[[arg2]], %[[v0]] : tensor<?x?x1x1xi1>, tensor<?x?x256x256xf16>, tensor<1x1x?x1xf16>) -> tensor<?x?x256x256xf16>
+// CHECK: return %[[v1]] : tensor<?x?x256x256xf16>
+
+// -----
+
+func.func @broadcast_dynamic_expand_shape_regression(%arg0: tensor<?x?x1x1xi1>, %arg1: tensor<?x1x?xf16>, %arg2: tensor<?x?x256x256xf16>, %arg3: tensor<4xi32>) -> tensor<?x?x256x256xf16> {
+  %0 = tensorrt.broadcast %arg0 broadcast_dims<0, 1, 2, 3> shape(%arg3 : tensor<4xi32>) : tensor<?x?x1x1xi1> to tensor<?x?x256x256xi1>
+  %1 = tensorrt.broadcast %arg1 broadcast_dims<3, 2, 1> shape(%arg3 : tensor<4xi32>) : tensor<?x1x?xf16> to tensor<?x?x256x256xf16>
+  %2 = tensorrt.select ins(%0, %arg2, %1 : tensor<?x?x256x256xi1>, tensor<?x?x256x256xf16>, tensor<?x?x256x256xf16>)
+       -> tensor<?x?x256x256xf16>
+  return %2 : tensor<?x?x256x256xf16>
+}
+
+// CHECK: #[[$map:.+]] = affine_map<(d0, d1, d2) -> (d2, d1, d0)>
+// CHECK: module {
+// CHECK-LABEL: func.func @broadcast_dynamic_expand_shape_regression
+// CHECK-SAME: (%[[arg0:.+]]: tensor<?x?x1x1xi1>, %[[arg1:.+]]: tensor<?x1x?xf16>, %[[arg2:.+]]: tensor<?x?x256x256xf16>, %[[arg3:.+]]: tensor<4xi32>) -> tensor<?x?x256x256xf16> {
+// CHECK: %[[cst_i32:.+]] = tensorrt.constant dense<1> : tensor<1xi32>
+// CHECK: %[[v0:.+]] = tensorrt.transpose {permutation = #[[$map]]} %[[arg1]] : tensor<?x1x?xf16> to tensor<?x1x?xf16>
+// CHECK: %[[v1:.+]] = tensorrt.shape %[[v0]] : tensor<?x1x?xf16> -> tensor<3xi32>
+// CHECK: %[[v2:.+]] = tensorrt.slice %[[v1]][0][1][1] : tensor<3xi32> to tensor<1xi32>
+// CHECK: %[[v3:.+]] = tensorrt.slice %[[v1]][2][1][1] : tensor<3xi32> to tensor<1xi32>
+// CHECK: %[[v4:.+]] = tensorrt.concatenation {axis = 0 : i32} ins(%[[cst_i32]], %[[v2]], %[[cst_i32]], %[[v3]] : tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<4xi32>
+// CHECK: %[[v5:.+]] = tensorrt.reshape %[[v0]] shape(%[[v4]]: tensor<4xi32>) : tensor<?x1x?xf16> to tensor<1x?x1x?xf16>
+// CHECK: %[[v6:.+]] = tensorrt.select ins(%[[arg0]], %[[arg2]], %[[v5]] : tensor<?x?x1x1xi1>, tensor<?x?x256x256xf16>, tensor<1x?x1x?xf16>) -> tensor<?x?x256x256xf16>
+// CHECK: return %[[v6]] : tensor<?x?x256x256xf16>

mlir-tensorrt/test/Dialect/Plan/materialize-shape-calculations.mlir

Lines changed: 24 additions & 0 deletions

@@ -1088,3 +1088,27 @@ func.func @reduce_window_dynamic_input(%arg0: tensor<?x?x?x?xf32> {tensorrt.shap
 // CHECK-DAG: %[[v2:.+]] = arith.maxsi %[[dim]], %[[c0]] : index
 // CHECK-DAG: %[[v3:.+]] = plan.with_shape %[[v1]](%[[v2]], %[[c3]], %[[c512]], %[[c512]]) :
 // CHECK-DAG: return %[[v3]]
+
+// -----
+
+func.func @simplify_extract_of_reshape_negative(%arg0: tensor<1x?x3x4xf32>) -> f32 {
+  %c0 = arith.constant 0: index
+  %c1 = arith.constant 1 : index
+  %c2 = arith.constant 2 : index
+  %1 = stablehlo.reshape %arg0 : (tensor<1x?x3x4xf32>) -> tensor<1x6x4xf32>
+  %2 = tensor.extract %1[%c0, %c1, %c2] : tensor<1x6x4xf32>
+  return %2 : f32
+}
+
+// CHECK-LABEL: simplify_extract_of_reshape_negative
+// CHECK-SAME: (%[[arg0:.+]]: tensor<1x?x3x4xf32>)
+// CHECK-NEXT: %[[c4:.+]] = arith.constant 4 : index
+// CHECK-NEXT: %[[c3:.+]] = arith.constant 3 : index
+// CHECK-NEXT: %[[c2:.+]] = arith.constant 2 : index
+// CHECK-NEXT: %[[c1:.+]] = arith.constant 1 : index
+// CHECK-NEXT: %[[c0:.+]] = arith.constant 0 : index
+// CHECK-NEXT: %[[dim:.+]] = tensor.dim %[[arg0]], %[[c1]] : tensor<1x?x3x4xf32>
+// CHECK-NEXT: %[[v0:.+]] = plan.with_shape %[[arg0]](%[[c1]], %[[dim]], %[[c3]], %[[c4]])
+// CHECK-NEXT: %[[v1:.+]] = stablehlo.reshape %[[v0]]
+// CHECK-NEXT: %[[extracted:.+]] = tensor.extract %[[v1]][%[[c0]], %[[c1]], %[[c2]]]
+// CHECK-NEXT: return %extracted

mlir-tensorrt/test/models/bert.stablehlo.elided.mlir

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-module @jit__unnamed_wrapped_function_ attributes {jax.uses_shape_polymorphism = false, mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
+module @bert attributes {jax.uses_shape_polymorphism = false, mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
   func.func public @main(%arg0: tensor<32x8xi32> {mhlo.layout_mode = "default"}) -> (tensor<32x8x768xf16> {mhlo.layout_mode = "default"}, tensor<32x768xf16> {mhlo.layout_mode = "default"}) {
     %0 = stablehlo.constant dense_resource<__elided__> : tensor<30522x768xf32>
     %1 = stablehlo.constant dense_resource<__elided__> : tensor<512x768xf32>

mlir-tensorrt/test/models/gpt2.stablehlo.bs2.elided.mlir

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-module @jit_generate attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
+module @gpt2_bs2 attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
   func.func public @main(%arg0: tensor<2x6xi32> {mhlo.sharding = "{replicated}"}, %arg1: tensor<2x6xi32> {mhlo.sharding = "{replicated}"}) -> (tensor<2x20xi32> {jax.result_info = ""}) {
     %0 = stablehlo.constant dense<0> : tensor<1xi32>
     %1 = stablehlo.constant dense<768> : tensor<i32>

mlir-tensorrt/test/models/gpt2.stablehlo.elided.mlir

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-module @jit_generate attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
+module @gpt_bs1 attributes {mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
   func.func public @main(%arg0: tensor<1x7xi32> {jax.arg_info = "inputs['attention_mask']", mhlo.sharding = "{replicated}"}, %arg1: tensor<1x7xi32> {jax.arg_info = "inputs['input_ids']", mhlo.sharding = "{replicated}"}) -> (tensor<1x20xi32> {jax.result_info = ""}) {
     %0 = stablehlo.constant dense_resource<__elided__> : tensor<50257x768xf16>
     %1 = stablehlo.constant dense_resource<__elided__> : tensor<1024x768xf16>

mlir-tensorrt/test/models/llama-68m.stablehlo.elided.mlir

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-module @jit_generate attributes {mhlo.cross_program_prefetches = [], mhlo.is_dynamic = false, mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32, mhlo.use_auto_spmd_partitioning = false} {
+module @llama_68m attributes {mhlo.cross_program_prefetches = [], mhlo.is_dynamic = false, mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32, mhlo.use_auto_spmd_partitioning = false} {
   func.func @main(%arg0: tensor<1x9xi32> {mhlo.sharding = "{replicated}"}, %arg1: tensor<1x9xi32> {mhlo.sharding = "{replicated}"}) -> tensor<1x20xi32> {
     %0 = stablehlo.constant dense<1.000000e+00> : tensor<1x1x3072xf32>
     %1 = stablehlo.constant dense<-3.40282347E+38> : tensor<1x1x1x20xf32>

mlir-tensorrt/test/models/llama-v2.stablehlo.elided.mlir

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-module @jit__unnamed_wrapped_function_ attributes {jax.uses_shape_polymorphism = false, mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
+module @llama_v2 attributes {jax.uses_shape_polymorphism = false, mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
   func.func public @main(%arg0: tensor<1x27xf32> {mhlo.layout_mode = "default"}) -> (tensor<1x27x32000xf32> {mhlo.layout_mode = "default"}) {
     %0 = stablehlo.constant dense_resource<__elided__> : tensor<32000x4096xf16>
     %1 = stablehlo.constant dense_resource<__elided__> : tensor<4096xf16>

mlir-tensorrt/test/models/resnet50.stablehlo.elided.mlir

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-module @jit__unnamed_wrapped_function_ attributes {jax.uses_shape_polymorphism = false, mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
+module @resnet50 attributes {jax.uses_shape_polymorphism = false, mhlo.num_partitions = 1 : i32, mhlo.num_replicas = 1 : i32} {
   func.func public @main(%arg0: tensor<16x3x224x224xf16> {mhlo.layout_mode = "default"}) -> (tensor<16x1000xf16> {mhlo.layout_mode = "default"}) {
     %0 = stablehlo.constant dense_resource<__elided__> : tensor<7x7x3x64xf32>
     %1 = stablehlo.constant dense_resource<__elided__> : tensor<64xf32>
