From d550d384bf926b9ac0b21e6c6257aa9eaa19bca7 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Tue, 17 Jun 2025 05:27:16 +0200 Subject: [PATCH 1/9] =?UTF-8?q?fix:=20handling=20of=20default=20attrs=20in?= =?UTF-8?q?=20SimplifiedLayerNormalization=20+=20LayerNormalization?= =?UTF-8?q?=F0=9F=90=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ort_fusions/skip_normalization.py | 31 ++++++------------- .../ort_fusions/skip_normalization_test.py | 3 ++ 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization.py b/onnxscript/rewriter/ort_fusions/skip_normalization.py index ee6e366608..b89e6c95df 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization.py @@ -36,11 +36,7 @@ def pattern(self, op, input, skip, gamma, bias, epsilon, stash_type): # Note: ORT's SimplifiedLayerNormalization was placed in onnx domain by mistake. # No need to use com.microsoft domain here; but this is a custom op in ORT. 
normalized = op.SimplifiedLayerNormalization( - skip_sum, - gamma, - axis=-1, - epsilon=epsilon, - stash_type=stash_type, + skip_sum, gamma, axis=-1, _outputs=["simplified_layer_norm"] ) return normalized, skip_sum @@ -51,8 +47,6 @@ def check( skip, gamma, bias, - epsilon, - stash_type, **_, ) -> pattern.MatchResult: # type: ignore[name-defined] """Check if the pattern matches conditions for use of SkipSimplifiedLayerNormalization op.""" @@ -93,10 +87,11 @@ def rewrite( skip, gamma, bias, - epsilon, - stash_type, + simplified_layer_norm, **_, ): + epsilon = simplified_layer_norm.producer().attributes.get_float("epsilon") + if self._has_bias: normalized, _mean, _inv_std_var, skip_sum = op.SkipSimplifiedLayerNormalization( input, @@ -141,7 +136,7 @@ def __init__(self, name: str, has_bias: bool = False, bias_pre_add: bool = False self._has_bias = has_bias self._bias_pre_add = bias_pre_add - def pattern(self, op, input, skip, gamma, beta, bias, epsilon, stash_type): + def pattern(self, op, input, skip, gamma, beta, bias): if self._has_bias and self._bias_pre_add: input = op.Add(input, bias) @@ -149,16 +144,11 @@ def pattern(self, op, input, skip, gamma, beta, bias, epsilon, stash_type): skip_sum_pattern_1 = op.Add(skip, input) skip_sum_pattern_2 = op.Add(input, skip) skip_sum = pattern.OrValue([skip_sum_pattern_1, skip_sum_pattern_2], name="skip_sum") - + # TODO: check if combo is missed. 
if self._has_bias and not self._bias_pre_add: skip_sum = op.Add(skip_sum, bias) normalized = op.LayerNormalization( - skip_sum, - gamma, - beta, - axis=-1, - epsilon=epsilon, - stash_type=stash_type, + skip_sum, gamma, beta, axis=-1, _outputs=["layer_norm"] ) return normalized, skip_sum @@ -170,8 +160,6 @@ def check( gamma, beta, bias, - epsilon, - stash_type, **_, ) -> pattern.MatchResult: # type: ignore[name-defined] """Check if the pattern matches conditions for use of SimplifiedLayerNormalization op.""" @@ -218,10 +206,10 @@ def rewrite( gamma, beta, bias, - epsilon, - stash_type, + layer_norm, **_, ): + epsilon = layer_norm.producer().attributes.get_float("epsilon") normalized, _mean, _inv_std_var, skip_sum = op.SkipLayerNormalization( input, skip, @@ -247,7 +235,6 @@ def rewrite( [_skip_layer_pre_add_bias_rule, _skip_layer_add_bias_rule, _skip_layer_rule] ) - fuse_skip_layer_normalization = _fusion_utils.apply_fusion_rules( skip_layer_normalization_ruleset ) diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py index f7f5cc7612..533d9be0d4 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py @@ -30,6 +30,7 @@ def test_smollm(self): new_outputs = ort_run("optimized", model, inputs) assert_allclose(new_outputs, original_outputs) + # TODO: investigate, why precision drops. @unittest.skip("fixme: accuracy is not high") def test_whisper_encoder(self): whisper_encoder = whisper_encoder_test() @@ -61,6 +62,8 @@ def test_whisper_decoder(self): new_outputs = ort_run("optimized", model, inputs) assert_allclose(new_outputs, original_outputs) + # TODO: add more testcases with default attrs. 
+ if __name__ == "__main__": unittest.main() From b5ffeac0dfa92e4f63377b70c4f8f9a05e211164 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Tue, 17 Jun 2025 09:54:04 +0200 Subject: [PATCH 2/9] =?UTF-8?q?tests:=20add=20new=20BART=20encoder=20test?= =?UTF-8?q?=20model=E2=9A=A1=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ort_fusions/models/_bart_encoder.py | 707 ++++++++++++++++++ .../ort_fusions/skip_normalization_test.py | 15 +- 2 files changed, 721 insertions(+), 1 deletion(-) create mode 100644 onnxscript/rewriter/ort_fusions/models/_bart_encoder.py diff --git a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py new file mode 100644 index 0000000000..7dbef59193 --- /dev/null +++ b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py @@ -0,0 +1,707 @@ +""" +Onnxscript version of "hf-internal-testing_tiny-random-bart". + +See: https://huggingface.co/hf-internal-testing/tiny-random-bart +""" + +import numpy as np +from onnx import TensorProto +from onnx.helper import make_tensor + +import onnxscript.ir as ir +from onnxscript import script +from onnxscript.onnx_opset import opset20 +from onnxscript.onnx_types import FLOAT, INT64 + + +def make_model( + encoder_embed_tokens_weight, + encoder_embed_positions_weight, + encoder_layers_0_self_attn_k_proj_bias, + encoder_layers_0_self_attn_layer_norm_weight, + encoder_layers_0_fc1_bias, + MatMul_257, + MatMul_267, + MatMul_268, + MatMul_270, + MatMul_271, + MatMul_272, + MatMul_273, + MatMul_283, + MatMul_284, + MatMul_286, + MatMul_287, + MatMul_288, +): + @script() + def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: + encoder_layernorm_embedding_bias = opset20.Identity( + encoder_layers_0_self_attn_layer_norm_weight + ) + encoder_layernorm_embedding_weight = opset20.Identity( + encoder_layers_0_self_attn_layer_norm_weight + ) + + encoder_layers_1_final_layer_norm_bias = opset20.Identity( 
+ encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_1_final_layer_norm_weight = opset20.Identity( + encoder_layers_0_self_attn_layer_norm_weight + ) + + encoder_layers_1_fc2_bias = opset20.Identity(encoder_layers_0_self_attn_k_proj_bias) + encoder_layers_1_fc1_bias = opset20.Identity(encoder_layers_0_fc1_bias) + encoder_layers_1_self_attn_layer_norm_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_1_self_attn_layer_norm_weight = opset20.Identity( + encoder_layers_0_self_attn_layer_norm_weight + ) + encoder_layers_1_self_attn_layer_norm_weight = opset20.Identity( + encoder_layers_1_self_attn_layer_norm_weight + ) + encoder_layers_1_self_attn_out_proj_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_1_self_attn_q_proj_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_1_self_attn_v_proj_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_1_self_attn_k_proj_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_0_final_layer_norm_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_0_final_layer_norm_weight = opset20.Identity( + encoder_layers_0_self_attn_layer_norm_weight + ) + encoder_layers_0_fc2_bias = opset20.Identity(encoder_layers_0_self_attn_k_proj_bias) + encoder_layers_1_fc2_bias = opset20.Identity(encoder_layers_0_self_attn_k_proj_bias) + encoder_layers_1_self_attn_layer_norm_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_0_self_attn_out_proj_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_0_self_attn_q_proj_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + encoder_layers_0_self_attn_v_proj_bias = opset20.Identity( + encoder_layers_0_self_attn_k_proj_bias + ) + + encoder_Shape_output_0 = opset20.Shape(input_ids) + encoder_Constant_output_0 
= opset20.Constant(value_ints=1) + encoder_Gather_output_0 = opset20.Gather( + encoder_Shape_output_0, encoder_Constant_output_0 + ) + + encoder_Constant_1_output_0 = opset20.Constant(value_int=-1) + Unsqueeze_43 = opset20.Constant(int_values=[0]) + encoder_Unsqueeze_output_0 = opset20.Unsqueeze(encoder_Gather_output_0, Unsqueeze_43) + encoder_Concat_output_0 = opset20.Concat( + encoder_Constant_1_output_0, encoder_Unsqueeze_output_0, axis=0 + ) + encoder_Reshape_output_0 = opset20.Reshape( + input_ids, encoder_Concat_output_0, allowzero=0 + ) + encoder_embed_tokens_Gather_output_0 = opset20.Gather( + encoder_embed_tokens_weight, encoder_Reshape_output_0 + ) + encoder_embed_tokens_Constant_output_0 = opset20.Constant(value_floats=[1.0]) + encoder_embed_tokens_Mul_output_0 = opset20.Mul( + encoder_embed_tokens_Gather_output_0, encoder_embed_tokens_Constant_output_0 + ) + encoder_embed_positions_Shape_output_0 = opset20.Shape(input_ids) + encoder_embed_positions_Constant_output_0 = opset20.Constant(value_int=0) + encoder_embed_positions_Gather_output_0 = opset20.Gather( + encoder_embed_positions_Shape_output_0, + encoder_embed_positions_Constant_output_0, + axis=0, + ) + encoder_embed_positions_Constant_1_output_0 = opset20.Constant(value_int=0) + encoder_embed_positions_Cast_output_0 = opset20.Cast(encoder_Gather_output_0, to=7) + encoder_embed_positions_Constant_2_output_0 = opset20.Constant(value_int=1) + encoder_embed_positions_Range_output_0 = opset20.Range( + encoder_embed_positions_Constant_1_output_0, + encoder_embed_positions_Cast_output_0, + encoder_embed_positions_Constant_2_output_0, + ) + encoder_embed_positions_Constant_3_output_0 = opset20.Constant(value_ints=[0]) + encoder_embed_positions_Unsqueeze_output_0 = opset20.Unsqueeze( + encoder_embed_positions_Gather_output_0, + encoder_embed_positions_Constant_3_output_0, + ) + encoder_embed_positions_Constant_4_output_0 = opset20.Constant(value_ints=[-1]) + encoder_embed_positions_Concat_output_0 = 
opset20.Concat( + encoder_embed_positions_Unsqueeze_output_0, + encoder_embed_positions_Constant_4_output_0, + axis=0, + ) + encoder_embed_positions_Constant_5_output_0 = opset20.Constant(value_ints=[-1]) + encoder_embed_positions_Reshape_output_0 = opset20.Reshape( + encoder_embed_positions_Concat_output_0, + encoder_embed_positions_Constant_5_output_0, + ) + encoder_embed_positions_Shape_1_output_0 = opset20.Shape( + encoder_embed_positions_Reshape_output_0 + ) + encoder_embed_positions_ConstantOfShape_output_0 = opset20.ConstantOfShape( + encoder_embed_positions_Shape_1_output_0, + value=make_tensor("onef", TensorProto.FLOAT, [1], [1]), + ) + encoder_embed_positions_Constant_6_output_0 = opset20.Constant(value_ints=[-1]) + encoder_embed_positions_Mul_output_0 = opset20.Mul( + encoder_embed_positions_ConstantOfShape_output_0, + encoder_embed_positions_Constant_6_output_0, + ) + encoder_embed_positions_Equal_output_0 = opset20.Equal( + encoder_embed_positions_Reshape_output_0, encoder_embed_positions_Mul_output_0 + ) + encoder_embed_positions_Where_output_0 = opset20.Where( + encoder_embed_positions_Equal_output_0, + encoder_embed_positions_ConstantOfShape_output_0, + encoder_embed_positions_Reshape_output_0, + ) + encoder_embed_positions_Expand_output_0 = opset20.Expand( + encoder_embed_positions_Range_output_0, encoder_embed_positions_Where_output_0 + ) + encoder_embed_positions_Constant_7_output_0 = opset20.Constant(value_int=2) + encoder_embed_positions_Add_output_0 = opset20.Add( + encoder_embed_positions_Expand_output_0, + encoder_embed_positions_Constant_7_output_0, + ) + encoder_embed_positions_Gather_1_output_0 = opset20.Gather( + encoder_embed_positions_weight, encoder_embed_positions_Add_output_0 + ) + encoder_Cast_output_0 = opset20.Cast(encoder_embed_positions_Gather_1_output_0, to=1) + encoder_Add_output_0 = opset20.Add( + encoder_embed_tokens_Mul_output_0, encoder_Cast_output_0 + ) + encoder_layernorm_embedding_LayerNormalization_output_0 = 
opset20.LayerNormalization( + encoder_Add_output_0, + encoder_layernorm_embedding_weight, + encoder_layernorm_embedding_bias, + axis=-1, + epsilon=9.999999747378752e-06, + ) + encoder_layers_0_self_attn_Shape_output_0 = opset20.Shape( + encoder_layernorm_embedding_LayerNormalization_output_0 + ) + encoder_layers_0_self_attn_Constant_output_0 = opset20.Constant(value_int=0) + encoder_layers_0_self_attn_Gather_output_0 = opset20.Gather( + encoder_layers_0_self_attn_Shape_output_0, + encoder_layers_0_self_attn_Constant_output_0, + axis=0, + ) + encoder_layers_0_self_attn_Shape_1_output_0 = opset20.Shape( + encoder_layernorm_embedding_LayerNormalization_output_0 + ) + encoder_layers_0_self_attn_Constant_1_output_0 = opset20.Constant(value_int=1) + encoder_layers_0_self_attn_Gather_1_output_0 = opset20.Gather( + encoder_layers_0_self_attn_Shape_1_output_0, + encoder_layers_0_self_attn_Constant_1_output_0, + axis=0, + ) + encoder_layers_0_self_attn_q_proj_MatMul_output_0 = opset20.MatMul( + encoder_layernorm_embedding_LayerNormalization_output_0, MatMul_257 + ) + encoder_layers_0_self_attn_q_proj_Add_output_0 = opset20.Add( + encoder_layers_0_self_attn_q_proj_bias, + encoder_layers_0_self_attn_q_proj_MatMul_output_0, + ) + Unsqueeze_88 = opset20.Constant(value_ints=[0]) + encoder_layers_0_self_attn_Unsqueeze_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_88 + ) + encoder_layers_0_self_attn_Constant_2_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_0_self_attn_Constant_3_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Constant_4_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Concat_output_0 = opset20.Concat( + encoder_layers_0_self_attn_Unsqueeze_output_0, + encoder_layers_0_self_attn_Constant_2_output_0, + encoder_layers_0_self_attn_Constant_3_output_0, + encoder_layers_0_self_attn_Constant_4_output_0, + axis=0, + ) + Unsqueeze_97 = opset20.Constant(value_ints=[1]) + 
encoder_layers_0_self_attn_Unsqueeze_1_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_97 + ) + encoder_layers_0_self_attn_Constant_5_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_0_self_attn_Constant_6_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Constant_7_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Concat_1_output_0 = opset20.Concat( + encoder_layers_0_self_attn_Unsqueeze_1_output_0, + encoder_layers_0_self_attn_Constant_5_output_0, + encoder_layers_0_self_attn_Constant_6_output_0, + encoder_layers_0_self_attn_Constant_7_output_0, + axis=0, + ) + Unsqueeze_106 = opset20.Constant(value_ints=[1]) + encoder_layers_0_self_attn_Unsqueeze_2_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_106 + ) + encoder_layers_0_self_attn_Constant_8_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_0_self_attn_Constant_9_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Constant_10_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Concat_2_output_0 = opset20.Concat( + encoder_layers_0_self_attn_Unsqueeze_2_output_0, + encoder_layers_0_self_attn_Constant_8_output_0, + encoder_layers_0_self_attn_Constant_9_output_0, + encoder_layers_0_self_attn_Constant_10_output_0, + axis=0, + ) + + encoder_layers_0_self_attn_Reshape_output_0 = opset20.Reshape( + encoder_layers_0_self_attn_q_proj_Add_output_0, + encoder_layers_0_self_attn_Concat_output_0, + allowzero=0, + ) + encoder_layers_0_self_attn_Transpose_output_0 = opset20.Transpose( + encoder_layers_0_self_attn_Reshape_output_0, perm=[0, 2, 1, 3] + ) + encoder_layers_0_self_attn_k_proj_MatMul_output_0 = opset20.MatMul( + encoder_layernorm_embedding_LayerNormalization_output_0, MatMul_267 + ) + encoder_layers_0_self_attn_k_proj_Add_output_0 = opset20.Add( + encoder_layers_0_self_attn_k_proj_bias, + 
encoder_layers_0_self_attn_k_proj_MatMul_output_0, + ) + encoder_layers_0_self_attn_v_proj_MatMul_output_0 = opset20.MatMul( + encoder_layernorm_embedding_LayerNormalization_output_0, MatMul_268 + ) + encoder_layers_0_self_attn_v_proj_Add_output_0 = opset20.Add( + encoder_layers_0_self_attn_v_proj_bias, + encoder_layers_0_self_attn_v_proj_MatMul_output_0, + ) + encoder_layers_0_self_attn_Reshape_1_output_0 = opset20.Reshape( + encoder_layers_0_self_attn_k_proj_Add_output_0, + encoder_layers_0_self_attn_Concat_1_output_0, + allowzero=0, + ) + encoder_layers_0_self_attn_Reshape_2_output_0 = opset20.Reshape( + encoder_layers_0_self_attn_v_proj_Add_output_0, + encoder_layers_0_self_attn_Concat_2_output_0, + allowzero=0, + ) + encoder_layers_0_self_attn_Transpose_1_output_0 = opset20.Transpose( + encoder_layers_0_self_attn_Reshape_2_output_0, perm=[0, 2, 1, 3] + ) + encoder_layers_0_self_attn_Shape_2_output_0 = opset20.Shape( + encoder_layers_0_self_attn_Transpose_output_0 + ) + encoder_layers_0_self_attn_Constant_11_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_0_self_attn_Constant_12_output_0 = opset20.Constant( + value_ints=[9223372036854775807] + ) + encoder_layers_0_self_attn_Slice_output_0 = opset20.Slice( + encoder_layers_0_self_attn_Shape_2_output_0, + encoder_layers_0_self_attn_Constant_11_output_0, + encoder_layers_0_self_attn_Constant_12_output_0, + ) + encoder_layers_0_self_attn_Cast_output_0 = opset20.Cast( + encoder_layers_0_self_attn_Slice_output_0, to=1 + ) + encoder_layers_0_self_attn_Sqrt_output_0 = opset20.Sqrt( + encoder_layers_0_self_attn_Cast_output_0 + ) + encoder_layers_0_self_attn_Constant_13_output_0 = opset20.Constant(value_floats=[1.0]) + encoder_layers_0_self_attn_Div_output_0 = opset20.Div( + encoder_layers_0_self_attn_Constant_13_output_0, + encoder_layers_0_self_attn_Sqrt_output_0, + ) + encoder_layers_0_self_attn_Cast_1_output_0 = opset20.Cast( + encoder_layers_0_self_attn_Div_output_0, to=1 + ) + 
encoder_layers_0_self_attn_Transpose_2_output_0 = opset20.Transpose( + encoder_layers_0_self_attn_Reshape_1_output_0, perm=[0, 2, 3, 1] + ) + encoder_layers_0_self_attn_Sqrt_1_output_0 = opset20.Sqrt( + encoder_layers_0_self_attn_Cast_1_output_0 + ) + encoder_layers_0_self_attn_Mul_output_0 = opset20.Mul( + encoder_layers_0_self_attn_Transpose_output_0, + encoder_layers_0_self_attn_Sqrt_1_output_0, + ) + encoder_layers_0_self_attn_Sqrt_2_output_0 = opset20.Sqrt( + encoder_layers_0_self_attn_Cast_1_output_0 + ) + encoder_layers_0_self_attn_Mul_1_output_0 = opset20.Mul( + encoder_layers_0_self_attn_Transpose_2_output_0, + encoder_layers_0_self_attn_Sqrt_2_output_0, + ) + encoder_layers_0_self_attn_MatMul_output_0 = opset20.MatMul( + encoder_layers_0_self_attn_Mul_output_0, encoder_layers_0_self_attn_Mul_1_output_0 + ) + encoder_layers_0_self_attn_Softmax_output_0 = opset20.Softmax( + encoder_layers_0_self_attn_MatMul_output_0, axis=-1 + ) + encoder_layers_0_self_attn_MatMul_1_output_0 = opset20.MatMul( + encoder_layers_0_self_attn_Softmax_output_0, + encoder_layers_0_self_attn_Transpose_1_output_0, + ) + encoder_layers_0_self_attn_Transpose_3_output_0 = opset20.Transpose( + encoder_layers_0_self_attn_MatMul_1_output_0, perm=[0, 2, 1, 3] + ) + Unsqueeze_145 = opset20.Constant(value_ints=[0]) + encoder_layers_0_self_attn_Unsqueeze_3_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_145 + ) + Unsqueeze_147 = opset20.Constant(value_ints=[0]) + encoder_layers_0_self_attn_Unsqueeze_4_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_Gather_1_output_0, Unsqueeze_147 + ) + encoder_layers_0_self_attn_Constant_14_output_0 = opset20.Constant(value_ints=[16]) + encoder_layers_0_self_attn_Concat_3_output_0 = opset20.Concat( + encoder_layers_0_self_attn_Unsqueeze_3_output_0, + encoder_layers_0_self_attn_Unsqueeze_4_output_0, + encoder_layers_0_self_attn_Constant_14_output_0, + axis=0, + ) + encoder_layers_0_self_attn_Reshape_3_output_0 
= opset20.Reshape( + encoder_layers_0_self_attn_Transpose_3_output_0, + encoder_layers_0_self_attn_Concat_3_output_0, + allowzero=0, + ) + encoder_layers_0_self_attn_out_proj_MatMul_output_0 = opset20.MatMul( + encoder_layers_0_self_attn_Reshape_3_output_0, MatMul_270 + ) + encoder_layers_0_self_attn_out_proj_Add_output_0 = opset20.Add( + encoder_layers_0_self_attn_out_proj_bias, + encoder_layers_0_self_attn_out_proj_MatMul_output_0, + ) + encoder_layers_0_Add_output_0 = opset20.Add( + encoder_layernorm_embedding_LayerNormalization_output_0, + encoder_layers_0_self_attn_out_proj_Add_output_0, + ) + encoder_layers_0_self_attn_layer_norm_LayerNormalization_output_0 = ( + opset20.LayerNormalization( + encoder_layers_0_Add_output_0, + encoder_layers_0_self_attn_layer_norm_weight, + axis=-1, + epsilon=9.999999747378752e-0, + ) + ) + encoder_layers_0_fc1_MatMul_output_0 = opset20.MatMul( + encoder_layers_0_self_attn_layer_norm_LayerNormalization_output_0, MatMul_271 + ) + encoder_layers_0_fc1_Add_output_0 = opset20.Add( + encoder_layers_0_fc1_bias, encoder_layers_0_fc1_MatMul_output_0 + ) + encoder_layers_0_activation_fn_Gelu_output_0 = opset20.Gelu( + encoder_layers_0_fc1_Add_output_0, approximate="none" + ) + encoder_layers_0_fc2_MatMul_output_0 = opset20.MatMul( + encoder_layers_0_activation_fn_Gelu_output_0, MatMul_272 + ) + encoder_layers_0_fc2_Add_output_0 = opset20.Add( + encoder_layers_0_fc2_bias, encoder_layers_0_fc2_MatMul_output_0 + ) + encoder_layers_0_Add_1_output_0 = opset20.Add( + encoder_layers_0_self_attn_layer_norm_LayerNormalization_output_0, + encoder_layers_0_fc2_Add_output_0, + ) + encoder_layers_0_final_layer_norm_LayerNormalization_output_0 = ( + opset20.LayerNormalization( + encoder_layers_0_Add_1_output_0, + encoder_layers_0_final_layer_norm_weight, + encoder_layers_0_final_layer_norm_bias, + axis=-1, + epsilon=9.999999747378752e-06, + ) + ) + encoder_layers_1_self_attn_Shape_output_0 = opset20.Shape( + 
encoder_layers_0_final_layer_norm_LayerNormalization_output_0 + ) + encoder_layers_1_self_attn_Constant_output_0 = opset20.Constant(value_int=0) + encoder_layers_1_self_attn_Gather_output_0 = opset20.Gather( + encoder_layers_1_self_attn_Shape_output_0, + encoder_layers_1_self_attn_Constant_output_0, + axis=0, + ) + encoder_layers_1_self_attn_Shape_1_output_0 = opset20.Shape( + encoder_layers_0_final_layer_norm_LayerNormalization_output_0 + ) + encoder_layers_1_self_attn_Constant_1_output_0 = opset20.Constant(value_int=1) + encoder_layers_1_self_attn_Gather_1_output_0 = opset20.Gather( + encoder_layers_1_self_attn_Shape_1_output_0, + encoder_layers_1_self_attn_Constant_1_output_0, + axis=0, + ) + encoder_layers_1_self_attn_q_proj_MatMul_output_0 = opset20.MatMul( + encoder_layers_0_final_layer_norm_LayerNormalization_output_0, MatMul_273 + ) + encoder_layers_1_self_attn_q_proj_Add_output_0 = opset20.Add( + encoder_layers_1_self_attn_q_proj_bias, + encoder_layers_1_self_attn_q_proj_MatMul_output_0, + ) + Unsqueeze_176 = opset20.Constant(value_ints=[0]) + encoder_layers_1_self_attn_Unsqueeze_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_176 + ) + encoder_layers_1_self_attn_Constant_2_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_1_self_attn_Constant_3_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_1_self_attn_Constant_4_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_1_self_attn_Concat_output_0 = opset20.Concat( + encoder_layers_1_self_attn_Unsqueeze_output_0, + encoder_layers_1_self_attn_Constant_2_output_0, + encoder_layers_1_self_attn_Constant_3_output_0, + encoder_layers_1_self_attn_Constant_4_output_0, + axis=0, + ) + Unsqueeze_185 = opset20.Constant(value_ints=[0]) + encoder_layers_1_self_attn_Unsqueeze_1_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_185 + ) + encoder_layers_1_self_attn_Constant_5_output_0 = opset20.Constant(value_ints=[-1]) 
+ encoder_layers_1_self_attn_Constant_6_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_1_self_attn_Constant_7_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_1_self_attn_Concat_1_output_0 = opset20.Constant( + encoder_layers_1_self_attn_Unsqueeze_1_output_0, + encoder_layers_1_self_attn_Constant_5_output_0, + encoder_layers_1_self_attn_Constant_6_output_0, + encoder_layers_1_self_attn_Constant_7_output_0, + axis=0, + ) + Unsqueeze_194 = opset20.Constant(value_ints=[0]) + encoder_layers_1_self_attn_Unsqueeze_2_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_194 + ) + encoder_layers_1_self_attn_Constant_8_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_1_self_attn_Constant_9_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_1_self_attn_Constant_10_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_1_self_attn_Concat_2_output_0 = opset20.Concat( + encoder_layers_1_self_attn_Unsqueeze_2_output_0, + encoder_layers_1_self_attn_Constant_8_output_0, + encoder_layers_1_self_attn_Constant_9_output_0, + encoder_layers_1_self_attn_Constant_10_output_0, + axis=0, + ) + encoder_layers_1_self_attn_Reshape_output_0 = opset20.Reshape( + encoder_layers_1_self_attn_q_proj_Add_output_0, + encoder_layers_1_self_attn_Concat_output_0, + allowzero=0, + ) + encoder_layers_1_self_attn_Transpose_output_0 = opset20.Transpose( + encoder_layers_1_self_attn_Reshape_output_0, perm=[0, 2, 1, 3] + ) + encoder_layers_1_self_attn_k_proj_MatMul_output_0 = opset20.MatMul( + encoder_layers_0_final_layer_norm_LayerNormalization_output_0, MatMul_283 + ) + encoder_layers_1_self_attn_k_proj_Add_output_0 = opset20.Add( + encoder_layers_1_self_attn_k_proj_bias, + encoder_layers_1_self_attn_k_proj_MatMul_output_0, + ) + encoder_layers_1_self_attn_v_proj_MatMul_output_0 = opset20.MatMul( + encoder_layers_0_final_layer_norm_LayerNormalization_output_0, MatMul_284 + ) + 
encoder_layers_1_self_attn_v_proj_Add_output_0 = opset20.Add( + encoder_layers_1_self_attn_v_proj_bias, + encoder_layers_1_self_attn_v_proj_MatMul_output_0, + ) + encoder_layers_1_self_attn_Reshape_1_output_0 = opset20.Reshape( + encoder_layers_1_self_attn_k_proj_Add_output_0, + encoder_layers_1_self_attn_Concat_1_output_0, + allowzero=0, + ) + encoder_layers_1_self_attn_Reshape_2_output_0 = opset20.Reshape( + encoder_layers_1_self_attn_v_proj_Add_output_0, + encoder_layers_1_self_attn_Concat_2_output_0, + allowzero=0, + ) + encoder_layers_1_self_attn_Transpose_1_output_0 = opset20.Transpose( + encoder_layers_1_self_attn_Reshape_2_output_0, perm=[0, 2, 1, 3] + ) + encoder_layers_1_self_attn_Shape_2_output_0 = opset20.Shape( + encoder_layers_1_self_attn_Transpose_output_0 + ) + encoder_layers_1_self_attn_Constant_11_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_1_self_attn_Constant_12_output_0 = opset20.Constant( + value_ints=[9223372036854775807] + ) + encoder_layers_1_self_attn_Slice_output_0 = opset20.Slice( + encoder_layers_1_self_attn_Shape_2_output_0, + encoder_layers_1_self_attn_Constant_11_output_0, + encoder_layers_1_self_attn_Constant_12_output_0, + ) + encoder_layers_1_self_attn_Cast_output_0 = opset20.Cast( + encoder_layers_1_self_attn_Slice_output_0, to=1 + ) + encoder_layers_1_self_attn_Sqrt_output_0 = opset20.Sqrt( + encoder_layers_1_self_attn_Cast_output_0 + ) + encoder_layers_1_self_attn_Constant_13_output_0 = opset20.Constant(value_floats=[1.0]) + encoder_layers_1_self_attn_Div_output_0 = opset20.Div( + encoder_layers_1_self_attn_Constant_13_output_0, + encoder_layers_1_self_attn_Sqrt_output_0, + ) + encoder_layers_1_self_attn_Cast_1_output_0 = opset20.Cast( + encoder_layers_1_self_attn_Div_output_0, to=1 + ) + encoder_layers_1_self_attn_Transpose_2_output_0 = opset20.Transpose( + encoder_layers_1_self_attn_Reshape_1_output_0, perm=[0, 2, 3, 1] + ) + encoder_layers_1_self_attn_Sqrt_1_output_0 = opset20.Sqrt( + 
encoder_layers_1_self_attn_Cast_1_output_0 + ) + encoder_layers_1_self_attn_Mul_output_0 = opset20.Mul( + encoder_layers_1_self_attn_Transpose_output_0, + encoder_layers_1_self_attn_Sqrt_1_output_0, + ) + encoder_layers_1_self_attn_Sqrt_2_output_0 = opset20.Sqrt( + encoder_layers_1_self_attn_Cast_1_output_0 + ) + encoder_layers_1_self_attn_Mul_1_output_0 = opset20.Mul( + encoder_layers_1_self_attn_Transpose_2_output_0, + encoder_layers_1_self_attn_Sqrt_2_output_0, + ) + encoder_layers_1_self_attn_MatMul_output_0 = opset20.MatMul( + encoder_layers_1_self_attn_Mul_output_0, encoder_layers_1_self_attn_Mul_1_output_0 + ) + encoder_layers_1_self_attn_Softmax_output_0 = opset20.Softmax( + encoder_layers_1_self_attn_MatMul_output_0, axis=-1 + ) + encoder_layers_1_self_attn_MatMul_1_output_0 = opset20.MatMul( + encoder_layers_1_self_attn_Softmax_output_0, + encoder_layers_1_self_attn_Transpose_1_output_0, + ) + encoder_layers_1_self_attn_Transpose_3_output_0 = opset20.Transpose( + encoder_layers_1_self_attn_MatMul_1_output_0, perm=[0, 2, 1, 3] + ) + Unsqueeze_232 = opset20.Constant(int_values=[0]) + encoder_layers_1_self_attn_Unsqueeze_3_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_232 + ) + Unsqueeze_234 = opset20.Constant(int_values=[0]) + encoder_layers_1_self_attn_Unsqueeze_4_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_Gather_1_output_0, Unsqueeze_234 + ) + encoder_layers_1_self_attn_Constant_14_output_0 = opset20.Constant(value_ints=[16]) + + encoder_layers_1_self_attn_Concat_3_output_0 = opset20.Concat( + encoder_layers_1_self_attn_Unsqueeze_3_output_0, + encoder_layers_1_self_attn_Unsqueeze_4_output_0, + encoder_layers_1_self_attn_Constant_14_output_0, + axis=0, + ) + encoder_layers_1_self_attn_Reshape_3_output_0 = opset20.Reshape( + encoder_layers_1_self_attn_Transpose_3_output_0, + encoder_layers_1_self_attn_Concat_3_output_0, + allowzero=0, + ) + encoder_layers_1_self_attn_out_proj_MatMul_output_0 = 
opset20.MatMul( + encoder_layers_1_self_attn_Reshape_3_output_0, MatMul_286 + ) + encoder_layers_1_self_attn_out_proj_Add_output_0 = opset20.Add( + encoder_layers_1_self_attn_out_proj_bias, + encoder_layers_1_self_attn_out_proj_MatMul_output_0, + ) + encoder_layers_1_Add_output_0 = opset20.Add( + encoder_layers_0_final_layer_norm_LayerNormalization_output_0, + encoder_layers_1_self_attn_out_proj_Add_output_0, + ) + encoder_layers_1_self_attn_layer_norm_LayerNormalization_output_0 = ( + opset20.LayerNormalization( + encoder_layers_1_Add_output_0, + encoder_layers_1_self_attn_layer_norm_weight, + encoder_layers_1_self_attn_layer_norm_bias, + axis=-1, + epsilon=9.999999747378752e-06, + ) + ) + encoder_layers_1_fc1_MatMul_output_0 = opset20.MatMul( + encoder_layers_1_self_attn_layer_norm_LayerNormalization_output_0, MatMul_287 + ) + encoder_layers_1_fc1_Add_output_0 = opset20.Add( + encoder_layers_1_fc1_bias, encoder_layers_1_fc1_MatMul_output_0 + ) + encoder_layers_1_activation_fn_Gelu_output_0 = opset20.Gelu( + encoder_layers_1_fc1_Add_output_0, approximate="none" + ) + encoder_layers_1_fc2_MatMul_output_0 = opset20.MatMul( + encoder_layers_1_activation_fn_Gelu_output_0, MatMul_288 + ) + encoder_layers_1_fc2_Add_output_0 = opset20.Add( + encoder_layers_1_fc2_bias, encoder_layers_1_fc2_MatMul_output_0 + ) + encoder_layers_1_Add_1_output_0 = opset20.Add( + encoder_layers_1_self_attn_layer_norm_LayerNormalization_output_0, + encoder_layers_1_fc2_Add_output_0, + ) + encoder_output = opset20.LayerNormalization( + encoder_layers_1_Add_1_output_0, + encoder_layers_1_final_layer_norm_weight, + encoder_layers_1_final_layer_norm_bias, + axis=-1, + epsilon=9.999999747378752e-06, + ) + return encoder_output + + return main_graph.to_model_proto() + + +def make_model_with_random_weights(): + encoder_embed_tokens_weight = np.random.rand(1000, 16).astype(np.float32) + encoder_embed_positions_weight = np.random.rand(102, 16).astype(np.float32) + encoder_layers_0_self_attn_k_proj_bias 
= np.random.rand(16).astype(np.float32) + encoder_layers_0_self_attn_layer_norm_weight = np.random.rand(16).astype(np.float32) + encoder_layers_0_fc1_bias = np.zeros((4), dtype=np.float32) + + MatMul_257 = np.random.rand(16, 16).astype(np.float32) + MatMul_267 = np.random.rand(16, 16).astype(np.float32) + MatMul_268 = np.random.rand(16, 16).astype(np.float32) + MatMul_270 = np.random.rand(16, 16).astype(np.float32) + MatMul_271 = np.random.rand(16, 4).astype(np.float32) + MatMul_272 = np.random.rand(4, 16).astype(np.float32) + MatMul_273 = np.random.rand(16, 16).astype(np.float32) + MatMul_283 = np.random.rand(16, 16).astype(np.float32) + MatMul_284 = np.random.rand(16, 16).astype(np.float32) + MatMul_286 = np.random.rand(16, 16).astype(np.float32) + MatMul_287 = np.random.rand(16, 16).astype(np.float32) + MatMul_288 = np.random.rand(16, 16).astype(np.float32) + + model = make_model( + encoder_embed_positions_weight=encoder_embed_positions_weight, + encoder_embed_tokens_weight=encoder_embed_tokens_weight, + encoder_layers_0_self_attn_k_proj_bias=encoder_layers_0_self_attn_k_proj_bias, + encoder_layers_0_self_attn_layer_norm_weight=encoder_layers_0_self_attn_layer_norm_weight, + encoder_layers_0_fc1_bias=encoder_layers_0_fc1_bias, + MatMul_257=MatMul_257, + MatMul_267=MatMul_267, + MatMul_268=MatMul_268, + MatMul_270=MatMul_270, + MatMul_271=MatMul_271, + MatMul_272=MatMul_272, + MatMul_273=MatMul_273, + MatMul_283=MatMul_283, + MatMul_284=MatMul_284, + MatMul_286=MatMul_286, + MatMul_287=MatMul_287, + MatMul_288=MatMul_288, + ) + return model + + +class _BartEncoderTest: + def get_onnx_model(self): + if not hasattr(self, "_onnx_model"): + model_proto = make_model_with_random_weights() + model = ir.serde.deserialize_model(model_proto) + self._onnx_model = model + return self._onnx_model + + def get_ort_inputs(self): + if not hasattr(self, "_ort_inputs"): + inputs = { + "input_ids": np.random.randint(0, 49152, (1, 16)).astype(np.int64), + } + self._ort_inputs = 
inputs + return self._ort_inputs + + +def bart_encoder_test(): + return _BartEncoderTest() diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py index 533d9be0d4..ce824e3c71 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py @@ -62,7 +62,20 @@ def test_whisper_decoder(self): new_outputs = ort_run("optimized", model, inputs) assert_allclose(new_outputs, original_outputs) - # TODO: add more testcases with default attrs. + def test_bart_encoder(self): + bart_encoder = whisper_decoder_test() + model = bart_encoder.get_onnx_model() + onnxscript.optimizer.optimize(model) + + inputs = bart_encoder.get_ort_inputs() + original_outputs = ort_run("original", model, inputs) + + fuse_skip_layer_normalization(model) + op_types = [n.op_type for n in model.graph] + self.assertIn("SkipLayerNormalization", op_types) + self.assertEqual(op_types.count("SkipLayerNormalization"), 4) + new_outputs = ort_run("optimized", model, inputs) + assert_allclose(new_outputs, original_outputs) if __name__ == "__main__": From 87122e84b2f5d95f79812d767abe3311b08e7cba Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Tue, 17 Jun 2025 11:18:56 +0200 Subject: [PATCH 3/9] =?UTF-8?q?test:=20fix=20typos=20in=20BART=20encoder?= =?UTF-8?q?=20test=20model=E2=9A=A1=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ort_fusions/models/_bart_encoder.py | 118 +++++++++--------- .../ort_fusions/skip_normalization_test.py | 3 +- 2 files changed, 61 insertions(+), 60 deletions(-) diff --git a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py index 7dbef59193..3364d4fa24 100644 --- a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py +++ b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py @@ -34,7 +34,7 @@ def make_model( 
MatMul_288, ): @script() - def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: + def main_graph(input_ids: INT64[1, None]) -> FLOAT[None]: encoder_layernorm_embedding_bias = opset20.Identity( encoder_layers_0_self_attn_layer_norm_weight ) @@ -94,13 +94,13 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: ) encoder_Shape_output_0 = opset20.Shape(input_ids) - encoder_Constant_output_0 = opset20.Constant(value_ints=1) + encoder_Constant_output_0 = opset20.Constant(value=1) encoder_Gather_output_0 = opset20.Gather( encoder_Shape_output_0, encoder_Constant_output_0 ) - encoder_Constant_1_output_0 = opset20.Constant(value_int=-1) - Unsqueeze_43 = opset20.Constant(int_values=[0]) + encoder_Constant_1_output_0 = opset20.Constant(value=[-1]) + Unsqueeze_43 = opset20.Constant(value=[0]) encoder_Unsqueeze_output_0 = opset20.Unsqueeze(encoder_Gather_output_0, Unsqueeze_43) encoder_Concat_output_0 = opset20.Concat( encoder_Constant_1_output_0, encoder_Unsqueeze_output_0, axis=0 @@ -111,37 +111,37 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_embed_tokens_Gather_output_0 = opset20.Gather( encoder_embed_tokens_weight, encoder_Reshape_output_0 ) - encoder_embed_tokens_Constant_output_0 = opset20.Constant(value_floats=[1.0]) + encoder_embed_tokens_Constant_output_0 = opset20.Constant(value=[1.0]) encoder_embed_tokens_Mul_output_0 = opset20.Mul( encoder_embed_tokens_Gather_output_0, encoder_embed_tokens_Constant_output_0 ) encoder_embed_positions_Shape_output_0 = opset20.Shape(input_ids) - encoder_embed_positions_Constant_output_0 = opset20.Constant(value_int=0) + encoder_embed_positions_Constant_output_0 = opset20.Constant(value=0) encoder_embed_positions_Gather_output_0 = opset20.Gather( encoder_embed_positions_Shape_output_0, encoder_embed_positions_Constant_output_0, axis=0, ) - encoder_embed_positions_Constant_1_output_0 = opset20.Constant(value_int=0) + encoder_embed_positions_Constant_1_output_0 = opset20.Constant(value=0) 
encoder_embed_positions_Cast_output_0 = opset20.Cast(encoder_Gather_output_0, to=7) - encoder_embed_positions_Constant_2_output_0 = opset20.Constant(value_int=1) + encoder_embed_positions_Constant_2_output_0 = opset20.Constant(value=1) encoder_embed_positions_Range_output_0 = opset20.Range( encoder_embed_positions_Constant_1_output_0, encoder_embed_positions_Cast_output_0, encoder_embed_positions_Constant_2_output_0, ) - encoder_embed_positions_Constant_3_output_0 = opset20.Constant(value_ints=[0]) + encoder_embed_positions_Constant_3_output_0 = opset20.Constant(value=[0]) encoder_embed_positions_Unsqueeze_output_0 = opset20.Unsqueeze( encoder_embed_positions_Gather_output_0, encoder_embed_positions_Constant_3_output_0, ) - encoder_embed_positions_Constant_4_output_0 = opset20.Constant(value_ints=[-1]) + encoder_embed_positions_Constant_4_output_0 = opset20.Constant(value=[-1]) encoder_embed_positions_Concat_output_0 = opset20.Concat( encoder_embed_positions_Unsqueeze_output_0, encoder_embed_positions_Constant_4_output_0, axis=0, ) - encoder_embed_positions_Constant_5_output_0 = opset20.Constant(value_ints=[-1]) + encoder_embed_positions_Constant_5_output_0 = opset20.Constant(value=[-1]) encoder_embed_positions_Reshape_output_0 = opset20.Reshape( encoder_embed_positions_Concat_output_0, encoder_embed_positions_Constant_5_output_0, @@ -151,9 +151,9 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: ) encoder_embed_positions_ConstantOfShape_output_0 = opset20.ConstantOfShape( encoder_embed_positions_Shape_1_output_0, - value=make_tensor("onef", TensorProto.FLOAT, [1], [1]), + value=make_tensor("onef", TensorProto.INT64, [1], [1]), ) - encoder_embed_positions_Constant_6_output_0 = opset20.Constant(value_ints=[-1]) + encoder_embed_positions_Constant_6_output_0 = opset20.Constant(value=[-1]) encoder_embed_positions_Mul_output_0 = opset20.Mul( encoder_embed_positions_ConstantOfShape_output_0, encoder_embed_positions_Constant_6_output_0, @@ -169,7 +169,7 @@ def 
main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_embed_positions_Expand_output_0 = opset20.Expand( encoder_embed_positions_Range_output_0, encoder_embed_positions_Where_output_0 ) - encoder_embed_positions_Constant_7_output_0 = opset20.Constant(value_int=2) + encoder_embed_positions_Constant_7_output_0 = opset20.Constant(value=2) encoder_embed_positions_Add_output_0 = opset20.Add( encoder_embed_positions_Expand_output_0, encoder_embed_positions_Constant_7_output_0, @@ -191,7 +191,7 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_0_self_attn_Shape_output_0 = opset20.Shape( encoder_layernorm_embedding_LayerNormalization_output_0 ) - encoder_layers_0_self_attn_Constant_output_0 = opset20.Constant(value_int=0) + encoder_layers_0_self_attn_Constant_output_0 = opset20.Constant(value=0) encoder_layers_0_self_attn_Gather_output_0 = opset20.Gather( encoder_layers_0_self_attn_Shape_output_0, encoder_layers_0_self_attn_Constant_output_0, @@ -200,7 +200,7 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_0_self_attn_Shape_1_output_0 = opset20.Shape( encoder_layernorm_embedding_LayerNormalization_output_0 ) - encoder_layers_0_self_attn_Constant_1_output_0 = opset20.Constant(value_int=1) + encoder_layers_0_self_attn_Constant_1_output_0 = opset20.Constant(value=1) encoder_layers_0_self_attn_Gather_1_output_0 = opset20.Gather( encoder_layers_0_self_attn_Shape_1_output_0, encoder_layers_0_self_attn_Constant_1_output_0, @@ -213,13 +213,13 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_0_self_attn_q_proj_bias, encoder_layers_0_self_attn_q_proj_MatMul_output_0, ) - Unsqueeze_88 = opset20.Constant(value_ints=[0]) + Unsqueeze_88 = opset20.Constant(value=[0]) encoder_layers_0_self_attn_Unsqueeze_output_0 = opset20.Unsqueeze( encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_88 ) - encoder_layers_0_self_attn_Constant_2_output_0 = opset20.Constant(value_ints=[-1]) - 
encoder_layers_0_self_attn_Constant_3_output_0 = opset20.Constant(value_ints=[4]) - encoder_layers_0_self_attn_Constant_4_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Constant_2_output_0 = opset20.Constant(value=[-1]) + encoder_layers_0_self_attn_Constant_3_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_Constant_4_output_0 = opset20.Constant(value=[4]) encoder_layers_0_self_attn_Concat_output_0 = opset20.Concat( encoder_layers_0_self_attn_Unsqueeze_output_0, encoder_layers_0_self_attn_Constant_2_output_0, @@ -227,13 +227,13 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_0_self_attn_Constant_4_output_0, axis=0, ) - Unsqueeze_97 = opset20.Constant(value_ints=[1]) + Unsqueeze_97 = opset20.Constant(value=[0]) encoder_layers_0_self_attn_Unsqueeze_1_output_0 = opset20.Unsqueeze( encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_97 ) - encoder_layers_0_self_attn_Constant_5_output_0 = opset20.Constant(value_ints=[-1]) - encoder_layers_0_self_attn_Constant_6_output_0 = opset20.Constant(value_ints=[4]) - encoder_layers_0_self_attn_Constant_7_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Constant_5_output_0 = opset20.Constant(value=[-1]) + encoder_layers_0_self_attn_Constant_6_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_Constant_7_output_0 = opset20.Constant(value=[4]) encoder_layers_0_self_attn_Concat_1_output_0 = opset20.Concat( encoder_layers_0_self_attn_Unsqueeze_1_output_0, encoder_layers_0_self_attn_Constant_5_output_0, @@ -241,13 +241,13 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_0_self_attn_Constant_7_output_0, axis=0, ) - Unsqueeze_106 = opset20.Constant(value_ints=[1]) + Unsqueeze_106 = opset20.Constant(value=[0]) encoder_layers_0_self_attn_Unsqueeze_2_output_0 = opset20.Unsqueeze( encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_106 ) - encoder_layers_0_self_attn_Constant_8_output_0 = 
opset20.Constant(value_ints=[-1]) - encoder_layers_0_self_attn_Constant_9_output_0 = opset20.Constant(value_ints=[4]) - encoder_layers_0_self_attn_Constant_10_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_0_self_attn_Constant_8_output_0 = opset20.Constant(value=[-1]) + encoder_layers_0_self_attn_Constant_9_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_Constant_10_output_0 = opset20.Constant(value=[4]) encoder_layers_0_self_attn_Concat_2_output_0 = opset20.Concat( encoder_layers_0_self_attn_Unsqueeze_2_output_0, encoder_layers_0_self_attn_Constant_8_output_0, @@ -294,9 +294,9 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_0_self_attn_Shape_2_output_0 = opset20.Shape( encoder_layers_0_self_attn_Transpose_output_0 ) - encoder_layers_0_self_attn_Constant_11_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_0_self_attn_Constant_11_output_0 = opset20.Constant(value=[-1]) encoder_layers_0_self_attn_Constant_12_output_0 = opset20.Constant( - value_ints=[9223372036854775807] + value=[9223372036854775807] ) encoder_layers_0_self_attn_Slice_output_0 = opset20.Slice( encoder_layers_0_self_attn_Shape_2_output_0, @@ -309,7 +309,7 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_0_self_attn_Sqrt_output_0 = opset20.Sqrt( encoder_layers_0_self_attn_Cast_output_0 ) - encoder_layers_0_self_attn_Constant_13_output_0 = opset20.Constant(value_floats=[1.0]) + encoder_layers_0_self_attn_Constant_13_output_0 = opset20.Constant(value=[1.0]) encoder_layers_0_self_attn_Div_output_0 = opset20.Div( encoder_layers_0_self_attn_Constant_13_output_0, encoder_layers_0_self_attn_Sqrt_output_0, @@ -347,15 +347,15 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_0_self_attn_Transpose_3_output_0 = opset20.Transpose( encoder_layers_0_self_attn_MatMul_1_output_0, perm=[0, 2, 1, 3] ) - Unsqueeze_145 = opset20.Constant(value_ints=[0]) + Unsqueeze_145 = opset20.Constant(value=[0]) 
encoder_layers_0_self_attn_Unsqueeze_3_output_0 = opset20.Unsqueeze( encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_145 ) - Unsqueeze_147 = opset20.Constant(value_ints=[0]) + Unsqueeze_147 = opset20.Constant(value=[0]) encoder_layers_0_self_attn_Unsqueeze_4_output_0 = opset20.Unsqueeze( encoder_layers_0_self_attn_Gather_1_output_0, Unsqueeze_147 ) - encoder_layers_0_self_attn_Constant_14_output_0 = opset20.Constant(value_ints=[16]) + encoder_layers_0_self_attn_Constant_14_output_0 = opset20.Constant(value=[16]) encoder_layers_0_self_attn_Concat_3_output_0 = opset20.Concat( encoder_layers_0_self_attn_Unsqueeze_3_output_0, encoder_layers_0_self_attn_Unsqueeze_4_output_0, @@ -417,7 +417,7 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_1_self_attn_Shape_output_0 = opset20.Shape( encoder_layers_0_final_layer_norm_LayerNormalization_output_0 ) - encoder_layers_1_self_attn_Constant_output_0 = opset20.Constant(value_int=0) + encoder_layers_1_self_attn_Constant_output_0 = opset20.Constant(value=0) encoder_layers_1_self_attn_Gather_output_0 = opset20.Gather( encoder_layers_1_self_attn_Shape_output_0, encoder_layers_1_self_attn_Constant_output_0, @@ -426,7 +426,7 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_1_self_attn_Shape_1_output_0 = opset20.Shape( encoder_layers_0_final_layer_norm_LayerNormalization_output_0 ) - encoder_layers_1_self_attn_Constant_1_output_0 = opset20.Constant(value_int=1) + encoder_layers_1_self_attn_Constant_1_output_0 = opset20.Constant(value=1) encoder_layers_1_self_attn_Gather_1_output_0 = opset20.Gather( encoder_layers_1_self_attn_Shape_1_output_0, encoder_layers_1_self_attn_Constant_1_output_0, @@ -439,13 +439,13 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_1_self_attn_q_proj_bias, encoder_layers_1_self_attn_q_proj_MatMul_output_0, ) - Unsqueeze_176 = opset20.Constant(value_ints=[0]) + Unsqueeze_176 = opset20.Constant(value=[0]) 
encoder_layers_1_self_attn_Unsqueeze_output_0 = opset20.Unsqueeze( encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_176 ) - encoder_layers_1_self_attn_Constant_2_output_0 = opset20.Constant(value_ints=[-1]) - encoder_layers_1_self_attn_Constant_3_output_0 = opset20.Constant(value_ints=[4]) - encoder_layers_1_self_attn_Constant_4_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_1_self_attn_Constant_2_output_0 = opset20.Constant(value=[-1]) + encoder_layers_1_self_attn_Constant_3_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_Constant_4_output_0 = opset20.Constant(value=[4]) encoder_layers_1_self_attn_Concat_output_0 = opset20.Concat( encoder_layers_1_self_attn_Unsqueeze_output_0, encoder_layers_1_self_attn_Constant_2_output_0, @@ -453,27 +453,27 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_1_self_attn_Constant_4_output_0, axis=0, ) - Unsqueeze_185 = opset20.Constant(value_ints=[0]) + Unsqueeze_185 = opset20.Constant(value=[0]) encoder_layers_1_self_attn_Unsqueeze_1_output_0 = opset20.Unsqueeze( encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_185 ) - encoder_layers_1_self_attn_Constant_5_output_0 = opset20.Constant(value_ints=[-1]) - encoder_layers_1_self_attn_Constant_6_output_0 = opset20.Constant(value_ints=[4]) - encoder_layers_1_self_attn_Constant_7_output_0 = opset20.Constant(value_ints=[4]) - encoder_layers_1_self_attn_Concat_1_output_0 = opset20.Constant( + encoder_layers_1_self_attn_Constant_5_output_0 = opset20.Constant(value=[-1]) + encoder_layers_1_self_attn_Constant_6_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_Constant_7_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_Concat_1_output_0 = opset20.Concat( encoder_layers_1_self_attn_Unsqueeze_1_output_0, encoder_layers_1_self_attn_Constant_5_output_0, encoder_layers_1_self_attn_Constant_6_output_0, encoder_layers_1_self_attn_Constant_7_output_0, axis=0, ) - Unsqueeze_194 = 
opset20.Constant(value_ints=[0]) + Unsqueeze_194 = opset20.Constant(value=[0]) encoder_layers_1_self_attn_Unsqueeze_2_output_0 = opset20.Unsqueeze( encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_194 ) - encoder_layers_1_self_attn_Constant_8_output_0 = opset20.Constant(value_ints=[-1]) - encoder_layers_1_self_attn_Constant_9_output_0 = opset20.Constant(value_ints=[4]) - encoder_layers_1_self_attn_Constant_10_output_0 = opset20.Constant(value_ints=[4]) + encoder_layers_1_self_attn_Constant_8_output_0 = opset20.Constant(value=[-1]) + encoder_layers_1_self_attn_Constant_9_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_Constant_10_output_0 = opset20.Constant(value=[4]) encoder_layers_1_self_attn_Concat_2_output_0 = opset20.Concat( encoder_layers_1_self_attn_Unsqueeze_2_output_0, encoder_layers_1_self_attn_Constant_8_output_0, @@ -519,9 +519,9 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_1_self_attn_Shape_2_output_0 = opset20.Shape( encoder_layers_1_self_attn_Transpose_output_0 ) - encoder_layers_1_self_attn_Constant_11_output_0 = opset20.Constant(value_ints=[-1]) + encoder_layers_1_self_attn_Constant_11_output_0 = opset20.Constant(value=[-1]) encoder_layers_1_self_attn_Constant_12_output_0 = opset20.Constant( - value_ints=[9223372036854775807] + value=[9223372036854775807] ) encoder_layers_1_self_attn_Slice_output_0 = opset20.Slice( encoder_layers_1_self_attn_Shape_2_output_0, @@ -534,7 +534,7 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: encoder_layers_1_self_attn_Sqrt_output_0 = opset20.Sqrt( encoder_layers_1_self_attn_Cast_output_0 ) - encoder_layers_1_self_attn_Constant_13_output_0 = opset20.Constant(value_floats=[1.0]) + encoder_layers_1_self_attn_Constant_13_output_0 = opset20.Constant(value=[1.0]) encoder_layers_1_self_attn_Div_output_0 = opset20.Div( encoder_layers_1_self_attn_Constant_13_output_0, encoder_layers_1_self_attn_Sqrt_output_0, @@ -572,15 +572,15 @@ def main_graph(input_ids: 
INT64[1, 16]) -> FLOAT[None]: encoder_layers_1_self_attn_Transpose_3_output_0 = opset20.Transpose( encoder_layers_1_self_attn_MatMul_1_output_0, perm=[0, 2, 1, 3] ) - Unsqueeze_232 = opset20.Constant(int_values=[0]) + Unsqueeze_232 = opset20.Constant(value=[0]) encoder_layers_1_self_attn_Unsqueeze_3_output_0 = opset20.Unsqueeze( encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_232 ) - Unsqueeze_234 = opset20.Constant(int_values=[0]) + Unsqueeze_234 = opset20.Constant(value=[0]) encoder_layers_1_self_attn_Unsqueeze_4_output_0 = opset20.Unsqueeze( encoder_layers_1_self_attn_Gather_1_output_0, Unsqueeze_234 ) - encoder_layers_1_self_attn_Constant_14_output_0 = opset20.Constant(value_ints=[16]) + encoder_layers_1_self_attn_Constant_14_output_0 = opset20.Constant(value=[16]) encoder_layers_1_self_attn_Concat_3_output_0 = opset20.Concat( encoder_layers_1_self_attn_Unsqueeze_3_output_0, @@ -645,7 +645,7 @@ def main_graph(input_ids: INT64[1, 16]) -> FLOAT[None]: def make_model_with_random_weights(): - encoder_embed_tokens_weight = np.random.rand(1000, 16).astype(np.float32) + encoder_embed_tokens_weight = np.random.rand(1024, 16).astype(np.float32) encoder_embed_positions_weight = np.random.rand(102, 16).astype(np.float32) encoder_layers_0_self_attn_k_proj_bias = np.random.rand(16).astype(np.float32) encoder_layers_0_self_attn_layer_norm_weight = np.random.rand(16).astype(np.float32) @@ -661,8 +661,8 @@ def make_model_with_random_weights(): MatMul_283 = np.random.rand(16, 16).astype(np.float32) MatMul_284 = np.random.rand(16, 16).astype(np.float32) MatMul_286 = np.random.rand(16, 16).astype(np.float32) - MatMul_287 = np.random.rand(16, 16).astype(np.float32) - MatMul_288 = np.random.rand(16, 16).astype(np.float32) + MatMul_287 = np.random.rand(16, 4).astype(np.float32) + MatMul_288 = np.random.rand(4, 16).astype(np.float32) model = make_model( encoder_embed_positions_weight=encoder_embed_positions_weight, @@ -697,7 +697,7 @@ def get_onnx_model(self): def 
get_ort_inputs(self): if not hasattr(self, "_ort_inputs"): inputs = { - "input_ids": np.random.randint(0, 49152, (1, 16)).astype(np.int64), + "input_ids": np.random.randint(0, 1024, (1, 16)).astype(np.int64), } self._ort_inputs = inputs return self._ort_inputs diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py index ce824e3c71..2c6248ec2c 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py @@ -6,6 +6,7 @@ import onnxscript.optimizer from onnxscript.rewriter.ort_fusions._test_utils import assert_allclose, ort_run +from onnxscript.rewriter.ort_fusions.models._bart_encoder import bart_encoder_test from onnxscript.rewriter.ort_fusions.models._smollm_1 import smollm_test_1 from onnxscript.rewriter.ort_fusions.models._whisper_decoder import whisper_decoder_test from onnxscript.rewriter.ort_fusions.models._whisper_encoder import whisper_encoder_test @@ -63,7 +64,7 @@ def test_whisper_decoder(self): assert_allclose(new_outputs, original_outputs) def test_bart_encoder(self): - bart_encoder = whisper_decoder_test() + bart_encoder = bart_encoder_test() model = bart_encoder.get_onnx_model() onnxscript.optimizer.optimize(model) From e9c02358786aea16f2ec45da346f61708593536a Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Tue, 17 Jun 2025 11:24:59 +0200 Subject: [PATCH 4/9] =?UTF-8?q?style:=20rename=20variable=20names=20of=20b?= =?UTF-8?q?art=20encoder=20to=20lowercase=F0=9F=92=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ort_fusions/models/_bart_encoder.py | 820 +++++++++--------- 1 file changed, 410 insertions(+), 410 deletions(-) diff --git a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py index 3364d4fa24..77ccc61dde 100644 --- a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py +++ 
b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py @@ -20,18 +20,18 @@ def make_model( encoder_layers_0_self_attn_k_proj_bias, encoder_layers_0_self_attn_layer_norm_weight, encoder_layers_0_fc1_bias, - MatMul_257, - MatMul_267, - MatMul_268, - MatMul_270, - MatMul_271, - MatMul_272, - MatMul_273, - MatMul_283, - MatMul_284, - MatMul_286, - MatMul_287, - MatMul_288, + matmul_257, + matmul_267, + matmul_268, + matmul_270, + matmul_271, + matmul_272, + matmul_273, + matmul_283, + matmul_284, + matmul_286, + matmul_287, + matmul_288, ): @script() def main_graph(input_ids: INT64[1, None]) -> FLOAT[None]: @@ -93,547 +93,547 @@ def main_graph(input_ids: INT64[1, None]) -> FLOAT[None]: encoder_layers_0_self_attn_k_proj_bias ) - encoder_Shape_output_0 = opset20.Shape(input_ids) - encoder_Constant_output_0 = opset20.Constant(value=1) - encoder_Gather_output_0 = opset20.Gather( - encoder_Shape_output_0, encoder_Constant_output_0 + encoder_shape_output_0 = opset20.Shape(input_ids) + encoder_constant_output_0 = opset20.Constant(value=1) + encoder_gather_output_0 = opset20.Gather( + encoder_shape_output_0, encoder_constant_output_0 ) - encoder_Constant_1_output_0 = opset20.Constant(value=[-1]) - Unsqueeze_43 = opset20.Constant(value=[0]) - encoder_Unsqueeze_output_0 = opset20.Unsqueeze(encoder_Gather_output_0, Unsqueeze_43) - encoder_Concat_output_0 = opset20.Concat( - encoder_Constant_1_output_0, encoder_Unsqueeze_output_0, axis=0 - ) - encoder_Reshape_output_0 = opset20.Reshape( - input_ids, encoder_Concat_output_0, allowzero=0 - ) - encoder_embed_tokens_Gather_output_0 = opset20.Gather( - encoder_embed_tokens_weight, encoder_Reshape_output_0 - ) - encoder_embed_tokens_Constant_output_0 = opset20.Constant(value=[1.0]) - encoder_embed_tokens_Mul_output_0 = opset20.Mul( - encoder_embed_tokens_Gather_output_0, encoder_embed_tokens_Constant_output_0 - ) - encoder_embed_positions_Shape_output_0 = opset20.Shape(input_ids) - encoder_embed_positions_Constant_output_0 = 
opset20.Constant(value=0) - encoder_embed_positions_Gather_output_0 = opset20.Gather( - encoder_embed_positions_Shape_output_0, - encoder_embed_positions_Constant_output_0, + encoder_constant_1_output_0 = opset20.Constant(value=[-1]) + unsqueeze_43 = opset20.Constant(value=[0]) + encoder_unsqueeze_output_0 = opset20.Unsqueeze(encoder_gather_output_0, unsqueeze_43) + encoder_concat_output_0 = opset20.Concat( + encoder_constant_1_output_0, encoder_unsqueeze_output_0, axis=0 + ) + encoder_reshape_output_0 = opset20.Reshape( + input_ids, encoder_concat_output_0, allowzero=0 + ) + encoder_embed_tokens_gather_output_0 = opset20.Gather( + encoder_embed_tokens_weight, encoder_reshape_output_0 + ) + encoder_embed_tokens_constant_output_0 = opset20.Constant(value=[1.0]) + encoder_embed_tokens_mul_output_0 = opset20.Mul( + encoder_embed_tokens_gather_output_0, encoder_embed_tokens_constant_output_0 + ) + encoder_embed_positions_shape_output_0 = opset20.Shape(input_ids) + encoder_embed_positions_constant_output_0 = opset20.Constant(value=0) + encoder_embed_positions_gather_output_0 = opset20.Gather( + encoder_embed_positions_shape_output_0, + encoder_embed_positions_constant_output_0, axis=0, ) - encoder_embed_positions_Constant_1_output_0 = opset20.Constant(value=0) - encoder_embed_positions_Cast_output_0 = opset20.Cast(encoder_Gather_output_0, to=7) - encoder_embed_positions_Constant_2_output_0 = opset20.Constant(value=1) - encoder_embed_positions_Range_output_0 = opset20.Range( - encoder_embed_positions_Constant_1_output_0, - encoder_embed_positions_Cast_output_0, - encoder_embed_positions_Constant_2_output_0, - ) - encoder_embed_positions_Constant_3_output_0 = opset20.Constant(value=[0]) - encoder_embed_positions_Unsqueeze_output_0 = opset20.Unsqueeze( - encoder_embed_positions_Gather_output_0, - encoder_embed_positions_Constant_3_output_0, - ) - encoder_embed_positions_Constant_4_output_0 = opset20.Constant(value=[-1]) - encoder_embed_positions_Concat_output_0 = 
opset20.Concat( - encoder_embed_positions_Unsqueeze_output_0, - encoder_embed_positions_Constant_4_output_0, + encoder_embed_positions_constant_1_output_0 = opset20.Constant(value=0) + encoder_embed_positions_cast_output_0 = opset20.Cast(encoder_gather_output_0, to=7) + encoder_embed_positions_constant_2_output_0 = opset20.Constant(value=1) + encoder_embed_positions_range_output_0 = opset20.Range( + encoder_embed_positions_constant_1_output_0, + encoder_embed_positions_cast_output_0, + encoder_embed_positions_constant_2_output_0, + ) + encoder_embed_positions_constant_3_output_0 = opset20.Constant(value=[0]) + encoder_embed_positions_unsqueeze_output_0 = opset20.Unsqueeze( + encoder_embed_positions_gather_output_0, + encoder_embed_positions_constant_3_output_0, + ) + encoder_embed_positions_constant_4_output_0 = opset20.Constant(value=[-1]) + encoder_embed_positions_concat_output_0 = opset20.Concat( + encoder_embed_positions_unsqueeze_output_0, + encoder_embed_positions_constant_4_output_0, axis=0, ) - encoder_embed_positions_Constant_5_output_0 = opset20.Constant(value=[-1]) - encoder_embed_positions_Reshape_output_0 = opset20.Reshape( - encoder_embed_positions_Concat_output_0, - encoder_embed_positions_Constant_5_output_0, + encoder_embed_positions_constant_5_output_0 = opset20.Constant(value=[-1]) + encoder_embed_positions_reshape_output_0 = opset20.Reshape( + encoder_embed_positions_concat_output_0, + encoder_embed_positions_constant_5_output_0, ) - encoder_embed_positions_Shape_1_output_0 = opset20.Shape( - encoder_embed_positions_Reshape_output_0 + encoder_embed_positions_shape_1_output_0 = opset20.Shape( + encoder_embed_positions_reshape_output_0 ) - encoder_embed_positions_ConstantOfShape_output_0 = opset20.ConstantOfShape( - encoder_embed_positions_Shape_1_output_0, + encoder_embed_positions_constantofshape_output_0 = opset20.ConstantOfShape( + encoder_embed_positions_shape_1_output_0, value=make_tensor("onef", TensorProto.INT64, [1], [1]), ) - 
encoder_embed_positions_Constant_6_output_0 = opset20.Constant(value=[-1]) - encoder_embed_positions_Mul_output_0 = opset20.Mul( - encoder_embed_positions_ConstantOfShape_output_0, - encoder_embed_positions_Constant_6_output_0, + encoder_embed_positions_constant_6_output_0 = opset20.Constant(value=[-1]) + encoder_embed_positions_mul_output_0 = opset20.Mul( + encoder_embed_positions_constantofshape_output_0, + encoder_embed_positions_constant_6_output_0, ) - encoder_embed_positions_Equal_output_0 = opset20.Equal( - encoder_embed_positions_Reshape_output_0, encoder_embed_positions_Mul_output_0 + encoder_embed_positions_equal_output_0 = opset20.Equal( + encoder_embed_positions_reshape_output_0, encoder_embed_positions_mul_output_0 ) - encoder_embed_positions_Where_output_0 = opset20.Where( - encoder_embed_positions_Equal_output_0, - encoder_embed_positions_ConstantOfShape_output_0, - encoder_embed_positions_Reshape_output_0, + encoder_embed_positions_where_output_0 = opset20.Where( + encoder_embed_positions_equal_output_0, + encoder_embed_positions_constantofshape_output_0, + encoder_embed_positions_reshape_output_0, ) - encoder_embed_positions_Expand_output_0 = opset20.Expand( - encoder_embed_positions_Range_output_0, encoder_embed_positions_Where_output_0 + encoder_embed_positions_expand_output_0 = opset20.Expand( + encoder_embed_positions_range_output_0, encoder_embed_positions_where_output_0 ) - encoder_embed_positions_Constant_7_output_0 = opset20.Constant(value=2) - encoder_embed_positions_Add_output_0 = opset20.Add( - encoder_embed_positions_Expand_output_0, - encoder_embed_positions_Constant_7_output_0, + encoder_embed_positions_constant_7_output_0 = opset20.Constant(value=2) + encoder_embed_positions_add_output_0 = opset20.Add( + encoder_embed_positions_expand_output_0, + encoder_embed_positions_constant_7_output_0, ) - encoder_embed_positions_Gather_1_output_0 = opset20.Gather( - encoder_embed_positions_weight, encoder_embed_positions_Add_output_0 + 
encoder_embed_positions_gather_1_output_0 = opset20.Gather( + encoder_embed_positions_weight, encoder_embed_positions_add_output_0 ) - encoder_Cast_output_0 = opset20.Cast(encoder_embed_positions_Gather_1_output_0, to=1) - encoder_Add_output_0 = opset20.Add( - encoder_embed_tokens_Mul_output_0, encoder_Cast_output_0 + encoder_cast_output_0 = opset20.Cast(encoder_embed_positions_gather_1_output_0, to=1) + encoder_add_output_0 = opset20.Add( + encoder_embed_tokens_mul_output_0, encoder_cast_output_0 ) - encoder_layernorm_embedding_LayerNormalization_output_0 = opset20.LayerNormalization( - encoder_Add_output_0, + encoder_layernorm_embedding_layernormalization_output_0 = opset20.LayerNormalization( + encoder_add_output_0, encoder_layernorm_embedding_weight, encoder_layernorm_embedding_bias, axis=-1, epsilon=9.999999747378752e-06, ) - encoder_layers_0_self_attn_Shape_output_0 = opset20.Shape( - encoder_layernorm_embedding_LayerNormalization_output_0 + encoder_layers_0_self_attn_shape_output_0 = opset20.Shape( + encoder_layernorm_embedding_layernormalization_output_0 ) - encoder_layers_0_self_attn_Constant_output_0 = opset20.Constant(value=0) - encoder_layers_0_self_attn_Gather_output_0 = opset20.Gather( - encoder_layers_0_self_attn_Shape_output_0, - encoder_layers_0_self_attn_Constant_output_0, + encoder_layers_0_self_attn_constant_output_0 = opset20.Constant(value=0) + encoder_layers_0_self_attn_gather_output_0 = opset20.Gather( + encoder_layers_0_self_attn_shape_output_0, + encoder_layers_0_self_attn_constant_output_0, axis=0, ) - encoder_layers_0_self_attn_Shape_1_output_0 = opset20.Shape( - encoder_layernorm_embedding_LayerNormalization_output_0 + encoder_layers_0_self_attn_shape_1_output_0 = opset20.Shape( + encoder_layernorm_embedding_layernormalization_output_0 ) - encoder_layers_0_self_attn_Constant_1_output_0 = opset20.Constant(value=1) - encoder_layers_0_self_attn_Gather_1_output_0 = opset20.Gather( - encoder_layers_0_self_attn_Shape_1_output_0, - 
encoder_layers_0_self_attn_Constant_1_output_0, + encoder_layers_0_self_attn_constant_1_output_0 = opset20.Constant(value=1) + encoder_layers_0_self_attn_gather_1_output_0 = opset20.Gather( + encoder_layers_0_self_attn_shape_1_output_0, + encoder_layers_0_self_attn_constant_1_output_0, axis=0, ) - encoder_layers_0_self_attn_q_proj_MatMul_output_0 = opset20.MatMul( - encoder_layernorm_embedding_LayerNormalization_output_0, MatMul_257 + encoder_layers_0_self_attn_q_proj_matmul_output_0 = opset20.MatMul( + encoder_layernorm_embedding_layernormalization_output_0, matmul_257 ) - encoder_layers_0_self_attn_q_proj_Add_output_0 = opset20.Add( + encoder_layers_0_self_attn_q_proj_add_output_0 = opset20.Add( encoder_layers_0_self_attn_q_proj_bias, - encoder_layers_0_self_attn_q_proj_MatMul_output_0, - ) - Unsqueeze_88 = opset20.Constant(value=[0]) - encoder_layers_0_self_attn_Unsqueeze_output_0 = opset20.Unsqueeze( - encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_88 - ) - encoder_layers_0_self_attn_Constant_2_output_0 = opset20.Constant(value=[-1]) - encoder_layers_0_self_attn_Constant_3_output_0 = opset20.Constant(value=[4]) - encoder_layers_0_self_attn_Constant_4_output_0 = opset20.Constant(value=[4]) - encoder_layers_0_self_attn_Concat_output_0 = opset20.Concat( - encoder_layers_0_self_attn_Unsqueeze_output_0, - encoder_layers_0_self_attn_Constant_2_output_0, - encoder_layers_0_self_attn_Constant_3_output_0, - encoder_layers_0_self_attn_Constant_4_output_0, + encoder_layers_0_self_attn_q_proj_matmul_output_0, + ) + unsqueeze_88 = opset20.Constant(value=[0]) + encoder_layers_0_self_attn_unsqueeze_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_gather_output_0, unsqueeze_88 + ) + encoder_layers_0_self_attn_constant_2_output_0 = opset20.Constant(value=[-1]) + encoder_layers_0_self_attn_constant_3_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_constant_4_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_concat_output_0 = 
opset20.Concat( + encoder_layers_0_self_attn_unsqueeze_output_0, + encoder_layers_0_self_attn_constant_2_output_0, + encoder_layers_0_self_attn_constant_3_output_0, + encoder_layers_0_self_attn_constant_4_output_0, axis=0, ) - Unsqueeze_97 = opset20.Constant(value=[0]) - encoder_layers_0_self_attn_Unsqueeze_1_output_0 = opset20.Unsqueeze( - encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_97 - ) - encoder_layers_0_self_attn_Constant_5_output_0 = opset20.Constant(value=[-1]) - encoder_layers_0_self_attn_Constant_6_output_0 = opset20.Constant(value=[4]) - encoder_layers_0_self_attn_Constant_7_output_0 = opset20.Constant(value=[4]) - encoder_layers_0_self_attn_Concat_1_output_0 = opset20.Concat( - encoder_layers_0_self_attn_Unsqueeze_1_output_0, - encoder_layers_0_self_attn_Constant_5_output_0, - encoder_layers_0_self_attn_Constant_6_output_0, - encoder_layers_0_self_attn_Constant_7_output_0, + unsqueeze_97 = opset20.Constant(value=[0]) + encoder_layers_0_self_attn_unsqueeze_1_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_gather_output_0, unsqueeze_97 + ) + encoder_layers_0_self_attn_constant_5_output_0 = opset20.Constant(value=[-1]) + encoder_layers_0_self_attn_constant_6_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_constant_7_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_concat_1_output_0 = opset20.Concat( + encoder_layers_0_self_attn_unsqueeze_1_output_0, + encoder_layers_0_self_attn_constant_5_output_0, + encoder_layers_0_self_attn_constant_6_output_0, + encoder_layers_0_self_attn_constant_7_output_0, axis=0, ) - Unsqueeze_106 = opset20.Constant(value=[0]) - encoder_layers_0_self_attn_Unsqueeze_2_output_0 = opset20.Unsqueeze( - encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_106 - ) - encoder_layers_0_self_attn_Constant_8_output_0 = opset20.Constant(value=[-1]) - encoder_layers_0_self_attn_Constant_9_output_0 = opset20.Constant(value=[4]) - encoder_layers_0_self_attn_Constant_10_output_0 = 
opset20.Constant(value=[4]) - encoder_layers_0_self_attn_Concat_2_output_0 = opset20.Concat( - encoder_layers_0_self_attn_Unsqueeze_2_output_0, - encoder_layers_0_self_attn_Constant_8_output_0, - encoder_layers_0_self_attn_Constant_9_output_0, - encoder_layers_0_self_attn_Constant_10_output_0, + unsqueeze_106 = opset20.Constant(value=[0]) + encoder_layers_0_self_attn_unsqueeze_2_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_gather_output_0, unsqueeze_106 + ) + encoder_layers_0_self_attn_constant_8_output_0 = opset20.Constant(value=[-1]) + encoder_layers_0_self_attn_constant_9_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_constant_10_output_0 = opset20.Constant(value=[4]) + encoder_layers_0_self_attn_concat_2_output_0 = opset20.Concat( + encoder_layers_0_self_attn_unsqueeze_2_output_0, + encoder_layers_0_self_attn_constant_8_output_0, + encoder_layers_0_self_attn_constant_9_output_0, + encoder_layers_0_self_attn_constant_10_output_0, axis=0, ) - encoder_layers_0_self_attn_Reshape_output_0 = opset20.Reshape( - encoder_layers_0_self_attn_q_proj_Add_output_0, - encoder_layers_0_self_attn_Concat_output_0, + encoder_layers_0_self_attn_reshape_output_0 = opset20.Reshape( + encoder_layers_0_self_attn_q_proj_add_output_0, + encoder_layers_0_self_attn_concat_output_0, allowzero=0, ) - encoder_layers_0_self_attn_Transpose_output_0 = opset20.Transpose( - encoder_layers_0_self_attn_Reshape_output_0, perm=[0, 2, 1, 3] + encoder_layers_0_self_attn_transpose_output_0 = opset20.Transpose( + encoder_layers_0_self_attn_reshape_output_0, perm=[0, 2, 1, 3] ) - encoder_layers_0_self_attn_k_proj_MatMul_output_0 = opset20.MatMul( - encoder_layernorm_embedding_LayerNormalization_output_0, MatMul_267 + encoder_layers_0_self_attn_k_proj_matmul_output_0 = opset20.MatMul( + encoder_layernorm_embedding_layernormalization_output_0, matmul_267 ) - encoder_layers_0_self_attn_k_proj_Add_output_0 = opset20.Add( + encoder_layers_0_self_attn_k_proj_add_output_0 = 
opset20.Add( encoder_layers_0_self_attn_k_proj_bias, - encoder_layers_0_self_attn_k_proj_MatMul_output_0, + encoder_layers_0_self_attn_k_proj_matmul_output_0, ) - encoder_layers_0_self_attn_v_proj_MatMul_output_0 = opset20.MatMul( - encoder_layernorm_embedding_LayerNormalization_output_0, MatMul_268 + encoder_layers_0_self_attn_v_proj_matmul_output_0 = opset20.MatMul( + encoder_layernorm_embedding_layernormalization_output_0, matmul_268 ) - encoder_layers_0_self_attn_v_proj_Add_output_0 = opset20.Add( + encoder_layers_0_self_attn_v_proj_add_output_0 = opset20.Add( encoder_layers_0_self_attn_v_proj_bias, - encoder_layers_0_self_attn_v_proj_MatMul_output_0, + encoder_layers_0_self_attn_v_proj_matmul_output_0, ) - encoder_layers_0_self_attn_Reshape_1_output_0 = opset20.Reshape( - encoder_layers_0_self_attn_k_proj_Add_output_0, - encoder_layers_0_self_attn_Concat_1_output_0, + encoder_layers_0_self_attn_reshape_1_output_0 = opset20.Reshape( + encoder_layers_0_self_attn_k_proj_add_output_0, + encoder_layers_0_self_attn_concat_1_output_0, allowzero=0, ) - encoder_layers_0_self_attn_Reshape_2_output_0 = opset20.Reshape( - encoder_layers_0_self_attn_v_proj_Add_output_0, - encoder_layers_0_self_attn_Concat_2_output_0, + encoder_layers_0_self_attn_reshape_2_output_0 = opset20.Reshape( + encoder_layers_0_self_attn_v_proj_add_output_0, + encoder_layers_0_self_attn_concat_2_output_0, allowzero=0, ) - encoder_layers_0_self_attn_Transpose_1_output_0 = opset20.Transpose( - encoder_layers_0_self_attn_Reshape_2_output_0, perm=[0, 2, 1, 3] + encoder_layers_0_self_attn_transpose_1_output_0 = opset20.Transpose( + encoder_layers_0_self_attn_reshape_2_output_0, perm=[0, 2, 1, 3] ) - encoder_layers_0_self_attn_Shape_2_output_0 = opset20.Shape( - encoder_layers_0_self_attn_Transpose_output_0 + encoder_layers_0_self_attn_shape_2_output_0 = opset20.Shape( + encoder_layers_0_self_attn_transpose_output_0 ) - encoder_layers_0_self_attn_Constant_11_output_0 = opset20.Constant(value=[-1]) - 
encoder_layers_0_self_attn_Constant_12_output_0 = opset20.Constant( + encoder_layers_0_self_attn_constant_11_output_0 = opset20.Constant(value=[-1]) + encoder_layers_0_self_attn_constant_12_output_0 = opset20.Constant( value=[9223372036854775807] ) - encoder_layers_0_self_attn_Slice_output_0 = opset20.Slice( - encoder_layers_0_self_attn_Shape_2_output_0, - encoder_layers_0_self_attn_Constant_11_output_0, - encoder_layers_0_self_attn_Constant_12_output_0, + encoder_layers_0_self_attn_slice_output_0 = opset20.Slice( + encoder_layers_0_self_attn_shape_2_output_0, + encoder_layers_0_self_attn_constant_11_output_0, + encoder_layers_0_self_attn_constant_12_output_0, ) - encoder_layers_0_self_attn_Cast_output_0 = opset20.Cast( - encoder_layers_0_self_attn_Slice_output_0, to=1 + encoder_layers_0_self_attn_cast_output_0 = opset20.Cast( + encoder_layers_0_self_attn_slice_output_0, to=1 ) - encoder_layers_0_self_attn_Sqrt_output_0 = opset20.Sqrt( - encoder_layers_0_self_attn_Cast_output_0 + encoder_layers_0_self_attn_sqrt_output_0 = opset20.Sqrt( + encoder_layers_0_self_attn_cast_output_0 ) - encoder_layers_0_self_attn_Constant_13_output_0 = opset20.Constant(value=[1.0]) - encoder_layers_0_self_attn_Div_output_0 = opset20.Div( - encoder_layers_0_self_attn_Constant_13_output_0, - encoder_layers_0_self_attn_Sqrt_output_0, + encoder_layers_0_self_attn_constant_13_output_0 = opset20.Constant(value=[1.0]) + encoder_layers_0_self_attn_div_output_0 = opset20.Div( + encoder_layers_0_self_attn_constant_13_output_0, + encoder_layers_0_self_attn_sqrt_output_0, ) - encoder_layers_0_self_attn_Cast_1_output_0 = opset20.Cast( - encoder_layers_0_self_attn_Div_output_0, to=1 + encoder_layers_0_self_attn_cast_1_output_0 = opset20.Cast( + encoder_layers_0_self_attn_div_output_0, to=1 ) - encoder_layers_0_self_attn_Transpose_2_output_0 = opset20.Transpose( - encoder_layers_0_self_attn_Reshape_1_output_0, perm=[0, 2, 3, 1] + encoder_layers_0_self_attn_transpose_2_output_0 = opset20.Transpose( + 
encoder_layers_0_self_attn_reshape_1_output_0, perm=[0, 2, 3, 1] ) - encoder_layers_0_self_attn_Sqrt_1_output_0 = opset20.Sqrt( - encoder_layers_0_self_attn_Cast_1_output_0 + encoder_layers_0_self_attn_sqrt_1_output_0 = opset20.Sqrt( + encoder_layers_0_self_attn_cast_1_output_0 ) - encoder_layers_0_self_attn_Mul_output_0 = opset20.Mul( - encoder_layers_0_self_attn_Transpose_output_0, - encoder_layers_0_self_attn_Sqrt_1_output_0, + encoder_layers_0_self_attn_mul_output_0 = opset20.Mul( + encoder_layers_0_self_attn_transpose_output_0, + encoder_layers_0_self_attn_sqrt_1_output_0, ) - encoder_layers_0_self_attn_Sqrt_2_output_0 = opset20.Sqrt( - encoder_layers_0_self_attn_Cast_1_output_0 + encoder_layers_0_self_attn_sqrt_2_output_0 = opset20.Sqrt( + encoder_layers_0_self_attn_cast_1_output_0 ) - encoder_layers_0_self_attn_Mul_1_output_0 = opset20.Mul( - encoder_layers_0_self_attn_Transpose_2_output_0, - encoder_layers_0_self_attn_Sqrt_2_output_0, + encoder_layers_0_self_attn_mul_1_output_0 = opset20.Mul( + encoder_layers_0_self_attn_transpose_2_output_0, + encoder_layers_0_self_attn_sqrt_2_output_0, ) - encoder_layers_0_self_attn_MatMul_output_0 = opset20.MatMul( - encoder_layers_0_self_attn_Mul_output_0, encoder_layers_0_self_attn_Mul_1_output_0 + encoder_layers_0_self_attn_matmul_output_0 = opset20.MatMul( + encoder_layers_0_self_attn_mul_output_0, encoder_layers_0_self_attn_mul_1_output_0 ) - encoder_layers_0_self_attn_Softmax_output_0 = opset20.Softmax( - encoder_layers_0_self_attn_MatMul_output_0, axis=-1 + encoder_layers_0_self_attn_softmax_output_0 = opset20.Softmax( + encoder_layers_0_self_attn_matmul_output_0, axis=-1 ) - encoder_layers_0_self_attn_MatMul_1_output_0 = opset20.MatMul( - encoder_layers_0_self_attn_Softmax_output_0, - encoder_layers_0_self_attn_Transpose_1_output_0, + encoder_layers_0_self_attn_matmul_1_output_0 = opset20.MatMul( + encoder_layers_0_self_attn_softmax_output_0, + encoder_layers_0_self_attn_transpose_1_output_0, ) - 
encoder_layers_0_self_attn_Transpose_3_output_0 = opset20.Transpose( - encoder_layers_0_self_attn_MatMul_1_output_0, perm=[0, 2, 1, 3] + encoder_layers_0_self_attn_transpose_3_output_0 = opset20.Transpose( + encoder_layers_0_self_attn_matmul_1_output_0, perm=[0, 2, 1, 3] ) - Unsqueeze_145 = opset20.Constant(value=[0]) - encoder_layers_0_self_attn_Unsqueeze_3_output_0 = opset20.Unsqueeze( - encoder_layers_0_self_attn_Gather_output_0, Unsqueeze_145 + unsqueeze_145 = opset20.Constant(value=[0]) + encoder_layers_0_self_attn_unsqueeze_3_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_gather_output_0, unsqueeze_145 ) - Unsqueeze_147 = opset20.Constant(value=[0]) - encoder_layers_0_self_attn_Unsqueeze_4_output_0 = opset20.Unsqueeze( - encoder_layers_0_self_attn_Gather_1_output_0, Unsqueeze_147 + unsqueeze_147 = opset20.Constant(value=[0]) + encoder_layers_0_self_attn_unsqueeze_4_output_0 = opset20.Unsqueeze( + encoder_layers_0_self_attn_gather_1_output_0, unsqueeze_147 ) - encoder_layers_0_self_attn_Constant_14_output_0 = opset20.Constant(value=[16]) - encoder_layers_0_self_attn_Concat_3_output_0 = opset20.Concat( - encoder_layers_0_self_attn_Unsqueeze_3_output_0, - encoder_layers_0_self_attn_Unsqueeze_4_output_0, - encoder_layers_0_self_attn_Constant_14_output_0, + encoder_layers_0_self_attn_constant_14_output_0 = opset20.Constant(value=[16]) + encoder_layers_0_self_attn_concat_3_output_0 = opset20.Concat( + encoder_layers_0_self_attn_unsqueeze_3_output_0, + encoder_layers_0_self_attn_unsqueeze_4_output_0, + encoder_layers_0_self_attn_constant_14_output_0, axis=0, ) - encoder_layers_0_self_attn_Reshape_3_output_0 = opset20.Reshape( - encoder_layers_0_self_attn_Transpose_3_output_0, - encoder_layers_0_self_attn_Concat_3_output_0, + encoder_layers_0_self_attn_reshape_3_output_0 = opset20.Reshape( + encoder_layers_0_self_attn_transpose_3_output_0, + encoder_layers_0_self_attn_concat_3_output_0, allowzero=0, ) - encoder_layers_0_self_attn_out_proj_MatMul_output_0 
= opset20.MatMul( - encoder_layers_0_self_attn_Reshape_3_output_0, MatMul_270 + encoder_layers_0_self_attn_out_proj_matmul_output_0 = opset20.MatMul( + encoder_layers_0_self_attn_reshape_3_output_0, matmul_270 ) - encoder_layers_0_self_attn_out_proj_Add_output_0 = opset20.Add( + encoder_layers_0_self_attn_out_proj_add_output_0 = opset20.Add( encoder_layers_0_self_attn_out_proj_bias, - encoder_layers_0_self_attn_out_proj_MatMul_output_0, + encoder_layers_0_self_attn_out_proj_matmul_output_0, ) - encoder_layers_0_Add_output_0 = opset20.Add( - encoder_layernorm_embedding_LayerNormalization_output_0, - encoder_layers_0_self_attn_out_proj_Add_output_0, + encoder_layers_0_add_output_0 = opset20.Add( + encoder_layernorm_embedding_layernormalization_output_0, + encoder_layers_0_self_attn_out_proj_add_output_0, ) - encoder_layers_0_self_attn_layer_norm_LayerNormalization_output_0 = ( + encoder_layers_0_self_attn_layer_norm_layernormalization_output_0 = ( opset20.LayerNormalization( - encoder_layers_0_Add_output_0, + encoder_layers_0_add_output_0, encoder_layers_0_self_attn_layer_norm_weight, axis=-1, epsilon=9.999999747378752e-0, ) ) - encoder_layers_0_fc1_MatMul_output_0 = opset20.MatMul( - encoder_layers_0_self_attn_layer_norm_LayerNormalization_output_0, MatMul_271 + encoder_layers_0_fc1_matmul_output_0 = opset20.MatMul( + encoder_layers_0_self_attn_layer_norm_layernormalization_output_0, matmul_271 ) - encoder_layers_0_fc1_Add_output_0 = opset20.Add( - encoder_layers_0_fc1_bias, encoder_layers_0_fc1_MatMul_output_0 + encoder_layers_0_fc1_add_output_0 = opset20.Add( + encoder_layers_0_fc1_bias, encoder_layers_0_fc1_matmul_output_0 ) - encoder_layers_0_activation_fn_Gelu_output_0 = opset20.Gelu( - encoder_layers_0_fc1_Add_output_0, approximate="none" + encoder_layers_0_activation_fn_gelu_output_0 = opset20.Gelu( + encoder_layers_0_fc1_add_output_0, approximate="none" ) - encoder_layers_0_fc2_MatMul_output_0 = opset20.MatMul( - encoder_layers_0_activation_fn_Gelu_output_0, 
MatMul_272 + encoder_layers_0_fc2_matmul_output_0 = opset20.MatMul( + encoder_layers_0_activation_fn_gelu_output_0, matmul_272 ) - encoder_layers_0_fc2_Add_output_0 = opset20.Add( - encoder_layers_0_fc2_bias, encoder_layers_0_fc2_MatMul_output_0 + encoder_layers_0_fc2_add_output_0 = opset20.Add( + encoder_layers_0_fc2_bias, encoder_layers_0_fc2_matmul_output_0 ) - encoder_layers_0_Add_1_output_0 = opset20.Add( - encoder_layers_0_self_attn_layer_norm_LayerNormalization_output_0, - encoder_layers_0_fc2_Add_output_0, + encoder_layers_0_add_1_output_0 = opset20.Add( + encoder_layers_0_self_attn_layer_norm_layernormalization_output_0, + encoder_layers_0_fc2_add_output_0, ) - encoder_layers_0_final_layer_norm_LayerNormalization_output_0 = ( + encoder_layers_0_final_layer_norm_layernormalization_output_0 = ( opset20.LayerNormalization( - encoder_layers_0_Add_1_output_0, + encoder_layers_0_add_1_output_0, encoder_layers_0_final_layer_norm_weight, encoder_layers_0_final_layer_norm_bias, axis=-1, epsilon=9.999999747378752e-06, ) ) - encoder_layers_1_self_attn_Shape_output_0 = opset20.Shape( - encoder_layers_0_final_layer_norm_LayerNormalization_output_0 + encoder_layers_1_self_attn_shape_output_0 = opset20.Shape( + encoder_layers_0_final_layer_norm_layernormalization_output_0 ) - encoder_layers_1_self_attn_Constant_output_0 = opset20.Constant(value=0) - encoder_layers_1_self_attn_Gather_output_0 = opset20.Gather( - encoder_layers_1_self_attn_Shape_output_0, - encoder_layers_1_self_attn_Constant_output_0, + encoder_layers_1_self_attn_constant_output_0 = opset20.Constant(value=0) + encoder_layers_1_self_attn_gather_output_0 = opset20.Gather( + encoder_layers_1_self_attn_shape_output_0, + encoder_layers_1_self_attn_constant_output_0, axis=0, ) - encoder_layers_1_self_attn_Shape_1_output_0 = opset20.Shape( - encoder_layers_0_final_layer_norm_LayerNormalization_output_0 + encoder_layers_1_self_attn_shape_1_output_0 = opset20.Shape( + 
encoder_layers_0_final_layer_norm_layernormalization_output_0 ) - encoder_layers_1_self_attn_Constant_1_output_0 = opset20.Constant(value=1) - encoder_layers_1_self_attn_Gather_1_output_0 = opset20.Gather( - encoder_layers_1_self_attn_Shape_1_output_0, - encoder_layers_1_self_attn_Constant_1_output_0, + encoder_layers_1_self_attn_constant_1_output_0 = opset20.Constant(value=1) + encoder_layers_1_self_attn_gather_1_output_0 = opset20.Gather( + encoder_layers_1_self_attn_shape_1_output_0, + encoder_layers_1_self_attn_constant_1_output_0, axis=0, ) - encoder_layers_1_self_attn_q_proj_MatMul_output_0 = opset20.MatMul( - encoder_layers_0_final_layer_norm_LayerNormalization_output_0, MatMul_273 + encoder_layers_1_self_attn_q_proj_matmul_output_0 = opset20.MatMul( + encoder_layers_0_final_layer_norm_layernormalization_output_0, matmul_273 ) - encoder_layers_1_self_attn_q_proj_Add_output_0 = opset20.Add( + encoder_layers_1_self_attn_q_proj_add_output_0 = opset20.Add( encoder_layers_1_self_attn_q_proj_bias, - encoder_layers_1_self_attn_q_proj_MatMul_output_0, - ) - Unsqueeze_176 = opset20.Constant(value=[0]) - encoder_layers_1_self_attn_Unsqueeze_output_0 = opset20.Unsqueeze( - encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_176 - ) - encoder_layers_1_self_attn_Constant_2_output_0 = opset20.Constant(value=[-1]) - encoder_layers_1_self_attn_Constant_3_output_0 = opset20.Constant(value=[4]) - encoder_layers_1_self_attn_Constant_4_output_0 = opset20.Constant(value=[4]) - encoder_layers_1_self_attn_Concat_output_0 = opset20.Concat( - encoder_layers_1_self_attn_Unsqueeze_output_0, - encoder_layers_1_self_attn_Constant_2_output_0, - encoder_layers_1_self_attn_Constant_3_output_0, - encoder_layers_1_self_attn_Constant_4_output_0, + encoder_layers_1_self_attn_q_proj_matmul_output_0, + ) + unsqueeze_176 = opset20.Constant(value=[0]) + encoder_layers_1_self_attn_unsqueeze_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_gather_output_0, unsqueeze_176 + ) + 
encoder_layers_1_self_attn_constant_2_output_0 = opset20.Constant(value=[-1]) + encoder_layers_1_self_attn_constant_3_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_constant_4_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_concat_output_0 = opset20.Concat( + encoder_layers_1_self_attn_unsqueeze_output_0, + encoder_layers_1_self_attn_constant_2_output_0, + encoder_layers_1_self_attn_constant_3_output_0, + encoder_layers_1_self_attn_constant_4_output_0, axis=0, ) - Unsqueeze_185 = opset20.Constant(value=[0]) - encoder_layers_1_self_attn_Unsqueeze_1_output_0 = opset20.Unsqueeze( - encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_185 - ) - encoder_layers_1_self_attn_Constant_5_output_0 = opset20.Constant(value=[-1]) - encoder_layers_1_self_attn_Constant_6_output_0 = opset20.Constant(value=[4]) - encoder_layers_1_self_attn_Constant_7_output_0 = opset20.Constant(value=[4]) - encoder_layers_1_self_attn_Concat_1_output_0 = opset20.Concat( - encoder_layers_1_self_attn_Unsqueeze_1_output_0, - encoder_layers_1_self_attn_Constant_5_output_0, - encoder_layers_1_self_attn_Constant_6_output_0, - encoder_layers_1_self_attn_Constant_7_output_0, + unsqueeze_185 = opset20.Constant(value=[0]) + encoder_layers_1_self_attn_unsqueeze_1_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_gather_output_0, unsqueeze_185 + ) + encoder_layers_1_self_attn_constant_5_output_0 = opset20.Constant(value=[-1]) + encoder_layers_1_self_attn_constant_6_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_constant_7_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_concat_1_output_0 = opset20.Concat( + encoder_layers_1_self_attn_unsqueeze_1_output_0, + encoder_layers_1_self_attn_constant_5_output_0, + encoder_layers_1_self_attn_constant_6_output_0, + encoder_layers_1_self_attn_constant_7_output_0, axis=0, ) - Unsqueeze_194 = opset20.Constant(value=[0]) - encoder_layers_1_self_attn_Unsqueeze_2_output_0 = 
opset20.Unsqueeze( - encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_194 - ) - encoder_layers_1_self_attn_Constant_8_output_0 = opset20.Constant(value=[-1]) - encoder_layers_1_self_attn_Constant_9_output_0 = opset20.Constant(value=[4]) - encoder_layers_1_self_attn_Constant_10_output_0 = opset20.Constant(value=[4]) - encoder_layers_1_self_attn_Concat_2_output_0 = opset20.Concat( - encoder_layers_1_self_attn_Unsqueeze_2_output_0, - encoder_layers_1_self_attn_Constant_8_output_0, - encoder_layers_1_self_attn_Constant_9_output_0, - encoder_layers_1_self_attn_Constant_10_output_0, + unsqueeze_194 = opset20.Constant(value=[0]) + encoder_layers_1_self_attn_unsqueeze_2_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_gather_output_0, unsqueeze_194 + ) + encoder_layers_1_self_attn_constant_8_output_0 = opset20.Constant(value=[-1]) + encoder_layers_1_self_attn_constant_9_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_constant_10_output_0 = opset20.Constant(value=[4]) + encoder_layers_1_self_attn_concat_2_output_0 = opset20.Concat( + encoder_layers_1_self_attn_unsqueeze_2_output_0, + encoder_layers_1_self_attn_constant_8_output_0, + encoder_layers_1_self_attn_constant_9_output_0, + encoder_layers_1_self_attn_constant_10_output_0, axis=0, ) - encoder_layers_1_self_attn_Reshape_output_0 = opset20.Reshape( - encoder_layers_1_self_attn_q_proj_Add_output_0, - encoder_layers_1_self_attn_Concat_output_0, + encoder_layers_1_self_attn_reshape_output_0 = opset20.Reshape( + encoder_layers_1_self_attn_q_proj_add_output_0, + encoder_layers_1_self_attn_concat_output_0, allowzero=0, ) - encoder_layers_1_self_attn_Transpose_output_0 = opset20.Transpose( - encoder_layers_1_self_attn_Reshape_output_0, perm=[0, 2, 1, 3] + encoder_layers_1_self_attn_transpose_output_0 = opset20.Transpose( + encoder_layers_1_self_attn_reshape_output_0, perm=[0, 2, 1, 3] ) - encoder_layers_1_self_attn_k_proj_MatMul_output_0 = opset20.MatMul( - 
encoder_layers_0_final_layer_norm_LayerNormalization_output_0, MatMul_283 + encoder_layers_1_self_attn_k_proj_matmul_output_0 = opset20.MatMul( + encoder_layers_0_final_layer_norm_layernormalization_output_0, matmul_283 ) - encoder_layers_1_self_attn_k_proj_Add_output_0 = opset20.Add( + encoder_layers_1_self_attn_k_proj_add_output_0 = opset20.Add( encoder_layers_1_self_attn_k_proj_bias, - encoder_layers_1_self_attn_k_proj_MatMul_output_0, + encoder_layers_1_self_attn_k_proj_matmul_output_0, ) - encoder_layers_1_self_attn_v_proj_MatMul_output_0 = opset20.MatMul( - encoder_layers_0_final_layer_norm_LayerNormalization_output_0, MatMul_284 + encoder_layers_1_self_attn_v_proj_matmul_output_0 = opset20.MatMul( + encoder_layers_0_final_layer_norm_layernormalization_output_0, matmul_284 ) - encoder_layers_1_self_attn_v_proj_Add_output_0 = opset20.Add( + encoder_layers_1_self_attn_v_proj_add_output_0 = opset20.Add( encoder_layers_1_self_attn_v_proj_bias, - encoder_layers_1_self_attn_v_proj_MatMul_output_0, + encoder_layers_1_self_attn_v_proj_matmul_output_0, ) - encoder_layers_1_self_attn_Reshape_1_output_0 = opset20.Reshape( - encoder_layers_1_self_attn_k_proj_Add_output_0, - encoder_layers_1_self_attn_Concat_1_output_0, + encoder_layers_1_self_attn_reshape_1_output_0 = opset20.Reshape( + encoder_layers_1_self_attn_k_proj_add_output_0, + encoder_layers_1_self_attn_concat_1_output_0, allowzero=0, ) - encoder_layers_1_self_attn_Reshape_2_output_0 = opset20.Reshape( - encoder_layers_1_self_attn_v_proj_Add_output_0, - encoder_layers_1_self_attn_Concat_2_output_0, + encoder_layers_1_self_attn_reshape_2_output_0 = opset20.Reshape( + encoder_layers_1_self_attn_v_proj_add_output_0, + encoder_layers_1_self_attn_concat_2_output_0, allowzero=0, ) - encoder_layers_1_self_attn_Transpose_1_output_0 = opset20.Transpose( - encoder_layers_1_self_attn_Reshape_2_output_0, perm=[0, 2, 1, 3] + encoder_layers_1_self_attn_transpose_1_output_0 = opset20.Transpose( + 
encoder_layers_1_self_attn_reshape_2_output_0, perm=[0, 2, 1, 3] ) - encoder_layers_1_self_attn_Shape_2_output_0 = opset20.Shape( - encoder_layers_1_self_attn_Transpose_output_0 + encoder_layers_1_self_attn_shape_2_output_0 = opset20.Shape( + encoder_layers_1_self_attn_transpose_output_0 ) - encoder_layers_1_self_attn_Constant_11_output_0 = opset20.Constant(value=[-1]) - encoder_layers_1_self_attn_Constant_12_output_0 = opset20.Constant( + encoder_layers_1_self_attn_constant_11_output_0 = opset20.Constant(value=[-1]) + encoder_layers_1_self_attn_constant_12_output_0 = opset20.Constant( value=[9223372036854775807] ) - encoder_layers_1_self_attn_Slice_output_0 = opset20.Slice( - encoder_layers_1_self_attn_Shape_2_output_0, - encoder_layers_1_self_attn_Constant_11_output_0, - encoder_layers_1_self_attn_Constant_12_output_0, + encoder_layers_1_self_attn_slice_output_0 = opset20.Slice( + encoder_layers_1_self_attn_shape_2_output_0, + encoder_layers_1_self_attn_constant_11_output_0, + encoder_layers_1_self_attn_constant_12_output_0, ) - encoder_layers_1_self_attn_Cast_output_0 = opset20.Cast( - encoder_layers_1_self_attn_Slice_output_0, to=1 + encoder_layers_1_self_attn_cast_output_0 = opset20.Cast( + encoder_layers_1_self_attn_slice_output_0, to=1 ) - encoder_layers_1_self_attn_Sqrt_output_0 = opset20.Sqrt( - encoder_layers_1_self_attn_Cast_output_0 + encoder_layers_1_self_attn_sqrt_output_0 = opset20.Sqrt( + encoder_layers_1_self_attn_cast_output_0 ) - encoder_layers_1_self_attn_Constant_13_output_0 = opset20.Constant(value=[1.0]) - encoder_layers_1_self_attn_Div_output_0 = opset20.Div( - encoder_layers_1_self_attn_Constant_13_output_0, - encoder_layers_1_self_attn_Sqrt_output_0, + encoder_layers_1_self_attn_constant_13_output_0 = opset20.Constant(value=[1.0]) + encoder_layers_1_self_attn_div_output_0 = opset20.Div( + encoder_layers_1_self_attn_constant_13_output_0, + encoder_layers_1_self_attn_sqrt_output_0, ) - encoder_layers_1_self_attn_Cast_1_output_0 = 
opset20.Cast( - encoder_layers_1_self_attn_Div_output_0, to=1 + encoder_layers_1_self_attn_cast_1_output_0 = opset20.Cast( + encoder_layers_1_self_attn_div_output_0, to=1 ) - encoder_layers_1_self_attn_Transpose_2_output_0 = opset20.Transpose( - encoder_layers_1_self_attn_Reshape_1_output_0, perm=[0, 2, 3, 1] + encoder_layers_1_self_attn_transpose_2_output_0 = opset20.Transpose( + encoder_layers_1_self_attn_reshape_1_output_0, perm=[0, 2, 3, 1] ) - encoder_layers_1_self_attn_Sqrt_1_output_0 = opset20.Sqrt( - encoder_layers_1_self_attn_Cast_1_output_0 + encoder_layers_1_self_attn_sqrt_1_output_0 = opset20.Sqrt( + encoder_layers_1_self_attn_cast_1_output_0 ) - encoder_layers_1_self_attn_Mul_output_0 = opset20.Mul( - encoder_layers_1_self_attn_Transpose_output_0, - encoder_layers_1_self_attn_Sqrt_1_output_0, + encoder_layers_1_self_attn_mul_output_0 = opset20.Mul( + encoder_layers_1_self_attn_transpose_output_0, + encoder_layers_1_self_attn_sqrt_1_output_0, ) - encoder_layers_1_self_attn_Sqrt_2_output_0 = opset20.Sqrt( - encoder_layers_1_self_attn_Cast_1_output_0 + encoder_layers_1_self_attn_sqrt_2_output_0 = opset20.Sqrt( + encoder_layers_1_self_attn_cast_1_output_0 ) - encoder_layers_1_self_attn_Mul_1_output_0 = opset20.Mul( - encoder_layers_1_self_attn_Transpose_2_output_0, - encoder_layers_1_self_attn_Sqrt_2_output_0, + encoder_layers_1_self_attn_mul_1_output_0 = opset20.Mul( + encoder_layers_1_self_attn_transpose_2_output_0, + encoder_layers_1_self_attn_sqrt_2_output_0, ) - encoder_layers_1_self_attn_MatMul_output_0 = opset20.MatMul( - encoder_layers_1_self_attn_Mul_output_0, encoder_layers_1_self_attn_Mul_1_output_0 + encoder_layers_1_self_attn_matmul_output_0 = opset20.MatMul( + encoder_layers_1_self_attn_mul_output_0, encoder_layers_1_self_attn_mul_1_output_0 ) - encoder_layers_1_self_attn_Softmax_output_0 = opset20.Softmax( - encoder_layers_1_self_attn_MatMul_output_0, axis=-1 + encoder_layers_1_self_attn_softmax_output_0 = opset20.Softmax( + 
encoder_layers_1_self_attn_matmul_output_0, axis=-1 ) - encoder_layers_1_self_attn_MatMul_1_output_0 = opset20.MatMul( - encoder_layers_1_self_attn_Softmax_output_0, - encoder_layers_1_self_attn_Transpose_1_output_0, + encoder_layers_1_self_attn_matmul_1_output_0 = opset20.MatMul( + encoder_layers_1_self_attn_softmax_output_0, + encoder_layers_1_self_attn_transpose_1_output_0, ) - encoder_layers_1_self_attn_Transpose_3_output_0 = opset20.Transpose( - encoder_layers_1_self_attn_MatMul_1_output_0, perm=[0, 2, 1, 3] + encoder_layers_1_self_attn_transpose_3_output_0 = opset20.Transpose( + encoder_layers_1_self_attn_matmul_1_output_0, perm=[0, 2, 1, 3] ) - Unsqueeze_232 = opset20.Constant(value=[0]) - encoder_layers_1_self_attn_Unsqueeze_3_output_0 = opset20.Unsqueeze( - encoder_layers_1_self_attn_Gather_output_0, Unsqueeze_232 + unsqueeze_232 = opset20.Constant(value=[0]) + encoder_layers_1_self_attn_unsqueeze_3_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_gather_output_0, unsqueeze_232 ) - Unsqueeze_234 = opset20.Constant(value=[0]) - encoder_layers_1_self_attn_Unsqueeze_4_output_0 = opset20.Unsqueeze( - encoder_layers_1_self_attn_Gather_1_output_0, Unsqueeze_234 + unsqueeze_234 = opset20.Constant(value=[0]) + encoder_layers_1_self_attn_unsqueeze_4_output_0 = opset20.Unsqueeze( + encoder_layers_1_self_attn_gather_1_output_0, unsqueeze_234 ) - encoder_layers_1_self_attn_Constant_14_output_0 = opset20.Constant(value=[16]) + encoder_layers_1_self_attn_constant_14_output_0 = opset20.Constant(value=[16]) - encoder_layers_1_self_attn_Concat_3_output_0 = opset20.Concat( - encoder_layers_1_self_attn_Unsqueeze_3_output_0, - encoder_layers_1_self_attn_Unsqueeze_4_output_0, - encoder_layers_1_self_attn_Constant_14_output_0, + encoder_layers_1_self_attn_concat_3_output_0 = opset20.Concat( + encoder_layers_1_self_attn_unsqueeze_3_output_0, + encoder_layers_1_self_attn_unsqueeze_4_output_0, + encoder_layers_1_self_attn_constant_14_output_0, axis=0, ) - 
encoder_layers_1_self_attn_Reshape_3_output_0 = opset20.Reshape( - encoder_layers_1_self_attn_Transpose_3_output_0, - encoder_layers_1_self_attn_Concat_3_output_0, + encoder_layers_1_self_attn_reshape_3_output_0 = opset20.Reshape( + encoder_layers_1_self_attn_transpose_3_output_0, + encoder_layers_1_self_attn_concat_3_output_0, allowzero=0, ) - encoder_layers_1_self_attn_out_proj_MatMul_output_0 = opset20.MatMul( - encoder_layers_1_self_attn_Reshape_3_output_0, MatMul_286 + encoder_layers_1_self_attn_out_proj_matmul_output_0 = opset20.MatMul( + encoder_layers_1_self_attn_reshape_3_output_0, matmul_286 ) - encoder_layers_1_self_attn_out_proj_Add_output_0 = opset20.Add( + encoder_layers_1_self_attn_out_proj_add_output_0 = opset20.Add( encoder_layers_1_self_attn_out_proj_bias, - encoder_layers_1_self_attn_out_proj_MatMul_output_0, + encoder_layers_1_self_attn_out_proj_matmul_output_0, ) - encoder_layers_1_Add_output_0 = opset20.Add( - encoder_layers_0_final_layer_norm_LayerNormalization_output_0, - encoder_layers_1_self_attn_out_proj_Add_output_0, + encoder_layers_1_add_output_0 = opset20.Add( + encoder_layers_0_final_layer_norm_layernormalization_output_0, + encoder_layers_1_self_attn_out_proj_add_output_0, ) - encoder_layers_1_self_attn_layer_norm_LayerNormalization_output_0 = ( + encoder_layers_1_self_attn_layer_norm_layernormalization_output_0 = ( opset20.LayerNormalization( - encoder_layers_1_Add_output_0, + encoder_layers_1_add_output_0, encoder_layers_1_self_attn_layer_norm_weight, encoder_layers_1_self_attn_layer_norm_bias, axis=-1, epsilon=9.999999747378752e-06, ) ) - encoder_layers_1_fc1_MatMul_output_0 = opset20.MatMul( - encoder_layers_1_self_attn_layer_norm_LayerNormalization_output_0, MatMul_287 + encoder_layers_1_fc1_matmul_output_0 = opset20.MatMul( + encoder_layers_1_self_attn_layer_norm_layernormalization_output_0, matmul_287 ) - encoder_layers_1_fc1_Add_output_0 = opset20.Add( - encoder_layers_1_fc1_bias, encoder_layers_1_fc1_MatMul_output_0 + 
encoder_layers_1_fc1_add_output_0 = opset20.Add( + encoder_layers_1_fc1_bias, encoder_layers_1_fc1_matmul_output_0 ) - encoder_layers_1_activation_fn_Gelu_output_0 = opset20.Gelu( - encoder_layers_1_fc1_Add_output_0, approximate="none" + encoder_layers_1_activation_fn_gelu_output_0 = opset20.Gelu( + encoder_layers_1_fc1_add_output_0, approximate="none" ) - encoder_layers_1_fc2_MatMul_output_0 = opset20.MatMul( - encoder_layers_1_activation_fn_Gelu_output_0, MatMul_288 + encoder_layers_1_fc2_matmul_output_0 = opset20.MatMul( + encoder_layers_1_activation_fn_gelu_output_0, matmul_288 ) - encoder_layers_1_fc2_Add_output_0 = opset20.Add( - encoder_layers_1_fc2_bias, encoder_layers_1_fc2_MatMul_output_0 + encoder_layers_1_fc2_add_output_0 = opset20.Add( + encoder_layers_1_fc2_bias, encoder_layers_1_fc2_matmul_output_0 ) - encoder_layers_1_Add_1_output_0 = opset20.Add( - encoder_layers_1_self_attn_layer_norm_LayerNormalization_output_0, - encoder_layers_1_fc2_Add_output_0, + encoder_layers_1_add_1_output_0 = opset20.Add( + encoder_layers_1_self_attn_layer_norm_layernormalization_output_0, + encoder_layers_1_fc2_add_output_0, ) encoder_output = opset20.LayerNormalization( - encoder_layers_1_Add_1_output_0, + encoder_layers_1_add_1_output_0, encoder_layers_1_final_layer_norm_weight, encoder_layers_1_final_layer_norm_bias, axis=-1, @@ -651,18 +651,18 @@ def make_model_with_random_weights(): encoder_layers_0_self_attn_layer_norm_weight = np.random.rand(16).astype(np.float32) encoder_layers_0_fc1_bias = np.zeros((4), dtype=np.float32) - MatMul_257 = np.random.rand(16, 16).astype(np.float32) - MatMul_267 = np.random.rand(16, 16).astype(np.float32) - MatMul_268 = np.random.rand(16, 16).astype(np.float32) - MatMul_270 = np.random.rand(16, 16).astype(np.float32) - MatMul_271 = np.random.rand(16, 4).astype(np.float32) - MatMul_272 = np.random.rand(4, 16).astype(np.float32) - MatMul_273 = np.random.rand(16, 16).astype(np.float32) - MatMul_283 = np.random.rand(16, 
16).astype(np.float32) - MatMul_284 = np.random.rand(16, 16).astype(np.float32) - MatMul_286 = np.random.rand(16, 16).astype(np.float32) - MatMul_287 = np.random.rand(16, 4).astype(np.float32) - MatMul_288 = np.random.rand(4, 16).astype(np.float32) + matmul_257 = np.random.rand(16, 16).astype(np.float32) + matmul_267 = np.random.rand(16, 16).astype(np.float32) + matmul_268 = np.random.rand(16, 16).astype(np.float32) + matmul_270 = np.random.rand(16, 16).astype(np.float32) + matmul_271 = np.random.rand(16, 4).astype(np.float32) + matmul_272 = np.random.rand(4, 16).astype(np.float32) + matmul_273 = np.random.rand(16, 16).astype(np.float32) + matmul_283 = np.random.rand(16, 16).astype(np.float32) + matmul_284 = np.random.rand(16, 16).astype(np.float32) + matmul_286 = np.random.rand(16, 16).astype(np.float32) + matmul_287 = np.random.rand(16, 4).astype(np.float32) + matmul_288 = np.random.rand(4, 16).astype(np.float32) model = make_model( encoder_embed_positions_weight=encoder_embed_positions_weight, @@ -670,18 +670,18 @@ def make_model_with_random_weights(): encoder_layers_0_self_attn_k_proj_bias=encoder_layers_0_self_attn_k_proj_bias, encoder_layers_0_self_attn_layer_norm_weight=encoder_layers_0_self_attn_layer_norm_weight, encoder_layers_0_fc1_bias=encoder_layers_0_fc1_bias, - MatMul_257=MatMul_257, - MatMul_267=MatMul_267, - MatMul_268=MatMul_268, - MatMul_270=MatMul_270, - MatMul_271=MatMul_271, - MatMul_272=MatMul_272, - MatMul_273=MatMul_273, - MatMul_283=MatMul_283, - MatMul_284=MatMul_284, - MatMul_286=MatMul_286, - MatMul_287=MatMul_287, - MatMul_288=MatMul_288, + matmul_257=matmul_257, + matmul_267=matmul_267, + matmul_268=matmul_268, + matmul_270=matmul_270, + matmul_271=matmul_271, + matmul_272=matmul_272, + matmul_273=matmul_273, + matmul_283=matmul_283, + matmul_284=matmul_284, + matmul_286=matmul_286, + matmul_287=matmul_287, + matmul_288=matmul_288, ) return model From ac155dcad12de32cf7144834ed4fc3fcf0ff42a0 Mon Sep 17 00:00:00 2001 From: Markus Bilz 
Date: Tue, 17 Jun 2025 12:38:18 +0200 Subject: [PATCH 5/9] chore: final clean up of skip layer norm fusion --- .../rewriter/ort_fusions/models/_bart_encoder.py | 8 ++++---- .../rewriter/ort_fusions/skip_normalization.py | 13 +++++++------ .../rewriter/ort_fusions/skip_normalization_test.py | 1 - 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py index 77ccc61dde..e7fbf9c2ac 100644 --- a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py +++ b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py @@ -1,3 +1,5 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. """ Onnxscript version of "hf-internal-testing_tiny-random-bart". @@ -5,8 +7,6 @@ """ import numpy as np -from onnx import TensorProto -from onnx.helper import make_tensor import onnxscript.ir as ir from onnxscript import script @@ -34,7 +34,7 @@ def make_model( matmul_288, ): @script() - def main_graph(input_ids: INT64[1, None]) -> FLOAT[None]: + def main_graph(input_ids: INT64[1, None]) -> FLOAT[None, None, 16]: encoder_layernorm_embedding_bias = opset20.Identity( encoder_layers_0_self_attn_layer_norm_weight ) @@ -151,7 +151,7 @@ def main_graph(input_ids: INT64[1, None]) -> FLOAT[None]: ) encoder_embed_positions_constantofshape_output_0 = opset20.ConstantOfShape( encoder_embed_positions_shape_1_output_0, - value=make_tensor("onef", TensorProto.INT64, [1], [1]), + value=ir.tensor(np.array([1], dtype=np.int64)), ) encoder_embed_positions_constant_6_output_0 = opset20.Constant(value=[-1]) encoder_embed_positions_mul_output_0 = opset20.Mul( diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization.py b/onnxscript/rewriter/ort_fusions/skip_normalization.py index b89e6c95df..9810e6fc7a 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization.py @@ -140,13 +140,10 @@ def pattern(self, 
op, input, skip, gamma, beta, bias): if self._has_bias and self._bias_pre_add: input = op.Add(input, bias) - # Support different combinations of addition of input and skip - skip_sum_pattern_1 = op.Add(skip, input) - skip_sum_pattern_2 = op.Add(input, skip) - skip_sum = pattern.OrValue([skip_sum_pattern_1, skip_sum_pattern_2], name="skip_sum") - # TODO: check if combo is missed. + skip_sum = op.Add(input, skip) if self._has_bias and not self._bias_pre_add: skip_sum = op.Add(skip_sum, bias) + normalized = op.LayerNormalization( skip_sum, gamma, beta, axis=-1, _outputs=["layer_norm"] ) @@ -232,7 +229,11 @@ def rewrite( _skip_layer_rule = SkipLayerNormFusion.rule("SkipLayerNorm", has_bias=False) skip_layer_normalization_ruleset = pattern.RewriteRuleSet( - [_skip_layer_pre_add_bias_rule, _skip_layer_add_bias_rule, _skip_layer_rule] + [ + *_skip_layer_pre_add_bias_rule.commute(), + *_skip_layer_add_bias_rule.commute(), + *_skip_layer_rule.commute(), + ] ) fuse_skip_layer_normalization = _fusion_utils.apply_fusion_rules( diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py index 2c6248ec2c..0ebb127e47 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py @@ -31,7 +31,6 @@ def test_smollm(self): new_outputs = ort_run("optimized", model, inputs) assert_allclose(new_outputs, original_outputs) - # TODO: investigate, why precision drops. 
@unittest.skip("fixme: accuracy is not high") def test_whisper_encoder(self): whisper_encoder = whisper_encoder_test() From edad0ca9f9bbfa08cfa1bd99fe4271385dc9ced1 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Tue, 17 Jun 2025 19:28:52 +0200 Subject: [PATCH 6/9] fix: set allow_other_attributes for layer norm fusion --- .../rewriter/ort_fusions/skip_normalization.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization.py b/onnxscript/rewriter/ort_fusions/skip_normalization.py index 9810e6fc7a..4bc638b44d 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization.py @@ -36,7 +36,11 @@ def pattern(self, op, input, skip, gamma, bias, epsilon, stash_type): # Note: ORT's SimplifiedLayerNormalization was placed in onnx domain by mistake. # No need to use com.microsoft domain here; but this is a custom op in ORT. normalized = op.SimplifiedLayerNormalization( - skip_sum, gamma, axis=-1, _outputs=["simplified_layer_norm"] + skip_sum, + gamma, + axis=-1, + _allow_other_attributes=True, + _outputs=["simplified_layer_norm"], ) return normalized, skip_sum @@ -145,7 +149,12 @@ def pattern(self, op, input, skip, gamma, beta, bias): skip_sum = op.Add(skip_sum, bias) normalized = op.LayerNormalization( - skip_sum, gamma, beta, axis=-1, _outputs=["layer_norm"] + skip_sum, + gamma, + beta, + axis=-1, + _allow_other_attributes=True, + _outputs=["layer_norm"], ) return normalized, skip_sum From ba4a9711e05ca3fcc0d77efb8c97777c149cecf5 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Tue, 17 Jun 2025 20:10:34 +0200 Subject: [PATCH 7/9] fix: address copilot review comments --- .../rewriter/ort_fusions/models/_bart_encoder.py | 10 ++-------- .../rewriter/ort_fusions/skip_normalization_test.py | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py 
b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py index e7fbf9c2ac..2e5bcce5c0 100644 --- a/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py +++ b/onnxscript/rewriter/ort_fusions/models/_bart_encoder.py @@ -50,16 +50,12 @@ def main_graph(input_ids: INT64[1, None]) -> FLOAT[None, None, 16]: ) encoder_layers_1_fc2_bias = opset20.Identity(encoder_layers_0_self_attn_k_proj_bias) - encoder_layers_1_fc1_bias = opset20.Identity(encoder_layers_0_fc1_bias) encoder_layers_1_self_attn_layer_norm_bias = opset20.Identity( encoder_layers_0_self_attn_k_proj_bias ) encoder_layers_1_self_attn_layer_norm_weight = opset20.Identity( encoder_layers_0_self_attn_layer_norm_weight ) - encoder_layers_1_self_attn_layer_norm_weight = opset20.Identity( - encoder_layers_1_self_attn_layer_norm_weight - ) encoder_layers_1_self_attn_out_proj_bias = opset20.Identity( encoder_layers_0_self_attn_k_proj_bias ) @@ -79,10 +75,7 @@ def main_graph(input_ids: INT64[1, None]) -> FLOAT[None, None, 16]: encoder_layers_0_self_attn_layer_norm_weight ) encoder_layers_0_fc2_bias = opset20.Identity(encoder_layers_0_self_attn_k_proj_bias) - encoder_layers_1_fc2_bias = opset20.Identity(encoder_layers_0_self_attn_k_proj_bias) - encoder_layers_1_self_attn_layer_norm_bias = opset20.Identity( - encoder_layers_0_self_attn_k_proj_bias - ) + encoder_layers_1_fc1_bias = opset20.Identity(encoder_layers_0_fc1_bias) encoder_layers_0_self_attn_out_proj_bias = opset20.Identity( encoder_layers_0_self_attn_k_proj_bias ) @@ -382,6 +375,7 @@ def main_graph(input_ids: INT64[1, None]) -> FLOAT[None, None, 16]: opset20.LayerNormalization( encoder_layers_0_add_output_0, encoder_layers_0_self_attn_layer_norm_weight, + encoder_layernorm_embedding_bias, axis=-1, epsilon=9.999999747378752e-0, ) diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py index 0ebb127e47..3b244c1c6b 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization_test.py 
+++ b/onnxscript/rewriter/ort_fusions/skip_normalization_test.py @@ -73,7 +73,7 @@ def test_bart_encoder(self): fuse_skip_layer_normalization(model) op_types = [n.op_type for n in model.graph] self.assertIn("SkipLayerNormalization", op_types) - self.assertEqual(op_types.count("SkipLayerNormalization"), 4) + self.assertEqual(op_types.count("SkipLayerNormalization"), 5) new_outputs = ort_run("optimized", model, inputs) assert_allclose(new_outputs, original_outputs) From 32678617e1c452ba61169db70576fde1a670764d Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Sun, 6 Jul 2025 17:17:46 +0200 Subject: [PATCH 8/9] revert: use non-commutative version of skip layer norm --- .../rewriter/ort_fusions/skip_normalization.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization.py b/onnxscript/rewriter/ort_fusions/skip_normalization.py index 4bc638b44d..ada210ca8d 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization.py @@ -144,7 +144,11 @@ def pattern(self, op, input, skip, gamma, beta, bias): if self._has_bias and self._bias_pre_add: input = op.Add(input, bias) - skip_sum = op.Add(input, skip) + # Support different combinations of addition of input and skip + skip_sum_pattern_1 = op.Add(skip, input) + skip_sum_pattern_2 = op.Add(input, skip) + skip_sum = pattern.OrValue([skip_sum_pattern_1, skip_sum_pattern_2], name="skip_sum") + if self._has_bias and not self._bias_pre_add: skip_sum = op.Add(skip_sum, bias) @@ -239,9 +243,9 @@ def rewrite( skip_layer_normalization_ruleset = pattern.RewriteRuleSet( [ - *_skip_layer_pre_add_bias_rule.commute(), - *_skip_layer_add_bias_rule.commute(), - *_skip_layer_rule.commute(), + _skip_layer_pre_add_bias_rule, + _skip_layer_add_bias_rule, + _skip_layer_rule, ] ) From b3467eaa9ab41c38e4ac7b1d27cd8782eb0bfd91 Mon Sep 17 00:00:00 2001 From: Markus Bilz Date: Tue, 15 Jul 2025 21:53:55 +0200 Subject:
[PATCH 9/9] =?UTF-8?q?fix:=20apply=20review=20comments=20for=20sk?= =?UTF-8?q?ip=20layer=20normalization=E2=9C=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../rewriter/ort_fusions/skip_normalization.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/onnxscript/rewriter/ort_fusions/skip_normalization.py b/onnxscript/rewriter/ort_fusions/skip_normalization.py index ada210ca8d..2734265910 100644 --- a/onnxscript/rewriter/ort_fusions/skip_normalization.py +++ b/onnxscript/rewriter/ort_fusions/skip_normalization.py @@ -51,6 +51,7 @@ def check( skip, gamma, bias, + simplified_layer_norm, **_, ) -> pattern.MatchResult: # type: ignore[name-defined] """Check if the pattern matches conditions for use of SkipSimplifiedLayerNormalization op.""" @@ -82,6 +83,10 @@ def no_match(val: ir.Value, dims: Sequence[str]) -> bool: bias, ) + stash_type = simplified_layer_norm.producer().attributes.get_int("stash_type", 1) + if stash_type != 1: + return check_result.fail("Stash type is not supported.") + return check_result def rewrite( @@ -94,7 +99,7 @@ def rewrite( simplified_layer_norm, **_, ): - epsilon = simplified_layer_norm.producer().attributes.get_float("epsilon") + epsilon = simplified_layer_norm.producer().attributes.get_float("epsilon", 1e-5) if self._has_bias: normalized, _mean, _inv_std_var, skip_sum = op.SkipSimplifiedLayerNormalization( @@ -170,6 +175,7 @@ def check( gamma, beta, bias, + layer_norm, **_, ) -> pattern.MatchResult: # type: ignore[name-defined] """Check if the pattern matches conditions for use of SimplifiedLayerNormalization op.""" @@ -206,6 +212,9 @@ def no_match(val: ir.Value, dims: Sequence[str]) -> bool: bias, ) + stash_type = layer_norm.producer().attributes.get_int("stash_type", 1) + if stash_type != 1: + return check_result.fail("Stash type is not supported.") return check_result def rewrite( @@ -219,7 +228,8 @@ def rewrite( layer_norm, **_, ): - epsilon = 
layer_norm.producer().attributes.get_float("epsilon") + epsilon = layer_norm.producer().attributes.get_float("epsilon", 1e-5) + normalized, _mean, _inv_std_var, skip_sum = op.SkipLayerNormalization( input, skip,