From 595543dd093df3e92621c253d6da3f9092ec7ff8 Mon Sep 17 00:00:00 2001 From: "Zhang, Yifei" Date: Fri, 6 Oct 2023 19:34:57 -0700 Subject: [PATCH] graph: backend: compiler: fix updated llama mlp pattern --- src/graph/backend/graph_compiler/patterns/mlp_pattern.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/graph/backend/graph_compiler/patterns/mlp_pattern.hpp b/src/graph/backend/graph_compiler/patterns/mlp_pattern.hpp index 9ee27264319..384d74c1566 100644 --- a/src/graph/backend/graph_compiler/patterns/mlp_pattern.hpp +++ b/src/graph/backend/graph_compiler/patterns/mlp_pattern.hpp @@ -353,8 +353,12 @@ void create_llama_mlp(const std::shared_ptr &pgraph, auto quant1 = pgraph->append_op(graph::op_kind::Quantize, {in_edge(0, extra_cast_after_mul, 0)}); if (split_smooth_quant) { + auto extra_cast_before_mul_rhs + = append_single_op_repetition_subgraph( + pgraph, graph::op_kind::TypeCast, norm1); auto smooth_quant_mul1_rhs = append_single_op_repetition_subgraph( - pgraph, graph::op_kind::Multiply, extra_cast_before_mul); + pgraph, graph::op_kind::Multiply, + extra_cast_before_mul_rhs); auto extra_cast_after_mul_rhs = append_single_op_repetition_subgraph(pgraph, graph::op_kind::TypeCast, smooth_quant_mul1_rhs, 0,