Make configured max token size work (#39)
pujiang2018 authored Nov 7, 2023
1 parent bd86814 · commit 86bb99c
Showing 2 changed files with 2 additions and 2 deletions.
src/layers/attention.h (2 changes: 1 addition & 1 deletion)
@@ -33,7 +33,7 @@
template <typename WeiT, typename QKPO_CLS, typename NORM_CLS, bool INPUT_AS_RESID = true>
class Attention {
public:
-Attention(int layerId, DecoderContext *ctx) : layerId(layerId), qkpo(ctx->attHeadSize) {
+Attention(int layerId, DecoderContext *ctx) : layerId(layerId), qkpo(ctx->attHeadSize, ctx->maxPositions) {
// Group attention or multi-head attention (multi-head attn is a special case of group attn)
if (ctx->attHeadNum % ctx->kvHeadNum == 0) {
// We are responsible for the range [startQHead, endQHead)
src/models/common_decoder.h (2 changes: 1 addition & 1 deletion)
@@ -35,7 +35,7 @@
using namespace xft;

struct QKPO_Dummy {
-QKPO_Dummy(int dim) {}
+QKPO_Dummy(int dim, int maxPos) {}
void forward(float *query, float *key, int qStride, int kStride, const int *qk_shape, const int *position_ids) {}
};

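The commit threads the configured maximum position count (ctx->maxPositions) into the QKPO constructor, so any real position operator can size its precomputed state from the configured max token size rather than a hard-coded limit. Below is a minimal sketch, assuming QKPO_CLS is a rotary-embedding-style operator with the same (dim, maxPos) constructor and the same forward signature as QKPO_Dummy; the class name, table layout, and the qk_shape interpretation are illustrative assumptions, not the repository's actual implementation.

#include <cmath>
#include <vector>

// Illustrative sketch only: a rotary-style position operator whose constructor
// matches the new (dim, maxPos) signature. Precomputed tables are sized by maxPos,
// so the configured max token size actually bounds the precomputation.
class RotaryLikeQKPO {
public:
    RotaryLikeQKPO(int dim, int maxPos) : dim(dim), maxPos(maxPos) {
        int half = dim / 2;
        cosTab.resize((size_t)maxPos * half);
        sinTab.resize((size_t)maxPos * half);
        for (int pos = 0; pos < maxPos; ++pos) {
            for (int i = 0; i < half; ++i) {
                float invFreq = std::pow(10000.0f, -2.0f * i / dim);
                cosTab[(size_t)pos * half + i] = std::cos(pos * invFreq);
                sinTab[(size_t)pos * half + i] = std::sin(pos * invFreq);
            }
        }
    }

    // Same signature as QKPO_Dummy::forward. qk_shape[1] is assumed to be the
    // sequence length; for brevity this rotates only the leading head of each
    // token's query/key, where a full implementation would loop over all heads.
    void forward(float *query, float *key, int qStride, int kStride,
                 const int *qk_shape, const int *position_ids) {
        int seq = qk_shape[1];
        int half = dim / 2;
        for (int t = 0; t < seq; ++t) {
            int pos = position_ids[t] < maxPos ? position_ids[t] : maxPos - 1;
            rotate(query + (size_t)t * qStride, pos, half);
            rotate(key + (size_t)t * kStride, pos, half);
        }
    }

private:
    // Apply the rotary transform to one head: pair element i with element i+half.
    void rotate(float *x, int pos, int half) {
        for (int i = 0; i < half; ++i) {
            float c = cosTab[(size_t)pos * half + i];
            float s = sinTab[(size_t)pos * half + i];
            float x0 = x[i], x1 = x[i + half];
            x[i] = x0 * c - x1 * s;
            x[i + half] = x0 * s + x1 * c;
        }
    }

    int dim, maxPos;
    std::vector<float> cosTab, sinTab;
};

With a class shaped like this, the Attention change above simply forwards ctx->maxPositions into the member initializer (qkpo(ctx->attHeadSize, ctx->maxPositions)), and QKPO_Dummy gains the extra parameter so templated code that does no position encoding still compiles.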
