Make configured max token size work (#39)
pujiang2018 authored Nov 7, 2023
1 parent bd86814 · commit 86bb99c
Showing 2 changed files with 2 additions and 2 deletions.
src/layers/attention.h (2 changes: 1 addition & 1 deletion)
@@ -33,7 +33,7 @@
template <typename WeiT, typename QKPO_CLS, typename NORM_CLS, bool INPUT_AS_RESID = true>
class Attention {
public:
-Attention(int layerId, DecoderContext *ctx) : layerId(layerId), qkpo(ctx->attHeadSize) {
+Attention(int layerId, DecoderContext *ctx) : layerId(layerId), qkpo(ctx->attHeadSize, ctx->maxPositions) {
// Group attention or multi-head attention (multi-head attn is a special case of group attn)
if (ctx->attHeadNum % ctx->kvHeadNum == 0) {
// We are responsible for the range [startQHead, endQHead)
src/models/common_decoder.h (2 changes: 1 addition & 1 deletion)
@@ -35,7 +35,7 @@
using namespace xft;

struct QKPO_Dummy {
-QKPO_Dummy(int dim) {}
+QKPO_Dummy(int dim, int maxPos) {}
void forward(float *query, float *key, int qStride, int kStride, const int *qk_shape, const int *position_ids) {}
};

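The commit threads the configured maximum position count (ctx->maxPositions) into the QKPO constructor, so any real position operator can size its precomputed state from the configured max token size rather than a hard-coded limit. Below is a minimal sketch, assuming QKPO_CLS is a rotary-embedding-style operator with the same (dim, maxPos) constructor and the same forward signature as QKPO_Dummy; the class name, table layout, and the qk_shape interpretation are illustrative assumptions, not the repository's actual implementation.

#include <cmath>
#include <vector>

// Illustrative sketch only: a rotary-style position operator whose constructor
// matches the new (dim, maxPos) signature. Precomputed tables are sized by maxPos,
// so the configured max token size actually bounds the precomputation.
class RotaryLikeQKPO {
public:
    RotaryLikeQKPO(int dim, int maxPos) : dim(dim), maxPos(maxPos) {
        int half = dim / 2;
        cosTab.resize((size_t)maxPos * half);
        sinTab.resize((size_t)maxPos * half);
        for (int pos = 0; pos < maxPos; ++pos) {
            for (int i = 0; i < half; ++i) {
                float invFreq = std::pow(10000.0f, -2.0f * i / dim);
                cosTab[(size_t)pos * half + i] = std::cos(pos * invFreq);
                sinTab[(size_t)pos * half + i] = std::sin(pos * invFreq);
            }
        }
    }

    // Same signature as QKPO_Dummy::forward. qk_shape[1] is assumed to be the
    // sequence length; for brevity this rotates only the leading head of each
    // token's query/key, where a full implementation would loop over all heads.
    void forward(float *query, float *key, int qStride, int kStride,
                 const int *qk_shape, const int *position_ids) {
        int seq = qk_shape[1];
        int half = dim / 2;
        for (int t = 0; t < seq; ++t) {
            int pos = position_ids[t] < maxPos ? position_ids[t] : maxPos - 1;
            rotate(query + (size_t)t * qStride, pos, half);
            rotate(key + (size_t)t * kStride, pos, half);
        }
    }

private:
    // Apply the rotary transform to one head: pair element i with element i+half.
    void rotate(float *x, int pos, int half) {
        for (int i = 0; i < half; ++i) {
            float c = cosTab[(size_t)pos * half + i];
            float s = sinTab[(size_t)pos * half + i];
            float x0 = x[i], x1 = x[i + half];
            x[i] = x0 * c - x1 * s;
            x[i + half] = x0 * s + x1 * c;
        }
    }

    int dim, maxPos;
    std::vector<float> cosTab, sinTab;
};

With a class shaped like this, the Attention change above simply forwards ctx->maxPositions into the member initializer (qkpo(ctx->attHeadSize, ctx->maxPositions)), and QKPO_Dummy gains the extra parameter so templated code that does no position encoding still compiles.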
