Skip to content

Commit

Permalink
Update runq.c
Browse files (browse the repository at this point in the history)
runq - more OpenMP/OpenACC parallel loops
  • Loading branch information
trholding committed Jul 20, 2024
1 parent 1c47da5 commit e842bf7
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions runq.c
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,6 @@ void rmsnorm(float* o, float* x, float* weight, int size) {
#ifdef BLAS
ss = cblas_sdot(size, x, 1.0f, x, 1.0f);
#else
// END L2E Addition
// L2E Addition
#ifdef ACCEL
ACCELRD(ss) // OMP/OACC Macro
#endif
Expand Down Expand Up @@ -727,6 +725,11 @@ float* forward(Transformer* transformer, int token, int pos) {
matmul(s->xb2, &s->xq, w->wo + l, dim, dim);

// residual connection back into x
// L2E Addition
#ifdef ACCEL
ACCELS() // OMP/OACC Macro
#endif
// END L2E Addition
for (int i = 0; i < dim; i++) {
x[i] += s->xb2[i];
}
Expand All @@ -741,6 +744,11 @@ float* forward(Transformer* transformer, int token, int pos) {
matmul(s->hb2, &s->xq, w->w3 + l, dim, hidden_dim);

// SwiGLU non-linearity
// L2E Addition
#ifdef ACCEL
ACCELS() // OMP/OACC Macro
#endif
// END L2E Addition
for (int i = 0; i < hidden_dim; i++) {
float val = s->hb[i];
// silu(x)=x*σ(x), where σ(x) is the logistic sigmoid
Expand Down

0 comments on commit e842bf7

Please sign in to comment.