Commit: beef up transformer

lucidrains committed Oct 23, 2023
1 parent 452aa84 commit 23fa90f
Showing 3 changed files with 38 additions and 5 deletions.
README.md — 23 changes: 22 additions & 1 deletion

@@ -47,7 +47,8 @@ preds = model(time_series)
 
 ## Todo
 
-- [ ] beef up the transformer with latest findings
+- [x] beef up the transformer with latest findings
+
 - [ ] improvise a 2d version - either global pool across time at end, or use a CLS token for attention pooling
 
 ## Citation
@@ -63,6 +64,15 @@ preds = model(time_series)
 }
 ```
 
+```bibtex
+@misc{shazeer2020glu,
+    title  = {GLU Variants Improve Transformer},
+    author = {Noam Shazeer},
+    year   = {2020},
+    url    = {https://arxiv.org/abs/2002.05202}
+}
+```
+
 ```bibtex
 @misc{burtsev2020memory,
     title = {Memory Transformer},
@@ -91,3 +101,14 @@ preds = model(time_series)
     year = {2022}
 }
 ```
+
+```bibtex
+@article{AlphaFold2021,
+    author  = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A A and Ballard, Andrew J and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Michalina and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
+    journal = {Nature},
+    title   = {Highly accurate protein structure prediction with {AlphaFold}},
+    year    = {2021},
+    doi     = {10.1038/s41586-021-03819-2},
+    note    = {(Accelerated article preview)},
+}
+```
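
The two citations added above track the two code changes in this commit: Shazeer's GLU variants motivate the GEGLU feedforward, and AlphaFold popularized gating attention output values with a learned SiLU/sigmoid gate, which lets each head cheaply suppress its output for tokens where it has nothing useful to attend to. Below is a minimal self-contained sketch of that output gating in plain PyTorch; the class and variable names are illustrative, not this repository's API.

```python
import torch
import torch.nn.functional as F
from torch import nn

class GatedAttention(nn.Module):
    # illustrative sketch of AlphaFold-style output gating, not this repo's class
    def __init__(self, dim, heads = 8, dim_head = 32):
        super().__init__()
        dim_inner = heads * dim_head
        self.heads, self.dim_head = heads, dim_head
        self.to_qkv = nn.Linear(dim, dim_inner * 3, bias = False)
        # per-head gates computed from the layer input
        self.to_v_gates = nn.Sequential(
            nn.Linear(dim, dim_inner, bias = False),
            nn.SiLU()
        )
        self.to_out = nn.Linear(dim_inner, dim, bias = False)

    def forward(self, x):
        b, n, _ = x.shape
        h, d = self.heads, self.dim_head

        qkv = self.to_qkv(x).view(b, n, 3, h, d).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)                        # each (b, h, n, d)

        out = F.scaled_dot_product_attention(q, k, v)  # (b, h, n, d)

        # modulate each head's output before the final projection
        gates = self.to_v_gates(x).view(b, n, h, d).transpose(1, 2)
        out = out * gates

        return self.to_out(out.transpose(1, 2).reshape(b, n, h * d))

x = torch.randn(2, 16, 64)          # (batch, tokens, dim)
print(GatedAttention(64)(x).shape)  # torch.Size([2, 16, 64])
```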
iTransformer/iTransformer.py — 18 changes: 15 additions & 3 deletions

@@ -42,6 +42,12 @@ def __init__(
             Rearrange('b n (qkv h d) -> qkv b h n d', qkv = 3, h = heads)
         )
 
+        self.to_v_gates = nn.Sequential(
+            nn.Linear(dim, dim_inner, bias = False),
+            nn.SiLU(),
+            Rearrange('b n (h d) -> b h n d', h = heads)
+        )
+
         self.attend = Attend(flash = flash, dropout = dropout)
 
         self.to_out = nn.Sequential(
@@ -55,15 +61,21 @@ def forward(self, x):
 
         out = self.attend(q, k, v)
 
+        out = out * self.to_v_gates(x)
         return self.to_out(out)
 
 # feedforward
 
+class GEGLU(Module):
+    def forward(self, x):
+        x, gate = rearrange(x, '... (r d) -> r ... d', r = 2)
+        return x * F.gelu(gate)
+
 def FeedForward(dim, mult = 4, dropout = 0.):
-    dim_inner = int(dim * mult)
+    dim_inner = int(dim * mult * 2 / 3)
     return nn.Sequential(
-        nn.Linear(dim, dim_inner),
-        nn.GELU(),
+        nn.Linear(dim, dim_inner * 2),
+        GEGLU(),
         nn.Dropout(dropout),
         nn.Linear(dim_inner, dim)
     )
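
Read on its own, the new FeedForward is the GEGLU variant from the Shazeer citation: the up-projection emits two halves, one half gating the other through a GELU, with the hidden width scaled by 2/3 so the parameter count roughly matches the plain 4x GELU MLP it replaces. A standalone sketch of the same computation, without the einops dependency:

```python
import torch
import torch.nn.functional as F
from torch import nn

class GEGLU(nn.Module):
    def forward(self, x):
        # split the doubled hidden dimension; one half gates the other
        x, gate = x.chunk(2, dim = -1)
        return x * F.gelu(gate)

def FeedForward(dim, mult = 4, dropout = 0.):
    # 2/3 scaling keeps parameters comparable to a non-gated 4x MLP,
    # since the first projection now outputs two halves
    dim_inner = int(dim * mult * 2 / 3)
    return nn.Sequential(
        nn.Linear(dim, dim_inner * 2),
        GEGLU(),
        nn.Dropout(dropout),
        nn.Linear(dim_inner, dim)
    )

x = torch.randn(2, 16, 64)       # (batch, tokens, dim)
print(FeedForward(64)(x).shape)  # torch.Size([2, 16, 64])
```

With dim = 64 and mult = 4, the hidden width is int(64 * 4 * 2 / 3) = 170, so the first projection maps 64 -> 340 and the gated output is 170-dimensional.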
setup.py — 2 changes: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 setup(
     name = 'iTransformer',
     packages = find_packages(exclude=[]),
-    version = '0.0.7',
+    version = '0.1.0',
     license='MIT',
     description = 'iTransformer - Inverted Transformers Are Effective for Time Series Forecasting',
     author = 'Phil Wang',
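
The minor-version bump reflects the architectural change. For a quick smoke test, a usage sketch along the lines of the repository README; the constructor keywords here are assumptions taken from that README and may differ between versions:

```python
import torch
from iTransformer import iTransformer

# keyword names follow the repository README; treat them as assumptions,
# not a stable API reference
model = iTransformer(
    num_variates = 137,
    lookback_len = 96,
    dim = 256,
    depth = 6,
    heads = 8,
    dim_head = 64,
    pred_length = (12, 24, 36, 48)
)

time_series = torch.randn(2, 96, 137)  # (batch, lookback_len, num_variates)
preds = model(time_series)             # one prediction per requested horizon
```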
