main.py
import os
from torch import cuda
from torch import compile as torch_compile
from mlflow import start_run
import lightning.pytorch as pl
from lightning.pytorch.tuner import Tuner
from lightning.pytorch.callbacks import StochasticWeightAveraging, ModelCheckpoint, EarlyStopping
from siamese.trainer_mng import ModelTrainingWrapper
# Number of training epochs; can be overridden via the EPOCH environment variable.
EPOCH = 10
if epochs := os.getenv("EPOCH"):
    EPOCH = int(epochs)
SAVE_MODEL_PATH = os.getenv("SAVE_MODEL_PATH")
if __name__ == "__main__":
    # Release any cached GPU memory before starting a fresh run.
    cuda.empty_cache()
    # Dataset and dataloaders are provided by ModelTrainingWrapper.
    # Track the training run with MLflow.
    with start_run(description=f"Run {EPOCH} epochs, BCElogits"):
        model_wrapped = ModelTrainingWrapper()
        # model_wrapped = torch_compile(model_wrapped)
        trainer = pl.Trainer(
            min_epochs=1,
            max_epochs=EPOCH,
            accumulate_grad_batches=2,
            log_every_n_steps=10,
            callbacks=[
                StochasticWeightAveraging(swa_lrs=0.05),
                ModelCheckpoint(dirpath=SAVE_MODEL_PATH, save_top_k=1, monitor="eval_loss"),
                EarlyStopping(monitor="eval_loss", patience=30),
            ],
            default_root_dir=SAVE_MODEL_PATH,
        )
        # Optional Lightning Tuner passes, enabled via environment flags
        # (parsed safely instead of eval-ing the raw variable).
        tuner = Tuner(trainer)
        if os.getenv("FIND_BATCH_SIZE", "").lower() in ("1", "true", "yes"):
            # Binary-search for the largest batch size that fits in memory.
            tuner.scale_batch_size(model_wrapped, mode="binsearch")
        if os.getenv("FIND_LR_RATE", "").lower() in ("1", "true", "yes"):
            # Run the learning-rate finder over 50 trial steps.
            tuner.lr_find(model_wrapped, num_training=50)
        trainer.fit(model=model_wrapped)
        trainer.test(model_wrapped)
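
# Example invocation (a sketch; the specific values below are assumptions,
# only the variable names EPOCH, SAVE_MODEL_PATH, FIND_BATCH_SIZE and
# FIND_LR_RATE come from the script above):
#   EPOCH=20 SAVE_MODEL_PATH=./checkpoints FIND_BATCH_SIZE=1 FIND_LR_RATE=1 python main.py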