Fix unstable training #352

Merged: 14 commits, Jan 17, 2024
pipeline/train/configs/opustrainer/backward.yml (2 additions, 2 deletions)
@@ -9,8 +9,8 @@ train:
- until original 10 # Train for 10 epochs. Only OpusTrainer can control epochs, it's all one big epoch for Marian

modifiers:
- UpperCase: 0.1 # Apply randomly to 5% of sentences
- TitleCase: 0.1
- UpperCase: 0.07 # Apply randomly to 7% of sentences
- TitleCase: 0.05
#- Typos: 0.05

seed: 1111
pipeline/train/configs/opustrainer/student.yml (2 additions, 2 deletions)
@@ -11,8 +11,8 @@ train:
# TODO: augment corpus before decoding or reduce augmentation rate
# TODO: https://github.com/mozilla/firefox-translations-training/issues/272
#modifiers:
#- UpperCase: 0.1 # Apply randomly to 5% of sentences
#- TitleCase: 0.1
- UpperCase: 0.07 # Apply randomly to 7% of sentences
- TitleCase: 0.05
# TODO: enable typos, issue https://github.com/mozilla/firefox-translations-training/issues/262
#- Typos: 0.05
# TODO: enable tags, currently doesn't work because of the issue with tokenization
pipeline/train/configs/opustrainer/teacher.yml (6 additions, 20 deletions)
@@ -3,28 +3,14 @@ datasets:
backtranslated: <dataset1> # Back-translated data

stages:
- start
- mid
- end
- pretrain
- finetune

# One epoch of only original high-quality data to warm up the model
start:
- original 1.0
- until original 1

# Gradually add back-translations to the mix
# Back-translated corpus can vary a lot in size, so we can try using original to count epochs
mid:
- original 0.7
- backtranslated 0.3
- until original 1

# Expand back-translations
end:
# Back-translated corpus can vary a lot in size, so we can try using original one to count epochs
pretrain:
Collaborator Author: Instability was not caused by the stages, so it's fine to use a simpler schedule.

Member: Ah, I like this as being simpler.

- original 0.6
- backtranslated 0.4
- until original 1
- until original 2

# Fine-tuning only on original clean corpus until the early stopping
finetune:
@@ -33,8 +19,8 @@ finetune:


modifiers:
- UpperCase: 0.1 # Apply randomly to 10% of sentences
- TitleCase: 0.1
- UpperCase: 0.07 # Apply randomly to 7% of sentences
- TitleCase: 0.05
Member: Question: Why are you changing these here?

Collaborator Author: Based on the paper https://arxiv.org/pdf/2311.14838.pdf. They set them to 0.05, but I noticed that title case performs better than upper case, so I boosted it a bit. I also ran an experiment and got satisfactory results. I added a link to the docs.
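
To make the rates concrete, here is a minimal sketch of what such augmentation amounts to: each sentence pair is randomly uppercased with probability 0.07 or title-cased with probability 0.05, and left unchanged otherwise. This is only an illustration of the idea, not OpusTrainer's actual implementation (which also handles details such as alignments); the example sentences are made up.

```python
import random

UPPERCASE_RATE = 0.07  # matches the UpperCase modifier above
TITLECASE_RATE = 0.05  # matches the TitleCase modifier above

def augment_case(src, trg, rng):
    """Randomly change the casing of a sentence pair at the configured rates."""
    r = rng.random()
    if r < UPPERCASE_RATE:
        return src.upper(), trg.upper()
    if r < UPPERCASE_RATE + TITLECASE_RATE:
        return src.title(), trg.title()
    return src, trg  # the remaining ~88% of pairs are left unchanged

rng = random.Random(1111)  # same seed as in the configs
print(augment_case("this is an example sentence", "dies ist ein beispielsatz", rng))
```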

# TODO: enable typos, issue https://github.com/mozilla/firefox-translations-training/issues/262
#- Typos: 0.05

pipeline/train/configs/training/teacher.train.yml (5 additions, 4 deletions)
@@ -1,9 +1,10 @@
# https://discourse.translatelocally.com/t/marian-configuration-to-use/24
disp-freq: 1000
# default learning rate for transformer-big is 0.0002 https://github.com/marian-nmt/marian-dev/blob/master/src/common/aliases.cpp
learn-rate: 0.0003 # Turn this down if you get a diverged model, maybe 0.0001
optimizer-delay: 1 # Roughly GPU devices * optimizer-delay = 8, but keep as an integer
optimizer-delay: 2 # Roughly GPU devices * optimizer-delay = 8, but keep as an integer
Member:
Question: Can you explain this change?

The docs say:

--optimizer-delay FLOAT=1 SGD update delay (#batches between updates). 1 = no delay. Can be fractional, e.g. 0.1 to use only 10% of each batch

I don't have a mental model of what this is changing and why it affects things.

Member: I guess this is matching the 2 value from the pretrain step? If so, please document this both here and in the pretrain step.

Collaborator Author: I did not find any resources where this recommendation comes from, but it seems to increase the effective update batch size, which makes training more stable.

Me:
Why is it recommended for Marian to set optimizer-delay so that GPU devices * optimizer-delay = 8?

ChatGPT:
The recommendation to set optimizer-delay so that GPU devices * optimizer-delay = 8 in Marian NMT is a heuristic that balances computational efficiency against the stability of the optimization process. It comes from empirical observations that this setting gives a good trade-off between training speed and stable convergence, particularly when using multiple GPUs.

This setting effectively increases the batch size without requiring more memory, which can lead to more stable and reliable gradient estimates. It is a way to utilize the parallelism offered by multiple GPUs while also ensuring that each update is significant enough to provide stable learning, without being so large that it might cause instability due to the accumulation of too much gradient information before an update is applied.
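
In other words, optimizer-delay acts like gradient accumulation: gradients from several mini-batches are combined before a single parameter update, so the effective batch per update is roughly per-device batch * number of GPUs * optimizer-delay. With a delay of 2, the rule of thumb in the comment (devices * delay = 8) suggests a 4-GPU setup. Below is a minimal sketch of the accumulation idea with a toy one-parameter model; it is not Marian's implementation, and the data values are made up.

```python
# Sketch of gradient accumulation, which is what optimizer-delay does in spirit.
# Toy model y = w * x trained with squared error; data and values are made up.
OPTIMIZER_DELAY = 2     # accumulate gradients over 2 batches per update
LEARNING_RATE = 0.0003  # same value as learn-rate above

def gradient(w, batch):
    """d/dw of the mean squared error over one mini-batch of (x, y) pairs."""
    return sum(2 * (w * x - y) * x for x, y in batch) / len(batch)

batches = [[(1.0, 2.0), (2.0, 4.0)], [(3.0, 6.0), (4.0, 8.0)],
           [(0.5, 1.0), (1.5, 3.0)], [(2.5, 5.0), (3.5, 7.0)]]

w, accumulated, pending = 0.0, 0.0, 0
for batch in batches:
    accumulated += gradient(w, batch)  # sum gradients instead of stepping now
    pending += 1
    if pending == OPTIMIZER_DELAY:
        # One update per OPTIMIZER_DELAY batches: the update is based on a larger
        # effective batch, so the gradient estimate is less noisy.
        w -= LEARNING_RATE * accumulated / pending
        accumulated, pending = 0.0, 0

print(f"w after accumulated updates: {w:.6f}")
```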

lr-report: True
save-freq: 5000
valid-freq: 3000
valid-freq: 5000
valid-max-length: 300
valid-mini-batch: 8
early-stopping: 20
early-stopping: 20
Member: I spent a bit of time researching these options to understand exactly what you changed. Rather than doing that, I have a suggestion that should be quick:

Suggestion (docs): It would be nice to include a short note explaining why you chose certain values here. Documenting decisions when we change hyperparameters will make it easier to share our knowledge with each other and remember things for our future selves.
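
For reference, a quick way to read the validation settings in this file (assuming early-stopping counts consecutive validations without improvement, which is my reading of the Marian flag):

```python
# Back-of-the-envelope check of how valid-freq and early-stopping interact.
valid_freq = 5000     # validate every 5000 updates (raised from 3000)
early_stopping = 20   # stop after 20 validations with no improvement

patience_in_updates = valid_freq * early_stopping
print(f"training stops about {patience_in_updates} updates after the last improvement")
# -> training stops about 100000 updates after the last improvement
```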