Code clean-ups (#171)
* misc optimization
* clear cache after translation during scoring
* allow more recompiles
* set rope / position_embeddings at model build
* remove BPTT
* clarify pad_mask (true = yes we pad, so we won't attend) and attn_mask (true = yes we attend); see the sketch after this list
* preallocate KV cache even in "pytorch" path (same as flash)
* reduce config updates
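
For readers skimming the diff, here is a minimal sketch of the mask convention described in the pad_mask/attn_mask bullet. This illustrates the convention only, with made-up tensor names, not the exact eole API:

```python
import torch

# pad_mask: True = "we padded this position, do NOT attend to it"
# attn_mask: True = "yes, we attend to this position"
lengths = torch.tensor([3, 5])                  # true lengths of two sequences
max_len = 5
positions = torch.arange(max_len).unsqueeze(0)  # shape (1, max_len)

pad_mask = positions >= lengths.unsqueeze(1)    # shape (batch, max_len)
attn_mask = ~pad_mask                           # one is the negation of the other

print(pad_mask[0])   # tensor([False, False, False,  True,  True])
print(attn_mask[0])  # tensor([ True,  True,  True, False, False])
```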
vince62s authored Jan 3, 2025
1 parent b5e2266 commit 8a8987f
Showing 42 changed files with 562 additions and 588 deletions.
22 changes: 11 additions & 11 deletions .github/workflows/push.yml
@@ -48,7 +48,7 @@ jobs:
-src_vocab /tmp/eole.vocab.src \
-tgt_vocab /tmp/eole.vocab.tgt \
&& rm -rf /tmp/sample
- name: Test field/transform dump
- name: Testing architecture rnn sample dump...
run: |
# The dumped fields are used later when testing tools
python eole/bin/main.py train \
@@ -61,7 +61,7 @@ jobs:
-tgt_vocab /tmp/eole.vocab.tgt \
-src_vocab_size 1000 \
-tgt_vocab_size 1000
- name: Test RNN training
- name: Testing architecture rnn training
run: |
python eole/bin/main.py train \
-config eole/tests/data/data.yaml \
@@ -75,7 +75,7 @@ jobs:
-tensorboard \
-tensorboard_log_dir /tmp/logs_train
python eole/tests/test_events.py --logdir /tmp/logs_train -tensorboard_checks train
- name: Test RNN training and validation
- name: Testing architecture rnn training and validation
run: |
python eole/bin/main.py train \
-config eole/tests/data/data.yaml \
@@ -90,7 +90,7 @@ jobs:
-tensorboard_log_dir /tmp/logs_train_and_valid
python eole/tests/test_events.py --logdir /tmp/logs_train_and_valid -tensorboard_checks train
python eole/tests/test_events.py --logdir /tmp/logs_train_and_valid -tensorboard_checks valid
- name: Test RNN training with coverage
- name: Testing architecture rnn training w/ coverage
run: |
python eole/bin/main.py train \
-config eole/tests/data/data.yaml \
@@ -101,7 +101,7 @@ jobs:
-report_every 5 \
-model '{"architecture": "rnn", "hidden_size": 10, "embeddings": {"word_vec_size": 5, "position_encoding_type": None}, "decoder": {"coverage_attn": True, "lambda_coverage": 0.1}}' \
-training '{"batch_size": 10, "num_workers": 0, "bucket_size": 1024, "train_steps": 10}'
- name: Test Transformer training with align
- name: Testing architecture custom transformer training w/ align
run: |
python eole/bin/main.py train \
-config eole/tests/data/align_data.yaml \
@@ -112,7 +112,7 @@ jobs:
-model '{"layers": 4, "hidden_size": 16, "transformer_ff": 64, "embeddings": {"word_vec_size": 16}, "encoder": {"encoder_type": "transformer", "heads": 2}, "decoder": {"decoder_type": "transformer", "lambda_align": 0.05, "alignment_layer": 2, "alignment_heads": 0, "heads": 2}}' \
-training '{"batch_size": 10, "num_workers": 0, "bucket_size": 1024, "train_steps": 10, "dropout_steps": [0, 3, 7], "dropout": [0.3, 0.2, 0.1], "attention_dropout": [0.2, 0.2, 0.1]}' \
-report_every 5 \
- name : Test Transformer training and validation with dynamic scoring
- name : Testing architecture custom transformer training w/ validation with dynamic scoring
run: |
python3 eole/bin/main.py train \
-config eole/tests/data/data.yaml \
@@ -129,7 +129,7 @@ jobs:
-tensorboard_log_dir /tmp/logs_dynamic-scoring \
-dump_preds /tmp/dump_preds
python eole/tests/test_events.py --logdir /tmp/logs_dynamic-scoring -tensorboard_checks valid_metrics
- name : Test Transformer training and validation with dynamic scoring and maxrelative
- name : Testing architecture transformer training w/ validation with dynamic scoring and maxrelative
run: |
python3 eole/bin/main.py train \
-config eole/tests/data/data.yaml \
@@ -146,15 +146,15 @@ jobs:
-tensorboard_log_dir /tmp/logs_dynamic-scoring_and_relative \
-dump_preds /tmp/dump_preds
python eole/tests/test_events.py --logdir /tmp/logs_dynamic-scoring_and_relative -tensorboard_checks valid_metrics
- name : Test Transformer training and validation with dynamic scoring and rotary
- name : Testing architecture transformer training w/ validation with dynamic scoring and rotary
run: |
python3 eole/bin/main.py train \
-config eole/tests/data/data.yaml \
-src_vocab /tmp/eole.vocab.src \
-tgt_vocab /tmp/eole.vocab.tgt \
-src_vocab_size 1000 \
-tgt_vocab_size 1000 \
-model '{"architecture": "transformer", "layers": 4, "heads": 2, "hidden_size": 16, "transformer_ff": 64, "embeddings": {"word_vec_size": 16, "position_encoding_type": "Rotary"}}' \
-model '{"architecture": "transformer", "layers": 4, "heads": 2, "hidden_size": 16, "transformer_ff": 64, "rope_config": {}, "embeddings": {"word_vec_size": 16, "position_encoding_type": "Rotary"}}' \
-training '{"batch_size": 10, "num_workers": 0, "bucket_size": 1024, "train_steps": 10, "valid_steps": 5}' \
-report_every 2 \
-valid_metrics "BLEU" "TER" \
@@ -163,7 +163,7 @@ jobs:
-tensorboard_log_dir /tmp/logs_dynamic-scoring_and_rotary \
-dump_preds /tmp/dump_preds
python eole/tests/test_events.py --logdir /tmp/logs_dynamic-scoring_and_rotary -tensorboard_checks valid_metrics
- name : Test Transformer training and validation with dynamic scoring and alibi
- name : Testing architecture transformer training w/ validation with dynamic scoring and alibi
run: |
python3 eole/bin/main.py train \
-config eole/tests/data/data.yaml \
@@ -180,7 +180,7 @@ jobs:
-tensorboard_log_dir /tmp/logs_dynamic-scoring_and_alibi \
-dump_preds /tmp/dump_preds
python eole/tests/test_events.py --logdir /tmp/logs_dynamic-scoring_and_alibi -tensorboard_checks valid_metrics
- name: Test LM training
- name: Testing architecture custom decoder only training
run: |
python eole/bin/main.py train \
-config eole/tests/data/lm_data.yaml \
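
Note how the rotary test above now passes an explicit `"rope_config": {}` in the model JSON, in line with the "set rope / position_embeddings at model build" bullet. A minimal sketch of that idea, assuming simplified config classes (the real ones live in eole/config/models.py):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class RotaryPositionConfig:
    # Illustrative default; see eole/config for the real fields.
    rotary_theta: float = 10000.0

@dataclass
class ModelConfig:
    position_encoding_type: str = "Rotary"
    rope_config: Optional[RotaryPositionConfig] = None

    def resolve(self) -> "ModelConfig":
        # At model-build time, default the rotary config exactly once
        # instead of patching it later during training or inference.
        if self.position_encoding_type == "Rotary" and self.rope_config is None:
            self.rope_config = RotaryPositionConfig()
        return self

cfg = ModelConfig().resolve()
print(cfg.rope_config)  # RotaryPositionConfig(rotary_theta=10000.0)
```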
21 changes: 13 additions & 8 deletions README.md
@@ -2,30 +2,35 @@

[![Documentation](https://img.shields.io/badge/docs-latest-blue.svg)](https://eole-nlp.github.io/eole)

Open language modeling toolkit based on [PyTorch](https://pytorch.org).
Open language modeling toolkit based on [PyTorch](https://pytorch.org), initially spun off from OpenNMT-py.

## 👷‍♂️🚧 Work in Progress
We aim to maintain the research-friendly approach of the original project while including the latest architectures (LLMs) and various other techniques.
Our goal is to provide a comprehensive yet compact and modular codebase for experimenting with various types of language models (encoder, decoder, seq2seq).

[EOLE](https://github.com/eole-nlp/eole) is a spin-off of the [OpenNMT-py](https://github.com/opennmt/opennmt-py) project. We aim to maintain the research-friendly approach of the original project while updating the structure and expanding it to include new topics related to large language models (LLMs) and various other techniques. Our goal is to provide a comprehensive yet compact and modular codebase for experimenting with various types of language models (encoder, decoder, seq2seq).
## Latest developments

---
- **Web-based (Google Translate-like) interface** featuring the latest EuroLLM-9B-Instruct LLM: read more [here](https://github.com/eole-nlp/eole/tree/main/recipes/eurollm)
- **Estimator layer**, which enables rescoring multiple beams within the same model. Read the articles [here](https://medium.com/p/05b00b271a47) and [here](https://medium.com/p/7dccfe167814)
- **Support for Hugging Face tokenizers** for better compatibility
- **New recipes** for TowerInstruct-Llama2 and TowerInstruct-Mistral
- **Support for the latest models**: Llama3.1, Gemma2, Pixtral
- **Replicated CometKiwi (XL/XXL)** Encoder+Estimator models

### Current State
## Work completed

We have made significant progress in several areas:

- **Configuration Management**: Streamlined through [pydantic](https://docs.pydantic.dev) models (see the sketch after this list).
- **Command Line Entry Points**: Improved using structured subparsers for better organization.
- **Reproducible Recipes**: Provided for widely used models and tasks, ensuring consistency and reliability.
- **Core API Simplification**: Refined around the new configuration objects for ease of use.
- **Revamped FastAPI-based server**: see the EuroLLM-9B-Instruct example above.
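
As a rough illustration of the pydantic-based configuration mentioned in the first bullet (the field names here are invented; the real schemas live in eole/config/):

```python
from pydantic import BaseModel, Field, model_validator

class TrainingConfig(BaseModel):
    # Made-up fields for illustration only.
    batch_size: int = Field(default=10, description="Batch size.")
    train_steps: int = Field(default=1000, description="Number of training steps.")

    @model_validator(mode="after")
    def _validate(self):
        if self.batch_size <= 0:
            raise ValueError("batch_size must be positive")
        return self

cfg = TrainingConfig(batch_size=32)
print(cfg.model_dump())  # {'batch_size': 32, 'train_steps': 1000}
```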

### Future Directions

There are still several exciting avenues to explore:

- **Further Simplification and Refactoring**: Continue enhancing the codebase for clarity and efficiency.
- **Inference Server**: Develop a robust solution for model inference.
- **Additional Recipes**: Expand the library of reproducible recipes.
- **Documentation**: Enhance and expand the documentation for better user guidance.
- **Test Coverage**: Improve testing to ensure code reliability and performance.
- **Logging Enhancements**: Implement more sophisticated logging mechanisms.
@@ -37,7 +42,7 @@ There are still several exciting avenues to explore:

- **Versatile Training and Inference**: Train from scratch, finetune, and infer models of various architectures including Transformer Encoder/Decoder/EncoderDecoder and RNN EncoderDecoder.
- **Dynamic Data Transforms**: Apply on-the-fly transformations in the dataloading logic for both training and inference.
- **Comprehensive LLM Support**: Includes converters for Llama, Mistral, Phi, OpenLlama, Redpajama, MPT-7B, and Falcon models.
- **Comprehensive LLM Support**: Includes converters for Llama, Mistral, Phi, Gemma, and more.
- **Advanced Quantization**: Support for 8-bit and 4-bit quantization, along with LoRA adapters, with or without checkpointing, as well as mixed precision (FP16); see the sketch after this list.
- **Efficient Finetuning**: Finetune 7B and 13B models on a single RTX 24GB GPU using 4-bit quantization.
- **Flexible Inference**: Perform inference in 4-bit or 8-bit using the same layer quantization methods as in finetuning.
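
As a sketch of what the quantization and LoRA bullets above mean in practice, here is an illustrative training-config fragment in the same spirit as the inline `-training` JSON used in the CI workflow. `quant_type` does appear in the eole/config/run.py diff further down, but treat the rest of the field names as assumptions and check eole/config/training.py for the authoritative schema:

```python
# Illustrative only, not a verified eole configuration.
training = {
    "quant_type": "bnb_NF4",       # 4-bit NormalFloat quantization (assumed value)
    "quant_layers": ["linear_values", "linear_query"],  # assumed layer names
    "lora_layers": ["linear_values", "linear_query"],   # assumed layer names
    "lora_rank": 8,
    "lora_dropout": 0.05,
    "batch_size": 8,
    "train_steps": 1000,
}
```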
9 changes: 4 additions & 5 deletions eole/bin/convert/convert_HF.py
@@ -836,7 +836,7 @@ def get_weight(checkpoint, tensor_name):
for target in targetlist:
if target in key_maps[arch].keys():
source = key_maps[arch][target]
if type(source) == tuple:
if isinstance(source, tuple):
srckey = source[0]
srcmap = source[1]
else:
@@ -847,7 +847,7 @@ def get_weight(checkpoint, tensor_name):
)

if w is not None:
if type(source) == tuple:
if isinstance(source, tuple):
w = eval("w" + srcmap).contiguous()
eole_safetensor[
eole_prefix + str(i) + target + param
@@ -859,9 +859,8 @@ def get_weight(checkpoint, tensor_name):
idx = 1
for p in ["weight", "bias"]:
if ".input_layernorm." + p in key_maps[arch].keys():
if (
type(key_maps[arch][".input_layernorm." + p])
== tuple
if isinstance(
key_maps[arch][".input_layernorm." + p], tuple
):
w = get_weight(
checkpoint,
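
The convert_HF.py changes above (and the serve.py change below) replace exact-type checks like `type(source) == tuple` with `isinstance(source, tuple)`. A short demonstration of why isinstance is the idiomatic choice: it also accepts subclasses such as namedtuples.

```python
from collections import namedtuple

Point = namedtuple("Point", ["x", "y"])
p = Point(1, 2)

print(type(p) == tuple)      # False: exact-type comparison misses subclasses
print(isinstance(p, tuple))  # True: a namedtuple is a tuple subclass
```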
2 changes: 1 addition & 1 deletion eole/bin/run/serve.py
@@ -267,7 +267,7 @@ def infer(self, inputs, settings={}, is_chat=False):
"""
Run inference on the given inputs.
"""
if type(inputs) == str:
if isinstance(inputs, str):
inputs = [inputs]
if not (self.loaded):
self.load()
70 changes: 51 additions & 19 deletions eole/config/models.py
@@ -287,9 +287,11 @@ class TransformerConfig(Config):

@model_validator(mode="after")
def _validate_transformer_config(self):
"""
if self.position_encoding_type == PositionEncodingType.Rotary:
if self.rope_config is None:
self.rope_config = RotaryPositionConfig()
"""
if self.add_qkvbias and "add_final_linear_bias" not in self.model_fields_set:
self.update(add_final_linear_bias=True)
return self
@@ -503,40 +505,70 @@ def default_architecture(cls, data: Any) -> Any:
return data

def update_model_opts(self):
if self.embeddings is not None and self.embeddings.word_vec_size > 0:
self.embeddings.src_word_vec_size = self.embeddings.word_vec_size
self.embeddings.tgt_word_vec_size = self.embeddings.word_vec_size
update_dict = {}
if self.embeddings.position_encoding_type == PositionEncodingType.Rotary:
if not self.rope_config:
update_dict["rope_config"] = RotaryPositionConfig()
rope_config = update_dict["rope_config"]
else:
rope_config = self.rope_config
else:
rope_config = None

# Backward compatibility with "fix_word_vecs_*" opts
# We can probably drop this now...
# if hasattr(self, "fix_word_vecs_enc"):
# self.embeddings.freeze_word_vecs_enc = self.embeddings.fix_word_vecs_enc
# if hasattr(self, "fix_word_vecs_dec"):
# self.embeddings.freeze_word_vecs_dec = self.embeddings.fix_word_vecs_dec
if self.embeddings is not None and self.embeddings.word_vec_size > 0:
update_dict["embeddings"] = {
"src_word_vec_size": self.embeddings.word_vec_size,
"tgt_word_vec_size": self.embeddings.word_vec_size,
}
if self.embeddings is not None and "embeddings" in update_dict.keys():
self.embeddings.update(**update_dict.pop("embeddings"))

if (
getattr(self.encoder, "encoder_type", None) == "brnn"
and self.decoder.decoder_type == "rnn"
):
self.decoder.bidirectional_encoder = True
update_dict["decoder"] = {"bidirectional_encoder": True}

if self.encoder is not None:
self.encoder.src_word_vec_size = self.embeddings.src_word_vec_size
update_dict["encoder"] = {
"src_word_vec_size": self.embeddings.src_word_vec_size
}
if getattr(self.encoder, "encoder_type", None) == "transformer":
self.encoder.position_encoding_type = (
self.embeddings.position_encoding_type
update_dict["encoder"].update(
{
"position_encoding_type": self.embeddings.position_encoding_type,
"n_positions": self.embeddings.n_positions,
"rope_config": rope_config,
}
)
self.encoder.n_positions = self.embeddings.n_positions
update_dict[
"position_encoding_type"
] = self.embeddings.position_encoding_type
if self.encoder is not None and "encoder" in update_dict.keys():
self.encoder.update(**update_dict.pop("encoder"))

if self.decoder is not None:
self.decoder.tgt_word_vec_size = self.embeddings.tgt_word_vec_size
update_dict["decoder"] = {
"tgt_word_vec_size": self.embeddings.tgt_word_vec_size
}
if getattr(self.decoder, "decoder_type", None) in [
"transformer",
"transformer_lm",
]:
self.decoder.position_encoding_type = (
self.embeddings.position_encoding_type
update_dict["decoder"].update(
{
"position_encoding_type": self.embeddings.position_encoding_type,
"n_positions": self.embeddings.n_positions,
"rope_config": rope_config,
}
)
self.decoder.n_positions = self.embeddings.n_positions
update_dict[
"position_encoding_type"
] = self.embeddings.position_encoding_type
if self.decoder is not None and "decoder" in update_dict.keys():
self.decoder.update(**update_dict.pop("decoder"))

self.update(**update_dict)

# causing some weird recursion issue in unit test, to investigate
# if self.encoder is not None:
@@ -584,7 +616,7 @@ def _validate_model_config(self):
return self


class CustomModelConfig(BaseModelConfig):
class CustomModelConfig(TransformerConfig, BaseModelConfig):
"""
Wrap anything that does not fit a set common architecture.
"""
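
The update_model_opts rewrite above collects all changes in a local update_dict and applies them with a single self.update(**update_dict) call at the end, which is what the "reduce config updates" bullet refers to; the run.py diff below follows the same pattern. A minimal sketch of the idea, with a stand-in update() helper (the real one goes through pydantic validation):

```python
class Config:
    """Stand-in for eole's pydantic-based config objects."""

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def update(self, **kwargs):
        # The real helper re-validates; here we just assign,
        # to show the control flow.
        for key, value in kwargs.items():
            setattr(self, key, value)

cfg = Config(hidden_size=16, rope_config=None)

update_dict = {}                      # collect every change first...
if cfg.rope_config is None:
    update_dict["rope_config"] = {}   # placeholder value
update_dict["hidden_size"] = 32

cfg.update(**update_dict)             # ...then apply them in one call
print(cfg.hidden_size, cfg.rope_config)  # 32 {}
```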
14 changes: 8 additions & 6 deletions eole/config/run.py
@@ -187,24 +187,26 @@ def _update_with_model_config(self):
quant_type=training_config.quant_type,
)

model_config._validate_model_config()
# training_config._validate_running_config() # not sure it's needed

self.update(
model=model_config,
)

update_dict = {}
if "transforms" not in self.model_fields_set:
self.transforms = self._all_transform = transforms
update_dict["transforms"] = transforms
update_dict["_all_transform"] = transforms
if "transforms_configs" not in self.model_fields_set:
self.transforms_configs = config_dict.get("transforms_configs", {})
update_dict["transforms_configs"] = NestedAllTransformsConfig(
**config_dict.get("transforms_configs", {})
)
if "compute_dtype" not in self.model_fields_set:
self.compute_dtype = config_dict.get("training", {}).get(
"compute_dtype", "fp16"
)
for key, value in config_dict.get("inference", {}).items():
if key not in self.model_fields_set:
setattr(self, key, value)
update_dict[key] = value
self.update(**update_dict)


class BuildVocabConfig(
4 changes: 0 additions & 4 deletions eole/config/training.py
@@ -212,10 +212,6 @@ class TrainingConfig(
dropout_steps: List[int] = Field(
default=[0], description="Steps at which dropout changes."
)
truncated_decoder: int = Field(
default=0, description="Truncated bptt."
) # deprecated?

label_smoothing: float = Field(
default=0.0,
description="Label smoothing value epsilon. "
