[deprecation] Clarify that datasets and readers are deprecated since v3 (#3184)

They only exist for backwards compatibility, and will be removed eventually.
UKPLab · Jan 21, 2025 · b1f1d03 · b1f1d03
1 parent 7056137
commit b1f1d03
Show file tree

Hide file tree

Showing 13 changed files with 125 additions and 0 deletions.
diff --git a/sentence_transformers/datasets/DenoisingAutoEncoderDataset.py b/sentence_transformers/datasets/DenoisingAutoEncoderDataset.py
@@ -1,3 +1,14 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+See this script for more details on how to use the new training API:
+https://github.com/UKPLab/sentence-transformers/blob/master/examples/unsupervised_learning/TSDAE/train_stsb_tsdae.py
+"""
+
 from __future__ import annotations
 
 import numpy as np

diff --git a/sentence_transformers/datasets/NoDuplicatesDataLoader.py b/sentence_transformers/datasets/NoDuplicatesDataLoader.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+In particular, you can pass "no_duplicates" to `batch_sampler` in the `SentenceTransformerTrainingArguments` class.
+"""
+
 from __future__ import annotations
 
 import math

diff --git a/sentence_transformers/datasets/ParallelSentencesDataset.py b/sentence_transformers/datasets/ParallelSentencesDataset.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+Instead of using this dataset class, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
+"""
+
 from __future__ import annotations
 
 import gzip

diff --git a/sentence_transformers/datasets/SentenceLabelDataset.py b/sentence_transformers/datasets/SentenceLabelDataset.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+In particular, you can pass "group_by_label" to `batch_sampler` in the `SentenceTransformerTrainingArguments` class.
+"""
+
 from __future__ import annotations
 
 import logging

diff --git a/sentence_transformers/datasets/SentencesDataset.py b/sentence_transformers/datasets/SentencesDataset.py
@@ -1,3 +1,11 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+"""
+
 from __future__ import annotations
 
 from torch.utils.data import Dataset

diff --git a/sentence_transformers/datasets/__init__.py b/sentence_transformers/datasets/__init__.py
@@ -1,3 +1,11 @@
+"""
+This directory contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+"""
+
 from __future__ import annotations
 
 from .DenoisingAutoEncoderDataset import DenoisingAutoEncoderDataset

diff --git a/sentence_transformers/readers/InputExample.py b/sentence_transformers/readers/InputExample.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+Instead of using `InputExample` objects, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
+"""
+
 from __future__ import annotations
 
 

diff --git a/sentence_transformers/readers/LabelSentenceReader.py b/sentence_transformers/readers/LabelSentenceReader.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+Instead of using this reader, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
+"""
+
 from __future__ import annotations
 
 import os

diff --git a/sentence_transformers/readers/NLIDataReader.py b/sentence_transformers/readers/NLIDataReader.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+Instead of using this reader, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
+"""
+
 from __future__ import annotations
 
 import gzip

diff --git a/sentence_transformers/readers/PairedFilesReader.py b/sentence_transformers/readers/PairedFilesReader.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+Instead of using this reader, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
+"""
+
 from __future__ import annotations
 
 import gzip

diff --git a/sentence_transformers/readers/STSDataReader.py b/sentence_transformers/readers/STSDataReader.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+Instead of using this reader, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
+"""
+
 from __future__ import annotations
 
 import csv

diff --git a/sentence_transformers/readers/TripletReader.py b/sentence_transformers/readers/TripletReader.py
@@ -1,3 +1,13 @@
+"""
+This file contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+
+Instead of using this reader, you should create a `datasets` `Dataset` for training: https://huggingface.co/docs/datasets/create_dataset
+"""
+
 from __future__ import annotations
 
 import csv

diff --git a/sentence_transformers/readers/__init__.py b/sentence_transformers/readers/__init__.py
@@ -1,3 +1,11 @@
+"""
+This directory contains deprecated code that can only be used with the old `model.fit`-style Sentence Transformers v2.X training.
+It exists for backwards compatibility with the `model.old_fit` method, but will be removed in a future version.
+
+Nowadays, with Sentence Transformers v3+, it is recommended to use the `SentenceTransformerTrainer` class to train models.
+See https://www.sbert.net/docs/sentence_transformer/training_overview.html for more information.
+"""
+
 from __future__ import annotations
 
 from .InputExample import InputExample