diff --git a/evalem/misc/datasets.py b/evalem/misc/datasets.py
index fffab03..155cddc 100755
--- a/evalem/misc/datasets.py
+++ b/evalem/misc/datasets.py
@@ -36,7 +36,7 @@ def get_squad_v2(
     data = data.shuffle(seed=42) if shuffle else data
     data = data.select(range(nsamples)) if nsamples > 0 else data
 
-    inputs = [dict(question=d["question"], context=d["context"]) for d in data]
+    inputs = [dict(question=d["question"].lstrip(), context=d["context"]) for d in data]
     references = [d["answers"]["text"] for d in data]
 
     inputs, references = zip(*filter(lambda x: len(x[1]) > 0, zip(inputs, references)))
diff --git a/evalem/models/defaults.py b/evalem/models/defaults.py
index 910bea6..f775a74 100755
--- a/evalem/models/defaults.py
+++ b/evalem/models/defaults.py
@@ -48,6 +48,7 @@ def __init__(
     def _postprocess_predictions(
         self,
         predictions: Union[dict, List[dict]],
+        **kwargs,
     ) -> Iterable[QAPredictionDTO]:
         """
         This method converts the pipeline's default output format
diff --git a/requirements.txt b/requirements.txt
index e183511..b86fd2b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,13 +1,16 @@
+arrow==1.2.3
 bert-score==0.3.13
-datasets==2.9.0
+datasets==2.3.2
+evaluate==0.2.2
 jury==2.2.3
 loguru==0.6.0
 numpy==1.24.2
 pandas==1.5.3
+pyarrow==9.0.0
 pytest==7.2.1
 sacrebleu==2.3.1
 scikit-learn==1.2.1
 sentencepiece==0.1.97
 seqeval==1.2.2
-torch==1.13.1
-transformers==4.26.1
+torch==2.0.0
+transformers==4.28.1
diff --git a/setup.py b/setup.py
index 014c66a..1c0be1a 100644
--- a/setup.py
+++ b/setup.py
@@ -21,10 +21,11 @@
     python_requires=">=3.8",
     packages=[
         "evalem",
-        "evalem.misc",
-        "evalem.models",
         "evalem.evaluators",
         "evalem.metrics",
+        "evalem.misc",
+        "evalem.models",
+        "evalem.pipelines",
     ],
     install_requires=required,
     classifiers=[