diff --git a/evalem/evaluators/__init__.py b/evalem/evaluators/__init__.py
index f730e53..29051a1 100644
--- a/evalem/evaluators/__init__.py
+++ b/evalem/evaluators/__init__.py
@@ -1,2 +1,3 @@
 # flake8: noqa
 from ._base import Evaluator
+from .basics import QAEvaluator, TextClassificationEvaluator
diff --git a/evalem/evaluators/basics.py b/evalem/evaluators/basics.py
index 98b3665..8c8ac91 100755
--- a/evalem/evaluators/basics.py
+++ b/evalem/evaluators/basics.py
@@ -1,6 +1,13 @@
 #!/usr/bin/env python3
 
-from ..metrics import AccuracyMetric, ExactMatchMetric, F1Metric
+from ..metrics import (
+    AccuracyMetric,
+    ConfusionMatrix,
+    ExactMatchMetric,
+    F1Metric,
+    PrecisionMetric,
+    RecallMetric,
+)
 from ._base import Evaluator
 
 
@@ -30,6 +37,23 @@ def __init__(self) -> None:
         )
 
 
+class TextClassificationEvaluator(BasicEvaluator):
+    """
+    An evaluator for text classification tasks.
+    """
+
+    def __init__(self) -> None:
+        super().__init__(
+            metrics=[
+                AccuracyMetric(),
+                F1Metric(),
+                PrecisionMetric(),
+                RecallMetric(),
+                ConfusionMatrix(),
+            ],
+        )
+
+
 def main():
     pass
 
diff --git a/evalem/metrics/basics.py b/evalem/metrics/basics.py
index 0aa24d2..ce6c3b3 100755
--- a/evalem/metrics/basics.py
+++ b/evalem/metrics/basics.py
@@ -101,11 +101,3 @@ def __get_labels(
         Get unique list of labels across predictions + references.
         """
         return sorted(set(predictions).union(references))
-
-
-def main():
-    pass
-
-
-if __name__ == "__main__":
-    main()
diff --git a/evalem/metrics/semantics.py b/evalem/metrics/semantics.py
index ea43909..62b799c 100755
--- a/evalem/metrics/semantics.py
+++ b/evalem/metrics/semantics.py
@@ -37,6 +37,10 @@ class BertScore(SemanticMetric):
             https://github.com/Tiiiger/bert_score/blob/master/bert_score/utils.py
         ```device```: ```str```
             Which device to run the model on? Defaults to "cpu".
+        ```per_instance_score```: ```bool```
+            If enabled, per-instance precision, recall and f1 scores are
+            also returned in the computation result.
+            Else, only the mean precision, recall and f1 are returned.
         ```debug```: ```bool```
             Enable debugging log? Defaults to False.
 
@@ -68,12 +72,14 @@ class BertScore(SemanticMetric):
 
     def __init__(
         self,
-        model_type: str = "roberta-large",
+        model_type: str = "bert-base-uncased",
        device: str = "cpu",
+        per_instance_score: bool = False,
         debug: bool = False,
     ) -> None:
         super().__init__(metrics="bertscore", device=device, debug=debug)
         self.model_type = model_type
+        self.per_instance_score = per_instance_score
 
     def compute(
         self,
@@ -83,13 +89,19 @@ def compute(
     ) -> MetricOutput:
         device = kwargs.pop("device", self.device)
         model_type = kwargs.pop("model_type", self.model_type)
-        return super().compute(
+        result = super().compute(
             predictions=predictions,
             references=references,
             model_type=model_type,
             device=device,
             **kwargs,
         )
+        # if per-instance scores are not requested, collapse the
+        # per-instance lists into a single mean/average value.
+        if not self.per_instance_score:
+            for _key in ["precision", "recall", "f1"]:
+                result["bertscore"][_key] = np.mean(result["bertscore"][_key])
+        return result
 
 
 class BartScore(SemanticMetric):
diff --git a/evalem/misc/datasets.py b/evalem/misc/datasets.py
index a5de18b..fffab03 100755
--- a/evalem/misc/datasets.py
+++ b/evalem/misc/datasets.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python3
 
+from typing import Dict
+
 from datasets import load_dataset
 
 
@@ -41,6 +43,42 @@ def get_squad_v2(
     return dict(inputs=inputs, references=references)
 
 
+def get_imdb(
+    data_type: str = "test",
+    nsamples: int = 1000,
+    shuffle: bool = False,
+) -> Dict[str, list]:
+    """
+    This loads the imdb text classification dataset using the HuggingFace datasets module.
+
+    Args:
+        ```data_type```: ```str```
+            Either "train" or "test"
+        ```nsamples```: ```int```
+            How many samples to load?
+            Note: If 0 (or None), the full split is returned without
+            any sampling.
+        ```shuffle```: ```bool```
+            If enabled, shuffles the data prior to sampling.
+
+    Returns:
+        Returns a dict with 2 keys:
+            - `inputs`: ```List[str]```, the raw review texts
+            - `references`: ```List[str]```, the corresponding labels
+            ("NEGATIVE" or "POSITIVE")
+
+    """
+    nsamples = nsamples or 0
+    data = load_dataset("imdb")[data_type]
+    data = data.shuffle(seed=42) if shuffle else data
+    data = data.select(range(nsamples)) if nsamples > 0 else data
+
+    label_map = ["NEGATIVE", "POSITIVE"]
+    inputs = [(d["text"], label_map[d["label"]]) for d in data]
+    inputs, references = zip(*inputs)
+    return dict(inputs=list(inputs), references=list(references))
+
+
 def main():
     pass
 
diff --git a/evalem/models/__init__.py b/evalem/models/__init__.py
index 87296b0..8add094 100644
--- a/evalem/models/__init__.py
+++ b/evalem/models/__init__.py
@@ -1,3 +1,7 @@
 # flake8: noqa
 from ._base import HFLMWrapper, HFPipelineWrapper, ModelWrapper
-from .defaults import DefaultQAModelWrapper
+from .defaults import (
+    DefaultQAModelWrapper,
+    QuestionAnsweringHFPipelineWrapper,
+    TextClassificationHFPipelineWrapper,
+)
diff --git a/evalem/models/_base.py b/evalem/models/_base.py
index e035249..8a5ea5c 100644
--- a/evalem/models/_base.py
+++ b/evalem/models/_base.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 from abc import abstractmethod
-from typing import Iterable, Type
+from typing import Callable, Iterable, Type
 
 from transformers import Pipeline as HF_Pipeline
 from transformers import PreTrainedModel, PreTrainedTokenizerBase
@@ -16,19 +16,47 @@ class ModelWrapper(AbstractBase):
     all the upstream models into a nice wrapper.
     All the downstream implementation of `ModelWrapper` should implement
-    the `predict(...)` method.
+    the `_predict(...)` method, which is itself called by the `.predict(...)` method.
+
+    Args:
+        ```model```:
+            Input model that's being wrapped for a common interface
+        ```debug```: ```bool```
+            If enabled, debugging logs could be printed
+        ```kwargs```:
+            - ```inputs_preprocessor```
+                A `Callable` to apply on inputs.
+            - ```predictions_postprocessor```
+                A `Callable` to apply on model outputs/predictions.
 
     Note:
-        In order to convert to task-specific downstream format, we provide
-        `_map_predictions(...)` method which user can override. By default,
-        it is an identity that doesn't change the format egested by the model.
+        - Override `_preprocess_inputs` method to change the data format for
+          model input. Defaults to identity (no change).
+        - Override `_postprocess_predictions` to convert predictions to
+          task-specific downstream format. Defaults to identity (no change).
""" - def __init__(self, model, debug: bool = False, **kwargs) -> None: + def __init__( + self, + model, + debug: bool = False, + **kwargs, + ) -> None: super().__init__(debug=debug) self.model = model - @abstractmethod + # specifies how the input format conversion is done + self.inputs_preprocessor: Callable = ( + kwargs.get("inputs_preprocessor", self._preprocess_inputs) + or self._preprocess_inputs + ) + + # specifies how the predictions formatting is done + self.predictions_postprocessor: Callable = ( + kwargs.get("predictions_postprocessor", self._postprocess_predictions) + or self._postprocess_predictions + ) + def predict( self, inputs: Iterable, @@ -45,16 +73,37 @@ def predict( Returns: Iterable of predicted instance """ - raise NotImplementedError() + inputs = self.inputs_preprocessor(inputs, **kwargs) + predictions = self._predict(inputs, **kwargs) + return self.predictions_postprocessor(predictions, **kwargs) - def __call__( + @abstractmethod + def _predict( self, inputs: Iterable, **kwargs, ) -> Iterable[EvaluationPredictionInstance]: - return self.predict(inputs, **kwargs) + """ + Entrypoint method for predicting using the wrapped model + + Args: + ```inputs``` + Represent input dataset whose format depends on + downstream tasks. + + Returns: + Iterable of predicted instance + """ + raise NotImplementedError() - def _map_predictions(self, predictions: Iterable): + def _preprocess_inputs(self, inputs: Iterable, **kwargs) -> Iterable: + """ + A helper method to transform inputs suitable for model to ingest. + By default, it's an identity function. + """ + return inputs + + def _postprocess_predictions(self, predictions: Iterable, **kwargs): """ A helper method to transform predictions from the models into any downstream format. By default, it's an identity function. @@ -62,6 +111,13 @@ def _map_predictions(self, predictions: Iterable): # default -> Identity return predictions + def __call__( + self, + inputs: Iterable, + **kwargs, + ) -> Iterable[EvaluationPredictionInstance]: + return self.predict(inputs, **kwargs) + class HFWrapper(ModelWrapper): """ @@ -86,8 +142,9 @@ def __init__( self, model: Type[PreTrainedModel], tokenizer: Type[PreTrainedTokenizerBase], + **kwargs, ) -> None: - super().__init__(model=model) + super().__init__(model=model, **kwargs) self.tokenizer = tokenizer @@ -113,21 +170,29 @@ class HFPipelineWrapper(HFWrapper): pipe = hf_pipeline("question-answering") wrapped_model = HFPipelineWrapper(pipe) + # Or: if you want to specify how to post-process predictions, + # provide the processor explicitly. + wrapped_model = HFPipelineWrapper( + pipeline("question-answering", model="deepset/roberta-base-squad2"), + predictions_postprocessor=lambda xs: list(map(lambda x: x["answer"], xs)) + ) + + # compute predictions # (format?) 
             predictions = wrapped_model.predict()
 
     """
 
-    def __init__(self, pipeline: Type[HF_Pipeline], debug: bool = False) -> None:
+    def __init__(self, pipeline: Type[HF_Pipeline], **kwargs) -> None:
         """
         Args:
             ```pipeline```:
                 A HuggingFace pipeline object used for prediction
         """
-        super().__init__(model=pipeline)
+        super().__init__(model=pipeline, **kwargs)
 
-    def predict(self, inputs, **kwargs):
-        return self._map_predictions(self.model(inputs))
+    def _predict(self, inputs, **kwargs):
+        return self.model(inputs, **kwargs)
 
     @property
     def pipeline(self) -> HF_Pipeline:
diff --git a/evalem/models/defaults.py b/evalem/models/defaults.py
index 065a790..910bea6 100755
--- a/evalem/models/defaults.py
+++ b/evalem/models/defaults.py
@@ -1,43 +1,56 @@
 #!/usr/bin/env python3
 
-from typing import Iterable, List, Union
+from typing import Iterable, List, Optional, Union
 
 from transformers import pipeline as hf_pipeline
 
-from ..structures import EvaluationPredictionInstance, QAPredictionDTO
-from ._base import HFPipelineWrapper
+from ..structures import PredictionDTO, QAPredictionDTO
+from ._base import HFPipelineWrapper, PreTrainedModel, PreTrainedTokenizerBase
 
 
-class DefaultQAModelWrapper(HFPipelineWrapper):
+class QuestionAnsweringHFPipelineWrapper(HFPipelineWrapper):
     """
-    A default distill-bert-uncased base HF pipeline for
-    Question-Answering task.
-
-    The predictor expects the input format to be a `List[dict]`, where each
-    dict has the following keys:
-        - `context` (str): Paragraph/context fromw which question is asked
-        - `question` (str): Actual question string being asked
-
-    Example input dict:
-        .. code-block: python
-
-            {
-                "context": "There are 7 continents in the world."
-                "question": "How many continents are there?"
-            }
-
-    The `predict(...)` method finally returns `List[QAPredictionDTO]` structure.
+    A HFPipelineWrapper for question-answering.
+
+    Args:
+        ```model```: ```Type[PreTrainedModel]```
+            Which model to use?
+        ```tokenizer```: ```Type[PreTrainedTokenizerBase]```
+            Which tokenizer to use?
+        ```device```: ```str```
+            Which device to run the model on? cpu? gpu? mps?
     """
 
-    def __init__(self, device: str = "cpu") -> None:
-        super().__init__(pipeline=hf_pipeline("question-answering", device=device))
+    _task = "question-answering"
 
-    def _map_predictions(
+    def __init__(
+        self,
+        model: Optional[
+            Union[str, PreTrainedModel]
+        ] = "distilbert-base-cased-distilled-squad",
+        tokenizer: Optional[Union[str, PreTrainedTokenizerBase]] = None,
+        device: str = "cpu",
+        hf_params: Optional[dict] = None,
+        **kwargs,
+    ) -> None:
+        self.hf_params = hf_params or {}
+        super().__init__(
+            pipeline=hf_pipeline(
+                self._task,
+                model=model,
+                tokenizer=tokenizer,
+                device=device,
+                **self.hf_params,
+            ),
+            **kwargs,
+        )
+
+    def _postprocess_predictions(
         self,
         predictions: Union[dict, List[dict]],
-    ) -> Iterable[EvaluationPredictionInstance]:
+    ) -> Iterable[QAPredictionDTO]:
         """
-        This helper method converts the pipeline's default output format
+        This method converts the pipeline's default output format
         to the iterable of QAPredictionDTO.
 
         Args:
@@ -64,6 +77,86 @@ def _map_predictions(
         )
 
 
+class DefaultQAModelWrapper(HFPipelineWrapper):
+    """
+    Deprecated: Use `QuestionAnsweringHFPipelineWrapper()`
+    """
+
+    def __init__(self, device: str = "cpu") -> None:
+        raise DeprecationWarning(
+            "Deprecated ModelWrapper. Please use `QuestionAnsweringHFPipelineWrapper`",
+        )
+
+
+class TextClassificationHFPipelineWrapper(HFPipelineWrapper):
+    """
+    A HFPipelineWrapper for text classification.
+
+    Args:
+        ```model```: ```Type[PreTrainedModel]```
+            Which model to use?
+        ```tokenizer```: ```Type[PreTrainedTokenizerBase]```
+            Which tokenizer to use?
+        ```device```: ```str```
+            Which device to run the model on? cpu? gpu? mps?
+    """
+
+    _task = "text-classification"
+
+    def __init__(
+        self,
+        model: Optional[
+            Union[str, PreTrainedModel]
+        ] = "distilbert-base-uncased-finetuned-sst-2-english",
+        tokenizer: Optional[Union[str, PreTrainedTokenizerBase]] = None,
+        device: str = "cpu",
+        hf_params: Optional[dict] = None,
+        **kwargs,
+    ) -> None:
+        self.hf_params = hf_params or {}
+        super().__init__(
+            pipeline=hf_pipeline(
+                self._task,
+                model=model,
+                tokenizer=tokenizer,
+                device=device,
+                **self.hf_params,
+            ),
+            **kwargs,
+        )
+        # mapping from int code to actual label name.
+        self.label_map = kwargs.get("label_map", {})
+
+    def _postprocess_predictions(
+        self,
+        predictions: Union[dict, List[dict]],
+    ) -> Iterable[PredictionDTO]:
+        """
+        This method converts the pipeline's default output format
+        to the iterable of PredictionDTO.
+
+        Args:
+            ```predictions```: ```Union[dict, List[dict]]```
+                Predictions provided by the text-classification pipeline.
+
+        Returns:
+            Converted format: ```Iterable[PredictionDTO]```
+        """
+        if isinstance(predictions, dict):
+            predictions = [predictions]
+
+        # Note: Default model here is guaranteed to have these keys.
+        # Use label mapping. If mapping doesn't exist, just use the prediction.
+        predictions = map(
+            lambda p: PredictionDTO(
+                text=self.label_map.get(p["label"], p["label"]),
+                score=p.get("score"),
+            ),
+            predictions,
+        )
+        return list(predictions)
+
+
 def main():
     pass
 
diff --git a/requirements.txt b/requirements.txt
index d4c4cf9..d130660 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ numpy==1.24.2
 pandas==1.5.3
 pytest==7.2.1
 scikit-learn==1.2.1
+sentencepiece==0.1.97
 seqeval==1.2.2
 torch==1.13.1
 transformers==4.26.1
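
Reviewer note (not part of the patch): below is a minimal sketch of how the pieces added in this diff are meant to fit together: the IMDb loader, the text-classification pipeline wrapper, and the new TextClassificationEvaluator. Class and function names are taken from the diff itself; the exact way the evaluator is invoked at the end is an assumption and may need adjusting to the actual `Evaluator` entrypoint.

    .. code-block: python

        from evalem.evaluators import TextClassificationEvaluator
        from evalem.misc.datasets import get_imdb
        from evalem.models import TextClassificationHFPipelineWrapper

        # load a small, shuffled slice of the IMDb test split
        data = get_imdb(data_type="test", nsamples=100, shuffle=True)

        # wraps the default distilbert SST-2 text-classification pipeline added in this diff
        model = TextClassificationHFPipelineWrapper(device="cpu")

        # ModelWrapper.__call__ -> predict(): preprocess, run pipeline, postprocess to PredictionDTO
        predictions = model(data["inputs"])

        # assumption: the evaluator accepts predictions/references directly;
        # adjust if the Evaluator base class exposes a different entrypoint.
        evaluator = TextClassificationEvaluator()
        results = evaluator(predictions=predictions, references=data["references"])
        print(results)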