From 10e1c5a7bdfcea178062fdeef8004e3e06a279c2 Mon Sep 17 00:00:00 2001
From: JarbasAI <33701864+JarbasAl@users.noreply.github.com>
Date: Sun, 21 Jul 2024 21:18:30 +0100
Subject: [PATCH] feat/initial_implementation (#1)

* feat/initial_implementation

* feat/initial_implementation
---
 README.md                         | 74 +++++++++++++++++++++++++++++++
 ovos_flashrank_plugin/__init__.py | 38 ++++++++++++++++
 requirements.txt                  |  4 ++
 3 files changed, 116 insertions(+)
 create mode 100644 README.md
 create mode 100644 ovos_flashrank_plugin/__init__.py
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9400259
--- /dev/null
+++ b/README.md
@@ -0,0 +1,74 @@
+# FlashRankMultipleChoiceSolver for OVOS
+
+The `FlashRankMultipleChoiceSolver` plugin for Open Voice OS (OVOS) selects the best answer to a question from a list of options. It uses the FlashRank library to score and rank candidate answers by their relevance to the given query.
+
+## Features
+
+- **Rerank Options**: Reranks a list of options by their relevance to the query.
+- **Customizable Model**: Allows the use of different ranking models.
+- **Seamless Integration**: Designed to work with the OVOS plugin manager.
+
+### Important Note on FlashRank and Llama-CPP Compatibility
+
+Installing FlashRank can downgrade your existing `llama-cpp-python` version, which matters for GPU support and performance, especially with large language models (LLMs). This issue is tracked in [FlashRank's GitHub repository](https://github.com/PrithivirajDamodaran/FlashRank/issues/29).
+
+**Workaround for GPU Support with `llama-cpp-python`:**
+
+If you need GPU support with `llama-cpp-python`, reinstall it with the appropriate CMake arguments after installing FlashRank:
+```bash
+CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --no-cache-dir
+```
+
+Be aware that installing FlashRank again may undo this custom build.
+
+## Usage
+
+### Example Usage
+
+```python
+if __name__ == "__main__":
+    from ovos_flashrank_plugin import FlashRankMultipleChoiceSolver
+
+    p = FlashRankMultipleChoiceSolver()
+    a = p.rerank("what is the speed of light", [
+        "very fast", "10m/s", "the speed of light is C"
+    ])
+    print(a)
+    # Expected output:
+    # [(0.999819, 'the speed of light is C'),
+    #  (2.7686672e-05, 'very fast'),
+    #  (1.2555749e-05, '10m/s')]
+
+    a = p.select_answer("what is the speed of light", [
+        "very fast", "10m/s", "the speed of light is C"
+    ])
+    print(a)  # Expected output: the speed of light is C
+```
+
+## Configuration
+
+The `FlashRankMultipleChoiceSolver` can be configured to use different ranking models. By default, it uses the `ms-marco-TinyBERT-L-2-v2` model. You can specify a different model in the configuration if needed.
+
+Example configuration:
+```json
+{
+  "model": "desired-model-name"
+}
+```
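+
+You can also pass the configuration when constructing the solver directly in Python. A minimal sketch, assuming the constructor accepts a `config` dict and exposes it as `self.config` (which the plugin reads via `self.config.get("model", ...)`); `rank-T5-flan` is just one of the models listed below:
+
+```python
+from ovos_flashrank_plugin import FlashRankMultipleChoiceSolver
+
+# assumption: the OVOS solver base class accepts a config dict at
+# construction time and exposes it as self.config
+solver = FlashRankMultipleChoiceSolver(config={"model": "rank-T5-flan"})
+
+answer = solver.select_answer("what is the speed of light", [
+    "very fast", "10m/s", "the speed of light is C"
+])
+print(answer)  # the speed of light is C
+```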
+
+## Available Models
+
+The following models are available for use with the `FlashRankMultipleChoiceSolver`:
+
+| Model Name | Description |
+|---------------------------|-------------------------------------------------------------------------------------------------|
+| ms-marco-TinyBERT-L-2-v2 | (default) [Model card](https://www.modelcards.com/ms-marco-TinyBERT-L-2-v2) |
+| ms-marco-MiniLM-L-12-v2 | [Model card](https://www.modelcards.com/ms-marco-MiniLM-L-12-v2) |
+| rank-T5-flan | Best non-cross-encoder reranker. [Model card](https://www.modelcards.com/rank-T5-flan) |
+| ms-marco-MultiBERT-L-12 | Multilingual, supports 100+ languages |
+| ce-esci-MiniLM-L12-v2 | Fine-tuned on the Amazon ESCI dataset (notable because most models are fine-tuned on MS MARCO Bing queries). [Model card](https://www.modelcards.com/ce-esci-MiniLM-L12-v2) |
+| rank_zephyr_7b_v1_full | 4-bit-quantized GGUF; offers very competitive performance, a large context window, and is relatively fast for a 4 GB model. [Model card](https://www.modelcards.com/rank_zephyr_7b_v1_full) |
\ No newline at end of file
diff --git a/ovos_flashrank_plugin/__init__.py b/ovos_flashrank_plugin/__init__.py
new file mode 100644
index 0000000..829e594
--- /dev/null
+++ b/ovos_flashrank_plugin/__init__.py
@@ -0,0 +1,38 @@
+from typing import Optional, List, Tuple
+from ovos_plugin_manager.templates.solvers import MultipleChoiceSolver
+from flashrank import Ranker, RerankRequest
+
+
+class FlashRankMultipleChoiceSolver(MultipleChoiceSolver):
+    """Select the best answer to a question from a list of options."""
+
+    # plugin methods to override
+    def rerank(self, query: str, options: List[str],
+               context: Optional[dict] = None) -> List[Tuple[float, str]]:
+        """
+        Rank the options list, returning a list of (score, text) tuples,
+        best match first.
+        """
+        # the ranking model is configurable; defaults to the lightweight
+        # ms-marco-TinyBERT-L-2-v2 reranker
+        ranker = Ranker(model_name=self.config.get("model", "ms-marco-TinyBERT-L-2-v2"))
+        passages = [
+            {"text": o}
+            for o in options
+        ]
+        rerankrequest = RerankRequest(query=query, passages=passages)
+        # FlashRank returns the passages sorted by relevance score, highest first
+        results = ranker.rerank(rerankrequest)
+        return [(r["score"], r["text"]) for r in results]
+
+
+if __name__ == "__main__":
+    p = FlashRankMultipleChoiceSolver()
+    a = p.rerank("what is the speed of light", [
+        "very fast", "10m/s", "the speed of light is C"
+    ])
+    print(a)
+    # [(0.999819, 'the speed of light is C'),
+    #  (2.7686672e-05, 'very fast'),
+    #  (1.2555749e-05, '10m/s')]
+
+    a = p.select_answer("what is the speed of light", [
+        "very fast", "10m/s", "the speed of light is C"
+    ])
+    print(a)  # the speed of light is C
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..995fb57
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+ovos-plugin-manager
+# TODO - flashrank might downgrade existing llama-cpp-python versions!
+# see https://github.com/PrithivirajDamodaran/FlashRank/issues/29
+flashrank
\ No newline at end of file