From 10e1c5a7bdfcea178062fdeef8004e3e06a279c2 Mon Sep 17 00:00:00 2001
From: JarbasAI <33701864+JarbasAl@users.noreply.github.com>
Date: Sun, 21 Jul 2024 21:18:30 +0100
Subject: [PATCH] feat/initial_implementation (#1)

* feat/initial_implementation

* feat/initial_implementation
---
 README.md                         | 74 +++++++++++++++++++++++++++++++
 ovos_flashrank_plugin/__init__.py | 38 ++++++++++++++++
 requirements.txt                  |  4 ++
 3 files changed, 116 insertions(+)
 create mode 100644 README.md
 create mode 100644 ovos_flashrank_plugin/__init__.py
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9400259
--- /dev/null
+++ b/README.md
@@ -0,0 +1,74 @@
+# FlashRankMultipleChoiceSolver for OVOS
+
+The `FlashRankMultipleChoiceSolver` plugin for Open Voice OS (OVOS) selects the best answer to a question from a list of options. It uses the FlashRank library to score and rank candidate answers by their relevance to the given query.
+
+## Features
+
+- **Rerank Options**: Reranks a list of options by their relevance to the query.
+- **Customizable Model**: Allows the use of different ranking models.
+- **Seamless Integration**: Designed to work with the OVOS plugin manager.
+
+### Important Note on FlashRank and Llama-CPP Compatibility
+
+Installing FlashRank can downgrade your existing `llama-cpp-python` version, which matters for GPU support and performance, especially with large language models (LLMs). This issue is tracked in [FlashRank's GitHub repository](https://github.com/PrithivirajDamodaran/FlashRank/issues/29).
+
+**Workaround for GPU Support with `llama-cpp-python`:**
+
+If you need GPU support with `llama-cpp-python`, reinstall it with the appropriate CMake arguments after installing FlashRank:
+```bash
+CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --no-cache-dir
+```
+
+Be aware that installing FlashRank again may undo this custom build.
+
+## Usage
+
+### Example Usage
+
+```python
+if __name__ == "__main__":
+    from ovos_flashrank_plugin import FlashRankMultipleChoiceSolver
+
+    p = FlashRankMultipleChoiceSolver()
+    a = p.rerank("what is the speed of light", [
+        "very fast", "10m/s", "the speed of light is C"
+    ])
+    print(a)
+    # Expected output:
+    # [(0.999819, 'the speed of light is C'),
+    #  (2.7686672e-05, 'very fast'),
+    #  (1.2555749e-05, '10m/s')]
+
+    a = p.select_answer("what is the speed of light", [
+        "very fast", "10m/s", "the speed of light is C"
+    ])
+    print(a)  # Expected output: the speed of light is C
+```
+
+## Configuration
+
+The `FlashRankMultipleChoiceSolver` can be configured to use different ranking models. By default, it uses the `ms-marco-TinyBERT-L-2-v2` model. You can specify a different model in the configuration if needed.
+
+Example configuration:
+```json
+{
+  "model": "desired-model-name"
+}
+```
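+
+You can also pass the configuration when constructing the solver directly in Python. A minimal sketch, assuming the constructor accepts a `config` dict and exposes it as `self.config` (which the plugin reads via `self.config.get("model", ...)`); `rank-T5-flan` is just one of the models listed below:
+
+```python
+from ovos_flashrank_plugin import FlashRankMultipleChoiceSolver
+
+# assumption: the OVOS solver base class accepts a config dict at
+# construction time and exposes it as self.config
+solver = FlashRankMultipleChoiceSolver(config={"model": "rank-T5-flan"})
+
+answer = solver.select_answer("what is the speed of light", [
+    "very fast", "10m/s", "the speed of light is C"
+])
+print(answer)  # the speed of light is C
+```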
+
+## Available Models
+
+The following models are available for use with the `FlashRankMultipleChoiceSolver`:
+
+| Model Name | Description |
+|---------------------------|-------------------------------------------------------------------------------------------------|
+| ms-marco-TinyBERT-L-2-v2 | (default) [Model card](https://www.modelcards.com/ms-marco-TinyBERT-L-2-v2) |
+| ms-marco-MiniLM-L-12-v2 | [Model card](https://www.modelcards.com/ms-marco-MiniLM-L-12-v2) |
+| rank-T5-flan | Best non-cross-encoder reranker. [Model card](https://www.modelcards.com/rank-T5-flan) |
+| ms-marco-MultiBERT-L-12 | Multilingual, supports 100+ languages |
+| ce-esci-MiniLM-L12-v2 | Fine-tuned on the Amazon ESCI dataset (notable because most models are fine-tuned on MS MARCO Bing queries). [Model card](https://www.modelcards.com/ce-esci-MiniLM-L12-v2) |
+| rank_zephyr_7b_v1_full | 4-bit-quantized GGUF; offers very competitive performance, a large context window, and is relatively fast for a 4 GB model. [Model card](https://www.modelcards.com/rank_zephyr_7b_v1_full) |
\ No newline at end of file
diff --git a/ovos_flashrank_plugin/__init__.py b/ovos_flashrank_plugin/__init__.py
new file mode 100644
index 0000000..829e594
--- /dev/null
+++ b/ovos_flashrank_plugin/__init__.py
@@ -0,0 +1,38 @@
+from typing import Optional, List, Tuple
+from ovos_plugin_manager.templates.solvers import MultipleChoiceSolver
+from flashrank import Ranker, RerankRequest
+
+
+class FlashRankMultipleChoiceSolver(MultipleChoiceSolver):
+    """Select the best answer to a question from a list of options."""
+
+    # plugin methods to override
+    def rerank(self, query: str, options: List[str],
+               context: Optional[dict] = None) -> List[Tuple[float, str]]:
+        """
+        Rank the options list, returning a list of (score, text) tuples,
+        best match first.
+        """
+        # the ranking model is configurable; defaults to the lightweight
+        # ms-marco-TinyBERT-L-2-v2 reranker
+        ranker = Ranker(model_name=self.config.get("model", "ms-marco-TinyBERT-L-2-v2"))
+        passages = [
+            {"text": o}
+            for o in options
+        ]
+        rerankrequest = RerankRequest(query=query, passages=passages)
+        # FlashRank returns the passages sorted by relevance score, highest first
+        results = ranker.rerank(rerankrequest)
+        return [(r["score"], r["text"]) for r in results]
+
+
+if __name__ == "__main__":
+    p = FlashRankMultipleChoiceSolver()
+    a = p.rerank("what is the speed of light", [
+        "very fast", "10m/s", "the speed of light is C"
+    ])
+    print(a)
+    # [(0.999819, 'the speed of light is C'),
+    #  (2.7686672e-05, 'very fast'),
+    #  (1.2555749e-05, '10m/s')]
+
+    a = p.select_answer("what is the speed of light", [
+        "very fast", "10m/s", "the speed of light is C"
+    ])
+    print(a)  # the speed of light is C
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..995fb57
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+ovos-plugin-manager
+# TODO - flashrank might downgrade existing llama-cpp-python versions!
+# see https://github.com/PrithivirajDamodaran/FlashRank/issues/29
+flashrank
\ No newline at end of file