Skip to content

Commit

Permalink
Modification to toxicity plugin PR (#432)
Browse files Browse the repository at this point in the history
* Changed the microservice to register as ServiceType.GUARDRAIL and to use TextDoc for both input and output

Signed-off-by: Tyler Wilbers <[email protected]>

* Simplify the Dockerfile by building on the langchain/langchain base image

Signed-off-by: Tyler Wilbers <[email protected]>

* sort requirements

Signed-off-by: Tyler Wilbers <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Tyler Wilbers <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
Tyler W and pre-commit-ci[bot] authored Aug 8, 2024
1 parent fe2fe47 commit 63650d0
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 86 deletions.
6 changes: 3 additions & 3 deletions comps/guardrails/toxicity_detection/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ docker build -t opea/guardrails-toxicity-detection:latest --build-arg https_prox
## 2.3 Run Docker Container with Microservice

```bash
docker run -d --rm --runtime=runc --name="guardrails-pii-detection-endpoint" -p 9091:9091 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/guardrails-toxicity-detection:latest
docker run -d --rm --runtime=runc --name="guardrails-toxicity-detection-endpoint" -p 9091:9091 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -e HF_TOKEN=${HUGGINGFACEHUB_API_TOKEN} opea/guardrails-toxicity-detection:latest
```

# 🚀3. Get Status of Microservice
Expand All @@ -58,7 +58,7 @@ Once microservice starts, users can use examples (bash or python) below to apply
```bash
curl localhost:9091/v1/toxicity
-X POST
-d '{"query":"How to poison your neighbor'\''s dog secretly"}'
-d '{"text":"How to poison your neighbor'\''s dog secretly"}'
-H 'Content-Type: application/json'
```

Expand All @@ -76,7 +76,7 @@ import json

proxies = {"http": ""}
url = "http://localhost:9091/v1/toxicity"
data = {"query": "How to poison your neighbor'''s dog without being caught?"}
data = {"text": "How to poison your neighbor's dog without being caught?"}

try:
resp = requests.post(url=url, data=data, proxies=proxies)
Expand Down
75 changes: 23 additions & 52 deletions comps/guardrails/toxicity_detection/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,60 +1,31 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM ubuntu:22.04

ARG TAG=main

RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
gcc-12 \
g++-12 \
make \
wget \
libnuma-dev \
numactl \
git \
pkg-config \
software-properties-common \
zlib1g-dev \
libssl-dev \
libffi-dev \
libbz2-dev \
libsqlite3-dev \
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 60 \
&& update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 60 \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*

# Install python
WORKDIR /tmp
RUN wget -q https://www.python.org/ftp/python/3.8.10/Python-3.8.10.tgz \
&& tar -xzvf Python-3.8.10.tgz
WORKDIR /tmp/Python-3.8.10
RUN ./configure --prefix=/usr/bin/python3.8 --enable-optimizations \
&& make -j \
&& make install \
&& update-alternatives --install /usr/bin/python python /usr/bin/python3.8/bin/python3.8 60 \
&& update-alternatives --install /usr/bin/pip pip /usr/bin/python3.8/bin/pip3 60 \
&& python -m pip install --no-cache-dir --upgrade pip setuptools \
&& pip install --no-cache-dir wheel \
&& rm -rf /tmp/* \
&& echo "export PATH=/usr/bin/python3.8:\$PATH" >> ~/.bashrc

RUN pip install --no-cache-dir torch==2.3.0+cpu --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir cmake==3.26.1 transformers==4.41.2 sentencepiece==0.1.99 accelerate==0.23.0 protobuf tiktoken transformers-stream-generator einops \
&& ln -s /usr/bin/python3.8/lib/python3.8/site-packages/cmake/data/bin/cmake /usr/bin/cmake

COPY comps /root/comps
FROM langchain/langchain:latest

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /root/comps/guardrails/toxicity_detection/requirements.txt
ENV LANG=C.UTF-8

ARG ARCH="cpu"

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

ENV PYTHONPATH=$PYTHONPATH:/root
USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
if [ ${ARCH} = "cpu" ]; then pip install torch --index-url https://download.pytorch.org/whl/cpu; fi && \
pip install --no-cache-dir -r /home/user/comps/guardrails/toxicity_detection/requirements.txt

RUN chmod +x /root/comps/guardrails/toxicity_detection/run.sh
ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /root/comps/guardrails/toxicity_detection/
WORKDIR /home/user/comps/guardrails/toxicity_detection/

ENTRYPOINT ["/root/comps/guardrails/toxicity_detection/run.sh"]
ENTRYPOINT ["python", "toxicity_detection.py"]
5 changes: 5 additions & 0 deletions comps/guardrails/toxicity_detection/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
aiohttp
docarray[full]
fastapi
httpx
huggingface_hub
langchain-community
langchain-huggingface
Expand All @@ -8,4 +10,7 @@ opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
prometheus-fastapi-instrumentator
pyyaml
requests
shortuuid
uvicorn
11 changes: 0 additions & 11 deletions comps/guardrails/toxicity_detection/run.sh

This file was deleted.

32 changes: 12 additions & 20 deletions comps/guardrails/toxicity_detection/toxicity_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,43 +12,35 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import pathlib
import sys
from datetime import datetime

cur_path = pathlib.Path(__file__).parent.resolve()
comps_path = os.path.join(cur_path, "../../../")
sys.path.append(comps_path)

import torch
from fastapi.responses import StreamingResponse
from langsmith import traceable

# from utils import initialize_model
from transformers import pipeline

from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
from comps import ServiceType, TextDoc, opea_microservices, register_microservice


@register_microservice(
name="opea_service@toxicity_detection",
service_type=ServiceType.LLM,
service_type=ServiceType.GUARDRAIL,
endpoint="/v1/toxicity",
host="0.0.0.0",
port=9091,
input_datatype=TextDoc,
output_datatype=TextDoc,
)
@traceable(run_type="llm")
async def llm_generate(input: LLMParamsDoc):
input_query = input.query
model_name_or_path = "citizenlab/distilbert-base-multilingual-cased-toxicity"
toxicity_classifier = pipeline("text-classification", model=model_name_or_path, tokenizer=model_name_or_path)
toxic = toxicity_classifier(input_query)
def llm_generate(input: TextDoc):
input_text = input.text
toxic = toxicity_pipeline(input_text)
print("done")
if toxic[0]["label"] == "toxic":
return f"\nI'm sorry, but your query or LLM's response is TOXIC with an score of {toxic[0]['score']:.2f} (0-1)!!!\n"
return TextDoc(text="Violated policies: toxicity, please check your input.", downstream_black_list=[".*"])
else:
return input_query
return TextDoc(text=input_text)


if __name__ == "__main__":
model = "citizenlab/distilbert-base-multilingual-cased-toxicity"
toxicity_pipeline = pipeline("text-classification", model=model, tokenizer=model)
opea_microservices["opea_service@toxicity_detection"].start()

0 comments on commit 63650d0

Please sign in to comment.