[OPIK-641] create guardrails integration (#954)
* Draft implementation

* Add guardrails integration test

* Add docstring

* Add guardrails workflow

* Add protection from decorating the same object multiple times.

* Fix lint errors

* Update tests

* Move hub downloads to workflow file

* Update workflow file

* Add gr api key from environments

* Update workflow file

* Configure gh token in workflow via cli command

* Update gr token configuration

* Pop 3.9 py version from test matrix since it doesn't support guardrails CLI

* Separate hub installations from lib installation in workflow

* Update workflow to not install local models

* Remove obsolete mv command

* Update test to use LLM check only, update integration to get model from validator if available

* Fix lint errors
alexkuzmik authored Dec 24, 2024
1 parent f8d6678 commit af3dbcf
Showing 9 changed files with 267 additions and 3 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/lib-guardrails-tests.yml
@@ -0,0 +1,57 @@
# Workflow to run Guardrails tests
#
# Please read inputs to provide correct values.
#
name: SDK Lib Guardrails Tests
run-name: "SDK Lib Guardrails Tests ${{ github.ref_name }} by @${{ github.actor }}"
env:
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  OPENAI_ORG_ID: ${{ secrets.OPENAI_ORG_ID }}
  GUARDRAILS_API_KEY: ${{ secrets.GUARDRAILS_API_KEY }}
on:
  workflow_call:

jobs:
  tests:
    name: Guardrails Python ${{matrix.python_version}}
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: sdks/python

    strategy:
      fail-fast: true
      matrix:
        python_version: ["3.10", "3.11", "3.12"]

    steps:
      - name: Check out code
        uses: actions/checkout@v4

      - name: Setup Python ${{matrix.python_version}}
        uses: actions/setup-python@v5
        with:
          python-version: ${{matrix.python_version}}

      - name: Install opik
        run: pip install .

      - name: Install test tools
        run: |
          cd ./tests
          pip install --no-cache-dir --disable-pip-version-check -r test_requirements.txt
      - name: Install lib
        run: |
          cd ./tests
          pip install --no-cache-dir --disable-pip-version-check -r library_integration/guardrails/requirements.txt
      - name: Install checks from guardrails hub
        run: |
          guardrails configure --token $GUARDRAILS_API_KEY --disable-metrics --enable-remote-inferencing;
          guardrails hub install hub://guardrails/politeness_check
      - name: Run tests
        run: |
          cd ./tests/library_integration/guardrails/
          python -m pytest -vv .
7 changes: 7 additions & 0 deletions .github/workflows/lib-integration-tests-runner.yml
@@ -17,6 +17,7 @@ on:
          - anthropic
          - aisuite
          - haystack
          - guardrails
  schedule:
    - cron: "0 0 */1 * *"
  pull_request:
@@ -80,3 +81,9 @@ jobs:
    if: contains(fromJSON('["haystack", "all"]'), needs.init_environment.outputs.LIBS)
    uses: ./.github/workflows/lib-haystack-tests.yml
    secrets: inherit

  guardrails_tests:
    needs: [init_environment]
    if: contains(fromJSON('["guardrails", "all"]'), needs.init_environment.outputs.LIBS)
    uses: ./.github/workflows/lib-guardrails-tests.yml
    secrets: inherit
3 changes: 3 additions & 0 deletions sdks/python/src/opik/integrations/guardrails/__init__.py
@@ -0,0 +1,3 @@
from .guardrails_tracker import track_guardrails

__all__ = ["track_guardrails"]
86 changes: 86 additions & 0 deletions sdks/python/src/opik/integrations/guardrails/guardrails_decorator.py
@@ -0,0 +1,86 @@
import logging
from typing import (
    Any,
    AsyncGenerator,
    Callable,
    Dict,
    Generator,
    List,
    Optional,
    Tuple,
    Union,
)

from guardrails import validators

from opik.decorator import arguments_helpers, base_track_decorator, inspect_helpers

LOGGER = logging.getLogger(__name__)

KWARGS_KEYS_TO_LOG_AS_INPUTS = ["value"]
RESPONSE_KEYS_TO_LOG_AS_OUTPUT = ["output"]


class GuardrailsValidatorValidateDecorator(base_track_decorator.BaseTrackDecorator):
    def _start_span_inputs_preprocessor(
        self,
        func: Callable,
        track_options: arguments_helpers.TrackOptions,
        args: Tuple,
        kwargs: Dict[str, Any],
    ) -> arguments_helpers.StartSpanParameters:
        name = track_options.name if track_options.name is not None else func.__name__
        metadata = track_options.metadata if track_options.metadata is not None else {}
        metadata.update({"created_from": "guardrails"})
        input = (
            inspect_helpers.extract_inputs(func, args, kwargs)
            if track_options.capture_input
            else None
        )

        validator_instance = func.__self__  # type: ignore
        model = (
            validator_instance.llm_callable
            if hasattr(validator_instance, "llm_callable")
            else None
        )
        if model is not None:
            metadata["model"] = model

        result = arguments_helpers.StartSpanParameters(
            name=name,
            input=input,
            type=track_options.type,
            metadata=metadata,
            project_name=track_options.project_name,
            model=model,
        )

        return result

    def _end_span_inputs_preprocessor(
        self, output: Any, capture_output: bool
    ) -> arguments_helpers.EndSpanParameters:
        assert isinstance(
            output,
            validators.ValidationResult,
        )
        tags = ["guardrails", output.outcome]

        result = arguments_helpers.EndSpanParameters(
            output=output,
            metadata=output.metadata,
            tags=tags,
        )

        return result

    def _generators_handler(
        self,
        output: Any,
        capture_output: bool,
        generations_aggregator: Optional[Callable[[List[Any]], str]],
    ) -> Optional[Union[Generator, AsyncGenerator]]:
        return super()._generators_handler(
            output, capture_output, generations_aggregator
        )
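
The commit log's "protection from decorating the same object multiple times" shows up in the tracker below as a check for an opik_tracked attribute on the validator's async_validate method. A minimal sketch of that marker pattern, assuming (as an illustration only, not the actual opik.decorator internals) that the real track decorator sets the flag on the wrappers it returns:

import functools

def track(func):
    # Skip functions that are already wrapped (marker set below).
    if getattr(func, "opik_tracked", False):
        return func

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Span start/end bookkeeping would happen here.
        return func(*args, **kwargs)

    wrapper.opik_tracked = True  # marker checked before re-decorating
    return wrapper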
39 changes: 39 additions & 0 deletions sdks/python/src/opik/integrations/guardrails/guardrails_tracker.py
@@ -0,0 +1,39 @@
from typing import Optional

import guardrails

from . import guardrails_decorator


def track_guardrails(
    guard: guardrails.Guard, project_name: Optional[str] = None
) -> guardrails.Guard:
    """
    Adds Opik tracking to a guardrails Guard instance.

    Every validation step will be logged as a trace.

    Args:
        guard: An instance of the Guard object.
        project_name: The name of the project to log data to.

    Returns:
        The modified Guard instance with Opik tracking enabled for its validators.
    """
    validators = guard._validators
    decorator_factory = guardrails_decorator.GuardrailsValidatorValidateDecorator()

    for validator in validators:
        if hasattr(validator.async_validate, "opik_tracked"):
            continue

        validate_decorator = decorator_factory.track(
            name=f"{validator.rail_alias}.validate",
            project_name=project_name,
            type="llm" if hasattr(validator, "llm_callable") else "general",
        )
        # Decorate the async version because it is the one called
        # under the hood by the guardrails engine.
        setattr(
            validator, "async_validate", validate_decorator(validator.async_validate)
        )

    return guard
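
For context, a minimal usage sketch of the new integration, mirroring the test below; the project name is illustrative, and a configured OpenAI key plus the hub-installed politeness check are assumed:

from guardrails import Guard, OnFailAction
from guardrails.hub import PolitenessCheck  # installed via `guardrails hub install`

from opik.integrations.guardrails import track_guardrails

politeness_check = PolitenessCheck(
    llm_callable="gpt-3.5-turbo", on_fail=OnFailAction.NOOP
)
guard = Guard().use_many(politeness_check)

# After this call, each validator's async_validate is wrapped,
# and every validation run is logged to Opik as a trace.
guard = track_guardrails(guard, project_name="my-project")

result = guard.validate("Would you be so kind to pass me a cup of tea?")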
Empty file.
1 change: 1 addition & 0 deletions sdks/python/tests/library_integration/guardrails/requirements.txt
@@ -0,0 +1 @@
guardrails-ai
@@ -0,0 +1,69 @@
import pytest
from guardrails import Guard, OnFailAction
from guardrails.hub import PolitenessCheck

import opik
from opik.config import OPIK_PROJECT_DEFAULT_NAME
from opik.integrations.guardrails.guardrails_tracker import track_guardrails

from ...testlib import ANY_BUT_NONE, ANY_DICT, SpanModel, TraceModel, assert_equal


@pytest.mark.parametrize(
    "project_name, expected_project_name",
    [
        (None, OPIK_PROJECT_DEFAULT_NAME),
        ("guardrails-integration-test", "guardrails-integration-test"),
    ],
)
def test_guardrails__trace_and_span_per_one_validation_check(
    fake_backend, ensure_openai_configured, project_name, expected_project_name
):
    politeness_check = PolitenessCheck(
        llm_callable="gpt-3.5-turbo", on_fail=OnFailAction.NOOP
    )

    guard: Guard = Guard().use_many(politeness_check)
    guard = track_guardrails(guard, project_name=project_name)

    result = guard.validate(
        "Would you be so kind to pass me a cup of tea?",
    )  # a polite request, so the check is expected to pass
    expected_result_tag = "pass" if result.validation_passed else "fail"
    opik.flush_tracker()

    EXPECTED_TRACE_TREE = TraceModel(
        id=ANY_BUT_NONE,
        name="guardrails/politeness_check.validate",
        input={
            "value": "Would you be so kind to pass me a cup of tea?",
            "metadata": ANY_DICT,
        },
        output=ANY_BUT_NONE,
        tags=["guardrails", expected_result_tag],
        metadata={"created_from": "guardrails", "model": "gpt-3.5-turbo"},
        start_time=ANY_BUT_NONE,
        end_time=ANY_BUT_NONE,
        project_name=expected_project_name,
        spans=[
            SpanModel(
                id=ANY_BUT_NONE,
                type="llm",
                name="guardrails/politeness_check.validate",
                input={
                    "value": "Would you be so kind to pass me a cup of tea?",
                    "metadata": ANY_DICT,
                },
                output=ANY_BUT_NONE,
                tags=["guardrails", expected_result_tag],
                metadata={"created_from": "guardrails", "model": "gpt-3.5-turbo"},
                start_time=ANY_BUT_NONE,
                end_time=ANY_BUT_NONE,
                project_name=expected_project_name,
                model="gpt-3.5-turbo",
                spans=[],
            )
        ],
    )

    assert_equal(EXPECTED_TRACE_TREE, fake_backend.trace_trees[0])
8 changes: 5 additions & 3 deletions sdks/python/tests/testlib/assert_helpers.py
@@ -40,9 +40,11 @@ def prepare_difference_report(expected: Any, actual: Any) -> str:


 def assert_equal(expected, actual):
-    # expected MUST be left argument so that __eq__ operators
-    # from our ANY* comparison helpers were called instead of __eq__ operators
-    # of the actual object
+    """
+    `expected` MUST be the left argument so that the __eq__ operators
+    from our ANY* comparison helpers are called instead of the __eq__
+    operators of the actual object.
+    """
     assert expected == actual, f"Details: {prepare_difference_report(actual, expected)}"
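
Why the argument order matters: Python tries the left operand's __eq__ first and only falls back to the right operand's reflected __eq__ when the left one returns NotImplemented. A self-contained sketch of the failure mode (the class names here are hypothetical, not the actual testlib helpers):

class AnyButNone:
    # Permissive comparison helper: equal to anything that is not None.
    def __eq__(self, other):
        return other is not None

class StrictValue:
    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        # Returns False for foreign types instead of NotImplemented,
        # so the reflected comparison on the right operand never runs.
        return isinstance(other, StrictValue) and self.value == other.value

assert AnyButNone() == StrictValue(42)        # helper's __eq__ runs: passes
assert not (StrictValue(42) == AnyButNone())  # helper never consulted: False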

