diff --git a/sdks/python/examples/evaluation_example.py b/sdks/python/examples/evaluation_example.py
index a05699f68f..3f887b1caa 100644
--- a/sdks/python/examples/evaluation_example.py
+++ b/sdks/python/examples/evaluation_example.py
@@ -1,9 +1,6 @@
 from typing import Dict, Any
 
-from opik.evaluation.metrics import (
-    IsJson,
-    Hallucination,
-)
+from opik.evaluation.metrics import IsJson, Hallucination
 from opik.evaluation import evaluate
 from opik import Opik, DatasetItem, track
 from opik.integrations.openai import track_openai
@@ -15,8 +12,6 @@
 
 openai_client = track_openai(openai.OpenAI())
 
-# contains_hello = Contains(searched_value="hello", name="ContainsHello")
-# contains_bye = Contains(searched_value="bye", name="ContainsBye")
 is_json = IsJson()
 hallucination = Hallucination()
 
@@ -63,6 +58,7 @@ def llm_task(item: DatasetItem) -> Dict[str, Any]:
         "output": response.choices[0].message.content,
         "input": item.input["message"],
         "context": item.input["context"],
+        "reference": "test",
     }
 
 
diff --git a/sdks/python/src/opik/evaluation/evaluator.py b/sdks/python/src/opik/evaluation/evaluator.py
index accb5d45f9..59e4a9439c 100644
--- a/sdks/python/src/opik/evaluation/evaluator.py
+++ b/sdks/python/src/opik/evaluation/evaluator.py
@@ -75,6 +75,9 @@ def evaluate(
         dataset_name=dataset.name,
         experiment_config=experiment_config,
     )
+
+    report.display_experiment_link(dataset.name, experiment.id)
+
     experiment_items = [
         experiment_item.ExperimentItem(
             dataset_item_id=result.test_case.dataset_item_id,
diff --git a/sdks/python/src/opik/evaluation/report.py b/sdks/python/src/opik/evaluation/report.py
index b2caecf1fd..ad420c9450 100644
--- a/sdks/python/src/opik/evaluation/report.py
+++ b/sdks/python/src/opik/evaluation/report.py
@@ -2,6 +2,7 @@
 from typing import List, Dict, Tuple
 from collections import defaultdict
 from . import test_result
+from .. import url_helpers
 
 
 def _format_time(seconds: float) -> str:
@@ -81,3 +82,13 @@ def display_experiment_results(
     console_container = console.Console()
     console_container.print(panel_content)
     console_container.print("Uploading results to Opik ... ")
+
+
+def display_experiment_link(dataset_name: str, experiment_id: str) -> None:
+    console_container = console.Console()
+    experiment_url = url_helpers.get_experiment_url(
+        dataset_name=dataset_name, experiment_id=experiment_id
+    )
+    console_container.print(
+        f"View the results [link={experiment_url}]in your Opik dashboard[/link]."
+    )
diff --git a/sdks/python/src/opik/url_helpers.py b/sdks/python/src/opik/url_helpers.py
index 463c329c28..e2e2d9115b 100644
--- a/sdks/python/src/opik/url_helpers.py
+++ b/sdks/python/src/opik/url_helpers.py
@@ -1,4 +1,5 @@
 import opik.config
+import opik.api_objects.opik_client
 
 
 def get_ui_url() -> str:
@@ -8,6 +9,21 @@ def get_ui_url() -> str:
     return opik_url_override.rstrip("/api")
 
 
+def get_experiment_url(dataset_name: str, experiment_id: str) -> str:
+    client = opik.api_objects.opik_client.get_client_cached()
+
+    # Get dataset id from name
+    dataset = client._rest_client.datasets.get_dataset_by_identifier(
+        dataset_name=dataset_name
+    )
+    dataset_id = dataset.id
+
+    config = opik.config.OpikConfig()
+    ui_url = get_ui_url()
+
+    return f'{ui_url}/{config.workspace}/experiments/{dataset_id}/compare?experiments=%5B"{experiment_id}"%5D'
+
+
 def get_projects_url() -> str:
     config = opik.config.OpikConfig()
     ui_url = get_ui_url()
diff --git a/sdks/python/tests/unit/evaluation/test_evaluate.py b/sdks/python/tests/unit/evaluation/test_evaluate.py
index 936a2e1af6..390de83bc5 100644
--- a/sdks/python/tests/unit/evaluation/test_evaluate.py
+++ b/sdks/python/tests/unit/evaluation/test_evaluate.py
@@ -3,7 +3,7 @@
 
 from opik.api_objects.dataset import dataset_item
 from opik.api_objects import opik_client
-from opik import evaluation, exceptions
+from opik import evaluation, exceptions, url_helpers
 from opik.evaluation import metrics
 from ...testlib import backend_emulator_message_processor, ANY_BUT_NONE, assert_equal
 from ...testlib.models import (
@@ -56,6 +56,9 @@ def say_task(dataset_item: dataset_item.DatasetItem):
     mock_create_experiment = mock.Mock()
     mock_create_experiment.return_value = mock_experiment
 
+    mock_get_experiment_url = mock.Mock()
+    mock_get_experiment_url.return_value = "any_url"
+
     with mock.patch.object(
         opik_client.Opik, "create_experiment", mock_create_experiment
     ):
@@ -64,13 +67,16 @@ def say_task(dataset_item: dataset_item.DatasetItem):
             "construct_online_streamer",
             mock_construct_online_streamer,
         ):
-            evaluation.evaluate(
-                dataset=mock_dataset,
-                task=say_task,
-                experiment_name="the-experiment-name",
-                scoring_metrics=[metrics.Equals()],
-                task_threads=1,
-            )
+            with mock.patch.object(
+                url_helpers, "get_experiment_url", mock_get_experiment_url
+            ):
+                evaluation.evaluate(
+                    dataset=mock_dataset,
+                    task=say_task,
+                    experiment_name="the-experiment-name",
+                    scoring_metrics=[metrics.Equals()],
+                    task_threads=1,
+                )
 
     mock_create_experiment.assert_called_once_with(
         dataset_name="the-dataset-name",
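A note on the URL format that `get_experiment_url` builds: `%5B` and `%5D` are the percent-encoded brackets `[` and `]`, so the `experiments` query parameter carries a JSON-style array containing the experiment ID. Below is a minimal standalone sketch of the same encoding; the `build_experiment_compare_url` helper and all IDs in it are hypothetical, for illustration only.

```python
from urllib.parse import quote


def build_experiment_compare_url(
    ui_url: str, workspace: str, dataset_id: str, experiment_id: str
) -> str:
    # The "experiments" query parameter is a JSON array of experiment IDs.
    # quote() encodes [ and ] as %5B and %5D; safe='"' keeps the quotes
    # literal, matching the hard-coded %5B"..."%5D in get_experiment_url.
    experiments_param = quote(f'["{experiment_id}"]', safe='"')
    return f"{ui_url}/{workspace}/experiments/{dataset_id}/compare?experiments={experiments_param}"


# Hypothetical values, for illustration only.
print(
    build_experiment_compare_url(
        ui_url="https://example.com/opik",
        workspace="my-workspace",
        dataset_id="dataset-123",
        experiment_id="experiment-456",
    )
)
# -> https://example.com/opik/my-workspace/experiments/dataset-123/compare?experiments=%5B"experiment-456"%5D
```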