Experiments complex metrics (#8)
* feat: new class and API for metrics

* update system test

* update high level log method

* fix system test

* update example

* change from system schema to google schema
jaycee-li authored Sep 26, 2022
1 parent 2a906c8 commit 3ca2d28
Showing 9 changed files with 419 additions and 3 deletions.
4 changes: 4 additions & 0 deletions google/cloud/aiplatform/__init__.py
@@ -86,6 +86,9 @@

log_params = metadata.metadata._experiment_tracker.log_params
log_metrics = metadata.metadata._experiment_tracker.log_metrics
log_classification_metrics = (
metadata.metadata._experiment_tracker.log_classification_metrics
)
get_experiment_df = metadata.metadata._experiment_tracker.get_experiment_df
start_run = metadata.metadata._experiment_tracker.start_run
start_execution = metadata.metadata._experiment_tracker.start_execution
@@ -110,6 +113,7 @@
"log",
"log_params",
"log_metrics",
"log_classification_metrics",
"log_time_series_metrics",
"get_experiment_df",
"get_pipeline_df",
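A quick sanity-check sketch (not part of the commit) of what this hunk wires up: the experiment tracker's new method is re-exported as a module-level function and added to the package's public symbol list.

```python
from google.cloud import aiplatform

# After this change, the new API is a module-level callable and part of __all__.
assert callable(aiplatform.log_classification_metrics)
assert "log_classification_metrics" in aiplatform.__all__
```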
156 changes: 156 additions & 0 deletions google/cloud/aiplatform/metadata/experiment_run_resource.py
@@ -39,6 +39,7 @@
from google.cloud.aiplatform.metadata import metadata
from google.cloud.aiplatform.metadata import resource
from google.cloud.aiplatform.metadata import utils as metadata_utils
from google.cloud.aiplatform.metadata import schema
from google.cloud.aiplatform.tensorboard import tensorboard_resource
from google.cloud.aiplatform.utils import rest_utils

@@ -990,6 +991,103 @@ def log_metrics(self, metrics: Dict[str, Union[float, int, str]]):
# TODO: query the latest metrics artifact resource before logging.
self._metadata_node.update(metadata={constants._METRIC_KEY: metrics})

def log_classification_metrics(
self,
*,
labels: Optional[List[str]] = None,
matrix: Optional[List[List[int]]] = None,
fpr: Optional[List[float]] = None,
tpr: Optional[List[float]] = None,
threshold: Optional[List[float]] = None,
display_name: Optional[str] = None,
):
"""Create an artifact for classification metrics and log to ExperimentRun. Currently support confusion matrix and ROC curve.
```
my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment')
my_run.log_classification_metrics(
display_name='my-classification-metrics',
labels=['cat', 'dog'],
matrix=[[9, 1], [1, 9]],
fpr=[0.1, 0.5, 0.9],
tpr=[0.1, 0.7, 0.9],
threshold=[0.9, 0.5, 0.1],
)
```
Args:
labels (List[str]):
Optional. List of label names for the confusion matrix. Must be set if 'matrix' is set.
matrix (List[List[int]]):
Optional. Values for the confusion matrix. Must be set if 'labels' is set.
fpr (List[float]):
Optional. List of false positive rates for the ROC curve. Must be set if 'tpr' or 'threshold' is set.
tpr (List[float]):
Optional. List of true positive rates for the ROC curve. Must be set if 'fpr' or 'threshold' is set.
threshold (List[float]):
Optional. List of thresholds for the ROC curve. Must be set if 'fpr' or 'tpr' is set.
display_name (str):
Optional. The user-defined name for the classification metric artifact.
Raises:
ValueError: if 'labels' and 'matrix' are not set together,
or if 'labels' and 'matrix' are not of the same length,
or if 'fpr', 'tpr', and 'threshold' are not set together,
or if 'fpr', 'tpr', and 'threshold' are not of the same length.
"""
if (labels or matrix) and not (labels and matrix):
raise ValueError("labels and matrix must be set together.")

if (fpr or tpr or threshold) and not (fpr and tpr and threshold):
raise ValueError("fpr, tpr, and thresholds must be set together.")

metadata = {}
if labels and matrix:
if len(matrix) != len(labels):
raise ValueError(
"Length of labels and matrix must be the same. "
"Got lengths {} and {} respectively.".format(
len(labels), len(matrix)
)
)

confusion_matrix = {
"annotationSpecs": [{"displayName": label} for label in labels],
"rows": matrix,
}
metadata["confusionMatrix"] = confusion_matrix

if fpr and tpr and threshold:
if (
len(fpr) != len(tpr)
or len(fpr) != len(threshold)
or len(tpr) != len(threshold)
):
raise ValueError(
"Length of fpr, tpr and threshold must be the same. "
"Got lengths {}, {} and {} respectively.".format(
len(fpr), len(tpr), len(threshold)
)
)

metadata["confidenceMetrics"] = [
{
"confidenceThreshold": threshold[i],
"recall": tpr[i],
"falsePositiveRate": fpr[i],
}
for i in range(len(fpr))
]

classification_metrics = schema.google.artifact_schema.ClassificationMetrics(
display_name=display_name,
metadata=metadata,
)
classification_metrics_artifact = classification_metrics.create()
self._metadata_node.add_artifacts_and_executions(
artifact_resource_names=[classification_metrics_artifact.resource_name]
)

@_v1_not_supported
def get_time_series_data_frame(self) -> "pd.DataFrame": # noqa: F821
"""Returns all time series in this Run as a DataFrame.
@@ -1149,6 +1247,64 @@ def get_metrics(self) -> Dict[str, Union[float, int, str]]:
else:
return self._metadata_node.metadata[constants._METRIC_KEY]

def get_classification_metrics(self) -> List[Dict[str, Union[str, List]]]:
"""Get all the classification metrics logged to this run.
```
my_run = aiplatform.ExperimentRun('my-run', experiment='my-experiment')
metric = my_run.get_classification_metrics()[0]
print(metric)
## print result:
{
"id": "e6c893a4-222e-4c60-a028-6a3b95dfc109",
"display_name": "my-classification-metrics",
"labels": ["cat", "dog"],
"matrix": [[9,1], [1,9]],
"fpr": [0.1, 0.5, 0.9],
"tpr": [0.1, 0.7, 0.9],
"thresholds": [0.9, 0.5, 0.1]
}
```
Returns:
List of classification metrics logged to this experiment run.
"""

artifact_list = artifact.Artifact.list(
filter=metadata_utils._make_filter_string(
in_context=[self.resource_name],
schema_title="google.ClassificationMetrics",
),
project=self.project,
location=self.location,
credentials=self.credentials,
)

metrics = []
for metric_artifact in artifact_list:
metric = {}
metric["id"] = metric_artifact.name
metric["display_name"] = metric_artifact.display_name
metadata = metric_artifact.metadata
if "confusionMatrix" in metadata:
metric["labels"] = [
d["displayName"]
for d in metadata["confusionMatrix"]["annotationSpecs"]
]
metric["matrix"] = metadata["confusionMatrix"]["rows"]

if "confidenceMetrics" in metadata:
metric["fpr"] = [
d["falsePositiveRate"] for d in metadata["confidenceMetrics"]
]
metric["tpr"] = [d["recall"] for d in metadata["confidenceMetrics"]]
metric["threshold"] = [
d["confidenceThreshold"] for d in metadata["confidenceMetrics"]
]
metrics.append(metric)

return metrics

@_v1_not_supported
def associate_execution(self, execution: execution.Execution):
"""Associate an execution to this experiment run.
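For orientation, a sketch of the artifact metadata that `log_classification_metrics` should assemble for its docstring example inputs; the dict below is inferred from the `confusionMatrix`/`confidenceMetrics` construction above, not copied from the commit. `get_classification_metrics` then flattens this back into the `labels`/`matrix`/`fpr`/`tpr`/`threshold` dict shown in its own docstring.

```python
# Inferred payload for labels=['cat', 'dog'], matrix=[[9, 1], [1, 9]],
# fpr=[0.1, 0.5, 0.9], tpr=[0.1, 0.7, 0.9], threshold=[0.9, 0.5, 0.1].
expected_metadata = {
    "confusionMatrix": {
        "annotationSpecs": [{"displayName": "cat"}, {"displayName": "dog"}],
        "rows": [[9, 1], [1, 9]],
    },
    "confidenceMetrics": [
        {"confidenceThreshold": 0.9, "recall": 0.1, "falsePositiveRate": 0.1},
        {"confidenceThreshold": 0.5, "recall": 0.7, "falsePositiveRate": 0.5},
        {"confidenceThreshold": 0.1, "recall": 0.9, "falsePositiveRate": 0.9},
    ],
}
```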
59 changes: 57 additions & 2 deletions google/cloud/aiplatform/metadata/metadata.py
@@ -15,8 +15,7 @@
# limitations under the License.
#


from typing import Dict, Union, Optional, Any
from typing import Dict, Union, Optional, Any, List

from google.api_core import exceptions
from google.auth import credentials as auth_credentials
@@ -371,6 +370,62 @@ def log_metrics(self, metrics: Dict[str, Union[float, int, str]]):
# query the latest metrics artifact resource before logging.
self._experiment_run.log_metrics(metrics=metrics)

def log_classification_metrics(
self,
*,
labels: Optional[List[str]] = None,
matrix: Optional[List[List[int]]] = None,
fpr: Optional[List[float]] = None,
tpr: Optional[List[float]] = None,
threshold: Optional[List[float]] = None,
display_name: Optional[str] = None,
):
"""Create an artifact for classification metrics and log to ExperimentRun. Currently support confusion matrix and ROC curve.
```
aiplatform.log_classification_metrics(
display_name='my-classification-metrics',
labels=['cat', 'dog'],
matrix=[[9, 1], [1, 9]],
fpr=[0.1, 0.5, 0.9],
tpr=[0.1, 0.7, 0.9],
threshold=[0.9, 0.5, 0.1],
)
```
Args:
labels (List[str]):
Optional. List of label names for the confusion matrix. Must be set if 'matrix' is set.
matrix (List[List[int]]):
Optional. Values for the confusion matrix. Must be set if 'labels' is set.
fpr (List[float]):
Optional. List of false positive rates for the ROC curve. Must be set if 'tpr' or 'threshold' is set.
tpr (List[float]):
Optional. List of true positive rates for the ROC curve. Must be set if 'fpr' or 'threshold' is set.
threshold (List[float]):
Optional. List of thresholds for the ROC curve. Must be set if 'fpr' or 'tpr' is set.
display_name (str):
Optional. The user-defined name for the classification metric artifact.
Raises:
ValueError: if 'labels' and 'matrix' are not set together,
or if 'labels' and 'matrix' are not of the same length,
or if 'fpr', 'tpr', and 'threshold' are not set together,
or if 'fpr', 'tpr', and 'threshold' are not of the same length.
"""

self._validate_experiment_and_run(method_name="log_classification_metrics")
# query the latest metrics artifact resource before logging.
self._experiment_run.log_classification_metrics(
display_name=display_name,
labels=labels,
matrix=matrix,
fpr=fpr,
tpr=tpr,
threshold=threshold,
)

def _validate_experiment_and_run(self, method_name: str):
"""Validates Experiment and Run are set and raises informative error message.
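As a usage illustration for the module-level API wired up in this file, here is a hedged end-to-end sketch that derives the inputs with scikit-learn and logs them through `aiplatform.log_classification_metrics`. scikit-learn, the project/experiment/run names, and the toy data are assumptions for illustration, not part of the commit.

```python
from google.cloud import aiplatform
from sklearn.metrics import confusion_matrix, roc_curve

# Hypothetical ground truth, hard predictions, and scores for the positive class ("dog").
y_true = [0, 0, 0, 1, 1, 1]   # 0 = "cat", 1 = "dog"
y_pred = [0, 0, 1, 1, 1, 0]
y_score = [0.2, 0.4, 0.6, 0.7, 0.8, 0.3]

aiplatform.init(experiment="my-experiment", project="my-project", location="us-central1")
aiplatform.start_run(run="my-run")

# Drop the first ROC point, whose threshold is an artificial sentinel (inf in recent scikit-learn).
fpr, tpr, threshold = (arr[1:].tolist() for arr in roc_curve(y_true, y_score))
aiplatform.log_classification_metrics(
    display_name="my-classification-metrics",
    labels=["cat", "dog"],
    matrix=confusion_matrix(y_true, y_pred).tolist(),
    fpr=fpr,
    tpr=tpr,
    threshold=threshold,
)
aiplatform.end_run()

# Read the logged artifact back through the run-level API added in experiment_run_resource.py.
my_run = aiplatform.ExperimentRun("my-run", experiment="my-experiment")
print(my_run.get_classification_metrics())
```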
6 changes: 6 additions & 0 deletions samples/model-builder/conftest.py
@@ -824,6 +824,12 @@ def mock_log_params():
mock_log_params.return_value = None
yield mock_log_params

@pytest.fixture
def mock_log_classification_metrics():
with patch.object(aiplatform, "log_classification_metrics") as mock_log_classification_metrics:
mock_log_classification_metrics.return_value = None
yield mock_log_classification_metrics


@pytest.fixture
def mock_log_pipeline_job():
@@ -0,0 +1,47 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional

from google.cloud import aiplatform


# [START aiplatform_sdk_log_classification_metrics_sample]
def log_classification_metrics_sample(
experiment_name: str,
run_name: str,
project: str,
location: str,
labels: Optional[List[str]] = None,
matrix: Optional[List[List[int]]] = None,
fpr: Optional[List[float]] = None,
tpr: Optional[List[float]] = None,
threshold: Optional[List[float]] = None,
display_name: Optional[str] = None,
):
aiplatform.init(experiment=experiment_name, project=project, location=location)

aiplatform.start_run(run=run_name, resume=True)

aiplatform.log_classification_metrics(
labels=labels,
matrix=matrix,
fpr=fpr,
tpr=tpr,
threshold=threshold,
display_name=display_name,
)


# [END aiplatform_sdk_log_classification_metrics_sample]
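A possible invocation of the sample above; the project, location, experiment, and run values are placeholders, and the metric values mirror the docstring examples elsewhere in this commit.

```python
log_classification_metrics_sample(
    experiment_name="my-experiment",   # placeholder
    run_name="my-run",                 # placeholder
    project="my-project",              # placeholder
    location="us-central1",            # placeholder
    labels=["cat", "dog"],
    matrix=[[9, 1], [1, 9]],
    fpr=[0.1, 0.5, 0.9],
    tpr=[0.1, 0.7, 0.9],
    threshold=[0.9, 0.5, 0.1],
    display_name="my-classification-metrics",
)
```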
@@ -0,0 +1,38 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import log_classification_metrics_sample

import pytest

import test_constants as constants


@pytest.mark.usefixtures("mock_sdk_init", "mock_start_run")
def test_log_classification_metrics_sample(mock_log_classification_metrics):

log_classification_metrics_sample.log_classification_metrics_sample(
experiment_name=constants.EXPERIMENT_NAME,
run_name=constants.EXPERIMENT_RUN_NAME,
project=constants.PROJECT,
location=constants.LOCATION,
labels=constants.CLASSIFICATION_METRICS["labels"],
matrix=constants.CLASSIFICATION_METRICS["matrix"],
fpr=constants.CLASSIFICATION_METRICS["fpr"],
tpr=constants.CLASSIFICATION_METRICS["tpr"],
threshold=constants.CLASSIFICATION_METRICS["threshold"],
display_name=constants.CLASSIFICATION_METRICS["display_name"],
)

mock_log_classification_metrics.assert_called_with(**constants.CLASSIFICATION_METRICS)
9 changes: 8 additions & 1 deletion samples/model-builder/test_constants.py
@@ -272,7 +272,14 @@

METRICS = {"accuracy": 0.1}
PARAMS = {"learning_rate": 0.1}

CLASSIFICATION_METRICS = {
"display_name": "my-classification-metrics",
"labels": ["cat", "dog"],
"matrix": [[9, 1], [1, 9]],
"fpr": [0.1, 0.5, 0.9],
"tpr": [0.1, 0.7, 0.9],
"threshold": [0.9, 0.5, 0.1],
}
TEMPLATE_PATH = "pipeline.json"

STEP = 1