From e9ef3f64d3059210e99d60e04980dc598e3ee564 Mon Sep 17 00:00:00 2001 From: Laura Date: Wed, 30 Oct 2024 11:35:37 -0600 Subject: [PATCH] [FEAT] INT-2427 Blueprint model actions (#452) (#454) * update use cases test with new fields * update json --- test_use_cases.py | 36 +++++++++++++++++ use_cases/gretel.json | 91 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) diff --git a/test_use_cases.py b/test_use_cases.py index 165cb2c4..0cd02b77 100644 --- a/test_use_cases.py +++ b/test_use_cases.py @@ -16,6 +16,42 @@ "type": "string", "enum": ["New", "Beta", "Preview", "Popular", "Deprecated","Labs"], }, + "modelActions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "modelCategory": { + "type": "string", + "enum": [ + "synthetics", + "transform", + "classify", + "evaluate", + ], + }, + "modelType": { + "type": "string", + "enum": [ + "actgan", + "navigator_ft", + "amplify", + "classify", + "ctgan", + "evaluate", + "gpt_x", + "lstm", + "synthetics", + "timeseries_dgan", + "transform", + "transform_v2", + "tabular_dp", + ], + }, + "defaultConfig": {"type": "string"}, + } + } + }, "modelType": { "type": "string", "enum": [ diff --git a/use_cases/gretel.json b/use_cases/gretel.json index bdb50239..2ac59ec1 100644 --- a/use_cases/gretel.json +++ b/use_cases/gretel.json @@ -27,6 +27,13 @@ "modelType": "navigator_ft", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml", + "modelActions": [ + { + "modelType": "navigator_ft", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml" + } + ], "button1": { "label": "SDK Notebook", "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/demo/navigator-fine-tuning-intro-tutorial.ipynb" @@ -49,6 +56,13 @@ "modelType": "transform_v2", "modelCategory": "transform", "defaultConfig": "config_templates/gretel/transform/default.yml", + "modelActions": [ + { + "modelType": "transform_v2", + "modelCategory": "transform", + "defaultConfig": "config_templates/gretel/transform/default.yml" + } + ], "sampleDataset": { "fileName": "patients.csv", "description": "This patient dataset contains names, addresses and other personally identifiable information, which needs to be redacted before the dataset can be shared or used to train ML models.", @@ -69,6 +83,13 @@ "modelType": "gpt_x", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/natural-language-differential-privacy.yml", + "modelActions": [ + { + "modelType": "gpt_x", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/natural-language-differential-privacy.yml" + } + ], "button1": { "label": "SDK Notebook", "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/generate_differentially_private_synthetic_text.ipynb" @@ -108,6 +129,13 @@ "modelType": "synthetics", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml", + "modelActions": [ + { + "modelType": "synthetics", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml" + } + ], "sampleDataset": { "fileName": "sample-synthetic-healthcare.csv", "description": "Use this sample electronic health records (EHR) dataset to synthesize an entirely new set of statistically equivalent records.", @@ -134,6 +162,13 @@ "modelType": "tabular_dp", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/tabular-differential-privacy.yml", + "modelActions": [ + { + "modelType": "tabular_dp", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/tabular-differential-privacy.yml" + } + ], "sampleDataset": { "fileName": "bank_marketing_small.csv", "description": "This dataset contains direct marketing campaign details (phone calls) from a Portuguese financial institution. It has sensitive information such as demographics and financials, which can benefit from privacy preserving techniques before sharing. ", @@ -153,6 +188,13 @@ "modelType": "gpt_x", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/natural-language.yml", + "modelActions": [ + { + "modelType": "gpt_x", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/natural-language.yml" + } + ], "sampleDataset": { "fileName": "sample-banking-questions-intents.csv", "description": "Create realistic banking-related questions and intent labels using this sample dataset.", @@ -179,6 +221,13 @@ "modelType": "timeseries_dgan", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/time-series.yml", + "modelActions": [ + { + "modelType": "timeseries_dgan", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/time-series.yml" + } + ], "sampleDataset": { "fileName": "daily-website-visitors.csv", "description": "Safely synthesize a dataset of daily website visitors while maintaining correlations and data patterns.", @@ -197,6 +246,13 @@ "modelType": "transform_v2", "modelCategory": "transform", "defaultConfig": "config_templates/gretel/transform/transform_v2.yml", + "modelActions": [ + { + "modelType": "transform_v2", + "modelCategory": "transform", + "defaultConfig": "config_templates/gretel/transform/transform_v2.yml" + } + ], "sampleConnection": { "id": "sample_mysql_telecom", "type": "mysql", @@ -217,6 +273,13 @@ "modelType": "evaluate", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/tabular-lstm-evaluate.yml", + "modelActions": [ + { + "modelType": "evaluate", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/tabular-lstm-evaluate.yml" + } + ], "sampleDataset": { "fileName": "bank_marketing_small.csv", "description": "Create synthetic data based on the publicly available dataset predicting opting in or out of bank marketing.", @@ -235,6 +298,13 @@ "modelType": "amplify", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/amplify.yml", + "modelActions": [ + { + "modelType": "amplify", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/amplify.yml" + } + ], "sampleDataset": { "fileName": "safe-driver-prediction.csv", "description": "Use this dataset to predict if a driver will file an insurance claim in the following year. Specify an output size in the config. By default, the model will create as many records as the input dataset.", @@ -269,6 +339,13 @@ "modelType": "actgan", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml", + "modelActions": [ + { + "modelType": "actgan", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml" + } + ], "sampleDataset": { "fileName": "monthly-customer-payments.csv", "description": "This dataset of monthly customer charges contains sensitive information and more than 20 columns.", @@ -335,6 +412,13 @@ "modelType": "gpt_x", "modelCategory": "synthetics", "defaultConfig": "config_templates/gretel/synthetics/natural-language.yml", + "modelActions": [ + { + "modelType": "gpt_x", + "modelCategory": "synthetics", + "defaultConfig": "config_templates/gretel/synthetics/natural-language.yml" + } + ], "sampleDataset": { "fileName": "taylor-swift-lyrics-up.csv", "description": "Create believable song lyrics using this dataset of Taylor Swift hits.", @@ -353,6 +437,13 @@ "modelType": "transform_v2", "modelCategory": "transform", "defaultConfig": "config_templates/gretel/transform/default.yml", + "modelActions": [ + { + "modelType": "transform_v2", + "modelCategory": "transform", + "defaultConfig": "config_templates/gretel/transform/default.yml" + } + ], "sampleDataset": { "fileName": "sample-transform-emails.csv", "description": "Unstructured text datasets are useful for training chatbots or other models that need large amounts of data. The emails in this public dataset need to be de-identified before they can be used to train ML models.",