Skip to content

Commit

Permalink
[FEAT] INT-2427 Blueprint model actions (#452) (#454)
Browse files Browse the repository at this point in the history
* update use cases test with new fields

* update json
  • Loading branch information
thesteady authored Oct 30, 2024
1 parent f752b65 commit e9ef3f6
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 0 deletions.
36 changes: 36 additions & 0 deletions test_use_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,42 @@
"type": "string",
"enum": ["New", "Beta", "Preview", "Popular", "Deprecated","Labs"],
},
"modelActions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"modelCategory": {
"type": "string",
"enum": [
"synthetics",
"transform",
"classify",
"evaluate",
],
},
"modelType": {
"type": "string",
"enum": [
"actgan",
"navigator_ft",
"amplify",
"classify",
"ctgan",
"evaluate",
"gpt_x",
"lstm",
"synthetics",
"timeseries_dgan",
"transform",
"transform_v2",
"tabular_dp",
],
},
"defaultConfig": {"type": "string"},
}
}
},
"modelType": {
"type": "string",
"enum": [
Expand Down
91 changes: 91 additions & 0 deletions use_cases/gretel.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@
"modelType": "navigator_ft",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml",
"modelActions": [
{
"modelType": "navigator_ft",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml"
}
],
"button1": {
"label": "SDK Notebook",
"link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/demo/navigator-fine-tuning-intro-tutorial.ipynb"
Expand All @@ -49,6 +56,13 @@
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml",
"modelActions": [
{
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml"
}
],
"sampleDataset": {
"fileName": "patients.csv",
"description": "This patient dataset contains names, addresses and other personally identifiable information, which needs to be redacted before the dataset can be shared or used to train ML models.",
Expand All @@ -69,6 +83,13 @@
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language-differential-privacy.yml",
"modelActions": [
{
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language-differential-privacy.yml"
}
],
"button1": {
"label": "SDK Notebook",
"link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/generate_differentially_private_synthetic_text.ipynb"
Expand Down Expand Up @@ -108,6 +129,13 @@
"modelType": "synthetics",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml",
"modelActions": [
{
"modelType": "synthetics",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml"
}
],
"sampleDataset": {
"fileName": "sample-synthetic-healthcare.csv",
"description": "Use this sample electronic health records (EHR) dataset to synthesize an entirely new set of statistically equivalent records.",
Expand All @@ -134,6 +162,13 @@
"modelType": "tabular_dp",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-differential-privacy.yml",
"modelActions": [
{
"modelType": "tabular_dp",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-differential-privacy.yml"
}
],
"sampleDataset": {
"fileName": "bank_marketing_small.csv",
"description": "This dataset contains direct marketing campaign details (phone calls) from a Portuguese financial institution. It has sensitive information such as demographics and financials, which can benefit from privacy preserving techniques before sharing. ",
Expand All @@ -153,6 +188,13 @@
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml",
"modelActions": [
{
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml"
}
],
"sampleDataset": {
"fileName": "sample-banking-questions-intents.csv",
"description": "Create realistic banking-related questions and intent labels using this sample dataset.",
Expand All @@ -179,6 +221,13 @@
"modelType": "timeseries_dgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/time-series.yml",
"modelActions": [
{
"modelType": "timeseries_dgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/time-series.yml"
}
],
"sampleDataset": {
"fileName": "daily-website-visitors.csv",
"description": "Safely synthesize a dataset of daily website visitors while maintaining correlations and data patterns.",
Expand All @@ -197,6 +246,13 @@
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/transform_v2.yml",
"modelActions": [
{
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/transform_v2.yml"
}
],
"sampleConnection": {
"id": "sample_mysql_telecom",
"type": "mysql",
Expand All @@ -217,6 +273,13 @@
"modelType": "evaluate",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-lstm-evaluate.yml",
"modelActions": [
{
"modelType": "evaluate",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-lstm-evaluate.yml"
}
],
"sampleDataset": {
"fileName": "bank_marketing_small.csv",
"description": "Create synthetic data based on the publicly available dataset predicting opting in or out of bank marketing.",
Expand All @@ -235,6 +298,13 @@
"modelType": "amplify",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/amplify.yml",
"modelActions": [
{
"modelType": "amplify",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/amplify.yml"
}
],
"sampleDataset": {
"fileName": "safe-driver-prediction.csv",
"description": "Use this dataset to predict if a driver will file an insurance claim in the following year. Specify an output size in the config. By default, the model will create as many records as the input dataset.",
Expand Down Expand Up @@ -269,6 +339,13 @@
"modelType": "actgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml",
"modelActions": [
{
"modelType": "actgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml"
}
],
"sampleDataset": {
"fileName": "monthly-customer-payments.csv",
"description": "This dataset of monthly customer charges contains sensitive information and more than 20 columns.",
Expand Down Expand Up @@ -335,6 +412,13 @@
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml",
"modelActions": [
{
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml"
}
],
"sampleDataset": {
"fileName": "taylor-swift-lyrics-up.csv",
"description": "Create believable song lyrics using this dataset of Taylor Swift hits.",
Expand All @@ -353,6 +437,13 @@
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml",
"modelActions": [
{
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml"
}
],
"sampleDataset": {
"fileName": "sample-transform-emails.csv",
"description": "Unstructured text datasets are useful for training chatbots or other models that need large amounts of data. The emails in this public dataset need to be de-identified before they can be used to train ML models.",
Expand Down

0 comments on commit e9ef3f6

Please sign in to comment.