Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CHORE] remove legacy blueprints (INT-2531) #461

Merged
merged 2 commits into from
Nov 19, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 0 additions & 168 deletions use_cases/gretel.json
Original file line number Diff line number Diff line change
Expand Up @@ -237,83 +237,6 @@
"bytes": 63000
}
},
{
"gtmId": "use-case-transform-database",
"title": "Redact PII in a database",
"description": "Safeguard sensitive information and uphold compliance standards by automatically removing private information from your database.",
"cardType": "Console",
"icon": "relational-db.png",
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/transform_v2.yml",
"modelActions": [
{
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/transform_v2.yml"
}
],
"sampleConnection": {
"id": "sample_mysql_telecom",
"type": "mysql",
"description": "This telecommunications dataset contains names, addresses, social security numbers, and other personally identifiable information, which needs to be redacted before the dataset can be shared or used to train ML models.",
"tables": 5,
"records": 230988,
"fields": 29,
"trainingTime": "5 mins",
"bytes": 9537000
}
},
{
"gtmId": "use-case-downstream-accuracy",
"title": "Synthesize data + evaluate ML performance",
"description": "Evaluate synthetic data performance on classification and regression models.",
"cardType": "Console",
"icon": "downstream-accuracy.png",
"modelType": "evaluate",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-lstm-evaluate.yml",
"modelActions": [
{
"modelType": "evaluate",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-lstm-evaluate.yml"
}
],
"sampleDataset": {
"fileName": "bank_marketing_small.csv",
"description": "Create synthetic data based on the publicly available dataset predicting opting in or out of bank marketing.",
"records": 4521,
"fields": 17,
"trainingTime": "< 10 mins",
"bytes": 371020
}
},
{
"gtmId": "use-case-amplify",
"title": "Synthesize data at scale",
"description": "Create large volumes of synthetic records for demos and scale testing.",
"cardType": "Console",
"icon": "amplify.png",
"modelType": "amplify",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/amplify.yml",
"modelActions": [
{
"modelType": "amplify",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/amplify.yml"
}
],
"sampleDataset": {
"fileName": "safe-driver-prediction.csv",
"description": "Use this dataset to predict if a driver will file an insurance claim in the following year. Specify an output size in the config. By default, the model will create as many records as the input dataset.",
"records": 547827,
"fields": 59,
"trainingTime": "< 5 mins",
"bytes": 100000000
}
},
{
"gtmId": "use-case-boost-minority-class",
"title": "Boost minority class data",
Expand All @@ -330,31 +253,6 @@
"link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/boost_minority_class.ipynb"
}
},
{
"gtmId": "use-case-synthetic-hi-dim",
"title": "Synthesize high-dimensional data",
"description": "Train a synthetic ACTGAN model designed to easily handle datasets with 50k+ records and 100+ columns.",
"cardType": "Console",
"icon": "high-dimensional.png",
"modelType": "actgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml",
"modelActions": [
{
"modelType": "actgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml"
}
],
"sampleDataset": {
"fileName": "monthly-customer-payments.csv",
"description": "This dataset of monthly customer charges contains sensitive information and has more than 20 columns.",
"records": 7032,
"fields": 23,
"trainingTime": "3 mins",
"bytes": 1000000
}
},
{
"gtmId": "use-case-downstream-accuracy-notebook",
"title": "Evaluate classification and regression",
Expand All @@ -371,22 +269,6 @@
"link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/evaluate/evaluate_synthetic_data_classification_models.ipynb"
}
},
{
"gtmId": "use-case-gdpr-anon",
"title": "Automatic anonymization for GDPR",
"description": "Use our Transform and Synthetics generative AI-based models to anonymize sensitive data to meet GDPR standards.",
"cardType": "Notebook",
"icon": "gdpr-anonymization.png",
"detailsFileName": "gdpr-anonymization.md",
"button1": {
"label": "Open in GitHub",
"link": "https://github.com/gretelai/gdpr-helpers/blob/main/examples/anonymize_files_colab.ipynb"
},
"button2": {
"label": "Run in Google Colab",
"link": "https://colab.research.google.com/github/gretelai/gdpr-helpers/blob/main/examples/anonymize_files_colab.ipynb"
}
},
{
"gtmId": "use-case-benchmark",
"title": "Benchmark multiple synthetic models",
Expand All @@ -402,56 +284,6 @@
"label": "Run in Google Colab",
"link": "https://colab.research.google.com/github/gretelai/trainer/blob/main/notebooks/benchmark.ipynb"
}
},
{
"gtmId": "use-case-natural-lang-gpt-alternate",
"title": "Generate text including song lyrics using GPT",
"description": "Create natural language text data including song lyrics.",
"cardType": "Console",
"icon": "natural-lang-gpt.png",
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml",
"modelActions": [
{
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml"
}
],
"sampleDataset": {
"fileName": "taylor-swift-lyrics-up.csv",
"description": "Create believable song lyrics using this dataset of Taylor Swift hits.",
"records": 1225,
"fields": 1,
"trainingTime": "30 mins",
"bytes": 2076350
}
},
{
"gtmId": "use-case-remove-sensitive-data",
"title": "Remove sensitive data from unstructured text",
"description": "Use Named Entity Recognition (NER) to detect and redact personally identifiable data in free text fields.",
"cardType": "Console",
"icon": "transform.png",
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml",
"modelActions": [
{
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml"
}
],
"sampleDataset": {
"fileName": "sample-transform-emails.csv",
"description": "Unstructured text datasets are useful for training chatbots or other models that need large amounts of data. The emails in this public dataset need to be de-identified before they can be used to train ML models.",
"records": 100,
"fields": 2,
"trainingTime": "< 1 min",
"bytes": 65300
}
}
]
}
Loading