Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RELEASE] Blueprint model actions (#452) #454

Merged
merged 1 commit into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions test_use_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,42 @@
"type": "string",
"enum": ["New", "Beta", "Preview", "Popular", "Deprecated","Labs"],
},
"modelActions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"modelCategory": {
"type": "string",
"enum": [
"synthetics",
"transform",
"classify",
"evaluate",
],
},
"modelType": {
"type": "string",
"enum": [
"actgan",
"navigator_ft",
"amplify",
"classify",
"ctgan",
"evaluate",
"gpt_x",
"lstm",
"synthetics",
"timeseries_dgan",
"transform",
"transform_v2",
"tabular_dp",
],
},
"defaultConfig": {"type": "string"},
}
}
},
"modelType": {
"type": "string",
"enum": [
Expand Down
91 changes: 91 additions & 0 deletions use_cases/gretel.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@
"modelType": "navigator_ft",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml",
"modelActions": [
{
"modelType": "navigator_ft",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml"
}
],
"button1": {
"label": "SDK Notebook",
"link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/demo/navigator-fine-tuning-intro-tutorial.ipynb"
Expand All @@ -49,6 +56,13 @@
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml",
"modelActions": [
{
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml"
}
],
"sampleDataset": {
"fileName": "patients.csv",
"description": "This patient dataset contains names, addresses and other personally identifiable information, which needs to be redacted before the dataset can be shared or used to train ML models.",
Expand All @@ -69,6 +83,13 @@
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language-differential-privacy.yml",
"modelActions": [
{
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language-differential-privacy.yml"
}
],
"button1": {
"label": "SDK Notebook",
"link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/generate_differentially_private_synthetic_text.ipynb"
Expand Down Expand Up @@ -108,6 +129,13 @@
"modelType": "synthetics",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml",
"modelActions": [
{
"modelType": "synthetics",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml"
}
],
"sampleDataset": {
"fileName": "sample-synthetic-healthcare.csv",
"description": "Use this sample electronic health records (EHR) dataset to synthesize an entirely new set of statistically equivalent records.",
Expand All @@ -134,6 +162,13 @@
"modelType": "tabular_dp",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-differential-privacy.yml",
"modelActions": [
{
"modelType": "tabular_dp",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-differential-privacy.yml"
}
],
"sampleDataset": {
"fileName": "bank_marketing_small.csv",
"description": "This dataset contains direct marketing campaign details (phone calls) from a Portuguese financial institution. It has sensitive information such as demographics and financials, which can benefit from privacy preserving techniques before sharing. ",
Expand All @@ -153,6 +188,13 @@
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml",
"modelActions": [
{
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml"
}
],
"sampleDataset": {
"fileName": "sample-banking-questions-intents.csv",
"description": "Create realistic banking-related questions and intent labels using this sample dataset.",
Expand All @@ -179,6 +221,13 @@
"modelType": "timeseries_dgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/time-series.yml",
"modelActions": [
{
"modelType": "timeseries_dgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/time-series.yml"
}
],
"sampleDataset": {
"fileName": "daily-website-visitors.csv",
"description": "Safely synthesize a dataset of daily website visitors while maintaining correlations and data patterns.",
Expand All @@ -197,6 +246,13 @@
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/transform_v2.yml",
"modelActions": [
{
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/transform_v2.yml"
}
],
"sampleConnection": {
"id": "sample_mysql_telecom",
"type": "mysql",
Expand All @@ -217,6 +273,13 @@
"modelType": "evaluate",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-lstm-evaluate.yml",
"modelActions": [
{
"modelType": "evaluate",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-lstm-evaluate.yml"
}
],
"sampleDataset": {
"fileName": "bank_marketing_small.csv",
"description": "Create synthetic data based on the publicly available dataset predicting opting in or out of bank marketing.",
Expand All @@ -235,6 +298,13 @@
"modelType": "amplify",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/amplify.yml",
"modelActions": [
{
"modelType": "amplify",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/amplify.yml"
}
],
"sampleDataset": {
"fileName": "safe-driver-prediction.csv",
"description": "Use this dataset to predict if a driver will file an insurance claim in the following year. Specify an output size in the config. By default, the model will create as many records as the input dataset.",
Expand Down Expand Up @@ -269,6 +339,13 @@
"modelType": "actgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml",
"modelActions": [
{
"modelType": "actgan",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml"
}
],
"sampleDataset": {
"fileName": "monthly-customer-payments.csv",
"description": "This dataset of monthly customer charges contains sensitive information and more than 20 columns.",
Expand Down Expand Up @@ -335,6 +412,13 @@
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml",
"modelActions": [
{
"modelType": "gpt_x",
"modelCategory": "synthetics",
"defaultConfig": "config_templates/gretel/synthetics/natural-language.yml"
}
],
"sampleDataset": {
"fileName": "taylor-swift-lyrics-up.csv",
"description": "Create believable song lyrics using this dataset of Taylor Swift hits.",
Expand All @@ -353,6 +437,13 @@
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml",
"modelActions": [
{
"modelType": "transform_v2",
"modelCategory": "transform",
"defaultConfig": "config_templates/gretel/transform/default.yml"
}
],
"sampleDataset": {
"fileName": "sample-transform-emails.csv",
"description": "Unstructured text datasets are useful for training chatbots or other models that need large amounts of data. The emails in this public dataset need to be de-identified before they can be used to train ML models.",
Expand Down
Loading