gretelai · thesteady · Aug 27, 2024 · Aug 13, 2024 · Aug 14, 2024 · Aug 19, 2024
diff --git a/config_templates/gretel/synthetics/navigator-ft.yml b/config_templates/gretel/synthetics/navigator-ft.yml
@@ -20,10 +20,12 @@ models:
         num_records: 5000
 
       params:
-        # The parameter below is a proxy for training
-        # time. It sets the number of records from the
-        # input dataset that the model will see during
-        # training. It can be smaller (we downsample),
-        # larger (we resample), or the same size as
-        # your input dataset.
-        num_input_records_to_sample: 25000
+        # The parameter below is a proxy for training time.
+        # If set to 'auto', we will automatically choose an
+        # appropriate value. An integer value will set the
+        # number of records from the input dataset that the
+        # model will see during training. It can be smaller
+        # (we downsample), larger (we resample), or the same
+        # size as your input dataset. A starting value to
+        # experiment with is 25,000.
+        num_input_records_to_sample: auto
diff --git a/model_types/modelTypesList.json b/model_types/modelTypesList.json
@@ -1,5 +1,20 @@
 {
   "modelTypes": [
+    {
+      "modelType": "navigator_ft",
+      "modelCategory": "synthetics",
+      "defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml",
+      "description": "Our newest language model capable of generating multi-modal tabular data including mixed categorical, numeric, time-series, and free text fields.",
+      "label": "Navigator Fine Tuning",
+      "sampleDataset": {
+        "fileName": "financial_transactions.csv",
+        "description": "This financial dataset contains transaction ID, date, amount, currency, type, and description for banking transactions. It includes numeric, categorical, and free text tabular data.",
+        "records": 9344,
+        "fields": 6,
+        "trainingTime": "10 mins",
+        "bytes": 683576
+      }
+    },
     {
       "modelType": "actgan",
       "modelCategory": "synthetics",
@@ -15,21 +30,6 @@
         "bytes": 1000000
       }
     },
-    {
-      "modelType": "navigator_ft",
-      "modelCategory": "synthetics",
-      "defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml",
-      "description": "Our newest language model capable of generating multi-modal tabular data including mixed categorical, numeric, time-series, and free text fields.",
-      "label": "Navigator Fine Tuning",
-      "sampleDataset": {
-        "fileName": "sample-patient-events.csv",
-        "description": "This medical dataset contains sequences of annotated events (such as hospital admission, diagnosis, treatment, etc.) for 1,712 synthetic patients.",
-        "records": 7348,
-        "fields": 17,
-        "trainingTime": "25 mins",
-        "bytes": 2386363
-      }
-    },
     {
       "modelType": "gpt_x",
       "modelCategory": "synthetics",

diff --git a/sample_data/clinical_notes.csv b/sample_data/clinical_notes.csv
diff --git a/sample_data/financial_transactions.csv b/sample_data/financial_transactions.csv
diff --git a/sample_data_previews/clinical_notes.csv b/sample_data_previews/clinical_notes.csv
diff --git a/sample_data_previews/financial_transactions.csv b/sample_data_previews/financial_transactions.csv
@@ -0,0 +1,21 @@
+transaction_id,date,amount,currency,transaction_type,description
+86492337,2021-08-01,250.5,USD,debit,Paid monthly rent for apartment in New York City.
+82782780,2022-02-14,50.0,EUR,debit,Bought a souvenir for a friend during a trip to Paris.
+81861773,2023-01-05,1000.0,GBP,transfer,Sent money to a family member in the UK for holiday expenses.
+46881945,2021-11-20,120.0,USD,credit,Bought groceries for the week from a local supermarket.
+22031755,2022-06-15,75.0,CAD,debit,Withdrew cash for a weekend camping trip in Canada.
+23494374,2023-09-22,300.0,JPY,transfer,Sent money to a friend in Japan for a birthday gift.
+32513708,2021-02-18,150.0,USD,debit,Paid utility bills for a house in Texas.
+65492428,2022-07-03,100.0,USD,debit,Bought a new pair of running shoes.
+70958305,2023-12-28,200.0,USD,transfer,Sent money to a friend for holiday gifts.
+99557011,2021-06-10,80.0,USD,debit,Paid for a haircut and styling session.
+94473946,2022-11-27,150.0,USD,debit,Bought tickets for a football game in Seattle.
+43949381,2023-04-15,120.0,USD,transfer,Sent money to a friend in New York for a birthday gift.
+14818853,2022-04-15,125.78,USD,credit,Received salary payment from employer.
+80157141,2023-03-01,1000.0,EUR,transfer,Sent monthly rent payment to landlord.
+49840331,2021-12-28,50.25,GBP,debit,Purchased Christmas gifts for family.
+83297667,2022-01-03,200.0,JPY,credit,Received transfer from family in Japan.
+87362864,2021-06-15,35.99,USD,debit,Bought lunch for colleagues.
+44491138,2023-02-27,750.0,USD,transfer,Sent money to friend for group trip expenses.
+75439054,2022-05-01,150.0,USD,credit,Deposited tax refund into account.
+77764312,2023-04-10,2000.0,EUR,transfer,Sent money to parents for home repairs.
diff --git a/use_cases/details/gpt-dp.md b/use_cases/details/gpt-dp.md
@@ -0,0 +1,7 @@
+![Create free text data with privacy guarantees](https://blueprints.gretel.cloud/use_cases/images/gpt-dp.png "Create free text data with privacy guarantees")
+
+Unlock the potential of your text data while ensuring privacy by applying [differentially private fine-tuning using GPT](https://gretel.ai/blog/generate-differentially-private-synthetic-text-with-gretel-gpt). This method allows you to create a version of your free text data that protects sensitive information while still providing high-quality outputs.
+
+We recommend having a dataset of at least 10,000 samples to ensure reasonable quality. Note that differential privacy requires more epochs, which leads to longer training times compared to running without differential privacy.
+
+Prefer coding? Check out the [SDK notebook](https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/generate_differentially_private_synthetic_text.ipynb) example.
diff --git a/use_cases/details/navigator-ft.md b/use_cases/details/navigator-ft.md
@@ -1,14 +1,7 @@
-![Generate synthetic tabular, text and time series data](https://blueprints.gretel.cloud/use_cases/images/navigator-ft-hero.png "Generate synthetic tabular, text and time series data")
+![Synthesize tabular data with Navigator Fine Tuning](https://blueprints.gretel.cloud/use_cases/images/navigator-ft-hero.png "Synthesize tabular data with Navigator Fine Tuning")
 
-We are excited to announce the public preview of **Navigator Fine Tuning**, the latest advancement in our suite of synthetic data solutions. This new feature builds upon the recent general availability of [Gretel Navigator](https://console.gretel.ai/navigator), enabling you to generate data not only from a prompt, but also from fine-tuning the underlying model on your domain-specific real-world datasets to generate the highest quality synthetic data.
+If you’re new to Gretel, our Navigator Fine-Tuning blueprint is a great place to start. This blueprint automatically selects our comprehensive multi-modal model, a great one-stop shop for most synthetic data generation needs. Just answer a few questions, review the model configuration and hit **Run**. 
 
-One of the standout features of Navigator Fine Tuning is its support for multiple tabular data modalities within a single model. This means you can now generate datasets that maintain correlations across:
-- Numeric Data: Continuous or discrete numbers
-- Categorical Data: Categories or labels
-- Free Text: Unstructured text entries and long-form natural language such as email messages or notes in medical treatment summaries
-- Time Series: Sequential time-stamped data
-- JSON Data: Complex nested structures
+Navigator Fine-Tuning supports multiple tabular modalities of data within a single model, such as numeric, categorical, and free text data.
 
-All these data types can coexist within a single dataset, maintaining correlations not just within individual rows, but also across events spanning multiple rows, making Navigator  an exceptionally powerful tool for time series data generation.
-
-Try it in the Console, or if you prefer code, give the [SDK notebook](https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/demo/navigator-fine-tuning-intro-tutorial.ipynb) a spin. 
+Prefer coding? Check out the [SDK notebook](https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/demo/navigator-fine-tuning-intro-tutorial.ipynb) example.
diff --git a/use_cases/details/synthetic.md b/use_cases/details/synthetic.md
@@ -1,6 +1,6 @@
 ![Generate synthetic tabular data](https://blueprints.gretel.cloud/use_cases/images/synthetic-tabular-generation.png "Generate synthetic tabular data")
 
-If you’re new to Gretel, our synthetic data blueprint is a great place to start. This gentle introduction to synthetic data generation automatically selects our popular [ACTGAN model](https://gretel.ai/blog/scale-synthetic-data-to-millions-of-rows-with-actgan) and provides a sample healthcare dataset. Just answer a few questions, review the model configuration and hit **Run**. 
+The synthetic data blueprint is a great introduction to synthetic data generation. It uses our [ACTGAN model](https://gretel.ai/blog/scale-synthetic-data-to-millions-of-rows-with-actgan) for numeric and categorical data and comes with a sample healthcare dataset. Just answer a few questions, review the model configuration, and hit **Run**.
 
 Prefer coding? Check out the [Gretel 101 notebook](https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/sdk_blueprints/Gretel_101_Blueprint.ipynb) example. Synthesize data in just 4 lines of code! 
 

diff --git a/use_cases/gretel.json b/use_cases/gretel.json
@@ -16,63 +16,35 @@
         "link": "https://docs.gretel.ai/create-synthetic-data/models/navigator/examples"
       }
     },
-    {
-      "gtmId": "use-case-synthetic",
-      "title": "Generate synthetic data from complex tabular datasets",
-      "description": "Handle high-dimensional data with thousands of columns and millions of rows.",
-      "cardType": "Console",
-      "icon": "synthetics.png",
-      "detailsFileName": "synthetic.md",
-      "modelType": "synthetics",
-      "modelCategory": "synthetics",
-      "defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml",
-      "sampleDataset": {
-        "fileName": "sample-synthetic-healthcare.csv",
-        "description": "Use this sample electronic health records (EHR) dataset to synthesize an entirely new set of statistically equivalent records.",
-        "records": 9999,
-        "fields": 18,
-        "trainingTime": "6 mins",
-        "bytes": 830021
-      },
-      "button1": {
-        "label": "Gretel 101 Notebook",
-        "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/sdk_blueprints/Gretel_101_Blueprint.ipynb"
-      },
-      "button2": {
-        "label": "Advanced Examples Notebook",
-        "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/sdk_blueprints/Gretel_Advanced_Tabular_Blueprint.ipynb"
-      }
-    },
     {
       "gtmId": "use-case-navigator-ft",
-      "title": "[Public Preview] Generate synthetic tabular, text and time series data with Navigator Fine Tuning ",
-      "description": "Try out our latest synthetic model supporting tabular, text, JSON and time series data in a single dataset.",
+      "title": "Synthesize tabular data with Navigator Fine Tuning",
+      "description": "Use Gretel's flagship model to create high-quality, domain-specific tabular datasets, supporting data types such as numeric, categorical, and free text.",
       "cardType": "Console",
-      "tag": "Preview",
+      "tag": "New",
       "icon": "navigator-ft.png",
       "detailsFileName": "navigator-ft.md",
       "modelType": "navigator_ft",
       "modelCategory": "synthetics",
       "defaultConfig": "config_templates/gretel/synthetics/navigator-ft.yml",
-      "sampleDataset": {
-        "fileName": "sample-patient-events.csv",
-        "description": "This medical dataset contains sequences of annotated events (such as hospital admission, diagnosis, treatment, etc.) for 1,712 synthetic patients.",
-        "records": 7348,
-        "fields": 17,
-        "trainingTime": "25 mins",
-        "bytes": 2386363
-      },
       "button1": {
         "label": "SDK Notebook",
         "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/demo/navigator-fine-tuning-intro-tutorial.ipynb"
+      },
+      "sampleDataset": {
+        "fileName": "financial_transactions.csv",
+        "description": "This financial dataset contains transaction ID, date, amount, currency, type, and description for banking transactions. It includes numeric, categorical, and free text tabular data.",
+        "records": 9344,
+        "fields": 6,
+        "trainingTime": "10 mins",
+        "bytes": 683576
       }
     },
     {
       "gtmId": "use-case-redact-pii",
       "title": "Transform unstructured data into AI-ready formats",
       "description": "De-identify, transform, or label text and tabular data for AI.",
       "cardType": "Console",
-      "tag": "New",
       "icon": "transform.png",
       "modelType": "transform_v2",
       "modelCategory": "transform",
@@ -86,6 +58,30 @@
         "bytes": 5647
       }
     },
+    {
+      "gtmId": "use-case-gpt-dp",
+      "title": "Produce free text data with privacy guarantees",
+      "description": "Leverage differentially private fine-tuning with GPT to generate a provably-private version of your free text data.",
+      "cardType": "Console",
+      "tag": "New",
+      "icon": "GPTwithDP.png",
+      "detailsFileName": "gpt-dp.md",
+      "modelType": "gpt_x",
+      "modelCategory": "synthetics",
+      "defaultConfig": "config_templates/gretel/synthetics/natural-language-differential-privacy.yml",
+      "button1": {
+        "label": "SDK Notebook",
+        "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/generate_differentially_private_synthetic_text.ipynb"
+      },
+      "sampleDataset": {
+        "fileName": "clinical_notes.csv",
+        "description": "This clinical notes dataset contains medical observations written by healthcare providers during patient care. It includes personally identifiable information which needs to be redacted before the dataset can be shared or used to train ML models.",
+        "records": 30000,
+        "fields": 1,
+        "trainingTime": "2 hours",
+        "bytes": 103447682
+      }
+    },
     {
       "gtmId": "use-case-gretel_tuner",
       "title": "Optimize your synthetic data",
@@ -102,6 +98,33 @@
         "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/docs/notebooks/demo/gretel-tuner-advanced-tutorial.ipynb"
       }
     },
+    {
+      "gtmId": "use-case-synthetic",
+      "title": "Generate synthetic data from complex tabular datasets",
+      "description": "Handle high-dimensional data with thousands of columns and millions of rows.",
+      "cardType": "Console",
+      "icon": "synthetics.png",
+      "detailsFileName": "synthetic.md",
+      "modelType": "synthetics",
+      "modelCategory": "synthetics",
+      "defaultConfig": "config_templates/gretel/synthetics/tabular-actgan.yml",
+      "sampleDataset": {
+        "fileName": "sample-synthetic-healthcare.csv",
+        "description": "Use this sample electronic health records (EHR) dataset to synthesize an entirely new set of statistically equivalent records.",
+        "records": 9999,
+        "fields": 18,
+        "trainingTime": "6 mins",
+        "bytes": 830021
+      },
+      "button1": {
+        "label": "Gretel 101 Notebook",
+        "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/sdk_blueprints/Gretel_101_Blueprint.ipynb"
+      },
+      "button2": {
+        "label": "Advanced Examples Notebook",
+        "link": "https://colab.research.google.com/github/gretelai/gretel-blueprints/blob/main/sdk_blueprints/Gretel_Advanced_Tabular_Blueprint.ipynb"
+      }
+    },
     {
       "gtmId": "use-case-tabular-dp",
       "title": "Create provably private versions of sensitive data",

diff --git a/use_cases/images/gpt-dp.png b/use_cases/images/gpt-dp.png