From 527abdcd07e64e8db4bbc6f30eaef71bd300e7bd Mon Sep 17 00:00:00 2001 From: Alex Ray Date: Fri, 2 Feb 2024 13:19:31 -0800 Subject: [PATCH] Add some relational samples (#347) # Problem We want to be able to create blueprints that use provided sample connections to ease the onboarding process. This is required to create blueprints that use databases because all database workflows are powered by connections, as opposed to a static sample file. # Solution Create 2 new blueprints that use a new `sampleConnection` field and update unit tests to support this field. --- test_use_cases.py | 23 +++++++++++++++++++++++ use_cases/gretel.json | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/test_use_cases.py b/test_use_cases.py index 85696793..6d38f66a 100644 --- a/test_use_cases.py +++ b/test_use_cases.py @@ -63,6 +63,29 @@ "bytes", ], }, + "sampleConnection": { + "type": "object", + "properties": { + "id": {"type": "string"}, + "type": {"type": "string"}, + "description": {"type": "string"}, + "tables": {"type": "number"}, + "records": {"type": "number"}, + "fields": {"type": "number"}, + "trainingTime": {"type": "string"}, + "bytes": {"type": "number"}, + }, + "required": [ + "id", + "type", + "description", + "tables", + "records", + "fields", + "trainingTime", + "bytes", + ], + }, "detailsFileName": {"type": "string"}, "button1": { "type": "object", diff --git a/use_cases/gretel.json b/use_cases/gretel.json index e6572b88..17429db8 100644 --- a/use_cases/gretel.json +++ b/use_cases/gretel.json @@ -307,6 +307,46 @@ "label": "Launch Gretel Labs", "link": "https://image-synthetics-preview.gretel.cloud/" } + }, + { + "gtmId": "use-case-synthetic-database", + "title": "Synthesize a database", + "description": "Synthesize a database using Gretel Relational.", + "cardType": "Console", + "icon": "relational-db.png", + "modelType": "synthetics", + "modelCategory": "synthetics", + "defaultConfig": 
"config_templates/gretel/synthetics/tabular-actgan.yml", + "sampleConnection": { + "id": "sample_mysql_telecom", + "type": "mysql", + "description": "Use this sample telecommunications database to synthesize an entirely new database of statistically equivalent records, while maintaining referential integrity.", + "tables": 5, + "records": 230988, + "fields": 29, + "trainingTime": "20 mins", + "bytes": 9537000 + } + }, + { + "gtmId": "use-case-transform-database", + "title": "Redact PII in a database", + "description": "Safeguard sensitive information and uphold compliance standards by automatically removing private information from your database.", + "cardType": "Console", + "icon": "relational-db.png", + "modelType": "transform_v2", + "modelCategory": "transform", + "defaultConfig": "config_templates/gretel/transform/transform_v2.yml", + "sampleConnection": { + "id": "sample_mysql_telecom", + "type": "mysql", + "description": "This telecommunications dataset contains names, addresses, social security numbers, and other personally identifiable information, which needs to be redacted before the dataset can be shared or used to train ML models.", + "tables": 5, + "records": 230988, + "fields": 29, + "trainingTime": "5 mins", + "bytes": 9537000 + } } ] }