From 4071fbaca33de71852b32ee8619e38032e6a18ab Mon Sep 17 00:00:00 2001
From: Matt Kornfield <kornfield@gretel.ai>
Date: Fri, 16 Jun 2023 12:45:36 -0700
Subject: [PATCH 1/2] Do not encode keys for transforms by default

---
 src/gretel_trainer/relational/multi_table.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gretel_trainer/relational/multi_table.py b/src/gretel_trainer/relational/multi_table.py
index 2cf5436f..a5df6505 100644
--- a/src/gretel_trainer/relational/multi_table.py
+++ b/src/gretel_trainer/relational/multi_table.py
@@ -653,6 +653,7 @@ def run_transforms(
         identifier: Optional[str] = None,
         in_place: bool = False,
         data: Optional[dict[str, pd.DataFrame]] = None,
+        encode_keys: bool = False,
     ) -> None:
         """
         identifier: (str, optional): Unique string identifying a specific call to this method. Defaults to `transforms_` + current timestamp
@@ -663,6 +664,9 @@ def run_transforms(
 
         If `data` is supplied, runs only the supplied data through the corresponding transforms models.
         Otherwise runs source data through all existing transforms models.
+
+        If `encode_keys` is set to True, then we'll internally track the keys and update them
+        instead of relying on whatever was transformed
         """
         if data is not None:
             unrunnable_tables = [
@@ -706,9 +710,11 @@ def run_transforms(
         )
         run_task(task, self._extended_sdk)
 
-        output_tables = self._strategy.label_encode_keys(
-            self.relational_data, task.output_tables
-        )
+        output_tables = task.output_tables
+        if encode_keys:
+            output_tables = self._strategy.label_encode_keys(
+                self.relational_data, task.output_tables
+            )
 
         if in_place:
             for table_name, transformed_table in output_tables.items():

From 2cbce02b29d3881a7d412d9c54068495d96b98c0 Mon Sep 17 00:00:00 2001
From: Matt Kornfield <kornfield@gretel.ai>
Date: Fri, 16 Jun 2023 12:49:04 -0700
Subject: [PATCH 2/2] John's better docs :)

---
 src/gretel_trainer/relational/multi_table.py | 21 ++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/gretel_trainer/relational/multi_table.py b/src/gretel_trainer/relational/multi_table.py
index a5df6505..d36fb508 100644
--- a/src/gretel_trainer/relational/multi_table.py
+++ b/src/gretel_trainer/relational/multi_table.py
@@ -656,17 +656,18 @@ def run_transforms(
         encode_keys: bool = False,
     ) -> None:
         """
-        identifier: (str, optional): Unique string identifying a specific call to this method. Defaults to `transforms_` + current timestamp
+        Run pre-trained Gretel Transform models on Relational table data.
 
-        If `in_place` set to True, overwrites source data in all locations
-        (internal Python state, local working directory, project artifact archive).
-        Used for transforms->synthetics workflows.
-
-        If `data` is supplied, runs only the supplied data through the corresponding transforms models.
-        Otherwise runs source data through all existing transforms models.
-
-        If `encode_keys` is set to True, then we'll internally track the keys and update them
-        instead of relying on whatever was transformed
+        Args:
+            identifier: Unique string identifying a specific call to this method. Defaults to `transforms_` + current timestamp.
+            in_place: If True, overwrites source data in all locations
+                (internal Python state, local working directory, project artifact archive).
+                Used for transforms->synthetics workflows.
+            data: If supplied, runs only the supplied data through the corresponding transforms models.
+                Otherwise runs source data through all existing transforms models.
+            encode_keys: If True, primary and foreign keys are replaced with label-encoded variants. This can add
+                an extra level of privacy at the cost of referential integrity between transformed and
+                original data. Defaults to False.
         """
         if data is not None:
             unrunnable_tables = [