Allow output (flattened) stream schema to file

ednarb29 · Jan 31, 2022 · 9be9bfe · 9be9bfe
1 parent 3a94bbb
commit 9be9bfe
Show file tree

Hide file tree

Showing 4 changed files with 33 additions and 8 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Change Log
 
+## v1.2.1
+
+* Allow output (flattened) stream schema to file
+
 ## v1.2.0
 
 * Bumped up version to publish package

diff --git a/README.md b/README.md
@@ -26,11 +26,17 @@ target-airtable takes two types of input:
    - **max_batch_size** (according to your Airtable API rate limits)
    - **endpoint** (optional, default="https://api.airtable.com/v0", the Airtable API endpoint)
    - **typecast** (optional, default=True, tries to cast types according to your Airtable table schema)
+   - **output_schema** (optional, default=False, collects and write the (flattened) stream schema to a file)
+   - **output_schema_path** (optional, default="", the output path to write the schema file)
 2. A stream of Singer-formatted data on stdin
 
 target-airtable replicates the incomming streams from a tap into Airtable tables with the same name as the
 stream. Make sure that these tables and the fields exist in your selected Airtable base.
 
+If you want to make sure how the (flattened) schema has to look like in Airtable, set the option `output_schema` to
+True. A file with the required table fields will be written to `output_schema_path/output_schema.txt` and no
+records will be submitted to Airtable.
+
 
 Create a config file with your configuration data:
 
@@ -40,7 +46,9 @@ Create a config file with your configuration data:
   "base": "my_base",
   "max_batch_size": 10,
   "endpoint": "https://api.airtable.com/v0",
-  "typecast": true
+  "typecast": true,
+  "output_schema": false,
+  "output_schema_path": ""
 }
 ```
 ```bash

diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
 from setuptools.command.install import install
 from setuptools import setup, find_packages
 
-VERSION = "v1.2.0"
+VERSION = "v1.2.1"
 
 
 class VerifyVersionCommand(install):

diff --git a/target_airtable/__init__.py b/target_airtable/__init__.py
@@ -5,6 +5,7 @@
 import http.client
 import io
 import json
+import os.path
 import sys
 import threading
 import urllib
@@ -84,6 +85,7 @@ def persist_lines(config, lines):
 
     # collect records for batch upload
     records_bulk = dict()
+    records_schema = set()
 
     # Loop over lines from stdin
     for line in lines:
@@ -115,9 +117,15 @@ def persist_lines(config, lines):
             # If the record needs to be flattened, uncomment this line
             flattened_record = flatten(o['record'])
 
-            records_bulk[o['stream']].append({
-                "fields": flattened_record
-            })
+            if config.get("output_schema", False):
+                # store flattened schema
+                for k in flattened_record.keys():
+                    records_schema.add(k)
+            else:
+                # capture record
+                records_bulk[o['stream']].append({
+                    "fields": flattened_record
+                })
 
             state = None
         elif t == 'STATE':
@@ -136,9 +144,14 @@ def persist_lines(config, lines):
             raise Exception("Unknown message type {} in message {}"
                             .format(o['type'], o))
 
-    # process all collected entries
-    for table, records in records_bulk.items():
-        process_records(config, table, records)
+    if config.get("output_schema", False):
+        # write produced schema to file
+        with open(os.path.join(config.get("output_schema_path", ""), "output_schema.txt"), "w") as f:
+            f.write(str(records_schema))
+    else:
+        # process all collected entries
+        for table, records in records_bulk.items():
+            process_records(config, table, records)
 
     return state