diff --git a/.fides/db_dataset.yml b/.fides/db_dataset.yml index f9be0cb212..e2959cfe35 100644 --- a/.fides/db_dataset.yml +++ b/.fides/db_dataset.yml @@ -430,7 +430,7 @@ dataset: data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified - - name: fidesctl_meta + - name: fides_meta data_categories: - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified diff --git a/.fides/redis_dataset.yml b/.fides/redis_dataset.yml index 2a7f7beec1..56f66e7bf5 100644 --- a/.fides/redis_dataset.yml +++ b/.fides/redis_dataset.yml @@ -7,15 +7,11 @@ dataset: fields: - name: EN_ACCESS_GRAPH__ description: This graph is summarized and sent to Fideslog to create high level insight into how graphs change between retries to inform future features. - data_categories: - - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified fidesops_meta: data_type: object # Stores an encrypted representation of the fidesops graph that executes the privacy requests. 
fields: - name: : # The current collection - data_categories: - - system.operations data_qualifier: aggregated.anonymized.unlinked_pseudonymized.pseudonymized.identified fidesops_meta: data_type: object diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f315c3625..4c8361992a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ The types of changes are: * Update sample project landing page copy to be version-agnostic [#1958](https://github.com/ethyca/fides/pull/1958) * `get` and `ls` CLI commands now return valid `fides` object YAML [#1991](https://github.com/ethyca/fides/pull/1991) +* Remove several fidesops schemas for DSR's in favor of updated Fideslang schemas [#2009](https://github.com/ethyca/fides/pull/2009) ### Developer Experience diff --git a/Dockerfile b/Dockerfile index 3aeade3b53..a7f87df31b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,6 +26,7 @@ RUN apt-get update && \ g++ \ gnupg \ gcc \ + git \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/data/dataset/bigquery_example_test_dataset.yml b/data/dataset/bigquery_example_test_dataset.yml index bdf4d7f45c..bd33b94d43 100644 --- a/data/dataset/bigquery_example_test_dataset.yml +++ b/data/dataset/bigquery_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: address.id @@ -33,16 +33,16 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + 
fides_meta: data_type: string length: 40 @@ -50,37 +50,37 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: login fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: time data_categories: [user.sensor] @@ -89,18 +89,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: address.id @@ -111,14 +111,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: orders.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: product.id @@ -130,7 +130,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: address.id @@ -141,14 +141,14 @@ dataset: data_categories: [user.financial] - 
name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.financial] @@ -159,7 +159,7 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [system.operations] @@ -170,12 +170,12 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: month data_categories: [system.operations] @@ -190,26 +190,26 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: bigquery_example_test_dataset field: employee.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: opened data_categories: [system.operations] @@ -218,7 +218,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/data/dataset/email_dataset.yml b/data/dataset/email_dataset.yml index b5c5c6062d..52152068c1 100644 --- a/data/dataset/email_dataset.yml +++ b/data/dataset/email_dataset.yml @@ -7,11 +7,11 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: true - name: customer_id data_categories: [user] - 
fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id @@ -22,7 +22,7 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: true - name: first_name data_categories: [user.childrens] @@ -30,10 +30,10 @@ dataset: data_categories: [user.childrens] - name: birthday data_categories: [user.childrens] - fidesops_meta: + fides_meta: data_type: string - name: report_card - fidesops_meta: + fides_meta: data_type: object fields: - name: grades @@ -45,7 +45,7 @@ dataset: - name: test_scores data_categories: [ user.childrens ] - name: parent_id - fidesops_meta: + fides_meta: references: - dataset: email_dataset field: daycare_customer.id @@ -54,11 +54,11 @@ dataset: fields: - name: id data_categories: [ system.operations ] - fidesops_meta: + fides_meta: primary_key: true - name: payer_email data_categories: [ user.contact.email ] - fidesops_meta: + fides_meta: identity: email - name: ccn data_categories: [user.financial.account_number] diff --git a/data/dataset/example_test_dataset.invalid b/data/dataset/example_test_dataset.invalid index 99b8700f28..deff6212e1 100644 --- a/data/dataset/example_test_dataset.invalid +++ b/data/dataset/example_test_dataset.invalid @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] * name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True * name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: * name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: address.id @@ -33,16 +33,16 @@ dataset: data_categories: [system.operations] * name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string * name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True * name: name 
data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string length: 40 @@ -50,37 +50,37 @@ dataset: fields: * name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: address.id direction: to * name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string * name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True * name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string * name: login fields: * name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: customer.id direction: from * name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True * name: time data_categories: [user.sensor] @@ -89,18 +89,18 @@ dataset: fields: * name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: customer.id direction: from * name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True * name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: address.id @@ -111,14 +111,14 @@ dataset: fields: * name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: orders.id direction: from * name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: product.id @@ -130,7 +130,7 @@ dataset: fields: * name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: address.id @@ -141,14 +141,14 
@@ dataset: data_categories: [user.financial] * name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: customer.id direction: from * name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True * name: name data_categories: [user.financial] @@ -159,7 +159,7 @@ dataset: fields: * name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True * name: name data_categories: [system.operations] @@ -170,12 +170,12 @@ dataset: fields: * name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string * name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True * name: month data_categories: [system.operations] @@ -190,26 +190,26 @@ dataset: fields: * name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string * name: closed data_categories: [system.operations] * name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string * name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: * dataset: bigquery_example_test_dataset field: employee.id direction: from * name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True * name: opened data_categories: [system.operations] @@ -218,7 +218,7 @@ dataset: fields: * name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string * name: last_visit diff --git a/data/dataset/example_test_datasets.yml b/data/dataset/example_test_datasets.yml index 3983efa02c..446dd20f7e 100644 --- a/data/dataset/example_test_datasets.yml +++ b/data/dataset/example_test_datasets.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: 
[system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: address.id @@ -33,12 +33,12 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -47,19 +47,19 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -68,7 +68,7 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: customer.id @@ -82,18 +82,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: address.id @@ -104,14 +104,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: orders.id direction: from - name: product_id data_categories: 
[system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: product.id @@ -123,7 +123,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: address.id @@ -134,7 +134,7 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: customer.id @@ -159,7 +159,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id @@ -177,19 +177,19 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: employee.id @@ -203,7 +203,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit @@ -220,7 +220,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -233,7 +233,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: address.id @@ -242,12 +242,12 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: 
[user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -256,19 +256,19 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -277,7 +277,7 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: customer.id @@ -291,18 +291,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: address.id @@ -313,14 +313,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: orders.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: product.id @@ -332,7 +332,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: address.id @@ -343,7 +343,7 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: 
mariadb_example_test_dataset field: customer.id @@ -368,7 +368,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id @@ -386,19 +386,19 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: employee.id @@ -412,7 +412,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/data/dataset/manual_dataset.yml b/data/dataset/manual_dataset.yml index 6cdf5c18a4..66f5e4a0da 100644 --- a/data/dataset/manual_dataset.yml +++ b/data/dataset/manual_dataset.yml @@ -7,22 +7,22 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: true - name: authorized_user data_categories: [user] - fidesops_meta: + fides_meta: data_type: string - name: customer_id data_categories: [user] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id direction: from - name: payment_card_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: payment_card.id @@ -31,10 +31,10 @@ dataset: fields: - name: box_id data_categories: [user] - fidesops_meta: + fides_meta: primary_key: true - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string diff --git a/data/dataset/mariadb_example_test_dataset.yml b/data/dataset/mariadb_example_test_dataset.yml index db1084cdc4..109a7c2299 
100644 --- a/data/dataset/mariadb_example_test_dataset.yml +++ b/data/dataset/mariadb_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: address.id @@ -33,12 +33,12 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -47,19 +47,19 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -68,7 +68,7 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: customer.id @@ -82,18 +82,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: address.id @@ -104,14 +104,14 @@ 
dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: orders.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: product.id @@ -123,7 +123,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: address.id @@ -134,7 +134,7 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: customer.id @@ -159,7 +159,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id @@ -177,19 +177,19 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mariadb_example_test_dataset field: employee.id @@ -203,7 +203,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/data/dataset/mongo_example_test_dataset.yml b/data/dataset/mongo_example_test_dataset.yml index 10f224cfe9..f3234cb476 100644 --- a/data/dataset/mongo_example_test_dataset.yml +++ b/data/dataset/mongo_example_test_dataset.yml @@ -7,67 +7,67 @@ dataset: fields: - name: _id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: customer_id data_categories: 
[user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id direction: from - name: gender data_categories: [user.gender] - fidesops_meta: + fides_meta: data_type: string - name: birthday data_categories: [user.date_of_birth] - fidesops_meta: + fides_meta: data_type: string - name: workplace_info - fidesops_meta: + fides_meta: data_type: object fields: - name: employer - fidesops_meta: + fides_meta: data_type: string - name: position data_categories: [user.job_title] - fidesops_meta: + fides_meta: data_type: string - name: direct_reports data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string[] - name: emergency_contacts - fidesops_meta: + fides_meta: data_type: object[] fields: - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: relationship - fidesops_meta: + fides_meta: data_type: string - name: phone data_categories: [user.contact.phone_number] - fidesops_meta: + fides_meta: data_type: string - name: children data_categories: [user.childrens] - fidesops_meta: + fides_meta: data_type: string[] - name: travel_identifiers - fidesops_meta: + fides_meta: data_type: string[] data_categories: [system.operations] - name: comments - fidesops_meta: + fides_meta: data_type: object[] fields: - name: comment_id - fidesops_meta: + fides_meta: data_type: string references: - dataset: mongo_test @@ -77,13 +77,13 @@ dataset: fields: - name: _id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True data_type: object_id - name: customer_identifiers fields: - name: internal_id - fidesops_meta: + fides_meta: data_type: string references: - dataset: mongo_test @@ -91,63 +91,63 @@ dataset: direction: from - name: derived_emails data_categories: [user] - fidesops_meta: + fides_meta: data_type: string[] identity: email - name: derived_phone data_categories: [user] - fidesops_meta: + fides_meta: data_type: string[] 
return_all_elements: true identity: phone_number - name: derived_interests data_categories: [user] - fidesops_meta: + fides_meta: data_type: string[] - name: customer_feedback fields: - name: _id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True data_type: object_id - name: customer_information fields: - name: email - fidesops_meta: + fides_meta: identity: email data_type: string - name: phone data_categories: [user.contact.phone_number] - fidesops_meta: + fides_meta: data_type: string - name: internal_customer_id data_categories: [system.operations] - fidesops_meta: + fides_meta: data_type: string - name: rating data_categories: [user] - fidesops_meta: + fides_meta: data_type: integer - name: date data_categories: [system.operations] - fidesops_meta: + fides_meta: data_type: string - name: message data_categories: [user] - fidesops_meta: + fides_meta: data_type: string - name: flights fields: - name: _id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True data_type: object_id - name: passenger_information fields: - name: passenger_ids - fidesops_meta: + fides_meta: data_type: string[] references: - dataset: mongo_test @@ -155,58 +155,58 @@ dataset: direction: from - name: full_name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: flight_no - name: date - name: pilots data_categories: [system.operations] - fidesops_meta: + fides_meta: data_type: string[] - name: plane data_categories: [system.operations] - fidesops_meta: + fides_meta: data_type: integer - name: conversations fields: - name: _id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True data_type: object_id - name: thread - fidesops_meta: + fides_meta: data_type: object[] fields: - name: comment - fidesops_meta: + fides_meta: data_type: string - name: message - fidesops_meta: + fides_meta: data_type: string - name: chat_name data_categories: [user.name] - 
fidesops_meta: + fides_meta: data_type: string - name: ccn data_categories: [user.financial.account_number] - fidesops_meta: + fides_meta: data_type: string - name: employee fields: - name: _id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True data_type: object_id - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True references: - dataset: mongo_test @@ -214,18 +214,18 @@ dataset: direction: from - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: aircraft fields: - name: _id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True data_type: object_id - name: planes data_categories: [system.operations] - fidesops_meta: + fides_meta: data_type: string[] references: - dataset: mongo_test @@ -233,20 +233,20 @@ dataset: direction: from - name: model data_categories: [system.operations] - fidesops_meta: + fides_meta: data_type: string - name: payment_card fields: - name: _id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True data_type: object_id - name: billing_address_id data_categories: [system.operations] - name: ccn data_categories: [user.financial.account_number] - fidesops_meta: + fides_meta: references: - dataset: mongo_test field: conversations.thread.ccn @@ -257,7 +257,7 @@ dataset: data_categories: [user.unique_id] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.financial] @@ -266,17 +266,17 @@ dataset: - name: rewards fields: - name: _id - fidesops_meta: + fides_meta: primary_key: True data_type: object_id - name: owner - fidesops_meta: + fides_meta: data_type: object[] return_all_elements: true fields: - name: phone data_categories: [user.contact.phone_number] - 
fidesops_meta: + fides_meta: data_type: string references: - dataset: mongo_test @@ -284,6 +284,6 @@ dataset: direction: from - name: shopper_name - name: points - fidesops_meta: + fides_meta: data_type: integer - name: expiration_date diff --git a/data/dataset/mssql_example_test_dataset.yml b/data/dataset/mssql_example_test_dataset.yml index 0a4a3a8e94..81d7e48d9e 100644 --- a/data/dataset/mssql_example_test_dataset.yml +++ b/data/dataset/mssql_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: address.id @@ -33,12 +33,12 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -47,19 +47,19 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -68,7 +68,7 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: customer.id @@ -82,18 +82,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - 
dataset: mssql_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: address.id @@ -104,14 +104,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: orders.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: product.id @@ -123,7 +123,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: address.id @@ -134,7 +134,7 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: customer.id @@ -159,7 +159,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id @@ -177,19 +177,19 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mssql_example_test_dataset field: employee.id @@ -203,7 +203,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/data/dataset/mysql_example_test_dataset.yml 
b/data/dataset/mysql_example_test_dataset.yml index c99eb39ef2..17ff0db898 100644 --- a/data/dataset/mysql_example_test_dataset.yml +++ b/data/dataset/mysql_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: address.id @@ -33,12 +33,12 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -47,19 +47,19 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] @@ -68,7 +68,7 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: customer.id @@ -82,18 +82,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: 
mysql_example_test_dataset field: address.id @@ -104,14 +104,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: orders.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: product.id @@ -123,7 +123,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: address.id @@ -134,7 +134,7 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: customer.id @@ -159,7 +159,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id @@ -177,19 +177,19 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: mysql_example_test_dataset field: employee.id @@ -203,7 +203,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/data/dataset/postgres_example_test_dataset.yml b/data/dataset/postgres_example_test_dataset.yml index 5c33d89d01..9978c88ff8 100644 --- a/data/dataset/postgres_example_test_dataset.yml +++ b/data/dataset/postgres_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: 
[system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: address.id @@ -33,16 +33,16 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string length: 40 @@ -50,37 +50,37 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: login fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: time data_categories: [user.sensor] @@ -89,18 +89,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: 
postgres_example_test_dataset field: address.id @@ -111,14 +111,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: orders.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: product.id @@ -130,7 +130,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: address.id @@ -141,14 +141,14 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.financial] @@ -159,7 +159,7 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [system.operations] @@ -170,12 +170,12 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: month data_categories: [system.operations] @@ -190,26 +190,26 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: employee.id direction: from - name: 
id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: opened data_categories: [system.operations] @@ -218,7 +218,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/data/dataset/redshift_example_test_dataset.yml b/data/dataset/redshift_example_test_dataset.yml index aa87f48cf3..1994219c34 100644 --- a/data/dataset/redshift_example_test_dataset.yml +++ b/data/dataset/redshift_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: address.id @@ -33,16 +33,16 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string length: 40 @@ -50,37 +50,37 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: login fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - 
dataset: redshift_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: time data_categories: [user.sensor] @@ -89,18 +89,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: address.id @@ -111,14 +111,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: order.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: product.id @@ -130,7 +130,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: address.id @@ -141,14 +141,14 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.financial] @@ -159,7 +159,7 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [system.operations] @@ -170,12 +170,12 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id 
data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: month data_categories: [system.operations] @@ -190,26 +190,26 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: redshift_example_test_dataset field: employee.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: opened data_categories: [system.operations] @@ -218,7 +218,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/data/dataset/remote_fides_example_test_dataset.yml b/data/dataset/remote_fides_example_test_dataset.yml index e34b43857b..303cba7e7a 100644 --- a/data/dataset/remote_fides_example_test_dataset.yml +++ b/data/dataset/remote_fides_example_test_dataset.yml @@ -7,5 +7,5 @@ dataset: fields: - name: placeholder data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email diff --git a/data/dataset/snowflake_example_test_dataset.yml b/data/dataset/snowflake_example_test_dataset.yml index 54366f436e..e44ad96688 100644 --- a/data/dataset/snowflake_example_test_dataset.yml +++ b/data/dataset/snowflake_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: 
snowflake_example_test_dataset field: address.id @@ -33,16 +33,16 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string length: 40 - name: variant_eg @@ -54,37 +54,37 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: login fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: time data_categories: [user.sensor] @@ -93,18 +93,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: address.id @@ -115,14 +115,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: order.id direction: from - name: 
product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: product.id @@ -134,7 +134,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: address.id @@ -145,14 +145,14 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.financial] @@ -163,7 +163,7 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [system.operations] @@ -174,12 +174,12 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: month data_categories: [system.operations] @@ -194,26 +194,26 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: snowflake_example_test_dataset field: employee.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: opened data_categories: [system.operations] @@ -222,7 +222,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: 
identity: email data_type: string - name: last_visit diff --git a/data/dataset/timebase_example_test_dataset.yml b/data/dataset/timebase_example_test_dataset.yml index 84a1f10b76..ac0f95594d 100644 --- a/data/dataset/timebase_example_test_dataset.yml +++ b/data/dataset/timebase_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: address.id @@ -33,16 +33,16 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string length: 40 @@ -50,37 +50,37 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: login fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: time data_categories: [user.sensor] @@ -89,18 +89,18 @@ dataset: fields: - name: customer_id data_categories: 
[user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: address.id @@ -111,14 +111,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: orders.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: product.id @@ -130,7 +130,7 @@ dataset: fields: - name: billing_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: address.id @@ -141,14 +141,14 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.financial] @@ -159,7 +159,7 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [system.operations] @@ -170,12 +170,12 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: month data_categories: [system.operations] @@ -190,26 +190,26 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email 
data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: timebase_example field: employee.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: opened data_categories: [system.operations] @@ -218,7 +218,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/docs/fides/Dockerfile b/docs/fides/Dockerfile index cada9b1843..4801a433b4 100644 --- a/docs/fides/Dockerfile +++ b/docs/fides/Dockerfile @@ -5,6 +5,7 @@ RUN apt-get update && \ g++ \ gnupg \ gcc \ + git \ python3-wheel \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* diff --git a/requirements.txt b/requirements.txt index bccc944903..b4cfa0e76b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,7 @@ deepdiff==5.8.1 fastapi[all]==0.82.0 fastapi-caching[redis]==0.3.0 fastapi-pagination[sqlalchemy]~= 0.10.0 -fideslang==1.3.1 +fideslang @ git+https://github.com/ethyca/fideslang.git@main#egg=fideslang fideslib==3.1.5 fideslog==1.2.10 firebase-admin==5.3.0 diff --git a/src/fides/api/ctl/migrations/versions/1f61c765cd1c_merge_alembic_heads.py b/src/fides/api/ctl/migrations/versions/1f61c765cd1c_merge_alembic_heads.py index 7f7caae110..dabc2216ff 100644 --- a/src/fides/api/ctl/migrations/versions/1f61c765cd1c_merge_alembic_heads.py +++ b/src/fides/api/ctl/migrations/versions/1f61c765cd1c_merge_alembic_heads.py @@ -5,13 +5,12 @@ Create Date: 2022-12-02 17:59:08.490577 """ -from alembic import op import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. 
-revision = '1f61c765cd1c' -down_revision = ('8f84fad4e00b', 'b72541d79f10') +revision = "1f61c765cd1c" +down_revision = ("8f84fad4e00b", "b72541d79f10") branch_labels = None depends_on = None diff --git a/src/fides/api/ctl/migrations/versions/2fb48b0e268b_update_ctl_datasets_fidesctl_meta.py b/src/fides/api/ctl/migrations/versions/2fb48b0e268b_update_ctl_datasets_fidesctl_meta.py new file mode 100644 index 0000000000..069065f5a8 --- /dev/null +++ b/src/fides/api/ctl/migrations/versions/2fb48b0e268b_update_ctl_datasets_fidesctl_meta.py @@ -0,0 +1,24 @@ +"""update ctl_datasets fidesctl_meta + +Revision ID: 2fb48b0e268b +Revises: 1f61c765cd1c +Create Date: 2022-12-08 17:49:14.317905 + +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "2fb48b0e268b" +down_revision = "1f61c765cd1c" +branch_labels = None +depends_on = None + + +def upgrade(): + op.alter_column("ctl_datasets", "fidesctl_meta", new_column_name="fides_meta") + + +def downgrade(): + op.alter_column("ctl_datasets", "fides_meta", new_column_name="fidesctl_meta") diff --git a/src/fides/api/ctl/migrations/versions/58933b5cc6e8_merge_failed_dsr_and_twilio.py b/src/fides/api/ctl/migrations/versions/58933b5cc6e8_merge_failed_dsr_and_twilio.py index c3a9d9a129..90aa8fd84c 100644 --- a/src/fides/api/ctl/migrations/versions/58933b5cc6e8_merge_failed_dsr_and_twilio.py +++ b/src/fides/api/ctl/migrations/versions/58933b5cc6e8_merge_failed_dsr_and_twilio.py @@ -5,9 +5,8 @@ Create Date: 2022-11-14 21:26:49.027809 """ -from alembic import op import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. 
revision = "58933b5cc6e8" diff --git a/src/fides/api/ctl/migrations/versions/8f84fad4e00b_add_error_message_tracking.py b/src/fides/api/ctl/migrations/versions/8f84fad4e00b_add_error_message_tracking.py index 16cbff29ec..40832c4ec7 100644 --- a/src/fides/api/ctl/migrations/versions/8f84fad4e00b_add_error_message_tracking.py +++ b/src/fides/api/ctl/migrations/versions/8f84fad4e00b_add_error_message_tracking.py @@ -5,9 +5,8 @@ Create Date: 2022-11-15 01:38:28.531640 """ -from alembic import op import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. revision = "8f84fad4e00b" diff --git a/src/fides/api/ctl/sql_models.py b/src/fides/api/ctl/sql_models.py index e33556e8a4..e4409529a2 100644 --- a/src/fides/api/ctl/sql_models.py +++ b/src/fides/api/ctl/sql_models.py @@ -190,7 +190,7 @@ class Dataset(Base, FidesBase): data_categories = Column(ARRAY(String)) data_qualifier = Column(String) collections = Column(JSON) - fidesctl_meta = Column(JSON) + fides_meta = Column(JSON) joint_controller = Column(PGEncryptedString, nullable=True) retention = Column(String) third_country_transfers = Column(ARRAY(String)) diff --git a/src/fides/api/main.py b/src/fides/api/main.py index f9a1779ac1..673295cd3c 100644 --- a/src/fides/api/main.py +++ b/src/fides/api/main.py @@ -230,6 +230,7 @@ async def setup_server() -> None: registry = load_registry(registry_file) db = get_api_session() update_saas_configs(registry, db) + log.info("Finished loading saas templates") except Exception as e: log.error( f"Error occurred during SaaS connector template validation: {str(e)}", diff --git a/src/fides/api/ops/api/v1/endpoints/connection_endpoints.py b/src/fides/api/ops/api/v1/endpoints/connection_endpoints.py index 9ac5d66812..bc7e61a502 100644 --- a/src/fides/api/ops/api/v1/endpoints/connection_endpoints.py +++ b/src/fides/api/ops/api/v1/endpoints/connection_endpoints.py @@ -8,6 +8,7 @@ from fastapi_pagination import Page, Params from fastapi_pagination.bases import 
AbstractPage from fastapi_pagination.ext.sqlalchemy import paginate +from fideslang.validation import FidesKey from fideslib.exceptions import KeyOrNameAlreadyExists from pydantic import ValidationError, conlist from sqlalchemy import or_ @@ -65,7 +66,6 @@ from fides.api.ops.schemas.connection_configuration.connection_secrets_saas import ( validate_saas_secrets_external_references, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.service.connectors import get_connector from fides.api.ops.service.privacy_request.request_runner_service import ( queue_privacy_request, @@ -80,7 +80,7 @@ def get_connection_config_or_error( - db: Session, connection_key: FidesOpsKey + db: Session, connection_key: FidesKey ) -> ConnectionConfig: """Helper to load the ConnectionConfig object or throw a 404""" connection_config = ConnectionConfig.get_by(db, field="key", value=connection_key) @@ -189,7 +189,7 @@ def get_connections( response_model=ConnectionConfigurationResponse, ) def get_connection_detail( - connection_key: FidesOpsKey, db: Session = Depends(deps.get_db) + connection_key: FidesKey, db: Session = Depends(deps.get_db) ) -> ConnectionConfig: """Returns connection configuration with matching key.""" return get_connection_config_or_error(db, connection_key) @@ -264,7 +264,7 @@ def patch_connections( status_code=HTTP_204_NO_CONTENT, ) def delete_connection( - connection_key: FidesOpsKey, *, db: Session = Depends(deps.get_db) + connection_key: FidesKey, *, db: Session = Depends(deps.get_db) ) -> None: """Removes the connection configuration with matching key.""" connection_config = get_connection_config_or_error(db, connection_key) @@ -358,7 +358,7 @@ def connection_status( response_model=TestStatusMessage, ) def put_connection_config_secrets( - connection_key: FidesOpsKey, + connection_key: FidesKey, *, db: Session = Depends(deps.get_db), unvalidated_secrets: connection_secrets_schemas, @@ -393,7 +393,7 @@ def put_connection_config_secrets( 
response_model=TestStatusMessage, ) def test_connection_config_secrets( - connection_key: FidesOpsKey, + connection_key: FidesKey, *, db: Session = Depends(deps.get_db), ) -> TestStatusMessage: diff --git a/src/fides/api/ops/api/v1/endpoints/dataset_endpoints.py b/src/fides/api/ops/api/v1/endpoints/dataset_endpoints.py index 2166a4dbda..24ba341140 100644 --- a/src/fides/api/ops/api/v1/endpoints/dataset_endpoints.py +++ b/src/fides/api/ops/api/v1/endpoints/dataset_endpoints.py @@ -7,6 +7,9 @@ from fastapi_pagination import Page, Params from fastapi_pagination.bases import AbstractPage from fastapi_pagination.ext.sqlalchemy import paginate +from fideslang.models import Dataset +from fideslang.validation import FidesKey +from pydantic import ValidationError as PydanticValidationError from pydantic import conlist from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session @@ -16,6 +19,7 @@ HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND, HTTP_415_UNSUPPORTED_MEDIA_TYPE, + HTTP_422_UNPROCESSABLE_ENTITY, ) from fides.api.ops.api import deps @@ -48,10 +52,9 @@ from fides.api.ops.schemas.dataset import ( BulkPutDataset, DatasetTraversalDetails, - FidesopsDataset, ValidateDatasetResponse, + validate_data_categories_against_db, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.util.api_router import APIRouter from fides.api.ops.util.oauth_util import verify_oauth_client from fides.api.ops.util.saas_util import merge_datasets @@ -64,7 +67,7 @@ # Helper method to inject the parent ConnectionConfig into these child routes def _get_connection_config( - connection_key: FidesOpsKey, db: Session = Depends(deps.get_db) + connection_key: FidesKey, db: Session = Depends(deps.get_db) ) -> ConnectionConfig: logger.info("Finding connection config with key '%s'", connection_key) connection_config = ConnectionConfig.get_by(db, field="key", value=connection_key) @@ -76,6 +79,20 @@ def _get_connection_config( return connection_config +def 
validate_data_categories(dataset: Dataset, db: Session) -> None: + """Validate data categories on a given Dataset + + As a separate method because we want to be able to match against data_categories in the + database instead of a static list. + """ + try: + validate_data_categories_against_db(dataset, db) + except PydanticValidationError as e: + raise HTTPException( + status_code=HTTP_422_UNPROCESSABLE_ENTITY, detail=e.errors() + ) + + @router.put( DATASET_VALIDATE, dependencies=[Security(verify_oauth_client, scopes=[DATASET_READ])], @@ -83,7 +100,8 @@ def _get_connection_config( response_model=ValidateDatasetResponse, ) def validate_dataset( - dataset: FidesopsDataset, + dataset: Dataset, + db: Session = Depends(deps.get_db), connection_config: ConnectionConfig = Depends(_get_connection_config), ) -> ValidateDatasetResponse: """ @@ -101,6 +119,7 @@ def validate_dataset( Returns a 200 OK for all valid datasets, and a traversal_details object with information about the traversal (or traversal errors). """ + validate_data_categories(dataset, db) try: # Attempt to generate a traversal for this dataset by providing an empty @@ -146,7 +165,7 @@ def validate_dataset( response_model=BulkPutDataset, ) def patch_datasets( - datasets: conlist(FidesopsDataset, max_items=50), # type: ignore + datasets: conlist(Dataset, max_items=50), # type: ignore db: Session = Depends(deps.get_db), connection_config: ConnectionConfig = Depends(_get_connection_config), ) -> BulkPutDataset: @@ -160,7 +179,7 @@ def patch_datasets( Otherwise, a new dataset will be created. 
""" - created_or_updated: List[FidesopsDataset] = [] + created_or_updated: List[Dataset] = [] failed: List[BulkUpdateFailed] = [] logger.info("Starting bulk upsert for %s datasets", len(datasets)) @@ -173,6 +192,7 @@ def patch_datasets( ) for dataset in datasets: + validate_data_categories(dataset, db) data = { "connection_config_id": connection_config.id, "fides_key": dataset.fides_key, @@ -216,10 +236,11 @@ async def patch_yaml_datasets( datasets = ( yaml_request_body.get("dataset") if isinstance(yaml_request_body, dict) else [] ) - created_or_updated: List[FidesopsDataset] = [] + created_or_updated: List[Dataset] = [] failed: List[BulkUpdateFailed] = [] if isinstance(datasets, list): for dataset in datasets: # type: ignore + validate_data_categories(Dataset(**dataset), db) data: dict = { "connection_config_id": connection_config.id, "fides_key": dataset["fides_key"], @@ -241,7 +262,7 @@ async def patch_yaml_datasets( def create_or_update_dataset( connection_config: ConnectionConfig, - created_or_updated: List[FidesopsDataset], + created_or_updated: List[Dataset], data: dict, dataset: dict, db: Session, @@ -284,7 +305,7 @@ def create_or_update_dataset( def _validate_saas_dataset( - connection_config: ConnectionConfig, dataset: FidesopsDataset + connection_config: ConnectionConfig, dataset: Dataset ) -> None: if connection_config.saas_config is None: raise SaaSConfigNotFoundException( @@ -313,13 +334,13 @@ def _validate_saas_dataset( @router.get( DATASETS, dependencies=[Security(verify_oauth_client, scopes=[DATASET_READ])], - response_model=Page[FidesopsDataset], + response_model=Page[Dataset], ) def get_datasets( db: Session = Depends(deps.get_db), params: Params = Depends(), connection_config: ConnectionConfig = Depends(_get_connection_config), -) -> AbstractPage[FidesopsDataset]: +) -> AbstractPage[Dataset]: """Returns all datasets in the database.""" logger.info( @@ -332,7 +353,7 @@ def get_datasets( ).order_by(DatasetConfig.created_at.desc()) # Generate the 
paginated results, but don't return them as-is. Instead, - # modify the items array to be just the FidesopsDataset instead of the full + # modify the items array to be just the Dataset instead of the full # DatasetConfig. This has to be done *afterwards* to ensure that the # paginated query is handled by paginate() paginated_results = paginate(dataset_configs, params=params) @@ -345,13 +366,13 @@ def get_datasets( @router.get( DATASET_BY_KEY, dependencies=[Security(verify_oauth_client, scopes=[DATASET_READ])], - response_model=FidesopsDataset, + response_model=Dataset, ) def get_dataset( - fides_key: FidesOpsKey, + fides_key: FidesKey, db: Session = Depends(deps.get_db), connection_config: ConnectionConfig = Depends(_get_connection_config), -) -> FidesopsDataset: +) -> Dataset: """Returns a single dataset based on the given key.""" logger.info( @@ -378,7 +399,7 @@ def get_dataset( status_code=HTTP_204_NO_CONTENT, ) def delete_dataset( - fides_key: FidesOpsKey, + fides_key: FidesKey, *, db: Session = Depends(deps.get_db), connection_config: ConnectionConfig = Depends(_get_connection_config), diff --git a/src/fides/api/ops/api/v1/endpoints/messaging_endpoints.py b/src/fides/api/ops/api/v1/endpoints/messaging_endpoints.py index 009b8a5dfd..13cbcf1b4f 100644 --- a/src/fides/api/ops/api/v1/endpoints/messaging_endpoints.py +++ b/src/fides/api/ops/api/v1/endpoints/messaging_endpoints.py @@ -5,6 +5,7 @@ from fastapi_pagination import Page, Params from fastapi_pagination.bases import AbstractPage from fastapi_pagination.ext.sqlalchemy import paginate +from fideslang.validation import FidesKey from sqlalchemy.orm import Session from starlette.exceptions import HTTPException from starlette.status import ( @@ -38,7 +39,6 @@ from fides.api.ops.schemas.messaging.messaging_secrets_docs_only import ( possible_messaging_secrets, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.service.messaging.messaging_crud_service import ( 
create_or_update_messaging_config, delete_messaging_config, @@ -98,7 +98,7 @@ def post_config( response_model=MessagingConfigResponse, ) def patch_config_by_key( - config_key: FidesOpsKey, + config_key: FidesKey, *, db: Session = Depends(deps.get_db), messaging_config: MessagingConfigRequest, @@ -133,7 +133,7 @@ def patch_config_by_key( response_model=TestMessagingStatusMessage, ) def put_config_secrets( - config_key: FidesOpsKey, + config_key: FidesKey, *, db: Session = Depends(deps.get_db), unvalidated_messaging_secrets: possible_messaging_secrets, @@ -206,7 +206,7 @@ def get_configs( response_model=MessagingConfigResponse, ) def get_config_by_key( - config_key: FidesOpsKey, *, db: Session = Depends(deps.get_db) + config_key: FidesKey, *, db: Session = Depends(deps.get_db) ) -> MessagingConfigResponse: """ Retrieves configs for messaging service by key. @@ -228,7 +228,7 @@ def get_config_by_key( dependencies=[Security(verify_oauth_client, scopes=[MESSAGING_DELETE])], ) def delete_config_by_key( - config_key: FidesOpsKey, *, db: Session = Depends(deps.get_db) + config_key: FidesKey, *, db: Session = Depends(deps.get_db) ) -> None: """ Deletes messaging configs by key. 
diff --git a/src/fides/api/ops/api/v1/endpoints/policy_endpoints.py b/src/fides/api/ops/api/v1/endpoints/policy_endpoints.py index cd763ebe15..07cdea4999 100644 --- a/src/fides/api/ops/api/v1/endpoints/policy_endpoints.py +++ b/src/fides/api/ops/api/v1/endpoints/policy_endpoints.py @@ -5,6 +5,7 @@ from fastapi_pagination import Page, Params from fastapi_pagination.bases import AbstractPage from fastapi_pagination.ext.sqlalchemy import paginate +from fideslang.validation import FidesKey from fideslib.exceptions import KeyOrNameAlreadyExists from fideslib.models.client import ClientDetail from pydantic import conlist @@ -27,7 +28,6 @@ from fides.api.ops.models.storage import StorageConfig from fides.api.ops.schemas import policy as schemas from fides.api.ops.schemas.api import BulkUpdateFailed -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.util.api_router import APIRouter from fides.api.ops.util.logger import Pii from fides.api.ops.util.oauth_util import verify_oauth_client @@ -56,7 +56,7 @@ def get_policy_list( return paginate(policies, params=params) -def get_policy_or_error(db: Session, policy_key: FidesOpsKey) -> Policy: +def get_policy_or_error(db: Session, policy_key: FidesKey) -> Policy: """Helper method to load Policy or throw a 404""" logger.info("Finding policy with key '%s'", policy_key) policy = Policy.get_by(db=db, field="key", value=policy_key) @@ -77,7 +77,7 @@ def get_policy_or_error(db: Session, policy_key: FidesOpsKey) -> Policy: ) def get_policy( *, - policy_key: FidesOpsKey, + policy_key: FidesKey, db: Session = Depends(deps.get_db), ) -> schemas.PolicyResponse: """ @@ -161,7 +161,7 @@ def create_or_update_rules( verify_oauth_client, scopes=[scope_registry.RULE_CREATE_OR_UPDATE], ), - policy_key: FidesOpsKey, + policy_key: FidesKey, db: Session = Depends(deps.get_db), input_data: conlist(schemas.RuleCreate, max_items=50) = Body(...), # type: ignore ) -> schemas.BulkPutRuleResponse: @@ -275,8 +275,8 @@ def 
create_or_update_rules( ) def delete_rule( *, - policy_key: FidesOpsKey, - rule_key: FidesOpsKey, + policy_key: FidesKey, + rule_key: FidesKey, db: Session = Depends(deps.get_db), ) -> None: """ @@ -309,8 +309,8 @@ def create_or_update_rule_targets( client: ClientDetail = Security( verify_oauth_client, scopes=[scope_registry.RULE_CREATE_OR_UPDATE] ), - policy_key: FidesOpsKey, - rule_key: FidesOpsKey, + policy_key: FidesKey, + rule_key: FidesKey, db: Session = Depends(deps.get_db), input_data: conlist(schemas.RuleTarget, max_items=50) = Body(...), # type: ignore ) -> schemas.BulkPutRuleTargetResponse: @@ -405,9 +405,9 @@ def create_or_update_rule_targets( ) def delete_rule_target( *, - policy_key: FidesOpsKey, - rule_key: FidesOpsKey, - rule_target_key: FidesOpsKey, + policy_key: FidesKey, + rule_key: FidesKey, + rule_target_key: FidesKey, db: Session = Depends(deps.get_db), ) -> None: """ diff --git a/src/fides/api/ops/api/v1/endpoints/policy_webhook_endpoints.py b/src/fides/api/ops/api/v1/endpoints/policy_webhook_endpoints.py index 0f10b1ecbd..8922355bc0 100644 --- a/src/fides/api/ops/api/v1/endpoints/policy_webhook_endpoints.py +++ b/src/fides/api/ops/api/v1/endpoints/policy_webhook_endpoints.py @@ -5,6 +5,7 @@ from fastapi_pagination import Page, Params from fastapi_pagination.bases import AbstractPage from fastapi_pagination.ext.sqlalchemy import paginate +from fideslang.validation import FidesKey from fideslib.db.base_class import get_key_from_data from fideslib.exceptions import KeyOrNameAlreadyExists from pydantic import conlist @@ -28,7 +29,6 @@ ) from fides.api.ops.schemas import policy_webhooks as schemas from fides.api.ops.schemas.policy_webhooks import PolicyWebhookDeleteResponse -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.util.api_router import APIRouter from fides.api.ops.util.oauth_util import verify_oauth_client @@ -46,7 +46,7 @@ def get_policy_pre_execution_webhooks( *, db: Session = Depends(deps.get_db), - 
policy_key: FidesOpsKey, + policy_key: FidesKey, params: Params = Depends(), ) -> AbstractPage[PolicyPreWebhook]: """ @@ -72,7 +72,7 @@ def get_policy_pre_execution_webhooks( def get_policy_post_execution_webhooks( *, db: Session = Depends(deps.get_db), - policy_key: FidesOpsKey, + policy_key: FidesKey, params: Params = Depends(), ) -> AbstractPage[PolicyPostWebhook]: """ @@ -91,7 +91,7 @@ def get_policy_post_execution_webhooks( def put_webhooks( webhook_cls: WebhookTypes, - policy_key: FidesOpsKey, + policy_key: FidesKey, db: Session = Depends(deps.get_db), webhooks: List[schemas.PolicyWebhookCreate] = Body(...), ) -> List[WebhookTypes]: @@ -176,7 +176,7 @@ def put_webhooks( ) def create_or_update_pre_execution_webhooks( *, - policy_key: FidesOpsKey, + policy_key: FidesKey, db: Session = Depends(deps.get_db), webhooks: conlist(schemas.PolicyWebhookCreate, max_items=50) = Body(...), # type: ignore ) -> List[PolicyPreWebhook]: @@ -199,7 +199,7 @@ def create_or_update_pre_execution_webhooks( ) def create_or_update_post_execution_webhooks( *, - policy_key: FidesOpsKey, + policy_key: FidesKey, db: Session = Depends(deps.get_db), webhooks: conlist(schemas.PolicyWebhookCreate, max_items=50) = Body(...), # type: ignore ) -> List[PolicyPostWebhook]: @@ -215,7 +215,7 @@ def create_or_update_post_execution_webhooks( def get_policy_webhook_or_error( db: Session, policy: Policy, - webhook_key: FidesOpsKey, + webhook_key: FidesKey, webhook_cls: WebhookTypes, ) -> WebhookTypes: """Helper method to load a Pre-Execution or Post-Execution Policy Webhook or 404 @@ -251,8 +251,8 @@ def get_policy_webhook_or_error( def get_policy_pre_execution_webhook( *, db: Session = Depends(deps.get_db), - policy_key: FidesOpsKey, - pre_webhook_key: FidesOpsKey, + policy_key: FidesKey, + pre_webhook_key: FidesKey, ) -> PolicyPreWebhook: """ Loads the given Pre-Execution Webhook on the Policy @@ -270,8 +270,8 @@ def get_policy_pre_execution_webhook( def get_policy_post_execution_webhook( *, db: 
Session = Depends(deps.get_db), - policy_key: FidesOpsKey, - post_webhook_key: FidesOpsKey, + policy_key: FidesKey, + post_webhook_key: FidesKey, ) -> PolicyPostWebhook: """ Loads the given Post-Execution Webhook on the Policy @@ -283,8 +283,8 @@ def get_policy_post_execution_webhook( def _patch_webhook( *, db: Session = Depends(deps.get_db), - policy_key: FidesOpsKey, - webhook_key: FidesOpsKey, + policy_key: FidesKey, + webhook_key: FidesKey, webhook_body: schemas.PolicyWebhookUpdate = Body(...), webhook_cls: WebhookTypes, ) -> schemas.PolicyWebhookUpdateResponse: @@ -359,8 +359,8 @@ def _patch_webhook( def update_pre_execution_webhook( *, db: Session = Depends(deps.get_db), - policy_key: FidesOpsKey, - pre_webhook_key: FidesOpsKey, + policy_key: FidesKey, + pre_webhook_key: FidesKey, webhook_body: schemas.PolicyWebhookUpdate = Body(...), ) -> schemas.PolicyWebhookUpdateResponse: """PATCH a single Policy Pre-Execution Webhook that runs **prior** to executing the Privacy Request. @@ -387,8 +387,8 @@ def update_pre_execution_webhook( def update_post_execution_webhook( *, db: Session = Depends(deps.get_db), - policy_key: FidesOpsKey, - post_webhook_key: FidesOpsKey, + policy_key: FidesKey, + post_webhook_key: FidesKey, webhook_body: schemas.PolicyWebhookUpdate = Body(...), ) -> schemas.PolicyWebhookUpdateResponse: """PATCH a single Policy Post-Execution Webhook that runs **after** executing the Privacy Request. @@ -407,8 +407,8 @@ def update_post_execution_webhook( def delete_webhook( *, db: Session = Depends(deps.get_db), - policy_key: FidesOpsKey, - webhook_key: FidesOpsKey, + policy_key: FidesKey, + webhook_key: FidesKey, webhook_cls: WebhookTypes, ) -> PolicyWebhookDeleteResponse: """Handles deleting Pre- or Post-Execution Policy Webhooks. 
Related webhooks are reordered as necessary""" @@ -456,8 +456,8 @@ def delete_webhook( def delete_pre_execution_webhook( *, db: Session = Depends(deps.get_db), - policy_key: FidesOpsKey, - pre_webhook_key: FidesOpsKey, + policy_key: FidesKey, + pre_webhook_key: FidesKey, ) -> schemas.PolicyWebhookDeleteResponse: """Delete the Pre-Execution Webhook from the Policy and reorder remaining webhooks as necessary.""" return delete_webhook( @@ -477,8 +477,8 @@ def delete_pre_execution_webhook( def delete_post_execution_webhook( *, db: Session = Depends(deps.get_db), - policy_key: FidesOpsKey, - post_webhook_key: FidesOpsKey, + policy_key: FidesKey, + post_webhook_key: FidesKey, ) -> schemas.PolicyWebhookDeleteResponse: """Delete the Post-Execution Webhook from the Policy and reorder remaining webhooks as necessary.""" return delete_webhook( diff --git a/src/fides/api/ops/api/v1/endpoints/saas_config_endpoints.py b/src/fides/api/ops/api/v1/endpoints/saas_config_endpoints.py index fdd54d9b12..6e41d73884 100644 --- a/src/fides/api/ops/api/v1/endpoints/saas_config_endpoints.py +++ b/src/fides/api/ops/api/v1/endpoints/saas_config_endpoints.py @@ -3,6 +3,7 @@ from fastapi import Depends, HTTPException from fastapi.params import Security +from fideslang.validation import FidesKey from fideslib.exceptions import KeyOrNameAlreadyExists from sqlalchemy.orm import Session from starlette.status import ( @@ -43,7 +44,6 @@ SaaSConfigValidationDetails, ValidateSaaSConfigResponse, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.service.authentication.authentication_strategy import ( AuthenticationStrategy, ) @@ -66,7 +66,7 @@ # Helper method to inject the parent ConnectionConfig into these child routes def _get_saas_connection_config( - connection_key: FidesOpsKey, db: Session = Depends(deps.get_db) + connection_key: FidesKey, db: Session = Depends(deps.get_db) ) -> ConnectionConfig: logger.info("Finding connection config with key '%s'", connection_key) 
connection_config = ConnectionConfig.get_by(db, field="key", value=connection_key) diff --git a/src/fides/api/ops/api/v1/endpoints/storage_endpoints.py b/src/fides/api/ops/api/v1/endpoints/storage_endpoints.py index 2e8f1b4eee..1f1cd1fd66 100644 --- a/src/fides/api/ops/api/v1/endpoints/storage_endpoints.py +++ b/src/fides/api/ops/api/v1/endpoints/storage_endpoints.py @@ -5,6 +5,7 @@ from fastapi_pagination import Page, Params from fastapi_pagination.bases import AbstractPage from fastapi_pagination.ext.sqlalchemy import paginate +from fideslang.validation import FidesKey from fideslib.exceptions import KeyOrNameAlreadyExists from pydantic import conlist from requests import RequestException @@ -40,7 +41,6 @@ from fides.api.ops.schemas.connection_configuration.connection_secrets import ( TestStatusMessage, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.schemas.storage.data_upload_location_response import DataUpload from fides.api.ops.schemas.storage.storage import ( BulkPutStorageConfigResponse, @@ -73,7 +73,7 @@ def upload_data( *, db: Session = Depends(deps.get_db), data: Dict = Body(...), - storage_key: FidesOpsKey = Body(...), + storage_key: FidesKey = Body(...), ) -> DataUpload: """ Uploads data from an access request to specified storage destination. @@ -163,7 +163,7 @@ def patch_config( response_model=TestStatusMessage, ) def put_config_secrets( - config_key: FidesOpsKey, + config_key: FidesKey, *, db: Session = Depends(deps.get_db), unvalidated_storage_secrets: possible_storage_secrets, @@ -250,7 +250,7 @@ def get_configs( response_model=StorageDestinationResponse, ) def get_config_by_key( - config_key: FidesOpsKey, *, db: Session = Depends(deps.get_db) + config_key: FidesKey, *, db: Session = Depends(deps.get_db) ) -> Optional[StorageConfig]: """ Retrieves configs for storage by key. 
@@ -272,7 +272,7 @@ def get_config_by_key( dependencies=[Security(verify_oauth_client, scopes=[STORAGE_DELETE])], ) def delete_config_by_key( - config_key: FidesOpsKey, *, db: Session = Depends(deps.get_db) + config_key: FidesKey, *, db: Session = Depends(deps.get_db) ) -> None: """ Deletes configs by key. diff --git a/src/fides/api/ops/graph/config.py b/src/fides/api/ops/graph/config.py index 5e77e69b44..2fe6aed445 100644 --- a/src/fides/api/ops/graph/config.py +++ b/src/fides/api/ops/graph/config.py @@ -82,6 +82,7 @@ from dataclasses import dataclass from typing import Any, Callable, Dict, List, Literal, Optional, Set, Tuple +from fideslang.validation import FidesKey from pydantic import BaseModel, validator from fides.api.ops.common_exceptions import FidesopsException @@ -90,7 +91,6 @@ DataTypeConverter, get_data_type_converter, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.util.collection_util import merge_dicts from fides.api.ops.util.querytoken import QueryToken @@ -239,7 +239,7 @@ class Field(BaseModel, ABC): """references to other fields in any other datasets""" identity: Optional[SeedAddress] = None """an optional pointer to an arbitrary key in an expected json package provided as a seed value""" - data_categories: Optional[List[FidesOpsKey]] + data_categories: Optional[List[FidesKey]] data_type_converter: DataTypeConverter = DataType.no_op.value return_all_elements: Optional[bool] = None # Should field be returned by query if it is in an entrypoint array field, or just if it matches query? @@ -308,8 +308,8 @@ class ObjectField(Field): @validator("data_categories") @classmethod def validate_data_categories( - cls, value: Optional[List[FidesOpsKey]] - ) -> Optional[List[FidesOpsKey]]: + cls, value: Optional[List[FidesKey]] + ) -> Optional[List[FidesKey]]: """To prevent mismatches between data categories on an ObjectField and a nested ScalarField, only allow data categories to be defined on the individual fields. 
@@ -365,7 +365,7 @@ def __eq__(self, other: object) -> bool: # pylint: disable=too-many-arguments def generate_field( name: str, - data_categories: Optional[List[str]], + data_categories: Optional[List[FidesKey]], identity: Optional[str], data_type_name: str, references: List[Tuple[FieldAddress, Optional[EdgeDirection]]], @@ -466,7 +466,7 @@ def field(self, field_path: FieldPath) -> Optional[Field]: return self.field_dict[field_path] if field_path in self.field_dict else None @property - def field_paths_by_category(self) -> Dict[FidesOpsKey, List[FieldPath]]: + def field_paths_by_category(self) -> Dict[FidesKey, List[FieldPath]]: """Returns mapping of data categories to a list of FieldPaths, flips FieldPaths -> categories to be categories -> FieldPaths. @@ -491,7 +491,7 @@ class Config: arbitrary_types_allowed = True -class Dataset(BaseModel): +class GraphDataset(BaseModel): """Master collection of collections that are accessed in a common way""" name: str @@ -499,4 +499,4 @@ class Dataset(BaseModel): # an optional list of datasets that this dataset must run after after: Set[DatasetAddress] = set() # ConnectionConfig key - connection_key: FidesOpsKey + connection_key: FidesKey diff --git a/src/fides/api/ops/graph/graph.py b/src/fides/api/ops/graph/graph.py index 09e2e6fdd3..e96df7fc9b 100644 --- a/src/fides/api/ops/graph/graph.py +++ b/src/fides/api/ops/graph/graph.py @@ -4,18 +4,19 @@ from collections import defaultdict from typing import Callable, Dict, List, Optional, Set, Tuple +from fideslang.validation import FidesKey + from fides.api.ops.common_exceptions import ValidationError from fides.api.ops.graph.config import ( Collection, CollectionAddress, - Dataset, EdgeDirection, Field, FieldAddress, FieldPath, + GraphDataset, SeedAddress, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey logger = logging.getLogger(__name__) @@ -30,7 +31,7 @@ class Node: Node children are any nodes that are reachable via this traversal_node. 
""" - def __init__(self, dataset: Dataset, collection: Collection): + def __init__(self, dataset: GraphDataset, collection: Collection): self.address = CollectionAddress(dataset.name, collection.name) self.dataset = dataset self.collection = collection @@ -182,7 +183,7 @@ class DatasetGraph: (or nodes) represent the start nodes. """ - def __init__(self, *datasets: Dataset) -> None: + def __init__(self, *datasets: GraphDataset) -> None: """We create all edges based on field specifications. We also add child references to nodes. Note that this means that this is a destructive operation on the input datasets, as it @@ -231,7 +232,7 @@ def __init__(self, *datasets: Dataset) -> None: @property def data_category_field_mapping( self, - ) -> Dict[CollectionAddress, Dict[FidesOpsKey, List[FieldPath]]]: + ) -> Dict[CollectionAddress, Dict[FidesKey, List[FieldPath]]]: """ Maps the data_categories for each traversal_node to a list of field paths that have that same data category. @@ -248,9 +249,9 @@ def data_category_field_mapping( } """ - mapping: Dict[ - CollectionAddress, Dict[FidesOpsKey, List[FieldPath]] - ] = defaultdict(lambda: defaultdict(list)) + mapping: Dict[CollectionAddress, Dict[FidesKey, List[FieldPath]]] = defaultdict( + lambda: defaultdict(list) + ) for node_address, node in self.nodes.items(): mapping[node_address] = node.collection.field_paths_by_category return mapping diff --git a/src/fides/api/ops/graph/traversal.py b/src/fides/api/ops/graph/traversal.py index baa732f218..d0c760b791 100644 --- a/src/fides/api/ops/graph/traversal.py +++ b/src/fides/api/ops/graph/traversal.py @@ -10,10 +10,10 @@ ROOT_COLLECTION_ADDRESS, Collection, CollectionAddress, - Dataset, Field, FieldAddress, FieldPath, + GraphDataset, ) from fides.api.ops.graph.graph import DatasetGraph, Edge, Node from fides.api.ops.util.collection_util import Row, append @@ -172,7 +172,10 @@ def artificial_traversal_node(address: CollectionAddress) -> TraversalNode: have no actual corresponding 
collection dataset""" ds: Collection = Collection(name=address.collection, fields=[]) node = Node( - Dataset(name=address.dataset, collections=[ds], connection_key="__IGNORE__"), ds + GraphDataset( + name=address.dataset, collections=[ds], connection_key="__IGNORE__" + ), + ds, ) return TraversalNode(node) diff --git a/src/fides/api/ops/models/datasetconfig.py b/src/fides/api/ops/models/datasetconfig.py index 7f06e1a4ad..ea230d496a 100644 --- a/src/fides/api/ops/models/datasetconfig.py +++ b/src/fides/api/ops/models/datasetconfig.py @@ -1,6 +1,8 @@ import logging from typing import Any, Dict, Optional, Set +from fideslang.models import Dataset, DatasetField, FidesDatasetReference +from fideslang.validation import FidesKey from fideslib.db.base_class import Base from sqlalchemy import Column, ForeignKey, String from sqlalchemy.dialects.postgresql import JSONB @@ -11,20 +13,14 @@ from fides.api.ops.graph.config import ( Collection, CollectionAddress, - Dataset, Field, FieldAddress, FieldPath, + GraphDataset, generate_field, ) from fides.api.ops.graph.data_type import parse_data_type_string from fides.api.ops.models.connectionconfig import ConnectionConfig, ConnectionType -from fides.api.ops.schemas.dataset import ( - FidesopsDataset, - FidesopsDatasetField, - FidesopsDatasetReference, -) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.util.saas_util import merge_datasets logger = logging.getLogger(__name__) @@ -73,7 +69,7 @@ def create_or_update(cls, db: Session, *, data: Dict[str, Any]) -> "DatasetConfi return dataset - def get_graph(self) -> Dataset: + def get_graph(self) -> GraphDataset: """ Return the saved dataset JSON as a dataset graph for query execution. 
@@ -81,7 +77,7 @@ def get_graph(self) -> Dataset: the corresponding SaaS config is merged in as well """ dataset_graph = convert_dataset_to_graph( - FidesopsDataset(**self.dataset), self.connection_config.key # type: ignore + Dataset(**self.dataset), self.connection_config.key # type: ignore ) if ( self.connection_config.connection_type == ConnectionType.saas @@ -102,7 +98,7 @@ def get_graph(self) -> Dataset: def to_graph_field( - field: FidesopsDatasetField, return_all_elements: Optional[bool] = None + field: DatasetField, return_all_elements: Optional[bool] = None ) -> Field: """Flattens the dataset field type into its graph representation""" @@ -114,7 +110,7 @@ def to_graph_field( is_pk = False is_array = False references = [] - meta_section = field.fidesops_meta + meta_section = field.fides_meta sub_fields = [] length = None data_type_name = None @@ -182,8 +178,8 @@ def to_graph_field( def convert_dataset_to_graph( - dataset: FidesopsDataset, connection_key: FidesOpsKey -) -> Dataset: + dataset: Dataset, connection_key: FidesKey +) -> GraphDataset: """ Converts the given Fides dataset dataset into the concrete graph representation needed for query execution @@ -191,8 +187,8 @@ def convert_dataset_to_graph( dataset_name = dataset.fides_key after = set() - if dataset.fidesops_meta and dataset.fidesops_meta.after: - after = set(dataset.fidesops_meta.after) + if dataset.fides_meta and dataset.fides_meta.after: + after = set(dataset.fides_meta.after) logger.debug("Parsing dataset '%s' into graph representation", dataset_name) graph_collections = [] for collection in dataset.collections: @@ -204,9 +200,9 @@ def convert_dataset_to_graph( len(graph_fields), ) collection_after: Set[CollectionAddress] = set() - if collection.fidesops_meta and collection.fidesops_meta.after: + if collection.fides_meta and collection.fides_meta.after: collection_after = { - CollectionAddress(*s.split(".")) for s in collection.fidesops_meta.after + CollectionAddress(*s.split(".")) for s in 
collection.fides_meta.after } graph_collection = Collection( @@ -219,7 +215,7 @@ def convert_dataset_to_graph( len(graph_collections), ) - return Dataset( + return GraphDataset( name=dataset_name, collections=graph_collections, connection_key=connection_key, @@ -228,10 +224,10 @@ def convert_dataset_to_graph( def validate_dataset_reference( - db: Session, dataset_reference: FidesopsDatasetReference + db: Session, dataset_reference: FidesDatasetReference ) -> None: """ - Validates that the provided FidesopsDatasetReference refers + Validates that the provided FidesDatasetReference refers to a `Dataset`, `Collection` and `Field` that actually exist in the DB. Raises a `ValidationError` if not. """ @@ -244,8 +240,8 @@ def validate_dataset_reference( raise ValidationError( f"Unknown dataset '{dataset_reference.dataset}' referenced by external reference" ) - dataset: Dataset = convert_dataset_to_graph( - FidesopsDataset(**dataset_config.dataset), dataset_config.fides_key # type: ignore[arg-type] + dataset: GraphDataset = convert_dataset_to_graph( + Dataset(**dataset_config.dataset), dataset_config.fides_key # type: ignore[arg-type] ) collection_name, *field_name = dataset_reference.field.split(".") if not field_name or not collection_name or not field_name[0]: diff --git a/src/fides/api/ops/models/policy.py b/src/fides/api/ops/models/policy.py index 1aba0b73f2..0830f2fc90 100644 --- a/src/fides/api/ops/models/policy.py +++ b/src/fides/api/ops/models/policy.py @@ -4,6 +4,7 @@ from fideslang import DEFAULT_TAXONOMY from fideslang.models import DataCategory as FideslangDataCategory +from fideslang.validation import FidesKey from fideslib.db.base_class import Base, FidesBase from fideslib.models.client import ClientDetail from sqlalchemy import Column @@ -21,7 +22,6 @@ from fides.api.ops.db.base_class import JSONTypeOverride from fides.api.ops.models.connectionconfig import ConnectionConfig from fides.api.ops.models.storage import StorageConfig -from 
fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.util.data_category import _validate_data_category from fides.ctl.core.config import get_config @@ -152,7 +152,7 @@ def get_rules_for_action(self, action_type: ActionType) -> List["Rule"]: return [rule for rule in self.rules if rule.action_type == action_type] -def _get_ref_from_taxonomy(fides_key: FidesOpsKey) -> FideslangDataCategory: +def _get_ref_from_taxonomy(fides_key: FidesKey) -> FideslangDataCategory: """Returns the DataCategory model from the DEFAULT_TAXONOMY corresponding to fides_key.""" for item in DEFAULT_TAXONOMY.data_category: if item.fides_key == fides_key: @@ -164,7 +164,7 @@ def _get_ref_from_taxonomy(fides_key: FidesOpsKey) -> FideslangDataCategory: def _is_ancestor_of_contained_categories( - fides_key: FidesOpsKey, + fides_key: FidesKey, data_categories: List[str], ) -> Tuple[bool, Optional[str]]: """ diff --git a/src/fides/api/ops/schemas/connection_configuration/connection_config.py b/src/fides/api/ops/schemas/connection_configuration/connection_config.py index 8beaefdd53..dfbfc84cba 100644 --- a/src/fides/api/ops/schemas/connection_configuration/connection_config.py +++ b/src/fides/api/ops/schemas/connection_configuration/connection_config.py @@ -2,14 +2,14 @@ from enum import Enum from typing import List, Optional, Union +from fideslang.models import Dataset +from fideslang.validation import FidesKey from pydantic import BaseModel, Extra from fides.api.ops.models.connectionconfig import AccessLevel, ConnectionType from fides.api.ops.schemas.api import BulkResponse, BulkUpdateFailed from fides.api.ops.schemas.connection_configuration import connection_secrets_schemas -from fides.api.ops.schemas.dataset import FidesopsDataset from fides.api.ops.schemas.saas.saas_config import SaaSConfigBase -from fides.api.ops.schemas.shared_schemas import FidesOpsKey class CreateConnectionConfiguration(BaseModel): @@ -20,7 +20,7 @@ class CreateConnectionConfiguration(BaseModel): """ 
name: str - key: Optional[FidesOpsKey] + key: Optional[FidesKey] connection_type: ConnectionType access: AccessLevel disabled: Optional[bool] = False @@ -80,7 +80,7 @@ class ConnectionConfigurationResponse(BaseModel): """ name: str - key: FidesOpsKey + key: FidesKey description: Optional[str] connection_type: ConnectionType access: AccessLevel @@ -108,12 +108,12 @@ class SaasConnectionTemplateValues(BaseModel): """Schema with values to create both a Saas ConnectionConfig and DatasetConfig from a template""" name: str # For ConnectionConfig - key: Optional[FidesOpsKey] # For ConnectionConfig + key: Optional[FidesKey] # For ConnectionConfig description: Optional[str] # For ConnectionConfig secrets: connection_secrets_schemas # For ConnectionConfig - instance_key: FidesOpsKey # For DatasetConfig.fides_key + instance_key: FidesKey # For DatasetConfig.fides_key class SaasConnectionTemplateResponse(BaseModel): connection: ConnectionConfigurationResponse - dataset: FidesopsDataset + dataset: Dataset diff --git a/src/fides/api/ops/schemas/connection_configuration/connection_secrets_saas.py b/src/fides/api/ops/schemas/connection_configuration/connection_secrets_saas.py index 2b6d16fda1..b8c26f3929 100644 --- a/src/fides/api/ops/schemas/connection_configuration/connection_secrets_saas.py +++ b/src/fides/api/ops/schemas/connection_configuration/connection_secrets_saas.py @@ -1,6 +1,7 @@ import abc from typing import Any, Dict, List, Type +from fideslang import FidesDatasetReference from pydantic import BaseModel, Extra, Field, PrivateAttr, create_model, root_validator from pydantic.fields import FieldInfo from sqlalchemy.orm import Session @@ -10,7 +11,6 @@ from fides.api.ops.schemas.connection_configuration.connection_secrets import ( ConnectionConfigSecretsSchema, ) -from fides.api.ops.schemas.dataset import FidesopsDatasetReference from fides.api.ops.schemas.saas.saas_config import SaaSConfig @@ -119,7 +119,7 @@ def get_saas_schema(self) -> Type[SaaSSchema]: if 
self.saas_config.external_references: for external_reference in self.saas_config.external_references: field_definitions[external_reference.name] = ( - FidesopsDatasetReference, + FidesDatasetReference, FieldInfo( title=external_reference.label, description=external_reference.description, @@ -163,7 +163,7 @@ def validate_saas_secrets_external_references( ) -> None: external_references = schema.external_references() for external_reference in external_references: - dataset_reference: FidesopsDatasetReference = getattr( + dataset_reference: FidesDatasetReference = getattr( connection_secrets, external_reference ) if dataset_reference.direction == "to": diff --git a/src/fides/api/ops/schemas/dataset.py b/src/fides/api/ops/schemas/dataset.py index 5c0fe53870..6f72803ea6 100644 --- a/src/fides/api/ops/schemas/dataset.py +++ b/src/fides/api/ops/schemas/dataset.py @@ -1,219 +1,81 @@ -from typing import Any, Dict, List, Optional +from typing import Any, List, Optional -from fideslang.models import Dataset, DatasetCollection, DatasetFieldBase -from pydantic import BaseModel, ConstrainedStr, Field, validator +from fideslang.models import Dataset, DatasetCollection, DatasetField +from fideslang.validation import FidesKey +from loguru import logger +from pydantic import BaseModel, validator +from sqlalchemy.orm import Session -from fides.api.ops.common_exceptions import ( - InvalidDataLengthValidationError, - InvalidDataTypeValidationError, -) -from fides.api.ops.graph.config import EdgeDirection -from fides.api.ops.graph.data_type import is_valid_data_type, parse_data_type_string +from fides.api.ctl.sql_models import DataCategory # type: ignore[attr-defined] +from fides.api.ops import common_exceptions from fides.api.ops.schemas.api import BulkResponse, BulkUpdateFailed from fides.api.ops.schemas.base_class import BaseSchema -from fides.api.ops.schemas.shared_schemas import FidesOpsKey -from fides.api.ops.util.data_category import _validate_data_category +from 
fides.api.ops.util.data_category import ( + DataCategory as DefaultTaxonomyDataCategories, +) -def _valid_data_categories( - data_categories: Optional[List[FidesOpsKey]], -) -> Optional[List[FidesOpsKey]]: +def validate_data_categories_against_db(dataset: Dataset, db: Session) -> None: """ - Ensure that every data category provided matches a valid category defined in - the current taxonomy. Throws an error if any of the categories are invalid, - or otherwise returns the list of categories unchanged. - """ - - if data_categories: - return [dc for dc in data_categories if _validate_data_category(dc)] - return data_categories - - -def _valid_data_type(data_type_str: Optional[str]) -> Optional[str]: - """If the data_type is provided ensure that it is a member of DataType.""" - - dt, _ = parse_data_type_string(data_type_str) - if not is_valid_data_type(dt): # type: ignore - raise InvalidDataTypeValidationError( - f"The data type {data_type_str} is not supported." - ) - - return data_type_str - - -def _valid_data_length(data_length: Optional[int]) -> Optional[int]: - """If the data_length is provided ensure that it is a positive non-zero value.""" + Validate that data_categories defined on the Dataset, Collection, and Field levels exist + in the database. Doing this instead of a traditional validator function to have + access to a database session. - if data_length is not None and data_length <= 0: - raise InvalidDataLengthValidationError( - f"Illegal length ({data_length}). Only positive non-zero values are allowed." - ) - - return data_length - - -class FidesCollectionKey(ConstrainedStr): - """ - Dataset:Collection name where both dataset and collection names are valid FidesKeys + If no data categories in the database, default to using data categories from the default taxonomy. 
""" - - @classmethod - def validate(cls, value: str) -> str: - """ - Overrides validation to check FidesCollectionKey format, and that both the dataset - and collection names have the FidesKey format. - """ - values = value.split(".") - if len(values) == 2: - FidesOpsKey.validate(values[0]) - FidesOpsKey.validate(values[1]) - return value - raise ValueError( - "FidesCollection must be specified in the form 'FidesKey.FidesKey'" + defined_data_categories: List[FidesKey] = [ + cat[0] for cat in db.query(DataCategory.fides_key).all() + ] + if not defined_data_categories: + logger.info( + "No data categories in the database: reverting to default data categories." ) + defined_data_categories = list(DefaultTaxonomyDataCategories.__members__.keys()) + class DataCategoryValidationMixin(BaseModel): + @validator("data_categories", check_fields=False, allow_reuse=True) + def valid_data_categories( + cls, v: Optional[List[FidesKey]] + ) -> Optional[List[FidesKey]]: + """Validate that all annotated data categories exist in the taxonomy""" + return _valid_data_categories(v, defined_data_categories) -# NOTE: this extends pydantic.BaseModel instead of our BaseSchema, for -# consistency with other fideslang models -class FidesopsDatasetReference(BaseModel): - """Reference to a field from another Collection""" - - dataset: FidesOpsKey - field: str - direction: Optional[EdgeDirection] - - -class FidesopsDatasetMeta(BaseModel): - """ "Dataset-level fidesops-specific annotations used for query traversal""" - - after: Optional[List[FidesOpsKey]] - - -class FidesopsCollectionMeta(BaseModel): - """Collection-level fidesops-specific annotations used for query traversal""" + class FieldDataCategoryValidation(DatasetField, DataCategoryValidationMixin): + fields: Optional[List["FieldDataCategoryValidation"]] - after: Optional[List[FidesCollectionKey]] + FieldDataCategoryValidation.update_forward_refs() + class CollectionDataCategoryValidation( + DatasetCollection, DataCategoryValidationMixin + 
): + fields: List[FieldDataCategoryValidation] = [] -class FidesopsMeta(BaseModel): - """Fidesops-specific annotations used for query traversal""" + class DatasetDataCategoryValidation(Dataset, DataCategoryValidationMixin): + collections: List[CollectionDataCategoryValidation] - references: Optional[List[FidesopsDatasetReference]] - identity: Optional[str] - primary_key: Optional[bool] - data_type: Optional[str] - """Optionally specify the data type. Fidesops will attempt to cast values to this type when querying.""" - length: Optional[int] - """Optionally specify the allowable field length. Fidesops will not generate values that exceed this size.""" - return_all_elements: Optional[bool] - """Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False.""" - read_only: Optional[bool] - """Optionally specify if a field is read-only, meaning it can't be updated or deleted.""" + DatasetDataCategoryValidation(**dataset.dict()) - @validator("data_type") - def valid_data_type(cls, v: Optional[str]) -> Optional[str]: - """Validate that all annotated data categories exist in the taxonomy""" - return _valid_data_type(v) - @validator("length") - def valid_length(cls, v: Optional[int]) -> Optional[int]: - """Validate that the provided length is valid""" - return _valid_data_length(v) - - -class FidesopsDatasetField(DatasetFieldBase): - """Extends fideslang DatasetField model with additional Fidesops annotations""" - - fidesops_meta: Optional[FidesopsMeta] - fields: Optional[List["FidesopsDatasetField"]] = [] - - @validator("data_categories") - def valid_data_categories( - cls, v: Optional[List[FidesOpsKey]] - ) -> Optional[List[FidesOpsKey]]: - """Validate that all annotated data categories exist in the taxonomy""" - return _valid_data_categories(v) - - @validator("fidesops_meta") - def valid_meta(cls, meta_values: Optional[FidesopsMeta]) -> Optional[FidesopsMeta]: - """Validate upfront that the return_all_elements flag can 
only be specified on array fields""" - if not meta_values: - return meta_values - - is_array: bool = bool( - meta_values.data_type and meta_values.data_type.endswith("[]") - ) - if not is_array and meta_values.return_all_elements is not None: - raise ValueError( - "The 'return_all_elements' attribute can only be specified on array fields." - ) - return meta_values - - @validator("fields") - def validate_object_fields( - cls, - fields: Optional[List["FidesopsDatasetField"]], - values: Dict[str, Any], - ) -> Optional[List["FidesopsDatasetField"]]: - """Two validation checks for object fields: - - If there are sub-fields specified, type should be either empty or 'object' - - Additionally object fields cannot have data_categories. - """ - declared_data_type = None - - if values.get("fidesops_meta"): - declared_data_type = values["fidesops_meta"].data_type - - if fields and declared_data_type: - data_type, _ = parse_data_type_string(declared_data_type) - if data_type != "object": - raise InvalidDataTypeValidationError( - f"The data type {data_type} is not compatible with specified sub-fields." - ) +def _valid_data_categories( + proposed_data_categories: Optional[List[FidesKey]], + defined_data_categories: List[FidesKey], +) -> Optional[List[FidesKey]]: + """ + Ensure that every data category provided matches a valid defined data category. + Throws an error if any of the categories are invalid, + or otherwise returns the list of categories unchanged. + """ - if (fields or declared_data_type == "object") and values.get("data_categories"): - raise ValueError( - "Object fields cannot have specified data_categories. Specify category on sub-field instead" + def validate_category(data_category: FidesKey) -> FidesKey: + if data_category not in defined_data_categories: + raise common_exceptions.DataCategoryNotSupported( + f"The data category {data_category} is not supported." 
) + return data_category - return fields - - -# this is required for the recursive reference in the pydantic model: -FidesopsDatasetField.update_forward_refs() - - -class FidesopsDatasetCollection(DatasetCollection): - """Overrides fideslang DatasetCollection model with additional Fidesops annotations""" - - fidesops_meta: Optional[FidesopsCollectionMeta] - fields: List[FidesopsDatasetField] - """Overrides fideslang.models.DatasetCollection.fields""" - - @validator("data_categories") - def valid_data_categories( - cls, v: Optional[List[FidesOpsKey]] - ) -> Optional[List[FidesOpsKey]]: - """Validate that all annotated data categories exist in the taxonomy""" - return _valid_data_categories(v) - - -class FidesopsDataset(Dataset): - """Overrides fideslang Collection model with additional Fidesops annotations""" - - fides_key: FidesOpsKey = Field( - description="A unique key used to identify this resource." - ) - fidesops_meta: Optional[FidesopsDatasetMeta] - collections: List[FidesopsDatasetCollection] - """Overrides fideslang.models.Collection.collections""" - - @validator("data_categories") - def valid_data_categories( - cls, v: Optional[List[FidesOpsKey]] - ) -> Optional[List[FidesOpsKey]]: - """Validate that all annotated data categories exist in the taxonomy""" - return _valid_data_categories(v) + if proposed_data_categories: + return [dc for dc in proposed_data_categories if validate_category(dc)] + return proposed_data_categories class DatasetTraversalDetails(BaseSchema): @@ -233,14 +95,14 @@ class ValidateDatasetResponse(BaseSchema): traversable or not. 
""" - dataset: FidesopsDataset + dataset: Dataset traversal_details: DatasetTraversalDetails class BulkPutDataset(BulkResponse): """Schema with mixed success/failure responses for Bulk Create/Update of Datasets.""" - succeeded: List[FidesopsDataset] + succeeded: List[Dataset] failed: List[BulkUpdateFailed] diff --git a/src/fides/api/ops/schemas/messaging/messaging.py b/src/fides/api/ops/schemas/messaging/messaging.py index 5df7bf87a2..0a9608d5e4 100644 --- a/src/fides/api/ops/schemas/messaging/messaging.py +++ b/src/fides/api/ops/schemas/messaging/messaging.py @@ -2,11 +2,11 @@ from re import compile as regex from typing import Any, Dict, List, Optional, Tuple, Union +from fideslang.validation import FidesKey from pydantic import BaseModel, Extra, root_validator from fides.api.ops.models.privacy_request import CheckpointActionRequired from fides.api.ops.schemas import Msg -from fides.api.ops.schemas.shared_schemas import FidesOpsKey class MessagingMethod(Enum): @@ -203,7 +203,7 @@ class MessagingConfigRequest(BaseModel): """Messaging Config Request Schema""" name: str - key: Optional[FidesOpsKey] + key: Optional[FidesKey] service_type: MessagingServiceType details: Optional[MessagingServiceDetailsMailgun] @@ -228,7 +228,7 @@ class MessagingConfigResponse(BaseModel): """Messaging Config Response Schema""" name: str - key: FidesOpsKey + key: FidesKey service_type: MessagingServiceType details: Optional[Dict[MessagingServiceDetails, Any]] diff --git a/src/fides/api/ops/schemas/policy.py b/src/fides/api/ops/schemas/policy.py index 2084302315..8ce7c999ea 100644 --- a/src/fides/api/ops/schemas/policy.py +++ b/src/fides/api/ops/schemas/policy.py @@ -1,9 +1,10 @@ from typing import Any, Dict, List, Optional +from fideslang.validation import FidesKey + from fides.api.ops.models.policy import ActionType, DrpAction from fides.api.ops.schemas.api import BulkResponse, BulkUpdateFailed from fides.api.ops.schemas.base_class import BaseSchema -from 
fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.schemas.storage.storage import StorageDestinationResponse from fides.api.ops.util.data_category import DataCategory @@ -29,7 +30,7 @@ class RuleTarget(BaseSchema): """An external representation of a Rule's target DataCategory within a Fidesops Policy""" name: Optional[str] - key: Optional[FidesOpsKey] + key: Optional[FidesKey] data_category: DataCategory # type: ignore class Config: @@ -42,7 +43,7 @@ class RuleBase(BaseSchema): """An external representation of a Rule within a Fidesops Policy""" name: str - key: Optional[FidesOpsKey] + key: Optional[FidesKey] action_type: ActionType class Config: @@ -57,7 +58,7 @@ class RuleCreate(RuleBase): over a composite object. """ - storage_destination_key: Optional[FidesOpsKey] + storage_destination_key: Optional[FidesKey] masking_strategy: Optional[PolicyMaskingSpec] @@ -82,7 +83,7 @@ class Policy(BaseSchema): """An external representation of a Fidesops Policy""" name: str - key: Optional[FidesOpsKey] + key: Optional[FidesKey] drp_action: Optional[DrpAction] execution_timeframe: Optional[int] diff --git a/src/fides/api/ops/schemas/policy_webhooks.py b/src/fides/api/ops/schemas/policy_webhooks.py index 9c6ed080db..2fa8f8ed6a 100644 --- a/src/fides/api/ops/schemas/policy_webhooks.py +++ b/src/fides/api/ops/schemas/policy_webhooks.py @@ -1,25 +1,26 @@ from typing import List, Optional +from fideslang.validation import FidesKey + from fides.api.ops.models.policy import WebhookDirection from fides.api.ops.schemas.base_class import BaseSchema from fides.api.ops.schemas.connection_configuration.connection_config import ( ConnectionConfigurationResponse, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey class WebhookBase(BaseSchema): """Base schema for Webhooks""" direction: WebhookDirection - key: Optional[FidesOpsKey] + key: Optional[FidesKey] name: Optional[str] class PolicyWebhookCreate(WebhookBase): """Request schema for creating/updating a 
Policy Webhook""" - connection_config_key: FidesOpsKey + connection_config_key: FidesKey class Config: """Populate models with the raw value of enum fields, rather than the enum itself""" @@ -44,7 +45,7 @@ class PolicyWebhookUpdate(BaseSchema): direction: Optional[WebhookDirection] name: Optional[str] - connection_config_key: Optional[FidesOpsKey] + connection_config_key: Optional[FidesKey] order: Optional[int] class Config: @@ -58,7 +59,7 @@ class Config: class WebhookOrder(BaseSchema): """Schema for displaying a minimal amount of information about the webhook and its order""" - key: FidesOpsKey + key: FidesKey order: int class Config: diff --git a/src/fides/api/ops/schemas/privacy_request.py b/src/fides/api/ops/schemas/privacy_request.py index 8cca6dd2da..bdef98ada8 100644 --- a/src/fides/api/ops/schemas/privacy_request.py +++ b/src/fides/api/ops/schemas/privacy_request.py @@ -2,6 +2,7 @@ from enum import Enum as EnumType from typing import Any, Dict, List, Optional, Union +from fideslang.validation import FidesKey from fideslib.models.audit_log import AuditLogAction from fideslib.oauth.schemas.user import PrivacyRequestReviewer from pydantic import Field, validator @@ -16,7 +17,6 @@ from fides.api.ops.schemas.base_class import BaseSchema from fides.api.ops.schemas.policy import PolicyResponse as PolicySchema from fides.api.ops.schemas.redis_cache import Identity -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.util.encryption.aes_gcm_encryption_scheme import ( verify_encryption_key, ) @@ -62,7 +62,7 @@ class PrivacyRequestCreate(BaseSchema): finished_processing_at: Optional[datetime] requested_at: Optional[datetime] identity: Identity - policy_key: FidesOpsKey + policy_key: FidesKey encryption_key: Optional[str] = None @validator("encryption_key") diff --git a/src/fides/api/ops/schemas/saas/saas_config.py b/src/fides/api/ops/schemas/saas/saas_config.py index 9315d9f7a3..2b988c3740 100644 --- 
a/src/fides/api/ops/schemas/saas/saas_config.py +++ b/src/fides/api/ops/schemas/saas/saas_config.py @@ -1,21 +1,21 @@ from typing import Any, Dict, List, Optional, Set, Union +from fideslang.models import FidesCollectionKey, FidesDatasetReference +from fideslang.validation import FidesKey from pydantic import BaseModel, Extra, root_validator, validator from fides.api.ops.common_exceptions import ValidationError from fides.api.ops.graph.config import ( Collection, CollectionAddress, - Dataset, Field, FieldAddress, + GraphDataset, ScalarField, ) from fides.api.ops.schemas.base_class import BaseSchema -from fides.api.ops.schemas.dataset import FidesCollectionKey, FidesopsDatasetReference from fides.api.ops.schemas.limiter.rate_limit_config import RateLimitConfig from fides.api.ops.schemas.saas.shared_schemas import HTTPMethod -from fides.api.ops.schemas.shared_schemas import FidesOpsKey class ParamValue(BaseModel): @@ -26,17 +26,17 @@ class ParamValue(BaseModel): name: str identity: Optional[str] - references: Optional[List[Union[FidesopsDatasetReference, str]]] + references: Optional[List[Union[FidesDatasetReference, str]]] connector_param: Optional[str] unpack: Optional[bool] = False @validator("references") def check_reference_direction( - cls, references: Optional[List[Union[FidesopsDatasetReference, str]]] - ) -> Optional[List[Union[FidesopsDatasetReference, str]]]: + cls, references: Optional[List[Union[FidesDatasetReference, str]]] + ) -> Optional[List[Union[FidesDatasetReference, str]]]: """Validates the request_param only contains inbound references""" for reference in references or {}: - if isinstance(reference, FidesopsDatasetReference): + if isinstance(reference, FidesDatasetReference): if reference.direction == "to": raise ValueError( "References can only have a direction of 'from', found 'to'" @@ -158,8 +158,8 @@ def validate_grouped_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]: # reference may be a str, in which case it's an external reference. 
# since external references are parameterized via secrets, # they cannot be resolved and checked at this point in the validation. - # so here we only perform the check if the reference is a FidesopsDatasetReference - if isinstance(param.references[0], FidesopsDatasetReference): + # so here we only perform the check if the reference is a FidesDatasetReference + if isinstance(param.references[0], FidesDatasetReference): collect = param.references[0].field.split(".")[0] referenced_collections.append(collect) else: @@ -212,7 +212,7 @@ class SaaSRequestMap(BaseModel): class Endpoint(BaseModel): - """A collection of read/update/delete requests which corresponds to a FidesopsDataset collection (by name)""" + """A collection of read/update/delete requests which corresponds to a FidesDataset collection (by name)""" name: str requests: SaaSRequestMap @@ -294,12 +294,12 @@ class SaaSConfigBase(BaseModel): Used to store base info for a saas config """ - fides_key: FidesOpsKey + fides_key: FidesKey name: str type: str @property - def fides_key_prop(self) -> FidesOpsKey: + def fides_key_prop(self) -> FidesKey: return self.fides_key @property @@ -343,7 +343,7 @@ def top_level_endpoint_dict(self) -> Dict[str, Endpoint]: """Returns a map of endpoint names mapped to Endpoints""" return {endpoint.name: endpoint for endpoint in self.endpoints} - def get_graph(self, secrets: Dict[str, Any]) -> Dataset: + def get_graph(self, secrets: Dict[str, Any]) -> GraphDataset: """Converts endpoints to a Dataset with collections and field references""" collections = [] for endpoint in self.endpoints: @@ -388,7 +388,7 @@ def get_graph(self, secrets: Dict[str, Any]) -> Dataset: ) ) - return Dataset( + return GraphDataset( name=super().name_prop, collections=collections, connection_key=super().fides_key_prop, @@ -423,24 +423,24 @@ def _process_param_values( @staticmethod def resolve_param_reference( - reference: Union[str, FidesopsDatasetReference], secrets: Dict[str, Any] - ) -> 
FidesopsDatasetReference: + reference: Union[str, FidesDatasetReference], secrets: Dict[str, Any] + ) -> FidesDatasetReference: """ If needed, resolves the given `reference` using the provided `secrets` `dict`. - For ease of use, the given `reference` can either be a `str` or `FidesopsDatasetReference`, + For ease of use, the given `reference` can either be a `str` or `FidesDatasetReference`, since a `ParamValue`'s `reference` may be of either type. If the `reference` is a `str`, then it's used as a key look up a value in the provided secrets dict, - and a `FidesopsDatasetReference` is created and returned from the retrieved secrets object. + and a `FidesDatasetReference` is created and returned from the retrieved secrets object. - If the `reference` is a `FidesopsDatasetReference`, then it's just returned as-is. + If the `reference` is a `FidesDatasetReference`, then it's just returned as-is. """ if isinstance(reference, str): if reference not in secrets.keys(): raise ValidationError( f"External dataset reference with provided name {reference} not found in connector's secrets." ) - reference = FidesopsDatasetReference.parse_obj(secrets[reference]) + reference = FidesDatasetReference.parse_obj(secrets[reference]) return reference diff --git a/src/fides/api/ops/schemas/shared_schemas.py b/src/fides/api/ops/schemas/shared_schemas.py deleted file mode 100644 index 8f78c25024..0000000000 --- a/src/fides/api/ops/schemas/shared_schemas.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Optional - -from fideslang.validation import FidesKey - - -class FidesOpsKey(FidesKey): - """ - Overrides fideslang FidesKey validation to throw ValueError - """ - - @classmethod - def validate(cls, value: Optional[str]) -> Optional[str]: - """Throws ValueError if val is not a valid FidesKey""" - if value == "": - # Ignore in saas templates. This value will be replaced with a - # user-specified value. 
- return value - - if value is not None and not cls.regex.match(value): - raise ValueError( - "FidesKey must only contain alphanumeric characters, '.', '_' or '-'." - ) - - return value diff --git a/src/fides/api/ops/schemas/storage/storage.py b/src/fides/api/ops/schemas/storage/storage.py index d7f0bd3406..c8bfb96be5 100644 --- a/src/fides/api/ops/schemas/storage/storage.py +++ b/src/fides/api/ops/schemas/storage/storage.py @@ -2,11 +2,11 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union +from fideslang.validation import FidesKey from pydantic import Extra, ValidationError, root_validator, validator from pydantic.main import BaseModel from fides.api.ops.schemas.api import BulkResponse, BulkUpdateFailed -from fides.api.ops.schemas.shared_schemas import FidesOpsKey logger = logging.getLogger(__name__) @@ -120,7 +120,7 @@ class StorageDestination(BaseModel): StorageDetailsS3, StorageDetailsLocal, ] - key: Optional[FidesOpsKey] + key: Optional[FidesKey] format: Optional[ResponseFormat] = ResponseFormat.json.value # type: ignore class Config: @@ -187,7 +187,7 @@ class StorageDestinationResponse(BaseModel): name: str type: StorageType details: Dict[StorageDetails, Any] - key: FidesOpsKey + key: FidesKey format: ResponseFormat class Config: diff --git a/src/fides/api/ops/service/connectors/saas/connector_registry_service.py b/src/fides/api/ops/service/connectors/saas/connector_registry_service.py index 12c0ea5705..14a32d2b00 100644 --- a/src/fides/api/ops/service/connectors/saas/connector_registry_service.py +++ b/src/fides/api/ops/service/connectors/saas/connector_registry_service.py @@ -4,6 +4,7 @@ from os.path import exists from typing import Dict, Iterable, List, Optional, Union +from fideslang.models import Dataset from fideslib.core.config import load_toml from packaging.version import LegacyVersion, Version from packaging.version import parse as parse_version @@ -19,7 +20,6 @@ from 
fides.api.ops.schemas.connection_configuration.connection_config import ( SaasConnectionTemplateValues, ) -from fides.api.ops.schemas.dataset import FidesopsDataset from fides.api.ops.schemas.saas.saas_config import SaaSConfig from fides.api.ops.util.saas_util import ( load_config, @@ -54,7 +54,7 @@ def validate_config(cls, config: str) -> str: @validator("dataset") def validate_dataset(cls, dataset: str) -> str: """Validates the dataset at the given path""" - FidesopsDataset(**load_dataset(dataset)[0]) + Dataset(**load_dataset(dataset)[0]) return dataset @validator("icon") @@ -141,7 +141,8 @@ def load_registry(config_file: str) -> ConnectorRegistry: """Loads a SaaS connector registry from the given config file.""" global _registry # pylint: disable=W0603 if _registry is None: - _registry = ConnectorRegistry.parse_obj(load_toml([config_file])) + toml_file = load_toml([config_file]) + _registry = ConnectorRegistry.parse_obj(toml_file) return _registry diff --git a/src/fides/api/ops/service/connectors/saas_query_config.py b/src/fides/api/ops/service/connectors/saas_query_config.py index db0646ad0a..67b2ece2fb 100644 --- a/src/fides/api/ops/service/connectors/saas_query_config.py +++ b/src/fides/api/ops/service/connectors/saas_query_config.py @@ -6,13 +6,13 @@ from typing import Any, Dict, List, Literal, Optional, TypeVar import pydash +from fideslang.models import FidesDatasetReference from fides.api.ops.common_exceptions import FidesopsException from fides.api.ops.graph.config import ScalarField from fides.api.ops.graph.traversal import TraversalNode from fides.api.ops.models.policy import Policy from fides.api.ops.models.privacy_request import PrivacyRequest -from fides.api.ops.schemas.dataset import FidesopsDatasetReference from fides.api.ops.schemas.saas.saas_config import Endpoint, SaaSConfig, SaaSRequest from fides.api.ops.schemas.saas.shared_schemas import SaaSRequestParams from fides.api.ops.service.connectors.query_config import QueryConfig @@ -364,10 +364,8 
@@ def generate_update_param_values( # pylint: disable=R0914 # however, `references` in update requests can, currently, only reference # the same collection the same collection, and so it is highly unlikely # that this would be an external reference at this point. - reference: FidesopsDatasetReference = ( - SaaSConfig.resolve_param_reference( - param_value.references[0], self.secrets - ) + reference: FidesDatasetReference = SaaSConfig.resolve_param_reference( + param_value.references[0], self.secrets ) param_values[param_value.name] = pydash.get( collection_values, reference.field diff --git a/src/fides/api/ops/service/saas_request/saas_request_override_factory.py b/src/fides/api/ops/service/saas_request/saas_request_override_factory.py index ce0d4b62cf..73c5fcf11d 100644 --- a/src/fides/api/ops/service/saas_request/saas_request_override_factory.py +++ b/src/fides/api/ops/service/saas_request/saas_request_override_factory.py @@ -133,7 +133,7 @@ def validate_read_override_function(f: Callable) -> None: and that it declares at least 5 parameters. 
""" sig: Signature = signature(f) - if sig.return_annotation is not List[Row]: + if sig.return_annotation != List[Row]: raise InvalidSaaSRequestOverrideException( "Provided SaaS request override function must return a List[Row]" ) diff --git a/src/fides/api/ops/service/storage/storage_uploader_service.py b/src/fides/api/ops/service/storage/storage_uploader_service.py index df918e322e..b561a27b99 100644 --- a/src/fides/api/ops/service/storage/storage_uploader_service.py +++ b/src/fides/api/ops/service/storage/storage_uploader_service.py @@ -1,11 +1,11 @@ import logging from typing import Any, Dict, Optional +from fideslang.validation import FidesKey from sqlalchemy.orm import Session from fides.api.ops.common_exceptions import StorageUploadError from fides.api.ops.models.storage import StorageConfig -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.schemas.storage.storage import ( FileNaming, ResponseFormat, @@ -17,9 +17,7 @@ logger = logging.getLogger(__name__) -def upload( - db: Session, *, request_id: str, data: Dict, storage_key: FidesOpsKey -) -> str: +def upload(db: Session, *, request_id: str, data: Dict, storage_key: FidesKey) -> str: """ Retrieves storage configs and calls appropriate upload method :param db: SQLAlchemy Session diff --git a/src/fides/api/ops/task/filter_results.py b/src/fides/api/ops/task/filter_results.py index 67ec9762a8..cd2a9f7c4b 100644 --- a/src/fides/api/ops/task/filter_results.py +++ b/src/fides/api/ops/task/filter_results.py @@ -3,8 +3,9 @@ from collections import defaultdict from typing import Any, Dict, List, Optional, Set, Union +from fideslang.validation import FidesKey + from fides.api.ops.graph.config import CollectionAddress, FieldPath -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.util.collection_util import Row logger = logging.getLogger(__name__) @@ -13,7 +14,7 @@ def filter_data_categories( access_request_results: Dict[str, List[Dict[str, Optional[Any]]]], 
target_categories: Set[str], - data_category_fields: Dict[CollectionAddress, Dict[FidesOpsKey, List[FieldPath]]], + data_category_fields: Dict[CollectionAddress, Dict[FidesKey, List[FieldPath]]], rule_key: str = "", fides_connector_datasets: Optional[Set[str]] = None, ) -> Dict[str, List[Dict[str, Optional[Any]]]]: diff --git a/src/fides/api/ops/task/task_resources.py b/src/fides/api/ops/task/task_resources.py index 6ba222609f..de2483e640 100644 --- a/src/fides/api/ops/task/task_resources.py +++ b/src/fides/api/ops/task/task_resources.py @@ -1,6 +1,7 @@ import logging from typing import Any, Dict, List, Optional +from fideslang.validation import FidesKey from sqlalchemy.orm import Session from fides.api.ops.common_exceptions import ConnectorNotFoundException @@ -12,7 +13,6 @@ ExecutionLogStatus, PrivacyRequest, ) -from fides.api.ops.schemas.shared_schemas import FidesOpsKey from fides.api.ops.service.connectors import ( BaseConnector, BigQueryConnector, @@ -184,7 +184,7 @@ def write_execution_log( # pylint: disable=too-many-arguments }, ) - def get_connector(self, key: FidesOpsKey) -> Any: + def get_connector(self, key: FidesKey) -> Any: """Create or return the client corresponding to the given ConnectionConfig key""" if key in self.connection_configs: return self.connections.get_connector(self.connection_configs[key]) diff --git a/src/fides/api/ops/util/cache.py b/src/fides/api/ops/util/cache.py index 078a39b74b..14764ef821 100644 --- a/src/fides/api/ops/util/cache.py +++ b/src/fides/api/ops/util/cache.py @@ -4,8 +4,8 @@ from loguru import logger from redis import Redis -from redis.exceptions import ConnectionError as ConnectionErrorFromRedis from redis.client import Script # type: ignore +from redis.exceptions import ConnectionError as ConnectionErrorFromRedis from fides.api.ops import common_exceptions from fides.api.ops.schemas.masking.masking_secrets import SecretType diff --git a/src/fides/api/ops/util/saas_util.py b/src/fides/api/ops/util/saas_util.py index 
0d35097ce2..c7775efa42 100644 --- a/src/fides/api/ops/util/saas_util.py +++ b/src/fides/api/ops/util/saas_util.py @@ -13,7 +13,12 @@ from multidimensional_urlencode import urlencode as multidimensional_urlencode from fides.api.ops.common_exceptions import FidesopsException -from fides.api.ops.graph.config import Collection, CollectionAddress, Dataset, Field +from fides.api.ops.graph.config import ( + Collection, + CollectionAddress, + Field, + GraphDataset, +) from fides.api.ops.schemas.saas.saas_config import SaaSRequest from fides.api.ops.schemas.saas.shared_schemas import SaaSRequestParams @@ -111,9 +116,9 @@ def get_collection_after( return collection.after -def merge_datasets(dataset: Dataset, config_dataset: Dataset) -> Dataset: +def merge_datasets(dataset: GraphDataset, config_dataset: GraphDataset) -> GraphDataset: """ - Merges all Collections and Fields from the config_dataset into the dataset. + Merges all Collections and Fields from the "config_dataset" into the "dataset". In the event of a collection/field name collision, the target field will inherit the identity and field references. This is by design since dataset references for SaaS connectors should not have any references. 
@@ -135,7 +140,7 @@ def merge_datasets(dataset: Dataset, config_dataset: Dataset) -> Dataset: ) ) - return Dataset( + return GraphDataset( name=dataset.name, collections=collections, connection_key=dataset.connection_key, diff --git a/src/fides/ctl/core/evaluate.py b/src/fides/ctl/core/evaluate.py index 6903be5f05..b3ad4bd4c7 100644 --- a/src/fides/ctl/core/evaluate.py +++ b/src/fides/ctl/core/evaluate.py @@ -273,7 +273,7 @@ def evaluate_dataset_reference( dataset: Dataset, ) -> List[Violation]: """ - Evaluates the contraints of a given rule and dataset that was referenced + Evaluates the constraints of a given rule and dataset that was referenced from a given privacy declaration """ evaluation_violation_list = [] diff --git a/src/fides/ctl/core/export_helpers.py b/src/fides/ctl/core/export_helpers.py index 0490c8440d..09f4e0e937 100644 --- a/src/fides/ctl/core/export_helpers.py +++ b/src/fides/ctl/core/export_helpers.py @@ -6,6 +6,7 @@ import pandas as pd from fideslang.models import DataSubject, DataSubjectRightsEnum, DataUse +from fideslang.validation import FidesKey DATAMAP_TEMPLATE = join( dirname(__file__), @@ -137,7 +138,7 @@ def export_datamap_to_excel( return filename -def format_data_uses(data_uses: List[DataUse]) -> Dict[str, Dict[str, str]]: +def format_data_uses(data_uses: List[DataUse]) -> Dict[FidesKey, Dict[str, str]]: """ This function formats data uses for use when exporting, returning the necessary values as a dict. Formatting @@ -174,7 +175,9 @@ def format_data_uses(data_uses: List[DataUse]) -> Dict[str, Dict[str, str]]: return formatted_data_uses -def format_data_subjects(data_subjects: List[DataSubject]) -> Dict[str, Dict[str, str]]: +def format_data_subjects( + data_subjects: List[DataSubject], +) -> Dict[FidesKey, Dict[str, str]]: """ This function formats data subjects from the server, returning the necessary values as a list of dicts. 
@@ -190,7 +193,7 @@ def format_data_subjects(data_subjects: List[DataSubject]) -> Dict[str, Dict[str "automated_decisions_or_profiling", ] - formatted_data_subjects = {} + formatted_data_subjects: Dict[FidesKey, Dict[str, str]] = {} for data_subject in data_subjects: data_subject_dict = data_subject.dict() diff --git a/src/fides/ctl/core/utils.py b/src/fides/ctl/core/utils.py index 0a244d5b56..a964bf15fa 100644 --- a/src/fides/ctl/core/utils.py +++ b/src/fides/ctl/core/utils.py @@ -13,7 +13,7 @@ import requests import sqlalchemy from fideslang.models import DatasetField, FidesModel -from fideslang.validation import FidesValidationError +from pydantic import ValidationError from sqlalchemy.engine import Engine from sqlalchemy.exc import SQLAlchemyError @@ -123,7 +123,7 @@ def check_fides_key(proposed_fides_key: str) -> str: try: FidesModel(fides_key=proposed_fides_key) return proposed_fides_key - except FidesValidationError as error: + except ValidationError as error: echo_red(error) return sanitize_fides_key(proposed_fides_key) diff --git a/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml b/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml index 5c33d89d01..9978c88ff8 100644 --- a/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml +++ b/src/fides/data/sample_project/sample_resources/postgres_example_test_dataset.yml @@ -11,7 +11,7 @@ dataset: data_categories: [user.contact.address.street] - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: state data_categories: [user.contact.address.state] @@ -24,7 +24,7 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: address.id @@ -33,16 +33,16 @@ dataset: data_categories: [system.operations] - name: email data_categories: [user.contact.email] - fidesops_meta: + 
fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string length: 40 @@ -50,37 +50,37 @@ dataset: fields: - name: address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: address.id direction: to - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.name] - fidesops_meta: + fides_meta: data_type: string - name: login fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: time data_categories: [user.sensor] @@ -89,18 +89,18 @@ dataset: fields: - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: shipping_address_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: address.id @@ -111,14 +111,14 @@ dataset: fields: - name: order_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: orders.id direction: from - name: product_id data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: product.id @@ -130,7 +130,7 @@ dataset: fields: - name: billing_address_id 
data_categories: [system.operations] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: address.id @@ -141,14 +141,14 @@ dataset: data_categories: [user.financial] - name: customer_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: customer.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [user.financial] @@ -159,7 +159,7 @@ dataset: fields: - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: name data_categories: [system.operations] @@ -170,12 +170,12 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: month data_categories: [system.operations] @@ -190,26 +190,26 @@ dataset: fields: - name: alt_email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: closed data_categories: [system.operations] - name: email data_categories: [system.operations] - fidesops_meta: + fides_meta: identity: email data_type: string - name: employee_id data_categories: [user.unique_id] - fidesops_meta: + fides_meta: references: - dataset: postgres_example_test_dataset field: employee.id direction: from - name: id data_categories: [system.operations] - fidesops_meta: + fides_meta: primary_key: True - name: opened data_categories: [system.operations] @@ -218,7 +218,7 @@ dataset: fields: - name: email data_categories: [user.contact.email] - fidesops_meta: + fides_meta: identity: email data_type: string - name: last_visit diff --git a/tests/ops/api/v1/endpoints/test_connection_template_endpoints.py b/tests/ops/api/v1/endpoints/test_connection_template_endpoints.py index 
ab997e67cc..86f3172e89 100644 --- a/tests/ops/api/v1/endpoints/test_connection_template_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_connection_template_endpoints.py @@ -625,8 +625,8 @@ def test_invalid_instance_key(self, db, generate_auth_header, api_client, base_u ) assert resp.json()["detail"][0] == { "loc": ["body", "instance_key"], - "msg": "FidesKey must only contain alphanumeric characters, '.', '_' or '-'.", - "type": "value_error", + "msg": "FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: < this is an invalid key! >", + "type": "value_error.fidesvalidation", } @mock.patch( diff --git a/tests/ops/api/v1/endpoints/test_dataset_endpoints.py b/tests/ops/api/v1/endpoints/test_dataset_endpoints.py index a2e529076d..81f8b07781 100644 --- a/tests/ops/api/v1/endpoints/test_dataset_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_dataset_endpoints.py @@ -178,7 +178,7 @@ def test_put_validate_dataset_invalid_length( assert response.status_code == 200 assert ( json.loads(response.text)["dataset"]["collections"][0]["fields"][0][ - "fidesops_meta" + "fides_meta" ]["length"] == 123 ) @@ -192,7 +192,7 @@ def test_put_validate_dataset_invalid_length( assert response.status_code == 422 assert ( json.loads(response.text)["detail"][0]["msg"] - == "Illegal length (-1). Only positive non-zero values are allowed." 
+ == "ensure this value is greater than 0" ) def test_put_validate_dataset_invalid_data_type( @@ -215,7 +215,7 @@ def test_put_validate_dataset_invalid_data_type( assert response.status_code == 200 assert ( json.loads(response.text)["dataset"]["collections"][0]["fields"][0][ - "fidesops_meta" + "fides_meta" ]["data_type"] == "string" ) @@ -265,7 +265,7 @@ def test_put_validate_dataset_invalid_fidesops_meta( 0, "fields", 0, - "fidesops_meta", + "fides_meta", "references", 0, "direction", @@ -288,7 +288,7 @@ def test_put_validate_dataset_invalid_category( ) assert response.status_code == 422 details = json.loads(response.text)["detail"] - assert ["body", "collections", 0, "fields", 0, "data_categories"] in [ + assert ["collections", 0, "fields", 0, "data_categories"] in [ e["loc"] for e in details ] @@ -318,8 +318,8 @@ def test_put_validate_dataset_invalid_traversal( # Remove all the "reference" annotations; this will make traversal impossible for collection in invalid_dataset["collections"]: for field in collection["fields"]: - if field.get("fidesops_meta"): - field["fidesops_meta"]["references"] = None + if field.get("fides_meta"): + field["fides_meta"]["references"] = None response = api_client.put( validate_dataset_url, headers=auth_header, json=invalid_dataset ) diff --git a/tests/ops/api/v1/endpoints/test_messaging_endpoints.py b/tests/ops/api/v1/endpoints/test_messaging_endpoints.py index 6cc417f0cd..1b2d7cd8cc 100644 --- a/tests/ops/api/v1/endpoints/test_messaging_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_messaging_endpoints.py @@ -136,7 +136,7 @@ def test_post_email_config_with_invalid_key( assert 422 == response.status_code assert ( json.loads(response.text)["detail"][0]["msg"] - == "FidesKey must only contain alphanumeric characters, '.', '_' or '-'." + == "FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. 
Value provided: *invalid-key" ) def test_post_email_config_with_key( diff --git a/tests/ops/api/v1/endpoints/test_policy_endpoints.py b/tests/ops/api/v1/endpoints/test_policy_endpoints.py index de15608400..ed7bb36024 100644 --- a/tests/ops/api/v1/endpoints/test_policy_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_policy_endpoints.py @@ -543,7 +543,7 @@ def test_create_policy_with_invalid_key( assert resp.status_code == 422 assert ( json.loads(resp.text)["detail"][0]["msg"] - == "FidesKey must only contain alphanumeric characters, '.', '_' or '-'." + == "FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: here*is*an*invalid*key" ) def test_create_policy_already_exists( diff --git a/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py b/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py index 422d1eb77c..b1764916c7 100644 --- a/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py @@ -4519,7 +4519,10 @@ def test_create_privacy_request_notification_info_deletes_addresses( ): PrivacyRequestNotifications.create( db=db, - data={"email": "test@email.com, test2@email.com", "notify_after_failures": 10}, + data={ + "email": "test@email.com, test2@email.com", + "notify_after_failures": 10, + }, ) auth_header = generate_auth_header( scopes=[PRIVACY_REQUEST_NOTIFICATIONS_CREATE_OR_UPDATE] diff --git a/tests/ops/api/v1/endpoints/test_storage_endpoints.py b/tests/ops/api/v1/endpoints/test_storage_endpoints.py index 29180be54f..8bd737dafc 100644 --- a/tests/ops/api/v1/endpoints/test_storage_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_storage_endpoints.py @@ -172,7 +172,7 @@ def test_put_storage_config_with_invalid_key( assert 422 == response.status_code assert ( json.loads(response.text)["detail"][0]["msg"] - == "FidesKey must only contain alphanumeric characters, '.', '_' or '-'." 
+ == "FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: *invalid-key" ) def test_patch_storage_config_with_key( diff --git a/tests/ops/generator/test_data_generator.py b/tests/ops/generator/test_data_generator.py index 7912e4db33..c6431e6176 100644 --- a/tests/ops/generator/test_data_generator.py +++ b/tests/ops/generator/test_data_generator.py @@ -1,4 +1,5 @@ import yaml +from fideslang.models import Dataset from fides.api.ops.graph.graph import * @@ -7,7 +8,6 @@ # ------------------------------------------- from fides.api.ops.graph.traversal import Traversal from fides.api.ops.models.datasetconfig import convert_dataset_to_graph -from fides.api.ops.schemas.dataset import FidesopsDataset from . import sql_data_generator @@ -19,21 +19,21 @@ - name: user fields: - name: id - fidesops_meta: + fides_meta: primary_key: True data_type: integer references: - dataset: db field: address.user_id - name: email - fidesops_meta: + fides_meta: identity: email - name: name - name: address fields: - name: id - fidesops_meta: + fides_meta: primary_key: True data_type: integer - name: user_id @@ -44,11 +44,11 @@ """ -def parse_yaml() -> Dataset: +def parse_yaml() -> GraphDataset: """Test that 'after' parameters are properly read""" d = yaml.safe_load(f) dataset = d.get("dataset")[0] - d: FidesopsDataset = FidesopsDataset.parse_obj(dataset) + d: Dataset = Dataset.parse_obj(dataset) return convert_dataset_to_graph(d, "ignore") diff --git a/tests/ops/graph/graph_test_util.py b/tests/ops/graph/graph_test_util.py index 353c003640..1c0c11d174 100644 --- a/tests/ops/graph/graph_test_util.py +++ b/tests/ops/graph/graph_test_util.py @@ -1,6 +1,7 @@ import random from typing import Iterable +from fideslang.validation import FidesKey from fideslib.db.base_class import FidesBase from sqlalchemy.engine import Engine @@ -25,7 +26,7 @@ class MockResources(TaskResources): def __init__(self, request: PrivacyRequest): super().__init__(request, Policy(), 
[]) - def get_connector(self, key: FidesOpsKey) -> Any: + def get_connector(self, key: FidesKey) -> Any: return MockSqlConnector() @@ -123,7 +124,7 @@ def generate_field_list(num_fields: int) -> List[ScalarField]: def generate_node(dr_name: str, ds_name: str, *field_names: str) -> Node: ds = Collection(name=ds_name, fields=[ScalarField(name=s) for s in field_names]) - dr = Dataset( + dr = GraphDataset( name=dr_name, collections=[ds], connection_key=f"mock_connection_config_key_{dr_name}", @@ -131,9 +132,9 @@ def generate_node(dr_name: str, ds_name: str, *field_names: str) -> Node: return Node(dr, ds) -def field(dataresources: List[Dataset], *address: str) -> ScalarField: +def field(dataresources: List[GraphDataset], *address: str) -> ScalarField: """Test util to access a particular field - can access a nested field one level deep""" - dr: Dataset = next(dr for dr in dataresources if dr.name == address[0]) + dr: GraphDataset = next(dr for dr in dataresources if dr.name == address[0]) ds: Collection = next(ds for ds in dr.collections if ds.name == address[1]) try: @@ -148,12 +149,16 @@ def field(dataresources: List[Dataset], *address: str) -> ScalarField: return df -def collection(dataresources: List[Dataset], address: CollectionAddress) -> Collection: - dr: Dataset = next(dr for dr in dataresources if dr.name == address.dataset) +def collection( + dataresources: List[GraphDataset], address: CollectionAddress +) -> Collection: + dr: GraphDataset = next(dr for dr in dataresources if dr.name == address.dataset) return next(ds for ds in dr.collections if ds.name == address.collection) -def dataresource(dataresources: List[Dataset], address: DatasetAddress) -> Dataset: +def dataresource( + dataresources: List[GraphDataset], address: DatasetAddress +) -> GraphDataset: return next(dr for dr in dataresources if dr.name == address) @@ -168,7 +173,7 @@ def outgoing_edges(traversal: Traversal, node_address: CollectionAddress) -> Set def generate_traversal( - seed: Dict[str, 
Any], *dataresources: Dataset + seed: Dict[str, Any], *dataresources: GraphDataset ) -> Tuple[Dict[str, Any], List[CollectionAddress]]: graph = DatasetGraph(*dataresources) traversal = Traversal(graph, seed) @@ -195,9 +200,9 @@ def traversal_order_fn( # --------------- generated graphs ------------- -def generate_graph_resources(num_nodes: int) -> List[Dataset]: +def generate_graph_resources(num_nodes: int) -> List[GraphDataset]: return [ - Dataset( + GraphDataset( name=f"dr_{i}", collections=[Collection(name=f"ds_{i}", fields=generate_field_list(3))], connection_key=f"mock_connection_config_key_{i}", @@ -208,9 +213,9 @@ def generate_graph_resources(num_nodes: int) -> List[Dataset]: def generate_binary_tree_resources( num_levels: int, branching_factor: int = 2 -) -> List[Dataset]: +) -> List[GraphDataset]: """Generate a multi-level binary tree for testing""" - root = Dataset( + root = GraphDataset( name=f"root", collections=[Collection(name=f"ds", fields=generate_field_list(3))], connection_key=f"mock_connection_config_key_root", @@ -225,7 +230,7 @@ def generate_binary_tree_resources( next_dr_name, next_ds_name = next_node.name, next_node.collections[0].name for j in range(branching_factor): next_child_key = (f"{next_dr_name}.{j}", f"{next_ds_name}.{j}", "f1") - next_child = Dataset( + next_child = GraphDataset( name=next_child_key[0], collections=[ Collection(name=next_child_key[1], fields=generate_field_list(3)) @@ -242,10 +247,10 @@ def generate_binary_tree_resources( return resources -def generate_fully_connected_resources(size: int) -> List[Dataset]: +def generate_fully_connected_resources(size: int) -> List[GraphDataset]: """Generate a fully connected graph of resources""" - def connect(r1: Dataset, r2: Dataset) -> None: + def connect(r1: GraphDataset, r2: GraphDataset) -> None: field( [r1], r1.name, r1.collections[0].name, random.choice(["f1", "f2", "f3"]) ).references.append( diff --git a/tests/ops/graph/test_graph.py b/tests/ops/graph/test_graph.py index 
fecc1c4d60..5d8dbf2bd0 100644 --- a/tests/ops/graph/test_graph.py +++ b/tests/ops/graph/test_graph.py @@ -38,7 +38,7 @@ ], ) graph = DatasetGraph( - Dataset( + GraphDataset( name="s1", collections=[t1, t2, t3], connection_key="mock_connection_config_key" ) ) @@ -48,7 +48,7 @@ class TestNode: def test_node_eq(self) -> None: """two nodes are equal if they have the same collection address""" assert graph.nodes[CollectionAddress("s1", "t1")] == Node( - Dataset( + GraphDataset( name="s1", collections=[], connection_key="mock_connection_config_key" ), Collection(name="t1", fields=[]), diff --git a/tests/ops/graph/test_graph_traversal.py b/tests/ops/graph/test_graph_traversal.py index 6ad0489a5f..2a46333ab0 100644 --- a/tests/ops/graph/test_graph_traversal.py +++ b/tests/ops/graph/test_graph_traversal.py @@ -228,7 +228,7 @@ def test_tree_1() -> None: seed = {"email": "foo@bar.com"} traversal_map, terminators = generate_traversal( seed, - Dataset( + GraphDataset( name="s1", collections=[t1, t2, t3, t4], connection_key="mock_connection_config_key", @@ -314,7 +314,7 @@ def test_traversal_ordering() -> None: ], ) graph = DatasetGraph( - Dataset( + GraphDataset( name="mysql", collections=[customers, addresses, orders], connection_key="mock_connection_config_key", @@ -522,7 +522,7 @@ def test_variant_traversals() -> None: ) graph = DatasetGraph( - Dataset( + GraphDataset( name="mysql", collections=[customers, users], connection_key="mock_connection_config_key", diff --git a/tests/ops/integration_tests/test_execution.py b/tests/ops/integration_tests/test_execution.py index 702bbbcc9d..3cf1beea2f 100644 --- a/tests/ops/integration_tests/test_execution.py +++ b/tests/ops/integration_tests/test_execution.py @@ -2,6 +2,7 @@ from unittest import mock import pytest +from fideslang.models import Dataset from fideslib.db.session import get_db_session from pydantic import ValidationError from sqlalchemy.exc import InvalidRequestError @@ -20,7 +21,6 @@ ExecutionLog, PrivacyRequest, ) 
-from fides.api.ops.schemas.dataset import FidesopsDataset from fides.api.ops.task import graph_task from fides.api.ops.task.graph_task import get_cached_data_for_erasures from fides.ctl.core.config import get_config @@ -49,9 +49,9 @@ def get_sorted_execution_logs(db, privacy_request: PrivacyRequest): def mongo_postgres_dataset_graph( example_datasets, integration_postgres_config, integration_mongodb_config ): - dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset_postgres, integration_postgres_config.key) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( dataset_mongo, integration_mongodb_config.key ) @@ -121,11 +121,11 @@ async def test_delete_collection_while_in_progress( name="mongo_example_in_progress", ) mongo_connection_config.save(db) - dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph( dataset_postgres, integration_postgres_config.key ) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( dataset_mongo, mongo_connection_config.key ) @@ -236,7 +236,7 @@ async def test_collection_omitted_on_restart_from_failure( integration_mongodb_config.delete(db) # Just rebuilding a graph without the deleted config. 
- dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph( dataset_postgres, integration_postgres_config.key ) @@ -387,11 +387,11 @@ async def test_run_disabled_collections_in_progress( ) mongo_connection_config.save(db) - dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph( dataset_postgres, integration_postgres_config.key ) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( dataset_mongo, mongo_connection_config.key ) diff --git a/tests/ops/integration_tests/test_integration_email.py b/tests/ops/integration_tests/test_integration_email.py index 47f006d0c1..fca82e3314 100644 --- a/tests/ops/integration_tests/test_integration_email.py +++ b/tests/ops/integration_tests/test_integration_email.py @@ -1,6 +1,7 @@ from unittest import mock import pytest as pytest +from fideslang.models import Dataset from fideslib.models.audit_log import AuditLog, AuditLogAction from fides.api.ops.graph.config import CollectionAddress @@ -13,7 +14,6 @@ ExecutionLogStatus, ManualAction, ) -from fides.api.ops.schemas.dataset import FidesopsDataset from fides.api.ops.schemas.messaging.messaging import ( MessagingActionType, MessagingServiceType, @@ -74,8 +74,8 @@ async def test_email_connector_cache_and_delayed_send( "email_dataset:payment": [], } - dataset_postgres = FidesopsDataset(**example_datasets[0]) - dataset_email = FidesopsDataset(**example_datasets[9]) + dataset_postgres = Dataset(**example_datasets[0]) + dataset_email = Dataset(**example_datasets[9]) postgres_graph = convert_dataset_to_graph( dataset_postgres, integration_postgres_config.key ) diff --git a/tests/ops/integration_tests/test_mongo_task.py b/tests/ops/integration_tests/test_mongo_task.py index f1724c17d9..9473c67712 100644 --- 
a/tests/ops/integration_tests/test_mongo_task.py +++ b/tests/ops/integration_tests/test_mongo_task.py @@ -6,8 +6,14 @@ import pytest from bson import ObjectId +from fideslang.models import Dataset -from fides.api.ops.graph.config import Collection, Dataset, FieldAddress, ScalarField +from fides.api.ops.graph.config import ( + Collection, + FieldAddress, + GraphDataset, + ScalarField, +) from fides.api.ops.graph.data_type import ( IntTypeConverter, ObjectIdTypeConverter, @@ -19,7 +25,6 @@ from fides.api.ops.models.datasetconfig import convert_dataset_to_graph from fides.api.ops.models.policy import Policy from fides.api.ops.models.privacy_request import PrivacyRequest -from fides.api.ops.schemas.dataset import FidesopsDataset from fides.api.ops.service.connectors import get_connector from fides.api.ops.task import graph_task from fides.api.ops.task.filter_results import filter_data_categories @@ -378,7 +383,7 @@ async def test_composite_key_erasure( ], ) - dataset = Dataset( + dataset = GraphDataset( name="mongo_test", collections=[customer, composite_pk_test], connection_key=integration_mongodb_config.key, @@ -473,7 +478,7 @@ async def test_access_erasure_type_conversion( ], ) - dataset = Dataset( + dataset = GraphDataset( name="mongo_test", collections=[employee, type_link], connection_key=integration_mongodb_config.key, @@ -521,9 +526,9 @@ async def test_object_querying_mongo( postgres_config = copy.copy(integration_postgres_config) - dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset_postgres, integration_postgres_config.key) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( dataset_mongo, integration_mongodb_config.key ) @@ -668,9 +673,9 @@ async def test_return_all_elements_config_access_request( """ postgres_config = copy.copy(integration_postgres_config) - 
dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset_postgres, integration_postgres_config.key) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( dataset_mongo, integration_mongodb_config.key ) @@ -793,9 +798,9 @@ async def test_array_querying_mongo( ): postgres_config = copy.copy(integration_postgres_config) - dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset_postgres, integration_postgres_config.key) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( dataset_mongo, integration_mongodb_config.key ) @@ -1088,7 +1093,7 @@ def connector(self, integration_mongodb_config): @pytest.fixture def traversal_node(self, example_datasets, integration_mongodb_config): - dataset = FidesopsDataset(**example_datasets[1]) + dataset = Dataset(**example_datasets[1]) graph = convert_dataset_to_graph(dataset, integration_mongodb_config.key) customer_details_collection = None for collection in graph.collections: diff --git a/tests/ops/integration_tests/test_sql_task.py b/tests/ops/integration_tests/test_sql_task.py index 5d1a83f133..504c858d1e 100644 --- a/tests/ops/integration_tests/test_sql_task.py +++ b/tests/ops/integration_tests/test_sql_task.py @@ -6,13 +6,14 @@ from uuid import uuid4 import pytest +from fideslang import Dataset from sqlalchemy import text from fides.api.ops.graph.config import ( Collection, CollectionAddress, - Dataset, FieldAddress, + GraphDataset, ScalarField, ) from fides.api.ops.graph.data_type import DataType, StringTypeConverter @@ -22,7 +23,6 @@ from fides.api.ops.models.datasetconfig import convert_dataset_to_graph from fides.api.ops.models.policy import ActionType, 
Policy, Rule, RuleTarget from fides.api.ops.models.privacy_request import ExecutionLog, PrivacyRequest -from fides.api.ops.schemas.dataset import FidesopsDataset from fides.api.ops.service.connectors import get_connector from fides.api.ops.task import graph_task from fides.api.ops.task.filter_results import filter_data_categories @@ -176,7 +176,7 @@ async def test_composite_key_erasure( ], ) - dataset = Dataset( + dataset = GraphDataset( name="postgres_example", collections=[customer, composite_pk_test], connection_key=integration_postgres_config.key, @@ -743,7 +743,7 @@ async def test_filter_on_data_categories( }, ) - dataset = FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, integration_postgres_config.key) dataset_graph = DatasetGraph(*[graph]) @@ -894,7 +894,7 @@ async def test_access_erasure_type_conversion( ], ) - dataset = Dataset( + dataset = GraphDataset( name="postgres_example", collections=[employee, type_link], connection_key=integration_postgres_config.key, @@ -942,7 +942,7 @@ def connector(self, integration_postgres_config): @pytest.fixture def traversal_node(self, example_datasets, integration_postgres_config): - dataset = FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, integration_postgres_config.key) node = Node(graph, graph.collections[1]) # customer collection traversal_node = TraversalNode(node) @@ -1066,7 +1066,7 @@ async def test_retry_access_request( CONFIG.execution.task_retry_delay = 0.1 CONFIG.execution.task_retry_backoff = 0.01 - dataset = FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, integration_postgres_config.key) dataset_graph = DatasetGraph(*[graph]) @@ -1119,7 +1119,7 @@ async def test_retry_erasure( CONFIG.execution.task_retry_delay = 0.1 CONFIG.execution.task_retry_backoff = 0.01 - dataset = 
FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, integration_postgres_config.key) dataset_graph = DatasetGraph(*[graph]) diff --git a/tests/ops/models/test_base.py b/tests/ops/models/test_base.py index 83e9f4eb0f..c5dd80662d 100644 --- a/tests/ops/models/test_base.py +++ b/tests/ops/models/test_base.py @@ -18,7 +18,7 @@ def test_get_key_from_data_method_invalid_key() -> None: get_key_from_data({"key": "test*key", "name": "config name"}, "StorageConfig") assert ( str(exc.value) - == "FidesKeys must only contain alphanumeric characters, '.', '_' or '-'. Value provided: test*key" + == "FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: test*key" ) diff --git a/tests/ops/models/test_datasetconfig.py b/tests/ops/models/test_datasetconfig.py index c9a2246036..236bc00bb2 100644 --- a/tests/ops/models/test_datasetconfig.py +++ b/tests/ops/models/test_datasetconfig.py @@ -1,4 +1,5 @@ import pytest +from fideslang.models import Dataset, FidesDatasetReference from sqlalchemy.orm import Session from fides.api.ops.common_exceptions import ValidationError @@ -8,7 +9,6 @@ convert_dataset_to_graph, validate_dataset_reference, ) -from fides.api.ops.schemas.dataset import FidesopsDataset, FidesopsDatasetReference from ..graph.graph_test_util import field @@ -54,7 +54,7 @@ def test_get_graph(dataset_config: DatasetConfig) -> None: def test_convert_dataset_to_graph_no_collections(example_datasets): dataset_json = example_datasets[0].copy() dataset_json["collections"] = [] - dataset = FidesopsDataset(**dataset_json) + dataset = Dataset(**dataset_json) graph = convert_dataset_to_graph(dataset, "mock_connection_config_key") assert graph is not None assert graph.name == "postgres_example_test_dataset" @@ -64,7 +64,7 @@ def test_convert_dataset_to_graph_no_collections(example_datasets): def test_convert_dataset_to_graph(example_datasets): """Test a more complex 
dataset->graph conversion using the helper method directly""" - dataset = FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, "mock_connection_config_key") assert graph is not None @@ -123,7 +123,7 @@ def test_convert_dataset_to_graph(example_datasets): def test_convert_dataset_to_graph_array_fields(example_datasets): """Test a more complex dataset->graph conversion using the helper method directly""" - dataset = FidesopsDataset(**example_datasets[1]) + dataset = Dataset(**example_datasets[1]) graph = convert_dataset_to_graph(dataset, "mock_connection_config_key") assert graph is not None @@ -159,7 +159,7 @@ def test_validate_dataset_reference(db: Session, dataset_config: DatasetConfig): dataset_key = dataset_config.fides_key collection_name = dataset_config.dataset["collections"][0]["name"] field_name = dataset_config.dataset["collections"][0]["fields"][0]["name"] - dsr = FidesopsDatasetReference( + dsr = FidesDatasetReference( dataset=dataset_key, field=f"{collection_name}.{field_name}" ) validate_dataset_reference(db, dsr) @@ -172,7 +172,7 @@ def test_validate_dataset_reference_invalid(db: Session, dataset_config: Dataset dataset_key = "fake_dataset" collection_name = dataset_config.dataset["collections"][0]["name"] field_name = dataset_config.dataset["collections"][0]["fields"][0]["name"] - dsr = FidesopsDatasetReference( + dsr = FidesDatasetReference( dataset=dataset_key, field=f"{collection_name}.{field_name}" ) with pytest.raises(ValidationError) as e: @@ -182,7 +182,7 @@ def test_validate_dataset_reference_invalid(db: Session, dataset_config: Dataset dataset_key = dataset_config.fides_key collection_name = "fake_collection" field_name = dataset_config.dataset["collections"][0]["fields"][0]["name"] - dsr = FidesopsDatasetReference( + dsr = FidesDatasetReference( dataset=dataset_key, field=f"{collection_name}.{field_name}" ) with pytest.raises(ValidationError) as e: @@ -192,7 +192,7 @@ 
def test_validate_dataset_reference_invalid(db: Session, dataset_config: Dataset dataset_key = dataset_config.fides_key collection_name = dataset_config.dataset["collections"][0]["name"] field_name = "fake_field" - dsr = FidesopsDatasetReference( + dsr = FidesDatasetReference( dataset=dataset_key, field=f"{collection_name}.{field_name}" ) with pytest.raises(ValidationError) as e: @@ -202,7 +202,7 @@ def test_validate_dataset_reference_invalid(db: Session, dataset_config: Dataset dataset_key = dataset_config.fides_key collection_name = dataset_config.dataset["collections"][0]["name"] field_name = "fake_field" - dsr = FidesopsDatasetReference(dataset=dataset_key, field=f"{collection_name}.") + dsr = FidesDatasetReference(dataset=dataset_key, field=f"{collection_name}.") with pytest.raises(ValidationError) as e: validate_dataset_reference(db, dsr) assert "must include at least two dot-separated components" in e.value.message @@ -210,7 +210,7 @@ def test_validate_dataset_reference_invalid(db: Session, dataset_config: Dataset dataset_key = dataset_config.fides_key collection_name = dataset_config.dataset["collections"][0]["name"] field_name = "fake_field" - dsr = FidesopsDatasetReference(dataset=dataset_key, field=f".{field_name}") + dsr = FidesDatasetReference(dataset=dataset_key, field=f".{field_name}") with pytest.raises(ValidationError) as e: validate_dataset_reference(db, dsr) assert "must include at least two dot-separated components" in e.value.message @@ -218,7 +218,7 @@ def test_validate_dataset_reference_invalid(db: Session, dataset_config: Dataset dataset_key = dataset_config.fides_key collection_name = dataset_config.dataset["collections"][0]["name"] field_name = "fake_field" - dsr = FidesopsDatasetReference(dataset=dataset_key, field=f"{collection_name}") + dsr = FidesDatasetReference(dataset=dataset_key, field=f"{collection_name}") with pytest.raises(ValidationError) as e: validate_dataset_reference(db, dsr) assert "must include at least two dot-separated 
components" in e.value.message @@ -226,7 +226,7 @@ def test_validate_dataset_reference_invalid(db: Session, dataset_config: Dataset dataset_key = dataset_config.fides_key collection_name = dataset_config.dataset["collections"][0]["name"] field_name = "fake_field" - dsr = FidesopsDatasetReference(dataset=dataset_key, field=f".") + dsr = FidesDatasetReference(dataset=dataset_key, field=f".") with pytest.raises(ValidationError) as e: validate_dataset_reference(db, dsr) assert "must include at least two dot-separated components" in e.value.message @@ -234,7 +234,7 @@ def test_validate_dataset_reference_invalid(db: Session, dataset_config: Dataset dataset_key = dataset_config.fides_key collection_name = dataset_config.dataset["collections"][0]["name"] field_name = "fake_field" - dsr = FidesopsDatasetReference(dataset=dataset_key, field="") + dsr = FidesDatasetReference(dataset=dataset_key, field="") with pytest.raises(ValidationError) as e: validate_dataset_reference(db, dsr) assert "must include at least two dot-separated components" in e.value.message diff --git a/tests/ops/models/test_saasconfig.py b/tests/ops/models/test_saasconfig.py index e52e2d1904..ce43ca4c49 100644 --- a/tests/ops/models/test_saasconfig.py +++ b/tests/ops/models/test_saasconfig.py @@ -1,12 +1,12 @@ from typing import Dict, List import pytest +from fideslang import FidesDatasetReference from pydantic import ValidationError from fides.api.ops.common_exceptions import ValidationError as FidesopsValidationError from fides.api.ops.graph.config import CollectionAddress, FieldAddress from fides.api.ops.models.connectionconfig import ConnectionConfig -from fides.api.ops.schemas.dataset import FidesopsDatasetReference from fides.api.ops.schemas.saas.saas_config import ( ConnectorParam, Endpoint, @@ -44,7 +44,7 @@ def test_saas_request_override(): pv = ParamValue( name="test_param", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="test_field", 
direction="from" ) ], @@ -285,7 +285,7 @@ def test_matching_grouped_inputs(self): ParamValue( name="a", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="table.a", direction="from", @@ -295,7 +295,7 @@ def test_matching_grouped_inputs(self): ParamValue( name="b", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="table.b", direction="from", @@ -312,7 +312,7 @@ def test_matching_grouped_inputs(self): ParamValue( name="a", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="table.a", direction="from", @@ -322,7 +322,7 @@ def test_matching_grouped_inputs(self): ParamValue( name="b", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="table.b", direction="from", @@ -349,7 +349,7 @@ def test_mismatching_grouped_inputs(self): ParamValue( name="a", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="table.a", direction="from", @@ -359,7 +359,7 @@ def test_mismatching_grouped_inputs(self): ParamValue( name="b", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="table.b", direction="from", @@ -376,7 +376,7 @@ def test_mismatching_grouped_inputs(self): ParamValue( name="b", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="table.b", direction="from", @@ -386,7 +386,7 @@ def test_mismatching_grouped_inputs(self): ParamValue( name="c", references=[ - FidesopsDatasetReference( + FidesDatasetReference( dataset="test_dataset", field="table.c", direction="from", diff --git a/tests/ops/service/connectors/test_email_connector.py b/tests/ops/service/connectors/test_email_connector.py index dd97f58b61..080bda9801 100644 --- a/tests/ops/service/connectors/test_email_connector.py +++ b/tests/ops/service/connectors/test_email_connector.py @@ -3,8 +3,8 @@ from 
fides.api.ops.graph.config import ( ROOT_COLLECTION_ADDRESS, Collection, - Dataset, FieldAddress, + GraphDataset, ScalarField, ) from fides.api.ops.graph.graph import Edge, Node @@ -24,7 +24,7 @@ def generate_node_with_data_category( ScalarField(name=s, data_categories=[data_category]) for s in field_names ], ) - dr = Dataset( + dr = GraphDataset( name=dr_name, collections=[ds], connection_key=f"mock_connection_config_key_{dr_name}", diff --git a/tests/ops/service/connectors/test_queryconfig.py b/tests/ops/service/connectors/test_queryconfig.py index 42a0e7e39c..936614ac1b 100644 --- a/tests/ops/service/connectors/test_queryconfig.py +++ b/tests/ops/service/connectors/test_queryconfig.py @@ -1,6 +1,7 @@ from typing import Any, Dict, Set import pytest +from fideslang.models import Dataset from fides.api.ops.graph.config import ( CollectionAddress, @@ -13,7 +14,6 @@ from fides.api.ops.graph.traversal import Traversal, TraversalNode from fides.api.ops.models.datasetconfig import convert_dataset_to_graph from fides.api.ops.models.privacy_request import PrivacyRequest -from fides.api.ops.schemas.dataset import FidesopsDataset from fides.api.ops.schemas.masking.masking_configuration import HashMaskingConfiguration from fides.api.ops.schemas.masking.masking_secrets import MaskingSecretCache, SecretType from fides.api.ops.service.connectors.query_config import ( @@ -169,7 +169,7 @@ def test_generated_sql_query(self): def test_update_rule_target_fields( self, erasure_policy, example_datasets, connection_config ): - dataset = FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, connection_config.key) dataset_graph = DatasetGraph(*[graph]) traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"}) @@ -203,7 +203,7 @@ def test_update_rule_target_fields( def test_generate_update_stmt_one_field( self, erasure_policy, example_datasets, connection_config ): - dataset = 
FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, connection_config.key) dataset_graph = DatasetGraph(*[graph]) traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"}) @@ -230,7 +230,7 @@ def test_generate_update_stmt_length_truncation( example_datasets, connection_config, ): - dataset = FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, connection_config.key) dataset_graph = DatasetGraph(*[graph]) traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"}) @@ -261,7 +261,7 @@ def test_generate_update_stmt_length_truncation( def test_generate_update_stmt_multiple_fields_same_rule( self, erasure_policy, example_datasets, connection_config ): - dataset = FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, connection_config.key) dataset_graph = DatasetGraph(*[graph]) traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"}) @@ -320,7 +320,7 @@ def test_generate_update_stmt_multiple_fields_same_rule( def test_generate_update_stmts_from_multiple_rules( self, erasure_policy_two_rules, example_datasets, connection_config ): - dataset = FidesopsDataset(**example_datasets[0]) + dataset = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset, connection_config.key) dataset_graph = DatasetGraph(*[graph]) traversal = Traversal(dataset_graph, {"email": "customer-1@example.com"}) @@ -419,9 +419,9 @@ def test_generate_query( integration_mongodb_config, connection_config, ): - dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset_postgres, connection_config.key) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( 
dataset_mongo, integration_mongodb_config.key ) @@ -481,9 +481,9 @@ def test_generate_update_stmt_multiple_fields( integration_mongodb_config, connection_config, ): - dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset_postgres, connection_config.key) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( dataset_mongo, integration_mongodb_config.key ) @@ -544,9 +544,9 @@ def test_generate_update_stmt_multiple_rules( integration_mongodb_config, connection_config, ): - dataset_postgres = FidesopsDataset(**example_datasets[0]) + dataset_postgres = Dataset(**example_datasets[0]) graph = convert_dataset_to_graph(dataset_postgres, connection_config.key) - dataset_mongo = FidesopsDataset(**example_datasets[1]) + dataset_mongo = Dataset(**example_datasets[1]) mongo_graph = convert_dataset_to_graph( dataset_mongo, integration_mongodb_config.key ) diff --git a/tests/ops/task/traversal_data.py b/tests/ops/task/traversal_data.py index 3564fb4e9a..1a84db3ee2 100644 --- a/tests/ops/task/traversal_data.py +++ b/tests/ops/task/traversal_data.py @@ -1,10 +1,12 @@ from typing import Optional, Tuple +from fideslang.validation import FidesKey + from fides.api.ops.graph.config import ( Collection, CollectionAddress, - Dataset, FieldAddress, + GraphDataset, ObjectField, ScalarField, ) @@ -19,7 +21,6 @@ from fides.api.ops.graph.graph import DatasetGraph from fides.api.ops.graph.traversal import Traversal from fides.api.ops.models.connectionconfig import ConnectionConfig -from fides.api.ops.schemas.shared_schemas import FidesOpsKey str_converter = DataType.string.value bool_converter = DataType.boolean.value @@ -28,8 +29,8 @@ def integration_db_mongo_graph( - db_name: str, connection_key: FidesOpsKey -) -> Tuple[Dataset, DatasetGraph]: + db_name: str, connection_key: FidesKey +) -> Tuple[GraphDataset, 
DatasetGraph]: dataset = integration_db_dataset(db_name, connection_key) for coll in dataset.collections: id_field = next(f for f in coll.fields if f.name == "id") @@ -46,7 +47,7 @@ def integration_db_mongo_graph( def combined_mongo_postgresql_graph( postgres_config: ConnectionConfig, mongo_config: ConnectionConfig -) -> Tuple[Dataset, Dataset]: +) -> Tuple[GraphDataset, GraphDataset]: postgres_dataset = integration_db_dataset("postgres_example", postgres_config.key) mongo_addresses = Collection( @@ -496,7 +497,7 @@ def combined_mongo_postgresql_graph( after=set(), ) - mongo_dataset = Dataset( + mongo_dataset = GraphDataset( name="mongo_test", collections=[ mongo_addresses, @@ -516,8 +517,8 @@ def combined_mongo_postgresql_graph( return mongo_dataset, postgres_dataset -def manual_dataset(db_name: str, postgres_db_name) -> Dataset: - """Manual dataset depending on upstream postgres collection and pointing to a node in a downstream +def manual_graph_dataset(db_name: str, postgres_db_name) -> GraphDataset: + """Manual GraphDataset depending on upstream postgres collection and pointing to a node in a downstream postgres collection""" filing_cabinet = Collection( name="filing_cabinet", @@ -554,7 +555,7 @@ def manual_dataset(db_name: str, postgres_db_name) -> Dataset: ), ], ) - return Dataset( + return GraphDataset( name=db_name, collections=[filing_cabinet, storage_unit], connection_key=db_name, @@ -563,11 +564,11 @@ def manual_dataset(db_name: str, postgres_db_name) -> Dataset: def postgres_and_manual_nodes(postgres_db_name: str, manual_db_name: str): postgres_db = integration_db_dataset(postgres_db_name, postgres_db_name) - manual_db = manual_dataset(manual_db_name, postgres_db_name) + manual_db = manual_graph_dataset(manual_db_name, postgres_db_name) return DatasetGraph(postgres_db, manual_db) -def integration_db_dataset(db_name: str, connection_key: FidesOpsKey) -> Dataset: +def integration_db_dataset(db_name: str, connection_key: FidesKey) -> GraphDataset: """A 
traversal that maps tables in the postgresql test database""" customers = Collection( name="customer", @@ -632,7 +633,7 @@ def integration_db_dataset(db_name: str, connection_key: FidesOpsKey) -> Dataset ), ], ) - return Dataset( + return GraphDataset( name=db_name, collections=[customers, addresses, orders, payment_cards], connection_key=connection_key, @@ -640,7 +641,7 @@ def integration_db_dataset(db_name: str, connection_key: FidesOpsKey) -> Dataset def integration_db_graph( - db_name: str, connection_key: Optional[FidesOpsKey] = None + db_name: str, connection_key: Optional[FidesKey] = None ) -> DatasetGraph: """A traversal that maps tables in the postgresql test database""" if not connection_key: @@ -682,7 +683,9 @@ def traversal_paired_dependency() -> Traversal: grouped_inputs={"project", "organization", "email"}, ) - mysql = Dataset(name="mysql", collections=[projects, users], connection_key="mysql") + mysql = GraphDataset( + name="mysql", collections=[projects, users], connection_key="mysql" + ) graph = DatasetGraph(mysql) identity = {"email": "email@gmail.com"} @@ -747,11 +750,13 @@ def sample_traversal() -> Traversal: ScalarField(name="name"), ], ) - mysql = Dataset( + mysql = GraphDataset( name="mysql", collections=[customers, addresses, users], connection_key="mysql" ) - postgres = Dataset(name="postgres", collections=[orders], connection_key="postgres") - mssql = Dataset(name="mssql", collections=[addresses], connection_key="mssql") + postgres = GraphDataset( + name="postgres", collections=[orders], connection_key="postgres" + ) + mssql = GraphDataset(name="mssql", collections=[addresses], connection_key="mssql") graph = DatasetGraph(mysql, postgres, mssql) identity = {"email": "email@gmail.com", "user_id": "1"} diff --git a/tests/ops/test_helpers/dataset_utils.py b/tests/ops/test_helpers/dataset_utils.py index a7d106d5f2..aa5a7d83f7 100644 --- a/tests/ops/test_helpers/dataset_utils.py +++ b/tests/ops/test_helpers/dataset_utils.py @@ -2,6 +2,7 @@ from 
typing import Any, Dict, Iterable, List, Optional import yaml +from fideslang.models import Dataset from fides.api.ops.graph.config import ( Collection, @@ -13,7 +14,6 @@ from fides.api.ops.graph.data_type import DataType, get_data_type, to_data_type_string from fides.api.ops.models.connectionconfig import ConnectionConfig from fides.api.ops.models.datasetconfig import DatasetConfig, convert_dataset_to_graph -from fides.api.ops.schemas.dataset import FidesopsDataset from fides.api.ops.util.collection_util import Row SAAS_DATASET_DIRECTORY = "data/saas/dataset/" @@ -111,12 +111,10 @@ def generate_collections( the existing collections if no API data is available. """ - # convert FidesopsDataset to Dataset to be able to use the Collection helpers + # convert FidesLang Dataset to graph Dataset to be able to use the Collection helpers collection_map = {} if dataset: - graph = convert_dataset_to_graph( - FidesopsDataset(**dataset), dataset["fides_key"] - ) + graph = convert_dataset_to_graph(Dataset(**dataset), dataset["fides_key"]) collection_map = { collection.name: collection for collection in graph.collections } diff --git a/tests/ops/util/test_dataset_yaml.py b/tests/ops/util/test_dataset_yaml.py index 1bb4122963..530d5e1680 100644 --- a/tests/ops/util/test_dataset_yaml.py +++ b/tests/ops/util/test_dataset_yaml.py @@ -2,18 +2,17 @@ import pytest import yaml +from fideslang.models import Dataset from pydantic import ValidationError from fides.api.ops.graph.config import ( CollectionAddress, FieldAddress, - FieldPath, ObjectField, ScalarField, ) from fides.api.ops.graph.graph import DatasetGraph, Edge from fides.api.ops.models.datasetconfig import convert_dataset_to_graph -from fides.api.ops.schemas.dataset import FidesopsDataset from ..graph.graph_test_util import field @@ -128,7 +127,7 @@ def __to_dataset__(yamlstr: str) -> Dict[str, Any]: def test_dataset_yaml_format(): """Test that 'after' parameters are properly read""" dataset = 
__to_dataset__(example_dataset_yaml) - d: FidesopsDataset = FidesopsDataset.parse_obj(dataset) + d: Dataset = Dataset.parse_obj(dataset) config = convert_dataset_to_graph(d, "ignore") assert config.after == {"db1", "db2", "db3"} assert config.collections[0].after == { @@ -143,7 +142,7 @@ def test_dataset_yaml_format_invalid_format(): dataset = __to_dataset__(example_dataset_yaml) dataset.get("collections")[0].get("fidesops_meta").get("after")[0] = "invalid" with pytest.raises(ValueError) as exc: - d: FidesopsDataset = FidesopsDataset.parse_obj(dataset) + d: Dataset = Dataset.parse_obj(dataset) convert_dataset_to_graph(d, "ignore") assert "FidesCollection must be specified in the form 'FidesKey.FidesKey'" in str( exc.value @@ -157,17 +156,17 @@ def test_dataset_yaml_format_invalid_fides_keys(): 0 ] = "invalid*dataset*name.invalid*collection*name" with pytest.raises(ValueError) as exc: - d: FidesopsDataset = FidesopsDataset.parse_obj(dataset) + d: Dataset = Dataset.parse_obj(dataset) convert_dataset_to_graph(d, "ignore") assert ( - "FidesKey must only contain alphanumeric characters, '.', '_' or '-'." + "FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'." 
in str(exc.value) ) def test_nested_dataset_format(): dataset = __to_dataset__(example_dataset_nested_yaml) - ds = FidesopsDataset.parse_obj(dataset) + ds = Dataset.parse_obj(dataset) graph = convert_dataset_to_graph(ds, "ignore") comments_field = field([graph], "mongo_nested_test", "photos", "comments") @@ -194,7 +193,7 @@ def test_nested_dataset_format(): def test_nested_dataset_validation(): with pytest.raises(ValidationError): - FidesopsDataset.parse_obj(__to_dataset__(example_bad_dataset_nested_yaml)) + Dataset.parse_obj(__to_dataset__(example_bad_dataset_nested_yaml)) def test_invalid_datatype(): @@ -210,7 +209,7 @@ def test_invalid_datatype(): data_type: this_is_bad""" dataset = __to_dataset__(bad_data_declaration) with pytest.raises(ValidationError): - FidesopsDataset.parse_obj(dataset) + Dataset.parse_obj(dataset) example_postgres_yaml = """dataset: @@ -255,11 +254,11 @@ def test_invalid_datatype(): def test_dataset_graph_connected_by_nested_fields(): """Two of the fields in the postgres dataset references a nested field in the mongo dataset""" dataset = __to_dataset__(example_dataset_nested_yaml) - ds = FidesopsDataset.parse_obj(dataset) + ds = Dataset.parse_obj(dataset) mongo_dataset = convert_dataset_to_graph(ds, "ignore") postgres_dataset = __to_dataset__(example_postgres_yaml) - ds_postgres = FidesopsDataset.parse_obj(postgres_dataset) + ds_postgres = Dataset.parse_obj(postgres_dataset) postgres_dataset = convert_dataset_to_graph(ds_postgres, "ignore") dataset_graph = DatasetGraph(mongo_dataset, postgres_dataset) @@ -314,7 +313,7 @@ def test_dataset_graph_connected_by_nested_fields(): def test_object_data_category_validation(): """Test trying to validate object with data category specified""" with pytest.raises(ValidationError): - FidesopsDataset.parse_obj( + Dataset.parse_obj( __to_dataset__(example_object_with_data_categories_nested_yaml) ) @@ -341,4 +340,4 @@ def test_object_data_category_validation(): def 
test_return_all_elements_specified_on_non_array_field(): """Test return_all_elements can only be specified on array fields""" with pytest.raises(ValidationError): - FidesopsDataset.parse_obj(__to_dataset__(non_array_field_with_invalid_flag)) + Dataset.parse_obj(__to_dataset__(non_array_field_with_invalid_flag)) diff --git a/tests/ops/util/test_saas_util.py b/tests/ops/util/test_saas_util.py index c8ca489627..05a412afd2 100644 --- a/tests/ops/util/test_saas_util.py +++ b/tests/ops/util/test_saas_util.py @@ -3,9 +3,9 @@ from fides.api.ops.common_exceptions import FidesopsException from fides.api.ops.graph.config import ( Collection, - Dataset, FieldAddress, FieldPath, + GraphDataset, ObjectField, ScalarField, ) @@ -28,7 +28,7 @@ class TestMergeDatasets: def test_add_identity(self): """Augment a SaaS dataset collection with an identity reference""" - saas_dataset = Dataset( + saas_dataset = GraphDataset( name="saas_dataset", collections=[ Collection( @@ -41,7 +41,7 @@ def test_add_identity(self): connection_key="connection_key", ) - saas_config = Dataset( + saas_config = GraphDataset( name="saas_config", collections=[ Collection( @@ -68,7 +68,7 @@ def test_add_identity(self): def test_add_reference(self): """Augment a SaaS dataset collection with a dataset reference""" - saas_dataset = Dataset( + saas_dataset = GraphDataset( name="saas_dataset", collections=[ Collection( @@ -81,7 +81,7 @@ def test_add_reference(self): connection_key="connection_key", ) - saas_config = Dataset( + saas_config = GraphDataset( name="saas_config", collections=[ Collection( @@ -123,7 +123,7 @@ def test_add_reference(self): @pytest.mark.unit_saas def test_add_with_object_fields(self): """Verify complex SaaS dataset fields are preserved after merging""" - saas_dataset = Dataset( + saas_dataset = GraphDataset( name="saas_dataset", collections=[ Collection( @@ -142,7 +142,7 @@ def test_add_with_object_fields(self): connection_key="connection_key", ) - saas_config = Dataset( + saas_config = 
GraphDataset( name="saas_config", collections=[ Collection( @@ -169,7 +169,7 @@ def test_add_with_object_fields(self): @pytest.mark.unit_saas def test_merge_same_scalar_field(self): """Merge two scalar fields between datsets with the same collection/field name""" - saas_dataset = Dataset( + saas_dataset = GraphDataset( name="saas_dataset", collections=[ Collection( @@ -182,7 +182,7 @@ def test_merge_same_scalar_field(self): connection_key="connection_key", ) - saas_config = Dataset( + saas_config = GraphDataset( name="saas_config", collections=[ Collection( @@ -212,7 +212,7 @@ def test_merge_same_scalar_field(self): @pytest.mark.unit_saas def test_merge_same_object_field(self): """Merge a scalar and object field between datsets with the same collection/field name""" - saas_dataset = Dataset( + saas_dataset = GraphDataset( name="saas_dataset", collections=[ Collection( @@ -231,7 +231,7 @@ def test_merge_same_object_field(self): connection_key="connection_key", ) - saas_config = Dataset( + saas_config = GraphDataset( name="saas_config", collections=[ Collection(