From aa4fc7b1e056f6c527f59ee7d9e8e9f0b05b9e0d Mon Sep 17 00:00:00 2001 From: Robert Keyser <39230492+RobertKeyser@users.noreply.github.com> Date: Thu, 13 Jun 2024 10:42:11 -0500 Subject: [PATCH] Oracle Responsys: Include Profile Extension Tables in DSRs (#4937) --- data/saas/config/oracle_responsys_config.yml | 26 ++- .../saas/dataset/oracle_responsys_dataset.yml | 14 ++ .../oracle_responsys_request_overrides.py | 198 ++++++++++++++++-- .../saas/oracle_responsys_fixtures.py | 13 +- .../saas/test_oracle_responsys_task.py | 4 + 5 files changed, 229 insertions(+), 26 deletions(-) diff --git a/data/saas/config/oracle_responsys_config.yml b/data/saas/config/oracle_responsys_config.yml index e02371a9ef..7660ed9780 100644 --- a/data/saas/config/oracle_responsys_config.yml +++ b/data/saas/config/oracle_responsys_config.yml @@ -3,7 +3,7 @@ saas_config: name: Oracle Responsys type: oracle_responsys description: A sample schema representing the Oracle Responsys connector for Fides - version: 0.0.1 + version: 0.0.2 connector_params: - name: domain @@ -13,7 +13,14 @@ saas_config: - name: password description: Your Oracle Responsys password sensitive: True - + - name: profile_lists + label: Profile Lists + description: A comma-separated list of profile lists, e.g. list_1,list_2,list_3. If value is "all", Fides will execute the DSR against all profile lists. + default_value: "all" + - name: profile_extensions + label: Profile Extensions + description: A comma-separated list of profile extensions in the format of <profile_list>.<profile_extension>, e.g. list_1.extension_a,list_2.extension_b. If value is "all", Fides will execute the DSR against all profile extensions. If profile lists are restricted, profile extensions will be limited to those belonging to the specified profile lists. 
+ default_value: "all" client_config: protocol: https host: @@ -75,3 +82,18 @@ saas_config: - dataset: field: profile_list_recipient.riid direction: from + - name: profile_extension_recipient + requests: + read: + - request_override: oracle_responsys_profile_extension_recipients_read + param_values: + - name: profile_list_id + references: + - dataset: + field: profile_list_recipient.profile_list_id + direction: from + - name: responsys_id + references: + - dataset: + field: profile_list_recipient.riid + direction: from diff --git a/data/saas/dataset/oracle_responsys_dataset.yml b/data/saas/dataset/oracle_responsys_dataset.yml index b1e889de00..0cb2403b0f 100644 --- a/data/saas/dataset/oracle_responsys_dataset.yml +++ b/data/saas/dataset/oracle_responsys_dataset.yml @@ -110,3 +110,17 @@ dataset: data_categories: [system.operations] fidesops_meta: data_type: string + - name: profile_extension_recipient + fields: + - name: profile_extension_id + data_categories: [system.operations] + fidesops_meta: + data_type: string + - name: riid + data_categories: [user.unique_id] + fidesops_meta: + data_type: string + - name: user_data + data_categories: [user.content] + fidesops_meta: + data_type: string diff --git a/src/fides/api/service/saas_request/override_implementations/oracle_responsys_request_overrides.py b/src/fides/api/service/saas_request/override_implementations/oracle_responsys_request_overrides.py index 4bab4bbd89..e9d5e8dd8a 100644 --- a/src/fides/api/service/saas_request/override_implementations/oracle_responsys_request_overrides.py +++ b/src/fides/api/service/saas_request/override_implementations/oracle_responsys_request_overrides.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List import pydash +from requests import Response from fides.api.common_exceptions import FidesopsException from fides.api.graph.execution import ExecutionNode @@ -17,19 +18,50 @@ from fides.api.util.saas_util import get_identity 
-@register("oracle_responsys_profile_list_recipients_read", [SaaSRequestType.READ]) -def oracle_responsys_profile_list_recipients_read( - client: AuthenticatedClient, - node: ExecutionNode, - policy: Policy, - privacy_request: PrivacyRequest, - input_data: Dict[str, List[Any]], - secrets: Dict[str, Any], -) -> List[Row]: +def oracle_responsys_config_parse_profile_lists(list_restrictions: str) -> List[str]: """ - Retrieve data from each profile list. + Parses the list of profile lists entered as part of the connector params from comma-delimited values. Special value "all" indicates that all profile lists are in-scope. + """ + profile_lists = [] + + if list_restrictions != "all": + profile_lists = list_restrictions.split(",") + + return profile_lists + + +def oracle_responsys_config_parse_profile_extensions( + extension_restrictions: str, +) -> Dict[str, List[str]]: + """ + Parses the list of profile extensions entered as part of the connector params from comma-delimited values. Profile extensions are expected to be in the format of `<profile_list>.<profile_extension>`. Special value "all" indicates that all profile extensions are in-scope. + """ + unparsed_profile_extensions = [] + profile_extensions: Dict[str, List[str]] = {} + + if extension_restrictions != "all": + unparsed_profile_extensions = extension_restrictions.split(",") + for extension in unparsed_profile_extensions: + ext = extension.split(".") + if len(ext) > 2: + raise FidesopsException( + "Profile extension could not be parsed, more than one '.' found." + ) + if len(ext) < 2: + raise FidesopsException( + "Profile extension could not be parsed, '.' not found." + ) + if ext[0] in profile_extensions: + profile_extensions[ext[0]].append(ext[1]) + else: + profile_extensions[ext[0]] = [ext[1]] + + return profile_extensions - The members endpoint returns data in two separate arrays: one for the keys and one for the values for each result. 
+ +def oracle_responsys_serialize_record_data(response: Response) -> List[Dict[Any, Any]]: + """ + Serializes response data from two separate arrays: one for the keys and one for the values for each result, returning a list of dicts. { "recordData": { "fieldNames": [ @@ -42,9 +74,68 @@ def oracle_responsys_profile_list_recipients_read( ] } """ - list_ids = input_data.get("profile_list_id", []) + response_data = pydash.get(response.json(), "recordData") + serialized_data = [] + if response_data: + normalized_field_names = [ + field.lower().rstrip("_") for field in response_data["fieldNames"] + ] + serialized_data = [ + dict(zip(normalized_field_names, records)) + for records in response_data["records"] + ] + return serialized_data + + +def oracle_responsys_get_profile_extensions( + client: AuthenticatedClient, list_ids: List[str] +) -> Dict[str, List[str]]: + """ + Retrieves a list of profile_extensions for each profile_list, returned as a dict. + """ + results = {} + + for list_id in list_ids: + list_extensions_response = client.send( + SaaSRequestParams( + method=HTTPMethod.GET, + path=f"/rest/api/v1.3/lists/{list_id}/listExtensions", + ) + ) + profile_extension_names = pydash.map_( + list_extensions_response.json(), "profileExtension.objectName" + ) + results[list_id] = profile_extension_names + return results + + +@register("oracle_responsys_profile_list_recipients_read", [SaaSRequestType.READ]) +def oracle_responsys_profile_list_recipients_read( + client: AuthenticatedClient, + node: ExecutionNode, + policy: Policy, + privacy_request: PrivacyRequest, + input_data: Dict[str, List[Any]], + secrets: Dict[str, Any], +) -> List[Row]: + """ + Retrieve data from each profile list. 
+ """ results = [] + list_ids_from_api = input_data.get("profile_list_id", []) + list_ids_from_config_str = secrets["profile_lists"] + + if list_ids_from_config_str != "all": + list_ids_from_config = list_ids_from_config_str.split(",") + # Because Fides will ignore 404s, make sure lists exist, so 404s will only come from the recipient not being found. + for list_id in list_ids_from_config: + if list_id not in list_ids_from_api: + raise FidesopsException("Profile list not found.") + list_ids = list_ids_from_config + else: + list_ids = list_ids_from_api + identity = get_identity(privacy_request) if identity == "email": query_ids = input_data.get("email", []) @@ -72,16 +163,8 @@ def oracle_responsys_profile_list_recipients_read( ), [404], # Returns a 404 if no list member is found ) - response_data = pydash.get(members_response.json(), "recordData") - if response_data: - normalized_field_names = [ - field.lower().rstrip("_") for field in response_data["fieldNames"] - ] - serialized_data = [ - dict(zip(normalized_field_names, records)) - for records in response_data["records"] - ] - + serialized_data = oracle_responsys_serialize_record_data(members_response) + if serialized_data: for record in serialized_data: # Filter out the keys with falsy values and append it filtered_records = { @@ -89,7 +172,78 @@ def oracle_responsys_profile_list_recipients_read( } filtered_records["profile_list_id"] = list_id results.append(filtered_records) + return results + +@register("oracle_responsys_profile_extension_recipients_read", [SaaSRequestType.READ]) +def oracle_responsys_profile_extension_recipients_read( + client: AuthenticatedClient, + node: ExecutionNode, + policy: Policy, + privacy_request: PrivacyRequest, + input_data: Dict[str, List[Any]], + secrets: Dict[str, Any], +) -> List[Row]: + """ + Retrieve a list of profile extension tables and returns the data from each profile extension table for the RIIDs. 
+ """ + list_ids = input_data.get("profile_list_id", []) + riids = input_data.get("responsys_id", []) + + results = [] + extensions: Dict[str, List[str]] = {} + + # If config sets the list of extensions, then use it. Otherwise, all extensions are in scope. + extensions_from_config = oracle_responsys_config_parse_profile_extensions( + secrets["profile_extensions"] + ) + extensions_from_api = oracle_responsys_get_profile_extensions(client, list_ids) + if extensions_from_config: + # Because Fides will ignore 404s, make sure lists/extensions exist, so 404s will only come from the recipient not being found. + for key, value in extensions_from_config.items(): + if key not in list_ids: + raise FidesopsException( + "Profile extension does not belong to a valid profile list." + ) + for profile_extension in value: + if profile_extension not in extensions_from_api[key]: + raise FidesopsException("Profile extension not found.") + extensions = extensions_from_config + else: + extensions = extensions_from_api + + body = { + "fieldList": ["all"], + "ids": riids, + "queryAttribute": "r", + } # queryAttribute 'r' represents RIID + + for key, value in extensions.items(): + for profile_extension in value: + list_extensions_response = client.send( + SaaSRequestParams( + method=HTTPMethod.POST, + path=f"/rest/api/v1.3/lists/{key}/listExtensions/{profile_extension}/members", + query_params={"action": "get"}, + body=json.dumps(body), + headers={"Content-Type": "application/json"}, + ), + [404], + ) + + serialized_data = oracle_responsys_serialize_record_data( + list_extensions_response + ) + + for record in serialized_data: + results.append( + { + "profile_extension_id": profile_extension, + "riid": record.pop("riid", None), + # PETs schemas are fully dynamic, so we need to treat the record as a JSON string in order to treat it as user data. 
+ "user_data": json.dumps(record), + } + ) return results diff --git a/tests/fixtures/saas/oracle_responsys_fixtures.py b/tests/fixtures/saas/oracle_responsys_fixtures.py index 74efb5ad90..88d91105b8 100644 --- a/tests/fixtures/saas/oracle_responsys_fixtures.py +++ b/tests/fixtures/saas/oracle_responsys_fixtures.py @@ -24,6 +24,12 @@ def oracle_responsys_secrets(saas_config) -> Dict[str, Any]: or secrets["username"], "password": pydash.get(saas_config, "oracle_responsys.password") or secrets["password"], + "profile_lists": pydash.get(saas_config, "oracle_responsys.profile_lists") + or secrets["profile_lists"], + "profile_extensions": pydash.get( + saas_config, "oracle_responsys.profile_extensions" + ) + or secrets["profile_extensions"], "test_list": pydash.get(saas_config, "oracle_responsys.test_list") or secrets["test_list"], } @@ -78,7 +84,8 @@ def oracle_responsys_erasure_data( oracle_responsys_secrets, ) -> Generator: """ - Creates a dynamic test data record for erasure tests. + Creates a dynamic test data record for profile_list_recipient for erasure tests. + A profile_extension_recipient is not created, because they take a while to be queryable after being created. 
Yields RIID as this may be useful to have in test scenarios """ base_url = f"https://{oracle_responsys_secrets['domain']}" @@ -91,7 +98,9 @@ def oracle_responsys_erasure_data( "records": [ [ oracle_responsys_erasure_identity_email, - oracle_responsys_erasure_identity_phone_number, + oracle_responsys_erasure_identity_phone_number[ + 1: + ], # Omit the + prefix ] ], "mapTemplateName": None, diff --git a/tests/ops/integration_tests/saas/test_oracle_responsys_task.py b/tests/ops/integration_tests/saas/test_oracle_responsys_task.py index b1db9b6484..f812b4e7f1 100644 --- a/tests/ops/integration_tests/saas/test_oracle_responsys_task.py +++ b/tests/ops/integration_tests/saas/test_oracle_responsys_task.py @@ -86,6 +86,8 @@ async def test_non_strict_erasure_request_by_email( assert erasure_results == { "oracle_responsys_instance:profile_list_recipient": 1, "oracle_responsys_instance:profile_list": 0, + "oracle_responsys_instance:profile_extension": 0, + "oracle_responsys_instance:profile_extension_recipient": 0, } @pytest.mark.parametrize( @@ -115,4 +117,6 @@ async def test_non_strict_erasure_request_by_phone_number( assert erasure_results == { "oracle_responsys_instance:profile_list_recipient": 1, "oracle_responsys_instance:profile_list": 0, + "oracle_responsys_instance:profile_extension": 0, + "oracle_responsys_instance:profile_extension_recipient": 0, }