Skip to content

Commit

Permalink
fix(ingest/datasettabledescription): Escape special characters for dataset table descriptions for BQ
Browse files Browse the repository at this point in the history
  • Loading branch information
AvaniSiddhapuraAPT committed Feb 29, 2024
1 parent ed10a8d commit 09e101f
Showing 1 changed file with 21 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,9 @@ def gen_dataset_workunits(

dataset_properties = DatasetProperties(
name=datahub_dataset_name.get_table_display_name(),
description=table.comment,
description=self.unquote_and_decode_unicode_escape_seq(table.comment)
if table.comment
else "",
qualifiedName=str(datahub_dataset_name),
created=(
TimeStamp(time=int(table.created.timestamp() * 1000))
Expand Down Expand Up @@ -1381,3 +1383,21 @@ def add_config_to_report(self):
self.config.start_time,
self.config.end_time,
)

def unquote_and_decode_unicode_escape_seq(
    self,
    string: str,
    leading_quote: str = '"',
    trailing_quote: Optional[str] = None,
) -> str:
    """
    Strip one pair of surrounding quotes and decode backslash escape sequences.

    If ``string`` starts with ``leading_quote`` and ends with
    ``trailing_quote`` (without the two overlapping), that pair is removed.
    Well-formed Python-style escape sequences (``\\uXXXX``, ``\\UXXXXXXXX``,
    ``\\xXX``, ``\\N{NAME}``, octal, and the single-character escapes such as
    ``\\n`` or ``\\"``) are then replaced by the characters they denote.

    Characters that are already non-ASCII are passed through unchanged, and
    malformed or unknown escape sequences are left verbatim rather than
    raising, so a single odd description cannot abort the caller.

    Args:
        string: Text to clean.
        leading_quote: Opening quote to strip; may be longer than one character.
        trailing_quote: Closing quote to strip; defaults to ``leading_quote``
            when ``None`` or empty.

    Returns:
        The unquoted text with escape sequences decoded.
    """
    import re  # function-scope import: the file's import block is not visible here

    trailing_quote = trailing_quote or leading_quote

    # Strip by the real quote lengths, and only when both quotes fit without
    # overlapping (a lone '"' is not a quoted empty string).
    if (
        len(string) >= len(leading_quote) + len(trailing_quote)
        and string.startswith(leading_quote)
        and string.endswith(trailing_quote)
    ):
        string = string[len(leading_quote) : len(string) - len(trailing_quote)]

    escape_pattern = (
        r"\\(?:u[0-9a-fA-F]{4}"
        r"|U[0-9a-fA-F]{8}"
        r"|x[0-9a-fA-F]{2}"
        r"|N\{[^}]+\}"
        r"|[0-7]{1,3}"
        r"|[\\'\"abfnrtv\n])"
    )

    def _decode_escape(match: "re.Match") -> str:
        escape = match.group(0)
        try:
            # Only the ASCII escape token goes through the codec. Running the
            # whole string through encode()/decode("unicode-escape") would
            # reinterpret its UTF-8 bytes as Latin-1 and corrupt non-ASCII text.
            return escape.encode("ascii").decode("unicode-escape")
        except UnicodeError:
            # E.g. an unknown \N{...} name or an out-of-range \UXXXXXXXX:
            # keep the original text instead of failing.
            return escape

    return re.sub(escape_pattern, _decode_escape, string)

0 comments on commit 09e101f

Please sign in to comment.