Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingest): Ingest Previews for Looker Charts, Dashboards, and Explores #6941

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
BrowsePathsClass,
ChangeTypeClass,
DatasetPropertiesClass,
EmbedClass,
EnumTypeClass,
FineGrainedLineageClass,
GlobalTagsClass,
Expand Down Expand Up @@ -163,12 +164,10 @@ class LookerCommonConfig(DatasetSourceConfigBase):
description=f"Pattern for providing dataset names to explores. {LookerNamingPattern.allowed_docstring()}",
default=LookerNamingPattern(pattern="{model}.explore.{name}"),
)

explore_browse_pattern: LookerNamingPattern = pydantic.Field(
description=f"Pattern for providing browse paths to explores. {LookerNamingPattern.allowed_docstring()}",
default=LookerNamingPattern(pattern="/{env}/{platform}/{project}/explores"),
)

view_naming_pattern: LookerNamingPattern = Field(
LookerNamingPattern(pattern="{project}.view.{name}"),
description=f"Pattern for providing dataset names to views. {LookerNamingPattern.allowed_docstring()}",
Expand All @@ -177,7 +176,6 @@ class LookerCommonConfig(DatasetSourceConfigBase):
LookerNamingPattern(pattern="/{env}/{platform}/{project}/views"),
description=f"Pattern for providing browse paths to views. {LookerNamingPattern.allowed_docstring()}",
)

tag_measures_and_dimensions: bool = Field(
True,
description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more discoverable. When disabled, adds this information to the description of the column.",
Expand All @@ -189,6 +187,10 @@ class LookerCommonConfig(DatasetSourceConfigBase):
True,
description="When enabled, extracts column-level lineage from Views and Explores",
)
# Gates emission of the "embed" aspect: the metadata builders only produce an
# embed MetadataChangeProposal when this flag is truthy.
embed_urls_enabled: bool = Field(
    True,
    description="Produce URLs used to render Looker Explores as Previews inside of DataHub UI. Embeds must be enabled inside of Looker to use this feature.",
)


@dataclass
Expand Down Expand Up @@ -762,6 +764,25 @@ def _get_url(self, base_url):
base_url = m[1]
return f"{base_url}/explore/{self.model_name}/{self.name}"

def _get_embed_url(self, base_url: str) -> str:
    """Build the Looker embed URL for this explore.

    Args:
        base_url: Base URL of the Looker instance. A trailing ``:<port>``
            suffix (like https://company.looker.com:19999) is dropped.

    Returns:
        ``{base_url}/embed/explore/{model_name}/{name}``. The model and
        explore names are interpolated verbatim (no URL-encoding).
    """
    # If the base_url contains a port number, remove the port number.
    port_match = re.match(r"^(.*):([0-9]+)$", base_url)
    if port_match is not None:
        base_url = port_match[1]
    return f"{base_url}/embed/explore/{self.model_name}/{self.name}"

def _create_embed_mcp(
    self, urn: str, base_url: str
) -> MetadataChangeProposalWrapper:
    """Create the UPSERT proposal carrying this explore's "embed" aspect.

    Args:
        urn: URN of the dataset entity the aspect is attached to.
        base_url: Looker base URL, passed to ``_get_embed_url`` to build the
            render URL.

    Returns:
        A ``MetadataChangeProposalWrapper`` for entity type ``dataset`` whose
        aspect is an ``EmbedClass`` holding the explore embed URL.
    """
    explore_embed_url = self._get_embed_url(base_url)
    return MetadataChangeProposalWrapper(
        entityType="dataset",
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=urn,
        aspectName="embed",
        aspect=EmbedClass(renderUrl=explore_embed_url),
    )

def _to_metadata_events( # noqa: C901
self, config: LookerCommonConfig, reporter: SourceReport, base_url: str
) -> Optional[List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]]:
Expand Down Expand Up @@ -862,7 +883,17 @@ def _to_metadata_events( # noqa: C901
aspect=SubTypesClass(typeNames=["explore"]),
)

return [mce, mcp]
proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
mce,
mcp,
]

# If extracting embeds is enabled, produce an MCP for embed URL.
if config.embed_urls_enabled:
embed_mcp = self._create_embed_mcp(dataset_snapshot.urn, base_url)
proposals.append(embed_mcp)

return proposals


class LookerExploreRegistry:
Expand Down Expand Up @@ -1057,6 +1088,18 @@ def url(self, base_url: str) -> str:
else:
return f"{base_url}/x/{self.query_slug}"

def embed_url(self, base_url: str) -> Optional[str]:
    """Return the embeddable URL for this dashboard element, if it has one.

    A dashboard element can use a look or just a raw query against an
    explore; only the look-backed case yields an embed URL here.

    Args:
        base_url: Base URL of the Looker instance. A trailing ``:<port>``
            suffix (like https://company.looker.com:19999) is dropped.

    Returns:
        ``{base_url}/embed/looks/{look_id}`` when ``look_id`` is set,
        otherwise ``None``.
    """
    # If the base_url contains a port number, remove the port number.
    port_match = re.match(r"^(.*):([0-9]+)$", base_url)
    if port_match is not None:
        base_url = port_match[1]
    if self.look_id is not None:
        return f"{base_url}/embed/looks/{self.look_id}"
    # No embeddable URL
    return None

def get_urn_element_id(self):
    """Return the identifier used for this element inside its chart URN.

    A dashboard element can use a look or just a raw query against an
    explore; in both cases the identifier is derived from the element id.
    """
    return f"dashboard_elements.{self.id}"
Expand Down Expand Up @@ -1101,6 +1144,13 @@ def url(self, base_url):
base_url = m[1]
return f"{base_url}/dashboards/{self.id}"

def embed_url(self, base_url: str) -> str:
    """Build the Looker embed URL for this dashboard.

    Args:
        base_url: Base URL of the Looker instance. A trailing ``:<port>``
            suffix (like https://company.looker.com:19999) is dropped.

    Returns:
        ``{base_url}/embed/dashboards/{id}``.
    """
    # If the base_url contains a port number, remove the port number.
    port_match = re.match(r"^(.*):([0-9]+)$", base_url)
    if port_match is not None:
        base_url = port_match[1]
    return f"{base_url}/embed/dashboards/{self.id}"

def get_urn_dashboard_id(self):
    """Return the identifier used for this dashboard inside its URN.

    Delegates to ``get_urn_looker_dashboard_id`` with this dashboard's ``id``.
    """
    return get_urn_looker_dashboard_id(self.id)

Expand Down
160 changes: 120 additions & 40 deletions metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
ChartInfoClass,
ChartTypeClass,
DashboardInfoClass,
EmbedClass,
InputFieldClass,
InputFieldsClass,
OwnerClass,
Expand Down Expand Up @@ -649,9 +650,20 @@ def _get_chart_type(

return chart_type

def _make_chart_mce(
def _create_embed_mcp(
    self, urn: str, embed_url: str
) -> MetadataChangeProposalWrapper:
    """Create the UPSERT proposal carrying the "embed" aspect for an entity.

    Args:
        urn: URN of the target entity. The entity type is taken from the
            third colon-separated component, e.g. ``dashboard`` in
            ``urn:li:dashboard:(looker,dashboards.11)``.
        embed_url: URL stored as the aspect's ``renderUrl``.

    Returns:
        A ``MetadataChangeProposalWrapper`` whose aspect is an ``EmbedClass``.

    Raises:
        IndexError: If ``urn`` has fewer than three colon-separated parts.
    """
    # URNs look like "urn:li:<entityType>:<key>", so index 2 is the type.
    entity_type = urn.split(":")[2]
    return MetadataChangeProposalWrapper(
        entityType=entity_type,
        changeType=ChangeTypeClass.UPSERT,
        entityUrn=urn,
        aspectName="embed",
        aspect=EmbedClass(renderUrl=embed_url),
    )

def _make_chart_metadata_events(
self, dashboard_element: LookerDashboardElement, dashboard: LookerDashboard
) -> MetadataChangeEvent:
) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
chart_urn = builder.make_chart_urn(
self.source_config.platform_name, dashboard_element.get_urn_element_id()
)
Expand Down Expand Up @@ -683,7 +695,81 @@ def _make_chart_mce(
if ownership is not None:
chart_snapshot.aspects.append(ownership)

return MetadataChangeEvent(proposedSnapshot=chart_snapshot)
chart_mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)

proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
chart_mce
]

# If extracting embeds is enabled, produce an MCP for embed URL.
if (
self.source_config.embed_urls_enabled
and self.source_config.external_base_url
):
maybe_embed_url = dashboard_element.embed_url(
self.source_config.external_base_url
)
if maybe_embed_url:
proposals.append(
self._create_embed_mcp(
chart_snapshot.urn,
maybe_embed_url,
)
)

return proposals

def _make_dashboard_metadata_events(
    self, looker_dashboard: LookerDashboard, chart_urns: List[str]
) -> List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
    """Build the metadata events describing a single Looker dashboard.

    Args:
        looker_dashboard: The dashboard to describe.
        chart_urns: URNs of the dashboard's child charts, stored as-is in the
            ``charts`` field of the dashboard info aspect.

    Returns:
        A list starting with the dashboard snapshot MCE. When
        ``embed_urls_enabled`` and ``external_base_url`` are both truthy in
        the source config, an "embed" aspect proposal is appended.
    """
    dashboard_urn = builder.make_dashboard_urn(
        self.source_config.platform_name, looker_dashboard.get_urn_dashboard_id()
    )
    dashboard_snapshot = DashboardSnapshot(
        urn=dashboard_urn,
        aspects=[],
    )

    dashboard_info = DashboardInfoClass(
        description=looker_dashboard.description or "",
        title=looker_dashboard.title,
        charts=chart_urns,
        lastModified=self._get_change_audit_stamps(looker_dashboard),
        dashboardUrl=looker_dashboard.url(self.source_config.external_base_url),
    )
    dashboard_snapshot.aspects.append(dashboard_info)

    # Browse path is only emitted when the dashboard has a folder path.
    if looker_dashboard.folder_path is not None:
        browse_path = BrowsePathsClass(
            paths=[f"/looker/{looker_dashboard.folder_path}"]
        )
        dashboard_snapshot.aspects.append(browse_path)

    ownership = self.get_ownership(looker_dashboard)
    if ownership is not None:
        dashboard_snapshot.aspects.append(ownership)

    dashboard_snapshot.aspects.append(Status(removed=looker_dashboard.is_deleted))

    dashboard_mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot)

    proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
        dashboard_mce
    ]

    # If extracting embeds is enabled, produce an MCP for embed URL.
    # Without an external base URL no embed URL can be built, so skip it.
    if (
        self.source_config.embed_urls_enabled
        and self.source_config.external_base_url
    ):
        proposals.append(
            self._create_embed_mcp(
                dashboard_snapshot.urn,
                looker_dashboard.embed_url(self.source_config.external_base_url),
            )
        )

    return proposals

def _make_explore_metadata_events(
self,
Expand Down Expand Up @@ -732,48 +818,42 @@ def fetch_one_explore(

return events, f"{model}:{explore}", start_time, datetime.datetime.now()

def _extract_event_urn(
    self, event: Union[MetadataChangeEvent, MetadataChangeProposalWrapper]
) -> Optional[str]:
    """Return the URN of the entity an emitted event refers to.

    Args:
        event: Either a snapshot-based ``MetadataChangeEvent`` or a
            ``MetadataChangeProposalWrapper``.

    Returns:
        ``proposedSnapshot.urn`` for a ``MetadataChangeEvent``; otherwise the
        event's ``entityUrn``.
    """
    if isinstance(event, MetadataChangeEvent):
        return event.proposedSnapshot.urn
    return event.entityUrn

def _make_dashboard_and_chart_mces(
    self, looker_dashboard: LookerDashboard
) -> Iterable[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]:
    """Yield the metadata events for a dashboard and its charts.

    Chart events are yielded first (only for elements whose ``type`` is
    ``"vis"``), followed by the events for the dashboard itself.

    Args:
        looker_dashboard: The dashboard whose elements and metadata are
            converted into events.

    Yields:
        Every chart event, then every dashboard event.
    """
    # Step 1: Emit metadata for each Chart inside the Dashboard.
    chart_events = []
    for element in looker_dashboard.dashboard_elements:
        if element.type == "vis":
            chart_events.extend(
                self._make_chart_metadata_events(element, looker_dashboard)
            )
    yield from chart_events

    # Step 2: Emit metadata events for the Dashboard itself.
    # Collect the unique child chart urns for dashboard input lineage. A chart
    # may emit several events (MCE plus embed MCP) sharing one urn, so
    # de-duplicate. dict.fromkeys keeps first-seen order, whereas a set would
    # hand the dashboard its charts in an arbitrary order.
    chart_urns = list(
        dict.fromkeys(
            urn for urn in map(self._extract_event_urn, chart_events) if urn
        )
    )

    yield from self._make_dashboard_metadata_events(looker_dashboard, chart_urns)

def get_ownership(
self, looker_dashboard: LookerDashboard
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,20 @@
"runId": "looker-test"
}
},
{
"entityType": "dashboard",
"entityUrn": "urn:li:dashboard:(looker,dashboards.11)",
"changeType": "UPSERT",
"aspectName": "embed",
"aspect": {
"value": "{\"renderUrl\": \"https://looker.company.com/embed/dashboards/11\"}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "looker-test"
}
},
{
"entityType": "chart",
"entityUrn": "urn:li:chart:(looker,dashboard_elements.2)",
Expand Down Expand Up @@ -189,6 +203,20 @@
"runId": "looker-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)",
"changeType": "UPSERT",
"aspectName": "embed",
"aspect": {
"value": "{\"renderUrl\": \"https://looker.company.com/embed/explore/data/my_view\"}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "looker-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
Expand Down Expand Up @@ -299,6 +327,20 @@
"runId": "looker-test"
}
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,bogus data.explore.my_view,PROD)",
"changeType": "UPSERT",
"aspectName": "embed",
"aspect": {
"value": "{\"renderUrl\": \"https://looker.company.com/embed/explore/bogus data/my_view\"}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "looker-test"
}
},
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
Expand Down
Loading