Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingestion/looker): set project-name for imported_projects views #8086

Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
63c5996
view project map for dependent project views
siddiquebagwan-gslab May 19, 2023
c2a68b4
lint fix
siddiquebagwan-gslab May 19, 2023
b131112
Merge branch 'master' into master+ing-74-looker-refinements-lineage
siddiquebagwan-gslab May 19, 2023
939862d
view_project_map derived from fields
siddiquebagwan-gslab May 22, 2023
d571240
Merge branch 'master' into master+ing-74-looker-refinements-lineage
siddiquebagwan-gslab May 22, 2023
0da21fc
ignore logging empty map
siddiquebagwan-gslab May 22, 2023
b497e55
Merge branch 'master' into master+ing-74-looker-refinements-lineage
siddiquebagwan May 24, 2023
94620bf
resolve merge conflict
siddiquebagwan-gslab May 26, 2023
cb308ed
Merge branch 'master+ing-74-looker-refinements-lineage' of github.com…
siddiquebagwan-gslab May 26, 2023
7008f59
lint fix
siddiquebagwan-gslab May 26, 2023
19c7cf8
Merge branch 'master+ing-74-looker-refinements-lineage' of github.com…
siddiquebagwan-gslab May 26, 2023
4737692
Merge branch 'master' into master+ing-74-looker-refinements-lineage
siddiquebagwan-gslab May 26, 2023
f783843
Merge branch 'master+ing-74-looker-refinements-lineage' of github.com…
siddiquebagwan-gslab May 26, 2023
72e3696
Merge branch 'master' into master+ing-74-looker-refinements-lineage
siddiquebagwan May 29, 2023
804076c
Merge branch 'master' into master+ing-74-looker-refinements-lineage
siddiquebagwan Jun 1, 2023
31110b1
Merge branch 'master' into master+ing-74-looker-refinements-lineage
siddiquebagwan Jun 2, 2023
d17247a
review comments
siddiquebagwan-gslab Jun 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 78 additions & 116 deletions metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,30 @@
from __future__ import print_function

import dataclasses
import datetime
import itertools
import logging
import re
from dataclasses import dataclass, field as dataclasses_field
from enum import Enum
from functools import lru_cache
from typing import (
TYPE_CHECKING,
ClassVar,
Dict,
Iterable,
List,
Optional,
Set,
Tuple,
Union,
)
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple, Union

import pydantic
from looker_sdk.error import SDKError
from looker_sdk.sdk.api40.models import User, WriteQuery
from pydantic import Field
from looker_sdk.sdk.api40.models import LookmlModelExploreField, User, WriteQuery
from pydantic.class_validators import validator

import datahub.emitter.mce_builder as builder
from datahub.configuration import ConfigModel
from datahub.configuration.common import ConfigurationError
from datahub.configuration.source_common import DatasetSourceConfigMixin
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.emitter.mcp_builder import create_embed_mcp
from datahub.ingestion.api.report import Report
from datahub.ingestion.api.source import SourceReport
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
from datahub.ingestion.source.looker.looker_config import (
LookerCommonConfig,
LookerDashboardSourceConfig,
NamingPatternMapping,
)
from datahub.ingestion.source.looker.looker_constant import IMPORTED_PROJECTS
from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
from datahub.ingestion.source.sql.sql_types import (
POSTGRES_TYPES_MAP,
Expand Down Expand Up @@ -95,103 +85,6 @@
logger = logging.getLogger(__name__)


class NamingPattern(ConfigModel):
    """
    A format-style naming pattern such as "{project}.view.{name}".

    Subclasses list the placeholder names they accept in ALLOWED_VARS.
    A bare string is accepted wherever a NamingPattern is expected and is
    treated as the value of ``pattern``.
    """

    # Placeholder names that may appear inside ``{...}`` in the pattern.
    ALLOWED_VARS: ClassVar[List[str]] = []
    # Whether a pattern without any placeholder is rejected during validation.
    REQUIRE_AT_LEAST_ONE_VAR: ClassVar[bool] = True

    pattern: str

    @classmethod
    def __get_validators__(cls):
        """Yield the validators in the order they must run."""
        # 1. normalise raw strings, 2. the class's own ``validate`` hook
        # (defined outside this block), 3. check the placeholders.
        yield cls.pydantic_accept_raw_pattern
        yield cls.validate
        yield cls.pydantic_validate_pattern

    @classmethod
    def pydantic_accept_raw_pattern(cls, v):
        """Wrap a plain string as ``{"pattern": v}``; pass instances and dicts through."""
        if not isinstance(v, (NamingPattern, dict)):
            assert isinstance(v, str), "pattern must be a string"
            return {"pattern": v}
        return v

    @classmethod
    def pydantic_validate_pattern(cls, v):
        """Check the placeholders of an already constructed NamingPattern."""
        assert isinstance(v, NamingPattern)
        assert v.validate_pattern(cls.REQUIRE_AT_LEAST_ONE_VAR)
        return v

    @classmethod
    def allowed_docstring(cls) -> str:
        """Return a sentence listing ALLOWED_VARS, for use in field descriptions."""
        return f"Allowed variables are {cls.ALLOWED_VARS}"

    def validate_pattern(self, at_least_one: bool) -> bool:
        """
        Verify every ``{placeholder}`` in the pattern against ALLOWED_VARS.

        Raises ConfigurationError on the first placeholder that is not
        allowed, or when ``at_least_one`` is set and the pattern has no
        placeholder at all. Returns True otherwise.
        """
        # Each match still carries its surrounding braces; slice them off.
        placeholders = [
            match[1:-1] for match in re.findall("({[^}{]+})", self.pattern)
        ]

        for v in placeholders:
            if v in self.ALLOWED_VARS:
                continue
            raise ConfigurationError(
                f"Failed to find {v} in allowed_variables {self.ALLOWED_VARS}"
            )

        if at_least_one and not placeholders:
            raise ConfigurationError(
                f"Failed to find any variable assigned to pattern {self.pattern}. Must have at least one. {self.allowed_docstring()}"
            )
        return True

    def replace_variables(self, values: Union[Dict[str, Optional[str]], object]) -> str:
        """
        Format the pattern with ``values``.

        ``values`` is either a dict or a dataclass instance (converted with
        ``dataclasses.asdict``). Entries whose value is None are dropped
        before formatting.
        """
        if isinstance(values, dict):
            mapping = values
        else:
            # Check that this is a dataclass instance (not a dataclass type).
            assert dataclasses.is_dataclass(values) and not isinstance(values, type)
            mapping = dataclasses.asdict(values)

        present = {key: val for key, val in mapping.items() if val is not None}
        return self.pattern.format(**present)


@dataclass
class NamingPatternMapping:
    """
    Values that can be substituted into a naming pattern.

    The field names below are the placeholder names; keep their order and
    spelling stable because other code enumerates them via
    ``dataclasses.fields``.
    """

    platform: str  # value for the {platform} placeholder
    env: str  # value for the {env} placeholder
    project: str  # value for the {project} placeholder
    model: str  # value for the {model} placeholder
    name: str  # value for the {name} placeholder


class LookerNamingPattern(NamingPattern):
    """Naming pattern whose allowed placeholders are the NamingPatternMapping field names."""

    ALLOWED_VARS = [
        mapping_field.name
        for mapping_field in dataclasses.fields(NamingPatternMapping)
    ]


class LookerCommonConfig(DatasetSourceConfigMixin):
    """
    Common Looker configuration: naming and browse-path patterns for explores
    and views, plus switches for tagging and column-level lineage.
    """

    # --- explores -----------------------------------------------------------
    explore_naming_pattern: LookerNamingPattern = pydantic.Field(
        default=LookerNamingPattern(pattern="{model}.explore.{name}"),
        description=f"Pattern for providing dataset names to explores. {LookerNamingPattern.allowed_docstring()}",
    )
    explore_browse_pattern: LookerNamingPattern = pydantic.Field(
        default=LookerNamingPattern(pattern="/{env}/{platform}/{project}/explores"),
        description=f"Pattern for providing browse paths to explores. {LookerNamingPattern.allowed_docstring()}",
    )

    # --- views --------------------------------------------------------------
    view_naming_pattern: LookerNamingPattern = pydantic.Field(
        default=LookerNamingPattern(pattern="{project}.view.{name}"),
        description=f"Pattern for providing dataset names to views. {LookerNamingPattern.allowed_docstring()}",
    )
    view_browse_pattern: LookerNamingPattern = pydantic.Field(
        default=LookerNamingPattern(pattern="/{env}/{platform}/{project}/views"),
        description=f"Pattern for providing browse paths to views. {LookerNamingPattern.allowed_docstring()}",
    )

    # --- behaviour switches -------------------------------------------------
    tag_measures_and_dimensions: bool = pydantic.Field(
        default=True,
        description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more discoverable. When disabled, adds this information to the description of the column.",
    )
    platform_name: str = pydantic.Field(
        default="looker", description="Default platform name. Don't change."
    )
    extract_column_level_lineage: bool = pydantic.Field(
        default=True,
        description="When enabled, extracts column-level lineage from Views and Explores",
    )


@dataclass
class LookerViewId:
project_name: str
Expand Down Expand Up @@ -246,10 +139,25 @@ class ViewField:
type: str
description: str
field_type: ViewFieldType
project_name: Optional[str] = None
view_name: Optional[str] = None
is_primary_key: bool = False
upstream_fields: List[str] = dataclasses_field(default_factory=list)


def create_view_project_map(view_fields: List[ViewField]) -> Dict[str, str]:
"""
Build a mapping of view name -> project name from the given view fields.

Fields whose view_name or project_name is None are skipped. When several
fields carry the same view_name, the project_name of the last one wins.

Each view in a model has a unique name, so use this function in the scope
of a single model.
"""
view_project_map: Dict[str, str] = {}
for view_field in view_fields:
# Only fields that know both their view and their project contribute an entry.
if view_field.view_name is not None and view_field.project_name is not None:
view_project_map[view_field.view_name] = view_field.project_name
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this break if you have the same view name in two projects?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Views within a model have unique names, so it won't break, because the map is used by LookerExplore.from_api.
I added a corresponding docstring to the function.


return view_project_map


class LookerUtil:
field_type_mapping = {
**POSTGRES_TYPES_MAP,
Expand Down Expand Up @@ -326,6 +234,37 @@ def _extract_view_from_field(field: str) -> str:
), f"Error: A field must be prefixed by a view name, field is: {field}"
return field.split(".")[0]

@staticmethod
def extract_view_name_from_lookml_model_explore_field(
    field: LookmlModelExploreField,
) -> Optional[str]:
    """
    Return the view name of an explore field.

    ``original_view`` takes precedence; ``view`` is used only when
    ``original_view`` is None.
    """
    original_view = field.original_view
    return field.view if original_view is None else original_view

@staticmethod
def extract_project_name_from_source_file(
    source_file: Optional[str],
) -> Optional[str]:
    """
    Derive the project name from an explore field's ``source_file``.

    source_file is a key inside explore.fields; it points to the relative
    path of the included view. If the view is included from another project,
    source_file starts with the IMPORTED_PROJECTS prefix and the project name
    is the second path component.
    Example: imported_projects/datahub-demo/views/datahub-demo/datasets/faa_flights.view.lkml

    Returns None when source_file is None, does not start with
    IMPORTED_PROJECTS, or has no second path component.
    """
    if source_file is None or not source_file.startswith(IMPORTED_PROJECTS):
        return None

    path_parts: List[str] = source_file.split("/")
    # Index 1 is the project name, directly after the imported-projects prefix.
    return path_parts[1] if len(path_parts) > 1 else None

@staticmethod
def _get_field_type(
native_type: str, reporter: SourceReport
Expand Down Expand Up @@ -543,6 +482,7 @@ def from_dict(
view_names: Set[str] = set()
joins = None
assert "name" in dict, "Explore doesn't have a name field, this isn't allowed"

# The view name that the explore refers to is resolved in the following order of priority:
# 1. view_name: https://cloud.google.com/looker/docs/reference/param-explore-view-name
# 2. from: https://cloud.google.com/looker/docs/reference/param-explore-from
Expand Down Expand Up @@ -624,6 +564,7 @@ def from_api( # noqa: C901
explore_name: str,
client: LookerAPI,
reporter: SourceReport,
source_config: LookerDashboardSourceConfig,
) -> Optional["LookerExplore"]: # noqa: C901
from datahub.ingestion.source.looker.lookml_source import _BASE_PROJECT_NAME

Expand Down Expand Up @@ -696,6 +637,12 @@ def from_api( # noqa: C901
field_type=ViewFieldType.DIMENSION_GROUP
if dim_field.dimension_group is not None
else ViewFieldType.DIMENSION,
project_name=LookerUtil.extract_project_name_from_source_file(
dim_field.source_file
),
view_name=LookerUtil.extract_view_name_from_lookml_model_explore_field(
dim_field
),
is_primary_key=dim_field.primary_key
if dim_field.primary_key
else False,
Expand All @@ -718,13 +665,23 @@ def from_api( # noqa: C901
if measure_field.type is not None
else "",
field_type=ViewFieldType.MEASURE,
project_name=LookerUtil.extract_project_name_from_source_file(
measure_field.source_file
),
# Take the view name from the measure itself, like every other argument of
# this ViewField; `dim_field` is the variable used for the dimension fields.
view_name=LookerUtil.extract_view_name_from_lookml_model_explore_field(
measure_field
),
is_primary_key=measure_field.primary_key
if measure_field.primary_key
else False,
upstream_fields=[measure_field.name],
)
)

view_project_map: Dict[str, str] = create_view_project_map(view_fields)
if view_project_map:
logger.debug(f"views and their projects: {view_project_map}")

return cls(
name=explore_name,
model_name=model,
Expand All @@ -734,7 +691,9 @@ def from_api( # noqa: C901
fields=view_fields,
upstream_views=list(
ProjectInclude(
project=_BASE_PROJECT_NAME,
project=_BASE_PROJECT_NAME
if view_name not in view_project_map
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can change this to view_project_map.get(view_name, _BASE_PROJECT_NAME)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

else view_project_map[view_name],
include=view_name,
)
for view_name in views
Expand Down Expand Up @@ -916,9 +875,11 @@ def __init__(
self,
looker_api: LookerAPI,
report: SourceReport,
source_config: LookerDashboardSourceConfig,
):
self.client = looker_api
self.report = report
self.source_config = source_config

@lru_cache()
def get_explore(self, model: str, explore: str) -> Optional[LookerExplore]:
Expand All @@ -927,6 +888,7 @@ def get_explore(self, model: str, explore: str) -> Optional[LookerExplore]:
explore,
self.client,
self.report,
self.source_config,
)
return looker_explore

Expand Down
Loading