-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(ingestion): business-glossary - Add values and relatedTerms support #6148
Changes from 17 commits
a17802c
8ee099d
6779aaf
b7db702
0bb731b
19a5ad7
6451471
0a59c60
edf48b7
c8487e1
2de3f2d
5145f43
58888f5
55c5ace
656473c
9f4d739
3922c99
bd5a892
1eeeb6e
b566a92
6fb85ea
d8529d2
8d96b69
2a36fb5
781ca05
3c404ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,3 +59,50 @@ nodes: | |
term_source: "EXTERNAL" | ||
source_ref: FIBO | ||
source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Balance" | ||
- name: House | ||
description: Provides terms related to the house construction | ||
owners: | ||
groups: | ||
- interior | ||
terms: | ||
- name: Red | ||
description: red color | ||
term_source: "EXTERNAL" | ||
source_ref: FIBO | ||
source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" | ||
|
||
- name: Green | ||
description: green color | ||
term_source: "EXTERNAL" | ||
source_ref: FIBO | ||
source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" | ||
|
||
- name: Pink | ||
description: pink color | ||
term_source: "EXTERNAL" | ||
source_ref: FIBO | ||
source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" | ||
|
||
- name: WindowColor | ||
description: Supported window colors | ||
term_source: "EXTERNAL" | ||
source_ref: FIBO | ||
source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" | ||
has_value: | ||
- House.Red | ||
- House.Pink | ||
|
||
- name: Kitchen | ||
description: a room or area where food is prepared and cooked. | ||
term_source: "EXTERNAL" | ||
source_ref: FIBO | ||
source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" | ||
|
||
- name: Spoon | ||
description: an implement consisting of a small, shallow oval or round bowl on a long handle, used for eating, stirring, and serving food. | ||
term_source: "EXTERNAL" | ||
source_ref: FIBO | ||
source_url: "https://spec.edmcouncil.org/fibo/ontology/FBC/ProductsAndServices/ClientsAndAccounts/Account" | ||
is_related_to: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. call this key There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
- House.Kitchen | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,12 @@ | |
import datahub.metadata.schema_classes as models | ||
from datahub.configuration.common import ConfigModel | ||
from datahub.configuration.config_loader import load_config_file | ||
from datahub.emitter.mce_builder import get_sys_time, make_group_urn, make_user_urn | ||
from datahub.emitter.mce_builder import ( | ||
datahub_guid, | ||
get_sys_time, | ||
make_group_urn, | ||
make_user_urn, | ||
) | ||
from datahub.ingestion.api.decorators import ( # SourceCapability,; capability, | ||
SupportStatus, | ||
config_class, | ||
|
@@ -20,7 +25,6 @@ | |
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
valid_status: models.StatusClass = models.StatusClass(removed=False) | ||
auditStamp = models.AuditStampClass( | ||
time=get_sys_time(), actor="urn:li:corpUser:restEmitter" | ||
|
@@ -41,6 +45,8 @@ class GlossaryTermConfig(ConfigModel): | |
owners: Optional[Owners] | ||
inherits: Optional[List[str]] | ||
contains: Optional[List[str]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's add an additional Optional str field called There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
has_value: Optional[List[str]] | ||
is_related_to: Optional[List[str]] | ||
custom_properties: Optional[Dict[str, str]] | ||
|
||
|
||
|
@@ -66,6 +72,8 @@ class DefaultConfig(ConfigModel): | |
|
||
class BusinessGlossarySourceConfig(ConfigModel): | ||
file: str = Field(description="Path to business glossary file to ingest.") | ||
enable_datahub_guid: bool = Field( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Suggest renaming this field to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
description="Generate DataHub guid from glossary node/term name for glossary urn.", default=False) | ||
|
||
|
||
class BusinessGlossaryConfig(DefaultConfig): | ||
|
@@ -77,14 +85,22 @@ class BusinessGlossaryConfig(DefaultConfig): | |
def version_must_be_1(cls, v): | ||
if v != "1": | ||
raise ValueError("Only version 1 is supported") | ||
return v | ||
|
||
|
||
def create_id(path: List[str], enable_datahub_guid: bool) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should additionally take in the Optional There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
id_: str = ".".join(path) | ||
if enable_datahub_guid: | ||
id_ = datahub_guid({"path": id_}) | ||
return id_ | ||
|
||
|
||
def make_glossary_node_urn(path: List[str]) -> str: | ||
return "urn:li:glossaryNode:" + ".".join(path) | ||
def make_glossary_node_urn(path: List[str], enable_datahub_guid: bool) -> str: | ||
return "urn:li:glossaryNode:" + create_id(path, enable_datahub_guid) | ||
|
||
|
||
def make_glossary_term_urn(path: List[str]) -> str: | ||
return "urn:li:glossaryTerm:" + ".".join(path) | ||
def make_glossary_term_urn(path: List[str], enable_datahub_guid: bool) -> str: | ||
return "urn:li:glossaryTerm:" + create_id(path, enable_datahub_guid) | ||
|
||
|
||
def get_owners(owners: Owners) -> models.OwnershipClass: | ||
|
@@ -109,7 +125,7 @@ def get_owners(owners: Owners) -> models.OwnershipClass: | |
|
||
|
||
def get_mces( | ||
glossary: BusinessGlossaryConfig, | ||
glossary: BusinessGlossaryConfig, ingestion_config: BusinessGlossarySourceConfig | ||
) -> List[models.MetadataChangeEventClass]: | ||
events: List[models.MetadataChangeEventClass] = [] | ||
path: List[str] = [] | ||
|
@@ -123,6 +139,7 @@ def get_mces( | |
parentNode=None, | ||
parentOwners=root_owners, | ||
defaults=glossary, | ||
ingestion_config=ingestion_config, | ||
) | ||
|
||
if glossary.terms: | ||
|
@@ -133,6 +150,7 @@ def get_mces( | |
parentNode=None, | ||
parentOwnership=root_owners, | ||
defaults=glossary, | ||
ingestion_config=ingestion_config, | ||
) | ||
|
||
return events | ||
|
@@ -143,16 +161,18 @@ def get_mce_from_snapshot(snapshot: Any) -> models.MetadataChangeEventClass: | |
|
||
|
||
def get_mces_from_node( | ||
glossaryNode: GlossaryNodeConfig, | ||
path: List[str], | ||
parentNode: Optional[str], | ||
parentOwners: models.OwnershipClass, | ||
defaults: DefaultConfig, | ||
glossaryNode: GlossaryNodeConfig, | ||
path: List[str], | ||
parentNode: Optional[str], | ||
parentOwners: models.OwnershipClass, | ||
defaults: DefaultConfig, | ||
ingestion_config: BusinessGlossarySourceConfig, | ||
) -> List[models.MetadataChangeEventClass]: | ||
node_urn = make_glossary_node_urn(path) | ||
node_urn = make_glossary_node_urn(path, ingestion_config.enable_datahub_guid) | ||
node_info = models.GlossaryNodeInfoClass( | ||
definition=glossaryNode.description, | ||
parentNode=parentNode, | ||
name=glossaryNode.name, | ||
) | ||
node_owners = parentOwners | ||
if glossaryNode.owners is not None: | ||
|
@@ -172,6 +192,7 @@ def get_mces_from_node( | |
parentNode=node_urn, | ||
parentOwners=node_owners, | ||
defaults=defaults, | ||
ingestion_config=ingestion_config, | ||
) | ||
|
||
if glossaryNode.terms: | ||
|
@@ -182,18 +203,20 @@ def get_mces_from_node( | |
parentNode=node_urn, | ||
parentOwnership=node_owners, | ||
defaults=defaults, | ||
ingestion_config=ingestion_config, | ||
) | ||
return mces | ||
|
||
|
||
def get_mces_from_term( | ||
glossaryTerm: GlossaryTermConfig, | ||
path: List[str], | ||
parentNode: Optional[str], | ||
parentOwnership: models.OwnershipClass, | ||
defaults: DefaultConfig, | ||
glossaryTerm: GlossaryTermConfig, | ||
path: List[str], | ||
parentNode: Optional[str], | ||
parentOwnership: models.OwnershipClass, | ||
defaults: DefaultConfig, | ||
ingestion_config: BusinessGlossarySourceConfig, | ||
) -> List[models.MetadataChangeEventClass]: | ||
term_urn = make_glossary_term_urn(path) | ||
term_urn = make_glossary_term_urn(path, ingestion_config.enable_datahub_guid) | ||
aspects: List[ | ||
Union[ | ||
models.GlossaryTermInfoClass, | ||
|
@@ -215,23 +238,50 @@ def get_mces_from_term( | |
sourceUrl=glossaryTerm.source_url if glossaryTerm.source_url else defaults.url, | ||
parentNode=parentNode, | ||
customProperties=glossaryTerm.custom_properties, | ||
name=glossaryTerm.name, | ||
) | ||
aspects.append(term_info) | ||
|
||
isA_related = None | ||
hasA_related = None | ||
is_a = None | ||
has_a = None | ||
has_value = None | ||
is_related_to_term = None | ||
if glossaryTerm.inherits is not None: | ||
assert glossaryTerm.inherits is not None | ||
isA_related = [make_glossary_term_urn([term]) for term in glossaryTerm.inherits] | ||
is_a = [ | ||
make_glossary_term_urn([term], ingestion_config.enable_datahub_guid) | ||
for term in glossaryTerm.inherits | ||
] | ||
if glossaryTerm.contains is not None: | ||
assert glossaryTerm.contains is not None | ||
hasA_related = [ | ||
make_glossary_term_urn([term]) for term in glossaryTerm.contains | ||
has_a = [ | ||
make_glossary_term_urn([term], ingestion_config.enable_datahub_guid) | ||
for term in glossaryTerm.contains | ||
] | ||
if glossaryTerm.has_value is not None: | ||
assert glossaryTerm.has_value is not None | ||
has_value = [ | ||
make_glossary_term_urn([term], ingestion_config.enable_datahub_guid) | ||
for term in glossaryTerm.has_value | ||
] | ||
if glossaryTerm.is_related_to is not None: | ||
assert glossaryTerm.is_related_to is not None | ||
is_related_to_term = [ | ||
make_glossary_term_urn([term], ingestion_config.enable_datahub_guid) | ||
for term in glossaryTerm.is_related_to | ||
] | ||
|
||
if isA_related is not None or hasA_related is not None: | ||
if ( | ||
is_a is not None | ||
or has_a is not None | ||
or has_value is not None | ||
or is_related_to_term is not None | ||
): | ||
relatedTerms = models.GlossaryRelatedTermsClass( | ||
isRelatedTerms=isA_related, hasRelatedTerms=hasA_related | ||
isRelatedTerms=is_a, | ||
hasRelatedTerms=has_a, | ||
hasRelatedTermValues=has_value, | ||
isRelatedToTerms=is_related_to_term, | ||
) | ||
aspects.append(relatedTerms) | ||
|
||
|
@@ -275,7 +325,7 @@ def load_glossary_config(self, file_name: str) -> BusinessGlossaryConfig: | |
|
||
def get_workunits(self) -> Iterable[Union[MetadataWorkUnit, UsageStatsWorkUnit]]: | ||
glossary_config = self.load_glossary_config(self.config.file) | ||
for mce in get_mces(glossary_config): | ||
for mce in get_mces(glossary_config, ingestion_config=self.config): | ||
wu = MetadataWorkUnit(f"{mce.proposedSnapshot.urn}", mce=mce) | ||
self.report.report_workunit(wu) | ||
yield wu | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,4 +46,43 @@ record GlossaryRelatedTerms { | |
} | ||
} | ||
hasRelatedTerms: optional array[GlossaryTermUrn] | ||
|
||
/** | ||
* The relationship Has Value with glossary term. | ||
* These are the fixed values a term has. For example, a ColorEnum where RED, GREEN and YELLOW are fixed values. | ||
*/ | ||
@Relationship = { | ||
"/*": { | ||
"name": "HasValue", | ||
"entityTypes": [ "glossaryTerm" ] | ||
} | ||
} | ||
@Searchable = { | ||
"/*": { | ||
"fieldName": "hasRelatedTermValues", | ||
"fieldType": "URN", | ||
"boostScore": 2.0 | ||
} | ||
} | ||
hasRelatedTermValues: optional array[GlossaryTermUrn] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should just be called There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. simplify to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
|
||
/** | ||
* The relationship isRelatedTo with glossary term | ||
*/ | ||
@Relationship = { | ||
"/*": { | ||
"name": "isRelatedTo", | ||
"entityTypes": [ "glossaryTerm" ] | ||
} | ||
} | ||
@Searchable = { | ||
"/*": { | ||
"fieldName": "isRelatedToTerms", | ||
"fieldType": "URN", | ||
"boostScore": 2.0 | ||
} | ||
} | ||
isRelatedToTerms: optional array[GlossaryTermUrn] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. call this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's call this key
values
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done