This repository has been archived by the owner on Jan 27, 2025. It is now read-only.
forked from datahub-project/datahub
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ingest): add stateful ingestion to the ldap source (datahub-project#6127)
Co-authored-by: Harshal Sheth <[email protected]>
- Loading branch information
1 parent
236de5d
commit fced9b3
Showing
11 changed files
with
844 additions
and
119 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
39 changes: 39 additions & 0 deletions
39
metadata-ingestion/src/datahub/ingestion/source/state/ldap_state.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from typing import Iterable, List | ||
|
||
import pydantic | ||
|
||
from datahub.ingestion.source.state.stale_entity_removal_handler import ( | ||
StaleEntityCheckpointStateBase, | ||
) | ||
from datahub.utilities.urns.urn import guess_entity_type | ||
|
||
|
||
class LdapCheckpointState(StaleEntityCheckpointStateBase["LdapCheckpointState"]):
    """
    Base class for representing the checkpoint state for all LDAP based sources.

    Stores the urns of all corpuser and corpGroup entities being ingested and
    is used to remove any stale entities.
    """

    # All urns recorded for this checkpoint, in the order they were added.
    # corpuser and corpGroup urns share this single list; the entity type is
    # recovered from the urn itself when filtering (see get_urns_not_in).
    urns: List[str] = pydantic.Field(default_factory=list)

    @classmethod
    def get_supported_types(cls) -> List[str]:
        """Return the entity type names this checkpoint state accepts."""
        return ["corpuser", "corpGroup"]

    def add_checkpoint_urn(self, type: str, urn: str) -> None:
        """
        Record `urn` in this checkpoint state.

        :param type: entity type of the urn; must be one of get_supported_types().
        :param urn: the urn to record.
        :raises AssertionError: if `type` is not a supported entity type
            (only while assertions are enabled).
        """
        assert type in self.get_supported_types()
        self.urns.append(urn)

    def get_urns_not_in(
        self, type: str, other_checkpoint_state: "LdapCheckpointState"
    ) -> Iterable[str]:
        """
        Yield the urns of entity type `type` that are recorded in this state
        but not in `other_checkpoint_state`.

        Each urn is yielded at most once, in the order it was first added to
        this state, so the output is reproducible from run to run.

        :param type: entity type to filter on; must be one of get_supported_types().
        :param other_checkpoint_state: the state to compare against.
        :raises AssertionError: if `type` is not a supported entity type
            (only while assertions are enabled). Because this is a generator,
            the check runs when iteration starts, not when the method is called.
        """
        assert type in self.get_supported_types()
        other_urns = set(other_checkpoint_state.urns)
        # dict.fromkeys de-duplicates while keeping insertion order; iterating
        # a plain set difference would make the yield order depend on string
        # hashing, which varies between interpreter runs.
        for urn in dict.fromkeys(self.urns):
            if urn not in other_urns and guess_entity_type(urn) == type:
                yield urn

    def get_percent_entities_changed(
        self, old_checkpoint_state: "LdapCheckpointState"
    ) -> float:
        """
        Compare this state's urns against those of `old_checkpoint_state`.

        Delegates to StaleEntityCheckpointStateBase.compute_percent_entities_changed
        with a single (current urns, old urns) pair.

        :param old_checkpoint_state: the earlier state to compare against.
        :return: the value computed by the base-class helper (presumably a
            percentage - confirm against the helper's definition).
        """
        return StaleEntityCheckpointStateBase.compute_percent_entities_changed(
            [(self.urns, old_checkpoint_state.urns)]
        )
Oops, something went wrong.