Commit 0d7b788
feat(ingest): add `datahub state inspect` command (datahub-project#6763)
1 parent: 6eaf3b1
Showing 19 changed files with 182 additions and 174 deletions.
@@ -0,0 +1,83 @@ (new file)
import json
import logging
from datetime import datetime

import click
from click_default_group import DefaultGroup

from datahub.cli.cli_utils import get_url_and_token
from datahub.ingestion.api.ingestion_job_state_provider import IngestionJobStateProvider
from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig
from datahub.ingestion.source.state.checkpoint import Checkpoint
from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
from datahub.ingestion.source.state.stale_entity_removal_handler import (
    StaleEntityRemovalHandler,
)
from datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider import (
    DatahubIngestionCheckpointingProvider,
)
from datahub.metadata.schema_classes import DatahubIngestionCheckpointClass
from datahub.telemetry import telemetry
from datahub.upgrade import upgrade

logger = logging.getLogger(__name__)


@click.group(cls=DefaultGroup, default="urn")
def state() -> None:
    """Managed state stored in DataHub by stateful ingestion."""
    pass


@state.command()
@click.option("--pipeline-name", required=True, type=str)
@click.option("--platform", required=True, type=str)
@click.option("--platform-instance", required=True, type=str)
@upgrade.check_upgrade
@telemetry.with_telemetry
def inspect(pipeline_name: str, platform: str, platform_instance: str) -> None:
    """
    Get the latest stateful ingestion state for a given pipeline.
    Only works for stale entity removal for now.
    """

    # Note that the platform-instance argument is not generated consistently,
    # and is not always equal to the platform_instance config.

    (url, token) = get_url_and_token()
    datahub_graph = DataHubGraph(DataHubGraphConfig(server=url, token=token))

    job_name = StaleEntityRemovalHandler.compute_job_id(platform)

    data_job_urn = IngestionJobStateProvider.get_data_job_urn(
        DatahubIngestionCheckpointingProvider.orchestrator_name,
        pipeline_name,
        job_name,
        platform_instance,
    )
    raw_checkpoint = datahub_graph.get_latest_timeseries_value(
        entity_urn=data_job_urn,
        filter_criteria_map={
            "pipelineName": pipeline_name,
            "platformInstanceId": platform_instance,
        },
        aspect_type=DatahubIngestionCheckpointClass,
    )

    if not raw_checkpoint:
        click.secho("No ingestion state found.", fg="red")
        exit(1)

    checkpoint = Checkpoint.create_from_checkpoint_aspect(
        job_name=job_name,
        checkpoint_aspect=raw_checkpoint,
        state_class=GenericCheckpointState,
    )
    assert checkpoint

    ts = datetime.utcfromtimestamp(raw_checkpoint.timestampMillis / 1000)
    logger.info(
        f"Found checkpoint with runId {checkpoint.run_id} and timestamp {ts.isoformat()}"
    )

    click.echo(json.dumps(checkpoint.state.urns, indent=2))
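For reference, invoking the new command would look something like this; the option values below are hypothetical placeholders, not values taken from this commit:

    datahub state inspect --pipeline-name my_snowflake_pipeline --platform snowflake --platform-instance prod_instance

On success it logs the checkpoint's runId and timestamp and prints the checkpoint's URN list to stdout as JSON; if no matching checkpoint exists, it prints "No ingestion state found." and exits with status 1.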
metadata-ingestion/src/datahub/ingestion/source/state/dbt_state.py
12 changes: 1 addition & 11 deletions
@@ -1,18 +1,8 @@
-from datahub.ingestion.source.state.entity_removal_state import (
-    GenericCheckpointState,
-    pydantic_state_migrator,
-)
+from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState


 class DbtCheckpointState(GenericCheckpointState):
     """
     Class for representing the checkpoint state for DBT sources.
     Stores all nodes and assertions being ingested and is used to remove any stale entities.
     """
-
-    _migration = pydantic_state_migrator(
-        {
-            "encoded_node_urns": "dataset",
-            "encoded_assertion_urns": "assertion",
-        }
-    )
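The removed _migration hook is not simply dropped: the commit presumably consolidates the per-source legacy-field mappings into GenericCheckpointState itself in entity_removal_state.py, one of the file diffs not rendered on this page. Below is a minimal, self-contained sketch of that consolidation, assuming pydantic v1 semantics; pydantic_state_migrator is re-implemented here as a toy that only moves lists between fields, whereas the real helper also handles the compressed URN encoding implied by the "encoded_" field names:

from typing import Dict, List

import pydantic


def pydantic_state_migrator(mapping: Dict[str, str]) -> classmethod:
    # Toy stand-in for illustration: before validation, move URNs from
    # each legacy per-source field (e.g. "encoded_topic_urns") into the
    # shared "urns" list. The mapping values name the entity type of
    # each legacy field; this sketch does not use them.
    def _migrate(cls, values: dict) -> dict:
        for legacy_field in mapping:
            values.setdefault("urns", [])
            values["urns"] = values["urns"] + values.pop(legacy_field, [])
        return values

    return pydantic.root_validator(pre=True, allow_reuse=True)(_migrate)


class GenericCheckpointState(pydantic.BaseModel):
    urns: List[str] = []

    # All legacy per-source fields are migrated in one place, so
    # subclasses like DbtCheckpointState and KafkaCheckpointState need
    # no migration code of their own.
    _migration = pydantic_state_migrator(
        {
            "encoded_node_urns": "dataset",  # previously in DbtCheckpointState
            "encoded_assertion_urns": "assertion",  # previously in DbtCheckpointState
            "encoded_topic_urns": "topic",  # previously in KafkaCheckpointState
        }
    )

With such a shared migrator, an old serialized checkpoint like {"encoded_node_urns": [...]} still deserializes into the generic urns list, and the per-source subclasses shrink to a docstring, which is exactly what the dbt and kafka diffs on this page show.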
metadata-ingestion/src/datahub/ingestion/source/state/kafka_state.py
11 changes: 1 addition & 10 deletions
@@ -1,17 +1,8 @@
-from datahub.ingestion.source.state.entity_removal_state import (
-    GenericCheckpointState,
-    pydantic_state_migrator,
-)
+from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState


 class KafkaCheckpointState(GenericCheckpointState):
     """
     This class represents the checkpoint state for Kafka based sources.
     Stores all the topics being ingested and it is used to remove any stale entities.
     """
-
-    _migration = pydantic_state_migrator(
-        {
-            "encoded_topic_urns": "topic",
-        }
-    )
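KafkaCheckpointState gets the same treatment: its encoded_topic_urns -> topic mapping moves into the consolidated migrator sketched after the dbt diff above (again, presumably, since the entity_removal_state.py diff is not shown here), leaving the subclass as nothing more than a documented alias of GenericCheckpointState.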