Commit 996ac24

Merge branch 'master' into jj--adding-entity-services
jjoyce0510 authored Sep 14, 2022
2 parents d033d09 + c606abd commit 996ac24
Showing 13 changed files with 71 additions and 34 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/close-stale-issues.yml
@@ -12,6 +12,8 @@ jobs:
     steps:
       - uses: actions/stale@v5
         with:
+          ascending: true
+          operations-per-run: 100
           days-before-issue-stale: 30
           days-before-issue-close: 30
           stale-issue-label: "stale"
4 changes: 2 additions & 2 deletions docker/dev-with-cassandra.sh
@@ -23,13 +23,13 @@ fi
 # YOU MUST BUILD VIA GRADLE BEFORE RUNNING THIS.
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd $DIR && \
-COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \
+COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 DOCKER_DEFAULT_PLATFORM="$(uname -m)" docker-compose \
   -f docker-compose-with-cassandra.yml \
   -f docker-compose.dev.yml \
   $CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE \
   pull \
   && \
-COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
+COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 DOCKER_DEFAULT_PLATFORM="$(uname -m)" docker-compose -p datahub \
   -f docker-compose-with-cassandra.yml \
   -f docker-compose.dev.yml \
   $CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE \
4 changes: 2 additions & 2 deletions docker/dev-without-neo4j.sh
@@ -22,13 +22,13 @@ fi
 # YOU MUST BUILD VIA GRADLE BEFORE RUNNING THIS.
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd $DIR && \
-COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \
+COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 DOCKER_DEFAULT_PLATFORM="$(uname -m)" docker-compose \
   -f docker-compose-without-neo4j.yml \
   -f docker-compose-without-neo4j.override.yml \
   -f docker-compose.dev.yml \
   $CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE pull \
   && \
-COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
+COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 DOCKER_DEFAULT_PLATFORM="$(uname -m)" docker-compose -p datahub \
   -f docker-compose-without-neo4j.yml \
   -f docker-compose-without-neo4j.override.yml \
   -f docker-compose.dev.yml \
4 changes: 2 additions & 2 deletions docker/dev.sh
@@ -23,13 +23,13 @@ fi
 # YOU MUST BUILD VIA GRADLE BEFORE RUNNING THIS.
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 cd $DIR && \
-COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose \
+COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 DOCKER_DEFAULT_PLATFORM="$(uname -m)" docker-compose \
   -f docker-compose.yml \
   -f docker-compose.override.yml \
   -f docker-compose.dev.yml \
   $CONSUMERS_COMPOSE $MONITORING_COMPOSE $M1_COMPOSE pull \
   && \
-COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
+COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 DOCKER_DEFAULT_PLATFORM="$(uname -m)" docker-compose -p datahub \
   -f docker-compose.yml \
   -f docker-compose.override.yml \
   -f docker-compose.dev.yml \
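
All three dev scripts get the same two-line change: DOCKER_DEFAULT_PLATFORM is exported for both the pull and the up invocation, so docker-compose resolves images for the host's CPU architecture (uname -m reports e.g. "arm64" on Apple Silicon, "x86_64" on Intel) instead of whatever the daemon defaults to. A minimal Python sketch of the same pattern, assuming an illustrative compose file name:

import os
import platform
import subprocess

# platform.machine() returns the same value as `uname -m`, e.g. "arm64" or "x86_64".
env = {
    **os.environ,
    "COMPOSE_DOCKER_CLI_BUILD": "1",
    "DOCKER_BUILDKIT": "1",
    "DOCKER_DEFAULT_PLATFORM": platform.machine(),
}

# Pull first, then start, mirroring the `pull && up` structure of the scripts above.
subprocess.run(["docker-compose", "-f", "docker-compose.yml", "pull"], check=True, env=env)
subprocess.run(["docker-compose", "-p", "datahub", "-f", "docker-compose.yml", "up", "-d"], check=True, env=env)
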
35 changes: 27 additions & 8 deletions docker/elasticsearch-setup/create-indices.sh
@@ -10,6 +10,7 @@ if [[ $ELASTICSEARCH_USE_SSL == true ]]; then
 else
   ELASTICSEARCH_PROTOCOL=http
 fi
+echo -e "Going to use $ELASTICSEARCH_PROTOCOL"

 if [[ ! -z $ELASTICSEARCH_USERNAME ]] && [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
   AUTH_TOKEN=$(echo -ne "$ELASTICSEARCH_USERNAME:$ELASTICSEARCH_PASSWORD" | base64 --wrap 0)
@@ -18,31 +19,49 @@ fi

 # Add default header if needed
 if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
+  echo -e "Going to use default elastic headers"
   ELASTICSEARCH_AUTH_HEADER="Accept: */*"
 fi

 function create_datahub_usage_event_datastream() {
+  echo -e "Going to use prefix $INDEX_PREFIX"
   if [[ -z "$INDEX_PREFIX" ]]; then
     PREFIX=''
   else
     PREFIX="${INDEX_PREFIX}_"
   fi

-  if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/${PREFIX}datahub_usage_event_policy") -eq 404 ]
+  POLICY_RESPONSE_CODE=$(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/${PREFIX}datahub_usage_event_policy")
+  echo -e "Policy GET response code is $POLICY_RESPONSE_CODE"
+  if [ $POLICY_RESPONSE_CODE -eq 403 ]
   then
-    echo -e "\ncreating datahub_usage_event_policy"
+    echo -e "Forbidden so exiting"
+    exit 1
+  fi
+  POLICY_NAME="${PREFIX}datahub_usage_event_policy"
+  if [ $POLICY_RESPONSE_CODE -eq 404 ]
+  then
+    echo -e "\ncreating $POLICY_NAME"
     sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/policy.json | tee -a /tmp/policy.json
-    curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/${PREFIX}datahub_usage_event_policy" -H 'Content-Type: application/json' --data @/tmp/policy.json
+    curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_ilm/policy/${POLICY_NAME}" -H 'Content-Type: application/json' --data @/tmp/policy.json
   else
-    echo -e "\ndatahub_usage_event_policy exists"
+    echo -e "\n${POLICY_NAME} exists"
   fi
-  if [ $(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/${PREFIX}datahub_usage_event_index_template") -eq 404 ]
+  TEMPLATE_RESPONSE_CODE=$(curl -o /dev/null -s -w "%{http_code}" --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/${PREFIX}datahub_usage_event_index_template")
+  echo -e "Template GET response code is $TEMPLATE_RESPONSE_CODE"
+  if [ $TEMPLATE_RESPONSE_CODE -eq 403 ]
   then
-    echo -e "\ncreating datahub_usage_event_index_template"
+    echo -e "Forbidden so exiting"
+    exit 1
+  fi
+  TEMPLATE_NAME="${PREFIX}datahub_usage_event_index_template"
+  if [ $TEMPLATE_RESPONSE_CODE -eq 404 ]
+  then
+    echo -e "\ncreating $TEMPLATE_NAME"
     sed -e "s/PREFIX/${PREFIX}/g" /index/usage-event/index_template.json | tee -a /tmp/index_template.json
-    curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/${PREFIX}datahub_usage_event_index_template" -H 'Content-Type: application/json' --data @/tmp/index_template.json
+    curl -XPUT --header "$ELASTICSEARCH_AUTH_HEADER" "$ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/_index_template/$TEMPLATE_NAME" -H 'Content-Type: application/json' --data @/tmp/index_template.json
   else
-    echo -e "\ndatahub_usage_event_index_template exists"
+    echo -e "\n$TEMPLATE_NAME exists"
   fi
 }
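
The rewritten setup script now distinguishes three outcomes of the initial GET: 403 aborts immediately (the credentials lack the needed privileges), 404 means the resource is missing and gets created, and anything else is treated as already present. A rough Python analogue of the policy branch, with the endpoint URL and file path assumed from the script above:

import sys
import requests

BASE = "http://elasticsearch:9200"          # assumed local Elasticsearch endpoint
POLICY_NAME = "datahub_usage_event_policy"  # no INDEX_PREFIX in this sketch

resp = requests.get(f"{BASE}/_ilm/policy/{POLICY_NAME}")
print(f"Policy GET response code is {resp.status_code}")
if resp.status_code == 403:
    sys.exit("Forbidden so exiting")
if resp.status_code == 404:
    with open("/index/usage-event/policy.json") as f:
        body = f.read().replace("PREFIX", "")  # same substitution the sed call performs
    requests.put(
        f"{BASE}/_ilm/policy/{POLICY_NAME}",
        headers={"Content-Type": "application/json"},
        data=body,
    ).raise_for_status()
else:
    print(f"{POLICY_NAME} exists")
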
2 changes: 1 addition & 1 deletion docker/quickstart.sh
@@ -37,7 +37,7 @@ then
 else
   echo "No Datahub Neo4j volume found, starting with elasticsearch as graph service"
   cd $DIR && \
-  docker-compose -p datahub \
+  DOCKER_DEFAULT_PLATFORM="$(uname -m)" docker-compose -p datahub \
     -f quickstart/docker-compose-without-neo4j.quickstart.yml \
     $MONITORING_COMPOSE $CONSUMERS_COMPOSE $M1_COMPOSE up $@
 fi
3 changes: 1 addition & 2 deletions metadata-ingestion/setup.py
@@ -59,7 +59,6 @@ def get_long_description():
     "cached_property",
     "ijson",
     "click-spinner",
-    "GitPython>2",
 }

 kafka_common = {
@@ -270,7 +269,7 @@ def get_long_description():
     "looker": looker_common,
     # lkml>=1.1.2 is required to support the sql_preamble expression in LookML
     "lookml": looker_common
-    | {"lkml>=1.1.2", "sql-metadata==2.2.2", "sqllineage==1.3.5"},
+    | {"lkml>=1.1.2", "sql-metadata==2.2.2", "sqllineage==1.3.5", "GitPython>2"},
     "metabase": {"requests", "sqllineage==1.3.5"},
     "mode": {"requests", "sqllineage==1.3.5", "tenacity>=8.0.1"},
     "mongodb": {"pymongo[srv]>=3.11", "packaging"},
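
GitPython drops out of the base requirements and is added back only to the lookml extra, the one place it is used. A trimmed sketch of the resulting extras wiring (the looker_common contents here are placeholders, not the real pins):

looker_common = {"looker-sdk"}  # placeholder; the real set pins a version

plugins = {
    "looker": looker_common,
    # lkml>=1.1.2 is required to support the sql_preamble expression in LookML
    "lookml": looker_common
    | {"lkml>=1.1.2", "sql-metadata==2.2.2", "sqllineage==1.3.5", "GitPython>2"},
}

# `pip install 'acryl-datahub[lookml]'` now pulls in GitPython;
# a plain `pip install acryl-datahub` no longer does.
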
27 changes: 26 additions & 1 deletion metadata-ingestion/src/datahub/cli/docker_cli.py
@@ -56,6 +56,12 @@
 GITHUB_M1_QUICKSTART_COMPOSE_URL = f"{GITHUB_BASE_URL}/{M1_QUICKSTART_COMPOSE_FILE}"
 GITHUB_BOOTSTRAP_MCES_URL = f"{GITHUB_BASE_URL}/{BOOTSTRAP_MCES_FILE}"

+DOCKER_COMPOSE_PLATFORM = (
+    subprocess.run(["uname", "-m"], stdout=subprocess.PIPE)
+    .stdout.decode("utf-8")
+    .rstrip()
+)
+

 @click.group()
 def docker() -> None:
@@ -208,6 +214,10 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
     subprocess.run(
         [*base_command, "stop"],
         check=True,
+        env={
+            **os.environ,
+            "DOCKER_DEFAULT_PLATFORM": DOCKER_COMPOSE_PLATFORM,
+        },
     )
     click.secho("Stopped datahub successfully.", fg="green")
 except subprocess.CalledProcessError:
@@ -638,6 +648,10 @@ def quickstart(
     subprocess.run(
         [*base_command, "pull", "-q"],
         check=True,
+        env={
+            **os.environ,
+            "DOCKER_DEFAULT_PLATFORM": DOCKER_COMPOSE_PLATFORM,
+        },
     )
     click.secho("Finished pulling docker images!")
 except subprocess.CalledProcessError:
@@ -659,6 +673,7 @@ def quickstart(
         check=True,
         env={
             **os.environ,
+            "DOCKER_DEFAULT_PLATFORM": DOCKER_COMPOSE_PLATFORM,
             "DOCKER_BUILDKIT": "1",
         },
     )
@@ -674,7 +689,13 @@ def quickstart(
     # Attempt to run docker compose up every minute.
     if (datetime.datetime.now() - start_time) > up_attempts * up_interval:
         click.echo()
-        subprocess.run(base_command + ["up", "-d", "--remove-orphans"])
+        subprocess.run(
+            base_command + ["up", "-d", "--remove-orphans"],
+            env={
+                **os.environ,
+                "DOCKER_DEFAULT_PLATFORM": DOCKER_COMPOSE_PLATFORM,
+            },
+        )
         up_attempts += 1

     # Check docker health every few seconds.
@@ -694,6 +715,10 @@ def quickstart(
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
         check=True,
+        env={
+            **os.environ,
+            "DOCKER_DEFAULT_PLATFORM": DOCKER_COMPOSE_PLATFORM,
+        },
     )
     log_file.write(ret.stdout)
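
Every docker-compose subprocess in the CLI now receives the same env mapping, pinning DOCKER_DEFAULT_PLATFORM to the value captured once at import time. A hypothetical helper (not in the diff) that would centralize the repeated pattern:

import os
import subprocess
from typing import List

DOCKER_COMPOSE_PLATFORM = (
    subprocess.run(["uname", "-m"], stdout=subprocess.PIPE)
    .stdout.decode("utf-8")
    .rstrip()
)

def run_compose(args: List[str]) -> subprocess.CompletedProcess:
    # Run a docker-compose command with the platform pinned to the host architecture.
    return subprocess.run(
        args,
        check=True,
        env={**os.environ, "DOCKER_DEFAULT_PLATFORM": DOCKER_COMPOSE_PLATFORM},
    )

# e.g. run_compose(["docker-compose", "-p", "datahub", "stop"])
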
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/entrypoints.py
@@ -11,7 +11,7 @@
 from datahub.cli.check_cli import check
 from datahub.cli.cli_utils import DATAHUB_CONFIG_PATH, write_datahub_config
 from datahub.cli.delete_cli import delete
-from datahub.cli.docker import docker
+from datahub.cli.docker_cli import docker
 from datahub.cli.get_cli import get
 from datahub.cli.ingest_cli import ingest
 from datahub.cli.migrate import migrate
4 changes: 3 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -142,6 +142,7 @@ def get_aspect_v2(
         aspect_type: Type[Aspect],
         aspect: str,
         aspect_type_name: Optional[str] = None,
+        version: int = 0,
     ) -> Optional[Aspect]:
         """
         Get an aspect for an entity.
@@ -150,11 +151,12 @@
         :param Type[Aspect] aspect_type: The type class of the aspect being requested (e.g. datahub.metadata.schema_classes.DatasetProperties)
         :param str aspect: The name of the aspect being requested (e.g. schemaMetadata, datasetProperties, etc.)
         :param Optional[str] aspect_type_name: The fully qualified classname of the aspect being requested. Typically not needed and extracted automatically from the class directly. (e.g. com.linkedin.common.DatasetProperties)
+        :param version: The version of the aspect to retrieve. The default of 0 means latest. Versions > 0 go from oldest to newest, so 1 is the oldest.
         :return: the Aspect as a dictionary if present, None if no aspect was found (HTTP status 404)
         :rtype: Optional[Aspect]
         :raises HttpError: if the HTTP response is not a 200 or a 404
         """
-        url: str = f"{self._gms_server}/aspects/{Urn.url_encode(entity_urn)}?aspect={aspect}&version=0"
+        url: str = f"{self._gms_server}/aspects/{Urn.url_encode(entity_urn)}?aspect={aspect}&version={version}"
         response = self._session.get(url)
         if response.status_code == 404:
             # not found
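
A hedged usage sketch of the new parameter, with an illustrative URN: version=0 (the default) keeps the old behavior of fetching the latest aspect, while version=1 fetches the oldest stored version.

from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig
from datahub.metadata.schema_classes import DatasetPropertiesClass

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))
props = graph.get_aspect_v2(
    entity_urn="urn:li:dataset:(urn:li:dataPlatform:hive,example.table,PROD)",
    aspect_type=DatasetPropertiesClass,
    aspect="datasetProperties",
    version=1,  # versions > 0 go oldest -> newest; 0 means latest
)
if props is not None:
    print(props.description)
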
3 changes: 2 additions & 1 deletion metadata-ingestion/src/datahub/ingestion/source/dbt.py
@@ -733,7 +733,8 @@ def get_column_type(
     # resolve modified type
     if dbt_adapter == "trino":
         TypeClass = resolve_trino_modified_type(column_type)
-    elif dbt_adapter == "postgres":
+    elif dbt_adapter == "postgres" or dbt_adapter == "redshift":
+        # Redshift uses a variant of Postgres, so we can use the same logic.
         TypeClass = resolve_postgres_modified_type(column_type)

     # if still not found, report the warning
13 changes: 1 addition & 12 deletions metadata-ingestion/src/datahub/utilities/sql_parser.py
@@ -143,15 +143,4 @@ def get_columns(self) -> List[str]:
         return self.columns


-class DefaultSQLParser(SQLParser):
-    parser: SQLParser
-
-    def __init__(self, sql_query: str) -> None:
-        super().__init__(sql_query)
-        self.parser = SqlLineageSQLParser(sql_query)
-
-    def get_tables(self) -> List[str]:
-        return self.parser.get_tables()
-
-    def get_columns(self) -> List[str]:
-        return self.parser.get_columns()
+DefaultSQLParser = SqlLineageSQLParser
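
Since the deleted wrapper only delegated get_tables and get_columns to SqlLineageSQLParser, the alias is behavior-preserving for callers. A quick illustrative check (query and expected output are examples, not guaranteed formatting):

from datahub.utilities.sql_parser import DefaultSQLParser

parser = DefaultSQLParser("INSERT INTO downstream SELECT a, b FROM upstream")
print(parser.get_tables())   # e.g. ['downstream', 'upstream']
print(parser.get_columns())  # e.g. ['a', 'b']
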
2 changes: 1 addition & 1 deletion smoke-test/tests/utils.py
@@ -6,7 +6,7 @@
 import requests

 from datahub.cli import cli_utils
-from datahub.cli.docker import check_local_docker_containers
+from datahub.cli.docker_cli import check_local_docker_containers
 from datahub.ingestion.run.pipeline import Pipeline
