diff --git a/docs/quickstart.md b/docs/quickstart.md
index 075517f9338ff..905da3001bbed 100644
--- a/docs/quickstart.md
+++ b/docs/quickstart.md
@@ -4,15 +4,25 @@
To deploy a new instance of DataHub, perform the following steps.
-1. Install [docker](https://docs.docker.com/install/), [jq](https://stedolan.github.io/jq/download/) and [docker-compose v1 ](https://github.com/docker/compose/blob/master/INSTALL.md) (if
- using Linux). Make sure to allocate enough hardware resources for Docker engine. Tested & confirmed config: 2 CPUs,
- 8GB RAM, 2GB Swap area, and 10GB disk space.
-2. Launch the Docker Engine from command line or the desktop app.
+1. Install Docker for your platform.
+- On Windows or Mac, install [Docker Desktop](https://www.docker.com/products/docker-desktop/).
+- On Linux, install [Docker for Linux](https://docs.docker.com/desktop/install/linux-install/).
+
+:::note
+
+Make sure to allocate enough hardware resources for the Docker engine.
+Tested & confirmed config: 2 CPUs, 8GB RAM, 2GB Swap area, and 10GB disk space.
+
+:::
+
+2. Install [jq](https://stedolan.github.io/jq/download/)
+
+3. Launch the Docker Engine from command line or the desktop app.
3. Install the DataHub CLI
- a. Ensure you have Python 3.6+ installed & configured. (Check using `python3 --version`)
+ a. Ensure you have Python 3.7+ installed & configured. (Check using `python3 --version`).
b. Run the following commands in your terminal
@@ -37,11 +47,44 @@ To deploy a new instance of DataHub, perform the following steps.
```
This will deploy a DataHub instance using [docker-compose](https://docs.docker.com/compose/).
+ If you are curious, the `docker-compose.yaml` file is downloaded to your home directory under the `.datahub/quickstart` directory.
+
+ If things go well, you should see messages like the ones below:
+
+ ```
+ Fetching docker-compose file https://raw.githubusercontent.com/datahub-project/datahub/master/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml from GitHub
+ Pulling docker images...
+ Finished pulling docker images!
+
+ [+] Running 11/11
+ ⠿ Container zookeeper Running 0.0s
+ ⠿ Container elasticsearch Running 0.0s
+ ⠿ Container broker Running 0.0s
+ ⠿ Container schema-registry Running 0.0s
+ ⠿ Container elasticsearch-setup Started 0.7s
+ ⠿ Container kafka-setup Started 0.7s
+ ⠿ Container mysql Running 0.0s
+ ⠿ Container datahub-gms Running 0.0s
+ ⠿ Container mysql-setup Started 0.7s
+ ⠿ Container datahub-datahub-actions-1 Running 0.0s
+ ⠿ Container datahub-frontend-react Running 0.0s
+ .......
+ ✔ DataHub is now running
+ Ingest some demo data using `datahub docker ingest-sample-data`,
+ or head to http://localhost:9002 (username: datahub, password: datahub) to play around with the frontend.
+ Need support? Get in touch on Slack: https://slack.datahubproject.io/
+ ```
Upon completion of this step, you should be able to navigate to the DataHub UI
at [http://localhost:9002](http://localhost:9002) in your browser. You can sign in using `datahub` as both the
username and password.
+:::note
+
+ On Mac computers with Apple Silicon (M1, M2 etc.), you might see an error like `no matching manifest for linux/arm64/v8 in the manifest list entries`. This typically means that the DataHub CLI was not able to detect that you are running it on Apple Silicon. To resolve this issue, override the default architecture detection by running `datahub docker quickstart --arch m1`.
+
+:::
+
5. To ingest the sample metadata, run the following CLI command from your terminal
@@ -100,6 +143,13 @@ By default the quickstart deploy will require the following ports to be free on
+
+
+no matching manifest for linux/arm64/v8 in the manifest list entries
+
+On Mac computers with Apple Silicon (M1, M2 etc.), you might see an error like `no matching manifest for linux/arm64/v8 in the manifest list entries`. This typically means that the DataHub CLI was not able to detect that you are running it on Apple Silicon. To resolve this issue, override the default architecture detection by running `datahub docker quickstart --arch m1`.
+
+
Miscellaneous Docker issues
diff --git a/docs/wip/get-started.md b/docs/wip/get-started.md
index 973003f5bbfce..b57cd380a8f30 100644
--- a/docs/wip/get-started.md
+++ b/docs/wip/get-started.md
@@ -1,3 +1,5 @@
# Get Started With DataHub
-This page is under construction - more details coming soon!
\ No newline at end of file
+To get started with running open-source DataHub locally on your system (a.k.a. Self-Hosted DataHub), head over to the [QuickStart section](../quickstart.md).
+
+To get started with the SaaS instance of DataHub provided by [Acryl Data](https://acryldata.io) (a.k.a. Managed DataHub), head over to the [Managed DataHub section](../managed-datahub/saas-slack-setup.md).
\ No newline at end of file
diff --git a/metadata-ingestion/src/datahub/cli/docker_check.py b/metadata-ingestion/src/datahub/cli/docker_check.py
index 25719cef2334d..f5c01d10d18f1 100644
--- a/metadata-ingestion/src/datahub/cli/docker_check.py
+++ b/metadata-ingestion/src/datahub/cli/docker_check.py
@@ -1,3 +1,4 @@
+import os
from contextlib import contextmanager
from typing import Iterator, List, Optional, Tuple
@@ -45,11 +46,21 @@
def get_client_with_error() -> Iterator[
Tuple[docker.DockerClient, Optional[Exception]]
]:
+ docker_cli = None
try:
docker_cli = docker.from_env()
except docker.errors.DockerException as error:
- yield None, error
- else:
+ try:
+ # newer docker versions create the socket in a user directory, try that before giving up
+ maybe_sock_path = os.path.expanduser("~/.docker/run/docker.sock")
+ if os.path.exists(maybe_sock_path):
+ docker_cli = docker.DockerClient(base_url=f"unix://{maybe_sock_path}")
+ else:
+ yield None, error
+ except docker.errors.DockerException as error:
+ yield None, error
+
+ if docker_cli is not None:
try:
yield docker_cli, None
finally:
diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py
index 64a9b69c59cb1..0aa66b1a2efbb 100644
--- a/metadata-ingestion/src/datahub/cli/docker_cli.py
+++ b/metadata-ingestion/src/datahub/cli/docker_cli.py
@@ -9,6 +9,7 @@
import sys
import tempfile
import time
+from enum import Enum
from pathlib import Path
from typing import Dict, List, NoReturn, Optional
@@ -58,6 +59,13 @@
GITHUB_BOOTSTRAP_MCES_URL = f"{GITHUB_BASE_URL}/{BOOTSTRAP_MCES_FILE}"
+# Closed set of architecture names. `detect_quickstart_arch` compares the
+# lower-cased `--arch` value against these member values, and `is_arch_m1`
+# groups arm64, m1 and m2 together.
+class Architectures(Enum):
+ x86 = "x86"
+ arm64 = "arm64"
+ m1 = "m1"
+ m2 = "m2"
+
+
@functools.lru_cache()
def _docker_subprocess_env() -> Dict[str, str]:
try:
@@ -125,6 +133,10 @@ def is_m1() -> bool:
return False
+def is_arch_m1(arch: Architectures) -> bool:
+    """Return True when *arch* is one of arm64, m1 or m2, False otherwise."""
+    m1_like = (Architectures.arm64, Architectures.m1, Architectures.m2)
+    return arch in m1_like
+
+
def should_use_neo4j_for_graph_service(graph_service_override: Optional[str]) -> bool:
if graph_service_override is not None:
if graph_service_override == "elasticsearch":
@@ -383,6 +395,24 @@ def _restore(
return result.returncode
+def detect_quickstart_arch(arch: Optional[str]) -> Architectures:
+    """Pick the architecture whose quickstart images should be used.
+
+    The default comes from `is_m1()`: arm64 when it reports an M1 machine,
+    x86 otherwise. A non-empty `arch` overrides that default and is matched
+    case-insensitively against the `Architectures` values.
+
+    Args:
+        arch: Optional user-supplied architecture name (e.g. from `--arch`).
+
+    Returns:
+        The detected or explicitly requested architecture.
+
+    Raises:
+        click.BadParameter: if `arch` is given but matches no known architecture.
+    """
+    running_on_m1 = is_m1()
+    if running_on_m1:
+        click.secho("Detected M1 machine", fg="yellow")
+
+    quickstart_arch = Architectures.arm64 if running_on_m1 else Architectures.x86
+    if arch:
+        try:
+            # Enum lookup by value; raises ValueError for an unknown name.
+            quickstart_arch = Architectures(arch.lower())
+        except ValueError:
+            # Fail with a clear usage error instead of indexing into an empty
+            # match list, which crashed with IndexError.
+            raise click.BadParameter(
+                f"Failed to match arch {arch} with list of architectures supported {[a.value for a in Architectures]}",
+                param_hint="--arch",
+            ) from None
+        click.secho(f"Using architecture {quickstart_arch}", fg="yellow")
+
+    return quickstart_arch
+
+
@docker.command()
@click.option(
"--version",
@@ -518,6 +548,11 @@ def _restore(
default=False,
help="Launches MAE & MCE consumers as stand alone docker containers",
)
+@click.option(
+ "--arch",
+ required=False,
+ help="Specify the architecture for the quickstart images to use. Options are x86, arm64, m1 etc.",
+)
@upgrade.check_upgrade
@telemetry.with_telemetry
def quickstart(
@@ -540,6 +575,7 @@ def quickstart(
restore_indices: bool,
no_restore_indices: bool,
standalone_consumers: bool,
+ arch: Optional[str],
) -> None:
"""Start an instance of DataHub locally using docker-compose.
@@ -567,9 +603,7 @@ def quickstart(
)
return
- running_on_m1 = is_m1()
- if running_on_m1:
- click.secho("Detected M1 machine", fg="yellow")
+ quickstart_arch = detect_quickstart_arch(arch)
# Run pre-flight checks.
issues = check_local_docker_containers(preflight_only=True)
@@ -590,16 +624,16 @@ def quickstart(
elif not quickstart_compose_file:
# download appropriate quickstart file
should_use_neo4j = should_use_neo4j_for_graph_service(graph_service_impl)
- if should_use_neo4j and running_on_m1:
+ if should_use_neo4j and is_arch_m1(quickstart_arch):
click.secho(
"Running with neo4j on M1 is not currently supported, will be using elasticsearch as graph",
fg="red",
)
github_file = (
GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL
- if should_use_neo4j and not running_on_m1
+ if should_use_neo4j and not is_arch_m1(quickstart_arch)
else GITHUB_ELASTIC_QUICKSTART_COMPOSE_URL
- if not running_on_m1
+ if not is_arch_m1(quickstart_arch)
else GITHUB_M1_QUICKSTART_COMPOSE_URL
)