diff --git a/docker/elasticsearch/Dockerfile b/docker/elasticsearch/Dockerfile
index 2b2a8b029634fe..e0cc9d5ba66699 100644
--- a/docker/elasticsearch/Dockerfile
+++ b/docker/elasticsearch/Dockerfile
@@ -1,17 +1,12 @@
-FROM openjdk:8
+# This "container" is a workaround to pre-create search indices
+FROM jwilder/dockerize:0.6.1
 
-MAINTAINER Kerem Sahin
-
-RUN apt-get update && apt-get install -y wget && apt-get install -y curl
+RUN apk add --no-cache curl
 
 COPY corpuser-index-config.json dataset-index-config.json /
 
-ENV DOCKERIZE_VERSION v0.6.1
-RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
-    && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \
-    && rm dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz
-
-CMD dockerize -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
-    -timeout 120s; \
-    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/corpuserinfodocument --data @corpuser-index-config.json; \
-    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/datasetdocument --data @dataset-index-config.json
\ No newline at end of file
+CMD dockerize \
+    -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+    -timeout 120s \
+    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/corpuserinfodocument --data @corpuser-index-config.json && \
+    curl -XPUT $ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT/datasetdocument --data @dataset-index-config.json
\ No newline at end of file
diff --git a/docker/frontend/Dockerfile b/docker/frontend/Dockerfile
index c4db27b45174cf..227fb0b409a8bd 100644
--- a/docker/frontend/Dockerfile
+++ b/docker/frontend/Dockerfile
@@ -1,7 +1,5 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 RUN apt-get update && apt-get install -y wget \
     && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
     && dpkg -i google-chrome-stable_current_amd64.deb; apt-get -fy install
diff --git a/docker/frontend/README.md b/docker/frontend/README.md
index 0f3b655ae6a0f2..0cfcc992755816 100644
--- a/docker/frontend/README.md
+++ b/docker/frontend/README.md
@@ -4,18 +4,13 @@
 Refer to [DataHub Frontend Service](../../datahub-frontend) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-frontend -f docker/frontend/Dockerfile .
+cd docker/frontend && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
+This command will rebuild the docker image and start a container based on the image.
 
-## Run container
-```
-cd docker/frontend && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+To start a container using an existing image, run the same command without the `--build` flag.
 
 ### Container configuration
 #### External Port
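In the elasticsearch-setup image above, dockerize blocks until the -wait URL responds and then hands off to the trailing curl commands. As a quick sanity check that both indices were created, you can list them from any container on the same compose network; `elasticsearch:9200` mirrors the ELASTICSEARCH_HOST and ELASTICSEARCH_PORT values used throughout these files:

```
# corpuserinfodocument and datasetdocument should both show up in the listing.
curl http://elasticsearch:9200/_cat/indices?v
```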
diff --git a/docker/frontend/docker-compose.yml b/docker/frontend/docker-compose.yml
index 5e4b8dd7fe9129..6ccae92b8a1ebf 100644
--- a/docker/frontend/docker-compose.yml
+++ b/docker/frontend/docker-compose.yml
@@ -3,6 +3,9 @@
 services:
   datahub-frontend:
     image: linkedin/datahub-frontend:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/frontend/Dockerfile
     hostname: datahub-frontend
     container_name: datahub-frontend
     ports:
diff --git a/docker/gms/Dockerfile b/docker/gms/Dockerfile
index 9ffe093f75b233..e795445d774612 100644
--- a/docker/gms/Dockerfile
+++ b/docker/gms/Dockerfile
@@ -7,9 +7,13 @@ RUN cd /datahub-src && ./gradlew :gms:war:build \
 FROM openjdk:8-jre-alpine
 ENV DOCKERIZE_VERSION v0.6.1
 RUN apk --no-cache add curl tar \
-    && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner-9.4.20.v20190813.jar \
+    && curl https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner.jar \
     && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
 COPY --from=builder /gms.war .
+COPY docker/gms/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 8080
+
+CMD /start.sh
\ No newline at end of file
diff --git a/docker/gms/README.md b/docker/gms/README.md
index b5fe2ac9113e96..f63a4136bcf10c 100644
--- a/docker/gms/README.md
+++ b/docker/gms/README.md
@@ -4,18 +4,14 @@
 Refer to [DataHub GMS Service](../../gms) to have a quick understanding of the architecture and
 responsibility of this service for the DataHub.
 
-## Build
-```
-docker image build -t linkedin/datahub-gms -f docker/gms/Dockerfile .
-```
-This command will build and deploy the image in your local store.
-
-## Run container
+## Build & Run
 ```
-cd docker/gms && docker-compose pull && docker-compose up
+cd docker/gms && docker-compose up --build
 ```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+This command will rebuild the local docker image and start a container based on the image.
+
+To start a container using an existing image, run the same command without the `--build` flag.
 
 ### Container configuration
 #### External Port
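For building outside of compose, the `build:` stanzas above map back to the manual invocation the old gms README documented: `context: ../../` is the repository root, and the tag comes from the compose `image:` line:

```
# Manual equivalent of the compose build: stanza for the gms image,
# run from the repository root (the build context).
docker image build -t linkedin/datahub-gms -f docker/gms/Dockerfile .
```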
@@ -66,6 +62,7 @@ The value of `ELASTICSEARCH_HOST` variable should be set to the host name of the
 ```
 environment:
+    - NEO4J_HOST=neo4j:7474
     - NEO4J_URI=bolt://neo4j
     - NEO4J_USERNAME=neo4j
     - NEO4J_PASSWORD=datahub
diff --git a/docker/gms/docker-compose.yml b/docker/gms/docker-compose.yml
index d3d6e57ce65ebc..2600dba1e3890c 100644
--- a/docker/gms/docker-compose.yml
+++ b/docker/gms/docker-compose.yml
@@ -3,6 +3,9 @@
 services:
   datahub-gms:
     image: linkedin/datahub-gms:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/gms/Dockerfile
     hostname: datahub-gms
     container_name: datahub-gms
     ports:
@@ -17,10 +20,10 @@
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
-    command: "sh -c 'java -jar jetty-runner-9.4.20.v20190813.jar gms.war'"
 
 networks:
   default:
diff --git a/docker/gms/start.sh b/docker/gms/start.sh
new file mode 100644
index 00000000000000..c6edab45518cfc
--- /dev/null
+++ b/docker/gms/start.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+dockerize \
+  -wait tcp://$EBEAN_DATASOURCE_HOST \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -wait http://$NEO4J_HOST \
+  -timeout 240s \
+  java -jar jetty-runner.jar gms.war
\ No newline at end of file
diff --git a/docker/kafka/Dockerfile b/docker/kafka/Dockerfile
new file mode 100644
index 00000000000000..411485cf4b9e25
--- /dev/null
+++ b/docker/kafka/Dockerfile
@@ -0,0 +1,8 @@
+# This "container" is a workaround to pre-create topics
+FROM confluentinc/cp-kafka:5.4.0
+
+CMD echo Waiting for Kafka to be ready... && \
+    cub kafka-ready -b $KAFKA_BOOTSTRAP_SERVER 1 60 && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
+    kafka-topics --create --if-not-exists --zookeeper $KAFKA_ZOOKEEPER_CONNECT --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent
\ No newline at end of file
diff --git a/docker/kafka/README.md b/docker/kafka/README.md
index c9117c4ae51998..dc1556c8676688 100644
--- a/docker/kafka/README.md
+++ b/docker/kafka/README.md
@@ -10,7 +10,7 @@ Below command will start all Kafka related containers.
 ```
 cd docker/kafka && docker-compose pull && docker-compose up
 ```
 As part of `docker-compose`, we also initialize a container called `kafka-setup` to create `MetadataAuditEvent` and
-`MetadataChangeEvent` topics. The only thing this container does is creating Kafka topics after Kafka broker is ready.
+`MetadataChangeEvent` & `FailedMetadataChangeEvent` topics. The only thing this container does is to create Kafka topics after the Kafka broker is ready.
 
 There is also a container which provides visual schema registry interface which you can register/unregister schemas.
 
 You can connect to `schema-registry-ui` on your web browser to monitor Kafka Schema Registry via below link:
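To spot-check the topics kafka-setup creates, list them against the same ZooKeeper from any cp-kafka container on the compose network. This is a sketch; the network name is an assumption to adjust for your environment:

```
# All three Metadata*Event topics should appear in the output.
docker run --rm --network datahub_network confluentinc/cp-kafka:5.4.0 \
    kafka-topics --list --zookeeper zookeeper:2181
```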
diff --git a/docker/kafka/docker-compose.yml b/docker/kafka/docker-compose.yml
index b250b1030a1367..b8db356ef83e99 100644
--- a/docker/kafka/docker-compose.yml
+++ b/docker/kafka/docker-compose.yml
@@ -30,22 +30,16 @@ services:
   # This "container" is a workaround to pre-create topics
   kafka-setup:
-    image: confluentinc/cp-kafka:5.4.0
+    build:
+      context: .
     hostname: kafka-setup
     container_name: kafka-setup
     depends_on:
       - broker
       - schema-registry
-    command: "bash -c 'echo Waiting for Kafka to be ready... && \
-                       cub kafka-ready -b broker:29092 1 60 && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent'"
     environment:
-      # The following settings are listed here only to satisfy the image's requirements.
-      # We override the image's `command` anyways, hence this container will not start a broker.
-      KAFKA_BROKER_ID: ignored
-      KAFKA_ZOOKEEPER_CONNECT: ignored
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
 
   kafka-rest-proxy:
     image: confluentinc/cp-kafka-rest:5.4.0
diff --git a/docker/mae-consumer/Dockerfile b/docker/mae-consumer/Dockerfile
index df9c750acf06ac..63541e2a352fbc 100644
--- a/docker/mae-consumer/Dockerfile
+++ b/docker/mae-consumer/Dockerfile
@@ -1,7 +1,5 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mae-consumer-job:build \
     && cp metadata-jobs/mae-consumer-job/build/libs/mae-consumer-job.jar ../mae-consumer-job.jar \
     && cd .. && rm -rf datahub-src
@@ -13,7 +11,9 @@ RUN apk --no-cache add curl tar \
     && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
 COPY --from=builder /mae-consumer-job.jar /mae-consumer-job.jar
+COPY docker/mae-consumer/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 9091
 
-ENTRYPOINT ["java", "-jar", "mae-consumer-job.jar"]
+CMD /start.sh
\ No newline at end of file
diff --git a/docker/mae-consumer/README.md b/docker/mae-consumer/README.md
index ff2e9452a06278..c37978290d27d2 100644
--- a/docker/mae-consumer/README.md
+++ b/docker/mae-consumer/README.md
@@ -4,18 +4,13 @@
 Refer to [DataHub MAE Consumer Job](../../metadata-jobs/mae-consumer-job) to have a quick understanding of the
 architecture and responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-mae-consumer -f docker/mae-consumer/Dockerfile .
+cd docker/mae-consumer && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
+This command will rebuild the docker image and start a container based on the image.
 
-## Run container
-```
-cd docker/mae-consumer && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+To start a container using a previously built image, run the same command without the `--build` flag.
 
 ### Container configuration
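Since the final stage now ends with a shell-form `CMD /start.sh` instead of an exec-form ENTRYPOINT, the startup command can be overridden at run time, for instance to skip the dockerize waits while debugging. A hypothetical debug run (image tag taken from the compose file; the -e environment flags the job needs are omitted here):

```
# Override CMD to bypass /start.sh and launch the consumer job directly.
docker run --rm linkedin/datahub-mae-consumer java -jar mae-consumer-job.jar
```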
diff --git a/docker/mae-consumer/docker-compose.yml b/docker/mae-consumer/docker-compose.yml
index de1f696dc84f8c..aa5f7342a1dfeb 100644
--- a/docker/mae-consumer/docker-compose.yml
+++ b/docker/mae-consumer/docker-compose.yml
@@ -3,6 +3,9 @@
 services:
   datahub-mae-consumer:
     image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/mae-consumer/Dockerfile
     hostname: datahub-mae-consumer
     container_name: datahub-mae-consumer
     ports:
@@ -12,6 +15,7 @@
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
diff --git a/docker/mae-consumer/start.sh b/docker/mae-consumer/start.sh
new file mode 100644
index 00000000000000..e06a8b759d0a36
--- /dev/null
+++ b/docker/mae-consumer/start.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+dockerize \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -wait http://$ELASTICSEARCH_HOST:$ELASTICSEARCH_PORT \
+  -wait http://$NEO4J_HOST \
+  -timeout 240s \
+  java -jar mae-consumer-job.jar
\ No newline at end of file
diff --git a/docker/mce-consumer/Dockerfile b/docker/mce-consumer/Dockerfile
index 51e9d9116647a6..6a9de0b6638da8 100644
--- a/docker/mce-consumer/Dockerfile
+++ b/docker/mce-consumer/Dockerfile
@@ -1,16 +1,19 @@
 FROM openjdk:8 as builder
 
-MAINTAINER Kerem Sahin ksahin@linkedin.com
-
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mce-consumer-job:build \
     && cp metadata-jobs/mce-consumer-job/build/libs/mce-consumer-job.jar ../mce-consumer-job.jar \
     && cd .. && rm -rf datahub-src
 
 FROM openjdk:8-jre-alpine
+ENV DOCKERIZE_VERSION v0.6.1
+RUN apk --no-cache add curl tar \
+    && curl -L https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
 COPY --from=builder /mce-consumer-job.jar /mce-consumer-job.jar
+COPY docker/mce-consumer/start.sh /start.sh
+RUN chmod +x /start.sh
 
 EXPOSE 9090
 
-ENTRYPOINT ["java", "-jar", "mce-consumer-job.jar"]
+CMD /start.sh
\ No newline at end of file
diff --git a/docker/mce-consumer/README.md b/docker/mce-consumer/README.md
index 39ce0fd3b57687..7eebc7280a4db9 100644
--- a/docker/mce-consumer/README.md
+++ b/docker/mce-consumer/README.md
@@ -4,18 +4,13 @@
 Refer to [DataHub MCE Consumer Job](../../metadata-jobs/mce-consumer-job) to have a quick understanding of the
 architecture and responsibility of this service for the DataHub.
 
-## Build
+## Build & Run
 ```
-docker image build -t linkedin/datahub-mce-consumer -f docker/mce-consumer/Dockerfile .
+cd docker/mce-consumer && docker-compose up --build
 ```
-This command will build and deploy the image in your local store.
+This command will rebuild the docker image and start a container based on the image.
 
-## Run container
-```
-cd docker/mce-consumer && docker-compose pull && docker-compose up
-```
-This command will start the container. If you have the image available in your local store, this image will be used
-for the container otherwise it will download the `latest` image from Docker Hub and then start that.
+To start a container using a previously built image, run the same command without the `--build` flag.
 
 ### Container configuration
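With the values these compose files inject (broker:29092, elasticsearch:9200, neo4j:7474), the mae-consumer start.sh above expands to roughly the following; this is just the substituted form, not a separate script:

```
# Effective invocation after environment substitution in the quickstart setup.
dockerize \
  -wait tcp://broker:29092 \
  -wait http://elasticsearch:9200 \
  -wait http://neo4j:7474 \
  -timeout 240s \
  java -jar mae-consumer-job.jar
```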
diff --git a/docker/mce-consumer/docker-compose.yml b/docker/mce-consumer/docker-compose.yml
index 5bd4b4266a03b2..07c83d6ccbc0a1 100644
--- a/docker/mce-consumer/docker-compose.yml
+++ b/docker/mce-consumer/docker-compose.yml
@@ -3,6 +3,9 @@ version: '3.5'
 services:
   datahub-mce-consumer:
     image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
+    build:
+      context: ../../
+      dockerfile: docker/mce-consumer/Dockerfile
     hostname: datahub-mce-consumer
     container_name: datahub-mce-consumer
     ports:
diff --git a/docker/mce-consumer/start.sh b/docker/mce-consumer/start.sh
new file mode 100644
index 00000000000000..783587371102a5
--- /dev/null
+++ b/docker/mce-consumer/start.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+# -wait tcp://GMS_HOST:$GMS_PORT \
+dockerize \
+  -wait tcp://$KAFKA_BOOTSTRAP_SERVER \
+  -timeout 240s \
+  java -jar mce-consumer-job.jar
\ No newline at end of file
diff --git a/docker/quickstart/docker-compose.yml b/docker/quickstart/docker-compose.yml
index 1543a48ccd4eae..13f75555a298cf 100644
--- a/docker/quickstart/docker-compose.yml
+++ b/docker/quickstart/docker-compose.yml
@@ -80,22 +80,16 @@ services:
   # This "container" is a workaround to pre-create topics
   kafka-setup:
-    image: confluentinc/cp-kafka:5.4.0
+    build:
+      context: ../kafka
     hostname: kafka-setup
     container_name: kafka-setup
     depends_on:
       - broker
       - schema-registry
-    command: "bash -c 'echo Waiting for Kafka to be ready... && \
-                       cub kafka-ready -b broker:29092 1 60 && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataAuditEvent && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic MetadataChangeEvent && \
-                       kafka-topics --create --if-not-exists --zookeeper zookeeper:2181 --partitions 1 --replication-factor 1 --topic FailedMetadataChangeEvent'"
     environment:
-      # The following settings are listed here only to satisfy the image's requirements.
-      # We override the image's `command` anyways, hence this container will not start a broker.
-      KAFKA_BROKER_ID: ignored
-      KAFKA_ZOOKEEPER_CONNECT: ignored
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
 
   schema-registry:
     image: confluentinc/cp-schema-registry:5.4.0
@@ -191,18 +185,15 @@
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
     depends_on:
-      - elasticsearch
-      - broker
+      - elasticsearch-setup
+      - kafka-setup
       - mysql
-      - schema-registry
       - neo4j
-    command: "sh -c 'dockerize -wait tcp://mysql:3306 -wait tcp://broker:29092 -wait http://elasticsearch:9200 \
-              -timeout 240s \
-              java -jar jetty-runner-9.4.20.v20190813.jar gms.war'"
 
   datahub-frontend:
     image: linkedin/datahub-frontend:${DATAHUB_VERSION:-latest}
@@ -230,23 +221,23 @@
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
       - ELASTICSEARCH_HOST=elasticsearch
       - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=neo4j:7474
       - NEO4J_URI=bolt://neo4j
       - NEO4J_USERNAME=neo4j
       - NEO4J_PASSWORD=datahub
     depends_on:
       - kafka-setup
-      - elasticsearch
+      - elasticsearch-setup
       - neo4j
     command: "sh -c 'while ping -c1 kafka-setup &>/dev/null; do echo waiting for kafka-setup... && sleep 1; done; \
-              echo kafka-setup done! && \
-              dockerize -wait http://neo4j:7474 -timeout 240s && java -jar mae-consumer-job.jar'"
+              echo kafka-setup done! && /start.sh'"
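One portability nit in the wait loops above and below: `&>` is a bash redirection, but these commands run under `sh -c`, where a strictly POSIX shell parses `ping -c1 kafka-setup &` as a background job followed by a bare redirection. The portable spelling of the same barrier would be:

```
# POSIX-safe form of the kafka-setup barrier used by both consumer services.
while ping -c1 kafka-setup >/dev/null 2>&1; do
    echo "waiting for kafka-setup..." && sleep 1
done
echo "kafka-setup done!" && /start.sh
```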
 
   datahub-mce-consumer:
     image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
     hostname: datahub-mce-consumer
     container_name: datahub-mce-consumer
     ports:
-      - "9090:9090"
+      - "9090:9090"
     environment:
       - KAFKA_BOOTSTRAP_SERVER=broker:29092
       - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
@@ -256,7 +247,7 @@
       - kafka-setup
       - datahub-gms
     command: "sh -c 'while ping -c1 kafka-setup &>/dev/null; do echo waiting for kafka-setup... && sleep 1; done; \
-              echo kafka-setup done! && java -jar mce-consumer-job.jar'"
+              echo kafka-setup done! && /start.sh'"
 
 networks:
   default:
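Assuming the quickstart directory follows the same pull-then-up pattern documented in the kafka README above, the full stack comes up with the commands below; services that now carry a `build:` stanza, such as kafka-setup, are built locally rather than pulled:

```
# Bring up the complete DataHub stack defined in docker/quickstart.
cd docker/quickstart && docker-compose pull && docker-compose up
```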