Skip to content

Commit

Permalink
Make the Docker build more re-usable in Cloud (#50277)
Browse files Browse the repository at this point in the history
Closes #49926 and #46166. Rework the Docker image so that it comes with a tiny
init system, to ensure ML processes are correctly cleaned up, and to run ES
as a regular user instead of root.

Also:

   * Ensure no files in the image have the setuid/setgid flag
   * Also improve dependency tracking in the build
   * Remove TAKE_FILE_OWNERSHIP option and its documentation
  • Loading branch information
pugnascotia authored Jan 23, 2020
1 parent 6d4de1f commit 8a6d68b
Show file tree
Hide file tree
Showing 8 changed files with 114 additions and 68 deletions.
1 change: 1 addition & 0 deletions distribution/docker/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ subprojects { Project subProject ->
def tarFile = "${parent.projectDir}/build/elasticsearch${oss ? '-oss' : ''}_test.${VersionProperties.elasticsearch}.docker.tar"

final Task exportDockerImageTask = task(exportTaskName, type: LoggedExec) {
inputs.file("${parent.projectDir}/build/markers/${buildTaskName}.marker")
executable 'docker'
outputs.file(tarFile)
args "save",
Expand Down
27 changes: 21 additions & 6 deletions distribution/docker/src/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ RUN chmod 0775 config data logs
COPY config/elasticsearch.yml config/log4j2.properties config/
RUN chmod 0660 config/elasticsearch.yml config/log4j2.properties

# `tini` is a tiny but valid init for containers. This is used to cleanly
# control how ES and any child processes are shut down.
#
# The tini GitHub page gives instructions for verifying the binary using
# gpg, but the keyservers are slow to return the key and this can fail the
# build. Instead, we check the binary against a checksum that we have
# computed.
ADD https://github.com/krallin/tini/releases/download/v0.18.0/tini /tini
COPY config/tini.sha512 /tini.sha512
RUN sha512sum -c /tini.sha512 && chmod +x /tini

################################################################################
# Build stage 1 (the actual elasticsearch image):
# Copy elasticsearch from stage 0
Expand All @@ -45,6 +56,8 @@ FROM centos:7

ENV ELASTIC_CONTAINER true

COPY --from=builder /tini /tini

RUN for iter in {1..10}; do yum update --setopt=tsflags=nodocs -y && \
yum install --setopt=tsflags=nodocs -y nc shadow-utils zip unzip && \
yum clean all && exit_code=0 && break || exit_code=\$? && echo "yum error: retry \$iter in 10s" && sleep 10; done; \
Expand All @@ -65,14 +78,14 @@ RUN ln -sf /etc/pki/ca-trust/extracted/java/cacerts /usr/share/elasticsearch/jdk

ENV PATH /usr/share/elasticsearch/bin:\$PATH

COPY --chown=1000:0 bin/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
COPY bin/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh

# Openshift overrides USER and uses ones with randomly uid>1024 and gid=0
# Allow ENTRYPOINT (and ES) to run even with a different user
RUN chgrp 0 /usr/local/bin/docker-entrypoint.sh && \
chmod g=u /etc/passwd && \
RUN chmod g=u /etc/passwd && \
chmod 0775 /usr/local/bin/docker-entrypoint.sh

# Ensure that there are no files with setuid or setgid, in order to mitigate "stackclash" attacks.
RUN find / -xdev -perm -4000 -exec chmod ug-s {} +

EXPOSE 9200 9300

LABEL org.label-schema.build-date="${build_date}" \
Expand All @@ -95,7 +108,9 @@ LABEL org.label-schema.build-date="${build_date}" \
org.opencontainers.image.vendor="Elastic" \
org.opencontainers.image.version="${version}"

ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
USER elasticsearch:root

ENTRYPOINT ["/tini", "--", "/usr/local/bin/docker-entrypoint.sh"]
# Dummy overridable parameter parsed by entrypoint
CMD ["eswrapper"]

Expand Down
59 changes: 19 additions & 40 deletions distribution/docker/src/docker/bin/docker-entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,22 @@ set -e
# Files created by Elasticsearch should always be group writable too
umask 0002

run_as_other_user_if_needed() {
if [[ "$(id -u)" == "0" ]]; then
# If running as root, drop to specified UID and run command
exec chroot --userspec=1000 / "${@}"
else
# Either we are running in Openshift with random uid and are a member of the root group
# or with a custom --user
exec "${@}"
fi
}

# Allow user specify custom CMD, maybe bin/elasticsearch itself
# for example to directly specify `-E` style parameters for elasticsearch on k8s
# or simply to run /bin/bash to check the image
if [[ "$1" != "eswrapper" ]]; then
if [[ "$(id -u)" == "0" && $(basename "$1") == "elasticsearch" ]]; then
# centos:7 chroot doesn't have the `--skip-chdir` option and
# changes our CWD.
# Rewrite CMD args to replace $1 with `elasticsearch` explicitly,
# so that we are backwards compatible with the docs
# from the previous Elasticsearch versions<6
# and configuration option D:
# https://www.elastic.co/guide/en/elasticsearch/reference/5.6/docker.html#_d_override_the_image_8217_s_default_ulink_url_https_docs_docker_com_engine_reference_run_cmd_default_command_or_options_cmd_ulink
# Without this, user could specify `elasticsearch -E x.y=z` but
# `bin/elasticsearch -E x.y=z` would not work.
set -- "elasticsearch" "${@:2}"
# Use chroot to switch to UID 1000
exec chroot --userspec=1000 / "$@"
else
# User probably wants to run something else, like /bin/bash, with another uid forced (Openshift?)
exec "$@"
fi
if [[ "$1" == "eswrapper" || $(basename "$1") == "elasticsearch" ]]; then
# Rewrite CMD args to remove the explicit command,
# so that we are backwards compatible with the docs
# from the previous Elasticsearch versions < 6
# and configuration option:
# https://www.elastic.co/guide/en/elasticsearch/reference/5.6/docker.html#_d_override_the_image_8217_s_default_ulink_url_https_docs_docker_com_engine_reference_run_cmd_default_command_or_options_cmd_ulink
# Without this, user could specify `elasticsearch -E x.y=z` but
# `bin/elasticsearch -E x.y=z` would not work. In any case,
# we want to continue through this script, and not exec early.
set -- "${@:2}"
else
# Run whatever command the user wanted
exec "$@"
fi

# Allow environment variables to be set by creating a file with the
Expand All @@ -56,18 +40,13 @@ if [[ -f bin/elasticsearch-users ]]; then
# enabled, but we have no way of knowing which node we are yet. We'll just
# honor the variable if it's present.
if [[ -n "$ELASTIC_PASSWORD" ]]; then
[[ -f /usr/share/elasticsearch/config/elasticsearch.keystore ]] || (run_as_other_user_if_needed elasticsearch-keystore create)
if ! (run_as_other_user_if_needed elasticsearch-keystore list | grep -q '^bootstrap.password$'); then
(run_as_other_user_if_needed echo "$ELASTIC_PASSWORD" | elasticsearch-keystore add -x 'bootstrap.password')
[[ -f /usr/share/elasticsearch/config/elasticsearch.keystore ]] || (elasticsearch-keystore create)
if ! (elasticsearch-keystore list | grep -q '^bootstrap.password$'); then
(echo "$ELASTIC_PASSWORD" | elasticsearch-keystore add -x 'bootstrap.password')
fi
fi
fi

if [[ "$(id -u)" == "0" ]]; then
# If requested and running as root, mutate the ownership of bind-mounts
if [[ -n "$TAKE_FILE_OWNERSHIP" ]]; then
chown -R 1000:0 /usr/share/elasticsearch/{data,logs}
fi
fi

run_as_other_user_if_needed /usr/share/elasticsearch/bin/elasticsearch
# Signal forwarding and child reaping is handled by `tini`, which is the
# actual entrypoint of the container
exec /usr/share/elasticsearch/bin/elasticsearch
1 change: 1 addition & 0 deletions distribution/docker/src/docker/config/tini.sha512
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ffdb31563e34bca91a094f962544b9d31f5d138432f2d639a0856ff605b3a69f47e48191da42d6956ab62a1b24eafca1a95b299901257832225d26770354ce5e /tini
40 changes: 32 additions & 8 deletions distribution/src/bin/elasticsearch-env-from-file
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,44 @@ for VAR_NAME_FILE in $(env | cut -f1 -d= | grep '_FILE$'); do
fi

if [[ ! -e "${!VAR_NAME_FILE}" ]]; then
echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE does not exist" >&2
# Maybe the file doesn't exist, maybe we just can't read it due to file permissions.
# Check permissions on each part of the path
path=''
if ! echo "${!VAR_NAME_FILE}" | grep -q '^/'; then
path='.'
fi

dirname "${!VAR_NAME_FILE}" | tr '/' '\n' | while read part; do
if [[ "$path" == "/" ]]; then
path="${path}${part}"
else
path="$path/$part"
fi

if ! [[ -x "$path" ]]; then
echo "ERROR: Cannot read ${!VAR_NAME_FILE} from $VAR_NAME_FILE, due to lack of permissions on '$path'" 2>&1
exit 1
fi
done

if ! [[ -r "${!VAR_NAME_FILE}" ]]; then
echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE is not readable." 2>&1
else
echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE does not exist" >&2
fi

exit 1
fi

FILE_PERMS="$(stat -L -c '%a' ${!VAR_NAME_FILE})"

if [[ "$FILE_PERMS" != "400" && "$FILE_PERMS" != "600" ]]; then
if [[ -h "${!VAR_NAME_FILE}" ]]; then
echo "ERROR: File $(readlink "${!VAR_NAME_FILE}") (target of symlink ${!VAR_NAME_FILE} from $VAR_NAME_FILE) must have file permissions 400 or 600, but actually has: $FILE_PERMS" >&2
else
echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE must have file permissions 400 or 600, but actually has: $FILE_PERMS" >&2
fi
exit 1
if [[ -L "${!VAR_NAME_FILE}" ]]; then
echo "ERROR: File $(readlink "${!VAR_NAME_FILE}") (target of symlink ${!VAR_NAME_FILE} from $VAR_NAME_FILE) must have file permissions 400 or 600, but actually has: $FILE_PERMS" >&2
else
echo "ERROR: File ${!VAR_NAME_FILE} from $VAR_NAME_FILE must have file permissions 400 or 600, but actually has: $FILE_PERMS" >&2
fi
exit 1
fi

echo "Setting $VAR_NAME from $VAR_NAME_FILE at ${!VAR_NAME_FILE}" >&2
Expand All @@ -43,4 +68,3 @@ for VAR_NAME_FILE in $(env | cut -f1 -d= | grep '_FILE$'); do
unset "$VAR_NAME_FILE"
fi
done

12 changes: 3 additions & 9 deletions docs/reference/setup/install/docker.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ endif::[]
This sample Docker Compose file brings up a three-node {es} cluster.
Node `es01` listens on `localhost:9200` and `es02` and `es03` talk to `es01` over a Docker network.

Please note that this configuration exposes port 9200 on all network interfaces, and given how
Docker manipulates `iptables` on Linux, this means that your {es} cluster is publically accessible,
potentially ignoring any firewall settings. If you don't want to expose port 9200 and instead use
Please note that this configuration exposes port 9200 on all network interfaces, and given how
Docker manipulates `iptables` on Linux, this means that your {es} cluster is publicly accessible,
potentially ignoring any firewall settings. If you don't want to expose port 9200 and instead use
a reverse proxy, replace `9200:9200` with `127.0.0.1:9200:9200` in the docker-compose.yml file.
{es} will then only be accessible from the host machine itself.

Expand Down Expand Up @@ -221,12 +221,6 @@ chmod g+rwx esdatadir
chgrp 0 esdatadir
--------------------------------------------

As a last resort, you can force the container to mutate the ownership of
any bind-mounts used for the <<path-settings,data and log dirs>> through the
environment variable `TAKE_FILE_OWNERSHIP`. When you do this, they will be owned by
uid:gid `1000:0`, which provides the required read/write access to the {es} process.


===== Increase ulimits for nofile and nproc

Increased ulimits for <<setting-system-settings,nofile>> and <<max-number-threads-check,nproc>>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -651,21 +651,42 @@ public void test120DockerLogsIncludeElasticsearchLogs() throws Exception {
}

/**
* Check that the Java process running inside the container has the expect PID, UID and username.
* Check that the Java process running inside the container has the expected UID, GID and username.
*/
public void test130JavaHasCorrectPidAndOwnership() {
final List<String> processes = sh.run("ps -o pid,uid,user -C java").stdout.lines().skip(1).collect(Collectors.toList());
public void test130JavaHasCorrectOwnership() {
final List<String> processes = sh.run("ps -o uid,gid,user -C java").stdout.lines().skip(1).collect(Collectors.toList());

assertThat("Expected a single java process", processes, hasSize(1));

final String[] fields = processes.get(0).trim().split("\\s+");

assertThat(fields, arrayWithSize(3));
assertThat("Incorrect UID", fields[0], equalTo("1000"));
assertThat("Incorrect GID", fields[1], equalTo("0"));
assertThat("Incorrect username", fields[2], equalTo("elasticsearch"));
}

/**
* Check that the init process running inside the container has the expected PID, UID, GID and user.
* The PID is particularly important because PID 1 handles signal forwarding and child reaping.
*/
public void test131InitProcessHasCorrectPID() {
final List<String> processes = sh.run("ps -o pid,uid,gid,user -p 1").stdout.lines().skip(1).collect(Collectors.toList());

assertThat("Expected a single process", processes, hasSize(1));

final String[] fields = processes.get(0).trim().split("\\s+");

assertThat(fields, arrayWithSize(4));
assertThat("Incorrect PID", fields[0], equalTo("1"));
assertThat("Incorrect UID", fields[1], equalTo("1000"));
assertThat("Incorrect username", fields[2], equalTo("elasticsearch"));
assertThat("Incorrect GID", fields[2], equalTo("0"));
assertThat("Incorrect username", fields[3], equalTo("elasticsearch"));
}

/**
* Check that Elasticsearch reports per-node cgroup information.
*/
public void test140CgroupOsStatsAreAvailable() throws Exception {
waitForElasticsearch(installation);

Expand Down
13 changes: 12 additions & 1 deletion qa/os/src/test/java/org/elasticsearch/packaging/util/Docker.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.http.client.fluent.Request;
import org.elasticsearch.common.CheckedRunnable;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.PosixFileAttributes;
Expand Down Expand Up @@ -152,9 +153,19 @@ private static void executeDockerRun(Distribution distribution, Map<Path, Path>

// Bind-mount any volumes
if (volumes != null) {
volumes.forEach((localPath, containerPath) -> args.add("--volume \"" + localPath + ":" + containerPath + "\""));
volumes.forEach((localPath, containerPath) -> {
assertTrue(localPath + " doesn't exist", Files.exists(localPath));

if (Platforms.WINDOWS == false && System.getProperty("user.name").equals("root")) {
// The tests are running as root, but the process in the Docker container runs as `elasticsearch` (UID 1000),
// so we need to ensure that the container process is able to read the bind-mounted files.
sh.run("chown -R 1000:0 " + localPath);
}
args.add("--volume \"" + localPath + ":" + containerPath + "\"");
});
}

// Image name
args.add(distribution.flavor.name + ":test");

final String command = String.join(" ", args);
Expand Down

0 comments on commit 8a6d68b

Please sign in to comment.