Skip to content

Commit

Permalink
Make All OS tests run on GCP instances (#46924)
Browse files Browse the repository at this point in the history
This PR makes the necessary adaptations to the tests and adds a PowerShell script to
invoke the OS tests on GCP instances connected as CI workers.

Also noticed that logs were not being produced by the tests and that these were not using log4j, so fixed that too.

One of the difficulties in working on these tests was that the tests just stalled with no indication of where the problem was.
To ease debugging, after Process Explorer suggested that the tests are running some commands, we now have multiple timeouts: one for the tests (which will generate a thread dump) and one for individual commands (which bails with the command being run and the output and error so far) to make it easier to see what went wrong.

The tests were blocking because apparently the pipes to the sub-process were not closing, thus the threads were blocking on them and we were blocking indefinitely on the join. I'm not sure why this doesn't happen in vagrant, but we now properly deal with it.
  • Loading branch information
alpar-t authored Oct 4, 2019
1 parent d8b4556 commit f962d1c
Show file tree
Hide file tree
Showing 15 changed files with 385 additions and 227 deletions.
36 changes: 36 additions & 0 deletions .ci/os.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Runs the destructive distribution (OS) tests on a Windows CI worker.
#
# Steps:
#   1. Relaunch elevated if the current process is not running as Administrator.
#   2. Replace the per-user Gradle init.d directory with .ci/init.gradle.
#   3. Clear JAVA_HOME and point PATH / SYSTEM_JAVA_HOME at the JDKs selected by
#      ES_BUILD_JAVA and ES_RUNTIME_JAVA.
#   4. Recreate \tmp and run the destructiveDistroTest Gradle task.
#
# The script exits with the exit code of the Gradle wrapper.
If (-NOT ([Security.Principal.WindowsPrincipal] [Security.Principal.WindowsIdentity]::GetCurrent()).IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator))
{
    # Relaunch as an elevated process:
    Start-Process powershell.exe "-File",('"{0}"' -f $MyInvocation.MyCommand.Path) -Verb RunAs
    exit
}

# CI configures these, uncomment if running manually
#
# $env:ES_BUILD_JAVA="java12"
# $env:ES_RUNTIME_JAVA="java11"

# Fail fast while preparing the environment.
$ErrorActionPreference="Stop"
$gradleInit = "C:\Users\$env:username\.gradle\init.d\"
echo "Remove $gradleInit"
Remove-Item -Recurse -Force $gradleInit -ErrorAction Ignore
New-Item -ItemType directory -Path $gradleInit
echo "Copy .ci/init.gradle to $gradleInit"
Copy-Item .ci/init.gradle -Destination $gradleInit

# Clear JAVA_HOME both machine-wide and for this process; the build JDK is
# put first on PATH instead.
[Environment]::SetEnvironmentVariable("JAVA_HOME", $null, "Machine")
$env:PATH="C:\Users\jenkins\.java\$env:ES_BUILD_JAVA\bin\;$env:PATH"
$env:JAVA_HOME=$null
$env:SYSTEM_JAVA_HOME="C:\Users\jenkins\.java\$env:ES_RUNTIME_JAVA"
Remove-Item -Recurse -Force \tmp -ErrorAction Ignore
New-Item -ItemType directory -Path \tmp

# From here on errors are reported but no longer terminate the script; the
# Gradle result is propagated explicitly through the exit code below.
$ErrorActionPreference="Continue"
# TODO: remove the task exclusions once dependencies are set correctly and these don't run for Windows or building the deb on windows is fixed
& .\gradlew.bat -g "C:\Users\$env:username\.gradle" --parallel --scan --console=plain destructiveDistroTest `
    -x :distribution:packages:buildOssDeb `
    -x :distribution:packages:buildDeb `
    -x :distribution:packages:buildOssRpm `
    -x :distribution:packages:buildRpm

# $? is a boolean (and $true converts to exit code 1), so propagate the native
# exit code of gradlew.bat instead.
exit $LastExitCode
68 changes: 68 additions & 0 deletions .ci/os.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash
#
# Prepares a Linux CI worker and runs the destructive packaging tests (bats and
# java) through the Gradle wrapper.
#
# Usage: .ci/os.sh [extra gradle arguments...]
#
# Must be started from the repository root: it reads .ci/java-versions.properties
# and .ci/init.gradle, and invokes ./gradlew, all relative to the working directory.
# Requires sudo; it modifies /etc, /usr and /elasticsearch on the host.

# opensuse 15 has a missing dep for systemd

if command -v zypper > /dev/null ; then
  sudo zypper install -y insserv-compat
fi

# Required by bats
sudo touch /etc/is_vagrant_vm
# Runs before `set -e`, so a failure here does not abort the script.
sudo useradd vagrant

set -e

# Provides ES_RUNTIME_JAVA and ES_BUILD_JAVA used just below.
. .ci/java-versions.properties
RUNTIME_JAVA_HOME="$HOME/.java/$ES_RUNTIME_JAVA"
BUILD_JAVA_HOME="$HOME/.java/$ES_BUILD_JAVA"

rm -Rfv "$HOME/.gradle/init.d/" && mkdir -p "$HOME/.gradle/init.d"
cp -v .ci/init.gradle "$HOME/.gradle/init.d"

unset JAVA_HOME

if ! [ -e "/usr/bin/bats" ] ; then
  # A leftover checkout from an interrupted run would make `git clone` fail.
  rm -Rf /tmp/bats
  git clone https://github.com/sstephenson/bats /tmp/bats
  sudo /tmp/bats/install.sh /usr
fi


if [ -f "/etc/os-release" ] ; then
  cat /etc/os-release
  . /etc/os-release
  if [[ "$ID" == "debian" || "$ID_LIKE" == "debian" ]] ; then
    # FIXME: The base image should not have rpm installed
    sudo rm -Rf /usr/bin/rpm
  fi
else
  cat /etc/issue || true
fi

# Let these variables survive sudo. The delimiter is quoted so the body is
# written literally, without any shell expansion.
sudo bash -c 'cat > /etc/sudoers.d/elasticsearch_vars' << 'SUDOERS_VARS'
Defaults env_keep += "ZIP"
Defaults env_keep += "TAR"
Defaults env_keep += "RPM"
Defaults env_keep += "DEB"
Defaults env_keep += "PACKAGING_ARCHIVES"
Defaults env_keep += "PACKAGING_TESTS"
Defaults env_keep += "BATS_UTILS"
Defaults env_keep += "BATS_TESTS"
Defaults env_keep += "SYSTEM_JAVA_HOME"
Defaults env_keep += "JAVA_HOME"
SUDOERS_VARS
sudo chmod 0440 /etc/sudoers.d/elasticsearch_vars

# Bats tests still use this location
sudo rm -Rf /elasticsearch
sudo mkdir -p /elasticsearch/qa/ && sudo chown jenkins /elasticsearch/qa/ && ln -s "$PWD/qa/vagrant" /elasticsearch/qa/

# sudo sets its own PATH thus we use env to override that and call sudo another time so we keep the secure root PATH
# run with --continue to run both bats and java tests even if one fails
# be explicit about Gradle home dir so we use the same even with sudo
# --unset must precede the NAME=VALUE operands: env stops parsing options at the
# first operand and would otherwise treat "--unset=JAVA_HOME" as an assignment.
sudo -E env \
  --unset=JAVA_HOME \
  PATH="$BUILD_JAVA_HOME/bin:$(sudo bash -c 'echo -n $PATH')" \
  RUNTIME_JAVA_HOME="$(readlink -f -n "$RUNTIME_JAVA_HOME")" \
  SYSTEM_JAVA_HOME="$(readlink -f -n "$RUNTIME_JAVA_HOME")" \
  ./gradlew -g "$HOME/.gradle" --scan --parallel "$@" --continue destructivePackagingTest

Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ import java.util.regex.Matcher

import static org.elasticsearch.gradle.tool.Boilerplate.findByName
import static org.elasticsearch.gradle.tool.Boilerplate.maybeConfigure

/**
* Encapsulates build configuration for elasticsearch projects.
*/
Expand Down Expand Up @@ -899,6 +898,11 @@ class BuildPlugin implements Plugin<Project> {
logging.exceptionFormat = 'full'
}

if (OS.current().equals(OS.WINDOWS) && System.getProperty('tests.timeoutSuite') == null) {
// override the suite timeout to 30 mins for windows, because it has the most inefficient filesystem known to man
test.systemProperty 'tests.timeoutSuite', '1800000!'
}

project.plugins.withType(ShadowPlugin).whenPluginAdded {
// Test against a shadow jar if we made one
test.classpath -= project.configurations.getByName('bundle')
Expand Down
8 changes: 0 additions & 8 deletions distribution/archives/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
* under the License.
*/

import org.apache.tools.ant.taskdefs.condition.Os
import org.elasticsearch.gradle.BuildPlugin
import org.elasticsearch.gradle.EmptyDirTask
import org.elasticsearch.gradle.LoggedExec
Expand All @@ -28,7 +27,6 @@ import org.elasticsearch.gradle.tar.SymbolicLinkPreservingTar

import java.nio.file.Files
import java.nio.file.Path

// need this so Zip/Tar tasks get basic defaults...
apply plugin: 'base'

Expand Down Expand Up @@ -312,12 +310,6 @@ configure(subprojects.findAll { it.name == 'integ-test-zip' }) {
integTest {
dependsOn assemble
includePackaged = true
runner {
if (Os.isFamily(Os.FAMILY_WINDOWS) && System.getProperty('tests.timeoutSuite') == null) {
// override the suite timeout to 30 mins for windows, because it has the most inefficient filesystem known to man
systemProperty 'tests.timeoutSuite', '1800000!'
}
}
}

processTestResources {
Expand Down
2 changes: 1 addition & 1 deletion gradle/build-scan.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ buildScan {
} else {
tag 'LOCAL'
}
}
}
21 changes: 8 additions & 13 deletions qa/os/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ dependencies {
compile "org.apache.httpcomponents:httpcore:${versions.httpcore}"
compile "org.apache.httpcomponents:httpclient:${versions.httpclient}"
compile "org.apache.httpcomponents:fluent-hc:${versions.httpclient}"
compile "org.apache.logging.log4j:log4j-api:${versions.log4j}"
compile "org.apache.logging.log4j:log4j-core:${versions.log4j}"
compile "org.apache.logging.log4j:log4j-jcl:${versions.log4j}"
compile "commons-codec:commons-codec:${versions.commonscodec}"
compile "commons-logging:commons-logging:${versions.commonslogging}"

Expand All @@ -48,24 +51,16 @@ testingConventions.enabled = false
tasks.dependencyLicenses.enabled = false
tasks.dependenciesInfo.enabled = false

tasks.thirdPartyAudit.ignoreMissingClasses (
// commons-logging optional dependencies
'org.apache.avalon.framework.logger.Logger',
'org.apache.log.Hierarchy',
'org.apache.log.Logger',
'org.apache.log4j.Category',
'org.apache.log4j.Level',
'org.apache.log4j.Logger',
'org.apache.log4j.Priority',
// commons-logging provided dependencies
'javax.servlet.ServletContextEvent',
'javax.servlet.ServletContextListener'
)
tasks.thirdPartyAudit.ignoreMissingClasses ()

tasks.register('destructivePackagingTest') {
dependsOn 'destructiveDistroTest', 'destructiveBatsTest.oss', 'destructiveBatsTest.default'
}

processTestResources {
from project(":test:framework").file("src/main/resources/log4j2-test.properties")
}

subprojects { Project platformProject ->

// TODO: remove this property lookup once CI is switched to use an explicit task for the sample tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,10 @@ public void test40CreateKeystoreManually() throws Exception {
// the keystore ends up being owned by the Administrators group, so we manually set it to be owned by the vagrant user here.
// from the server's perspective the permissions aren't really different, this is just to reflect what we'd expect in the tests.
// when we run these commands as a role user we won't have to do this
Platforms.onWindows(() -> sh.run(
bin.elasticsearchKeystore + " create; " +
"$account = New-Object System.Security.Principal.NTAccount 'vagrant'; " +
"$acl = Get-Acl '" + installation.config("elasticsearch.keystore") + "'; " +
"$acl.SetOwner($account); " +
"Set-Acl '" + installation.config("elasticsearch.keystore") + "' $acl"
));
Platforms.onWindows(() -> {
sh.run(bin.elasticsearchKeystore + " create");
sh.chown(installation.config("elasticsearch.keystore"));
});

assertThat(installation.config("elasticsearch.keystore"), file(File, ARCHIVE_OWNER, ARCHIVE_OWNER, p660));

Expand All @@ -148,27 +145,23 @@ public void test50StartAndStop() throws Exception {
Archives.stopElasticsearch(installation);
}

public void assertRunsWithJavaHome() throws Exception {
public void test51JavaHomeOverride() throws Exception {
Platforms.onLinux(() -> {
String systemJavaHome = sh.run("echo $SYSTEM_JAVA_HOME").stdout.trim();
sh.getEnv().put("JAVA_HOME", systemJavaHome);
String systemJavaHome1 = sh.run("echo $SYSTEM_JAVA_HOME").stdout.trim();
sh.getEnv().put("JAVA_HOME", systemJavaHome1);
});
Platforms.onWindows(() -> {
final String systemJavaHome = sh.run("$Env:SYSTEM_JAVA_HOME").stdout.trim();
sh.getEnv().put("JAVA_HOME", systemJavaHome);
final String systemJavaHome1 = sh.run("$Env:SYSTEM_JAVA_HOME").stdout.trim();
sh.getEnv().put("JAVA_HOME", systemJavaHome1);
});

Archives.runElasticsearch(installation, sh);
ServerUtils.runElasticsearchTests();
Archives.stopElasticsearch(installation);

String systemJavaHome = sh.getEnv().get("JAVA_HOME");
String systemJavaHome1 = sh.getEnv().get("JAVA_HOME");
assertThat(FileUtils.slurpAllLogs(installation.logs, "elasticsearch.log", "*.log.gz"),
containsString(systemJavaHome));
}

public void test51JavaHomeOverride() throws Exception {
assertRunsWithJavaHome();
containsString(systemJavaHome1));
}

public void test52BundledJdkRemoved() throws Exception {
Expand All @@ -177,18 +170,34 @@ public void test52BundledJdkRemoved() throws Exception {
Path relocatedJdk = installation.bundledJdk.getParent().resolve("jdk.relocated");
try {
mv(installation.bundledJdk, relocatedJdk);
assertRunsWithJavaHome();
Platforms.onLinux(() -> {
String systemJavaHome1 = sh.run("echo $SYSTEM_JAVA_HOME").stdout.trim();
sh.getEnv().put("JAVA_HOME", systemJavaHome1);
});
Platforms.onWindows(() -> {
final String systemJavaHome1 = sh.run("$Env:SYSTEM_JAVA_HOME").stdout.trim();
sh.getEnv().put("JAVA_HOME", systemJavaHome1);
});

Archives.runElasticsearch(installation, sh);
ServerUtils.runElasticsearchTests();
Archives.stopElasticsearch(installation);

String systemJavaHome1 = sh.getEnv().get("JAVA_HOME");
assertThat(FileUtils.slurpAllLogs(installation.logs, "elasticsearch.log", "*.log.gz"),
containsString(systemJavaHome1));
} finally {
mv(relocatedJdk, installation.bundledJdk);
}
}

public void test53JavaHomeWithSpecialCharacters() throws Exception {
Platforms.onWindows(() -> {
final Shell sh = newShell();
final Shell sh = new Shell();
String javaPath = "C:\\Program Files (x86)\\java";
try {
// once windows 2012 is no longer supported and powershell 5.0 is always available we can change this command
sh.run("cmd /c mklink /D 'C:\\Program Files (x86)\\java' $Env:SYSTEM_JAVA_HOME");
sh.run("cmd /c mklink /D '" + javaPath + "' $Env:SYSTEM_JAVA_HOME");

sh.getEnv().put("JAVA_HOME", "C:\\Program Files (x86)\\java");

Expand All @@ -203,7 +212,9 @@ public void test53JavaHomeWithSpecialCharacters() throws Exception {

} finally {
//clean up sym link
sh.run("cmd /c rmdir 'C:\\Program Files (x86)\\java' ");
if (Files.exists(Paths.get(javaPath))) {
sh.run("cmd /c rmdir '" + javaPath + "' ");
}
}
});

Expand Down Expand Up @@ -231,6 +242,7 @@ public void test53JavaHomeWithSpecialCharacters() throws Exception {
}

public void test60AutoCreateKeystore() throws Exception {
sh.chown(installation.config("elasticsearch.keystore"));
assertThat(installation.config("elasticsearch.keystore"), file(File, ARCHIVE_OWNER, ARCHIVE_OWNER, p660));

final Installation.Executables bin = installation.executables();
Expand Down Expand Up @@ -263,17 +275,8 @@ public void test70CustomPathConfAndJvmOptions() throws Exception {
"-Dlog4j2.disable.jmx=true\n";
append(tempConf.resolve("jvm.options"), jvmOptions);

Platforms.onLinux(() -> sh.run("chown -R elasticsearch:elasticsearch " + tempConf));
Platforms.onWindows(() -> sh.run(
"$account = New-Object System.Security.Principal.NTAccount 'vagrant'; " +
"$tempConf = Get-ChildItem '" + tempConf + "' -Recurse; " +
"$tempConf += Get-Item '" + tempConf + "'; " +
"$tempConf | ForEach-Object { " +
"$acl = Get-Acl $_.FullName; " +
"$acl.SetOwner($account); " +
"Set-Acl $_.FullName $acl " +
"}"
));
final Shell sh = newShell();
sh.chown(tempConf);

sh.getEnv().put("ES_PATH_CONF", tempConf.toString());
sh.getEnv().put("ES_JAVA_OPTS", "-XX:-UseCompressedOops");
Expand Down Expand Up @@ -306,17 +309,8 @@ public void test80RelativePathConf() throws Exception {

append(tempConf.resolve("elasticsearch.yml"), "node.name: relative");

Platforms.onLinux(() -> sh.run("chown -R elasticsearch:elasticsearch " + temp));
Platforms.onWindows(() -> sh.run(
"$account = New-Object System.Security.Principal.NTAccount 'vagrant'; " +
"$tempConf = Get-ChildItem '" + temp + "' -Recurse; " +
"$tempConf += Get-Item '" + temp + "'; " +
"$tempConf | ForEach-Object { " +
"$acl = Get-Acl $_.FullName; " +
"$acl.SetOwner($account); " +
"Set-Acl $_.FullName $acl " +
"}"
));
final Shell sh = newShell();
sh.chown(temp);

sh.setWorkingDirectory(temp);
sh.getEnv().put("ES_PATH_CONF", "config");
Expand Down
Loading

0 comments on commit f962d1c

Please sign in to comment.