Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FISH-898 Added timeout option to instance commands #5630

Merged
merged 5 commits into from
Apr 4, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
* holder.
*/

// Portions Copyright [2016-2021] [Payara Foundation and/or affiliates]
// Portions Copyright [2016-2022] [Payara Foundation and/or affiliates]

package com.sun.enterprise.admin.servermgmt.cli;

Expand Down Expand Up @@ -409,23 +409,27 @@ protected boolean isRunning() {
}
}

/**
* Byron Nevins Says: We have quite a historical assortment of ways to determine
* if a server has restarted. There are little teeny timing issues with all of
* them. I'm confident that this new technique will clear them all up. Here we
* are just monitoring the PID of the new server and comparing it to the pid of
* the old server. The oldServerPid is guaranteed to be either the PID of the
* "old" server or -1 if we couldn't get it -- or it isn't running. If it is -1
* then we make the assumption that once we DO get a valid pid that the server
* has started. If the old pid is valid we simply poll until we get a different
* pid. Notice that we will never get a valid pid back unless the server is
* officially up and running and "STARTED" Created April 2013
*
* @param oldServerPid The pid of the server which is being restarted.
* @throws CommandException if we time out.
*/
protected final void waitForRestart(final int oldServerPid) throws CommandException {
long end = getEndTime();
waitForRestart(oldServerPid, CLIConstants.WAIT_FOR_DAS_TIME_MS);
}

/**
* Byron Nevins Says: We have quite a historical assortment of ways to determine
* if a server has restarted. There are little teeny timing issues with all of
* them. I'm confident that this new technique will clear them all up. Here we
* are just monitoring the PID of the new server and comparing it to the pid of
* the old server. The oldServerPid is guaranteed to be either the PID of the
* "old" server or -1 if we couldn't get it -- or it isn't running. If it is -1
* then we make the assumption that once we DO get a valid pid that the server
* has started. If the old pid is valid we simply poll until we get a different
* pid. Notice that we will never get a valid pid back unless the server is
* officially up and running and "STARTED" Created April 2013
*
* @param oldServerPid The pid of the server which is being restarted.
* @param timeout How long to wait for the restart, in milliseconds.
* @throws CommandException if we time out.
*/
protected final void waitForRestart(final int oldServerPid, long timeout) throws CommandException {
long end = getEndTime(timeout);

while (now() < end) {
try {
Expand Down Expand Up @@ -608,10 +612,10 @@ private long now() {
return System.currentTimeMillis();
}

private long getEndTime() {
private long getEndTime(long timeout) {
// it's a method in case we someday allow configuring this VERY long
// timeout at runtime.
kalinchan marked this conversation as resolved.
Show resolved Hide resolved
return CLIConstants.WAIT_FOR_DAS_TIME_MS + now();
return timeout + now();
}

protected boolean dataGridEncryptionEnabled() throws IOException, XMLStreamException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
* holder.
*/

// Portions Copyright [2018-2019] [Payara Foundation and/or its affiliates]
// Portions Copyright [2018-2022] [Payara Foundation and/or its affiliates]

package com.sun.enterprise.v3.admin.cluster;

Expand Down Expand Up @@ -92,6 +92,7 @@ public class ClusterCommandHelper {
private final CommandRunner runner;

private ProgressStatus progress;
private long adminTimeout;

/**
* Construct a ClusterCommandHelper
Expand All @@ -102,6 +103,7 @@ public class ClusterCommandHelper {
public ClusterCommandHelper(Domain domain, CommandRunner runner) {
this.domain = domain;
this.runner = runner;
this.adminTimeout = RemoteRestAdminCommand.getReadTimeout() - 3000;
kalinchan marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand Down Expand Up @@ -238,14 +240,6 @@ public ActionReport runCommand(String command, ParameterMap map, String targetNa
if (logger.isLoggable(FINE)) {
logger.fine(String.format("%s commands queued, waiting for responses", command));
}

// Make sure we don't wait longer than the admin read timeout. Set
// our limit to be 3 seconds less.
long adminTimeout = RemoteRestAdminCommand.getReadTimeout() - 3000;
if (adminTimeout <= 0) {
// This should never be the case
adminTimeout = 57 * 1000;
}

if (logger.isLoggable(FINE)) {
logger.fine(String.format("Initial cluster command timeout: %d ms", adminTimeout));
Expand Down Expand Up @@ -436,4 +430,8 @@ public static class ReportResult {
public final List<String> succeededServerNames = new ArrayList<>();
public final List<String> failedServerNames = new ArrayList<>();
}

/**
 * Overrides the timeout used by this helper, replacing the default that the
 * constructor derives from the admin read timeout.
 *
 * @param adminTimeout the new timeout in milliseconds. The value is stored
 *                     as given; no range check is applied here.
 */
public void setAdminTimeout(long adminTimeout) {
kalinchan marked this conversation as resolved.
Show resolved Hide resolved
this.adminTimeout = adminTimeout;
kalinchan marked this conversation as resolved.
Show resolved Hide resolved
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
# only if the new code is made subject to such option by the copyright
# holder.
#
## Portions Copyright [2018-2019] [Payara Foundation and/or its affiliates]
## Portions Copyright [2018-2022] [Payara Foundation and/or its affiliates]

#####restart-instance
restart.instance.notInstance=-_restart-instance only works on instances. This is a {0}
Expand Down Expand Up @@ -152,6 +152,7 @@ start.dg.command=Start a deployment group
start.dg=Starting deployment group {0}
stop.dg=Stopping deployment group {0}
restart.dg=Restarting deployment group {0}
restart.dg.timeout=Timed out while waiting for deployment group {0} to restart
## StartClusterCommand
start.cluster.command=Start a cluster
start.cluster=Starting cluster {0}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,26 +37,24 @@
* only if the new code is made subject to such option by the copyright
* holder.
*
* Portions Copyright [2016-2018] [Payara Foundation and/or its affiliates]
* Portions Copyright [2016-2022] [Payara Foundation and/or its affiliates]
*
*/

package com.sun.enterprise.v3.admin.cluster;

import com.sun.enterprise.config.serverbeans.Cluster;
import java.util.logging.Logger;

import org.glassfish.api.admin.*;
import javax.inject.Inject;


import org.jvnet.hk2.annotations.Service;
import org.glassfish.api.Param;
import com.sun.enterprise.config.serverbeans.Domain;
import org.glassfish.api.ActionReport;
import org.glassfish.api.ActionReport.ExitCode;
import org.glassfish.api.Param;
import org.glassfish.api.admin.*;
import org.glassfish.hk2.api.PerLookup;
import org.jvnet.hk2.annotations.Service;

import com.sun.enterprise.config.serverbeans.Domain;
import javax.inject.Inject;
import javax.validation.constraints.Min;
import java.util.logging.Logger;

@Service(name = "restart-cluster")
@ExecuteOn(value={RuntimeType.DAS})
Expand Down Expand Up @@ -88,13 +86,21 @@ public class RestartClusterCommand implements AdminCommand {

@Param(optional = true, defaultValue = "false")
private boolean verbose;

@Param(optional = true, defaultValue = "true")
private boolean rolling;

@Param(optional = true, defaultValue = "0")
private String delay;

@Min(message = "Timeout must be at least 1 second long.", value = 1)
@Param(optional = true, defaultValue = "600")
kalinchan marked this conversation as resolved.
Show resolved Hide resolved
private int instanceTimeout;

@Min(message = "Timeout must be at least 1 second long.", value = 1)
@Param(optional = true, defaultValue = "600")
private int timeout;

@Override
public void execute(AdminCommandContext context) {

Expand All @@ -120,6 +126,8 @@ public void execute(AdminCommandContext context) {
String commandName = "restart-instance";
ParameterMap pm = new ParameterMap();
pm.add("delay", delay);
pm.add("timeout", String.valueOf(instanceTimeout));
clusterHelper.setAdminTimeout(timeout * 1000);
kalinchan marked this conversation as resolved.
Show resolved Hide resolved
clusterHelper.runCommand(commandName, pm, clusterName, context,
verbose, rolling);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,34 +37,37 @@
* only if the new code is made subject to such option by the copyright
* holder.
*/
// Portions Copyright [2018] [Payara Foundation and/or its affiliates]
// Portions Copyright [2018-2022] [Payara Foundation and/or its affiliates]

package com.sun.enterprise.v3.admin.cluster;

import com.sun.enterprise.admin.remote.RemoteRestAdminCommand;
import com.sun.enterprise.admin.remote.ServerRemoteRestAdminCommand;
import com.sun.enterprise.admin.util.*;
import com.sun.enterprise.config.serverbeans.Config;
import com.sun.enterprise.config.serverbeans.Domain;
import com.sun.enterprise.config.serverbeans.Node;
import com.sun.enterprise.config.serverbeans.Nodes;
import com.sun.enterprise.config.serverbeans.Server;
import com.sun.enterprise.admin.util.InstanceStateService;
import com.sun.enterprise.admin.util.RemoteInstanceCommandHelper;
import com.sun.enterprise.config.serverbeans.*;
import com.sun.enterprise.util.OS;
import com.sun.enterprise.util.ObjectAnalyzer;
import com.sun.enterprise.util.StringUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import java.util.logging.Level;
import org.glassfish.api.*;
import fish.payara.nucleus.executorservice.PayaraExecutorService;
import org.glassfish.api.ActionReport;
import org.glassfish.api.I18n;
import org.glassfish.api.Param;
import org.glassfish.api.admin.*;

import org.jvnet.hk2.annotations.Service;
import org.glassfish.hk2.api.PerLookup;
import org.glassfish.hk2.api.ServiceLocator;
import org.jvnet.hk2.annotations.Service;

import javax.inject.Inject;
import javax.inject.Named;
import javax.validation.constraints.Min;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
*
Expand Down Expand Up @@ -106,6 +109,9 @@ public class RestartInstanceCommand implements AdminCommand {
@Named(ServerEnvironment.DEFAULT_INSTANCE_NAME)
Config dasConfig;

@Inject
private PayaraExecutorService executor;

@Param(optional = false, primary = true)
private String instanceName;

Expand All @@ -115,10 +121,14 @@ public class RestartInstanceCommand implements AdminCommand {

@Param(name = "sync", optional = true, defaultValue = "normal", acceptableValues = "none, normal, full")
private String sync;
@Param(name="delay", optional = true, defaultValue = "0")

@Param(name = "delay", optional = true, defaultValue = "0")
private int delay;

@Min(message = "Timeout must be at least 1 second long.", value = 1)
@Param(optional = true, defaultValue = "600")
private int timeout;

private Logger logger;

private RemoteInstanceCommandHelper helper;
Expand All @@ -134,11 +144,26 @@ public class RestartInstanceCommand implements AdminCommand {
private String oldPid;

private AdminCommandContext context;

private static final long WAIT_TIME_MS = 600000; // 10 minutes


@Override
public void execute(AdminCommandContext ctx) {
CountDownLatch commandTimeout = new CountDownLatch(1);
kalinchan marked this conversation as resolved.
Show resolved Hide resolved
ScheduledFuture<?> commandFuture = executor.schedule(() -> {
restartInstance(ctx);
commandTimeout.countDown();
}, 500, TimeUnit.MILLISECONDS);
try {
if (!commandTimeout.await(timeout, TimeUnit.SECONDS)) {
setError(Strings.get("restart.instance.timeout", instanceName));
}
} catch (InterruptedException e) {
} finally {
commandFuture.cancel(true);
}

}

private void restartInstance(AdminCommandContext ctx) {
try {
context = ctx;
helper = new RemoteInstanceCommandHelper(habitat);
Expand All @@ -158,7 +183,7 @@ public void execute(AdminCommandContext ctx) {
if (logger.isLoggable(Level.FINE))
logger.log(Level.FINE, "Restart-instance old-pid = {0}", oldPid);
callInstance();
waitForRestart();
checkForRestart();

if (!isError()) {
String msg = Strings.get("restart.instance.success", instanceName);
Expand Down Expand Up @@ -189,6 +214,7 @@ private void synchronizeInstance() {
command.add(instanceName);
}

// Convert the command into a string representing the command a human should run.
// Convert the command into a string representing the command a human should run.
humanCommand = makeCommandHuman(command);

Expand Down Expand Up @@ -315,35 +341,31 @@ private boolean isInstanceRestartable() throws InstanceNotRunningException {
String val = rac.findPropertyInReport("restartable");
if (val != null && val.equals("false")) {
return false;
}
}
return true;
}

private void waitForRestart() {
if (isError())
private void checkForRestart() {
if (isError()) {
return;

long deadline = System.currentTimeMillis() + WAIT_TIME_MS;

while (System.currentTimeMillis() < deadline) {
try {
String newpid = getPid();
// when the next statement is true -- the server has restarted.
if (StringUtils.ok(newpid) && !newpid.equals(oldPid)) {
if (logger.isLoggable(Level.FINE))
logger.fine("Restarted instance pid = " + newpid);
try {
Thread.sleep(delay);
} catch(InterruptedException ie) {}
return;
}
try {
String newpid = getPid();
// when the next statement is true -- the server has restarted.
if (StringUtils.ok(newpid) && !newpid.equals(oldPid)) {
if (logger.isLoggable(Level.FINE)) {
logger.fine("Restarted instance pid = " + newpid);
}
Thread.sleep(100);// don't busy wait
}
catch (Exception e) {
// ignore. This is normal!
try {
Thread.sleep(delay);
} catch (InterruptedException ie) {
}
return;
}
} catch (Exception e) {
// ignore. This is normal!
}
setError(Strings.get("restart.instance.timeout", instanceName));
setError(Strings.get("restart.instance.racError", instanceName, "instance pid is the same"));
}

private RemoteRestAdminCommand createRac(String cmdName) throws CommandException {
Expand Down
Loading