Skip to content

Commit

Permalink
Remove HALT option from MCP as it can result in corruption (#2667)
Browse files Browse the repository at this point in the history
* Remove HALT option from MCP as it can result in corruption

Co-authored-by: Konrad Windszus <[email protected]>
  • Loading branch information
davidjgonzalez and kwin authored Aug 25, 2021
1 parent 2769058 commit fb3135b
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com)
- #2612 - Fix build on Windows
- #2648 - Don't implement Provider type SlingHttpServletRequest in FakeSlingHttpServletRequest
- #2650 - Rely on TopologyEvent only instead of refering to DiscoveryService which causes circular reference errors
- #2660 - Remove Halt button from MCP as it's use can result in repository corruption. Also removed ability to set task.timeout on ThrottledTaskRunnerImpl, forcing the timeout to be disabled (-1).
- #2670 - Remove AEM 6.3 support (oak-pal)

## 5.0.6 - 2021-06-12
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
*/
package com.adobe.acs.commons.fam;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Serializable;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
Expand All @@ -31,6 +34,7 @@
*/
@SuppressWarnings("CQRules:CWE-676") // use appropriate in this case
public class CancelHandler implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CancelHandler.class);
private static final long serialVersionUID = 7526472295622776147L;

private final transient Set<Thread> activeWork = ConcurrentHashMap.newKeySet();
Expand All @@ -56,7 +60,8 @@ public boolean isForcefullyCancelled() {

public void trackActiveWork(Thread t) {
if (cancelled) {
t.interrupt();
// Halt UI button has been removed, but just to be safe...
log.warn("Thread interruption is no longer supported as it can result in repository corruption.");
} else {
activeWork.add(t);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,16 @@
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;

@Component(metatype = true, immediate = true,
label = "ACS AEM Commons - Throttled Task Runner Service",
description = "WARNING: Setting a low 'Watchdog time' value that results in the interrupting of writing threads can lead to repository corruption. Ensure that this value is high enough to allow even outlier writing processes to complete.")
@Component(metatype = true,
label = "ACS AEM Commons - Throttled Task Runner Service")
@Service({ThrottledTaskRunner.class, ThrottledTaskRunnerStats.class})
@Properties({
@Property(name = "jmx.objectname", value = "com.adobe.acs.commons.fam:type=Throttled Task Runner", propertyPrivate = true),
@Property(name = "max.threads", label = "Max threads", description = "Default is 4, recommended not to exceed the number of CPU cores",value = "4"),
@Property(name = "max.cpu", label = "Max cpu %", description = "Range is 0..1; -1 means disable this check", doubleValue = 0.75),
@Property(name = "max.heap", label = "Max heap %", description = "Range is 0..1; -1 means disable this check", doubleValue = 0.85),
@Property(name = "cooldown.wait.time", label = "Cooldown time", description="Time to wait for cpu/mem cooldown between checks", value = "100"),
@Property(name = "task.timeout", label = "Watchdog time", description="Maximum time allowed (in ms) per action before it is interrupted forcefully. Defaults to 1 hour.", value = "3600000"),})
@Property(name = "cooldown.wait.time", label = "Cooldown time", description="Time to wait for cpu/mem cooldown between checks", value = "100")
})
public class ThrottledTaskRunnerImpl extends AnnotatedStandardMBean implements ThrottledTaskRunner, ThrottledTaskRunnerStats {

private static final Logger LOG = LoggerFactory.getLogger(ThrottledTaskRunnerImpl.class);
Expand Down Expand Up @@ -318,7 +317,12 @@ protected void activate(ComponentContext componentContext) {
maxHeap = PropertiesUtil.toDouble(properties.get("max.heap"), 0.85);
maxThreads = PropertiesUtil.toInteger(properties.get("max.threads"), defaultThreadCount);
cooldownWaitTime = PropertiesUtil.toInteger(properties.get("cooldown.wait.time"), 100);
taskTimeout = PropertiesUtil.toInteger(properties.get("task.timeout"), 3600000);

/**
* #2660 - Remove configurable timeout/watchdog as this can result in repository corruption.
* Force to -1, which disabled the timeout/watchdog
*/
taskTimeout = -1;

try {
memBeanName = ObjectName.getInstance("java.lang:type=Memory");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ private Runnable watchThread(Thread workThread, Semaphore timerSemaphore) {
LOG.error("Watchdog thread interrupted", ex);
}
if (!finished1) {
LOG.error("Watchdog reached interval timeout, worker thread will now be interrupted!");
workThread.interrupt();
// The watchdog condition should never be met as task.timeout is forced to -1, however just to be safe...
LOG.warn("Thread interruption is no longer supported.");
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,14 @@ protected void delay(long ms) {
try {
Thread.sleep(ms);
} catch (InterruptedException e) {
// We no longer perform thread interrupts, so something else in AEM is causing this interruption.
//
LOG.warn("An ACS AEM Commons Fast Action Manager thread running sleep(..) was interrupted."
+ "The interruption did NOT come FROM ACS AEM Commons as it's code no longer interrupts ANY thread. "
+ "ACS AEM Commons is simply alerting you of this interruption in this log message, "
+ "and will now restore the interrupted status via a call to: Thread.currentThread().interrupt();", e);

// Restore the interrupted status
Thread.currentThread().interrupt();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public class ThrottledTaskRunnerTest {

@Test
public void testExecutionOrderOverflow() throws NotCompliantMBeanException, InterruptedException {
ThrottledTaskRunner ttr = osgiContext.registerInjectActivateService(new ThrottledTaskRunnerImpl());
ThrottledTaskRunner ttr = osgiContext.registerService(new ThrottledTaskRunnerImpl());

List<Long> executions = Collections.synchronizedList(new ArrayList<>());

Expand Down Expand Up @@ -86,7 +86,7 @@ public void testExecutionOrderOverflow() throws NotCompliantMBeanException, Inte

@Test
public void testExecutionOrder() throws NotCompliantMBeanException, InterruptedException {
ThrottledTaskRunner ttr = osgiContext.registerInjectActivateService(new ThrottledTaskRunnerImpl());
ThrottledTaskRunner ttr = osgiContext.registerService(new ThrottledTaskRunnerImpl());

final List<Long> executions = Collections.synchronizedList(new ArrayList<>());

Expand Down Expand Up @@ -132,7 +132,7 @@ public void testExecutionOrder() throws NotCompliantMBeanException, InterruptedE

@Test
public void assertFifoOrder() throws NotCompliantMBeanException, InterruptedException {
ThrottledTaskRunner ttr = osgiContext.registerInjectActivateService(new ThrottledTaskRunnerImpl());
ThrottledTaskRunner ttr = osgiContext.registerService(new ThrottledTaskRunnerImpl());

List<Integer> executions = Collections.synchronizedList(new ArrayList<>());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ public class ScrMetadataIT {
COMPONENT_PROPERTIES_TO_IGNORE.add("com.adobe.acs.commons.redirects.filter.RedirectFilter:enabled");
COMPONENT_PROPERTIES_TO_IGNORE.add("com.adobe.acs.commons.redirects.filter.RedirectFilter:event.topics");

// https://github.com/Adobe-Consulting-Services/acs-aem-commons/pull/2667
COMPONENT_PROPERTIES_TO_IGNORE.add("com.adobe.acs.commons.fam.impl.ThrottledTaskRunnerImpl:task.timeout");

COMPONENT_PROPERTIES_TO_IGNORE_FOR_TYPE_CHANGE = new HashSet<>();
COMPONENT_PROPERTIES_TO_IGNORE_FOR_TYPE_CHANGE.add("com.adobe.acs.commons.fam.impl.ThrottledTaskRunnerImpl:max.cpu");
COMPONENT_PROPERTIES_TO_IGNORE_FOR_TYPE_CHANGE.add("com.adobe.acs.commons.fam.impl.ThrottledTaskRunnerImpl:max.heap");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public void init() throws NotCompliantMBeanException, LoginException, Persistenc
doAnswer(this::runImmediately).when(runner).scheduleWork(any(), any());
doAnswer(this::runImmediately).when(runner).scheduleWork(any(), anyInt());
doAnswer(this::runImmediately).when(runner).scheduleWork(any(), any(), anyInt());
slingContext.registerInjectActivateService(runner);
slingContext.registerService(runner);

// Set up FAM action manager factory
actionManagerFactory = new ActionManagerFactoryImpl();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,15 @@ var ScriptRunner = {
innerHTML: response
},
footer: {
innerHTML: '<button id="okButton" is="coral-button" variant="default" coral-close>Close</button>'

/*
- https://github.com/Adobe-Consulting-Services/acs-aem-commons/issues/2660
- Remove Halt button as this is reported to result in Oak repo corruption sometimes
innerHTML: (!ended ? '<button id="haltButton" is="coral-button" variant="default">Halt</button>':'') +
'<button id="okButton" is="coral-button" variant="default" coral-close>Close</button>'
*/
},
closable: true,
variant: "info"
Expand Down

0 comments on commit fb3135b

Please sign in to comment.