Skip to content
This repository was archived by the owner on Jul 2, 2021. It is now read-only.

Commit

Permalink
Merge branch 'hotfix/backpressure-evb'
Browse files Browse the repository at this point in the history
  • Loading branch information
m-gl committed Aug 23, 2018
2 parents a260abc + 071806e commit 9a48dc1
Show file tree
Hide file tree
Showing 12 changed files with 67 additions and 38 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>ch.cern</groupId>
<artifactId>DAQExpert</artifactId>
<version>2.13.5</version>
<version>2.13.6</version>

<name>DAQExpert</name>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public static String getStringParameter(Properties properties, String key, Class

} catch (NullPointerException e) {
throw new ExpertException(ExpertExceptionCode.LogicModuleUpdateException,
"Could not update LM " + logicModuleClass.getSimpleName() + ", other problem: " + e.getMessage());
"Could not parametrize LM " + logicModuleClass.getSimpleName() + ", properties not provided");
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,10 @@ public boolean satisfied(DAQ daq, Map<String, Output> results) {
* Gets the deadtimes: the instant deadtimes if they are available (non-null and non-empty), the per-lumisection deadtimes otherwise.
*/
private Map<String, Double> getDeadtimes(DAQ daq){
try {
if(daq.getTcdsGlobalInfo().getDeadTimesInstant() != null && !daq.getTcdsGlobalInfo().getDeadTimesInstant().isEmpty()) {
return daq.getTcdsGlobalInfo().getDeadTimesInstant();
} catch (NullPointerException e) {
} else {
return daq.getTcdsGlobalInfo().getDeadTimes();

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ private boolean isUpgraded(FED fed){
@Override
public boolean satisfied(DAQ daq, Map<String, Output> results) {

boolean ttsDeadtime = false;
ttsDeadtime = results.get(TTSDeadtime.class.getSimpleName()).getResult();
if (!ttsDeadtime)
Output ttsDeadtimeOutput;
ttsDeadtimeOutput = results.get(TTSDeadtime.class.getSimpleName());
if (ttsDeadtimeOutput == null || !ttsDeadtimeOutput.getResult())
return false;

boolean result = false;
Expand Down Expand Up @@ -112,10 +112,10 @@ public boolean satisfied(DAQ daq, Map<String, Output> results) {
result = true;
contextHandler.registerForStatistics("VALUE", backpressure, "%", 1);
if (problematicFedsBehindPseudoFed == null) {
contextHandler.register("PROBLEM-FED", topLevelFed.getSrcIdExpected());
contextHandler.registerObject("PROBLEM-FED", topLevelFed, f->f.getSrcIdExpected() + "");
} else {
for (FED fed : problematicFedsBehindPseudoFed) {
contextHandler.register("PROBLEM-FED", fed.getSrcIdExpected());
contextHandler.registerObject("PROBLEM-FED", fed, f->f.getSrcIdExpected() + "");
}
}
TTCPartition p = topLevelFed.getTtcp();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import rcms.utilities.daqaggregator.data.DAQ;
import rcms.utilities.daqexpert.ExpertException;
import rcms.utilities.daqexpert.ExpertExceptionCode;
import rcms.utilities.daqexpert.FailFastParameterReader;
import rcms.utilities.daqexpert.Setting;
import rcms.utilities.daqexpert.persistence.LogicModuleRegistry;
import rcms.utilities.daqexpert.reasoning.base.Output;
Expand Down Expand Up @@ -63,18 +64,8 @@ public boolean satisfied(DAQ daq, Map<String, Output> results) {

@Override
public void parametrize(Properties properties) {

try {
this.max = Integer.parseInt(properties.getProperty(Setting.EXPERT_L1_RATE_MAX.getKey()));
this.description = "The readout rate is {{ACTUAL_READOUT_RATE}} which is above the expected maximum " + max + " Hz. This may be a problem with the L1 trigger.";

} catch (NumberFormatException e) {
throw new ExpertException(ExpertExceptionCode.LogicModuleUpdateException, "Could not update LM "
+ this.getClass().getSimpleName() + ", number parsing problem: " + e.getMessage());
} catch (NullPointerException e) {
throw new ExpertException(ExpertExceptionCode.LogicModuleUpdateException,
"Could not update LM " + this.getClass().getSimpleName() + ", other problem: " + e.getMessage());
}
this.max = FailFastParameterReader.getIntegerParameter(properties,Setting.EXPERT_L1_RATE_MAX, this.getClass());
this.description = "The readout rate is {{ACTUAL_READOUT_RATE}} which is above the expected maximum " + max + " Hz. This may be a problem with the L1 trigger.";
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public CorruptedData() {
this.name = "Corrupted data received";
this.description = "Run blocked by corrupted data from FED {{PROBLEM-FED}} received by RU {{PROBLEM-RU}} which is now in failed state. "
+ "Problem FED belongs to partition {{PROBLEM-PARTITION}} in {{PROBLEM-SUBSYSTEM}} subsystem "
+ "This causes backpressure at FED {{AFFECTED-FED}} in partition {{AFFECTED-TTCP}} of {{AFFECTED-SUBSYSTEM}}";
+ "This causes backpressure at FED {{AFFECTED-FED}} in partition {{AFFECTED-PARTITION}} of {{AFFECTED-SUBSYSTEM}}";

this.briefDescription = "Run blocked by corrupted data from FED(s) {{PROBLEM-SUBSYSTEM}}/{{PROBLEM-PARTITION}}/{{PROBLEM-FED}}";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ public BackpressureFromEventBuilding() {
this.name = "Backpressure from Event Builder";

this.description = "Backpressure from Event Building (i.e. not from HLT). " +
"Exists FEDBuilders with backpressure to FEDs ({{P}}) and 0 requests on RU, 256 fragments in RU. " +
"Exists FEDBuilders with backpressure to FEDs {{PROBLEMATIC-FED}} ({{BACKPRESSURE}}) and 0 requests on RU, 256 fragments in RU. " +
"EVM has few ({{EVM-REQUESTS}}, the threshold is <100) requests. All BUs are enabled.";

this.briefDescription = "Backpressure from EVB to FEDs ({{P}})";
this.briefDescription = "Backpressure from EVB to FEDs ({{BACKPRESSURE}})";

this.action = new SimpleAction("Call the DAQ on-call mentioning that we have backpressure from the event building.");

Expand Down Expand Up @@ -60,6 +60,23 @@ public boolean satisfied(DAQ daq, Map<String, Output> results) {

if(fedDeadtimeDueToDAQ || tmpUpgradedFedBackpressured) {


Set<FED> backpressuredFeds = new HashSet<>();

if(tmpUpgradedFedBackpressured) {
Output output = results.get(TmpUpgradedFedProblem.class.getSimpleName());
if(output.getContext() != null) {
backpressuredFeds.addAll(output.getContext().getReusableContextEntry("PROBLEM-FED").getObjectSet());
}
}

if(fedDeadtimeDueToDAQ){
Output output = results.get(FedDeadtimeDueToDaq.class.getSimpleName());
if(output.getContext() != null) {
backpressuredFeds.addAll(output.getContext().getReusableContextEntry("PROBLEM-FED").getObjectSet());
}
}

assignPriority(results);
boolean result = false;

Expand All @@ -77,20 +94,19 @@ public boolean satisfied(DAQ daq, Map<String, Output> results) {
//TODO: LATER: looking at dead time of FED. need to take into account FED - pseudoFED relationship.
if (!fed.isFrlMasked()) {

//TODO: use the result of other LMs instead of repeating the job
if (fed.getPercentWarning() + fed.getPercentBusy() > deadtimeThresholdInPercentage) {
// check only the FEDs already reported as PROBLEM-FED by TmpUpgradedFedProblem or FedDeadtimeDueToDaq
if(backpressuredFeds.contains(fed)) {

float backpressure = fed.getPercentBackpressure();
if (backpressure > fedBackpressureThreshold) {

logger.debug("Found problematic FED: " + fed.getSrcIdExpected());
contextHandler.register("PROBLEMATIC-FED", fed.getSrcIdExpected());
contextHandler.registerForStatistics("BACKPRESSURE", backpressure);
contextHandler.registerForStatistics("BACKPRESSURE", backpressure, "%", 1);
problematicFeds.add(fed);
foundProblematicFeds = true;
}
}

}
}
if (foundProblematicFeds) {
Expand All @@ -107,10 +123,12 @@ public boolean satisfied(DAQ daq, Map<String, Output> results) {
boolean allBusEnabled = true;

for (RU ru : daq.getRus()) {
if (ru.isEVM() && ru.getRequests() < evmFewRequestsThreshold) {
logger.trace("EVM has: " + ru.getRequests() + " requests");
contextHandler.registerForStatistics("EVM-REQUESTS", ru.getRequests());
evmFewRequests = true;
if (ru.isEVM()) {
if(ru.getRequests() < evmFewRequestsThreshold) {
logger.trace("EVM has: " + ru.getRequests() + " requests");
contextHandler.registerForStatistics("EVM-REQUESTS", ru.getRequests());
evmFewRequests = true;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,14 @@ public boolean satisfied(DAQ daq, Map<String, Output> results) {
result = true;

if(problematicFedsBehindPseudoFed == null) {
contextHandler.register("PROBLEM-FED", topLevelFed.getSrcIdExpected());
contextHandler.registerObject("PROBLEM-FED", topLevelFed, f->f.getSrcIdExpected() + "");
} else{
for(FED fed: problematicFedsBehindPseudoFed){
contextHandler.register("PROBLEM-FED", fed.getSrcIdExpected());
contextHandler.registerObject("PROBLEM-FED", fed, f->f.getSrcIdExpected() + "");
}
}
contextHandler.registerForStatistics("DEADTIME", deadPercentage, "%", 1);
contextHandler.registerForStatistics("BACKPRESSURE", deadPercentage, "%", 1);
contextHandler.registerForStatistics("BACKPRESSURE", backpressure, "%", 1);

}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import java.util.Arrays;
import java.util.HashSet;
import java.util.regex.Pattern;

import org.junit.Ignore;
import org.junit.Test;
import static org.junit.Assert.*;
import rcms.utilities.daqaggregator.data.DAQ;
Expand Down Expand Up @@ -35,6 +37,7 @@ public class RuFailedTest extends FlowchartCaseTestBase {
* REMI: the real reason is in the 2nd (ru-failed) bullet. I guess the 1st (fc5, fed-stuck) one shows up because we
* do not see the backpressure from DAQ on the BPIX FEDs.
*/
@Ignore // ignored because of a known issue with the test base - re-enable once the new TestBase is merged
@Test
public void case1Test() throws URISyntaxException {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.junit.Ignore;
import org.junit.Test;
import rcms.utilities.daqaggregator.data.DAQ;
import rcms.utilities.daqexpert.processing.context.ContextHandler;
import rcms.utilities.daqexpert.reasoning.logic.failures.FlowchartCaseTestBase;

import java.net.URISyntaxException;
Expand All @@ -29,4 +30,21 @@ public void test01() throws URISyntaxException {
assertSatisfiedLogicModules(snapshot, backpressureFromEventBuilding, backpressureFromFerol);
//assertOnlyOneIsSatisified(backpressureFromEventBuilding, snapshot);
}

/**
 * Loads snapshot {@code 1534269198968.json.gz}, hands it to {@code assertSatisfiedLogicModules}
 * together with {@code backpressureFromEventBuilding}, and prints that module's description and
 * action with context to standard output.
 *
 * @throws URISyntaxException declared for the snapshot lookup
 */
@Ignore // this test relies on reusable context - enable it after introducing the updated test base class
@Test
public void test02() throws URISyntaxException {
    final DAQ daq = getSnapshot("1534269198968.json.gz");

    // Set the loggers of the logic modules of interest to INFO.
    final Class<?>[] modulesOfInterest = {
        BackpressureFromEventBuilding.class,
        BackpressureFromFerol.class,
        FedDeadtimeDueToDaq.class
    };
    for (Class<?> moduleOfInterest : modulesOfInterest) {
        Logger.getLogger(moduleOfInterest).setLevel(Level.INFO);
    }

    // NOTE(review): presumably turns off markup highlighting in the printed text - confirm against ContextHandler.
    ContextHandler.highlightMarkup = false;
    assertSatisfiedLogicModules(daq, backpressureFromEventBuilding);

    System.out.println(backpressureFromEventBuilding.getDescriptionWithContext());
    System.out.println(backpressureFromEventBuilding.getActionWithContext());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ public void prepareForTest() {
public void test01() throws URISyntaxException {
assertTrue(module.satisfied(FlowchartCaseTestBase.getSnapshot("1507212900008.json.gz"), results));
logger.info(module.getDescriptionWithContext());
assertEquals("FED <strong>622</strong> has a deadtime <strong>5.2%</strong>, due to DAQ backpressure <strong>5.2%</strong>. The threshold for deadtime is 2.0%, backpressure: 2.0%", module.getDescriptionWithContext());
assertEquals("FED <strong>622</strong> has a deadtime <strong>5.2%</strong>, due to DAQ backpressure <strong>21.4%</strong>. The threshold for deadtime is 2.0%, backpressure: 2.0%", module.getDescriptionWithContext());
}

@Test
public void test03() throws URISyntaxException {
assertTrue(module.satisfied(FlowchartCaseTestBase.getSnapshot("1507212240143.json.gz"), results));
logger.info(module.getDescriptionWithContext());
assertEquals("FED <strong>359</strong> has a deadtime <strong>4.4%</strong>, due to DAQ backpressure <strong>4.4%</strong>. The threshold for deadtime is 2.0%, backpressure: 2.0%", module.getDescriptionWithContext());
assertEquals("FED <strong>359</strong> has a deadtime <strong>4.4%</strong>, due to DAQ backpressure <strong>2.6%</strong>. The threshold for deadtime is 2.0%, backpressure: 2.0%", module.getDescriptionWithContext());

}

Expand Down
Binary file not shown.

0 comments on commit 9a48dc1

Please sign in to comment.