Skip to content

Commit

Permalink
#697 job not running告警优化
Browse files Browse the repository at this point in the history
  • Loading branch information
RolfHeG committed Jun 16, 2020
1 parent 9c455bc commit 0b00939
Showing 1 changed file with 26 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,39 @@ private void doCheckAndHandleOutdatedNoRunningJobByShardingItem(List<AbnormalJob
String jobName = abnormalJob.getJobName();
int cversion = getCversion(curatorFrameworkOp, JobNodePath.getExecutionItemNodePath(jobName, item));
long nextFireTime = checkShardingItemState(curatorFrameworkOp, abnormalJob, enabledPath, item);
if (nextFireTime != -1 && doubleCheckShardingState(abnormalJob, item, cversion)) {
if (nextFireTime != -1 && doubleCheckShardingState(abnormalJob, item, cversion)
&& !hasOtherItemRunningBefore(curatorFrameworkOp, abnormalJob, nextFireTime)) {
if (abnormalJob.getCause() == null) {
abnormalJob.setCause(AbnormalJob.Cause.NOT_RUN.name());
}
handleOutdatedNoRunningJob(oldAbnormalJobs, curatorFrameworkOp, abnormalJob, nextFireTime);
}
}

/**
 * Checks whether any sharding item of the job has a running node that was created before
 * {@code nextFireTime}, i.e. an item that started running earlier and is still running now.
 *
 * <p>Per the original author's note, when such an item exists the job is probably healthy and
 * in one of these two situations:
 * <ol>
 * <li>A re-sharding task has been issued to the /necessary node, and the machine owning the
 * current item is blocked waiting for the running items to finish.</li>
 * <li>The current item has been failed over, but every other executor already holds a running
 * item of this job, so the failover cannot start immediately.</li>
 * </ol>
 *
 * <p>NOTE(review): every child of the execution node is inspected; the item being checked by the
 * caller is not explicitly excluded - presumably it has no running node at this point. Confirm.
 *
 * @param curatorFrameworkOp ZooKeeper accessor used to list the execution items and read node stats
 * @param abnormalJob the job under inspection; only its job name is read here
 * @param nextFireTime threshold compared against the running node's creation time
 *        (presumably epoch milliseconds, matching {@code Stat#getCtime()} - verify against caller)
 * @return {@code true} if at least one item's running node exists and was created before
 *         {@code nextFireTime}; {@code false} otherwise, including when there are no execution items
 */
private boolean hasOtherItemRunningBefore(CuratorRepository.CuratorFrameworkOp curatorFrameworkOp,
        AbnormalJob abnormalJob, long nextFireTime) {
    final String jobName = abnormalJob.getJobName();
    final List<String> shardingItems = curatorFrameworkOp.getChildren(JobNodePath.getExecutionNodePath(jobName));

    // Nothing under the execution node means nothing can be running.
    if (CollectionUtils.isEmpty(shardingItems)) {
        return false;
    }

    for (String shardingItem : shardingItems) {
        Stat runningStat = curatorFrameworkOp.getStat(JobNodePath.getRunningNodePath(jobName, shardingItem));
        if (runningStat == null) {
            // No running node for this item.
            continue;
        }
        if (runningStat.getCtime() < nextFireTime) {
            return true;
        }
    }
    return false;
}

private int getCversion(CuratorRepository.CuratorFrameworkOp curatorFrameworkOp, String path) {
int cversion = 0;
Stat stat = curatorFrameworkOp.getStat(path);
Expand Down

0 comments on commit 0b00939

Please sign in to comment.