Skip to content

Commit

Permalink
[fix](routine-load) fix auto resume invalid when FE leader change (ap…
Browse files Browse the repository at this point in the history
…ache#37071)

We encountered a routine load job that was paused and was never
auto-resumed, even though it met the conditions for auto resume.
```
                  Id: 134305
                Name: lineitem_balance_dup_persistent_weekly_persistent_flow_weekly
          CreateTime: 2024-06-27 19:54:13
           PauseTime: 2024-06-28 23:02:46
             EndTime: NULL
              DbName: regression_test_stress_load_long_duration_load
           TableName: lineitem_balance_dup_persistent_weekly
        IsMultiTable: false
               State: PAUSED
      DataSourceType: KAFKA
      CurrentTaskNum: 0
       JobProperties: {"max_batch_rows":"550000","timezone":"Asia/Shanghai","send_batch_parallelism":"1","load_to_single_tablet":"false","column_separator":"','","line_delimiter":"\n","current_concurrent_number":"0","delete":"*","partial_columns":"false","merge_type":"APPEND","exec_mem_limit":"2147483648","strict_mode":"false","jsonpaths":"","max_batch_interval":"10","max_batch_size":"409715200","fuzzy_parse":"false","partitions":"*","columnToColumnExpr":"","whereExpr":"*","desired_concurrent_number":"100","precedingFilter":"*","format":"csv","max_error_number":"0","max_filter_ratio":"1.0","json_root":"","strip_outer_array":"false","num_as_string":"false"}
DataSourceProperties: {"topic":"test-topic-persistent-weekly-new","currentKafkaPartitions":"","brokerList":"xxx"}
    CustomProperties: {"kafka_default_offsets":"OFFSET_BEGINNING","group.id":"test-consumer-group","client.id":"test-client-id"}
           Statistic: {"receivedBytes":2234836231654,"runningTxns":[],"errorRows":0,"committedTaskNum":1019074,"loadedRows":11693905636,"loadRowsRate":119675,"abortedTaskNum":13556,"errorRowsAfterResumed":0,"totalRows":11693905636,"unselectedRows":0,"receivedBytesRate":22871277,"taskExecuteTimeMs":97713660}
            Progress: {"0":"81666390","1":"81605244","2":"80934894","3":"81531594","4":"81866067","5":"80841194","6":"81229045","7":"80854534","8":"81305844","9":"81384530","10":"81016926","11":"81018762","12":"81586996","13":"81028852","14":"80836728","15":"81536307","16":"81191324","17":"80790892","18":"81518108","19":"80853947","20":"80944134","21":"81567859","22":"80967795","23":"80962887","24":"81444757","25":"81182803","26":"81081053","27":"81374984","28":"81089548","29":"81161297","30":"81981195","31":"80943196","32":"80979608","33":"81580092","34":"81596130","35":"80926873","36":"81569105","37":"81364000","38":"80947256","39":"81352057","40":"80864511","41":"81287226","42":"81579790","43":"80902247","44":"81059042","45":"81543945","46":"81137005","47":"80790072","48":"81365538","49":"81025127","50":"80887759","51":"81568479","52":"81013907","53":"80947134","54":"81569820","55":"81073842","56":"80873173","57":"81417107","58":"81120060","59":"81216134","60":"81336754","61":"81187291","62":"80989208","63":"81818417","64":"81038338","65":"80761949","66":"81466270","67":"80989322","68":"80962711","69":"81586888","70":"81073447","71":"80885426"}
                 Lag: {"0":-1,"1":-1,"2":-1,"3":-1,"4":-1,"5":-1,"6":-1,"7":-1,"8":-1,"9":-1,"10":-1,"11":-1,"12":-1,"13":-1,"14":-1,"15":-1,"16":-1,"17":-1,"18":-1,"19":-1,"20":-1,"21":-1,"22":-1,"23":-1,"24":-1,"25":-1,"26":-1,"27":-1,"28":-1,"29":-1,"30":-1,"31":-1,"32":-1,"33":-1,"34":-1,"35":-1,"36":-1,"37":-1,"38":-1,"39":-1,"40":-1,"41":-1,"42":-1,"43":-1,"44":-1,"45":-1,"46":-1,"47":-1,"48":-1,"49":-1,"50":-1,"51":-1,"52":-1,"53":-1,"54":-1,"55":-1,"56":-1,"57":-1,"58":-1,"59":-1,"60":-1,"61":-1,"62":-1,"63":-1,"64":-1,"65":-1,"66":-1,"67":-1,"68":-1,"69":-1,"70":-1,"71":-1}
ReasonOfStateChanged: 
        ErrorLogUrls: 
            OtherMsg: 
                User: root
             Comment: 
```


If a routine load job is paused and the FE leader changes at the same
time, pauseReason will be null after the leader change, so the
auto-resume logic will never be triggered:
```
if (jobRoutine.pauseReason != null
                && jobRoutine.pauseReason.getCode() != InternalErrorCode.MANUAL_PAUSE_ERR
                && jobRoutine.pauseReason.getCode() != InternalErrorCode.TOO_MANY_FAILURE_ROWS_ERR
                && jobRoutine.pauseReason.getCode() != InternalErrorCode.CANNOT_RESUME_ERR) {
```
  • Loading branch information
sollhui authored Jul 2, 2024
1 parent 66b5472 commit 7e3cfb6
Showing 1 changed file with 5 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,16 @@ public boolean isFinalState() {
// Progress of this job (presumably consumption progress of the data source — confirm
// against RoutineLoadProgress). Serialized name: "pg".
@SerializedName("pg")
protected RoutineLoadProgress progress;

// Serialized name: "lrt".
@SerializedName("lrt")
protected long latestResumeTimestamp; // the latest resume time
// Counter related to auto resume (presumably the number of automatic resume attempts —
// confirm against the scheduler that updates it). Serialized name: "art".
@SerializedName("art")
protected long autoResumeCount;
// Additional message that needs to be shown to the user; defaults to the empty string.
@SerializedName("om")
protected String otherMsg = "";
// Reason the job was paused. The auto-resume check only fires when this is non-null and
// its code is not MANUAL_PAUSE_ERR, TOO_MANY_FAILURE_ROWS_ERR, or CANNOT_RESUME_ERR, so
// the value must survive an FE leader change; if it comes back null, a paused job is
// never auto-resumed. Serialized name: "pr".
@SerializedName("pr")
protected ErrorReason pauseReason;
// Reason the job was cancelled (inferred from the name — confirm). Serialized name: "cr".
@SerializedName("cr")
protected ErrorReason cancelReason;

@SerializedName("cts")
Expand Down

0 comments on commit 7e3cfb6

Please sign in to comment.