doris
doris copied to clipboard
[fix](routine-load) fix auto resume not working when the FE leader changes
Proposed changes
We encountered a routine load job that was paused and was never automatically resumed, even though it met the conditions for auto resume.
Id: 134305
Name: lineitem_balance_dup_persistent_weekly_persistent_flow_weekly
CreateTime: 2024-06-27 19:54:13
PauseTime: 2024-06-28 23:02:46
EndTime: NULL
DbName: regression_test_stress_load_long_duration_load
TableName: lineitem_balance_dup_persistent_weekly
IsMultiTable: false
State: PAUSED
DataSourceType: KAFKA
CurrentTaskNum: 0
JobProperties: {"max_batch_rows":"550000","timezone":"Asia/Shanghai","send_batch_parallelism":"1","load_to_single_tablet":"false","column_separator":"','","line_delimiter":"\n","current_concurrent_number":"0","delete":"*","partial_columns":"false","merge_type":"APPEND","exec_mem_limit":"2147483648","strict_mode":"false","jsonpaths":"","max_batch_interval":"10","max_batch_size":"409715200","fuzzy_parse":"false","partitions":"*","columnToColumnExpr":"","whereExpr":"*","desired_concurrent_number":"100","precedingFilter":"*","format":"csv","max_error_number":"0","max_filter_ratio":"1.0","json_root":"","strip_outer_array":"false","num_as_string":"false"}
DataSourceProperties: {"topic":"test-topic-persistent-weekly-new","currentKafkaPartitions":"","brokerList":"xxx"}
CustomProperties: {"kafka_default_offsets":"OFFSET_BEGINNING","group.id":"test-consumer-group","client.id":"test-client-id"}
Statistic: {"receivedBytes":2234836231654,"runningTxns":[],"errorRows":0,"committedTaskNum":1019074,"loadedRows":11693905636,"loadRowsRate":119675,"abortedTaskNum":13556,"errorRowsAfterResumed":0,"totalRows":11693905636,"unselectedRows":0,"receivedBytesRate":22871277,"taskExecuteTimeMs":97713660}
Progress: {"0":"81666390","1":"81605244","2":"80934894","3":"81531594","4":"81866067","5":"80841194","6":"81229045","7":"80854534","8":"81305844","9":"81384530","10":"81016926","11":"81018762","12":"81586996","13":"81028852","14":"80836728","15":"81536307","16":"81191324","17":"80790892","18":"81518108","19":"80853947","20":"80944134","21":"81567859","22":"80967795","23":"80962887","24":"81444757","25":"81182803","26":"81081053","27":"81374984","28":"81089548","29":"81161297","30":"81981195","31":"80943196","32":"80979608","33":"81580092","34":"81596130","35":"80926873","36":"81569105","37":"81364000","38":"80947256","39":"81352057","40":"80864511","41":"81287226","42":"81579790","43":"80902247","44":"81059042","45":"81543945","46":"81137005","47":"80790072","48":"81365538","49":"81025127","50":"80887759","51":"81568479","52":"81013907","53":"80947134","54":"81569820","55":"81073842","56":"80873173","57":"81417107","58":"81120060","59":"81216134","60":"81336754","61":"81187291","62":"80989208","63":"81818417","64":"81038338","65":"80761949","66":"81466270","67":"80989322","68":"80962711","69":"81586888","70":"81073447","71":"80885426"}
Lag: {"0":-1,"1":-1,"2":-1,"3":-1,"4":-1,"5":-1,"6":-1,"7":-1,"8":-1,"9":-1,"10":-1,"11":-1,"12":-1,"13":-1,"14":-1,"15":-1,"16":-1,"17":-1,"18":-1,"19":-1,"20":-1,"21":-1,"22":-1,"23":-1,"24":-1,"25":-1,"26":-1,"27":-1,"28":-1,"29":-1,"30":-1,"31":-1,"32":-1,"33":-1,"34":-1,"35":-1,"36":-1,"37":-1,"38":-1,"39":-1,"40":-1,"41":-1,"42":-1,"43":-1,"44":-1,"45":-1,"46":-1,"47":-1,"48":-1,"49":-1,"50":-1,"51":-1,"52":-1,"53":-1,"54":-1,"55":-1,"56":-1,"57":-1,"58":-1,"59":-1,"60":-1,"61":-1,"62":-1,"63":-1,"64":-1,"65":-1,"66":-1,"67":-1,"68":-1,"69":-1,"70":-1,"71":-1}
ReasonOfStateChanged:
ErrorLogUrls:
OtherMsg:
User: root
Comment:
If a routine load job is paused at the same time as the FE leader changes, pauseReason will be null after the leader change, so the auto-resume logic guarded by the following condition will never be triggered:
if (jobRoutine.pauseReason != null
&& jobRoutine.pauseReason.getCode() != InternalErrorCode.MANUAL_PAUSE_ERR
&& jobRoutine.pauseReason.getCode() != InternalErrorCode.TOO_MANY_FAILURE_ROWS_ERR
&& jobRoutine.pauseReason.getCode() != InternalErrorCode.CANNOT_RESUME_ERR) {
Thank you for your contribution to Apache Doris. Don't know what should be done next? See How to process your PR
Since 2024-03-18, the Document has been moved to doris-website. See Doris Document.
run buildall
PR approved by at least one committer and no changes requested.
PR approved by anyone and no changes requested.
TPC-H: Total hot run time: 39977 ms
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/tpch-tools
Tpch sf100 test result on commit 0532a6a770a7da51621ffa64b8f434cf98f9ab9e, data reload: false
------ Round 1 ----------------------------------
q1 18206 4482 4321 4321
q2 2031 192 209 192
q3 10533 1198 1095 1095
q4 10251 763 764 763
q5 7522 2691 2624 2624
q6 224 140 139 139
q7 977 604 609 604
q8 9241 2124 2098 2098
q9 8903 6512 6541 6512
q10 8967 3721 3685 3685
q11 454 242 242 242
q12 403 235 228 228
q13 18736 3017 2991 2991
q14 278 225 227 225
q15 528 485 492 485
q16 512 376 379 376
q17 989 649 782 649
q18 8123 7383 7397 7383
q19 4007 1499 1583 1499
q20 665 330 326 326
q21 5108 3194 3997 3194
q22 410 347 346 346
Total cold run time: 117068 ms
Total hot run time: 39977 ms
----- Round 2, with runtime_filter_mode=off -----
q1 4360 4250 4218 4218
q2 385 264 259 259
q3 2981 2862 2948 2862
q4 2010 1713 1764 1713
q5 5603 5468 5451 5451
q6 230 132 130 130
q7 2211 1880 1823 1823
q8 3294 3431 3441 3431
q9 8715 8719 8819 8719
q10 4093 3830 3742 3742
q11 582 516 488 488
q12 830 644 641 641
q13 17082 3185 3185 3185
q14 301 290 268 268
q15 530 478 510 478
q16 475 437 423 423
q17 1819 1536 1489 1489
q18 8248 7926 7812 7812
q19 1786 1720 1573 1573
q20 2128 1899 1888 1888
q21 5166 4879 4710 4710
q22 639 573 580 573
Total cold run time: 73468 ms
Total hot run time: 55876 ms
TPC-DS: Total hot run time: 173580 ms
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/tpcds-tools
TPC-DS sf100 test result on commit 0532a6a770a7da51621ffa64b8f434cf98f9ab9e, data reload: false
query1 924 388 374 374
query2 6442 2502 2331 2331
query3 6647 211 216 211
query4 19090 17673 17187 17187
query5 3661 494 484 484
query6 253 163 179 163
query7 4597 304 294 294
query8 311 324 300 300
query9 8508 2390 2367 2367
query10 578 316 286 286
query11 10612 10016 9982 9982
query12 117 86 86 86
query13 1643 359 374 359
query14 10162 6894 7034 6894
query15 233 189 185 185
query16 7764 278 271 271
query17 1690 546 539 539
query18 1955 288 285 285
query19 205 160 162 160
query20 92 86 87 86
query21 214 135 128 128
query22 4484 4088 4029 4029
query23 33817 33783 33557 33557
query24 12210 2872 2857 2857
query25 716 411 391 391
query26 1651 160 155 155
query27 2949 317 328 317
query28 7773 2103 2116 2103
query29 1087 655 669 655
query30 261 162 158 158
query31 970 759 752 752
query32 105 56 56 56
query33 787 309 303 303
query34 968 488 492 488
query35 760 645 640 640
query36 1151 989 967 967
query37 182 84 83 83
query38 2926 2861 2796 2796
query39 892 851 841 841
query40 268 145 132 132
query41 59 56 55 55
query42 115 105 108 105
query43 604 555 563 555
query44 1202 729 725 725
query45 197 171 167 167
query46 1094 726 703 703
query47 1880 1755 1744 1744
query48 378 300 296 296
query49 921 500 415 415
query50 768 385 383 383
query51 6874 6686 6762 6686
query52 104 97 93 93
query53 375 311 286 286
query54 894 443 452 443
query55 75 73 75 73
query56 289 259 273 259
query57 1132 1061 1069 1061
query58 256 261 245 245
query59 3376 3145 3318 3145
query60 304 281 286 281
query61 120 89 89 89
query62 605 444 445 444
query63 314 297 288 288
query64 9642 2289 1726 1726
query65 3156 3079 3091 3079
query66 1246 324 323 323
query67 15661 14834 14962 14834
query68 8398 554 550 550
query69 716 454 337 337
query70 1177 1148 1152 1148
query71 510 283 274 274
query72 8320 5599 5536 5536
query73 1336 329 334 329
query74 5954 5535 5409 5409
query75 4934 2625 2653 2625
query76 4998 963 925 925
query77 762 315 319 315
query78 10478 9887 9711 9711
query79 8411 518 535 518
query80 997 517 476 476
query81 549 214 218 214
query82 492 108 107 107
query83 326 174 169 169
query84 271 86 83 83
query85 996 286 265 265
query86 380 278 318 278
query87 3263 3119 3097 3097
query88 4435 2475 2421 2421
query89 522 403 378 378
query90 2141 195 183 183
query91 123 101 98 98
query92 57 47 53 47
query93 6777 513 490 490
query94 1362 185 187 185
query95 403 318 314 314
query96 603 270 271 270
query97 3174 3063 3043 3043
query98 209 202 195 195
query99 1118 852 835 835
Total cold run time: 295488 ms
Total hot run time: 173580 ms
ClickBench: Total hot run time: 30.36 s
machine: 'aliyun_ecs.c7a.8xlarge_32C64G'
scripts: https://github.com/apache/doris/tree/master/tools/clickbench-tools
ClickBench test result on commit 0532a6a770a7da51621ffa64b8f434cf98f9ab9e, data reload: false
query1 0.04 0.04 0.03
query2 0.08 0.04 0.04
query3 0.22 0.04 0.05
query4 1.69 0.07 0.06
query5 0.50 0.48 0.48
query6 1.14 0.72 0.72
query7 0.02 0.01 0.01
query8 0.06 0.04 0.04
query9 0.54 0.52 0.49
query10 0.54 0.54 0.54
query11 0.15 0.11 0.12
query12 0.16 0.12 0.12
query13 0.61 0.59 0.59
query14 0.78 0.77 0.77
query15 0.85 0.81 0.82
query16 0.37 0.38 0.36
query17 1.00 1.03 1.02
query18 0.21 0.26 0.24
query19 1.77 1.71 1.69
query20 0.01 0.01 0.01
query21 15.43 0.77 0.66
query22 4.33 7.64 1.65
query23 18.37 1.40 1.32
query24 2.12 0.23 0.25
query25 0.17 0.08 0.08
query26 0.28 0.19 0.18
query27 0.08 0.07 0.08
query28 13.23 1.01 1.01
query29 12.62 3.34 3.24
query30 0.26 0.07 0.07
query31 2.86 0.41 0.39
query32 3.24 0.48 0.50
query33 2.91 2.91 2.99
query34 17.21 4.45 4.40
query35 4.50 4.60 4.46
query36 0.66 0.45 0.47
query37 0.18 0.17 0.15
query38 0.16 0.15 0.14
query39 0.05 0.03 0.03
query40 0.18 0.14 0.13
query41 0.10 0.05 0.06
query42 0.07 0.05 0.06
query43 0.05 0.04 0.04
Total cold run time: 109.8 s
Total hot run time: 30.36 s