chunjun
chunjun copied to clipboard
flinkx1.12同步mysql-hive 数据量700万,本地模式可以跑,yarn-perjob无法跑
Search before asking
- [X] I had searched in the issues and found no similar issues.
What happened
执行JSON:
{
"job": {
"content": [{
"reader": {
"parameter": {
"password": "123456",
"dataSourceId": 14,
"column": [{
"precision": 20,
"name": "id",
"columnDisplaySize": 20,
"type": "BIGINT"
}, {
"precision": 32,
"name": "device_id",
"columnDisplaySize": 32,
"type": "VARCHAR"
}, {
"precision": 12,
"name": "point",
"columnDisplaySize": 12,
"type": "VARCHAR"
}, {
"precision": 32,
"name": "hash",
"columnDisplaySize": 32,
"type": "VARCHAR"
}, {
"precision": 32,
"name": "value",
"columnDisplaySize": 32,
"type": "VARCHAR"
}, {
"precision": 19,
"name": "acq_time",
"columnDisplaySize": 19,
"type": "DATETIME"
}, {
"precision": 12,
"name": "ratio",
"columnDisplaySize": 12,
"type": "FLOAT"
}, {
"precision": 32,
"name": "max_val",
"columnDisplaySize": 32,
"type": "VARCHAR"
}, {
"precision": 32,
"name": "min_val",
"columnDisplaySize": 32,
"type": "VARCHAR"
}, {
"precision": 32,
"name": "out_param",
"columnDisplaySize": 32,
"type": "VARCHAR"
}, {
"precision": 8,
"name": "item_code",
"columnDisplaySize": 8,
"type": "VARCHAR"
}, {
"precision": 32,
"name": "descr",
"columnDisplaySize": 32,
"type": "VARCHAR"
}, {
"precision": 16,
"name": "save_hst",
"columnDisplaySize": 16,
"type": "SMALLINT"
}, {
"precision": 1,
"name": "del_flag",
"columnDisplaySize": 1,
"type": "TINYINT"
}, {
"precision": 20,
"name": "create_user",
"columnDisplaySize": 20,
"type": "BIGINT"
}, {
"precision": 20,
"name": "update_user",
"columnDisplaySize": 20,
"type": "BIGINT"
}, {
"precision": 19,
"name": "create_time",
"columnDisplaySize": 19,
"type": "DATETIME"
}, {
"precision": 19,
"name": "update_time",
"columnDisplaySize": 19,
"type": "DATETIME"
}, {
"precision": 50,
"name": "ext_first",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_second",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_third",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_fourth",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_fifth",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_sixth",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_seventh",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_eighth",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_ninth",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "ext_tenth",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 50,
"name": "device_add_id",
"columnDisplaySize": 50,
"type": "VARCHAR"
}, {
"precision": 8,
"name": "today_start_value",
"columnDisplaySize": 10,
"type": "DECIMAL"
}, {
"precision": 75,
"name": "unit",
"columnDisplaySize": 75,
"type": "VARCHAR"
}],
"connection": [{
"jdbcUrl": ["jdbc:mysql://172.18.8.209:3306/Vasyslink_yag001?useSSL=false&useUnicode=true&characterEncoding=utf8"],
"table": ["tb_device_point_data_hst_24"]
}],
"splitPk": "id",
"username": "root"
},
"name": "mysqlreader"
},
"writer": {
"parameter": {
"tablesColumn": "{\"tb_device_point_data_hst_666\":[{\"key\":\"id\",\"type\":\"bigint\",\"precision\":19,\"columnDisplaySize\":20},{\"key\":\"device_id\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"point\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"hash\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"value\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"acq_time\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ratio\",\"type\":\"float\",\"precision\":7,\"columnDisplaySize\":24},{\"key\":\"max_val\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"min_val\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"out_param\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"item_code\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"descr\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"save_hst\",\"type\":\"smallint\",\"precision\":5,\"columnDisplaySize\":6},{\"key\":\"del_flag\",\"type\":\"tinyint\",\"precision\":3,\"columnDisplaySize\":4},{\"key\":\"create_user\",\"type\":\"bigint\",\"precision\":19,\"columnDisplaySize\":20},{\"key\":\"update_user\",\"type\":\"bigint\",\"precision\":19,\"columnDisplaySize\":20},{\"key\":\"create_time\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"update_time\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_first\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_second\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_third\",\"type\":\
"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_fourth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_fifth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_sixth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_seventh\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_eighth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_ninth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_tenth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"device_add_id\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"today_start_value\",\"type\":\"decimal\",\"precision\":8,\"columnDisplaySize\":10},{\"key\":\"unit\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647}]}",
"dataSourceId": 54,
"partition": "pt",
"jdbcUrl": "jdbc:hive2://172.18.8.208:10000/Vasyslink_yag001",
"defaultFS": "hdfs://172.18.8.207:8020",
"writeMode": "overwrite",
"maxFileSize": 1073741824,
"partitionValue":"",
"fieldDelimiter": "\t",
"partitionType": "DAY",
"fileType": "text",
"charsetName": "UTF-8"
},
"name": "hivewriter"
}
}],
"setting": {
"log": {
"isLogger": false
},
"errorLimit": {},
"speed": {
"bytes": 0,
"channel": 1
}
}
}
}
What you expected to happen
{ "job": { "content": [{ "reader": { "parameter": { "password": "123456", "dataSourceId": 14, "column": [{ "precision": 20, "name": "id", "columnDisplaySize": 20, "type": "BIGINT" }, { "precision": 32, "name": "device_id", "columnDisplaySize": 32, "type": "VARCHAR" }, { "precision": 12, "name": "point", "columnDisplaySize": 12, "type": "VARCHAR" }, { "precision": 32, "name": "hash", "columnDisplaySize": 32, "type": "VARCHAR" }, { "precision": 32, "name": "value", "columnDisplaySize": 32, "type": "VARCHAR" }, { "precision": 19, "name": "acq_time", "columnDisplaySize": 19, "type": "DATETIME" }, { "precision": 12, "name": "ratio", "columnDisplaySize": 12, "type": "FLOAT" }, { "precision": 32, "name": "max_val", "columnDisplaySize": 32, "type": "VARCHAR" }, { "precision": 32, "name": "min_val", "columnDisplaySize": 32, "type": "VARCHAR" }, { "precision": 32, "name": "out_param", "columnDisplaySize": 32, "type": "VARCHAR" }, { "precision": 8, "name": "item_code", "columnDisplaySize": 8, "type": "VARCHAR" }, { "precision": 32, "name": "descr", "columnDisplaySize": 32, "type": "VARCHAR" }, { "precision": 16, "name": "save_hst", "columnDisplaySize": 16, "type": "SMALLINT" }, { "precision": 1, "name": "del_flag", "columnDisplaySize": 1, "type": "TINYINT" }, { "precision": 20, "name": "create_user", "columnDisplaySize": 20, "type": "BIGINT" }, { "precision": 20, "name": "update_user", "columnDisplaySize": 20, "type": "BIGINT" }, { "precision": 19, "name": "create_time", "columnDisplaySize": 19, "type": "DATETIME" }, { "precision": 19, "name": "update_time", "columnDisplaySize": 19, "type": "DATETIME" }, { "precision": 50, "name": "ext_first", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "ext_second", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "ext_third", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "ext_fourth", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": 
"ext_fifth", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "ext_sixth", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "ext_seventh", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "ext_eighth", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "ext_ninth", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "ext_tenth", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 50, "name": "device_add_id", "columnDisplaySize": 50, "type": "VARCHAR" }, { "precision": 8, "name": "today_start_value", "columnDisplaySize": 10, "type": "DECIMAL" }, { "precision": 75, "name": "unit", "columnDisplaySize": 75, "type": "VARCHAR" }], "connection": [{ "jdbcUrl": ["jdbc:mysql://172.18.8.209:3306/Vasyslink_yag001?useSSL=false&useUnicode=true&characterEncoding=utf8"], "table": ["tb_device_point_data_hst_24"] }],
"splitPk": "id",
"username": "root"
},
"name": "mysqlreader"
},
"writer": {
"parameter": {
"tablesColumn": "{\"tb_device_point_data_hst_666\":[{\"key\":\"id\",\"type\":\"bigint\",\"precision\":19,\"columnDisplaySize\":20},{\"key\":\"device_id\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"point\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"hash\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"value\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"acq_time\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ratio\",\"type\":\"float\",\"precision\":7,\"columnDisplaySize\":24},{\"key\":\"max_val\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"min_val\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"out_param\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"item_code\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"descr\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"save_hst\",\"type\":\"smallint\",\"precision\":5,\"columnDisplaySize\":6},{\"key\":\"del_flag\",\"type\":\"tinyint\",\"precision\":3,\"columnDisplaySize\":4},{\"key\":\"create_user\",\"type\":\"bigint\",\"precision\":19,\"columnDisplaySize\":20},{\"key\":\"update_user\",\"type\":\"bigint\",\"precision\":19,\"columnDisplaySize\":20},{\"key\":\"create_time\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"update_time\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_first\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_second\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_third\",\"type\":\
"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_fourth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_fifth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_sixth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_seventh\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_eighth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_ninth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"ext_tenth\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"device_add_id\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647},{\"key\":\"today_start_value\",\"type\":\"decimal\",\"precision\":8,\"columnDisplaySize\":10},{\"key\":\"unit\",\"type\":\"string\",\"precision\":2147483647,\"columnDisplaySize\":2147483647}]}",
"dataSourceId": 54,
"partition": "pt",
"jdbcUrl": "jdbc:hive2://172.18.8.208:10000/Vasyslink_yag001",
"defaultFS": "hdfs://172.18.8.207:8020",
"writeMode": "overwrite",
"maxFileSize": 1073741824,
"partitionValue":"",
"fieldDelimiter": "\t",
"partitionType": "DAY",
"fileType": "text",
"charsetName": "UTF-8"
},
"name": "hivewriter"
}
}],
"setting": {
"log": {
"isLogger": false
},
"errorLimit": {},
"speed": {
"bytes": 0,
"channel": 1
}
}
}
}
How to reproduce
通过日志发现:mysqlsource 端在 yarn 模式下执行 SQL 超时,无法获取返回结果,但在 local 模式下可以正常执行:
SELECT id
, device_id
, point
, hash
, value
, acq_time
, ratio
, max_val
, min_val
, out_param
, item_code
, descr
, save_hst
, del_flag
, create_user
, update_user
, create_time
, update_time
, ext_first
, ext_second
, ext_third
, ext_fourth
, ext_fifth
, ext_sixth
, ext_seventh
, ext_eighth
, ext_ninth
, ext_tenth
, device_add_id
, today_start_value
, unit
FROM tb_device_point_data_hst_01
WHERE 1=1
Anything else
No response
Version
1.12_release
Are you willing to submit PR?
- [X] Yes I am willing to submit a PR!
Code of Conduct
- [X] I agree to follow this project's Code of Conduct
09:52:51.895 [Checkpoint Timer] INFO org.apache.flink.runtime.checkpoint.CheckpointCoordinator - Checkpoint triggering task Source: mysqlsourcefactory -> Sink: hivesinkfactory (1/1) of job 23bbcf78ab7baedafc834823064d203f is not in state RUNNING but SCHEDULED instead. Aborting checkpoint.
09:52:52.498 [flink-akka.actor.default-dispatcher-24] DEBUG org.apache.flink.runtime.resourcemanager.active.ActiveResourceManager - Trigger heartbeat request.
09:52:52.498 [flink-akka.actor.default-dispatcher-24] DEBUG org.apache.flink.runtime.resourcemanager.active.ActiveResourceManager - Trigger heartbeat request.
09:52:52.498 [flink-akka.actor.default-dispatcher-24] DEBUG org.apache.flink.runtime.jobmaster.JobMaster - Received heartbeat request from c0a911df124a9912bc29d71d413fe784.
09:52:52.498 [flink-akka.actor.default-dispatcher-24] DEBUG org.apache.flink.runtime.resourcemanager.active.ActiveResourceManager - Received heartbeat from 4c8d102dc7861e7b13961654d310dd28.
09:52:52.838 [flink-akka.actor.default-dispatcher-25] DEBUG org.apache.flink.runtime.jobmaster.JobMaster - Trigger heartbeat request.
09:52:54.981 [flink-akka.actor.default-dispatcher-25] DEBUG org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl - Slot request with allocation id deae82826394ed9e6a205ab122ee100e failed for slot container_1660007759349_0003_01_000010_3.
java.util.concurrent.TimeoutException: Invocation of public abstract java.util.concurrent.CompletableFuture org.apache.flink.runtime.taskexecutor.TaskExecutorGateway.requestSlot(org.apache.flink.runtime.clusterframework.types.SlotID,org.apache.flink.api.common.JobID,org.apache.flink.runtime.clusterframework.types.AllocationID,org.apache.flink.runtime.clusterframework.types.ResourceProfile,java.lang.String,org.apache.flink.runtime.resourcemanager.ResourceManagerId,org.apache.flink.api.common.time.Time) timed out.
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.allocateSlot(SlotManagerImpl.java:1115)
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.lambda$internalRequestSlot$8(SlotManagerImpl.java:940)
at java.util.Optional.ifPresent(Optional.java:159)
at org.apache.flink.util.OptionalConsumer.ifPresent(OptionalConsumer.java:45)
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.internalRequestSlot(SlotManagerImpl.java:940)
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.handleFailedSlotRequest(SlotManagerImpl.java:1297)
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.lambda$allocateSlot$13(SlotManagerImpl.java:1148)
at java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774)
at java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750)
at java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:440)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:208)
at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:77)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:158)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
at akka.actor.ActorCell.invoke(ActorCell.scala:561)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
at akka.dispatch.Mailbox.run(Mailbox.scala:225)
at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: akka.pattern.AskTimeoutException: Ask timed out on [Actor[akka.tcp://flink@hadoop01:44544/user/rpc/taskmanager_0#2042025677]] after [10000 ms]. Message of type [org.apache.flink.runtime.rpc.messages.RemoteRpcInvocation]. A typical reason for AskTimeoutException is that the recipient actor didn't send a reply.
at akka.pattern.PromiseActorRef$$anonfun$2.apply(AskSupport.scala:635)
at akka.pattern.PromiseActorRef$$anonfun$2.apply(AskSupport.scala:635)
at akka.pattern.PromiseActorRef$$anonfun$1.apply$mcV$sp(AskSupport.scala:648)
at akka.actor.Scheduler$$anon$4.run(Scheduler.scala:205)
at scala.concurrent.Future$InternalCallbackExecutor$.unbatchedExecute(Future.scala:601)
at scala.concurrent.BatchingExecutor$class.execute(BatchingExecutor.scala:109)
at scala.concurrent.Future$InternalCallbackExecutor$.execute(Future.scala:599)
at akka.actor.LightArrayRevolverScheduler$TaskHolder.executeTask(LightArrayRevolverScheduler.scala:328)
at akka.actor.LightArrayRevolverScheduler$$anon$4.executeBucket$1(LightArrayRevolverScheduler.scala:279)
at akka.actor.LightArrayRevolverScheduler$$anon$4.nextTick(LightArrayRevolverScheduler.scala:283)
at akka.actor.LightArrayRevolverScheduler$$anon$4.run(LightArrayRevolverScheduler.scala:235)
at java.lang.Thread.run(Thread.java:748)
09:53:01.895 [Checkpoint Timer] INFO org.apache.flink.runtime.checkpoint.CheckpointCoordinator - Checkpoint triggering task Source: mysqlsourcefactory -> Sink: hivesinkfactory (1/1) of job 23bbcf78ab7baedafc834823064d203f is not in state RUNNING but SCHEDULED instead. Aborting checkpoint.
09:53:02.519 [flink-akka.actor.default-dispatcher-3] DEBUG org.apache.flink.runtime.resourcemanager.active.ActiveResourceManager - Trigger heartbeat request.
09:53:02.519 [flink-akka.actor.default-dispatcher-3] DEBUG org.apache.flink.runtime.resourcemanager.active.ActiveResourceManager - Trigger heartbeat request.
09:53:02.519 [flink-akka.actor.default-dispatcher-25] DEBUG org.apache.flink.runtime.jobmaster.JobMaster - Received heartbeat request from c0a911df124a9912bc29d71d413fe784.
09:53:02.519 [flink-akka.actor.default-dispatcher-3] DEBUG org.apache.flink.runtime.resourcemanager.active.ActiveResourceManager - Received heartbeat from 4c8d102dc7861e7b13961654d310dd28.
09:53:02.809 [flink-akka.actor.default-dispatcher-3] INFO org.apache.flink.runtime.resourcemanager.active.ActiveResourceManager - The heartbeat of TaskManager with id container_1660007759349_0003_01_000010(hadoop01:37810) timed out.
09:53:02.809 [flink-akka.actor.default-dispatcher-3] INFO org.apache.flink.runtime.resourcemanager.active.ActiveResourceManager - Closing TaskExecutor connection container_1660007759349_0003_01_000010(hadoop01:37810) because: The heartbeat of TaskManager with id container_1660007759349_0003_01_000010(hadoop01:37810) timed out.
09:53:02.809 [flink-akka.actor.default-dispatcher-3] DEBUG org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl - Unregister TaskManager fcd6d38657ccdbac81bc3e5ce7836037 from the SlotManager.
09:53:02.809 [flink-akka.actor.default-dispatcher-3] DEBUG org.apache.flink.runtime.io.network.partition.ResourceManagerPartitionTrackerImpl - Processing shutdown of task executor container_1660007759349_0003_01_000010.
09:53:02.810 [flink-akka.actor.default-dispatcher-3] DEBUG org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl - There was no slot with container_1660007759349_0003_01_000010_4 registered. Probably this slot has been already freed.
09:53:02.810 [flink-akka.actor.default-dispatcher-3] DEBUG org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl - Slot request with allocation id deae82826394ed9e6a205ab122ee100e failed for slot container_1660007759349_0003_01_000010_4.
org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException: java.util.concurrent.TimeoutException: The heartbeat of TaskManager with id container_1660007759349_0003_01_000010(hadoop01:37810) timed out.
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.rejectPendingSlotRequest(SlotManagerImpl.java:1322)
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.removeSlot(SlotManagerImpl.java:1216)
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.removeSlots(SlotManagerImpl.java:1198)
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.internalUnregisterTaskManager(SlotManagerImpl.java:1472)
at org.apache.flink.runtime.resourcemanager.slotmanager.SlotManagerImpl.unregisterTaskManager(SlotManagerImpl.java:523)
at org.apache.flink.runtime.resourcemanager.ResourceManager.closeTaskManagerConnection(ResourceManager.java:1052)
at org.apache.flink.runtime.resourcemanager.ResourceManager$TaskManagerHeartbeatListener.notifyHeartbeatTimeout(ResourceManager.java:1438)
at org.apache.flink.runtime.heartbeat.HeartbeatMonitorImpl.run(HeartbeatMonitorImpl.java:111)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRunAsync(AkkaRpcActor.java:440)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:208)
at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:77)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:158)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:26)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:21)
at scala.PartialFunction$class.applyOrElse(PartialFunction.scala:123)
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:21)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:170)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at akka.actor.Actor$class.aroundReceive(Actor.scala:517)
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:225)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:592)
at akka.actor.ActorCell.invoke(ActorCell.scala:561)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:258)
at akka.dispatch.Mailbox.run(Mailbox.scala:225)
at akka.dispatch.Mailbox.exec(Mailbox.scala:235)
at akka.dispatch.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at akka.dispatch.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at akka.dispatch.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at akka.dispatch.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: java.util.concurrent.TimeoutException: The heartbeat of TaskManager with id container_1660007759349_0003_01_000010(hadoop01:37810) timed out.
at org.apache.flink.runtime.resourcemanager.ResourceManager$TaskManagerHeartbeatListener.notifyHeartbeatTimeout(ResourceManager.java:1442)
... 25 common frames omitted
这个问题大概率是yarn集群问题,提供几个排查思路:
- 检查下集群之间的网络通信;
- 检查下jobmanager日志,看下是否有关键信息;
- 检查下是否开启了debug端口;
我把这两个参数提高了一倍,解决了该问题。
------------------ 原始邮件 ------------------ 发件人: "Paddy @.>; 发送时间: 2022年8月9日(星期二) 上午10:36 收件人: @.>; 抄送: @.>; @.>; 主题: Re: [DTStack/chunjun] flinkx1.12同步mysql-hive 数据量700万,本地模式可以跑,yarn-perjob无法跑 (Issue #1128)
这个问题大概率是yarn集群问题,提供几个排查思路:
检查下集群之间的网络通信;
检查下jobmanager日志,看下是否有关键信息;
检查下是否开启了debug端口;
— Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you authored the thread.Message ID: @.***>
我把这两个参数提高一倍 解决该问题 … ------------------ 原始邮件 ------------------ 发件人: "Paddy @.>; 发送时间: 2022年8月9日(星期二) 上午10:36 收件人: @.>; 抄送: @.>; @.>; 主题: Re: [DTStack/chunjun] flinkx1.12同步mysql-hive 数据量700万,本地模式可以跑,yarn-perjob无法跑 (Issue #1128) 这个问题大概率是yarn集群问题,提供几个排查思路: 检查下集群之间的网络通信; 检查下jobmanager日志,看下是否关键信息; 检查下是否开启了debug端口; — Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you authored the thread.Message ID: @.***>
能否具体描述下,提高了哪些参数呢?