chunjun
chunjun copied to clipboard
写入 HDFS 失败
Describe the bug: 使用 1.12.4 版本测试,运行之前可以正常执行的任务时报错:
2022-06-22 15:15:49.849 [flink-akka.actor.default-dispatcher-3] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Stopped Akka RPC service.
Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:144)
at org.apache.flink.runtime.minicluster.MiniCluster.executeJobBlocking(MiniCluster.java:811)
at com.dtstack.chunjun.environment.MyLocalStreamEnvironment.execute(MyLocalStreamEnvironment.java:174)
at com.dtstack.chunjun.Main.exeSyncJob(Main.java:227)
at com.dtstack.chunjun.Main.main(Main.java:122)
at com.dtstack.chunjun.client.local.LocalClusterClientHelper.submit(LocalClusterClientHelper.java:35)
at com.dtstack.chunjun.client.Launcher.main(Launcher.java:119)
Caused by: org.apache.flink.runtime.client.JobInitializationException: Could not instantiate JobManager.
at org.apache.flink.runtime.dispatcher.Dispatcher.lambda$createJobManagerRunner$5(Dispatcher.java:488)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.flink.runtime.client.JobExecutionException: Cannot initialize task 'Sink: hdfssinkfactory': null
at org.apache.flink.runtime.executiongraph.ExecutionGraphBuilder.buildGraph(ExecutionGraphBuilder.java:293)
at org.apache.flink.runtime.scheduler.SchedulerBase.createExecutionGraph(SchedulerBase.java:322)
at org.apache.flink.runtime.scheduler.SchedulerBase.createAndRestoreExecutionGraph(SchedulerBase.java:276)
at org.apache.flink.runtime.scheduler.SchedulerBase.<init>(SchedulerBase.java:249)
at org.apache.flink.runtime.scheduler.DefaultScheduler.<init>(DefaultScheduler.java:133)
at org.apache.flink.runtime.scheduler.DefaultSchedulerFactory.createInstance(DefaultSchedulerFactory.java:111)
at org.apache.flink.runtime.jobmaster.JobMaster.createScheduler(JobMaster.java:342)
at org.apache.flink.runtime.jobmaster.JobMaster.<init>(JobMaster.java:327)
at org.apache.flink.runtime.jobmaster.factories.DefaultJobMasterServiceFactory.createJobMasterService(DefaultJobMasterServiceFactory.java:95)
at org.apache.flink.runtime.jobmaster.factories.DefaultJobMasterServiceFactory.createJobMasterService(DefaultJobMasterServiceFactory.java:39)
at org.apache.flink.runtime.jobmaster.JobManagerRunnerImpl.<init>(JobManagerRunnerImpl.java:163)
at org.apache.flink.runtime.dispatcher.DefaultJobManagerRunnerFactory.createJobManagerRunner(DefaultJobManagerRunnerFactory.java:86)
at org.apache.flink.runtime.dispatcher.Dispatcher.lambda$createJobManagerRunner$5(Dispatcher.java:472)
... 4 more
Caused by: java.lang.NullPointerException
at com.dtstack.chunjun.connector.hdfs.sink.HdfsOrcOutputFormat.openSource(HdfsOrcOutputFormat.java:107)
at com.dtstack.chunjun.sink.format.BaseFileOutputFormat.initVariableFields(BaseFileOutputFormat.java:112)
at com.dtstack.chunjun.connector.hdfs.sink.BaseHdfsOutputFormat.initVariableFields(BaseHdfsOutputFormat.java:97)
at com.dtstack.chunjun.sink.format.BaseFileOutputFormat.initializeGlobal(BaseFileOutputFormat.java:65)
at org.apache.flink.runtime.jobgraph.InputOutputFormatVertex.initializeOnMaster(InputOutputFormatVertex.java:110)
at org.apache.flink.runtime.executiongraph.ExecutionGraphBuilder.buildGraph(ExecutionGraphBuilder.java:289)
... 16 more
我把 hdfs-site.xml 和 core-site.xml 打入了 chunjun-connector-hdfs-master.jar,在之前的版本中也是这么做的。
[bigtop@client-10-0-161-28 chunjun]$ bin/start-chunjun \
> -mode local \
> -jobType sync \
> -job /data/soft/chunjun/chunjun-examples/json/hdfs/stream_hdfs_orc2.json \
> -chunjunDistDir $CHUNJUN_HOME
ChunJun starting ...
2022-06-22 16:07:04.637 [main] INFO com.dtstack.chunjun.Main - ------------program params-------------------------
2022-06-22 16:07:04.683 [main] INFO com.dtstack.chunjun.Main - -p
2022-06-22 16:07:04.684 [main] INFO com.dtstack.chunjun.Main -
2022-06-22 16:07:04.684 [main] INFO com.dtstack.chunjun.Main - -job
2022-06-22 16:07:04.684 [main] INFO com.dtstack.chunjun.Main - %7B%0A++%22job%22%3A+%7B%0A++++%22content%22%3A+%5B%0A++++++%7B%0A++++++++%22reader%22%3A+%7B%0A++++++++++%22parameter%22%3A+%7B%0A++++++++++++%22sliceRecordCount%22%3A+%5B%22100000%22%5D%2C%0A++++++++++++%22column%22%3A+%5B%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22id%22%2C%0A++++++++++++++++%22type%22%3A+%22int%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_boolean%22%2C%0A++++++++++++++++%22type%22%3A+%22boolean%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_tinyint%22%2C%0A++++++++++++++++%22type%22%3A+%22tinyint%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_smallint%22%2C%0A++++++++++++++++%22type%22%3A+%22smallint%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_int%22%2C%0A++++++++++++++++%22type%22%3A+%22int%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_bigint%22%2C%0A++++++++++++++++%22type%22%3A+%22bigint%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_float%22%2C%0A++++++++++++++++%22type%22%3A+%22float%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_double%22%2C%0A++++++++++++++++%22type%22%3A+%22double%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_string%22%2C%0A++++++++++++++++%22type%22%3A+%22string%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_varchar%22%2C%0A++++++++++++++++%22type%22%3A+%22varchar%28255%29%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_char%22%2C%0A++++++++++++++++%22type%22%3A+%22char%28255%29%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_binary%22%2C%0A++++++++++++++++%22type%22%3A+%22binary%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A+++++
+++++++++++%22name%22%3A+%22col_timestamp%22%2C%0A++++++++++++++++%22type%22%3A+%22timestamp%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_date%22%2C%0A++++++++++++++++%22type%22%3A+%22date%22%0A++++++++++++++%7D%0A++++++++++++%5D%0A++++++++++%7D%2C%0A++++++++++%22name%22%3A+%22streamreader%22%0A++++++++%7D%2C%0A++++++++%22writer%22%3A+%7B%0A++++++++++%22name%22%3A+%22hdfswriter%22%2C%0A++++++++++%22parameter%22%3A+%7B%0A++++++++++++%22path%22%3A+%22hdfs%3A%2F%2Fnsprd%2Ftmp%2Fdatax%2Ftmp%2Fflinkx%2Ftest1%22%2C%0A++++++++++++%22defaultFS%22%3A+%22hdfs%3A%2F%2Fnsprd%22%2C%0A++++++++++++%22column%22%3A+%5B%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22id%22%2C%0A++++++++++++++++%22type%22%3A+%22int%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_boolean%22%2C%0A++++++++++++++++%22type%22%3A+%22boolean%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_tinyint%22%2C%0A++++++++++++++++%22type%22%3A+%22tinyint%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_smallint%22%2C%0A++++++++++++++++%22type%22%3A+%22smallint%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_int%22%2C%0A++++++++++++++++%22type%22%3A+%22int%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_bigint%22%2C%0A++++++++++++++++%22type%22%3A+%22bigint%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_float%22%2C%0A++++++++++++++++%22type%22%3A+%22float%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_double%22%2C%0A++++++++++++++++%22type%22%3A+%22double%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_string%22%2C%0A++++++++++++++++%22type%22%3A+%22string%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_varchar%22%2C%0A++++++++++++++++%22type%22
%3A+%22varchar%28255%29%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_char%22%2C%0A++++++++++++++++%22type%22%3A+%22char%28255%29%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_binary%22%2C%0A++++++++++++++++%22type%22%3A+%22binary%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_timestamp%22%2C%0A++++++++++++++++%22type%22%3A+%22timestamp%22%0A++++++++++++++%7D%2C%0A++++++++++++++%7B%0A++++++++++++++++%22name%22%3A+%22col_date%22%2C%0A++++++++++++++++%22type%22%3A+%22date%22%0A++++++++++++++%7D%0A++++++++++++%5D%2C%0A++++++++++++%22fileType%22%3A+%22orc%22%2C%0A++++++++++++%22maxFileSize%22%3A+10485760%2C%0A++++++++++++%22nextCheckRows%22%3A+20000%2C%0A++++++++++++%22fieldDelimiter%22%3A+%22%2C%22%2C%0A++++++++++++%22encoding%22%3A+%22utf-8%22%2C%0A++++++++++++%22fileName%22%3A+%22pt%3D20201214%22%2C%0A++++++++++++%22writeMode%22%3A+%22overwrite%22%0A++++++++++%7D%0A++++++++%7D%0A++++++%7D%0A++++%5D%2C%0A++++%22setting%22%3A+%7B%0A++++++%22speed%22%3A+%7B%0A++++++++%22channel%22%3A+1%2C%0A++++++++%22bytes%22%3A+0%0A++++++%7D%0A++++%7D%0A++%7D%0A%7D%0A
2022-06-22 16:07:04.684 [main] INFO com.dtstack.chunjun.Main - -jobName
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - Flink_Job
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - -flinkxDistDir
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - /data/soft/chunjun
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - -chunjunDistDir
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - /data/soft/chunjun
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - -jobType
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - sync
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - -confProp
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - {}
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - -pluginLoadMode
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - shipfile
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - -mode
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - local
2022-06-22 16:07:04.685 [main] INFO com.dtstack.chunjun.Main - -------------------------------------------
2022-06-22 16:07:05.804 [main] INFO com.dtstack.chunjun.Main - Register to table configuration:{table.dynamic-table-options.enabled=true, pipeline.name=Flink_Job}
2022-06-22 16:07:05.818 [main] WARN com.dtstack.chunjun.options.Options - Option 'flinkxDistDir' is deprecated, please replace with 'chunjunDistDir'.
2022-06-22 16:07:05.818 [main] WARN com.dtstack.chunjun.options.Options - Option 'flinkxDistDir' is deprecated, please replace with 'chunjunDistDir'.
2022-06-22 16:07:05.820 [main] WARN com.dtstack.chunjun.options.Options - Option 'flinkxDistDir' is deprecated, please replace with 'chunjunDistDir'.
2022-06-22 16:07:05.842 [main] INFO com.dtstack.chunjun.util.PluginUtil - Flinkx executionMode: local
2022-06-22 16:07:05.842 [main] INFO com.dtstack.chunjun.util.PluginUtil - Flinkx reset pipeline.jars: [/data/soft/chunjun/connector/stream/chunjun-connector-stream-master.jar, /data/soft/chunjun/connector/hdfs/chunjun-connector-hdfs-master.jar, /data/soft/chunjun/dirty-data-collector/log/chunjun-dirty-log-master.jar, /data/soft/chunjun/metrics/prometheus/chunjun-metrics-prometheus-master.jar, /data/soft/chunjun/chunjun-core-master.jar]
2022-06-22 16:07:05.846 [main] INFO com.dtstack.chunjun.classloader.ClassLoaderManager - jarUrl:file:/data/soft/chunjun/chunjun-core-master.jar_file:/data/soft/chunjun/connector/hdfs/chunjun-connector-hdfs-master.jar_file:/data/soft/chunjun/connector/stream/chunjun-connector-stream-master.jar_file:/data/soft/chunjun/dirty-data-collector/log/chunjun-dirty-log-master.jar_file:/data/soft/chunjun/metrics/prometheus/chunjun-metrics-prometheus-master.jar create ClassLoad successful...
2022-06-22 16:07:06.208 [main] INFO o.a.flink.runtime.taskexecutor.TaskExecutorResourceUtils - The configuration option taskmanager.cpu.cores required for local execution is not set, setting it to the maximal possible value.
2022-06-22 16:07:06.209 [main] INFO o.a.flink.runtime.taskexecutor.TaskExecutorResourceUtils - The configuration option taskmanager.memory.task.heap.size required for local execution is not set, setting it to the maximal possible value.
2022-06-22 16:07:06.209 [main] INFO o.a.flink.runtime.taskexecutor.TaskExecutorResourceUtils - The configuration option taskmanager.memory.task.off-heap.size required for local execution is not set, setting it to the maximal possible value.
2022-06-22 16:07:06.210 [main] INFO o.a.flink.runtime.taskexecutor.TaskExecutorResourceUtils - The configuration option taskmanager.memory.network.min required for local execution is not set, setting it to its default value 64 mb.
2022-06-22 16:07:06.210 [main] INFO o.a.flink.runtime.taskexecutor.TaskExecutorResourceUtils - The configuration option taskmanager.memory.network.max required for local execution is not set, setting it to its default value 64 mb.
2022-06-22 16:07:06.211 [main] INFO o.a.flink.streaming.api.environment.LocalStreamEnvironment - Running job on local embedded Flink mini cluster
2022-06-22 16:07:06.225 [main] INFO org.apache.flink.runtime.minicluster.MiniCluster - Starting Flink Mini Cluster
2022-06-22 16:07:06.228 [main] INFO org.apache.flink.runtime.minicluster.MiniCluster - Starting Metrics Registry
2022-06-22 16:07:06.266 [main] INFO org.apache.flink.runtime.metrics.MetricRegistryImpl - No metrics reporter configured, no metrics will be exposed/reported.
2022-06-22 16:07:06.266 [main] INFO org.apache.flink.runtime.minicluster.MiniCluster - Starting RPC Service(s)
2022-06-22 16:07:06.285 [main] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils - Trying to start local actor system
2022-06-22 16:07:06.658 [flink-akka.actor.default-dispatcher-4] INFO akka.event.slf4j.Slf4jLogger - Slf4jLogger started
2022-06-22 16:07:06.752 [main] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils - Actor system started at akka://flink
2022-06-22 16:07:06.765 [main] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils - Trying to start local actor system
2022-06-22 16:07:06.777 [flink-metrics-2] INFO akka.event.slf4j.Slf4jLogger - Slf4jLogger started
2022-06-22 16:07:06.786 [main] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcServiceUtils - Actor system started at akka://flink-metrics
2022-06-22 16:07:06.800 [main] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Starting RPC endpoint for org.apache.flink.runtime.metrics.dump.MetricQueryService at akka://flink-metrics/user/rpc/MetricQueryService .
2022-06-22 16:07:06.877 [main] INFO org.apache.flink.runtime.minicluster.MiniCluster - Starting high-availability services
2022-06-22 16:07:06.887 [main] INFO org.apache.flink.runtime.blob.BlobServer - Created BLOB server storage directory /tmp/blobStore-38e699a6-3cde-459a-a413-3d5167214bca
2022-06-22 16:07:06.894 [main] INFO org.apache.flink.runtime.blob.BlobServer - Started BLOB server at 0.0.0.0:35740 - max concurrent requests: 50 - max backlog: 1000
2022-06-22 16:07:06.896 [main] INFO org.apache.flink.runtime.blob.PermanentBlobCache - Created BLOB cache storage directory /tmp/blobStore-a882317c-5e6e-4b5b-894a-aef7e9e19e96
2022-06-22 16:07:06.898 [main] INFO org.apache.flink.runtime.blob.TransientBlobCache - Created BLOB cache storage directory /tmp/blobStore-e5c87633-0729-4a02-8549-fa2fde24e6f7
2022-06-22 16:07:06.898 [main] INFO org.apache.flink.runtime.minicluster.MiniCluster - Starting 1 TaskManger(s)
2022-06-22 16:07:06.901 [main] INFO org.apache.flink.runtime.taskexecutor.TaskManagerRunner - Starting TaskManager with ResourceID: 8ff81b2f-051d-4447-8890-3996ccfd7862
2022-06-22 16:07:06.910 [main] INFO org.apache.flink.runtime.taskexecutor.TaskManagerServices - Temporary file directory '/tmp': total 99 GB, usable 69 GB (69.70% usable)
2022-06-22 16:07:06.912 [main] INFO org.apache.flink.runtime.io.disk.FileChannelManagerImpl - FileChannelManager uses directory /tmp/flink-io-1167dc24-d301-400a-acfb-b0abb1734535 for spill files.
2022-06-22 16:07:06.918 [main] INFO org.apache.flink.runtime.io.disk.FileChannelManagerImpl - FileChannelManager uses directory /tmp/flink-netty-shuffle-3960095c-3613-499a-9721-8b9d6522ec97 for spill files.
2022-06-22 16:07:06.961 [main] INFO org.apache.flink.runtime.io.network.buffer.NetworkBufferPool - Allocated 64 MB for network buffer pool (number of memory segments: 2048, bytes per segment: 32768).
2022-06-22 16:07:06.970 [main] INFO org.apache.flink.runtime.io.network.NettyShuffleEnvironment - Starting the network environment and its components.
2022-06-22 16:07:06.971 [main] INFO org.apache.flink.runtime.taskexecutor.KvStateService - Starting the kvState service and its components.
2022-06-22 16:07:06.992 [main] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Starting RPC endpoint for org.apache.flink.runtime.taskexecutor.TaskExecutor at akka://flink/user/rpc/taskmanager_0 .
2022-06-22 16:07:07.005 [flink-akka.actor.default-dispatcher-4] INFO o.apache.flink.runtime.taskexecutor.DefaultJobLeaderService - Start job leader service.
2022-06-22 16:07:07.007 [flink-akka.actor.default-dispatcher-4] INFO org.apache.flink.runtime.filecache.FileCache - User file cache uses directory /tmp/flink-dist-cache-7b70d87b-3033-4e49-b61f-265b03a394cf
2022-06-22 16:07:07.036 [main] INFO org.apache.flink.runtime.dispatcher.DispatcherRestEndpoint - Starting rest endpoint.
2022-06-22 16:07:07.105 [main] WARN org.apache.flink.runtime.webmonitor.WebMonitorUtils - Log file environment variable 'log.file' is not set.
2022-06-22 16:07:07.105 [main] WARN org.apache.flink.runtime.webmonitor.WebMonitorUtils - JobManager log files are unavailable in the web dashboard. Log file location not found in environment variable 'log.file' or configuration key 'web.log.path'.
2022-06-22 16:07:07.265 [main] INFO org.apache.flink.runtime.dispatcher.DispatcherRestEndpoint - Rest endpoint listening at localhost:44328
2022-06-22 16:07:07.267 [main] INFO o.a.f.r.h.nonha.embedded.EmbeddedLeaderService - Proposing leadership to contender http://localhost:44328
2022-06-22 16:07:07.268 [main] INFO org.apache.flink.runtime.dispatcher.DispatcherRestEndpoint - Web frontend listening at http://localhost:44328.
2022-06-22 16:07:07.268 [mini-cluster-io-thread-1] INFO org.apache.flink.runtime.dispatcher.DispatcherRestEndpoint - http://localhost:44328 was granted leadership with leaderSessionID=6cbe62f7-ef46-4be3-b6a1-48f6253c532d
2022-06-22 16:07:07.269 [mini-cluster-io-thread-1] INFO o.a.f.r.h.nonha.embedded.EmbeddedLeaderService - Received confirmation of leadership for leader http://localhost:44328 , session=6cbe62f7-ef46-4be3-b6a1-48f6253c532d
2022-06-22 16:07:07.282 [main] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Starting RPC endpoint for org.apache.flink.runtime.resourcemanager.StandaloneResourceManager at akka://flink/user/rpc/resourcemanager_1 .
2022-06-22 16:07:07.294 [main] INFO o.a.f.r.h.nonha.embedded.EmbeddedLeaderService - Proposing leadership to contender LeaderContender: DefaultDispatcherRunner
2022-06-22 16:07:07.294 [flink-akka.actor.default-dispatcher-4] INFO o.a.f.r.h.nonha.embedded.EmbeddedLeaderService - Proposing leadership to contender LeaderContender: StandaloneResourceManager
2022-06-22 16:07:07.297 [flink-akka.actor.default-dispatcher-4] INFO o.a.flink.runtime.resourcemanager.StandaloneResourceManager - ResourceManager akka://flink/user/rpc/resourcemanager_1 was granted leadership with fencing token 8d0986fec2f7c9980e95c7ac711e4262
2022-06-22 16:07:07.297 [main] INFO org.apache.flink.runtime.minicluster.MiniCluster - Flink Mini Cluster started successfully
2022-06-22 16:07:07.301 [flink-akka.actor.default-dispatcher-4] INFO o.a.f.runtime.resourcemanager.slotmanager.SlotManagerImpl - Starting the SlotManager.
2022-06-22 16:07:07.304 [mini-cluster-io-thread-2] INFO o.a.f.r.dispatcher.runner.SessionDispatcherLeaderProcess - Start SessionDispatcherLeaderProcess.
2022-06-22 16:07:07.306 [mini-cluster-io-thread-5] INFO o.a.f.r.dispatcher.runner.SessionDispatcherLeaderProcess - Recover all persisted job graphs.
2022-06-22 16:07:07.306 [mini-cluster-io-thread-5] INFO o.a.f.r.dispatcher.runner.SessionDispatcherLeaderProcess - Successfully recovered 0 persisted job graphs.
2022-06-22 16:07:07.307 [mini-cluster-io-thread-6] INFO o.a.f.r.h.nonha.embedded.EmbeddedLeaderService - Received confirmation of leadership for leader akka://flink/user/rpc/resourcemanager_1 , session=0e95c7ac-711e-4262-8d09-86fec2f7c998
2022-06-22 16:07:07.314 [flink-akka.actor.default-dispatcher-4] INFO org.apache.flink.runtime.taskexecutor.TaskExecutor - Connecting to ResourceManager akka://flink/user/rpc/resourcemanager_1(8d0986fec2f7c9980e95c7ac711e4262).
2022-06-22 16:07:07.318 [mini-cluster-io-thread-5] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Starting RPC endpoint for org.apache.flink.runtime.dispatcher.StandaloneDispatcher at akka://flink/user/rpc/dispatcher_2 .
2022-06-22 16:07:07.331 [mini-cluster-io-thread-5] INFO o.a.f.r.h.nonha.embedded.EmbeddedLeaderService - Received confirmation of leadership for leader akka://flink/user/rpc/dispatcher_2 , session=31d13b6c-c8be-4572-a7ba-45402cb543fd
2022-06-22 16:07:07.338 [flink-akka.actor.default-dispatcher-4] INFO org.apache.flink.runtime.taskexecutor.TaskExecutor - Resolved ResourceManager address, beginning registration
2022-06-22 16:07:07.346 [flink-akka.actor.default-dispatcher-2] INFO o.a.flink.runtime.resourcemanager.StandaloneResourceManager - Registering TaskManager with ResourceID 8ff81b2f-051d-4447-8890-3996ccfd7862 (akka://flink/user/rpc/taskmanager_0) at ResourceManager
2022-06-22 16:07:07.348 [flink-akka.actor.default-dispatcher-2] INFO org.apache.flink.runtime.taskexecutor.TaskExecutor - Successful registration at resource manager akka://flink/user/rpc/resourcemanager_1 under registration id 9491af82ca949f4b3056166d92c48014.
2022-06-22 16:07:08.099 [flink-akka.actor.default-dispatcher-4] INFO org.apache.flink.runtime.dispatcher.StandaloneDispatcher - Received JobGraph submission 8d21917691a552d4a4f0751f7c765a9f (Flink_Job).
2022-06-22 16:07:08.100 [flink-akka.actor.default-dispatcher-4] INFO org.apache.flink.runtime.dispatcher.StandaloneDispatcher - Submitting job 8d21917691a552d4a4f0751f7c765a9f (Flink_Job).
2022-06-22 16:07:08.123 [mini-cluster-io-thread-12] INFO o.a.f.r.h.nonha.embedded.EmbeddedLeaderService - Proposing leadership to contender LeaderContender: JobManagerRunnerImpl
2022-06-22 16:07:08.132 [mini-cluster-io-thread-12] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Starting RPC endpoint for org.apache.flink.runtime.jobmaster.JobMaster at akka://flink/user/rpc/jobmanager_3 .
2022-06-22 16:07:08.138 [mini-cluster-io-thread-12] INFO org.apache.flink.runtime.jobmaster.JobMaster - Initializing job Flink_Job (8d21917691a552d4a4f0751f7c765a9f).
2022-06-22 16:07:08.152 [mini-cluster-io-thread-12] INFO org.apache.flink.runtime.jobmaster.JobMaster - Using restart back off time strategy NoRestartBackoffTimeStrategy for Flink_Job (8d21917691a552d4a4f0751f7c765a9f).
2022-06-22 16:07:08.162 [mini-cluster-io-thread-12] INFO org.apache.flink.runtime.jobmaster.JobMaster - start to buildGraph for job Flink_Job (8d21917691a552d4a4f0751f7c765a9f).
2022-06-22 16:07:08.180 [mini-cluster-io-thread-12] INFO org.apache.flink.runtime.jobmaster.JobMaster - trying to download shipFile from blobServer for job Flink_Job (8d21917691a552d4a4f0751f7c765a9f).
2022-06-22 16:07:08.181 [mini-cluster-io-thread-12] INFO org.apache.flink.runtime.jobmaster.JobMaster - Running initialization on master for job Flink_Job (8d21917691a552d4a4f0751f7c765a9f).
log4j:ERROR Could not find value for key log4j.appender.logfile
log4j:ERROR Could not instantiate appender named "logfile".
2022-06-22 16:07:08,305 - 0 INFO [mini-cluster-io-thread-12] org.apache.hadoop.conf.Configuration:fs.default.name is deprecated. Instead, use fs.defaultFS
2022-06-22 16:07:08,318 - 13 INFO [mini-cluster-io-thread-12] org.apache.hadoop.conf.Configuration:fs.default.name is deprecated. Instead, use fs.defaultFS
2022-06-22 16:07:08,329 - 24 INFO [mini-cluster-io-thread-12] org.apache.hadoop.conf.Configuration:fs.default.name is deprecated. Instead, use fs.defaultFS
2022-06-22 16:07:08,699 - 394 WARN [mini-cluster-io-thread-12] org.apache.hadoop.util.NativeCodeLoader:Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2022-06-22 16:07:09,020 - 715 WARN [mini-cluster-io-thread-12] org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory:The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
2022-06-22 16:07:09.051 [flink-akka.actor.default-dispatcher-4] INFO org.apache.flink.runtime.dispatcher.StandaloneDispatcher - Job 8d21917691a552d4a4f0751f7c765a9f reached terminal state FAILED.
2022-06-22 16:07:09.051 [main] INFO org.apache.flink.runtime.minicluster.MiniCluster - Shutting down Flink Mini Cluster
2022-06-22 16:07:09.052 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.taskexecutor.TaskExecutor - Stopping TaskExecutor akka://flink/user/rpc/taskmanager_0.
2022-06-22 16:07:09.052 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.taskexecutor.TaskExecutor - Close ResourceManager connection f73c80d6463fe8a62f799286af13c6f9.
2022-06-22 16:07:09.053 [main] INFO org.apache.flink.runtime.dispatcher.DispatcherRestEndpoint - Shutting down rest endpoint.
2022-06-22 16:07:09.053 [flink-akka.actor.default-dispatcher-2] INFO o.a.flink.runtime.resourcemanager.StandaloneResourceManager - Closing TaskExecutor connection 8ff81b2f-051d-4447-8890-3996ccfd7862 because: The TaskExecutor is shutting down.
2022-06-22 16:07:09.063 [flink-akka.actor.default-dispatcher-6] INFO o.apache.flink.runtime.taskexecutor.DefaultJobLeaderService - Stop job leader service.
2022-06-22 16:07:09.064 [flink-akka.actor.default-dispatcher-6] INFO o.a.flink.runtime.state.TaskExecutorLocalStateStoresManager - Shutting down TaskExecutorLocalStateStoresManager.
2022-06-22 16:07:09.072 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.io.disk.FileChannelManagerImpl - FileChannelManager removed spill file directory /tmp/flink-io-1167dc24-d301-400a-acfb-b0abb1734535
2022-06-22 16:07:09.073 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.io.network.NettyShuffleEnvironment - Shutting down the network environment and its components.
2022-06-22 16:07:09.079 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.io.disk.FileChannelManagerImpl - FileChannelManager removed spill file directory /tmp/flink-netty-shuffle-3960095c-3613-499a-9721-8b9d6522ec97
2022-06-22 16:07:09.080 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.taskexecutor.KvStateService - Shutting down the kvState service and its components.
2022-06-22 16:07:09.081 [flink-akka.actor.default-dispatcher-6] INFO o.apache.flink.runtime.taskexecutor.DefaultJobLeaderService - Stop job leader service.
2022-06-22 16:07:09.082 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.filecache.FileCache - removed file cache directory /tmp/flink-dist-cache-7b70d87b-3033-4e49-b61f-265b03a394cf
2022-06-22 16:07:09.085 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.taskexecutor.TaskExecutor - Stopped TaskExecutor akka://flink/user/rpc/taskmanager_0.
2022-06-22 16:07:09.092 [ForkJoinPool.commonPool-worker-50] INFO org.apache.flink.runtime.dispatcher.DispatcherRestEndpoint - Removing cache directory /tmp/flink-web-ui
2022-06-22 16:07:09.093 [ForkJoinPool.commonPool-worker-50] INFO org.apache.flink.runtime.dispatcher.DispatcherRestEndpoint - Shut down complete.
2022-06-22 16:07:09.096 [flink-akka.actor.default-dispatcher-6] INFO o.a.flink.runtime.resourcemanager.StandaloneResourceManager - Shut down cluster because application is in CANCELED, diagnostics DispatcherResourceManagerComponent has been closed..
2022-06-22 16:07:09.097 [flink-akka.actor.default-dispatcher-2] INFO o.a.f.r.e.component.DispatcherResourceManagerComponent - Closing components.
2022-06-22 16:07:09.098 [flink-akka.actor.default-dispatcher-2] INFO o.a.f.r.dispatcher.runner.SessionDispatcherLeaderProcess - Stopping SessionDispatcherLeaderProcess.
2022-06-22 16:07:09.098 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.dispatcher.StandaloneDispatcher - Stopping dispatcher akka://flink/user/rpc/dispatcher_2.
2022-06-22 16:07:09.098 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.dispatcher.StandaloneDispatcher - Stopping all currently running jobs of dispatcher akka://flink/user/rpc/dispatcher_2.
2022-06-22 16:07:09.099 [flink-akka.actor.default-dispatcher-4] INFO o.a.f.runtime.resourcemanager.slotmanager.SlotManagerImpl - Closing the SlotManager.
2022-06-22 16:07:09.099 [flink-akka.actor.default-dispatcher-4] INFO o.a.f.runtime.resourcemanager.slotmanager.SlotManagerImpl - Suspending the SlotManager.
2022-06-22 16:07:09.099 [flink-akka.actor.default-dispatcher-6] INFO o.a.f.r.r.h.l.backpressure.BackPressureRequestCoordinator - Shutting down back pressure request coordinator.
2022-06-22 16:07:09.100 [flink-akka.actor.default-dispatcher-6] INFO org.apache.flink.runtime.dispatcher.StandaloneDispatcher - Stopped dispatcher akka://flink/user/rpc/dispatcher_2.
2022-06-22 16:07:09.104 [mini-cluster-io-thread-19] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Stopping Akka RPC service.
2022-06-22 16:07:09.152 [flink-metrics-2] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Stopping Akka RPC service.
2022-06-22 16:07:09.153 [flink-metrics-2] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Stopped Akka RPC service.
2022-06-22 16:07:09.170 [flink-akka.actor.default-dispatcher-2] INFO org.apache.flink.runtime.blob.PermanentBlobCache - Shutting down BLOB cache
2022-06-22 16:07:09.171 [flink-akka.actor.default-dispatcher-2] INFO org.apache.flink.runtime.blob.TransientBlobCache - Shutting down BLOB cache
2022-06-22 16:07:09.172 [flink-akka.actor.default-dispatcher-2] INFO org.apache.flink.runtime.blob.BlobServer - Stopped BLOB server at 0.0.0.0:35740
2022-06-22 16:07:09.173 [flink-akka.actor.default-dispatcher-2] INFO org.apache.flink.runtime.rpc.akka.AkkaRpcService - Stopped Akka RPC service.
Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:144)
at org.apache.flink.runtime.minicluster.MiniCluster.executeJobBlocking(MiniCluster.java:811)
at com.dtstack.chunjun.environment.MyLocalStreamEnvironment.execute(MyLocalStreamEnvironment.java:174)
at com.dtstack.chunjun.Main.exeSyncJob(Main.java:227)
at com.dtstack.chunjun.Main.main(Main.java:122)
at com.dtstack.chunjun.client.local.LocalClusterClientHelper.submit(LocalClusterClientHelper.java:35)
at com.dtstack.chunjun.client.Launcher.main(Launcher.java:119)
Caused by: org.apache.flink.runtime.client.JobInitializationException: Could not instantiate JobManager.
at org.apache.flink.runtime.dispatcher.Dispatcher.lambda$createJobManagerRunner$5(Dispatcher.java:488)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.flink.runtime.client.JobExecutionException: Cannot initialize task 'Sink: hdfssinkfactory': null
at org.apache.flink.runtime.executiongraph.ExecutionGraphBuilder.buildGraph(ExecutionGraphBuilder.java:293)
at org.apache.flink.runtime.scheduler.SchedulerBase.createExecutionGraph(SchedulerBase.java:322)
at org.apache.flink.runtime.scheduler.SchedulerBase.createAndRestoreExecutionGraph(SchedulerBase.java:276)
at org.apache.flink.runtime.scheduler.SchedulerBase.<init>(SchedulerBase.java:249)
at org.apache.flink.runtime.scheduler.DefaultScheduler.<init>(DefaultScheduler.java:133)
at org.apache.flink.runtime.scheduler.DefaultSchedulerFactory.createInstance(DefaultSchedulerFactory.java:111)
at org.apache.flink.runtime.jobmaster.JobMaster.createScheduler(JobMaster.java:342)
at org.apache.flink.runtime.jobmaster.JobMaster.<init>(JobMaster.java:327)
at org.apache.flink.runtime.jobmaster.factories.DefaultJobMasterServiceFactory.createJobMasterService(DefaultJobMasterServiceFactory.java:95)
at org.apache.flink.runtime.jobmaster.factories.DefaultJobMasterServiceFactory.createJobMasterService(DefaultJobMasterServiceFactory.java:39)
at org.apache.flink.runtime.jobmaster.JobManagerRunnerImpl.<init>(JobManagerRunnerImpl.java:163)
at org.apache.flink.runtime.dispatcher.DefaultJobManagerRunnerFactory.createJobManagerRunner(DefaultJobManagerRunnerFactory.java:86)
at org.apache.flink.runtime.dispatcher.Dispatcher.lambda$createJobManagerRunner$5(Dispatcher.java:472)
... 4 more
Caused by: java.lang.NullPointerException
at com.dtstack.chunjun.connector.hdfs.sink.HdfsOrcOutputFormat.openSource(HdfsOrcOutputFormat.java:107)
at com.dtstack.chunjun.sink.format.BaseFileOutputFormat.initVariableFields(BaseFileOutputFormat.java:112)
at com.dtstack.chunjun.connector.hdfs.sink.BaseHdfsOutputFormat.initVariableFields(BaseHdfsOutputFormat.java:97)
at com.dtstack.chunjun.sink.format.BaseFileOutputFormat.initializeGlobal(BaseFileOutputFormat.java:65)
at org.apache.flink.runtime.jobgraph.InputOutputFormatVertex.initializeOnMaster(InputOutputFormatVertex.java:110)
at org.apache.flink.runtime.executiongraph.ExecutionGraphBuilder.buildGraph(ExecutionGraphBuilder.java:289)
... 16 more
cat /data/soft/chunjun/chunjun-examples/json/hdfs/stream_hdfs_orc2.json
{
"job": {
"content": [
{
"reader": {
"parameter": {
"sliceRecordCount": ["100000"],
"column": [
{
"name": "id",
"type": "int"
},
{
"name": "col_boolean",
"type": "boolean"
},
{
"name": "col_tinyint",
"type": "tinyint"
},
{
"name": "col_smallint",
"type": "smallint"
},
{
"name": "col_int",
"type": "int"
},
{
"name": "col_bigint",
"type": "bigint"
},
{
"name": "col_float",
"type": "float"
},
{
"name": "col_double",
"type": "double"
},
{
"name": "col_string",
"type": "string"
},
{
"name": "col_varchar",
"type": "varchar(255)"
},
{
"name": "col_char",
"type": "char(255)"
},
{
"name": "col_binary",
"type": "binary"
},
{
"name": "col_timestamp",
"type": "timestamp"
},
{
"name": "col_date",
"type": "date"
}
]
},
"name": "streamreader"
},
"writer": {
"name": "hdfswriter",
"parameter": {
"path": "hdfs://nsprd/tmp/datax/tmp/flinkx/test1",
"defaultFS": "hdfs://nsprd",
"column": [
{
"name": "id",
"type": "int"
},
{
"name": "col_boolean",
"type": "boolean"
},
{
"name": "col_tinyint",
"type": "tinyint"
},
{
"name": "col_smallint",
"type": "smallint"
},
{
"name": "col_int",
"type": "int"
},
{
"name": "col_bigint",
"type": "bigint"
},
{
"name": "col_float",
"type": "float"
},
{
"name": "col_double",
"type": "double"
},
{
"name": "col_string",
"type": "string"
},
{
"name": "col_varchar",
"type": "varchar(255)"
},
{
"name": "col_char",
"type": "char(255)"
},
{
"name": "col_binary",
"type": "binary"
},
{
"name": "col_timestamp",
"type": "timestamp"
},
{
"name": "col_date",
"type": "date"
}
],
"fileType": "orc",
"maxFileSize": 10485760,
"nextCheckRows": 20000,
"fieldDelimiter": ",",
"encoding": "utf-8",
"fileName": "pt=20201214",
"writeMode": "overwrite"
}
}
}
],
"setting": {
"speed": {
"channel": 1,
"bytes": 0
}
}
}
}

- 你是用 json 还是 sql?目前 sql 还不支持,我还在想办法做得更通用。json 的情况下需要手动填上这两个字段,这两个字段是你读取文件的全部列名和列类型。
- 后面的改造思路是:text 的处理逻辑是直接切分,但是无法拿到所有的元数据信息;而 orc 和 parquet 可以根据文件拿到所有字段的元数据信息。