kafka-connect-file-pulse
kafka-connect-file-pulse copied to clipboard
How to fix namespace not found issues
Document looks like this:
<env:Envelope
xmlns:env="http://schemas.xmlsoap.org/soap/envelope/">
<env:Body>
<wd:Get_Organizations_Response
xmlns:wd="urn:com.workday/bsvc" wd:version="v38.0">
<wd:Request_Criteria>
...
When I start the File Pulse adapter using:
curl -i -X PUT -H "Accept:application/json" \
-H "Content-Type:application/json" http://localhost:8083/connectors/source-filepulse-xml-02/config \
-d '{
"connector.class":"io.streamthoughts.kafka.connect.filepulse.source.FilePulseSourceConnector",
"fs.scan.directory.path":"/data/",
"fs.scan.interval.ms":"10000",
"fs.scan.filters":"io.streamthoughts.kafka.connect.filepulse.scanner.local.filter.RegexFileListFilter",
"file.filter.regex.pattern":".*Resources\\.xml$",
"offset.strategy":"name",
"task.reader.class": "io.streamthoughts.kafka.connect.filepulse.reader.XMLFileInputReader",
"xpath.expression": "/*/*/wd:Get_Organizations_Response",
"topic":"humanResources-01",
"internal.kafka.reporter.bootstrap.servers": "broker:29092",
"internal.kafka.reporter.topic":"_connect-file-pulse-status",
"fs.cleanup.policy.class": "io.streamthoughts.kafka.connect.filepulse.clean.LogCleanupPolicy",
"tasks.max": 1
}'
I get the following exception:
kafka-connect | [2022-04-13 19:13:58,404] ERROR [source-filepulse-xml-02|task-0] Error while processing source file '[name='humanResources.xml', path='/data', size=7255, lastModified=1649874446404, inode=7558778, hash=399748521]' (io.streamthoughts.kafka.connect.filepulse.source.KafkaFileStateReporter:119)
kafka-connect | io.streamthoughts.kafka.connect.filepulse.reader.ReaderException: Cannot compile XPath expression '/*/*/wd:Get_Organizations_Response'
kafka-connect | at io.streamthoughts.kafka.connect.filepulse.reader.XMLFileInputReader$XMLFileInputIterator.<init>(XMLFileInputReader.java:127)
kafka-connect | at io.streamthoughts.kafka.connect.filepulse.reader.XMLFileInputReader.newIterator(XMLFileInputReader.java:81)
kafka-connect | at io.streamthoughts.kafka.connect.filepulse.reader.AbstractFileInputReader.newIterator(AbstractFileInputReader.java:55)
kafka-connect | at io.streamthoughts.kafka.connect.filepulse.source.FileInputIterable.open(FileInputIterable.java:66)
kafka-connect | at io.streamthoughts.kafka.connect.filepulse.source.DefaultFileRecordsPollingConsumer.openAndGetIteratorOrNullIfInvalid(DefaultFileRecordsPollingConsumer.java:278)
kafka-connect | at io.streamthoughts.kafka.connect.filepulse.source.DefaultFileRecordsPollingConsumer.next(DefaultFileRecordsPollingConsumer.java:156)
kafka-connect | at io.streamthoughts.kafka.connect.filepulse.source.FilePulseSourceTask.poll(FilePulseSourceTask.java:126)
kafka-connect | at org.apache.kafka.connect.runtime.WorkerSourceTask.poll(WorkerSourceTask.java:289)
kafka-connect | at org.apache.kafka.connect.runtime.WorkerSourceTask.execute(WorkerSourceTask.java:256)
kafka-connect | at org.apache.kafka.connect.runtime.WorkerTask.doRun(WorkerTask.java:185)
kafka-connect | at org.apache.kafka.connect.runtime.WorkerTask.run(WorkerTask.java:235)
kafka-connect | at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
kafka-connect | at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
kafka-connect | at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
kafka-connect | at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
kafka-connect | at java.base/java.lang.Thread.run(Thread.java:834)
kafka-connect | Caused by: javax.xml.xpath.XPathExpressionException: net.sf.saxon.trans.XPathException: Namespace prefix 'wd' has not been declared
kafka-connect | at net.sf.saxon.xpath.XPathEvaluator.compile(XPathEvaluator.java:203)
kafka-connect | at io.streamthoughts.kafka.connect.filepulse.reader.XMLFileInputReader$XMLFileInputIterator.<init>(XMLFileInputReader.java:121)
kafka-connect | ... 15 more
kafka-connect | Caused by: net.sf.saxon.trans.XPathException: Namespace prefix 'wd' has not been declared
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.grumble(XPathParser.java:315)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.grumble(XPathParser.java:272)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.makeStructuredQName(XPathParser.java:3796)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.makeNameTest(XPathParser.java:3848)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parseNodeTest(XPathParser.java:2757)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parseBasicStep(XPathParser.java:2253)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parseStepExpression(XPathParser.java:2076)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parseRemainingPath(XPathParser.java:2035)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parsePathExpression(XPathParser.java:1925)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parseSimpleMappingExpression(XPathParser.java:1971)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parseUnaryExpression(XPathParser.java:1824)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parseExprSingle(XPathParser.java:753)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parseExpression(XPathParser.java:660)
kafka-connect | at net.sf.saxon.expr.parser.XPathParser.parse(XPathParser.java:522)
kafka-connect | at net.sf.saxon.expr.parser.ExpressionTool.make(ExpressionTool.java:107)
kafka-connect | at net.sf.saxon.xpath.XPathEvaluator.compile(XPathEvaluator.java:193)
kafka-connect | ... 16 more
What is the proper way to ignore the env: prefix and ensure the namespace is declared?
Hi @folivieri, here is the documentation for XMLReader: https://streamthoughts.github.io/kafka-connect-file-pulse/docs/developer-guide/file-readers/#xxxxmlfileinputreader
You should try to configure reader.xml.parser.namespace.aware.enabled=true.
This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.
This issue was closed because it has been stalled for 30 days with no activity.