inlong icon indicating copy to clipboard operation
inlong copied to clipboard

[Bug][SDK] Limitation of Transform Sql Statement Parser

Open ying-hua opened this issue 1 year ago • 0 comments

What happened

Transform SQL cannot parse long attribute chains that follow an indexed array access, such as "$child(0).msg.msgId".

Error Log

net.sf.jsqlparser.JSQLParserException: Encountered unexpected token: "," ","
    at line 1, column 37.

Was expecting one of:

    "("


	at net.sf.jsqlparser.parser.CCJSqlParserManager.parse(CCJSqlParserManager.java:25)
	at org.apache.inlong.sdk.transform.process.TransformProcessor.initTransformSql(TransformProcessor.java:102)
	at org.apache.inlong.sdk.transform.process.TransformProcessor.init(TransformProcessor.java:94)
	at org.apache.inlong.sdk.transform.process.TransformProcessor.<init>(TransformProcessor.java:83)
	at org.apache.inlong.sdk.transform.process.TransformProcessor.create(TransformProcessor.java:72)
	at org.apache.inlong.sdk.transform.process.processor.TestJson2CsvProcessor.testJson2CsvForBug(TestJson2CsvProcessor.java:194)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
	at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
	at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
	at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
	at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
	at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
	at org.junit.runner.JUnitCore.run(JUnitCore.java:137)
	at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:69)
	at com.intellij.rt.junit.IdeaTestRunner$Repeater$1.execute(IdeaTestRunner.java:38)
	at com.intellij.rt.execution.junit.TestsRepeater.repeat(TestsRepeater.java:11)
	at com.intellij.rt.junit.IdeaTestRunner$Repeater.startRunnerWithArgs(IdeaTestRunner.java:35)
	at com.intellij.rt.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:232)
	at com.intellij.rt.junit.JUnitStarter.main(JUnitStarter.java:55)
Caused by: net.sf.jsqlparser.parser.ParseException: Encountered unexpected token: "," ","
    at line 1, column 37.

Was expecting one of:

    "("

	at net.sf.jsqlparser.parser.CCJSqlParser.generateParseException(CCJSqlParser.java:38866)
	at net.sf.jsqlparser.parser.CCJSqlParser.jj_consume_token(CCJSqlParser.java:38696)
	at net.sf.jsqlparser.parser.CCJSqlParser.InternalFunction(CCJSqlParser.java:17060)
	at net.sf.jsqlparser.parser.CCJSqlParser.Function(CCJSqlParser.java:16986)
	at net.sf.jsqlparser.parser.CCJSqlParser.InternalFunction(CCJSqlParser.java:17154)
	at net.sf.jsqlparser.parser.CCJSqlParser.Function(CCJSqlParser.java:16986)
	at net.sf.jsqlparser.parser.CCJSqlParser.PrimaryExpression(CCJSqlParser.java:13961)
	at net.sf.jsqlparser.parser.CCJSqlParser.BitwiseXor(CCJSqlParser.java:13763)
	at net.sf.jsqlparser.parser.CCJSqlParser.MultiplicativeExpression(CCJSqlParser.java:13716)
	at net.sf.jsqlparser.parser.CCJSqlParser.AdditiveExpression(CCJSqlParser.java:13679)
	at net.sf.jsqlparser.parser.CCJSqlParser.BitwiseAndOr(CCJSqlParser.java:13625)
	at net.sf.jsqlparser.parser.CCJSqlParser.ConcatExpression(CCJSqlParser.java:13601)
	at net.sf.jsqlparser.parser.CCJSqlParser.SimpleExpression(CCJSqlParser.java:13587)
	at net.sf.jsqlparser.parser.CCJSqlParser.SQLCondition(CCJSqlParser.java:12761)
	at net.sf.jsqlparser.parser.CCJSqlParser.Condition(CCJSqlParser.java:12491)
	at net.sf.jsqlparser.parser.CCJSqlParser.SelectExpressionItem(CCJSqlParser.java:8561)
	at net.sf.jsqlparser.parser.CCJSqlParser.SelectItem(CCJSqlParser.java:8597)
	at net.sf.jsqlparser.parser.CCJSqlParser.SelectItemsList(CCJSqlParser.java:8551)
	at net.sf.jsqlparser.parser.CCJSqlParser.PlainSelect(CCJSqlParser.java:7838)
	at net.sf.jsqlparser.parser.CCJSqlParser.SetOperationList(CCJSqlParser.java:8107)
	at net.sf.jsqlparser.parser.CCJSqlParser.SelectBody(CCJSqlParser.java:7718)
	at net.sf.jsqlparser.parser.CCJSqlParser.Select(CCJSqlParser.java:7713)
	at net.sf.jsqlparser.parser.CCJSqlParser.SingleStatement(CCJSqlParser.java:377)
	at net.sf.jsqlparser.parser.CCJSqlParser.Statement(CCJSqlParser.java:285)
	at net.sf.jsqlparser.parser.CCJSqlParserManager.parse(CCJSqlParserManager.java:23)
	... 32 more


Process finished with exit code 255

What you expected to happen

This SQL statement should be parsed successfully, without throwing a JSQLParserException.

How to reproduce

Add the following test method to TestJson2CsvProcessor.java and run it.

/**
 * Reproduces the transform SQL parser limitation for attribute chains that follow an
 * indexed array access, e.g. {@code $child(0).msg.msgId}.
 *
 * <p>Case 1 uses un-indexed {@code $child.msg.*} paths and expects one CSV row per element
 * of the {@code msgs} array. Case 2 uses indexed {@code $child(n).msg.*} paths and expects a
 * single CSV row that combines fields taken from both array elements; per this report,
 * creating the processor for case 2 currently fails with a {@code JSQLParserException}.
 *
 * @throws Exception if the processor cannot be created or the transform fails
 */
@Test
    public void testJson2CsvForBug() throws Exception {
        List<FieldInfo> fields = this.getTestFieldList("sid", "msgId", "msgBody");
        JsonSourceInfo jsonSource = new JsonSourceInfo("UTF-8", "msgs");
        CsvSinkInfo csvSink = new CsvSinkInfo("UTF-8", '|', '\\', fields);
        // Both cases transform the same JSON document, so it is built only once.
        String srcString = "{\n" +
                "    \"sid\": \"value1\",\n" +
                "    \"msgs\": [\n" +
                "    {\"msg\":{ \"msgId\": \"111\", \"msgBody\": \"msg1_body\"}},\n" +
                "    {\"msg\":{ \"msgId\": \"222\", \"msgBody\": \"msg2_body\"}}\n" +
                "    ]\n" +
                "}";

        // case1 with no bug: un-indexed child access yields one row per array element
        String transformSql = "select $root.sid,$child.msg.msgId,$child.msg.msgBody from source";
        TransformConfig config = new TransformConfig(transformSql);
        TransformProcessor<String, String> processor = TransformProcessor
                .create(config, SourceDecoderFactory.createJsonDecoder(jsonSource),
                        SinkEncoderFactory.createCsvEncoder(csvSink));
        List<String> output = processor.transform(srcString, new HashMap<>());
        Assert.assertEquals(2, output.size());
        // JUnit's contract is assertEquals(expected, actual); keep the expected value first
        // so that failure messages are reported the right way round.
        Assert.assertEquals("value1|111|msg1_body", output.get(0));
        Assert.assertEquals("value1|222|msg2_body", output.get(1));

        // case2 with bug: indexed child access followed by a nested attribute chain
        transformSql = "select $root.sid,$child(0).msg.msgId,$child(1).msg.msgBody from source";
        config = new TransformConfig(transformSql);
        processor = TransformProcessor
                .create(config, SourceDecoderFactory.createJsonDecoder(jsonSource),
                        SinkEncoderFactory.createCsvEncoder(csvSink));
        output = processor.transform(srcString, new HashMap<>());
        Assert.assertEquals(1, output.size());
        Assert.assertEquals("value1|111|msg2_body", output.get(0));
    }

Environment

No response

InLong version

master

InLong Component

InLong SDK

Are you willing to submit PR?

  • [ ] Yes, I am willing to submit a PR!

Code of Conduct

ying-hua avatar Sep 28 '24 07:09 ying-hua