trino
trino copied to clipboard
OpenXJson Reordering Columns in Projection doesn't work with Top-level Arrays
The OpenXJson Format will deserialize JSON arrays as Rows. But if the user stores the top-level JSON values as Arrays and tries to select only specific columns, the results will be incorrect.
For example, a JSON value of:
[31, "second value", "third value"]
In a Table defined as:
create table my_json_table (
first_col int,
skipped_col varchar,
third_col varchar)
with (format = 'openx_json')
And a query of:
select
third_col,
first_col
from my_json_table
The result will be:
(second value, 31)
Instead of:
(third value, 31)
This isn't impacting me, but I found it while implementing my own Hive Format and wanted to raise it, in case others care. I do not know if/how it worked or didn't with the Hive SerDes, but this seems like a correctness issue regardless.
Here is a unit test I put together that runs against 454 and shows the issue:
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hive;
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;
import io.trino.testing.QueryRunner;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
import static io.trino.testing.TestingNames.randomNameSuffix;
public class OpenXTopLevelArrayTest
extends BaseHiveConnectorTest
{
@Override
protected QueryRunner createQueryRunner()
throws Exception
{
return createHiveQueryRunner(HiveQueryRunner.builder());
}
@Test
public void testOpenXArrayAsTopLevelValueWithColumnReorder()
throws IOException
{
// this needs to be made protected in BaseHiveConnectorTest
TrinoFileSystem fileSystem = getTrinoFileSystem();
Location tempDir = Location.of("local:///temp_" + UUID.randomUUID());
fileSystem.createDirectory(tempDir);
Location dataFile = tempDir.appendPath("data.json");
// passes when json is object
// String jsonText = "{ first_col: 31, skipped_col: \"second value\", third_col: \"third value\" }";
// fails when json is corresponding array
String jsonText = "[31, \"second value\", \"third value\"]";
try (OutputStream out = fileSystem.newOutputFile(dataFile).create()) {
out.write(jsonText.getBytes(StandardCharsets.UTF_8));
out.flush();
}
String table = "test_openx_" + randomNameSuffix();
assertUpdate("""
create table %s (
first_col int,
skipped_col varchar,
third_col varchar)
with (format = 'openx_json', external_location = '%s')"""
.formatted(table, tempDir));
assertQuery("""
select
third_col,
first_col
from %s""".formatted(table),
"VALUES ('third value', 31)");
}
}