[Java][C++] Java VectorSchemaRoot to C++ RecordBatchReader
I have a Java library that writes an Arrow Table to a VectorSchemaRoot object in memory, and I want to read the data with C++. However, I keep getting an error. What should I do?
From the docs I can only find C++ to Java examples, not Java to C++ ones.
Here is my example: [Java]
/**
 * Loads the first record block of the Arrow file at {@code filepath}, writes the loaded
 * batch to an in-memory stream with ArrowFileWriter, and returns that stream's contents
 * as a String.
 *
 * NOTE(review): the bytes produced by ArrowFileWriter are binary data, not text.
 * ByteArrayOutputStream#toString decodes them as text, which corrupts the content;
 * returning out.toByteArray() (a byte[]) avoids that.
 */
public String rootToString(){
try(
BufferAllocator allcator = new RootAllocator();
FileInputStream fileInputStream = new FileInputStream(filepath);
ArrowFileReader reader = new ArrowFileReader(fileInputStream.getChannel(), allcator);
ByteArrayOutputStream out = new ByteArrayOutputStream();
){
System.out.println("Record batches in file : " + reader.getRecordBlocks().size()); // Actually size = 1
// Only the first record block is read.
ArrowBlock arrowBlock = reader.getRecordBlocks().get(0);
VectorSchemaRoot root = reader.getVectorSchemaRoot();
reader.loadRecordBatch(arrowBlock);
// Dump the loaded batch as TSV for debugging.
System.out.println(root.contentToTSVString());
// Write the loaded batch into the in-memory stream `out`.
ArrowFileWriter writer = new ArrowFileWriter(root, null, Channels.newChannel(out));
writer.start();
writer.writeBatch();
writer.end();
writer.close();
// The binary buffer is converted to text here (see the review note above).
return out.toString();}}
[C++]
....
std::String JStringToString(JNIEnv* env, jstring string){
if(string == nullptr){
return std::string();
}
const char* chars = env->GetStringUTFChars(string, nullptr);
std::string ret(chars);
env->ReleaseStringUTFChars(string, chars);
return ret;
}
std::string test(std::string name){
if (status != JNI_ERR){
jclass cls = env->FindClass("com/xxxxx");
jmethodID mid = env->GetMethodID(cls, "", "(Ljava/lang/String;)V");
jstring arg = NewJString(name.c_str());
jobject obj = env->NewObject(cls, mid, arg);
mid = env->GetMethodID(cls, "rootToString", "()Ljava/lang/String;");
jstring ret = *(jstring)env->CallObjectMethod(test, mid);
std::cout << "Java String length is : " << env->GetStringLength(ret) << std::endl; // length is 2563
std::string result = JStringToCString(env, ret);
std::cout << "result length is : " << result.length() << std::endl; // length is 4609
return result;
}
int main(){
std::string test_result = test("/data/...../4stock_5day.arrow");
std::shared_ptrarrow::io::BufferReader bufferReader = std::make_sharedarrow::io::BufferReader(test_result);
std::shared_ptrarrow::ipc::RecordBatchFileReader reader =
arrow::ipc::RecordBatchFileReader::Open(bufferReader.get()).ValueOrDie();
std::cout << reader -> num_record_batches() << std::endl;
return 0;
}
The error is as follows:
xxxxxxxxxx/work/cpp/src/arrow/result.cc:28 : ValueOrDie called on an error : Invalid: File is smaller than indicated metadata size /usr/local/conda3/lib/libarrow.so.500(+0x518f0c)[0x7f44fc2d3f0c] /usr/local/conda3/lib/libarrow.so.500(_ZN) ...
What should I do? Is there any other way? Thanks.
cc @lidavidm @davisusanibar
@davisusanibar you already had a solution here I think, can you post it?
Or actually @Oooorchid see https://github.com/apache/arrow/pull/13788
FWIW, I believe the immediate problem in the code is the use of a string to transport the buffer over JNI. It should be byte[]. ByteArrayOutputStream#toString will attempt to decode the contents as text which corrupts the content.
Or actually @Oooorchid see #13788
FWIW, I believe the immediate problem in the code is the use of a string to transport the buffer over JNI. It should be byte[].
ByteArrayOutputStream#toString will attempt to decode the contents as text, which corrupts the content.
Thanks for your reply, your solution is perfect. I also solved this problem a few days ago. As you said, using a string to transport the buffer over JNI causes problems. I finally used byte[] to solve this problem, and here is my example:
......
jclass cls = env->FindClass->("com/xxxxx");
jmethodID mid = env->GetMethodID(cls, "init", "(Ljava/lang/String;)V");
jstring arg = NewJString(name.c_str());
jobject obj = env->NewObject(cls, mid, arg);
mid = env->GetMethodID(cls, "rootToByte", "[B");
jbyteArray dataArray = (jbyteArray)env->CallObjectMethod(test, mid);
int arr_len = env->GetArrayLength(dataArray);
std::cout << "arr_len is :" << arr_len << std::endl;
jbyte* bytes = env->GetByteArrayElements(dataArray, 0);
char* ret = (char*)bytes;
env->SetByteArrayRegion(dataArray, 0, arr_len, bytes);
// Actually if thers is no _arrlen_ specified, the result will stop when (char*)ret encounters 0
std::string result = std::string(ret, arr_len);
std::cout << "result is : " << result << std::endl;
return result;
.....
[Java] On the Java side, the method returns a byte[] like this:
......
// Get the reader's root and load the selected record block into it.
VectorSchemaRoot root = reader.getVectorSchemaRoot();
reader.loadRecordBatch(arrowBlock);
// Dump the loaded batch as TSV for debugging.
System.out.println(root.contentToTSVString());
// Write the loaded batch to `out` (presumably the in-memory ByteArrayOutputStream
// declared in the elided part of the method).
ArrowFileWriter writer = new ArrowFileWriter(root, null, Channels.newChannel(out));
writer.start();
writer.writeBatch();
writer.end();
writer.close();
// Return the raw bytes: no text decoding is applied to the binary content.
return out.toByteArray();}}
.....