pandora
pandora copied to clipboard
Segmentation fault with hdf5 serialization
Dear all, I have a strange error on F24. The serialization code fails with a SIGSEGV:
./epidemy
simulation: 0 of: 1 initialized
[ws121924:07778] *** Process received signal ***
[ws121924:07778] Signal: Segmentation fault (11)
[ws121924:07778] Signal code: Address not mapped (1)
[ws121924:07778] Failing at address: 0x7
[ws121924:07778] [ 0] /lib64/libc.so.6(+0x347e0)[0x7f9d6825f7e0]
[ws121924:07778] [ 1] /lib64/libc.so.6(strlen+0x26)[0x7f9d682b3336]
[ws121924:07778] [ 2] /usr/local/HDF_Group/HDF5/1.8.17/lib/libhdf5.so.10.2.0(H5T__conv_vlen+0x43e)[0x7f9d6901227e]
[ws121924:07778] [ 3] /usr/local/HDF_Group/HDF5/1.8.17/lib/libhdf5.so.10.2.0(H5T_convert+0x6f)[0x7f9d69007cbf]
[ws121924:07778] [ 4] /usr/local/HDF_Group/HDF5/1.8.17/lib/libhdf5.so.10.2.0(H5D__scatgath_write+0x1e7)[0x7f9d68f20db7]
[ws121924:07778] [ 5] /usr/local/HDF_Group/HDF5/1.8.17/lib/libhdf5.so.10.2.0(+0x73a54)[0x7f9d68f08a54]
[ws121924:07778] [ 6] /usr/local/HDF_Group/HDF5/1.8.17/lib/libhdf5.so.10.2.0(+0x84a3d)[0x7f9d68f19a3d]
[ws121924:07778] [ 7] /usr/local/HDF_Group/HDF5/1.8.17/lib/libhdf5.so.10.2.0(H5Dwrite+0x104)[0x7f9d68f19fc4]
[ws121924:07778] [ 8] /home/mario/git/pandora/build-release/libpandora.so(_ZN6Engine10Serializer25executeAgentSerializationERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEi+0x12c8)[0x7f9d6b559ab8]
[ws121924:07778] [ 9] /home/mario/git/pandora/build-release/libpandora.so(_ZN6Engine10Serializer14serializeAgentEPNS_5AgentERKii+0x19e)[0x7f9d6b55b49e]
[ws121924:07778] [10] /home/mario/git/pandora/build-release/libpandora.so(_ZN6Engine10Serializer15serializeAgentsERKiSt20_List_const_iteratorISt10shared_ptrINS_5AgentEEES7_+0x4b)[0x7f9d6b55b58b]
[ws121924:07778] [11] /home/mario/git/pandora/build-release/libpandora.so(_ZN6Engine14SpacePartition15serializeAgentsERKi+0x19)[0x7f9d6b565819]
[ws121924:07778] [12] /home/mario/git/pandora/build-release/libpandora.so(_ZN6Engine5World4stepEv+0x3dd)[0x7f9d6b57558d]
[ws121924:07778] [13] /home/mario/git/pandora/build-release/libpandora.so(_ZN6Engine5World3runEv+0x3d8)[0x7f9d6b576068]
[ws121924:07778] [14] ./epidemy(main+0x9b)[0x40c37b]
[ws121924:07778] [15] /lib64/libc.so.6(__libc_start_main+0xf1)[0x7f9d6824b731]
[ws121924:07778] [16] ./epidemy(_start+0x29)[0x40c599]
[ws121924:07778] *** End of error message ***
Segmentation fault (core dumped)
I've tried to debug it for a while, but I cannot understand why it's happening. I link against a parallel version of HDF5 v1.8.17 that was built separately.
Thanks for any help you could give me.
Best,
Mario
The data it is trying to write are (from Serializer::executeAgentSerialization): H5Dwrite(datasetId, idType, memorySpace, fileSpace, H5P_DEFAULT, &(data->at(0)));
Type: Human/step0/id File: 16777217 Size: 20000
fileSpace: 67108871 Offset: 0 Stride: 1 Count: 1 Block: 20000
datasetId: 83886080 idType: 50331843 memorySpace: 67108872 fileSpace: 67108871 Data: Human_0
After some more debugging I found that the problem happens if the array has more than one element (it usually has one element per agent). If I leave only one string in it, it works fine.
This is confirmed by the fact that serializing each agent id separately (seems to) work (no segfault):
hsize_t block;
block = 1;
hsize_t simpleDimension = 1;
hsize_t newSize;
newSize = currentIndex+1;
itI->second = currentIndex+data->size();
std::ostringstream oss;
oss << type << "/step" << step << "/" << itM->first;
hid_t datasetId = H5Dopen(_agentsFileId, oss.str().c_str(), H5P_DEFAULT);
for (auto d : *data){
H5Dset_extent( datasetId, &newSize);
hid_t fileSpace = H5Dget_space(datasetId);
H5Sselect_hyperslab(fileSpace, H5S_SELECT_SET, offset, stride, count, &block);
log_INFO(logName.str(), "block " << block);
hid_t idType = H5Tcopy(H5T_C_S1);
H5Tset_size (idType, H5T_VARIABLE);
hid_t memorySpace = H5Screate_simple(1, &simpleDimension, 0);
log_INFO(logName.str(), "Dimension " << simpleDimension);
log_INFO(logName.str(), "d " << d);
H5Dwrite(datasetId, idType, memorySpace, fileSpace, H5P_DEFAULT, &d);
data->clear();
H5Sclose(memorySpace);
H5Sclose(fileSpace);
}
Everything points to a problem in the way the original code accesses the dataset, because I can write the data with this (inspired by a Stack Overflow answer):
const size_t n = data->size();
hsize_t simpleDimension = n;
//log_INFO(logName.str(), "Serializing: " << simpleDimension);
char* dataA[n];
for (size_t i = 0; i < n; i++) {
dataA[i] = data->at(i).c_str();
}
data->clear();
hid_t hdf5file= H5Fcreate("test.hdf5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
hid_t group = H5Gcreate2(hdf5file, "/MyGroup", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
hsize_t dim1=data->size();
hid_t dataspace = H5Screate_simple(1, &dim1, NULL);
hid_t datatype = H5Tcopy(H5T_C_S1);
int ret = H5Tset_size (datatype, H5T_VARIABLE);
hid_t dataset = H5Dcreate2(group, "Samples", datatype, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
H5Dwrite(dataset, datatype, dataspace, H5S_ALL, H5P_DEFAULT, dataA);
H5Dclose(dataset);
H5Tclose(datatype);
H5Sclose(dataspace);
H5Gclose(group);
H5Fclose(hdf5file);
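Okay, now it works if I first copy the strings to be serialized into a contiguous array of C-string pointers. With an H5T_VARIABLE string type, H5Dwrite expects the memory buffer to be an array of char* pointers, whereas &(data->at(0)) points to std::string objects: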
@@ -503,18 +503,30 @@ void Serializer::executeAgentSerialization( const std::string & type, int step)
for(StringMap::iterator itM=attributesS->begin(); itM!=attributesS->end(); itM++)
{
std::vector<std::string> * data = itM->second;
- hsize_t block[1];
- block[0] = data->size();
- hsize_t simpleDimension = data->size();
- // TODO es repeteix per cada atribut
- hsize_t newSize[1];
- newSize[0] = currentIndex+data->size();
itI->second = currentIndex+data->size();
-
+
+ std::stringstream logName;
+ logName << "Serializer_" << _scheduler.getId();
+
+ const size_t n = data->size();
+ hsize_t simpleDimension = n;
+ //log_INFO(logName.str(), "Serializing: " << simpleDimension);
+ char* dataA[n];
+ for (size_t i = 0; i < n; i++) {
+ dataA[i] = data->at(i).c_str();
+ }
+
+ hsize_t block[1];
+ block[0] = data->size();
+
+ // TODO es repeteix per cada atribut
+ hsize_t newSize[1];
+ newSize[0] = currentIndex+data->size();
+
std::ostringstream oss;
oss << type << "/step" << step << "/" << itM->first;
-
+
hid_t datasetId = H5Dopen(_agentsFileId, oss.str().c_str(), H5P_DEFAULT);
H5Dset_extent( datasetId, newSize);
hid_t fileSpace = H5Dget_space(datasetId);
@@ -522,12 +534,13 @@ void Serializer::executeAgentSerialization( const std::string & type, int step)
hid_t idType = H5Tcopy(H5T_C_S1);
H5Tset_size (idType, H5T_VARIABLE);
hid_t memorySpace = H5Screate_simple(1, &simpleDimension, 0);
- H5Dwrite(datasetId, idType, memorySpace, fileSpace, H5P_DEFAULT, &(data->at(0)));
+ H5Dwrite(datasetId, idType, memorySpace, fileSpace, H5P_DEFAULT, dataA);
data->clear();
H5Sclose(memorySpace);
H5Sclose(fileSpace);
H5Dclose(datasetId);
}
}