hdf5 icon indicating copy to clipboard operation
hdf5 copied to clipboard

Segmentation violation with H5Lget_info with HDF5 1.14.4.3

Open abhibaruah opened this issue 1 year ago • 3 comments
trafficstars

  • HDF5 version - 1.14.4.3
  • OS : Debian 11
  • Compiler : gcc

I have a dataset named 'OBJECT_REFERENCES' of type H5T_REFERENCE. I have a workflow (derived from one of our tests) where I read the reference dataset and then call H5Rdereference and H5Lget_info on a subset of the reference dataset.

The code below works with HDF5 1.10.11 but with 1.14.4.3, I see a segmentation violation during the call to H5Lget_info. I see the segV for both H5Lget_info1 and H5Lget_info2 (in v1.14.4.3).

Can someone take a look at it and let me know if this is a bug, and if there is a workaround to revert to the non segV behavior?

Link to the h5 file for reproduction: https://mathworks-my.sharepoint.com/:u:/p/abaruah/EdkeRkHv9SZMgEDGfvULYR4BcaBMX8bI9Q-UfylNeqMyog?e=ulkR7K

#include "hdf5.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include "string.h"

#define FILE1           "h5treferences.h5"
#define DATASET1        "/OBJECT_REFERENCES"
#define DIM10            4
#define DIM11            8


int main() {
    hid_t           file, space, dset, dcpl;    /* Handles */
    herr_t          status;
    

    uint8_t* ref = new uint8_t[DIM10 * DIM11];
    /*
     * Create a new file using the default properties.
     */
    file = H5Fopen(FILE1, H5F_ACC_RDONLY, H5P_DEFAULT);

    /*
     * Create the chunked dataset.
     */
    dset = H5Dopen(file, DATASET1, H5P_DEFAULT);
	
	hid_t  h_type = H5Dget_type(dset);
    hid_t my_file_space_id = H5Dget_space(dset);
    hid_t my_mem_space_id = H5Dget_space(dset);
    hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);
	
    //dset = H5Dcreate1 (file, DATASET, H5T_NATIVE_DOUBLE, space, dcpl);
    
    /*
     * Write the data to the dataset.
     */
    status = H5Dread(dset, h_type, my_mem_space_id, my_file_space_id, xfer_plist_id,
        ref);
	std::cout << "Read Status: " << status <<std::endl;
		
	//for (int i = 0; i<32; i++){
	//	std::cout << (int)*(ref + i) <<std::endl;
	//}	
	uint8_t* ref_subset = &ref[3*DIM11];
	
	//for (int i = 0; i<8; i++){
	//	std::cout << (int)*(ref_subset + i) <<std::endl;
	//}
	
	H5R_type_t ref_type = H5R_OBJECT;
		
	ssize_t name_len = H5Rget_name(file,ref_type,ref_subset,NULL,0);

    if ( name_len < 0 ) {
        std::cout << "Error 1" << std::endl;
    }
	
	std::string name;
    name.resize(name_len+1);
    name_len = H5Rget_name(file,ref_type,ref_subset,&name[0],name_len+1);
    name.resize(name_len);
	
	H5T_cset_t cset = H5T_CSET_ASCII;
	
	if (name != "/") {
		// Get the object's ID to determine its encoding
        hid_t obj_id = H5Rdereference1(file, ref_type, ref_subset);
        if ( obj_id >= 0 ) {
            std::cout << "H5Rdereference1 success!" <<std::endl;
        }

        // Get link info to know how to interpret name
        H5L_info_t linfo;
        herr_t status = H5Lget_info(obj_id, name.c_str(), &linfo, H5P_DEFAULT);
		if ( status >= 0 ) {
            std::cout << "H5Lget_info success!" <<std::endl;
        }
        cset = linfo.cset;

        // Don't leak resource
        status = H5Oclose(obj_id);
        if ( status >= 0 ) {
            std::cout << "H5Oclose success!" <<std::endl; // Shouldn't happen
        }
    }
    
    status = H5Dclose(dset);
    status = H5Fclose(file);

    std::cout << "Success!!" <<std::endl;
    delete [] ref;
	
	return 0;
}

abhibaruah avatar Jul 11 '24 19:07 abhibaruah

I'm able to replicate this crash. The root cause likely has something to do with trying to use the old file format, as running it with the debug library and valgrind shows that H5Lget_info2() fails at H5VL_set_vol_wrapper()'s assertion that vol_obj->data exists.

Adaptation of the test script for the C API:


#include "hdf5.h"
#include <stdio.h>
#include <stdlib.h>
#include "string.h"

#define FILE1 "h5treferences.h5"
#define DATASET1 "/OBJECT_REFERENCES"
#define DIM10 4
#define DIM11 8

int main() {
  hid_t file, space, dset, dcpl; /* Handles */
  herr_t status;

  uint8_t ref[DIM10 * DIM11];

  /*
   * Create a new file using the default properties.
   */
  file = H5Fopen(FILE1, H5F_ACC_RDONLY, H5P_DEFAULT);

  /*
   * Create the chunked dataset.
   */
  dset = H5Dopen(file, DATASET1, H5P_DEFAULT);

  hid_t h_type = H5Dget_type(dset);
  hid_t my_file_space_id = H5Dget_space(dset);
  hid_t my_mem_space_id = H5Dget_space(dset);
  hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);

  /*
   * Write the data to the dataset.
   */
  status = H5Dread(dset, h_type, my_mem_space_id, my_file_space_id, xfer_plist_id, ref);

  size_t type_size = H5Tget_size(h_type);

  printf("Type size: %zu\n", type_size);
  printf("Read Status: %d\n", status);

  uint8_t *ref_subset = &ref[3 * DIM11];

  H5R_type_t ref_type = H5R_OBJECT;
  ssize_t name_len = H5Rget_name(file, ref_type, ref_subset, NULL, 0);

  if (name_len < 0) {
    printf("Error 1\n");
  }

  char *name = NULL;
  name_len = H5Rget_name(file, ref_type, ref_subset, &name[0], name_len + 1);

  printf("Allocating %zu bytes for name\n", name_len + 1);
  name = malloc(name_len + 1);
  name_len = H5Rget_name(file, ref_type, ref_subset, &name[0], name_len + 1);
  printf("name = %s\n", name);

  H5T_cset_t cset = H5T_CSET_ASCII;

  if (name != "/") {
    // Get the object's ID to determine its encoding
    hid_t obj_id = H5Rdereference(file, H5P_DEFAULT, ref_type, ref_subset);
    if (obj_id >= 0) {
      printf("H5Rdereference1 success! \n");
    }

    // Get link info to know how to interpret name
    H5L_info_t linfo;
    herr_t status = H5Lget_info2(obj_id, name, &linfo, H5P_DEFAULT);
    if (status >= 0) {
      printf("H5Lget_info success! \n");
    }
    cset = linfo.cset;

    // Don't leak resource
    status = H5Oclose(obj_id);
    if (status >= 0) {
      printf("H5Oclose success! (Shouldn't happen) \n");
    }
  }

  status = H5Dclose(dset);
  status = H5Fclose(file);

  printf("Success!! \n");

  free(name);
  return 0;
}

mattjala avatar Jul 15 '24 15:07 mattjala

@abhibaruah I believe you need to use "ref" like this: hobj_ref_t *ref_obj; <- I just used a different name for distinction ref_obj = new hobj_ref_t[sizeof(hobj_ref_t) * DIM10];

instead of this: uint8_t* ref = new uint8_t[DIM10 * DIM11];

Then you pass ref_obj into H5Dread and pass &ref_obj[0] (or 1 or 2) into H5Rget_name. Please let us know if that works.

bmribler avatar Jul 18 '24 10:07 bmribler

Thanks @bmribler. I replaced uint8_t* ref = new uint8_t[DIM10 * DIM11];

with

hobj_ref_t *ref; ref = new hobj_ref_t[sizeof(hobj_ref_t) * DIM10];

and

uint8_t* ref_subset = &ref[3DIM11]; with hobj_ref_t ref_subset = &ref[3];

However, I am still seeing the segmentation fault.

My code:

#include "hdf5.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include "string.h"

#define FILE1           "h5treferences.h5"
#define DATASET1        "/OBJECT_REFERENCES"
#define DIM10            4
#define DIM11            8


int main() {
    hid_t           file, space, dset, dcpl;    /* Handles */
    herr_t          status;
    

    //uint8_t* ref = new uint8_t[DIM10 * DIM11];
	hobj_ref_t *ref;
	ref = new hobj_ref_t[sizeof(hobj_ref_t) * DIM10];


    /*
     * Create a new file using the default properties.
     */
    file = H5Fopen(FILE1, H5F_ACC_RDONLY, H5P_DEFAULT);

    /*
     * Create the chunked dataset.
     */
    dset = H5Dopen(file, DATASET1, H5P_DEFAULT);
	
	hid_t  h_type = H5Dget_type(dset);
    hid_t my_file_space_id = H5Dget_space(dset);
    hid_t my_mem_space_id = H5Dget_space(dset);
    hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);
	
    //dset = H5Dcreate1 (file, DATASET, H5T_NATIVE_DOUBLE, space, dcpl);
    
    /*
     * Write the data to the dataset.
     */
    status = H5Dread(dset, h_type, my_mem_space_id, my_file_space_id, xfer_plist_id,
        ref);
	std::cout << "Read Status: " << status <<std::endl;
		
	//for (int i = 0; i<32; i++){
	//	std::cout << (int)*(ref + i) <<std::endl;
	//}	
	//uint8_t* ref_subset = &ref[2*DIM11];
	hobj_ref_t* ref_subset = &ref[3];
	
	//for (int i = 0; i<8; i++){
	//	std::cout << (int)*(ref_subset + i) <<std::endl;
	//}
	
	H5R_type_t ref_type = H5R_OBJECT;
		
	ssize_t name_len = H5Rget_name(file,ref_type,ref_subset,NULL,0);

    if ( name_len < 0 ) {
        std::cout << "Error 1" << std::endl;
    }
	
	std::string name;
    name.resize(name_len+1);
    name_len = H5Rget_name(file,ref_type,ref_subset,&name[0],name_len+1);
    name.resize(name_len);
	
	H5T_cset_t cset = H5T_CSET_ASCII;
	
	if (name != "/") {
		// Get the object's ID to determine its encoding
        hid_t obj_id = H5Rdereference1(file, ref_type, ref_subset);
        if ( obj_id >= 0 ) {
            std::cout << "H5Rdereference1 success!" <<std::endl;
        }
        std::cout << "Name: " << name <<std::endl;
        // Get link info to know how to interpret name
        H5L_info_t linfo;
        herr_t status = H5Lget_info(obj_id, name.c_str(), &linfo, H5P_DEFAULT);
		if ( status >= 0 ) {
            std::cout << "H5Lget_info success!" <<std::endl;
        }
        cset = linfo.cset;

        // Don't leak resource
        status = H5Oclose(obj_id);
        if ( status >= 0 ) {
            std::cout << "H5Oclose success!" <<std::endl; // Shouldn't happen
        }
    }
    
    status = H5Dclose(dset);
    status = H5Fclose(file);

    std::cout << "Success!!" <<std::endl;
    delete [] ref;
	
	return 0;
}

abhibaruah avatar Jul 19 '24 18:07 abhibaruah

Hi @abhibaruah, It appeared that the segfault happened here: herr_t status = H5Lget_info(obj_id, name.c_str(), &linfo, H5P_DEFAULT); when obj_id, which is a datatype ID in this case, was passed in.

I'm aware that the documentation says: "The identifier may be that of a file, group, dataset, named datatype, or attribute." However, it doesn't seem to work properly. While I'm getting advice from the developers, you can by pass this segfault by passing in the file ID for loc_id instead. Please note that the obj_id (aka datatype ID) can still be passed in to other APIs such as H5Tget_class, H5Tget_size, H5Oget_info2, etc...

bmribler avatar Jul 21 '24 03:07 bmribler

Thanks @bmribler. Replacing obj_id withthe file ID helped to resolve the segV and work around it.

abhibaruah avatar Jul 23 '24 17:07 abhibaruah

FYI, @abhibaruah, this issue has been fixed (PR #4733)

bmribler avatar Aug 23 '24 16:08 bmribler