Reading a hyperslab of a fixed-size dataset after fully reading a dataset with an unlimited dimension makes the hyperslab read progressively slower over repeated runs.
Seen only on Windows.
I have two h5 files and use one dataset from each. The first dataset is of size 1999 x 512 x 512, with the first dimension being unlimited. It is compressed using deflate compression (level 6) and has a chunk size of 20 x 10 x 10.
The second dataset is of size 2000 x 512 x 512 (no unlimited dimensions). It is compressed using deflate compression (level 6) and has chunk size of 70 x 20 x 50.
In my reproduction code, I read the whole of the first dataset (then close the identifiers of the first file). Then, I open the second dataset and read a hyperslab of the dataset (start = {0,0,0} and stride = {2,2,2}). Then I close all identifiers.
I have noticed that the execution time of the hyperslab read increases in successive iterations. This happens only on Windows; I could not reproduce the issue on Linux.
- HDF5 version (if building from a maintenance branch, please include the commit hash) : HDF5 1.10.11
- OS and version : Windows 11
You can find the h5 files used in the reproduction here: https://mathworks-my.sharepoint.com/:f:/p/abaruah/Ekr_sEmqx1hFlgPM4tWx55UBS7bzUtxAmHPM33bT6t51CQ?e=ChoCCJ
Reproduction Code:
#include "hdf5.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <chrono>
#include <array>
#include <random>
#define FILE1 "unlimDim_smallChunkSize_deflate_2.h5"
#define DATASET1 "/lrg_unl_dset2_double_dset"
#define DIM10 1999
#define DIM11 512
#define DIM12 512
#define FILE2 "largeGroupNesting.h5"
#define DATASET2 "/lrg_dset2_double_dset"
#define DIM20 1000
#define DIM21 256
#define DIM22 256
void unlimDsetRead()
{
hid_t file, space, dset, dcpl; /* Handles */
herr_t status;
double* dset_read = new double[DIM10 * DIM11 * DIM12];
/*
* Create a new file using the default properties.
*/
file = H5Fopen(FILE1, H5F_ACC_RDONLY, H5P_DEFAULT);
/*
* Create the chunked dataset.
*/
dset = H5Dopen(file, DATASET1, H5P_DEFAULT);
hid_t my_file_space_id = H5Dget_space(dset);
hid_t my_mem_space_id = H5Dget_space(dset);
hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);
//dset = H5Dcreate1 (file, DATASET, H5T_NATIVE_DOUBLE, space, dcpl);
std::chrono::time_point<std::chrono::high_resolution_clock> start, end;
start = std::chrono::high_resolution_clock::now();
/*
* Write the data to the dataset.
*/
status = H5Dread(dset, H5T_NATIVE_FLOAT, my_mem_space_id, my_file_space_id, xfer_plist_id,
dset_read);
end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration = end - start;
double durationInSeconds = duration.count();
std::cout << "unlimDsetRead Execution time: " << durationInSeconds << " seconds" << std::endl;
std::cout << "Full dset read status: " << status <<std::endl;
/*
* Close and release resources.
*/
status = H5Dclose(dset);
status = H5Fclose(file);
delete [] dset_read;
}
void hypersladDsetRead()
{
hid_t file, dataset; /* Handles */
herr_t status;
hid_t dataspace;
hid_t memspace;
hsize_t start[3] = {0,0,0};
hsize_t count[3];
hsize_t stride[3] = {2,2,2};
double* dset_read = new double[DIM20 * DIM21 * DIM22];
/*
* Open the file and the dataset.
*/
file = H5Fopen(FILE2, H5F_ACC_RDONLY, H5P_DEFAULT);
dataset = H5Dopen(file, DATASET2, H5P_DEFAULT);
dataspace = H5Dget_space(dataset); /* dataspace handle */
/*
* Define hyperslab in the dataset.
*/
count[0] = DIM20;
count[1] = DIM21;
count[2] = DIM22;
status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, start, stride, count, NULL);
memspace = H5Screate_simple(3,count,count);
std::chrono::time_point<std::chrono::high_resolution_clock> starttime, end;
starttime = std::chrono::high_resolution_clock::now();
/*
* Write the data to the dataset.
*/
/*
* Read data from hyperslab in the file into the hyperslab in
* memory and display.
*/
status = H5Dread(dataset, H5T_NATIVE_FLOAT, memspace, dataspace,
H5P_DEFAULT, dset_read);
end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration = end - starttime;
double durationInSeconds = duration.count();
std::cout << " hypersladDsetRead Execution time: " << durationInSeconds << " seconds" << std::endl;
std::cout << "Hyperslab read status: " << status <<std::endl;
/*
* Close and release resources.
*/
status = H5Sclose(dataspace);
status = H5Sclose(memspace);
status = H5Dclose(dataset);
status = H5Fclose(file);
delete [] dset_read;
}
/*
 * Entry point of the reproduction: performs the full-dataset read followed by
 * the hyperslab read 30 times in a row, so that the timings printed for each
 * iteration can be compared.
 */
int main()
{
    std::cout << "In main" << std::endl;

    const int iterations = 30;
    int completed = 0;
    while (completed < iterations) {
        unlimDsetRead();
        hypersladDsetRead();
        ++completed;
    }

    return 0;
}
Timing output from the repro code above: the ' hypersladDsetRead Execution time' increases from 17.0207 seconds to 22.7113 seconds. If I increase the number of iterations, it increases even further.
In main
unlimDsetRead Execution time: 97.685 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 17.0207 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 95.7474 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 17.4012 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 94.3763 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 17.2836 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 92.0514 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 17.0541 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 94.7215 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 18.0148 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 94.3346 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 18.5549 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 94.4006 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 17.8201 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 94.9821 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 18.8718 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 94.8652 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 18.3498 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 98.9125 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 18.3856 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 107.486 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 19.4889 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 95.8097 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 19.0471 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 94.8884 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 19.7365 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 93.8644 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.4456 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 96.0603 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 19.7554 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 95.9646 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.0125 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 99.1804 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 22.102 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 99.2952 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 21.3127 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 106.476 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.44 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 104.146 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.1246 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 103.349 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 21.0415 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 104.561 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.7447 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 101.318 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.6427 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 102.102 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 21.3635 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 101.485 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.3536 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 102.986 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.6622 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 102.533 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 20.6267 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 101.49 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 21.4892 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 101.257 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 21.612 seconds
Hyperslab read status: 0
unlimDsetRead Execution time: 108.484 seconds
Full dset read status: 0
hypersladDsetRead Execution time: 22.7113 seconds
Hyperslab read status: 0
Possibly due to realloc() being slow on Windows.