Segfault in iterate_files_by_contenttype_expat_callback_element_start
Given a certain file (which is unfortunately contains proprietary data), created by a client's business intelligence systems (i.e. not created by Microsoft Excel)...
During sheet_open, I ran into serious issues with the else if (XML_Char_icmp(name, X("Default")) == 0) branch of iterate_files_by_contenttype_expat_callback_element_start.
Foremost was that you are explicitly passing NULL as the second argument to unzGetCurrentFileInfo, which, at least in my version of minizip (remember that my build is wonky), crashes because the minizip function dereferences this without checking for NULL. I fixed this by allocating and then freeing a temporary struct of the correct type even though we ignore its values.
Once that crash was fixed, I found that this while loop was endless because this call set the minizip file pointer back to file index 0 each time data->filecallbackfn(data->zip, filename, contenttype, data->filecallbackdata);
I hack-fixed this by maintaining my own counter of which file index we should be on and re-incrementing the minizip file pointer each pass through the loop like this:
// prevent the endless loop
int looperx = 0;
for( looperx = 0; looperx < fileindex && status == UNZ_OK; ++looperx ) {
status = unzGoToNextFile(data->zip);
}
The complete solution is a mess, but is given below. Ignore doFindRelsFirst that was a wild goose chase that I haven't deleted out yet.
I'll try to figure out how to create a test file that demonstrates these issues without the proprietary data.
//callback function definition for use with iterate_files_by_contenttype
typedef void (*contenttype_file_callback_fn)(ZIPFILETYPE* zip, const XML_Char* filename, const XML_Char* contenttype, void* callbackdata);
struct iterate_files_by_contenttype_callback_data {
ZIPFILETYPE* zip;
const XML_Char* contenttype;
contenttype_file_callback_fn filecallbackfn;
void* filecallbackdata;
bool doFindRelsFirst;
};
//expat callback function for element start used by iterate_files_by_contenttype
void iterate_files_by_contenttype_expat_callback_element_start (void* callbackdata, const XML_Char* name, const XML_Char** atts)
{
struct iterate_files_by_contenttype_callback_data* data = (struct iterate_files_by_contenttype_callback_data*)callbackdata;
if (XML_Char_icmp(name, X("Override")) == 0) {
//explicitly specified file
const XML_Char* contenttype;
const XML_Char* partname;
if ((contenttype = get_expat_attr_by_name(atts, X("ContentType"))) != NULL && XML_Char_icmp(contenttype, data->contenttype) == 0) {
if ((partname = get_expat_attr_by_name(atts, X("PartName"))) != NULL) {
if (partname[0] == '/')
partname++;
data->filecallbackfn(data->zip, partname, contenttype, data->filecallbackdata);
}
}
} else if (XML_Char_icmp(name, X("Default")) == 0) {
//by extension
const XML_Char* contenttype;
const XML_Char* extension;
if ((contenttype = get_expat_attr_by_name(atts, X("ContentType"))) != NULL && XML_Char_icmp(contenttype, data->contenttype) == 0) {
if ((extension = get_expat_attr_by_name(atts, X("Extension"))) != NULL) {
// if( data->doFindRelsFirst && XML_Char_icmp( X( "rels" ), extension ) != 0 )
// {
// extension = X("rels");
//
// }
XML_Char* filename;
size_t filenamelen;
size_t extensionlen = XML_Char_len(extension);
#ifdef USE_MINIZIP
#define UNZIP_FILENAME_BUFFER_STEP 32
char* buf;
size_t buflen;
int status;
unz_global_info zipglobalinfo;
unzGetGlobalInfo(data->zip, &zipglobalinfo);
buf = (char*)malloc(buflen = UNZIP_FILENAME_BUFFER_STEP);
status = unzGoToFirstFile(data->zip);
// this loop is problematic
int fileindex = 0;
while (status == UNZ_OK) {
buf[buflen - 1] = 0;
unz_file_info* finfo = malloc(sizeof(unz_file_info));
// this was previously crashing because we were passing NULL as the second argument
while ((status = unzGetCurrentFileInfo(data->zip, finfo, buf, buflen, NULL, 0, NULL, 0)) == UNZ_OK && buf[buflen - 1] != 0) {
buflen += UNZIP_FILENAME_BUFFER_STEP;
buf = (char*)realloc(buf, buflen);
buf[buflen - 1] = 0;
}
free(finfo);
finfo = NULL;
if (status != UNZ_OK) {
break;
}
filename = XML_Char_dupchar(buf);
status = unzGoToNextFile(data->zip);
++fileindex;
// unz64_s* s = (unz64_s*)data->zip;
// ZPOS64_T currnumfile = s->num_file;
#else
zip_int64_t i;
zip_int64_t zipnumfiles = zip_get_num_entries(data->zip, 0);
for (i = 0; i < zipnumfiles; i++) {
filename = XML_Char_dupchar(zip_get_name(data->zip, i, ZIP_FL_ENC_GUESS));
#endif
filenamelen = XML_Char_len(filename);
// if the extension is a match
if (filenamelen > extensionlen && filename[filenamelen - extensionlen - 1] == '.' && XML_Char_icmp(filename + filenamelen - extensionlen, extension) == 0) {
// this call causes data->zip->num_file to revert to zero causing an endless loop?
data->filecallbackfn(data->zip, filename, contenttype, data->filecallbackdata);
// prevent the endless loop
int looperx = 0;
for( looperx = 0; looperx < fileindex && status == UNZ_OK; ++looperx ) {
status = unzGoToNextFile(data->zip);
}
}
free(filename);
}
#ifdef USE_MINIZIP
free(buf);
#endif
} // while (status == UNZ_OK)
}
}
}
If you could provide an .xlsx file that reproduces this issue I would be happy to look into this. Is this problem still present in version 0.2.19 ?
Can you tell me if this issue can still be reproduced with the latest version?