hyperscan
hyperscan copied to clipboard
Literal matching unknown error
Reproducible example
#undef NDEBUG
#include <hs.h>
#include <cassert>
/*
Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
Version: 5.4.0
*/
/*
 * Match callback passed to hs_scan() by the reproducer.
 *
 * None of the arguments are inspected; the function unconditionally
 * returns 0. (Per the Hyperscan API documentation a zero return asks the
 * scanner to keep matching -- confirm against the hs_scan reference.)
 */
int on_match(unsigned int id,
             unsigned long long from,
             unsigned long long to,
             unsigned int flags,
             void *context) {
    /* Explicitly discard every argument so unused-parameter warnings stay quiet. */
    (void)id;
    (void)from;
    (void)to;
    (void)flags;
    (void)context;
    return 0;
}
/*
 * Reproducer for the literal-matching failure.
 *
 * Compiles five literal patterns with hs_compile_lit_multi() in block mode,
 * allocates scratch space and scans a single haystack. The patterns and the
 * haystack contain embedded \x00 bytes, so every length is passed explicitly
 * (`lens`, `size`) instead of relying on NUL termination.
 *
 * Each step is checked with assert(); the file #undefs NDEBUG so the checks
 * are always active. The program exits normally only if the scan succeeds.
 */
int main() {
    hs_database_t *db = nullptr;
    hs_compile_error_t *compile_err = nullptr;

    // Haystack; `size` is its length in bytes.
    const char* data = "\x2a\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
    const size_t size = 43;

    // Literal patterns; flags[i], ids[i] and lens[i] belong to expr[i].
    const char *expr[] = {
        "\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x2a\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    };
    unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_SOM_LEFTMOST, 0};
    unsigned ids[] = {0, 1, 2, 3, 4};
    size_t lens[] = {46, 12, 10, 41, 45};

    hs_error_t err = hs_compile_lit_multi(expr, flags, ids, lens, sizeof(ids)/sizeof(*ids), HS_MODE_BLOCK, nullptr, &db, &compile_err);
    assert(HS_SUCCESS == err);
    assert(db != nullptr);

    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    assert(HS_SUCCESS == err);
    assert(scratch != nullptr);

    err = hs_scan(db, data, size, 0, scratch, on_match, nullptr);
    // Observed on Hyperscan 5.4.0: err == HS_UNKNOWN_ERROR.
    // Only the expected result is asserted: asserting both the observed and
    // the expected value made the program abort no matter what hs_scan()
    // returned, so it could never pass once the library is fixed.
    assert(HS_SUCCESS == err);

    // Release resources, checking both frees the same way.
    err = hs_free_database(db);
    assert(HS_SUCCESS == err);
    err = hs_free_scratch(scratch);
    assert(HS_SUCCESS == err);
}
In case you need inputs without zero bytes, here are two more reproducers:
// Reproducer variant: neither the haystack nor any pattern contains a \x00 byte.
// These declarations reuse the variable names of the full example's main()
// (data, size, expr, ids, lens, flags) -- presumably meant to replace them there.
// Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
// Haystack (`size` below is its length in bytes):
const char* data = "\xf0\x66\x01\x76\x01\x01\x01\x76\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x76\x0d";
const size_t size = 49;
// Patterns (four literals; ids, lens and flags below have one entry per pattern, in order):
const char *expr[] = {
"\x66\x01\x76\x01\x01\x01\x76\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
"\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
"\xff\xff\xff",
};
unsigned ids[] = {0, 1, 2, 3};
// Length in bytes of each pattern.
size_t lens[] = {44, 45, 40, 3};
unsigned flags[] = {HS_FLAG_CASELESS, 0, HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS};
// Another reproducer variant; again no \x00 byte appears in the haystack or the patterns.
// The declarations reuse the variable names of the full example's main()
// (data, size, expr, ids, lens, flags) -- presumably meant to replace them there.
// Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
// Haystack (`size` below is its length in bytes):
const char* data = "\x47\x47\x2c\x47\x29\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20";
const size_t size = 49;
// Patterns (five literals; ids, lens and flags below have one entry per pattern, in order):
const char *expr[] = {
"\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
"\x29\x1d\x47\x47\x01\x01\x01\x01\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
"\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20",
"\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20",
"\x47\x2c\x47\x29\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20",
};
unsigned ids[] = {0, 1, 2, 3, 4};
// Length in bytes of each pattern.
size_t lens[] = {46, 32, 14, 28, 47};
unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SOM_LEFTMOST};
The problem is in the `switch` statement in `roseRunProgram_l`.
Reproducer for the `ROSE_INSTR_TRIGGER_SUFFIX` case:
// Minimal input the thread attributes to the ROSE_INSTR_TRIGGER_SUFFIX case.
// The declarations reuse the variable names of the full example's main()
// (data, size, expr, ids, lens, flags) -- presumably meant to replace them there.
// Haystack (contains embedded \x00 bytes; `size` below is its length in bytes):
const char* data = "\xcb\xcb\xcb\xcb\x00\x00\x00\x00\x00\x00\x00\x00\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xbf\xff\xff\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0";
const size_t size = 35;
// Patterns (two literals; ids, lens and flags below have one entry per pattern, in order):
const char *expr[] = {
"\x61",
"\xbf\xff\xff\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0",
};
unsigned ids[] = {0, 1};
// Length in bytes of each pattern.
size_t lens[] = {1, 38};
unsigned flags[] = {HS_FLAG_SOM_LEFTMOST, 0};
Reproducer for the `ROSE_INSTR_CATCH_UP_MPV` case:
// Minimal input the thread attributes to the ROSE_INSTR_CATCH_UP_MPV case.
// The declarations reuse the variable names of the full example's main()
// (data, size, expr, ids, lens, flags) -- presumably meant to replace them there.
// Haystack (`size` below is its length in bytes):
const char* data = "\x0a\x41\x61\x61\xec\xf2\x41\x41";
const size_t size = 8;
// Patterns (three literals, two of which contain an embedded \x00 byte;
// ids, lens and flags below have one entry per pattern, in order):
const char *expr[] = {
"\x61",
"\x19\x24\x03\x00\x41",
"\x41\x00\x28\x41\x41\x41\x41\x41\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79",
};
unsigned ids[] = {0, 1, 2};
// Length in bytes of each pattern.
size_t lens[] = {1, 5, 48};
unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH};
We will try to provide a fix in the next release.
One more thing: `hs_close_stream` does not free the stream's memory when it returns `HS_UNKNOWN_ERROR`:
/*
 * Close a stream, optionally reporting end-of-data matches first.
 *
 * id      - stream to close; HS_INVALID is returned if it is NULL.
 * scratch - scratch space; only consulted when onEvent is non-NULL, in which
 *           case it must be non-NULL and pass validScratch() for id->rose.
 * onEvent - match callback; when NULL, no end-of-data matches are reported
 *           and the stream is simply freed.
 * context - passed through to report_eod_matches() together with onEvent.
 *
 * Returns:
 *   HS_SUCCESS        - stream freed.
 *   HS_INVALID        - NULL id, or missing/invalid scratch; stream NOT freed.
 *   HS_SCRATCH_IN_USE - scratch is already marked in use; stream NOT freed,
 *                       so the caller can retry.
 *   HS_UNKNOWN_ERROR  - internal_matching_error() reported a failure after
 *                       end-of-data reporting; the stream HAS been freed and
 *                       must not be used again.
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
                                    match_event_handler onEvent,
                                    void *context) {
    if (!id) {
        return HS_INVALID;
    }

    if (onEvent) {
        if (!scratch || !validScratch(id->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(id, scratch, onEvent, context);
        if (unlikely(internal_matching_error(scratch))) {
            unmarkScratchInUse(scratch);
            /* Free the stream on this path too: previously it was leaked
             * whenever the close reported an internal matching error. */
            hs_stream_free(id);
            return HS_UNKNOWN_ERROR;
        }
        unmarkScratchInUse(scratch);
    }

    hs_stream_free(id);
    return HS_SUCCESS;
}
In other words, once this error occurs there is no way to close the stream without leaking memory.
Please refer to the latest develop branch for the literal-matching unknown-error fix. We will look into the memory deallocation issue next.
The remaining memory deallocation issue will be fixed in the develop branch soon.
Please refer to the latest develop branch. Commit id: 85f68b874bd407df50fe7acc8599f4c65b2070dd