hyperscan icon indicating copy to clipboard operation
hyperscan copied to clipboard

Literal matching unknown error

Open danlark1 opened this issue 4 years ago • 5 comments

Reproducible example

#undef NDEBUG
#include <hs.h>
#include <cassert>

/*
Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
Version: 5.4.0
*/

// Match callback passed to hs_scan() by the reproducer.
// It inspects none of its arguments and unconditionally returns 0.
int on_match(unsigned int id,
             unsigned long long from,
             unsigned long long to,
             unsigned int /* flags */,
             void * context) {
    // The arguments are intentionally unused; the casts only make that explicit.
    (void)id;
    (void)from;
    (void)to;
    (void)context;

    const int rc = 0;
    return rc;
}

// Reproducer entry point: compiles the literal patterns below into a
// block-mode database, allocates scratch space for it, scans `data`, and
// asserts on the hs_scan() result. The asserts are live because NDEBUG is
// undefined before the includes.
int main() {
    hs_database_t *db = nullptr;
    // NOTE(review): compile_err is passed to the compiler but never inspected
    // or released in this function.
    hs_compile_error_t *compile_err = nullptr;
    // Buffer to scan. It contains embedded zero bytes, so its length is given
    // explicitly by `size` rather than derived from the string.
    const char* data = "\x2a\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
    const size_t size = 43;
    // Literal patterns. They also contain zero bytes, so each pattern's length
    // is supplied separately in lens[].
    const char *expr[] = {
        "\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x2a\xed\xe2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    };
    // flags[i], ids[i] and lens[i] all describe expr[i].
    unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_SOM_LEFTMOST, 0};
    unsigned ids[] = {0, 1, 2, 3, 4};
    size_t lens[] = {46, 12, 10, 41, 45};
    // The pattern count is derived from the ids[] array size.
    hs_error_t err = hs_compile_lit_multi(expr, flags, ids, lens, sizeof(ids)/sizeof(*ids), HS_MODE_BLOCK, nullptr, &db, &compile_err);

    // Compilation is expected to succeed; the failure being reported is at scan time.
    assert(HS_SUCCESS == err);
    assert(db != nullptr);

    hs_scratch_t *scratch = nullptr;
    err = hs_alloc_scratch(db, &scratch);
    assert(HS_SUCCESS == err);
    assert(scratch != nullptr);

    err = hs_scan(db, data, size, 0, scratch, on_match, nullptr);
    // The two asserts below contradict each other on purpose: the first
    // records the result the reporter observed, the second the result they
    // expected. If the first one passes, the second one aborts the program.
    assert(HS_UNKNOWN_ERROR == err); // Observed result (the reported bug)
    assert(HS_SUCCESS == err); // Expected result

    // Cleanup; only reached if the asserts above did not abort.
    hs_free_database(db);
    err = hs_free_scratch(scratch);
    assert(HS_SUCCESS == err);
}

danlark1 avatar Feb 27 '21 17:02 danlark1

In case you need inputs without zero bytes:

// Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
// Input set whose haystack and patterns contain no zero bytes.
// Presumably meant to replace the same-named variables in the reproducer's
// main() -- confirm with the reporter.
// Haystack (length given by `size`):
const char* data = "\xf0\x66\x01\x76\x01\x01\x01\x76\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x76\x0d";
const size_t size = 49;
// Patterns (ids[i], lens[i] and flags[i] describe expr[i]):
const char *expr[] = {
    "\x66\x01\x76\x01\x01\x01\x76\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff",
    "\x01\x01\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    "\xff\xff\xff",
};
unsigned ids[] = {0, 1, 2, 3};
size_t lens[] = {44, 45, 40, 3};
unsigned flags[] = {HS_FLAG_CASELESS, 0, HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS};
// Explanation: Failed to scan with hyperscan: HS_UNKNOWN_ERROR
// Another input set whose haystack and patterns contain no zero bytes.
// Presumably meant to replace the same-named variables in the reproducer's
// main() -- confirm with the reporter.
// Haystack (length given by `size`):
const char* data = "\x47\x47\x2c\x47\x29\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20";
const size_t size = 49;
// Patterns (ids[i], lens[i] and flags[i] describe expr[i]):
const char *expr[] = {
    "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    "\x29\x1d\x47\x47\x01\x01\x01\x01\x73\x6f\x6d\x65\x20\x6e\x6f\x6e\x20\x65\x78\x69\x73\x74\x69\x6e\x67\x20\x73\x74\x72\x69\x6e\x67",
    "\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20",
    "\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20",
    "\x47\x2c\x47\x29\x1d\x47\x47\x01\x01\x01\x01\x01\x01\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20",
};
unsigned ids[] = {0, 1, 2, 3, 4};
size_t lens[] = {46, 32, 14, 28, 47};
unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH, HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS|HS_FLAG_SOM_LEFTMOST};

danlark1 avatar Feb 27 '21 18:02 danlark1

The problem is in the switch statement in roseRunProgram_l.

For ROSE_INSTR_TRIGGER_SUFFIX

// Haystack (contains zero bytes; length given by `size`):
const char* data = "\xcb\xcb\xcb\xcb\x00\x00\x00\x00\x00\x00\x00\x00\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xcb\xbf\xff\xff\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0";
const size_t size = 35;
// Patterns (ids[i], lens[i] and flags[i] describe expr[i]):
const char *expr[] = {
    "\x61",
    "\xbf\xff\xff\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0\xc0",
};
unsigned ids[] = {0, 1};
size_t lens[] = {1, 38};
unsigned flags[] = {HS_FLAG_SOM_LEFTMOST, 0};

For ROSE_INSTR_CATCH_UP_MPV

// Haystack (length given by `size`):
const char* data = "\x0a\x41\x61\x61\xec\xf2\x41\x41";
const size_t size = 8;
// Patterns (some contain zero bytes; ids[i], lens[i] and flags[i] describe expr[i]):
const char *expr[] = {
    "\x61",
    "\x19\x24\x03\x00\x41",
    "\x41\x00\x28\x41\x41\x41\x41\x41\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79\x79",
};
unsigned ids[] = {0, 1, 2};
size_t lens[] = {1, 5, 48};
unsigned flags[] = {HS_FLAG_SINGLEMATCH, HS_FLAG_CASELESS, HS_FLAG_CASELESS|HS_FLAG_SINGLEMATCH};

danlark1 avatar Mar 01 '21 16:03 danlark1

We will try to provide a fix in the next release.

Nor7th avatar Mar 09 '21 11:03 Nor7th

One more thing: hs_close_stream does not free the stream's memory when it returns HS_UNKNOWN_ERROR.

/**
 * Close a stream and release its memory.
 *
 * When @p onEvent is non-NULL, report_eod_matches() is run against the stream
 * first (presumably to deliver end-of-data matches to the callback -- confirm
 * against its definition); this requires a scratch that is valid for the
 * stream's database and not already in use.
 *
 * @param id      Stream to close. Must not be NULL.
 * @param scratch Scratch space; only examined when @p onEvent is non-NULL.
 * @param onEvent Optional match callback; NULL skips the reporting step.
 * @param context Opaque pointer forwarded to report_eod_matches().
 *
 * @return HS_INVALID if @p id is NULL, or if @p onEvent is set and @p scratch
 *         is NULL or not valid for the stream (stream is NOT freed);
 *         HS_SCRATCH_IN_USE if the scratch is already marked in use (stream is
 *         NOT freed, so the caller may retry);
 *         HS_UNKNOWN_ERROR if an internal matching error was flagged during
 *         reporting (stream IS freed);
 *         HS_SUCCESS otherwise (stream IS freed).
 */
HS_PUBLIC_API
hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
                                    match_event_handler onEvent,
                                    void *context) {
    if (!id) {
        return HS_INVALID;
    }

    hs_error_t rv = HS_SUCCESS;

    if (onEvent) {
        if (!scratch || !validScratch(id->rose, scratch)) {
            return HS_INVALID;
        }
        if (unlikely(markScratchInUse(scratch))) {
            return HS_SCRATCH_IN_USE;
        }
        report_eod_matches(id, scratch, onEvent, context);
        /* Read the error flag before the scratch is released, as before. */
        if (unlikely(internal_matching_error(scratch))) {
            rv = HS_UNKNOWN_ERROR;
        }
        unmarkScratchInUse(scratch);
    }

    /* Free the stream on the HS_UNKNOWN_ERROR path too. Previously that path
     * returned early and skipped this call, so the stream was leaked even
     * though the caller had asked for it to be closed. */
    hs_stream_free(id);

    return rv;
}

In other words, there is no way to close the stream without a memory leak.

danlark1 avatar Jun 19 '21 19:06 danlark1

Please refer to the latest develop branch for the fix to the literal matching unknown error. We'll look into the memory deallocation issue next.

hongyang7 avatar Jun 01 '22 10:06 hongyang7

The remaining memory deallocation issue will be fixed in the develop branch soon.

hongyang7 avatar Oct 26 '22 14:10 hongyang7

Please refer to the latest develop branch. Commit id: 85f68b874bd407df50fe7acc8599f4c65b2070dd

hongyang7 avatar Oct 27 '22 18:10 hongyang7