python-hyperscan icon indicating copy to clipboard operation
python-hyperscan copied to clipboard

How to match an exact string with hyperscan, as with re.findall?

Open rafikg opened this issue 3 years ago • 0 comments

from typing import Optional, Any, List
import hyperscan


def on_match(id: int, start: int, end: int, flags: int, context: Optional[Any] = None) -> Optional[bool]:
    """Record a single match reported by a scan.

    Args:
        id: Identifier of the expression that matched.
        start: Start offset of the match in the scanned data.
        end: End offset of the match in the scanned data.
        flags: Flags value supplied by the caller; not used here.
        context: Mapping holding a ``'results'`` list to which the
            ``(id, start, end)`` tuple is appended. When ``None``,
            nothing is recorded.

    Returns:
        ``False`` in every case.
        NOTE(review): a falsy return is presumed to tell hyperscan to
        keep scanning -- confirm against the python-hyperscan docs.
    """
    # `context` defaults to None, so it must be checked before it is
    # subscripted; otherwise a call without a context raises TypeError.
    if context is not None:
        context['results'].append((id, start, end))
    return False


# Demo script: compile one caseless pattern, scan a newline-joined word
# list, and print every (id, start, end) tuple collected by on_match.

# Each entry is (expression, id, flags).
patterns = (
    (
        br'O+M',
        0,
        hyperscan.HS_FLAG_CASELESS | hyperscan.HS_FLAG_SOM_LEFTMOST,
    ),
)

# Split the pattern table into the parallel tuples passed to compile().
expressions = tuple(entry[0] for entry in patterns)
ids = tuple(entry[1] for entry in patterns)
flags = tuple(entry[2] for entry in patterns)

db = hyperscan.Database()
db.compile(
    expressions=expressions,
    ids=ids,
    elements=len(patterns),
    flags=flags,
)

lines = ['Om', 'OOm', 'oom', 'sroom', 'communication', 'surveillance']

# The words are scanned as a single bytes buffer, one word per line.
text = "\n".join(lines).encode()
print(text)

# on_match appends each reported match to context['results'].
context = {'results': []}
db.scan(text, match_event_handler=on_match, context=context)

for result in context['results']:
    print(result)

(0, 0, 2) -> Om
(0, 3, 6) -> OOm
(0, 7, 10) -> oom
(0, 13, 16) -> oom (in sroom)
(0, 18, 20) -> om (in communication)

With re.findall():

re.findall(rb'O+M', text, flags=re.IGNORECASE)
[b'Om', b'OOm', b'oom', b'oom']

rafikg avatar Jun 09 '22 13:06 rafikg