python-hyperscan
python-hyperscan copied to clipboard
How can I match an exact string with hyperscan, the way re.findall does?
from typing import Optional, Any, List
import hyperscan
def on_match(id: int, start: int, end: int, flags: int, context: Optional[Any] = None) -> Optional[bool]:
    """Record one reported match as an ``(id, start, end)`` tuple.

    Intended to be passed as ``match_event_handler`` to ``Database.scan``.

    Args:
        id: Identifier of the expression that matched. The name shadows the
            builtin but is kept so keyword callers keep working.
        start: Start offset of the match.
        end: End offset of the match.
        flags: Flags value supplied with the match event; unused here.
        context: Object whose ``'results'`` entry is a list that collects
            the matches. When ``None`` (the default) the match is dropped
            instead of raising ``TypeError`` on the subscript.

    Returns:
        ``False`` on every call. It compares equal to the ``0`` previously
        returned; a falsy result is presumably what lets the scan continue
        (confirm against the python-hyperscan callback contract).
    """
    if context is not None:
        context['results'].append((id, start, end))
    return False
# Each pattern is an (expression, id, flags) triple.
patterns = (
    (
        br'O+M',
        0,
        hyperscan.HS_FLAG_CASELESS | hyperscan.HS_FLAG_SOM_LEFTMOST,
    ),
)

# Transpose the triples into the three parallel sequences compile() takes.
expressions, ids, flags = zip(*patterns)

db = hyperscan.Database()
db.compile(
    expressions=expressions,
    ids=ids,
    elements=len(patterns),
    flags=flags,
)

lines = ['Om', 'OOm', 'oom', 'sroom', 'communication', 'surveillance']
context = {'results': []}

# The scanned buffer is the sample words joined by newlines, as bytes.
text = "\n".join(lines).encode()
print(text)

# on_match appends every reported (id, start, end) to context['results'].
db.scan(text, match_event_handler=on_match, context=context)

for match_id, match_start, match_end in context['results']:
    print((match_id, match_start, match_end))
(0, 0, 2) -> Om
(0, 3, 6) -> OOm
(0, 7, 10) -> oom
(0, 13, 16) -> oom (in sroom)
(0, 18, 20) -> om (in communication)
For comparison, with re.findall():
re.findall(rb'O+M', text, flags=re.IGNORECASE)
[b'Om', b'OOm', b'oom', b'oom']