Improve YARA plugin
This PR aims to improve the YARA plugin.
dissect.target.plugins.filesystem.yarais now anInternalPlugintarget-yarais now a command which calls thefilesystem.yaraplugin- it is now possible to run compiled YARA rules with the YARA plugin
- provided rules can be compiled together which improves scan speed
- provided rules can be checked on validity before compiling them together
- arguments aim to be fully backwards compatible
target-query -f yarawill no longer work as it is replaced bytarget-yara- added tests for
filesystem.yaraandtarget-yara
Can you also elaborate the reasoning for moving this towards target-yara and removing the functionality from target-query? On the one hand I can understand that it's nice to be able to run target-yara, but I don't understand the reasoning for completely walling it off from target-query. Is it a design choice?
target.yara() is still accessible programmatically and for cli invocations we now have target-yara. It's a design choice. Initially both still worked, but with a very elaborate method on defining argparse arguments. Decided against that and now we're here.
~Is YARA-X integration welcome in this PR, or should we wait until this has been merged in main?~ We will push this to a separate PR.
It seems like that was all feedback, apologies for the delay @Schamper. Is this good to go now?
Apologies for the long delay, a lot got in the way :wink:. Hope we can collaborate fruitfully to get this merged quickly.
I didn't want to push to the branch directly as to get your opinion on it first, but with this patch it seems that this can happily co-exist as both target-yara and target-query -f yara (along with some other minor changes). I don't think this method of defining arguments is overly elaborate as you first pointed out.
diff --git a/dissect/target/plugins/filesystem/yara.py b/dissect/target/plugins/filesystem/yara.py
index d934d88..77e675b 100644
--- a/dissect/target/plugins/filesystem/yara.py
+++ b/dissect/target/plugins/filesystem/yara.py
@@ -1,8 +1,8 @@
+import hashlib
import logging
-from hashlib import md5
from io import BytesIO
from pathlib import Path
-from typing import Iterator, Optional
+from typing import Iterator
from dissect.target.helpers import hashutil
@@ -16,7 +16,7 @@ except ImportError:
from dissect.target.exceptions import FileNotFoundError, UnsupportedPluginError
from dissect.target.helpers.record import TargetRecordDescriptor
-from dissect.target.plugin import InternalPlugin
+from dissect.target.plugin import Plugin, arg, export
log = logging.getLogger(__name__)
@@ -34,13 +34,18 @@ YaraMatchRecord = TargetRecordDescriptor(
DEFAULT_MAX_SCAN_SIZE = 10 * 1024 * 1024
-class YaraPlugin(InternalPlugin):
+class YaraPlugin(Plugin):
"""Plugin to scan files against a local YARA rules file."""
def check_compatible(self) -> None:
if not HAS_YARA:
raise UnsupportedPluginError("Please install 'yara-python' to use the yara plugin.")
+ @arg("-r", "--rules", required=True, nargs="*", help="path(s) to YARA rule file(s) or folder(s)")
+ @arg("-p", "--path", default="/", help="path on target(s) to recursively scan")
+ @arg("-m", "--max-size", default=DEFAULT_MAX_SCAN_SIZE, help="maximum file size in bytes to scan")
+ @arg("-c", "--check", default=False, action="store_true", help="check if every YARA rule is valid")
+ @export(record=YaraMatchRecord)
def yara(
self,
rules: list[str | Path],
@@ -82,11 +87,11 @@ class YaraPlugin(InternalPlugin):
)
continue
- file_content = file.open().read()
- for match in compiled_rules.match(data=file_content):
+ buf = file.open().read()
+ for match in compiled_rules.match(data=buf):
yield YaraMatchRecord(
path=self.target.fs.path(file.path),
- digest=hashutil.common(BytesIO(file_content)),
+ digest=hashutil.common(BytesIO(buf)),
rule=match.rule,
tags=match.tags,
namespace=match.namespace,
@@ -102,7 +107,7 @@ class YaraPlugin(InternalPlugin):
self.target.log.debug("", exc_info=e)
-def process_rules(paths: list[str | Path], check: bool = False) -> Optional[yara.Rules]:
+def process_rules(paths: list[str | Path], check: bool = False) -> yara.Rules | None:
"""Generate compiled YARA rules from the given path(s).
Provide path to one (compiled) YARA file or directory containing YARA files.
@@ -153,14 +158,14 @@ def process_rules(paths: list[str | Path], check: bool = False) -> Optional[yara
if files and not compiled_rules:
try:
- compiled_rules = compile_yara({md5(file.as_posix().encode("utf-8")).digest().hex(): file for file in files})
+ compiled_rules = compile_yara({hashlib.md5(file.as_posix().encode()).hexdigest(): file for file in files})
except yara.Error as e:
log.error("Failed to compile YARA file(s): %s", e)
return compiled_rules
-def compile_yara(files: dict[str, Path] | Path, is_compiled: bool = False) -> Optional[yara.Rules]:
+def compile_yara(files: dict[str, Path] | Path, is_compiled: bool = False) -> yara.Rules | None:
"""Compile or load the given YARA file(s) to rules."""
if is_compiled and isinstance(files, Path):
return yara.load(files.as_posix())
diff --git a/dissect/target/tools/yara.py b/dissect/target/tools/yara.py
index d008b6d..5c21cb6 100755
--- a/dissect/target/tools/yara.py
+++ b/dissect/target/tools/yara.py
@@ -5,7 +5,7 @@ import logging
from dissect.target import Target
from dissect.target.exceptions import TargetError
-from dissect.target.plugins.filesystem.yara import DEFAULT_MAX_SCAN_SIZE, HAS_YARA
+from dissect.target.plugins.filesystem.yara import HAS_YARA, YaraPlugin
from dissect.target.tools.query import record_output
from dissect.target.tools.utils import (
catch_sigpipe,
@@ -26,11 +26,11 @@ def main():
)
parser.add_argument("targets", metavar="TARGETS", nargs="*", help="Targets to load")
- parser.add_argument("-r", "--rules", required=True, nargs="*", help="path(s) to YARA rule file(s) or folder(s)")
- parser.add_argument("-p", "--path", default="/", help="path on target(s) to recursively scan")
- parser.add_argument("-m", "--max-size", default=DEFAULT_MAX_SCAN_SIZE, help="maximum file size in bytes to scan")
- parser.add_argument("-c", "--check", default=False, action="store_true", help="check if every YARA rule is valid")
parser.add_argument("-s", "--strings", default=False, action="store_true", help="print output as string")
+
+ for args, kwargs in getattr(YaraPlugin.yara, "__args__", []):
+ parser.add_argument(*args, **kwargs)
+
configure_generic_arguments(parser)
args = parser.parse_args()
diff --git a/tests/tools/test_yara.py b/tests/tools/test_yara.py
index f18caac..4264a10 100644
--- a/tests/tools/test_yara.py
+++ b/tests/tools/test_yara.py
@@ -11,7 +11,7 @@ from tests._utils import absolute_path
@pytest.mark.skipif(not HAS_YARA, reason="requires python-yara")
-def test_yara(target_default: Target, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture):
+def test_yara(target_default: Target, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture) -> None:
vfs = VirtualFilesystem()
vfs.map_file_fh("test_file", BytesIO(b"hello there this is a test string!"))
vfs.map_file_fh("/test/dir/to/test_file", BytesIO(b"this is another test string for YARA testing."))
Let me know what you think.
+ for args, kwargs in getattr(YaraPlugin.yara, "__args__", []):
+ parser.add_argument(*args, **kwargs)
That's actually a very neat solution, thanks!
We've taken the liberty to add your patch as a commit to the PR :smile:
Codecov Report
Attention: Patch coverage is 80.99174% with 23 lines in your changes missing coverage. Please review.
Project coverage is 75.21%. Comparing base (
20496fb) to head (ff560e9).
| Files | Patch % | Lines |
|---|---|---|
| dissect/target/plugins/filesystem/yara.py | 83.33% | 14 Missing :warning: |
| dissect/target/tools/yara.py | 75.67% | 9 Missing :warning: |
Additional details and impacted files
@@ Coverage Diff @@
## main #646 +/- ##
==========================================
+ Coverage 75.17% 75.21% +0.04%
==========================================
Files 295 296 +1
Lines 25318 25422 +104
==========================================
+ Hits 19032 19121 +89
- Misses 6286 6301 +15
| Flag | Coverage Δ | |
|---|---|---|
| unittests | 75.21% <80.99%> (+0.04%) |
:arrow_up: |
Flags with carried forward coverage won't be shown. Click here to find out more.
:umbrella: View full report in Codecov by Sentry.
:loudspeaker: Have feedback on the report? Share it here.
That seems to have done the trick :wink: