driller
driller copied to clipboard
Testing driller with a program that reads a file
Hi, all. I want to test the driller with a program that reads a file, like djpeg. First, I want to test a very simple example,
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
/* Minimal target for the experiment: reads sizeof(int) bytes from the file named
 * by argv[1] and prints "Good" only when the value equals 0xdeadbeef. */
int main(int argc, char** argv) {
/* NOTE(review): argc and the result of open() are not checked here. */
int fd = open(argv[1], O_RDONLY);
int input = 0;
/* The return value of read() is ignored; `input` keeps its 0 initializer if nothing is read. */
read(fd, &input, sizeof(input));
/* Input-dependent branch that the generated test case has to satisfy. */
if (input == 0xdeadbeef)
printf("Good");
close(fd);
}
To do this, I modified the following line so that it also passes fs and argv.
s = p.factory.tracer_state(input_content=self.input, magic_content=r.magic, args=self.argv, fs=self._fs)
And run the driller as follows
# Target binary and the concrete seed file used for this experiment.
binary = 'sample/main'
input_file = 'sample/input.bin'

# The raw seed bytes are handed to Driller as its input.
with open(input_file, 'rb') as seed:
    inputs = seed.read()

# Map the input path to a SimFile sized like the on-disk file; passed to Driller as `fs`.
seed_size = os.path.getsize(input_file)
sim_file = simuvex.storage.file.SimFile(input_file, "rb", size=seed_size)
fs = {input_file: sim_file}

d = driller.Driller(binary, inputs, argv=[binary, input_file], fs=fs)

# Print every result yielded by the generator.
for drilled in d.drill_generator():
    print(drilled)
The input_file is just "AAAA". Unfortunately, I could not get 0xdeadbeef: Driller generates a test case, but it is just an empty string.
I checked that open() returns SimFile.
Could you let me know where I should look?
Thanks.
Driller is heavily hardcoded to use stdin. You'll have to find in tracer where it preconstrains the input and make it use the file instead. And you'll have to look at where driller dumps inputs, because it's also just dumping stdin. It will definitely require some work to change from using stdin to using an input file.
The other option, if the program doesn't use stdin, is to hook/change the file reads so that they read from stdin instead. Then you can use driller as is.
Thanks. Let me try the former approach first; if that fails, I will try the latter. I am worried that the programs under test may use file-related calls such as fseek or ftell.
I want to do exactly that, have you managed to do it? @jakkdu
Hi @agarciagonzalez. I ran a program with /dev/stdin and passed the input through stdin. It seems to work and generates some test cases, but not many. I stopped there and did not debug it further.
@agarciagonzalez
FYI, this is my code for reading file input using driller.
This relies on what is arguably a bug in angr: it treats /dev/stdin as a normal file, which lets us perform file operations such as lseek.
@salls Do you think it will be fine?
I didn't change any core part of driller, only the qemu part (so that it takes a file as input), and I use /dev/stdin on the angr side.
import os
import copy
import logging
import angr
import driller
import tracer
l = logging.getLogger("driller.driller_file")
# logging.getLogger("angr.state_plugins.posix").setLevel(logging.DEBUG)
# commit: c536408e9d70b8b0743db55efb9aa2e7e96c5601
# NOTE: The file path should be absolute path!
# argv should be ['./binary', '@/tmp/input_file.txt']
def patch_argv_qemu(argv):
    """
    Build the argument vector used for the concrete (QEMU) run.

    The argument prefixed with ``@`` marks the input file. The marker is stripped
    and the remaining path is made absolute, because the input file has to be
    given as an absolute path for the concrete run.

    :param argv: list of argument strings; it is not modified.
    :return:     a new list in which the ``@`` argument is replaced by the
                 absolute path of the input file.
    :raises ValueError: if more than one argument starts with ``@``.
    """
    patched_argv = copy.copy(argv)
    marked = [i for i, arg in enumerate(argv) if arg.startswith("@")]

    # Explicit check instead of `assert`, which is stripped when running with -O.
    if len(marked) > 1:
        raise ValueError("expected at most one '@'-prefixed input file argument, got %d" % len(marked))

    for i in marked:
        patched_argv[i] = os.path.abspath(argv[i][1:])
    return patched_argv
def patch_argv_angr(argv):
    """
    Build the argument vector used for the symbolic (angr) run.

    The argument prefixed with ``@`` marks the input file; it is replaced by
    ``/dev/stdin``.

    :param argv: list of argument strings; it is not modified.
    :return:     a new list in which the ``@`` argument is replaced by ``"/dev/stdin"``.
    :raises ValueError: if more than one argument starts with ``@``.
    """
    patched_argv = copy.copy(argv)
    marked = [i for i, arg in enumerate(argv) if arg.startswith("@")]

    # Explicit check instead of `assert`, which is stripped when running with -O.
    if len(marked) > 1:
        raise ValueError("expected at most one '@'-prefixed input file argument, got %d" % len(marked))

    for i in marked:
        patched_argv[i] = "/dev/stdin"
    return patched_argv
class DrillerFile(driller.Driller):
    """
    Driller variant for targets that take their input from a file named on the
    command line.

    The input-file argument is marked with a leading ``@`` in ``self.argv``
    (for example ``['./binary', '@/tmp/input_file.txt']``). The concrete QEMU
    run gets the argv produced by ``patch_argv_qemu``, while the angr state gets
    the argv produced by ``patch_argv_angr`` together with a ``/dev/stdin``
    SimFile sized like the real input file.
    """

    def _parse_size(self):
        """
        Return the on-disk size in bytes of the ``@``-marked input file.

        :raises ValueError: if ``self.argv`` does not contain exactly one
                            ``@``-prefixed argument.
        """
        marked = [arg[1:] for arg in self.argv if arg.startswith("@")]
        if len(marked) != 1:
            raise ValueError("expected exactly one '@'-prefixed input file argument, got %d" % len(marked))
        return os.path.getsize(marked[0])

    def _drill_input(self):
        """
        Symbolically step down a path with a tracer, trying to concretize inputs for unencountered
        state transitions.

        Generator: for every diverted state it yields the result of ``self._writeout``
        (when that is not ``None``) followed by everything ``self._symbolic_explorer_stub``
        produces for that state.
        """

        # initialize the tracer
        r = tracer.qemu_runner.QEMURunner(self.binary, self.input, argv=patch_argv_qemu(self.argv))
        p = angr.Project(self.binary)
        for addr, proc in self._hooks.items():
            p.hook(addr, proc)
            l.debug("Hooking %#x -> %s...", addr, proc.display_name)

        if p.loader.main_object.os == 'cgc':
            p.simos.syscall_library.update(angr.SIM_LIBRARIES['cgcabi_tracer'])

        # The symbolic side sees the input as /dev/stdin, sized like the real input file.
        files = {'/dev/stdin': angr.storage.file.SimFile("/dev/stdin", "r", size=self._parse_size())}
        s = p.factory.tracer_state(input_content=self.input, magic_content=r.magic,
                                   args=patch_argv_angr(self.argv), fs=files)

        simgr = p.factory.simgr(s, save_unsat=True, hierarchy=False, save_unconstrained=r.crash_mode)

        t = angr.exploration_techniques.Tracer(trace=r.trace)
        c = angr.exploration_techniques.CrashMonitor(trace=r.trace, crash_mode=r.crash_mode,
                                                     crash_addr=r.crash_addr)
        self._core = angr.exploration_techniques.DrillerCore(trace=r.trace)

        simgr.use_technique(c)
        simgr.use_technique(t)
        simgr.use_technique(angr.exploration_techniques.Oppologist())
        simgr.use_technique(self._core)

        self._set_concretizations(simgr.one_active)

        l.debug("Drilling into %r.", self.input)
        l.debug("Input is %r.", self.input)

        while simgr.active and simgr.one_active.globals['bb_cnt'] < len(r.trace):
            simgr.step()

            # Check here to see if a crash has been found.
            if self.redis and self.redis.sismember(self.identifier + '-finished', True):
                return

            if 'diverted' not in simgr.stashes:
                continue

            while simgr.diverted:
                state = simgr.diverted.pop(0)
                l.debug("Found a diverted state, exploring to some extent.")
                w = self._writeout(state.history.bbl_addrs[-1], state)
                if w is not None:
                    yield w
                for i in self._symbolic_explorer_stub(state):
                    yield i

        # Report where tracing stopped. Guarded because the loop may have ended with no
        # active state left, in which case `one_active` cannot be dereferenced.
        if simgr.active:
            l.debug("Trace: %r, bb_cnt: %r", r.trace, simgr.one_active.globals['bb_cnt'])
hi @jakkdu
Did you try the former approach suggested by the owner, salls?
I also have another question: why change the qemu part (to take a file as input) but use /dev/stdin on the angr side? Could you explain the connection between "/dev/stdin" and the input file?
thanks and looking forward to your reply.
-
I changed the qemu part so that qemu executes the program with the given input file. That way, qemu produces the correct concrete path, i.e. the one the program takes when it reads that input.
-
I made angr use /dev/stdin because the current driller is designed specifically for stdin. The trick (I think it is arguably a bug) is that angr allows file operations even when we open /dev/stdin. If you look at seek in posix.py, it only checks whether the file descriptor is 0, 1, or 2. If you open /dev/stdin, the descriptor will be greater than that, yet it still refers to stdin, so we can use file operations on stdin.
So the script above connects angr's /dev/stdin to the input file of the concrete execution, both to use driller's features (which are designed for stdin) and to get the correct path for the concrete execution.
Hope this can help you. Thanks.
@jakkdu I used the above script to test the above example, but it cannot generate any test cases, and it gives the following warnings:
/Desktop/driller/driller-master/tests$ python test_driller.py drilling_file
WARNING | 2018-04-19 04:22:18,639 | angr.analyses.disassembly_utils | Your verison of capstone does not support MIPS instruction groups.
DEBUG | 2018-04-19 04:22:18,651 | driller.driller | [test] drilling started on Thu Apr 19 04:22:18 2018.
WARNING | 2018-04-19 04:22:18,651 | driller.driller | Debug directory is not set. Will not log fuzzing bitmap.
WARNING | 2018-04-19 04:22:20,264 | angr.simos.linux | Tracer has been heavily tested only for CGC. If you find it buggy for Linux binaries, we are sorry!
WARNING | 2018-04-19 04:22:21,597 | angr.exploration_techniques.tracer | Unable to correct discrepancy between qemu and angr.
DEBUG | 2018-04-19 04:22:21,648 | driller.driller | [test] dumping input for 0x4006c6 -> 0x4006d0.
DEBUG | 2018-04-19 04:22:21,648 | driller.driller | Generated: 41414141
DEBUG | 2018-04-19 04:22:22,475 | driller.driller | [test] started symbolic exploration at Thu Apr 19 04:22:22 2018.
DEBUG | 2018-04-19 04:22:44,076 | driller.driller | [test] stopped symbolic exploration at Thu Apr 19 04:22:44 2018.
WARNING | 2018-04-19 04:22:44,133 | angr.exploration_techniques.tracer | Unable to correct discrepancy between qemu and angr.
**The test example**
`#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
/* Test target: reads sizeof(int) bytes from the file named by argv[1] and prints
 * "Good" only when the value equals 0xdeadbeef. */
int main(int argc, char** argv) {
/* NOTE(review): argc and the result of open() are not checked here. */
int fd = open(argv[1], O_RDONLY);
int input = 0;
/* The return value of read() is ignored; `input` keeps its 0 initializer if nothing is read. */
read(fd, &input, sizeof(input));
/* Input-dependent branch that the generated test case has to satisfy. */
if (input == 0xdeadbeef)
printf("Good");
close(fd);
}`
the input file contains "AAAA"
run the driller as follows
def test_drilling_file():
    """
    Test drilling on a binary that reads its input from a file passed in argv.
    """
    import os

    binary = "sample/test"
    # The input file has to be given as an absolute path; a relative path did not
    # produce useful test cases in this setup.
    input_file = os.path.abspath("sample/input.bin")

    with open(input_file, 'rb') as f:
        inputs = f.read()

    # fuzzbitmap says every transition is worth satisfying.
    # The '@' prefix marks the input-file argument for DrillerFile.
    d = driller.DrillerFile(binary, inputs, argv=[binary, "@" + input_file])

    new_inputs = d.drill()

    assert new_inputs, "driller did not generate any inputs"
Could you give me some advice? @jakkdu
Sorry, I forgot to mention that the file path should be absolute path.
Could you try with the absolute path?
i.e., @sample/input.bin
--> @/tmp/sample/input.bin
@xianghaohyman I saw another comment from you, but it seems to have been removed. Is it working now?
@jakkdu it works , thanks for your advice
Because of angr version updates, the scripts above unfortunately no longer work in the current environment. :(
For example, the function tracer_state is no longer supported by angr, and neither is the class CrashMonitor.
I tried to replace the out-of-date APIs with the currently available ones:
class DrillerFile(Driller):
    """
    Driller variant for targets that take their input from a file named on the
    command line, ported to the newer angr API.

    The input-file argument is marked with a leading ``@`` in ``self.argv``.
    For non-CGC binaries the angr state is created with the argv produced by
    ``patch_argv_angr`` and a ``/dev/stdin`` SimFile sized like the real input
    file; generated inputs are read back from that file in ``_writeout``.
    """

    def _parse_size(self):
        """
        Return the on-disk size in bytes of the ``@``-marked input file.

        :raises ValueError: if ``self.argv`` does not contain exactly one
                            ``@``-prefixed argument.
        """
        marked = [arg[1:] for arg in self.argv if arg.startswith("@")]
        if len(marked) != 1:
            raise ValueError("expected exactly one '@'-prefixed input file argument, got %d" % len(marked))
        return os.path.getsize(marked[0])

    def _writeout(self, prev_addr, state):
        """
        Concretize the ``/dev/stdin`` file of ``state`` and record it as a generated input.

        :param prev_addr: address of the block executed before ``state.addr``.
        :param state:     the diverted state to dump an input for.
        :return:          ``(key, generated)`` with ``key = (len(generated), prev_addr, state.addr)``,
                          or ``None`` when the key is already in the catalogue.
        """
        # Previously: state.posix.stdin.load(0, state.posix.stdin.pos)
        generated = state.fs.get("/dev/stdin").concretize()

        key = (len(generated), prev_addr, state.addr)

        # Checks here to see if the generation is worth writing to disk.
        # If we generate too many inputs which are not really different we'll seriously slow down AFL.
        if self._in_catalogue(*key):
            self._core.encounters.remove((prev_addr, state.addr))
            return None

        self._add_to_catalogue(*key)

        l.debug("[%s] dumping input for %#x -> %#x.", self.identifier, prev_addr, state.addr)

        self._generated.add((key, generated))

        if self.redis:
            # Publish it out in real-time so that inputs get there immediately.
            channel = self.identifier + '-generated'
            self.redis.publish(channel, pickle.dumps({'meta': key, 'data': generated, "tag": self.tag}))
        else:
            l.debug("Generated: %s", binascii.hexlify(generated))

        return (key, generated)

    def _drill_input(self):
        """
        Symbolically step down a path with a tracer, trying to concretize inputs for unencountered
        state transitions.

        Generator: for every diverted state it yields the result of ``self._writeout``
        (when that is not ``None``) followed by everything ``self._symbolic_explorer_stub``
        produces for that state.
        """

        # initialize the tracer
        r = tracer.qemu_runner.QEMURunner(self.binary, self.input, argv=patch_argv_qemu(self.argv))
        p = angr.Project(self.binary)
        for addr, proc in self._hooks.items():
            p.hook(addr, proc)
            l.debug("Hooking %#x -> %s...", addr, proc.display_name)

        if p.loader.main_object.os == 'cgc':
            # Registered once; the original snippet repeated this update twice.
            p.simos.syscall_library.update(angr.SIM_LIBRARIES['cgcabi_tracer'])

            s = p.factory.entry_state(stdin=angr.SimFileStream, flag_page=r.magic, mode='tracing')
        else:
            # The file-backed /dev/stdin is only needed (and only sized) on this path.
            files = {'/dev/stdin': angr.storage.file.SimFile("/dev/stdin", "r", size=self._parse_size())}
            s = p.factory.entry_state(args=patch_argv_angr(self.argv), fs=files)
            # Alternative that was tried:
            # s = p.factory.full_init_state(args=patch_argv_angr(self.argv), fs=files)

        s.preconstrainer.preconstrain_file(self.input, s.posix.stdin, True)

        simgr = p.factory.simgr(s, save_unsat=True, hierarchy=False, save_unconstrained=r.crash_mode)

        t = angr.exploration_techniques.Tracer(trace=r.trace, crash_addr=r.crash_addr, copy_states=True,
                                               follow_unsat=True, mode="permissive")
        self._core = angr.exploration_techniques.DrillerCore(trace=r.trace, fuzz_bitmap=self.fuzz_bitmap)

        simgr.use_technique(t)
        simgr.use_technique(angr.exploration_techniques.Oppologist())
        simgr.use_technique(self._core)

        self._set_concretizations(simgr.one_active)

        l.debug("Drilling into %r.", self.input)
        l.debug("Input is %r.", self.input)

        while simgr.active and simgr.one_active.globals['trace_idx'] < len(r.trace) - 1:
            simgr.step()

            # Check here to see if a crash has been found.
            if self.redis and self.redis.sismember(self.identifier + '-finished', True):
                return

            if 'diverted' not in simgr.stashes:
                continue

            while simgr.diverted:
                state = simgr.diverted.pop(0)
                l.debug("Found a diverted state, exploring to some extent.")
                w = self._writeout(state.history.bbl_addrs[-1], state)
                if w is not None:
                    yield w
                for i in self._symbolic_explorer_stub(state):
                    yield i
It works fine for this simple test program:
/// echo -e "\x00\x00\x00\x00" > input
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/* Test target: opens the file named by argv[1], reads 4 bytes into x, and prints
 * "xxx" when x > 60000, otherwise "YYY". */
int main(int argc, char **argv) {
int x;
printf("ready to open %s\n", argv[1]);
int fd = open(argv[1], O_RDONLY);
/* Bail out (still returning 0) when the file cannot be opened. */
if (fd < 0) {
printf("GG\n");
return 0;
}
/* NOTE(review): the return value of read() is ignored, so x is left uninitialized if nothing is read. */
read(fd, &x, 4);
close(fd);
/* Input-dependent branch. */
if (x > 60000) printf("xxx\n");
else printf("YYY\n");
return 0;
}