symqemu
symqemu copied to clipboard
atoi() is not solved
Example:
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
#define bail(msg, pos) \
while (1) { \
\
fprintf(stderr, "%s at %u\n", (char *)msg, (uint32_t)pos); \
return 0; \
\
}
int LLVMFuzzerTestOneInput(uint8_t *buf, size_t len) {
uint8_t buff[100];
if (len < 8) bail("too short", 0);
if (len > sizeof(buff)) bail("too long", sizeof(buff));
memcpy(buff, buf, len);
buff[sizeof(buff) - 1] = 0;
// string to int
if (atoi((char *)buff) != 66766) bail("wrong string", 0);
abort();
return 0;
}
int main(int argc, char **argv) {
unsigned char buf[64];
ssize_t len;
int fd = 0;
if (argc > 1) fd = open(argv[1], O_RDONLY);
if ((len = read(fd, buf, sizeof(buf))) <= 0) exit(0);
LLVMFuzzerTestOneInput(buf, len);
exit(0);
}
# gcc -o test -g test.c
# echo AAAAAAAAAAAAAAAAAAAAA|symqemu-x86_64 ./test
[STAT] SMT: { "solving_time": 0, "total_time": 65148 }
[STAT] SMT: { "solving_time": 4651 }
[INFO] New testcase: /tmp/output/000000
[...]
[INFO] New testcase: /tmp/output/000028-optimistic
[STAT] SMT: { "solving_time": 120114, "total_time": 560178 }
[STAT] SMT: { "solving_time": 121562 }
[STAT] SMT: { "solving_time": 121562, "total_time": 562343 }
[STAT] SMT: { "solving_time": 255014 }
but none of the 29 generated inputs contain the correct value:
# for i in /tmp/output/*; do hexdump -C $i;done|grep 00000000
00000000 be 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |.AAAAAAAAAAAAAAA|
00000000 00 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |.AAAAAAAAAAAAAAA|
00000000 2d 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |-AAAAAAAAAAAAAAA|
00000000 2b 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |+AAAAAAAAAAAAAAA|
00000000 30 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |0AAAAAAAAAAAAAAA|
00000000 00 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |.AAAAAAAAAAAAAAA|
00000000 30 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |0AAAAAAAAAAAAAAA|
00000000 be 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |.AAAAAAAAAAAAAAA|
00000000 be 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |.AAAAAAAAAAAAAAA|
00000000 41 41 41 41 41 41 41 41 80 4e 80 00 40 00 00 00 |AAAAAAAA.N..@...|
00000000 41 41 41 41 41 41 41 41 76 4e 80 00 40 00 00 00 |AAAAAAAAvN..@...|
00000000 41 41 41 41 41 41 41 41 2e 20 00 00 40 00 00 00 |AAAAAAAA. ..@...|
00000000 41 41 41 41 41 41 41 41 01 c8 a4 9d 18 00 00 00 |AAAAAAAA........|
00000000 41 41 41 41 41 41 41 41 7f 2e 80 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 7f 2e 80 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 7f 2e 80 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 7f 2e 80 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 7f 2e 80 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 74 4e 80 00 40 00 00 00 |AAAAAAAAtN..@...|
00000000 41 41 41 41 41 41 41 41 74 4e 80 00 40 00 00 00 |AAAAAAAAtN..@...|
00000000 41 41 41 41 41 41 41 41 f9 1f 00 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 7f 2e 80 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 7f 2e 80 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 7f 2e 80 00 40 00 00 00 |AAAAAAAA....@...|
00000000 41 41 41 41 41 41 41 41 6f 4e 80 00 40 00 00 00 |AAAAAAAAoN..@...|
00000000 00 00 00 00 41 41 41 41 ee ff ff ff 41 41 41 41 |....AAAA....AAAA|
00000000 00 00 00 00 41 41 41 41 ee ff ff ff 41 41 41 41 |....AAAA....AAAA|
00000000 41 00 00 00 41 41 41 41 80 2e 80 00 40 00 00 00 |A...AAAA....@...|
00000000 00 00 00 00 41 41 41 41 ee ff ff ff 41 41 41 41 |....AAAA....AAAA|
why is this the case?
Probably atoi
decodes in a loop; concolic execution then can't reason about the loop as a whole (i.e., as an abstract construct with many possible paths through it), but instead sees only the check of the loop condition in each iteration. It will generate a deviating input, but after that the outcome of the check becomes part of the path constraints and isn't reasoned about any further.