Medium Level IL SSA
Version and Platform (required):
- Binary Ninja Version: 4.1.5747
- OS: Ubuntu Linux
- OS Version: Ubuntu 22.04.2 LTS
- CPU Architecture: x64
Bug Description: When tracking a variable through the medium level IL, there is no clear path from where a variable is initialized to where it's used in a function call.
Steps To Reproduce: Please provide all steps required to reproduce the behavior:
- Create a file `command.c` and enter the following code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#define PORT 8080
#define CMD_MAX 1024
/*
 * do_vuln - reproduction target for the IL data-flow question in this report.
 *
 * Prints `filename` via printf, opens it as a file, reads the file's contents
 * into a 0x100-byte heap buffer and passes that buffer to system(); then
 * copies `filename` itself into the same buffer and passes it to system()
 * again before freeing the buffer.
 *
 * NOTE(review): the unsafe calls below appear to be intentional (the first one
 * is explicitly marked "should be detected"), so the code is left untouched.
 * No return value of fopen/fseek/ftell/malloc/fread is checked, and the FILE
 * handle is never closed in this function.
 */
void do_vuln(char* filename) {
printf(filename); // format string bug should be detected
FILE *f = fopen(filename,"rb");
// Seek to the end and read the position back to obtain the file size in fs.
fseek(f, 0, SEEK_END);
size_t fs = ftell(f);
fseek(f, 0, SEEK_SET);
char *command = malloc(0x100);
// Reads fs bytes (the whole file) into the 0x100-byte buffer; fs is not
// bounded by the buffer size and no terminating NUL is written here.
fread(command, 1, fs, f);
// First sink: the file contents are passed to system().
system(command);
// Unbounded copy of the caller-supplied string into the same buffer.
strcpy(command, filename);
// Second sink: the caller-supplied string is passed to system().
system(command);
free(command);
}
/*
 * Entry point of the reproduction program.
 *
 * Creates a TCP socket, connects it to port PORT, receives up to
 * sizeof(src) - 1 bytes from the peer and hands them to do_vuln() as a
 * NUL-terminated string.
 *
 * Returns 0 after do_vuln() has run, or EXIT_FAILURE if the socket cannot be
 * created, the connection fails, or the receive fails.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    int sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) return EXIT_FAILURE;

    /*
     * Zero the whole address first so sin_addr and the padding are not left
     * indeterminate. sin_addr stays 0 (INADDR_ANY) because the original never
     * set a peer address -- TODO confirm the intended peer.
     */
    struct sockaddr_in client_addr = { 0 };
    client_addr.sin_family = AF_INET;
    client_addr.sin_port = htons(PORT);

    /*
     * connect() returns 0 on success, not a descriptor, so its result is only
     * a status; the connected descriptor is still `sock`.
     */
    if (connect(sock, (struct sockaddr*)&client_addr, sizeof(client_addr)) < 0) {
        close(sock);
        return EXIT_FAILURE;
    }

    /*
     * Receive at most sizeof(src) - 1 bytes so the zero-initialised last byte
     * always terminates the string passed to do_vuln().
     */
    char src[0x10] = { 0 };
    ssize_t received = recv(sock, src, sizeof(src) - 1, MSG_WAITALL);
    close(sock);
    if (received < 0) return EXIT_FAILURE;

    do_vuln(src);
    return 0;
}
- Compile this on an x64 platform
- Open the executable in Binary Ninja and switch to the MLIL view.
- From the MLIL view below, you'll notice there's actually no clear path from when the contents of `arg1#0` are written to `r0_6#9` through the `fread` operation to when the `system` command is executed at address `00000834`.
000007a8 int32_t do_vuln(char* arg1)
0 @ 000007b4 var_1c#1 = arg1#0
1 @ 000007b8 r0#1 = var_1c#1
2 @ 000007bc mem#1 = printf(r0#1) @ mem#0
3 @ 000007cc r0_1#2 = var_1c#1
4 @ 000007d0 fp#3, mem#2 = fopen(filename: r0_1#2, mode: &data_a54) @ mem#1
5 @ 000007d4 r3#1 = fp#3
6 @ 000007d8 var_c#1 = r3#1
7 @ 000007e4 r0_2#4 = var_c#1
8 @ 000007e8 mem#3 = fseek(fp: r0_2#4, offset: 0, whence: 2) @ mem#2
9 @ 000007ec r0_3#5 = var_c#1
10 @ 000007f0 count#6, mem#4 = ftell(fp: r0_3#5) @ mem#3
11 @ 000007f4 r3_1#2 = count#6
12 @ 000007f8 var_10#1 = r3_1#2
13 @ 00000804 r0_4#7 = var_c#1
14 @ 00000808 mem#5 = fseek(fp: r0_4#7, offset: 0, whence: 0) @ mem#4
15 @ 00000810 r0_5#8, mem#6 = malloc(bytes: 0x100) @ mem#5
16 @ 00000814 r3_2#3 = r0_5#8
17 @ 00000818 var_14#1 = r3_2#3
18 @ 0000081c r3_3#4 = var_c#1
19 @ 00000820 r2#1 = var_10#1
20 @ 00000828 r0_6#9 = var_14#1
21 @ 0000082c mem#7 = fread(buf: r0_6#9, size: 1, count: r2#1, fp: r3_3#4) @ mem#6
22 @ 00000830 r0_7#10 = var_14#1
23 @ 00000834 r2_1#2, r3_4#5, mem#8 = system(line: r0_7#10) @ mem#7
24 @ 00000838 r1#1 = var_1c#1
25 @ 0000083c r0_8#11 = var_14#1
26 @ 00000840 mem#9 = strcpy(dest: r0_8#11, src: r1#1, src: r2_1#2, dest: r3_4#5) @ mem#8
27 @ 00000844 r0_9#12 = var_14#1
28 @ 00000848 r1_1#2, r2_2#3, r3_5#6, mem#10 = system(line: r0_9#12) @ mem#9
29 @ 0000084c r0_10#13 = var_14#1
30 @ 00000850 r0_11#14, mem#11 = free(mem: r0_10#13, r1_1#2, r2_2#3, r3_5#6) @ mem#10
31 @ 00000854 r0_12#15 = r0_11#14
32 @ 0000085c return r0_12#15
Expected Behavior:
The HLIL view, however, shows the correct program flow up to the point where `system` is executed at address `00000834`.
000007a8 int32_t do_vuln(char* arg1)
000007bc printf(arg1)
000007d0 FILE* fp = fopen(filename: arg1, mode: &data_a54)
000007e8 fseek(fp, offset: 0, whence: 2)
000007f0 int32_t count = ftell(fp)
00000808 fseek(fp, offset: 0, whence: 0)
00000810 char* r0_5 = malloc(bytes: 0x100)
0000082c fread(buf: r0_5, size: 1, count, fp)
00000834 char* r2_1
00000834 char* r3_4
00000834 r2_1, r3_4 = system(line: r0_5)
00000840 strcpy(dest: r0_5, src: arg1, src: r2_1, dest: r3_4)
00000848 mstate r1_1
00000848 mstate r2_2
00000848 mstate r3_5
00000848 r1_1, r2_2, r3_5 = system(line: r0_5)
0000085c return free(mem: r0_5, r1_1, r2_2, r3_5)
Screenshots/Video Recording:
Additional Information: Please add any other context about the problem here.
Sorry, but I am not sure I understand your question. First of all, just a reminder that you are viewing the MLIL SSA form, which is not the same as the MLIL. Also, the MLIL SSA you showed does track the usage of `arg1` correctly, and that usage eventually reaches the `system` call.