panda
panda copied to clipboard
Taint labels are not accurate when applied to an input stream
- The taint labels of buf aren't deleted after buf gets new content.
test.c is as below:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Global buffer that receives the file contents. It is a global (not a stack
 * array), and the accompanying analysis script refers to it by a fixed
 * address. */
char buf[100] = "";
/*
 * Reproducer for the taint experiment: opens the file "tmp" and issues three
 * fread calls into overlapping regions of buf, printing a marker string and
 * part of the buffer with puts around each read.
 *
 * The trailing "line N" comments are the line numbers of the original test.c
 * that the issue text and the analysis script refer to.
 *
 * Returns 0 in all cases, including when the file cannot be opened.
 *
 * NOTE(review): the fread return values are not checked and pFile is never
 * closed; both are left as-is because changing the program would change the
 * binary the recording and the hard-coded addresses were taken from.
 */
int main()
{
    FILE *pFile = fopen("tmp", "rb");
    if (pFile == NULL)
    {
        perror ("Error opening file");
        return 0;
    }
    /* First read: requests 30 bytes into buf[0..29]. */
    fread(buf, sizeof(char), 30, pFile); //line 15
    puts("read 10");
    puts(buf); //line 17
    /* Second read: requests 10 bytes into buf[10..19]. */
    fread(buf+10, sizeof(char), 10, pFile); //line 19
    puts("input 20");
    puts(buf+10); // line 21
    puts("input 30");
    /* Third read: requests 10 bytes into buf[20..29]. */
    fread(buf+20, sizeof(char), 10, pFile); //line 24
    puts(buf+20); //line 25
    return 0;
}
When I taint buf at line 17 (after the fread(buf, sizeof(char), 30, pFile); at line 15)
and then check the taint labels of (buf+10 - buf+30) at line 25, the taint labels of (buf+10 - buf+30) still exist. I think they should have been deleted by the read operations at line 19 and line 24, but they were not.
- If I taint buf with new_label_num after buf has already been tainted with old_label_num, the label number of buf is still old_label_num in the end.
My pypanda code is shown below:
import os
from shutil import copy
from sys import argv, exit, stderr
import sys
from pandare import Panda
from os.path import exists
from time import time
from ipdb import set_trace
# --- Experiment configuration ---
arch = "x86_64"        # passed to Panda(generic=...) below
copydir = os.getcwd()  # host directory that is copied into the guest
target = "test"        # file name of the compiled test.c binary
args = []              # not referenced anywhere else in the visible script

# Path of the target binary inside the guest: an absolute copy directory is
# used as-is, a relative one gets a "./" prefix.
if copydir.startswith("/"):
    full_targ = f"{copydir}/{target}"
else:
    full_targ = f"./{copydir}/{target}"

panda = Panda(generic=arch)
# Record the target program once; later runs reuse the existing recording.
recording_name = 'test'
if exists(f"{recording_name}-rr-snp"):
    print("recording exists. not remaking recording")
else:
    print(f"taking {recording_name} recording")

    @panda.queue_blocking
    def do_stuff():
        """Drive the guest: copy the files in, record one run of the target, stop."""
        panda.revert_sync("root")
        panda.copy_to_guest(copydir, absolute_paths=True)
        # Make the binary executable and switch to its directory before the
        # recording starts, so only the target run itself is recorded.
        for setup_cmd in (f"chmod +x {full_targ}", f"cd {copydir}"):
            panda.run_serial_cmd(setup_cmd)
        panda.run_monitor_cmd(f"begin_record {recording_name}")
        guest_output = panda.run_serial_cmd(f"./{target}")
        print(guest_output)
        panda.run_monitor_cmd("end_record")
        panda.stop_run()

    panda.run()
# Guest virtual addresses hard-coded for the recorded `test` binary; they must
# be re-derived whenever the binary is rebuilt.
buf = 0x404080  # compared against the puts argument, so the address of test.c's `buf`
# NOTE(review): str1..str3 are not used in the visible script. Their spacing
# (8 and 9 bytes) matches the "read 10" / "input 20" / "input 30" literals of
# test.c -- presumably those strings; confirm against the binary.
str1, str2, str3 = 0x40201E, 0x402026, 0x40202F
def taint_addr(cpustate,vaddr,label):
    """Attach taint `label` to the guest memory at virtual address `vaddr`.

    The virtual address is translated for `cpustate` with panda.virt_to_phys,
    the resulting physical address is labelled with panda.taint_label_ram,
    and a confirmation line is printed.
    """
    panda.taint_label_ram(panda.virt_to_phys(cpustate, vaddr), label)
    print('tainted', hex(vaddr), label)
def get_vaddr_label(cpustate,vaddr):
    """Print the taint labels attached to guest virtual address `vaddr`.

    The address is translated for `cpustate` with panda.virt_to_phys and then
    queried through panda.taint_check_ram / panda.taint_get_ram.

    An untainted address is reported with a message and skipped. The earlier
    version used `assert` for this check, so a single untainted byte raised
    AssertionError inside the replay callback and aborted the whole dump;
    the check was also removed entirely when Python ran with -O.

    Returns None in both cases; the result is only printed.
    """
    phys_addr = panda.virt_to_phys(cpustate, vaddr)
    if not panda.taint_check_ram(phys_addr):
        # Nothing to query: report it and let the caller continue with the
        # remaining bytes.
        print(f"{vaddr:x} is not tainted")
        return
    tq = panda.taint_get_ram(phys_addr)
    taint_labels = tq.get_labels()
    print(hex(vaddr), taint_labels)
# Selects which of the two reported problems is reproduced:
#   1 -> label buf once, then dump the labels after the later fread calls
#   2 -> additionally relabel buf+10..buf+19 with new labels before the dump
question = 2


@panda.hook_symbol("libc", "puts")
def hook_memcpy(cpu,tb, h):
    """Callback for libc `puts` (despite its name) that drives the experiment.

    Only calls made by the `target` process are handled. The value of
    argument 0 of the hooked call identifies which puts call in test.c is
    executing, and taint labels are applied or dumped accordingly.
    """
    if panda.get_process_name(cpu) != target:
        return
    one = panda.arch.get_arg(cpu,0)
    if question not in (1, 2):
        return

    if one == buf:  # line 17 in test.c: label each of the 30 bytes with its index
        for offset in range(30):
            taint_addr(cpu, buf + offset, offset)

    if question == 2 and one == buf + 10:  # line 21 in test.c: relabel 10 bytes
        for offset in range(10):
            taint_addr(cpu, buf + 10 + offset, 100 + offset)

    if one == buf + 20:  # line 25 in test.c: dump the labels of the 30 bytes
        for offset in range(30):
            # Offset 10 is skipped: the original author saw the program crash
            # there and did not know the reason.
            if offset != 10:
                get_vaddr_label(cpu, buf + offset)
# Replay the recording with the taint2 and trace plugins loaded.
panda.load_plugin("taint2")
panda.enable_precise_pc()
panda.load_plugin(
    "trace",
    {
        "target": target,
        'log': f"./{target}.log",
    },
)
panda.run_replay(recording_name)
The output when question == 1 is as below:
tainted 0x404080 0
tainted 0x404081 1
tainted 0x404082 2
tainted 0x404083 3
tainted 0x404084 4
tainted 0x404085 5
tainted 0x404086 6
tainted 0x404087 7
tainted 0x404088 8
tainted 0x404089 9
tainted 0x40408a 10
tainted 0x40408b 11
tainted 0x40408c 12
tainted 0x40408d 13
tainted 0x40408e 14
tainted 0x40408f 15
tainted 0x404090 16
tainted 0x404091 17
tainted 0x404092 18
tainted 0x404093 19
tainted 0x404094 20
tainted 0x404095 21
tainted 0x404096 22
tainted 0x404097 23
tainted 0x404098 24
tainted 0x404099 25
tainted 0x40409a 26
tainted 0x40409b 27
tainted 0x40409c 28
tainted 0x40409d 29
0x404080 [0]
0x404081 [1]
0x404082 [2]
0x404083 [3]
0x404084 [4]
0x404085 [5]
0x404086 [6]
0x404087 [7]
0x404088 [8]
0x404089 [9]
0x40408b [11]
0x40408c [12]
0x40408d [13]
0x40408e [14]
0x40408f [15]
0x404090 [16]
0x404091 [17]
0x404092 [18]
0x404093 [19]
0x404094 [20]
0x404095 [21]
0x404096 [22]
0x404097 [23]
0x404098 [24]
0x404099 [25]
0x40409a [26]
0x40409b [27]
0x40409c [28]
0x40409d [29]
The output when question == 2 is as below:
tainted 0x404080 0
tainted 0x404081 1
tainted 0x404082 2
tainted 0x404083 3
tainted 0x404084 4
tainted 0x404085 5
tainted 0x404086 6
tainted 0x404087 7
tainted 0x404088 8
tainted 0x404089 9
tainted 0x40408a 10
tainted 0x40408b 11
tainted 0x40408c 12
tainted 0x40408d 13
tainted 0x40408e 14
tainted 0x40408f 15
tainted 0x404090 16
tainted 0x404091 17
tainted 0x404092 18
tainted 0x404093 19
tainted 0x404094 20
tainted 0x404095 21
tainted 0x404096 22
tainted 0x404097 23
tainted 0x404098 24
tainted 0x404099 25
tainted 0x40409a 26
tainted 0x40409b 27
tainted 0x40409c 28
tainted 0x40409d 29
tainted 0x40408a 100
tainted 0x40408b 101
tainted 0x40408c 102
tainted 0x40408d 103
tainted 0x40408e 104
tainted 0x40408f 105
tainted 0x404090 106
tainted 0x404091 107
tainted 0x404092 108
tainted 0x404093 109
0x404080 [0]
0x404081 [1]
0x404082 [2]
0x404083 [3]
0x404084 [4]
0x404085 [5]
0x404086 [6]
0x404087 [7]
0x404088 [8]
0x404089 [9]
0x40408b [11]
0x40408c [12]
0x40408d [13]
0x40408e [14]
0x40408f [15]
0x404090 [16]
0x404091 [17]
0x404092 [18]
0x404093 [19]
0x404094 [20]
0x404095 [21]
0x404096 [22]
0x404097 [23]
0x404098 [24]
0x404099 [25]
0x40409a [26]
0x40409b [27]
0x40409c [28]
0x40409d [29]