SVF
SVF copied to clipboard
Wrong result of backward slicing
This is a follow-up issue related to https://github.com/SVF-tools/SVF/issues/1574.
First, thank you for your help, @jumormt! With your guidance, I was able to perform a backward traversal from a given target site to identify the relevant data dependencies. Using the code below, I iteratively traverse inEdge to find all predecessor nodes, successfully gathering all data dependencies associated with the svfval value
void traverseOnVFG(const SVFG* vfg, const SVFValue* svfval)
{
SVFIR* pag = SVFIR::getPAG();
PAGNode* pNode = pag->getGNode(pag->getValueNode(svfval));
if (!vfg->hasDefSVFGNode(pNode))
return;
const VFGNode* vNode = vfg->getDefSVFGNode(pNode);
//llvm::outs() << "definition node: " << *(LLVMModuleSet::getLLVMModuleSet()->getLLVMValue(vNode->getValue())) << "\n";
FIFOWorkList<const VFGNode*> worklist;
Set<const VFGNode*> visited;
worklist.push(vNode);
/// Backward Traverse along VFG
while (!worklist.empty()){
const VFGNode* vNode = worklist.pop();
for (VFGNode::const_iterator it = vNode->InEdgeBegin(), eit =
vNode->InEdgeEnd(); it != eit; ++it)
{
VFGEdge* edge = *it;
VFGNode* preNode = edge->getSrcNode();
if (visited.find(preNode) == visited.end())
{
visited.insert(preNode);
worklist.push(preNode);
}
}
}
// Loop through the visited set and collect source information related to the nodes
for (const VFGNode* visitedNode : visited){
if (visitedNode->getValue()) {
// Parse the source location and store it in the set
std::string sourceInfo = parseSourceInfo(visitedNode->getValue()->getSourceLoc());
//llvm::outs() << "Source Location: " << sourceInfo << "\n";
if (!sourceInfo.empty() && sourceInfo.find("/usr/lib") != 0) {
SourceLocations.insert(sourceInfo);
}
}
}
This code works perfectly and collects all data dependency for a toy program:
1 #include <stdio.h>
2 #include <stdlib.h>
3
4 // Function to modify 'a' and 'b' with more complexity
5 void modifyInputs(int *a, int *b) {
6 int *temp = (int *)malloc(sizeof(int));
7 *temp = *a + *b; // Add 'a' and 'b' together
8
9 if (*temp % 2 == 0) {
10 *a = *temp / 2; // If even, set 'a' to half of the sum
11 } else {
12 *a = *temp + 3; // If odd, increment by 3 and assign to 'a'
13 }
14
15 *b = (*temp * *a) % 7; // Modify 'b' based on the new 'a' value
16
17 free(temp); // Free dynamically allocated memory
18 }
19
20 int compute(int *a, int *b) {
21 int *x = (int *)malloc(sizeof(int)); // Step 1: Dynamic memory for 'x'
22 int *y = (int *)malloc(sizeof(int)); // Step 2: Dynamic memory for 'y'
23 int *z = (int *)malloc(sizeof(int)); // Step 3: Dynamic memory for 'z'
24 *x = *a * 2; // Multiply 'a' by 2
25 *y = *b + 3; // Add 3 to 'b'
26
27 // Branch: Depending on the value of 'x', perform different operations
28 if (*x > 10) {
29 *z = *y * 2; // If 'x' > 10, multiply 'y' by 2
30 } else {
31 *z = *y - 1; // If 'x' <= 10, subtract 1 from 'y'
32 }
33
34 int *result = (int *)malloc(sizeof(int)); // Dynamic memory for 'result'
35 *result = *z + *a; // Add 'a' to 'z'
36
37 // Additional branching based on 'z' and 'a'
38 if (*z < *a) {
39 *result = *z - *y;
40 } else {
41 *result = 10;
42 }
43
44 // Complex while loop involving indirect pointer accesses
45 while (*a) {
46 if (*b > 2 + *a) {
47 (*result)--;
48 }
49 *result += *x;
50 (*a)--;
51 }
52
53 // Free dynamically allocated memory
54 free(x);
55 free(y);
56 free(z);
57
58 int finalResult = *result;
59 free(result);
60
61 return finalResult; // Slicing criterion with indirection
62 }
63
64 int main() {
65 int *a = (int *)malloc(sizeof(int)); // Step 6: Dynamic memory for 'a'
66 int *b = (int *)malloc(sizeof(int)); // Step 7: Dynamic memory for 'b'
67 *a = 4;
68 *b = 5;
69
70 // Modify 'a' and 'b' before passing them to compute
71 modifyInputs(a, b);
72
73 int result = compute(a, b); // Step 8: Call compute with modified pointers
74 printf("Result: %d\n", result);
75
76 // Free dynamically allocated memory
77 free(a);
78 free(b);
79
80 return 0;
81 }
For example, in this case, I can collect all data dependency nodes related to line 49:
Unique related node: toy.c:10
Unique related node: toy.c:12
Unique related node: toy.c:15
Unique related node: toy.c:20
Unique related node: toy.c:21
Unique related node: toy.c:22
Unique related node: toy.c:23
Unique related node: toy.c:24
Unique related node: toy.c:25
Unique related node: toy.c:29
Unique related node: toy.c:31
Unique related node: toy.c:34
Unique related node: toy.c:35
Unique related node: toy.c:39
Unique related node: toy.c:41
Unique related node: toy.c:47
Unique related node: toy.c:49
Unique related node: toy.c:5
Unique related node: toy.c:6
Unique related node: toy.c:65
Unique related node: toy.c:66
Unique related node: toy.c:67
Unique related node: toy.c:68
Unique related node: toy.c:7
Unique related node: toy.c:71
Unique related node: toy.c:73
However, when I apply this to a real-world program (e.g., libpng), It works incorrectly on line 623 inside a function:
....//More code
598 if (info_ptr->unknown_chunks != NULL &&
599 ((mask & PNG_FREE_UNKN) & info_ptr->free_me) != 0)
600 {
601 if (num != -1)
602 {
603 png_free(png_ptr, info_ptr->unknown_chunks[num].data);
604 info_ptr->unknown_chunks[num].data = NULL;
605 }
606
607 else
608 {
609 int i;
610
611 for (i = 0; i < info_ptr->unknown_chunks_num; i++)
612 png_free(png_ptr, info_ptr->unknown_chunks[i].data);
613
614 png_free(png_ptr, info_ptr->unknown_chunks);
615 info_ptr->unknown_chunks = NULL;
616 info_ptr->unknown_chunks_num = 0;
617 }
618 }
619 #endif
620
621 #ifdef PNG_eXIf_SUPPORTED
622 #ifdef MAGMA_ENABLE_CANARIES
623 MAGMA_LOG("PNG006", MAGMA_AND(info_ptr->eXIf_buf != NULL, (mask & info_ptr->free_me & PNG_FREE_EXIF) == 0));
624 #endif
625 /* Free any eXIf entry */
626 if (((mask & PNG_FREE_EXIF) & info_ptr->free_me) != 0)
627 {
628 # ifdef PNG_READ_eXIf_SUPPORTED
629 if (info_ptr->eXIf_buf)
630 {
631 png_free(png_ptr, info_ptr->eXIf_buf);
632 info_ptr->eXIf_buf = NULL;
633 }
634 # endif
635 if (info_ptr->exif)
636 {
637 png_free(png_ptr, info_ptr->exif);
638 info_ptr->exif = NULL;
639 }
640 info_ptr->valid &= ~PNG_INFO_eXIf;
641 }
642 #endif
643
644 #ifdef PNG_hIST_SUPPORTED
645 /* Free any hIST entry */
646 if (((mask & PNG_FREE_HIST) & info_ptr->free_me) != 0)
647 {
648 png_free(png_ptr, info_ptr->hist);
649 info_ptr->hist = NULL;
650 info_ptr->valid &= ~PNG_INFO_hIST;
651 }
652 #endif
653
654 /* Free any PLTE entry that was internally allocated */
655 if (((mask & PNG_FREE_PLTE) & info_ptr->free_me) != 0)
656 {
.....//More code
This backward slicing collects the node after png:623:
....// More nodes
Unique related node: png.c:575
Unique related node: png.c:576
Unique related node: png.c:590
Unique related node: png.c:591
Unique related node: png.c:592
Unique related node: png.c:604
Unique related node: png.c:615
Unique related node: png.c:616
Unique related node: png.c:62
Unique related node: png.c:623
Unique related node: png.c:632
Unique related node: png.c:638
Unique related node: png.c:640
Unique related node: png.c:649
Unique related node: png.c:650
Unique related node: png.c:658
Unique related node: png.c:659
Unique related node: png.c:660
Unique related node: png.c:674
Unique related node: png.c:676
Unique related node: png.c:681
Unique related node: png.c:683
...//More nodes
I've confirmed that there are no loops in this function, so the node following line 623 should not be collected as expected (e.g., png.c:632 ... are collected).
Do you know why this might be happening, or could you provide some insight into the cause? I'm performing backward slicing, so this node shouldn't be collected, even if the slicing is potentially unsound and over-approximated.
Thanks