llvm-project icon indicating copy to clipboard operation
llvm-project copied to clipboard

[BOLT] Static binary patchelf fix

Open paschalis-mpeis opened this issue 1 year ago • 1 comments
trafficstars

DRAFT / WIP: Work In Progress

static binaries fix: don't patch anything that is not in orig text segment

paschalis-mpeis avatar Jul 04 '24 10:07 paschalis-mpeis

@llvm/pr-subscribers-bolt

Author: Paschalis Mpeis (paschalis-mpeis)

Changes

DRAFT / WIP: Work In Progress

static binaries fix: don't patch anything that is not in orig text segment


Full diff: https://github.com/llvm/llvm-project/pull/97710.diff

3 Files Affected:

  • (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+13)
  • (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+27-2)
  • (added) bolt/test/AArch64/test-indirect-branch.s (+110)
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1a3a8af21d81b..12464493bcbb4 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -5291,6 +5291,19 @@ void RewriteInstance::patchELFGOT(ELFObjectFile<ELFT> *File) {
                                                      GOTContents.size());
        ++GOTEntry) {
     if (uint64_t NewAddress = getNewFunctionAddress(*GOTEntry)) {
+      auto *Function = BC->getBinaryFunctionAtAddress(*GOTEntry);
+
+      // On static binaries, avoid patching got entries that did not belong to
+      // the original text section. One such special case is the '_init'
+      // function, belonging to the '.init' section.
+      if (BC->IsStaticExecutable &&
+          Function->getOriginSectionName() != ".bolt.org.text") {
+        LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring GOT entry 0x"
+                          << Twine::utohexstr(*GOTEntry) << " for '"
+                          << Function->getOneName() << "'" << '\n');
+        continue;
+      }
+
       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: patching GOT entry 0x"
                         << Twine::utohexstr(*GOTEntry) << " with 0x"
                         << Twine::utohexstr(NewAddress) << '\n');
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index a74eda8e4a566..c5af2d4d56888 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -706,8 +706,20 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     unsigned ShiftVal = AArch64_AM::getArithShiftValue(OperandExtension);
     AArch64_AM::ShiftExtendType ExtendType =
         AArch64_AM::getArithExtendType(OperandExtension);
-    if (ShiftVal != 2)
-      llvm_unreachable("Failed to match indirect branch! (fragment 2)");
+    if (ShiftVal != 2) {
+      // TODO: Handle the patten where ShiftVal != 2.
+      // The following code sequence below has no shift amount,
+      // the range could be 0 to 4.
+      // The pattern comes from libc, it occurs when the binary is static.
+      //   adr     x6, 0x219fb0 <sigall_set+0x88>
+      //   add     x6, x6, x14, lsl #2
+      //   ldr     w7, [x6]
+      //   add     x6, x6, w7, sxtw => no shift amount
+      //   br      x6
+      errs() << "BOLT-WARNING: "
+                "Failed to match indirect branch: ShiftVAL != 2 \n";
+      return false;
+    }
 
     if (ExtendType == AArch64_AM::SXTB)
       ScaleValue = 1LL;
@@ -752,6 +764,19 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
       return true;
     }
 
+    if (DefJTBaseAdd->getOpcode() == AArch64::ADR) {
+      // TODO: Handle the pattern where there is no adrp/add pair.
+      // It also occurs when the binary is static.
+      //  adr     x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
+      //  ldrh    w13, [x13, w12, uxtw #1]
+      //  adr     x12, 0x247b30 <__gettextparse+0x5b0>
+      //  add     x13, x12, w13, sxth #2
+      //  br      x13
+      errs() << "BOLT-WARNING: Failed to match indirect branch: "
+                "nop/adr instead of adrp/add \n";
+      return false;
+    }
+
     assert(DefJTBaseAdd->getOpcode() == AArch64::ADDXri &&
            "Failed to match jump table base address pattern! (1)");
 
diff --git a/bolt/test/AArch64/test-indirect-branch.s b/bolt/test/AArch64/test-indirect-branch.s
new file mode 100644
index 0000000000000..cb9e325654774
--- /dev/null
+++ b/bolt/test/AArch64/test-indirect-branch.s
@@ -0,0 +1,110 @@
+// Test how BOLT handles indirect branch sequence of instructions in
+// AArch64MCPlus builder.
+// This test checks the pattern where there is no shift amount after add
+// instruction. The pattern come from libc, it can be reproduced with
+// a 'static' built binary.
+//
+//   adr     x6, 0x219fb0 <sigall_set+0x88>
+//   add     x6, x6, x14, lsl #2
+//   ldr     w7, [x6]
+//   add     x6, x6, w7, sxtw => no shift amount
+//   br      x6
+//
+// It also tests another case where there is no adrp/add pair.
+// The pattern also come from libc, and it only represents in the binary
+// if the lld linker is used to create the static binary.
+// It doesn't occur with GCC ld linker.
+//
+//  nop   => nop/adr instead of adrp/add
+//  adr     x13, 0x215a18 <_nl_value_type_LC_COLLATE+0x50>
+//  ldrh    w13, [x13, w12, uxtw #1]
+//  adr     x12, 0x247b30 <__gettextparse+0x5b0>
+//  add     x13, x12, w13, sxth #2
+//  br      x13
+
+// clang-format off
+
+// REQUIRES: system-linux
+// RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
+// RUN: %clang %cflags --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
+// RUN: llvm-bolt %t.exe -o %t.bolt --print-cfg \
+// RUN:  -v=1 2>&1 | FileCheck %s
+
+// CHECK: BOLT-WARNING: Failed to match indirect branch: nop/adr instead of adrp/add
+// CHECK: BOLT-WARNING: Failed to match indirect branch: ShiftVAL != 2
+
+
+  .section .text
+  .align 4
+  .globl _start
+  .type  _start, %function
+_start:
+  bl bar
+  bl end
+  mov x0, #4
+  mov w8, #93
+  svc #0
+
+bar:
+  mov     w1, #3
+  cmp     x1, #0
+  b.eq    end
+  nop
+  adr     x3, jump_table
+  ldrh    w3, [x3, x1, lsl #1]
+  adr     x1, .case0
+  add     x3, x1, w3, sxth #2
+  br      x3
+.case0:
+  mov     w0, #1
+  ret
+.case1:
+  mov     w0, #2
+  ret
+.case3:
+  mov     w0, #3
+  ret
+.case4:
+  nop
+  mov     x1, #0
+  adr     x3, datatable
+  add     x3, x3, x1, lsl #2
+  ldr     w2, [x3]
+  add     x3, x3, w2, sxtw
+  br      x3
+  nop
+  mov     w0, #4
+  ret
+.case7:
+  mov     w0, #4
+  ret
+
+foo1:
+   ret
+
+foo2:
+   add    w0, w0, #3
+   ret
+
+foo3:
+   add    w0, w0, #3
+   ret
+
+end:
+  add     x0, x0, #99
+  ret
+
+  .section .rodata,"a",@progbits
+jump_table:
+  .hword  (.case0-.case0)>>2
+  .hword  (.case1-.case0)>>2
+  .hword  (.case3-.case0)>>2
+  .hword  (.case4-.case0)>>2
+  .hword  (.case7-.case0)>>2
+
+
+datatable:
+  .word foo1-datatable
+  .word foo2-datatable
+  .word foo3-datatable
+  .word 20

llvmbot avatar Jul 04 '24 20:07 llvmbot

Minimal Reproducer:

main.c:

int main() { return 0; }
clang main.c -Wl,-q -static -o out
llvm-bolt out -o out.bolt
./out.bolt

paschalis-mpeis avatar Jul 09 '24 06:07 paschalis-mpeis

As I see, this branch has not been rebased on top of the recently merged changes with the introduction of the createDummyReturn function (or the final version of its name).

Please, rebase the branch to distinguish the correct set of changes, what makes the review much simpler.

samolisov avatar Jul 19 '24 09:07 samolisov

Hi, I've filed https://github.com/llvm/llvm-project/issues/100096 to try and capture in detail the issue that this PR is trying to solve.

peterwaller-arm avatar Jul 23 '24 12:07 peterwaller-arm

In the issue below, it was reached a consensus that BOLT can reject such cases, therefore I am closing this PR.

  • https://github.com/llvm/llvm-project/issues/100096

I rebased it (as it was a stacked PR) and added some comments in case someone has such a binary and needs a workaround or a starting point.

paschalis-mpeis avatar Aug 19 '24 13:08 paschalis-mpeis