zig icon indicating copy to clipboard operation
zig copied to clipboard

Function pointers inside a struct in a DLL are zero

Open dee0xeed opened this issue 1 year ago • 4 comments

Zig Version

0.14.0-dev.872+a60810b5a

Steps to Reproduce and Observed Behavior

System: Linux Mint 21.3

// file 1. api.zig (interface definition)

/// Table of function pointers imported by both the library (ari.zig) and the
/// application (app.zig). Declared `extern` so the field layout follows the C ABI.
pub const Api = extern struct {
    /// Pointer to a C-calling-convention function taking two `i32` and returning `i32`.
    add: *const fn(i32,i32) callconv(.C) i32,
    /// Pointer to a C-calling-convention function taking two `i32` and returning `i32`.
    sub: *const fn(i32,i32) callconv(.C) i32,
};
// file 2. ari.zig (the dll)

const Api = @import("api.zig").Api;

/// Returns `a + b`. Uses the C calling convention so its address can be stored in `Api.add`.
fn add(a: i32, b: i32) callconv(.C) i32 {
    return a + b;
}

/// Returns `a - b`. Uses the C calling convention so its address can be stored in `Api.sub`.
fn sub(a: i32, b: i32) callconv(.C) i32 {
    return a - b;
}

/// Exported function table; consumers look it up by the symbol name `api`.
/// Both fields hold addresses of functions defined in this file.
export const api: Api = .{
    .add = &add,
    .sub = &sub,
};
// file 3. app.zig (the application)

const std = @import("std");
const log = std.debug.print;
const Dll = std.DynLib;

const Api = @import("api.zig").Api;

/// Opens `./libari.so` with `std.DynLib`, looks up the exported `api` table and
/// calls both function pointers through it, printing the pointers and results.
///
/// Returns the error from `Dll.open` if the library cannot be opened, or
/// `error.SymbolNotFound` if the library does not export `api`.
pub fn main() !void {
    var lib = try Dll.open("./libari.so");
    defer lib.close();

    // `api` is declared `export const` in the library, so only a read-only
    // view is requested. A missing symbol is reported as an error instead of
    // panicking on a `.?` unwrap.
    const api = lib.lookup(*const Api, "api") orelse return error.SymbolNotFound;

    log("api = {*}\n", .{api});
    log("api.add = {*}\n", .{api.add});
    log("api.sub = {*}\n", .{api.sub});

    const a: i32 = 10;
    const b: i32 = 7;
    log("{} + {} = {}\n", .{ a, b, api.add(a, b) });
    log("{} - {} = {}\n", .{ a, b, api.sub(a, b) });
}

Compile with

zig build-lib -dynamic ari.zig -O ReleaseSmall
zig build-exe app.zig -O ReleaseSmall

Result:

$ ./app
api = api.Api@7fb76ef79390
api.add = fn (i32, i32) callconv(.C) i32@0
api.sub = fn (i32, i32) callconv(.C) i32@0
Segmentation fault at address 0x0

Addresses may vary depending on the release mode, for example (default mode):

$ ./app
api = api.Api@7fcb827d1870
api.add = fn (i32, i32) callconv(.C) i32@3f
api.sub = fn (i32, i32) callconv(.C) i32@a002200000419
Segmentation fault at address 0x3f

Also tried 0.11.0, 0.12.0, 0.13.0, behavior is identical.

Expected Behavior

Should work. DLL produced by Zig is ok by itself, it's usable from C:

#include <stdio.h>
#include <dlfcn.h>

/* C-side view of the function-pointer table that libari.so exports as `api`. */
struct api {
    int (*add)(int, int); /* invoked below as api->add(a, b) */
    int (*sub)(int, int); /* invoked below as api->sub(a, b) */
};

/*
 * Loads ./libari.so at run time, resolves the exported `api` table and calls
 * both of its entries, printing the results.
 *
 * Returns 0 on success, 1 if the library cannot be opened or the symbol
 * cannot be resolved (a diagnostic is written to stderr in both cases).
 */
int main(void) {
    void *lib = dlopen("./libari.so", RTLD_NOW);
    if (lib == NULL) {
        /* dlopen failed: dlerror() describes why. */
        fprintf(stderr, "dlopen: %s\n", dlerror());
        return 1;
    }

    struct api *api = dlsym(lib, "api");
    if (api == NULL) {
        /* dlerror() may itself be NULL when the symbol exists with value NULL. */
        const char *err = dlerror();
        fprintf(stderr, "dlsym: %s\n", err != NULL ? err : "symbol resolved to NULL");
        dlclose(lib);
        return 1;
    }

    int a = 10;
    int b = 7;
    printf("%d + %d = %d\n", a, b, api->add(a, b));
    printf("%d - %d = %d\n", a, b, api->sub(a, b));
    dlclose(lib);
    return 0;
}
$ ./a.out 
10 + 7 = 17
10 - 7 = 3

Also usable from a Zig program with dlopen.

dee0xeed avatar Aug 07 '24 21:08 dee0xeed

It looks like ElfDynLib is not performing relocation. In the example given, the zeros observed in the Api struct are placeholders that should have been filled in by the loader using the information in the relocation sections.

LucasSantos91 avatar Aug 07 '24 22:08 LucasSantos91

It has also been observed that linking the app with libc makes DynLib work properly:

$ /opt/zig-0.14/zig build-exe app.zig -lc -O ReleaseSmall
$ ./app
api = api.Api@7f7c50144390
api.add = fn (i32, i32) callconv(.C) i32@7f7c50143374
api.sub = fn (i32, i32) callconv(.C) i32@7f7c50143378
10 + 7 = 17
10 - 7 = 3

dee0xeed avatar Aug 08 '24 07:08 dee0xeed

related to #5360

dee0xeed avatar Aug 08 '24 07:08 dee0xeed

Never mind, it actually makes complete sense: the segments are smaller than the page size, and I had my std.mem.indexOf arguments in the wrong order :facepalm: I guess it's just the relocation logic that's missing.

~~The relocation logic is definitely missing, but it seems to me that there is also something wrong with how the program headers are loaded into the memory in the first place. The .so from the reproduction has 3 LOAD segments (notice how the .text section is in the second segment) :~~

❯ readelf -l libari.so

Elf file type is DYN (Shared object file)
Entry point 0x0
There are 8 program headers, starting at offset 64

Program Headers:
  Type           Offset             VirtAddr           PhysAddr
                 FileSiz            MemSiz              Flags  Align
  PHDR           0x0000000000000040 0x0000000000000040 0x0000000000000040
                 0x00000000000001c0 0x00000000000001c0  R      0x8
  LOAD           0x0000000000000000 0x0000000000000000 0x0000000000000000
                 0x000000000000030c 0x000000000000030c  R      0x1000
  LOAD           0x000000000000030c 0x000000000000130c 0x000000000000130c
                 0x0000000000000009 0x0000000000000009  R E    0x1000
  LOAD           0x0000000000000318 0x0000000000002318 0x0000000000002318
                 0x00000000000000f0 0x0000000000000ce8  RW     0x1000
  DYNAMIC        0x0000000000000328 0x0000000000002328 0x0000000000002328
                 0x00000000000000e0 0x00000000000000e0  RW     0x8
  GNU_RELRO      0x0000000000000318 0x0000000000002318 0x0000000000002318
                 0x00000000000000f0 0x0000000000000ce8  R      0x1
  GNU_EH_FRAME   0x00000000000002a8 0x00000000000002a8 0x00000000000002a8
                 0x000000000000001c 0x000000000000001c  R      0x4
  GNU_STACK      0x0000000000000000 0x0000000000000000 0x0000000000000000
                 0x0000000000000000 0x0000000000000000  RW     0x0

 Section to Segment mapping:
  Segment Sections...
   00     
   01     .dynsym .gnu.hash .hash .dynstr .rela.dyn .eh_frame_hdr .eh_frame 
   02     .text 
   03     .data.rel.ro .dynamic .relro_padding 
   04     .dynamic 
   05     .data.rel.ro .dynamic .relro_padding 
   06     .eh_frame_hdr 
   07

~~Looking at the strace we can definitely see 3 segments being mapped:~~

mmap(0x79fe1c8ee000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x79fe1c8ee000
mmap(0x79fe1c8ef000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x79fe1c8ef000
mmap(0x79fe1c8f0000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x79fe1c8f0000

~~The second mmap call looks suspicious, because it's using offset 0, same as the first one. This looks to me like the .text section is never loaded. In fact if we search the mapped memory for the machine code, it's nowhere to be found (also first 0x1000 bytes are equal to the second 0x1000 bytes, but that's just confirmation):~~

❯ objdump -dr libari.so

libari.so:     file format elf64-x86-64


Disassembly of section .text:

000000000000130c <.text>:
    130c:       8d 04 37                lea    (%rdi,%rsi,1),%eax
    130f:       c3                      ret
    1310:       89 f8                   mov    %edi,%eax
    1312:       29 f0                   sub    %esi,%eax
    1314:       c3                      ret
// main.zig
const std = @import("std");
const log = std.debug.print;
const Dll = std.DynLib;

const Api = @import("api.zig").Api;

/// Debugging aid: opens `./libari.so` and inspects the memory image exposed as
/// `lib.inner.memory`, printing the result of two checks.
pub fn main() !void {
    var lib = try Dll.open("./libari.so");
    defer lib.close();
    
    // Compare the first 0x1000 bytes of the image with the next 0x1000 bytes.
    const same = std.mem.eql(u8, lib.inner.memory[0..0x1000], lib.inner.memory[0x1000..0x2000]);
    log("Same: {}\n", .{same});

    // Look for the byte sequence 8d 04 37 in the image.
    // NOTE(review): `std.mem.indexOf` takes (T, haystack, needle). Here the
    // 3-byte pattern is passed as the haystack and the whole image as the
    // needle, so the result is null whenever the image is longer than three
    // bytes. The arguments look swapped — confirm before relying on the output.
    const ix = std.mem.indexOf(u8, &[_]u8{ 0x8d, 0x04, 0x37 }, lib.inner.memory);
    log("Index: {?}\n", .{ix});
}
❯ ./bin/zig run main.zig
Same: true
Index: null

fardragon avatar Aug 20 '24 15:08 fardragon

I'm working on rudimentary support for relocations when loading ELF shared libraries. It will not solve all the issues that ElfDynLib has but I'm currently able to run the reproduction with expected behaviour on x86_64 linux.

fardragon avatar Aug 25 '24 11:08 fardragon