Function pointers inside a struct in a DLL are zero
Zig Version
0.14.0-dev.872+a60810b5a
Steps to Reproduce and Observed Behavior
System: Linux Mint 21.3
// file 1. api.zig (interface definition)
/// Signature shared by every entry of the table: two `i32` operands in, one
/// `i32` result out, called with the C calling convention.
const BinaryOp = *const fn (i32, i32) callconv(.C) i32;

/// Function table shared between the dynamic library and the application.
///
/// Declared as an `extern struct` so its layout is C-compatible: two function
/// pointers, `add` first and `sub` second.
pub const Api = extern struct {
    /// Pointer to the addition routine.
    add: BinaryOp,
    /// Pointer to the subtraction routine.
    sub: BinaryOp,
};
// file 2. ari.zig (the dll)
const Api = @import("api.zig").Api;
/// Returns `a + b`.
///
/// Uses the C calling convention so a pointer to it can be stored in the
/// `Api` function table.
fn add(a: i32, b: i32) callconv(.C) i32 {
    const sum = a + b;
    return sum;
}
/// Returns `a - b`.
///
/// Uses the C calling convention so a pointer to it can be stored in the
/// `Api` function table.
fn sub(a: i32, b: i32) callconv(.C) i32 {
    const difference = a - b;
    return difference;
}
/// The library's exported symbol `api`: an `Api` table whose entries point at
/// this file's `add` and `sub`. Hosts find it by looking up the name "api".
export const api = Api{
    .add = &add,
    .sub = &sub,
};
// file 3. app.zig (the application)
const std = @import("std");
const log = std.debug.print;
const Dll = std.DynLib;
const Api = @import("api.zig").Api;
/// Loads `./libari.so`, looks up the exported `api` table, prints the table's
/// address and both function pointers, then calls `add` and `sub` through it.
///
/// Errors: whatever `Dll.open` reports, or `error.SymbolNotFound` when the
/// library does not expose an `api` symbol.
pub fn main() !void {
    var lib = try Dll.open("./libari.so");
    // log("{any}\n", .{lib});
    defer lib.close();

    // Unwrapping a failed lookup with `.?` is unchecked undefined behaviour in
    // the ReleaseSmall build this program is compiled with, so report a
    // missing symbol as an error instead. The library exports the table as a
    // `const`, hence the `*const` pointer.
    const api: *const Api = lib.lookup(*const Api, "api") orelse return error.SymbolNotFound;

    log("api = {*}\n", .{api});
    log("api.add = {*}\n", .{api.add});
    log("api.sub = {*}\n", .{api.sub});

    const a: i32 = 10;
    const b: i32 = 7;
    log("{} + {} = {}\n", .{ a, b, api.add(a, b) });
    log("{} - {} = {}\n", .{ a, b, api.sub(a, b) });
}
Compile with
zig build-lib -dynamic ari.zig -O ReleaseSmall
zig build-exe app.zig -O ReleaseSmall
Result:
$ ./app
api = api.Api@7fb76ef79390
api.add = fn (i32, i32) callconv(.C) i32@0
api.sub = fn (i32, i32) callconv(.C) i32@0
Segmentation fault at address 0x0
Addresses may vary depending on the release mode, for example (default mode):
$ ./app
api = api.Api@7fcb827d1870
api.add = fn (i32, i32) callconv(.C) i32@3f
api.sub = fn (i32, i32) callconv(.C) i32@a002200000419
Segmentation fault at address 0x3f
Also tried 0.11.0, 0.12.0, 0.13.0, behavior is identical.
Expected Behavior
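Should work: the function pointers in the looked-up `Api` struct should hold the relocated addresses of `add` and `sub`, and the program should print `10 + 7 = 17` and `10 - 7 = 3`.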
DLL produced by Zig is ok by itself, it's usable from C:
#include <stdio.h>
#include <dlfcn.h>
/* Signature shared by both table entries: two ints in, one int out. */
typedef int (*binary_op)(int, int);

/*
 * C view of the table exported by libari.so under the symbol "api".
 * Field order mirrors the Zig `Api` extern struct: add first, then sub.
 */
struct api {
    binary_op add;
    binary_op sub;
};
/*
 * Loads ./libari.so with dlopen, resolves the exported "api" table with dlsym
 * and calls both of its entries.
 *
 * Returns 0 on success, or 1 when the library or the symbol cannot be
 * resolved (the reason is printed to stderr).
 */
int main(void) {
    void *lib = dlopen("./libari.so", RTLD_NOW);
    if (lib == NULL) {
        /* Without this check a missing library ends in a NULL dereference. */
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }

    struct api *api = dlsym(lib, "api");
    if (api == NULL) {
        /* dlerror() may itself return NULL when the symbol's value is NULL. */
        const char *reason = dlerror();
        fprintf(stderr, "dlsym failed: %s\n",
                reason != NULL ? reason : "symbol resolved to NULL");
        dlclose(lib);
        return 1;
    }

    int a = 10;
    int b = 7;
    printf("%d + %d = %d\n", a, b, api->add(a, b));
    printf("%d - %d = %d\n", a, b, api->sub(a, b));

    dlclose(lib);
    return 0;
}
$ ./a.out
10 + 7 = 17
10 - 7 = 3
Also usable from a Zig program with dlopen.
It looks like ElfDynLib is not performing relocation. In the example given, the zeros observed in the Api struct are placeholders that should have been filled by the loader using the information in the relocation sections.
It has also been observed that linking the app with libc makes DynLib work properly:
$ /opt/zig-0.14/zig build-exe app.zig -lc -O ReleaseSmall
$ ./app
api = api.Api@7f7c50144390
api.add = fn (i32, i32) callconv(.C) i32@7f7c50143374
api.sub = fn (i32, i32) callconv(.C) i32@7f7c50143378
10 + 7 = 17
10 - 7 = 3
related to #5360
Never mind, it actually makes complete sense: the segments are smaller than the page size, and I had my std.mem.indexOf arguments in the wrong order :facepalm: I guess it's just the relocation logic that's missing.
~~The relocation logic is definitely missing, but it seems to me that there is also something wrong with how the program headers are loaded into the memory in the first place. The .so from the reproduction has 3 LOAD segments (notice how the .text section is in the second segment) :~~
❯ readelf -l libari.so
Elf file type is DYN (Shared object file)
Entry point 0x0
There are 8 program headers, starting at offset 64
Program Headers:
Type Offset VirtAddr PhysAddr
FileSiz MemSiz Flags Align
PHDR 0x0000000000000040 0x0000000000000040 0x0000000000000040
0x00000000000001c0 0x00000000000001c0 R 0x8
LOAD 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x000000000000030c 0x000000000000030c R 0x1000
LOAD 0x000000000000030c 0x000000000000130c 0x000000000000130c
0x0000000000000009 0x0000000000000009 R E 0x1000
LOAD 0x0000000000000318 0x0000000000002318 0x0000000000002318
0x00000000000000f0 0x0000000000000ce8 RW 0x1000
DYNAMIC 0x0000000000000328 0x0000000000002328 0x0000000000002328
0x00000000000000e0 0x00000000000000e0 RW 0x8
GNU_RELRO 0x0000000000000318 0x0000000000002318 0x0000000000002318
0x00000000000000f0 0x0000000000000ce8 R 0x1
GNU_EH_FRAME 0x00000000000002a8 0x00000000000002a8 0x00000000000002a8
0x000000000000001c 0x000000000000001c R 0x4
GNU_STACK 0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 RW 0x0
Section to Segment mapping:
Segment Sections...
00
01 .dynsym .gnu.hash .hash .dynstr .rela.dyn .eh_frame_hdr .eh_frame
02 .text
03 .data.rel.ro .dynamic .relro_padding
04 .dynamic
05 .data.rel.ro .dynamic .relro_padding
06 .eh_frame_hdr
07
~~Looking at the strace we can definitely see 3 segments being mapped:~~
mmap(0x79fe1c8ee000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x79fe1c8ee000
mmap(0x79fe1c8ef000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 0) = 0x79fe1c8ef000
mmap(0x79fe1c8f0000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x79fe1c8f0000
~~The second mmap call looks suspicious, because it's using offset 0, same as the first one. This looks to me like the .text section is never loaded. In fact if we search the mapped memory for the machine code, it's nowhere to be found (also first 0x1000 bytes are equal to the second 0x1000 bytes, but that's just confirmation):~~
❯ objdump -dr libari.so
libari.so: file format elf64-x86-64
Disassembly of section .text:
000000000000130c <.text>:
130c: 8d 04 37 lea (%rdi,%rsi,1),%eax
130f: c3 ret
1310: 89 f8 mov %edi,%eax
1312: 29 f0 sub %esi,%eax
1314: c3 ret
// main.zig
const std = @import("std");
const log = std.debug.print;
const Dll = std.DynLib;
const Api = @import("api.zig").Api;
/// Diagnostic program: opens `./libari.so` through `std.DynLib` and inspects
/// the bytes reachable via `lib.inner.memory`.
/// NOTE(review): `inner.memory` is presumably the loader's mapping of the
/// library image — confirm against the ElfDynLib implementation.
pub fn main() !void {
var lib = try Dll.open("./libari.so");
defer lib.close();
// Check whether the first 0x1000 bytes are identical to the following 0x1000 bytes.
const same = std.mem.eql(u8, lib.inner.memory[0..0x1000], lib.inner.memory[0x1000..0x2000]);
log("Same: {}\n", .{same});
// 8d 04 37 are the opcode bytes of the first instruction in the library's .text section.
// NOTE(review): `std.mem.indexOf` takes the haystack before the needle, so as written this
// searches the 3-byte pattern for the whole mapping. A needle longer than the haystack can
// never match, so the result is always null. To search the mapping for the pattern, the two
// slice arguments would have to be swapped.
const ix = std.mem.indexOf(u8, &[_]u8{ 0x8d, 0x04, 0x37 }, lib.inner.memory);
log("Index: {?}\n", .{ix});
}
❯ ./bin/zig run main.zig
Same: true
Index: null
I'm working on rudimentary support for relocations when loading ELF shared libraries. It will not solve all the issues that ElfDynLib has but I'm currently able to run the reproduction with expected behaviour on x86_64 linux.