rust-bindgen
rust-bindgen copied to clipboard
Make it possible so that function pointers aren't always wrapped in an Option.
I'm working on bindings of a C library (OpenUCX) where the majority, but not all, of function pointers used are never null (both typedefs and struct fields) - hence they shouldn't be generated as Option<extern fn ...>
but as extern fn ...
. A very small number, however, can be null, and so needs to stay as Option<extern fn ..>
. It'd be nice to do this via bindgen. Currently I post-process them using bindgen-wrapper
(a horribly brittle tool of my own devising).
Would it possible for bindgen to take a list to override this behaviour, of structs and typedefs?
What's so bad about calling unwrap()
/ wrapping it with Some(fn)
? We can certainly add an API for this but this hasn't looked really annoying so far, at least for me.
-
It uses code as documentation - what the code says is what's correct, and the type system makes it clear when something is genuinely null. (A similar argument could be made for
NonNull<x>
vs*mut X
, although that's something I haven't explored with UCX). -
There is a performance hit with
unwrap
of two extra x86-64 instructions for every unwrap of an Option in release mode. In OpenUCX this can actually matter - it abstracts away RDMA operations. -
It is is annoying; it pollutes every call, making basic code obnoxiously verbose for no gain
-
It makes it tedious to work with C libraries which implement 'classes' as a gigantic (struct) table of function pointers... eg
/**
* Transport interface operations.
* Every operation exposed in the API should appear in the table below, to allow
* creating interface/endpoint with custom operations.
*/
typedef struct uct_iface_ops {
/* endpoint - put */
ucs_status_t (*ep_put_short)(uct_ep_h ep, const void *buffer, unsigned length,
uint64_t remote_addr, uct_rkey_t rkey);
ssize_t (*ep_put_bcopy)(uct_ep_h ep, uct_pack_callback_t pack_cb,
void *arg, uint64_t remote_addr, uct_rkey_t rkey);
ucs_status_t (*ep_put_zcopy)(uct_ep_h ep, const uct_iov_t *iov, size_t iovcnt,
uint64_t remote_addr, uct_rkey_t rkey,
uct_completion_t *comp);
/* endpoint - get */
ucs_status_t (*ep_get_bcopy)(uct_ep_h ep, uct_unpack_callback_t unpack_cb,
void *arg, size_t length,
uint64_t remote_addr, uct_rkey_t rkey,
uct_completion_t *comp);
ucs_status_t (*ep_get_zcopy)(uct_ep_h ep, const uct_iov_t *iov, size_t iovcnt,
uint64_t remote_addr, uct_rkey_t rkey,
uct_completion_t *comp);
/* endpoint - active message */
ucs_status_t (*ep_am_short)(uct_ep_h ep, uint8_t id, uint64_t header,
const void *payload, unsigned length);
ssize_t (*ep_am_bcopy)(uct_ep_h ep, uint8_t id,
uct_pack_callback_t pack_cb, void *arg,
unsigned flags);
ucs_status_t (*ep_am_zcopy)(uct_ep_h ep, uint8_t id, const void *header,
unsigned header_length, const uct_iov_t *iov,
size_t iovcnt, unsigned flags,
uct_completion_t *comp);
/* endpoint - atomics */
ucs_status_t (*ep_atomic_add64)(uct_ep_h ep, uint64_t add,
uint64_t remote_addr, uct_rkey_t rkey);
ucs_status_t (*ep_atomic_fadd64)(uct_ep_h ep, uint64_t add,
uint64_t remote_addr, uct_rkey_t rkey,
uint64_t *result, uct_completion_t *comp);
ucs_status_t (*ep_atomic_swap64)(uct_ep_h ep, uint64_t swap,
uint64_t remote_addr, uct_rkey_t rkey,
uint64_t *result, uct_completion_t *comp);
ucs_status_t (*ep_atomic_cswap64)(uct_ep_h ep, uint64_t compare, uint64_t swap,
uint64_t remote_addr, uct_rkey_t rkey,
uint64_t *result, uct_completion_t *comp);
ucs_status_t (*ep_atomic_add32)(uct_ep_h ep, uint32_t add,
uint64_t remote_addr, uct_rkey_t rkey);
ucs_status_t (*ep_atomic_fadd32)(uct_ep_h ep, uint32_t add,
uint64_t remote_addr, uct_rkey_t rkey,
uint32_t *result, uct_completion_t *comp);
ucs_status_t (*ep_atomic_swap32)(uct_ep_h ep, uint32_t swap,
uint64_t remote_addr, uct_rkey_t rkey,
uint32_t *result, uct_completion_t *comp);
ucs_status_t (*ep_atomic_cswap32)(uct_ep_h ep, uint32_t compare, uint32_t swap,
uint64_t remote_addr, uct_rkey_t rkey,
uint32_t *result, uct_completion_t *comp);
/* endpoint - tagged operations */
ucs_status_t (*ep_tag_eager_short)(uct_ep_h ep, uct_tag_t tag,
const void *data, size_t length);
ssize_t (*ep_tag_eager_bcopy)(uct_ep_h ep, uct_tag_t tag, uint64_t imm,
uct_pack_callback_t pack_cb, void *arg,
unsigned flags);
ucs_status_t (*ep_tag_eager_zcopy)(uct_ep_h ep, uct_tag_t tag, uint64_t imm,
const uct_iov_t *iov, size_t iovcnt,
unsigned flags, uct_completion_t *comp);
ucs_status_ptr_t (*ep_tag_rndv_zcopy)(uct_ep_h ep, uct_tag_t tag,
const void *header,
unsigned header_length,
const uct_iov_t *iov,
size_t iovcnt, unsigned flags,
uct_completion_t *comp);
ucs_status_t (*ep_tag_rndv_cancel)(uct_ep_h ep, void *op);
ucs_status_t (*ep_tag_rndv_request)(uct_ep_h ep, uct_tag_t tag,
const void* header,
unsigned header_length, unsigned flags);
/* interface - tagged operations */
ucs_status_t (*iface_tag_recv_zcopy)(uct_iface_h iface, uct_tag_t tag,
uct_tag_t tag_mask,
const uct_iov_t *iov,
size_t iovcnt,
uct_tag_context_t *ctx);
ucs_status_t (*iface_tag_recv_cancel)(uct_iface_h iface,
uct_tag_context_t *ctx,
int force);
/* endpoint - pending queue */
ucs_status_t (*ep_pending_add)(uct_ep_h ep, uct_pending_req_t *n);
void (*ep_pending_purge)(uct_ep_h ep, uct_pending_purge_callback_t cb,
void *arg);
/* endpoint - synchronization */
ucs_status_t (*ep_flush)(uct_ep_h ep, unsigned flags,
uct_completion_t *comp);
ucs_status_t (*ep_fence)(uct_ep_h ep, unsigned flags);
ucs_status_t (*ep_check)(uct_ep_h ep, unsigned flags, uct_completion_t *comp);
/* endpoint - connection establishment */
ucs_status_t (*ep_create)(uct_iface_h iface, uct_ep_h *ep_p);
ucs_status_t (*ep_create_connected)(uct_iface_h iface,
const uct_device_addr_t *dev_addr,
const uct_iface_addr_t *iface_addr,
uct_ep_h* ep_p);
ucs_status_t (*ep_create_sockaddr)(uct_iface_h iface,
const ucs_sock_addr_t *sockaddr,
const void *priv_data, size_t length,
uct_ep_h *ep_p);
void (*ep_destroy)(uct_ep_h ep);
ucs_status_t (*ep_get_address)(uct_ep_h ep, uct_ep_addr_t *addr);
ucs_status_t (*ep_connect_to_ep)(uct_ep_h ep,
const uct_device_addr_t *dev_addr,
const uct_ep_addr_t *ep_addr);
/* interface - synchronization */
ucs_status_t (*iface_flush)(uct_iface_h iface, unsigned flags,
uct_completion_t *comp);
ucs_status_t (*iface_fence)(uct_iface_h iface, unsigned flags);
/* interface - progress control */
void (*iface_progress_enable)(uct_iface_h iface, unsigned flags);
void (*iface_progress_disable)(uct_iface_h iface, unsigned flags);
unsigned (*iface_progress)(uct_iface_h iface);
/* interface - events */
ucs_status_t (*iface_event_fd_get)(uct_iface_h iface, int *fd_p);
ucs_status_t (*iface_event_arm)(uct_iface_h iface, unsigned events);
/* interface - management */
void (*iface_close)(uct_iface_h iface);
ucs_status_t (*iface_query)(uct_iface_h iface,
uct_iface_attr_t *iface_attr);
/* interface - connection establishment */
ucs_status_t (*iface_get_device_address)(uct_iface_h iface,
uct_device_addr_t *addr);
ucs_status_t (*iface_get_address)(uct_iface_h iface, uct_iface_addr_t *addr);
int (*iface_is_reachable)(const uct_iface_h iface,
const uct_device_addr_t *dev_addr,
const uct_iface_addr_t *iface_addr);
} uct_iface_ops_t;
Well, if you do care about the instruction count I guess that you can unsafely match / transmute, like:
#[macro_use] extern crate debug_unreachable;
match my_func {
Some(f) => f,
None => debug_unreachable!(),
};
But yeah, I agree. Generally we've supported field annotations via doc comments, but if you're using a library that may not be a great solution. For C this is easy-ish to map to a struct + name, for C++.. not so much I guess.
I can mentor this if you want, I don't know if a ParseCallbacks
API vs. just an add-hoc listing would suit this kind of use case better.
Thank for the offer to mentor. If I wasn't so snowed under with work I'd love to take up your offer. I make extensive use of bindgen in my projects and would love to get some changes to make it easier in my workflow.
I'd like this not for some performance reason but so that I could declare function pointers for initializing a struct of function pointers which are guaranteed not to be null. The option wrapped version makes this impossible. This is a useful pattern when loading dynamic libraries rather than linking against a library at compile time.
If the functions are marked with _Nonnull
/ __attribute__((nonnull))
, this could be solved in the same manner as https://github.com/rust-lang/rust-bindgen/issues/1791 could be solved
Function pointer tables occur every now and then in C libraries, however "nonnull" attributes seem to be extremely rare, meaning a decision based solely on them is likely ineffective for a large part of the C ecosystem.
An example I currently face: Godot's GDExtension binding. A big typedef struct
with function pointers as member variables.
There are workarounds like macros based on Option::unwrap_unchecked()
, but of course it would be nice if Option
could be omitted in the first place. It's a matter of expressivity, not just performance. However I'm not sure what the best way to specify that would be... An allowlist for certain types and their fields?
If you don't mind recompiling bindgen, you can tweak the code yourself. The code that wraps function types in Option
currently (as of writing now) resides in src/codegen/mod.rs
, there's a part like:
TypeKind::Function(ref fs) => {
// We can't rely on the sizeof(Option<NonZero<_>>) ==
// sizeof(NonZero<_>) optimization with opaque blobs (because
// they aren't NonZero), so don't *ever* use an or_opaque
// variant here.
let ty = fs.try_to_rust_ty(ctx, &())?;
let prefix = ctx.trait_prefix();
Ok(quote! {
::#prefix::option::Option<#ty>
})
}
Modify it to:
TypeKind::Function(ref fs) => {
// We can't rely on the sizeof(Option<NonZero<_>>) ==
// sizeof(NonZero<_>) optimization with opaque blobs (because
// they aren't NonZero), so don't *ever* use an or_opaque
// variant here.
let ty = fs.try_to_rust_ty(ctx, &())?;
let prefix = ctx.trait_prefix();
Ok(quote! { #ty })
}
Now run cargo build
Dealing with unsafe code is now entirely your responsibility. But I'd wish to have a switchable option for bindgen to toggle this feature. This will help while dealing with structs with function type members, used mainly for callbacks.
I would love this feature too - for now, I'm going to utilize the recompiled version with the change above, but having some option to switch it could be very handy.
As @emilio mentioned there are two possible APIs for this:
- Using an option: Something like
--non-null-fn-ptr-fields=<regex>
, such that every field being a function pointer matchingregex
removes theOption
in it. The disadvantage here is that the regex needs to be over the "path" of the field and not just the field name so the user can have enough control to apply this to fields with the same name separately - Using a callback: Something like
fn non_null_fn_ptr_field(&self, item: &str, field: &str) -> bool
where returningtrue
means that any function pointer field calledfield
inside an item calleditem
omits theOption
. The disadvantage here is that you cannot use this from the CLI.
Not sure what's better to be honest.
I might have hit a use case where this option would be essential, although please correct me if I’m wrong.
I’m trying to use bindgen on the Vulkan headers. In general the Vulkan API is accessed almost entirely through function pointers because most entry points are only optionally implemented in the driver so you have to check for the extension that provides the functionality before getting the function pointer and calling the function.
The functions to get access to the function pointers look like this:
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(
VkDevice device,
const char* pName);
So you pass the name of the function you want and then it returns a pointer to a function with no arguments and a void return type. You are then expected to cast it to the right function type.
The header also includes a typedef for each possible function that the driver could provide like this:
typedef void (VKAPI_PTR *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer,
uint32_t vertexCount,
uint32_t instanceCount,
uint32_t firstVertex,
uint32_t firstInstance);
In C this is quite convenient because you can just cast it via void* like this:
PFN_vkCmdDraw cmd_draw = (void *) vkGetDeviceProcAddr(device, "vkCmdDraw");
cmd_draw(3, 1, 0, 0);
With the rust bindings generated by bindgen the PFN_vkCmdDraw
type is an Option<complicated_fn_type>
. I can’t find any way to do the cast from Option<stub_fn_type>
to Option<complicated_fn_type>
because as far as can tell there’s no way to get access to the internal type of an Option
.
If you statically that the type is never None
, you could just do Option::unwrap_unchecked
.
Another option is just a simple mem::transmute::<Option<stub_fn_type>, Option<complicated_fn_type>>(fnptr)
(with the types explicitly specified, to reduce the chance of a mistake!).