wasm-minimal-protocol icon indicating copy to clipboard operation
wasm-minimal-protocol copied to clipboard

Crate generates recursive function call

Open mainrs opened this issue 10 months ago • 0 comments

I am trying to write my own plugin. However, my code results in a recursive call to the wrapped function itself, leading to typst throwing a stackoverflow error.

Here is the original code:

// extern crate alloc;

use tree_sitter_c2rust::{ffi::CaptureListPool, Language, Parser, Query, QueryCursor};
use wasm_minimal_protocol::*;

// Provides Rust implementations for stdlib functions for languages written in C.
// https://github.com/cacticouncil/lilypad/blob/5acfefb9aca74cd4934d3b414d0a6fb0ca41dfad/src/lib.rs
#[cfg(target_arch = "wasm32")]
pub mod c_shim;

initiate_protocol!();

#[wasm_func]
pub fn apply_query(code: &[u8], language: &[u8], query: &[u8], capture_index: &[u8]) -> Vec<u8> {
    let query = unsafe { std::str::from_utf8_unchecked(query) };

    let capture_index = capture_index[0] as u32;
    let result = apply_query_inner(code, language, query, capture_index);
    "hello".as_bytes().to_vec()
}

fn apply_query_inner(code: &[u8], language: &[u8], query: &str, capture_index: u32) -> String {
    // Initialize the parser.
    let mut parser = Parser::new();

    "".to_string()
}

fn language_for_tag(language: &[u8]) -> Language {
    // println!(":: language_for_tag: {}", language);
    match language {
        b"csharp" => tree_sitter_c_sharp::language(),
        b"cpp" => tree_sitter_cpp::language(),
        // b"haskell" | b"hs" => tree_sitter_haskell::language(),
        b"java" => tree_sitter_java::language(),
        b"python" => tree_sitter_python::language(),
        b"rust" => tree_sitter_rust::language(),
        _ => panic!("unknown language: {:?}", language),
    }
}

The expanded code that this crate produces is this:

#![feature(prelude_import)]
#[prelude_import]
use std::prelude::rust_2021::*;
#[macro_use]
extern crate std;
use tree_sitter_c2rust::{ffi::CaptureListPool, Language, Parser, Query, QueryCursor};
use wasm_minimal_protocol::*;
#[cfg(target_arch = "wasm32")]
pub mod c_shim {
    use std::{
        alloc::{self, Layout},
        ffi::{c_int, c_void},
        mem::align_of, ptr,
    };
    #[no_mangle]
    pub unsafe extern "C" fn malloc(size: usize) -> *mut c_void {
        if size == 0 {
            return ptr::null_mut();
        }
        let (layout, offset_to_data) = layout_for_size_prepended(size);
        let buf = alloc::alloc(layout);
        store_layout(buf, layout, offset_to_data)
    }
    #[no_mangle]
    pub unsafe extern "C" fn calloc(count: usize, size: usize) -> *mut c_void {
        if count == 0 || size == 0 {
            return ptr::null_mut();
        }
        let (layout, offset_to_data) = layout_for_size_prepended(size * count);
        let buf = alloc::alloc_zeroed(layout);
        store_layout(buf, layout, offset_to_data)
    }
    #[no_mangle]
    pub unsafe extern "C" fn realloc(buf: *mut c_void, new_size: usize) -> *mut c_void {
        if buf.is_null() {
            malloc(new_size)
        } else if new_size == 0 {
            free(buf);
            ptr::null_mut()
        } else {
            let (old_buf, old_layout) = retrieve_layout(buf);
            let (new_layout, offset_to_data) = layout_for_size_prepended(new_size);
            let new_buf = alloc::realloc(old_buf, old_layout, new_layout.size());
            store_layout(new_buf, new_layout, offset_to_data)
        }
    }
    #[no_mangle]
    pub unsafe extern "C" fn free(buf: *mut c_void) {
        if buf.is_null() {
            return;
        }
        let (buf, layout) = retrieve_layout(buf);
        alloc::dealloc(buf, layout);
    }
    /// Given a pointer to the data, retrieve the layout and the pointer to the layout.
    unsafe fn retrieve_layout(buf: *mut c_void) -> (*mut u8, Layout) {
        let (_, layout_offset) = Layout::new::<Layout>()
            .extend(Layout::from_size_align(0, align_of::<*const u8>() * 2).unwrap())
            .unwrap();
        let buf = (buf as *mut u8).offset(-(layout_offset as isize));
        let layout = *(buf as *mut Layout);
        (buf, layout)
    }
    /// Calculate a layout for a given size with space for storing a layout at the start.
    /// Returns the layout and the offset to the data.
    fn layout_for_size_prepended(size: usize) -> (Layout, usize) {
        Layout::new::<Layout>()
            .extend(Layout::from_size_align(size, align_of::<*const u8>() * 2).unwrap())
            .unwrap()
    }
    /// Store a layout in the pointer, returning a pointer to where the data should be stored.
    unsafe fn store_layout(
        buf: *mut u8,
        layout: Layout,
        offset_to_data: usize,
    ) -> *mut c_void {
        *(buf as *mut Layout) = layout;
        (buf as *mut u8).offset(offset_to_data as isize) as *mut c_void
    }
    #[no_mangle]
    pub unsafe extern "C" fn memcpy(
        dest: *mut c_void,
        src: *const c_void,
        size: usize,
    ) -> *mut c_void {
        std::ptr::copy_nonoverlapping(src, dest, size);
        dest
    }
    #[no_mangle]
    pub unsafe extern "C" fn memmove(
        dest: *mut c_void,
        src: *const c_void,
        size: usize,
    ) -> *mut c_void {
        std::ptr::copy(src, dest, size);
        dest
    }
    #[no_mangle]
    pub unsafe extern "C" fn memset(s: *mut c_void, c: i32, n: usize) -> *mut c_void {
        let slice = std::slice::from_raw_parts_mut(s as *mut u8, n);
        slice.fill(c as u8);
        s
    }
    #[no_mangle]
    pub unsafe extern "C" fn strlen(s: *const c_void) -> usize {
        let mut len = 0;
        let mut ptr = s as *const u8;
        while *ptr != 0 {
            len += 1;
            ptr = ptr.add(1);
        }
        len
    }
    #[no_mangle]
    pub unsafe extern "C" fn iswspace(c: c_int) -> bool {
        char::from_u32(c as u32).map_or(false, |c| c.is_whitespace())
    }
    #[no_mangle]
    pub unsafe extern "C" fn iswalnum(c: c_int) -> bool {
        char::from_u32(c as u32).map_or(false, |c| c.is_alphanumeric())
    }
    #[no_mangle]
    pub unsafe extern "C" fn iswlower(c: c_int) -> bool {
        char::from_u32(c as u32).map_or(false, |c| c.is_lowercase())
    }
    #[no_mangle]
    pub unsafe extern "C" fn iswdigit(c: c_int) -> bool {
        char::from_u32(c as u32).map_or(false, |c| c.is_digit(10))
    }
    #[no_mangle]
    pub unsafe extern "C" fn iswalpha(c: c_int) -> bool {
        char::from_u32(c as u32).map_or(false, |c| c.is_alphabetic())
    }
}
#[link(wasm_import_module = "typst_env")]
extern "C" {
    #[link_name = "wasm_minimal_protocol_send_result_to_host"]
    fn __send_result_to_host(ptr: *const u8, len: usize);
    #[link_name = "wasm_minimal_protocol_write_args_to_buffer"]
    fn __write_args_to_buffer(ptr: *mut u8);
}
trait __BytesOrResultBytes {
    type Err;
    fn convert(self) -> ::std::result::Result<Vec<u8>, Self::Err>;
}
impl __BytesOrResultBytes for Vec<u8> {
    type Err = i32;
    fn convert(
        self,
    ) -> ::std::result::Result<Vec<u8>, <Self as __BytesOrResultBytes>::Err> {
        Ok(self)
    }
}
impl<E> __BytesOrResultBytes for ::std::result::Result<Vec<u8>, E> {
    type Err = E;
    fn convert(
        self,
    ) -> ::std::result::Result<Vec<u8>, <Self as __BytesOrResultBytes>::Err> {
        self
    }
}
pub fn apply_query(
    code: &[u8],
    language: &[u8],
    query: &[u8],
    capture_index: &[u8],
) -> Vec<u8> {
    let query = unsafe { std::str::from_utf8_unchecked(query) };
    let capture_index = capture_index[0] as u32;
    let result = apply_query_inner(code, language, query, capture_index);
    "hello".as_bytes().to_vec()
}
#[export_name = "apply_query"]
pub extern "C" fn __wasm_minimal_protocol_internal_function_apply_query(
    __code_idx: usize,
    __language_idx: usize,
    __query_idx: usize,
    __capture_index_idx: usize,
) -> i32 {
    let __total_len = __code_idx + __language_idx + __query_idx + __capture_index_idx
        + 0;
    let mut __unsplit_params = ::alloc::vec::from_elem(0u8, __total_len);
    unsafe {
        __write_args_to_buffer(__unsplit_params.as_mut_ptr());
    }
    let code: &[u8] = &__unsplit_params[0usize..0usize + __code_idx];
    let language: &[u8] = &__unsplit_params[0usize
        + __code_idx..0usize + __code_idx + __language_idx];
    let query: &[u8] = &__unsplit_params[0usize + __code_idx
        + __language_idx..0usize + __code_idx + __language_idx + __query_idx];
    let capture_index = &__unsplit_params[0usize + __code_idx + __language_idx
        + __query_idx..];
    let result = __BytesOrResultBytes::convert(
        apply_query(code, language, query, capture_index),
    );
    let (message, code) = match result {
        Ok(s) => (s.into_boxed_slice(), 0),
        Err(err) => (err.to_string().into_bytes().into_boxed_slice(), 1),
    };
    unsafe {
        __send_result_to_host(message.as_ptr(), message.len());
    }
    code
}
fn apply_query_inner(
    code: &[u8],
    language: &[u8],
    query: &str,
    capture_index: u32,
) -> String {
    let mut parser = Parser::new();
    "".to_string()
}
fn language_for_tag(language: &[u8]) -> Language {
    match language {
        b"csharp" => tree_sitter_c_sharp::language(),
        b"cpp" => tree_sitter_cpp::language(),
        b"java" => tree_sitter_java::language(),
        b"python" => tree_sitter_python::language(),
        b"rust" => tree_sitter_rust::language(),
        _ => {
            ::core::panicking::panic_fmt(
                format_args!("unknown language: {0:?}", language),
            );
        }
    }
}

Am I using this library wrong? It is hard to get more error messages. I cannot call the function directly using tools like wasmtime because of the API wrapping done to make it compatible with typst.

However, running the same code on my host machine (linux) works perfectly and returns hello. That's why I think the issue lies in the protocol library here.

mainrs avatar Apr 24 '24 16:04 mainrs