Search code examples
rustffilibcgribreqwest

Get C FILE pointer from bytes::Bytes in Rust


I would like to read a GRIB file downloaded from server using ecCodes library in Rust. However, my current solution results in segmentation fault. The extracted example, replicating the problem, is below.

I download the file using reqwest crate and get the response as Bytes1 using bytes(). To read the file with ecCodes I need to create a codes_handle using codes_grib_handle_new_from_file()2, which as argument requires *FILE usually get from fopen(). However, I would like to skip IO operations. So I figured I could use libc::fmemopen() to get *FILE from Bytes. But when I pass the *mut FILE from fmemopen() to codes_grib_handle_new_from_file() segmentation fault occurs.

I suspect the issue is when I get from Bytes a *mut c_void required by fmemopen(). I figured I can do this like that:

//get a *mut c_void pointer fom Bytes
//file has &Bytes type
let mut buf = BytesMut::from(file.as_ref());
let ptr = buf.as_mut_ptr();
let ptr = ptr as *mut c_void;

Because *mut is required, I create BytesMut from which I can then get mut pointer. I think those conversion are problematic, because in debugger info ptr contains a diffrent memory adress than ptr field of file.

Using *FILE got from libc::fopen() for the same file does not result in segfault. So the problem is somwhere around fmemopen().

The ecCodes library is correctly built (passes all tests and works in C) and linked (the calls in callstack are correct).

The full extracted example:

#![allow(unused)]
#![allow(non_camel_case_types)]

use bytes::{Bytes, BytesMut};
use libc::{c_char, c_void, fmemopen, size_t, FILE};
use reqwest;
use tokio;

// generated by bindgen
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct codes_handle {
    _unused: [u8; 0],
}

// generated by bindgen
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct codes_context {
    _unused: [u8; 0],
}

#[tokio::main]
async fn main() {
    // download the grib file from server
    // then get response as bytes
    let url = "https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/gfs.20210612/00/atmos/gfs.t00z.pgrb2.1p00.f000";
    let file = reqwest::get(url).await.unwrap().bytes().await.unwrap();

    // get Bytes from *FILE with fmemopen
    // file must outlive the pointer so it is borrowed here
    let file_handle = open_with_fmemopen(&file);

    let grib_handle = open_with_codes(file_handle);
}

pub fn open_with_fmemopen(file: &Bytes) -> *mut FILE {
    // size of buffer and mode to be read with
    let size = file.len() as size_t;
    let mode = "r".as_ptr() as *const c_char;

    // get a *mut c_void pointer fom Bytes
    let mut buf = BytesMut::from(file.as_ref());
    let ptr = buf.as_mut_ptr();
    let ptr = ptr as *mut c_void;

    // get *FILE with fmemopen
    let obj;
    unsafe {
        obj = fmemopen(ptr, size, mode);
    }
    
    obj
}

pub fn open_with_codes(file_handle: *mut FILE) -> *mut codes_handle {
    
    // default context for ecCodes
    let context: *mut codes_context = std::ptr::null_mut();

    // variable to hold error code
    let mut error: i32 = 0;

    // get codes_handle from *FILE
    let grib_handle;
    unsafe {
        // segmentation fault occurs here
        grib_handle = codes_grib_handle_new_from_file(context, file_handle, &mut error as *mut i32);
    }

    grib_handle
}

// binding to ecCodes C library
#[link(name = "eccodes")]
extern "C" {
    pub fn codes_grib_handle_new_from_file(
        c: *mut codes_context,
        f: *mut FILE,
        error: *mut i32,
    ) -> *mut codes_handle;
}

And because the example might require considerable effort to set up I also attach the call stack from GDB of the seg fault:

__memmove_avx_unaligned_erms 0x00007f738b415fa6
fmemopen_read 0x00007f738b31c9b4
_IO_new_file_underflow 0x00007f738b31fd51
__GI___underflow 0x00007f738b32142e
__GI___underflow 0x00007f738b32142e
__GI__IO_default_xsgetn 0x00007f738b32142e
__GI__IO_fread 0x00007f738b312493
stdio_read 0x00007f738bb8db37
_read_any 0x00007f738bb8cf1b
read_any 0x00007f738bb8cfa3
_wmo_read_any_from_file_malloc 0x00007f738bb8e6f7
wmo_read_grib_from_file_malloc 0x00007f738bb8e7d7
grib_handle_new_from_file_no_multi 0x00007f738bb872a2
grib_new_from_file 0x00007f738bb8678f
grib_handle_new_from_file 0x00007f738bb85998
codes_grib_handle_new_from_file 0x00007f738bb8532b
example::open_with_codes main.rs:68
example::main::{{closure}} main.rs:34
core::future::from_generator::{{impl}}::poll<generator-0> mod.rs:80
tokio::park::thread::{{impl}}::block_on::{{closure}}<core::future::from_generator::GenFuture<generator-0>> thread.rs:263
tokio::coop::with_budget::{{closure}}<core::task::poll::Poll<()>,closure-0> coop.rs:106
std::thread::local::LocalKey<core::cell::Cell<tokio::coop::Budget>>::try_with<core::cell::Cell<tokio::coop::Budget>,closure-0,core::task::poll::Poll<()>> local.rs:272
std::thread::local::LocalKey<core::cell::Cell<tokio::coop::Budget>>::with<core::cell::Cell<tokio::coop::Budget>,closure-0,core::task::poll::Poll<()>> local.rs:248
tokio::coop::with_budget<core::task::poll::Poll<()>,closure-0> coop.rs:99
tokio::coop::budget<core::task::poll::Poll<()>,closure-0> coop.rs:76
tokio::park::thread::CachedParkThread::block_on<core::future::from_generator::GenFuture<generator-0>> thread.rs:263
tokio::runtime::enter::Enter::block_on<core::future::from_generator::GenFuture<generator-0>> enter.rs:151
tokio::runtime::thread_pool::ThreadPool::block_on<core::future::from_generator::GenFuture<generator-0>> mod.rs:71
tokio::runtime::Runtime::block_on<core::future::from_generator::GenFuture<generator-0>> mod.rs:452
example::main main.rs:34
core::ops::function::FnOnce::call_once<fn(),()> function.rs:227
std::sys_common::backtrace::__rust_begin_short_backtrace<fn(),()> backtrace.rs:125
std::rt::lang_start::{{closure}}<()> rt.rs:66
core::ops::function::impls::{{impl}}::call_once<(),Fn<()>> function.rs:259
std::panicking::try::do_call<&Fn<()>,i32> panicking.rs:379
std::panicking::try<i32,&Fn<()>> panicking.rs:343
std::panic::catch_unwind<&Fn<()>,i32> panic.rs:431
std::rt::lang_start_internal rt.rs:51
std::rt::lang_start<()> rt.rs:65
main 0x0000560f1d93c76c
__libc_start_main 0x00007f738b2bb565
_start 0x0000560f1d935f0e

1 From bytes crate, not std::io
2 grib_handle returned by the function is just an alias of codes_handle


Solution

  • 1- Try changing

    let mode = "r".as_ptr() as *const c_char;
    

    to

    let mode = "r\0".as_ptr() as *const c_char;
    

    Rust's &str is not null-terminated, while you're passing it to C where string literals are expected to be null-terminated.

    2- Try the following implementation for open_with_fmemopen:

    pub fn open_with_fmemopen(file: &Bytes) -> *mut FILE {
        unsafe {
            let obj = fmemopen(file.as_ref() as *const _ as _, file.len(), "r\0".as_ptr() as _);
            obj
        }
    }