The Unfun Cat The Unfun Cat - 1 month ago 9
Python Question

Dereference FFI pointer in Python to get underlying array

I have a C FFI written in Rust, in a file called `src/lib.rs`, that looks like the following:

// compile with $ cargo build

extern crate libc;
use self::libc::{size_t, int32_t};

use std::cmp::min;
use std::slice;

/// Creates a heap-allocated `Rle` from a C array and returns an owning raw pointer.
///
/// The `values_length` elements starting at `values_data` are copied into a new
/// `Vec`, so the input buffer is not retained. The returned pointer owns a `Box`
/// allocation; pass it to `rle_free` to release it.
///
/// A null `values_data` or a zero `values_length` produces an `Rle` with no values.
#[no_mangle]
pub extern "C" fn rle_new(values_data: *const int32_t, values_length: size_t) -> *mut Rle {
    let values = if values_data.is_null() || values_length == 0 {
        // `slice::from_raw_parts` requires a non-null pointer even for an empty
        // slice, so a null input must never reach it.
        Vec::new()
    } else {
        // SAFETY: `values_data` is non-null here; the caller must guarantee that it
        // points to `values_length` readable, properly aligned `int32_t` values.
        unsafe { slice::from_raw_parts(values_data, values_length as usize).to_vec() }
    };

    Box::into_raw(Box::new(Rle::new(values)))
}

/// Releases an `Rle` that was handed out as a raw pointer.
///
/// A null pointer is ignored.
#[no_mangle]
pub extern "C" fn rle_free(ptr: *mut Rle) {
    if !ptr.is_null() {
        // SAFETY: relies on the caller passing a pointer that came from
        // `Box::into_raw` and has not been freed already.
        unsafe {
            // Rebuilding the `Box` and dropping it runs the destructor and
            // returns the allocation.
            drop(Box::from_raw(ptr));
        }
    }
}

/// Returns the number of values stored in the `Rle` behind `rle`, as an `int32_t`.
///
/// The pointer is dereferenced without a null check.
#[no_mangle]
pub extern "C" fn rle_values_size(rle: *mut Rle) -> int32_t {
    // SAFETY: relies on the caller passing a valid, live `Rle` pointer.
    let values = unsafe { &(*rle).values };
    values.len() as i32
}

/// Returns a raw pointer to the first stored value of the `Rle` behind `rle`.
///
/// The pointer is dereferenced without a null check, and indexing element 0
/// panics when the vector is empty.
#[no_mangle]
pub extern "C" fn rle_values(rle: *mut Rle) -> *mut int32_t {
    // SAFETY: relies on the caller passing a valid, live `Rle` pointer.
    let rle = unsafe { &mut *rle };
    let first: &mut i32 = &mut rle.values[0];
    // The `&mut i32` coerces to the raw pointer return type.
    first
}


/// Wrapper around a vector of `i32` values that the `rle_*` functions expose to C.
#[derive(Debug, PartialEq)]
pub struct Rle {
    /// The stored values.
    pub values: Vec<i32>,
}

impl Rle {
    /// Builds an `Rle` that takes ownership of `values`.
    pub fn new(values: Vec<i32>) -> Self {
        Self { values }
    }
}


This is my Cargo.toml in the project base folder:

[package]
name = "minimal_example"
version = "0.1.0"
authors = ["Dumbass"]

[dependencies]
libc = "0.2.16"

[lib]
# "cdylib" produces a dynamic library meant to be loaded from other languages
# (here: Python via ctypes); it works the same way on Linux, macOS and Windows.
crate-type = ["cdylib"]


This is the Python code calling Rust, also put in the base folder:

import os
import sys, ctypes
from ctypes import c_char_p, c_uint32, Structure, POINTER, c_int32, c_size_t, pointer

class RleS(Structure):
    """Opaque ctypes stand-in for the Rust `Rle` struct; no fields are declared."""
    pass

# Shared-library file name differs per platform: lib<name>.so, lib<name>.dylib or <name>.dll.
prefix = {'win32': ''}.get(sys.platform, 'lib')
extension = {'darwin': '.dylib', 'win32': '.dll'}.get(sys.platform, '.so')
# Use $LD_LIBRARY_PATH when it is set, otherwise cargo's debug output directory.
libpath = os.environ.get("LD_LIBRARY_PATH", "target/debug") + "/"
libpath = libpath + prefix + "minimal_example" + extension

try:
    lib = ctypes.cdll.LoadLibrary(libpath)
except OSError:
    print("Library not found at " + libpath)
    sys.exit()

lib.rle_new.restype = POINTER(RleS)

lib.rle_free.argtypes = (POINTER(RleS), )

lib.rle_values.argtypes = (POINTER(RleS), )
lib.rle_values.restypes = POINTER(c_int32)

lib.rle_values_size.argtypes = (POINTER(RleS), )
lib.rle_values_size.restypes = c_int32


class Rle:
    """Python-side handle for an `Rle` object allocated by the Rust library."""

    def __init__(self, values):

        values_length = len(values)

        # Copy the Python integers into a contiguous C array of int32.
        values_array = (c_int32 * len(values))(*values)

        self.obj = lib.rle_new(values_array, c_size_t(values_length))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Hand the pointer back to Rust so the allocation is released.
        lib.rle_free(self.obj)

    def __str__(self):
        values_size = lib.rle_values_size(self.obj)
        print(values_size, "values_size") # prints correct value

        values_pointer = lib.rle_values(self.obj)
        print("values_pointer:", values_pointer)
        ar = ctypes.cast(values_pointer, ctypes.POINTER(ctypes.c_int32)).contents
        print(ar) # segfaults!

# Build an Rle from 30 integers and print it, which calls Rle.__str__.
rle = Rle([1, 1, 2] * 10)
print(rle)


I have good reason to believe that the C FFI code is correct, since `rle_values_size` and `rle_values` refer to the same object, namely a Rust vector within a struct, and the `rle_values_size` function works.

However, when I try to dereference the pointer given by `rle_values` and read it as an array, I get segfaults.

I have tried every single permutation of code snippets I have found on Stack Overflow, but it segfaults.

Why is this crashing? What am I doing wrong?

I added the Rust tag since I might be getting the address of the vector in the wrong way.

P.S. If somebody also knows how to read this directly into a NumPy array, I would upvote that too.

Background info: How do I return an array in a pub extern "C" fn?

Answer

The cast should be the first warning sign. Why do you have to cast from the type to what should be the same type? This is because there are simple typos:

# The attribute name is `restype` (singular), not `restypes`.
lib.rle_values.restype = POINTER(c_int32)    
lib.rle_values_size.restype = c_int32

Note that it's supposed to be restype, not restypes.

def __str__(self):
    """Return the stored values formatted as a Python list, printing debug info on the way."""
    count = lib.rle_values_size(self.obj)
    print(count, "values_size")

    data_ptr = lib.rle_values(self.obj)
    print("values_pointer:", data_ptr)

    # Slicing the ctypes pointer copies `count` elements into a Python list.
    return str(data_ptr[:count])

It's also better to use as_mut_ptr:

/// Returns a raw pointer to the value buffer of the `Rle` behind `rle`.
///
/// The pointer is dereferenced without a null check.
#[no_mangle]
pub extern "C" fn rle_values(rle: *mut Rle) -> *mut int32_t {
    // SAFETY: relies on the caller passing a valid, live `Rle` pointer.
    let values = unsafe { &mut (*rle).values };
    values.as_mut_ptr()
}

Running the program appears to work:

$ LD_LIBRARY_PATH=$PWD/target/debug/ python3 main.py
new
30 values_size
values_pointer: <__main__.LP_c_int object at 0x10f124048>
[1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2]

I'd also recommend:

  • The default ctypes return type is `c_int`. Not specifying a return type for `rle_free` is probably not a good idea, as it should be `void` (in ctypes: `restype = None`).
  • return an unsigned number for the length of the data; what would -53 items mean?
  • reduce the scope of the unsafe blocks to just the part that is unsafe and the code that ensures it is actually safe.
  • speaking of which, you could check for NULL pointers in each function.

    /// Returns the number of stored values as an `int32_t`, or 0 when `rle` is null.
    #[no_mangle]
    pub extern "C" fn rle_values_size(rle: *mut Rle) -> int32_t {
        // SAFETY: `as_ref` handles null; a non-null pointer must refer to a live `Rle`.
        let rle = unsafe { rle.as_ref() };
        rle.map_or(0, |rle| rle.values.len() as i32)
    }
    
    #[no_mangle]
    pub extern "C" fn rle_values(rle: *mut Rle) -> *mut int32_t {
        match unsafe { rle.as_mut() } {
            Some(mut rle) => rle.values.as_mut_ptr(),
            None => ptr::null_mut(),
        }
    }