diff --git a/pgx-pg-sys/cshim/pgx-cshim.c b/pgx-pg-sys/cshim/pgx-cshim.c
index 6952812f7..18ee4da79 100644
--- a/pgx-pg-sys/cshim/pgx-cshim.c
+++ b/pgx-pg-sys/cshim/pgx-cshim.c
@@ -25,6 +25,7 @@ Use of this source code is governed by the MIT license that can be found in the
 #include "parser/parsetree.h"
 #include "utils/memutils.h"
 #include "utils/builtins.h"
+#include "utils/array.h"
 
 PGDLLEXPORT MemoryContext pgx_GetMemoryContextChunk(void *ptr);
 
@@ -110,3 +111,33 @@ PGDLLEXPORT char *pgx_GETSTRUCT(HeapTuple tuple);
 char *pgx_GETSTRUCT(HeapTuple tuple) {
     return GETSTRUCT(tuple);
 }
+
+PGDLLEXPORT char *pgx_ARR_DATA_PTR(ArrayType *arr); /* wraps the ARR_DATA_PTR macro */
+char *pgx_ARR_DATA_PTR(ArrayType *arr) {
+    return ARR_DATA_PTR(arr);
+}
+
+PGDLLEXPORT int pgx_ARR_NELEMS(ArrayType *arr); /* item count over all dimensions; reads *arr */
+int pgx_ARR_NELEMS(ArrayType *arr) {
+    return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+}
+
+PGDLLEXPORT bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr); /* may be NULL: callers must null-check */
+bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr) {
+    return ARR_NULLBITMAP(arr);
+}
+
+PGDLLEXPORT int pgx_ARR_NDIM(ArrayType *arr); /* wraps the ARR_NDIM macro */
+int pgx_ARR_NDIM(ArrayType *arr) {
+    return ARR_NDIM(arr);
+}
+
+PGDLLEXPORT bool pgx_ARR_HASNULL(ArrayType *arr); /* wraps the ARR_HASNULL macro */
+bool pgx_ARR_HASNULL(ArrayType *arr) {
+    return ARR_HASNULL(arr);
+}
+
+PGDLLEXPORT int *pgx_ARR_DIMS(ArrayType *arr); /* wraps the ARR_DIMS macro */
+int *pgx_ARR_DIMS(ArrayType *arr) {
+    return ARR_DIMS(arr);
+}
diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs
index 07194d577..3e6b73ba6 100644
--- a/pgx-tests/src/tests/array_tests.rs
+++ b/pgx-tests/src/tests/array_tests.rs
@@ -7,6 +7,7 @@ All rights reserved.
 Use of this source code is governed by the MIT license that can be found in the LICENSE file.
 */
 
+use pgx::array::RawArray;
 use pgx::*;
 use serde_json::*;
 
@@ -99,6 +100,44 @@ fn return_zero_length_vec() -> Vec {
     Vec::new()
 }
 
+#[pg_extern]
+fn get_arr_nelems(arr: Array<i32>) -> libc::c_int {
+    // SAFETY: `arr` comes from Postgres as an int[] argument; only the cached length is read.
+    unsafe { RawArray::from_array(arr) }.unwrap().len() as _
+}
+
+#[pg_extern]
+fn get_arr_data_ptr_nth_elem(arr: Array<i32>, elem: i32) -> Option<i32> {
+    // SAFETY: this is Known to be an Array from ArrayType,
+    // and it's valid-ish to see any bitpattern of an i32 inbounds of a slice.
+    unsafe {
+        let raw = RawArray::from_array(arr).unwrap().data::<i32>();
+        let slice = &(*raw.as_ptr());
+        slice.get(elem as usize).copied() // a negative `elem` wraps to a huge index => None
+    }
+}
+
+#[pg_extern]
+fn display_get_arr_nullbitmap(arr: Array<i32>) -> String {
+    let raw = unsafe { RawArray::from_array(arr) }.unwrap();
+
+    if let Some(slice) = raw.nulls() {
+        // SAFETY: If the test has gotten this far, the ptr is good for 0+ bytes,
+        // so reborrow NonNull<[u8]> as &[u8] for the hot second we're looking at it.
+        let slice = unsafe { &*slice.as_ptr() };
+        // `first()` instead of `slice[0]`: an empty bitmap yields "" rather than a panic.
+        slice.first().map(|bits| format!("{:#010b}", bits)).unwrap_or_default()
+    } else {
+        String::from("")
+    }
+}
+
+#[pg_extern]
+fn get_arr_ndim(arr: Array<i32>) -> libc::c_int {
+    // SAFETY: This is a valid ArrayType and it's just a field access.
+    unsafe { RawArray::from_array(arr) }.unwrap().dims().len() as _
+}
+
 #[pg_extern]
 fn over_implicit_drop() -> Vec {
     // Create an array of exactly Datum-sized numbers.
@@ -255,6 +294,51 @@ mod tests {
         assert_eq!(json.0, json!
{{"values": [1, 2, 3, null, 4]}});
     }
 
+    #[pg_test]
+    fn test_get_arr_nelems() {
+        let len = Spi::get_one::<i32>("SELECT get_arr_nelems('{1,2,3,4,5}'::int[])")
+            .expect("failed to get SPI result");
+
+        assert_eq!(len, 5);
+    }
+
+    #[pg_test]
+    fn test_get_arr_data_ptr_nth_elem() {
+        let nth = Spi::get_one::<i32>("SELECT get_arr_data_ptr_nth_elem('{1,2,3,4,5}'::int[], 2)")
+            .expect("failed to get SPI result");
+
+        assert_eq!(nth, 3);
+    }
+
+    #[pg_test]
+    fn test_display_get_arr_nullbitmap() {
+        let bitmap_str = Spi::get_one::<String>(
+            "SELECT display_get_arr_nullbitmap(ARRAY[1,NULL,3,NULL,5]::int[])",
+        )
+        .expect("failed to get SPI result");
+
+        assert_eq!(bitmap_str, "0b00010101");
+
+        let bitmap_str =
+            Spi::get_one::<String>("SELECT display_get_arr_nullbitmap(ARRAY[1,2,3,4,5]::int[])")
+                .expect("failed to get SPI result");
+
+        assert_eq!(bitmap_str, "");
+    }
+
+    #[pg_test]
+    fn test_get_arr_ndim() {
+        let ndim = Spi::get_one::<i32>("SELECT get_arr_ndim(ARRAY[1,2,3,4,5]::int[])")
+            .expect("failed to get SPI result");
+
+        assert_eq!(ndim, 1);
+
+        let ndim = Spi::get_one::<i32>("SELECT get_arr_ndim('{{1,2,3},{4,5,6}}'::int[])")
+            .expect("failed to get SPI result");
+
+        assert_eq!(ndim, 2);
+    }
+
     #[pg_test]
     fn test_array_over_direct() {
         let vals = crate::tests::array_tests::over_implicit_drop();
diff --git a/pgx/src/array.rs b/pgx/src/array.rs
new file mode 100644
index 000000000..3eab9281f
--- /dev/null
+++ b/pgx/src/array.rs
@@ -0,0 +1,265 @@
+use crate::datum::{Array, FromDatum};
+use crate::pg_sys;
+use core::ptr::{slice_from_raw_parts_mut, NonNull};
+use core::slice;
+use pgx_pg_sys::*;
+
+extern "C" {
+    /// # Safety
+    /// Does a field access, but doesn't deref out of bounds of ArrayType
+    fn pgx_ARR_DATA_PTR(arrayType: *mut ArrayType) -> *mut u8;
+    /// # Safety
+    /// Does a field access, but doesn't deref out of bounds of ArrayType
+    fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut libc::c_int;
+    /// # Safety
+    /// Must only be used on a non-null, "valid" (Postgres-constructed) ArrayType
+    fn pgx_ARR_NELEMS(arrayType: *mut ArrayType) -> libc::c_int;
+    /// # Safety
+    /// Does a field access, but doesn't deref out of bounds of ArrayType
+    fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8;
+}
+
+/**
+An aligned, dereferenceable `NonNull<ArrayType>` with low-level accessors.
+
+It offers safe accessors to the fields of [pg_sys::ArrayType] and mostly-safe accessors
+to the "dynamic fields" of the defined Postgres varlena array, but only requires validity
+of ArrayType itself and the dimensions slice (always valid if `ndim == 0`).
+This means the [NonNull] pointers that are returned may not be valid to read.
+Validating the correctness of the entire array requires a bit more effort.
+
+It is not Copy or Clone to make it slightly harder to misuse versus *mut ArrayType.
+However, `&mut self` accessors do not give lifetimes to returned [`NonNull<[T]>`][nonnull]!
+Instead, these are raw pointers, and `&mut RawArray` only makes `&RawArray` safer.
+
+The reason RawArray works almost entirely with raw pointers is that
+it is not currently valid to go from `&mut ArrayType` to `*mut ArrayType`,
+take an offset beyond ArrayType's fields, and then create a new slice there
+and read from that. The result is currently undefined behavior,
+though with emphasis on "undefined": it may become defined in the future of Rust.
+
+At the current moment, however, it is best to exercise an abundance of caution.
+
+# On sizes and subscripts
+
+Postgres uses C's `int` (`c_int` in Rust) for sizes, and Rust uses [usize].
+Thus various functions of RawArray return `c_int` values, but you must convert to usize.
+On 32-bit or 64-bit machines with 32-bit `c_int`s, you may losslessly upgrade `as usize`,
+except with negative indices, which Postgres asserts against creating.
+PGX currently only intentionally supports 64-bit machines,
+and while support for ILP32 or I64LP128 C data models may become possible,
+PGX will **not** support 16-bit machines in any practical case, even though Rust does.
+
+[nonnull]: core::ptr::NonNull
+*/
+#[derive(Debug)]
+pub struct RawArray {
+    ptr: NonNull<ArrayType>,
+    len: usize,
+}
+
+#[deny(unsafe_op_in_unsafe_fn)]
+impl RawArray {
+    /**
+    Returns a handle to the raw array header.
+
+    # Safety
+
+    When calling this method, you have to ensure that all of the following is true:
+    * The pointer must be properly aligned.
+    * It must be "dereferenceable" in the sense defined in [the std documentation].
+    * The pointer must point to an initialized instance of [pg_sys::ArrayType].
+    * The `ndim` field must be a correct value, or **0**, so `dims` is aligned and readable,
+      or no data is actually read at all.
+    * This is a unique, "owning pointer" for the varlena, so it won't be aliased while held,
+      and it points to data in the Postgres ArrayType format.
+
+    It should be noted that despite all these requirements, RawArray has no lifetime,
+    nor produces slices with such, so it can still be racy and unsafe!
+
+    [the std documentation]: core::ptr#safety
+    */
+    pub unsafe fn from_ptr(ptr: NonNull<ArrayType>) -> RawArray {
+        // SAFETY: Validity asserted by the caller.
+        let len = unsafe { pgx_ARR_NELEMS(ptr.as_ptr()) } as usize;
+        RawArray { ptr, len }
+    }
+
+    /// Wraps the ArrayType behind an [Array], or returns `None` if that pointer is null.
+    ///
+    /// # Safety
+    /// Array must have been made from an ArrayType pointer,
+    /// or a null value, as-if [RawArray::from_ptr].
+    pub unsafe fn from_array<T: FromDatum>(arr: Array<T>) -> Option<RawArray> {
+        // Null-check first: pgx_ARR_NELEMS reads through the pointer on the C side.
+        let ptr = NonNull::new(arr.into_array_type() as *mut ArrayType)?;
+        // SAFETY: Non-null was just checked; everything else is asserted by the caller.
+        let len = unsafe { pgx_ARR_NELEMS(ptr.as_ptr()) } as usize;
+        Some(RawArray { ptr, len })
+    }
+
+    /// Returns the inner raw pointer to the ArrayType.
+    pub fn into_raw(self) -> NonNull<ArrayType> {
+        self.ptr
+    }
+
+    /// Get the number of dimensions.
+    /// Will be in 0..=[pg_sys::MAXDIM].
+    fn ndim(&self) -> libc::c_int {
+        // SAFETY: Validity asserted on construction.
+        unsafe {
+            (*self.ptr.as_ptr()).ndim
+            /*
+            FIXME: While this is a c_int, the max ndim is normally 6
+            While the value can be set higher, it is... unlikely
+            that it is going to actually challenge even 16-bit pointer widths.
+            It would be preferable to return a usize instead,
+            however, PGX has trouble with that, unfortunately.
+            */
+            as _
+        }
+    }
+
+    /**
+    A slice of the dimensions.
+
+    Oxidized form of [ARR_DIMS(ArrayType*)][ARR_DIMS].
+    The length will be within 0..=[pg_sys::MAXDIM].
+
+    Safe to use because validity of this slice was asserted on construction.
+
+    [ARR_DIMS]:
+    */
+    pub fn dims(&self) -> &[libc::c_int] {
+        /*
+        SAFETY: Welcome to the infernal bowels of FFI.
+        Because the initial ptr was NonNull, we can assume this is also NonNull.
+        Validity of the ptr and ndim field was asserted on construction of RawArray,
+        so can assume the dims ptr is also valid, allowing making the slice.
+        */
+        unsafe {
+            let ndim = self.ndim() as usize;
+            slice::from_raw_parts(pgx_ARR_DIMS(self.ptr.as_ptr()), ndim)
+        }
+    }
+
+    /// The flattened length of the array over every single element.
+    /// Includes all items, even the ones that might be null.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Accessor for ArrayType's elemtype.
+    pub fn oid(&self) -> pg_sys::Oid {
+        // SAFETY: Validity asserted on construction.
+        unsafe { (*self.ptr.as_ptr()).elemtype }
+    }
+
+    /// Gets the offset to the ArrayType's data.
+    /// Should not be "taken literally".
+    fn data_offset(&self) -> i32 {
+        // SAFETY: Validity asserted on construction.
+        unsafe { (*self.ptr.as_ptr()).dataoffset }
+        // This field is an "int32" in Postgres
+    }
+
+    /**
+    Equivalent to [ARR_HASNULL(ArrayType*)][ARR_HASNULL].
+
+    Note this means that it only asserts that there MIGHT be a null
+
+    [ARR_HASNULL]:
+    */
+    #[allow(unused)] // no caller in this file yet; kept as the ARR_HASNULL counterpart
+    fn nullable(&self) -> bool {
+        self.data_offset() != 0
+    }
+
+    /**
+    Oxidized form of [ARR_NULLBITMAP(ArrayType*)][ARR_NULLBITMAP]
+
+    If this returns None, the array cannot have nulls.
+    If this returns Some, it points to the bitslice that marks nulls in this array.
+
+    Note that unlike the `is_null: bool` that appears elsewhere, here a 0 bit is null,
+    or possibly out of bounds for the final byte of the bitslice.
+
+    Note that even if this is Some, that does not mean it's always okay to read!
+    If len is 0, then this slice will be valid for 0 bytes.
+
+    [ARR_NULLBITMAP]:
+    */
+    pub fn nulls(&self) -> Option<NonNull<[u8]>> {
+        let len = (self.len + 7) >> 3; // One bit per element, rounded up to bytes; 0 if len was 0.
+
+        /*
+        SAFETY: This obtains the nulls pointer, which is valid to obtain because
+        the len was asserted on construction. However, unlike the other cases,
+        it isn't correct to trust it. Instead, this gets null-checked.
+        This is because, while the initial pointer is NonNull,
+        ARR_NULLBITMAP can return a nullptr!
+        */
+        NonNull::new(unsafe {
+            slice_from_raw_parts_mut(pgx_ARR_NULLBITMAP(self.ptr.as_ptr()), len)
+        })
+    }
+
+    /**
+    Oxidized form of [ARR_DATA_PTR(ArrayType*)][ARR_DATA_PTR]
+
+    # Safety
+
+    While this function is safe to call, using the slice may risk undefined behavior.
+    The raw slice is not guaranteed to be legible at any given index as T,
+    e.g. it may be an "SQL null" if so indicated in the null bitmap.
+    As a result, it is dangerous to reborrow this as `&[T]` or `&mut [T]`
+    unless the type considers all bitpatterns to be valid values.
+
+    That is the primary reason this returns [`NonNull<[T]>`][nonnull]. If it returned `&mut [T]`,
+    then for many possible types that can be **undefined behavior**,
+    as it would assert each particular index was a valid `T`.
+    A Rust borrow, including of a slice, will always be
+    * non-null
+    * aligned
+    * **validly initialized**, except in the case of [MaybeUninit] types
+    It is reasonable to assume data Postgres exposes logically to SQL is initialized,
+    but it may be incorrect to assume data Postgres has marked "null"
+    otherwise follows Rust-level initialization requirements.
+
+    As Postgres handles alignment requirements in its own particular ways,
+    it is up to you to validate that each index is aligned correctly.
+    The first element should be correctly aligned to the type, but that is not certain.
+    Successive indices are even less likely to match the data type you want
+    unless Postgres also uses an identical layout.
+
+    This returns a slice to make it somewhat harder to fail to read it correctly.
+    However, it should be noted that a len 0 slice may not be read via raw pointers.
+
+    [MaybeUninit]: core::mem::MaybeUninit
+    [nonnull]: core::ptr::NonNull
+    [ARR_DATA_PTR]:
+    */
+    pub fn data<T>(&mut self) -> NonNull<[T]> {
+        /*
+        SAFETY: Welcome to the infernal bowels of FFI.
+        Because the initial ptr was NonNull, we can assume this is also NonNull.
+        As validity of the initial ptr was asserted on construction of RawArray,
+        this can assume the data ptr is also valid, or harmlessly incorrect.
+
+        This code doesn't assert validity per se, but in practice,
+        the caller may immediately turn this into a borrowed slice,
+        opening up the methods that are available on borrowed slices.
+        This is fine as long as the caller heeds the caveats already given.
+        In particular, for simply sized and aligned data, where alignment is the size
+        (e.g. u8, i16, f32, u64), and there are no invalid bitpatterns to worry about,
+        the caller can almost certainly go to town with it,
+        needing only their initial assertion regarding the type being correct.
+        */
+        unsafe {
+            NonNull::new_unchecked(slice_from_raw_parts_mut(
+                pgx_ARR_DATA_PTR(self.ptr.as_ptr()).cast(),
+                self.len,
+            ))
+        }
+    }
+}
diff --git a/pgx/src/lib.rs b/pgx/src/lib.rs
index 50e57e6aa..ebce6c13d 100644
--- a/pgx/src/lib.rs
+++ b/pgx/src/lib.rs
@@ -46,6 +46,7 @@ pub mod itemptr;
 pub mod list;
 #[macro_use]
 pub mod log;
+pub mod array;
 pub mod atomics;
 pub mod bgworkers;
 pub mod heap_tuple;