Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Low-level interop for PG arrays #636

Merged
merged 37 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e8650f7
c-shims for PG array functions
mhov Aug 17, 2022
39566c0
suggested changes for shims
mhov Aug 19, 2022
98196df
Merge branch 'develop' into array-shims
workingjubilee Aug 23, 2022
4eea77c
Fix cshim exports
workingjubilee Aug 24, 2022
9cffc1b
Introduce RawArray abstraction
workingjubilee Aug 24, 2022
48176a2
Port ARR_HASNULL to RawArray
workingjubilee Aug 24, 2022
82790eb
Clarify comment
workingjubilee Aug 24, 2022
d67dbf0
Introduce fat pointer fns into RawArray
workingjubilee Aug 24, 2022
431e89e
Port ARR_DATA_PTR to RawArray::data_slice
workingjubilee Aug 24, 2022
a60a6dd
Clarify safety requirements for RawArray::dims
workingjubilee Aug 24, 2022
dfcfbb6
Clarify safety requirements for RawArray::data
workingjubilee Aug 24, 2022
e5fc2f5
Cleanup
workingjubilee Aug 24, 2022
48ccccb
Fix RawArray::data_offset signature
workingjubilee Aug 24, 2022
11c5a63
Fix testing
workingjubilee Aug 24, 2022
d74719c
Port ARR_NULLBITMAP to RawArray::nulls
workingjubilee Aug 24, 2022
6900356
Document test safety remarks
workingjubilee Aug 24, 2022
bbad726
sub pub from pgx::array extern fn
workingjubilee Aug 24, 2022
133b44d
Explain Rust type init requirements
workingjubilee Aug 25, 2022
59dac25
Expand on RawArray description
workingjubilee Aug 25, 2022
0da6f1c
Note reborrow in test
workingjubilee Aug 25, 2022
7bfa2e4
Remove extra comment in pgx::array
workingjubilee Aug 25, 2022
55f62ac
Introduce even-lower-level ArrayPtr
workingjubilee Aug 26, 2022
b4a7364
Reformat
workingjubilee Aug 26, 2022
f763c0a
Rough draft of ArrayPtr and RawArray
workingjubilee Aug 26, 2022
b0b3477
Format again
workingjubilee Aug 26, 2022
639d0b4
Merge back into RawArray
workingjubilee Aug 26, 2022
55deefe
Cleanup fn that might be dupes or confusing
workingjubilee Aug 26, 2022
1785c07
Clean up RawArray docs and comments
workingjubilee Aug 26, 2022
16cf849
Add dims_mut, to see if it works
workingjubilee Aug 26, 2022
e2a3509
Cleanup and explanations for RawArray::{dims_mut, nulls}
workingjubilee Aug 26, 2022
9979f77
Format
workingjubilee Aug 26, 2022
f15eabd
Add hint about lens
workingjubilee Aug 26, 2022
b2c9508
Remove unnecessary unsafe on test
workingjubilee Aug 26, 2022
4ba1b0d
Lift remarks into public docs
workingjubilee Aug 26, 2022
5ab4ba8
One last format
workingjubilee Aug 26, 2022
fd3c21d
One last cleanup
workingjubilee Aug 26, 2022
1dd9155
Remove dubious dims_mut function
workingjubilee Aug 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions pgx-pg-sys/cshim/pgx-cshim.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Use of this source code is governed by the MIT license that can be found in the
#include "parser/parsetree.h"
#include "utils/memutils.h"
#include "utils/builtins.h"
#include "utils/array.h"


PGDLLEXPORT MemoryContext pgx_GetMemoryContextChunk(void *ptr);
Expand Down Expand Up @@ -110,3 +111,33 @@ PGDLLEXPORT char *pgx_GETSTRUCT(HeapTuple tuple);
/* Function wrapper around the GETSTRUCT() macro so it can be called through FFI. */
char *pgx_GETSTRUCT(HeapTuple tuple) {
return GETSTRUCT(tuple);
}

/*
 * Function wrapper around the ARR_DATA_PTR() macro from utils/array.h,
 * exported so that non-C callers can obtain the pointer it evaluates to.
 */
PGDLLEXPORT char *pgx_ARR_DATA_PTR(ArrayType *arr);
char *pgx_ARR_DATA_PTR(ArrayType *arr) {
return ARR_DATA_PTR(arr);
}

/*
 * Returns the item count of the array, computed by passing the header's
 * `ndim` field and the ARR_DIMS() pointer to ArrayGetNItems().
 * Unlike the other shims here, this one wraps a function call, not a single macro.
 */
PGDLLEXPORT int pgx_ARR_NELEMS(ArrayType *arr);
int pgx_ARR_NELEMS(ArrayType *arr) {
return ArrayGetNItems(arr->ndim, ARR_DIMS(arr));
}

/*
 * Function wrapper around the ARR_NULLBITMAP() macro from utils/array.h.
 * NOTE(review): upstream array.h is expected to make this evaluate to NULL
 * when the array carries no null bitmap -- callers should check; confirm
 * against the PostgreSQL version being built.
 */
PGDLLEXPORT bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr);
bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr) {
return ARR_NULLBITMAP(arr);
}

/* Function wrapper around the ARR_NDIM() macro so it can be called through FFI. */
PGDLLEXPORT int pgx_ARR_NDIM(ArrayType *arr);
int pgx_ARR_NDIM(ArrayType *arr) {
return ARR_NDIM(arr);
}

/* Function wrapper around the ARR_HASNULL() macro so it can be called through FFI. */
PGDLLEXPORT bool pgx_ARR_HASNULL(ArrayType *arr);
bool pgx_ARR_HASNULL(ArrayType *arr) {
return ARR_HASNULL(arr);
}

/*
 * Function wrapper around the ARR_DIMS() macro so it can be called through FFI.
 * The returned pointer addresses `int` entries; pgx_ARR_NELEMS pairs it with
 * the header's `ndim` field when counting items.
 */
PGDLLEXPORT int *pgx_ARR_DIMS(ArrayType *arr);
int *pgx_ARR_DIMS(ArrayType *arr){
return ARR_DIMS(arr);
}
86 changes: 85 additions & 1 deletion pgx-tests/src/tests/array_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ All rights reserved.
Use of this source code is governed by the MIT license that can be found in the LICENSE file.
*/

use pgx::*;
use core::ptr::NonNull;
use pgx::array::RawArray;
use pgx::{pg_sys::ArrayType, *};
use serde_json::*;

#[pg_extern(name = "sum_array")]
Expand Down Expand Up @@ -99,6 +101,43 @@ fn return_zero_length_vec() -> Vec<i32> {
Vec::new()
}

// SQL-callable probe: reports the flattened element count of an int[] via RawArray::len.
#[pg_extern]
fn get_arr_nelems(arr: Array<i32>) -> libc::c_int {
    // SAFETY: `from_array` requires the Array to have been built from an ArrayType pointer;
    // presumably that holds for an argument handed to us by Postgres -- confirm.
    let raw = unsafe { RawArray::from_array(arr) };
    raw.unwrap().len()
}

// SQL-callable probe: reads element `elem` straight out of the array's data area,
// yielding None when the index falls outside the slice.
#[pg_extern]
fn get_arr_data_ptr_nth_elem(arr: Array<i32>, elem: i32) -> Option<i32> {
    let index = elem as usize;
    // SAFETY: `from_array` requires the Array to have been built from an ArrayType pointer;
    // presumably true for a Postgres-supplied argument -- confirm.
    let mut raw = unsafe { RawArray::from_array(arr) }.unwrap();
    // SAFETY: the declared argument type is int[], so the elements are read as i32.
    let data = unsafe { raw.data_slice::<i32>() };
    // SAFETY: reborrows the raw slice for the duration of this call only.
    let elems: &[i32] = unsafe { data.as_ref() };
    elems.get(index).copied()
}

// SQL-callable probe: renders the first byte of the array's null bitmap as a
// `0b`-prefixed, zero-padded binary string, or an empty string when there is
// nothing to show (null array pointer, no null bitmap, or an empty bitmap).
#[pg_extern]
fn display_get_arr_nullbitmap(arr: Array<i32>) -> String {
    // Raw pointers are Copy, so a single cast is all that is needed; no clone.
    let arr_type = arr.into_array_type() as *mut ArrayType;

    // Check for null explicitly instead of asserting non-null with `new_unchecked`.
    let raw = match NonNull::new(arr_type) {
        // SAFETY: the pointer is non-null and came from an `Array`; it is assumed to
        // address an aligned, initialized ArrayType as `RawArray::from_raw` requires.
        Some(ptr) => unsafe { RawArray::from_raw(ptr) },
        None => return String::new(),
    };

    if !raw.nullable() {
        return String::new();
    }

    let bitmap_slice = array::get_arr_nullbitmap(arr_type);
    // `first()` avoids a panic path should the bitmap turn out to be empty.
    match bitmap_slice.first() {
        Some(bits) => format!("{:#010b}", bits),
        None => String::new(),
    }
}

// SQL-callable probe: reports the number of dimensions of an int[] via RawArray::ndims.
#[pg_extern]
fn get_arr_ndim(arr: Array<i32>) -> libc::c_int {
    // SAFETY: `from_array` requires the Array to have been built from an ArrayType pointer;
    // presumably true for a Postgres-supplied argument -- confirm.
    let raw = unsafe { RawArray::from_array(arr) };
    raw.unwrap().ndims()
}

// SQL-callable probe: reports whether the int[] may contain nulls via RawArray::nullable.
#[pg_extern]
fn get_arr_hasnull(arr: Array<i32>) -> bool {
    // SAFETY: `from_array` requires the Array to have been built from an ArrayType pointer;
    // presumably true for a Postgres-supplied argument -- confirm.
    let raw = unsafe { RawArray::from_array(arr) };
    raw.unwrap().nullable()
}

#[pg_extern]
fn over_implicit_drop() -> Vec<i64> {
// Create an array of exactly Datum-sized numbers.
Expand Down Expand Up @@ -255,6 +294,51 @@ mod tests {
assert_eq!(json.0, json! {{"values": [1, 2, 3, null, 4]}});
}

// NOTE(review): despite the name, this exercises `get_arr_nelems` (element count),
// not the data pointer.
#[pg_test]
fn test_arr_data_ptr() {
    let query = "SELECT get_arr_nelems('{1,2,3,4,5}'::int[])";
    let nelems = Spi::get_one::<i32>(query).expect("failed to get SPI result");

    assert_eq!(nelems, 5);
}

// Index 2 of {1,2,3,4,5} is expected to come back as 3.
#[pg_test]
fn test_get_arr_data_ptr_nth_elem() {
    let query = "SELECT get_arr_data_ptr_nth_elem('{1,2,3,4,5}'::int[], 2)";
    let third = Spi::get_one::<i32>(query).expect("failed to get SPI result");

    assert_eq!(third, 3);
}

// Checks the rendered null bitmap for an array with nulls and for one without.
#[pg_test]
fn test_display_get_arr_nullbitmap() {
    // Both cases run a query returning a single text value.
    let render = |query: &str| Spi::get_one::<String>(query).expect("failed to get SPI result");

    // Elements 0, 2 and 4 are present, so bits 0, 2 and 4 are expected to be set.
    let with_nulls = render("SELECT display_get_arr_nullbitmap(ARRAY[1,NULL,3,NULL,5]::int[])");
    assert_eq!(with_nulls, "0b00010101");

    // No nulls: the function is expected to report an empty string.
    let without_nulls = render("SELECT display_get_arr_nullbitmap(ARRAY[1,2,3,4,5]::int[])");
    assert_eq!(without_nulls, "");
}

// Checks the reported dimension count for a one- and a two-dimensional int[].
#[pg_test]
fn test_get_arr_ndim() {
    let cases: &[(&str, i32)] = &[
        ("SELECT get_arr_ndim(ARRAY[1,2,3,4,5]::int[])", 1),
        ("SELECT get_arr_ndim('{{1,2,3},{4,5,6}}'::int[])", 2),
    ];

    for &(query, expected) in cases {
        let ndim = Spi::get_one::<i32>(query).expect("failed to get SPI result");
        assert_eq!(ndim, expected);
    }
}

#[pg_test]
fn test_array_over_direct() {
let vals = crate::tests::array_tests::over_implicit_drop();
Expand Down
166 changes: 166 additions & 0 deletions pgx/src/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
use crate::datum::{Array, FromDatum};
use crate::pg_sys;
use core::ptr::{slice_from_raw_parts_mut, NonNull};
use pgx_pg_sys::*;

// Bindings to the C shims in pgx-cshim.c, which expose Postgres' array accessors
// (macros and ArrayGetNItems) as real functions callable over FFI.
// Each takes a pointer to the array header; none of them check it for validity.
extern "C" {
// Item count: the shim calls ArrayGetNItems with the header's ndim and ARR_DIMS.
pub fn pgx_ARR_NELEMS(arrayType: *mut ArrayType) -> i32;
// Shim for ARR_NULLBITMAP.
// NOTE(review): expected to be null when the array has no null bitmap -- confirm.
pub fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8;
// Shim for ARR_DATA_PTR; the C side returns `char *`, surfaced here as bytes.
pub fn pgx_ARR_DATA_PTR(arrayType: *mut ArrayType) -> *mut u8;
// Shim for ARR_DIMS; points at the array's `int` dimension entries.
pub fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut libc::c_int;
}

/// Borrows the null bitmap of `arr` as a byte slice.
///
/// The bitmap carries one bit per element, so the slice length is the element count
/// reported by `pgx_ARR_NELEMS`, rounded up to whole bytes.
/// When the shim reports no bitmap (a null pointer), an empty slice is returned
/// instead of building a slice from a null pointer, which would be undefined behavior.
///
/// Although this function is not marked `unsafe`, `arr` must point to a valid,
/// initialized `ArrayType` that outlives `'a`; nothing here can verify that.
#[inline]
pub fn get_arr_nullbitmap<'a>(arr: *mut ArrayType) -> &'a [bits8] {
    // SAFETY: relies on the caller handing over a valid array pointer (see above);
    // the bitmap pointer is checked for null before a slice is formed from it.
    unsafe {
        let bitmap = pgx_ARR_NULLBITMAP(arr);
        if bitmap.is_null() {
            return &[];
        }
        let len = (pgx_ARR_NELEMS(arr) + 7) / 8;
        std::slice::from_raw_parts(bitmap, len as usize)
    }
}

/// Mutably borrows the null bitmap of `arr` as a byte slice.
///
/// The bitmap carries one bit per element, so the slice length is the element count
/// reported by `pgx_ARR_NELEMS`, rounded up to whole bytes.
/// When the shim reports no bitmap (a null pointer), an empty slice is returned
/// instead of building a slice from a null pointer, which would be undefined behavior.
///
/// Although this function is not marked `unsafe`, `arr` must point to a valid,
/// initialized `ArrayType` that outlives `'a`, and no other reference to the bitmap
/// may be live while the returned slice is in use; nothing here can verify that.
#[inline]
pub fn get_arr_nullbitmap_mut<'a>(arr: *mut ArrayType) -> &'a mut [u8] {
    // SAFETY: relies on the caller handing over a valid, unaliased array pointer
    // (see above); the bitmap pointer is checked for null before a slice is formed.
    unsafe {
        let bitmap = pgx_ARR_NULLBITMAP(arr);
        if bitmap.is_null() {
            return &mut [];
        }
        let len = (pgx_ARR_NELEMS(arr) + 7) / 8;
        std::slice::from_raw_parts_mut(bitmap, len as usize)
    }
}

/// Handle describing a bare, "untyped" pointer to an array,
/// offering safe accessors to the various fields of one.
///
/// The handle stores nothing but a non-null pointer to the array header and is
/// `#[repr(transparent)]` over it. It does not derive `Clone` or `Copy`, and it
/// carries no lifetime: keeping the pointee alive is the responsibility of whoever
/// constructed the handle.
#[repr(transparent)]
#[derive(Debug)]
pub struct RawArray {
// Non-null pointer to the array header; validity is asserted by the constructor's caller.
at: NonNull<ArrayType>,
}

#[deny(unsafe_op_in_unsafe_fn)]
impl RawArray {
// General implementation notes:
// RawArray is not Copy or Clone, making it harder to misuse versus *mut ArrayType.
// But this also offers safe accessors to the fields, like &ArrayType,
// so it requires validity assertions in order to be constructed.
// The main reason it uses NonNull and denies Clone, however, is access soundness:
// It is not sound to go from &mut Type to *mut T if *mut T is not a field of Type.
// This creates an obvious complication for handing out pointers into varlenas.
// Thus also why this does not use lifetime-bounded borrows.

/// Returns a handle to the raw array header.
///
/// # Safety
///
/// When calling this method, you have to ensure that all of the following is true:
/// * The pointer must be properly aligned.
/// * It must be "dereferenceable" in the sense defined in [the std documentation].
/// * The pointer must point to an initialized instance of `ArrayType`.
/// * The pointed-to data must not be aliased in ways that would be invalid for a
///   mutable reference, because the accessors treat this pointer like one.
///
/// Note that because `RawArray` is not lifetime-bound, nothing stops the handle from
/// outliving the array it points to; upholding that is also on the caller.
///
/// [the std documentation]: core::ptr#safety
pub unsafe fn from_raw(at: NonNull<ArrayType>) -> RawArray {
// SAFETY: the caller must guarantee that `at` meets all the
// requirements for a mutable reference, as we're going to treat this like one.
RawArray { at }
}

/// # Safety
///
/// Array must have been constructed from an ArrayType pointer.
pub unsafe fn from_array<T: FromDatum>(arr: Array<T>) -> Option<RawArray> {
let array_type = arr.into_array_type() as *mut _;
Comment on lines +91 to +92
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

@workingjubilee workingjubilee Aug 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to change it to not do that NULL-check in a followup PR. Should I fix that now? It feels vaguely off to diff outside this addition.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh nah that's fine, just leave this unresolved until the followup PR, it's nonblocking.

Some(RawArray {
at: NonNull::new(array_type)?,
})
}

/// Returns the inner raw pointer to the ArrayType.
pub fn into_raw(self) -> NonNull<ArrayType> {
self.at
}

/// The array's dimension count, read from the header's `ndim` field.
pub fn ndims(&self) -> libc::c_int {
    let header = self.at.as_ptr();
    // SAFETY: Validity asserted on construction.
    let ndim = unsafe { (*header).ndim };
    // FIXME: This stays a c_int even though the max ndim is normally 6;
    // the value can be set higher, but it is unlikely to ever challenge
    // even 16-bit pointer widths. Returning a usize would be preferable,
    // however, PGX has trouble with that, unfortunately.
    ndim as _
}

/// Raw slice over the per-dimension lengths: one `c_int` per dimension.
/// Oxidized form of ARR_DIMS(ArrayType*)
///
/// # Safety
///
/// Obtaining the slice is fine; using it is where care is needed.
/// Raw pointer validity is **asserted on dereference, not construction**,
/// so the slice must not be used beyond the lifetime of the owning RawArray:
/// once you no longer hold the RawArray, this slice is no longer valid.
pub fn dims(&mut self) -> NonNull<[libc::c_int]> {
    // must match or use postgres/src/include/utils/array.h #define ARR_DIMS
    let ndims = self.ndims() as usize;
    // SAFETY: Validity of the header pointer was asserted on construction.
    let first_dim = unsafe { pgx_ARR_DIMS(self.at.as_ptr()) };
    let raw_dims = slice_from_raw_parts_mut(first_dim, ndims);
    // SAFETY: the shim's result is derived from the non-null header pointer.
    unsafe { NonNull::new_unchecked(raw_dims) }
}

/// Total number of elements across all dimensions, as computed by Postgres.
pub fn len(&self) -> libc::c_int {
    let header = self.at.as_ptr();
    // SAFETY: Validity asserted on construction, and beyond that
    // we trust Postgres to compute its own item count correctly.
    let nelems = unsafe { pgx_ARR_NELEMS(header) };
    // FIXME: The underlying function does return a c_int, but a usize
    // would be more idiomatic in Rust. The actual sizes are also under
    // various restrictions, so the value could probably be constrained further.
    // However, PGX has trouble with returning usizes.
    nelems as _
}

/// The element type's Oid, read from the header's `elemtype` field.
pub fn oid(&self) -> pg_sys::Oid {
    let header = self.at.as_ptr();
    // SAFETY: Validity asserted on construction.
    unsafe { (*header).elemtype }
}

/// Reads the header's `dataoffset` field.
/// The value should not be "taken literally" as a plain byte offset.
pub fn data_offset(&self) -> libc::c_int {
    let header = self.at.as_ptr();
    // SAFETY: Validity asserted on construction.
    unsafe { (*header).dataoffset }
}

/// Rust counterpart of ARR_HASNULL(ArrayType*).
/// A `true` result only says the array MIGHT contain a null, not that it does.
pub fn nullable(&self) -> bool {
    // must match postgres/src/include/utils/array.h #define ARR_HASNULL
    match self.data_offset() {
        0 => false,
        _ => true,
    }
}

/// Raw slice over the array's data area, viewed as `len()` values of type `T`.
///
/// # Safety
///
/// This is not inherently typesafe!
/// The caller must know the implied element type of the underlying ArrayType
/// and pick `T` accordingly.
/// Beyond that, the raw slice is not guaranteed to be legible at any given index:
/// an element may be an "SQL null" if the null bitmap says so.
/// And even where the bitmap does not indicate null, the value itself may still be null,
/// so reading the value can be correct while dereferencing it afterwards is not.
pub unsafe fn data_slice<T>(&mut self) -> NonNull<[T]> {
    let count = self.len() as usize;
    // SAFETY: Validity of the header pointer was asserted on construction.
    let start: *mut T = unsafe { pgx_ARR_DATA_PTR(self.at.as_ptr()) }.cast();
    let raw_data = slice_from_raw_parts_mut(start, count);
    // SAFETY: the shim's result is derived from the non-null header pointer.
    unsafe { NonNull::new_unchecked(raw_data) }
}
}
1 change: 1 addition & 0 deletions pgx/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub mod itemptr;
pub mod list;
#[macro_use]
pub mod log;
pub mod array;
pub mod atomics;
pub mod bgworkers;
pub mod heap_tuple;
Expand Down