Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Low-level interop for PG arrays #636

Merged
merged 37 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e8650f7
c-shims for PG array functions
mhov Aug 17, 2022
39566c0
suggested changes for shims
mhov Aug 19, 2022
98196df
Merge branch 'develop' into array-shims
workingjubilee Aug 23, 2022
4eea77c
Fix cshim exports
workingjubilee Aug 24, 2022
9cffc1b
Introduce RawArray abstraction
workingjubilee Aug 24, 2022
48176a2
Port ARR_HASNULL to RawArray
workingjubilee Aug 24, 2022
82790eb
Clarify comment
workingjubilee Aug 24, 2022
d67dbf0
Introduce fat pointer fns into RawArray
workingjubilee Aug 24, 2022
431e89e
Port ARR_DATA_PTR to RawArray::data_slice
workingjubilee Aug 24, 2022
a60a6dd
Clarify safety requirements for RawArray::dims
workingjubilee Aug 24, 2022
dfcfbb6
Clarify safety requirements for RawArray::data
workingjubilee Aug 24, 2022
e5fc2f5
Cleanup
workingjubilee Aug 24, 2022
48ccccb
Fix RawArray::data_offset signature
workingjubilee Aug 24, 2022
11c5a63
Fix testing
workingjubilee Aug 24, 2022
d74719c
Port ARR_NULLBITMAP to RawArray::nulls
workingjubilee Aug 24, 2022
6900356
Document test safety remarks
workingjubilee Aug 24, 2022
bbad726
sub pub from pgx::array extern fn
workingjubilee Aug 24, 2022
133b44d
Explain Rust type init requirements
workingjubilee Aug 25, 2022
59dac25
Expand on RawArray description
workingjubilee Aug 25, 2022
0da6f1c
Note reborrow in test
workingjubilee Aug 25, 2022
7bfa2e4
Remove extra comment in pgx::array
workingjubilee Aug 25, 2022
55f62ac
Introduce even-lower-level ArrayPtr
workingjubilee Aug 26, 2022
b4a7364
Reformat
workingjubilee Aug 26, 2022
f763c0a
Rough draft of ArrayPtr and RawArray
workingjubilee Aug 26, 2022
b0b3477
Format again
workingjubilee Aug 26, 2022
639d0b4
Merge back into RawArray
workingjubilee Aug 26, 2022
55deefe
Cleanup fn that might be dupes or confusing
workingjubilee Aug 26, 2022
1785c07
Clean up RawArray docs and comments
workingjubilee Aug 26, 2022
16cf849
Add dims_mut, to see if it works
workingjubilee Aug 26, 2022
e2a3509
Cleanup and explanations for RawArray::{dims_mut, nulls}
workingjubilee Aug 26, 2022
9979f77
Format
workingjubilee Aug 26, 2022
f15eabd
Add hint about lens
workingjubilee Aug 26, 2022
b2c9508
Remove unnecessary unsafe on test
workingjubilee Aug 26, 2022
4ba1b0d
Lift remarks into public docs
workingjubilee Aug 26, 2022
5ab4ba8
One last format
workingjubilee Aug 26, 2022
fd3c21d
One last cleanup
workingjubilee Aug 26, 2022
1dd9155
Remove dubious dims_mut function
workingjubilee Aug 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions pgx-pg-sys/cshim/pgx-cshim.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Use of this source code is governed by the MIT license that can be found in the
#include "parser/parsetree.h"
#include "utils/memutils.h"
#include "utils/builtins.h"
#include "utils/array.h"


PGDLLEXPORT MemoryContext pgx_GetMemoryContextChunk(void *ptr);
Expand Down Expand Up @@ -110,3 +111,33 @@ PGDLLEXPORT char *pgx_GETSTRUCT(HeapTuple tuple);
char *pgx_GETSTRUCT(HeapTuple tuple) {
return GETSTRUCT(tuple);
}

PGDLLEXPORT char *pgx_ARR_DATA_PTR(ArrayType *arr);
char *pgx_ARR_DATA_PTR(ArrayType *arr) {
return ARR_DATA_PTR(arr);
}

PGDLLEXPORT int pgx_ARR_NELEMS(ArrayType *arr);
int pgx_ARR_NELEMS(ArrayType *arr) {
return ArrayGetNItems(arr->ndim, ARR_DIMS(arr));
}

PGDLLEXPORT bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr);
bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr) {
return ARR_NULLBITMAP(arr);
}

PGDLLEXPORT int pgx_ARR_NDIM(ArrayType *arr);
int pgx_ARR_NDIM(ArrayType *arr) {
return ARR_NDIM(arr);
}

PGDLLEXPORT bool pgx_ARR_HASNULL(ArrayType *arr);
bool pgx_ARR_HASNULL(ArrayType *arr) {
return ARR_HASNULL(arr);
}

PGDLLEXPORT int *pgx_ARR_DIMS(ArrayType *arr);
int *pgx_ARR_DIMS(ArrayType *arr){
return ARR_DIMS(arr);
}
92 changes: 92 additions & 0 deletions pgx-tests/src/tests/array_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ All rights reserved.
Use of this source code is governed by the MIT license that can be found in the LICENSE file.
*/

use pgx::array::RawArray;
use pgx::*;
use serde_json::*;

Expand Down Expand Up @@ -99,6 +100,52 @@ fn return_zero_length_vec() -> Vec<i32> {
Vec::new()
}

#[pg_extern]
fn get_arr_nelems(arr: Array<i32>) -> libc::c_int {
// SAFETY: Eh it's fine, it's just a len check.
unsafe { RawArray::from_array(arr).unwrap().len() }
}

/// # Safety
/// Don't call with an actually-null index, or you might see an invalid value.
#[pg_extern]
#[deny(unsafe_op_in_unsafe_fn)]
unsafe fn get_arr_data_ptr_nth_elem(arr: Array<i32>, elem: i32) -> Option<i32> {
// SAFETY: this is Known to be an Array from ArrayType,
// and the index has to be a valid one.
unsafe {
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
let raw = RawArray::from_array(arr).unwrap().data::<i32>();
let slice = &(*raw.as_ptr());
slice.get(elem as usize).copied()
}
}

#[pg_extern]
fn display_get_arr_nullbitmap(arr: Array<i32>) -> String {
let raw = unsafe { RawArray::from_array(arr) }.unwrap();

if let Some(slice) = raw.nulls() {
// SAFETY: If the test has gotten this far, the ptr is good for 0+ bytes.
let slice = unsafe { &*slice.as_ptr() };
BradyBonnette marked this conversation as resolved.
Show resolved Hide resolved
// might panic if the array is len 0
format!("{:#010b}", slice[0])
} else {
String::from("")
}
}

#[pg_extern]
fn get_arr_ndim(arr: Array<i32>) -> libc::c_int {
// SAFETY: This is a valid ArrayType and it's just a field access.
unsafe { RawArray::from_array(arr).unwrap().ndims() }
}

#[pg_extern]
fn get_arr_hasnull(arr: Array<i32>) -> bool {
// SAFETY: This is a valid ArrayType and it's just a field access.
unsafe { RawArray::from_array(arr).unwrap().nullable() }
}

#[pg_extern]
fn over_implicit_drop() -> Vec<i64> {
// Create an array of exactly Datum-sized numbers.
Expand Down Expand Up @@ -255,6 +302,51 @@ mod tests {
assert_eq!(json.0, json! {{"values": [1, 2, 3, null, 4]}});
}

#[pg_test]
fn test_arr_data_ptr() {
let len = Spi::get_one::<i32>("SELECT get_arr_nelems('{1,2,3,4,5}'::int[])")
.expect("failed to get SPI result");

assert_eq!(len, 5);
}

#[pg_test]
fn test_get_arr_data_ptr_nth_elem() {
let nth = Spi::get_one::<i32>("SELECT get_arr_data_ptr_nth_elem('{1,2,3,4,5}'::int[], 2)")
.expect("failed to get SPI result");

assert_eq!(nth, 3);
}

#[pg_test]
fn test_display_get_arr_nullbitmap() {
let bitmap_str = Spi::get_one::<String>(
"SELECT display_get_arr_nullbitmap(ARRAY[1,NULL,3,NULL,5]::int[])",
)
.expect("failed to get SPI result");

assert_eq!(bitmap_str, "0b00010101");
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved

let bitmap_str =
Spi::get_one::<String>("SELECT display_get_arr_nullbitmap(ARRAY[1,2,3,4,5]::int[])")
.expect("failed to get SPI result");

assert_eq!(bitmap_str, "");
}

#[pg_test]
fn test_get_arr_ndim() {
let ndim = Spi::get_one::<i32>("SELECT get_arr_ndim(ARRAY[1,2,3,4,5]::int[])")
.expect("failed to get SPI result");

assert_eq!(ndim, 1);

let ndim = Spi::get_one::<i32>("SELECT get_arr_ndim('{{1,2,3},{4,5,6}}'::int[])")
.expect("failed to get SPI result");

assert_eq!(ndim, 2);
}

#[pg_test]
fn test_array_over_direct() {
let vals = crate::tests::array_tests::over_implicit_drop();
Expand Down
215 changes: 215 additions & 0 deletions pgx/src/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
use crate::datum::{Array, FromDatum};
use crate::pg_sys;
use core::ptr::{slice_from_raw_parts_mut, NonNull};
use pgx_pg_sys::*;

extern "C" {
/// # Safety
/// Does a field access, but doesn't deref out of bounds of ArrayType
fn pgx_ARR_DATA_PTR(arrayType: *mut ArrayType) -> *mut u8;
/// # Safety
/// Does a field access, but doesn't deref out of bounds of ArrayType
fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut libc::c_int;
/// # Safety
/// Must only be used on a "valid" (Postgres-constructed) ArrayType
fn pgx_ARR_NELEMS(arrayType: *mut ArrayType) -> i32;
/// # Safety
/// Does a field access, but doesn't deref out of bounds of ArrayType
fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8;
}

/// Handle describing a bare, "untyped" pointer to an array,
/// offering safe accessors to the various fields of one.
#[repr(transparent)]
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
#[derive(Debug)]
pub struct RawArray {
at: NonNull<ArrayType>,
}

#[deny(unsafe_op_in_unsafe_fn)]
impl RawArray {
// General implementation notes:
// RawArray is not Copy or Clone, making it harder to misuse versus *mut ArrayType.
// But this also offers safe accessors to the fields, like &ArrayType,
// so it requires validity assertions in order to be constructed.
// The main reason it uses NonNull and denies Clone, however, is access soundness:
// It is not sound to go from &mut Type to *mut T if *mut T is not a field of Type.
// This creates an obvious complication for handing out pointers into varlenas.
// Thus also why this does not use lifetime-bounded borrows.
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved

/// Returns a handle to the raw array header.
///
/// # Safety
///
/// When calling this method, you have to ensure that all of the following is true:
/// * The pointer must be properly aligned.
/// * It must be "dereferenceable" in the sense defined in [the std documentation].
/// * The pointer must point to an initialized instance of `ArrayType`.
/// * This can be considered a unique, "owning pointer",
/// so it won't be aliased while RawArray is held,
/// and it points to data in the Postgres ArrayType format.
///
/// It should be noted as RawArray is not inherently lifetime-bound, it can be racy and unsafe!
///
/// [the std documentation]: core::ptr#safety
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
pub unsafe fn from_raw(at: NonNull<ArrayType>) -> RawArray {
RawArray { at }
}

/// # Safety
///
/// Array must have been constructed from an ArrayType pointer.
pub unsafe fn from_array<T: FromDatum>(arr: Array<T>) -> Option<RawArray> {
let array_type = arr.into_array_type() as *mut _;
Comment on lines +91 to +92
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

@workingjubilee workingjubilee Aug 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to change it to not do that NULL-check in a followup PR. Should I fix that now? It feels vaguely off to diff outside this addition.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh nah that's fine, just leave this unresolved until the followup PR, it's nonblocking.

Some(RawArray {
at: NonNull::new(array_type)?,
})
}

/// Returns the inner raw pointer to the ArrayType.
pub fn into_raw(self) -> NonNull<ArrayType> {
self.at
}

/// Get the number of dimensions.
pub fn ndims(&self) -> libc::c_int {
// SAFETY: Validity asserted on construction.
unsafe {
(*self.at.as_ptr()).ndim
// FIXME: While this is a c_int, the max ndim is normally 6
// While the value can be set higher, it is... unlikely
// that it is going to actually challenge even 16-bit pointer widths.
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
// It would be preferable to return a usize instead,
// however, PGX has trouble with that, unfortunately.
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
as _
}
}

/// A raw slice of the dimensions.
/// Oxidized form of ARR_DIMS(ArrayType*)
///
/// # Safety
///
/// This requires &mut to collect the slice, but be aware: this is a raw pointer!
/// If you find ways to store it, you are probably violating ownership.
/// Raw pointer validity is **asserted on dereference, not construction**,
/// so remember, this slice is only guaranteed to be valid almost immediately
/// after obtaining it, or if you continue to hold the RawArray.
pub fn dims(&mut self) -> NonNull<[libc::c_int]> {
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
// for expected behavior, see:
// postgres/src/include/utils/array.h
// #define ARR_DIMS
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this path be visible in the public documentation? I suppose they can click through to see the source, but the public docs will support links.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, actually, directly linking is a good idea. And that way we could also link to the doxygen.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doxygen is too unstable, will point to master and lines might change. Instead used gitweb links.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a pgx-maintainers perspective, how much of a maintenance nightmare is it linking to the doxygen pages? How often would that change?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potentially every time someone lands a commit into postgresql/master.


// SAFETY: Welcome to the infernal bowels of FFI.
// Because the initial ptr was NonNull, we can assume this is also NonNull.
// As validity of the initial ptr was asserted on construction of RawArray,
// this can assume the dims ptr is also valid, allowing making the slice.
// This code doesn't assert validity per se, but in practice,
// the caller will probably immediately turn this into a borrowed slice,
// opening up the methods that are available on borrowed slices.
//
// So, to be clear, yes, everything done so far allows the caller to do so,
// though it is possible the caller can misuse this in various ways.
// Only the "naive" case is well-guarded.
unsafe {
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
let len = self.ndims() as usize;
NonNull::new_unchecked(slice_from_raw_parts_mut(
pgx_ARR_DIMS(self.at.as_ptr()),
len,
))
}
}

/// The flattened length of the array.
pub fn len(&self) -> libc::c_int {
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
// SAFETY: Validity asserted on construction, and...
// ...well, hopefully Postgres knows what it's doing.
unsafe {
pgx_ARR_NELEMS(self.at.as_ptr())
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
// FIXME: While this was indeed a function that returns a c_int,
// using a usize is more idiomatic in Rust, to say the least.
// In addition, the actual sizes are under various restrictions,
// so we probably can further constrain the value, honestly.
// However, PGX has trouble with returning usizes
BradyBonnette marked this conversation as resolved.
Show resolved Hide resolved
as _
}
}

pub fn oid(&self) -> pg_sys::Oid {
// SAFETY: Validity asserted on construction.
unsafe { (*self.at.as_ptr()).elemtype }
}

/// Gets the offset to the ArrayType's data.
/// Note that this should not be "taken literally".
fn data_offset(&self) -> i32 {
// SAFETY: Validity asserted on construction.
unsafe { (*self.at.as_ptr()).dataoffset }
// This field is an "int32" in Postgres
}

/// Equivalent to ARR_HASNULL(ArrayType*)
/// Note this means that it only asserts that there MIGHT be a null
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
pub fn nullable(&self) -> bool {
// for expected behavior, see:
// postgres/src/include/utils/array.h
// #define ARR_HASNULL
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
self.data_offset() != 0
}

/// Oxidized form of ARR_NULLBITMAP(ArrayType*)
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
pub fn nulls(&self) -> Option<NonNull<[u8]>> {
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
// for expected behavior, see:
// postgres/src/include/utils/array.h
// #define ARR_NULLBITMAP
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
let len = (self.len() as usize + 7) >> 3; // Obtains 0 if len was 0.
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved

// SAFETY: This obtains the nulls pointer, which is valid to obtain because
// the validity was asserted on construction. However, unlike the other cases,
// it isn't correct to trust it. Instead, this gets null-checked.
// This is because, while the initial pointer is NonNull,
// ARR_NULLBITMAP can return a nullptr!
let nulls = unsafe { slice_from_raw_parts_mut(pgx_ARR_NULLBITMAP(self.at.as_ptr()), len) };
// This code doesn't assert validity per se, but in practice,
// the caller will probably immediately turn this into a borrowed slice,
// opening up the methods that are available on borrowed slices.
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
Some(NonNull::new(nulls)?)
}

/// The slice of the data.
/// Oxidized form of ARR_DATA_PTR(ArrayType*)
///
/// # Safety
///
/// This is not inherently typesafe!
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
/// Thus you must know the implied type of the underlying ArrayType when calling this.
/// In addition, the raw slice is not guaranteed to be legible at any given index,
/// e.g. it may be an "SQL null" if so indicated in the null bitmap.
/// But even if the index is not marked as null, the value may be equal to nullptr,
/// thus leaving it correct to read the value but incorrect to then dereference.
///
/// That is why this returns `NonNull<[T]>`: if it returned `&mut [T]`,
/// then for many possible types, that would actually be UB, as it would assert
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
/// that each particular index was a valid `T`.
pub unsafe fn data<T>(&mut self) -> NonNull<[T]> {
let len = self.len() as usize;
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved

// SAFETY: Welcome to the infernal bowels of FFI.
// Because the initial ptr was NonNull, we can assume this is also NonNull.
// As validity of the initial ptr was asserted on construction of RawArray,
// this can assume the data ptr is also valid, allowing making the slice.
// This code doesn't assert validity per se, but in practice,
// the caller will probably immediately turn this into a borrowed slice,
// opening up the methods that are available on borrowed slices.
//
// Most importantly, the caller has asserted this is in fact a valid [T],
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
// by calling this function, so both this code and the caller can rely
// on that assertion, only requiring that it is correct.
unsafe {
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
NonNull::new_unchecked(slice_from_raw_parts_mut(
pgx_ARR_DATA_PTR(self.at.as_ptr()).cast(),
len,
))
}
}
}
1 change: 1 addition & 0 deletions pgx/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub mod itemptr;
pub mod list;
#[macro_use]
pub mod log;
pub mod array;
pub mod atomics;
pub mod bgworkers;
pub mod heap_tuple;
Expand Down