From e8650f7a76570cb91bf128d7d777cb2c6bb238a6 Mon Sep 17 00:00:00 2001 From: mhov Date: Tue, 16 Aug 2022 22:34:27 -0700 Subject: [PATCH 01/36] c-shims for PG array functions --- pgx-pg-sys/cshim/pgx-cshim.c | 26 +++++++++ pgx-pg-sys/src/lib.rs | 41 +++++++++++++ pgx-tests/src/tests/array_tests.rs | 94 +++++++++++++++++++++++++++++- 3 files changed, 160 insertions(+), 1 deletion(-) diff --git a/pgx-pg-sys/cshim/pgx-cshim.c b/pgx-pg-sys/cshim/pgx-cshim.c index 6952812f7..7c49a2b72 100644 --- a/pgx-pg-sys/cshim/pgx-cshim.c +++ b/pgx-pg-sys/cshim/pgx-cshim.c @@ -25,6 +25,7 @@ Use of this source code is governed by the MIT license that can be found in the #include "parser/parsetree.h" #include "utils/memutils.h" #include "utils/builtins.h" +#include "utils/array.h" PGDLLEXPORT MemoryContext pgx_GetMemoryContextChunk(void *ptr); @@ -110,3 +111,28 @@ PGDLLEXPORT char *pgx_GETSTRUCT(HeapTuple tuple); char *pgx_GETSTRUCT(HeapTuple tuple) { return GETSTRUCT(tuple); } + +PGDLLEXPORT char *pgx_ARR_DATA_PTR(ArrayType *arr); +char *pgx_ARR_DATA_PTR(ArrayType *arr) { + return ARR_DATA_PTR(arr); +} + +PGDLLEXPORT int pgx_ARR_NELEMS(ArrayType *arr); +int pgx_ARR_NELEMS(ArrayType *arr) { + return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); +} + +PGDLLEXPORT bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr); +bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr) { + return ARR_NULLBITMAP(arr); +} + +PGDLLEXPORT int pgx_ARR_NDIM(ArrayType *arr); +int pgx_ARR_NDIM(ArrayType *arr) { + return ARR_NDIM(arr); +} + +PGDLLEXPORT bool pgx_ARR_HASNULL(ArrayType *arr); +bool pgx_ARR_HASNULL(ArrayType *arr) { + return ARR_HASNULL(arr); +} diff --git a/pgx-pg-sys/src/lib.rs b/pgx-pg-sys/src/lib.rs index 281856128..2b0f90bef 100644 --- a/pgx-pg-sys/src/lib.rs +++ b/pgx-pg-sys/src/lib.rs @@ -240,6 +240,11 @@ mod all_versions { pub fn pgx_list_nth_oid(list: *mut super::List, nth: i32) -> super::Oid; pub fn pgx_list_nth_cell(list: *mut super::List, nth: i32) -> *mut super::ListCell; pub fn pgx_GETSTRUCT(tuple: pg_sys::HeapTuple) -> *mut std::os::raw::c_char; + pub fn pgx_ARR_DATA_PTR(arrayType: *mut pg_sys::ArrayType) -> *mut u8; + pub fn pgx_ARR_NELEMS(arrayType: *mut pg_sys::ArrayType) -> i32; + pub fn pgx_ARR_NDIM(arrayType: *mut pg_sys::ArrayType) -> i32; + pub fn pgx_ARR_NULLBITMAP(arrayType: *mut pg_sys::ArrayType) -> *mut pg_sys::bits8; + pub fn pgx_ARR_HASNULL(arrayType: *mut pg_sys::ArrayType) -> bool; } #[inline] @@ -445,6 +450,42 @@ mod all_versions { context: *mut ::std::os::raw::c_void, ) -> bool; } + + #[inline] + pub fn get_arr_data_ptr(arr: *mut super::ArrayType) -> *mut T { + unsafe { pgx_ARR_DATA_PTR(arr) as *mut T } + } + + #[inline] + pub fn get_arr_nelems(arr: *mut super::ArrayType) -> i32 { + unsafe { pgx_ARR_NELEMS(arr) } + } + + #[inline] + pub fn get_arr_ndim(arr: *mut super::ArrayType) -> i32 { + unsafe { pgx_ARR_NDIM(arr) } + } + + #[inline] + pub fn get_arr_nullbitmap<'a>(arr: *mut super::ArrayType) -> &'a [pg_sys::bits8] { + unsafe { + let len = (pgx_ARR_NELEMS(arr) + 7) / 8; + std::slice::from_raw_parts(pgx_ARR_NULLBITMAP(arr), len as usize) + } + } + + #[inline] + pub fn get_arr_nullbitmap_mut<'a>(arr: *mut super::ArrayType) -> &'a mut [u8] { + unsafe { + let len = (pgx_ARR_NELEMS(arr) + 7) / 8; + std::slice::from_raw_parts_mut(pgx_ARR_NULLBITMAP(arr), len as usize) + } + } + + #[inline] + pub fn get_arr_hasnull(arr: *mut super::ArrayType) -> bool { + unsafe { pgx_ARR_HASNULL(arr) } + } } mod internal { diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index cc4e07306..b1635d34e 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -7,7 +7,7 @@ All rights reserved. Use of this source code is governed by the MIT license that can be found in the LICENSE file. */ -use pgx::*; +use pgx::{pg_sys::ArrayType, *}; use serde_json::*; #[pg_extern(name = "sum_array")] @@ -99,6 +99,53 @@ fn return_zero_length_vec() -> Vec { Vec::new() } +#[pg_extern] +fn get_arr_nelems(arr: Array) -> i32 { + let arr_type = arr.into_array_type(); + pg_sys::get_arr_nelems(arr_type as *mut ArrayType) +} + +#[pg_extern] +fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { + let len = arr.len(); + let arr_type = arr.into_array_type(); + let ptr = pg_sys::get_arr_data_ptr::(arr_type as *mut ArrayType); + + unsafe { + let elem = elem as usize; + let data = std::slice::from_raw_parts(ptr, len); + if elem < len { + Some(data[elem]) + } else { + None + } + } +} + +#[pg_extern] +fn display_get_arr_nullbitmap(arr: Array) -> String { + let arr_type = arr.into_array_type(); + + if pg_sys::get_arr_hasnull(arr_type as *mut ArrayType) { + let bitmap_slice = pg_sys::get_arr_nullbitmap(arr_type as *mut ArrayType); + format!("{:#010b}", bitmap_slice[0]) + } else { + String::from("") + } +} + +#[pg_extern] +fn get_arr_ndim(arr: Array) -> i32 { + let arr_type = arr.into_array_type(); + pg_sys::get_arr_ndim(arr_type as *mut ArrayType) +} + +#[pg_extern] +fn get_arr_hasnull(arr: Array) -> bool { + let arr_type = arr.into_array_type(); + pg_sys::get_arr_hasnull(arr_type as *mut ArrayType) +} + #[cfg(any(test, feature = "pg_test"))] #[pgx::pg_schema] mod tests { @@ -240,4 +287,49 @@ mod tests { .expect("Failed to return json even though it's right there ^^"); assert_eq!(json.0, json! {{"values": [1, 2, 3, null, 4]}}); } + + #[pg_test] + fn test_arr_data_ptr() { + let len = Spi::get_one::("SELECT get_arr_nelems('{1,2,3,4,5}'::int[])") + .expect("failed to get SPI result"); + + assert_eq!(len, 5); + } + + #[pg_test] + fn test_get_arr_data_ptr_nth_elem() { + let nth = Spi::get_one::("SELECT get_arr_data_ptr_nth_elem('{1,2,3,4,5}'::int[], 2)") + .expect("failed to get SPI result"); + + assert_eq!(nth, 3); + } + + #[pg_test] + fn test_display_get_arr_nullbitmap() { + let bitmap_str = Spi::get_one::( + "SELECT display_get_arr_nullbitmap(ARRAY[1,NULL,3,NULL,5]::int[])", + ) + .expect("failed to get SPI result"); + + assert_eq!(bitmap_str, "0b00010101"); + + let bitmap_str = + Spi::get_one::("SELECT display_get_arr_nullbitmap(ARRAY[1,2,3,4,5]::int[])") + .expect("failed to get SPI result"); + + assert_eq!(bitmap_str, ""); + } + + #[pg_test] + fn test_get_arr_ndim() { + let ndim = Spi::get_one::("SELECT get_arr_ndim(ARRAY[1,2,3,4,5]::int[])") + .expect("failed to get SPI result"); + + assert_eq!(ndim, 1); + + let ndim = Spi::get_one::("SELECT get_arr_ndim('{{1,2,3},{4,5,6}}'::int[])") + .expect("failed to get SPI result"); + + assert_eq!(ndim, 2); + } } From 39566c041ad1f8d169cbd1924966790037086065 Mon Sep 17 00:00:00 2001 From: mhov Date: Fri, 19 Aug 2022 16:39:37 -0700 Subject: [PATCH 02/36] suggested changes for shims --- pgx-pg-sys/cshim/pgx-cshim.c | 11 ++++-- pgx-pg-sys/src/lib.rs | 41 -------------------- pgx-tests/src/tests/array_tests.rs | 12 +++--- pgx/src/array.rs | 60 ++++++++++++++++++++++++++++++ pgx/src/lib.rs | 1 + 5 files changed, 75 insertions(+), 50 deletions(-) create mode 100644 pgx/src/array.rs diff --git a/pgx-pg-sys/cshim/pgx-cshim.c b/pgx-pg-sys/cshim/pgx-cshim.c index 7c49a2b72..3838687e4 100644 --- a/pgx-pg-sys/cshim/pgx-cshim.c +++ b/pgx-pg-sys/cshim/pgx-cshim.c @@ -117,9 +117,9 @@ char *pgx_ARR_DATA_PTR(ArrayType *arr) { return ARR_DATA_PTR(arr); } -PGDLLEXPORT int pgx_ARR_NELEMS(ArrayType *arr); -int pgx_ARR_NELEMS(ArrayType *arr) { - return ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); +PGDLLEXPORT int pgx_ARRNELEMS(ArrayType *arr); +int ARRNELEMS(ArrayType *arr) { + return ArrayGetNItems(arr->ndim, ARR_DIMS(arr)); } PGDLLEXPORT bits8 *pgx_ARR_NULLBITMAP(ArrayType *arr); @@ -136,3 +136,8 @@ PGDLLEXPORT bool pgx_ARR_HASNULL(ArrayType *arr); bool pgx_ARR_HASNULL(ArrayType *arr) { return ARR_HASNULL(arr); } + +PGDLLEXPORT int *pgx_ARR_HASNULL(ArrayType *arr); +int *pgx_ARR_DIMS(ArrayType *arr){ + return ARR_DIMS(arr); +} diff --git a/pgx-pg-sys/src/lib.rs b/pgx-pg-sys/src/lib.rs index 2b0f90bef..281856128 100644 --- a/pgx-pg-sys/src/lib.rs +++ b/pgx-pg-sys/src/lib.rs @@ -240,11 +240,6 @@ mod all_versions { pub fn pgx_list_nth_oid(list: *mut super::List, nth: i32) -> super::Oid; pub fn pgx_list_nth_cell(list: *mut super::List, nth: i32) -> *mut super::ListCell; pub fn pgx_GETSTRUCT(tuple: pg_sys::HeapTuple) -> *mut std::os::raw::c_char; - pub fn pgx_ARR_DATA_PTR(arrayType: *mut pg_sys::ArrayType) -> *mut u8; - pub fn pgx_ARR_NELEMS(arrayType: *mut pg_sys::ArrayType) -> i32; - pub fn pgx_ARR_NDIM(arrayType: *mut pg_sys::ArrayType) -> i32; - pub fn pgx_ARR_NULLBITMAP(arrayType: *mut pg_sys::ArrayType) -> *mut pg_sys::bits8; - pub fn pgx_ARR_HASNULL(arrayType: *mut pg_sys::ArrayType) -> bool; } #[inline] @@ -450,42 +445,6 @@ mod all_versions { context: *mut ::std::os::raw::c_void, ) -> bool; } - - #[inline] - pub fn get_arr_data_ptr(arr: *mut super::ArrayType) -> *mut T { - unsafe { pgx_ARR_DATA_PTR(arr) as *mut T } - } - - #[inline] - pub fn get_arr_nelems(arr: *mut super::ArrayType) -> i32 { - unsafe { pgx_ARR_NELEMS(arr) } - } - - #[inline] - pub fn get_arr_ndim(arr: *mut super::ArrayType) -> i32 { - unsafe { pgx_ARR_NDIM(arr) } - } - - #[inline] - pub fn get_arr_nullbitmap<'a>(arr: *mut super::ArrayType) -> &'a [pg_sys::bits8] { - unsafe { - let len = (pgx_ARR_NELEMS(arr) + 7) / 8; - std::slice::from_raw_parts(pgx_ARR_NULLBITMAP(arr), len as usize) - } - } - - #[inline] - pub fn get_arr_nullbitmap_mut<'a>(arr: *mut super::ArrayType) -> &'a mut [u8] { - unsafe { - let len = (pgx_ARR_NELEMS(arr) + 7) / 8; - std::slice::from_raw_parts_mut(pgx_ARR_NULLBITMAP(arr), len as usize) - } - } - - #[inline] - pub fn get_arr_hasnull(arr: *mut super::ArrayType) -> bool { - unsafe { pgx_ARR_HASNULL(arr) } - } } mod internal { diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index b1635d34e..d124d1612 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -102,16 +102,16 @@ fn return_zero_length_vec() -> Vec { #[pg_extern] fn get_arr_nelems(arr: Array) -> i32 { let arr_type = arr.into_array_type(); - pg_sys::get_arr_nelems(arr_type as *mut ArrayType) + array::get_arr_nelems(arr_type as *mut ArrayType) } #[pg_extern] fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { let len = arr.len(); let arr_type = arr.into_array_type(); - let ptr = pg_sys::get_arr_data_ptr::(arr_type as *mut ArrayType); unsafe { + let ptr = array::get_arr_data_ptr::(arr_type as *mut ArrayType); let elem = elem as usize; let data = std::slice::from_raw_parts(ptr, len); if elem < len { @@ -126,8 +126,8 @@ fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { fn display_get_arr_nullbitmap(arr: Array) -> String { let arr_type = arr.into_array_type(); - if pg_sys::get_arr_hasnull(arr_type as *mut ArrayType) { - let bitmap_slice = pg_sys::get_arr_nullbitmap(arr_type as *mut ArrayType); + if array::get_arr_hasnull(arr_type as *mut ArrayType) { + let bitmap_slice = array::get_arr_nullbitmap(arr_type as *mut ArrayType); format!("{:#010b}", bitmap_slice[0]) } else { String::from("") @@ -137,13 +137,13 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { #[pg_extern] fn get_arr_ndim(arr: Array) -> i32 { let arr_type = arr.into_array_type(); - pg_sys::get_arr_ndim(arr_type as *mut ArrayType) + array::get_arr_ndim(arr_type as *mut ArrayType) } #[pg_extern] fn get_arr_hasnull(arr: Array) -> bool { let arr_type = arr.into_array_type(); - pg_sys::get_arr_hasnull(arr_type as *mut ArrayType) + array::get_arr_hasnull(arr_type as *mut ArrayType) } #[cfg(any(test, feature = "pg_test"))] diff --git a/pgx/src/array.rs b/pgx/src/array.rs new file mode 100644 index 000000000..a1e153047 --- /dev/null +++ b/pgx/src/array.rs @@ -0,0 +1,60 @@ +use pgx_pg_sys::*; + +extern "C" { + pub fn pgx_ARR_NELEMS(arrayType: *mut ArrayType) -> i32; + pub fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8; +} + +#[inline] +pub unsafe fn get_arr_data_ptr(arr: *mut ArrayType) -> *mut T { + extern "C" { + pub fn pgx_ARR_DATA_PTR(arrayType: *mut ArrayType) -> *mut u8; + } + pgx_ARR_DATA_PTR(arr) as *mut T +} + +#[inline] +pub fn get_arr_nelems(arr: *mut ArrayType) -> i32 { + unsafe { pgx_ARR_NELEMS(arr) } +} + +#[inline] +pub fn get_arr_ndim(arr: *mut ArrayType) -> i32 { + extern "C" { + pub fn pgx_ARR_NDIM(arrayType: *mut ArrayType) -> i32; + } + unsafe { pgx_ARR_NDIM(arr) } +} + +#[inline] +pub fn get_arr_nullbitmap<'a>(arr: *mut ArrayType) -> &'a [bits8] { + unsafe { + let len = (pgx_ARR_NELEMS(arr) + 7) / 8; + std::slice::from_raw_parts(pgx_ARR_NULLBITMAP(arr), len as usize) + } +} + +#[inline] +pub fn get_arr_nullbitmap_mut<'a>(arr: *mut ArrayType) -> &'a mut [u8] { + unsafe { + let len = (pgx_ARR_NELEMS(arr) + 7) / 8; + std::slice::from_raw_parts_mut(pgx_ARR_NULLBITMAP(arr), len as usize) + } +} + +#[inline] +pub fn get_arr_hasnull(arr: *mut ArrayType) -> bool { + // copied from array.h + unsafe { (*arr).dataoffset != 0 } +} + +#[inline] +pub fn get_arr_dims<'a>(arr: *mut ArrayType) -> &'a [i32] { + extern "C" { + pub fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut i32; + } + unsafe { + let len = (*arr).ndim; + std::slice::from_raw_parts(pgx_ARR_DIMS(arr), len as usize) + } +} diff --git a/pgx/src/lib.rs b/pgx/src/lib.rs index 50e57e6aa..ebce6c13d 100644 --- a/pgx/src/lib.rs +++ b/pgx/src/lib.rs @@ -46,6 +46,7 @@ pub mod itemptr; pub mod list; #[macro_use] pub mod log; +pub mod array; pub mod atomics; pub mod bgworkers; pub mod heap_tuple; From 4eea77c4d0fa46ca5fcfa2b42f59e365de5de7d5 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Aug 2022 17:08:52 -0700 Subject: [PATCH 03/36] Fix cshim exports --- pgx-pg-sys/cshim/pgx-cshim.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pgx-pg-sys/cshim/pgx-cshim.c b/pgx-pg-sys/cshim/pgx-cshim.c index 3838687e4..18ee4da79 100644 --- a/pgx-pg-sys/cshim/pgx-cshim.c +++ b/pgx-pg-sys/cshim/pgx-cshim.c @@ -117,8 +117,8 @@ char *pgx_ARR_DATA_PTR(ArrayType *arr) { return ARR_DATA_PTR(arr); } -PGDLLEXPORT int pgx_ARRNELEMS(ArrayType *arr); -int ARRNELEMS(ArrayType *arr) { +PGDLLEXPORT int pgx_ARR_NELEMS(ArrayType *arr); +int pgx_ARR_NELEMS(ArrayType *arr) { return ArrayGetNItems(arr->ndim, ARR_DIMS(arr)); } @@ -137,7 +137,7 @@ bool pgx_ARR_HASNULL(ArrayType *arr) { return ARR_HASNULL(arr); } -PGDLLEXPORT int *pgx_ARR_HASNULL(ArrayType *arr); +PGDLLEXPORT int *pgx_ARR_DIMS(ArrayType *arr); int *pgx_ARR_DIMS(ArrayType *arr){ return ARR_DIMS(arr); } From 9cffc1b0b58944ba69fc2c5dea337e53f2a3a3f1 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Aug 2022 21:01:20 -0700 Subject: [PATCH 04/36] Introduce RawArray abstraction This offers safe accessors to the underlying ArrayType's fields, without imposing any more burden on the data in question. --- pgx-tests/src/tests/array_tests.rs | 6 +-- pgx/src/array.rs | 87 +++++++++++++++++++++++++++--- 2 files changed, 82 insertions(+), 11 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 33826aabd..73e381db6 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -7,6 +7,7 @@ All rights reserved. Use of this source code is governed by the MIT license that can be found in the LICENSE file. */ +use pgx::array::RawArray; use pgx::{pg_sys::ArrayType, *}; use serde_json::*; @@ -135,9 +136,8 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { } #[pg_extern] -fn get_arr_ndim(arr: Array) -> i32 { - let arr_type = arr.into_array_type(); - array::get_arr_ndim(arr_type as *mut ArrayType) +fn get_arr_ndim(arr: Array) -> libc::c_int { + unsafe { RawArray::from_array(arr).unwrap().ndims() } } #[pg_extern] diff --git a/pgx/src/array.rs b/pgx/src/array.rs index a1e153047..16e188ca5 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -1,3 +1,6 @@ +use crate::datum::{Array, FromDatum}; +use crate::pg_sys; +use core::ptr::NonNull; use pgx_pg_sys::*; extern "C" { @@ -18,14 +21,6 @@ pub fn get_arr_nelems(arr: *mut ArrayType) -> i32 { unsafe { pgx_ARR_NELEMS(arr) } } -#[inline] -pub fn get_arr_ndim(arr: *mut ArrayType) -> i32 { - extern "C" { - pub fn pgx_ARR_NDIM(arrayType: *mut ArrayType) -> i32; - } - unsafe { pgx_ARR_NDIM(arr) } -} - #[inline] pub fn get_arr_nullbitmap<'a>(arr: *mut ArrayType) -> &'a [bits8] { unsafe { @@ -58,3 +53,79 @@ pub fn get_arr_dims<'a>(arr: *mut ArrayType) -> &'a [i32] { std::slice::from_raw_parts(pgx_ARR_DIMS(arr), len as usize) } } + +/// Handle describing a bare, "untyped" pointer to an array, +/// offering safe accessors to the various fields of one. +#[repr(transparent)] +#[derive(Debug)] +pub struct RawArray { + at: NonNull, +} + +#[deny(unsafe_op_in_unsafe_fn)] +impl RawArray { + // General implementation notes: + // RawArray is not Copy or Clone, making it harder to misuse versus *mut ArrayType. + // But this also offers safe accessors to the fields, like &ArrayType, + // so it requires validity assertions in order to be constructed. + + /// Returns a handle to the raw array header. + /// + /// # Safety + /// + /// When calling this method, you have to ensure that all of the following is true: + /// * The pointer must be properly aligned. + /// * It must be "dereferenceable" in the sense defined in [the std documentation]. + /// * The pointer must point to an initialized instance of `ArrayType`. + /// * You aren't going to alias the data like mad. + /// + /// It should be noted as RawArray is not inherently lifetime-bound, it can be racy and unsafe! + /// + /// [the std documentation]: core::ptr#safety + pub unsafe fn from_raw(at: NonNull) -> RawArray { + // SAFETY: the caller must guarantee that `self` meets all the + // requirements for a mutable reference, as we're going to treat this like one. + RawArray { at } + } + + /// # Safety + /// + /// Array must have been constructed from an ArrayType pointer. + pub unsafe fn from_array(arr: Array) -> Option { + let array_type = arr.into_array_type() as *mut _; + Some(RawArray { + at: NonNull::new(array_type)?, + }) + } + + /// Returns the inner raw pointer to the ArrayType. + pub fn into_raw(self) -> NonNull { + self.at + } + + /// Get the number of dimensions. + pub fn ndims(&self) -> libc::c_int { + // SAFETY: Validity asserted on construction. + unsafe { + (*self.at.as_ptr()).ndim + // FIXME: While this is a c_int, the max ndim is normally 6 + // While the value can be set higher, it is... unlikely + // that it is going to actually challenge even 16-bit pointer widths. + // It would be preferable to return a usize instead, + // however, PGX has trouble with that, unfortunately. + as _ + } + } + + pub fn oid(&self) -> pg_sys::Oid { + // SAFETY: Validity asserted on construction. + unsafe { (*self.at.as_ptr()).elemtype } + } + + /// Gets the offset to the ArrayType's data. + /// Note that this should not be "taken literally". + pub fn data_offset(&self) -> libc::c_int { + // SAFETY: Validity asserted on construction. + unsafe { (*self.at.as_ptr()).dataoffset } + } +} From 48176a225dedde5f107b238a2b89ded9d3d8a413 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Aug 2022 21:47:18 -0700 Subject: [PATCH 05/36] Port ARR_HASNULL to RawArray --- pgx-tests/src/tests/array_tests.rs | 7 ++++--- pgx/src/array.rs | 13 +++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 73e381db6..9ed358d37 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -7,6 +7,7 @@ All rights reserved. Use of this source code is governed by the MIT license that can be found in the LICENSE file. */ +use core::ptr::NonNull; use pgx::array::RawArray; use pgx::{pg_sys::ArrayType, *}; use serde_json::*; @@ -127,7 +128,8 @@ fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { fn display_get_arr_nullbitmap(arr: Array) -> String { let arr_type = arr.into_array_type(); - if array::get_arr_hasnull(arr_type as *mut ArrayType) { + if unsafe { RawArray::from_raw(NonNull::new_unchecked(arr_type.clone() as *mut _)).nullable() } + { let bitmap_slice = array::get_arr_nullbitmap(arr_type as *mut ArrayType); format!("{:#010b}", bitmap_slice[0]) } else { @@ -142,8 +144,7 @@ fn get_arr_ndim(arr: Array) -> libc::c_int { #[pg_extern] fn get_arr_hasnull(arr: Array) -> bool { - let arr_type = arr.into_array_type(); - array::get_arr_hasnull(arr_type as *mut ArrayType) + unsafe { RawArray::from_array(arr).unwrap().nullable() } } #[pg_extern] diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 16e188ca5..bd8f0dcb2 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -37,12 +37,6 @@ pub fn get_arr_nullbitmap_mut<'a>(arr: *mut ArrayType) -> &'a mut [u8] { } } -#[inline] -pub fn get_arr_hasnull(arr: *mut ArrayType) -> bool { - // copied from array.h - unsafe { (*arr).dataoffset != 0 } -} - #[inline] pub fn get_arr_dims<'a>(arr: *mut ArrayType) -> &'a [i32] { extern "C" { @@ -128,4 +122,11 @@ impl RawArray { // SAFETY: Validity asserted on construction. unsafe { (*self.at.as_ptr()).dataoffset } } + + /// Equivalent to ARR_HASNULL(ArrayType*) + /// Note this means that it only asserts that there MIGHT be a null + pub fn nullable(&self) -> bool { + // copied from postgres/src/include/utils/array.h + self.data_offset() != 0 + } } From 82790eb3bee4a7c45dd0268caced8dc26826d622 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Aug 2022 22:20:11 -0700 Subject: [PATCH 06/36] Clarify comment --- pgx/src/array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index bd8f0dcb2..e92453a48 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -126,7 +126,7 @@ impl RawArray { /// Equivalent to ARR_HASNULL(ArrayType*) /// Note this means that it only asserts that there MIGHT be a null pub fn nullable(&self) -> bool { - // copied from postgres/src/include/utils/array.h + // must match postgres/src/include/utils/array.h #define ARR_HASNULL self.data_offset() != 0 } } From d67dbf0dcd260c34eceecaa41fa6376585c05729 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Aug 2022 23:02:09 -0700 Subject: [PATCH 07/36] Introduce fat pointer fns into RawArray This also ports over ARR_DIMS(ArrayType*), and ARR_NELEMS, which wraps ArrayGetNItems(ndim, *dims) --- pgx-tests/src/tests/array_tests.rs | 5 ++- pgx/src/array.rs | 57 +++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 9ed358d37..8cd038797 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -102,9 +102,8 @@ fn return_zero_length_vec() -> Vec { } #[pg_extern] -fn get_arr_nelems(arr: Array) -> i32 { - let arr_type = arr.into_array_type(); - array::get_arr_nelems(arr_type as *mut ArrayType) +fn get_arr_nelems(arr: Array) -> libc::c_int { + unsafe { RawArray::from_array(arr).unwrap().len() } } #[pg_extern] diff --git a/pgx/src/array.rs b/pgx/src/array.rs index e92453a48..76641de6b 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -1,11 +1,12 @@ use crate::datum::{Array, FromDatum}; use crate::pg_sys; -use core::ptr::NonNull; +use core::ptr::{slice_from_raw_parts_mut, NonNull}; use pgx_pg_sys::*; extern "C" { pub fn pgx_ARR_NELEMS(arrayType: *mut ArrayType) -> i32; pub fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8; + pub fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut libc::c_int; } #[inline] @@ -16,11 +17,6 @@ pub unsafe fn get_arr_data_ptr(arr: *mut ArrayType) -> *mut T { pgx_ARR_DATA_PTR(arr) as *mut T } -#[inline] -pub fn get_arr_nelems(arr: *mut ArrayType) -> i32 { - unsafe { pgx_ARR_NELEMS(arr) } -} - #[inline] pub fn get_arr_nullbitmap<'a>(arr: *mut ArrayType) -> &'a [bits8] { unsafe { @@ -37,17 +33,6 @@ pub fn get_arr_nullbitmap_mut<'a>(arr: *mut ArrayType) -> &'a mut [u8] { } } -#[inline] -pub fn get_arr_dims<'a>(arr: *mut ArrayType) -> &'a [i32] { - extern "C" { - pub fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut i32; - } - unsafe { - let len = (*arr).ndim; - std::slice::from_raw_parts(pgx_ARR_DIMS(arr), len as usize) - } -} - /// Handle describing a bare, "untyped" pointer to an array, /// offering safe accessors to the various fields of one. #[repr(transparent)] @@ -62,6 +47,10 @@ impl RawArray { // RawArray is not Copy or Clone, making it harder to misuse versus *mut ArrayType. // But this also offers safe accessors to the fields, like &ArrayType, // so it requires validity assertions in order to be constructed. + // The main reason it uses NonNull and denies Clone, however, is access soundness: + // It is not sound to go from &mut Type to *mut T if *mut T is not a field of Type. + // This creates an obvious complication for handing out pointers into varlenas. + // Thus also why this does not use lifetime-bounded borrows. /// Returns a handle to the raw array header. /// @@ -111,6 +100,40 @@ impl RawArray { } } + /// A raw slice of the dimensions. + /// Oxidized form of ARR_DIMS(ArrayType*) + /// + /// # Safety + /// + /// Be aware that if you get clever and use this pointer beyond owning RawArray, it's wrong! + /// Raw pointer validity is **asserted on dereference, not construction**, + /// and this slice is no longer valid if you do not also hold RawArray. + pub fn dims(&mut self) -> NonNull<[libc::c_int]> { + // must match or use postgres/src/include/utils/array.h #define ARR_DIMS + unsafe { + let len = self.ndims() as usize; + NonNull::new_unchecked(slice_from_raw_parts_mut( + pgx_ARR_DIMS(self.at.as_ptr()), + len, + )) + } + } + + /// The flattened length of the array. + pub fn len(&self) -> libc::c_int { + // SAFETY: Validity asserted on construction, and... + // ...well, hopefully Postgres knows what it's doing. + unsafe { + pgx_ARR_NELEMS(self.at.as_ptr()) + // FIXME: While this was indeed a function that returns a c_int, + // using a usize is more idiomatic in Rust, to say the least. + // In addition, the actual sizes are under various restrictions, + // so we probably can further constrain the value, honestly. + // However, PGX has trouble with returning usizes + as _ + } + } + pub fn oid(&self) -> pg_sys::Oid { // SAFETY: Validity asserted on construction. unsafe { (*self.at.as_ptr()).elemtype } From 431e89e49e3ed749fcdd213379e0fd9c63a45268 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 23 Aug 2022 23:31:00 -0700 Subject: [PATCH 08/36] Port ARR_DATA_PTR to RawArray::data_slice --- pgx-tests/src/tests/array_tests.rs | 14 +++----------- pgx/src/array.rs | 27 +++++++++++++++++++-------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 8cd038797..a6c266c90 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -108,18 +108,10 @@ fn get_arr_nelems(arr: Array) -> libc::c_int { #[pg_extern] fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { - let len = arr.len(); - let arr_type = arr.into_array_type(); - unsafe { - let ptr = array::get_arr_data_ptr::(arr_type as *mut ArrayType); - let elem = elem as usize; - let data = std::slice::from_raw_parts(ptr, len); - if elem < len { - Some(data[elem]) - } else { - None - } + let raw = RawArray::from_array(arr).unwrap().data_slice::(); + let slice = &(*raw.as_ptr()); + slice.get(elem as usize).copied() } } diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 76641de6b..8b831d5fb 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -6,17 +6,10 @@ use pgx_pg_sys::*; extern "C" { pub fn pgx_ARR_NELEMS(arrayType: *mut ArrayType) -> i32; pub fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8; + pub fn pgx_ARR_DATA_PTR(arrayType: *mut ArrayType) -> *mut u8; pub fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut libc::c_int; } -#[inline] -pub unsafe fn get_arr_data_ptr(arr: *mut ArrayType) -> *mut T { - extern "C" { - pub fn pgx_ARR_DATA_PTR(arrayType: *mut ArrayType) -> *mut u8; - } - pgx_ARR_DATA_PTR(arr) as *mut T -} - #[inline] pub fn get_arr_nullbitmap<'a>(arr: *mut ArrayType) -> &'a [bits8] { unsafe { @@ -152,4 +145,22 @@ impl RawArray { // must match postgres/src/include/utils/array.h #define ARR_HASNULL self.data_offset() != 0 } + + /// # Safety + /// + /// This is not inherently typesafe! + /// Thus you must know the implied type of the underlying ArrayType when calling this. + /// In addition, the raw slice is not guaranteed to be legible at any given index, + /// e.g. it may be an "SQL null" if so indicated in the null bitmap. + /// But even if the null bitmap does not indicate null, the value itself may still be null, + /// thus leaving it correct to read the value but incorrect to then dereference. + pub unsafe fn data_slice(&mut self) -> NonNull<[T]> { + let len = self.len() as usize; + unsafe { + NonNull::new_unchecked(slice_from_raw_parts_mut( + pgx_ARR_DATA_PTR(self.at.as_ptr()).cast(), + len, + )) + } + } } From a60a6dd1911f29b888997e9b669b643e9e48dd39 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 24 Aug 2022 10:12:51 -0700 Subject: [PATCH 09/36] Clarify safety requirements for RawArray::dims --- pgx/src/array.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 8b831d5fb..1db2f9d40 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -53,7 +53,9 @@ impl RawArray { /// * The pointer must be properly aligned. /// * It must be "dereferenceable" in the sense defined in [the std documentation]. /// * The pointer must point to an initialized instance of `ArrayType`. - /// * You aren't going to alias the data like mad. + /// * This can be considered a unique, "owning pointer", + /// so it won't be aliased while RawArray is held, + /// and it points to data in the Postgres ArrayType format. /// /// It should be noted as RawArray is not inherently lifetime-bound, it can be racy and unsafe! /// @@ -98,11 +100,25 @@ impl RawArray { /// /// # Safety /// - /// Be aware that if you get clever and use this pointer beyond owning RawArray, it's wrong! + /// This requires &mut to collect the slice, but be aware: this is a raw pointer! + /// If you find ways to store it, you are probably violating ownership. /// Raw pointer validity is **asserted on dereference, not construction**, - /// and this slice is no longer valid if you do not also hold RawArray. + /// so remember, this slice is only guaranteed to be valid almost immediately + /// after obtaining it, or if you continue to hold the RawArray. pub fn dims(&mut self) -> NonNull<[libc::c_int]> { - // must match or use postgres/src/include/utils/array.h #define ARR_DIMS + // for expected behavior, see: + // postgres/src/include/utils/array.h + // #define ARR_DIMS + + // SAFETY: Welcome to the infernal bowels of FFI. + // Because the initial ptr was NonNull, we can assume this is also NonNull. + // As validity of the initial ptr was asserted on construction, + // this can assume the dims ptr is also valid, allowing making the slice. + // We don't assert validity yet, but in practice, + // the caller will probably immediately turn this into a borrowed slice, + // opening up the methods that are available on borrowed slices. + // So, to be clear, yes, everything done so far allows the caller to do so, + // though it is possible the caller can misuse this in various ways. unsafe { let len = self.ndims() as usize; NonNull::new_unchecked(slice_from_raw_parts_mut( From dfcfbb64a264dab44300eb4da21ca7bd3c265ebb Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 24 Aug 2022 10:29:59 -0700 Subject: [PATCH 10/36] Clarify safety requirements for RawArray::data --- pgx/src/array.rs | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 1db2f9d40..108118bfb 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -112,13 +112,15 @@ impl RawArray { // SAFETY: Welcome to the infernal bowels of FFI. // Because the initial ptr was NonNull, we can assume this is also NonNull. - // As validity of the initial ptr was asserted on construction, + // As validity of the initial ptr was asserted on construction of RawArray, // this can assume the dims ptr is also valid, allowing making the slice. - // We don't assert validity yet, but in practice, + // This code doesn't assert validity per se, but in practice, // the caller will probably immediately turn this into a borrowed slice, // opening up the methods that are available on borrowed slices. + // // So, to be clear, yes, everything done so far allows the caller to do so, // though it is possible the caller can misuse this in various ways. + // Only the "naive" case is well-guarded. unsafe { let len = self.ndims() as usize; NonNull::new_unchecked(slice_from_raw_parts_mut( @@ -162,16 +164,35 @@ impl RawArray { self.data_offset() != 0 } + /// The slice of the data. + /// Oxidized form of ARR_DATA_PTR(ArrayType*) + /// /// # Safety /// /// This is not inherently typesafe! /// Thus you must know the implied type of the underlying ArrayType when calling this. /// In addition, the raw slice is not guaranteed to be legible at any given index, /// e.g. it may be an "SQL null" if so indicated in the null bitmap. - /// But even if the null bitmap does not indicate null, the value itself may still be null, + /// But even if the index is not marked as null, the value may be equal to nullptr, /// thus leaving it correct to read the value but incorrect to then dereference. - pub unsafe fn data_slice(&mut self) -> NonNull<[T]> { + /// + /// That is why this returns `NonNull<[T]>`: if it returned `&mut [T]`, + /// then for many possible types, that would actually be UB, as it would assert + /// that each particular index was a valid `T`. + pub unsafe fn data(&mut self) -> NonNull<[T]> { let len = self.len() as usize; + + // SAFETY: Welcome to the infernal bowels of FFI. + // Because the initial ptr was NonNull, we can assume this is also NonNull. + // As validity of the initial ptr was asserted on construction of RawArray, + // this can assume the data ptr is also valid, allowing making the slice. + // This code doesn't assert validity per se, but in practice, + // the caller will probably immediately turn this into a borrowed slice, + // opening up the methods that are available on borrowed slices. + // + // Most importantly, the caller has asserted this is in fact a valid [T], + // by calling this function, so both this code and the caller can rely + // on that assertion, only requiring that it is correct. unsafe { NonNull::new_unchecked(slice_from_raw_parts_mut( pgx_ARR_DATA_PTR(self.at.as_ptr()).cast(), From e5fc2f57c3bcf7fa886933c3fcfcab056fd9a226 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 24 Aug 2022 10:34:25 -0700 Subject: [PATCH 11/36] Cleanup --- pgx/src/array.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 108118bfb..adfe0160e 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -61,8 +61,6 @@ impl RawArray { /// /// [the std documentation]: core::ptr#safety pub unsafe fn from_raw(at: NonNull) -> RawArray { - // SAFETY: the caller must guarantee that `self` meets all the - // requirements for a mutable reference, as we're going to treat this like one. RawArray { at } } @@ -160,7 +158,9 @@ impl RawArray { /// Equivalent to ARR_HASNULL(ArrayType*) /// Note this means that it only asserts that there MIGHT be a null pub fn nullable(&self) -> bool { - // must match postgres/src/include/utils/array.h #define ARR_HASNULL + // for expected behavior, see: + // postgres/src/include/utils/array.h + // #define ARR_HASNULL self.data_offset() != 0 } From 48ccccb4f24c4598c0773a2c1f1b1355441955b0 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 24 Aug 2022 10:35:22 -0700 Subject: [PATCH 12/36] Fix RawArray::data_offset signature Includes making it non-pub. --- pgx/src/array.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index adfe0160e..6fb35bbcb 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -150,9 +150,10 @@ impl RawArray { /// Gets the offset to the ArrayType's data. /// Note that this should not be "taken literally". - pub fn data_offset(&self) -> libc::c_int { + fn data_offset(&self) -> i32 { // SAFETY: Validity asserted on construction. unsafe { (*self.at.as_ptr()).dataoffset } + // This field is an "int32" in Postgres } /// Equivalent to ARR_HASNULL(ArrayType*) From 11c5a63f635025433bcc622594ef76acdaf2dd13 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 24 Aug 2022 11:05:19 -0700 Subject: [PATCH 13/36] Fix testing --- pgx-tests/src/tests/array_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index a6c266c90..2221e9aad 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -109,7 +109,7 @@ fn get_arr_nelems(arr: Array) -> libc::c_int { #[pg_extern] fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { unsafe { - let raw = RawArray::from_array(arr).unwrap().data_slice::(); + let raw = RawArray::from_array(arr).unwrap().data::(); let slice = &(*raw.as_ptr()); slice.get(elem as usize).copied() } From d74719ce365fa1cd935c29d5e70462475eb08b9c Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 24 Aug 2022 11:13:50 -0700 Subject: [PATCH 14/36] Port ARR_NULLBITMAP to RawArray::nulls --- pgx-tests/src/tests/array_tests.rs | 12 +++++----- pgx/src/array.rs | 35 ++++++++++++++++-------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 2221e9aad..86472cb44 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -7,9 +7,8 @@ All rights reserved. Use of this source code is governed by the MIT license that can be found in the LICENSE file. */ -use core::ptr::NonNull; use pgx::array::RawArray; -use pgx::{pg_sys::ArrayType, *}; +use pgx::*; use serde_json::*; #[pg_extern(name = "sum_array")] @@ -117,12 +116,11 @@ fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { #[pg_extern] fn display_get_arr_nullbitmap(arr: Array) -> String { - let arr_type = arr.into_array_type(); + let raw = unsafe { RawArray::from_array(arr) }.unwrap(); - if unsafe { RawArray::from_raw(NonNull::new_unchecked(arr_type.clone() as *mut _)).nullable() } - { - let bitmap_slice = array::get_arr_nullbitmap(arr_type as *mut ArrayType); - format!("{:#010b}", bitmap_slice[0]) + if let Some(slice) = raw.nulls() { + let slice = unsafe { &*slice.as_ptr() }; + format!("{:#010b}", slice[0]) } else { String::from("") } diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 6fb35bbcb..6fd0e9e48 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -10,22 +10,6 @@ extern "C" { pub fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut libc::c_int; } -#[inline] -pub fn get_arr_nullbitmap<'a>(arr: *mut ArrayType) -> &'a [bits8] { - unsafe { - let len = (pgx_ARR_NELEMS(arr) + 7) / 8; - std::slice::from_raw_parts(pgx_ARR_NULLBITMAP(arr), len as usize) - } -} - -#[inline] -pub fn get_arr_nullbitmap_mut<'a>(arr: *mut ArrayType) -> &'a mut [u8] { - unsafe { - let len = (pgx_ARR_NELEMS(arr) + 7) / 8; - std::slice::from_raw_parts_mut(pgx_ARR_NULLBITMAP(arr), len as usize) - } -} - /// Handle describing a bare, "untyped" pointer to an array, /// offering safe accessors to the various fields of one. #[repr(transparent)] @@ -165,6 +149,25 @@ impl RawArray { self.data_offset() != 0 } + /// Oxidized form of ARR_NULLBITMAP(ArrayType*) + pub fn nulls(&self) -> Option> { + // for expected behavior, see: + // postgres/src/include/utils/array.h + // #define ARR_NULLBITMAP + let len = (self.len() as usize + 7) >> 3; // Obtains 0 if len was 0. + + // SAFETY: This obtains the nulls pointer, which is valid to obtain because + // the validity was asserted on construction. However, unlike the other cases, + // it isn't correct to trust it. Instead, this gets null-checked. + // This is because, while the initial pointer is NonNull, + // ARR_NULLBITMAP can return a nullptr! + let nulls = unsafe { slice_from_raw_parts_mut(pgx_ARR_NULLBITMAP(self.at.as_ptr()), len) }; + // This code doesn't assert validity per se, but in practice, + // the caller will probably immediately turn this into a borrowed slice, + // opening up the methods that are available on borrowed slices. + Some(NonNull::new(nulls)?) + } + /// The slice of the data. /// Oxidized form of ARR_DATA_PTR(ArrayType*) /// From 6900356892fe5d9ca6e2b8c924870c0b47512915 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 24 Aug 2022 14:48:28 -0700 Subject: [PATCH 15/36] Document test safety remarks --- pgx-tests/src/tests/array_tests.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 86472cb44..74ce49883 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -102,11 +102,17 @@ fn return_zero_length_vec() -> Vec { #[pg_extern] fn get_arr_nelems(arr: Array) -> libc::c_int { + // SAFETY: Eh it's fine, it's just a len check. unsafe { RawArray::from_array(arr).unwrap().len() } } +/// # Safety +/// Don't call with an actually-null index, or you might see an invalid value. #[pg_extern] -fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { +#[deny(unsafe_op_in_unsafe_fn)] +unsafe fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { + // SAFETY: this is Known to be an Array from ArrayType, + // and the index has to be a valid one. unsafe { let raw = RawArray::from_array(arr).unwrap().data::(); let slice = &(*raw.as_ptr()); @@ -119,7 +125,9 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { let raw = unsafe { RawArray::from_array(arr) }.unwrap(); if let Some(slice) = raw.nulls() { + // SAFETY: If the test has gotten this far, the ptr is good for 0+ bytes. let slice = unsafe { &*slice.as_ptr() }; + // might panic if the array is len 0 format!("{:#010b}", slice[0]) } else { String::from("") @@ -128,11 +136,13 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { #[pg_extern] fn get_arr_ndim(arr: Array) -> libc::c_int { + // SAFETY: This is a valid ArrayType and it's just a field access. unsafe { RawArray::from_array(arr).unwrap().ndims() } } #[pg_extern] fn get_arr_hasnull(arr: Array) -> bool { + // SAFETY: This is a valid ArrayType and it's just a field access. unsafe { RawArray::from_array(arr).unwrap().nullable() } } From bbad7264cc51da273452ef539c73d9d0c0b56dd8 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 24 Aug 2022 14:51:59 -0700 Subject: [PATCH 16/36] sub pub from pgx::array extern fn --- pgx/src/array.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 6fd0e9e48..a5f2bb655 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -4,10 +4,18 @@ use core::ptr::{slice_from_raw_parts_mut, NonNull}; use pgx_pg_sys::*; extern "C" { - pub fn pgx_ARR_NELEMS(arrayType: *mut ArrayType) -> i32; - pub fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8; - pub fn pgx_ARR_DATA_PTR(arrayType: *mut ArrayType) -> *mut u8; - pub fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut libc::c_int; + /// # Safety + /// Does a field access, but doesn't deref out of bounds of ArrayType + fn pgx_ARR_DATA_PTR(arrayType: *mut ArrayType) -> *mut u8; + /// # Safety + /// Does a field access, but doesn't deref out of bounds of ArrayType + fn pgx_ARR_DIMS(arrayType: *mut ArrayType) -> *mut libc::c_int; + /// # Safety + /// Must only be used on a "valid" (Postgres-constructed) ArrayType + fn pgx_ARR_NELEMS(arrayType: *mut ArrayType) -> i32; + /// # Safety + /// Does a field access, but doesn't deref out of bounds of ArrayType + fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8; } /// Handle describing a bare, "untyped" pointer to an array, From 133b44db58b36b339722ebcce647531a6b94383f Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 13:13:01 -0700 Subject: [PATCH 17/36] Explain Rust type init requirements --- pgx/src/array.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index a5f2bb655..38afcc489 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -188,9 +188,18 @@ impl RawArray { /// But even if the index is not marked as null, the value may be equal to nullptr, /// thus leaving it correct to read the value but incorrect to then dereference. /// - /// That is why this returns `NonNull<[T]>`: if it returned `&mut [T]`, - /// then for many possible types, that would actually be UB, as it would assert - /// that each particular index was a valid `T`. + /// That is why this returns [`NonNull<[T]>`]: if it returned `&mut [T]`, + /// then for many possible types that can be **undefined behavior**, + /// as it would assert that each particular index was a valid `T`. + /// A Rust borrow, including of a slice, will always be + /// - non-null + /// - aligned + /// - **validly initialized**, except in the case of [MaybeUninit] types + /// It can be incorrect to assume that data that Postgres has marked "null" + /// follows Rust-level initialization requirements. + /// + /// [MaybeUninit]: core::mem::MaybeUninit + /// [`NonNull<[T]>`]: core::ptr::NonNull pub unsafe fn data(&mut self) -> NonNull<[T]> { let len = self.len() as usize; From 59dac253391768a5dc92b16096709c798520d243 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 13:21:37 -0700 Subject: [PATCH 18/36] Expand on RawArray description Includes a general usage note on lengths. --- pgx/src/array.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 38afcc489..127d07288 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -18,8 +18,19 @@ extern "C" { fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8; } -/// Handle describing a bare, "untyped" pointer to an array, -/// offering safe accessors to the various fields of one. +/// Handle describing a bare, "untyped" pointer to a Postgres varlena array, +/// offering safe accessors to the various fields of the ArrayType, +/// and access to the various slices that are implicitly present in the varlena. +/// +/// # On sizes and subscripts +/// +/// Postgres uses C's `int` (`c_int` in Rust) for sizes, and Rust uses `usize`. +/// Thus various functions of RawArray return `c_int` values, but you must convert to usize. +/// On 32-bit or 64-bit machines with 32-bit `c_int`s, you can losslessly upgrade to `as usize`, +/// except with negative indices, which Postgres asserts against creating. +/// PGX currently only intentionally supports 64-bit machines, +/// and while support for ILP32 or I64LP128 C data models may become possible, +/// PGX will **not** support 16-bit machines in any practical case, even though Rust does. #[repr(transparent)] #[derive(Debug)] pub struct RawArray { From 0da6f1cb387321659920517dc30a0dc620a27bb2 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 15:24:10 -0700 Subject: [PATCH 19/36] Note reborrow in test --- pgx-tests/src/tests/array_tests.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 74ce49883..19ad0c1df 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -125,7 +125,8 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { let raw = unsafe { RawArray::from_array(arr) }.unwrap(); if let Some(slice) = raw.nulls() { - // SAFETY: If the test has gotten this far, the ptr is good for 0+ bytes. + // SAFETY: If the test has gotten this far, the ptr is good for 0+ bytes, + // so reborrow NonNull<[u8]> as &[u8] for the hot second we're looking at it. let slice = unsafe { &*slice.as_ptr() }; // might panic if the array is len 0 format!("{:#010b}", slice[0]) From 7bfa2e44a46f1a87b9ff3b1d04727f8b02a5560a Mon Sep 17 00:00:00 2001 From: Jubilee <46493976+workingjubilee@users.noreply.github.com> Date: Thu, 25 Aug 2022 16:35:05 -0700 Subject: [PATCH 20/36] Remove extra comment in pgx::array --- pgx/src/array.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 127d07288..b19ad0e1b 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -181,9 +181,6 @@ impl RawArray { // This is because, while the initial pointer is NonNull, // ARR_NULLBITMAP can return a nullptr! let nulls = unsafe { slice_from_raw_parts_mut(pgx_ARR_NULLBITMAP(self.at.as_ptr()), len) }; - // This code doesn't assert validity per se, but in practice, - // the caller will probably immediately turn this into a borrowed slice, - // opening up the methods that are available on borrowed slices. Some(NonNull::new(nulls)?) } From 55f62ac3a7686e7f1443d04481dfbe34b80192c7 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 18:10:44 -0700 Subject: [PATCH 21/36] Introduce even-lower-level ArrayPtr --- pgx-tests/src/tests/array_tests.rs | 12 +++--- pgx/src/array.rs | 65 ++++++++++++++++++------------ 2 files changed, 46 insertions(+), 31 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 19ad0c1df..e7a6877f9 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -7,7 +7,7 @@ All rights reserved. Use of this source code is governed by the MIT license that can be found in the LICENSE file. */ -use pgx::array::RawArray; +use pgx::array::ArrayPtr; use pgx::*; use serde_json::*; @@ -103,7 +103,7 @@ fn return_zero_length_vec() -> Vec { #[pg_extern] fn get_arr_nelems(arr: Array) -> libc::c_int { // SAFETY: Eh it's fine, it's just a len check. - unsafe { RawArray::from_array(arr).unwrap().len() } + unsafe { ArrayPtr::from_array(arr).unwrap().len() } } /// # Safety @@ -114,7 +114,7 @@ unsafe fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { // SAFETY: this is Known to be an Array from ArrayType, // and the index has to be a valid one. unsafe { - let raw = RawArray::from_array(arr).unwrap().data::(); + let raw = ArrayPtr::from_array(arr).unwrap().data::(); let slice = &(*raw.as_ptr()); slice.get(elem as usize).copied() } @@ -122,7 +122,7 @@ unsafe fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { #[pg_extern] fn display_get_arr_nullbitmap(arr: Array) -> String { - let raw = unsafe { RawArray::from_array(arr) }.unwrap(); + let raw = unsafe { ArrayPtr::from_array(arr) }.unwrap(); if let Some(slice) = raw.nulls() { // SAFETY: If the test has gotten this far, the ptr is good for 0+ bytes, @@ -138,13 +138,13 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { #[pg_extern] fn get_arr_ndim(arr: Array) -> libc::c_int { // SAFETY: This is a valid ArrayType and it's just a field access. - unsafe { RawArray::from_array(arr).unwrap().ndims() } + unsafe { ArrayPtr::from_array(arr) .unwrap().ndims() } } #[pg_extern] fn get_arr_hasnull(arr: Array) -> bool { // SAFETY: This is a valid ArrayType and it's just a field access. - unsafe { RawArray::from_array(arr).unwrap().nullable() } + unsafe { ArrayPtr::from_array(arr).unwrap().nullable() } } #[pg_extern] diff --git a/pgx/src/array.rs b/pgx/src/array.rs index b19ad0e1b..6282f70fa 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -1,6 +1,7 @@ use crate::datum::{Array, FromDatum}; use crate::pg_sys; use core::ptr::{slice_from_raw_parts_mut, NonNull}; +use core::marker::PhantomData; use pgx_pg_sys::*; extern "C" { @@ -18,29 +19,34 @@ extern "C" { fn pgx_ARR_NULLBITMAP(arrayType: *mut ArrayType) -> *mut bits8; } -/// Handle describing a bare, "untyped" pointer to a Postgres varlena array, -/// offering safe accessors to the various fields of the ArrayType, -/// and access to the various slices that are implicitly present in the varlena. -/// -/// # On sizes and subscripts -/// -/// Postgres uses C's `int` (`c_int` in Rust) for sizes, and Rust uses `usize`. -/// Thus various functions of RawArray return `c_int` values, but you must convert to usize. -/// On 32-bit or 64-bit machines with 32-bit `c_int`s, you can losslessly upgrade to `as usize`, -/// except with negative indices, which Postgres asserts against creating. -/// PGX currently only intentionally supports 64-bit machines, -/// and while support for ILP32 or I64LP128 C data models may become possible, -/// PGX will **not** support 16-bit machines in any practical case, even though Rust does. +/** +An aligned, dereferenceable `NonNull` with low-level accessors. + +It offers technically-safe accessors to the "dynamic" fields of a Postgres varlena array, +as well as any fields not covered by such, but does not require any more validity. +This also means that the [NonNull] pointers that are returned may not be valid to read. +Validating the correctness of the entire array requires a bit more effort. + +# On sizes and subscripts + +Postgres uses C's `int` (`c_int` in Rust) for sizes, and Rust uses [usize]. +Thus various functions of ArrayPtr return `c_int` values, but you must convert to usize. +On 32-bit or 64-bit machines with 32-bit `c_int`s, you may losslessly upgrade `as usize`, +except with negative indices, which Postgres asserts against creating. +PGX currently only intentionally supports 64-bit machines, +and while support for ILP32 or I64LP128 C data models may become possible, +PGX will **not** support 16-bit machines in any practical case, even though Rust does. +*/ #[repr(transparent)] #[derive(Debug)] -pub struct RawArray { +pub struct ArrayPtr { at: NonNull, } #[deny(unsafe_op_in_unsafe_fn)] -impl RawArray { +impl ArrayPtr { // General implementation notes: - // RawArray is not Copy or Clone, making it harder to misuse versus *mut ArrayType. + // ArrayPtr is not Copy or Clone, making it harder to misuse versus *mut ArrayType. // But this also offers safe accessors to the fields, like &ArrayType, // so it requires validity assertions in order to be constructed. // The main reason it uses NonNull and denies Clone, however, is access soundness: @@ -57,22 +63,22 @@ impl RawArray { /// * It must be "dereferenceable" in the sense defined in [the std documentation]. /// * The pointer must point to an initialized instance of `ArrayType`. /// * This can be considered a unique, "owning pointer", - /// so it won't be aliased while RawArray is held, + /// so it won't be aliased while ArrayPtr is held, /// and it points to data in the Postgres ArrayType format. /// - /// It should be noted as RawArray is not inherently lifetime-bound, it can be racy and unsafe! + /// It should be noted as ArrayPtr is not inherently lifetime-bound, it can be racy and unsafe! /// /// [the std documentation]: core::ptr#safety - pub unsafe fn from_raw(at: NonNull) -> RawArray { - RawArray { at } + pub unsafe fn from_ptr(at: NonNull) -> ArrayPtr { + ArrayPtr { at } } /// # Safety /// /// Array must have been constructed from an ArrayType pointer. - pub unsafe fn from_array(arr: Array) -> Option { + pub unsafe fn from_array(arr: Array) -> Option { let array_type = arr.into_array_type() as *mut _; - Some(RawArray { + Some(ArrayPtr { at: NonNull::new(array_type)?, }) } @@ -105,7 +111,7 @@ impl RawArray { /// If you find ways to store it, you are probably violating ownership. /// Raw pointer validity is **asserted on dereference, not construction**, /// so remember, this slice is only guaranteed to be valid almost immediately - /// after obtaining it, or if you continue to hold the RawArray. + /// after obtaining it, or if you continue to hold the ArrayPtr. pub fn dims(&mut self) -> NonNull<[libc::c_int]> { // for expected behavior, see: // postgres/src/include/utils/array.h @@ -113,7 +119,7 @@ impl RawArray { // SAFETY: Welcome to the infernal bowels of FFI. // Because the initial ptr was NonNull, we can assume this is also NonNull. - // As validity of the initial ptr was asserted on construction of RawArray, + // As validity of the initial ptr was asserted on construction of ArrayPtr, // this can assume the dims ptr is also valid, allowing making the slice. // This code doesn't assert validity per se, but in practice, // the caller will probably immediately turn this into a borrowed slice, @@ -213,7 +219,7 @@ impl RawArray { // SAFETY: Welcome to the infernal bowels of FFI. // Because the initial ptr was NonNull, we can assume this is also NonNull. - // As validity of the initial ptr was asserted on construction of RawArray, + // As validity of the initial ptr was asserted on construction of ArrayPtr, // this can assume the data ptr is also valid, allowing making the slice. // This code doesn't assert validity per se, but in practice, // the caller will probably immediately turn this into a borrowed slice, @@ -230,3 +236,12 @@ impl RawArray { } } } + +/** +A pointer into a Postgres varlena. +*/ +pub struct RawArray { + _ptr: ArrayPtr, + _len: usize, + ty: PhantomData, +} From b4a73643f12cf8065430b9cc401aa1164f54c4ab Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 18:30:00 -0700 Subject: [PATCH 22/36] Reformat --- pgx-tests/src/tests/array_tests.rs | 2 +- pgx/src/array.rs | 106 +++++++++++++++-------------- 2 files changed, 56 insertions(+), 52 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index e7a6877f9..b65c4b57e 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -138,7 +138,7 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { #[pg_extern] fn get_arr_ndim(arr: Array) -> libc::c_int { // SAFETY: This is a valid ArrayType and it's just a field access. - unsafe { ArrayPtr::from_array(arr) .unwrap().ndims() } + unsafe { ArrayPtr::from_array(arr).unwrap().ndims() } } #[pg_extern] diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 6282f70fa..cf5a6d91e 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -1,7 +1,7 @@ use crate::datum::{Array, FromDatum}; use crate::pg_sys; -use core::ptr::{slice_from_raw_parts_mut, NonNull}; use core::marker::PhantomData; +use core::ptr::{slice_from_raw_parts_mut, NonNull}; use pgx_pg_sys::*; extern "C" { @@ -54,27 +54,28 @@ impl ArrayPtr { // This creates an obvious complication for handing out pointers into varlenas. // Thus also why this does not use lifetime-bounded borrows. - /// Returns a handle to the raw array header. - /// - /// # Safety - /// - /// When calling this method, you have to ensure that all of the following is true: - /// * The pointer must be properly aligned. - /// * It must be "dereferenceable" in the sense defined in [the std documentation]. - /// * The pointer must point to an initialized instance of `ArrayType`. - /// * This can be considered a unique, "owning pointer", - /// so it won't be aliased while ArrayPtr is held, - /// and it points to data in the Postgres ArrayType format. - /// - /// It should be noted as ArrayPtr is not inherently lifetime-bound, it can be racy and unsafe! - /// - /// [the std documentation]: core::ptr#safety + /** + Returns a handle to the raw array header. + + # Safety + + When calling this method, you have to ensure that all of the following is true: + * The pointer must be properly aligned. + * It must be "dereferenceable" in the sense defined in [the std documentation]. + * The pointer must point to an initialized instance of `ArrayType`. + * This can be considered a unique, "owning pointer", + so it won't be aliased while ArrayPtr is held, + and it points to data in the Postgres ArrayType format. + + It should be noted as ArrayPtr is not inherently lifetime-bound, it can be racy and unsafe! + + [the std documentation]: core::ptr#safety + */ pub unsafe fn from_ptr(at: NonNull) -> ArrayPtr { ArrayPtr { at } } /// # Safety - /// /// Array must have been constructed from an ArrayType pointer. pub unsafe fn from_array(arr: Array) -> Option { let array_type = arr.into_array_type() as *mut _; @@ -102,16 +103,18 @@ impl ArrayPtr { } } - /// A raw slice of the dimensions. - /// Oxidized form of ARR_DIMS(ArrayType*) - /// - /// # Safety - /// - /// This requires &mut to collect the slice, but be aware: this is a raw pointer! - /// If you find ways to store it, you are probably violating ownership. - /// Raw pointer validity is **asserted on dereference, not construction**, - /// so remember, this slice is only guaranteed to be valid almost immediately - /// after obtaining it, or if you continue to hold the ArrayPtr. + /** + A raw slice of the dimensions. + Oxidized form of ARR_DIMS(ArrayType*) + + # Safety + + This requires &mut to collect the slice, but be aware: this is a raw pointer! + If you find ways to store it, you are probably violating ownership. + Raw pointer validity is **asserted on dereference, not construction**, + so remember, this slice is only guaranteed to be valid almost immediately + after obtaining it, or if you continue to hold the ArrayPtr. + */ pub fn dims(&mut self) -> NonNull<[libc::c_int]> { // for expected behavior, see: // postgres/src/include/utils/array.h @@ -190,30 +193,31 @@ impl ArrayPtr { Some(NonNull::new(nulls)?) } - /// The slice of the data. - /// Oxidized form of ARR_DATA_PTR(ArrayType*) - /// - /// # Safety - /// - /// This is not inherently typesafe! - /// Thus you must know the implied type of the underlying ArrayType when calling this. - /// In addition, the raw slice is not guaranteed to be legible at any given index, - /// e.g. it may be an "SQL null" if so indicated in the null bitmap. - /// But even if the index is not marked as null, the value may be equal to nullptr, - /// thus leaving it correct to read the value but incorrect to then dereference. - /// - /// That is why this returns [`NonNull<[T]>`]: if it returned `&mut [T]`, - /// then for many possible types that can be **undefined behavior**, - /// as it would assert that each particular index was a valid `T`. - /// A Rust borrow, including of a slice, will always be - /// - non-null - /// - aligned - /// - **validly initialized**, except in the case of [MaybeUninit] types - /// It can be incorrect to assume that data that Postgres has marked "null" - /// follows Rust-level initialization requirements. - /// - /// [MaybeUninit]: core::mem::MaybeUninit - /// [`NonNull<[T]>`]: core::ptr::NonNull + /** + Shim to ARR_DATA_PTR(ArrayType*) + + # Safety + + This is not inherently typesafe! + Thus you must know the implied type of the underlying ArrayType when calling this. + In addition, the raw slice is not guaranteed to be legible at any given index, + e.g. it may be an "SQL null" if so indicated in the null bitmap. + But even if the index is not marked as null, the value may be equal to nullptr, + thus leaving it correct to read the value but incorrect to then dereference. + + That is why this returns [`NonNull<[T]>`]: if it returned `&mut [T]`, + then for many possible types that can be **undefined behavior**, + as it would assert that each particular index was a valid `T`. + A Rust borrow, including of a slice, will always be + * non-null + * aligned + * **validly initialized**, except in the case of [MaybeUninit] types + It can be incorrect to assume that data that Postgres has marked "null" + follows Rust-level initialization requirements. + + [MaybeUninit]: core::mem::MaybeUninit + [`NonNull<[T]>`]: core::ptr::NonNull + */ pub unsafe fn data(&mut self) -> NonNull<[T]> { let len = self.len() as usize; From f763c0a7cd8cd48e8e5c88b645e58ce7bdf47f0e Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 19:54:53 -0700 Subject: [PATCH 23/36] Rough draft of ArrayPtr and RawArray --- pgx-tests/src/tests/array_tests.rs | 6 +- pgx/src/array.rs | 99 ++++++++++++++++++++++-------- 2 files changed, 78 insertions(+), 27 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index b65c4b57e..e6545dfb2 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -7,7 +7,7 @@ All rights reserved. Use of this source code is governed by the MIT license that can be found in the LICENSE file. */ -use pgx::array::ArrayPtr; +use pgx::array::{ArrayPtr, RawArray}; use pgx::*; use serde_json::*; @@ -114,7 +114,7 @@ unsafe fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { // SAFETY: this is Known to be an Array from ArrayType, // and the index has to be a valid one. unsafe { - let raw = ArrayPtr::from_array(arr).unwrap().data::(); + let raw = RawArray::from_valid(ArrayPtr::from_array(arr).unwrap()).data::(); let slice = &(*raw.as_ptr()); slice.get(elem as usize).copied() } @@ -122,7 +122,7 @@ unsafe fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { #[pg_extern] fn display_get_arr_nullbitmap(arr: Array) -> String { - let raw = unsafe { ArrayPtr::from_array(arr) }.unwrap(); + let raw = unsafe { RawArray::from_valid(ArrayPtr::from_array(arr).unwrap()) }; if let Some(slice) = raw.nulls() { // SAFETY: If the test has gotten this far, the ptr is good for 0+ bytes, diff --git a/pgx/src/array.rs b/pgx/src/array.rs index cf5a6d91e..a4f40e053 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -2,6 +2,7 @@ use crate::datum::{Array, FromDatum}; use crate::pg_sys; use core::marker::PhantomData; use core::ptr::{slice_from_raw_parts_mut, NonNull}; +use core::slice; use pgx_pg_sys::*; extern "C" { @@ -90,6 +91,7 @@ impl ArrayPtr { } /// Get the number of dimensions. + /// Will be in 0..=[pg_sys::MAXDIM]. pub fn ndims(&self) -> libc::c_int { // SAFETY: Validity asserted on construction. unsafe { @@ -105,7 +107,9 @@ impl ArrayPtr { /** A raw slice of the dimensions. - Oxidized form of ARR_DIMS(ArrayType*) + + Oxidized form of ARR_DIMS(ArrayType*). + The length will be within 0..=[pg_sys::MAXDIM]. # Safety @@ -161,14 +165,15 @@ impl ArrayPtr { } /// Gets the offset to the ArrayType's data. - /// Note that this should not be "taken literally". + /// Should not be "taken literally". fn data_offset(&self) -> i32 { // SAFETY: Validity asserted on construction. unsafe { (*self.at.as_ptr()).dataoffset } // This field is an "int32" in Postgres } - /// Equivalent to ARR_HASNULL(ArrayType*) + /// Equivalent to ARR_HASNULL(ArrayType*). + /// /// Note this means that it only asserts that there MIGHT be a null pub fn nullable(&self) -> bool { // for expected behavior, see: @@ -178,18 +183,17 @@ impl ArrayPtr { } /// Oxidized form of ARR_NULLBITMAP(ArrayType*) - pub fn nulls(&self) -> Option> { + pub fn nulls(&self) -> Option> { // for expected behavior, see: // postgres/src/include/utils/array.h // #define ARR_NULLBITMAP - let len = (self.len() as usize + 7) >> 3; // Obtains 0 if len was 0. // SAFETY: This obtains the nulls pointer, which is valid to obtain because // the validity was asserted on construction. However, unlike the other cases, // it isn't correct to trust it. Instead, this gets null-checked. // This is because, while the initial pointer is NonNull, // ARR_NULLBITMAP can return a nullptr! - let nulls = unsafe { slice_from_raw_parts_mut(pgx_ARR_NULLBITMAP(self.at.as_ptr()), len) }; + let nulls = unsafe { pgx_ARR_NULLBITMAP(self.at.as_ptr()) }; Some(NonNull::new(nulls)?) } @@ -205,22 +209,21 @@ impl ArrayPtr { But even if the index is not marked as null, the value may be equal to nullptr, thus leaving it correct to read the value but incorrect to then dereference. - That is why this returns [`NonNull<[T]>`]: if it returned `&mut [T]`, + That is the primary reason this returns [`NonNull`]. If it returned `&mut [T]`, then for many possible types that can be **undefined behavior**, - as it would assert that each particular index was a valid `T`. + as it would assert each particular index was a valid `T`. A Rust borrow, including of a slice, will always be * non-null * aligned * **validly initialized**, except in the case of [MaybeUninit] types - It can be incorrect to assume that data that Postgres has marked "null" - follows Rust-level initialization requirements. + It is reasonable to assume data Postgres exposes logically to SQL is initialized, + but it may be incorrect to assume data Postgres has marked "null" + otherwise follows Rust-level initialization requirements. [MaybeUninit]: core::mem::MaybeUninit - [`NonNull<[T]>`]: core::ptr::NonNull + [`NonNull`]: core::ptr::NonNull */ - pub unsafe fn data(&mut self) -> NonNull<[T]> { - let len = self.len() as usize; - + pub fn data(&mut self) -> NonNull { // SAFETY: Welcome to the infernal bowels of FFI. // Because the initial ptr was NonNull, we can assume this is also NonNull. // As validity of the initial ptr was asserted on construction of ArrayPtr, @@ -232,20 +235,68 @@ impl ArrayPtr { // Most importantly, the caller has asserted this is in fact a valid [T], // by calling this function, so both this code and the caller can rely // on that assertion, only requiring that it is correct. - unsafe { - NonNull::new_unchecked(slice_from_raw_parts_mut( - pgx_ARR_DATA_PTR(self.at.as_ptr()).cast(), - len, - )) - } + unsafe { NonNull::new_unchecked(pgx_ARR_DATA_PTR(self.at.as_ptr()).cast()) } } } /** A pointer into a Postgres varlena. */ -pub struct RawArray { - _ptr: ArrayPtr, - _len: usize, - ty: PhantomData, +pub struct RawArray { + ptr: ArrayPtr, + len: usize, +} + +#[deny(unsafe_op_in_unsafe_fn)] +impl RawArray { + /** + # Safety + + Requires a pointer that really is from a properly constructed varlena, + using the format used internally by Postgres. + */ + pub unsafe fn from_valid(ptr: ArrayPtr) -> RawArray { + let len = unsafe { ptr.len() } as usize; + RawArray { ptr, len } + } + + fn len(&self) -> usize { + self.len + } + + fn into_ptr(self) -> ArrayPtr { + self.ptr + } + + /// Oxidized form of ARR_NULLBITMAP(ArrayType*) + pub fn nulls(&self) -> Option> { + // for expected behavior, see: + // postgres/src/include/utils/array.h + // #define ARR_NULLBITMAP + let len = self.len + 7 >> 3; // Obtains 0 if len was 0. + + // SAFETY: This obtains the nulls pointer, which is valid to obtain because + // the validity was asserted on construction. However, unlike the other cases, + // it isn't correct to trust it. Instead, this gets null-checked. + // This is because, while the initial pointer is NonNull, + // ARR_NULLBITMAP can return a nullptr! + Some(unsafe { NonNull::new_unchecked(slice_from_raw_parts_mut(self.ptr.nulls()?.as_ptr(), len)) } ) + } + + /// # Safety + /// Asserts the data is mutable, properly aligned, all that jazz. + pub unsafe fn data(&mut self) -> NonNull<[T]> { + // SAFETY: Welcome to the infernal bowels of FFI. + // Because the initial ptr was NonNull, we can assume this is also NonNull. + // As validity of the initial ptr was asserted on construction of ArrayPtr, + // this can assume the data ptr is also valid, allowing making the slice. + // This code doesn't assert validity per se, but in practice, + // the caller will probably immediately turn this into a borrowed slice, + // opening up the methods that are available on borrowed slices. + // + // Most importantly, the caller has asserted this is in fact a valid [T], + // by calling this function, so both this code and the caller can rely + // on that assertion, only requiring that it is correct. + unsafe { NonNull::new_unchecked(slice_from_raw_parts_mut(self.ptr.data::().as_ptr(), self.len)) } + } } From b0b3477fa2b79245185f3c250c2ca7add2796451 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 19:56:14 -0700 Subject: [PATCH 24/36] Format again --- pgx/src/array.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index a4f40e053..84ead71c1 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -267,7 +267,7 @@ impl RawArray { fn into_ptr(self) -> ArrayPtr { self.ptr } - + /// Oxidized form of ARR_NULLBITMAP(ArrayType*) pub fn nulls(&self) -> Option> { // for expected behavior, see: @@ -280,7 +280,9 @@ impl RawArray { // it isn't correct to trust it. Instead, this gets null-checked. // This is because, while the initial pointer is NonNull, // ARR_NULLBITMAP can return a nullptr! - Some(unsafe { NonNull::new_unchecked(slice_from_raw_parts_mut(self.ptr.nulls()?.as_ptr(), len)) } ) + Some(unsafe { + NonNull::new_unchecked(slice_from_raw_parts_mut(self.ptr.nulls()?.as_ptr(), len)) + }) } /// # Safety @@ -297,6 +299,11 @@ impl RawArray { // Most importantly, the caller has asserted this is in fact a valid [T], // by calling this function, so both this code and the caller can rely // on that assertion, only requiring that it is correct. - unsafe { NonNull::new_unchecked(slice_from_raw_parts_mut(self.ptr.data::().as_ptr(), self.len)) } + unsafe { + NonNull::new_unchecked(slice_from_raw_parts_mut( + self.ptr.data::().as_ptr(), + self.len, + )) + } } } From 639d0b4954676bb7f29dfdfa05955df7b5fd68e1 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 20:26:55 -0700 Subject: [PATCH 25/36] Merge back into RawArray --- pgx-tests/src/tests/array_tests.rs | 12 +-- pgx/src/array.rs | 142 +++++++++-------------------- 2 files changed, 50 insertions(+), 104 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index e6545dfb2..0bedce7a7 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -7,7 +7,7 @@ All rights reserved. Use of this source code is governed by the MIT license that can be found in the LICENSE file. */ -use pgx::array::{ArrayPtr, RawArray}; +use pgx::array::RawArray; use pgx::*; use serde_json::*; @@ -103,7 +103,7 @@ fn return_zero_length_vec() -> Vec { #[pg_extern] fn get_arr_nelems(arr: Array) -> libc::c_int { // SAFETY: Eh it's fine, it's just a len check. - unsafe { ArrayPtr::from_array(arr).unwrap().len() } + unsafe { RawArray::from_array(arr) }.unwrap().len() as _ } /// # Safety @@ -114,7 +114,7 @@ unsafe fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { // SAFETY: this is Known to be an Array from ArrayType, // and the index has to be a valid one. unsafe { - let raw = RawArray::from_valid(ArrayPtr::from_array(arr).unwrap()).data::(); + let raw = RawArray::from_array(arr).unwrap().data::(); let slice = &(*raw.as_ptr()); slice.get(elem as usize).copied() } @@ -122,7 +122,7 @@ unsafe fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { #[pg_extern] fn display_get_arr_nullbitmap(arr: Array) -> String { - let raw = unsafe { RawArray::from_valid(ArrayPtr::from_array(arr).unwrap()) }; + let raw = unsafe { RawArray::from_array(arr) }.unwrap(); if let Some(slice) = raw.nulls() { // SAFETY: If the test has gotten this far, the ptr is good for 0+ bytes, @@ -138,13 +138,13 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { #[pg_extern] fn get_arr_ndim(arr: Array) -> libc::c_int { // SAFETY: This is a valid ArrayType and it's just a field access. - unsafe { ArrayPtr::from_array(arr).unwrap().ndims() } + unsafe { RawArray::from_array(arr).unwrap().ndims() } } #[pg_extern] fn get_arr_hasnull(arr: Array) -> bool { // SAFETY: This is a valid ArrayType and it's just a field access. - unsafe { ArrayPtr::from_array(arr).unwrap().nullable() } + unsafe { RawArray::from_array(arr).unwrap().nullable() } } #[pg_extern] diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 84ead71c1..f0123ed83 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -1,8 +1,6 @@ use crate::datum::{Array, FromDatum}; use crate::pg_sys; -use core::marker::PhantomData; use core::ptr::{slice_from_raw_parts_mut, NonNull}; -use core::slice; use pgx_pg_sys::*; extern "C" { @@ -31,23 +29,23 @@ Validating the correctness of the entire array requires a bit more effort. # On sizes and subscripts Postgres uses C's `int` (`c_int` in Rust) for sizes, and Rust uses [usize]. -Thus various functions of ArrayPtr return `c_int` values, but you must convert to usize. +Thus various functions of RawArray return `c_int` values, but you must convert to usize. On 32-bit or 64-bit machines with 32-bit `c_int`s, you may losslessly upgrade `as usize`, except with negative indices, which Postgres asserts against creating. PGX currently only intentionally supports 64-bit machines, and while support for ILP32 or I64LP128 C data models may become possible, PGX will **not** support 16-bit machines in any practical case, even though Rust does. */ -#[repr(transparent)] #[derive(Debug)] -pub struct ArrayPtr { - at: NonNull, +pub struct RawArray { + ptr: NonNull, + len: usize, } #[deny(unsafe_op_in_unsafe_fn)] -impl ArrayPtr { +impl RawArray { // General implementation notes: - // ArrayPtr is not Copy or Clone, making it harder to misuse versus *mut ArrayType. + // RawArray is not Copy or Clone, making it harder to misuse versus *mut ArrayType. // But this also offers safe accessors to the fields, like &ArrayType, // so it requires validity assertions in order to be constructed. // The main reason it uses NonNull and denies Clone, however, is access soundness: @@ -63,31 +61,35 @@ impl ArrayPtr { When calling this method, you have to ensure that all of the following is true: * The pointer must be properly aligned. * It must be "dereferenceable" in the sense defined in [the std documentation]. - * The pointer must point to an initialized instance of `ArrayType`. - * This can be considered a unique, "owning pointer", - so it won't be aliased while ArrayPtr is held, + * The pointer must point to an initialized instance of [pg_sys::ArrayType]. + * The `ndim` field must be a correct value, so the `dims` slice is aligned and readable. + * This is a unique, "owning pointer" for the varlena, so it won't be aliased while held, and it points to data in the Postgres ArrayType format. - It should be noted as ArrayPtr is not inherently lifetime-bound, it can be racy and unsafe! + It should be noted that despite all these requirements, RawArray has no lifetime, + nor produces slices with such, so it can still be racy and unsafe! [the std documentation]: core::ptr#safety */ - pub unsafe fn from_ptr(at: NonNull) -> ArrayPtr { - ArrayPtr { at } + pub unsafe fn from_ptr(ptr: NonNull) -> RawArray { + let len = unsafe { pgx_ARR_NELEMS(ptr.as_ptr()) } as usize; + RawArray { ptr, len } } /// # Safety /// Array must have been constructed from an ArrayType pointer. - pub unsafe fn from_array(arr: Array) -> Option { + pub unsafe fn from_array(arr: Array) -> Option { let array_type = arr.into_array_type() as *mut _; - Some(ArrayPtr { - at: NonNull::new(array_type)?, + let len = unsafe { pgx_ARR_NELEMS(array_type) } as usize; + Some(RawArray { + ptr: NonNull::new(array_type)?, + len, }) } /// Returns the inner raw pointer to the ArrayType. pub fn into_raw(self) -> NonNull { - self.at + self.ptr } /// Get the number of dimensions. @@ -95,7 +97,7 @@ impl ArrayPtr { pub fn ndims(&self) -> libc::c_int { // SAFETY: Validity asserted on construction. unsafe { - (*self.at.as_ptr()).ndim + (*self.ptr.as_ptr()).ndim // FIXME: While this is a c_int, the max ndim is normally 6 // While the value can be set higher, it is... unlikely // that it is going to actually challenge even 16-bit pointer widths. @@ -117,7 +119,7 @@ impl ArrayPtr { If you find ways to store it, you are probably violating ownership. Raw pointer validity is **asserted on dereference, not construction**, so remember, this slice is only guaranteed to be valid almost immediately - after obtaining it, or if you continue to hold the ArrayPtr. + after obtaining it, or if you continue to hold the RawArray. */ pub fn dims(&mut self) -> NonNull<[libc::c_int]> { // for expected behavior, see: @@ -126,7 +128,7 @@ impl ArrayPtr { // SAFETY: Welcome to the infernal bowels of FFI. // Because the initial ptr was NonNull, we can assume this is also NonNull. - // As validity of the initial ptr was asserted on construction of ArrayPtr, + // As validity of the initial ptr was asserted on construction of RawArray, // this can assume the dims ptr is also valid, allowing making the slice. // This code doesn't assert validity per se, but in practice, // the caller will probably immediately turn this into a borrowed slice, @@ -136,20 +138,20 @@ impl ArrayPtr { // though it is possible the caller can misuse this in various ways. // Only the "naive" case is well-guarded. unsafe { - let len = self.ndims() as usize; + let ndim = self.ndims() as usize; NonNull::new_unchecked(slice_from_raw_parts_mut( - pgx_ARR_DIMS(self.at.as_ptr()), - len, + pgx_ARR_DIMS(self.ptr.as_ptr()), + ndim, )) } } /// The flattened length of the array. - pub fn len(&self) -> libc::c_int { + pub fn len(&self) -> usize { // SAFETY: Validity asserted on construction, and... // ...well, hopefully Postgres knows what it's doing. unsafe { - pgx_ARR_NELEMS(self.at.as_ptr()) + pgx_ARR_NELEMS(self.ptr.as_ptr()) // FIXME: While this was indeed a function that returns a c_int, // using a usize is more idiomatic in Rust, to say the least. // In addition, the actual sizes are under various restrictions, @@ -161,14 +163,14 @@ impl ArrayPtr { pub fn oid(&self) -> pg_sys::Oid { // SAFETY: Validity asserted on construction. - unsafe { (*self.at.as_ptr()).elemtype } + unsafe { (*self.ptr.as_ptr()).elemtype } } /// Gets the offset to the ArrayType's data. /// Should not be "taken literally". fn data_offset(&self) -> i32 { // SAFETY: Validity asserted on construction. - unsafe { (*self.at.as_ptr()).dataoffset } + unsafe { (*self.ptr.as_ptr()).dataoffset } // This field is an "int32" in Postgres } @@ -183,7 +185,7 @@ impl ArrayPtr { } /// Oxidized form of ARR_NULLBITMAP(ArrayType*) - pub fn nulls(&self) -> Option> { + pub fn nulls(&self) -> Option> { // for expected behavior, see: // postgres/src/include/utils/array.h // #define ARR_NULLBITMAP @@ -193,8 +195,16 @@ impl ArrayPtr { // it isn't correct to trust it. Instead, this gets null-checked. // This is because, while the initial pointer is NonNull, // ARR_NULLBITMAP can return a nullptr! - let nulls = unsafe { pgx_ARR_NULLBITMAP(self.at.as_ptr()) }; - Some(NonNull::new(nulls)?) + let len = self.len + 7 >> 3; // Obtains 0 if len was 0. + + // SAFETY: This obtains the nulls pointer, which is valid to obtain because + // the validity was asserted on construction. However, unlike the other cases, + // it isn't correct to trust it. Instead, this gets null-checked. + // This is because, while the initial pointer is NonNull, + // ARR_NULLBITMAP can return a nullptr! + NonNull::new(unsafe { + slice_from_raw_parts_mut(pgx_ARR_NULLBITMAP(self.ptr.as_ptr()), len) + }) } /** @@ -223,74 +233,10 @@ impl ArrayPtr { [MaybeUninit]: core::mem::MaybeUninit [`NonNull`]: core::ptr::NonNull */ - pub fn data(&mut self) -> NonNull { - // SAFETY: Welcome to the infernal bowels of FFI. - // Because the initial ptr was NonNull, we can assume this is also NonNull. - // As validity of the initial ptr was asserted on construction of ArrayPtr, - // this can assume the data ptr is also valid, allowing making the slice. - // This code doesn't assert validity per se, but in practice, - // the caller will probably immediately turn this into a borrowed slice, - // opening up the methods that are available on borrowed slices. - // - // Most importantly, the caller has asserted this is in fact a valid [T], - // by calling this function, so both this code and the caller can rely - // on that assertion, only requiring that it is correct. - unsafe { NonNull::new_unchecked(pgx_ARR_DATA_PTR(self.at.as_ptr()).cast()) } - } -} - -/** -A pointer into a Postgres varlena. -*/ -pub struct RawArray { - ptr: ArrayPtr, - len: usize, -} - -#[deny(unsafe_op_in_unsafe_fn)] -impl RawArray { - /** - # Safety - - Requires a pointer that really is from a properly constructed varlena, - using the format used internally by Postgres. - */ - pub unsafe fn from_valid(ptr: ArrayPtr) -> RawArray { - let len = unsafe { ptr.len() } as usize; - RawArray { ptr, len } - } - - fn len(&self) -> usize { - self.len - } - - fn into_ptr(self) -> ArrayPtr { - self.ptr - } - - /// Oxidized form of ARR_NULLBITMAP(ArrayType*) - pub fn nulls(&self) -> Option> { - // for expected behavior, see: - // postgres/src/include/utils/array.h - // #define ARR_NULLBITMAP - let len = self.len + 7 >> 3; // Obtains 0 if len was 0. - - // SAFETY: This obtains the nulls pointer, which is valid to obtain because - // the validity was asserted on construction. However, unlike the other cases, - // it isn't correct to trust it. Instead, this gets null-checked. - // This is because, while the initial pointer is NonNull, - // ARR_NULLBITMAP can return a nullptr! - Some(unsafe { - NonNull::new_unchecked(slice_from_raw_parts_mut(self.ptr.nulls()?.as_ptr(), len)) - }) - } - - /// # Safety - /// Asserts the data is mutable, properly aligned, all that jazz. - pub unsafe fn data(&mut self) -> NonNull<[T]> { + pub fn data(&mut self) -> NonNull<[T]> { // SAFETY: Welcome to the infernal bowels of FFI. // Because the initial ptr was NonNull, we can assume this is also NonNull. - // As validity of the initial ptr was asserted on construction of ArrayPtr, + // As validity of the initial ptr was asserted on construction of RawArray, // this can assume the data ptr is also valid, allowing making the slice. // This code doesn't assert validity per se, but in practice, // the caller will probably immediately turn this into a borrowed slice, @@ -301,7 +247,7 @@ impl RawArray { // on that assertion, only requiring that it is correct. unsafe { NonNull::new_unchecked(slice_from_raw_parts_mut( - self.ptr.data::().as_ptr(), + pgx_ARR_DATA_PTR(self.ptr.as_ptr()).cast(), self.len, )) } From 55deefee2549ffe81f8f4d8b9d59d7f3e593587e Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 20:33:16 -0700 Subject: [PATCH 26/36] Cleanup fn that might be dupes or confusing --- pgx-tests/src/tests/array_tests.rs | 8 +---- pgx/src/array.rs | 57 ++++++++++-------------------- 2 files changed, 19 insertions(+), 46 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 0bedce7a7..6b34f9cd5 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -138,13 +138,7 @@ fn display_get_arr_nullbitmap(arr: Array) -> String { #[pg_extern] fn get_arr_ndim(arr: Array) -> libc::c_int { // SAFETY: This is a valid ArrayType and it's just a field access. - unsafe { RawArray::from_array(arr).unwrap().ndims() } -} - -#[pg_extern] -fn get_arr_hasnull(arr: Array) -> bool { - // SAFETY: This is a valid ArrayType and it's just a field access. - unsafe { RawArray::from_array(arr).unwrap().nullable() } + unsafe { RawArray::from_array(arr) }.unwrap().dims().len() as _ } #[pg_extern] diff --git a/pgx/src/array.rs b/pgx/src/array.rs index f0123ed83..10ce3c261 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -2,6 +2,7 @@ use crate::datum::{Array, FromDatum}; use crate::pg_sys; use core::ptr::{slice_from_raw_parts_mut, NonNull}; use pgx_pg_sys::*; +use core::slice; extern "C" { /// # Safety @@ -21,8 +22,8 @@ extern "C" { /** An aligned, dereferenceable `NonNull` with low-level accessors. -It offers technically-safe accessors to the "dynamic" fields of a Postgres varlena array, -as well as any fields not covered by such, but does not require any more validity. +It offers technically-safe accessors to the "dynamic" fields of a Postgres varlena array +but only requires validity of ArrayType itself as well as the dimensions slice. This also means that the [NonNull] pointers that are returned may not be valid to read. Validating the correctness of the entire array requires a bit more effort. @@ -94,7 +95,7 @@ impl RawArray { /// Get the number of dimensions. /// Will be in 0..=[pg_sys::MAXDIM]. - pub fn ndims(&self) -> libc::c_int { + fn ndim(&self) -> libc::c_int { // SAFETY: Validity asserted on construction. unsafe { (*self.ptr.as_ptr()).ndim @@ -108,20 +109,14 @@ impl RawArray { } /** - A raw slice of the dimensions. + A slice of the dimensions. Oxidized form of ARR_DIMS(ArrayType*). The length will be within 0..=[pg_sys::MAXDIM]. - # Safety - - This requires &mut to collect the slice, but be aware: this is a raw pointer! - If you find ways to store it, you are probably violating ownership. - Raw pointer validity is **asserted on dereference, not construction**, - so remember, this slice is only guaranteed to be valid almost immediately - after obtaining it, or if you continue to hold the RawArray. + Safe to use because validity of this slice was asserted on construction. */ - pub fn dims(&mut self) -> NonNull<[libc::c_int]> { + pub fn dims(&self) -> &[libc::c_int] { // for expected behavior, see: // postgres/src/include/utils/array.h // #define ARR_DIMS @@ -138,27 +133,17 @@ impl RawArray { // though it is possible the caller can misuse this in various ways. // Only the "naive" case is well-guarded. unsafe { - let ndim = self.ndims() as usize; - NonNull::new_unchecked(slice_from_raw_parts_mut( + let ndim = self.ndim() as usize; + slice::from_raw_parts( pgx_ARR_DIMS(self.ptr.as_ptr()), ndim, - )) + ) } } /// The flattened length of the array. pub fn len(&self) -> usize { - // SAFETY: Validity asserted on construction, and... - // ...well, hopefully Postgres knows what it's doing. - unsafe { - pgx_ARR_NELEMS(self.ptr.as_ptr()) - // FIXME: While this was indeed a function that returns a c_int, - // using a usize is more idiomatic in Rust, to say the least. - // In addition, the actual sizes are under various restrictions, - // so we probably can further constrain the value, honestly. - // However, PGX has trouble with returning usizes - as _ - } + self.len } pub fn oid(&self) -> pg_sys::Oid { @@ -177,7 +162,8 @@ impl RawArray { /// Equivalent to ARR_HASNULL(ArrayType*). /// /// Note this means that it only asserts that there MIGHT be a null - pub fn nullable(&self) -> bool { + #[allow(unused)] + fn nullable(&self) -> bool { // for expected behavior, see: // postgres/src/include/utils/array.h // #define ARR_HASNULL @@ -190,11 +176,6 @@ impl RawArray { // postgres/src/include/utils/array.h // #define ARR_NULLBITMAP - // SAFETY: This obtains the nulls pointer, which is valid to obtain because - // the validity was asserted on construction. However, unlike the other cases, - // it isn't correct to trust it. Instead, this gets null-checked. - // This is because, while the initial pointer is NonNull, - // ARR_NULLBITMAP can return a nullptr! let len = self.len + 7 >> 3; // Obtains 0 if len was 0. // SAFETY: This obtains the nulls pointer, which is valid to obtain because @@ -212,14 +193,12 @@ impl RawArray { # Safety - This is not inherently typesafe! - Thus you must know the implied type of the underlying ArrayType when calling this. - In addition, the raw slice is not guaranteed to be legible at any given index, + The raw slice is not guaranteed to be legible at any given index as T, e.g. it may be an "SQL null" if so indicated in the null bitmap. - But even if the index is not marked as null, the value may be equal to nullptr, - thus leaving it correct to read the value but incorrect to then dereference. + As a result, it is dangerous to reborrow this as `&[T]` or `&mut [T]` + unless the type considers all bitpatterns to be valid values. - That is the primary reason this returns [`NonNull`]. If it returned `&mut [T]`, + That is the primary reason this returns [`NonNull<[T]>`][nonnull]. If it returned `&mut [T]`, then for many possible types that can be **undefined behavior**, as it would assert each particular index was a valid `T`. A Rust borrow, including of a slice, will always be @@ -231,7 +210,7 @@ impl RawArray { otherwise follows Rust-level initialization requirements. [MaybeUninit]: core::mem::MaybeUninit - [`NonNull`]: core::ptr::NonNull + [nonnull]: core::ptr::NonNull */ pub fn data(&mut self) -> NonNull<[T]> { // SAFETY: Welcome to the infernal bowels of FFI. From 1785c0726ec0cb7b4d09d5772124524d2e7b1132 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 21:10:13 -0700 Subject: [PATCH 27/36] Clean up RawArray docs and comments --- pgx/src/array.rs | 67 +++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 10ce3c261..9ef61002d 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -63,7 +63,8 @@ impl RawArray { * The pointer must be properly aligned. * It must be "dereferenceable" in the sense defined in [the std documentation]. * The pointer must point to an initialized instance of [pg_sys::ArrayType]. - * The `ndim` field must be a correct value, so the `dims` slice is aligned and readable. + * The `ndim` field must be a correct value, or **0**, so `dims` is aligned and readable, + or no data is actually read at all. * This is a unique, "owning pointer" for the varlena, so it won't be aliased while held, and it points to data in the Postgres ArrayType format. @@ -121,17 +122,12 @@ impl RawArray { // postgres/src/include/utils/array.h // #define ARR_DIMS - // SAFETY: Welcome to the infernal bowels of FFI. - // Because the initial ptr was NonNull, we can assume this is also NonNull. - // As validity of the initial ptr was asserted on construction of RawArray, - // this can assume the dims ptr is also valid, allowing making the slice. - // This code doesn't assert validity per se, but in practice, - // the caller will probably immediately turn this into a borrowed slice, - // opening up the methods that are available on borrowed slices. - // - // So, to be clear, yes, everything done so far allows the caller to do so, - // though it is possible the caller can misuse this in various ways. - // Only the "naive" case is well-guarded. + /* + SAFETY: Welcome to the infernal bowels of FFI. + Because the initial ptr was NonNull, we can assume this is also NonNull. + Validity of the ptr and ndim field was asserted on construction of RawArray, + so can assume the dims ptr is also valid, allowing making the slice. + */ unsafe { let ndim = self.ndim() as usize; slice::from_raw_parts( @@ -178,21 +174,24 @@ impl RawArray { let len = self.len + 7 >> 3; // Obtains 0 if len was 0. - // SAFETY: This obtains the nulls pointer, which is valid to obtain because - // the validity was asserted on construction. However, unlike the other cases, - // it isn't correct to trust it. Instead, this gets null-checked. - // This is because, while the initial pointer is NonNull, - // ARR_NULLBITMAP can return a nullptr! + /* + SAFETY: This obtains the nulls pointer, which is valid to obtain because + the len was asserted on construction. However, unlike the other cases, + it isn't correct to trust it. Instead, this gets null-checked. + This is because, while the initial pointer is NonNull, + ARR_NULLBITMAP can return a nullptr! + */ NonNull::new(unsafe { slice_from_raw_parts_mut(pgx_ARR_NULLBITMAP(self.ptr.as_ptr()), len) }) } /** - Shim to ARR_DATA_PTR(ArrayType*) + Oxidized form of ARR_DATA_PTR(ArrayType*) # Safety + While this function is safe to call, using the slice may risk undefined behavior. The raw slice is not guaranteed to be legible at any given index as T, e.g. it may be an "SQL null" if so indicated in the null bitmap. As a result, it is dangerous to reborrow this as `&[T]` or `&mut [T]` @@ -209,21 +208,31 @@ impl RawArray { but it may be incorrect to assume data Postgres has marked "null" otherwise follows Rust-level initialization requirements. + As Postgres handles alignment requirements in its own particular ways, + it is up to you to validate that each index is aligned correctly. + The first element should be correctly aligned to the type, but that is not certain. + Successive indices are even less likely to match the data type you want + unless Postgres also uses an identical layout. + [MaybeUninit]: core::mem::MaybeUninit [nonnull]: core::ptr::NonNull */ pub fn data(&mut self) -> NonNull<[T]> { - // SAFETY: Welcome to the infernal bowels of FFI. - // Because the initial ptr was NonNull, we can assume this is also NonNull. - // As validity of the initial ptr was asserted on construction of RawArray, - // this can assume the data ptr is also valid, allowing making the slice. - // This code doesn't assert validity per se, but in practice, - // the caller will probably immediately turn this into a borrowed slice, - // opening up the methods that are available on borrowed slices. - // - // Most importantly, the caller has asserted this is in fact a valid [T], - // by calling this function, so both this code and the caller can rely - // on that assertion, only requiring that it is correct. + /* + SAFETY: Welcome to the infernal bowels of FFI. + Because the initial ptr was NonNull, we can assume this is also NonNull. + As validity of the initial ptr was asserted on construction of RawArray, + this can assume the data ptr is also valid, or harmlessly incorrect. + + This code doesn't assert validity per se, but in practice, + the caller may immediately turn this into a borrowed slice, + opening up the methods that are available on borrowed slices. + This is fine as long as the caller heeds the caveats already given. + In particular, for simply sized and aligned data, where alignment is the size + (e.g. u8, i16, f32, u64), and there are no invalid bitpatterns to worry about, + the caller can almost certainly go to town with it, + needing only their initial assertion regarding the type being correct. + */ unsafe { NonNull::new_unchecked(slice_from_raw_parts_mut( pgx_ARR_DATA_PTR(self.ptr.as_ptr()).cast(), From 16cf84974a779fb0d6b767f9eb17a3ef2fe6e9a1 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 21:26:28 -0700 Subject: [PATCH 28/36] Add dims_mut, to see if it works --- pgx/src/array.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 9ef61002d..5f5f655dc 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -137,6 +137,24 @@ impl RawArray { } } + /** + The ability to rewrite the dimensions slice. + + You almost certainly do not actually want to call this. + Returns a triple tuple of + * a mutable reference to ndim, + * a pointer to the first c_int + * a mutable reference to RawArray's len field. + + Write to them in order. + */ + pub unsafe fn dims_mut(&mut self) -> (&mut libc::c_int, NonNull, &mut usize) { + let dims_ptr = unsafe { NonNull::new_unchecked(pgx_ARR_DIMS(self.ptr.as_ptr())) }; + let len = &mut self.len; + + (unsafe { &mut self.ptr.as_mut().ndim }, dims_ptr, len) + } + /// The flattened length of the array. pub fn len(&self) -> usize { self.len From e2a3509471ec16dc36bcba708674ec09a8805fa2 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 21:32:03 -0700 Subject: [PATCH 29/36] Cleanup and explanations for RawArray::{dims_mut, nulls} --- pgx/src/array.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 5f5f655dc..11a05b7b4 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -140,11 +140,12 @@ impl RawArray { /** The ability to rewrite the dimensions slice. - You almost certainly do not actually want to call this. + You almost certainly do not actually want to call this, + unless you intentionally stored the actually intended ndim and wrote 0 instead. Returns a triple tuple of - * a mutable reference to ndim, - * a pointer to the first c_int - * a mutable reference to RawArray's len field. + * a mutable reference to the underlying ArrayType's ndim field + * a pointer to the first c_int of the dimensions slice + * a mutable reference to RawArray's len field Write to them in order. */ @@ -184,7 +185,15 @@ impl RawArray { self.data_offset() != 0 } - /// Oxidized form of ARR_NULLBITMAP(ArrayType*) + /** + Oxidized form of ARR_NULLBITMAP(ArrayType*) + + If this returns None, the array cannot have nulls. + If this returns Some, it points to the bitslice that marks nulls in this array. + + Note that unlike the `is_null: bool` that appears elsewhere, here a 0 bit is null, + or possibly out of bounds for the final byte of the bitslice. + */ pub fn nulls(&self) -> Option> { // for expected behavior, see: // postgres/src/include/utils/array.h From 9979f7771f87199f522b1c05f989213b165e265f Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 21:35:31 -0700 Subject: [PATCH 30/36] Format --- pgx/src/array.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 11a05b7b4..e48469f90 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -1,8 +1,8 @@ use crate::datum::{Array, FromDatum}; use crate::pg_sys; use core::ptr::{slice_from_raw_parts_mut, NonNull}; -use pgx_pg_sys::*; use core::slice; +use pgx_pg_sys::*; extern "C" { /// # Safety @@ -130,10 +130,7 @@ impl RawArray { */ unsafe { let ndim = self.ndim() as usize; - slice::from_raw_parts( - pgx_ARR_DIMS(self.ptr.as_ptr()), - ndim, - ) + slice::from_raw_parts(pgx_ARR_DIMS(self.ptr.as_ptr()), ndim) } } @@ -201,7 +198,7 @@ impl RawArray { let len = self.len + 7 >> 3; // Obtains 0 if len was 0. - /* + /* SAFETY: This obtains the nulls pointer, which is valid to obtain because the len was asserted on construction. However, unlike the other cases, it isn't correct to trust it. Instead, this gets null-checked. From f15eabda3e7cec362169ffb5610338d0917eb278 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 21:40:29 -0700 Subject: [PATCH 31/36] Add hint about lens --- pgx/src/array.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index e48469f90..c094d7e74 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -190,6 +190,9 @@ impl RawArray { Note that unlike the `is_null: bool` that appears elsewhere, here a 0 bit is null, or possibly out of bounds for the final byte of the bitslice. + + Note that if this is None, that does not mean it's always okay to read! + If len is 0, then this slice will be valid for 0 bytes. */ pub fn nulls(&self) -> Option> { // for expected behavior, see: @@ -238,6 +241,9 @@ impl RawArray { Successive indices are even less likely to match the data type you want unless Postgres also uses an identical layout. + This returns a slice to make it somewhat harder to fail to read it correctly. + However, it should be noted that a len 0 slice may not be read via raw pointers. + [MaybeUninit]: core::mem::MaybeUninit [nonnull]: core::ptr::NonNull */ From b2c9508c6f6bd582497077480b6362a084395ae9 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 22:09:56 -0700 Subject: [PATCH 32/36] Remove unnecessary unsafe on test --- pgx-tests/src/tests/array_tests.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pgx-tests/src/tests/array_tests.rs b/pgx-tests/src/tests/array_tests.rs index 6b34f9cd5..3e6b73ba6 100644 --- a/pgx-tests/src/tests/array_tests.rs +++ b/pgx-tests/src/tests/array_tests.rs @@ -106,13 +106,10 @@ fn get_arr_nelems(arr: Array) -> libc::c_int { unsafe { RawArray::from_array(arr) }.unwrap().len() as _ } -/// # Safety -/// Don't call with an actually-null index, or you might see an invalid value. #[pg_extern] -#[deny(unsafe_op_in_unsafe_fn)] -unsafe fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { +fn get_arr_data_ptr_nth_elem(arr: Array, elem: i32) -> Option { // SAFETY: this is Known to be an Array from ArrayType, - // and the index has to be a valid one. + // and it's valid-ish to see any bitpattern of an i32 inbounds of a slice. unsafe { let raw = RawArray::from_array(arr).unwrap().data::(); let slice = &(*raw.as_ptr()); From 4ba1b0dbc82e7b5f8a7599899418ec58cabae21d Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 22:28:40 -0700 Subject: [PATCH 33/36] Lift remarks into public docs Includes links that are permanent to some version of Postgres. --- pgx/src/array.rs | 69 +++++++++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index c094d7e74..e9097db8d 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -22,11 +22,24 @@ extern "C" { /** An aligned, dereferenceable `NonNull` with low-level accessors. -It offers technically-safe accessors to the "dynamic" fields of a Postgres varlena array -but only requires validity of ArrayType itself as well as the dimensions slice. -This also means that the [NonNull] pointers that are returned may not be valid to read. +It offers safe accessors to the fields of [pg_sys::ArrayType] and mostly-safe accessors +to the "dynamic fields" of the defined Postgres varlena array, but only requires validity +of ArrayType itself and the dimensions slice (always valid if `ndim == 0`). +This means the [NonNull] pointers that are returned may not be valid to read. Validating the correctness of the entire array requires a bit more effort. +It is not Copy or Clone to make it slightly harder to misuse versus *mut ArrayType. +However, `&mut self` accessors do not give lifetimes to returned [`NonNull<[T]>`][nonnull]! +Instead, these are raw pointers, and `&mut RawArray` only makes `&RawArray` safer. + +The reason RawArray works almost entirely with raw pointers is that +it is not currently valid to go from `&mut ArrayType` to `*mut ArrayType`, +take an offset beyond ArrayType's fields, and then create a new slice there +and read from that. The result is currently undefined behavior, +though with emphasis on "undefined": it may become defined in the future of Rust. + +At the current moment, however, it is best to exercise an abundance of caution. + # On sizes and subscripts Postgres uses C's `int` (`c_int` in Rust) for sizes, and Rust uses [usize]. @@ -36,6 +49,8 @@ except with negative indices, which Postgres asserts against creating. PGX currently only intentionally supports 64-bit machines, and while support for ILP32 or I64LP128 C data models may become possible, PGX will **not** support 16-bit machines in any practical case, even though Rust does. + +[nonnull]: core::ptr::NonNull */ #[derive(Debug)] pub struct RawArray { @@ -45,15 +60,6 @@ pub struct RawArray { #[deny(unsafe_op_in_unsafe_fn)] impl RawArray { - // General implementation notes: - // RawArray is not Copy or Clone, making it harder to misuse versus *mut ArrayType. - // But this also offers safe accessors to the fields, like &ArrayType, - // so it requires validity assertions in order to be constructed. - // The main reason it uses NonNull and denies Clone, however, is access soundness: - // It is not sound to go from &mut Type to *mut T if *mut T is not a field of Type. - // This creates an obvious complication for handing out pointers into varlenas. - // Thus also why this does not use lifetime-bounded borrows. - /** Returns a handle to the raw array header. @@ -74,14 +80,17 @@ impl RawArray { [the std documentation]: core::ptr#safety */ pub unsafe fn from_ptr(ptr: NonNull) -> RawArray { + // SAFETY: Validity asserted by the caller. let len = unsafe { pgx_ARR_NELEMS(ptr.as_ptr()) } as usize; RawArray { ptr, len } } /// # Safety - /// Array must have been constructed from an ArrayType pointer. + /// Array must have been made from an ArrayType pointer, + /// or a null value, as-if [RawArray::from_ptr]. pub unsafe fn from_array(arr: Array) -> Option { let array_type = arr.into_array_type() as *mut _; + // SAFETY: Validity asserted by the caller. let len = unsafe { pgx_ARR_NELEMS(array_type) } as usize; Some(RawArray { ptr: NonNull::new(array_type)?, @@ -112,16 +121,14 @@ impl RawArray { /** A slice of the dimensions. - Oxidized form of ARR_DIMS(ArrayType*). + Oxidized form of [ARR_DIMS(ArrayType*)][ARR_DIMS]. The length will be within 0..=[pg_sys::MAXDIM]. Safe to use because validity of this slice was asserted on construction. + + [ARR_DIMS]: */ pub fn dims(&self) -> &[libc::c_int] { - // for expected behavior, see: - // postgres/src/include/utils/array.h - // #define ARR_DIMS - /* SAFETY: Welcome to the infernal bowels of FFI. Because the initial ptr was NonNull, we can assume this is also NonNull. @@ -147,17 +154,21 @@ impl RawArray { Write to them in order. */ pub unsafe fn dims_mut(&mut self) -> (&mut libc::c_int, NonNull, &mut usize) { + // SAFETY: Validity asserted on construction, origin ptr is non-null. let dims_ptr = unsafe { NonNull::new_unchecked(pgx_ARR_DIMS(self.ptr.as_ptr())) }; let len = &mut self.len; + // SAFETY: Validity asserted on construction. Just reborrowing a subfield. (unsafe { &mut self.ptr.as_mut().ndim }, dims_ptr, len) } - /// The flattened length of the array. + /// The flattened length of the array over every single element. + /// Includes all items, even the ones that might be null. pub fn len(&self) -> usize { self.len } + /// Accessor for ArrayType's elemtype. pub fn oid(&self) -> pg_sys::Oid { // SAFETY: Validity asserted on construction. unsafe { (*self.ptr.as_ptr()).elemtype } @@ -171,19 +182,20 @@ impl RawArray { // This field is an "int32" in Postgres } - /// Equivalent to ARR_HASNULL(ArrayType*). - /// - /// Note this means that it only asserts that there MIGHT be a null + /** + Equivalent to [ARR_HASNULL(ArrayType*)][ARR_HASNULL]. + + Note this means that it only asserts that there MIGHT be a null + + [ARR_HASNULL]: + */ #[allow(unused)] fn nullable(&self) -> bool { - // for expected behavior, see: - // postgres/src/include/utils/array.h - // #define ARR_HASNULL self.data_offset() != 0 } /** - Oxidized form of ARR_NULLBITMAP(ArrayType*) + Oxidized form of [ARR_NULLBITMAP(ArrayType*)][ARR_NULLBITMAP] If this returns None, the array cannot have nulls. If this returns Some, it points to the bitslice that marks nulls in this array. @@ -193,6 +205,8 @@ impl RawArray { Note that if this is None, that does not mean it's always okay to read! If len is 0, then this slice will be valid for 0 bytes. + + [ARR_NULLBITMAP]: */ pub fn nulls(&self) -> Option> { // for expected behavior, see: @@ -214,7 +228,7 @@ impl RawArray { } /** - Oxidized form of ARR_DATA_PTR(ArrayType*) + Oxidized form of [ARR_DATA_PTR(ArrayType*)][ARR_DATA_PTR] # Safety @@ -246,6 +260,7 @@ impl RawArray { [MaybeUninit]: core::mem::MaybeUninit [nonnull]: core::ptr::NonNull + [ARR_DATA_PTR]: */ pub fn data(&mut self) -> NonNull<[T]> { /* From 5ab4ba8544328a000395eb17c627df58558664c4 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 25 Aug 2022 22:53:58 -0700 Subject: [PATCH 34/36] One last format --- pgx/src/array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index e9097db8d..d8c793f78 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -205,7 +205,7 @@ impl RawArray { Note that if this is None, that does not mean it's always okay to read! If len is 0, then this slice will be valid for 0 bytes. - + [ARR_NULLBITMAP]: */ pub fn nulls(&self) -> Option> { From fd3c21d08e2d68e7760a33139603eff09e211550 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Fri, 26 Aug 2022 11:51:18 -0700 Subject: [PATCH 35/36] One last cleanup --- pgx/src/array.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index d8c793f78..4069a3bf1 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -109,11 +109,13 @@ impl RawArray { // SAFETY: Validity asserted on construction. unsafe { (*self.ptr.as_ptr()).ndim - // FIXME: While this is a c_int, the max ndim is normally 6 - // While the value can be set higher, it is... unlikely - // that it is going to actually challenge even 16-bit pointer widths. - // It would be preferable to return a usize instead, - // however, PGX has trouble with that, unfortunately. + /* + FIXME: While this is a c_int, the max ndim is normally 6 + While the value can be set higher, it is... unlikely + that it is going to actually challenge even 16-bit pointer widths. + It would be preferable to return a usize instead, + however, PGX has trouble with that, unfortunately. + */ as _ } } @@ -209,10 +211,6 @@ impl RawArray { [ARR_NULLBITMAP]: */ pub fn nulls(&self) -> Option> { - // for expected behavior, see: - // postgres/src/include/utils/array.h - // #define ARR_NULLBITMAP - let len = self.len + 7 >> 3; // Obtains 0 if len was 0. /* From 1dd915541923eff9e841fab0665abab21706247e Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Fri, 26 Aug 2022 12:20:07 -0700 Subject: [PATCH 36/36] Remove dubious dims_mut function --- pgx/src/array.rs | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/pgx/src/array.rs b/pgx/src/array.rs index 4069a3bf1..3eab9281f 100644 --- a/pgx/src/array.rs +++ b/pgx/src/array.rs @@ -143,27 +143,6 @@ impl RawArray { } } - /** - The ability to rewrite the dimensions slice. - - You almost certainly do not actually want to call this, - unless you intentionally stored the actually intended ndim and wrote 0 instead. - Returns a triple tuple of - * a mutable reference to the underlying ArrayType's ndim field - * a pointer to the first c_int of the dimensions slice - * a mutable reference to RawArray's len field - - Write to them in order. - */ - pub unsafe fn dims_mut(&mut self) -> (&mut libc::c_int, NonNull, &mut usize) { - // SAFETY: Validity asserted on construction, origin ptr is non-null. - let dims_ptr = unsafe { NonNull::new_unchecked(pgx_ARR_DIMS(self.ptr.as_ptr())) }; - let len = &mut self.len; - - // SAFETY: Validity asserted on construction. Just reborrowing a subfield. - (unsafe { &mut self.ptr.as_mut().ndim }, dims_ptr, len) - } - /// The flattened length of the array over every single element. /// Includes all items, even the ones that might be null. pub fn len(&self) -> usize {