Skip to content

Commit

Permalink
Adjust StringInfo wrapper impl to match actual definition (#1457)
Browse files Browse the repository at this point in the history
A commit that alters the definition of StringInfo was introduced about 2
months ago to Postgres:
postgres/postgres@f0efa5a

It contains the following comment:
```
  * As a special case, a StringInfoData can be initialized with a read-only
  * string buffer.  In this case "data" does not necessarily point at a
  * palloc'd chunk, and management of the buffer storage is the caller's
  * responsibility.  maxlen is set to zero to indicate that this is the case.
  * Read-only StringInfoDatas cannot be appended to or reset.
  * Also, it is caller's option whether a read-only string buffer has a
  * terminating '\0' or not.  This depends on the intended usage.
```

This will make the following sequence of Rust code completely unsound
**by mere invocation** as of Postgres 17, even assuming the lifetime
constraint is correct:
```rust
let stringinfo: *mut pg_sys::StringInfoData = something_that_returns_ptr_to_stringinfodata();
let stringinfo: StringInfo<AllocatedByPostgres> = unsafe { StringInfoData::from_pg(stringinfo) };
let cstr_from_stringinfo: &'static CStr = stringinfo.into();
```

However, that commit has its own remarks:
```
There were various places in our codebase which conjured up a StringInfo
by manually assigning the StringInfo fields and setting the data field
to point to some existing buffer.  There wasn't much consistency here as
to what fields like maxlen got set to and in one location we didn't
correctly ensure that the buffer was correctly NUL terminated at len
bytes, as per what was documented as required in stringinfo.h
```

..."In one location we didn't correctly ensure that the buffer was
correctly NUL terminated"?

On actually reviewing the commit, in multiple places, the maximum len
was previously set to -1 or other incorrect values to indicate similar
sorts of "borrow-like" usages. In other words, the `max_len > len`
invariant was not preserved either, so it was not impossible for the
pointee buffer to actually end at exactly len N, instead of also having
another byte available.

While the only constructor for the StringInfoData wrapper is its `unsafe
fn from_pg`, we were most certainly relying on this being true, and
making comments as if we expected Postgres to not randomly pass us bogus
data. In actuality, we find now that Postgres passes us bogus data all
the time. As this guarantee has never actually been enforced, we should
remove the implementation of `impl From<StringInfo<_>> for &'static
CStr` immediately. This also means our Drop implementation has to go.

A more direct API is offered instead via `StringInfo::leak_cstr`.
  • Loading branch information
workingjubilee authored Jan 2, 2024
1 parent e7112ed commit 483606b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 42 deletions.
5 changes: 3 additions & 2 deletions pgrx-examples/custom_types/src/hexint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,11 @@ fn hexint_in(input: &CStr) -> Result<HexInt, Box<dyn Error>> {
/// `requires = [ "shell_type" ]` indicates that the CREATE FUNCTION SQL for this function must happen
/// *after* the SQL for the "shell_type" block.
#[pg_extern(immutable, parallel_safe, requires = [ "shell_type" ])]
fn hexint_out<'a>(value: HexInt) -> &'a CStr {
fn hexint_out(value: HexInt) -> &'static CStr {
let mut s = StringInfo::new();
s.push_str(&value.to_string());
s.into()
// SAFETY: We just constructed this StringInfo ourselves
unsafe { s.leak_cstr() }
}

//
Expand Down
19 changes: 11 additions & 8 deletions pgrx-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ fn complex_in(input: &core::ffi::CStr) -> PgBox<Complex> {
}
#[pg_extern(immutable)]
fn complex_out(complex: PgBox<Complex>) -> &'static core::ffi::CStr {
fn complex_out(complex: PgBox<Complex>) -> &'static ::core::ffi::CStr {
todo!()
}
Expand Down Expand Up @@ -836,11 +836,12 @@ fn impl_postgres_type(ast: DeriveInput) -> syn::Result<proc_macro2::TokenStream>
}

#[doc(hidden)]
#[::pgrx::pgrx_macros::pg_extern(immutable,parallel_safe)]
pub fn #funcname_out #generics(input: #name #generics) -> &#lifetime ::core::ffi::CStr {
#[::pgrx::pgrx_macros::pg_extern (immutable,parallel_safe)]
pub fn #funcname_out #generics(input: #name #generics) -> &'static ::core::ffi::CStr {
let mut buffer = ::pgrx::stringinfo::StringInfo::new();
::pgrx::inoutfuncs::JsonInOutFuncs::output(&input, &mut buffer);
buffer.into()
// SAFETY: We just constructed this StringInfo ourselves
unsafe { buffer.leak_cstr() }
}

});
Expand All @@ -860,10 +861,11 @@ fn impl_postgres_type(ast: DeriveInput) -> syn::Result<proc_macro2::TokenStream>

#[doc(hidden)]
#[::pgrx::pgrx_macros::pg_extern(immutable,parallel_safe)]
pub fn #funcname_out #generics(input: #name #generics) -> &#lifetime ::core::ffi::CStr {
pub fn #funcname_out #generics(input: #name #generics) -> &'static ::core::ffi::CStr {
let mut buffer = ::pgrx::stringinfo::StringInfo::new();
::pgrx::inoutfuncs::InOutFuncs::output(&input, &mut buffer);
buffer.into()
// SAFETY: We just constructed this StringInfo ourselves
unsafe { buffer.leak_cstr() }
}
});
} else if args.contains(&PostgresTypeAttribute::PgVarlenaInOutFuncs) {
Expand All @@ -882,10 +884,11 @@ fn impl_postgres_type(ast: DeriveInput) -> syn::Result<proc_macro2::TokenStream>

#[doc(hidden)]
#[::pgrx::pgrx_macros::pg_extern(immutable,parallel_safe)]
pub fn #funcname_out #generics(input: ::pgrx::datum::PgVarlena<#name #generics>) -> &#lifetime ::core::ffi::CStr {
pub fn #funcname_out #generics(input: ::pgrx::datum::PgVarlena<#name #generics>) -> &'static ::core::ffi::CStr {
let mut buffer = ::pgrx::stringinfo::StringInfo::new();
::pgrx::inoutfuncs::PgVarlenaInOutFuncs::output(&*input, &mut buffer);
buffer.into()
// SAFETY: We just constructed this StringInfo ourselves
unsafe { buffer.leak_cstr() }
}
});
}
Expand Down
5 changes: 3 additions & 2 deletions pgrx-tests/src/tests/struct_type_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//LICENSE All rights reserved.
//LICENSE
//LICENSE Use of this source code is governed by the MIT license that can be found in the LICENSE file.
use core::ffi::CStr;
use pgrx::pgrx_sql_entity_graph::metadata::{
ArgumentError, Returns, ReturnsError, SqlMapping, SqlTranslatable,
};
Expand Down Expand Up @@ -76,10 +77,10 @@ fn complex_in(input: &core::ffi::CStr) -> PgBox<Complex, AllocatedByRust> {
}

#[pg_extern(immutable)]
fn complex_out(complex: PgBox<Complex>) -> &'static core::ffi::CStr {
fn complex_out(complex: PgBox<Complex>) -> &'static CStr {
let mut sb = StringInfo::new();
sb.push_str(&format!("{}, {}", &complex.x, &complex.y));
sb.into()
unsafe { sb.leak_cstr() }
}

extension_sql!(
Expand Down
52 changes: 22 additions & 30 deletions pgrx/src/stringinfo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
#![allow(dead_code, non_snake_case)]

use crate::{pg_sys, AllocatedByPostgres, AllocatedByRust, PgBox, WhoAllocated};
use core::ffi::CStr;
use core::fmt::{Display, Formatter};
use core::slice;
use core::str::Utf8Error;
use std::io::Error;

Expand All @@ -21,20 +23,6 @@ pub struct StringInfo<AllocatedBy: WhoAllocated = AllocatedByRust> {
inner: PgBox<pg_sys::StringInfoData, AllocatedBy>,
}

impl<AllocatedBy: WhoAllocated> From<StringInfo<AllocatedBy>> for &'static core::ffi::CStr {
fn from(val: StringInfo<AllocatedBy>) -> Self {
let len = val.len();
let ptr = val.into_char_ptr();

unsafe {
core::ffi::CStr::from_bytes_with_nul_unchecked(std::slice::from_raw_parts(
ptr as *const u8,
len + 1, // +1 to get the trailing null byte
))
}
}
}

impl<AllocatedBy: WhoAllocated> std::io::Write for StringInfo<AllocatedBy> {
fn write(&mut self, buf: &[u8]) -> Result<usize, Error> {
self.push_bytes(buf);
Expand Down Expand Up @@ -247,9 +235,28 @@ impl<AllocatedBy: WhoAllocated> StringInfo<AllocatedBy> {
unsafe {
// SAFETY: we just made the StringInfoData pointer so we know it's valid and properly
// initialized throughout
sid_ptr.as_ref().unwrap_unchecked().data
(*sid_ptr).data
}
}

/// Convert this `StringInfo` into a `CStr`
///
/// # Safety
/// Postgres can create a StringInfo that does not have `nul` terminated data.
/// This function may panic in some cases when it detects incorrect data.
/// However, these panics should not be relied on: you must fulfill the safety requirements
/// of [`CStr::from_bytes_with_nul`].
///
/// This safety requirement should be fulfilled automatically if you created this StringInfo
/// and performed all modifications to it using this type's implemented functions.
#[inline]
pub unsafe fn leak_cstr<'a>(self) -> &'a CStr {
let len = self.len();
let char_ptr = self.into_char_ptr();
assert!(!char_ptr.is_null(), "stringinfo char ptr was null");
CStr::from_bytes_with_nul(unsafe { slice::from_raw_parts(char_ptr.cast(), len + 1) })
.expect("incorrectly constructed stringinfo")
}
}

impl Default for StringInfo<AllocatedByRust> {
Expand Down Expand Up @@ -287,18 +294,3 @@ impl From<&[u8]> for StringInfo<AllocatedByRust> {
rc
}
}

impl<AllocatedBy: WhoAllocated> Drop for StringInfo<AllocatedBy> {
fn drop(&mut self) {
unsafe {
// SAFETY: self.inner could represent the null pointer, but if it doesn't, then it's
// one we can use as self.inner.data will always be allocated if self.inner is.
//
// It's also prescribed by Postgres that to fully deallocate a StringInfo pointer, the
// owner is responsible for freeing its .data member... and that's us
if !self.inner.is_null() {
AllocatedBy::maybe_pfree(self.inner.data.cast())
}
}
}
}

0 comments on commit 483606b

Please sign in to comment.