Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove PyString::as_bytes since it cannot return raw bytes #1020

Merged
merged 2 commits into from
Jul 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

### Changed
- Update `parking_lot` dependency to `0.11`. [#1010](https://github.com/PyO3/pyo3/pull/1010)
- `PyString::to_string` is now renamed `to_str` and returns `&str` instead of `Cow<str>`. [#1023](https://github.com/PyO3/pyo3/pull/1023)

### Removed
- Remove `PyString::as_bytes`. [#1023](https://github.com/PyO3/pyo3/pull/1023)
- Remove `Python::register_gil`. [#1023](https://github.com/PyO3/pyo3/pull/1023)

## [0.11.0] - 2020-06-28
### Added
Expand Down
108 changes: 35 additions & 73 deletions src/gil.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,10 @@
use crate::{ffi, internal_tricks::Unsendable, Python};
use parking_lot::{const_mutex, Mutex};
use std::cell::{Cell, RefCell};
use std::{any, mem::ManuallyDrop, ptr::NonNull, sync};
use std::{mem::ManuallyDrop, ptr::NonNull, sync};

static START: sync::Once = sync::Once::new();

/// Holds temporally owned objects.
struct ObjectHolder {
/// Objects owned by the current thread
obj: Vec<NonNull<ffi::PyObject>>,
/// Non-python objects(e.g., String) owned by the current thread
any: Vec<Box<dyn any::Any>>,
}

impl ObjectHolder {
fn new() -> Self {
Self {
obj: Vec::with_capacity(256),
any: Vec::with_capacity(4),
}
}
fn len(&self) -> (usize, usize) {
(self.obj.len(), self.any.len())
}
}

thread_local! {
/// This is a internal counter in pyo3 monitoring whether this thread has the GIL.
///
Expand All @@ -41,7 +21,7 @@ thread_local! {
pub(crate) static GIL_COUNT: Cell<u32> = Cell::new(0);

/// Temporally hold objects that will be released when the GILPool drops.
static OWNED_OBJECTS: RefCell<ObjectHolder> = RefCell::new(ObjectHolder::new());
static OWNED_OBJECTS: RefCell<Vec<NonNull<ffi::PyObject>>> = RefCell::new(Vec::with_capacity(256));
}

/// Check whether the GIL is acquired.
Expand All @@ -51,7 +31,7 @@ thread_local! {
/// 2) PyGILState_Check always returns 1 if the sub-interpreter APIs have ever been called,
/// which could lead to incorrect conclusions that the GIL is held.
fn gil_is_acquired() -> bool {
GIL_COUNT.with(|c| c.get() > 0)
GIL_COUNT.try_with(|c| c.get() > 0).unwrap_or(false)
}

/// Prepares the use of Python in a free-threaded context.
Expand Down Expand Up @@ -159,21 +139,19 @@ impl GILGuard {
pub fn acquire() -> GILGuard {
prepare_freethreaded_python();

unsafe {
let gstate = ffi::PyGILState_Ensure(); // acquire GIL

// If there's already a GILPool, we should not create another or this could lead to
// incorrect dangling references in safe code (see #864).
let pool = if !gil_is_acquired() {
Some(GILPool::new())
} else {
None
};

GILGuard {
gstate,
pool: ManuallyDrop::new(pool),
}
let gstate = unsafe { ffi::PyGILState_Ensure() }; // acquire GIL

// If there's already a GILPool, we should not create another or this could lead to
// incorrect dangling references in safe code (see #864).
let pool = if !gil_is_acquired() {
Some(unsafe { GILPool::new() })
} else {
None
};

GILGuard {
gstate,
pool: ManuallyDrop::new(pool),
}
}

Expand Down Expand Up @@ -252,7 +230,7 @@ static POOL: ReferencePool = ReferencePool::new();
pub struct GILPool {
/// Initial length of owned objects and anys.
/// `Option` is used since TSL can be broken when `new` is called from `atexit`.
start: Option<(usize, usize)>,
start: Option<usize>,
no_send: Unsendable,
}

Expand Down Expand Up @@ -283,20 +261,19 @@ impl GILPool {

impl Drop for GILPool {
fn drop(&mut self) {
unsafe {
if let Some((obj_len_start, any_len_start)) = self.start {
let dropping_obj = OWNED_OBJECTS.with(|holder| {
// `holder` must be dropped before calling Py_DECREF, or Py_DECREF may call
// `GILPool::drop` recursively, resulting in invalid borrowing.
let mut holder = holder.borrow_mut();
holder.any.truncate(any_len_start);
if obj_len_start < holder.obj.len() {
holder.obj.split_off(obj_len_start)
} else {
Vec::new()
}
});
for obj in dropping_obj {
if let Some(obj_len_start) = self.start {
let dropping_obj = OWNED_OBJECTS.with(|holder| {
// `holder` must be dropped before calling Py_DECREF, or Py_DECREF may call
// `GILPool::drop` recursively, resulting in invalid borrowing.
let mut holder = holder.borrow_mut();
if obj_len_start < holder.len() {
holder.split_off(obj_len_start)
} else {
Vec::new()
}
});
for obj in dropping_obj {
unsafe {
ffi::Py_DECREF(obj.as_ptr());
}
}
Expand Down Expand Up @@ -343,36 +320,21 @@ pub unsafe fn register_decref(obj: NonNull<ffi::PyObject>) {
/// The object must be an owned Python reference.
pub unsafe fn register_owned(_py: Python, obj: NonNull<ffi::PyObject>) {
debug_assert!(gil_is_acquired());
// Ignoring the error means we do nothing if the TLS is broken.
let _ = OWNED_OBJECTS.try_with(|holder| holder.borrow_mut().obj.push(obj));
}

/// Register any value inside the GILPool.
///
/// # Safety
/// It is the caller's responsibility to ensure that the inferred lifetime 'p is not inferred by
/// the Rust compiler to outlast the current GILPool.
pub unsafe fn register_any<'p, T: 'static>(obj: T) -> &'p T {
debug_assert!(gil_is_acquired());
OWNED_OBJECTS.with(|holder| {
let boxed = Box::new(obj);
let value_ref: *const T = &*boxed;
holder.borrow_mut().any.push(boxed);
&*value_ref
})
// Ignores the error in case this function called from `atexit`.
let _ = OWNED_OBJECTS.try_with(|holder| holder.borrow_mut().push(obj));
}

/// Increment pyo3's internal GIL count - to be called whenever GILPool or GILGuard is created.
// Ignores the error in case this function called from `atexit`.
#[inline(always)]
fn increment_gil_count() {
// Ignores the error in case this function called from `atexit`.
let _ = GIL_COUNT.with(|c| c.set(c.get() + 1));
}

/// Decrement pyo3's internal GIL count - to be called whenever GILPool or GILGuard is dropped.
// Ignores the error in case this function called from `atexit`.
#[inline(always)]
fn decrement_gil_count() {
// Ignores the error in case this function called from `atexit`.
let _ = GIL_COUNT.try_with(|c| {
let current = c.get();
debug_assert!(
Expand Down Expand Up @@ -431,7 +393,7 @@ mod test {
}

fn owned_object_count() -> usize {
OWNED_OBJECTS.with(|holder| holder.borrow().obj.len())
OWNED_OBJECTS.with(|holder| holder.borrow().len())
}

#[test]
Expand Down
7 changes: 0 additions & 7 deletions src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,13 +435,6 @@ impl<'p> Python<'p> {
FromPyPointer::from_borrowed_ptr_or_opt(self, ptr)
}

#[doc(hidden)]
/// Passes value ownership to `Python` object and get reference back.
/// Value get cleaned up on the GIL release.
pub fn register_any<T: 'static>(self, ob: T) -> &'p T {
unsafe { gil::register_any(ob) }
}

/// Releases a PyObject reference.
#[inline]
pub fn release<T>(self, ob: T)
Expand Down
2 changes: 1 addition & 1 deletion src/types/bytearray.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ mod test {
slice[0..5].copy_from_slice(b"Hi...");

assert_eq!(
&bytearray.str().unwrap().to_string().unwrap(),
bytearray.str().unwrap().to_str().unwrap(),
"bytearray(b'Hi... Python')"
);
}
Expand Down
71 changes: 17 additions & 54 deletions src/types/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use crate::{
PyTryFrom, Python, ToPyObject,
};
use std::borrow::Cow;
use std::ffi::CStr;
use std::os::raw::c_char;
use std::str;

Expand Down Expand Up @@ -44,41 +43,33 @@ impl PyString {
/// Returns a `UnicodeEncodeError` if the input is not valid unicode
/// (containing unpaired surrogates).
#[inline]
pub fn as_bytes(&self) -> PyResult<&[u8]> {
pub fn to_str(&self) -> PyResult<&str> {
unsafe {
let mut size: ffi::Py_ssize_t = 0;
let data = ffi::PyUnicode_AsUTF8AndSize(self.as_ptr(), &mut size) as *const u8;
if data.is_null() {
Err(PyErr::fetch(self.py()))
} else {
Ok(std::slice::from_raw_parts(data, size as usize))
let slice = std::slice::from_raw_parts(data, size as usize);
Ok(std::str::from_utf8_unchecked(slice))
}
}
}

/// Converts the `PyString` into a Rust string.
pub fn to_string(&self) -> PyResult<Cow<str>> {
let bytes = self.as_bytes()?;
let string = std::str::from_utf8(bytes)?;
Ok(Cow::Borrowed(string))
}

/// Converts the `PyString` into a Rust string.
///
/// Unpaired surrogates invalid UTF-8 sequences are
/// replaced with `U+FFFD REPLACEMENT CHARACTER`.
pub fn to_string_lossy(&self) -> Cow<str> {
match self.to_string() {
Ok(s) => s,
match self.to_str() {
Ok(s) => Cow::Borrowed(s),
Err(_) => {
let bytes = unsafe {
self.py()
.from_owned_ptr::<PyBytes>(ffi::PyUnicode_AsEncodedString(
self.as_ptr(),
CStr::from_bytes_with_nul(b"utf-8\0").unwrap().as_ptr(),
CStr::from_bytes_with_nul(b"surrogatepass\0")
.unwrap()
.as_ptr(),
b"utf-8\0" as *const _ as _,
b"surrogatepass\0" as *const _ as _,
))
};
String::from_utf8_lossy(bytes.as_bytes())
Expand Down Expand Up @@ -136,24 +127,9 @@ impl<'a> IntoPy<PyObject> for &'a String {

/// Allows extracting strings from Python objects.
/// Accepts Python `str` and `unicode` objects.
impl<'source> crate::FromPyObject<'source> for Cow<'source, str> {
impl<'source> crate::FromPyObject<'source> for &'source str {
fn extract(ob: &'source PyAny) -> PyResult<Self> {
<PyString as PyTryFrom>::try_from(ob)?.to_string()
}
}

/// Allows extracting strings from Python objects.
/// Accepts Python `str` and `unicode` objects.
impl<'a> crate::FromPyObject<'a> for &'a str {
fn extract(ob: &'a PyAny) -> PyResult<Self> {
let s: Cow<'a, str> = crate::FromPyObject::extract(ob)?;
match s {
Cow::Borrowed(r) => Ok(r),
Cow::Owned(r) => {
let r = ob.py().register_any(r);
davidhewitt marked this conversation as resolved.
Show resolved Hide resolved
Ok(r.as_str())
}
}
<PyString as PyTryFrom>::try_from(ob)?.to_str()
}
}

Expand All @@ -162,8 +138,8 @@ impl<'a> crate::FromPyObject<'a> for &'a str {
impl<'source> FromPyObject<'source> for String {
fn extract(obj: &'source PyAny) -> PyResult<Self> {
<PyString as PyTryFrom>::try_from(obj)?
.to_string()
.map(Cow::into_owned)
.to_str()
.map(ToOwned::to_owned)
}
}

Expand All @@ -174,7 +150,6 @@ mod test {
use crate::object::PyObject;
use crate::Python;
use crate::{FromPyObject, PyTryFrom, ToPyObject};
use std::borrow::Cow;

#[test]
fn test_non_bmp() {
Expand All @@ -197,44 +172,32 @@ mod test {
}

#[test]
fn test_as_bytes() {
fn test_to_str_ascii() {
let gil = Python::acquire_gil();
let py = gil.python();
let s = "ascii 🐈";
let obj: PyObject = PyString::new(py, s).into();
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
assert_eq!(s.as_bytes(), py_string.as_bytes().unwrap());
assert_eq!(s, py_string.to_str().unwrap());
}

#[test]
fn test_as_bytes_surrogate() {
fn test_to_str_surrogate() {
let gil = Python::acquire_gil();
let py = gil.python();
let obj: PyObject = py.eval(r#"'\ud800'"#, None, None).unwrap().into();
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
assert!(py_string.as_bytes().is_err());
}

#[test]
fn test_to_string_ascii() {
let gil = Python::acquire_gil();
let py = gil.python();
let s = "ascii";
let obj: PyObject = PyString::new(py, s).into();
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
assert!(py_string.to_string().is_ok());
assert_eq!(Cow::Borrowed(s), py_string.to_string().unwrap());
assert!(py_string.to_str().is_err());
}

#[test]
fn test_to_string_unicode() {
fn test_to_str_unicode() {
let gil = Python::acquire_gil();
let py = gil.python();
let s = "哈哈🐈";
let obj: PyObject = PyString::new(py, s).into();
let py_string = <PyString as PyTryFrom>::try_from(obj.as_ref(py)).unwrap();
assert!(py_string.to_string().is_ok());
assert_eq!(Cow::Borrowed(s), py_string.to_string().unwrap());
assert_eq!(s, py_string.to_str().unwrap());
}

#[test]
Expand Down