Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sort: Allow percentages as values for buffer-size #4059

Closed
wants to merge 9 commits into from
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion src/uu/sort/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ rand = "0.8"
rayon = "1.5"
tempfile = "3"
unicode-width = "0.1.8"
uucore = { version=">=0.0.16", package="uucore", path="../../uucore", features=["fs"] }
uucore = { version=">=0.0.16", package="uucore", path="../../uucore", features=["fs", "libc"] }

[target.'cfg(target_os = "windows")'.dependencies]
windows-sys = { version = "0.42.0", default-features=false, features=["Win32_System_SystemInformation"]}

[target.'cfg(any(target_os = "linux", target_os = "android"))'.dependencies]
nix = "0.25"

[[bin]]
name = "sort"
Expand Down
159 changes: 138 additions & 21 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/sort.html
// https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html

// spell-checker:ignore (misc) HFKJFK Mbdfhn
// spell-checker:ignore (misc) HFKJFK Mbdfhn sysinfo MEMORYSTATUS MEMSIZE

mod check;
mod chunks;
Expand Down Expand Up @@ -340,32 +340,46 @@ impl GlobalSettings {
/// The unit may be k, K, m, M, g, G, t, T, P, E, Z, Y (powers of 1024), or b which is 1.
/// A trailing % is also accepted; such values are handed to `parse_memory_percentage`.
/// Default is K, i.e. a plain number is interpreted as a multiple of 1024.
fn parse_byte_count(input: &str) -> Result<usize, ParseSizeError> {
// GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y
// GNU sort (8.32) invalid: b, B, 1B, p, e, z, y
const ALLOW_LIST: &[char] = &[
'b', 'k', 'K', 'm', 'M', 'g', 'G', 't', 'T', 'P', 'E', 'Z', 'Y',
];
let mut size_string = input.trim().to_string();

if size_string.ends_with(|c: char| ALLOW_LIST.contains(&c) || c.is_ascii_digit()) {
// b 1, K 1024 (default)
if size_string.ends_with(|c: char| c.is_ascii_digit()) {
match size_string.chars().last() {
// Default unit is K (1024)
Some('0'..='9') => {
size_string.push('K');
} else if size_string.ends_with('b') {
}
// b should be treated as 1, but parse_size treats it as 1024.
// So remove the suffix
Some('b') => {
size_string.pop();
}
let size = parse_size(&size_string)?;
usize::try_from(size).map_err(|_| {
ParseSizeError::SizeTooBig(format!(
"Buffer size {} does not fit in address space",
size
))
})
} else if size_string.starts_with(|c: char| c.is_ascii_digit()) {
Err(ParseSizeError::InvalidSuffix("invalid suffix".to_string()))
// Valid suffixes that require no further preprocessing
// GNU sort (8.32) valid: 1b, k, K, m, M, g, G, t, T, P, E, Z, Y, %
// GNU sort (8.32) invalid: b, B, 1B, p, e, z, y
Some('k' | 'K' | 'm' | 'M' | 'g' | 'G' | 't' | 'T' | 'P' | 'E' | 'Z' | 'Y' | '%') => {}
// All other suffixes are invalid (only return InvalidSuffix if size_string is a number)
Some(_) if size_string.starts_with(|c: char| c.is_ascii_digit()) => {
return Err(ParseSizeError::InvalidSuffix("invalid suffix".to_string()))
}
// Everything else is not a number at all (ex. 'asd' or the empty string).
// A bare '%' or 'K' never reaches this arm: it is matched by the suffix arm
// above and left to the parser selected below.
Some(_) | None => {
return Err(ParseSizeError::ParseFailure("parse failure".to_string()))
}
};

// Percentages go through parse_memory_percentage; every other accepted
// form is handled by parse_size.
let parse_function = if size_string.ends_with('%') {
parse_memory_percentage
} else {
Err(ParseSizeError::ParseFailure("parse failure".to_string()))
}
parse_size
};

let size: u64 = parse_function(&size_string)?;

usize::try_from(size).map_err(|_| {
ParseSizeError::SizeTooBig(format!(
"Buffer size {} does not fit in address space",
size
))
})
}

/// Precompute some data needed for sorting.
Expand All @@ -387,6 +401,74 @@ impl GlobalSettings {
.count();
}
}
/// Returns the total amount of RAM reported by the kernel, or `None` when the
/// `sysinfo` call fails.
///
/// Linux / Android implementation, backed by `nix::sys::sysinfo`.
#[cfg(any(target_os = "linux", target_os = "android"))]
fn total_physical_memory() -> Option<u64> {
    match nix::sys::sysinfo::sysinfo() {
        Ok(info) => Some(info.ram_total()),
        // The caller only needs to know that the value is unavailable.
        Err(_) => None,
    }
}
/// Returns the total amount of physical memory in bytes, or `None` when the
/// `sysctl` query fails or returns a value of unexpected size.
///
/// macOS implementation: reads `hw.memsize` through `sysctl(3)`.
#[cfg(target_os = "macos")]
fn total_physical_memory() -> Option<u64> {
    use std::mem::size_of;
    use std::ptr::null_mut;
    use uucore::libc::{c_void, sysctl, CTL_HW, HW_MEMSIZE};

    // Zero-initialised so that reading it is sound regardless of how many
    // bytes the kernel actually wrote.
    let mut total_memory: u64 = 0;
    // MIB "path" of the queried value: hw.memsize.
    let mut mib = [CTL_HW, HW_MEMSIZE];
    // In: capacity of the output buffer. Out: number of bytes written.
    let mut size = size_of::<u64>();
    // SAFETY: `mib` points to `mib.len()` valid entries, `total_memory` is a
    // writable buffer of `size` bytes, and no new value is being set
    // (null pointer with length 0).
    let result = unsafe {
        sysctl(
            mib.as_mut_ptr(),
            mib.len() as _,
            &mut total_memory as *mut u64 as *mut c_void,
            &mut size,
            null_mut(),
            0,
        )
    };

    // Only trust the value if the call succeeded and filled the whole u64.
    if result == 0 && size == size_of::<u64>() {
        Some(total_memory)
    } else {
        None
    }
}
// spell-checker:ignore MEMORYSTATUSEX
/// Returns the total amount of physical memory in bytes, or `None` when the
/// system call reports a failure.
///
/// Windows implementation. `GlobalMemoryStatusEx` is used instead of
/// `GlobalMemoryStatus` because the latter can report incorrect values on
/// machines with more than 4 GB of memory and cannot signal failure.
#[cfg(target_os = "windows")]
fn total_physical_memory() -> Option<u64> {
    use std::mem::{size_of, zeroed};
    use windows_sys::Win32::System::SystemInformation::{GlobalMemoryStatusEx, MEMORYSTATUSEX};

    // SAFETY: MEMORYSTATUSEX is a plain C struct made of integers, for which
    // the all-zero bit pattern is a valid value.
    let mut mem_info: MEMORYSTATUSEX = unsafe { zeroed() };
    // The API requires the caller to announce the size of the structure.
    mem_info.dwLength = size_of::<MEMORYSTATUSEX>() as u32;
    // SAFETY: `mem_info` is a valid, writable MEMORYSTATUSEX whose `dwLength`
    // has been set as the API demands.
    let status = unsafe { GlobalMemoryStatusEx(&mut mem_info) };

    // A zero return value signals failure.
    if status != 0 {
        Some(mem_info.ullTotalPhys)
    } else {
        None
    }
}

/// Fallback for targets without a platform-specific implementation: the
/// amount of physical memory is reported as unknown, which makes percentage
/// arguments fail with an error instead of breaking the build.
#[cfg(not(any(
    target_os = "linux",
    target_os = "android",
    target_os = "macos",
    target_os = "windows"
)))]
fn total_physical_memory() -> Option<u64> {
    None
}

/// Parse a percentage of the total physical memory (ex. `"50%"`) into a byte count.
///
/// `size_string` must consist of an unsigned integer between 0 and 100
/// immediately followed by `%`.
///
/// # Errors
///
/// * `ParseSizeError::ParseFailure` if the `%` suffix is missing, the part
///   before it is not an unsigned integer, or the total amount of system
///   memory cannot be determined.
/// * `ParseSizeError::SizeTooBig` if the percentage is greater than 100.
fn parse_memory_percentage(size_string: &str) -> Result<u64, ParseSizeError> {
    // Parse the percentage as u128 to avoid overflows when multiplying with the memory size.
    // `strip_suffix` cannot panic on empty input or on a multi-byte last character.
    let percentage = size_string
        .strip_suffix('%')
        .and_then(|number| number.parse::<u128>().ok())
        .ok_or_else(|| ParseSizeError::ParseFailure(size_string.to_string()))?;

    // Can't allocate more than 100% of memory
    if percentage > 100 {
        return Err(ParseSizeError::SizeTooBig(size_string.to_string()));
    }

    // Query the system only once the input is known to be valid.
    let total_memory = total_physical_memory().ok_or_else(|| {
        ParseSizeError::ParseFailure("failed to retrieve total system memory".to_string())
    })?;

    let result: u128 = (percentage * u128::from(total_memory)) / 100;
    // This cast is lossless: percentage <= 100, so result is at most total_memory,
    // which is a u64.
    Ok(result as u64)
}

impl Default for GlobalSettings {
fn default() -> Self {
Expand Down Expand Up @@ -1896,6 +1978,41 @@ mod tests {
buffer
}

#[test]
fn test_parse_memory_percentages() {
    // The machine running the tests must report a plausible amount of RAM.
    let ram_total = total_physical_memory().unwrap();
    assert!(ram_total > 1024);
    assert!(ram_total < u64::MAX);

    // 0% is representable on every target.
    assert_eq!(GlobalSettings::parse_byte_count("0%").unwrap(), 0);

    // The remaining values may not fit into a 32-bit usize.
    #[cfg(not(target_pointer_width = "32"))]
    {
        let cases = [("100%", ram_total), ("50%", ram_total / 2)];
        for &(argument, expected_bytes) in &cases {
            assert_eq!(
                GlobalSettings::parse_byte_count(argument).unwrap(),
                expected_bytes as usize
            );
        }
    }
}
#[test]
fn test_invalid_memory_percentages() {
    // Anything above 100% is rejected as too big, echoing the argument back.
    for &argument in &["110%", "101%"] {
        assert_eq!(
            GlobalSettings::parse_byte_count(argument).unwrap_err(),
            ParseSizeError::SizeTooBig(argument.to_string())
        );
    }

    // A percent sign without a number cannot be parsed.
    let bare_percent = "%";
    assert_eq!(
        GlobalSettings::parse_byte_count(bare_percent).unwrap_err(),
        ParseSizeError::ParseFailure(bare_percent.to_string())
    );
}

#[test]
fn test_get_hash() {
let a = "Ted".to_string();
Expand Down
17 changes: 16 additions & 1 deletion tests/by-util/test_sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ fn test_buffer_sizes() {

#[cfg(not(target_pointer_width = "32"))]
{
let buffer_sizes = ["1000G", "10T"];
// Percentages are only tested here because on 32-bit targets they overflow usize when the machine has more than 4 GiB (2^32 bytes) of RAM.
let buffer_sizes = ["1000G", "10T", "10%", "100%"];
for buffer_size in &buffer_sizes {
TestScenario::new(util_name!())
.ucmd_keepenv()
Expand All @@ -63,13 +64,27 @@ fn test_invalid_buffer_size() {
.code_is(2)
.stderr_only("sort: invalid --buffer-size argument 'asd'");

new_ucmd!()
.arg("-S")
.arg("%")
.fails()
.code_is(2)
.stderr_only("sort: invalid --buffer-size argument '%'");

new_ucmd!()
.arg("-S")
.arg("100f")
.fails()
.code_is(2)
.stderr_only("sort: invalid suffix in --buffer-size argument '100f'");

new_ucmd!()
.arg("-S")
.arg("101%")
.fails()
.code_is(2)
.stderr_only("sort: --buffer-size argument '101%' too large");

#[cfg(not(target_pointer_width = "128"))]
new_ucmd!()
.arg("-n")
Expand Down