Skip to content

Commit

Permalink
fix: substr - correct behaivour with negative start pos
Browse files Browse the repository at this point in the history
  • Loading branch information
ovr committed Jan 24, 2022
1 parent 6ec18bb commit b5c0409
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 7 deletions.
78 changes: 78 additions & 0 deletions datafusion/src/physical_plan/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3582,6 +3582,30 @@ mod tests {
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
lit(ScalarValue::Int64(Some(0))),
],
Ok(Some("joséésoj")),
&str,
Utf8,
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
lit(ScalarValue::Int64(Some(-5))),
],
Ok(Some("joséésoj")),
&str,
Utf8,
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
Expand Down Expand Up @@ -3680,6 +3704,60 @@ mod tests {
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
lit(ScalarValue::Int64(Some(0))),
lit(ScalarValue::Int64(Some(5))),
],
Ok(Some("alpha")),
&str,
Utf8,
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
lit(ScalarValue::Int64(Some(-5))),
lit(ScalarValue::Int64(Some(10))),
],
Ok(Some("alpha")),
&str,
Utf8,
StringArray
);
// starting from -1 (4 + -5)
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
lit(ScalarValue::Int64(Some(-5))),
lit(ScalarValue::Int64(Some(4))),
],
Ok(Some("")),
&str,
Utf8,
StringArray
);
// starting from 0 (5 + -5)
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
lit(ScalarValue::Int64(Some(-5))),
lit(ScalarValue::Int64(Some(5))),
],
Ok(Some("")),
&str,
Utf8,
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
Expand Down
14 changes: 7 additions & 7 deletions datafusion/src/physical_plan/unicode_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,19 +455,19 @@ pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
Err(DataFusionError::Execution(
"negative substring length not allowed".to_string(),
))
} else if start <= 0 {
Ok(Some(string.to_string()))
} else {
let graphemes = string.graphemes(true).collect::<Vec<&str>>();
let start_pos = start as usize - 1;
let count_usize = count as usize;
if graphemes.len() < start_pos {
let start_pos =
if start > 0 { start as usize - 1 } else { 0 };
let count = if start < 0 { count + start } else { count };

if count <= 0 || graphemes.len() < start_pos {
Ok(Some("".to_string()))
} else if graphemes.len() < start_pos + count_usize {
} else if graphemes.len() < start_pos + (count as usize) {
Ok(Some(graphemes[start_pos..].concat()))
} else {
Ok(Some(
graphemes[start_pos..start_pos + count_usize]
graphemes[start_pos..start_pos + (count as usize)]
.concat(),
))
}
Expand Down

0 comments on commit b5c0409

Please sign in to comment.