Skip to content

Commit

Permalink
fix: substr - correct behaivour with negative start pos
Browse files Browse the repository at this point in the history
  • Loading branch information
ovr committed Jan 24, 2022
1 parent 6ec18bb commit d5f13ee
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 4 deletions.
78 changes: 78 additions & 0 deletions datafusion/src/physical_plan/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3582,6 +3582,30 @@ mod tests {
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
lit(ScalarValue::Int64(Some(0))),
],
Ok(Some("joséésoj")),
&str,
Utf8,
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
lit(ScalarValue::Int64(Some(-5))),
],
Ok(Some("joséésoj")),
&str,
Utf8,
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
Expand Down Expand Up @@ -3680,6 +3704,60 @@ mod tests {
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
lit(ScalarValue::Int64(Some(0))),
lit(ScalarValue::Int64(Some(5))),
],
Ok(Some("alpha")),
&str,
Utf8,
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
lit(ScalarValue::Int64(Some(-5))),
lit(ScalarValue::Int64(Some(10))),
],
Ok(Some("alpha")),
&str,
Utf8,
StringArray
);
// starting from -1 (4 + -5)
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
lit(ScalarValue::Int64(Some(-5))),
lit(ScalarValue::Int64(Some(4))),
],
Ok(Some("")),
&str,
Utf8,
StringArray
);
// starting from 0 (5 + -5)
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
lit(ScalarValue::Int64(Some(-5))),
lit(ScalarValue::Int64(Some(5))),
],
Ok(Some("")),
&str,
Utf8,
StringArray
);
#[cfg(feature = "unicode_expressions")]
test_function!(
Substr,
&[
Expand Down
20 changes: 16 additions & 4 deletions datafusion/src/physical_plan/unicode_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,12 +455,24 @@ pub fn substr<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
Err(DataFusionError::Execution(
"negative substring length not allowed".to_string(),
))
} else if start <= 0 {
Ok(Some(string.to_string()))
} else {
let graphemes = string.graphemes(true).collect::<Vec<&str>>();
let start_pos = start as usize - 1;
let count_usize = count as usize;
let start_pos = if start > 0 {
start as usize - 1
} else {
0
};
let count_usize = if start < 0 {
let count_size = count + start;
if count_size <= 0 {
return Ok(Some("".to_string()))
} else {
count_size as usize
}
} else {
count as usize
};

if graphemes.len() < start_pos {
Ok(Some("".to_string()))
} else if graphemes.len() < start_pos + count_usize {
Expand Down

0 comments on commit d5f13ee

Please sign in to comment.