Skip to content

Commit

Permalink
Fix the panic when lpad/rpad parameter is negative
Browse files Browse the repository at this point in the history
* fix the panic when lpad/rpad parameter is negative

* add lpad test
  • Loading branch information
ZuoTiJia committed Oct 14, 2022
1 parent 0aa050a commit 3cd5b16
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 26 deletions.
4 changes: 4 additions & 0 deletions datafusion/core/tests/sql/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ async fn test_unicode_expressions() -> Result<()> {
test_expression!("length('chars')", "5");
test_expression!("length('josé')", "4");
test_expression!("length(NULL)", "NULL");
test_expression!("lpad('hi', -1, 'xy')", "");
test_expression!("lpad('hi', 5, 'xy')", "xyxhi");
test_expression!("lpad('hi', -1)", "");
test_expression!("lpad('hi', 0)", "");
test_expression!("lpad('hi', 21, 'abcdef')", "abcdefabcdefabcdefahi");
test_expression!("lpad('hi', 5, 'xy')", "xyxhi");
Expand All @@ -71,7 +73,9 @@ async fn test_unicode_expressions() -> Result<()> {
test_expression!("right('abcde', CAST(NULL AS INT))", "NULL");
test_expression!("right(NULL, 2)", "NULL");
test_expression!("right(NULL, CAST(NULL AS INT))", "NULL");
test_expression!("rpad('hi', -1, 'xy')", "");
test_expression!("rpad('hi', 5, 'xy')", "hixyx");
test_expression!("rpad('hi', -1)", "");
test_expression!("rpad('hi', 0)", "");
test_expression!("rpad('hi', 21, 'abcdef')", "hiabcdefabcdefabcdefa");
test_expression!("rpad('hi', 5, 'xy')", "hixyx");
Expand Down
74 changes: 48 additions & 26 deletions datafusion/physical-expr/src/unicode_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,23 +129,29 @@ pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
.zip(length_array.iter())
.map(|(string, length)| match (string, length) {
(Some(string), Some(length)) => {
let length = length as usize;
if length > i32::MAX as i64 {
return Err(DataFusionError::Internal(
"lpad requested length too large".to_string(),
));
}

let length = if length < 0 { 0 } else { length as usize };
if length == 0 {
Some("".to_string())
Ok(Some("".to_string()))
} else {
let graphemes = string.graphemes(true).collect::<Vec<&str>>();
if length < graphemes.len() {
Some(graphemes[..length].concat())
Ok(Some(graphemes[..length].concat()))
} else {
let mut s: String = " ".repeat(length - graphemes.len());
s.push_str(string);
Some(s)
Ok(Some(s))
}
}
}
_ => None,
_ => Ok(None),
})
.collect::<GenericStringArray<T>>();
.collect::<Result<GenericStringArray<T>>>()?;

Ok(Arc::new(result) as ArrayRef)
}
Expand All @@ -160,18 +166,23 @@ pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
.zip(fill_array.iter())
.map(|((string, length), fill)| match (string, length, fill) {
(Some(string), Some(length), Some(fill)) => {
let length = length as usize;
if length > i32::MAX as i64 {
return Err(DataFusionError::Internal(
"lpad requested length too large".to_string(),
));
}

let length = if length < 0 { 0 } else { length as usize };
if length == 0 {
Some("".to_string())
Ok(Some("".to_string()))
} else {
let graphemes = string.graphemes(true).collect::<Vec<&str>>();
let fill_chars = fill.chars().collect::<Vec<char>>();

if length < graphemes.len() {
Some(graphemes[..length].concat())
Ok(Some(graphemes[..length].concat()))
} else if fill_chars.is_empty() {
Some(string.to_string())
Ok(Some(string.to_string()))
} else {
let mut s = string.to_string();
let mut char_vector =
Expand All @@ -185,13 +196,13 @@ pub fn lpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
0,
char_vector.iter().collect::<String>().as_str(),
);
Some(s)
Ok(Some(s))
}
}
}
_ => None,
_ => Ok(None),
})
.collect::<GenericStringArray<T>>();
.collect::<Result<GenericStringArray<T>>>()?;

Ok(Arc::new(result) as ArrayRef)
}
Expand Down Expand Up @@ -262,24 +273,29 @@ pub fn rpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
.zip(length_array.iter())
.map(|(string, length)| match (string, length) {
(Some(string), Some(length)) => {
let length = length as usize;
if length > i32::MAX as i64 {
return Err(DataFusionError::Internal(
"lpad requested length too large".to_string(),
));
}

let length = if length < 0 { 0 } else { length as usize };
if length == 0 {
Some("".to_string())
Ok(Some("".to_string()))
} else {
let graphemes = string.graphemes(true).collect::<Vec<&str>>();
if length < graphemes.len() {
Some(graphemes[..length].concat())
Ok(Some(graphemes[..length].concat()))
} else {
let mut s = string.to_string();
s.push_str(" ".repeat(length - graphemes.len()).as_str());
Some(s)
Ok(Some(s))
}
}
}
_ => None,
_ => Ok(None),
})
.collect::<GenericStringArray<T>>();

.collect::<Result<GenericStringArray<T>>>()?;
Ok(Arc::new(result) as ArrayRef)
}
3 => {
Expand All @@ -293,14 +309,20 @@ pub fn rpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
.zip(fill_array.iter())
.map(|((string, length), fill)| match (string, length, fill) {
(Some(string), Some(length), Some(fill)) => {
let length = length as usize;
if length > i32::MAX as i64 {
return Err(DataFusionError::Internal(
"lpad requested length too large".to_string(),
));
}

let length = if length < 0 { 0 } else { length as usize };
let graphemes = string.graphemes(true).collect::<Vec<&str>>();
let fill_chars = fill.chars().collect::<Vec<char>>();

if length < graphemes.len() {
Some(graphemes[..length].concat())
Ok(Some(graphemes[..length].concat()))
} else if fill_chars.is_empty() {
Some(string.to_string())
Ok(Some(string.to_string()))
} else {
let mut s = string.to_string();
let mut char_vector =
Expand All @@ -310,12 +332,12 @@ pub fn rpad<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
.push(*fill_chars.get(l % fill_chars.len()).unwrap());
}
s.push_str(char_vector.iter().collect::<String>().as_str());
Some(s)
Ok(Some(s))
}
}
_ => None,
_ => Ok(None),
})
.collect::<GenericStringArray<T>>();
.collect::<Result<GenericStringArray<T>>>()?;

Ok(Arc::new(result) as ArrayRef)
}
Expand Down

0 comments on commit 3cd5b16

Please sign in to comment.