Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(type): fix parsing array literal and printing struct value #13229

Merged
merged 22 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions e2e_test/batch/types/struct/struct.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,29 @@ select * from t where Row(1,v1*2) > Row(1,2);

statement ok
drop table t;

# row to text
query TTTT
select Row('a'), Row(''), Row('"'), Row(' a ');
----
(a) ("") ("""") (" a ")

query TTT
select Row('{}'), Row('[]'), Row('()'), Row(',');
----
({}) ([]) ("()") (",")

query TTT
select Row(NULL), Row(NULL, NULL), Row('null');
----
() (,) (null)

query TTT
select Row(Array[] :: varchar[]), Row(Array[1] :: varchar[]), Row(Array[1,2] :: varchar[]);
----
({}) ({1}) ("{1,2}")

query T
select Row(Array['"'] :: varchar[]);
----
("{""\\""""}")
8 changes: 4 additions & 4 deletions e2e_test/streaming/struct_table.slt
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ flush;
query II
select * from t1 order by v1;
----
1 (2,(1,0))
2 (5,(4,1))
3 (6,(3,4))
4 (3,(2,2))
1 (2,"(1,0)")
2 (5,"(4,1)")
3 (6,"(3,4)")
4 (3,"(2,2)")

statement ok
create materialized view mv3 as select * from t1 order by (v2).v3;
Expand Down
128 changes: 127 additions & 1 deletion src/common/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::borrow::Cow;
use std::cmp::Ordering;
use std::fmt;
use std::fmt::Debug;
Expand All @@ -31,7 +32,8 @@ use crate::buffer::{Bitmap, BitmapBuilder};
use crate::estimate_size::EstimateSize;
use crate::row::Row;
use crate::types::{
hash_datum, DataType, Datum, DatumRef, DefaultOrd, Scalar, ScalarRefImpl, ToDatumRef, ToText,
hash_datum, DataType, Datum, DatumRef, DefaultOrd, Scalar, ScalarImpl, ScalarRefImpl,
ToDatumRef, ToText,
};
use crate::util::memcmp_encoding;
use crate::util::value_encoding::estimate_serialize_datum_size;
Expand Down Expand Up @@ -624,6 +626,85 @@ impl<'a> From<&'a ListValue> for ListRef<'a> {
}
}

impl ListValue {
/// Construct an array from literal string.
pub fn from_str(input: &str, data_type: &DataType) -> Result<Self, String> {
let elem_type = data_type.as_list();
let trimmed = input.trim();
if !trimmed.starts_with('{') {
return Err(r#"Array value must start with "{""#.into());
}
if !trimmed.ends_with('}') {
return Err(eoi());
}
let trimmed = &trimmed[1..trimmed.len() - 1];

fn eoi() -> String {
"Unexpected end of input.".into()
}

let mut elems = Vec::new();
let mut depth = 0;
let mut start = 0;
let mut chars = trimmed.chars().enumerate();
while let Some((i, c)) = chars.next() {
match c {
'{' => depth += 1,
'}' => depth -= 1,
'"' => loop {
match chars.next().ok_or_else(eoi)?.1 {
'\\' => {
chars.next().ok_or_else(eoi)?;
}
'"' => break,
_ => {}
}
},
',' if depth == 0 => {
elems.push(match trimmed[start..i].trim() {
"" => return Err("Unexpected \"}\" character.".into()),
s if s.eq_ignore_ascii_case("null") => None,
s => Some(ScalarImpl::from_literal(&unquote(s), elem_type)?),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ScalarImpl::from_literal and ScalarImpl::from_text shall be consolidate to a FromText (opposite of ToText). For now I am okay with using either one.

});
start = i + 1;
}
_ => {}
}
}
if depth != 0 {
return Err(eoi());
}
match trimmed[start..].trim() {
"" => {
if !elems.is_empty() {
return Err("Unexpected \"}\" character.".into());
}
}
s if s.eq_ignore_ascii_case("null") => elems.push(None),
s => elems.push(Some(ScalarImpl::from_literal(&unquote(s), elem_type)?)),
}
Ok(ListValue::new(elems))
}
}

/// Remove double quotes from a string.
fn unquote(input: &str) -> Cow<'_, str> {
if !(input.starts_with('"') && input.ends_with('"')) {
return Cow::Borrowed(input);
}

let mut output = String::with_capacity(input.len() - 2);
let mut chars = input[1..input.len() - 1].chars().peekable();

while let Some(mut ch) = chars.next() {
if ch == '\\' {
ch = chars.next().unwrap();
}
output.push(ch);
}
output.into()
}

#[cfg(test)]
mod tests {
use more_asserts::{assert_gt, assert_lt};
Expand Down Expand Up @@ -1035,4 +1116,49 @@ mod tests {
let scalar = list_ref.get(1).unwrap();
assert_eq!(scalar, Some(types::ScalarRefImpl::Int32(5)));
}

#[test]
fn test_from_to_literal() {
#[track_caller]
fn test(typestr: &str, input: &str, output: Option<&str>) {
let datatype: DataType = typestr.parse().unwrap();
let list = ListValue::from_str(input, &datatype).unwrap();
let actual = list.as_scalar_ref().to_text();
let output = output.unwrap_or(input);
assert_eq!(actual, output);
}

#[track_caller]
fn test_err(typestr: &str, input: &str, err: &str) {
let datatype: DataType = typestr.parse().unwrap();
let actual_err = ListValue::from_str(input, &datatype).unwrap_err();
assert_eq!(actual_err, err);
}

test("varchar[]", "{}", None);
test("varchar[]", "{1 2}", Some(r#"{"1 2"}"#));
test("int[]", "{1,2,3}", None);
test("varchar[]", r#"{"1,2"}"#, None);
test("varchar[]", r#"{1, ""}"#, Some(r#"{1,""}"#));
test("varchar[]", r#"{"\""}"#, None);
test(
"varchar[]",
r#"{"null", "NULL", null, NuLL}"#,
Some(r#"{"null","NULL",NULL,NULL}"#),
);
test(
"varchar[][]",
"{{1, 2, 3}, {4, 5, 6}}",
Some("{{1,2,3},{4,5,6}}"),
);
test(
"varchar[][][]",
"{{{1, 2, 3}}, {{4, 5, 6}}}",
Some("{{{1,2,3}},{{4,5,6}}}"),
);
test_err("varchar[]", "()", r#"Array value must start with "{""#);
test_err("varchar[]", "{1,", r#"Unexpected end of input."#);
test_err("varchar[]", "{1,}", r#"Unexpected "}" character."#);
test_err("varchar[]", "{1,,3}", r#"Unexpected "}" character."#);
}
}
82 changes: 80 additions & 2 deletions src/common/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
// limitations under the License.

use core::fmt;
use std::borrow::Cow;
use std::cmp::Ordering;
use std::fmt::Debug;
use std::fmt::{Debug, Write};
use std::hash::Hash;
use std::sync::Arc;

Expand Down Expand Up @@ -411,6 +412,7 @@ impl Debug for StructRef<'_> {

impl ToText for StructRef<'_> {
fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
let mut raw_text = String::new();
iter_fields_ref!(*self, it, {
write!(f, "(")?;
let mut is_first = true;
Expand All @@ -420,7 +422,12 @@ impl ToText for StructRef<'_> {
} else {
write!(f, ",")?;
}
ToText::write(&x, f)?;
// print nothing for null
if x.is_some() {
raw_text.clear();
x.write(&mut raw_text)?;
quote_if_need(&raw_text, f)?;
}
}
write!(f, ")")
})
Expand All @@ -434,6 +441,58 @@ impl ToText for StructRef<'_> {
}
}

/// Double quote a string if it contains any special characters.
fn quote_if_need(input: &str, writer: &mut impl Write) -> std::fmt::Result {
if !input.is_empty() // non-empty
&& input.trim() == input // no leading or trailing whitespace
&& !input.contains(['(', ')', ',', '"', '\\'])
{
return writer.write_str(input);
}

writer.write_char('"')?;

for ch in input.chars() {
match ch {
'"' => writer.write_str("\"\"")?,
'\\' => writer.write_str("\\\\")?,
_ => writer.write_char(ch)?,
}
}

writer.write_char('"')
}

/// Remove double quotes from a string.
/// This is the reverse of [`quote_if_need`].
#[allow(dead_code)]
fn unquote_if_need(input: &str) -> Cow<'_, str> {
if !(input.starts_with('"') && input.ends_with('"')) {
return Cow::Borrowed(input);
}

let mut output = String::with_capacity(input.len() - 2);
let mut chars = input[1..input.len() - 1].chars().peekable();

while let Some(ch) = chars.next() {
match ch {
'"' => {
if chars.peek() == Some(&'"') {
chars.next();
output.push('"');
}
}
'\\' => {
if let Some(next_char) = chars.next() {
output.push(next_char);
}
}
_ => output.push(ch),
}
}
output.into()
}

#[cfg(test)]
mod tests {
use more_asserts::assert_gt;
Expand Down Expand Up @@ -711,4 +770,23 @@ mod tests {
assert_eq!(lhs_serialized.cmp(&rhs_serialized), order);
}
}

#[test]
fn test_quote() {
#[track_caller]
fn test(input: &str, quoted: &str) {
let mut actual = String::new();
quote_if_need(input, &mut actual).unwrap();
assert_eq!(quoted, actual);
assert_eq!(unquote_if_need(quoted), input);
}
test("abc", "abc");
test("", r#""""#);
test(" x ", r#"" x ""#);
test(r#"a"bc"#, r#""a""bc""#);
test(r#"a\bc"#, r#""a\\bc""#);
test("{1}", "{1}");
test("{1,2}", r#""{1,2}""#);
test(r#"{"f": 1}"#, r#""{""f"": 1}""#);
}
}
7 changes: 3 additions & 4 deletions src/common/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -997,11 +997,10 @@ impl ScalarImpl {
DataType::Timestamptz => Timestamptz::from_str(s).map_err(|e| e.to_string())?.into(),
DataType::Time => Time::from_str(s).map_err(|e| e.to_string())?.into(),
DataType::Interval => Interval::from_str(s).map_err(|e| e.to_string())?.into(),
// Not processing list or struct literal right now. Leave it for later phase (normal backend
// evaluation).
DataType::List { .. } => return Err("not supported".into()),
DataType::List { .. } => ListValue::from_str(s, t)?.into(),
// Not processing struct literal right now. Leave it for later phase (normal backend evaluation).
DataType::Struct(_) => return Err("not supported".into()),
DataType::Jsonb => return Err("not supported".into()),
DataType::Jsonb => JsonbVal::from_str(s).map_err(|e| e.to_string())?.into(),
DataType::Bytea => str_to_bytea(s)?.into(),
})
}
Expand Down
5 changes: 5 additions & 0 deletions src/expr/impl/benches/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,11 @@ fn bench_expr(c: &mut Criterion) {
});
}

c.bench_function("cast(character varying) -> int8[]", |bencher| {
let expr = build_from_pretty(r#"(cast:int8[] {1,"2"}:varchar)"#);
bencher.to_async(FuturesExecutor).iter(|| expr.eval(&input))
});

let sigs = FUNCTION_REGISTRY
.iter_aggregates()
.sorted_by_cached_key(|sig| format!("{sig:?}"));
Expand Down
Loading