diff --git a/Cargo.toml b/Cargo.toml
index fcac7d25..b1a0468f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,16 +8,21 @@ edition = "2021"
 
 [features]
 default = ["full-opa"]
+base64 = ["dep:data-encoding"]
+base64url = ["dep:data-encoding"]
 crypto = ["dep:constant_time_eq", "dep:hmac", "dep:hex", "dep:md-5", "dep:sha1", "dep:sha2"]
 deprecated = []
+hex = ["dep:data-encoding"]
 glob = ["dep:wax"]
+jsonschema = ["dep:jsonschema"]
 regex = ["dep:regex"]
 semver = ["dep:semver"]
+urlquery = ["dep:url"]
 yaml = ["serde_yaml"]
-full-opa = ["crypto", "deprecated", "glob", "regex", "semver", "yaml"]
+full-opa = ["base64", "base64url", "crypto", "deprecated", "glob", "hex", "jsonschema", "regex", "semver", "urlquery", "yaml"]
 
 [dependencies]
-anyhow = "1.0.66"
+anyhow = {version = "1.0.66", features = ["backtrace"] }
 serde = {version = "1.0.150", features = ["derive", "rc"] }
 serde_json = {version = "1.0.89", features = ["arbitrary_precision"] }
 serde_yaml = {version = "0.9.16", optional = true }
@@ -25,11 +30,7 @@ log = "0.4.17"
 env_logger="0.10.0"
 lazy_static = "1.4.0"
 rand = "0.8.5"
-data-encoding = "2.4.0"
 num = "0.4.1"
-rust_decimal = { version = "1.33.1", features = ["serde-with-arbitrary-precision"] }
-
-
 
 # Crypto
 constant_time_eq = {version = "0.3.0", optional = true}
@@ -39,9 +40,17 @@ hex = {version = "0.4.3", optional = true}
 sha1 = {version = "0.10.6", optional = true}
 md-5 = {version = "0.10.6", optional = true}
 
+data-encoding = { version = "2.4.0", optional = true }
+jsonschema = { version = "0.17.1", optional = true }
+
 regex = {version = "1.10.2", optional = true}
 semver = {version = "1.0.20", optional = true}
 wax = { version = "0.6.0", features = [], default-features = false, optional = true }
+url = { version = "2.5.0", optional = true }
+dashu-float = { version = "0.4.1", features = ["num-traits"] }
+num-traits = "0.2.17"
+dashu-base = "0.4.0"
+
 
 [dev-dependencies]
 clap = { version = "4.4.7", features = ["derive"] }
diff --git a/src/builtins/encoding.rs b/src/builtins/encoding.rs
index 7df87b8c..8ce83f0f 100644
--- a/src/builtins/encoding.rs
+++ b/src/builtins/encoding.rs
@@ -7,16 +7,40 @@ use crate::builtins::utils::{ensure_args_count, ensure_string};
 use crate::lexer::Span;
 use crate::value::Value;
 
-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 
-use anyhow::{Context, Result};
-use data_encoding::BASE64;
+use anyhow::{bail, Context, Result};
 
 pub fn register(m: &mut HashMap<&'static str, builtins::BuiltinFcn>) {
-    m.insert("base64.decode", (base64_decode, 1));
+    #[cfg(feature = "base64")]
+    {
+        m.insert("base64.decode", (base64_decode, 1));
+        m.insert("base64.encode", (base64_encode, 1));
+        m.insert("base64.is_valid", (base64_is_valid, 1));
+    }
+    #[cfg(feature = "base64url")]
+    {
+        m.insert("base64url.decode", (base64url_decode, 1));
+        m.insert("base64url.encode", (base64url_encode, 1));
+        m.insert("base64url.encode_no_pad", (base64url_encode_no_pad, 1));
+    }
+    #[cfg(feature = "hex")]
+    {
+        m.insert("hex.decode", (hex_decode, 1));
+        m.insert("hex.encode", (hex_encode, 1));
+    }
+    #[cfg(feature = "urlquery")]
+    {
+        m.insert("urlquery.decode_object", (urlquery_decode_object, 1));
+    }
+
     m.insert("json.is_valid", (json_is_valid, 1));
     m.insert("json.marshal", (json_marshal, 1));
-    m.insert("jsonunmarshal", (json_unmarshal, 1));
+    m.insert("json.unmarshal", (json_unmarshal, 1));
+    #[cfg(feature = "jsonschema")]
+    {
+        m.insert("json.match_schema", (json_match_schema, 2));
+        m.insert("json.verify_schema", (json_verify_schema, 1));
+    }
 
     #[cfg(feature = "yaml")]
     {
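For orientation, each feature-gated block above registers a builtin as a `(function pointer, arity)` pair keyed by its Rego name. The standalone sketch below shows the same registry-and-dispatch pattern in miniature; the names and types (`Value`, `BuiltinFcn`, `strings.upper`) are hypothetical stand-ins, not the crate's actual definitions.

```rust
use std::collections::HashMap;

// Hypothetical stand-ins for the crate's builtin signature: (function pointer, arity).
type Value = String;
type BuiltinFcn = (fn(&[Value]) -> Value, u8);

fn upper(args: &[Value]) -> Value {
    args[0].to_uppercase()
}

// In the real crate this insert would sit behind a #[cfg(feature = "...")] block,
// so disabling the cargo feature removes the builtin from the registry entirely.
fn register(m: &mut HashMap<&'static str, BuiltinFcn>) {
    m.insert("strings.upper", (upper, 1));
}

fn main() {
    let mut builtins = HashMap::new();
    register(&mut builtins);
    // Dispatch by Rego-style name, checking the declared arity first.
    if let Some((f, arity)) = builtins.get("strings.upper") {
        assert_eq!(*arity, 1);
        println!("{}", f(&["hello".to_string()]));
    }
}
```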
@@ -26,6 +50,7 @@ pub fn register(m: &mut HashMap<&'static str, builtins::BuiltinFcn>) {
     }
 }
+#[cfg(feature = "base64")]
 fn base64_decode(
     span: &Span,
     params: &[Ref<Expr>],
     args: &[Value],
@@ -36,12 +61,183 @@ fn base64_decode(
     _strict: bool,
 ) -> Result<Value> {
     let name = "base64.decode";
     ensure_args_count(span, name, params, args, 1)?;
 
     let encoded_str = ensure_string(name, &params[0], &args[0])?;
-    let decoded_bytes = BASE64.decode(encoded_str.as_bytes())?;
+    let decoded_bytes = data_encoding::BASE64.decode(encoded_str.as_bytes())?;
+    Ok(Value::String(
+        String::from_utf8_lossy(&decoded_bytes).into(),
+    ))
+}
+
+#[cfg(feature = "base64")]
+fn base64_encode(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    _strict: bool,
+) -> Result<Value> {
+    let name = "base64.encode";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let string = ensure_string(name, &params[0], &args[0])?;
+    Ok(Value::String(
+        data_encoding::BASE64.encode(string.as_bytes()).into(),
+    ))
+}
+
+#[cfg(feature = "base64")]
+fn base64_is_valid(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    _strict: bool,
+) -> Result<Value> {
+    let name = "base64.is_valid";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let encoded_str = ensure_string(name, &params[0], &args[0])?;
+    Ok(Value::Bool(
+        data_encoding::BASE64.decode(encoded_str.as_bytes()).is_ok(),
+    ))
+}
+
+#[cfg(feature = "base64url")]
+fn base64url_decode(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    _strict: bool,
+) -> Result<Value> {
+    let name = "base64url.decode";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let encoded_str = ensure_string(name, &params[0], &args[0])?;
+    let decoded_bytes = match data_encoding::BASE64URL.decode(encoded_str.as_bytes()) {
+        Ok(b) => b,
+        // Also accept values encoded without padding.
+        Err(_) => match data_encoding::BASE64URL_NOPAD.decode(encoded_str.as_bytes()) {
+            Ok(b) => b,
+            Err(_) => bail!(params[0].span().error("not a valid base64url value")),
+        },
+    };
+
+    Ok(Value::String(
+        String::from_utf8_lossy(&decoded_bytes).into(),
+    ))
+}
+
+#[cfg(feature = "base64url")]
+fn base64url_encode(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    _strict: bool,
+) -> Result<Value> {
+    let name = "base64url.encode";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let string = ensure_string(name, &params[0], &args[0])?;
+    Ok(Value::String(
+        data_encoding::BASE64URL.encode(string.as_bytes()).into(),
+    ))
+}
+
+#[cfg(feature = "base64url")]
+fn base64url_encode_no_pad(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    _strict: bool,
+) -> Result<Value> {
+    let name = "base64url.encode_no_pad";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let string = ensure_string(name, &params[0], &args[0])?;
+    Ok(Value::String(
+        data_encoding::BASE64URL_NOPAD
+            .encode(string.as_bytes())
+            .into(),
+    ))
+}
+
+#[cfg(feature = "hex")]
+fn hex_decode(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool) -> Result<Value> {
+    let name = "hex.decode";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let encoded_str = ensure_string(name, &params[0], &args[0])?;
+    let decoded_bytes = data_encoding::HEXLOWER_PERMISSIVE.decode(encoded_str.as_bytes())?;
     Ok(Value::String(
         String::from_utf8_lossy(&decoded_bytes).into(),
     ))
 }
 
+#[cfg(feature = "hex")]
+fn hex_encode(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool) -> Result<Value> {
+    let name = "hex.encode";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let string = ensure_string(name, &params[0], &args[0])?;
+    Ok(Value::String(
+        data_encoding::HEXLOWER_PERMISSIVE
+            .encode(string.as_bytes())
+            .into(),
+    ))
+}
+
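The padded/unpadded fallback used by `base64url.decode` above relies only on the data-encoding crate already added to Cargo.toml. A minimal, self-contained sketch of that fallback:

```rust
use data_encoding::{BASE64URL, BASE64URL_NOPAD};

/// Decode a URL-safe base64 string, accepting both padded and unpadded input.
fn decode_url_b64(s: &str) -> Option<Vec<u8>> {
    BASE64URL
        .decode(s.as_bytes())
        .or_else(|_| BASE64URL_NOPAD.decode(s.as_bytes()))
        .ok()
}

fn main() {
    // "foob" encodes to "Zm9vYg==" (padded) or "Zm9vYg" (unpadded).
    assert_eq!(decode_url_b64("Zm9vYg==").as_deref(), Some(&b"foob"[..]));
    assert_eq!(decode_url_b64("Zm9vYg").as_deref(), Some(&b"foob"[..]));
    assert!(decode_url_b64("not base64!").is_none());
}
```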
+#[cfg(feature = "urlquery")]
+fn urlquery_decode_object(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    _strict: bool,
+) -> Result<Value> {
+    let name = "urlquery.decode_object";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let string = ensure_string(name, &params[0], &args[0])?;
+    let url_string = "https://non-existent?".to_owned() + string.as_ref();
+    let url = match url::Url::parse(&url_string) {
+        Ok(v) => v,
+        Err(_) => bail!(params[0].span().error("not a valid url query")),
+    };
+
+    let mut map = BTreeMap::new();
+    for (k, v) in url.query_pairs() {
+        let key = Value::String(k.clone().into());
+        let value = Value::String(v.clone().into());
+        if let Ok(a) = map.entry(key).or_insert(Value::new_array()).as_array_mut() {
+            a.push(value)
+        }
+    }
+    Ok(Value::from_map(map))
+}
+/*
+#[cfg(feature = "urlquery")]
+fn urlquery_encode(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    _strict: bool,
+) -> Result<Value> {
+    let name = "urlquery.encode";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    let string = ensure_string(name, &params[0], &args[0])?;
+    let url_string = "https://non-existent?" + string;
+    let url = url::Url::parse(&url_string)
+        .map_err(|_| bail!(params[0].span().error("not a valid url query")))?;
+
+    Ok(Value::from_object(
+        url.query_pairs()
+            .map(|(k, v)| (Value::from(k.clone()), Value::from(v.clone())))
+            .collect(),
+    ))
+}*/
+
 #[cfg(feature = "yaml")]
 fn yaml_is_valid(
     span: &Span,
@@ -114,3 +310,72 @@ fn json_unmarshal(
     let json_str = ensure_string(name, &params[0], &args[0])?;
     Value::from_json_str(&json_str).with_context(|| span.error("could not deserialize json."))
 }
+
+#[cfg(feature = "jsonschema")]
+fn compile_json_schema(param: &Ref<Expr>, arg: &Value) -> Result<jsonschema::JSONSchema> {
+    let schema_str = match arg {
+        Value::String(schema_str) => schema_str.as_ref().to_string(),
+        _ => arg.to_json_str()?,
+    };
+
+    if let Ok(schema) = serde_json::from_str(&schema_str) {
+        match jsonschema::JSONSchema::compile(&schema) {
+            Ok(schema) => return Ok(schema),
+            Err(e) => bail!(e.to_string()),
+        }
+    }
+    bail!(param.span().error("not a valid json schema"))
+}
+
+#[cfg(feature = "jsonschema")]
+fn json_verify_schema(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    strict: bool,
+) -> Result<Value> {
+    let name = "json.verify_schema";
+    ensure_args_count(span, name, params, args, 1)?;
+
+    Ok(Value::from_array(
+        match compile_json_schema(&params[0], &args[0]) {
+            Ok(_) => [Value::Bool(true), Value::Null],
+            Err(e) if strict => bail!(params[0]
+                .span()
+                .error(format!("invalid schema: {e}").as_str())),
+            Err(e) => [Value::Bool(false), Value::String(e.to_string().into())],
+        }
+        .to_vec(),
+    ))
+}
+
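For reference, the `[valid, error]` pair returned by these builtins wraps the jsonschema crate's compile/validate API (0.17, as added in Cargo.toml). A small standalone sketch of that API shape:

```rust
use jsonschema::JSONSchema;
use serde_json::json;

fn main() {
    let schema = json!({"type": "object", "properties": {"x": {"type": "integer"}}});
    let compiled = JSONSchema::compile(&schema).expect("schema should compile");

    // Collect validation errors into strings, mirroring the [valid, errors] pair.
    let instance = json!({"x": "not an integer"});
    let result = match compiled.validate(&instance) {
        Ok(()) => (true, vec![]),
        Err(errors) => (false, errors.map(|e| e.to_string()).collect::<Vec<_>>()),
    };
    assert!(!result.0);
    println!("{:?}", result.1);
}
```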
+#[cfg(feature = "jsonschema")]
+fn json_match_schema(
+    span: &Span,
+    params: &[Ref<Expr>],
+    args: &[Value],
+    strict: bool,
+) -> Result<Value> {
+    let name = "json.match_schema";
+    ensure_args_count(span, name, params, args, 2)?;
+
+    // The following is expected to succeed.
+    let document: serde_json::Value = serde_json::from_str(&args[0].to_json_str()?)?;
+
+    Ok(Value::from_array(
+        match compile_json_schema(&params[1], &args[1]) {
+            Ok(schema) => match schema.validate(&document) {
+                Ok(_) => [Value::Bool(true), Value::Null],
+                Err(e) => [
+                    Value::Bool(false),
+                    Value::from_array(e.map(|e| Value::String(e.to_string().into())).collect()),
+                ],
+            },
+            Err(e) if strict => bail!(params[1]
+                .span()
+                .error(format!("invalid schema: {e}").as_str())),
+            Err(e) => [Value::Bool(false), Value::String(e.to_string().into())],
+        }
+        .to_vec(),
+    ))
+}
diff --git a/src/builtins/objects.rs b/src/builtins/objects.rs
index 7f23a52e..830150d6 100644
--- a/src/builtins/objects.rs
+++ b/src/builtins/objects.rs
@@ -15,7 +15,7 @@ use anyhow::{bail, Result};
 
 pub fn register(m: &mut HashMap<&'static str, builtins::BuiltinFcn>) {
     m.insert("json.filter", (json_filter, 2));
-    // m.insert("json.patch", (json_patch));
+    m.insert("json.remove", (json_remove, 2));
     m.insert("object.filter", (filter, 2));
     m.insert("object.get", (get, 3));
     m.insert("object.keys", (keys, 1));
@@ -25,6 +25,9 @@ pub fn register(m: &mut HashMap<&'static str, builtins::BuiltinFcn>) {
 
 fn json_filter_impl(v: &Value, filter: &Value) -> Value {
     let filters = match filter {
+        Value::Object(fields) if fields.len() == 1 && filter[&Value::Null] == Value::Null => {
+            return v.clone()
+        }
         Value::Object(fields) if !fields.is_empty() => fields,
         _ => return v.clone(),
     };
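The `Value::Null` marker checked above (and inserted by `merge_filters` further down) is how slash-separated filter paths collapse into one nested object. A rough standalone sketch of that path-merging idea, using a plain struct instead of the crate's `Value` type:

```rust
use std::collections::BTreeMap;

#[derive(Debug, Default)]
struct Node {
    children: BTreeMap<String, Node>,
    // Set when a filter path ends here (plays the role of the Value::Null marker).
    terminal: bool,
}

/// Merge slash-separated paths such as "a/b" and "a/c" into one prefix tree.
fn merge(paths: &[&str]) -> Node {
    let mut root = Node::default();
    for path in paths {
        let mut node = &mut root;
        for part in path.split('/') {
            node = node.children.entry(part.to_string()).or_default();
        }
        node.terminal = true;
    }
    root
}

fn main() {
    let filters = merge(&["a/b", "a/c", "d"]);
    // "a" has two children; "d" is a terminal node with no children.
    assert_eq!(filters.children["a"].children.len(), 2);
    assert!(filters.children["d"].terminal);
}
```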
@@ -76,6 +79,69 @@ fn json_filter_impl(v: &Value, filter: &Value) -> Value {
     }
 }
 
+fn json_remove_impl(v: &Value, filter: &Value) -> Value {
+    let filters = match filter {
+        Value::Object(fields) if !fields.is_empty() => fields,
+        _ => return v.clone(),
+    };
+
+    if filter[&Value::Null] == Value::Null {
+        return Value::Undefined;
+    }
+
+    match v {
+        Value::Array(a) => {
+            let mut items = vec![];
+            for (idx, item) in a.iter().enumerate() {
+                let idx = Value::String(format!("{idx}").into());
+                if let Some(f) = filters.get(&idx) {
+                    let v = json_remove_impl(item, f);
+                    if v != Value::Undefined {
+                        items.push(v);
+                    }
+                } else {
+                    // Retain the item.
+                    items.push(item.clone());
+                }
+            }
+            Value::from_array(items)
+        }
+
+        Value::Set(s) => {
+            let mut items = BTreeSet::new();
+            for item in s.iter() {
+                if let Some(f) = filters.get(item) {
+                    let v = json_remove_impl(item, f);
+                    if v != Value::Undefined {
+                        items.insert(v);
+                    }
+                } else {
+                    // Retain the item.
+                    items.insert(item.clone());
+                }
+            }
+            Value::from_set(items)
+        }
+
+        Value::Object(obj) => {
+            let mut items = BTreeMap::new();
+            for (key, value) in obj.iter() {
+                if let Some(f) = filters.get(key) {
+                    let v = json_remove_impl(value, f);
+                    if v != Value::Undefined {
+                        items.insert(key.clone(), v);
+                    }
+                } else {
+                    items.insert(key.clone(), value.clone());
+                }
+            }
+            Value::from_map(items)
+        }
+
+        _ => Value::Undefined,
+    }
+}
+
 fn merge_filters(
     name: &str,
     param: &Expr,
@@ -94,6 +160,9 @@ fn merge_filters(
                 }
                 f = vref;
             }
+            if let Ok(f) = f.as_object_mut() {
+                f.insert(Value::Null, Value::Null);
+            };
             filters = fc;
         }
         Some(Value::Array(a)) => {
@@ -109,6 +178,9 @@ fn merge_filters(
                 };
                 f = vref;
             }
+            if let Ok(f) = f.as_object_mut() {
+                f.insert(Value::Null, Value::Null);
+            };
             filters = fc;
         }
         Some(_) => {
@@ -119,6 +191,7 @@ fn merge_filters(
             None => break,
         }
     }
+
     Ok(filters)
 }
 
@@ -133,9 +206,29 @@ fn json_filter(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool)
         _ => bail!(span.error(format!("`{name}` requires set/array argument").as_str())),
     };
 
+    if let Ok(v) = filters.as_object() {
+        if v.is_empty() {
+            return Ok(Value::new_object());
+        }
+    }
+
     Ok(json_filter_impl(&args[0], &filters))
 }
 
+fn json_remove(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool) -> Result<Value> {
+    let name = "json.remove";
+    ensure_args_count(span, name, params, args, 2)?;
+    ensure_object(name, &params[0], args[0].clone())?;
+
+    let filters = match &args[1] {
+        Value::Array(a) => merge_filters(name, &params[1], &mut a.iter(), Value::new_object())?,
+        Value::Set(s) => merge_filters(name, &params[1], &mut s.iter(), Value::new_object())?,
+        _ => bail!(span.error(format!("`{name}` requires set/array argument").as_str())),
+    };
+
+    Ok(json_remove_impl(&args[0], &filters))
+}
+
 fn filter(span: &Span, params: &[Ref<Expr>], args: &[Value], _strict: bool) -> Result<Value> {
     let name = "object.filter";
     ensure_args_count(span, name, params, args, 2)?;
diff --git a/src/interpreter.rs b/src/interpreter.rs
index 2b441494..2e68e408 100644
--- a/src/interpreter.rs
+++ b/src/interpreter.rs
@@ -4,6 +4,7 @@
 use crate::ast::*;
 use crate::builtins::{self, BuiltinFcn};
 use crate::lexer::*;
+use crate::number::*;
 use crate::parser::Parser;
 use crate::scheduler::*;
 use crate::utils::*;
@@ -14,6 +15,7 @@ use log::info;
 use serde::Serialize;
 use std::collections::{hash_map::Entry, BTreeMap, BTreeSet, HashMap};
 use std::rc::Rc;
+use std::str::FromStr;
 
 type Scope = BTreeMap<SourceStr, Value>;
 
@@ -104,7 +106,7 @@ struct LoopExpr {
     span: Span,
     expr: ExprRef,
     value: ExprRef,
-    index: SourceStr,
+    index: Ref<Expr>,
 }
 
 impl Interpreter {
@@ -348,20 +350,26 @@ impl Interpreter {
         // First hoist any loops in refr
         self.hoist_loops_impl(refr, loops);
 
+        // hoist any loops in index expression.
+        self.hoist_loops_impl(index, loops);
+
         // Then hoist the current bracket operation.
-        match index.as_ref() {
+        let mut indices = Vec::with_capacity(1);
+        let _ = traverse(index, &mut |e| match e.as_ref() {
             Var(ident) if self.is_loop_index_var(&ident.source_str()) => {
-                loops.push(LoopExpr {
-                    span: span.clone(),
-                    expr: expr.clone(),
-                    value: refr.clone(),
-                    index: ident.source_str(),
-                })
-            }
-            _ => {
-                // hoist any loops in index expression.
-                self.hoist_loops_impl(index, loops);
+                indices.push(ident.source_str());
+                Ok(false)
             }
+            Array { .. } | Object { .. } => Ok(true),
+            _ => Ok(false),
+        });
+        if !indices.is_empty() {
+            loops.push(LoopExpr {
+                span: span.clone(),
+                expr: expr.clone(),
+                value: refr.clone(),
+                index: index.clone(),
+            })
         }
     }
 
@@ -503,10 +511,16 @@ impl Interpreter {
         let (name, value) = match op {
             AssignOp::Eq => {
                 match (lhs.as_ref(), rhs.as_ref()) {
-                    (_, Expr::Var(var)) if self.lookup_var(var, &[], true)? == Value::Undefined => {
+                    (_, Expr::Var(var))
+                        if var.source_str().text() != "input"
+                            && self.lookup_var(var, &[], true)? == Value::Undefined =>
+                    {
                         (var.source_str(), self.eval_expr(lhs)?)
                     }
-                    (Expr::Var(var), _) if self.lookup_var(var, &[], true)? == Value::Undefined => {
+                    (Expr::Var(var), _)
+                        if var.source_str().text() != "input"
+                            && self.lookup_var(var, &[], true)? == Value::Undefined =>
+                    {
                         (var.source_str(), self.eval_expr(rhs)?)
                     }
                     (
@@ -565,7 +579,7 @@ impl Interpreter {
                 let mut cache = BTreeMap::new();
                 let mut type_match = BTreeSet::new();
                 return self
-                    .make_bindings(false, &mut type_match, &mut cache, lhs, &value)
+                    .make_bindings(false, &mut type_match, &mut cache, lhs, &value, false)
                     .map(Value::Bool);
             }
             (_, Expr::Array { .. }) => {
@@ -573,7 +587,7 @@ impl Interpreter {
                 let mut cache = BTreeMap::new();
                 let mut type_match = BTreeSet::new();
                 return self
-                    .make_bindings(false, &mut type_match, &mut cache, rhs, &value)
+                    .make_bindings(false, &mut type_match, &mut cache, rhs, &value, false)
                     .map(Value::Bool);
             }
             (Expr::Object { .. }, _) => {
@@ -581,7 +595,7 @@ impl Interpreter {
                 let mut cache = BTreeMap::new();
                 let mut type_match = BTreeSet::new();
                 return self
-                    .make_bindings(false, &mut type_match, &mut cache, lhs, &value)
+                    .make_bindings(false, &mut type_match, &mut cache, lhs, &value, false)
                     .map(Value::Bool);
             }
             (_, Expr::Object { .. }) => {
@@ -589,7 +603,7 @@ impl Interpreter {
                 let mut cache = BTreeMap::new();
                 let mut type_match = BTreeSet::new();
                 return self
-                    .make_bindings(false, &mut type_match, &mut cache, rhs, &value)
+                    .make_bindings(false, &mut type_match, &mut cache, rhs, &value, false)
                     .map(Value::Bool);
             }
             // Treat the assignment as comparison if neither lhs nor rhs is a variable
@@ -608,7 +622,7 @@ impl Interpreter {
             let mut cache = BTreeMap::new();
             let mut type_match = BTreeSet::new();
             return self
-                .make_bindings(false, &mut type_match, &mut cache, lhs, &rhs_value)
+                .make_bindings(false, &mut type_match, &mut cache, lhs, &rhs_value, false)
                 .map(Value::Bool);
         };
 
@@ -734,6 +748,7 @@ impl Interpreter {
         cache: &mut BTreeMap,
         expr: &ExprRef,
         value: &Value,
+        check_existing_value: bool,
     ) -> Result<bool> {
         // Propagate undefined.
        if value == &Value::Undefined {
@@ -744,6 +759,13 @@ impl Interpreter {
         match (expr.as_ref(), value) {
             (Expr::Var(ident), _) if ident.text().as_ref() == &"_" => Ok(true),
 
+            (Expr::Var(ident), _)
+                if check_existing_value
+                    && self.lookup_local_var(&ident.source_str()) == Some(value.clone()) =>
+            {
+                Ok(false)
+            }
+
             (Expr::Var(ident), _) => {
                 self.add_variable(&ident.source_str(), value.clone())?;
                 Ok(true)
@@ -766,12 +788,19 @@ impl Interpreter {
             }
 
             type_match.insert(expr.clone());
-            let mut r = false;
+            let mut r = true;
             for (idx, item) in items.iter().enumerate() {
-                r = self.make_bindings(is_last, type_match, cache, item, &a[idx])? || r;
+                r = self.make_bindings(
+                    is_last,
+                    type_match,
+                    cache,
+                    item,
+                    &a[idx],
+                    check_existing_value,
+                )? && r;
             }
 
-            Ok(true)
+            Ok(r)
         }
 
         // Destructure objects
         (Expr::Object { fields, .. }, Value::Object(_)) => {
@@ -797,6 +826,7 @@ impl Interpreter {
                     cache,
                     value_expr,
                     field_value,
+                    check_existing_value,
                 )?;
             }
             type_match.insert(expr.clone());
@@ -837,10 +867,18 @@ impl Interpreter {
         cache: &mut BTreeMap,
         expr: &ExprRef,
         value: &Value,
+        check_existing_value: bool,
     ) -> Result<bool> {
         let prev = self.no_rules_lookup;
         self.no_rules_lookup = true;
-        let r = self.make_bindings_impl(is_last, type_match, cache, expr, value);
+        let r = self.make_bindings_impl(
+            is_last,
+            type_match,
+            cache,
+            expr,
+            value,
+            check_existing_value,
+        );
         self.no_rules_lookup = prev;
         r
     }
@@ -856,11 +894,11 @@ impl Interpreter {
         let (key_expr, value_expr) = exprs;
         let (key, value) = values;
         if let Some(key_expr) = key_expr {
-            if !self.make_bindings(is_last, type_match, cache, key_expr, key)? {
+            if !self.make_bindings(is_last, type_match, cache, key_expr, key, false)? {
                 return Ok(false);
             }
         }
-        self.make_bindings(is_last, type_match, cache, value_expr, value)
+        self.make_bindings(is_last, type_match, cache, value_expr, value, false)
     }
 
     fn eval_some_in(
@@ -1197,13 +1235,15 @@ impl Interpreter {
         // If the loop's index variable has already been assigned a value
         // (this can happen if the same index is used for two different collections),
         // then evaluate statements only if the index applies to this collection.
-        if let Some(idx) = self.lookup_local_var(&loop_expr.index) {
-            if loop_expr_value[&idx] != Value::Undefined {
-                result = self.eval_stmts_in_loop(stmts, &loops[1..])? || result;
-                return Ok(result);
-            } else if idx != Value::Undefined {
-                // The index is not valid for this collection.
-                return Ok(false);
+        if let Expr::Var(index_var) = loop_expr.index.as_ref() {
+            if let Some(idx) = self.lookup_local_var(&index_var.source_str()) {
+                if loop_expr_value[&idx] != Value::Undefined {
+                    result = self.eval_stmts_in_loop(stmts, &loops[1..])? || result;
+                    return Ok(result);
+                } else if idx != Value::Undefined {
+                    // The index is not valid for this collection.
+                    return Ok(false);
+                }
             }
         }
 
@@ -1217,9 +1257,20 @@ impl Interpreter {
             for (idx, v) in items.iter().enumerate() {
                 self.loop_var_values
                     .insert(loop_expr.expr.clone(), v.clone());
-                self.add_variable(&loop_expr.index, Value::from(idx))?;
-                result = self.eval_stmts_in_loop(stmts, &loops[1..])? || result;
+                let mut type_match = BTreeSet::new();
+                let mut cache = BTreeMap::new();
+                if self.make_bindings(
+                    false,
+                    &mut type_match,
+                    &mut cache,
+                    &loop_expr.index,
+                    &Value::from(idx),
+                    true,
+                )? {
+                    result = self.eval_stmts_in_loop(stmts, &loops[1..])? || result;
+                }
+
                 self.loop_var_values.remove(&loop_expr.expr);
                 *self.current_scope_mut()? = scope_saved.clone();
                 if let Some(ctx) = self.contexts.last_mut() {
@@ -1231,9 +1282,21 @@ impl Interpreter {
             for v in items.iter() {
                 self.loop_var_values
                     .insert(loop_expr.expr.clone(), v.clone());
+
                 // For sets, index is also the value.
-                self.add_variable(&loop_expr.index, v.clone())?;
-                result = self.eval_stmts_in_loop(stmts, &loops[1..])? || result;
+                let mut type_match = BTreeSet::new();
+                let mut cache = BTreeMap::new();
+                if self.make_bindings(
+                    false,
+                    &mut type_match,
+                    &mut cache,
+                    &loop_expr.index,
+                    v,
+                    true,
+                )? {
+                    result = self.eval_stmts_in_loop(stmts, &loops[1..])? || result;
+                }
+
                 self.loop_var_values.remove(&loop_expr.expr);
                 *self.current_scope_mut()? = scope_saved.clone();
                 if let Some(ctx) = self.contexts.last_mut() {
@@ -1246,8 +1309,18 @@ impl Interpreter {
                 self.loop_var_values
                     .insert(loop_expr.expr.clone(), v.clone());
                 // For objects, index is key.
-                self.add_variable(&loop_expr.index, k.clone())?;
-                result = self.eval_stmts_in_loop(stmts, &loops[1..])? || result;
+                let mut type_match = BTreeSet::new();
+                let mut cache = BTreeMap::new();
+                if self.make_bindings(
+                    false,
+                    &mut type_match,
+                    &mut cache,
+                    &loop_expr.index,
+                    k,
+                    true,
+                )? {
+                    result = self.eval_stmts_in_loop(stmts, &loops[1..])? || result;
+                }
                 self.loop_var_values.remove(&loop_expr.expr);
                 *self.current_scope_mut()? = scope_saved.clone();
                 if let Some(ctx) = self.contexts.last_mut() {
@@ -1801,8 +1874,14 @@ impl Interpreter {
         let mut type_match = BTreeSet::new();
 
         for (idx, a) in args.iter().enumerate() {
-            let b =
-                self.make_bindings(false, &mut type_match, &mut cache, a, &param_values[idx]);
+            let b = self.make_bindings(
+                false,
+                &mut type_match,
+                &mut cache,
+                a,
+                &param_values[idx],
+                false,
+            );
 
             if b.ok() != Some(true) {
                 self.scopes = scopes;
@@ -1913,7 +1992,7 @@ impl Interpreter {
         &mut self,
         span: &Span,
         fcn: &ExprRef,
-        params: &Vec<ExprRef>,
+        params: &[ExprRef],
         extra_arg: Option<ExprRef>,
         allow_return_arg: bool,
     ) -> Result<Value> {
@@ -1933,7 +2012,7 @@ impl Interpreter {
                 let ret_value = self.eval_call_impl(span, fcn, &params[..params.len() - 1])?;
                 let mut cache = BTreeMap::new();
                 let mut type_match = BTreeSet::new();
-                self.make_bindings(false, &mut type_match, &mut cache, &ea, &ret_value)
+                self.make_bindings(false, &mut type_match, &mut cache, &ea, &ret_value, false)
                     .map(Value::Bool)
             }
             _ => {
@@ -2081,14 +2160,15 @@ impl Interpreter {
             Expr::Null(_) => Ok(Value::Null),
             Expr::True(_) => Ok(Value::Bool(true)),
             Expr::False(_) => Ok(Value::Bool(false)),
-            Expr::Number(span) => match serde_json::from_str::<Value>(*span.text()) {
-                Ok(v) => Ok(v),
-                Err(e) => Err(span.source.error(
-                    span.line,
-                    span.col,
-                    format!("could not parse number. {e}").as_str(),
-                )),
-            },
+            Expr::Number(span) => match Number::from_str(*span.text()) {
+                Ok(v) => Ok(Value::Number(v)),
+                Err(_) => Err(span
+                    .source
+                    .error(span.line, span.col, "could not parse number")),
+            },
             // TODO: Handle string vs rawstring
             Expr::String(span) => {
                 match serde_json::from_str::<Value>(format!("\"{}\"", span.text()).as_str()) {
@@ -2189,7 +2269,7 @@ impl Interpreter {
         &mut self,
         ctx: Context,
         span: &Span,
-        bodies: &Vec<RuleBody>,
+        bodies: &[RuleBody],
     ) -> Result<Value> {
         let n_scopes = self.scopes.len();
         let result = if bodies.is_empty() {
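The `check_existing_value` flag threaded through `make_bindings` above makes a loop-index binding fail when the index variable is already bound to a different value (for example, two collections iterated with the same index). A rough, self-contained illustration of that rule — not the interpreter's actual data structures:

```rust
use std::collections::BTreeMap;

/// Bind `var` to `value`, or check it against an existing binding
/// (roughly what the check_existing_value flag does for loop indices).
fn bind(scope: &mut BTreeMap<String, u64>, var: &str, value: u64) -> bool {
    match scope.get(var) {
        Some(existing) => *existing == value, // already bound: must agree
        None => {
            scope.insert(var.to_string(), value);
            true
        }
    }
}

fn main() {
    // Rego-like situation: `a[i] == 1; b[i] == 2` — both loops share index `i`.
    let a = [1, 7];
    let b = [9, 2];
    let mut solutions = vec![];
    for (i, x) in a.iter().enumerate() {
        for (j, y) in b.iter().enumerate() {
            let mut scope = BTreeMap::new();
            let same_index = bind(&mut scope, "i", i as u64) && bind(&mut scope, "i", j as u64);
            if same_index && *x == 1 && *y == 2 {
                solutions.push(scope["i"]);
            }
        }
    }
    // a[0] == 1 but b[0] != 2, and b[1] == 2 but a[1] != 1, so no shared index works.
    assert!(solutions.is_empty());
}
```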
diff --git a/src/number.rs b/src/number.rs
index 8d46b196..c2977d7d 100644
--- a/src/number.rs
+++ b/src/number.rs
@@ -3,45 +3,52 @@
 use core::fmt::{Debug, Formatter};
 use std::cmp::{Ord, Ordering};
-use std::ops::{AddAssign, Div, MulAssign, Rem, SubAssign};
+use std::ops::{AddAssign, Div, MulAssign, SubAssign};
 use std::rc::Rc;
 use std::str::FromStr;
 
 use anyhow::{bail, Result};
-use num::{FromPrimitive, ToPrimitive};
-use rust_decimal;
+use dashu_float;
+use num_traits::cast::ToPrimitive;
 use serde::ser::Serializer;
 use serde::Serialize;
 
 pub type BigInt = i128;
 
+type BigFloat = dashu_float::DBig;
+const PRECISION: usize = 100;
+
 #[derive(Clone, Debug, PartialEq)]
 pub struct BigDecimal {
-    d: rust_decimal::Decimal,
+    d: BigFloat,
 }
 
-impl AsRef<rust_decimal::Decimal> for BigDecimal {
-    fn as_ref(&self) -> &rust_decimal::Decimal {
+impl AsRef<BigFloat> for BigDecimal {
+    fn as_ref(&self) -> &BigFloat {
         &self.d
     }
 }
 
-impl AsMut<rust_decimal::Decimal> for BigDecimal {
-    fn as_mut(&mut self) -> &mut rust_decimal::Decimal {
+impl AsMut<BigFloat> for BigDecimal {
+    fn as_mut(&mut self) -> &mut BigFloat {
         &mut self.d
     }
 }
 
-impl From<rust_decimal::Decimal> for BigDecimal {
-    fn from(value: rust_decimal::Decimal) -> Self {
+impl From<BigFloat> for BigDecimal {
+    fn from(value: BigFloat) -> Self {
         BigDecimal { d: value }
     }
 }
 
 impl From<i128> for BigDecimal {
     fn from(value: i128) -> Self {
-        BigDecimal { d: value.into() }
+        BigDecimal {
+            d: Into::<BigFloat>::into(value)
+                .with_precision(PRECISION)
+                .value(),
+        }
     }
 }
 
@@ -57,6 +64,12 @@ impl Serialize for BigDecimal {
     }
 }
 
+impl BigDecimal {
+    fn is_integer(&self) -> bool {
+        self.d.floor() == self.d
+    }
+}
+
 #[derive(Clone)]
 pub enum Number {
     // TODO: maybe specialize for u64, i64, f64
@@ -84,7 +97,7 @@ impl Serialize for Number {
             n.serialize(serializer)
         } else {
             if let Some(f) = self.as_f64() {
-                if &Number::from(f) == self {
+                if b.d.digits() <= 15 {
                     return f.serialize(serializer);
                 }
             }
@@ -100,69 +113,70 @@
 
 use Number::*;
 
+impl From<BigFloat> for Number {
+    fn from(n: BigFloat) -> Self {
+        Self::Big(BigDecimal::from(n.with_precision(PRECISION).value()).into())
+    }
+}
+
 impl From<u64> for Number {
     fn from(n: u64) -> Self {
-        Self::Big(BigDecimal { d: n.into() }.into())
+        BigFloat::from(n).into()
    }
 }
 
 impl From<usize> for Number {
     fn from(n: usize) -> Self {
-        Self::Big(BigDecimal { d: n.into() }.into())
+        BigFloat::from(n).into()
     }
 }
 
 impl From<u128> for Number {
     fn from(n: u128) -> Self {
-        Self::Big(BigDecimal { d: n.into() }.into())
+        BigFloat::from(n).into()
     }
 }
 
 impl From<i128> for Number {
     fn from(n: i128) -> Self {
-        Self::Big(BigDecimal { d: n.into() }.into())
+        BigFloat::from(n).into()
     }
 }
 
 impl From<i64> for Number {
     fn from(n: i64) -> Self {
-        Self::Big(BigDecimal { d: n.into() }.into())
+        BigFloat::from(n).into()
     }
 }
 
 impl From<f64> for Number {
     fn from(n: f64) -> Self {
-        match rust_decimal::Decimal::from_f64(n) {
-            Some(v) => v.into(),
-            _ => rust_decimal::Decimal::ZERO.into(),
+        // Reading from float is not precise. Therefore, serialize to string and read.
+        match Self::from_str(&format!("{n}")) {
+            Ok(v) => v,
+            _ => BigFloat::ZERO.into(),
         }
     }
 }
 
-impl From<rust_decimal::Decimal> for Number {
-    fn from(d: rust_decimal::Decimal) -> Self {
-        Self::Big(BigDecimal { d }.into())
-    }
-}
-
 impl Number {
     pub fn as_u64(&self) -> Option<u64> {
         match self {
-            Big(b) if b.d.is_integer() => b.d.to_u64(),
+            Big(b) if b.is_integer() => b.d.to_u64(),
             _ => None,
         }
     }
 
     pub fn as_i64(&self) -> Option<i64> {
         match self {
-            Big(b) if b.d.is_integer() => b.d.to_i64(),
+            Big(b) if b.is_integer() => b.d.to_i64(),
             _ => None,
         }
     }
 
     pub fn as_f64(&self) -> Option<f64> {
         match self {
-            Big(b) => b.d.to_f64(),
+            Big(b) => Some(b.d.to_binary().value().to_f64().value()),
         }
     }
 
@@ -187,10 +201,10 @@ impl FromStr for Number {
     type Err = ParseNumberError;
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
-        Ok(match rust_decimal::Decimal::from_str(s) {
-            Ok(v) => v.into(),
-            _ => f64::from_str(s).map_err(|_| ParseNumberError)?.into(),
-        })
+        if let Ok(v) = BigFloat::from_str(s) {
+            return Ok(v.into());
+        }
+        Ok(f64::from_str(s).map_err(|_| ParseNumberError)?.into())
     }
 }
 
@@ -222,7 +236,7 @@ impl Number {
     pub fn add_assign(&mut self, rhs: &Self) -> Result<()> {
         match (self, rhs) {
             (Big(ref mut a), Big(b)) => {
-                Rc::make_mut(a).d.add_assign(b.d);
+                Rc::make_mut(a).d.add_assign(&b.d);
             }
         }
         Ok(())
@@ -237,7 +251,7 @@ impl Number {
     pub fn sub_assign(&mut self, rhs: &Self) -> Result<()> {
         match (self, rhs) {
             (Big(ref mut a), Big(b)) => {
-                Rc::make_mut(a).d.sub_assign(b.d);
+                Rc::make_mut(a).d.sub_assign(&b.d);
             }
         }
         Ok(())
@@ -252,7 +266,7 @@ impl Number {
     pub fn mul_assign(&mut self, rhs: &Self) -> Result<()> {
         match (self, rhs) {
             (Big(ref mut a), Big(b)) => {
-                Rc::make_mut(a).d.mul_assign(b.d);
+                Rc::make_mut(a).d.mul_assign(&b.d);
             }
         }
         Ok(())
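The switch to dashu-float is what allows the 100-digit expectations in the updated test data further down. A minimal sketch using only calls that appear in this diff (`from_str`, `with_precision`, `value`, `div`); treat the exact precision/rounding behaviour as an assumption of the sketch:

```rust
use std::ops::Div;
use std::str::FromStr;

use dashu_float::DBig;

fn main() {
    // Decimal big-floats carried at ~100 digits of precision (vs rust_decimal's 28),
    // which is where the 100-digit 1/3 expectation in div.yaml comes from.
    let one = DBig::from_str("1").unwrap().with_precision(100).value();
    let three = DBig::from_str("3").unwrap().with_precision(100).value();
    let third = one.div(&three);
    println!("{third}");
}
```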
@@ -266,32 +280,36 @@ impl Number {
     pub fn divide(self, rhs: &Self) -> Result<Number> {
         Ok(match (self, rhs) {
-            (Big(a), Big(b)) => a.d.div(b.d).into(),
+            (Big(a), Big(b)) => a.d.clone().div(&b.d).into(),
         })
     }
 
     pub fn modulo(self, rhs: &Self) -> Result<Number> {
+        use dashu_base::RemEuclid;
         Ok(match (self, rhs) {
-            (Big(a), Big(b)) => a.d.rem(b.d).into(),
+            (Big(a), Big(b)) => a.d.clone().rem_euclid(&b.d).into(),
         })
     }
 
     pub fn is_integer(&self) -> bool {
         match self {
-            Big(b) => b.d.is_integer(),
+            Big(b) => b.is_integer(),
         }
     }
 
     pub fn is_positive(&self) -> bool {
         match self {
-            Big(b) => b.d.is_sign_positive(),
+            Big(b) => b.d.sign() == dashu_base::Sign::Positive,
         }
     }
 
     fn ensure_integers(a: &Number, b: &Number) -> Option<(BigInt, BigInt)> {
         match (a, b) {
-            (Big(a), Big(b)) if a.d.is_integer() && b.d.is_integer() => {
-                Some((a.d.mantissa(), b.d.mantissa()))
+            (Big(a), Big(b)) if a.is_integer() && b.is_integer() => {
+                match (a.d.to_i128(), b.d.to_i128()) {
+                    (Some(a), Some(b)) => Some((a, b)),
+                    _ => None,
+                }
             }
             _ => None,
         }
@@ -299,7 +317,7 @@ impl Number {
     fn ensure_integer(&self) -> Option<BigInt> {
         match self {
-            Big(a) if a.d.is_integer() => Some(a.d.mantissa()),
+            Big(a) if a.is_integer() => a.d.to_i128(),
             _ => None,
         }
     }
@@ -344,8 +362,9 @@ impl Number {
     }
 
     pub fn abs(&self) -> Number {
+        use dashu_base::Abs;
         match self {
-            Big(b) => b.d.abs().into(),
+            Big(b) => b.d.clone().abs().into(),
         }
     }
 
@@ -368,11 +387,21 @@ impl Number {
     }
 
     pub fn two_pow(e: i32) -> Number {
-        2.0_f64.powi(e).into()
+        use num_traits::Pow;
+        BigFloat::from(2)
+            .with_precision(80)
+            .value()
+            .pow(&BigFloat::from(e))
+            .into()
     }
 
     pub fn ten_pow(e: i32) -> Number {
-        10.0_f64.powi(e).into()
+        use num_traits::Pow;
+        BigFloat::from(10)
+            .with_precision(80)
+            .value()
+            .pow(&BigFloat::from(e))
+            .into()
     }
 
     pub fn format_bin(&self) -> String {
diff --git a/src/parser.rs b/src/parser.rs
index 1618b85a..2b82c0c5 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -119,7 +119,7 @@ impl<'source> Parser<'source> {
         Ok(comps)
     }
 
-    fn handle_import_future_keywords(&mut self, comps: &Vec<Span>) -> Result<bool> {
+    fn handle_import_future_keywords(&mut self, comps: &[Span]) -> Result<bool> {
         if comps.len() >= 2 && *comps[0].text() == "future" && *comps[1].text() == "keywords" {
             match comps.len() - 2 {
                 1 => self.set_future_keyword(&comps[2].text(), &comps[2])?,
diff --git a/src/scheduler.rs b/src/scheduler.rs
index 9de26258..6579a692 100644
--- a/src/scheduler.rs
+++ b/src/scheduler.rs
@@ -210,7 +210,7 @@ pub struct Scope {
     pub inputs: BTreeSet<SourceStr>,
 }
 
-fn traverse(expr: &Ref<Expr>, f: &mut dyn FnMut(&Ref<Expr>) -> Result<bool>) -> Result<()> {
+pub fn traverse(expr: &Ref<Expr>, f: &mut dyn FnMut(&Ref<Expr>) -> Result<bool>) -> Result<()> {
     if !f(expr)? {
         return Ok(());
     }
@@ -336,17 +336,10 @@ fn gather_input_vars(expr: &Ref<Expr>, parent_scopes: &[Scope], scope: &mut Scop
 
 fn gather_loop_vars(expr: &Ref<Expr>, parent_scopes: &[Scope], scope: &mut Scope) -> Result<()> {
     traverse(expr, &mut |e| match e.as_ref() {
-        Var(v) if var_exists(v, parent_scopes) => Ok(false),
         RefBrack { index, .. } => {
-            if let Var(v) = index.as_ref() {
-                if !matches!(*v.text(), "_" | "input" | "data") && !var_exists(v, parent_scopes) {
-                    // Treat this as an index var.
-                    scope.unscoped.insert(v.source_str());
-                }
-            }
+            gather_assigned_vars(index, false, parent_scopes, scope)?;
             Ok(true)
         }
         _ => Ok(true),
     })
 }
@@ -651,26 +644,31 @@ impl Analyzer {
             }
             RefBrack { refr, index, .. } => {
-                if let Var(v) = index.as_ref() {
-                    let var = v.source_str();
-                    if scope.locals.contains_key(&var) || scope.unscoped.contains(&var) {
-                        let (rb_used_vars, rb_comprs) = Self::gather_used_vars_comprs_index_vars(
-                            refr,
-                            scope,
-                            first_use,
-                            definitions,
-                            assigned_vars,
-                        )?;
-                        definitions.push(Definition {
-                            var: var.clone(),
-                            used_vars: rb_used_vars.clone(),
-                        });
-                        used_vars.extend(rb_used_vars);
-                        used_vars.push(var);
-                        comprs.extend(rb_comprs);
-                        return Ok(false);
+                traverse(index, &mut |e| match e.as_ref() {
+                    Var(v) => {
+                        let var = v.source_str();
+                        if scope.locals.contains_key(&var) || scope.unscoped.contains(&var) {
+                            let (rb_used_vars, rb_comprs) =
+                                Self::gather_used_vars_comprs_index_vars(
+                                    refr,
+                                    scope,
+                                    first_use,
+                                    definitions,
+                                    assigned_vars,
+                                )?;
+                            definitions.push(Definition {
+                                var: var.clone(),
+                                used_vars: rb_used_vars.clone(),
+                            });
+                            used_vars.extend(rb_used_vars);
+                            used_vars.push(var);
+                            comprs.extend(rb_comprs);
+                        }
+                        Ok(false)
                     }
-                }
+                    Array { .. } | Object { .. } => Ok(true),
+                    _ => Ok(false),
+                })?;
                 Ok(true)
             }
@@ -812,9 +810,10 @@ impl Analyzer {
             self.gather_assigned_vars(lhs, scope, check_first_use, first_use)?;
 
         for var in &assigned_vars {
+            let used_vars = used_vars.iter().filter(|v| v != &var).cloned().collect();
             definitions.push(Definition {
                 var: var.clone(),
-                used_vars: used_vars.clone(),
+                used_vars,
             });
         }
         if assigned_vars.is_empty() {
@@ -837,9 +836,10 @@ impl Analyzer {
         let assigned_vars = self.gather_assigned_vars(rhs, scope, check_first_use, first_use)?;
 
         for var in &assigned_vars {
+            let used_vars = used_vars.iter().filter(|v| v != &var).cloned().collect();
             definitions.push(Definition {
                 var: var.clone(),
-                used_vars: used_vars.clone(),
+                used_vars,
             });
         }
         if assigned_vars.is_empty() {
diff --git a/tests/interpreter/cases/builtins/numbers/div.yaml b/tests/interpreter/cases/builtins/numbers/div.yaml
index b37900f8..068d025f 100644
--- a/tests/interpreter/cases/builtins/numbers/div.yaml
+++ b/tests/interpreter/cases/builtins/numbers/div.yaml
@@ -20,7 +20,7 @@ cases:
         z {
           x = 1/3
-          y = 0.3333333333333333333333333333
+          y = 0.3333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333
           x == y
         }
     query: data.test
diff --git a/tests/interpreter/cases/builtins/units/parse.yaml b/tests/interpreter/cases/builtins/units/parse.yaml
index 857c3445..31da93f3 100644
--- a/tests/interpreter/cases/builtins/units/parse.yaml
+++ b/tests/interpreter/cases/builtins/units/parse.yaml
@@ -96,14 +96,14 @@ cases:
           - 1152921504606846976
           - 1152921504606846976
         p4:
-          - 1.1805916207174113e21
-          - 1.1805916207174113e21
-          - 1.1805916207174113e21
-          - 1.1805916207174113e21
-          - 1.2089258196146292e24
-          - 1.2089258196146292e24
-          - 1.2089258196146292e24
-          - 1.2089258196146292e24
+          - 1180591620717411303424
+          - 1180591620717411303424
+          - 1180591620717411303424
+          - 1180591620717411303424
+          - 1208925819614629174706176
+          - 1208925819614629174706176
+          - 1208925819614629174706176
+          - 1208925819614629174706176
         p5: [1, 1]
 
 - note: extra argument
diff --git a/tests/opa.passing b/tests/opa.passing
index 87ff1446..f4662dd8 100644
--- a/tests/opa.passing
+++ b/tests/opa.passing
@@ -4,6 +4,8 @@ any
 arithmetic
 array
 assignments
+base64builtins
+base64urlbuiltins
 bitsand
 bitsnegate
 bitsor
@@ -14,6 +16,7 @@ casts
 comparisonexpr
 completedoc
 compositebasedereference
+compositereferences
 comprehensions
 containskeyword
 cryptohmacequal
@@ -29,6 +32,7 @@ defaultkeyword
 disjunction
 elsekeyword
 embeddedvirtualdoc
+eqexpr
 evaltermexpr
 every
 example
@@ -38,10 +42,13 @@ functions
 globmatch
 globquotemeta
 helloworld
+hexbuiltins
 indexing
 indirectreferences
+inputvalues
 intersection
 invalidkeyerror
+jsonfilter
 jsonfilteridempotent
 jwtencodesignheadererrors
 jwtencodesignpayloaderrors
diff --git a/tests/opa.rs b/tests/opa.rs
index 208ab24f..97e0f9c6 100644
--- a/tests/opa.rs
+++ b/tests/opa.rs
@@ -61,7 +61,15 @@ fn eval_test_case(case: &TestCase) -> Result<Value> {
         engine.set_input(input.clone());
     }
     if let Some(input_term) = &case.input_term {
-        let input = Value::from_json_str(&input_term)?;
+        let input = match engine.eval_query(input_term.clone(), true)?.result.last() {
+            Some(r) if r.expressions.last().is_some() => r
+                .expressions
+                .last()
+                .expect("no expressions in result")
+                .value
+                .clone(),
+            _ => bail!("no results in evaluated input term"),
+        };
         engine.set_input(input);
     }
     if let Some(modules) = &case.modules {
@@ -89,6 +97,22 @@ fn eval_test_case(case: &TestCase) -> Result<Value> {
     Value::from_json_str(&result.to_string())
 }
 
+fn json_schema_tests_check(actual: &Value, expected: &Value) -> bool {
+    // Fetch `x` binding.
+    let actual = &actual[0][&Value::String("x".into())];
+    let expected = &expected[0][&Value::String("x".into())];
+
+    match (actual, expected) {
+        (Value::Array(actual), Value::Array(expected))
+            if actual.len() == expected.len() && actual.len() == 2 =>
+        {
+            // Only check the result since error messages may be different.
+            actual[0] == expected[0]
+        }
+        _ => false,
+    }
+}
+
 fn run_opa_tests(opa_tests_dir: String, folders: &[String]) -> Result<()> {
     println!("OPA TESTSUITE: {opa_tests_dir}");
     let tests_path = Path::new(&opa_tests_dir);
@@ -126,7 +150,15 @@ fn run_opa_tests(opa_tests_dir: String, folders: &[String]) -> Result<()> {
             let test: YamlTest = serde_yaml::from_str(&yaml_str)?;
 
             for case in &test.cases {
+                let is_json_schema_test = case.note.starts_with("json_verify_schema")
+                    || case.note.starts_with("json_match_schema");
+
                 match (eval_test_case(case), &case.want_result) {
+                    (Ok(actual), Some(expected))
+                        if is_json_schema_test && json_schema_tests_check(&actual, expected) =>
+                    {
+                        entry.0 += 1;
+                    }
                     (Ok(actual), Some(expected)) if &actual == expected => {
                         entry.0 += 1;
                     }
diff --git a/tests/parser/mod.rs b/tests/parser/mod.rs
index eecf189f..9344af87 100644
--- a/tests/parser/mod.rs
+++ b/tests/parser/mod.rs
@@ -79,7 +79,7 @@ fn match_vec(s: &Span, vec: &Vec<Ref<Expr>>, v: &Value) -> Result<()> {
     Ok(())
 }
 
-fn match_object(s: &Span, fields: &Vec<(Span, Ref, Ref)>, v: &Value) -> Result<()> {
+fn match_object(s: &Span, fields: &[(Span, Ref, Ref)], v: &Value) -> Result<()> {
     if skip_value(v) {
         return Ok(());
     }
@@ -525,7 +525,7 @@ fn match_rule_body(b: &RuleBody, v: &Value) -> Result<()> {
     match_query(&b.query, &v["query"])
 }
 
-fn match_rule_bodies(span: &Span, bodies: &Vec<RuleBody>, v: &Value) -> Result<()> {
+fn match_rule_bodies(span: &Span, bodies: &[RuleBody], v: &Value) -> Result<()> {
     if skip_value(v) {
         return Ok(());
     }