From f471403fe5c0275c586d44f6110cb581f07bda40 Mon Sep 17 00:00:00 2001 From: Anand Krishnamoorthi <anakrish@microsoft.com> Date: Wed, 31 Jan 2024 21:10:35 -0800 Subject: [PATCH] Conform to OPA 0.61.0. Implement `import rego.v1` https://www.openpolicyagent.org/docs/latest/policy-language/#the-regov1-import - `if` required before rule body - import rego.v1 automatically imports future.keywords - handle import shadowing - data, input cannot be shadowed - deprecated functions as disallowed - rules must have assignment or body - `contains` required for parital set Signed-off-by: Anand Krishnamoorthi <anakrish@microsoft.com> --- Cargo.toml | 1 - README.md | 8 +- src/ast.rs | 1 + src/interpreter.rs | 19 +- src/parser.rs | 75 +++- src/tests/interpreter/mod.rs | 1 - src/utils.rs | 13 +- tests/interpreter/cases/rego.v1/tests.yaml | 481 +++++++++++++++++++++ tests/opa.rs | 2 +- tests/parser/cases/import/import.yaml | 4 +- 10 files changed, 575 insertions(+), 30 deletions(-) create mode 100644 tests/interpreter/cases/rego.v1/tests.yaml diff --git a/Cargo.toml b/Cargo.toml index 2bd61db1..11447b04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,7 +89,6 @@ uuid = { version = "1.6.1", features = ["v4", "fast-rng"], optional = true } jsonschema = { version = "0.17.1", default-features = false, optional = true } chrono = { version = "0.4.31", optional = true } chrono-tz = { version = "0.8.5", optional = true } -document-features = "0.2.8" [dev-dependencies] diff --git a/README.md b/README.md index 50d33d03..2159c4c7 100644 --- a/README.md +++ b/README.md @@ -58,15 +58,15 @@ $ cargo build -r --example regorus --features "yaml" --no-default-features; stri ``` -Regorus passes the [OPA v0.60.0 test-suite](https://www.openpolicyagent.org/docs/latest/ir/#test-suite) barring a few +Regorus passes the [OPA v0.61.0 test-suite](https://www.openpolicyagent.org/docs/latest/ir/#test-suite) barring a few builtins. See [OPA Conformance](#opa-conformance) below. ## Bindings Regorus can be used from a variety of languages: -- Javascript: Via npm package `regorusjs`. This package is Regorus compiled into WASM. -- Python: Via `regorus` package. +- Javascript: To compile Regorus to WASM and use it in Javascript, see [bindings/wasm](bindings/wasm) +- Python: To use Regorus from Python, see [bindings/python](bindings/python) ## Getting Started @@ -199,7 +199,7 @@ Benchmark 1: opa eval -b tests/aci -d tests/aci/data.json -i tests/aci/input.jso ``` ## OPA Conformance -Regorus has been verified to be compliant with [OPA v0.60.0](https://github.com/open-policy-agent/opa/releases/tag/v0.60.0) +Regorus has been verified to be compliant with [OPA v0.61.0](https://github.com/open-policy-agent/opa/releases/tag/v0.61.0) using a [test driver](https://github.com/microsoft/regorus/blob/main/tests/opa.rs) that loads and runs the OPA testsuite using Regorus, and verifies that expected outputs are produced. diff --git a/src/ast.rs b/src/ast.rs index 23e4ea33..40cb219b 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -353,6 +353,7 @@ pub struct Module { pub package: Package, pub imports: Vec<Import>, pub policy: Vec<Ref<Rule>>, + pub rego_v1: bool, } pub type ExprRef = Ref<Expr>; diff --git a/src/interpreter.rs b/src/interpreter.rs index 8b8e154f..0f69e688 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -1998,14 +1998,14 @@ impl Interpreter { } } - fn lookup_function_by_name(&self, path: &str) -> Option<&Vec<Ref<Rule>>> { + fn lookup_function_by_name(&self, path: &str) -> Option<(&Vec<Ref<Rule>>, &Ref<Module>)> { let mut path = path.to_owned(); if !path.starts_with("data.") { path = self.current_module_path.clone() + "." + &path; } match self.functions.get(&path) { - Some((f, _)) => Some(f), + Some((f, _, m)) => Some((f, m)), _ => None, } } @@ -2058,7 +2058,8 @@ impl Interpreter { #[cfg(feature = "deprecated")] if let Some(builtin) = builtins::DEPRECATED.get(path) { - if !self.allow_deprecated { + let allow = self.allow_deprecated && !self.current_module()?.rego_v1; + if !allow { bail!(span.error(format!("{path} is deprecated").as_str())) } return Ok(Some(builtin)); @@ -2120,9 +2121,9 @@ impl Interpreter { _ => orig_fcn_path.clone(), }; - let empty = vec![]; - let fcns_rules = match self.lookup_function_by_name(&fcn_path) { - Some(r) => r, + let empty: Vec<Ref<Rule>> = vec![]; + let (fcns_rules, fcn_module) = match self.lookup_function_by_name(&fcn_path) { + Some((fcns, m)) => (fcns, Some(m.clone())), _ => { if self.default_rules.get(&fcn_path).is_some() || self @@ -2131,10 +2132,10 @@ impl Interpreter { .is_some() { // process default functions later. - &empty + (&empty, self.module.clone()) } // Look up builtin function. - else if let Ok(Some(builtin)) = self.lookup_builtin(span, &fcn_path) { + else if let Some(builtin) = self.lookup_builtin(span, &fcn_path)? { let r = self.eval_builtin_call(span, &fcn_path.clone(), *builtin, params); if let Some(with_functions) = with_functions_saved { self.with_functions = with_functions; @@ -2213,6 +2214,7 @@ impl Interpreter { ..Context::default() }; + let prev_module = self.set_current_module(fcn_module.clone())?; let value = match self.eval_rule_bodies(ctx, span, bodies) { Ok(v) => v, Err(e) => { @@ -2222,6 +2224,7 @@ impl Interpreter { continue; } }; + self.set_current_module(prev_module)?; let result = match &value { Value::Set(s) if s.len() == 1 => s.iter().next().unwrap().clone(), diff --git a/src/parser.rs b/src/parser.rs index 09d29576..1fa5d3b3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -15,6 +15,7 @@ pub struct Parser<'source> { line: u16, end: u16, future_keywords: BTreeMap<String, Span>, + rego_v1: bool, } const FUTURE_KEYWORDS: [&str; 4] = ["contains", "every", "if", "in"]; @@ -30,6 +31,7 @@ impl<'source> Parser<'source> { line: 0, end: 0, future_keywords: BTreeMap::new(), + rego_v1: false, }) } @@ -76,19 +78,19 @@ impl<'source> Parser<'source> { pub fn set_future_keyword(&mut self, kw: &str, span: &Span) -> Result<()> { match &self.future_keywords.get(kw) { - Some(s) if false => Err(self.source.error( + Some(s) if self.rego_v1 => Err(self.source.error( span.line, span.col, format!( "this import shadows previous import of `{kw}` defined at:{}", - self.source - .message(s.line, s.col, "", "this import is shadowed.") + s.message("", "this import is shadowed.") ) .as_str(), )), _ => { self.future_keywords.insert(kw.to_string(), span.clone()); - if kw == "every" { + if kw == "every" && !self.rego_v1 { + //rego.v1 explicitly adds each keyword. self.future_keywords.insert("in".to_string(), span.clone()); } Ok(()) @@ -782,6 +784,17 @@ impl<'source> Parser<'source> { span.start = start; let op = match self.token_text() { "=" => AssignOp::Eq, + ":=" if self.rego_v1 => { + if let Expr::Var(v) = &expr { + if v.text() == "input" { + bail!(span.error("input cannot be shadowed")); + } + if v.text() == "data" { + bail!(span.error("data cannot be shadowed")); + } + } + AssignOp::ColEq + } ":=" => AssignOp::ColEq, _ => { *self = state; @@ -974,6 +987,7 @@ impl<'source> Parser<'source> { let stmt = match self.parse_literal_stmt() { Ok(stmt) => stmt, Err(e) if is_definite_query => return Err(e), + Err(e) if matches!(self.token_text(), "=" | ":=") => return Err(e), Err(_) => { // There was error parsing the first literal // Restore the state and return. @@ -1117,7 +1131,16 @@ impl<'source> Parser<'source> { let span = self.tok.1.clone(); let mut term = if self.tok.0 == TokenKind::Ident { - Expr::Var(self.parse_var()?) + let v = self.parse_var()?; + if self.rego_v1 { + if v.text() == "input" { + bail!(span.error("input cannot be shadowed")); + } + if v.text() == "data" { + bail!(span.error("data cannot be shadowed")); + } + } + Expr::Var(v) } else { return Err(self.source.error( span.line, @@ -1311,6 +1334,9 @@ impl<'source> Parser<'source> { false } "{" => { + if self.rego_v1 { + bail!(span.error("`if` keyword is required before rule body")); + } self.next_token()?; let query = Ref::new(self.parse_query(span.clone(), "}")?); span.end = self.end; @@ -1378,6 +1404,9 @@ impl<'source> Parser<'source> { }); } "{" => { + if self.rego_v1 { + bail!(span.error("`if` keyword is required before rule body")); + } self.next_token()?; let query = Ref::new(self.parse_query(span.clone(), "}")?); span.end = self.end; @@ -1463,6 +1492,25 @@ impl<'source> Parser<'source> { let head = self.parse_rule_head()?; let bodies = self.parse_rule_bodies()?; span.end = self.end; + + if self.rego_v1 && bodies.is_empty() { + match &head { + RuleHead::Compr { assign, .. } | RuleHead::Func { assign, .. } + if assign.is_none() => + { + bail!(span.error("rule must have a body or assignment")); + } + RuleHead::Set { refr, key, .. } if key.is_none() => { + if Self::get_path_ref_components(refr)?.len() == 2 { + bail!(span.error("`contains` keyword is required for partial set rules")); + } else { + bail!(span.error("rule must have a body or assignment")); + } + } + _ => (), + } + } + Ok(Rule::Spec { span, head, bodies }) } @@ -1526,15 +1574,25 @@ impl<'source> Parser<'source> { let refr = Ref::new(self.parse_path_ref()?); let comps = Self::get_path_ref_components(&refr)?; - if !matches!(comps[0].text(), "data" | "future" | "input") { + span.end = self.end; + if !matches!(comps[0].text(), "data" | "future" | "input" | "rego") { return Err(self.source.error( comps[0].line, comps[0].col, - "import path must begin with one of: {data, future, input}", + "import path must begin with one of: {data, future, input, rego}", )); } - let is_future_kw = self.handle_import_future_keywords(&comps)?; + let is_future_kw = + if comps.len() == 2 && comps[0].text() == "rego" && comps[1].text() == "v1" { + self.rego_v1 = true; + for kw in FUTURE_KEYWORDS { + self.set_future_keyword(kw, &span)?; + } + true + } else { + self.handle_import_future_keywords(&comps)? + }; let var = if self.token_text() == "as" { if is_future_kw { @@ -1588,6 +1646,7 @@ impl<'source> Parser<'source> { package, imports, policy, + rego_v1: self.rego_v1, }) } diff --git a/src/tests/interpreter/mod.rs b/src/tests/interpreter/mod.rs index 3a6decd3..dcecd9b3 100644 --- a/src/tests/interpreter/mod.rs +++ b/src/tests/interpreter/mod.rs @@ -84,7 +84,6 @@ fn match_values(computed: &Value, expected: &Value) -> Result<()> { pub fn check_output(computed_results: &[Value], expected_results: &[Value]) -> Result<()> { if computed_results.len() != expected_results.len() { - dbg!((&computed_results, &expected_results)); bail!( "the number of computed results ({}) and expected results ({}) is not equal", computed_results.len(), diff --git a/src/utils.rs b/src/utils.rs index 68e36636..5d6c682e 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -109,7 +109,7 @@ pub fn get_path_string(refr: &Expr, document: Option<&str>) -> Result<String> { Ok(comps.join(".")) } -pub type FunctionTable = BTreeMap<String, (Vec<Ref<Rule>>, u8)>; +pub type FunctionTable = BTreeMap<String, (Vec<Ref<Rule>>, u8, Ref<Module>)>; fn get_extra_arg_impl( expr: &Expr, @@ -118,11 +118,11 @@ fn get_extra_arg_impl( ) -> Result<Option<Ref<Expr>>> { if let Expr::Call { fcn, params, .. } = expr { let full_path = get_path_string(fcn, module)?; - let n_args = if let Some((_, n_args)) = functions.get(&full_path) { + let n_args = if let Some((_, n_args, _)) = functions.get(&full_path) { *n_args } else { let path = get_path_string(fcn, None)?; - if let Some((_, n_args)) = functions.get(&path) { + if let Some((_, n_args, _)) = functions.get(&path) { *n_args } else if let Some((_, n_args)) = BUILTINS.get(path.as_str()) { *n_args @@ -169,7 +169,7 @@ pub fn gather_functions(modules: &[Ref<Module>]) -> Result<FunctionTable> { { let full_path = get_path_string(refr, Some(module_path.as_str()))?; - if let Some((functions, arity)) = table.get_mut(&full_path) { + if let Some((functions, arity, _)) = table.get_mut(&full_path) { if args.len() as u8 != *arity { bail!(span.error( format!("{full_path} was previously defined with {arity} arguments.") @@ -178,7 +178,10 @@ pub fn gather_functions(modules: &[Ref<Module>]) -> Result<FunctionTable> { } functions.push(rule.clone()); } else { - table.insert(full_path, (vec![rule.clone()], args.len() as u8)); + table.insert( + full_path, + (vec![rule.clone()], args.len() as u8, module.clone()), + ); } } } diff --git a/tests/interpreter/cases/rego.v1/tests.yaml b/tests/interpreter/cases/rego.v1/tests.yaml new file mode 100644 index 00000000..cda632d9 --- /dev/null +++ b/tests/interpreter/cases/rego.v1/tests.yaml @@ -0,0 +1,481 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +cases: + - note: conflict future after rego.v1 + data: {} + modules: + - | + package test + + import rego.v1 + import future.keywords.in + + query: data + error: "this import shadows previous import" + + - note: conflict rego after future + data: {} + modules: + - | + package test + + import future.keywords.in + import rego.v1 + + query: data + error: "this import shadows previous import" + + - note: conflict rego after rego + data: {} + modules: + - | + package test + + import rego.v1 + import rego.v1 + + query: data + error: "this import shadows previous import" + + - note: allowed future after future + data: {} + modules: + - | + package test + + import future.keywords + import future.keywords + + query: data.test + want_result: {} + + - note: if-required-before-body + data: {} + modules: + - | + package test + import rego.v1 + allow { + 1 > 2 + } + query: data + error: "`if` keyword is required before rule body" + + - note: ok-if-before-body + data: {} + modules: + - | + package test + import rego.v1 + allow if { + 1 < 2 + } + query: data.test + want_result: + allow: true + + - note: if-required-before-else-body + data: {} + modules: + - | + package test + import rego.v1 + allow if { + 1 > 2 + } else = 5 { + 1 < 2 + } + query: data + error: "`if` keyword is required before rule body" + + - note: ok-if-before-else-body + data: {} + modules: + - | + package test + import rego.v1 + allow if { + 1 > 2 + } else = 5 if { + 1 < 2 + } + query: data.test + want_result: + allow: 5 + + # cases from https://www.openpolicyagent.org/docs/latest/opa-1/#backwards-compatibility-in-opa-v10 + - note: invalid1 + data: {} + modules: + - | + package test + import rego.v1 + p { true } + query: data + error: "`if` keyword is required before rule body" + + - note: invalid2 + data: {} + modules: + - | + package test + import rego.v1 + p.a { true } + query: data + error: "`if` keyword is required before rule body" + + - note: invalid3 + data: {} + modules: + - | + package test + import rego.v1 + p.a.b { true } + query: data + error: "`if` keyword is required before rule body" + + - note: valid1 + data: {} + modules: + - | + package test + import rego.v1 + p if true + query: data.test + want_result: + p: true + + - note: valid2 + data: {} + modules: + - | + package test + import rego.v1 + p.a if true + query: data.test + want_result: + p: + a: true + + - note: valid3 + data: {} + modules: + - | + package test + import rego.v1 + p.a.b if true + query: data.test + want_result: + p: + a: + b: true + + - note: valid4 + data: {} + modules: + - | + package test + import rego.v1 + p contains "a" + query: data.test + want_result: + p: + set!: ["a"] + + - note: valid5 + data: {} + modules: + - | + package test + import rego.v1 + p := 1 + query: data.test + want_result: + p: 1 + + - note: valid6 + data: {} + modules: + - | + package test + import rego.v1 + p.a := 1 + query: data.test + want_result: + p: + a: 1 + + - note: valid6 + data: {} + modules: + - | + package test + import rego.v1 + p.a.b := 1 + query: data.test + want_result: + p: + a: + b: 1 + + - note: invalid11 + data: {} + modules: + - | + package test + import rego.v1 + p + query: data.test + error: rule must have a body + + - note: invalid12 + data: {} + modules: + - | + package test + import rego.v1 + p.a + query: data.test + error: "`contains` keyword is required for partial set rules" + + - note: invalid13 + data: {} + modules: + - | + package test + import rego.v1 + p.a.b + query: data.test + error: rule must have a body + + - note: invalid21 + data: {} + modules: + - | + package test + import rego.v1 + p { true } + query: data.test + error: "`if` keyword is required before rule body" + + - note: valid21 + data: {} + modules: + - | + package test + import rego.v1 + p if { true } + query: data.test + want_result: + p: true + + - note: invalid22 + data: {} + modules: + - | + package test + import rego.v1 + p.a + query: data.test + error: "`contains` keyword is required for partial set rules" + + - note: valid22 + data: {} + modules: + - | + package test + import rego.v1 + p contains "a" + query: data.test + want_result: + p: + set!: ["a"] + + - note: invalid23 + data: {} + modules: + - | + package test + import rego.v1 + p.a { true } + query: data.test + error: "`if` keyword is required before rule body" + + - note: valid22 + data: {} + modules: + - | + package test + import rego.v1 + p contains "a" if { true } + query: data.test + want_result: + p: + set!: ["a"] + + - note: invalid24 + data: {} + modules: + - | + package test + import rego.v1 + p.a.b + query: data.test + error: "rule must have a body or assignment" + + - note: valid22 + data: {} + modules: + - | + package test + import rego.v1 + p.a.b := true + query: data.test + want_result: + p: + a: + b: true + + - note: invalid25 + data: {} + modules: + - | + package test + import rego.v1 + p.a.b { true } + query: data.test + error: "`if` keyword is required before rule body" + + - note: valid22 + data: {} + modules: + - | + package test + import rego.v1 + p.a.b if { true } + query: data.test + want_result: + p: + a: + b: true + + - note: data-shadowed-by-rule + data: {} + modules: + - | + package test + data = 1 + query: data.test + want_result: + data: 1 + + - note: invalid-data-shadowed-by-rule + data: {} + modules: + - | + package test + import rego.v1 + data = 1 + query: data.test + error: data cannot be shadowed + + - note: input-shadowed-by-rule + data: {} + modules: + - | + package test + input = 1 + query: data.test + want_result: + input: 1 + + - note: invalid-input-shadowed-by-rule + data: {} + modules: + - | + package test + import rego.v1 + input = 1 + query: data.test + error: input cannot be shadowed + + - note: input-shadowed-by-local-var + data: {} + modules: + - | + package test + x { + input := 1 + input > 0 + } + y { + # This evaluates to false + input = 1 + input > 0 + } + query: data.test + want_result: + x: true + + - note: invalid-input-shadowed-by-local-var + data: {} + modules: + - | + package test + import rego.v1 + x if { + input := 1 + input > 0 + } + query: data.test + error: input cannot be shadowed + + - note: data-shadowed-by-local-var + data: {} + modules: + - | + package test + x { + data := 1 + data > 0 + } + query: data.test + want_result: + x: true + + - note: invalid-data-shadowed-by-local-var + data: {} + modules: + - | + package test + import rego.v1 + x if { + data := 1 + data > 0 + } + query: data.test + error: data cannot be shadowed + + - note: deprecated-function + data: {} + modules: + - | + package test + x { + cast_array([1]) + } + query: data.test + want_result: + x: true + + - note: invalid-deprecated-function + data: {} + modules: + - | + package test + import rego.v1 + x if { + cast_array([1]) + } + query: data.test + error: is deprecated diff --git a/tests/opa.rs b/tests/opa.rs index bef1d7e7..874b6852 100644 --- a/tests/opa.rs +++ b/tests/opa.rs @@ -13,7 +13,7 @@ use serde::{Deserialize, Serialize}; use walkdir::WalkDir; const OPA_REPO: &str = "https://github.com/open-policy-agent/opa"; -const OPA_BRANCH: &str = "v0.60.0"; +const OPA_BRANCH: &str = "v0.61.0"; #[derive(Serialize, Deserialize, PartialEq, Debug)] #[serde(deny_unknown_fields)] diff --git a/tests/parser/cases/import/import.yaml b/tests/parser/cases/import/import.yaml index afdc5759..3cbf2f3d 100644 --- a/tests/parser/cases/import/import.yaml +++ b/tests/parser/cases/import/import.yaml @@ -224,13 +224,13 @@ cases: rego: | package test import foo - error: "import path must begin with one of: {data, future, input}" + error: "import path must begin with one of: {data, future, input, rego}" - note: invalid-beginning-1 rego: | package test import foo.bar - error: "import path must begin with one of: {data, future, input}" + error: "import path must begin with one of: {data, future, input, rego}" - note: missing-field-1 rego: |