Abort reference resolution when a reference loop is detected #32

Merged
merged 3 commits on Sep 14, 2023
Changes from all commits
7 changes: 5 additions & 2 deletions src/node/mod.rs
@@ -6,7 +6,7 @@ use std::path::PathBuf;
use yaml_merge_keys::merge_keys_serde;

use crate::list::{List, RemovableList, UniqueList};
use crate::refs::Token;
use crate::refs::{ResolveState, Token};
use crate::types::{Mapping, Value};
use crate::{to_lexical_absolute, Reclass};

@@ -216,7 +216,10 @@ impl Node {
if let Some(clstoken) = clstoken {
// If we got a token, render it, and convert it into a string with
// `raw_string()` to ensure no spurious quotes are injected.
clstoken.render(&root.parameters)?.raw_string()?
let mut state = ResolveState::default();
clstoken
.render(&root.parameters, &mut state)?
.raw_string()?
} else {
// If Token::parse() returns None, the class name can't contain any references, so we
// just convert cls into an owned String.
90 changes: 77 additions & 13 deletions src/refs/mod.rs
@@ -3,6 +3,7 @@ mod parser;
use crate::types::{Mapping, Value};
use anyhow::{anyhow, Result};
use nom::error::{convert_error, VerboseError};
use std::collections::HashSet;

#[derive(Debug, PartialEq, Eq)]
/// Represents a parsed Reclass reference
@@ -16,6 +17,33 @@ pub enum Token {
Combined(Vec<Token>),
}

#[derive(Clone, Debug, Default)]
pub struct ResolveState {
/// Reference paths which we've seen during reference resolution
seen_paths: HashSet<String>,
/// Recursion depth of the resolution (in number of calls to Token::resolve() for Token::Ref
/// objects).
depth: usize,
}

impl ResolveState {
/// Formats paths that have been seen as a comma-separated list.
fn seen_paths_list(&self) -> String {
let mut paths = self
.seen_paths
.iter()
.map(|p| format!("\"{p}\""))
.collect::<Vec<String>>();
paths.sort();
paths.join(", ")
}
}

/// Maximum allowed recursion depth for Token::resolve(). We're fairly conservative with the value,
/// since it's rather unlikely that a well-formed inventory will have any references that are
/// nested deeper than 64.
const RESOLVE_MAX_DEPTH: usize = 64;

impl Token {
/// Parses an arbitrary string into a `Token`. Returns None if the string doesn't contain any
/// opening reference markers.
@@ -48,25 +76,25 @@ impl Token {
/// the Mapping provided through parameter `params`.
///
/// The heavy lifting is done by `Token::resolve()`.
pub fn render(&self, params: &Mapping) -> Result<Value> {
pub fn render(&self, params: &Mapping, state: &mut ResolveState) -> Result<Value> {
if self.is_ref() {
// handle value refs (i.e. refs where the full value of the key is replaced)
// We call `interpolate()` after `resolve()` to ensure that we fully interpolate all
// references if the result of `resolve()` is a complex Value (Mapping or Sequence).
self.resolve(params)?.interpolate(params)
self.resolve(params, state)?.interpolate(params, state)
} else {
Ok(Value::Literal(self.resolve(params)?.raw_string()?))
Ok(Value::Literal(self.resolve(params, state)?.raw_string()?))
}
}

/// Resolves the Token into a [`Value`]. References are looked up in the provided `params`
/// Mapping.
fn resolve(&self, params: &Mapping) -> Result<Value> {
fn resolve(&self, params: &Mapping, state: &mut ResolveState) -> Result<Value> {
match self {
// Literal tokens can be directly turned into `Value::Literal`
Self::Literal(s) => Ok(Value::Literal(s.to_string())),
Self::Combined(tokens) => {
let res = interpolate_token_slice(tokens, params)?;
let res = interpolate_token_slice(tokens, params, state)?;
// The result of `interpolate_token_slice()` for a `Token::Combined()` can't result
// in more unresolved refs since we iterate over each segment until there's no
// Value::String() left, so we return a Value::Literal().
@@ -76,9 +104,33 @@
// `interpolate_token_slice()`. Then we split the resolved reference path into segments
// on `:` and iteratively look up each segment in the provided `params` Mapping.
Self::Ref(parts) => {
// We track the number of calls to `Token::resolve()` for Token::Ref that the
// current `state` has seen in state.depth.
state.depth += 1;
if state.depth > RESOLVE_MAX_DEPTH {
// If we've called `Token::resolve()` recursively more than RESOLVE_MAX_DEPTH (64)
// times, we've most likely hit an edge case where the reference path tracking
// doesn't detect a loop. We abort here, since a well-formed inventory is very
// unlikely to have references nested 64 levels deep.
let paths = state.seen_paths_list();
return Err(anyhow!(
"Token resolution exceeded recursion depth of {RESOLVE_MAX_DEPTH}. \
We've seen the following reference paths: [{paths}].",
));
}
// Construct flattened ref path by resolving any potential nested references in the
// Ref's Vec<Token>.
let path = interpolate_token_slice(parts, params)?;
let path = interpolate_token_slice(parts, params, state)?;

if state.seen_paths.contains(&path) {
// we've already seen this reference, so we know there's a loop, and can abort
// resolution.
let paths = state.seen_paths_list();
return Err(anyhow!("Reference loop with reference paths [{paths}]."));
}
state.seen_paths.insert(path.clone());

// generate iterator containing flattened reference path segments
let mut refpath_iter = path.split(':');
@@ -122,16 +174,20 @@ impl Token {
// individual references are resolved, and always do value lookups on
// resolved references.
Value::String(_) => {
newv = v.interpolate(params)?;
newv = v.interpolate(params, state)?;
v = newv.get(&key.into()).ok_or_else(|| {
anyhow!("unable to lookup key '{key}' for '{path}'")
})?;
}
Value::ValueList(l) => {
let mut i = vec![];
for v in l {
// When resolving references in ValueLists, we want to track state
// separately for each layer, since reference loops can't be
// stretched across layers.
let mut st = state.clone();
let v = if v.is_string() {
v.interpolate(params)?
v.interpolate(params, &mut st)?
} else {
v.clone()
};
@@ -159,9 +215,9 @@ impl Token {
let mut v = v.clone();
// Finally, we iteratively interpolate `v` while it's a `Value::String()` or
// `Value::ValueList`. This ensures that the returned Value will never contain
// further references.
// further references. Here, we want to continue tracking the state normally.
while v.is_string() || v.is_value_list() {
v = v.interpolate(params)?;
v = v.interpolate(params, state)?;
}
Ok(v)
}
@@ -195,15 +251,23 @@ impl std::fmt::Display for Token {

/// Interpolate a `Vec<Token>`. Called from `Token::resolve()` for `Token::Combined` and
/// `Token::Ref` Vecs.
fn interpolate_token_slice(tokens: &[Token], params: &Mapping) -> Result<String> {
fn interpolate_token_slice(
tokens: &[Token],
params: &Mapping,
state: &mut ResolveState,
) -> Result<String> {
// Iterate through each element of the Vec, and call Token::resolve() on each element.
// Additionally, we repeatedly call `Value::interpolate()` on the resolved value for each
// element, as long as that Value is a `Value::String`.
let mut res = String::new();
for t in tokens {
let mut v = t.resolve(params)?;
// Multiple separate refs in a combined or ref token can't form loops between each other.
// Each individual ref can still be part of a loop, so we make a fresh copy of the input
// state before resolving each element.
let mut st = state.clone();
let mut v = t.resolve(params, &mut st)?;
while v.is_string() {
v = v.interpolate(params)?;
v = v.interpolate(params, &mut st)?;
}
res.push_str(&v.raw_string()?);
}
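
The loop-detection scheme above is spread across several hunks, so here is a condensed, standalone sketch of the idea (hypothetical names like ToyState and resolve_toy; this is not the crate's actual code): every Token::Ref resolution bumps a depth counter and records its flattened path in the state, and resolution aborts as soon as a path repeats or the depth cap is exceeded. The real implementation additionally clones the state for independent sub-resolutions (each ValueList layer, each element of a combined token) so unrelated branches don't trip the detector.

// Minimal sketch of the detection scheme; ToyState/resolve_toy are
// illustrative stand-ins, not the crate's API.
use std::collections::{HashMap, HashSet};

const MAX_DEPTH: usize = 64;

#[derive(Clone, Default)]
struct ToyState {
    seen_paths: HashSet<String>,
    depth: usize,
}

fn resolve_toy(
    path: &str,
    params: &HashMap<String, String>,
    state: &mut ToyState,
) -> Result<String, String> {
    state.depth += 1;
    if state.depth > MAX_DEPTH {
        return Err(format!("exceeded recursion depth of {MAX_DEPTH}"));
    }
    // `insert` returns false if the path was already recorded, i.e. we've
    // come back to a reference we're still in the middle of resolving.
    if !state.seen_paths.insert(path.to_string()) {
        return Err(format!("reference loop involving \"{path}\""));
    }
    let raw = params
        .get(path)
        .ok_or_else(|| format!("unknown key '{path}'"))?;
    // Toy interpolation: a value that is exactly `${other}` is another reference.
    if let Some(inner) = raw.strip_prefix("${").and_then(|s| s.strip_suffix('}')) {
        // Nested references keep threading the same state, so loops spanning
        // multiple keys (foo -> bar -> foo) are caught as well.
        resolve_toy(inner, params, state)
    } else {
        Ok(raw.clone())
    }
}

fn main() {
    let params = HashMap::from([
        ("foo".to_string(), "${bar}".to_string()),
        ("bar".to_string(), "${foo}".to_string()),
    ]);
    let mut state = ToyState::default();
    // Reports the loop instead of recursing forever.
    println!("{:?}", resolve_toy("foo", &params, &mut state));
}
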
103 changes: 80 additions & 23 deletions src/refs/token_resolve_parse_tests.rs
@@ -6,7 +6,8 @@ fn test_resolve_ref_str() {
let token = Token::Ref(vec![Token::literal_from_str("foo")]);
let params = Mapping::from_str("foo: bar").unwrap();

let v = token.resolve(&params).unwrap();
let mut state = ResolveState::default();
let v = token.resolve(&params, &mut state).unwrap();
assert_eq!(v, Value::Literal("bar".into()));
}

@@ -15,7 +16,8 @@ fn test_resolve_ref_val() {
let token = Token::Ref(vec![Token::literal_from_str("foo")]);
let params = Mapping::from_str("foo: True").unwrap();

let v = token.resolve(&params).unwrap();
let mut state = ResolveState::default();
let v = token.resolve(&params, &mut state).unwrap();
assert_eq!(v, Value::Bool(true));
}

@@ -24,7 +26,8 @@ fn test_resolve_literal() {
let token = Token::literal_from_str("foo");
let params = Mapping::new();

let v = token.resolve(&params).unwrap();
let mut state = ResolveState::default();
let v = token.resolve(&params, &mut state).unwrap();
assert_eq!(v, Value::Literal("foo".into()));
}

@@ -36,7 +39,8 @@ fn test_resolve_combined() {
]);
let params = Mapping::from_str("{foo: bar, bar: baz}").unwrap();

let v = token.resolve(&params).unwrap();
let mut state = ResolveState::default();
let v = token.resolve(&params, &mut state).unwrap();
assert_eq!(v, Value::Literal("foobar".into()));
}
#[test]
@@ -48,7 +52,8 @@ fn test_resolve_combined_2() {
]);
let params = Mapping::from_str(r#"{foo: "${bar}", bar: baz}"#).unwrap();

let v = token.resolve(&params).unwrap();
let mut state = ResolveState::default();
let v = token.resolve(&params, &mut state).unwrap();
assert_eq!(v, Value::Literal("foobaz".into()));
}

@@ -64,7 +69,8 @@ fn test_resolve_combined_3() {
"#;
let params = Mapping::from_str(params).unwrap();

let v = token.resolve(&params).unwrap();
let mut state = ResolveState::default();
let v = token.resolve(&params, &mut state).unwrap();
assert_eq!(v, Value::Literal("foo${bar}".into()));
}

@@ -105,23 +111,31 @@ fn test_resolve() {
let p = Mapping::from_str("foo: foo").unwrap();
let reftoken = parse_ref(&"${foo}").unwrap();

assert_eq!(reftoken.resolve(&p).unwrap(), Value::Literal("foo".into()));
let mut state = ResolveState::default();
assert_eq!(
reftoken.resolve(&p, &mut state).unwrap(),
Value::Literal("foo".into())
);
}

#[test]
fn test_resolve_subkey() {
let p = Mapping::from_str("foo: {foo: foo}").unwrap();
let reftoken = parse_ref(&"${foo:foo}").unwrap();

assert_eq!(reftoken.resolve(&p).unwrap(), Value::Literal("foo".into()));
let mut state = ResolveState::default();
let v = reftoken.resolve(&p, &mut state).unwrap();
assert_eq!(v, Value::Literal("foo".into()));
}

#[test]
fn test_resolve_nested() {
let p = Mapping::from_str("{foo: foo, bar: {foo: foo}}").unwrap();
let reftoken = parse_ref(&"${bar:${foo}}").unwrap();

assert_eq!(reftoken.resolve(&p).unwrap(), Value::Literal("foo".into()));
let mut state = ResolveState::default();
let v = reftoken.resolve(&p, &mut state).unwrap();
assert_eq!(v, Value::Literal("foo".into()));
}

#[test]
@@ -135,10 +149,9 @@ fn test_resolve_nested_subkey() {

// ${bar:${foo:bar}} == ${bar:foo} == foo
let reftoken = parse_ref(&"${bar:${foo:bar}}").unwrap();
assert_eq!(
reftoken.resolve(&p).unwrap(),
Value::Literal("foo".to_string())
);
let mut state = ResolveState::default();
let v = reftoken.resolve(&p, &mut state).unwrap();
assert_eq!(v, Value::Literal("foo".to_string()));
}

#[test]
@@ -152,10 +165,9 @@ fn test_resolve_kapitan_secret_ref() {

let reftoken = parse_ref(&"?{vaultkv:foo/bar/${baz:baz}/qux}").unwrap();
dbg!(&reftoken);
assert_eq!(
reftoken.resolve(&p).unwrap(),
Value::Literal("?{vaultkv:foo/bar/baz/qux}".to_string())
);
let mut state = ResolveState::default();
let v = reftoken.resolve(&p, &mut state).unwrap();
assert_eq!(v, Value::Literal("?{vaultkv:foo/bar/baz/qux}".to_string()));
}

#[test]
@@ -168,10 +180,9 @@ fn test_resolve_escaped_ref() {
let p = Mapping::from_str(params).unwrap();

let reftoken = parse_ref("\\${PROJECT_LABEL}").unwrap();
assert_eq!(
reftoken.resolve(&p).unwrap(),
Value::Literal("${PROJECT_LABEL}".to_string())
);
let mut state = ResolveState::default();
let v = reftoken.resolve(&p, &mut state).unwrap();
assert_eq!(v, Value::Literal("${PROJECT_LABEL}".to_string()));
}

#[test]
@@ -183,8 +194,10 @@ fn test_resolve_mapping_value() {
"#;
let p = Mapping::from_str(p).unwrap();
let reftoken = parse_ref("${foo}").unwrap();
let mut state = ResolveState::default();
let v = reftoken.resolve(&p, &mut state).unwrap();
assert_eq!(
reftoken.resolve(&p).unwrap(),
v,
Value::Mapping(Mapping::from_str("{bar: bar, baz: baz}").unwrap())
);
}
@@ -198,10 +211,54 @@ fn test_resolve_mapping_embedded() {
"#;
let p = Mapping::from_str(p).unwrap();
let reftoken = parse_ref("foo: ${foo}").unwrap();
let mut state = ResolveState::default();
let v = reftoken.resolve(&p, &mut state).unwrap();
assert_eq!(
reftoken.resolve(&p).unwrap(),
v,
// Mapping is serialized as JSON when embedded in a string. serde_json emits JSON maps
// with lexically sorted keys and minimal whitespace.
Value::Literal(r#"foo: {"bar":"bar","baz":"baz"}"#.to_string())
);
}

#[test]
#[should_panic(expected = "Reference loop with reference paths [\"foo\"].")]
fn test_resolve_recursive_error() {
let p = r#"
foo: ${foo}
"#;
let p = Mapping::from_str(p).unwrap();
let reftoken = parse_ref("${foo}").unwrap();

let mut state = ResolveState::default();
let _v = reftoken.resolve(&p, &mut state).unwrap();
}

#[test]
#[should_panic(expected = "Reference loop with reference paths [\"bar\", \"foo\"].")]
fn test_resolve_recursive_error_2() {
let p = r#"
foo: ${bar}
bar: ${foo}
"#;
let p = Mapping::from_str(p).unwrap();
let reftoken = parse_ref("${foo}").unwrap();

let mut state = ResolveState::default();
let _v = reftoken.resolve(&p, &mut state).unwrap();
}

#[test]
#[should_panic(expected = "Reference loop with reference paths [\"baz\", \"foo\"].")]
fn test_resolve_nested_recursive_error() {
let p = r#"
foo: ${baz}
baz:
qux: ${foo}
"#;
let p = Mapping::from_str(p).unwrap();
let reftoken = parse_ref("${foo}").unwrap();

let mut state = ResolveState::default();
let _v = reftoken.resolve(&p, &mut state).unwrap();
}
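
A note on the exact strings asserted by the #[should_panic] tests above: the error messages are deterministic because seen_paths_list() sorts the quoted paths before joining them. A standalone illustration of that formatting (simplified copy of the diff's helper as a free function, not the crate's API):

use std::collections::HashSet;

fn seen_paths_list(seen_paths: &HashSet<String>) -> String {
    // Quote each path, sort lexically, then join; sorting is what makes the
    // message stable across HashSet iteration orders.
    let mut paths = seen_paths
        .iter()
        .map(|p| format!("\"{p}\""))
        .collect::<Vec<String>>();
    paths.sort();
    paths.join(", ")
}

fn main() {
    let seen: HashSet<String> = ["foo", "baz"].iter().map(|s| s.to_string()).collect();
    // Matches the ["baz", "foo"] ordering asserted in
    // test_resolve_nested_recursive_error above.
    assert_eq!(seen_paths_list(&seen), r#""baz", "foo""#);
    println!("[{}]", seen_paths_list(&seen));
}
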