Skip to content

Commit 103c2a5

Browse files
committed
feat: add std regex builtins
Upstream issue: google/jsonnet#1039
1 parent 11193ce commit 103c2a5

File tree

8 files changed

+416
-88
lines changed

8 files changed

+416
-88
lines changed

Cargo.lock

+212-87
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ clap_complete = "4.4"
6464
lsp-server = "0.7.4"
6565
lsp-types = "0.94.1"
6666

67+
regex = "1.8.4"
68+
lru = "0.10.0"
69+
6770
#[profile.test]
6871
#opt-level = 1
6972

cmds/jrsonnet/Cargo.toml

+4
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ exp-destruct = ["jrsonnet-evaluator/exp-destruct"]
2929
exp-object-iteration = ["jrsonnet-evaluator/exp-object-iteration"]
3030
# Bigint type
3131
exp-bigint = ["jrsonnet-evaluator/exp-bigint", "jrsonnet-cli/exp-bigint"]
32+
# std.regex and co.
33+
exp-regex = [
34+
"jrsonnet-stdlib/exp-regex",
35+
]
3236
# obj?.field, obj?.['field']
3337
exp-null-coaelse = [
3438
"jrsonnet-evaluator/exp-null-coaelse",

crates/jrsonnet-cli/Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ exp-null-coaelse = [
2020
"jrsonnet-evaluator/exp-null-coaelse",
2121
"jrsonnet-stdlib/exp-null-coaelse",
2222
]
23+
exp-regex = [
24+
"jrsonnet-stdlib/exp-regex",
25+
]
2326
legacy-this-file = ["jrsonnet-stdlib/legacy-this-file"]
2427

2528
[dependencies]

crates/jrsonnet-evaluator/src/typed/conversions.rs

+16
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,22 @@ impl Typed for String {
304304
}
305305
}
306306

307+
impl Typed for StrValue {
308+
const TYPE: &'static ComplexValType = &ComplexValType::Simple(ValType::Str);
309+
310+
fn into_untyped(value: Self) -> Result<Val> {
311+
Ok(Val::Str(value))
312+
}
313+
314+
fn from_untyped(value: Val) -> Result<Self> {
315+
<Self as Typed>::TYPE.check(&value)?;
316+
match value {
317+
Val::Str(s) => Ok(s),
318+
_ => unreachable!(),
319+
}
320+
}
321+
}
322+
307323
impl Typed for char {
308324
const TYPE: &'static ComplexValType = &ComplexValType::Char;
309325

crates/jrsonnet-stdlib/Cargo.toml

+7
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ exp-preserve-order = ["jrsonnet-evaluator/exp-preserve-order"]
2020
exp-bigint = ["num-bigint", "jrsonnet-evaluator/exp-bigint"]
2121

2222
exp-null-coaelse = ["jrsonnet-parser/exp-null-coaelse", "jrsonnet-evaluator/exp-null-coaelse"]
23+
# std.regexFullMatch, std.regexPartialMatch and other regex helpers
24+
exp-regex = ["regex", "lru", "rustc-hash"]
2325

2426
[dependencies]
2527
jrsonnet-evaluator.workspace = true
@@ -49,6 +51,11 @@ serde_yaml_with_quirks.workspace = true
4951

5052
num-bigint = { workspace = true, optional = true }
5153

54+
# regex
55+
regex = { workspace = true, optional = true }
56+
lru = { workspace = true, optional = true }
57+
rustc-hash = { workspace = true, optional = true }
58+
5259
[build-dependencies]
5360
jrsonnet-parser.workspace = true
5461
structdump = { workspace = true, features = ["derive"] }

crates/jrsonnet-stdlib/src/lib.rs

+37-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ mod sets;
4343
pub use sets::*;
4444
mod compat;
4545
pub use compat::*;
46+
#[cfg(feature = "exp-regex")]
47+
mod regex;
48+
#[cfg(feature = "exp-regex")]
49+
pub use crate::regex::*;
4650

4751
pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
4852
let mut builder = ObjValueBuilder::new();
@@ -185,6 +189,9 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
185189
("setInter", builtin_set_inter::INST),
186190
("setDiff", builtin_set_diff::INST),
187191
("setUnion", builtin_set_union::INST),
192+
// Regex
193+
#[cfg(feature = "exp-regex")]
194+
("regexQuoteMeta", builtin_regex_quote_meta::INST),
188195
// Compat
189196
("__compare", builtin___compare::INST),
190197
]
@@ -207,9 +214,38 @@ pub fn stdlib_uncached(settings: Rc<RefCell<Settings>>) -> ObjValue {
207214
},
208215
);
209216
builder.method("trace", builtin_trace { settings });
210-
211217
builder.method("id", FuncVal::Id);
212218

219+
#[cfg(feature = "exp-regex")]
220+
{
221+
// Regex
222+
let regex_cache = RegexCache::default();
223+
builder.method(
224+
"regexFullMatch",
225+
builtin_regex_full_match {
226+
cache: regex_cache.clone(),
227+
},
228+
);
229+
builder.method(
230+
"regexPartialMatch",
231+
builtin_regex_partial_match {
232+
cache: regex_cache.clone(),
233+
},
234+
);
235+
builder.method(
236+
"regexReplace",
237+
builtin_regex_replace {
238+
cache: regex_cache.clone(),
239+
},
240+
);
241+
builder.method(
242+
"regexGlobalReplace",
243+
builtin_regex_global_replace {
244+
cache: regex_cache.clone(),
245+
},
246+
);
247+
};
248+
213249
builder.build()
214250
}
215251

crates/jrsonnet-stdlib/src/regex.rs

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
use std::{cell::RefCell, hash::BuildHasherDefault, num::NonZeroUsize, rc::Rc};
2+
3+
use ::regex::Regex;
4+
use jrsonnet_evaluator::{
5+
error::{ErrorKind::*, Result},
6+
val::StrValue,
7+
IStr, ObjValueBuilder, Val,
8+
};
9+
use jrsonnet_macros::builtin;
10+
use lru::LruCache;
11+
use rustc_hash::FxHasher;
12+
13+
pub struct RegexCacheInner {
14+
cache: RefCell<LruCache<IStr, Rc<Regex>, BuildHasherDefault<FxHasher>>>,
15+
}
16+
impl Default for RegexCacheInner {
17+
fn default() -> Self {
18+
Self {
19+
cache: RefCell::new(LruCache::with_hasher(
20+
NonZeroUsize::new(20).unwrap(),
21+
BuildHasherDefault::default(),
22+
)),
23+
}
24+
}
25+
}
26+
pub type RegexCache = Rc<RegexCacheInner>;
27+
impl RegexCacheInner {
28+
fn parse(&self, pattern: IStr) -> Result<Rc<Regex>> {
29+
let mut cache = self.cache.borrow_mut();
30+
if let Some(found) = cache.get(&pattern) {
31+
return Ok(found.clone());
32+
}
33+
let regex = Regex::new(&pattern)
34+
.map_err(|e| RuntimeError(format!("regex parse failed: {e}").into()))?;
35+
let regex = Rc::new(regex);
36+
cache.push(pattern, regex.clone());
37+
Ok(regex)
38+
}
39+
}
40+
41+
pub fn regex_match_inner(regex: &Regex, str: String) -> Result<Val> {
42+
let mut out = ObjValueBuilder::with_capacity(3);
43+
44+
let mut captures = Vec::with_capacity(regex.captures_len());
45+
let mut named_captures = ObjValueBuilder::with_capacity(regex.capture_names().len());
46+
47+
let Some(captured) = regex.captures(&str) else {
48+
return Ok(Val::Null)
49+
};
50+
51+
for ele in captured.iter().skip(1) {
52+
if let Some(ele) = ele {
53+
captures.push(Val::Str(StrValue::Flat(ele.as_str().into())))
54+
} else {
55+
captures.push(Val::Str(StrValue::Flat(IStr::empty())))
56+
}
57+
}
58+
for (i, name) in regex
59+
.capture_names()
60+
.skip(1)
61+
.enumerate()
62+
.flat_map(|(i, v)| Some((i, v?)))
63+
{
64+
let capture = captures[i].clone();
65+
named_captures.member(name.into()).value(capture)?;
66+
}
67+
68+
out.member("string".into())
69+
.value_unchecked(Val::Str(captured.get(0).unwrap().as_str().into()));
70+
out.member("captures".into())
71+
.value_unchecked(Val::Arr(captures.into()));
72+
out.member("namedCaptures".into())
73+
.value_unchecked(Val::Obj(named_captures.build()));
74+
75+
Ok(Val::Obj(out.build()))
76+
}
77+
78+
#[builtin(fields(
79+
cache: RegexCache,
80+
))]
81+
pub fn builtin_regex_partial_match(
82+
this: &builtin_regex_partial_match,
83+
pattern: IStr,
84+
str: String,
85+
) -> Result<Val> {
86+
let regex = this.cache.parse(pattern)?;
87+
regex_match_inner(&regex, str)
88+
}
89+
90+
#[builtin(fields(
91+
cache: RegexCache,
92+
))]
93+
pub fn builtin_regex_full_match(
94+
this: &builtin_regex_full_match,
95+
pattern: StrValue,
96+
str: String,
97+
) -> Result<Val> {
98+
let pattern = format!("^{pattern}$").into();
99+
let regex = this.cache.parse(pattern)?;
100+
regex_match_inner(&regex, str)
101+
}
102+
103+
#[builtin]
104+
pub fn builtin_regex_quote_meta(pattern: String) -> String {
105+
regex::escape(&pattern)
106+
}
107+
108+
#[builtin(fields(
109+
cache: RegexCache,
110+
))]
111+
pub fn builtin_regex_replace(
112+
this: &builtin_regex_replace,
113+
str: String,
114+
pattern: IStr,
115+
to: String,
116+
) -> Result<String> {
117+
let regex = this.cache.parse(pattern)?;
118+
let replaced = regex.replace(&str, to);
119+
Ok(replaced.to_string())
120+
}
121+
122+
#[builtin(fields(
123+
cache: RegexCache,
124+
))]
125+
pub fn builtin_regex_global_replace(
126+
this: &builtin_regex_global_replace,
127+
str: String,
128+
pattern: IStr,
129+
to: String,
130+
) -> Result<String> {
131+
let regex = this.cache.parse(pattern)?;
132+
let replaced = regex.replace_all(&str, to);
133+
Ok(replaced.to_string())
134+
}

0 commit comments

Comments
 (0)