From d69a5d37ab7c64224d25a23bd5a3cede406c97df Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Fri, 7 Feb 2025 21:15:08 +0100 Subject: [PATCH] feat: Async retriever Signed-off-by: Dmitry Dygalo --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 31 + MIGRATION.md | 38 + README.md | 2 +- crates/jsonschema-py/src/lib.rs | 20 +- crates/jsonschema-py/src/registry.rs | 6 +- crates/jsonschema-py/src/retriever.rs | 5 +- crates/jsonschema-referencing/Cargo.toml | 8 + .../jsonschema-referencing/src/anchors/mod.rs | 52 +- crates/jsonschema-referencing/src/error.rs | 1 + crates/jsonschema-referencing/src/lib.rs | 3 + crates/jsonschema-referencing/src/meta.rs | 3 + crates/jsonschema-referencing/src/registry.rs | 825 +++++++++++++++--- .../jsonschema-referencing/src/retriever.rs | 48 +- crates/jsonschema-referencing/src/uri.rs | 1 + crates/jsonschema/Cargo.toml | 5 + crates/jsonschema/src/compiler.rs | 105 ++- crates/jsonschema/src/keywords/ref_.rs | 8 +- crates/jsonschema/src/lib.rs | 396 ++++++++- crates/jsonschema/src/options.rs | 232 +++-- crates/jsonschema/src/retriever.rs | 311 +++++-- crates/jsonschema/src/validator.rs | 34 + crates/jsonschema/tests/suite.rs | 8 +- 23 files changed, 1771 insertions(+), 373 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b739e758..59e1bd64 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,7 +51,7 @@ jobs: cache-all-crates: "true" key: ${{ matrix.os }} - - run: cargo test --no-fail-fast + - run: cargo test --no-fail-fast --all-features test-wasm: name: Test on WASM diff --git a/CHANGELOG.md b/CHANGELOG.md index dce21587..9729e89b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,39 @@ ## [Unreleased] +### Breaking Changes + +- All builder methods on `ValidationOptions` now take ownership of `self` instead of `&mut self`. + This change enables better support for non-blocking retrieval of external resources during the process of building a validator. 
+ Update your code to chain the builder methods instead of reusing the options instance: + + ```rust + // Before (0.28.x) + let mut options = jsonschema::options(); + options.with_draft(Draft::Draft202012); + options.with_format("custom", my_format); + let validator = options.build(&schema)?; + + // After (0.29.0) + let validator = jsonschema::options() + .with_draft(Draft::Draft202012) + .with_format("custom", my_format) + .build(&schema)?; + +- The `Retrieve` trait's `retrieve` method now accepts URI references as `&Uri<String>` instead of `&Uri<&str>`. + This aligns with the async version and simplifies internal URI handling. The behavior and available methods remain the same, this is purely a type-level change. + + ```rust + // Before + fn retrieve(&self, uri: &Uri<&str>) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> + + // After + fn retrieve(&self, uri: &Uri<String>) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> + ``` + ### Added +- Support non-blocking retrieval for external resources during schema resolution via the new `resolve-async` feature. [#385](https://github.com/Stranger6667/jsonschema/issues/385) - Re-export `referencing::Registry` as `jsonschema::Registry`. - `ValidationOptions::with_registry` that allows for providing a predefined `referencing::Registry`. [#682](https://github.com/Stranger6667/jsonschema/issues/682) diff --git a/MIGRATION.md b/MIGRATION.md index 2d474038..2e6777a8 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -1,5 +1,43 @@ # Migration Guide +## Upgrading from 0.28.x to 0.29.0 + +The builder methods on `ValidationOptions` now take ownership of `self`. 
Change your code to use method chaining instead of reusing the options instance: + +```rust +// Old (0.28.x) +let mut options = jsonschema::options(); +options.with_draft(Draft::Draft202012); +options.with_format("custom", |s| s.len() > 3); +let validator = options.build(&schema)?; + +// New (0.29.0) +let validator = jsonschema::options() + .with_draft(Draft::Draft202012) + .with_format("custom", |s| s.len() > 3) + .build(&schema)?; +``` + +If you implement the `Retrieve` trait, update the `uri` parameter type in the `retrieve` method: + +```rust +// Old (0.28.x) +impl Retrieve for MyRetriever { + fn retrieve(&self, uri: &Uri<&str>) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> { + // ... + } +} + +// New (0.29.0) +impl Retrieve for MyRetriever { + fn retrieve(&self, uri: &Uri<String>) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> { + // ... + } +} +``` + +This is a type-level change only; the behavior and available methods remain the same. + ## Upgrading from 0.25.x to 0.26.0 The `Validator::validate` method now returns `Result<(), ValidationError<'i>>` instead of an error iterator. If you need to iterate over all validation errors, use the new `Validator::iter_errors` method. diff --git a/README.md b/README.md index f60d566f..58c61913 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ See more usage examples in the [documentation](https://docs.rs/jsonschema). 
- 📚 Full support for popular JSON Schema drafts - 🔧 Custom keywords and format validators -- 🌐 Remote reference fetching (network/file) +- 🌐 Blocking & non-blocking remote reference fetching (network/file) - 🎨 `Basic` output style as per JSON Schema spec - ✨ Meta-schema validation for schema documents - 🔗 Bindings for [Python](https://github.com/Stranger6667/jsonschema/tree/master/crates/jsonschema-py) diff --git a/crates/jsonschema-py/src/lib.rs b/crates/jsonschema-py/src/lib.rs index c5f2a4ed..30ef1fda 100644 --- a/crates/jsonschema-py/src/lib.rs +++ b/crates/jsonschema-py/src/lib.rs @@ -419,13 +419,13 @@ fn make_options( ) -> PyResult { let mut options = jsonschema::options(); if let Some(raw_draft_version) = draft { - options.with_draft(get_draft(raw_draft_version)?); + options = options.with_draft(get_draft(raw_draft_version)?); } if let Some(yes) = validate_formats { - options.should_validate_formats(yes); + options = options.should_validate_formats(yes); } if let Some(yes) = ignore_unknown_formats { - options.should_ignore_unknown_formats(yes); + options = options.should_ignore_unknown_formats(yes); } if let Some(formats) = formats { for (name, callback) in formats.iter() { @@ -442,9 +442,10 @@ fn make_options( callback.call(py, (value,), None)?.is_truthy(py) }) }; - options.with_format( - name.to_string(), - move |value: &str| match call_py_callback(value) { + options = + options.with_format(name.to_string(), move |value: &str| match call_py_callback( + value, + ) { Ok(r) => r, Err(e) => { LAST_FORMAT_ERROR.with(|last| { @@ -454,16 +455,15 @@ fn make_options( // Should be caught panic!("Format checker failed") } - }, - ); + }); } } if let Some(retriever) = retriever { let func = into_retriever(retriever)?; - options.with_retriever(Retriever { func }); + options = options.with_retriever(Retriever { func }); } if let Some(registry) = registry { - options.with_registry(registry.inner.clone()); + options = options.with_registry(registry.inner.clone()); } 
Ok(options) } diff --git a/crates/jsonschema-py/src/registry.rs b/crates/jsonschema-py/src/registry.rs index 02cd1615..972ff348 100644 --- a/crates/jsonschema-py/src/registry.rs +++ b/crates/jsonschema-py/src/registry.rs @@ -1,5 +1,3 @@ -use std::sync::Arc; - use jsonschema::Resource; use pyo3::{exceptions::PyValueError, prelude::*}; @@ -29,7 +27,7 @@ impl Registry { if let Some(retriever) = retriever { let func = into_retriever(retriever)?; - options = options.retriever(Arc::new(Retriever { func })); + options = options.retriever(Retriever { func }); } let pairs = resources.try_iter()?.map(|item| { @@ -45,7 +43,7 @@ impl Registry { let pairs: Result, PyErr> = pairs.collect(); let registry = options - .try_from_resources(pairs?.into_iter()) + .build(pairs?) .map_err(|e| PyValueError::new_err(e.to_string()))?; Ok(Registry { inner: registry }) diff --git a/crates/jsonschema-py/src/retriever.rs b/crates/jsonschema-py/src/retriever.rs index 83ae2981..11850f69 100644 --- a/crates/jsonschema-py/src/retriever.rs +++ b/crates/jsonschema-py/src/retriever.rs @@ -10,7 +10,10 @@ pub(crate) struct Retriever PyResult> { } impl PyResult> Retrieve for Retriever { - fn retrieve(&self, uri: &Uri<&str>) -> Result> { + fn retrieve( + &self, + uri: &Uri, + ) -> Result> { Ok((self.func)(uri.as_str())?) 
} } diff --git a/crates/jsonschema-referencing/Cargo.toml b/crates/jsonschema-referencing/Cargo.toml index 5ad06e31..b6fa54eb 100644 --- a/crates/jsonschema-referencing/Cargo.toml +++ b/crates/jsonschema-referencing/Cargo.toml @@ -17,6 +17,13 @@ parking_lot = "0.12.3" percent-encoding = "2.3.1" serde_json.workspace = true +async-trait = { version = "0.1.86", optional = true } +futures = { version = "0.3.31", optional = true } + +[features] +default = [] +retrieve-async = ["dep:async-trait", "dep:futures"] + [lints] workspace = true @@ -26,6 +33,7 @@ codspeed-criterion-compat = { version = "2.7", default-features = false } criterion = { version = "0.5", default-features = false } referencing_testsuite = { package = "jsonschema-referencing-testsuite", path = "../jsonschema-referencing-testsuite/" } test-case = "3.3.1" +tokio = { version = "1", features = ["macros", "rt"] } [[bench]] harness = false diff --git a/crates/jsonschema-referencing/src/anchors/mod.rs b/crates/jsonschema-referencing/src/anchors/mod.rs index f9d63a8c..710a5a2e 100644 --- a/crates/jsonschema-referencing/src/anchors/mod.rs +++ b/crates/jsonschema-referencing/src/anchors/mod.rs @@ -243,14 +243,11 @@ mod tests { }, })); - let registry = Registry::try_from_resources( - [ - ("http://example.com".to_string(), root.clone()), - ("http://example.com/foo/".to_string(), true_resource), - ("http://example.com/foo/bar".to_string(), root.clone()), - ] - .into_iter(), - ) + let registry = Registry::try_from_resources([ + ("http://example.com".to_string(), root.clone()), + ("http://example.com/foo/".to_string(), true_resource), + ("http://example.com/foo/bar".to_string(), root.clone()), + ]) .expect("Invalid resources"); let resolver = registry .try_resolver("http://example.com") @@ -287,14 +284,11 @@ mod tests { }, })); - let registry = Registry::try_from_resources( - [ - ("http://example.com".to_string(), two.clone()), - ("http://example.com/foo/".to_string(), one), - 
("http://example.com/foo/bar".to_string(), two.clone()), - ] - .into_iter(), - ) + let registry = Registry::try_from_resources([ + ("http://example.com".to_string(), two.clone()), + ("http://example.com/foo/".to_string(), one), + ("http://example.com/foo/bar".to_string(), two.clone()), + ]) .expect("Invalid resources"); let resolver = registry .try_resolver("http://example.com") @@ -397,14 +391,11 @@ mod tests { }, })); - let registry = Registry::try_from_resources( - vec![ - ("http://example.com".to_string(), root.clone()), - ("http://example.com/foo/".to_string(), true_resource), - ("http://example.com/foo/bar".to_string(), root.clone()), - ] - .into_iter(), - ) + let registry = Registry::try_from_resources(vec![ + ("http://example.com".to_string(), root.clone()), + ("http://example.com/foo/".to_string(), true_resource), + ("http://example.com/foo/bar".to_string(), root.clone()), + ]) .expect("Invalid resources"); let resolver = registry @@ -442,14 +433,11 @@ mod tests { })); let three = Draft::Draft201909.create_resource(json!({"$recursiveAnchor": false})); - let registry = Registry::try_from_resources( - vec![ - ("http://example.com".to_string(), three), - ("http://example.com/foo/".to_string(), two.clone()), - ("http://example.com/foo/bar".to_string(), one), - ] - .into_iter(), - ) + let registry = Registry::try_from_resources(vec![ + ("http://example.com".to_string(), three), + ("http://example.com/foo/".to_string(), two.clone()), + ("http://example.com/foo/bar".to_string(), one), + ]) .expect("Invalid resources"); let resolver = registry diff --git a/crates/jsonschema-referencing/src/error.rs b/crates/jsonschema-referencing/src/error.rs index 1ac38dfa..2364abc7 100644 --- a/crates/jsonschema-referencing/src/error.rs +++ b/crates/jsonschema-referencing/src/error.rs @@ -153,6 +153,7 @@ impl std::error::Error for Error { } } +/// Errors that can occur during URI handling. 
#[derive(Debug)] pub enum UriError { Parse { diff --git a/crates/jsonschema-referencing/src/lib.rs b/crates/jsonschema-referencing/src/lib.rs index c909d95c..2ef2b525 100644 --- a/crates/jsonschema-referencing/src/lib.rs +++ b/crates/jsonschema-referencing/src/lib.rs @@ -27,3 +27,6 @@ pub use retriever::{DefaultRetriever, Retrieve}; pub(crate) use segments::Segments; pub use specification::Draft; pub use vocabularies::{Vocabulary, VocabularySet}; + +#[cfg(feature = "retrieve-async")] +pub use retriever::AsyncRetrieve; diff --git a/crates/jsonschema-referencing/src/meta.rs b/crates/jsonschema-referencing/src/meta.rs index c2bb5ff0..5d309fa7 100644 --- a/crates/jsonschema-referencing/src/meta.rs +++ b/crates/jsonschema-referencing/src/meta.rs @@ -1,3 +1,6 @@ +//! Built-in JSON Schema meta-schemas. +//! +//! This module provides access to the official JSON Schema meta-schemas for different draft versions. use std::sync::Arc; use once_cell::sync::Lazy; diff --git a/crates/jsonschema-referencing/src/registry.rs b/crates/jsonschema-referencing/src/registry.rs index b2aab37a..ad1974a3 100644 --- a/crates/jsonschema-referencing/src/registry.rs +++ b/crates/jsonschema-referencing/src/registry.rs @@ -63,6 +63,95 @@ pub static SPECIFICATIONS: Lazy = Lazy::new(|| { /// They eagerly process all added resources, including their subresources and anchors. /// This means that subresources contained within any added resources are immediately /// discoverable and retrievable via their own IDs. +/// +/// # Resource Retrieval +/// +/// Registry supports both blocking and non-blocking retrieval of external resources. 
+/// +/// ## Blocking Retrieval +/// +/// ```rust +/// use referencing::{Registry, Resource, Retrieve, Uri}; +/// use serde_json::{json, Value}; +/// +/// struct ExampleRetriever; +/// +/// impl Retrieve for ExampleRetriever { +/// fn retrieve( +/// &self, +/// uri: &Uri +/// ) -> Result> { +/// // Always return the same value for brevity +/// Ok(json!({"type": "string"})) +/// } +/// } +/// +/// # fn example() -> Result<(), Box> { +/// let registry = Registry::options() +/// .retriever(ExampleRetriever) +/// .build([ +/// // Initial schema that might reference external schemas +/// ( +/// "https://example.com/user.json", +/// Resource::from_contents(json!({ +/// "type": "object", +/// "properties": { +/// // Should be retrieved by `ExampleRetriever` +/// "role": {"$ref": "https://example.com/role.json"} +/// } +/// }))? +/// ) +/// ])?; +/// # Ok(()) +/// # } +/// ``` +/// +/// ## Non-blocking Retrieval +/// +/// ```rust +/// # #[cfg(feature = "retrieve-async")] +/// # mod example { +/// use referencing::{Registry, Resource, AsyncRetrieve, Uri}; +/// use serde_json::{json, Value}; +/// +/// struct ExampleRetriever; +/// +/// #[async_trait::async_trait] +/// impl AsyncRetrieve for ExampleRetriever { +/// async fn retrieve( +/// &self, +/// uri: &Uri +/// ) -> Result> { +/// // Always return the same value for brevity +/// Ok(json!({"type": "string"})) +/// } +/// } +/// +/// # async fn example() -> Result<(), Box> { +/// let registry = Registry::options() +/// .async_retriever(ExampleRetriever) +/// .build([ +/// ( +/// "https://example.com/user.json", +/// Resource::from_contents(json!({ +/// // Should be retrieved by `ExampleRetriever` +/// "$ref": "https://example.com/common/user.json" +/// }))? 
+/// ) +/// ]) +/// .await?; +/// # Ok(()) +/// # } +/// # } +/// ``` +/// +/// The registry will automatically: +/// +/// - Resolve external references +/// - Cache retrieved schemas +/// - Handle nested references +/// - Process JSON Schema anchors +/// #[derive(Debug)] pub struct Registry { documents: DocumentStore, @@ -83,12 +172,21 @@ impl Clone for Registry { } /// Configuration options for creating a [`Registry`]. -pub struct RegistryOptions { - retriever: Arc, +pub struct RegistryOptions { + retriever: R, draft: Draft, } -impl RegistryOptions { +impl RegistryOptions { + /// Set specification version under which the resources should be interpreted under. + #[must_use] + pub fn draft(mut self, draft: Draft) -> Self { + self.draft = draft; + self + } +} + +impl RegistryOptions> { /// Create a new [`RegistryOptions`] with default settings. #[must_use] pub fn new() -> Self { @@ -99,38 +197,90 @@ impl RegistryOptions { } /// Set a custom retriever for the [`Registry`]. #[must_use] - pub fn retriever(mut self, retriever: Arc) -> Self { - self.retriever = retriever; + pub fn retriever(mut self, retriever: impl IntoRetriever) -> Self { + self.retriever = retriever.into_retriever(); self } - /// Set specification version under which the resources should be interpreted under. + /// Set a custom async retriever for the [`Registry`]. + #[cfg(feature = "retrieve-async")] #[must_use] - pub fn draft(mut self, draft: Draft) -> Self { - self.draft = draft; - self + pub fn async_retriever( + self, + retriever: impl IntoAsyncRetriever, + ) -> RegistryOptions> { + RegistryOptions { + retriever: retriever.into_retriever(), + draft: self.draft, + } } - /// Create a [`Registry`] with a single resource using these options. + /// Create a [`Registry`] from multiple resources using these options. /// /// # Errors /// - /// Returns an error if the URI is invalid or if there's an issue processing the resource. 
- pub fn try_new(self, uri: impl AsRef, resource: Resource) -> Result { - Registry::try_new_impl(uri, resource, &*self.retriever, self.draft) + /// Returns an error if: + /// - Any URI is invalid + /// - Any referenced resources cannot be retrieved + pub fn build( + self, + pairs: impl IntoIterator, Resource)>, + ) -> Result { + Registry::try_from_resources_impl(pairs, &*self.retriever, self.draft) } - /// Create a [`Registry`] from multiple resources using these options. +} + +#[cfg(feature = "retrieve-async")] +impl RegistryOptions> { + /// Create a [`Registry`] from multiple resources using these options with async retrieval. /// /// # Errors /// - /// Returns an error if any URI is invalid or if there's an issue processing the resources. - pub fn try_from_resources( + /// Returns an error if: + /// - Any URI is invalid + /// - Any referenced resources cannot be retrieved + pub async fn build( self, - pairs: impl Iterator, Resource)>, + pairs: impl IntoIterator, Resource)>, ) -> Result { - Registry::try_from_resources_impl(pairs, &*self.retriever, self.draft) + Registry::try_from_resources_async_impl(pairs, &*self.retriever, self.draft).await + } +} + +pub trait IntoRetriever { + fn into_retriever(self) -> Arc; +} + +impl IntoRetriever for T { + fn into_retriever(self) -> Arc { + Arc::new(self) + } +} + +impl IntoRetriever for Arc { + fn into_retriever(self) -> Arc { + self + } +} + +#[cfg(feature = "retrieve-async")] +pub trait IntoAsyncRetriever { + fn into_retriever(self) -> Arc; +} + +#[cfg(feature = "retrieve-async")] +impl IntoAsyncRetriever for T { + fn into_retriever(self) -> Arc { + Arc::new(self) + } +} + +#[cfg(feature = "retrieve-async")] +impl IntoAsyncRetriever for Arc { + fn into_retriever(self) -> Arc { + self } } -impl Default for RegistryOptions { +impl Default for RegistryOptions> { fn default() -> Self { Self::new() } @@ -139,7 +289,7 @@ impl Default for RegistryOptions { impl Registry { /// Get [`RegistryOptions`] for configuring a new 
[`Registry`]. #[must_use] - pub fn options() -> RegistryOptions { + pub fn options() -> RegistryOptions> { RegistryOptions::new() } /// Create a new [`Registry`] with a single resource. @@ -165,7 +315,7 @@ impl Registry { /// /// Returns an error if any URI is invalid or if there's an issue processing the resources. pub fn try_from_resources( - pairs: impl Iterator, Resource)>, + pairs: impl IntoIterator, Resource)>, ) -> Result { Self::try_from_resources_impl(pairs, &DefaultRetriever, Draft::default()) } @@ -175,10 +325,10 @@ impl Registry { retriever: &dyn Retrieve, draft: Draft, ) -> Result { - Self::try_from_resources_impl([(uri, resource)].into_iter(), retriever, draft) + Self::try_from_resources_impl([(uri, resource)], retriever, draft) } fn try_from_resources_impl( - pairs: impl Iterator, Resource)>, + pairs: impl IntoIterator, Resource)>, retriever: &dyn Retrieve, draft: Draft, ) -> Result { @@ -202,32 +352,56 @@ impl Registry { resolution_cache: resolution_cache.into_shared(), }) } - /// Create a new registry with a new resource. + /// Create a new [`Registry`] from an iterator of (URI, Resource) pairs using an async retriever. + /// + /// # Arguments + /// + /// * `pairs` - An iterator of (URI, Resource) pairs. /// /// # Errors /// - /// Returns an error if the URI is invalid or if there's an issue processing the resource. - pub fn try_with_resource( - self, - uri: impl AsRef, - resource: Resource, - ) -> Result { - let draft = resource.draft(); - self.try_with_resources([(uri, resource)].into_iter(), draft) + /// Returns an error if any URI is invalid or if there's an issue processing the resources. 
+ #[cfg(feature = "retrieve-async")] + async fn try_from_resources_async_impl( + pairs: impl IntoIterator, Resource)>, + retriever: &dyn crate::AsyncRetrieve, + draft: Draft, + ) -> Result { + let mut documents = AHashMap::new(); + let mut resources = ResourceMap::new(); + let mut anchors = AHashMap::new(); + let mut resolution_cache = UriCache::new(); + + process_resources_async( + pairs, + retriever, + &mut documents, + &mut resources, + &mut anchors, + &mut resolution_cache, + draft, + ) + .await?; + + Ok(Registry { + documents, + resources, + anchors, + resolution_cache: resolution_cache.into_shared(), + }) } - /// Create a new registry with a new resource and using the given retriever. + /// Create a new registry with a new resource. /// /// # Errors /// /// Returns an error if the URI is invalid or if there's an issue processing the resource. - pub fn try_with_resource_and_retriever( + pub fn try_with_resource( self, uri: impl AsRef, resource: Resource, - retriever: &dyn Retrieve, ) -> Result { let draft = resource.draft(); - self.try_with_resources_and_retriever([(uri, resource)].into_iter(), retriever, draft) + self.try_with_resources([(uri, resource)], draft) } /// Create a new registry with new resources. /// @@ -236,7 +410,7 @@ impl Registry { /// Returns an error if any URI is invalid or if there's an issue processing the resources. pub fn try_with_resources( self, - pairs: impl Iterator, Resource)>, + pairs: impl IntoIterator, Resource)>, draft: Draft, ) -> Result { self.try_with_resources_and_retriever(pairs, &DefaultRetriever, draft) @@ -248,7 +422,7 @@ impl Registry { /// Returns an error if any URI is invalid or if there's an issue processing the resources. 
pub fn try_with_resources_and_retriever( self, - pairs: impl Iterator, Resource)>, + pairs: impl IntoIterator, Resource)>, retriever: &dyn Retrieve, draft: Draft, ) -> Result { @@ -272,6 +446,39 @@ impl Registry { resolution_cache: resolution_cache.into_shared(), }) } + /// Create a new registry with new resources and using the given non-blocking retriever. + /// + /// # Errors + /// + /// Returns an error if any URI is invalid or if there's an issue processing the resources. + #[cfg(feature = "retrieve-async")] + pub async fn try_with_resources_and_retriever_async( + self, + pairs: impl IntoIterator, Resource)>, + retriever: &dyn crate::AsyncRetrieve, + draft: Draft, + ) -> Result { + let mut documents = self.documents; + let mut resources = self.resources; + let mut anchors = self.anchors; + let mut resolution_cache = self.resolution_cache.into_local(); + process_resources_async( + pairs, + retriever, + &mut documents, + &mut resources, + &mut anchors, + &mut resolution_cache, + draft, + ) + .await?; + Ok(Registry { + documents, + resources, + anchors, + resolution_cache: resolution_cache.into_shared(), + }) + } /// Create a new [`Resolver`] for this registry with the given base URI. 
/// /// # Errors @@ -343,7 +550,7 @@ impl Registry { } fn process_meta_schemas( - pairs: impl Iterator, Resource)>, + pairs: impl IntoIterator, Resource)>, documents: &mut DocumentStore, resources: &mut ResourceMap, anchors: &mut AHashMap, @@ -384,120 +591,243 @@ fn process_meta_schemas( Ok(()) } -fn process_resources( - pairs: impl Iterator, Resource)>, - retriever: &dyn Retrieve, +struct ProcessingState { + queue: VecDeque<(Arc>, InnerResourcePtr)>, + seen: HashSet, + external: AHashSet>, + scratch: String, + refers_metaschemas: bool, +} + +impl ProcessingState { + fn new() -> Self { + Self { + queue: VecDeque::with_capacity(32), + seen: HashSet::with_hasher(BuildNoHashHasher::default()), + external: AHashSet::new(), + scratch: String::new(), + refers_metaschemas: false, + } + } +} + +fn process_input_resources( + pairs: impl IntoIterator, Resource)>, documents: &mut DocumentStore, resources: &mut ResourceMap, - anchors: &mut AHashMap, - resolution_cache: &mut UriCache, - default_draft: Draft, + state: &mut ProcessingState, ) -> Result<(), Error> { - let mut queue = VecDeque::with_capacity(32); - let mut seen = HashSet::with_hasher(BuildNoHashHasher::default()); - let mut external = AHashSet::new(); - let mut scratch = String::new(); - let mut refers_metaschemas = false; - for (uri, resource) in pairs { let uri = uri::from_str(uri.as_ref().trim_end_matches('#'))?; let key = Arc::new(uri); match documents.entry(Arc::clone(&key)) { - Entry::Occupied(_) => { - // SAFETY: Do not remove any existing documents so that all pointers are valid - // The registry does not allow overriding existing resources right now - } + Entry::Occupied(_) => {} Entry::Vacant(entry) => { let (draft, contents) = resource.into_inner(); let boxed = Arc::pin(contents); let contents = std::ptr::addr_of!(*boxed); let resource = InnerResourcePtr::new(contents, draft); resources.insert(Arc::clone(&key), resource.clone()); - queue.push_back((key, resource)); + state.queue.push_back((key, 
resource)); entry.insert(boxed); } } } + Ok(()) +} - loop { - if queue.is_empty() && external.is_empty() { - break; +fn process_queue( + state: &mut ProcessingState, + resources: &mut ResourceMap, + anchors: &mut AHashMap, + resolution_cache: &mut UriCache, +) -> Result<(), Error> { + while let Some((mut base, resource)) = state.queue.pop_front() { + if let Some(id) = resource.id() { + base = resolution_cache.resolve_against(&base.borrow(), id)?; + resources.insert(base.clone(), resource.clone()); } - // Process current queue and collect references to external resources - while let Some((mut base, resource)) = queue.pop_front() { - if let Some(id) = resource.id() { - base = resolution_cache.resolve_against(&base.borrow(), id)?; - resources.insert(base.clone(), resource.clone()); - } + for anchor in resource.anchors() { + anchors.insert(AnchorKey::new(base.clone(), anchor.name()), anchor); + } - // Look for anchors - for anchor in resource.anchors() { - anchors.insert(AnchorKey::new(base.clone(), anchor.name()), anchor); - } + collect_external_resources( + &base, + resource.contents(), + &mut state.external, + &mut state.seen, + resolution_cache, + &mut state.scratch, + &mut state.refers_metaschemas, + )?; - // Collect references to external resources in this resource - collect_external_resources( - &base, - resource.contents(), - &mut external, - &mut seen, - resolution_cache, - &mut scratch, - &mut refers_metaschemas, - )?; - - // Process subresources - for contents in resource.draft().subresources_of(resource.contents()) { - let subresource = InnerResourcePtr::new(contents, resource.draft()); - queue.push_back((base.clone(), subresource)); - } + for contents in resource.draft().subresources_of(resource.contents()) { + let subresource = InnerResourcePtr::new(contents, resource.draft()); + state.queue.push_back((base.clone(), subresource)); + } + } + Ok(()) +} + +fn handle_fragment( + uri: &Uri, + resource: &InnerResourcePtr, + key: &Arc>, + default_draft: Draft, + 
queue: &mut VecDeque<(Arc>, InnerResourcePtr)>, +) -> Result<(), Error> { + if let Some(fragment) = uri.fragment() { + if let Some(resolved) = pointer(resource.contents(), fragment.as_str()) { + let draft = default_draft.detect(resolved)?; + let contents = std::ptr::addr_of!(*resolved); + let resource = InnerResourcePtr::new(contents, draft); + queue.push_back((Arc::clone(key), resource)); + } + } + Ok(()) +} + +fn handle_metaschemas( + refers_metaschemas: bool, + resources: &mut ResourceMap, + anchors: &mut AHashMap, +) { + if refers_metaschemas { + resources.reserve(SPECIFICATIONS.resources.len()); + for (key, resource) in &SPECIFICATIONS.resources { + resources.insert(Arc::clone(key), resource.clone()); + } + anchors.reserve(SPECIFICATIONS.anchors.len()); + for (key, anchor) in &SPECIFICATIONS.anchors { + anchors.insert(key.clone(), anchor.clone()); + } + } +} + +fn create_resource( + retrieved: Value, + fragmentless: Uri, + default_draft: Draft, + documents: &mut DocumentStore, + resources: &mut ResourceMap, +) -> Result<(Arc>, InnerResourcePtr), Error> { + let draft = default_draft.detect(&retrieved)?; + let boxed = Arc::pin(retrieved); + let contents = std::ptr::addr_of!(*boxed); + let resource = InnerResourcePtr::new(contents, draft); + let key = Arc::new(fragmentless); + documents.insert(Arc::clone(&key), boxed); + resources.insert(Arc::clone(&key), resource.clone()); + Ok((key, resource)) +} + +fn process_resources( + pairs: impl IntoIterator, Resource)>, + retriever: &dyn Retrieve, + documents: &mut DocumentStore, + resources: &mut ResourceMap, + anchors: &mut AHashMap, + resolution_cache: &mut UriCache, + default_draft: Draft, +) -> Result<(), Error> { + let mut state = ProcessingState::new(); + process_input_resources(pairs, documents, resources, &mut state)?; + + loop { + if state.queue.is_empty() && state.external.is_empty() { + break; } + + process_queue(&mut state, resources, anchors, resolution_cache)?; + // Retrieve external resources - for uri in 
external.drain() { + for uri in state.external.drain() { let mut fragmentless = uri.clone(); fragmentless.set_fragment(None); if !resources.contains_key(&fragmentless) { let retrieved = retriever - .retrieve(&fragmentless.borrow()) + .retrieve(&fragmentless) .map_err(|err| Error::unretrievable(fragmentless.as_str(), err))?; - let draft = default_draft.detect(&retrieved)?; - let boxed = Arc::pin(retrieved); - let contents = std::ptr::addr_of!(*boxed); - let resource = InnerResourcePtr::new(contents, draft); - let key = Arc::new(fragmentless); - documents.insert(Arc::clone(&key), boxed); - resources.insert(Arc::clone(&key), resource.clone()); + let (key, resource) = + create_resource(retrieved, fragmentless, default_draft, documents, resources)?; - if let Some(fragment) = uri.fragment() { - // The original `$ref` could have a fragment that points to a place that won't - // be discovered via the regular sub-resources discovery. Therefore we need to - // explicitly check it - if let Some(resolved) = pointer(resource.contents(), fragment.as_str()) { - let draft = default_draft.detect(resolved)?; - let contents = std::ptr::addr_of!(*resolved); - let resource = InnerResourcePtr::new(contents, draft); - queue.push_back((Arc::clone(&key), resource)); - } - } + handle_fragment(&uri, &resource, &key, default_draft, &mut state.queue)?; - queue.push_back((key, resource)); + state.queue.push_back((key, resource)); } } } - if refers_metaschemas { - resources.reserve(SPECIFICATIONS.resources.len()); - for (key, resource) in &SPECIFICATIONS.resources { - resources.insert(Arc::clone(key), resource.clone()); + handle_metaschemas(state.refers_metaschemas, resources, anchors); + + Ok(()) +} + +#[cfg(feature = "retrieve-async")] +async fn process_resources_async( + pairs: impl IntoIterator, Resource)>, + retriever: &dyn crate::AsyncRetrieve, + documents: &mut DocumentStore, + resources: &mut ResourceMap, + anchors: &mut AHashMap, + resolution_cache: &mut UriCache, + default_draft: 
Draft, +) -> Result<(), Error> { + let mut state = ProcessingState::new(); + process_input_resources(pairs, documents, resources, &mut state)?; + + loop { + if state.queue.is_empty() && state.external.is_empty() { + break; } - anchors.reserve(SPECIFICATIONS.anchors.len()); - for (key, anchor) in &SPECIFICATIONS.anchors { - anchors.insert(key.clone(), anchor.clone()); + + process_queue(&mut state, resources, anchors, resolution_cache)?; + + if !state.external.is_empty() { + let data = state + .external + .drain() + .filter_map(|uri| { + let mut fragmentless = uri.clone(); + fragmentless.set_fragment(None); + if resources.contains_key(&fragmentless) { + None + } else { + Some((uri, fragmentless)) + } + }) + .collect::>(); + + let results = { + let futures = data + .iter() + .map(|(_, fragmentless)| retriever.retrieve(fragmentless)); + futures::future::join_all(futures).await + }; + + for ((uri, fragmentless), result) in data.iter().zip(results) { + let retrieved = + result.map_err(|err| Error::unretrievable(fragmentless.as_str(), err))?; + + let (key, resource) = create_resource( + retrieved, + fragmentless.clone(), + default_draft, + documents, + resources, + )?; + + handle_fragment(uri, &resource, &key, default_draft, &mut state.queue)?; + + state.queue.push_back((key, resource)); + } } } + handle_metaschemas(state.refers_metaschemas, resources, anchors); + Ok(()) } @@ -637,7 +967,7 @@ fn parse_index(s: &str) -> Option { #[cfg(test)] mod tests { - use std::{error::Error as _, sync::Arc}; + use std::error::Error as _; use ahash::AHashMap; use fluent_uri::Uri; @@ -701,7 +1031,7 @@ mod tests { impl Retrieve for TestRetriever { fn retrieve( &self, - uri: &Uri<&str>, + uri: &Uri, ) -> Result> { if let Some(value) = self.schemas.get(uri.as_str()) { Ok(value.clone()) @@ -909,8 +1239,8 @@ mod tests { }); let registry = Registry::options() - .retriever(Arc::new(retriever)) - .try_from_resources(input_pairs) + .retriever(retriever) + .build(input_pairs) .expect("Invalid 
resources"); // Verify that all expected URIs are resolved and present in resources for uri in test_case.expected_resolved_uris { @@ -921,14 +1251,11 @@ mod tests { #[test] fn test_default_retriever_with_remote_refs() { - let result = Registry::try_from_resources( - [( - "http://example.com/schema1", - Resource::from_contents(json!({"$ref": "http://example.com/schema2"})) - .expect("Invalid resource"), - )] - .into_iter(), - ); + let result = Registry::try_from_resources([( + "http://example.com/schema1", + Resource::from_contents(json!({"$ref": "http://example.com/schema2"})) + .expect("Invalid resource"), + )]); let error = result.expect_err("Should fail"); assert_eq!(error.to_string(), "Resource 'http://example.com/schema2' is not present in a registry and retrieving it failed: Default retriever does not fetch resources"); assert!(error.source().is_some()); @@ -937,7 +1264,10 @@ mod tests { #[test] fn test_options() { let _registry = RegistryOptions::default() - .try_new("", Draft::default().create_resource(json!({}))) + .build([( + "", + Resource::from_contents(json!({})).expect("Invalid resource"), + )]) .expect("Invalid resources"); } @@ -1038,18 +1368,145 @@ mod tests { } #[test] - fn test_try_with_resource_and_retriever() { - let retriever = - create_test_retriever(&[("http://example.com/schema2", json!({"type": "object"}))]); - let registry = SPECIFICATIONS - .clone() - .try_with_resource_and_retriever( - "http://example.com", + fn test_invalid_reference() { + // Found via fuzzing + let resource = Draft::Draft202012.create_resource(json!({"$schema": "$##"})); + let _ = Registry::try_new("http://#/", resource); + } +} + +#[cfg(all(test, feature = "retrieve-async"))] +mod async_tests { + use crate::{uri, DefaultRetriever, Draft, Registry, Resource, Uri}; + use ahash::AHashMap; + use serde_json::{json, Value}; + use std::error::Error; + + struct TestAsyncRetriever { + schemas: AHashMap, + } + + impl TestAsyncRetriever { + fn with_schema(uri: impl Into, 
schema: Value) -> Self { + TestAsyncRetriever { + schemas: { AHashMap::from_iter([(uri.into(), schema)]) }, + } + } + } + + #[async_trait::async_trait] + impl crate::AsyncRetrieve for TestAsyncRetriever { + async fn retrieve( + &self, + uri: &Uri, + ) -> Result> { + self.schemas + .get(uri.as_str()) + .cloned() + .ok_or_else(|| "Schema not found".into()) + } + } + + #[tokio::test] + async fn test_default_async_retriever_with_remote_refs() { + let result = Registry::options() + .async_retriever(DefaultRetriever) + .build([( + "http://example.com/schema1", Resource::from_contents(json!({"$ref": "http://example.com/schema2"})) .expect("Invalid resource"), - &retriever, + )]) + .await; + + let error = result.expect_err("Should fail"); + assert_eq!(error.to_string(), "Resource 'http://example.com/schema2' is not present in a registry and retrieving it failed: Default retriever does not fetch resources"); + assert!(error.source().is_some()); + } + + #[tokio::test] + async fn test_async_options() { + let _registry = Registry::options() + .async_retriever(DefaultRetriever) + .build([("", Draft::default().create_resource(json!({})))]) + .await + .expect("Invalid resources"); + } + + #[tokio::test] + async fn test_async_registry_with_duplicate_input_uris() { + let input_resources = vec![ + ( + "http://example.com/schema", + json!({ + "type": "object", + "properties": { + "foo": { "type": "string" } + } + }), + ), + ( + "http://example.com/schema", + json!({ + "type": "object", + "properties": { + "bar": { "type": "number" } + } + }), + ), + ]; + + let result = Registry::options() + .async_retriever(DefaultRetriever) + .build( + input_resources + .into_iter() + .map(|(uri, value)| (uri, Draft::Draft202012.create_resource(value))), ) + .await; + + assert!( + result.is_ok(), + "Failed to create registry with duplicate input URIs" + ); + let registry = result.unwrap(); + + let resource = registry + .resources + .get(&uri::from_str("http://example.com/schema").expect("Invalid 
URI")) + .unwrap(); + let properties = resource + .contents() + .get("properties") + .and_then(|v| v.as_object()) + .unwrap(); + + assert!( + !properties.contains_key("bar"), + "Registry should contain the earliest added schema" + ); + assert!( + properties.contains_key("foo"), + "Registry should contain the overwritten schema" + ); + } + + #[tokio::test] + async fn test_async_try_with_resource() { + let retriever = TestAsyncRetriever::with_schema( + "http://example.com/schema2", + json!({"type": "object"}), + ); + + let registry = Registry::options() + .async_retriever(retriever) + .build([( + "http://example.com", + Resource::from_contents(json!({"$ref": "http://example.com/schema2"})) + .expect("Invalid resource"), + )]) + .await .expect("Invalid resource"); + let resolver = registry.try_resolver("").expect("Invalid base URI"); let resolved = resolver .lookup("http://example.com/schema2") @@ -1057,10 +1514,100 @@ mod tests { assert_eq!(resolved.contents(), &json!({"type": "object"})); } - #[test] - fn test_invalid_reference() { - // Found via fuzzing - let resource = Draft::Draft202012.create_resource(json!({"$schema": "$##"})); - let _ = Registry::try_new("http://#/", resource); + #[tokio::test] + async fn test_async_registry_with_multiple_refs() { + let retriever = TestAsyncRetriever { + schemas: AHashMap::from_iter([ + ( + "http://example.com/schema2".to_string(), + json!({"type": "object"}), + ), + ( + "http://example.com/schema3".to_string(), + json!({"type": "string"}), + ), + ]), + }; + + let registry = Registry::options() + .async_retriever(retriever) + .build([( + "http://example.com/schema1", + Resource::from_contents(json!({ + "type": "object", + "properties": { + "obj": {"$ref": "http://example.com/schema2"}, + "str": {"$ref": "http://example.com/schema3"} + } + })) + .expect("Invalid resource"), + )]) + .await + .expect("Invalid resource"); + + let resolver = registry.try_resolver("").expect("Invalid base URI"); + + // Check both references are 
resolved correctly + let resolved2 = resolver + .lookup("http://example.com/schema2") + .expect("Lookup failed"); + assert_eq!(resolved2.contents(), &json!({"type": "object"})); + + let resolved3 = resolver + .lookup("http://example.com/schema3") + .expect("Lookup failed"); + assert_eq!(resolved3.contents(), &json!({"type": "string"})); + } + + #[tokio::test] + async fn test_async_registry_with_nested_refs() { + let retriever = TestAsyncRetriever { + schemas: AHashMap::from_iter([ + ( + "http://example.com/address".to_string(), + json!({ + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"$ref": "http://example.com/city"} + } + }), + ), + ( + "http://example.com/city".to_string(), + json!({ + "type": "string", + "minLength": 1 + }), + ), + ]), + }; + + let registry = Registry::options() + .async_retriever(retriever) + .build([( + "http://example.com/person", + Resource::from_contents(json!({ + "type": "object", + "properties": { + "name": {"type": "string"}, + "address": {"$ref": "http://example.com/address"} + } + })) + .expect("Invalid resource"), + )]) + .await + .expect("Invalid resource"); + + let resolver = registry.try_resolver("").expect("Invalid base URI"); + + // Verify nested reference resolution + let resolved = resolver + .lookup("http://example.com/city") + .expect("Lookup failed"); + assert_eq!( + resolved.contents(), + &json!({"type": "string", "minLength": 1}) + ); } } diff --git a/crates/jsonschema-referencing/src/retriever.rs b/crates/jsonschema-referencing/src/retriever.rs index d1015fa6..21c9067e 100644 --- a/crates/jsonschema-referencing/src/retriever.rs +++ b/crates/jsonschema-referencing/src/retriever.rs @@ -16,11 +16,16 @@ pub trait Retrieve: Send + Sync { /// /// # Errors /// - /// If the resource couldn't be retrieved or an error occurred. 
- fn retrieve(&self, uri: &Uri<&str>) -> Result>; + /// This method can fail for various reasons: + /// - Resource not found + /// - Network errors (for remote resources) + /// - Permission errors + fn retrieve( + &self, + uri: &Uri, + ) -> Result>; } -/// A retriever that always fails, used as a default when external resource fetching is not needed. #[derive(Debug, Clone)] struct DefaultRetrieverError; @@ -32,11 +37,46 @@ impl fmt::Display for DefaultRetrieverError { impl std::error::Error for DefaultRetrieverError {} +/// A retriever that always fails, used as a default when external resource fetching is not needed. #[derive(Debug, PartialEq, Eq)] pub struct DefaultRetriever; impl Retrieve for DefaultRetriever { - fn retrieve(&self, _: &Uri<&str>) -> Result> { + fn retrieve(&self, _: &Uri) -> Result> { + Err(Box::new(DefaultRetrieverError)) + } +} + +#[cfg(feature = "retrieve-async")] +#[async_trait::async_trait] +pub trait AsyncRetrieve: Send + Sync { + /// Asynchronously retrieve a resource from the given URI. + /// + /// This is the non-blocking equivalent of [`Retrieve::retrieve`]. + /// + /// # Arguments + /// + /// * `uri` - The URI of the resource to retrieve. + /// + /// # Errors + /// + /// This method can fail for various reasons: + /// - Resource not found + /// - Network errors (for remote resources) + /// - Permission errors + async fn retrieve( + &self, + uri: &Uri, + ) -> Result>; +} + +#[cfg(feature = "retrieve-async")] +#[async_trait::async_trait] +impl AsyncRetrieve for DefaultRetriever { + async fn retrieve( + &self, + _: &Uri, + ) -> Result> { Err(Box::new(DefaultRetrieverError)) } } diff --git a/crates/jsonschema-referencing/src/uri.rs b/crates/jsonschema-referencing/src/uri.rs index 34c8574f..f641654e 100644 --- a/crates/jsonschema-referencing/src/uri.rs +++ b/crates/jsonschema-referencing/src/uri.rs @@ -1,3 +1,4 @@ +//! URI handling utilities for JSON Schema references. 
use fluent_uri::{ encoding::{encoder::Fragment, EStr, Encoder}, Uri, UriRef, diff --git a/crates/jsonschema/Cargo.toml b/crates/jsonschema/Cargo.toml index 8cb2234b..aadd4e18 100644 --- a/crates/jsonschema/Cargo.toml +++ b/crates/jsonschema/Cargo.toml @@ -16,6 +16,7 @@ default = ["resolve-http", "resolve-file"] resolve-http = ["reqwest"] resolve-file = [] +resolve-async = ["referencing/retrieve-async", "reqwest/default", "dep:async-trait", "dep:tokio"] [dependencies] ahash.workspace = true @@ -41,12 +42,16 @@ serde.workspace = true serde_json.workspace = true uuid-simd = "0.8" +tokio = { version = "1.0", features = ["fs", "rt"], optional = true } +async-trait = { version = "0.1.86", optional = true } + [dev-dependencies] benchmark = { path = "../benchmark/" } codspeed-criterion-compat = { version = "2.7", default-features = false } criterion = { version = "0.5", default-features = false } testsuite = { package = "jsonschema-testsuite", path = "../jsonschema-testsuite" } test-case = "3" +tokio = { version = "1", features = ["macros", "rt"] } [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] mockito = "1.5" diff --git a/crates/jsonschema/src/compiler.rs b/crates/jsonschema/src/compiler.rs index de0799ac..3fddfa4b 100644 --- a/crates/jsonschema/src/compiler.rs +++ b/crates/jsonschema/src/compiler.rs @@ -252,12 +252,8 @@ pub(crate) fn build_validator( let base_uri = resource_ref.id().unwrap_or(DEFAULT_ROOT_URL); // Build a registry & resolver needed for validator compilation - let pairs = once((Cow::Borrowed(base_uri), resource)).chain( - config - .resources - .drain() - .map(|(uri, resource)| (Cow::Owned(uri), resource)), - ); + let pairs = collect_resource_pairs(base_uri, resource, &mut config.resources); + let registry = if let Some(registry) = config.registry.take() { Arc::new(registry.try_with_resources_and_retriever(pairs, &*config.retriever, draft)?) 
} else { @@ -265,7 +261,7 @@ pub(crate) fn build_validator( Registry::options() .draft(draft) .retriever(Arc::clone(&config.retriever)) - .try_from_resources(pairs)?, + .build(pairs)?, ) }; let vocabularies = registry.find_vocabularies(draft, schema); @@ -283,20 +279,7 @@ pub(crate) fn build_validator( // Validate the schema itself if config.validate_schema { - if let Err(error) = { - match draft { - Draft::Draft4 => &crate::draft4::meta::VALIDATOR, - Draft::Draft6 => &crate::draft6::meta::VALIDATOR, - Draft::Draft7 => &crate::draft7::meta::VALIDATOR, - Draft::Draft201909 => &crate::draft201909::meta::VALIDATOR, - Draft::Draft202012 => &crate::draft202012::meta::VALIDATOR, - _ => unreachable!("Unknown draft"), - } - } - .validate(schema) - { - return Err(error.to_owned()); - } + validate_schema(draft, schema)?; } // Finally, compile the validator @@ -304,6 +287,86 @@ pub(crate) fn build_validator( Ok(Validator { root, config }) } +#[cfg(feature = "resolve-async")] +pub(crate) async fn build_validator_async( + mut config: ValidationOptions>, + schema: &Value, +) -> Result> { + let draft = config.draft_for(schema).await?; + let resource_ref = draft.create_resource_ref(schema); + let resource = draft.create_resource(schema.clone()); + let base_uri = resource_ref.id().unwrap_or(DEFAULT_ROOT_URL); + + let pairs = collect_resource_pairs(base_uri, resource, &mut config.resources); + + let registry = if let Some(registry) = config.registry.take() { + Arc::new( + registry + .try_with_resources_and_retriever_async(pairs, &*config.retriever, draft) + .await?, + ) + } else { + Arc::new( + Registry::options() + .async_retriever(Arc::clone(&config.retriever)) + .draft(draft) + .build(pairs) + .await?, + ) + }; + + let vocabularies = registry.find_vocabularies(draft, schema); + let resolver = Rc::new(registry.try_resolver(base_uri)?); + // HACK: As we store the config and it has a type parameter we need to apply a small hack here. 
+ // `ValidationOptions` struct has a default type parameter as `Arc<dyn Retrieve>` and to + // avoid propagating types everywhere in `Context`, it is easier to just replace the + // retriever with one that implements `Retrieve`, as it is not used anymore anyway. + // In the future it might be better to avoid storing the context anyway. + let config = Arc::new(config.with_blocking_retriever(crate::retriever::DefaultRetriever)); + let ctx = Context::new( + Arc::clone(&config), + Arc::clone(&registry), + resolver, + vocabularies, + draft, + Location::new(), + ); + + if config.validate_schema { + validate_schema(draft, schema)?; + } + + let root = compile(&ctx, resource_ref).map_err(|err| err.to_owned())?; + Ok(Validator { root, config }) +} + +fn collect_resource_pairs<'a>( + base_uri: &'a str, + resource: Resource, + resources: &'a mut AHashMap, +) -> impl IntoIterator, Resource)> { + once((Cow::Borrowed(base_uri), resource)).chain( + resources + .drain() + .map(|(uri, resource)| (Cow::Owned(uri), resource)), + ) +} + +fn validate_schema(draft: Draft, schema: &Value) -> Result<(), ValidationError<'static>> { + let validator = match draft { + Draft::Draft4 => &crate::draft4::meta::VALIDATOR, + Draft::Draft6 => &crate::draft6::meta::VALIDATOR, + Draft::Draft7 => &crate::draft7::meta::VALIDATOR, + Draft::Draft201909 => &crate::draft201909::meta::VALIDATOR, + Draft::Draft202012 => &crate::draft202012::meta::VALIDATOR, + _ => unreachable!("Unknown draft"), + }; + if let Err(error) = validator.validate(schema) { + return Err(error.to_owned()); + } + Ok(()) +} + /// Compile a JSON Schema instance to a tree of nodes. 
pub(crate) fn compile<'a>( ctx: &Context, diff --git a/crates/jsonschema/src/keywords/ref_.rs b/crates/jsonschema/src/keywords/ref_.rs index 75a99a1f..74a3c777 100644 --- a/crates/jsonschema/src/keywords/ref_.rs +++ b/crates/jsonschema/src/keywords/ref_.rs @@ -454,7 +454,7 @@ mod tests { impl Retrieve for MyRetrieve { fn retrieve( &self, - uri: &Uri<&str>, + uri: &Uri, ) -> Result> { match uri.path().as_str() { "/indirection" => Ok(json!({ @@ -494,7 +494,7 @@ mod tests { impl Retrieve for TestRetrieve { fn retrieve( &self, - uri: &Uri<&str>, + uri: &Uri, ) -> Result> { self.storage .get(uri.path().as_str()) @@ -628,7 +628,7 @@ mod tests { impl Retrieve for NestedRetrieve { fn retrieve( &self, - uri: &Uri<&str>, + uri: &Uri, ) -> Result> { match uri.as_str() { "foo://schema_2.json" => Ok(json!({ @@ -664,7 +664,7 @@ mod tests { impl Retrieve for FragmentRetrieve { fn retrieve( &self, - uri: &Uri<&str>, + uri: &Uri, ) -> Result> { match uri.path().as_str() { "/tmp/schemas/one.json" => Ok(json!({ diff --git a/crates/jsonschema/src/lib.rs b/crates/jsonschema/src/lib.rs index af9b4839..136b8c04 100644 --- a/crates/jsonschema/src/lib.rs +++ b/crates/jsonschema/src/lib.rs @@ -2,7 +2,7 @@ //! //! - 📚 Support for popular JSON Schema drafts //! - 🔧 Custom keywords and format validators -//! - 🌐 Remote reference fetching (network/file) +//! - 🌐 Blocking & non-blocking remote reference fetching (network/file) //! - 🎨 `Basic` output style as per JSON Schema spec //! - ✨ Meta-schema validation for schema documents //! - 🚀 WebAssembly support @@ -20,6 +20,8 @@ //! # Validation //! //! The `jsonschema` crate offers two main approaches to validation: one-off validation and reusable validators. +//! When external references are involved, the validator can be constructed using either blocking or non-blocking I/O. +//! //! //! For simple use cases where you need to validate an instance against a schema once, use [`is_valid`] or [`validate`] functions: //! @@ -34,14 +36,24 @@ //! 
``` //! //! For better performance, especially when validating multiple instances against the same schema, build a validator once and reuse it: +//! If your schema contains external references, you can choose between blocking and non-blocking construction: //! //! ```rust //! # fn main() -> Result<(), Box> { //! use serde_json::json; //! //! let schema = json!({"type": "string"}); +//! // Blocking construction - will fetch external references synchronously //! let validator = jsonschema::validator_for(&schema)?; +//! // Non-blocking construction - will fetch external references asynchronously +//! # #[cfg(feature = "resolve-async")] +//! # async fn async_example() -> Result<(), Box> { +//! # let schema = json!({"type": "string"}); +//! let validator = jsonschema::async_validator_for(&schema).await?; +//! # Ok(()) +//! # } //! +//! // Once constructed, validation is always synchronous as it works with in-memory data //! assert!(validator.is_valid(&json!("Hello, world!"))); //! assert!(!validator.is_valid(&json!(42))); //! assert!(validator.validate(&json!(42)).is_err()); @@ -196,6 +208,27 @@ //! # External References //! //! By default, `jsonschema` resolves HTTP references using `reqwest` and file references from the local file system. +//! Both blocking and non-blocking retrieval are supported during validator construction. Note that the validation +//! itself is always synchronous as it operates on in-memory data only. +//! +//! ```rust +//! # async fn example() -> Result<(), Box> { +//! use serde_json::json; +//! +//! let schema = json!({"$schema": "http://json-schema.org/draft-07/schema#", "type": "string"}); +//! +//! // Building a validator with blocking retrieval (default) +//! let validator = jsonschema::validator_for(&schema)?; +//! +//! // Building a validator with non-blocking retrieval (requires `resolve-async` feature) +//! # #[cfg(feature = "resolve-async")] +//! let validator = jsonschema::async_validator_for(&schema).await?; +//! +//! 
// Validation is always synchronous +//! assert!(validator.is_valid(&json!("Hello"))); +//! # Ok(()) +//! # } +//! ``` //! //! To enable HTTPS support, add the `rustls-tls` feature to `reqwest` in your `Cargo.toml`: //! @@ -207,9 +240,12 @@ //! //! - Disable HTTP resolving: `default-features = false, features = ["resolve-file"]` //! - Disable file resolving: `default-features = false, features = ["resolve-http"]` -//! - Disable both: `default-features = false` +//! - Enable async resolution: `features = ["resolve-async"]` +//! - Disable all resolving: `default-features = false` //! -//! You can implement a custom retriever to handle external references. Here's an example that uses a static map of schemas: +//! ## Custom retrievers +//! +//! You can implement custom retrievers for both blocking and non-blocking retrieval: //! //! ```rust //! # fn main() -> Result<(), Box> { @@ -225,7 +261,7 @@ //! //! fn retrieve( //! &self, -//! uri: &Uri<&str>, +//! uri: &Uri, //! ) -> Result> { //! self.schemas //! .get(uri.as_str()) @@ -268,6 +304,40 @@ //! # Ok(()) //! # } //! ``` +//! +//! And non-blocking version with the `resolve-async` feature enabled: +//! +//! ```rust +//! # #[cfg(feature = "resolve-async")] +//! # async fn example() -> Result<(), Box> { +//! use jsonschema::{AsyncRetrieve, Registry, Resource, Uri}; +//! use serde_json::{Value, json}; +//! +//! struct HttpRetriever; +//! +//! #[async_trait::async_trait] +//! impl AsyncRetrieve for HttpRetriever { +//! async fn retrieve( +//! &self, +//! uri: &Uri, +//! ) -> Result> { +//! reqwest::get(uri.as_str()) +//! .await? +//! .json() +//! .await +//! .map_err(Into::into) +//! } +//! } +//! +//! // Then use it to build a validator +//! let validator = jsonschema::async_options() +//! .with_retriever(HttpRetriever) +//! .build(&json!({"$ref": "https://example.com/user.json"})) +//! .await?; +//! # Ok(()) +//! # } +//! ``` +//! //! # Output Styles //! //! 
`jsonschema` supports the `basic` output style as defined in JSON Schema Draft 2019-09. @@ -521,6 +591,9 @@ pub use referencing::{ }; pub use validator::Validator; +#[cfg(feature = "resolve-async")] +pub use referencing::AsyncRetrieve; + use serde_json::Value; #[cfg(all( @@ -595,6 +668,35 @@ pub fn validator_for(schema: &Value) -> Result Result<(), Box> { +/// use serde_json::json; +/// +/// let schema = json!({ +/// "type": "object", +/// "properties": { +/// "user": { "$ref": "https://example.com/user.json" } +/// } +/// }); +/// +/// let validator = jsonschema::async_validator_for(&schema).await?; +/// assert!(validator.is_valid(&json!({"user": {"name": "Alice"}}))); +/// # Ok(()) +/// # } +/// ``` +#[cfg(feature = "resolve-async")] +pub async fn async_validator_for(schema: &Value) -> Result> { + Validator::async_new(schema).await +} + /// Create a builder for configuring JSON Schema validation options. /// /// This function returns a [`ValidationOptions`] struct, which allows you to set various @@ -643,6 +745,68 @@ pub fn options() -> ValidationOptions { Validator::options() } +/// Create a builder for configuring JSON Schema validation options. +/// +/// This function returns a [`ValidationOptions`] struct which allows you to set various options for JSON Schema validation. +/// External references will be retrieved using non-blocking I/O. 
+/// +/// # Examples +/// +/// Basic usage with external references: +/// +/// ```rust +/// # async fn example() -> Result<(), Box> { +/// use serde_json::json; +/// +/// let schema = json!({ +/// "$ref": "https://example.com/user.json" +/// }); +/// +/// let validator = jsonschema::async_options() +/// .build(&schema) +/// .await?; +/// +/// assert!(validator.is_valid(&json!({"name": "Alice"}))); +/// # Ok(()) +/// # } +/// ``` +/// +/// Advanced configuration: +/// +/// ```rust +/// # async fn example() -> Result<(), Box> { +/// use serde_json::{Value, json}; +/// use jsonschema::{Draft, AsyncRetrieve, Uri}; +/// +/// // Custom async retriever +/// struct MyRetriever; +/// +/// #[async_trait::async_trait] +/// impl AsyncRetrieve for MyRetriever { +/// async fn retrieve(&self, uri: &Uri) -> Result> { +/// // Custom retrieval logic +/// Ok(json!({})) +/// } +/// } +/// +/// let schema = json!({ +/// "$ref": "https://example.com/user.json" +/// }); +/// let validator = jsonschema::async_options() +/// .with_draft(Draft::Draft202012) +/// .with_retriever(MyRetriever) +/// .build(&schema) +/// .await?; +/// # Ok(()) +/// # } +/// ``` +/// +/// See [`ValidationOptions`] for all available configuration options. +#[cfg(feature = "resolve-async")] +pub fn async_options() -> ValidationOptions> { + Validator::async_options() +} + /// Functionality for validating JSON Schema documents against their meta-schemas. pub mod meta { use crate::{error::ValidationError, Draft, ReferencingError}; @@ -920,9 +1084,7 @@ pub mod draft4 { /// See [`ValidationOptions`] for all available configuration options. #[must_use] pub fn options() -> ValidationOptions { - let mut options = crate::options(); - options.with_draft(Draft::Draft4); - options + crate::options().with_draft(Draft::Draft4) } /// Functionality for validating JSON Schema Draft 4 documents. @@ -1077,9 +1239,7 @@ pub mod draft6 { /// See [`ValidationOptions`] for all available configuration options. 
#[must_use] pub fn options() -> ValidationOptions { - let mut options = crate::options(); - options.with_draft(Draft::Draft6); - options + crate::options().with_draft(Draft::Draft6) } /// Functionality for validating JSON Schema Draft 6 documents. @@ -1234,9 +1394,7 @@ pub mod draft7 { /// See [`ValidationOptions`] for all available configuration options. #[must_use] pub fn options() -> ValidationOptions { - let mut options = crate::options(); - options.with_draft(Draft::Draft7); - options + crate::options().with_draft(Draft::Draft7) } /// Functionality for validating JSON Schema Draft 7 documents. @@ -1391,9 +1549,7 @@ pub mod draft201909 { /// See [`ValidationOptions`] for all available configuration options. #[must_use] pub fn options() -> ValidationOptions { - let mut options = crate::options(); - options.with_draft(Draft::Draft201909); - options + crate::options().with_draft(Draft::Draft201909) } /// Functionality for validating JSON Schema Draft 2019-09 documents. @@ -1550,9 +1706,7 @@ pub mod draft202012 { /// See [`ValidationOptions`] for all available configuration options. #[must_use] pub fn options() -> ValidationOptions { - let mut options = crate::options(); - options.with_draft(Draft::Draft202012); - options + crate::options().with_draft(Draft::Draft202012) } /// Functionality for validating JSON Schema Draft 2020-12 documents. 
@@ -1969,13 +2123,13 @@ mod tests { #[test] fn test_invalid_schema_keyword() { let schema = json!({ - // Note `https`, not `http` - "$schema": "https://json-schema.org/draft-07/schema", + // Note `htt`, not `http` + "$schema": "htt://json-schema.org/draft-07/schema", }); let error = crate::validator_for(&schema).expect_err("Should fail"); assert_eq!( error.to_string(), - "Unknown specification: https://json-schema.org/draft-07/schema" + "Unknown specification: htt://json-schema.org/draft-07/schema" ); } @@ -2007,3 +2161,201 @@ mod tests { let _ = foo(); } } + +#[cfg(all(test, feature = "resolve-async"))] +mod async_tests { + use referencing::Resource; + use std::collections::HashMap; + + use serde_json::json; + + use crate::{AsyncRetrieve, Draft, Uri}; + + /// Mock async retriever for testing + struct TestRetriever { + schemas: HashMap, + } + + impl TestRetriever { + fn new() -> Self { + let mut schemas = HashMap::new(); + schemas.insert( + "https://example.com/user.json".to_string(), + json!({ + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0} + }, + "required": ["name"] + }), + ); + Self { schemas } + } + } + + #[async_trait::async_trait] + impl AsyncRetrieve for TestRetriever { + async fn retrieve( + &self, + uri: &Uri, + ) -> Result> { + self.schemas + .get(uri.as_str()) + .cloned() + .ok_or_else(|| "Schema not found".into()) + } + } + + #[tokio::test] + async fn test_async_validator_for() { + let schema = json!({ + "$ref": "https://example.com/user.json" + }); + + let validator = crate::async_options() + .with_retriever(TestRetriever::new()) + .build(&schema) + .await + .unwrap(); + + // Valid instance + assert!(validator.is_valid(&json!({ + "name": "John Doe", + "age": 30 + }))); + + // Invalid instances + assert!(!validator.is_valid(&json!({ + "age": -5 + }))); + assert!(!validator.is_valid(&json!({ + "name": 123, + "age": 30 + }))); + } + + #[tokio::test] + async fn test_async_options_with_draft() 
{ + let schema = json!({ + "$ref": "https://example.com/user.json" + }); + + let validator = crate::async_options() + .with_draft(Draft::Draft202012) + .with_retriever(TestRetriever::new()) + .build(&schema) + .await + .unwrap(); + + assert!(validator.is_valid(&json!({ + "name": "John Doe", + "age": 30 + }))); + } + + #[tokio::test] + async fn test_async_retrieval_failure() { + let schema = json!({ + "$ref": "https://example.com/nonexistent.json" + }); + + let result = crate::async_options() + .with_retriever(TestRetriever::new()) + .build(&schema) + .await; + + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("Schema not found")); + } + + #[tokio::test] + async fn test_async_nested_references() { + let mut retriever = TestRetriever::new(); + retriever.schemas.insert( + "https://example.com/nested.json".to_string(), + json!({ + "type": "object", + "properties": { + "user": { "$ref": "https://example.com/user.json" } + } + }), + ); + + let schema = json!({ + "$ref": "https://example.com/nested.json" + }); + + let validator = crate::async_options() + .with_retriever(retriever) + .build(&schema) + .await + .unwrap(); + + // Valid nested structure + assert!(validator.is_valid(&json!({ + "user": { + "name": "John Doe", + "age": 30 + } + }))); + + // Invalid nested structure + assert!(!validator.is_valid(&json!({ + "user": { + "age": -5 + } + }))); + } + + #[tokio::test] + async fn test_async_with_registry() { + use crate::Registry; + + // Create a registry with initial schemas + let registry = Registry::options() + .async_retriever(TestRetriever::new()) + .build([( + "https://example.com/user.json", + Resource::from_contents(json!({ + "type": "object", + "properties": { + "name": {"type": "string"}, + "age": {"type": "integer", "minimum": 0} + }, + "required": ["name"] + })) + .unwrap(), + )]) + .await + .unwrap(); + + // Create a validator using the pre-populated registry + let validator = crate::async_options() + .with_registry(registry) + 
.build(&json!({ + "$ref": "https://example.com/user.json" + })) + .await + .unwrap(); + + // Verify that validation works with the registry + assert!(validator.is_valid(&json!({ + "name": "John Doe", + "age": 30 + }))); + assert!(!validator.is_valid(&json!({ + "age": -5 + }))); + } + + #[tokio::test] + async fn test_async_validator_for_basic() { + let schema = json!({"type": "integer"}); + + let validator = crate::async_validator_for(&schema).await.unwrap(); + + assert!(validator.is_valid(&json!(42))); + assert!(!validator.is_valid(&json!("abc"))); + } +} diff --git a/crates/jsonschema/src/options.rs b/crates/jsonschema/src/options.rs index e3102b60..573d7ba4 100644 --- a/crates/jsonschema/src/options.rs +++ b/crates/jsonschema/src/options.rs @@ -17,13 +17,13 @@ use std::{fmt, sync::Arc}; /// Configuration options for JSON Schema validation. #[derive(Clone)] -pub struct ValidationOptions { +pub struct ValidationOptions> { pub(crate) draft: Option, content_media_type_checks: AHashMap<&'static str, Option>, content_encoding_checks_and_converters: AHashMap<&'static str, Option<(ContentEncodingCheckType, ContentEncodingConverterType)>>, /// Retriever for external resources - pub(crate) retriever: Arc, + pub(crate) retriever: R, /// Additional resources that should be addressable during validation. 
pub(crate) resources: AHashMap, pub(crate) registry: Option, @@ -34,7 +34,7 @@ pub struct ValidationOptions { keywords: AHashMap>, } -impl Default for ValidationOptions { +impl Default for ValidationOptions> { fn default() -> Self { ValidationOptions { draft: None, @@ -52,54 +52,30 @@ impl Default for ValidationOptions { } } -impl ValidationOptions { +#[cfg(feature = "resolve-async")] +impl Default for ValidationOptions> { + fn default() -> Self { + ValidationOptions { + draft: None, + content_media_type_checks: AHashMap::default(), + content_encoding_checks_and_converters: AHashMap::default(), + retriever: Arc::new(DefaultRetriever), + resources: AHashMap::default(), + registry: None, + formats: AHashMap::default(), + validate_formats: None, + validate_schema: true, + ignore_unknown_formats: true, + keywords: AHashMap::default(), + } + } +} + +impl ValidationOptions { /// Return the draft version, or the default if not set. pub(crate) fn draft(&self) -> Draft { self.draft.unwrap_or_default() } - pub(crate) fn draft_for(&self, contents: &Value) -> Result> { - // Preference: - // - Explicitly set - // - Autodetected - // - Default - if let Some(draft) = self.draft { - Ok(draft) - } else { - let default = Draft::default(); - match default.detect(contents) { - Ok(draft) => Ok(draft), - Err(referencing::Error::UnknownSpecification { specification }) => { - // Try to retrieve the specification and detect its draft - if let Ok(Ok(retrieved)) = uri::from_str(&specification) - .map(|uri| self.retriever.retrieve(&uri.borrow())) - { - Ok(default.detect(&retrieved)?) - } else { - Err(referencing::Error::UnknownSpecification { specification }.into()) - } - } - Err(error) => Err(error.into()), - } - } - } - /// Build a JSON Schema validator using the current options. 
- /// - /// # Example - /// - /// ```rust - /// use serde_json::json; - /// - /// let schema = json!({"type": "string"}); - /// let validator = jsonschema::options() - /// .build(&schema) - /// .expect("A valid schema"); - /// - /// assert!(validator.is_valid(&json!("Hello"))); - /// assert!(!validator.is_valid(&json!(42))); - /// ``` - pub fn build(&self, schema: &Value) -> Result> { - compiler::build_validator(self.clone(), schema) - } /// Sets the JSON Schema draft version. /// /// ```rust @@ -109,7 +85,7 @@ impl ValidationOptions { /// .with_draft(Draft::Draft4); /// ``` #[inline] - pub fn with_draft(&mut self, draft: Draft) -> &mut Self { + pub fn with_draft(mut self, draft: Draft) -> Self { self.draft = Some(draft); self } @@ -137,21 +113,16 @@ impl ValidationOptions { /// .with_content_media_type("application/custom", check_custom_media_type); /// ``` pub fn with_content_media_type( - &mut self, + mut self, media_type: &'static str, media_type_check: ContentMediaTypeCheckType, - ) -> &mut Self { + ) -> Self { self.content_media_type_checks .insert(media_type, Some(media_type_check)); self } - /// Set a retriever to fetch external resources. - pub fn with_retriever(&mut self, retriever: impl Retrieve + 'static) -> &mut Self { - self.retriever = Arc::new(retriever); - self - } /// Remove support for a specific content media type validation. 
- pub fn without_content_media_type_support(&mut self, media_type: &'static str) -> &mut Self { + pub fn without_content_media_type_support(mut self, media_type: &'static str) -> Self { self.content_media_type_checks.insert(media_type, None); self } @@ -226,11 +197,11 @@ impl ValidationOptions { /// .with_content_encoding("custom", check, convert); /// ``` pub fn with_content_encoding( - &mut self, + mut self, encoding: &'static str, check: ContentEncodingCheckType, converter: ContentEncodingConverterType, - ) -> &mut Self { + ) -> Self { self.content_encoding_checks_and_converters .insert(encoding, Some((check, converter))); self @@ -243,10 +214,7 @@ impl ValidationOptions { /// let options = jsonschema::options() /// .without_content_encoding_support("base64"); /// ``` - pub fn without_content_encoding_support( - &mut self, - content_encoding: &'static str, - ) -> &mut Self { + pub fn without_content_encoding_support(mut self, content_encoding: &'static str) -> Self { self.content_encoding_checks_and_converters .insert(content_encoding, None); self @@ -272,7 +240,7 @@ impl ValidationOptions { /// # Ok(()) /// # } /// ``` - pub fn with_resource(&mut self, uri: impl Into, resource: Resource) -> &mut Self { + pub fn with_resource(mut self, uri: impl Into, resource: Resource) -> Self { self.resources.insert(uri.into(), resource); self } @@ -305,9 +273,9 @@ impl ValidationOptions { /// # } /// ``` pub fn with_resources( - &mut self, + mut self, pairs: impl Iterator, Resource)>, - ) -> &mut Self { + ) -> Self { for (uri, resource) in pairs { self.resources.insert(uri.into(), resource); } @@ -341,7 +309,7 @@ impl ValidationOptions { /// # Ok(()) /// # } /// ``` - pub fn with_registry(&mut self, registry: referencing::Registry) -> &mut Self { + pub fn with_registry(mut self, registry: referencing::Registry) -> Self { self.registry = Some(registry); self } @@ -366,7 +334,7 @@ impl ValidationOptions { /// assert!(validator.is_valid(&json!("foo42!"))); /// # } /// ``` - pub 
fn with_format(&mut self, name: N, format: F) -> &mut Self + pub fn with_format(mut self, name: N, format: F) -> Self where N: Into, F: Fn(&str) -> bool + Send + Sync + 'static, @@ -382,7 +350,7 @@ impl ValidationOptions { /// Used internally to prevent infinite recursion when validating meta-schemas. /// **Note**: Manually-crafted `ValidationError`s may still occur during compilation. #[inline] - pub(crate) fn without_schema_validation(&mut self) -> &mut Self { + pub(crate) fn without_schema_validation(mut self) -> Self { self.validate_schema = false; self } @@ -391,7 +359,7 @@ impl ValidationOptions { /// Default behavior depends on the draft version. This method overrides /// the default, enabling or disabling format validation regardless of draft. #[inline] - pub fn should_validate_formats(&mut self, yes: bool) -> &mut Self { + pub fn should_validate_formats(mut self, yes: bool) -> Self { self.validate_formats = Some(yes); self } @@ -402,7 +370,7 @@ impl ValidationOptions { /// /// By default, unknown formats are silently ignored. Set to `false` to report /// unrecognized formats as validation errors. - pub fn should_ignore_unknown_formats(&mut self, yes: bool) -> &mut Self { + pub fn should_ignore_unknown_formats(mut self, yes: bool) -> Self { self.ignore_unknown_formats = yes; self } @@ -465,7 +433,7 @@ impl ValidationOptions { /// /// assert!(validator.is_valid(&json!({ "a": "b"}))); /// ``` - pub fn with_keyword(&mut self, name: N, factory: F) -> &mut Self + pub fn with_keyword(mut self, name: N, factory: F) -> Self where N: Into, F: for<'a> Fn( @@ -486,6 +454,128 @@ impl ValidationOptions { } } +impl ValidationOptions> { + /// Build a JSON Schema validator using the current options. 
+ /// + /// # Example + /// + /// ```rust + /// use serde_json::json; + /// + /// let schema = json!({"type": "string"}); + /// let validator = jsonschema::options() + /// .build(&schema) + /// .expect("A valid schema"); + /// + /// assert!(validator.is_valid(&json!("Hello"))); + /// assert!(!validator.is_valid(&json!(42))); + /// ``` + pub fn build(&self, schema: &Value) -> Result> { + compiler::build_validator(self.clone(), schema) + } + pub(crate) fn draft_for(&self, contents: &Value) -> Result> { + // Preference: + // - Explicitly set + // - Autodetected + // - Default + if let Some(draft) = self.draft { + Ok(draft) + } else { + let default = Draft::default(); + match default.detect(contents) { + Ok(draft) => Ok(draft), + Err(referencing::Error::UnknownSpecification { specification }) => { + // Try to retrieve the specification and detect its draft + if let Ok(Ok(retrieved)) = + uri::from_str(&specification).map(|uri| self.retriever.retrieve(&uri)) + { + Ok(default.detect(&retrieved)?) + } else { + Err(referencing::Error::UnknownSpecification { specification }.into()) + } + } + Err(error) => Err(error.into()), + } + } + } + /// Set a retriever to fetch external resources. 
+ pub fn with_retriever(mut self, retriever: impl Retrieve + 'static) -> Self { + self.retriever = Arc::new(retriever); + self + } +} + +#[cfg(feature = "resolve-async")] +impl ValidationOptions> { + pub async fn build(&self, schema: &Value) -> Result> { + compiler::build_validator_async(self.clone(), schema).await + } + pub fn with_retriever( + self, + retriever: impl referencing::AsyncRetrieve + 'static, + ) -> ValidationOptions> { + ValidationOptions { + draft: self.draft, + retriever: Arc::new(retriever), + content_media_type_checks: self.content_media_type_checks, + content_encoding_checks_and_converters: self.content_encoding_checks_and_converters, + resources: self.resources, + registry: self.registry, + formats: self.formats, + validate_formats: self.validate_formats, + validate_schema: self.validate_schema, + ignore_unknown_formats: self.ignore_unknown_formats, + keywords: self.keywords, + } + } + pub(crate) async fn draft_for( + &self, + contents: &Value, + ) -> Result> { + // Preference: + // - Explicitly set + // - Autodetected + // - Default + if let Some(draft) = self.draft { + Ok(draft) + } else { + let default = Draft::default(); + match default.detect(contents) { + Ok(draft) => Ok(draft), + Err(referencing::Error::UnknownSpecification { specification }) => { + // Try to retrieve the specification and detect its draft + if let Ok(uri) = uri::from_str(&specification) { + if let Ok(retrieved) = self.retriever.retrieve(&uri).await { + return Ok(default.detect(&retrieved)?); + } + } + Err(referencing::Error::UnknownSpecification { specification }.into()) + } + Err(error) => Err(error.into()), + } + } + } + /// Set a retriever to fetch external resources. 
+ pub(crate) fn with_blocking_retriever( + self, + retriever: impl Retrieve + 'static, + ) -> ValidationOptions> { + ValidationOptions { + draft: self.draft, + retriever: Arc::new(retriever), + content_media_type_checks: self.content_media_type_checks, + content_encoding_checks_and_converters: self.content_encoding_checks_and_converters, + resources: self.resources, + registry: self.registry, + formats: self.formats, + validate_formats: self.validate_formats, + validate_schema: self.validate_schema, + ignore_unknown_formats: self.ignore_unknown_formats, + keywords: self.keywords, + } + } +} + impl fmt::Debug for ValidationOptions { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("CompilationConfig") diff --git a/crates/jsonschema/src/retriever.rs b/crates/jsonschema/src/retriever.rs index 3a6b7ab1..8288603c 100644 --- a/crates/jsonschema/src/retriever.rs +++ b/crates/jsonschema/src/retriever.rs @@ -6,7 +6,10 @@ pub(crate) struct DefaultRetriever; impl Retrieve for DefaultRetriever { #[allow(unused)] - fn retrieve(&self, uri: &Uri<&str>) -> Result> { + fn retrieve( + &self, + uri: &Uri, + ) -> Result> { #[cfg(target_arch = "wasm32")] { Err("External references are not supported in WASM".into()) @@ -50,73 +53,131 @@ impl Retrieve for DefaultRetriever { } } -#[cfg(test)] -mod tests { - use serde_json::json; - #[cfg(not(target_arch = "wasm32"))] - use std::io::Write; - - #[cfg(not(target_arch = "wasm32"))] - fn path_to_uri(path: &std::path::Path) -> String { - use percent_encoding::{percent_encode, AsciiSet, CONTROLS}; - - let mut result = "file://".to_owned(); - const SEGMENT: &AsciiSet = &CONTROLS - .add(b' ') - .add(b'"') - .add(b'<') - .add(b'>') - .add(b'`') - .add(b'#') - .add(b'?') - .add(b'{') - .add(b'}') - .add(b'/') - .add(b'%'); - - #[cfg(not(target_os = "windows"))] +#[cfg(feature = "resolve-async")] +#[async_trait::async_trait] +impl referencing::AsyncRetrieve for DefaultRetriever { + async fn retrieve( + &self, + uri: &Uri, 
+ ) -> Result> { + #[cfg(target_arch = "wasm32")] { - use std::os::unix::ffi::OsStrExt; - - const CUSTOM_SEGMENT: &AsciiSet = &SEGMENT.add(b'\\'); - for component in path.components().skip(1) { - result.push('/'); - result.extend(percent_encode( - component.as_os_str().as_bytes(), - CUSTOM_SEGMENT, - )); - } + Err("External references are not supported in WASM".into()) } - #[cfg(target_os = "windows")] - { - use std::path::{Component, Prefix}; - let mut components = path.components(); - - match components.next() { - Some(Component::Prefix(ref p)) => match p.kind() { - Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { - result.push('/'); - result.push(letter as char); - result.push(':'); - } - _ => panic!("Unexpected path"), - }, - _ => panic!("Unexpected path"), + #[cfg(not(target_arch = "wasm32"))] + match uri.scheme().as_str() { + "http" | "https" => { + #[cfg(any(feature = "resolve-http", test))] + { + Ok(reqwest::get(uri.as_str()).await?.json().await?) + } + #[cfg(not(any(feature = "resolve-http", test)))] + Err("`resolve-http` feature or a custom resolver is required to resolve external schemas via HTTP".into()) } - - for component in components { - if component == Component::RootDir { - continue; + "file" => { + #[cfg(any(feature = "resolve-file", test))] + { + // File operations are blocking, so we use tokio's spawn_blocking + let path = uri.path().as_str().to_string(); + let contents = tokio::task::spawn_blocking( + move || -> Result> { + let path = { + #[cfg(windows)] + { + let path = path.trim_start_matches('/').replace('/', "\\"); + std::path::PathBuf::from(path) + } + #[cfg(not(windows))] + { + std::path::PathBuf::from(path) + } + }; + let file = std::fs::File::open(path)?; + Ok(serde_json::from_reader(file)?) 
+ }, + ) + .await??; + Ok(contents) } + #[cfg(not(any(feature = "resolve-file", test)))] + { + Err("`resolve-file` feature or a custom resolver is required to resolve external schemas via files".into()) + } + } + scheme => Err(format!("Unknown scheme {scheme}").into()), + } + } +} + +#[cfg(all(test, not(target_arch = "wasm32")))] +fn path_to_uri(path: &std::path::Path) -> String { + use percent_encoding::{percent_encode, AsciiSet, CONTROLS}; - let component = component.as_os_str().to_str().expect("Unexpected path"); + let mut result = "file://".to_owned(); + const SEGMENT: &AsciiSet = &CONTROLS + .add(b' ') + .add(b'"') + .add(b'<') + .add(b'>') + .add(b'`') + .add(b'#') + .add(b'?') + .add(b'{') + .add(b'}') + .add(b'/') + .add(b'%'); + + #[cfg(not(target_os = "windows"))] + { + use std::os::unix::ffi::OsStrExt; + + const CUSTOM_SEGMENT: &AsciiSet = &SEGMENT.add(b'\\'); + for component in path.components().skip(1) { + result.push('/'); + result.extend(percent_encode( + component.as_os_str().as_bytes(), + CUSTOM_SEGMENT, + )); + } + } + #[cfg(target_os = "windows")] + { + use std::path::{Component, Prefix}; + let mut components = path.components(); - result.push('/'); - result.extend(percent_encode(component.as_bytes(), SEGMENT)); + match components.next() { + Some(Component::Prefix(ref p)) => match p.kind() { + Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => { + result.push('/'); + result.push(letter as char); + result.push(':'); + } + _ => panic!("Unexpected path"), + }, + _ => panic!("Unexpected path"), + } + + for component in components { + if component == Component::RootDir { + continue; } + + let component = component.as_os_str().to_str().expect("Unexpected path"); + + result.push('/'); + result.extend(percent_encode(component.as_bytes(), SEGMENT)); } - result } + result +} + +#[cfg(test)] +mod tests { + #[cfg(not(target_arch = "wasm32"))] + use super::path_to_uri; + use serde_json::json; + #[cfg(not(target_arch = "wasm32"))] + use std::io::Write; 
#[test] #[cfg(not(target_arch = "wasm32"))] @@ -168,3 +229,135 @@ mod tests { assert!(error.contains("External references are not supported in WASM")); } } + +#[cfg(all(test, feature = "resolve-async", not(target_arch = "wasm32")))] +mod async_tests { + use super::*; + use crate::Registry; + use serde_json::json; + use std::io::Write; + + #[tokio::test] + async fn test_async_retrieve_from_file() { + let mut temp_file = tempfile::NamedTempFile::new().expect("Failed to create temp file"); + let external_schema = json!({ + "type": "object", + "properties": { + "name": { "type": "string" } + }, + "required": ["name"] + }); + write!(temp_file, "{}", external_schema).expect("Failed to write to temp file"); + + let uri = path_to_uri(temp_file.path()); + + let schema = json!({ + "type": "object", + "properties": { + "user": { "$ref": uri } + } + }); + + // Create registry with default async retriever + let registry = Registry::options() + .async_retriever(DefaultRetriever) + .build([( + "http://example.com/schema", + crate::Draft::Draft202012.create_resource(schema.clone()), + )]) + .await + .expect("Registry creation failed"); + + let validator = crate::options() + .with_registry(registry) + .build(&schema) + .expect("Invalid schema"); + + let valid = json!({"user": {"name": "John Doe"}}); + assert!(validator.is_valid(&valid)); + + let invalid = json!({"user": {}}); + assert!(!validator.is_valid(&invalid)); + } + + #[tokio::test] + async fn test_async_unknown_scheme() { + let schema = json!({ + "type": "object", + "properties": { + "test": { "$ref": "unknown-schema://test" } + } + }); + + let result = Registry::options() + .async_retriever(DefaultRetriever) + .build([( + "http://example.com/schema", + crate::Draft::Draft202012.create_resource(schema), + )]) + .await; + + assert!(result.is_err()); + let error = result.unwrap_err().to_string(); + assert!(error.contains("Unknown scheme")); + } + + #[tokio::test] + async fn test_async_concurrent_retrievals() { + let mut 
temp_files = vec![]; + let mut uris = vec![]; + + // Create multiple temp files with different schemas + for i in 0..3 { + let mut temp_file = tempfile::NamedTempFile::new().expect("Failed to create temp file"); + let schema = json!({ + "type": "object", + "properties": { + "field": { "type": "string", "minLength": i } + } + }); + write!(temp_file, "{}", schema).expect("Failed to write to temp file"); + uris.push(path_to_uri(temp_file.path())); + temp_files.push(temp_file); + } + + // Create a schema that references all temp files + let schema = json!({ + "type": "object", + "properties": { + "obj1": { "$ref": uris[0] }, + "obj2": { "$ref": uris[1] }, + "obj3": { "$ref": uris[2] } + } + }); + + let registry = Registry::options() + .async_retriever(DefaultRetriever) + .build([( + "http://example.com/schema", + crate::Draft::Draft202012.create_resource(schema.clone()), + )]) + .await + .expect("Registry creation failed"); + + let validator = crate::options() + .with_registry(registry) + .build(&schema) + .expect("Invalid schema"); + + let valid = json!({ + "obj1": { "field": "" }, // minLength: 0 + "obj2": { "field": "a" }, // minLength: 1 + "obj3": { "field": "ab" } // minLength: 2 + }); + assert!(validator.is_valid(&valid)); + + // Test invalid data + let invalid = json!({ + "obj1": { "field": "" }, + "obj2": { "field": "" }, // should be at least 1 char + "obj3": { "field": "a" } // should be at least 2 chars + }); + assert!(!validator.is_valid(&invalid)); + } +} diff --git a/crates/jsonschema/src/validator.rs b/crates/jsonschema/src/validator.rs index cae4397a..4c6daed7 100644 --- a/crates/jsonschema/src/validator.rs +++ b/crates/jsonschema/src/validator.rs @@ -189,10 +189,44 @@ impl Validator { pub fn options() -> ValidationOptions { ValidationOptions::default() } + /// Create a default [`ValidationOptions`] configured for async validation. 
+ /// + /// Use this to set the draft version and other validation parameters when working + /// with schemas that require async reference resolution. + /// + /// # Example + /// + /// ```rust + /// # use serde_json::json; + /// # use jsonschema::Draft; + /// # async fn example() -> Result<(), Box> { + /// let schema = json!({ + /// "$ref": "https://example.com/schema.json" + /// }); + /// + /// let validator = jsonschema::async_options() + /// .with_draft(Draft::Draft202012) + /// .build(&schema) + /// .await?; + /// # Ok(()) + /// # } + /// ``` + /// + /// For sync validation, use [`options`] instead. + #[cfg(feature = "resolve-async")] + #[must_use] + pub fn async_options() -> ValidationOptions> { + ValidationOptions::default() + } /// Create a validator using the default options. pub fn new(schema: &Value) -> Result> { Self::options().build(schema) } + /// Create a validator using the default async options. + #[cfg(feature = "resolve-async")] + pub async fn async_new(schema: &Value) -> Result> { + Self::async_options().build(schema).await + } /// Validate `instance` against `schema` and return the first error if any. 
#[inline] pub fn validate<'i>(&self, instance: &'i Value) -> Result<(), ValidationError<'i>> { diff --git a/crates/jsonschema/tests/suite.rs b/crates/jsonschema/tests/suite.rs index d98908de..493a6b15 100644 --- a/crates/jsonschema/tests/suite.rs +++ b/crates/jsonschema/tests/suite.rs @@ -22,19 +22,19 @@ mod tests { let mut options = jsonschema::options(); match test.draft { "draft4" => { - options.with_draft(Draft::Draft4); + options = options.with_draft(Draft::Draft4); } "draft6" => { - options.with_draft(Draft::Draft6); + options = options.with_draft(Draft::Draft6); } "draft7" => { - options.with_draft(Draft::Draft7); + options = options.with_draft(Draft::Draft7); } "draft2019-09" | "draft2020-12" => {} _ => panic!("Unsupported draft"), }; if test.is_optional { - options.should_validate_formats(true); + options = options.should_validate_formats(true); } let validator = options .build(&test.schema)