From f598c4bb4456bc596e9e79468b43750e5896181a Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 5 Sep 2024 13:53:36 -0700 Subject: [PATCH 1/8] impl ExportableProvider for ForkByErrorProvider and add tutorial --- Cargo.lock | 1 + provider/adapters/Cargo.toml | 3 ++ provider/adapters/src/fork/by_error.rs | 15 ++++++ provider/adapters/src/lib.rs | 2 +- tools/md-tests/Cargo.toml | 5 +- tutorials/data_provider.md | 73 ++++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e33bf60e399..eee878686cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1893,6 +1893,7 @@ dependencies = [ "icu_provider_blob", "icu_provider_export", "icu_provider_fs", + "icu_provider_source", "itertools", "litemap", "lru", diff --git a/provider/adapters/Cargo.toml b/provider/adapters/Cargo.toml index 457e844680c..13af46c2ec4 100644 --- a/provider/adapters/Cargo.toml +++ b/provider/adapters/Cargo.toml @@ -29,3 +29,6 @@ serde = { workspace = true, features = ["derive", "alloc"], optional = true } icu_provider = { path = "../../provider/core", features = ["macros", "deserialize_json"] } icu_locale = { path = "../../components/locale" } writeable = { path = "../../utils/writeable" } + +[features] +std = [] diff --git a/provider/adapters/src/fork/by_error.rs b/provider/adapters/src/fork/by_error.rs index 1d5c85ebd9e..ec91e3698be 100644 --- a/provider/adapters/src/fork/by_error.rs +++ b/provider/adapters/src/fork/by_error.rs @@ -4,6 +4,7 @@ use super::ForkByErrorPredicate; use alloc::{collections::BTreeSet, vec::Vec}; +use icu_provider::export::ExportableProvider; use icu_provider::prelude::*; /// A provider that returns data from one of two child providers based on a predicate function. @@ -159,6 +160,20 @@ where } } +#[cfg(feature = "std")] +impl ExportableProvider for ForkByErrorProvider +where + P0: ExportableProvider, + P1: ExportableProvider, + F: ForkByErrorPredicate + Sync, +{ + fn supported_markers(&self) -> std::collections::HashSet { + let mut markers = self.0.supported_markers(); + markers.extend(self.1.supported_markers()); + markers + } +} + /// A provider that returns data from the first child provider passing a predicate function. /// /// This is an abstract forking provider that must be provided with a type implementing the diff --git a/provider/adapters/src/lib.rs b/provider/adapters/src/lib.rs index 5583cfd272a..5b155d7e295 100644 --- a/provider/adapters/src/lib.rs +++ b/provider/adapters/src/lib.rs @@ -10,7 +10,7 @@ //! - Use the [`fallback`] module to automatically resolve arbitrary locales for data loading. // https://github.com/unicode-org/icu4x/blob/main/documents/process/boilerplate.md#library-annotations -#![cfg_attr(not(test), no_std)] +#![cfg_attr(not(any(test, feature = "std")), no_std)] #![cfg_attr( not(test), deny( diff --git a/tools/md-tests/Cargo.toml b/tools/md-tests/Cargo.toml index d710c334e6a..3561ef04a86 100644 --- a/tools/md-tests/Cargo.toml +++ b/tools/md-tests/Cargo.toml @@ -10,9 +10,10 @@ edition = "2021" [dev-dependencies] icu = { workspace = true, features = ["compiled_data", "serde"] } -icu_provider_export = { workspace = true } +icu_provider_export = { workspace = true, features = ["blob_exporter"] } +icu_provider_source = { workspace = true, features = ["networking"] } icu_provider = { workspace = true, features = ["deserialize_json"] } -icu_provider_adapters = { workspace = true, features = ["serde"] } +icu_provider_adapters = { workspace = true, features = ["serde", "std"] } icu_provider_blob = { workspace = true } icu_provider_fs = { workspace = true } diff --git a/tutorials/data_provider.md b/tutorials/data_provider.md index 94fd682b596..26d86d387c5 100644 --- a/tutorials/data_provider.md +++ b/tutorials/data_provider.md @@ -253,6 +253,79 @@ assert_eq!(formatter.format_to_string(&100007i64.into()), "100🐮007"); Forking providers can be implemented using `DataPayload::dynamic_cast`. For an example, see that function's documentation. +## Exporting Custom Data Markers + +To add custom data markers to your baked data or postcard file, create a forking exportable provider: + +```rust +use icu::plurals::provider::CardinalV1Marker; +use icu::locale::locale; +use icu_provider_adapters::fork::ForkByMarkerProvider; +use icu_provider_blob::BlobDataProvider; +use icu_provider_export::blob_exporter::BlobExporter; +use icu_provider_export::prelude::*; +use icu_provider_source::SourceDataProvider; +use icu_provider::prelude::*; +use icu_provider::DataMarker; +use std::borrow::Cow; +use std::collections::BTreeSet; + +#[icu_provider::data_struct(marker(CustomMarker, "x/custom@1"))] +#[derive(Debug, PartialEq)] +#[derive(serde::Deserialize, serde::Serialize, databake::Bake)] +#[databake(path = crate)] +pub struct Custom<'data> { + message: Cow<'data, str>, +}; + +struct CustomProvider; +impl DataProvider for CustomProvider { + fn load(&self, req: DataRequest) -> Result, DataError> { + Ok(DataResponse { + metadata: Default::default(), + payload: DataPayload::from_owned(Custom { + message: format!("Custom data for locale {}!", req.id.locale).into(), + }), + }) + } +} + +impl IterableDataProvider for CustomProvider { + fn iter_ids(&self) -> Result, DataError> { + Ok([locale!("es"), locale!("ja")].into_iter().map(DataLocale::from).map(DataIdentifierCow::from_locale).collect()) + } +} + +icu_provider::export::make_exportable_provider!(CustomProvider, [CustomMarker,]); + +let icu4x_source_provider = SourceDataProvider::new_latest_tested(); +let custom_source_provider = CustomProvider; + +let mut buffer = Vec::::new(); + +ExportDriver::new([DataLocaleFamily::FULL], DeduplicationStrategy::None.into(), LocaleFallbacker::try_new_unstable(&icu4x_source_provider).unwrap()) + .with_markers([CardinalV1Marker::INFO, CustomMarker::INFO]) + .export( + &ForkByMarkerProvider::new( + icu4x_source_provider, + custom_source_provider + ), + BlobExporter::new_v2_with_sink(Box::new(&mut buffer)), + ) + .unwrap(); + +let blob_provider = BlobDataProvider::try_new_from_blob(buffer.into()).unwrap(); + +let locale = DataLocale::from(&locale!("ja")); +let req = DataRequest { + id: DataIdentifierBorrowed::for_locale(&locale), + metadata: Default::default() +}; + +assert!(blob_provider.load_data(CardinalV1Marker::INFO, req).is_ok()); +assert!(blob_provider.load_data(CustomMarker::INFO, req).is_ok()); +``` + ## Accessing the Resolved Locale ICU4X objects do not store their "resolved locale" because that is not a well-defined concept. Components can load data from many sources, and fallbacks to parent locales or root does not necessarily mean that a locale is not supported. From 4e0e2084f042b72230c0a0dffbd3daf611e06525 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 5 Sep 2024 13:56:43 -0700 Subject: [PATCH 2/8] Add impl for MultiForkByErrorProvider --- provider/adapters/src/fork/by_error.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/provider/adapters/src/fork/by_error.rs b/provider/adapters/src/fork/by_error.rs index ec91e3698be..6cb732fb632 100644 --- a/provider/adapters/src/fork/by_error.rs +++ b/provider/adapters/src/fork/by_error.rs @@ -349,3 +349,18 @@ where Err(last_error) } } + +#[cfg(feature = "std")] +impl ExportableProvider for MultiForkByErrorProvider +where + P: ExportableProvider, + F: ForkByErrorPredicate + Sync, +{ + fn supported_markers(&self) -> std::collections::HashSet { + let mut markers = std::collections::HashSet::new(); + for provider in self.providers.iter() { + markers.extend(provider.supported_markers()); + } + markers + } +} From 4228f57b3d2d70982be524cb753c2b897bd3f12d Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 5 Sep 2024 13:57:20 -0700 Subject: [PATCH 3/8] Features --- provider/adapters/src/fork/by_error.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/provider/adapters/src/fork/by_error.rs b/provider/adapters/src/fork/by_error.rs index 6cb732fb632..2da9bb1e12b 100644 --- a/provider/adapters/src/fork/by_error.rs +++ b/provider/adapters/src/fork/by_error.rs @@ -4,6 +4,7 @@ use super::ForkByErrorPredicate; use alloc::{collections::BTreeSet, vec::Vec}; +#[cfg(feature = "std")] use icu_provider::export::ExportableProvider; use icu_provider::prelude::*; From 8035fabe8685426f5950b7e87c78a8511db5b22d Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 5 Sep 2024 13:58:36 -0700 Subject: [PATCH 4/8] fmt manually --- tutorials/data_provider.md | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/tutorials/data_provider.md b/tutorials/data_provider.md index 26d86d387c5..131dd933b7a 100644 --- a/tutorials/data_provider.md +++ b/tutorials/data_provider.md @@ -258,21 +258,20 @@ Forking providers can be implemented using `DataPayload::dynamic_cast`. For an e To add custom data markers to your baked data or postcard file, create a forking exportable provider: ```rust -use icu::plurals::provider::CardinalV1Marker; use icu::locale::locale; +use icu::plurals::provider::CardinalV1Marker; +use icu_provider::prelude::*; +use icu_provider::DataMarker; use icu_provider_adapters::fork::ForkByMarkerProvider; use icu_provider_blob::BlobDataProvider; use icu_provider_export::blob_exporter::BlobExporter; use icu_provider_export::prelude::*; use icu_provider_source::SourceDataProvider; -use icu_provider::prelude::*; -use icu_provider::DataMarker; use std::borrow::Cow; use std::collections::BTreeSet; #[icu_provider::data_struct(marker(CustomMarker, "x/custom@1"))] -#[derive(Debug, PartialEq)] -#[derive(serde::Deserialize, serde::Serialize, databake::Bake)] +#[derive(Debug, PartialEq, serde::Deserialize, serde::Serialize, databake::Bake)] #[databake(path = crate)] pub struct Custom<'data> { message: Cow<'data, str>, @@ -292,7 +291,11 @@ impl DataProvider for CustomProvider { impl IterableDataProvider for CustomProvider { fn iter_ids(&self) -> Result, DataError> { - Ok([locale!("es"), locale!("ja")].into_iter().map(DataLocale::from).map(DataIdentifierCow::from_locale).collect()) + Ok([locale!("es"), locale!("ja")] + .into_iter() + .map(DataLocale::from) + .map(DataIdentifierCow::from_locale) + .collect()) } } @@ -303,23 +306,24 @@ let custom_source_provider = CustomProvider; let mut buffer = Vec::::new(); -ExportDriver::new([DataLocaleFamily::FULL], DeduplicationStrategy::None.into(), LocaleFallbacker::try_new_unstable(&icu4x_source_provider).unwrap()) - .with_markers([CardinalV1Marker::INFO, CustomMarker::INFO]) - .export( - &ForkByMarkerProvider::new( - icu4x_source_provider, - custom_source_provider - ), - BlobExporter::new_v2_with_sink(Box::new(&mut buffer)), - ) - .unwrap(); +ExportDriver::new( + [DataLocaleFamily::FULL], + DeduplicationStrategy::None.into(), + LocaleFallbacker::try_new_unstable(&icu4x_source_provider).unwrap(), +) +.with_markers([CardinalV1Marker::INFO, CustomMarker::INFO]) +.export( + &ForkByMarkerProvider::new(icu4x_source_provider, custom_source_provider), + BlobExporter::new_v2_with_sink(Box::new(&mut buffer)), +) +.unwrap(); let blob_provider = BlobDataProvider::try_new_from_blob(buffer.into()).unwrap(); let locale = DataLocale::from(&locale!("ja")); let req = DataRequest { id: DataIdentifierBorrowed::for_locale(&locale), - metadata: Default::default() + metadata: Default::default(), }; assert!(blob_provider.load_data(CardinalV1Marker::INFO, req).is_ok()); From 3d670941af8270a32f6056c3d2a11157f91c3e4c Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 5 Sep 2024 15:01:00 -0700 Subject: [PATCH 5/8] features --- provider/adapters/Cargo.toml | 1 + provider/adapters/src/fork/by_error.rs | 6 +++--- tools/md-tests/Cargo.toml | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/provider/adapters/Cargo.toml b/provider/adapters/Cargo.toml index 13af46c2ec4..d744881f761 100644 --- a/provider/adapters/Cargo.toml +++ b/provider/adapters/Cargo.toml @@ -32,3 +32,4 @@ writeable = { path = "../../utils/writeable" } [features] std = [] +export = ["icu_provider/export", "std"] diff --git a/provider/adapters/src/fork/by_error.rs b/provider/adapters/src/fork/by_error.rs index 2da9bb1e12b..55adefc56ca 100644 --- a/provider/adapters/src/fork/by_error.rs +++ b/provider/adapters/src/fork/by_error.rs @@ -4,7 +4,7 @@ use super::ForkByErrorPredicate; use alloc::{collections::BTreeSet, vec::Vec}; -#[cfg(feature = "std")] +#[cfg(feature = "export")] use icu_provider::export::ExportableProvider; use icu_provider::prelude::*; @@ -161,7 +161,7 @@ where } } -#[cfg(feature = "std")] +#[cfg(feature = "export")] impl ExportableProvider for ForkByErrorProvider where P0: ExportableProvider, @@ -351,7 +351,7 @@ where } } -#[cfg(feature = "std")] +#[cfg(feature = "export")] impl ExportableProvider for MultiForkByErrorProvider where P: ExportableProvider, diff --git a/tools/md-tests/Cargo.toml b/tools/md-tests/Cargo.toml index 3561ef04a86..3384f3def39 100644 --- a/tools/md-tests/Cargo.toml +++ b/tools/md-tests/Cargo.toml @@ -13,7 +13,7 @@ icu = { workspace = true, features = ["compiled_data", "serde"] } icu_provider_export = { workspace = true, features = ["blob_exporter"] } icu_provider_source = { workspace = true, features = ["networking"] } icu_provider = { workspace = true, features = ["deserialize_json"] } -icu_provider_adapters = { workspace = true, features = ["serde", "std"] } +icu_provider_adapters = { workspace = true, features = ["serde", "export"] } icu_provider_blob = { workspace = true } icu_provider_fs = { workspace = true } From ee8979f16f80b9ce9498c6b0ad126b6c87f4d0b1 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 18 Sep 2024 00:39:40 -0700 Subject: [PATCH 6/8] Add more impls --- provider/adapters/src/either.rs | 15 +++++++++++++++ provider/adapters/src/empty.rs | 7 +++++++ provider/adapters/src/filter/mod.rs | 12 ++++++++++++ 3 files changed, 34 insertions(+) diff --git a/provider/adapters/src/either.rs b/provider/adapters/src/either.rs index a52b2d78c79..5713a4bdc40 100644 --- a/provider/adapters/src/either.rs +++ b/provider/adapters/src/either.rs @@ -122,3 +122,18 @@ impl, P1: IterableDataProvider> } } } + +#[cfg(feature = "export")] +impl ExportableProvider for EitherProvider +where + P0: ExportableProvider, + P1: ExportableProvider, +{ + fn supported_markers(&self) -> std::collections::HashSet { + use EitherProvider::*; + match self { + A(p) => p.supported_markers(), + B(p) => p.supported_markers(), + } + } +} diff --git a/provider/adapters/src/empty.rs b/provider/adapters/src/empty.rs index a69a12a8b68..d9e967ffc6b 100644 --- a/provider/adapters/src/empty.rs +++ b/provider/adapters/src/empty.rs @@ -107,3 +107,10 @@ where Ok(Default::default()) } } + +#[cfg(feature = "export")] +impl ExportableProvider for EmptyDataProvider { + fn supported_markers(&self) -> std::collections::HashSet { + Default::default() + } +} diff --git a/provider/adapters/src/filter/mod.rs b/provider/adapters/src/filter/mod.rs index a8c2258d90f..cad127cc90e 100644 --- a/provider/adapters/src/filter/mod.rs +++ b/provider/adapters/src/filter/mod.rs @@ -165,3 +165,15 @@ where }) } } + +#[cfg(feature = "export")] +impl ExportableProvider for FilterDataProvider +where + P0: ExportableProvider, + F: Fn(DataIdentifierBorrowed) -> bool, +{ + fn supported_markers(&self) -> std::collections::HashSet { + // The predicate only takes DataIdentifier, not DataMarker, so we are not impacted + self.inner.supported_markers() + } +} From f8955d61d88ca43b902bc2e80239923e91670698 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 18 Sep 2024 19:16:17 -0700 Subject: [PATCH 7/8] Update provider/adapters/src/fork/by_error.rs Co-authored-by: Robert Bastian <4706271+robertbastian@users.noreply.github.com> --- provider/adapters/src/fork/by_error.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/provider/adapters/src/fork/by_error.rs b/provider/adapters/src/fork/by_error.rs index 55adefc56ca..ce90c0987ba 100644 --- a/provider/adapters/src/fork/by_error.rs +++ b/provider/adapters/src/fork/by_error.rs @@ -358,10 +358,6 @@ where F: ForkByErrorPredicate + Sync, { fn supported_markers(&self) -> std::collections::HashSet { - let mut markers = std::collections::HashSet::new(); - for provider in self.providers.iter() { - markers.extend(provider.supported_markers()); - } - markers + self.providers.iter().flat_map(|p| p.supported_markers()).collect() } } From 0eaff22c99bc366f556942b80778f9b4a09fbd76 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 19 Sep 2024 08:30:05 -0700 Subject: [PATCH 8/8] features --- provider/adapters/src/either.rs | 2 ++ provider/adapters/src/empty.rs | 2 ++ provider/adapters/src/filter/mod.rs | 4 +++- provider/adapters/src/fork/by_error.rs | 5 ++++- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/provider/adapters/src/either.rs b/provider/adapters/src/either.rs index 5713a4bdc40..94be6ef60ae 100644 --- a/provider/adapters/src/either.rs +++ b/provider/adapters/src/either.rs @@ -5,6 +5,8 @@ //! Helpers for switching between multiple providers. use alloc::collections::BTreeSet; +#[cfg(feature = "export")] +use icu_provider::export::ExportableProvider; use icu_provider::prelude::*; /// A provider that is one of two types determined at runtime. diff --git a/provider/adapters/src/empty.rs b/provider/adapters/src/empty.rs index d9e967ffc6b..b3b6048fa95 100644 --- a/provider/adapters/src/empty.rs +++ b/provider/adapters/src/empty.rs @@ -7,6 +7,8 @@ //! Use [`EmptyDataProvider`] as a stand-in for a provider that always fails. use alloc::collections::BTreeSet; +#[cfg(feature = "export")] +use icu_provider::export::ExportableProvider; use icu_provider::prelude::*; /// A data provider that always returns an error. diff --git a/provider/adapters/src/filter/mod.rs b/provider/adapters/src/filter/mod.rs index cad127cc90e..700d3b79f37 100644 --- a/provider/adapters/src/filter/mod.rs +++ b/provider/adapters/src/filter/mod.rs @@ -23,6 +23,8 @@ mod impls; use alloc::collections::BTreeSet; +#[cfg(feature = "export")] +use icu_provider::export::ExportableProvider; use icu_provider::prelude::*; /// A data provider that selectively filters out data requests. @@ -170,7 +172,7 @@ where impl ExportableProvider for FilterDataProvider where P0: ExportableProvider, - F: Fn(DataIdentifierBorrowed) -> bool, + F: Fn(DataIdentifierBorrowed) -> bool + Sync, { fn supported_markers(&self) -> std::collections::HashSet { // The predicate only takes DataIdentifier, not DataMarker, so we are not impacted diff --git a/provider/adapters/src/fork/by_error.rs b/provider/adapters/src/fork/by_error.rs index ce90c0987ba..8f72865a64a 100644 --- a/provider/adapters/src/fork/by_error.rs +++ b/provider/adapters/src/fork/by_error.rs @@ -358,6 +358,9 @@ where F: ForkByErrorPredicate + Sync, { fn supported_markers(&self) -> std::collections::HashSet { - self.providers.iter().flat_map(|p| p.supported_markers()).collect() + self.providers + .iter() + .flat_map(|p| p.supported_markers()) + .collect() } }