diff --git a/provider/baked/src/export.rs b/provider/baked/src/export.rs index 29297d5cd74..3b16b7b6cdc 100644 --- a/provider/baked/src/export.rs +++ b/provider/baked/src/export.rs @@ -24,12 +24,8 @@ //! // Export something //! DatagenDriver::new() //! .with_markers([icu_provider::hello_world::HelloWorldV1Marker::INFO]) -//! .with_locales_and_fallback([LocaleFamily::FULL], { -//! let mut options = FallbackOptions::default(); -//! // HelloWorldProvider cannot provide fallback data, so we cannot deduplicate -//! options.deduplication_strategy = Some(DeduplicationStrategy::None); -//! options -//! }) +//! // HelloWorldProvider cannot provide fallback data, so we cannot deduplicate +//! .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) //! .export(&icu_provider::hello_world::HelloWorldProvider, exporter) //! .unwrap(); //! # @@ -132,6 +128,9 @@ fn maybe_msrv() -> TokenStream { #[non_exhaustive] #[derive(Debug, Clone, Copy)] pub struct Options { + /// By default, baked providers perform fallback internally. This field can be used to + /// disable this behavior. + pub use_internal_fallback: bool, /// Whether to run `rustfmt` on the generated files. pub pretty: bool, /// Whether to use separate crates to name types instead of the `icu` metacrate. @@ -145,10 +144,10 @@ pub struct Options { pub overwrite: bool, } -#[allow(clippy::derivable_impls)] // want to be explicit about bool defaults impl Default for Options { fn default() -> Self { Self { + use_internal_fallback: true, pretty: false, use_separate_crates: false, overwrite: false, @@ -163,6 +162,7 @@ pub struct BakedExporter { mod_directory: PathBuf, pretty: bool, use_separate_crates: bool, + use_internal_fallback: bool, // Temporary storage for put_payload: marker -> (bake -> {(locale, marker_attributes)}) data: Mutex< HashMap< @@ -191,6 +191,7 @@ impl BakedExporter { /// Constructs a new [`BakedExporter`] with the given output directory and options. pub fn new(mod_directory: PathBuf, options: Options) -> Result { let Options { + use_internal_fallback, pretty, use_separate_crates, overwrite, @@ -208,6 +209,7 @@ impl BakedExporter { Ok(Self { mod_directory, pretty, + use_internal_fallback, use_separate_crates, data: Default::default(), impl_data: Default::default(), @@ -436,32 +438,6 @@ impl DataExporter for BakedExporter { } fn flush(&self, marker: DataMarkerInfo) -> Result<(), DataError> { - self.flush_internal(marker, None) - } - - fn flush_with_built_in_fallback( - &self, - marker: DataMarkerInfo, - fallback_mode: BuiltInFallbackMode, - ) -> Result<(), DataError> { - self.flush_internal(marker, Some(fallback_mode)) - } - - fn close(&mut self) -> Result<(), DataError> { - self.close_internal() - } - - fn supports_built_in_fallback(&self) -> bool { - true - } -} - -impl BakedExporter { - fn flush_internal( - &self, - marker: DataMarkerInfo, - fallback_mode: Option, - ) -> Result<(), DataError> { let marker_bake = bake_marker(marker, &self.dependencies); let (struct_type, into_data_payload) = if marker_bake @@ -545,9 +521,12 @@ impl BakedExporter { let lookup = crate::binary_search::bake(&struct_type, values); - let load_body = match fallback_mode { - None => { - quote! { + let load_body = if !self.use_internal_fallback + || deduplicated_values + .iter() + .all(|(_, reqs)| reqs.iter().all(|(l, _)| l.is_und())) + { + quote! { #(#structs)* #lookup @@ -559,44 +538,38 @@ impl BakedExporter { } else { Err(icu_provider::DataErrorKind::MissingLocale.with_req(<#marker_bake as icu_provider::DataMarker>::INFO, req)) } - } } - Some(BuiltInFallbackMode::Standard) => { - self.dependencies.insert("icu_locale/compiled_data"); - quote! { - #(#structs)* - #lookup + } else { + self.dependencies.insert("icu_locale/compiled_data"); + quote! { + #(#structs)* + #lookup - let mut metadata = icu_provider::DataResponseMetadata::default(); + let mut metadata = icu_provider::DataResponseMetadata::default(); - let payload = if let Some(payload) = lookup(req) { - payload - } else { - const FALLBACKER: icu_locale::fallback::LocaleFallbackerWithConfig<'static> = - icu_locale::fallback::LocaleFallbacker::new() - .for_config(<#marker_bake as icu_provider::DataMarker>::INFO.fallback_config); - let mut fallback_iterator = FALLBACKER.fallback_for(req.locale.clone()); - loop { - if let Some(payload) = lookup(icu_provider::DataRequest { locale: fallback_iterator.get(), ..req }) { - metadata.locale = Some(fallback_iterator.take()); - break payload; - } - if fallback_iterator.get().is_und() { - return Err(icu_provider::DataErrorKind::MissingLocale.with_req(<#marker_bake as icu_provider::DataMarker>::INFO, req)); - } - fallback_iterator.step(); + let payload = if let Some(payload) = lookup(req) { + payload + } else { + const FALLBACKER: icu_locale::fallback::LocaleFallbackerWithConfig<'static> = + icu_locale::fallback::LocaleFallbacker::new() + .for_config(<#marker_bake as icu_provider::DataMarker>::INFO.fallback_config); + let mut fallback_iterator = FALLBACKER.fallback_for(req.locale.clone()); + loop { + if let Some(payload) = lookup(icu_provider::DataRequest { locale: fallback_iterator.get(), ..req }) { + metadata.locale = Some(fallback_iterator.take()); + break payload; } - }; + if fallback_iterator.get().is_und() { + return Err(icu_provider::DataErrorKind::MissingLocale.with_req(<#marker_bake as icu_provider::DataMarker>::INFO, req)); + } + fallback_iterator.step(); + } + }; - Ok(icu_provider::DataResponse { - payload: #into_data_payload, - metadata - }) - } - } - f => { - return Err(DataError::custom("Unknown fallback mode") - .with_display_context(&format!("{f:?}"))) + Ok(icu_provider::DataResponse { + payload: #into_data_payload, + metadata + }) } }; @@ -650,7 +623,7 @@ impl BakedExporter { ) } - fn close_internal(&mut self) -> Result<(), DataError> { + fn close(&mut self) -> Result<(), DataError> { log::info!("Writing macros module..."); let data = move_out!(self.impl_data).into_inner().expect("poison"); diff --git a/provider/blob/benches/auxkey_bench.rs b/provider/blob/benches/auxkey_bench.rs index a96b04085ed..018eef4f4ce 100644 --- a/provider/blob/benches/auxkey_bench.rs +++ b/provider/blob/benches/auxkey_bench.rs @@ -92,7 +92,7 @@ fn make_blob_v1() -> Vec { let exporter = BlobExporter::new_with_sink(Box::new(&mut blob)); DatagenDriver::new() .with_markers(skeleton_markers!(marker_array_cb)) - .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) + .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) .export(&Baked, exporter) .unwrap(); assert_eq!(blob.len(), 450725); @@ -105,7 +105,7 @@ fn make_blob_v2() -> Vec { let exporter = BlobExporter::new_v2_with_sink(Box::new(&mut blob)); DatagenDriver::new() .with_markers(skeleton_markers!(marker_array_cb)) - .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) + .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) .export(&Baked, exporter) .unwrap(); assert_eq!(blob.len(), 241278); diff --git a/provider/blob/src/export/mod.rs b/provider/blob/src/export/mod.rs index 4434351da64..ab2932e90c0 100644 --- a/provider/blob/src/export/mod.rs +++ b/provider/blob/src/export/mod.rs @@ -22,7 +22,8 @@ //! // Export something //! DatagenDriver::new() //! .with_markers([icu_provider::hello_world::HelloWorldV1Marker::INFO]) -//! .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) +//! // HelloWorldProvider cannot provide fallback data, so we cannot deduplicate +//! .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) //! .export(&icu_provider::hello_world::HelloWorldProvider, exporter) //! .unwrap(); //! diff --git a/provider/blob/tests/test_versions.rs b/provider/blob/tests/test_versions.rs index 43b311d6206..5f5536cd0c0 100644 --- a/provider/blob/tests/test_versions.rs +++ b/provider/blob/tests/test_versions.rs @@ -18,7 +18,7 @@ const BLOB_V2: &[u8] = include_bytes!("data/v2.postcard"); fn run_driver(exporter: BlobExporter) -> Result<(), DataError> { DatagenDriver::new() .with_markers([icu_provider::hello_world::HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) + .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) .export(&icu_provider::hello_world::HelloWorldProvider, exporter) } @@ -80,7 +80,7 @@ fn test_v2_bigger() { let exporter = BlobExporter::new_v2_with_sink(Box::new(&mut blob)); DatagenDriver::new() .with_markers([icu_provider::hello_world::HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) + .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) .export(&ManyLocalesProvider, exporter) .unwrap(); diff --git a/provider/core/src/datagen/mod.rs b/provider/core/src/datagen/mod.rs index e33b44290e1..a2d7a0560ba 100644 --- a/provider/core/src/datagen/mod.rs +++ b/provider/core/src/datagen/mod.rs @@ -21,14 +21,6 @@ pub use payload::{ExportBox, ExportMarker}; use crate::prelude::*; -/// The type of built-in fallback that the data was generated for, if applicable. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -#[non_exhaustive] -pub enum BuiltInFallbackMode { - /// Data uses full UTS 35 fallbacking. - Standard, -} - /// An object capable of exporting data payloads in some form. pub trait DataExporter: Sync { /// Save a `payload` corresponding to the given marker and locale. @@ -52,20 +44,6 @@ pub trait DataExporter: Sync { self.flush(marker) } - /// Function called after a non-singleton marker has been fully enumerated, - /// flushing that marker with built-in fallback. - /// - /// Takes non-mut self as it can be called concurrently. - fn flush_with_built_in_fallback( - &self, - _marker: DataMarkerInfo, - _fallback_mode: BuiltInFallbackMode, - ) -> Result<(), DataError> { - Err(DataError::custom( - "Exporter does not implement built-in fallback", - )) - } - /// Function called after a non-singleton marker has been fully enumerated. /// Does not include built-in fallback. /// @@ -80,12 +58,6 @@ pub trait DataExporter: Sync { fn close(&mut self) -> Result<(), DataError> { Ok(()) } - - /// Returns whether the provider supports built-in fallback. If `true`, the provider must - /// implement [`Self::flush_with_built_in_fallback()`]. - fn supports_built_in_fallback(&self) -> bool { - false - } } impl DataExporter for Box { @@ -107,14 +79,6 @@ impl DataExporter for Box { (**self).flush_singleton(marker, payload) } - fn flush_with_built_in_fallback( - &self, - marker: DataMarkerInfo, - fallback_mode: BuiltInFallbackMode, - ) -> Result<(), DataError> { - (**self).flush_with_built_in_fallback(marker, fallback_mode) - } - fn flush(&self, marker: DataMarkerInfo) -> Result<(), DataError> { (**self).flush(marker) } @@ -122,10 +86,6 @@ impl DataExporter for Box { fn close(&mut self) -> Result<(), DataError> { (**self).close() } - - fn supports_built_in_fallback(&self) -> bool { - (**self).supports_built_in_fallback() - } } /// A [`DynamicDataProvider`] that can be used for exporting data. @@ -229,21 +189,7 @@ impl DataExporter for MultiExporter { self.0.iter().try_for_each(|e| e.flush(marker)) } - fn flush_with_built_in_fallback( - &self, - marker: DataMarkerInfo, - fallback_mode: BuiltInFallbackMode, - ) -> Result<(), DataError> { - self.0 - .iter() - .try_for_each(|e| e.flush_with_built_in_fallback(marker, fallback_mode)) - } - fn close(&mut self) -> Result<(), DataError> { self.0.iter_mut().try_for_each(|e| e.close()) } - - fn supports_built_in_fallback(&self) -> bool { - self.0.iter().all(|e| e.supports_built_in_fallback()) - } } diff --git a/provider/datagen/README.md b/provider/datagen/README.md index 4fe9db930c5..93203c3fe06 100644 --- a/provider/datagen/README.md +++ b/provider/datagen/README.md @@ -17,7 +17,7 @@ use std::fs::File; DatagenDriver::new() .with_markers([icu::list::provider::AndListV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) + .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) .export( &DatagenProvider::new_latest_tested(), BlobExporter::new_v2_with_sink(Box::new( diff --git a/provider/datagen/src/driver.rs b/provider/datagen/src/driver.rs index a242f5215a1..29f51f0b381 100644 --- a/provider/datagen/src/driver.rs +++ b/provider/datagen/src/driver.rs @@ -26,25 +26,6 @@ use writeable::Writeable; #[derive(Debug, Copy, Clone, PartialEq, Eq, Default)] pub struct NoFallbackOptions {} -/// Choices for the code location of runtime fallback. -/// -/// Some data providers support "internal" fallback in which all data requests automatically run -/// the locale fallback algorithm. If internal fallback is requested for an exporter that does -/// not support it, an error will occur. -#[non_exhaustive] -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub enum RuntimeFallbackLocation { - /// Include fallbacking code in the exported data provider. - Internal, - /// Do not include fallbacking code in the exported data provider. - /// - /// The client is responsible for manually configuring [`LocaleFallbackProvider`] in their - /// runtime data pipeline. - /// - /// [`LocaleFallbackProvider`]: icu_provider_adapters::fallback::LocaleFallbackProvider - External, -} - /// Choices for determining the deduplication of locales for exported data payloads. /// /// Deduplication affects the lookup table from locales to data payloads. If a child locale @@ -351,19 +332,34 @@ fn test_locale_family_parsing() { } /// Options bag configuring locale inclusion and behavior when runtime fallback is enabled. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[non_exhaustive] pub struct FallbackOptions { - /// The location in code where fallback will be performed at runtime. - /// - /// If not set, determined by the exporter: internal fallback is used if the exporter - /// supports internal fallback. - pub runtime_fallback_location: Option, /// The aggressiveness of deduplication of data payloads. - /// - /// If not set, determined by `runtime_fallback_location`. If internal fallback is enabled, - /// a more aggressive deduplication strategy is used. - pub deduplication_strategy: Option, + pub deduplication_strategy: DeduplicationStrategy, +} + +impl FallbackOptions { + /// Creates a [`FallbackOptions`] with [`DeduplicationStrategy::None`] + pub fn no_deduplication() -> Self { + Self { + deduplication_strategy: DeduplicationStrategy::None, + } + } + + /// Creates a [`FallbackOptions`] with [`DeduplicationStrategy::Maximal`] + pub fn maximal_deduplication() -> Self { + Self { + deduplication_strategy: DeduplicationStrategy::Maximal, + } + } + + /// Creates a [`FallbackOptions`] with [`DeduplicationStrategy::RetainBaseLanguages`] + pub fn retain_base_languages_deduplication() -> Self { + Self { + deduplication_strategy: DeduplicationStrategy::RetainBaseLanguages, + } + } } #[derive(Debug, Clone)] @@ -390,7 +386,7 @@ enum LocalesWithOrWithoutFallback { /// /// DatagenDriver::new() /// .with_markers([icu::list::provider::AndListV1Marker::INFO]) -/// .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) +/// .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) /// .export( /// &DatagenProvider::new_latest_tested(), /// BlobExporter::new_with_sink(Box::new(&mut Vec::new())), @@ -587,7 +583,7 @@ impl DatagenDriver { log::warn!("No markers selected"); } - let (uses_internal_fallback, deduplication_strategy) = match &locales_fallback { + let deduplication_strategy = match &locales_fallback { LocalesWithOrWithoutFallback::WithoutFallback { langids } => { let mut sorted_locale_strs = langids .iter() @@ -598,25 +594,9 @@ impl DatagenDriver { "Datagen configured without fallback with these locales: {:?}", sorted_locale_strs ); - (false, DeduplicationStrategy::None) + DeduplicationStrategy::None } LocalesWithOrWithoutFallback::WithFallback { options, families } => { - let uses_internal_fallback = match options.runtime_fallback_location { - None => sink.supports_built_in_fallback(), - Some(RuntimeFallbackLocation::Internal) => true, - Some(RuntimeFallbackLocation::External) => false, - }; - let deduplication_strategy = match options.deduplication_strategy { - // TODO(2.0): Default to RetainBaseLanguages here - None => { - if sink.supports_built_in_fallback() { - DeduplicationStrategy::Maximal - } else { - DeduplicationStrategy::None - } - } - Some(x) => x, - }; let mut sorted_locale_strs = families .iter() .map(LocaleFamilyBorrowed::from_parts) @@ -624,13 +604,8 @@ impl DatagenDriver { .collect::>(); sorted_locale_strs.sort_unstable(); log::info!( - "Datagen configured with {}, {}, and these locales: {:?}", - if uses_internal_fallback { - "internal fallback" - } else { - "external fallback" - }, - match deduplication_strategy { + "Datagen configured with {}, and these locales: {:?}", + match options.deduplication_strategy { DeduplicationStrategy::Maximal => "maximal deduplication", DeduplicationStrategy::RetainBaseLanguages => "deduplication retaining base languages", @@ -638,7 +613,7 @@ impl DatagenDriver { }, sorted_locale_strs ); - (uses_internal_fallback, deduplication_strategy) + options.deduplication_strategy } }; @@ -811,14 +786,7 @@ impl DatagenDriver { let transform_duration = instant1.elapsed(); - // segmenter uses hardcoded locales internally, so fallback is not necessary. - // TODO(#4511): Use auxiliary keys for segmenter - if uses_internal_fallback && !marker.path.get().starts_with("segmenter") { - sink.flush_with_built_in_fallback(marker, BuiltInFallbackMode::Standard) - } else { - sink.flush(marker) - } - .map_err(|e| e.with_marker(marker))?; + sink.flush(marker).map_err(|e| e.with_marker(marker))?; let final_duration = instant1.elapsed(); let flush_duration = final_duration - transform_duration; @@ -1242,7 +1210,7 @@ fn test_family_precedence() { "%zh-TW".parse().unwrap(), "^zh-TW".parse().unwrap(), ], - Default::default(), + FallbackOptions::no_deduplication(), ); let Some(LocalesWithOrWithoutFallback::WithFallback { families, .. }) = driver.locales_fallback diff --git a/provider/datagen/src/lib.rs b/provider/datagen/src/lib.rs index 11f5e72bd2d..5a10993985c 100644 --- a/provider/datagen/src/lib.rs +++ b/provider/datagen/src/lib.rs @@ -18,7 +18,7 @@ //! //! DatagenDriver::new() //! .with_markers([icu::list::provider::AndListV1Marker::INFO]) -//! .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) +//! .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) //! .export( //! &DatagenProvider::new_latest_tested(), //! BlobExporter::new_v2_with_sink(Box::new( @@ -76,7 +76,6 @@ pub use driver::DeduplicationStrategy; pub use driver::FallbackOptions; pub use driver::LocaleFamily; pub use driver::NoFallbackOptions; -pub use driver::RuntimeFallbackLocation; #[cfg(feature = "provider")] pub use provider::CollationHanDatabase; @@ -100,7 +99,6 @@ pub mod prelude { #[doc(no_inline)] pub use crate::{ DatagenDriver, DeduplicationStrategy, FallbackOptions, LocaleFamily, NoFallbackOptions, - RuntimeFallbackLocation, }; #[doc(no_inline)] pub use icu_locale_core::{langid, LanguageIdentifier}; diff --git a/provider/datagen/src/provider/tests/make_testdata.rs b/provider/datagen/src/provider/tests/make_testdata.rs index f160261442a..ca41a25de0e 100644 --- a/provider/datagen/src/provider/tests/make_testdata.rs +++ b/provider/datagen/src/provider/tests/make_testdata.rs @@ -68,7 +68,7 @@ fn make_testdata() { .with_markers(crate::all_markers()) .with_locales_and_fallback( LOCALES.iter().cloned().map(LocaleFamily::with_descendants), - Default::default(), + FallbackOptions::no_deduplication(), ) .with_segmenter_models([ "thaidict".into(), @@ -103,21 +103,9 @@ impl DataExporter for StubExporter { self.0.flush(marker) } - fn flush_with_built_in_fallback( - &self, - marker: DataMarkerInfo, - fallback_mode: BuiltInFallbackMode, - ) -> Result<(), DataError> { - self.0.flush_with_built_in_fallback(marker, fallback_mode) - } - fn close(&mut self) -> Result<(), DataError> { self.0.close() } - - fn supports_built_in_fallback(&self) -> bool { - self.0.supports_built_in_fallback() - } } struct ZeroCopyCheckExporter { diff --git a/provider/datagen/tests/test-options.rs b/provider/datagen/tests/test-options.rs index 975e30b6fa8..b105c0bd104 100644 --- a/provider/datagen/tests/test-options.rs +++ b/provider/datagen/tests/test-options.rs @@ -9,7 +9,6 @@ use std::collections::BTreeMap; use std::collections::HashSet; use icu_datagen::prelude::*; -use icu_datagen::DeduplicationStrategy; use icu_datagen::FallbackOptions; use icu_locale::provider::*; use icu_provider::datagen::*; @@ -160,7 +159,7 @@ fn all_preferred() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::FULL], Default::default()), + .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -201,11 +200,7 @@ fn all_hybrid() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::FULL], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::None); - options - }), + .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -246,11 +241,10 @@ fn all_runtime() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::FULL], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::Maximal); - options - }), + .with_locales_and_fallback( + [LocaleFamily::FULL], + FallbackOptions::maximal_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -290,11 +284,10 @@ fn all_runtime_retain_base() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::FULL], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::RetainBaseLanguages); - options - }), + .with_locales_and_fallback( + [LocaleFamily::FULL], + FallbackOptions::retain_base_languages_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -343,7 +336,10 @@ fn explicit_preferred() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback(families(SELECTED_LOCALES), Default::default()), + .with_locales_and_fallback( + families(SELECTED_LOCALES), + FallbackOptions::no_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -388,11 +384,10 @@ fn explicit_hybrid() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback(families(SELECTED_LOCALES), { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::None); - options - }), + .with_locales_and_fallback( + families(SELECTED_LOCALES), + FallbackOptions::no_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -437,11 +432,10 @@ fn explicit_runtime() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback(families(SELECTED_LOCALES), { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::Maximal); - options - }), + .with_locales_and_fallback( + families(SELECTED_LOCALES), + FallbackOptions::maximal_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -485,11 +479,10 @@ fn explicit_runtime_retain_base() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback(families(SELECTED_LOCALES), { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::RetainBaseLanguages); - options - }), + .with_locales_and_fallback( + families(SELECTED_LOCALES), + FallbackOptions::retain_base_languages_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -568,7 +561,7 @@ fn explicit_hybrid_without_descendants() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback(SELECTED_LOCALES, Default::default()), + .with_locales_and_fallback(SELECTED_LOCALES, FallbackOptions::no_deduplication()), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -613,7 +606,7 @@ fn explicit_hybrid_without_ancestors() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback(SELECTED_LOCALES, Default::default()), + .with_locales_and_fallback(SELECTED_LOCALES, FallbackOptions::no_deduplication()), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -659,7 +652,7 @@ fn explicit_hybrid_mixed_families() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback(SELECTED_LOCALES, Default::default()), + .with_locales_and_fallback(SELECTED_LOCALES, FallbackOptions::no_deduplication()), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -694,11 +687,10 @@ fn explicit_runtime_und() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::with_descendants(langid!("und"))], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::Maximal); - options - }), + .with_locales_and_fallback( + [LocaleFamily::with_descendants(langid!("und"))], + FallbackOptions::maximal_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -714,11 +706,10 @@ fn explicit_runtime_und_retain_base() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::with_descendants(langid!("und"))], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::RetainBaseLanguages); - options - }), + .with_locales_and_fallback( + [LocaleFamily::with_descendants(langid!("und"))], + FallbackOptions::retain_base_languages_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -734,11 +725,10 @@ fn explicit_hybrid_und() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([LocaleFamily::with_descendants(langid!("und"))], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::None); - options - }), + .with_locales_and_fallback( + [LocaleFamily::with_descendants(langid!("und"))], + FallbackOptions::no_deduplication(), + ), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -770,11 +760,7 @@ fn explicit_runtime_empty() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::Maximal); - options - }), + .with_locales_and_fallback([], FallbackOptions::maximal_deduplication()), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -790,11 +776,7 @@ fn explicit_runtime_empty_retain_base() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::RetainBaseLanguages); - options - }), + .with_locales_and_fallback([], FallbackOptions::retain_base_languages_deduplication()), &TestingProvider::with_decimal_symbol_like_data(), ); @@ -810,11 +792,7 @@ fn explicit_hybrid_empty() { let exported = export_to_map( DatagenDriver::new() .with_markers([HelloWorldV1Marker::INFO]) - .with_locales_and_fallback([], { - let mut options = FallbackOptions::default(); - options.deduplication_strategy = Some(DeduplicationStrategy::None); - options - }), + .with_locales_and_fallback([], FallbackOptions::no_deduplication()), &TestingProvider::with_decimal_symbol_like_data(), ); diff --git a/provider/fs/src/export/mod.rs b/provider/fs/src/export/mod.rs index abeb8658097..d880d680368 100644 --- a/provider/fs/src/export/mod.rs +++ b/provider/fs/src/export/mod.rs @@ -28,7 +28,8 @@ //! // Export something //! DatagenDriver::new() //! .with_markers([HelloWorldV1Marker::INFO]) -//! .with_locales_and_fallback([LocaleFamily::FULL], Default::default()) +//! // HelloWorldProvider cannot provide fallback data, so we cannot deduplicate +//! .with_locales_and_fallback([LocaleFamily::FULL], FallbackOptions::no_deduplication()) //! .export(&HelloWorldProvider, exporter) //! .unwrap(); //! # diff --git a/provider/icu4x-datagen/src/main.rs b/provider/icu4x-datagen/src/main.rs index 45d74d97ca5..f8a41ec02b0 100644 --- a/provider/icu4x-datagen/src/main.rs +++ b/provider/icu4x-datagen/src/main.rs @@ -60,12 +60,6 @@ struct Cli { #[arg(help = "--format=mod, --format=dir only: pretty-print the Rust or JSON output files.")] pretty: bool, - #[arg(long, hide = true)] - #[arg( - help = "--format=dir only: whether to add a fingerprints file to the output. This feature will be removed in a future version." - )] - fingerprint: bool, - #[arg(short = 't', long, value_name = "TAG", default_value = "latest")] #[arg( help = "Download CLDR JSON data from this GitHub tag (https://github.com/unicode-org/cldr-json/tags)\n\ @@ -176,6 +170,10 @@ struct Cli { )] use_separate_crates: bool, + #[arg(long)] + #[arg(help = "--format=mod only: don't include fallback code inside the baked provider")] + no_internal_fallback: bool, + #[arg(long)] #[arg( help = "disables locale fallback, instead exporting exactly the locales specified in --locales. \ @@ -183,18 +181,11 @@ struct Cli { )] without_fallback: bool, - #[arg(long, value_enum)] - #[arg(help = "configures where runtime fallback should take place in code. \ - If not set, determined by the exporter: \ - internal fallback is used if the exporter supports it. \ - Cannot be used with --without-fallback")] - runtime_fallback_location: Option, - #[arg(long, value_enum)] #[arg( help = "configures the deduplication of locales for exported data payloads. \ - If not set, determined by `runtime_fallback_location`: \ - if internal fallback is enabled, a more aggressive deduplication strategy is used. \ + If not set, determined by the export format: \ + if --format=mod, a more aggressive deduplication strategy is used. \ Cannot be used with --without-fallback" )] deduplication: Option, @@ -280,13 +271,6 @@ enum Deduplication { None, } -// Mirrors crate::RuntimeFallbackLocation -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)] -enum RuntimeFallbackLocation { - Internal, - External, -} - fn main() -> eyre::Result<()> { let cli = Cli::parse(); @@ -476,23 +460,22 @@ fn main() -> eyre::Result<()> { .map(|family_str| family_str.parse().wrap_err(family_str)) .collect::>>()?, }; - let mut options = FallbackOptions::default(); - options.deduplication_strategy = match cli.deduplication { - Some(Deduplication::Maximal) => Some(icu_datagen::DeduplicationStrategy::Maximal), - Some(Deduplication::RetainBaseLanguages) => { - Some(icu_datagen::DeduplicationStrategy::RetainBaseLanguages) - } - Some(Deduplication::None) | None => Some(icu_datagen::DeduplicationStrategy::None), - }; - options.runtime_fallback_location = match cli.runtime_fallback_location { - Some(RuntimeFallbackLocation::Internal) => { - Some(icu_datagen::RuntimeFallbackLocation::Internal) - } - Some(RuntimeFallbackLocation::External) => { - Some(icu_datagen::RuntimeFallbackLocation::External) - } - None => Some(icu_datagen::RuntimeFallbackLocation::External), + let mut options = match cli.format { + Format::Dir | Format::Blob | Format::Blob2 => FallbackOptions::no_deduplication(), + Format::Mod if cli.no_internal_fallback && cli.deduplication.is_none() => + eyre::bail!("--no-internal-fallback requires an explicit --deduplication value. Baked exporter would default to maximal deduplication, which might not be intended"), + // TODO(2.0): Default to RetainBaseLanguages here + Format::Mod => FallbackOptions::maximal_deduplication(), }; + if let Some(deduplication) = cli.deduplication { + options.deduplication_strategy = match deduplication { + Deduplication::Maximal => icu_datagen::DeduplicationStrategy::Maximal, + Deduplication::RetainBaseLanguages => { + icu_datagen::DeduplicationStrategy::RetainBaseLanguages + } + Deduplication::None => icu_datagen::DeduplicationStrategy::None, + }; + } driver = driver.with_locales_and_fallback(locale_families, options); } driver = driver.with_additional_collations( @@ -580,6 +563,7 @@ fn main() -> eyre::Result<()> { { let mut options = icu_datagen::baked_exporter::Options::default(); options.pretty = cli.pretty; + options.use_internal_fallback = !cli.no_internal_fallback; options.use_separate_crates = cli.use_separate_crates; options.overwrite = cli.overwrite; options diff --git a/tools/bakeddata-scripts/src/main.rs b/tools/bakeddata-scripts/src/main.rs index 83c605c5a09..15c6df03ffc 100644 --- a/tools/bakeddata-scripts/src/main.rs +++ b/tools/bakeddata-scripts/src/main.rs @@ -88,7 +88,7 @@ fn main() { .unwrap() .into_iter() .map(LocaleFamily::with_descendants), - Default::default(), + FallbackOptions::maximal_deduplication(), ) .with_recommended_segmenter_models(); @@ -235,14 +235,6 @@ impl DataExporter for PostcardFingerprintExporter { Ok(()) } - fn flush_with_built_in_fallback( - &self, - _marker: DataMarkerInfo, - _fallback_mode: BuiltInFallbackMode, - ) -> Result<(), DataError> { - Ok(()) - } - fn close(&mut self) -> Result<(), DataError> { let mut seen = std::collections::HashMap::new(); for ((marker, req), (size, hash)) in self.size_hash.get_mut().expect("poison").iter() { @@ -258,7 +250,4 @@ impl DataExporter for PostcardFingerprintExporter { } Ok(()) } - fn supports_built_in_fallback(&self) -> bool { - true - } } diff --git a/tools/make/data.toml b/tools/make/data.toml index 81f33470bae..5442c5ddcb4 100644 --- a/tools/make/data.toml +++ b/tools/make/data.toml @@ -46,7 +46,7 @@ exec --fail-on-error target/debug/icu4x-datagen --markers "core/helloworld@1" -- exec --fail-on-error target/debug/icu4x-datagen --markers "core/helloworld@1" --locales full --format blob --overwrite --out provider/blob/tests/data/v1.postcard exec --fail-on-error target/debug/icu4x-datagen --markers "core/helloworld@1" --locales full --format blob2 --overwrite --out provider/blob/tests/data/v2.postcard -exec --fail-on-error target/debug/icu4x-datagen --markers "core/helloworld@1" --locales full --format mod --pretty --overwrite --out provider/baked/tests/data +exec --fail-on-error target/debug/icu4x-datagen --markers "core/helloworld@1" --locales full --format mod --pretty --overwrite --no-internal-fallback --deduplication none --out provider/baked/tests/data exec --fail-on-error target/debug/icu4x-datagen --markers "core/helloworld@1" "fallback/likelysubtags@1" "fallback/parents@1" "fallback/supplement/co@1" --locales full --format blob --out provider/adapters/tests/data/blob.postcard --overwrite exec --fail-on-error target/debug/icu4x-datagen --markers "core/helloworld@1" --without-fallback --locales de --format dir --syntax json --out provider/adapters/tests/data/langtest/de --overwrite @@ -65,7 +65,7 @@ script_runner = "@duckscript" script = ''' exit_on_error true -output = exec git status --porcelain=v1 provider/datagen/tests/data provider/fs/tests/data provider/blob/tests/data provider/adapters/tests/data provider/datagen/src/tests +output = exec git status --porcelain=v1 provider/datagen/tests/data provider/fs/tests/data provider/blob/tests/data provider/baked/tests/data provider/adapters/tests/data provider/datagen/src/tests output_length = length ${output.stdout} if greater_than ${output_length} 0 msg = array "" "" diff --git a/tutorials/rust/baked/build.rs b/tutorials/rust/baked/build.rs index 11fbf8bcc82..edf03f29f08 100644 --- a/tutorials/rust/baked/build.rs +++ b/tutorials/rust/baked/build.rs @@ -12,11 +12,7 @@ fn main() { let mod_directory = PathBuf::from(std::env::var_os("OUT_DIR").unwrap()).join("baked_data"); DatagenDriver::new() - .with_locales_and_fallback([LocaleFamily::single(langid!("ru"))], { - let mut options = FallbackOptions::default(); - options.runtime_fallback_location = Some(RuntimeFallbackLocation::External); - options - }) + .with_locales_and_fallback([LocaleFamily::single(langid!("ru"))], FallbackOptions::no_deduplication()) // These are the markers required by `PluralRules::try_new_cardinal_unstable`. Compilation will // discard unused markers and fail if required markers are not generated, but explicitly listing the // markers will speed up the datagen. @@ -26,6 +22,7 @@ fn main() { BakedExporter::new(mod_directory, { let mut options = Options::default(); options.overwrite = true; + options.use_internal_fallback = false; options }) .unwrap(),