Skip to content

Commit

Permalink
system/locale: Add language lookups for ISO 639 (-2, -3)
Browse files Browse the repository at this point in the history
Signed-off-by: Ikey Doherty <[email protected]>
  • Loading branch information
ikeycode committed Jun 15, 2024
1 parent e605f22 commit a740fde
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 15 deletions.
37 changes: 24 additions & 13 deletions src/system/locale/iso_639.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,34 @@ use serde::Deserialize;
#[derive(Deserialize)]
pub struct DocumentTwoCode<'a> {
#[serde(rename = "639-2", borrow)]
entries: Vec<EntryTwoCode<'a>>,
pub entries: Vec<EntryTwoCode<'a>>,
}

/// A single language entry of the ISO 639-2 document.
///
/// All string fields borrow from the JSON text being deserialized.
#[derive(Deserialize)]
pub struct EntryTwoCode<'a> {
    /// Two letter code (`alpha_2` key); may be absent
    #[serde(rename = "alpha_2", borrow)]
    pub code2: Option<&'a str>,

    /// Three letter code (`alpha_3` key); always required
    #[serde(rename = "alpha_3", borrow)]
    pub code3: &'a str,

    /// Official display name
    #[serde(borrow)]
    pub name: &'a str,

    /// Common name (optional)
    #[serde(borrow)]
    pub common_name: Option<&'a str>,

    /// Three letter bibliographic code (optional)
    pub bibliographic: Option<&'a str>,
}

#[derive(Deserialize)]
pub struct DocumentThreeCode<'a> {
#[serde(rename = "639-3", borrow)]
entries: Vec<EntryThreeCode<'a>>,
pub entries: Vec<EntryThreeCode<'a>>,
}

/// Language scope
Expand Down Expand Up @@ -68,29 +75,33 @@ pub enum Kind {
pub struct EntryThreeCode<'a> {
/// Three letter code
#[serde(rename = "alpha_3", borrow)]
code: &'a str,
pub code: &'a str,

/// Sometimes a 2 letter code is present
#[serde(rename = "alpha_2", borrow)]
code2: Option<&'a str>,
pub code2: Option<&'a str>,

/// Official name
#[serde(borrow)]
name: &'a str,
pub name: &'a str,

/// Inverted name
#[serde(borrow)]
inverted_name: Option<&'a str>,
pub inverted_name: Option<&'a str>,

/// Scope of the language
scope: Scope,
pub scope: Scope,

/// Type of language
#[serde(rename = "type")]
kind: Kind,
pub kind: Kind,

/// Three letter bibliographic
bibliographic: Option<&'a str>,
pub bibliographic: Option<&'a str>,

/// Common name (optional)
#[serde(borrow)]
pub common_name: Option<&'a str>,
}

#[cfg(test)]
Expand Down Expand Up @@ -126,7 +137,7 @@ mod tests {
let ga = loaded
.entries
.iter()
.find(|i| i.code2 == "ga")
.find(|i| i.code3 == "gle")
.expect("Failed to find GLE");
assert_eq!(ga.name, "Irish");
}
Expand Down
94 changes: 92 additions & 2 deletions src/system/locale/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use std::{collections::HashMap, fs};

use super::{iso_3166, Error};
use super::{iso_3166, iso_639, Error};

/// All ISO codes are expected to live in this location
const ISO_CODES_BASE: &str = "/usr/share/iso-codes/json";
Expand All @@ -15,6 +15,8 @@ const ISO_CODES_BASE: &str = "/usr/share/iso-codes/json";
pub struct Registry {
/// Every territory loaded at construction time
places: Vec<Territory>,
/// Maps a lower-cased territory code to its index in `places`
places_lookup: HashMap<String, usize>,
/// Every language loaded at construction time (ISO 639-2 entries
/// followed by ISO 639-3 entries)
languages: Vec<Language>,
/// Maps a lower-cased language code (`code` or `code2`) to its index
/// in `languages`
languages_lookup: HashMap<String, usize>,
}

/// Sane representation for UI purposes
Expand Down Expand Up @@ -46,6 +48,43 @@ impl From<&iso_3166::Entry<'_>> for Territory {
}
}

/// Simplistic language representation
///
/// Built from either an ISO 639-2 or an ISO 639-3 entry via the `From`
/// implementations for this type; all strings are owned copies.
#[derive(PartialEq, Eq, Debug)]
pub struct Language {
/// Three letter code, taken from the entry's `alpha_3` value
pub code: String,
/// Two letter code, when the source entry carries one
pub code2: Option<String>,
/// Name intended for display: the entry's official name, except that
/// ISO 639-3 entries prefer their common name when one is present
pub display_name: String,
/// Inverted form of the name; only filled in from ISO 639-3 entries
pub inverted_name: Option<String>,
}

impl From<&iso_639::EntryTwoCode<'_>> for Language {
    /// Build an owned [`Language`] out of a borrowed ISO 639-2 entry.
    ///
    /// The official `name` is used for display, and no inverted name is
    /// recorded for entries from this document.
    fn from(entry: &iso_639::EntryTwoCode<'_>) -> Self {
        let code = String::from(entry.code3);
        let code2 = entry.code2.map(String::from);
        let display_name = String::from(entry.name);

        Self {
            code,
            code2,
            display_name,
            inverted_name: None,
        }
    }
}

impl From<&iso_639::EntryThreeCode<'_>> for Language {
    /// Build an owned [`Language`] out of a borrowed ISO 639-3 entry.
    ///
    /// The common name is preferred for display; the official name is the
    /// fallback when no common name is present.
    fn from(entry: &iso_639::EntryThreeCode<'_>) -> Self {
        let shown: &str = entry.common_name.unwrap_or(entry.name);

        Self {
            code: String::from(entry.code),
            code2: entry.code2.map(String::from),
            display_name: String::from(shown),
            inverted_name: entry.inverted_name.map(String::from),
        }
    }
}

impl Registry {
/// Create a new locale registry from the system iso-code JSON definitions
pub fn new() -> Result<Self, Error> {
Expand All @@ -56,9 +95,22 @@ impl Registry {
places_lookup.insert(item.code.to_lowercase(), index);
}

// Convert all languages into usable ones with mapping
let mut languages = Self::load_languages_2()?;
languages.extend(Self::load_languages_3()?);
let mut languages_lookup = HashMap::new();
for (index, language) in languages.iter().enumerate() {
if let Some(code2) = language.code2.as_ref() {
languages_lookup.insert(code2.to_lowercase(), index);
}
languages_lookup.insert(language.code.to_lowercase(), index);
}

Ok(Self {
places,
places_lookup,
languages,
languages_lookup,
})
}

Expand All @@ -72,6 +124,24 @@ impl Registry {
Ok(parser.entries.iter().map(|e| e.into()).collect::<Vec<_>>())
}

/// Load the 2 DB
fn load_languages_2() -> Result<Vec<Language>, Error> {
let languages = format!("{}/iso_639-2.json", ISO_CODES_BASE);
let contents = fs::read_to_string(languages)?;
let parser = serde_json::from_str::<iso_639::DocumentTwoCode>(&contents)?;

Ok(parser.entries.iter().map(|e| e.into()).collect::<Vec<_>>())
}

/// Load the 3 DB
fn load_languages_3() -> Result<Vec<Language>, Error> {
let languages = format!("{}/iso_639-3.json", ISO_CODES_BASE);
let contents = fs::read_to_string(languages)?;
let parser = serde_json::from_str::<iso_639::DocumentThreeCode>(&contents)?;

Ok(parser.entries.iter().map(|e| e.into()).collect::<Vec<_>>())
}

/// Retrieve the territory for the given (lower-case) code
pub fn territory(&self, id: impl AsRef<str>) -> Option<&Territory> {
if let Some(idx) = self.places_lookup.get(id.as_ref()) {
Expand All @@ -80,14 +150,23 @@ impl Registry {
None
}
}

/// Look up the language registered under the given (lower-case) code.
///
/// Returns `None` when the code has no entry in the lookup table.
pub fn language(&self, id: impl AsRef<str>) -> Option<&Language> {
    self.languages_lookup
        .get(id.as_ref())
        .and_then(|&position| self.languages.get(position))
}
}

#[cfg(test)]
mod tests {
use super::Registry;

#[test]
fn test_new() {
fn test_territory() {
let r = Registry::new().expect("Failed to initialise registry");
let ie = r.territory("ie").expect("Cannot find Ireland by ie");
let irl = r.territory("irl").expect("Cannot find Ireland by irl");
Expand All @@ -97,4 +176,15 @@ mod tests {
let dk = r.territory("dk").expect("Cannot find Denmark by dk");
assert_eq!(dk.display_name, "Kingdom of Denmark");
}

/// Languages must be reachable through both their two letter and three
/// letter codes, and both codes must resolve to the same entry.
#[test]
fn test_language() {
    let r = Registry::new().expect("Failed to initialise registry");
    let en = r.language("en").expect("Cannot find English by en");
    assert_eq!(en.display_name, "English");

    // The same language looked up by its alpha-3 and alpha-2 codes
    let dan = r.language("dan").expect("Cannot find Danish by dan");
    let da = r.language("da").expect("Cannot find Danish by da");
    assert_eq!(dan, da);
}
}

0 comments on commit a740fde

Please sign in to comment.