icu_locale_core/extensions/transform/mod.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Transform Extensions provide information on content transformations in a given locale.
6//!
7//! The main struct for this extension is [`Transform`] which contains [`Fields`] and an
8//! optional [`LanguageIdentifier`].
9//!
10//! [`LanguageIdentifier`]: super::super::LanguageIdentifier
11//!
12//! # Examples
13//!
14//! ```
15//! use icu::locale::extensions::transform::{Fields, Key, Transform, Value};
16//! use icu::locale::{LanguageIdentifier, Locale};
17//!
18//! let mut loc: Locale =
19//! "en-US-t-es-ar-h0-hybrid".parse().expect("Parsing failed.");
20//!
21//! let lang: LanguageIdentifier =
22//! "es-AR".parse().expect("Parsing LanguageIdentifier failed.");
23//!
24//! let key: Key = "h0".parse().expect("Parsing key failed.");
25//! let value: Value = "hybrid".parse().expect("Parsing value failed.");
26//!
27//! assert_eq!(loc.extensions.transform.lang, Some(lang));
28//! assert!(loc.extensions.transform.fields.contains_key(&key));
29//! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
30//!
31//! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid");
32//! ```
33mod fields;
34mod key;
35mod value;
36
37use core::cmp::Ordering;
38#[cfg(feature = "alloc")]
39use core::str::FromStr;
40
41pub use fields::Fields;
42#[doc(inline)]
43pub use key::{key, Key};
44pub use value::Value;
45
46#[cfg(feature = "alloc")]
47use super::ExtensionType;
48#[cfg(feature = "alloc")]
49use crate::parser::SubtagIterator;
50#[cfg(feature = "alloc")]
51use crate::parser::{parse_language_identifier_from_iter, ParseError, ParserMode};
52#[cfg(feature = "alloc")]
53use crate::shortvec::ShortBoxSlice;
54use crate::subtags;
55use crate::LanguageIdentifier;
56#[cfg(feature = "alloc")]
57use litemap::LiteMap;
58
/// The transform extension singleton in string form, as emitted when
/// iterating over the extension's subtags.
pub(crate) const TRANSFORM_EXT_STR: &str = "t";
/// The transform extension singleton as a single character, as emitted when
/// writing the extension to a sink.
pub(crate) const TRANSFORM_EXT_CHAR: char = 't';
61
62/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
63/// Identifier`] specification.
64///
65/// Transform extension carries information about source language or script of
66/// transformed content, including content that has been transliterated, transcribed,
67/// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details).
68///
69/// # Examples
70///
71/// ```
72/// use icu::locale::extensions::transform::{Key, Value};
73/// use icu::locale::{LanguageIdentifier, Locale};
74///
75/// let mut loc: Locale =
76/// "de-t-en-us-h0-hybrid".parse().expect("Parsing failed.");
77///
78/// let en_us: LanguageIdentifier = "en-US".parse().expect("Parsing failed.");
79///
80/// assert_eq!(loc.extensions.transform.lang, Some(en_us));
81/// let key: Key = "h0".parse().expect("Parsing key failed.");
82/// let value: Value = "hybrid".parse().expect("Parsing value failed.");
83/// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value));
84/// ```
85/// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension
86/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
87/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
88#[derive(Clone, PartialEq, Eq, Debug, Default, Hash)]
89#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
90pub struct Transform {
91 /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
92 pub lang: Option<LanguageIdentifier>,
93 /// The key-value pairs present in this locale extension, with each extension key subtag
94 /// associated to its provided value subtag.
95 pub fields: Fields,
96}
97
98impl Transform {
99 /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`.
100 ///
101 /// # Examples
102 ///
103 /// ```
104 /// use icu::locale::extensions::transform::Transform;
105 ///
106 /// assert_eq!(Transform::new(), Transform::default());
107 /// ```
108 #[inline]
109 pub const fn new() -> Self {
110 Self {
111 lang: None,
112 fields: Fields::new(),
113 }
114 }
115
116 /// A constructor which takes a str slice, parses it and
117 /// produces a well-formed [`Transform`].
118 ///
119 /// ✨ *Enabled with the `alloc` Cargo feature.*
120 #[inline]
121 #[cfg(feature = "alloc")]
122 pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
123 Self::try_from_utf8(s.as_bytes())
124 }
125
126 /// See [`Self::try_from_str`]
127 ///
128 /// ✨ *Enabled with the `alloc` Cargo feature.*
129 #[cfg(feature = "alloc")]
130 pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
131 let mut iter = SubtagIterator::new(code_units);
132
133 let ext = iter.next().ok_or(ParseError::InvalidExtension)?;
134 if let ExtensionType::Transform = ExtensionType::try_from_byte_slice(ext)? {
135 return Self::try_from_iter(&mut iter);
136 }
137
138 Err(ParseError::InvalidExtension)
139 }
140
141 /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`.
142 ///
143 /// # Examples
144 ///
145 /// ```
146 /// use icu::locale::Locale;
147 ///
148 /// let mut loc: Locale = "en-US-t-es-ar".parse().expect("Parsing failed.");
149 ///
150 /// assert!(!loc.extensions.transform.is_empty());
151 /// ```
152 pub fn is_empty(&self) -> bool {
153 self.lang.is_none() && self.fields.is_empty()
154 }
155
156 /// Clears the transform extension, effectively removing it from the locale.
157 ///
158 /// # Examples
159 ///
160 /// ```
161 /// use icu::locale::Locale;
162 ///
163 /// let mut loc: Locale = "en-US-t-es-ar".parse().unwrap();
164 /// loc.extensions.transform.clear();
165 /// assert_eq!(loc, "en-US".parse().unwrap());
166 /// ```
167 pub fn clear(&mut self) {
168 self.lang = None;
169 self.fields.clear();
170 }
171
172 #[expect(clippy::type_complexity)]
173 pub(crate) fn as_tuple(
174 &self,
175 ) -> (
176 Option<(
177 subtags::Language,
178 Option<subtags::Script>,
179 Option<subtags::Region>,
180 &subtags::Variants,
181 )>,
182 &Fields,
183 ) {
184 (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields)
185 }
186
187 /// Returns an ordering suitable for use in [`BTreeSet`].
188 ///
189 /// The ordering may or may not be equivalent to string ordering, and it
190 /// may or may not be stable across ICU4X releases.
191 ///
192 /// [`BTreeSet`]: alloc::collections::BTreeSet
193 pub fn total_cmp(&self, other: &Self) -> Ordering {
194 self.as_tuple().cmp(&other.as_tuple())
195 }
196
197 #[cfg(feature = "alloc")]
198 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
199 let mut tlang = None;
200 let mut tfields = LiteMap::new();
201
202 if let Some(subtag) = iter.peek() {
203 if subtags::Language::try_from_utf8(subtag).is_ok() {
204 tlang = Some(parse_language_identifier_from_iter(
205 iter,
206 ParserMode::Partial,
207 )?);
208 }
209 }
210
211 let mut current_tkey = None;
212 let mut current_tvalue = ShortBoxSlice::new();
213 let mut has_current_tvalue = false;
214
215 while let Some(subtag) = iter.peek() {
216 if let Some(tkey) = current_tkey {
217 if let Ok(val) = Value::parse_subtag(subtag) {
218 has_current_tvalue = true;
219 if let Some(val) = val {
220 current_tvalue.push(val);
221 }
222 } else {
223 if !has_current_tvalue {
224 return Err(ParseError::InvalidExtension);
225 }
226 tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
227 current_tkey = None;
228 current_tvalue = ShortBoxSlice::new();
229 has_current_tvalue = false;
230 continue;
231 }
232 } else if let Ok(tkey) = Key::try_from_utf8(subtag) {
233 current_tkey = Some(tkey);
234 } else {
235 break;
236 }
237
238 iter.next();
239 }
240
241 if let Some(tkey) = current_tkey {
242 if !has_current_tvalue {
243 return Err(ParseError::InvalidExtension);
244 }
245 tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue));
246 }
247
248 if tlang.is_none() && tfields.is_empty() {
249 Err(ParseError::InvalidExtension)
250 } else {
251 Ok(Self {
252 lang: tlang,
253 fields: tfields.into(),
254 })
255 }
256 }
257
258 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
259 where
260 F: FnMut(&str) -> Result<(), E>,
261 {
262 if self.is_empty() {
263 return Ok(());
264 }
265 if with_ext {
266 f(TRANSFORM_EXT_STR)?;
267 }
268 if let Some(lang) = &self.lang {
269 lang.for_each_subtag_str_lowercased(f)?;
270 }
271 self.fields.for_each_subtag_str(f)
272 }
273}
274
/// Allows `str::parse::<Transform>()`; behaves exactly like [`Transform::try_from_str`].
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
#[cfg(feature = "alloc")]
impl FromStr for Transform {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Same work as `try_from_str`, which only forwards the bytes of `s`.
        Self::try_from_utf8(s.as_bytes())
    }
}
285
286writeable::impl_display_with_writeable!(Transform, #[cfg(feature = "alloc")]);
287
288impl writeable::Writeable for Transform {
289 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
290 if self.is_empty() {
291 return Ok(());
292 }
293 sink.write_char(TRANSFORM_EXT_CHAR)?;
294 if let Some(lang) = &self.lang {
295 sink.write_char('-')?;
296 lang.write_lowercased_to(sink)?;
297 }
298 if !self.fields.is_empty() {
299 sink.write_char('-')?;
300 writeable::Writeable::write_to(&self.fields, sink)?;
301 }
302 Ok(())
303 }
304
305 fn writeable_length_hint(&self) -> writeable::LengthHint {
306 if self.is_empty() {
307 return writeable::LengthHint::exact(0);
308 }
309 let mut result = writeable::LengthHint::exact(1);
310 if let Some(lang) = &self.lang {
311 result += writeable::Writeable::writeable_length_hint(lang) + 1;
312 }
313 if !self.fields.is_empty() {
314 result += writeable::Writeable::writeable_length_hint(&self.fields) + 1;
315 }
316 result
317 }
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    /// A full transform extension round-trips through parsing and formatting,
    /// while the bare singleton is rejected.
    #[test]
    fn test_transform_extension_fromstr() {
        let input = "t-en-us-h0-hybrid";
        let parsed = input
            .parse::<Transform>()
            .expect("Failed to parse Transform");
        assert_eq!(parsed.to_string(), "t-en-us-h0-hybrid");

        let singleton_only = "t".parse::<Transform>();
        assert!(singleton_only.is_err());
    }
}
334}