icu_properties/emoji.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::provider::*;
6use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
7use icu_provider::marker::ErasedMarker;
8use icu_provider::prelude::*;
9
10/// A wrapper around `UnicodeSet` data (characters and strings)
11#[derive(Debug)]
12pub struct EmojiSetData {
13 data: DataPayload<ErasedMarker<PropertyUnicodeSet<'static>>>,
14}
15
16impl EmojiSetData {
17 /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`].
18 ///
19 /// See the documentation on [`EmojiSet`] implementations for details.
20 ///
21 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
22 ///
23 /// [📚 Help choosing a constructor](icu_provider::constructors)
24 #[cfg(feature = "compiled_data")]
25 #[expect(clippy::new_ret_no_self)]
26 pub const fn new<P: EmojiSet>() -> EmojiSetDataBorrowed<'static> {
27 EmojiSetDataBorrowed::new::<P>()
28 }
29
30 #[cfg(feature = "serde")]
31 #[doc = icu_provider::gen_buffer_unstable_docs!(BUFFER, Self::new)]
32 pub fn try_new_with_buffer_provider<P: EmojiSet>(
33 provider: &(impl BufferProvider + ?Sized),
34 ) -> Result<EmojiSetData, DataError> {
35 use icu_provider::buf::AsDeserializingBufferProvider;
36 Self::try_new_unstable::<P>(&provider.as_deserializing())
37 }
38
39 /// A version of `new()` that uses custom data provided by a [`DataProvider`].
40 ///
41 /// Note that this will return an owned version of the data. Functionality is available on
42 /// the borrowed version, accessible through [`EmojiSetData::as_borrowed`].
43 pub fn try_new_unstable<P: EmojiSet>(
44 provider: &(impl DataProvider<P::DataMarker> + ?Sized),
45 ) -> Result<EmojiSetData, DataError> {
46 Ok(EmojiSetData::from_data(
47 provider.load(Default::default())?.payload,
48 ))
49 }
50
51 /// Construct a borrowed version of this type that can be queried.
52 ///
53 /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
54 /// up front.
55 #[inline]
56 pub fn as_borrowed(&self) -> EmojiSetDataBorrowed<'_> {
57 EmojiSetDataBorrowed {
58 set: self.data.get(),
59 }
60 }
61
62 /// Construct a new one from loaded data
63 ///
64 /// Typically it is preferable to use getters instead
65 pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self
66 where
67 M: DynamicDataMarker<DataStruct = PropertyUnicodeSet<'static>>,
68 {
69 Self { data: data.cast() }
70 }
71
72 /// Construct a new owned [`CodePointInversionListAndStringList`]
73 pub fn from_code_point_inversion_list_string_list(
74 set: CodePointInversionListAndStringList<'static>,
75 ) -> Self {
76 let set = PropertyUnicodeSet::from_code_point_inversion_list_string_list(set);
77 EmojiSetData::from_data(
78 DataPayload::<ErasedMarker<PropertyUnicodeSet<'static>>>::from_owned(set),
79 )
80 }
81
82 /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value.
83 ///
84 /// The data backing this is extensible and supports multiple implementations.
85 /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
86 /// added, and users may select which at data generation time.
87 ///
88 /// This method returns an `Option` in order to return `None` when the backing data provider
89 /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time
90 /// constraint.
91 pub fn as_code_point_inversion_list_string_list(
92 &self,
93 ) -> Option<&CodePointInversionListAndStringList<'_>> {
94 self.data.get().as_code_point_inversion_list_string_list()
95 }
96
97 /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible,
98 /// otherwise allocating a new [`CodePointInversionListAndStringList`].
99 ///
100 /// The data backing this is extensible and supports multiple implementations.
101 /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
102 /// added, and users may select which at data generation time.
103 ///
104 /// The performance of the conversion to this specific return type will vary
105 /// depending on the data structure that is backing `self`.
106 pub fn to_code_point_inversion_list_string_list(
107 &self,
108 ) -> CodePointInversionListAndStringList<'_> {
109 self.data.get().to_code_point_inversion_list_string_list()
110 }
111}
112
113/// A borrowed wrapper around code point set data, returned by
114/// [`EmojiSetData::as_borrowed()`]. More efficient to query.
115#[derive(Clone, Copy, Debug)]
116pub struct EmojiSetDataBorrowed<'a> {
117 set: &'a PropertyUnicodeSet<'a>,
118}
119
120impl EmojiSetDataBorrowed<'_> {
121 /// Check if the set contains the string. Strings consisting of one character
122 /// are treated as a character/code point.
123 ///
124 /// This matches ICU behavior for ICU's `UnicodeSet`.
125 #[inline]
126 pub fn contains_str(self, s: &str) -> bool {
127 self.set.contains_str(s)
128 }
129
130 /// See [`Self::contains_str`].
131 #[inline]
132 pub fn contains_utf8(self, s: &[u8]) -> bool {
133 self.set.contains_utf8(s)
134 }
135
136 /// Check if the set contains the code point.
137 #[inline]
138 pub fn contains(self, ch: char) -> bool {
139 self.set.contains(ch)
140 }
141
142 /// See [`Self::contains`].
143 #[inline]
144 pub fn contains32(self, cp: u32) -> bool {
145 self.set.contains32(cp)
146 }
147}
148
149impl EmojiSetDataBorrowed<'static> {
150 /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`].
151 ///
152 /// See the documentation on [`EmojiSet`] implementations for details.
153 ///
154 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
155 ///
156 /// [📚 Help choosing a constructor](icu_provider::constructors)
157 #[inline]
158 #[cfg(feature = "compiled_data")]
159 pub const fn new<P: EmojiSet>() -> Self {
160 EmojiSetDataBorrowed { set: P::SINGLETON }
161 }
162
163 /// Cheaply converts a [`EmojiSetDataBorrowed<'static>`] into a [`EmojiSetData`].
164 ///
165 /// Note: Due to branching and indirection, using [`EmojiSetData`] might inhibit some
166 /// compile-time optimizations that are possible with [`EmojiSetDataBorrowed`].
167 pub const fn static_to_owned(self) -> EmojiSetData {
168 EmojiSetData {
169 data: DataPayload::from_static_ref(self.set),
170 }
171 }
172}
173
174/// An Emoji set as defined by [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/#Emoji_Sets>).
175///
176/// <div class="stab unstable">
177/// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this
178/// trait, please consider using a type from the implementors listed below.
179/// </div>
180pub trait EmojiSet: crate::private::Sealed + Sized {
181 #[doc(hidden)]
182 type DataMarker: DataMarker<DataStruct = PropertyUnicodeSet<'static>>;
183 #[doc(hidden)]
184 #[cfg(feature = "compiled_data")]
185 const SINGLETON: &'static PropertyUnicodeSet<'static>;
186 /// The name of this property
187 const NAME: &'static [u8];
188 /// The abbreviated name of this property, if it exists, otherwise the name
189 const SHORT_NAME: &'static [u8];
190
191 /// Convenience method for `EmojiSetData::new().contains(ch)`
192 ///
193 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
194 #[cfg(feature = "compiled_data")]
195 fn for_char(ch: char) -> bool {
196 EmojiSetData::new::<Self>().contains(ch)
197 }
198
199 /// Convenience method for `EmojiSetData::new().contains_str(s)`
200 ///
201 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
202 #[cfg(feature = "compiled_data")]
203 fn for_str(s: &str) -> bool {
204 EmojiSetData::new::<Self>().contains_str(s)
205 }
206}