compact_str/
lib.rs

1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![no_std]
4
5#[cfg(feature = "std")]
6#[macro_use]
7extern crate std;
8
9#[cfg_attr(test, macro_use)]
10extern crate alloc;
11
12use alloc::borrow::Cow;
13use alloc::boxed::Box;
14use alloc::string::String;
15#[doc(hidden)]
16pub use core;
17use core::borrow::{
18    Borrow,
19    BorrowMut,
20};
21use core::cmp::Ordering;
22use core::hash::{
23    Hash,
24    Hasher,
25};
26use core::iter::FusedIterator;
27use core::ops::{
28    Add,
29    AddAssign,
30    Bound,
31    Deref,
32    DerefMut,
33    RangeBounds,
34};
35use core::str::{
36    FromStr,
37    Utf8Error,
38};
39use core::{
40    fmt,
41    mem,
42    slice,
43};
44#[cfg(feature = "std")]
45use std::ffi::OsStr;
46
47mod features;
48mod macros;
49mod unicode_data;
50
51mod repr;
52use repr::Repr;
53
54mod traits;
55pub use traits::{
56    CompactStringExt,
57    ToCompactString,
58};
59
60#[cfg(test)]
61mod tests;
62
63/// A [`CompactString`] is a compact string type that can be used almost anywhere a
64/// [`String`] or [`str`] can be used.
65///
66/// ## Using `CompactString`
67/// ```
68/// use compact_str::CompactString;
69/// # use std::collections::HashMap;
70///
71/// // CompactString auto derefs into a str so you can use all methods from `str`
72/// // that take a `&self`
73/// if CompactString::new("hello world!").is_ascii() {
74///     println!("we're all ASCII")
75/// }
76///
77/// // You can use a CompactString in collections like you would a String or &str
78/// let mut map: HashMap<CompactString, CompactString> = HashMap::new();
79///
80/// // directly construct a new `CompactString`
81/// map.insert(CompactString::new("nyc"), CompactString::new("empire state building"));
82/// // create a `CompactString` from a `&str`
83/// map.insert("sf".into(), "transamerica pyramid".into());
84/// // create a `CompactString` from a `String`
85/// map.insert(String::from("sea").into(), String::from("space needle").into());
86///
87/// fn wrapped_print<T: AsRef<str>>(text: T) {
88///     println!("{}", text.as_ref());
89/// }
90///
91/// // CompactString impls AsRef<str> and Borrow<str>, so it can be used anywhere
92/// // that expects a generic string
93/// if let Some(building) = map.get("nyc") {
94///     wrapped_print(building);
95/// }
96///
97/// // CompactString can also be directly compared to a String or &str
98/// assert_eq!(CompactString::new("chicago"), "chicago");
99/// assert_eq!(CompactString::new("houston"), String::from("houston"));
100/// ```
101///
102/// # Converting from a `String`
103/// It's important that a `CompactString` interops well with `String`, so you can easily use both in
104/// your code base.
105///
106/// `CompactString` implements `From<String>` and operates in the following manner:
107/// - Eagerly inlines the string, possibly dropping excess capacity
108/// - Otherwise re-uses the same underlying buffer from `String`
109///
110/// ```
111/// use compact_str::CompactString;
112///
113/// // eagerly inlining
114/// let short = String::from("hello world");
115/// let short_c = CompactString::from(short);
116/// assert!(!short_c.is_heap_allocated());
117///
118/// // dropping excess capacity
119/// let mut excess = String::with_capacity(256);
120/// excess.push_str("abc");
121///
122/// let excess_c = CompactString::from(excess);
123/// assert!(!excess_c.is_heap_allocated());
124/// assert!(excess_c.capacity() < 256);
125///
126/// // re-using the same buffer
127/// let long = String::from("this is a longer string that will be heap allocated");
128///
129/// let long_ptr = long.as_ptr();
130/// let long_len = long.len();
131/// let long_cap = long.capacity();
132///
133/// let mut long_c = CompactString::from(long);
134/// assert!(long_c.is_heap_allocated());
135///
136/// let cpt_ptr = long_c.as_ptr();
137/// let cpt_len = long_c.len();
138/// let cpt_cap = long_c.capacity();
139///
140/// // the original String and the CompactString point to the same place in memory, buffer re-use!
141/// assert_eq!(cpt_ptr, long_ptr);
142/// assert_eq!(cpt_len, long_len);
143/// assert_eq!(cpt_cap, long_cap);
144/// ```
145///
146/// ### Prevent Eagerly Inlining
147/// A consequence of eagerly inlining is you then need to de-allocate the existing buffer, which
148/// might not always be desirable if you're converting a very large amount of `String`s. If your
149/// code is very sensitive to allocations, consider the [`CompactString::from_string_buffer`] API.
// `repr(transparent)` on this single-field tuple struct gives `CompactString` the same
// layout as the `Repr` it wraps.
#[repr(transparent)]
pub struct CompactString(Repr);
152
153impl CompactString {
154    /// Creates a new [`CompactString`] from any type that implements `AsRef<str>`.
155    /// If the string is short enough, then it will be inlined on the stack!
156    ///
157    /// In a `static` or `const` context you can use the method [`CompactString::const_new()`].
158    ///
159    /// # Examples
160    ///
161    /// ### Inlined
162    /// ```
163    /// # use compact_str::CompactString;
164    /// // We can inline strings up to 12 characters long on 32-bit architectures...
165    /// #[cfg(target_pointer_width = "32")]
166    /// let s = "i'm 12 chars";
167    /// // ...and up to 24 characters on 64-bit architectures!
168    /// #[cfg(target_pointer_width = "64")]
169    /// let s = "i am 24 characters long!";
170    ///
171    /// let compact = CompactString::new(&s);
172    ///
173    /// assert_eq!(compact, s);
174    /// // we are not allocated on the heap!
175    /// assert!(!compact.is_heap_allocated());
176    /// ```
177    ///
178    /// ### Heap
179    /// ```
180    /// # use compact_str::CompactString;
181    /// // For longer strings though, we get allocated on the heap
182    /// let long = "I am a longer string that will be allocated on the heap";
183    /// let compact = CompactString::new(long);
184    ///
185    /// assert_eq!(compact, long);
186    /// // we are allocated on the heap!
187    /// assert!(compact.is_heap_allocated());
188    /// ```
189    ///
190    /// ### Creation
191    /// ```
192    /// use compact_str::CompactString;
193    ///
194    /// // Using a `&'static str`
195    /// let s = "hello world!";
196    /// let hello = CompactString::new(&s);
197    ///
198    /// // Using a `String`
    /// let u = String::from("🦄🌈");
200    /// let unicorn = CompactString::new(u);
201    ///
202    /// // Using a `Box<str>`
    /// let b: Box<str> = String::from("📦📦📦").into_boxed_str();
204    /// let boxed = CompactString::new(&b);
205    /// ```
206    #[inline]
207    #[track_caller]
208    pub fn new<T: AsRef<str>>(text: T) -> Self {
209        Self::try_new(text).unwrap_with_msg()
210    }
211
212    /// Fallible version of [`CompactString::new()`]
213    ///
    /// This method won't panic if the system is out-of-memory, but returns a [`ReserveError`].
215    /// Otherwise it behaves the same as [`CompactString::new()`].
216    #[inline]
217    pub fn try_new<T: AsRef<str>>(text: T) -> Result<Self, ReserveError> {
218        Repr::new(text.as_ref()).map(CompactString)
219    }
220
221    /// Creates a new inline [`CompactString`] from `&'static str` at compile time.
222    /// Complexity: O(1). As an optimization, short strings get inlined.
223    ///
224    /// In a dynamic context you can use the method [`CompactString::new()`].
225    ///
226    /// # Examples
227    /// ```
228    /// use compact_str::CompactString;
229    ///
230    /// const DEFAULT_NAME: CompactString = CompactString::const_new("untitled");
231    /// ```
232    #[inline]
233    pub const fn const_new(text: &'static str) -> Self {
234        CompactString(Repr::const_new(text))
235    }
236
237    /// Creates a new inline [`CompactString`] at compile time.
238    #[deprecated(
239        since = "0.8.0",
240        note = "replaced by CompactString::const_new, will be removed in 0.9.0"
241    )]
242    #[inline]
243    pub const fn new_inline(text: &'static str) -> Self {
244        CompactString::const_new(text)
245    }
246
247    /// Creates a new inline [`CompactString`] from `&'static str` at compile time.
248    #[deprecated(
249        since = "0.8.0",
250        note = "replaced by CompactString::const_new, will be removed in 0.9.0"
251    )]
252    #[inline]
253    pub const fn from_static_str(text: &'static str) -> Self {
254        CompactString::const_new(text)
255    }
256
257    /// Get back the `&'static str` constructed by [`CompactString::const_new`].
258    ///
259    /// If the string was short enough that it could be inlined, then it was inline, and
260    /// this method will return `None`.
261    ///
262    /// # Examples
263    /// ```
264    /// use compact_str::CompactString;
265    ///
266    /// const DEFAULT_NAME: CompactString =
267    ///     CompactString::const_new("That is not dead which can eternal lie.");
268    /// assert_eq!(
269    ///     DEFAULT_NAME.as_static_str().unwrap(),
270    ///     "That is not dead which can eternal lie.",
271    /// );
272    /// ```
273    #[inline]
274    #[rustversion::attr(since(1.64), const)]
275    pub fn as_static_str(&self) -> Option<&'static str> {
276        self.0.as_static_str()
277    }
278
279    /// Creates a new empty [`CompactString`] with the capacity to fit at least `capacity` bytes.
280    ///
281    /// A `CompactString` will inline strings on the stack, if they're small enough. Specifically,
282    /// if the string has a length less than or equal to `std::mem::size_of::<String>` bytes
283    /// then it will be inlined. This also means that `CompactString`s have a minimum capacity
284    /// of `std::mem::size_of::<String>`.
285    ///
286    /// # Panics
287    ///
288    /// This method panics if the system is out-of-memory.
289    /// Use [`CompactString::try_with_capacity()`] if you want to handle such a problem manually.
290    ///
291    /// # Examples
292    ///
293    /// ### "zero" Capacity
294    /// ```
295    /// # use compact_str::CompactString;
296    /// // Creating a CompactString with a capacity of 0 will create
297    /// // one with capacity of std::mem::size_of::<String>();
298    /// let empty = CompactString::with_capacity(0);
299    /// let min_size = std::mem::size_of::<String>();
300    ///
301    /// assert_eq!(empty.capacity(), min_size);
302    /// assert_ne!(0, min_size);
303    /// assert!(!empty.is_heap_allocated());
304    /// ```
305    ///
306    /// ### Max Inline Size
307    /// ```
308    /// # use compact_str::CompactString;
309    /// // Creating a CompactString with a capacity of std::mem::size_of::<String>()
310    /// // will not heap allocate.
311    /// let str_size = std::mem::size_of::<String>();
312    /// let empty = CompactString::with_capacity(str_size);
313    ///
314    /// assert_eq!(empty.capacity(), str_size);
315    /// assert!(!empty.is_heap_allocated());
316    /// ```
317    ///
318    /// ### Heap Allocating
319    /// ```
320    /// # use compact_str::CompactString;
321    /// // If you create a `CompactString` with a capacity greater than
    /// // `std::mem::size_of::<String>`, it will be heap allocated. For heap
323    /// // allocated strings we have a minimum capacity
324    ///
325    /// const MIN_HEAP_CAPACITY: usize = std::mem::size_of::<usize>() * 4;
326    ///
327    /// let heap_size = std::mem::size_of::<String>() + 1;
328    /// let empty = CompactString::with_capacity(heap_size);
329    ///
330    /// assert_eq!(empty.capacity(), MIN_HEAP_CAPACITY);
331    /// assert!(empty.is_heap_allocated());
332    /// ```
333    #[inline]
334    #[track_caller]
335    pub fn with_capacity(capacity: usize) -> Self {
336        Self::try_with_capacity(capacity).unwrap_with_msg()
337    }
338
339    /// Fallible version of [`CompactString::with_capacity()`]
340    ///
    /// This method won't panic if the system is out-of-memory, but returns a [`ReserveError`].
342    /// Otherwise it behaves the same as [`CompactString::with_capacity()`].
343    #[inline]
344    pub fn try_with_capacity(capacity: usize) -> Result<Self, ReserveError> {
345        Repr::with_capacity(capacity).map(CompactString)
346    }
347
348    /// Convert a slice of bytes into a [`CompactString`].
349    ///
350    /// A [`CompactString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
351    /// This method converts from an arbitrary contiguous collection of bytes into a
352    /// [`CompactString`], failing if the provided bytes are not `UTF-8`.
353    ///
354    /// Note: If you want to create a [`CompactString`] from a non-contiguous collection of bytes,
355    /// enable the `bytes` feature of this crate, and see `CompactString::from_utf8_buf`
356    ///
357    /// # Examples
358    /// ### Valid UTF-8
359    /// ```
360    /// # use compact_str::CompactString;
361    /// let bytes = vec![240, 159, 166, 128, 240, 159, 146, 175];
362    /// let compact = CompactString::from_utf8(bytes).expect("valid UTF-8");
363    ///
    /// assert_eq!(compact, "🦀💯");
365    /// ```
366    ///
367    /// ### Invalid UTF-8
368    /// ```
369    /// # use compact_str::CompactString;
370    /// let bytes = vec![255, 255, 255];
371    /// let result = CompactString::from_utf8(bytes);
372    ///
373    /// assert!(result.is_err());
374    /// ```
375    #[inline]
376    pub fn from_utf8<B: AsRef<[u8]>>(buf: B) -> Result<Self, Utf8Error> {
377        Repr::from_utf8(buf).map(CompactString)
378    }
379
380    /// Converts a vector of bytes to a [`CompactString`] without checking that the string contains
381    /// valid UTF-8.
382    ///
383    /// See the safe version, [`CompactString::from_utf8`], for more details.
384    ///
385    /// # Safety
386    ///
387    /// This function is unsafe because it does not check that the bytes passed to it are valid
388    /// UTF-8. If this constraint is violated, it may cause memory unsafety issues with future users
389    /// of the [`CompactString`], as the rest of the standard library assumes that
390    /// [`CompactString`]s are valid UTF-8.
391    ///
392    /// # Examples
393    ///
394    /// Basic usage:
395    ///
396    /// ```
397    /// # use compact_str::CompactString;
398    /// // some bytes, in a vector
399    /// let sparkle_heart = vec![240, 159, 146, 150];
400    ///
401    /// let sparkle_heart = unsafe {
402    ///     CompactString::from_utf8_unchecked(sparkle_heart)
403    /// };
404    ///
    /// assert_eq!("💖", sparkle_heart);
406    /// ```
407    #[inline]
408    #[must_use]
409    #[track_caller]
410    pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(buf: B) -> Self {
411        Repr::from_utf8_unchecked(buf)
412            .map(CompactString)
413            .unwrap_with_msg()
414    }
415
416    /// Decode a [`UTF-16`](https://en.wikipedia.org/wiki/UTF-16) slice of bytes into a
417    /// [`CompactString`], returning an [`Err`] if the slice contains any invalid data.
418    ///
419    /// # Examples
420    /// ### Valid UTF-16
421    /// ```
422    /// # use compact_str::CompactString;
423    /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063];
424    /// let compact = CompactString::from_utf16(buf).unwrap();
425    ///
    /// assert_eq!(compact, "𝄞music");
427    /// ```
428    ///
429    /// ### Invalid UTF-16
430    /// ```
431    /// # use compact_str::CompactString;
432    /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
433    /// let res = CompactString::from_utf16(buf);
434    ///
435    /// assert!(res.is_err());
436    /// ```
437    #[inline]
438    pub fn from_utf16<B: AsRef<[u16]>>(buf: B) -> Result<Self, Utf16Error> {
439        // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
440        // even though the size of our iterator, `buf`, is known ahead of time.
441        //
442        // rustlang issue #48994 is tracking the fix
443
444        let buf = buf.as_ref();
445        let mut ret = CompactString::with_capacity(buf.len());
446        for c in core::char::decode_utf16(buf.iter().copied()) {
447            if let Ok(c) = c {
448                ret.push(c);
449            } else {
450                return Err(Utf16Error(()));
451            }
452        }
453        Ok(ret)
454    }
455
456    /// Decode a UTF-16โ€“encoded slice `v` into a `CompactString`, replacing invalid data with
457    /// the replacement character (`U+FFFD`), ๏ฟฝ.
458    ///
459    /// # Examples
460    ///
461    /// Basic usage:
462    ///
463    /// ```
464    /// # use compact_str::CompactString;
465    /// // ๐„žmus<invalid>ic<invalid>
466    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
467    ///           0x0073, 0xDD1E, 0x0069, 0x0063,
468    ///           0xD834];
469    ///
470    /// assert_eq!(CompactString::from("๐„žmus\u{FFFD}ic\u{FFFD}"),
471    ///            CompactString::from_utf16_lossy(v));
472    /// ```
473    #[inline]
474    pub fn from_utf16_lossy<B: AsRef<[u16]>>(buf: B) -> Self {
475        let buf = buf.as_ref();
476        let mut ret = CompactString::with_capacity(buf.len());
477        for c in core::char::decode_utf16(buf.iter().copied()) {
478            match c {
479                Ok(c) => ret.push(c),
480                Err(_) => ret.push_str("๏ฟฝ"),
481            }
482        }
483        ret
484    }
485
486    /// Returns the length of the [`CompactString`] in `bytes`, not [`char`]s or graphemes.
487    ///
488    /// When using `UTF-8` encoding (which all strings in Rust do) a single character will be 1 to 4
489    /// bytes long, therefore the return value of this method might not be what a human considers
490    /// the length of the string.
491    ///
492    /// # Examples
493    /// ```
494    /// # use compact_str::CompactString;
495    /// let ascii = CompactString::new("hello world");
496    /// assert_eq!(ascii.len(), 11);
497    ///
    /// let emoji = CompactString::new("👱");
499    /// assert_eq!(emoji.len(), 4);
500    /// ```
501    #[inline]
502    pub fn len(&self) -> usize {
503        self.0.len()
504    }
505
506    /// Returns `true` if the [`CompactString`] has a length of 0, `false` otherwise
507    ///
508    /// # Examples
509    /// ```
510    /// # use compact_str::CompactString;
511    /// let mut msg = CompactString::new("");
512    /// assert!(msg.is_empty());
513    ///
514    /// // add some characters
515    /// msg.push_str("hello reader!");
516    /// assert!(!msg.is_empty());
517    /// ```
518    #[inline]
519    pub fn is_empty(&self) -> bool {
520        self.0.is_empty()
521    }
522
523    /// Returns the capacity of the [`CompactString`], in bytes.
524    ///
525    /// # Note
526    /// * A `CompactString` will always have a capacity of at least `std::mem::size_of::<String>()`
527    ///
528    /// # Examples
529    /// ### Minimum Size
530    /// ```
531    /// # use compact_str::CompactString;
532    /// let min_size = std::mem::size_of::<String>();
533    /// let compact = CompactString::new("");
534    ///
535    /// assert!(compact.capacity() >= min_size);
536    /// ```
537    ///
538    /// ### Heap Allocated
539    /// ```
540    /// # use compact_str::CompactString;
541    /// let compact = CompactString::with_capacity(128);
542    /// assert_eq!(compact.capacity(), 128);
543    /// ```
544    #[inline]
545    pub fn capacity(&self) -> usize {
546        self.0.capacity()
547    }
548
549    /// Ensures that this [`CompactString`]'s capacity is at least `additional` bytes longer than
550    /// its length. The capacity may be increased by more than `additional` bytes if it chooses,
551    /// to prevent frequent reallocations.
552    ///
553    /// # Note
554    /// * A `CompactString` will always have at least a capacity of `std::mem::size_of::<String>()`
555    /// * Reserving additional bytes may cause the `CompactString` to become heap allocated
556    ///
557    /// # Panics
558    /// This method panics if the new capacity overflows `usize` or if the system is out-of-memory.
559    /// Use [`CompactString::try_reserve()`] if you want to handle such a problem manually.
560    ///
561    /// # Examples
562    /// ```
563    /// # use compact_str::CompactString;
564    ///
565    /// const WORD: usize = std::mem::size_of::<usize>();
566    /// let mut compact = CompactString::default();
567    /// assert!(compact.capacity() >= (WORD * 3) - 1);
568    ///
569    /// compact.reserve(200);
570    /// assert!(compact.is_heap_allocated());
571    /// assert!(compact.capacity() >= 200);
572    /// ```
573    #[inline]
574    #[track_caller]
575    pub fn reserve(&mut self, additional: usize) {
576        self.try_reserve(additional).unwrap_with_msg()
577    }
578
579    /// Fallible version of [`CompactString::reserve()`]
580    ///
    /// This method won't panic if the system is out-of-memory, but returns a [`ReserveError`].
582    /// Otherwise it behaves the same as [`CompactString::reserve()`].
583    #[inline]
584    pub fn try_reserve(&mut self, additional: usize) -> Result<(), ReserveError> {
585        self.0.reserve(additional)
586    }
587
588    /// Returns a string slice containing the entire [`CompactString`].
589    ///
590    /// # Examples
591    /// ```
592    /// # use compact_str::CompactString;
593    /// let s = CompactString::new("hello");
594    ///
595    /// assert_eq!(s.as_str(), "hello");
596    /// ```
597    #[inline]
598    pub fn as_str(&self) -> &str {
599        self.0.as_str()
600    }
601
602    /// Returns a mutable string slice containing the entire [`CompactString`].
603    ///
604    /// # Examples
605    /// ```
606    /// # use compact_str::CompactString;
607    /// let mut s = CompactString::new("hello");
608    /// s.as_mut_str().make_ascii_uppercase();
609    ///
610    /// assert_eq!(s.as_str(), "HELLO");
611    /// ```
612    #[inline]
613    pub fn as_mut_str(&mut self) -> &mut str {
614        let len = self.len();
615        unsafe { core::str::from_utf8_unchecked_mut(&mut self.0.as_mut_buf()[..len]) }
616    }
617
618    unsafe fn spare_capacity_mut(&mut self) -> &mut [mem::MaybeUninit<u8>] {
619        let buf = self.0.as_mut_buf();
620        let ptr = buf.as_mut_ptr();
621        let cap = buf.len();
622        let len = self.len();
623
624        slice::from_raw_parts_mut(ptr.add(len) as *mut mem::MaybeUninit<u8>, cap - len)
625    }
626
627    /// Returns a byte slice of the [`CompactString`]'s contents.
628    ///
629    /// # Examples
630    /// ```
631    /// # use compact_str::CompactString;
632    /// let s = CompactString::new("hello");
633    ///
634    /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
635    /// ```
636    #[inline]
637    pub fn as_bytes(&self) -> &[u8] {
638        &self.0.as_slice()[..self.len()]
639    }
640
641    // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning?
642    //
643    /// Provides a mutable reference to the underlying buffer of bytes.
644    ///
645    /// # Safety
646    /// * All Rust strings, including `CompactString`, must be valid UTF-8. The caller must
647    ///   guarantee that any modifications made to the underlying buffer are valid UTF-8.
648    ///
649    /// # Examples
650    /// ```
651    /// # use compact_str::CompactString;
652    /// let mut s = CompactString::new("hello");
653    ///
654    /// let slice = unsafe { s.as_mut_bytes() };
655    /// // copy bytes into our string
656    /// slice[5..11].copy_from_slice(" world".as_bytes());
657    /// // set the len of the string
658    /// unsafe { s.set_len(11) };
659    ///
660    /// assert_eq!(s, "hello world");
661    /// ```
662    #[inline]
663    pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] {
664        self.0.as_mut_buf()
665    }
666
667    /// Appends the given [`char`] to the end of this [`CompactString`].
668    ///
669    /// # Examples
670    /// ```
671    /// # use compact_str::CompactString;
672    /// let mut s = CompactString::new("foo");
673    ///
674    /// s.push('b');
675    /// s.push('a');
676    /// s.push('r');
677    ///
678    /// assert_eq!("foobar", s);
679    /// ```
680    pub fn push(&mut self, ch: char) {
681        self.push_str(ch.encode_utf8(&mut [0; 4]));
682    }
683
684    /// Removes the last character from the [`CompactString`] and returns it.
685    /// Returns `None` if this [`CompactString`] is empty.
686    ///
687    /// # Examples
688    /// ```
689    /// # use compact_str::CompactString;
690    /// let mut s = CompactString::new("abc");
691    ///
692    /// assert_eq!(s.pop(), Some('c'));
693    /// assert_eq!(s.pop(), Some('b'));
694    /// assert_eq!(s.pop(), Some('a'));
695    ///
696    /// assert_eq!(s.pop(), None);
697    /// ```
698    #[inline]
699    pub fn pop(&mut self) -> Option<char> {
700        self.0.pop()
701    }
702
703    /// Appends a given string slice onto the end of this [`CompactString`]
704    ///
705    /// # Examples
706    /// ```
707    /// # use compact_str::CompactString;
708    /// let mut s = CompactString::new("abc");
709    ///
710    /// s.push_str("123");
711    ///
712    /// assert_eq!("abc123", s);
713    /// ```
714    #[inline]
715    pub fn push_str(&mut self, s: &str) {
716        self.0.push_str(s)
717    }
718
719    /// Removes a [`char`] from this [`CompactString`] at a byte position and returns it.
720    ///
721    /// This is an *O*(*n*) operation, as it requires copying every element in the
722    /// buffer.
723    ///
724    /// # Panics
725    ///
726    /// Panics if `idx` is larger than or equal to the [`CompactString`]'s length,
727    /// or if it does not lie on a [`char`] boundary.
728    ///
729    /// # Examples
730    ///
731    /// ### Basic usage:
732    ///
733    /// ```
734    /// # use compact_str::CompactString;
735    /// let mut c = CompactString::from("hello world");
736    ///
737    /// assert_eq!(c.remove(0), 'h');
738    /// assert_eq!(c, "ello world");
739    ///
740    /// assert_eq!(c.remove(5), 'w');
741    /// assert_eq!(c, "ello orld");
742    /// ```
743    ///
744    /// ### Past total length:
745    ///
746    /// ```should_panic
747    /// # use compact_str::CompactString;
748    /// let mut c = CompactString::from("hello there!");
749    /// c.remove(100);
750    /// ```
751    ///
752    /// ### Not on char boundary:
753    ///
754    /// ```should_panic
755    /// # use compact_str::CompactString;
    /// let mut c = CompactString::from("🦄");
757    /// c.remove(1);
758    /// ```
759    #[inline]
760    pub fn remove(&mut self, idx: usize) -> char {
761        let len = self.len();
762        let substr = &mut self.as_mut_str()[idx..];
763
764        // get the char we want to remove
765        let ch = substr
766            .chars()
767            .next()
768            .expect("cannot remove a char from the end of a string");
769        let ch_len = ch.len_utf8();
770
771        // shift everything back one character
772        let num_bytes = substr.len() - ch_len;
773        let ptr = substr.as_mut_ptr();
774
775        // SAFETY: Both src and dest are valid for reads of `num_bytes` amount of bytes,
776        // and are properly aligned
777        unsafe {
778            core::ptr::copy(ptr.add(ch_len) as *const u8, ptr, num_bytes);
779            self.set_len(len - ch_len);
780        }
781
782        ch
783    }
784
785    /// Forces the length of the [`CompactString`] to `new_len`.
786    ///
787    /// This is a low-level operation that maintains none of the normal invariants for
788    /// `CompactString`. If you want to modify the `CompactString` you should use methods like
789    /// `push`, `push_str` or `pop`.
790    ///
791    /// # Safety
792    /// * `new_len` must be less than or equal to `capacity()`
793    /// * The elements at `old_len..new_len` must be initialized
794    #[inline]
795    pub unsafe fn set_len(&mut self, new_len: usize) {
796        self.0.set_len(new_len)
797    }
798
799    /// Returns whether or not the [`CompactString`] is heap allocated.
800    ///
801    /// # Examples
802    /// ### Inlined
803    /// ```
804    /// # use compact_str::CompactString;
805    /// let hello = CompactString::new("hello world");
806    ///
807    /// assert!(!hello.is_heap_allocated());
808    /// ```
809    ///
810    /// ### Heap Allocated
811    /// ```
812    /// # use compact_str::CompactString;
    /// let msg = CompactString::new("this message will self destruct in 5, 4, 3, 2, 1 💥");
814    ///
815    /// assert!(msg.is_heap_allocated());
816    /// ```
817    #[inline]
818    pub fn is_heap_allocated(&self) -> bool {
819        self.0.is_heap_allocated()
820    }
821
822    /// Ensure that the given range is inside the set data, and that no codepoints are split.
823    ///
824    /// Returns the range `start..end` as a tuple.
825    #[inline]
826    fn ensure_range(&self, range: impl RangeBounds<usize>) -> (usize, usize) {
827        #[cold]
828        #[inline(never)]
829        fn illegal_range() -> ! {
830            panic!("illegal range");
831        }
832
833        let start = match range.start_bound() {
834            Bound::Included(&n) => n,
835            Bound::Excluded(&n) => match n.checked_add(1) {
836                Some(n) => n,
837                None => illegal_range(),
838            },
839            Bound::Unbounded => 0,
840        };
841        let end = match range.end_bound() {
842            Bound::Included(&n) => match n.checked_add(1) {
843                Some(n) => n,
844                None => illegal_range(),
845            },
846            Bound::Excluded(&n) => n,
847            Bound::Unbounded => self.len(),
848        };
849        if end < start {
850            illegal_range();
851        }
852
853        let s = self.as_str();
854        if !s.is_char_boundary(start) || !s.is_char_boundary(end) {
855            illegal_range();
856        }
857
858        (start, end)
859    }
860
861    /// Removes the specified range in the [`CompactString`],
862    /// and replaces it with the given string.
863    /// The given string doesn't need to be the same length as the range.
864    ///
865    /// # Panics
866    ///
867    /// Panics if the starting point or end point do not lie on a [`char`]
868    /// boundary, or if they're out of bounds.
869    ///
870    /// # Examples
871    ///
872    /// Basic usage:
873    ///
874    /// ```
875    /// # use compact_str::CompactString;
876    /// let mut s = CompactString::new("Hello, world!");
877    ///
878    /// s.replace_range(7..12, "WORLD");
879    /// assert_eq!(s, "Hello, WORLD!");
880    ///
881    /// s.replace_range(7..=11, "you");
882    /// assert_eq!(s, "Hello, you!");
883    ///
884    /// s.replace_range(5.., "! Is it me you're looking for?");
885    /// assert_eq!(s, "Hello! Is it me you're looking for?");
886    /// ```
887    #[inline]
888    pub fn replace_range(&mut self, range: impl RangeBounds<usize>, replace_with: &str) {
889        let (start, end) = self.ensure_range(range);
890        let dest_len = end - start;
891        match dest_len.cmp(&replace_with.len()) {
892            Ordering::Equal => unsafe { self.replace_range_same_size(start, end, replace_with) },
893            Ordering::Greater => unsafe { self.replace_range_shrink(start, end, replace_with) },
894            Ordering::Less => unsafe { self.replace_range_grow(start, end, replace_with) },
895        }
896    }
897
898    /// Replace into the same size.
899    unsafe fn replace_range_same_size(&mut self, start: usize, end: usize, replace_with: &str) {
900        core::ptr::copy_nonoverlapping(
901            replace_with.as_ptr(),
902            self.as_mut_ptr().add(start),
903            end - start,
904        );
905    }
906
907    /// Replace, so self.len() gets smaller.
908    unsafe fn replace_range_shrink(&mut self, start: usize, end: usize, replace_with: &str) {
909        let total_len = self.len();
910        let dest_len = end - start;
911        let new_len = total_len - (dest_len - replace_with.len());
912        let amount = total_len - end;
913        let data = self.as_mut_ptr();
914        // first insert the replacement string, overwriting the current content
915        core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
916        // then move the tail of the CompactString forward to its new place, filling the gap
917        core::ptr::copy(
918            data.add(total_len - amount),
919            data.add(new_len - amount),
920            amount,
921        );
922        // and lastly we set the new length
923        self.set_len(new_len);
924    }
925
926    /// Replace, so self.len() gets bigger.
927    unsafe fn replace_range_grow(&mut self, start: usize, end: usize, replace_with: &str) {
928        let dest_len = end - start;
929        self.reserve(replace_with.len() - dest_len);
930        let total_len = self.len();
931        let new_len = total_len + (replace_with.len() - dest_len);
932        let amount = total_len - end;
933        // first grow the string, so MIRI knows that the full range is usable
934        self.set_len(new_len);
935        let data = self.as_mut_ptr();
936        // then move the tail of the CompactString back to its new place
937        core::ptr::copy(
938            data.add(total_len - amount),
939            data.add(new_len - amount),
940            amount,
941        );
942        // and lastly insert the replacement string
943        core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
944    }
945
946    /// Creates a new [`CompactString`] by repeating a string `n` times.
947    ///
948    /// # Panics
949    ///
950    /// This function will panic if the capacity would overflow.
951    ///
952    /// # Examples
953    ///
954    /// Basic usage:
955    ///
956    /// ```
957    /// use compact_str::CompactString;
958    /// assert_eq!(CompactString::new("abc").repeat(4), CompactString::new("abcabcabcabc"));
959    /// ```
960    ///
961    /// A panic upon overflow:
962    ///
963    /// ```should_panic
964    /// use compact_str::CompactString;
965    ///
966    /// // this will panic at runtime
967    /// let huge = CompactString::new("0123456789abcdef").repeat(usize::MAX);
968    /// ```
969    #[must_use]
970    pub fn repeat(&self, n: usize) -> Self {
971        if n == 0 || self.is_empty() {
972            Self::const_new("")
973        } else if n == 1 {
974            self.clone()
975        } else {
976            let mut out = Self::with_capacity(self.len() * n);
977            (0..n).for_each(|_| out.push_str(self));
978            out
979        }
980    }
981
982    /// Truncate the [`CompactString`] to a shorter length.
983    ///
984    /// If the length of the [`CompactString`] is less or equal to `new_len`, the call is a no-op.
985    ///
986    /// Calling this function does not change the capacity of the [`CompactString`].
987    ///
988    /// # Panics
989    ///
990    /// Panics if the new end of the string does not lie on a [`char`] boundary.
991    ///
992    /// # Examples
993    ///
994    /// Basic usage:
995    ///
996    /// ```
997    /// # use compact_str::CompactString;
998    /// let mut s = CompactString::new("Hello, world!");
999    /// s.truncate(5);
1000    /// assert_eq!(s, "Hello");
1001    /// ```
1002    pub fn truncate(&mut self, new_len: usize) {
1003        let s = self.as_str();
1004        if new_len >= s.len() {
1005            return;
1006        }
1007
1008        assert!(
1009            s.is_char_boundary(new_len),
1010            "new_len must lie on char boundary",
1011        );
1012        unsafe { self.set_len(new_len) };
1013    }
1014
    /// Converts a [`CompactString`] to a raw pointer.
    ///
    /// The returned pointer is the start of the byte slice exposed by the underlying
    /// representation.
    #[inline]
    pub fn as_ptr(&self) -> *const u8 {
        self.0.as_slice().as_ptr()
    }
1020
    /// Converts a mutable [`CompactString`] to a raw pointer.
    ///
    /// The returned pointer is the start of the mutable buffer exposed by the underlying
    /// representation.
    #[inline]
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        // NOTE(review): `as_mut_buf` is an unsafe method defined in `repr`; its exact
        // contract is not visible here. Presumably handing out only a raw pointer
        // upholds it, since nothing is written through the buffer in this function.
        unsafe { self.0.as_mut_buf().as_mut_ptr() }
    }
1026
1027    /// Insert string character at an index.
1028    ///
1029    /// # Examples
1030    ///
1031    /// Basic usage:
1032    ///
1033    /// ```
1034    /// # use compact_str::CompactString;
1035    /// let mut s = CompactString::new("Hello!");
1036    /// s.insert_str(5, ", world");
1037    /// assert_eq!(s, "Hello, world!");
1038    /// ```
1039    pub fn insert_str(&mut self, idx: usize, string: &str) {
1040        assert!(self.is_char_boundary(idx), "idx must lie on char boundary");
1041
1042        let new_len = self.len() + string.len();
1043        self.reserve(string.len());
1044
1045        // SAFETY: We just checked that we may split self at idx.
1046        //         We set the length only after reserving the memory.
1047        //         We fill the gap with valid UTF-8 data.
1048        unsafe {
1049            // first move the tail to the new back
1050            let data = self.as_mut_ptr();
1051            core::ptr::copy(
1052                data.add(idx),
1053                data.add(idx + string.len()),
1054                new_len - idx - string.len(),
1055            );
1056
1057            // then insert the new bytes
1058            core::ptr::copy_nonoverlapping(string.as_ptr(), data.add(idx), string.len());
1059
1060            // and lastly resize the string
1061            self.set_len(new_len);
1062        }
1063    }
1064
1065    /// Insert a character at an index.
1066    ///
1067    /// # Examples
1068    ///
1069    /// Basic usage:
1070    ///
1071    /// ```
1072    /// # use compact_str::CompactString;
1073    /// let mut s = CompactString::new("Hello world!");
1074    /// s.insert(5, ',');
1075    /// assert_eq!(s, "Hello, world!");
1076    /// ```
1077    pub fn insert(&mut self, idx: usize, ch: char) {
1078        self.insert_str(idx, ch.encode_utf8(&mut [0; 4]));
1079    }
1080
    /// Reduces the length of the [`CompactString`] to zero.
    ///
    /// Calling this function does not change the capacity of the [`CompactString`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::new("Rust is the most loved language on Stackoverflow!");
    /// assert_eq!(s.capacity(), 49);
    ///
    /// s.clear();
    ///
    /// assert_eq!(s, "");
    /// assert_eq!(s.capacity(), 49);
    /// ```
    pub fn clear(&mut self) {
        // SAFETY: a length of zero can neither exceed the buffer nor split a character.
        unsafe { self.set_len(0) };
    }
1098
1099    /// Split the [`CompactString`] into at the given byte index.
1100    ///
1101    /// Calling this function does not change the capacity of the [`CompactString`], unless the
1102    /// [`CompactString`] is backed by a `&'static str`.
1103    ///
1104    /// # Panics
1105    ///
1106    /// Panics if `at` does not lie on a [`char`] boundary.
1107    ///
1108    /// Basic usage:
1109    ///
1110    /// ```
1111    /// # use compact_str::CompactString;
1112    /// let mut s = CompactString::const_new("Hello, world!");
1113    /// let w = s.split_off(5);
1114    ///
1115    /// assert_eq!(w, ", world!");
1116    /// assert_eq!(s, "Hello");
1117    /// ```
1118    pub fn split_off(&mut self, at: usize) -> Self {
1119        if let Some(s) = self.as_static_str() {
1120            let result = Self::const_new(&s[at..]);
1121            // SAFETY: the previous line `self[at...]` would have panicked if `at` was invalid
1122            unsafe { self.set_len(at) };
1123            result
1124        } else {
1125            let result = self[at..].into();
1126            // SAFETY: the previous line `self[at...]` would have panicked if `at` was invalid
1127            unsafe { self.set_len(at) };
1128            result
1129        }
1130    }
1131
1132    /// Remove a range from the [`CompactString`], and return it as an iterator.
1133    ///
1134    /// Calling this function does not change the capacity of the [`CompactString`].
1135    ///
1136    /// # Panics
1137    ///
1138    /// Panics if the start or end of the range does not lie on a [`char`] boundary.
1139    ///
1140    /// # Examples
1141    ///
1142    /// Basic usage:
1143    ///
1144    /// ```
1145    /// # use compact_str::CompactString;
1146    /// let mut s = CompactString::new("Hello, world!");
1147    ///
1148    /// let mut d = s.drain(5..12);
1149    /// assert_eq!(d.next(), Some(','));   // iterate over the extracted data
1150    /// assert_eq!(d.as_str(), " world"); // or get the whole data as &str
1151    ///
1152    /// // The iterator keeps a reference to `s`, so you have to drop() the iterator,
1153    /// // before you can access `s` again.
1154    /// drop(d);
1155    /// assert_eq!(s, "Hello!");
1156    /// ```
1157    pub fn drain(&mut self, range: impl RangeBounds<usize>) -> Drain<'_> {
1158        let (start, end) = self.ensure_range(range);
1159        Drain {
1160            compact_string: self as *mut Self,
1161            start,
1162            end,
1163            chars: self[start..end].chars(),
1164        }
1165    }
1166
    /// Shrinks the capacity of this [`CompactString`] with a lower bound.
    ///
    /// The resulting capacity is never less than the size of 3×[`usize`],
    /// i.e. the capacity that can be inlined.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::with_capacity(100);
    /// assert_eq!(s.capacity(), 100);
    ///
    /// // the requested minimum equals the current capacity, so nothing changes
    /// s.shrink_to(100);
    /// assert_eq!(s.capacity(), 100);
    ///
    /// s.shrink_to(50);
    /// assert_eq!(s.capacity(), 50);
    ///
    /// // if the string can be inlined, it is
    /// s.shrink_to(10);
    /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
    /// ```
    #[inline]
    pub fn shrink_to(&mut self, min_capacity: usize) {
        self.0.shrink_to(min_capacity);
    }
1196
    /// Shrinks the capacity of this [`CompactString`] to match its length.
    ///
    /// The resulting capacity is never less than the size of 3×[`usize`],
    /// i.e. the capacity that can be inlined.
    ///
    /// This method is effectively the same as calling
    /// [`shrink_to(0)`](Self::shrink_to).
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::from("This is a string with more than 24 characters.");
    ///
    /// s.reserve(100);
    /// assert!(s.capacity() >= 100);
    ///
    /// s.shrink_to_fit();
    /// assert_eq!(s.len(), s.capacity());
    /// ```
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// let mut s = CompactString::from("short string");
    ///
    /// s.reserve(100);
    /// assert!(s.capacity() >= 100);
    ///
    /// s.shrink_to_fit();
    /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
    /// ```
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        self.0.shrink_to(0);
    }
1233
1234    /// Retains only the characters specified by the predicate.
1235    ///
1236    /// The method iterates over the characters in the string and calls the `predicate`.
1237    ///
1238    /// If the `predicate` returns `false`, then the character gets removed.
1239    /// If the `predicate` returns `true`, then the character is kept.
1240    ///
1241    /// # Examples
1242    ///
1243    /// ```
1244    /// # use compact_str::CompactString;
1245    /// let mut s = CompactString::from("รคb๐„ždโ‚ฌ");
1246    ///
1247    /// let keep = [false, true, true, false, true];
1248    /// let mut iter = keep.iter();
1249    /// s.retain(|_| *iter.next().unwrap());
1250    ///
1251    /// assert_eq!(s, "b๐„žโ‚ฌ");
1252    /// ```
1253    pub fn retain(&mut self, mut predicate: impl FnMut(char) -> bool) {
1254        // We iterate over the string, and copy character by character.
1255
1256        struct SetLenOnDrop<'a> {
1257            self_: &'a mut CompactString,
1258            src_idx: usize,
1259            dst_idx: usize,
1260        }
1261
1262        let mut g = SetLenOnDrop {
1263            self_: self,
1264            src_idx: 0,
1265            dst_idx: 0,
1266        };
1267        let s = g.self_.as_mut_str();
1268        while let Some(ch) = s[g.src_idx..].chars().next() {
1269            let ch_len = ch.len_utf8();
1270            if predicate(ch) {
1271                // SAFETY: We know that both indices are valid, and that we don't split a char.
1272                unsafe {
1273                    let p = s.as_mut_ptr();
1274                    core::ptr::copy(p.add(g.src_idx), p.add(g.dst_idx), ch_len);
1275                }
1276                g.dst_idx += ch_len;
1277            }
1278            g.src_idx += ch_len;
1279        }
1280
1281        impl Drop for SetLenOnDrop<'_> {
1282            fn drop(&mut self) {
1283                // SAFETY: We know that the index is a valid position to break the string.
1284                unsafe { self.self_.set_len(self.dst_idx) };
1285            }
1286        }
1287        drop(g);
1288    }
1289
1290    /// Decode a bytes slice as UTF-8 string, replacing any illegal codepoints
1291    ///
1292    /// # Examples
1293    ///
1294    /// ```
1295    /// # use compact_str::CompactString;
1296    /// let chess_knight = b"\xf0\x9f\xa8\x84";
1297    ///
1298    /// assert_eq!(
1299    ///     "๐Ÿจ„",
1300    ///     CompactString::from_utf8_lossy(chess_knight),
1301    /// );
1302    ///
1303    /// // For valid UTF-8 slices, this is the same as:
1304    /// assert_eq!(
1305    ///     "๐Ÿจ„",
1306    ///     CompactString::new(std::str::from_utf8(chess_knight).unwrap()),
1307    /// );
1308    /// ```
1309    ///
1310    /// Incorrect bytes:
1311    ///
1312    /// ```
1313    /// # use compact_str::CompactString;
1314    /// let broken = b"\xf0\x9f\xc8\x84";
1315    ///
1316    /// assert_eq!(
1317    ///     "๏ฟฝศ„",
1318    ///     CompactString::from_utf8_lossy(broken),
1319    /// );
1320    ///
1321    /// // For invalid UTF-8 slices, this is an optimized implemented for:
1322    /// assert_eq!(
1323    ///     "๏ฟฝศ„",
1324    ///     CompactString::from(String::from_utf8_lossy(broken)),
1325    /// );
1326    /// ```
1327    pub fn from_utf8_lossy(v: &[u8]) -> Self {
1328        fn next_char<'a>(
1329            iter: &mut <&[u8] as IntoIterator>::IntoIter,
1330            buf: &'a mut [u8; 4],
1331        ) -> Option<&'a [u8]> {
1332            const REPLACEMENT: &[u8] = "\u{FFFD}".as_bytes();
1333
1334            macro_rules! ensure_range {
1335                ($idx:literal, $range:pat) => {{
1336                    let mut i = iter.clone();
1337                    match i.next() {
1338                        Some(&c) if matches!(c, $range) => {
1339                            buf[$idx] = c;
1340                            *iter = i;
1341                        }
1342                        _ => return Some(REPLACEMENT),
1343                    }
1344                }};
1345            }
1346
1347            macro_rules! ensure_cont {
1348                ($idx:literal) => {{
1349                    ensure_range!($idx, 0x80..=0xBF);
1350                }};
1351            }
1352
1353            let c = *iter.next()?;
1354            buf[0] = c;
1355
1356            match c {
1357                0x00..=0x7F => {
1358                    // simple ASCII: push as is
1359                    Some(&buf[..1])
1360                }
1361                0xC2..=0xDF => {
1362                    // two bytes
1363                    ensure_cont!(1);
1364                    Some(&buf[..2])
1365                }
1366                0xE0..=0xEF => {
1367                    // three bytes
1368                    match c {
1369                        // 0x80..=0x9F encodes surrogate half
1370                        0xE0 => ensure_range!(1, 0xA0..=0xBF),
1371                        // 0xA0..=0xBF encodes surrogate half
1372                        0xED => ensure_range!(1, 0x80..=0x9F),
1373                        // all UTF-8 continuation bytes are valid
1374                        _ => ensure_cont!(1),
1375                    }
1376                    ensure_cont!(2);
1377                    Some(&buf[..3])
1378                }
1379                0xF0..=0xF4 => {
1380                    // four bytes
1381                    match c {
1382                        // 0x80..=0x8F encodes overlong three byte codepoint
1383                        0xF0 => ensure_range!(1, 0x90..=0xBF),
1384                        // 0x90..=0xBF encodes codepoint > U+10FFFF
1385                        0xF4 => ensure_range!(1, 0x80..=0x8F),
1386                        // all UTF-8 continuation bytes are valid
1387                        _ => ensure_cont!(1),
1388                    }
1389                    ensure_cont!(2);
1390                    ensure_cont!(3);
1391                    Some(&buf[..4])
1392                }
1393                | 0x80..=0xBF // unicode continuation, invalid
1394                | 0xC0..=0xC1 // overlong one byte character
1395                | 0xF5..=0xF7 // four bytes that encode > U+10FFFF
1396                | 0xF8..=0xFB // five bytes, invalid
1397                | 0xFC..=0xFD // six bytes, invalid
1398                | 0xFE..=0xFF => Some(REPLACEMENT), // always invalid
1399            }
1400        }
1401
1402        let mut buf = [0; 4];
1403        let mut result = Self::with_capacity(v.len());
1404        let mut iter = v.iter();
1405        while let Some(s) = next_char(&mut iter, &mut buf) {
1406            // SAFETY: next_char() only returns valid strings
1407            let s = unsafe { core::str::from_utf8_unchecked(s) };
1408            result.push_str(s);
1409        }
1410        result
1411    }
1412
1413    fn from_utf16x(
1414        v: &[u8],
1415        from_int: impl Fn(u16) -> u16,
1416        from_bytes: impl Fn([u8; 2]) -> u16,
1417    ) -> Result<Self, Utf16Error> {
1418        if v.len() % 2 != 0 {
1419            // Input had an odd number of bytes.
1420            return Err(Utf16Error(()));
1421        }
1422
1423        // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
1424        // even though the size of our iterator, `v`, is known ahead of time.
1425        //
1426        // rustlang issue #48994 is tracking the fix
1427        let mut result = CompactString::with_capacity(v.len() / 2);
1428
1429        // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1430        //         `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1431        match unsafe { v.align_to::<u16>() } {
1432            (&[], v, &[]) => {
1433                // Input is correctly aligned.
1434                for c in core::char::decode_utf16(v.iter().copied().map(from_int)) {
1435                    result.push(c.map_err(|_| Utf16Error(()))?);
1436                }
1437            }
1438            _ => {
1439                // Input's alignment is off.
1440                // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1441                let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1442                for c in core::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1443                    result.push(c.map_err(|_| Utf16Error(()))?);
1444                }
1445            }
1446        }
1447
1448        Ok(result)
1449    }
1450
1451    fn from_utf16x_lossy(
1452        v: &[u8],
1453        from_int: impl Fn(u16) -> u16,
1454        from_bytes: impl Fn([u8; 2]) -> u16,
1455    ) -> Self {
1456        // Notice: We write the string "๏ฟฝ" instead of the character '๏ฟฝ', so the character does not
1457        //         have to be formatted before it can be appended.
1458
1459        let (trailing_extra_byte, v) = match v.len() % 2 != 0 {
1460            true => (true, &v[..v.len() - 1]),
1461            false => (false, v),
1462        };
1463        let mut result = CompactString::with_capacity(v.len() / 2);
1464
1465        // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1466        //         `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1467        match unsafe { v.align_to::<u16>() } {
1468            (&[], v, &[]) => {
1469                // Input is correctly aligned.
1470                for c in core::char::decode_utf16(v.iter().copied().map(from_int)) {
1471                    match c {
1472                        Ok(c) => result.push(c),
1473                        Err(_) => result.push_str("๏ฟฝ"),
1474                    }
1475                }
1476            }
1477            _ => {
1478                // Input's alignment is off.
1479                // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1480                let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1481                for c in core::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1482                    match c {
1483                        Ok(c) => result.push(c),
1484                        Err(_) => result.push_str("๏ฟฝ"),
1485                    }
1486                }
1487            }
1488        }
1489
1490        if trailing_extra_byte {
1491            result.push_str("๏ฟฝ");
1492        }
1493        result
1494    }
1495
    /// Decode a slice of bytes as UTF-16 encoded string, in little endian.
    ///
    /// # Errors
    ///
    /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
    /// a [`Utf16Error`] is returned.
    ///
    /// # Examples
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xdc\x0d\x20\x42\x26\x0f\xfe";
    /// let dancing_men = CompactString::from_utf16le(DANCING_MEN).unwrap();
    /// assert_eq!(dancing_men, "👯\u{200d}♂\u{fe0f}");
    /// ```
    #[inline]
    pub fn from_utf16le(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
        CompactString::from_utf16x(v.as_ref(), u16::from_le, u16::from_le_bytes)
    }
1515
    /// Decode a slice of bytes as UTF-16 encoded string, in big endian.
    ///
    /// # Errors
    ///
    /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
    /// a [`Utf16Error`] is returned.
    ///
    /// # Examples
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xfe\x0f";
    /// let dancing_women = CompactString::from_utf16be(DANCING_WOMEN).unwrap();
    /// assert_eq!(dancing_women, "👯\u{200d}♀\u{fe0f}");
    /// ```
    #[inline]
    pub fn from_utf16be(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
        CompactString::from_utf16x(v.as_ref(), u16::from_be, u16::from_be_bytes)
    }
1535
1536    /// Lossy decode a slice of bytes as UTF-16 encoded string, in little endian.
1537    ///
1538    /// In this context "lossy" means that any broken characters in the input are replaced by the
1539    /// \<REPLACEMENT CHARACTER\> `'๏ฟฝ'`. Please notice that, unlike UTF-8, UTF-16 is not self
1540    /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1541    ///
1542    /// # Examples
1543    ///
1544    /// ```
1545    /// # use compact_str::CompactString;
1546    /// // A "random" bit was flipped in the 4th byte:
1547    /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xfc\x0d\x20\x42\x26\x0f\xfe";
1548    /// let dancing_men = CompactString::from_utf16le_lossy(DANCING_MEN);
1549    /// assert_eq!(dancing_men, "๏ฟฝ\u{fc6f}\u{200d}โ™‚๏ธ");
1550    /// ```
1551    #[inline]
1552    pub fn from_utf16le_lossy(v: impl AsRef<[u8]>) -> Self {
1553        CompactString::from_utf16x_lossy(v.as_ref(), u16::from_le, u16::from_le_bytes)
1554    }
1555
1556    /// Lossy decode a slice of bytes as UTF-16 encoded string, in big endian.
1557    ///
1558    /// In this context "lossy" means that any broken characters in the input are replaced by the
1559    /// \<REPLACEMENT CHARACTER\> `'๏ฟฝ'`. Please notice that, unlike UTF-8, UTF-16 is not self
1560    /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1561    ///
1562    /// # Examples
1563    ///
1564    /// ```
1565    /// # use compact_str::CompactString;
1566    /// // A "random" bit was flipped in the 9th byte:
1567    /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xde\x0f";
1568    /// let dancing_women = CompactString::from_utf16be_lossy(DANCING_WOMEN);
1569    /// assert_eq!(dancing_women, "๐Ÿ‘ฏ\u{200d}โ™€๏ฟฝ");
1570    /// ```
1571    #[inline]
1572    pub fn from_utf16be_lossy(v: impl AsRef<[u8]>) -> Self {
1573        CompactString::from_utf16x_lossy(v.as_ref(), u16::from_be, u16::from_be_bytes)
1574    }
1575
    /// Convert the [`CompactString`] into a [`String`].
    ///
    /// The conversion consumes `self` and is delegated to the underlying representation.
    ///
    /// # Examples
    ///
    /// ```
    /// # use compact_str::CompactString;
    /// let s = CompactString::new("Hello world");
    /// let s = s.into_string();
    /// assert_eq!(s, "Hello world");
    /// ```
    pub fn into_string(self) -> String {
        self.0.into_string()
    }
1589
1590    /// Convert a [`String`] into a [`CompactString`] _without inlining_.
1591    ///
1592    /// Note: You probably don't need to use this method, instead you should use `From<String>`
1593    /// which is implemented for [`CompactString`].
1594    ///
1595    /// This method exists incase your code is very sensitive to memory allocations. Normally when
1596    /// converting a [`String`] to a [`CompactString`] we'll inline short strings onto the stack.
1597    /// But this results in [`Drop`]-ing the original [`String`], which causes memory it owned on
1598    /// the heap to be deallocated. Instead when using this method, we always reuse the buffer that
1599    /// was previously owned by the [`String`], so no trips to the allocator are needed.
1600    ///
1601    /// # Examples
1602    ///
1603    /// ### Short Strings
1604    /// ```
1605    /// use compact_str::CompactString;
1606    ///
1607    /// let short = "hello world".to_string();
1608    /// let c_heap = CompactString::from_string_buffer(short);
1609    ///
1610    /// // using CompactString::from_string_buffer, we'll re-use the String's underlying buffer
1611    /// assert!(c_heap.is_heap_allocated());
1612    ///
1613    /// // note: when Clone-ing a short heap allocated string, we'll eagerly inline at that point
1614    /// let c_inline = c_heap.clone();
1615    /// assert!(!c_inline.is_heap_allocated());
1616    ///
1617    /// assert_eq!(c_heap, c_inline);
1618    /// ```
1619    ///
1620    /// ### Longer Strings
1621    /// ```
1622    /// use compact_str::CompactString;
1623    ///
1624    /// let x = "longer string that will be on the heap".to_string();
1625    /// let c1 = CompactString::from(x);
1626    ///
1627    /// let y = "longer string that will be on the heap".to_string();
1628    /// let c2 = CompactString::from_string_buffer(y);
1629    ///
1630    /// // for longer strings, we re-use the underlying String's buffer in both cases
1631    /// assert!(c1.is_heap_allocated());
1632    /// assert!(c2.is_heap_allocated());
1633    /// ```
1634    ///
1635    /// ### Buffer Re-use
1636    /// ```
1637    /// use compact_str::CompactString;
1638    ///
1639    /// let og = "hello world".to_string();
1640    /// let og_addr = og.as_ptr();
1641    ///
1642    /// let mut c = CompactString::from_string_buffer(og);
1643    /// let ex_addr = c.as_ptr();
1644    ///
1645    /// // When converting to/from String and CompactString with from_string_buffer we always re-use
1646    /// // the same underlying allocated memory/buffer
1647    /// assert_eq!(og_addr, ex_addr);
1648    ///
1649    /// let long = "this is a long string that will be on the heap".to_string();
1650    /// let long_addr = long.as_ptr();
1651    ///
1652    /// let mut long_c = CompactString::from(long);
1653    /// let long_ex_addr = long_c.as_ptr();
1654    ///
1655    /// // When converting to/from String and CompactString with From<String>, we'll also re-use the
1656    /// // underlying buffer, if the string is long, otherwise when converting to CompactString we
1657    /// // eagerly inline
1658    /// assert_eq!(long_addr, long_ex_addr);
1659    /// ```
1660    #[inline]
1661    #[track_caller]
1662    pub fn from_string_buffer(s: String) -> Self {
1663        let repr = Repr::from_string(s, false).unwrap_with_msg();
1664        CompactString(repr)
1665    }
1666
1667    /// Returns a copy of this string where each character is mapped to its
1668    /// ASCII lower case equivalent.
1669    ///
1670    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1671    /// but non-ASCII letters are unchanged.
1672    ///
1673    /// To lowercase the value in-place, use [`str::make_ascii_lowercase`].
1674    ///
1675    /// To lowercase ASCII characters in addition to non-ASCII characters, use
1676    /// [`CompactString::to_lowercase`].
1677    ///
1678    /// # Examples
1679    ///
1680    /// ```
1681    /// use compact_str::CompactString;
1682    /// let s = CompactString::new("GrรผรŸe, Jรผrgen โค");
1683    ///
1684    /// assert_eq!("grรผรŸe, jรผrgen โค", s.to_ascii_lowercase());
1685    /// ```
1686    #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1687    #[inline]
1688    pub fn to_ascii_lowercase(&self) -> Self {
1689        let mut s = self.clone();
1690        s.make_ascii_lowercase();
1691        s
1692    }
1693
1694    /// Returns a copy of this string where each character is mapped to its
1695    /// ASCII upper case equivalent.
1696    ///
1697    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1698    /// but non-ASCII letters are unchanged.
1699    ///
1700    /// To uppercase the value in-place, use [`str::make_ascii_uppercase`].
1701    ///
1702    /// To uppercase ASCII characters in addition to non-ASCII characters, use
1703    /// [`CompactString::to_uppercase`].
1704    ///
1705    /// # Examples
1706    ///
1707    /// ```
1708    /// use compact_str::CompactString;
1709    /// let s = CompactString::new("GrรผรŸe, Jรผrgen โค");
1710    ///
1711    /// assert_eq!("GRรผรŸE, JรผRGEN โค", s.to_ascii_uppercase());
1712    /// ```
1713    #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1714    #[inline]
1715    pub fn to_ascii_uppercase(&self) -> Self {
1716        let mut s = self.clone();
1717        s.make_ascii_uppercase();
1718        s
1719    }
1720
    /// Returns the lowercase equivalent of this string slice, as a new [`CompactString`].
    ///
    /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
    /// `Lowercase`.
    ///
    /// Since some characters can expand into multiple characters when changing
    /// the case, this function returns a [`CompactString`] instead of modifying the
    /// parameter in-place.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use compact_str::CompactString;
    /// let s = CompactString::new("HELLO");
    ///
    /// assert_eq!("hello", s.to_lowercase());
    /// ```
    ///
    /// A tricky example, with sigma:
    ///
    /// ```
    /// use compact_str::CompactString;
    /// let sigma = CompactString::new("Σ");
    ///
    /// assert_eq!("σ", sigma.to_lowercase());
    ///
    /// // but at the end of a word, it's ς, not σ:
    /// let odysseus = CompactString::new("ὈΔΥΣΣΕΎΣ");
    ///
    /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
    /// ```
    ///
    /// Languages without case are not changed:
    ///
    /// ```
    /// use compact_str::CompactString;
    /// let new_year = CompactString::new("农历新年");
    ///
    /// assert_eq!(new_year, new_year.to_lowercase());
    /// ```
    #[must_use = "this returns the lowercase string as a new CompactString, \
                  without modifying the original"]
    pub fn to_lowercase(&self) -> Self {
        Self::from_str_to_lowercase(self.as_str())
    }
1768
1769    /// Returns the lowercase equivalent of this string slice, as a new [`CompactString`].
1770    ///
1771    /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1772    /// `Lowercase`.
1773    ///
1774    /// Since some characters can expand into multiple characters when changing
1775    /// the case, this function returns a [`CompactString`] instead of modifying the
1776    /// parameter in-place.
1777    ///
1778    /// # Examples
1779    ///
1780    /// Basic usage:
1781    ///
1782    /// ```
1783    /// use compact_str::CompactString;
1784    ///
1785    /// assert_eq!("hello", CompactString::from_str_to_lowercase("HELLO"));
1786    /// ```
1787    ///
1788    /// A tricky example, with sigma:
1789    ///
1790    /// ```
1791    /// use compact_str::CompactString;
1792    ///
1793    /// assert_eq!("ฯƒ", CompactString::from_str_to_lowercase("ฮฃ"));
1794    ///
1795    /// // but at the end of a word, it's ฯ‚, not ฯƒ:
1796    /// assert_eq!("แฝ€ฮดฯ…ฯƒฯƒฮตฯฯ‚", CompactString::from_str_to_lowercase("แฝˆฮ”ฮฅฮฃฮฃฮ•ฮŽฮฃ"));
1797    /// ```
1798    ///
1799    /// Languages without case are not changed:
1800    ///
1801    /// ```
1802    /// use compact_str::CompactString;
1803    ///
1804    /// let new_year = "ๅ†œๅކๆ–ฐๅนด";
1805    /// assert_eq!(new_year, CompactString::from_str_to_lowercase(new_year));
1806    /// ```
1807    #[must_use = "this returns the lowercase string as a new CompactString, \
1808                  without modifying the original"]
1809    pub fn from_str_to_lowercase(input: &str) -> Self {
1810        let mut s = convert_while_ascii(input.as_bytes(), u8::to_ascii_lowercase);
1811
1812        // Safety: we know this is a valid char boundary since
1813        // out.len() is only progressed if ascii bytes are found
1814        let rest = unsafe { input.get_unchecked(s.len()..) };
1815
1816        for (i, c) in rest.char_indices() {
1817            if c == 'ฮฃ' {
1818                // ฮฃ maps to ฯƒ, except at the end of a word where it maps to ฯ‚.
1819                // This is the only conditional (contextual) but language-independent mapping
1820                // in `SpecialCasing.txt`,
1821                // so hard-code it rather than have a generic "condition" mechanism.
1822                // See https://github.com/rust-lang/rust/issues/26035
1823                map_uppercase_sigma(rest, i, &mut s)
1824            } else {
1825                s.extend(c.to_lowercase());
1826            }
1827        }
1828        return s;
1829
1830        fn map_uppercase_sigma(from: &str, i: usize, to: &mut CompactString) {
1831            // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
1832            // for the definition of `Final_Sigma`.
1833            debug_assert!('ฮฃ'.len_utf8() == 2);
1834            let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
1835                && !case_ignorable_then_cased(from[i + 2..].chars());
1836            to.push_str(if is_word_final { "ฯ‚" } else { "ฯƒ" });
1837        }
1838
1839        fn case_ignorable_then_cased<I: Iterator<Item = char>>(mut iter: I) -> bool {
1840            use unicode_data::case_ignorable::lookup as Case_Ignorable;
1841            use unicode_data::cased::lookup as Cased;
1842            match iter.find(|&c| !Case_Ignorable(c)) {
1843                Some(c) => Cased(c),
1844                None => false,
1845            }
1846        }
1847    }
1848
1849    /// Returns the uppercase equivalent of this string slice, as a new [`CompactString`].
1850    ///
1851    /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1852    /// `Uppercase`.
1853    ///
1854    /// Since some characters can expand into multiple characters when changing
1855    /// the case, this function returns a [`CompactString`] instead of modifying the
1856    /// parameter in-place.
1857    ///
1858    /// # Examples
1859    ///
1860    /// Basic usage:
1861    ///
1862    /// ```
1863    /// use compact_str::CompactString;
1864    /// let s = CompactString::new("hello");
1865    ///
1866    /// assert_eq!("HELLO", s.to_uppercase());
1867    /// ```
1868    ///
1869    /// Scripts without case are not changed:
1870    ///
1871    /// ```
1872    /// use compact_str::CompactString;
1873    /// let new_year = CompactString::new("ๅ†œๅކๆ–ฐๅนด");
1874    ///
1875    /// assert_eq!(new_year, new_year.to_uppercase());
1876    /// ```
1877    ///
1878    /// One character can become multiple:
1879    /// ```
1880    /// use compact_str::CompactString;
1881    /// let s = CompactString::new("tschรผรŸ");
1882    ///
1883    /// assert_eq!("TSCHรœSS", s.to_uppercase());
1884    /// ```
1885    #[must_use = "this returns the uppercase string as a new CompactString, \
1886                  without modifying the original"]
1887    pub fn to_uppercase(&self) -> Self {
1888        Self::from_str_to_uppercase(self.as_str())
1889    }
1890
1891    /// Returns the uppercase equivalent of this string slice, as a new [`CompactString`].
1892    ///
1893    /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1894    /// `Uppercase`.
1895    ///
1896    /// Since some characters can expand into multiple characters when changing
1897    /// the case, this function returns a [`CompactString`] instead of modifying the
1898    /// parameter in-place.
1899    ///
1900    /// # Examples
1901    ///
1902    /// Basic usage:
1903    ///
1904    /// ```
1905    /// use compact_str::CompactString;
1906    ///
1907    /// assert_eq!("HELLO", CompactString::from_str_to_uppercase("hello"));
1908    /// ```
1909    ///
1910    /// Scripts without case are not changed:
1911    ///
1912    /// ```
1913    /// use compact_str::CompactString;
1914    ///
1915    /// let new_year = "ๅ†œๅކๆ–ฐๅนด";
1916    /// assert_eq!(new_year, CompactString::from_str_to_uppercase(new_year));
1917    /// ```
1918    ///
1919    /// One character can become multiple:
1920    /// ```
1921    /// use compact_str::CompactString;
1922    ///
1923    /// assert_eq!("TSCHรœSS", CompactString::from_str_to_uppercase("tschรผรŸ"));
1924    /// ```
1925    #[must_use = "this returns the uppercase string as a new CompactString, \
1926                  without modifying the original"]
1927    pub fn from_str_to_uppercase(input: &str) -> Self {
1928        let mut out = convert_while_ascii(input.as_bytes(), u8::to_ascii_uppercase);
1929
1930        // Safety: we know this is a valid char boundary since
1931        // out.len() is only progressed if ascii bytes are found
1932        let rest = unsafe { input.get_unchecked(out.len()..) };
1933
1934        for c in rest.chars() {
1935            out.extend(c.to_uppercase());
1936        }
1937
1938        out
1939    }
1940}
1941
1942/// Converts the bytes while the bytes are still ascii.
1943/// For better average performance, this is happens in chunks of `2*size_of::<usize>()`.
1944/// Returns a vec with the converted bytes.
1945///
1946/// Copied from https://doc.rust-lang.org/nightly/src/alloc/str.rs.html#623-666
1947#[inline]
1948fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> CompactString {
1949    let mut out = CompactString::with_capacity(b.len());
1950
1951    const USIZE_SIZE: usize = mem::size_of::<usize>();
1952    const MAGIC_UNROLL: usize = 2;
1953    const N: usize = USIZE_SIZE * MAGIC_UNROLL;
1954    const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; USIZE_SIZE]);
1955
1956    let mut i = 0;
1957    unsafe {
1958        while i + N <= b.len() {
1959            // Safety: we have checks the sizes `b` and `out` to know that our
1960            let in_chunk = b.get_unchecked(i..i + N);
1961            let out_chunk = out.spare_capacity_mut().get_unchecked_mut(i..i + N);
1962
1963            let mut bits = 0;
1964            for j in 0..MAGIC_UNROLL {
1965                // read the bytes 1 usize at a time (unaligned since we haven't checked the
1966                // alignment) safety: in_chunk is valid bytes in the range
1967                bits |= in_chunk.as_ptr().cast::<usize>().add(j).read_unaligned();
1968            }
1969            // if our chunks aren't ascii, then return only the prior bytes as init
1970            if bits & NONASCII_MASK != 0 {
1971                break;
1972            }
1973
1974            // perform the case conversions on N bytes (gets heavily autovec'd)
1975            for j in 0..N {
1976                // safety: in_chunk and out_chunk is valid bytes in the range
1977                let out = out_chunk.get_unchecked_mut(j);
1978                out.write(convert(in_chunk.get_unchecked(j)));
1979            }
1980
1981            // mark these bytes as initialised
1982            i += N;
1983        }
1984        out.set_len(i);
1985    }
1986
1987    out
1988}
1989
1990impl Clone for CompactString {
1991    #[inline]
1992    fn clone(&self) -> Self {
1993        Self(self.0.clone())
1994    }
1995
1996    #[inline]
1997    fn clone_from(&mut self, source: &Self) {
1998        self.0.clone_from(&source.0)
1999    }
2000}
2001
2002impl Default for CompactString {
2003    #[inline]
2004    fn default() -> Self {
2005        CompactString::new("")
2006    }
2007}
2008
2009impl Deref for CompactString {
2010    type Target = str;
2011
2012    #[inline]
2013    fn deref(&self) -> &str {
2014        self.as_str()
2015    }
2016}
2017
2018impl DerefMut for CompactString {
2019    #[inline]
2020    fn deref_mut(&mut self) -> &mut str {
2021        self.as_mut_str()
2022    }
2023}
2024
2025impl AsRef<str> for CompactString {
2026    #[inline]
2027    fn as_ref(&self) -> &str {
2028        self.as_str()
2029    }
2030}
2031
#[cfg(feature = "std")]
impl AsRef<OsStr> for CompactString {
    /// Views the contents as an [`OsStr`].
    #[inline]
    fn as_ref(&self) -> &OsStr {
        let text: &str = self.as_str();
        OsStr::new(text)
    }
}
2039
2040impl AsRef<[u8]> for CompactString {
2041    #[inline]
2042    fn as_ref(&self) -> &[u8] {
2043        self.as_bytes()
2044    }
2045}
2046
2047impl Borrow<str> for CompactString {
2048    #[inline]
2049    fn borrow(&self) -> &str {
2050        self.as_str()
2051    }
2052}
2053
2054impl BorrowMut<str> for CompactString {
2055    #[inline]
2056    fn borrow_mut(&mut self) -> &mut str {
2057        self.as_mut_str()
2058    }
2059}
2060
// Marker impl: equality between two `CompactString`s goes through the `PartialEq` impls
// below, which compare the string contents.
impl Eq for CompactString {}
2062
2063impl<T: AsRef<str> + ?Sized> PartialEq<T> for CompactString {
2064    fn eq(&self, other: &T) -> bool {
2065        self.as_str() == other.as_ref()
2066    }
2067}
2068
2069impl PartialEq<CompactString> for &CompactString {
2070    fn eq(&self, other: &CompactString) -> bool {
2071        self.as_str() == other.as_str()
2072    }
2073}
2074
2075impl PartialEq<CompactString> for String {
2076    fn eq(&self, other: &CompactString) -> bool {
2077        self.as_str() == other.as_str()
2078    }
2079}
2080
2081impl<'a> PartialEq<&'a CompactString> for String {
2082    fn eq(&self, other: &&CompactString) -> bool {
2083        self.as_str() == other.as_str()
2084    }
2085}
2086
2087impl PartialEq<CompactString> for &String {
2088    fn eq(&self, other: &CompactString) -> bool {
2089        self.as_str() == other.as_str()
2090    }
2091}
2092
2093impl PartialEq<CompactString> for str {
2094    fn eq(&self, other: &CompactString) -> bool {
2095        self == other.as_str()
2096    }
2097}
2098
2099impl<'a> PartialEq<&'a CompactString> for str {
2100    fn eq(&self, other: &&CompactString) -> bool {
2101        self == other.as_str()
2102    }
2103}
2104
2105impl PartialEq<CompactString> for &str {
2106    fn eq(&self, other: &CompactString) -> bool {
2107        *self == other.as_str()
2108    }
2109}
2110
2111impl PartialEq<CompactString> for &&str {
2112    fn eq(&self, other: &CompactString) -> bool {
2113        **self == other.as_str()
2114    }
2115}
2116
2117impl<'a> PartialEq<CompactString> for Cow<'a, str> {
2118    fn eq(&self, other: &CompactString) -> bool {
2119        *self == other.as_str()
2120    }
2121}
2122
2123impl<'a> PartialEq<CompactString> for &Cow<'a, str> {
2124    fn eq(&self, other: &CompactString) -> bool {
2125        *self == other.as_str()
2126    }
2127}
2128
2129impl PartialEq<String> for &CompactString {
2130    fn eq(&self, other: &String) -> bool {
2131        self.as_str() == other.as_str()
2132    }
2133}
2134
2135impl<'a> PartialEq<Cow<'a, str>> for &CompactString {
2136    fn eq(&self, other: &Cow<'a, str>) -> bool {
2137        self.as_str() == other
2138    }
2139}
2140
2141impl Ord for CompactString {
2142    fn cmp(&self, other: &Self) -> Ordering {
2143        self.as_str().cmp(other.as_str())
2144    }
2145}
2146
2147impl PartialOrd for CompactString {
2148    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2149        Some(self.cmp(other))
2150    }
2151}
2152
2153impl Hash for CompactString {
2154    fn hash<H: Hasher>(&self, state: &mut H) {
2155        self.as_str().hash(state)
2156    }
2157}
2158
2159impl<'a> From<&'a str> for CompactString {
2160    #[inline]
2161    #[track_caller]
2162    fn from(s: &'a str) -> Self {
2163        CompactString::new(s)
2164    }
2165}
2166
2167impl From<String> for CompactString {
2168    #[inline]
2169    #[track_caller]
2170    fn from(s: String) -> Self {
2171        let repr = Repr::from_string(s, true).unwrap_with_msg();
2172        CompactString(repr)
2173    }
2174}
2175
2176impl<'a> From<&'a String> for CompactString {
2177    #[inline]
2178    #[track_caller]
2179    fn from(s: &'a String) -> Self {
2180        CompactString::new(s)
2181    }
2182}
2183
2184impl<'a> From<Cow<'a, str>> for CompactString {
2185    fn from(cow: Cow<'a, str>) -> Self {
2186        match cow {
2187            Cow::Borrowed(s) => s.into(),
2188            // we separate these two so we can re-use the underlying buffer in the owned case
2189            Cow::Owned(s) => s.into(),
2190        }
2191    }
2192}
2193
2194impl From<Box<str>> for CompactString {
2195    #[inline]
2196    #[track_caller]
2197    fn from(b: Box<str>) -> Self {
2198        let s = b.into_string();
2199        let repr = Repr::from_string(s, true).unwrap_with_msg();
2200        CompactString(repr)
2201    }
2202}
2203
2204impl From<CompactString> for String {
2205    #[inline]
2206    fn from(s: CompactString) -> Self {
2207        s.into_string()
2208    }
2209}
2210
2211impl From<CompactString> for Cow<'_, str> {
2212    #[inline]
2213    fn from(s: CompactString) -> Self {
2214        if let Some(s) = s.as_static_str() {
2215            Self::Borrowed(s)
2216        } else {
2217            Self::Owned(s.into_string())
2218        }
2219    }
2220}
2221
2222impl<'a> From<&'a CompactString> for Cow<'a, str> {
2223    #[inline]
2224    fn from(s: &'a CompactString) -> Self {
2225        Self::Borrowed(s)
2226    }
2227}
2228
2229#[cfg(target_has_atomic = "ptr")]
2230impl From<CompactString> for alloc::sync::Arc<str> {
2231    fn from(value: CompactString) -> Self {
2232        Self::from(value.as_str())
2233    }
2234}
2235
2236impl From<CompactString> for alloc::rc::Rc<str> {
2237    fn from(value: CompactString) -> Self {
2238        Self::from(value.as_str())
2239    }
2240}
2241
#[cfg(feature = "std")]
impl From<CompactString> for Box<dyn std::error::Error + Send + Sync> {
    /// Wraps the string in a private error type whose `Display` and `Debug` output is
    /// that of the string itself.
    fn from(value: CompactString) -> Self {
        /// Private newtype that lets a [`CompactString`] act as an error value.
        struct StringError(CompactString);

        // Purposefully skip printing "StringError(..)"
        impl fmt::Debug for StringError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(message) = self;
                fmt::Debug::fmt(message, f)
            }
        }

        impl fmt::Display for StringError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let Self(message) = self;
                fmt::Display::fmt(message, f)
            }
        }

        impl std::error::Error for StringError {
            #[allow(deprecated)]
            fn description(&self) -> &str {
                let Self(message) = self;
                message
            }
        }

        let error = StringError(value);
        Box::new(error)
    }
}
2270
#[cfg(feature = "std")]
impl From<CompactString> for Box<dyn std::error::Error> {
    /// Builds the `Send + Sync` flavour of the boxed error first and then drops the
    /// auto-trait bounds from the trait object.
    fn from(value: CompactString) -> Self {
        let sendable: Box<dyn std::error::Error + Send + Sync> = value.into();
        // The unsizing coercion to `Box<dyn Error>` happens on return.
        sendable
    }
}
2279
2280impl From<CompactString> for Box<str> {
2281    fn from(value: CompactString) -> Self {
2282        if value.is_heap_allocated() {
2283            value.into_string().into_boxed_str()
2284        } else {
2285            Box::from(value.as_str())
2286        }
2287    }
2288}
2289
#[cfg(feature = "std")]
impl From<CompactString> for std::ffi::OsString {
    /// Converts into an `OsString` by way of [`String`].
    fn from(value: CompactString) -> Self {
        let owned = value.into_string();
        owned.into()
    }
}
2296
#[cfg(feature = "std")]
impl From<CompactString> for std::path::PathBuf {
    /// Converts into a `PathBuf` by way of `OsString`.
    fn from(value: CompactString) -> Self {
        let os_string = std::ffi::OsString::from(value);
        os_string.into()
    }
}
2303
#[cfg(feature = "std")]
impl AsRef<std::path::Path> for CompactString {
    /// Views the contents as a filesystem path.
    fn as_ref(&self) -> &std::path::Path {
        let text: &str = self.as_str();
        std::path::Path::new(text)
    }
}
2310
2311impl From<CompactString> for alloc::vec::Vec<u8> {
2312    fn from(value: CompactString) -> Self {
2313        if value.is_heap_allocated() {
2314            value.into_string().into_bytes()
2315        } else {
2316            value.as_bytes().to_vec()
2317        }
2318    }
2319}
2320
2321impl FromStr for CompactString {
2322    type Err = core::convert::Infallible;
2323    fn from_str(s: &str) -> Result<CompactString, Self::Err> {
2324        Ok(CompactString::from(s))
2325    }
2326}
2327
2328impl fmt::Debug for CompactString {
2329    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2330        fmt::Debug::fmt(self.as_str(), f)
2331    }
2332}
2333
2334impl fmt::Display for CompactString {
2335    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2336        fmt::Display::fmt(self.as_str(), f)
2337    }
2338}
2339
2340impl FromIterator<char> for CompactString {
2341    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
2342        let repr = iter.into_iter().collect();
2343        CompactString(repr)
2344    }
2345}
2346
2347impl<'a> FromIterator<&'a char> for CompactString {
2348    fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
2349        let repr = iter.into_iter().collect();
2350        CompactString(repr)
2351    }
2352}
2353
2354impl<'a> FromIterator<&'a str> for CompactString {
2355    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
2356        let repr = iter.into_iter().collect();
2357        CompactString(repr)
2358    }
2359}
2360
2361impl FromIterator<Box<str>> for CompactString {
2362    fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
2363        let repr = iter.into_iter().collect();
2364        CompactString(repr)
2365    }
2366}
2367
2368impl<'a> FromIterator<Cow<'a, str>> for CompactString {
2369    fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
2370        let repr = iter.into_iter().collect();
2371        CompactString(repr)
2372    }
2373}
2374
2375impl FromIterator<String> for CompactString {
2376    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
2377        let repr = iter.into_iter().collect();
2378        CompactString(repr)
2379    }
2380}
2381
2382impl FromIterator<CompactString> for CompactString {
2383    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2384        let repr = iter.into_iter().collect();
2385        CompactString(repr)
2386    }
2387}
2388
2389impl FromIterator<CompactString> for String {
2390    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2391        let mut iterator = iter.into_iter();
2392        match iterator.next() {
2393            None => String::new(),
2394            Some(buf) => {
2395                let mut buf = buf.into_string();
2396                buf.extend(iterator);
2397                buf
2398            }
2399        }
2400    }
2401}
2402
2403impl FromIterator<CompactString> for Cow<'_, str> {
2404    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2405        String::from_iter(iter).into()
2406    }
2407}
2408
2409impl Extend<char> for CompactString {
2410    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
2411        self.0.extend(iter)
2412    }
2413}
2414
2415impl<'a> Extend<&'a char> for CompactString {
2416    fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
2417        self.0.extend(iter)
2418    }
2419}
2420
2421impl<'a> Extend<&'a str> for CompactString {
2422    fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
2423        self.0.extend(iter)
2424    }
2425}
2426
2427impl Extend<Box<str>> for CompactString {
2428    fn extend<T: IntoIterator<Item = Box<str>>>(&mut self, iter: T) {
2429        self.0.extend(iter)
2430    }
2431}
2432
2433impl<'a> Extend<Cow<'a, str>> for CompactString {
2434    fn extend<T: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: T) {
2435        iter.into_iter().for_each(move |s| self.push_str(&s));
2436    }
2437}
2438
2439impl Extend<String> for CompactString {
2440    fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
2441        self.0.extend(iter)
2442    }
2443}
2444
2445impl Extend<CompactString> for String {
2446    fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2447        for s in iter {
2448            self.push_str(&s);
2449        }
2450    }
2451}
2452
2453impl Extend<CompactString> for CompactString {
2454    fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2455        for s in iter {
2456            self.push_str(&s);
2457        }
2458    }
2459}
2460
2461impl<'a> Extend<CompactString> for Cow<'a, str> {
2462    fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2463        self.to_mut().extend(iter);
2464    }
2465}
2466
2467impl fmt::Write for CompactString {
2468    fn write_str(&mut self, s: &str) -> fmt::Result {
2469        self.push_str(s);
2470        Ok(())
2471    }
2472
2473    fn write_fmt(mut self: &mut Self, args: fmt::Arguments<'_>) -> fmt::Result {
2474        match args.as_str() {
2475            Some(s) => {
2476                if self.is_empty() && !self.is_heap_allocated() {
2477                    // Since self is currently an empty inline variant or
2478                    // an empty `StaticStr` variant, constructing a new one
2479                    // with `Self::const_new` is more efficient since
2480                    // it is guaranteed to be O(1).
2481                    *self = Self::const_new(s);
2482                } else {
2483                    self.push_str(s);
2484                }
2485                Ok(())
2486            }
2487            None => fmt::write(&mut self, args),
2488        }
2489    }
2490}
2491
2492impl Add<&str> for CompactString {
2493    type Output = Self;
2494    fn add(mut self, rhs: &str) -> Self::Output {
2495        self.push_str(rhs);
2496        self
2497    }
2498}
2499
2500impl AddAssign<&str> for CompactString {
2501    fn add_assign(&mut self, rhs: &str) {
2502        self.push_str(rhs);
2503    }
2504}
2505
/// A possible error value when converting a [`CompactString`] from a UTF-16 byte slice.
///
/// This type is the error type for the [`from_utf16`] method on [`CompactString`].
///
/// [`from_utf16`]: CompactString::from_utf16
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// // 𝄞mu<invalid>ic
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
///           0xD800, 0x0069, 0x0063];
///
/// assert!(CompactString::from_utf16(v).is_err());
/// ```
#[derive(Copy, Clone, Debug)]
pub struct Utf16Error(());
2525
2526impl fmt::Display for Utf16Error {
2527    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2528        fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
2529    }
2530}
2531
/// An iterator over the extracted data by [`CompactString::drain()`].
///
/// Dropping the iterator removes the drained range from the string it was created from.
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Drain<'a> {
    // Raw pointer to the string being drained; it is dereferenced when `Drain` is dropped.
    compact_string: *mut CompactString,
    // Bounds of the drained range, handed to `replace_range_shrink` on drop.
    // NOTE(review): presumably byte offsets into the string — confirm in `drain()`.
    start: usize,
    end: usize,
    // The characters of the drained range that have not been yielded yet.
    chars: core::str::Chars<'a>,
}
2540
// SAFETY: Drain keeps the lifetime of the CompactString it belongs to.
// These impls are written by hand because the raw pointer field stops `Send` and
// `Sync` from being derived automatically.
unsafe impl Send for Drain<'_> {}
unsafe impl Sync for Drain<'_> {}
2544
2545impl fmt::Debug for Drain<'_> {
2546    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2547        f.debug_tuple("Drain").field(&self.as_str()).finish()
2548    }
2549}
2550
2551impl fmt::Display for Drain<'_> {
2552    #[inline]
2553    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2554        f.write_str(self.as_str())
2555    }
2556}
2557
2558impl Drop for Drain<'_> {
2559    #[inline]
2560    fn drop(&mut self) {
2561        // SAFETY: Drain keeps a mutable reference to compact_string, so one one else can access
2562        //         the CompactString, but this function right now. CompactString::drain() ensured
2563        //         that the new extracted range does not split a UTF-8 character.
2564        unsafe { (*self.compact_string).replace_range_shrink(self.start, self.end, "") };
2565    }
2566}
2567
2568impl Drain<'_> {
2569    /// The remaining, unconsumed characters of the extracted substring.
2570    #[inline]
2571    pub fn as_str(&self) -> &str {
2572        self.chars.as_str()
2573    }
2574}
2575
2576impl Deref for Drain<'_> {
2577    type Target = str;
2578
2579    #[inline]
2580    fn deref(&self) -> &Self::Target {
2581        self.as_str()
2582    }
2583}
2584
2585impl Iterator for Drain<'_> {
2586    type Item = char;
2587
2588    #[inline]
2589    fn next(&mut self) -> Option<char> {
2590        self.chars.next()
2591    }
2592
2593    #[inline]
2594    fn count(self) -> usize {
2595        // <Chars as Iterator>::count() is specialized, and cloning is trivial.
2596        self.chars.clone().count()
2597    }
2598
2599    fn size_hint(&self) -> (usize, Option<usize>) {
2600        self.chars.size_hint()
2601    }
2602
2603    #[inline]
2604    fn last(mut self) -> Option<char> {
2605        self.chars.next_back()
2606    }
2607}
2608
2609impl DoubleEndedIterator for Drain<'_> {
2610    #[inline]
2611    fn next_back(&mut self) -> Option<char> {
2612        self.chars.next_back()
2613    }
2614}
2615
// Marker impl: `next()` simply forwards to the inner `Chars` iterator.
impl FusedIterator for Drain<'_> {}
2617
/// A possible error value if allocating or resizing a [`CompactString`] failed.
///
/// The type carries no further detail; its private unit field keeps it from being
/// constructed outside of this crate.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ReserveError(());
2621
2622impl fmt::Display for ReserveError {
2623    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2624        f.write_str("Cannot allocate memory to hold CompactString")
2625    }
2626}
2627
// Only available with the `std` feature; all trait methods keep their defaults.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl std::error::Error for ReserveError {}
2631
/// A possible error value if [`ToCompactString::try_to_compact_string()`] failed.
///
/// The enum is `#[non_exhaustive]`, so matches outside of this crate need a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq)]
#[non_exhaustive]
pub enum ToCompactStringError {
    /// Cannot allocate memory to hold CompactString
    Reserve(ReserveError),
    /// [`Display::fmt()`][core::fmt::Display::fmt] returned an error
    Fmt(fmt::Error),
}
2641
2642impl fmt::Display for ToCompactStringError {
2643    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2644        match self {
2645            ToCompactStringError::Reserve(err) => err.fmt(f),
2646            ToCompactStringError::Fmt(err) => err.fmt(f),
2647        }
2648    }
2649}
2650
2651impl From<ReserveError> for ToCompactStringError {
2652    #[inline]
2653    fn from(value: ReserveError) -> Self {
2654        Self::Reserve(value)
2655    }
2656}
2657
2658impl From<fmt::Error> for ToCompactStringError {
2659    #[inline]
2660    fn from(value: fmt::Error) -> Self {
2661        Self::Fmt(value)
2662    }
2663}
2664
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl std::error::Error for ToCompactStringError {
    /// Exposes the wrapped error as the underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            Self::Reserve(err) => err,
            Self::Fmt(err) => err,
        };
        Some(cause)
    }
}
2675
/// Crate-private helper trait for unwrapping a fallible value.
trait UnwrapWithMsg {
    /// The value produced by a successful unwrap.
    type T;

    /// Consumes `self` and returns the contained value.
    fn unwrap_with_msg(self) -> Self::T;
}
2681
2682impl<T, E: fmt::Display> UnwrapWithMsg for Result<T, E> {
2683    type T = T;
2684
2685    #[inline(always)]
2686    #[track_caller]
2687    fn unwrap_with_msg(self) -> T {
2688        match self {
2689            Ok(value) => value,
2690            Err(err) => unwrap_with_msg_fail(err),
2691        }
2692    }
2693}
2694
/// Panics with the `Display` output of `error` as the panic message.
///
/// Kept out of line and marked cold so that the happy path of
/// `unwrap_with_msg()` stays small.
#[inline(never)]
#[cold]
#[track_caller]
fn unwrap_with_msg_fail<E: fmt::Display>(error: E) -> ! {
    panic!("{}", error)
}
2701
// Compile-time guarantee that a `CompactString` is exactly as large as a `String`.
static_assertions::assert_eq_size!(CompactString, String);