compact_str/lib.rs
1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![no_std]
4
5#[cfg(feature = "std")]
6#[macro_use]
7extern crate std;
8
9#[cfg_attr(test, macro_use)]
10extern crate alloc;
11
12use alloc::borrow::Cow;
13use alloc::boxed::Box;
14use alloc::string::String;
15#[doc(hidden)]
16pub use core;
17use core::borrow::{
18 Borrow,
19 BorrowMut,
20};
21use core::cmp::Ordering;
22use core::hash::{
23 Hash,
24 Hasher,
25};
26use core::iter::FusedIterator;
27use core::ops::{
28 Add,
29 AddAssign,
30 Bound,
31 Deref,
32 DerefMut,
33 RangeBounds,
34};
35use core::str::{
36 FromStr,
37 Utf8Error,
38};
39use core::{
40 fmt,
41 mem,
42 slice,
43};
44#[cfg(feature = "std")]
45use std::ffi::OsStr;
46
47mod features;
48mod macros;
49mod unicode_data;
50
51mod repr;
52use repr::Repr;
53
54mod traits;
55pub use traits::{
56 CompactStringExt,
57 ToCompactString,
58};
59
60#[cfg(test)]
61mod tests;
62
63/// A [`CompactString`] is a compact string type that can be used almost anywhere a
64/// [`String`] or [`str`] can be used.
65///
66/// ## Using `CompactString`
67/// ```
68/// use compact_str::CompactString;
69/// # use std::collections::HashMap;
70///
71/// // CompactString auto derefs into a str so you can use all methods from `str`
72/// // that take a `&self`
73/// if CompactString::new("hello world!").is_ascii() {
74/// println!("we're all ASCII")
75/// }
76///
77/// // You can use a CompactString in collections like you would a String or &str
78/// let mut map: HashMap<CompactString, CompactString> = HashMap::new();
79///
80/// // directly construct a new `CompactString`
81/// map.insert(CompactString::new("nyc"), CompactString::new("empire state building"));
82/// // create a `CompactString` from a `&str`
83/// map.insert("sf".into(), "transamerica pyramid".into());
84/// // create a `CompactString` from a `String`
85/// map.insert(String::from("sea").into(), String::from("space needle").into());
86///
87/// fn wrapped_print<T: AsRef<str>>(text: T) {
88/// println!("{}", text.as_ref());
89/// }
90///
91/// // CompactString impls AsRef<str> and Borrow<str>, so it can be used anywhere
92/// // that expects a generic string
93/// if let Some(building) = map.get("nyc") {
94/// wrapped_print(building);
95/// }
96///
97/// // CompactString can also be directly compared to a String or &str
98/// assert_eq!(CompactString::new("chicago"), "chicago");
99/// assert_eq!(CompactString::new("houston"), String::from("houston"));
100/// ```
101///
102/// # Converting from a `String`
103/// It's important that a `CompactString` interops well with `String`, so you can easily use both in
104/// your code base.
105///
106/// `CompactString` implements `From<String>` and operates in the following manner:
107/// - Eagerly inlines the string, possibly dropping excess capacity
108/// - Otherwise re-uses the same underlying buffer from `String`
109///
110/// ```
111/// use compact_str::CompactString;
112///
113/// // eagerly inlining
114/// let short = String::from("hello world");
115/// let short_c = CompactString::from(short);
116/// assert!(!short_c.is_heap_allocated());
117///
118/// // dropping excess capacity
119/// let mut excess = String::with_capacity(256);
120/// excess.push_str("abc");
121///
122/// let excess_c = CompactString::from(excess);
123/// assert!(!excess_c.is_heap_allocated());
124/// assert!(excess_c.capacity() < 256);
125///
126/// // re-using the same buffer
127/// let long = String::from("this is a longer string that will be heap allocated");
128///
129/// let long_ptr = long.as_ptr();
130/// let long_len = long.len();
131/// let long_cap = long.capacity();
132///
133/// let mut long_c = CompactString::from(long);
134/// assert!(long_c.is_heap_allocated());
135///
136/// let cpt_ptr = long_c.as_ptr();
137/// let cpt_len = long_c.len();
138/// let cpt_cap = long_c.capacity();
139///
140/// // the original String and the CompactString point to the same place in memory, buffer re-use!
141/// assert_eq!(cpt_ptr, long_ptr);
142/// assert_eq!(cpt_len, long_len);
143/// assert_eq!(cpt_cap, long_cap);
144/// ```
145///
146/// ### Prevent Eagerly Inlining
147/// A consequence of eagerly inlining is you then need to de-allocate the existing buffer, which
148/// might not always be desirable if you're converting a very large amount of `String`s. If your
149/// code is very sensitive to allocations, consider the [`CompactString::from_string_buffer`] API.
// `repr(transparent)`: the wrapper has the same layout and ABI as its single `Repr` field.
#[repr(transparent)]
pub struct CompactString(Repr);
152
153impl CompactString {
154 /// Creates a new [`CompactString`] from any type that implements `AsRef<str>`.
155 /// If the string is short enough, then it will be inlined on the stack!
156 ///
157 /// In a `static` or `const` context you can use the method [`CompactString::const_new()`].
158 ///
159 /// # Examples
160 ///
161 /// ### Inlined
162 /// ```
163 /// # use compact_str::CompactString;
164 /// // We can inline strings up to 12 characters long on 32-bit architectures...
165 /// #[cfg(target_pointer_width = "32")]
166 /// let s = "i'm 12 chars";
167 /// // ...and up to 24 characters on 64-bit architectures!
168 /// #[cfg(target_pointer_width = "64")]
169 /// let s = "i am 24 characters long!";
170 ///
171 /// let compact = CompactString::new(&s);
172 ///
173 /// assert_eq!(compact, s);
174 /// // we are not allocated on the heap!
175 /// assert!(!compact.is_heap_allocated());
176 /// ```
177 ///
178 /// ### Heap
179 /// ```
180 /// # use compact_str::CompactString;
181 /// // For longer strings though, we get allocated on the heap
182 /// let long = "I am a longer string that will be allocated on the heap";
183 /// let compact = CompactString::new(long);
184 ///
185 /// assert_eq!(compact, long);
186 /// // we are allocated on the heap!
187 /// assert!(compact.is_heap_allocated());
188 /// ```
189 ///
190 /// ### Creation
191 /// ```
192 /// use compact_str::CompactString;
193 ///
194 /// // Using a `&'static str`
195 /// let s = "hello world!";
196 /// let hello = CompactString::new(&s);
197 ///
198 /// // Using a `String`
/// let u = String::from("🦄🌈");
200 /// let unicorn = CompactString::new(u);
201 ///
202 /// // Using a `Box<str>`
/// let b: Box<str> = String::from("🦀🦀🦀").into_boxed_str();
204 /// let boxed = CompactString::new(&b);
205 /// ```
206 #[inline]
207 #[track_caller]
208 pub fn new<T: AsRef<str>>(text: T) -> Self {
209 Self::try_new(text).unwrap_with_msg()
210 }
211
212 /// Fallible version of [`CompactString::new()`]
213 ///
214 /// This method won't panic if the system is out-of-memory, but return an [`ReserveError`].
215 /// Otherwise it behaves the same as [`CompactString::new()`].
216 #[inline]
217 pub fn try_new<T: AsRef<str>>(text: T) -> Result<Self, ReserveError> {
218 Repr::new(text.as_ref()).map(CompactString)
219 }
220
221 /// Creates a new inline [`CompactString`] from `&'static str` at compile time.
222 /// Complexity: O(1). As an optimization, short strings get inlined.
223 ///
224 /// In a dynamic context you can use the method [`CompactString::new()`].
225 ///
226 /// # Examples
227 /// ```
228 /// use compact_str::CompactString;
229 ///
230 /// const DEFAULT_NAME: CompactString = CompactString::const_new("untitled");
231 /// ```
232 #[inline]
233 pub const fn const_new(text: &'static str) -> Self {
234 CompactString(Repr::const_new(text))
235 }
236
237 /// Creates a new inline [`CompactString`] at compile time.
238 #[deprecated(
239 since = "0.8.0",
240 note = "replaced by CompactString::const_new, will be removed in 0.9.0"
241 )]
242 #[inline]
243 pub const fn new_inline(text: &'static str) -> Self {
244 CompactString::const_new(text)
245 }
246
247 /// Creates a new inline [`CompactString`] from `&'static str` at compile time.
248 #[deprecated(
249 since = "0.8.0",
250 note = "replaced by CompactString::const_new, will be removed in 0.9.0"
251 )]
252 #[inline]
253 pub const fn from_static_str(text: &'static str) -> Self {
254 CompactString::const_new(text)
255 }
256
257 /// Get back the `&'static str` constructed by [`CompactString::const_new`].
258 ///
259 /// If the string was short enough that it could be inlined, then it was inline, and
260 /// this method will return `None`.
261 ///
262 /// # Examples
263 /// ```
264 /// use compact_str::CompactString;
265 ///
266 /// const DEFAULT_NAME: CompactString =
267 /// CompactString::const_new("That is not dead which can eternal lie.");
268 /// assert_eq!(
269 /// DEFAULT_NAME.as_static_str().unwrap(),
270 /// "That is not dead which can eternal lie.",
271 /// );
272 /// ```
273 #[inline]
274 #[rustversion::attr(since(1.64), const)]
275 pub fn as_static_str(&self) -> Option<&'static str> {
276 self.0.as_static_str()
277 }
278
279 /// Creates a new empty [`CompactString`] with the capacity to fit at least `capacity` bytes.
280 ///
281 /// A `CompactString` will inline strings on the stack, if they're small enough. Specifically,
282 /// if the string has a length less than or equal to `std::mem::size_of::<String>` bytes
283 /// then it will be inlined. This also means that `CompactString`s have a minimum capacity
284 /// of `std::mem::size_of::<String>`.
285 ///
286 /// # Panics
287 ///
288 /// This method panics if the system is out-of-memory.
289 /// Use [`CompactString::try_with_capacity()`] if you want to handle such a problem manually.
290 ///
291 /// # Examples
292 ///
293 /// ### "zero" Capacity
294 /// ```
295 /// # use compact_str::CompactString;
296 /// // Creating a CompactString with a capacity of 0 will create
297 /// // one with capacity of std::mem::size_of::<String>();
298 /// let empty = CompactString::with_capacity(0);
299 /// let min_size = std::mem::size_of::<String>();
300 ///
301 /// assert_eq!(empty.capacity(), min_size);
302 /// assert_ne!(0, min_size);
303 /// assert!(!empty.is_heap_allocated());
304 /// ```
305 ///
306 /// ### Max Inline Size
307 /// ```
308 /// # use compact_str::CompactString;
309 /// // Creating a CompactString with a capacity of std::mem::size_of::<String>()
310 /// // will not heap allocate.
311 /// let str_size = std::mem::size_of::<String>();
312 /// let empty = CompactString::with_capacity(str_size);
313 ///
314 /// assert_eq!(empty.capacity(), str_size);
315 /// assert!(!empty.is_heap_allocated());
316 /// ```
317 ///
318 /// ### Heap Allocating
319 /// ```
320 /// # use compact_str::CompactString;
321 /// // If you create a `CompactString` with a capacity greater than
322 /// // `std::mem::size_of::<String>`, it will heap allocated. For heap
323 /// // allocated strings we have a minimum capacity
324 ///
325 /// const MIN_HEAP_CAPACITY: usize = std::mem::size_of::<usize>() * 4;
326 ///
327 /// let heap_size = std::mem::size_of::<String>() + 1;
328 /// let empty = CompactString::with_capacity(heap_size);
329 ///
330 /// assert_eq!(empty.capacity(), MIN_HEAP_CAPACITY);
331 /// assert!(empty.is_heap_allocated());
332 /// ```
333 #[inline]
334 #[track_caller]
335 pub fn with_capacity(capacity: usize) -> Self {
336 Self::try_with_capacity(capacity).unwrap_with_msg()
337 }
338
339 /// Fallible version of [`CompactString::with_capacity()`]
340 ///
341 /// This method won't panic if the system is out-of-memory, but return an [`ReserveError`].
342 /// Otherwise it behaves the same as [`CompactString::with_capacity()`].
343 #[inline]
344 pub fn try_with_capacity(capacity: usize) -> Result<Self, ReserveError> {
345 Repr::with_capacity(capacity).map(CompactString)
346 }
347
348 /// Convert a slice of bytes into a [`CompactString`].
349 ///
350 /// A [`CompactString`] is a contiguous collection of bytes (`u8`s) that is valid [`UTF-8`](https://en.wikipedia.org/wiki/UTF-8).
351 /// This method converts from an arbitrary contiguous collection of bytes into a
352 /// [`CompactString`], failing if the provided bytes are not `UTF-8`.
353 ///
354 /// Note: If you want to create a [`CompactString`] from a non-contiguous collection of bytes,
355 /// enable the `bytes` feature of this crate, and see `CompactString::from_utf8_buf`
356 ///
357 /// # Examples
358 /// ### Valid UTF-8
359 /// ```
360 /// # use compact_str::CompactString;
361 /// let bytes = vec![240, 159, 166, 128, 240, 159, 146, 175];
362 /// let compact = CompactString::from_utf8(bytes).expect("valid UTF-8");
363 ///
/// assert_eq!(compact, "🦀💯");
365 /// ```
366 ///
367 /// ### Invalid UTF-8
368 /// ```
369 /// # use compact_str::CompactString;
370 /// let bytes = vec![255, 255, 255];
371 /// let result = CompactString::from_utf8(bytes);
372 ///
373 /// assert!(result.is_err());
374 /// ```
375 #[inline]
376 pub fn from_utf8<B: AsRef<[u8]>>(buf: B) -> Result<Self, Utf8Error> {
377 Repr::from_utf8(buf).map(CompactString)
378 }
379
380 /// Converts a vector of bytes to a [`CompactString`] without checking that the string contains
381 /// valid UTF-8.
382 ///
383 /// See the safe version, [`CompactString::from_utf8`], for more details.
384 ///
385 /// # Safety
386 ///
387 /// This function is unsafe because it does not check that the bytes passed to it are valid
388 /// UTF-8. If this constraint is violated, it may cause memory unsafety issues with future users
389 /// of the [`CompactString`], as the rest of the standard library assumes that
390 /// [`CompactString`]s are valid UTF-8.
391 ///
392 /// # Examples
393 ///
394 /// Basic usage:
395 ///
396 /// ```
397 /// # use compact_str::CompactString;
398 /// // some bytes, in a vector
399 /// let sparkle_heart = vec![240, 159, 146, 150];
400 ///
401 /// let sparkle_heart = unsafe {
402 /// CompactString::from_utf8_unchecked(sparkle_heart)
403 /// };
404 ///
/// assert_eq!("💖", sparkle_heart);
406 /// ```
407 #[inline]
408 #[must_use]
409 #[track_caller]
410 pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(buf: B) -> Self {
411 Repr::from_utf8_unchecked(buf)
412 .map(CompactString)
413 .unwrap_with_msg()
414 }
415
416 /// Decode a [`UTF-16`](https://en.wikipedia.org/wiki/UTF-16) slice of bytes into a
417 /// [`CompactString`], returning an [`Err`] if the slice contains any invalid data.
418 ///
419 /// # Examples
420 /// ### Valid UTF-16
421 /// ```
422 /// # use compact_str::CompactString;
423 /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0x0069, 0x0063];
424 /// let compact = CompactString::from_utf16(buf).unwrap();
425 ///
/// assert_eq!(compact, "𝄞music");
427 /// ```
428 ///
429 /// ### Invalid UTF-16
430 /// ```
431 /// # use compact_str::CompactString;
432 /// let buf: &[u16] = &[0xD834, 0xDD1E, 0x006d, 0x0075, 0xD800, 0x0069, 0x0063];
433 /// let res = CompactString::from_utf16(buf);
434 ///
435 /// assert!(res.is_err());
436 /// ```
437 #[inline]
438 pub fn from_utf16<B: AsRef<[u16]>>(buf: B) -> Result<Self, Utf16Error> {
439 // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
440 // even though the size of our iterator, `buf`, is known ahead of time.
441 //
442 // rustlang issue #48994 is tracking the fix
443
444 let buf = buf.as_ref();
445 let mut ret = CompactString::with_capacity(buf.len());
446 for c in core::char::decode_utf16(buf.iter().copied()) {
447 if let Ok(c) = c {
448 ret.push(c);
449 } else {
450 return Err(Utf16Error(()));
451 }
452 }
453 Ok(ret)
454 }
455
456 /// Decode a UTF-16โencoded slice `v` into a `CompactString`, replacing invalid data with
457 /// the replacement character (`U+FFFD`), ๏ฟฝ.
458 ///
459 /// # Examples
460 ///
461 /// Basic usage:
462 ///
463 /// ```
464 /// # use compact_str::CompactString;
465 /// // ๐mus<invalid>ic<invalid>
466 /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
467 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
468 /// 0xD834];
469 ///
470 /// assert_eq!(CompactString::from("๐mus\u{FFFD}ic\u{FFFD}"),
471 /// CompactString::from_utf16_lossy(v));
472 /// ```
473 #[inline]
474 pub fn from_utf16_lossy<B: AsRef<[u16]>>(buf: B) -> Self {
475 let buf = buf.as_ref();
476 let mut ret = CompactString::with_capacity(buf.len());
477 for c in core::char::decode_utf16(buf.iter().copied()) {
478 match c {
479 Ok(c) => ret.push(c),
480 Err(_) => ret.push_str("๏ฟฝ"),
481 }
482 }
483 ret
484 }
485
486 /// Returns the length of the [`CompactString`] in `bytes`, not [`char`]s or graphemes.
487 ///
488 /// When using `UTF-8` encoding (which all strings in Rust do) a single character will be 1 to 4
489 /// bytes long, therefore the return value of this method might not be what a human considers
490 /// the length of the string.
491 ///
492 /// # Examples
493 /// ```
494 /// # use compact_str::CompactString;
495 /// let ascii = CompactString::new("hello world");
496 /// assert_eq!(ascii.len(), 11);
497 ///
/// let emoji = CompactString::new("🐱");
499 /// assert_eq!(emoji.len(), 4);
500 /// ```
501 #[inline]
502 pub fn len(&self) -> usize {
503 self.0.len()
504 }
505
506 /// Returns `true` if the [`CompactString`] has a length of 0, `false` otherwise
507 ///
508 /// # Examples
509 /// ```
510 /// # use compact_str::CompactString;
511 /// let mut msg = CompactString::new("");
512 /// assert!(msg.is_empty());
513 ///
514 /// // add some characters
515 /// msg.push_str("hello reader!");
516 /// assert!(!msg.is_empty());
517 /// ```
518 #[inline]
519 pub fn is_empty(&self) -> bool {
520 self.0.is_empty()
521 }
522
523 /// Returns the capacity of the [`CompactString`], in bytes.
524 ///
525 /// # Note
526 /// * A `CompactString` will always have a capacity of at least `std::mem::size_of::<String>()`
527 ///
528 /// # Examples
529 /// ### Minimum Size
530 /// ```
531 /// # use compact_str::CompactString;
532 /// let min_size = std::mem::size_of::<String>();
533 /// let compact = CompactString::new("");
534 ///
535 /// assert!(compact.capacity() >= min_size);
536 /// ```
537 ///
538 /// ### Heap Allocated
539 /// ```
540 /// # use compact_str::CompactString;
541 /// let compact = CompactString::with_capacity(128);
542 /// assert_eq!(compact.capacity(), 128);
543 /// ```
544 #[inline]
545 pub fn capacity(&self) -> usize {
546 self.0.capacity()
547 }
548
549 /// Ensures that this [`CompactString`]'s capacity is at least `additional` bytes longer than
550 /// its length. The capacity may be increased by more than `additional` bytes if it chooses,
551 /// to prevent frequent reallocations.
552 ///
553 /// # Note
554 /// * A `CompactString` will always have at least a capacity of `std::mem::size_of::<String>()`
555 /// * Reserving additional bytes may cause the `CompactString` to become heap allocated
556 ///
557 /// # Panics
558 /// This method panics if the new capacity overflows `usize` or if the system is out-of-memory.
559 /// Use [`CompactString::try_reserve()`] if you want to handle such a problem manually.
560 ///
561 /// # Examples
562 /// ```
563 /// # use compact_str::CompactString;
564 ///
565 /// const WORD: usize = std::mem::size_of::<usize>();
566 /// let mut compact = CompactString::default();
567 /// assert!(compact.capacity() >= (WORD * 3) - 1);
568 ///
569 /// compact.reserve(200);
570 /// assert!(compact.is_heap_allocated());
571 /// assert!(compact.capacity() >= 200);
572 /// ```
573 #[inline]
574 #[track_caller]
575 pub fn reserve(&mut self, additional: usize) {
576 self.try_reserve(additional).unwrap_with_msg()
577 }
578
579 /// Fallible version of [`CompactString::reserve()`]
580 ///
581 /// This method won't panic if the system is out-of-memory, but return an [`ReserveError`]
582 /// Otherwise it behaves the same as [`CompactString::reserve()`].
583 #[inline]
584 pub fn try_reserve(&mut self, additional: usize) -> Result<(), ReserveError> {
585 self.0.reserve(additional)
586 }
587
588 /// Returns a string slice containing the entire [`CompactString`].
589 ///
590 /// # Examples
591 /// ```
592 /// # use compact_str::CompactString;
593 /// let s = CompactString::new("hello");
594 ///
595 /// assert_eq!(s.as_str(), "hello");
596 /// ```
597 #[inline]
598 pub fn as_str(&self) -> &str {
599 self.0.as_str()
600 }
601
602 /// Returns a mutable string slice containing the entire [`CompactString`].
603 ///
604 /// # Examples
605 /// ```
606 /// # use compact_str::CompactString;
607 /// let mut s = CompactString::new("hello");
608 /// s.as_mut_str().make_ascii_uppercase();
609 ///
610 /// assert_eq!(s.as_str(), "HELLO");
611 /// ```
612 #[inline]
613 pub fn as_mut_str(&mut self) -> &mut str {
614 let len = self.len();
615 unsafe { core::str::from_utf8_unchecked_mut(&mut self.0.as_mut_buf()[..len]) }
616 }
617
618 unsafe fn spare_capacity_mut(&mut self) -> &mut [mem::MaybeUninit<u8>] {
619 let buf = self.0.as_mut_buf();
620 let ptr = buf.as_mut_ptr();
621 let cap = buf.len();
622 let len = self.len();
623
624 slice::from_raw_parts_mut(ptr.add(len) as *mut mem::MaybeUninit<u8>, cap - len)
625 }
626
627 /// Returns a byte slice of the [`CompactString`]'s contents.
628 ///
629 /// # Examples
630 /// ```
631 /// # use compact_str::CompactString;
632 /// let s = CompactString::new("hello");
633 ///
634 /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes());
635 /// ```
636 #[inline]
637 pub fn as_bytes(&self) -> &[u8] {
638 &self.0.as_slice()[..self.len()]
639 }
640
641 // TODO: Implement a `try_as_mut_slice(...)` that will fail if it results in cloning?
642 //
643 /// Provides a mutable reference to the underlying buffer of bytes.
644 ///
645 /// # Safety
646 /// * All Rust strings, including `CompactString`, must be valid UTF-8. The caller must
647 /// guarantee that any modifications made to the underlying buffer are valid UTF-8.
648 ///
649 /// # Examples
650 /// ```
651 /// # use compact_str::CompactString;
652 /// let mut s = CompactString::new("hello");
653 ///
654 /// let slice = unsafe { s.as_mut_bytes() };
655 /// // copy bytes into our string
656 /// slice[5..11].copy_from_slice(" world".as_bytes());
657 /// // set the len of the string
658 /// unsafe { s.set_len(11) };
659 ///
660 /// assert_eq!(s, "hello world");
661 /// ```
662 #[inline]
663 pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] {
664 self.0.as_mut_buf()
665 }
666
667 /// Appends the given [`char`] to the end of this [`CompactString`].
668 ///
669 /// # Examples
670 /// ```
671 /// # use compact_str::CompactString;
672 /// let mut s = CompactString::new("foo");
673 ///
674 /// s.push('b');
675 /// s.push('a');
676 /// s.push('r');
677 ///
678 /// assert_eq!("foobar", s);
679 /// ```
680 pub fn push(&mut self, ch: char) {
681 self.push_str(ch.encode_utf8(&mut [0; 4]));
682 }
683
684 /// Removes the last character from the [`CompactString`] and returns it.
685 /// Returns `None` if this [`CompactString`] is empty.
686 ///
687 /// # Examples
688 /// ```
689 /// # use compact_str::CompactString;
690 /// let mut s = CompactString::new("abc");
691 ///
692 /// assert_eq!(s.pop(), Some('c'));
693 /// assert_eq!(s.pop(), Some('b'));
694 /// assert_eq!(s.pop(), Some('a'));
695 ///
696 /// assert_eq!(s.pop(), None);
697 /// ```
698 #[inline]
699 pub fn pop(&mut self) -> Option<char> {
700 self.0.pop()
701 }
702
703 /// Appends a given string slice onto the end of this [`CompactString`]
704 ///
705 /// # Examples
706 /// ```
707 /// # use compact_str::CompactString;
708 /// let mut s = CompactString::new("abc");
709 ///
710 /// s.push_str("123");
711 ///
712 /// assert_eq!("abc123", s);
713 /// ```
714 #[inline]
715 pub fn push_str(&mut self, s: &str) {
716 self.0.push_str(s)
717 }
718
719 /// Removes a [`char`] from this [`CompactString`] at a byte position and returns it.
720 ///
721 /// This is an *O*(*n*) operation, as it requires copying every element in the
722 /// buffer.
723 ///
724 /// # Panics
725 ///
726 /// Panics if `idx` is larger than or equal to the [`CompactString`]'s length,
727 /// or if it does not lie on a [`char`] boundary.
728 ///
729 /// # Examples
730 ///
731 /// ### Basic usage:
732 ///
733 /// ```
734 /// # use compact_str::CompactString;
735 /// let mut c = CompactString::from("hello world");
736 ///
737 /// assert_eq!(c.remove(0), 'h');
738 /// assert_eq!(c, "ello world");
739 ///
740 /// assert_eq!(c.remove(5), 'w');
741 /// assert_eq!(c, "ello orld");
742 /// ```
743 ///
744 /// ### Past total length:
745 ///
746 /// ```should_panic
747 /// # use compact_str::CompactString;
748 /// let mut c = CompactString::from("hello there!");
749 /// c.remove(100);
750 /// ```
751 ///
752 /// ### Not on char boundary:
753 ///
754 /// ```should_panic
755 /// # use compact_str::CompactString;
/// let mut c = CompactString::from("🦀");
757 /// c.remove(1);
758 /// ```
759 #[inline]
760 pub fn remove(&mut self, idx: usize) -> char {
761 let len = self.len();
762 let substr = &mut self.as_mut_str()[idx..];
763
764 // get the char we want to remove
765 let ch = substr
766 .chars()
767 .next()
768 .expect("cannot remove a char from the end of a string");
769 let ch_len = ch.len_utf8();
770
771 // shift everything back one character
772 let num_bytes = substr.len() - ch_len;
773 let ptr = substr.as_mut_ptr();
774
775 // SAFETY: Both src and dest are valid for reads of `num_bytes` amount of bytes,
776 // and are properly aligned
777 unsafe {
778 core::ptr::copy(ptr.add(ch_len) as *const u8, ptr, num_bytes);
779 self.set_len(len - ch_len);
780 }
781
782 ch
783 }
784
785 /// Forces the length of the [`CompactString`] to `new_len`.
786 ///
787 /// This is a low-level operation that maintains none of the normal invariants for
788 /// `CompactString`. If you want to modify the `CompactString` you should use methods like
789 /// `push`, `push_str` or `pop`.
790 ///
791 /// # Safety
792 /// * `new_len` must be less than or equal to `capacity()`
793 /// * The elements at `old_len..new_len` must be initialized
794 #[inline]
795 pub unsafe fn set_len(&mut self, new_len: usize) {
796 self.0.set_len(new_len)
797 }
798
799 /// Returns whether or not the [`CompactString`] is heap allocated.
800 ///
801 /// # Examples
802 /// ### Inlined
803 /// ```
804 /// # use compact_str::CompactString;
805 /// let hello = CompactString::new("hello world");
806 ///
807 /// assert!(!hello.is_heap_allocated());
808 /// ```
809 ///
810 /// ### Heap Allocated
811 /// ```
812 /// # use compact_str::CompactString;
/// let msg = CompactString::new("this message will self destruct in 5, 4, 3, 2, 1 💥");
814 ///
815 /// assert!(msg.is_heap_allocated());
816 /// ```
817 #[inline]
818 pub fn is_heap_allocated(&self) -> bool {
819 self.0.is_heap_allocated()
820 }
821
822 /// Ensure that the given range is inside the set data, and that no codepoints are split.
823 ///
824 /// Returns the range `start..end` as a tuple.
825 #[inline]
826 fn ensure_range(&self, range: impl RangeBounds<usize>) -> (usize, usize) {
827 #[cold]
828 #[inline(never)]
829 fn illegal_range() -> ! {
830 panic!("illegal range");
831 }
832
833 let start = match range.start_bound() {
834 Bound::Included(&n) => n,
835 Bound::Excluded(&n) => match n.checked_add(1) {
836 Some(n) => n,
837 None => illegal_range(),
838 },
839 Bound::Unbounded => 0,
840 };
841 let end = match range.end_bound() {
842 Bound::Included(&n) => match n.checked_add(1) {
843 Some(n) => n,
844 None => illegal_range(),
845 },
846 Bound::Excluded(&n) => n,
847 Bound::Unbounded => self.len(),
848 };
849 if end < start {
850 illegal_range();
851 }
852
853 let s = self.as_str();
854 if !s.is_char_boundary(start) || !s.is_char_boundary(end) {
855 illegal_range();
856 }
857
858 (start, end)
859 }
860
861 /// Removes the specified range in the [`CompactString`],
862 /// and replaces it with the given string.
863 /// The given string doesn't need to be the same length as the range.
864 ///
865 /// # Panics
866 ///
867 /// Panics if the starting point or end point do not lie on a [`char`]
868 /// boundary, or if they're out of bounds.
869 ///
870 /// # Examples
871 ///
872 /// Basic usage:
873 ///
874 /// ```
875 /// # use compact_str::CompactString;
876 /// let mut s = CompactString::new("Hello, world!");
877 ///
878 /// s.replace_range(7..12, "WORLD");
879 /// assert_eq!(s, "Hello, WORLD!");
880 ///
881 /// s.replace_range(7..=11, "you");
882 /// assert_eq!(s, "Hello, you!");
883 ///
884 /// s.replace_range(5.., "! Is it me you're looking for?");
885 /// assert_eq!(s, "Hello! Is it me you're looking for?");
886 /// ```
887 #[inline]
888 pub fn replace_range(&mut self, range: impl RangeBounds<usize>, replace_with: &str) {
889 let (start, end) = self.ensure_range(range);
890 let dest_len = end - start;
891 match dest_len.cmp(&replace_with.len()) {
892 Ordering::Equal => unsafe { self.replace_range_same_size(start, end, replace_with) },
893 Ordering::Greater => unsafe { self.replace_range_shrink(start, end, replace_with) },
894 Ordering::Less => unsafe { self.replace_range_grow(start, end, replace_with) },
895 }
896 }
897
898 /// Replace into the same size.
899 unsafe fn replace_range_same_size(&mut self, start: usize, end: usize, replace_with: &str) {
900 core::ptr::copy_nonoverlapping(
901 replace_with.as_ptr(),
902 self.as_mut_ptr().add(start),
903 end - start,
904 );
905 }
906
907 /// Replace, so self.len() gets smaller.
908 unsafe fn replace_range_shrink(&mut self, start: usize, end: usize, replace_with: &str) {
909 let total_len = self.len();
910 let dest_len = end - start;
911 let new_len = total_len - (dest_len - replace_with.len());
912 let amount = total_len - end;
913 let data = self.as_mut_ptr();
914 // first insert the replacement string, overwriting the current content
915 core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
916 // then move the tail of the CompactString forward to its new place, filling the gap
917 core::ptr::copy(
918 data.add(total_len - amount),
919 data.add(new_len - amount),
920 amount,
921 );
922 // and lastly we set the new length
923 self.set_len(new_len);
924 }
925
926 /// Replace, so self.len() gets bigger.
927 unsafe fn replace_range_grow(&mut self, start: usize, end: usize, replace_with: &str) {
928 let dest_len = end - start;
929 self.reserve(replace_with.len() - dest_len);
930 let total_len = self.len();
931 let new_len = total_len + (replace_with.len() - dest_len);
932 let amount = total_len - end;
933 // first grow the string, so MIRI knows that the full range is usable
934 self.set_len(new_len);
935 let data = self.as_mut_ptr();
936 // then move the tail of the CompactString back to its new place
937 core::ptr::copy(
938 data.add(total_len - amount),
939 data.add(new_len - amount),
940 amount,
941 );
942 // and lastly insert the replacement string
943 core::ptr::copy_nonoverlapping(replace_with.as_ptr(), data.add(start), replace_with.len());
944 }
945
946 /// Creates a new [`CompactString`] by repeating a string `n` times.
947 ///
948 /// # Panics
949 ///
950 /// This function will panic if the capacity would overflow.
951 ///
952 /// # Examples
953 ///
954 /// Basic usage:
955 ///
956 /// ```
957 /// use compact_str::CompactString;
958 /// assert_eq!(CompactString::new("abc").repeat(4), CompactString::new("abcabcabcabc"));
959 /// ```
960 ///
961 /// A panic upon overflow:
962 ///
963 /// ```should_panic
964 /// use compact_str::CompactString;
965 ///
966 /// // this will panic at runtime
967 /// let huge = CompactString::new("0123456789abcdef").repeat(usize::MAX);
968 /// ```
969 #[must_use]
970 pub fn repeat(&self, n: usize) -> Self {
971 if n == 0 || self.is_empty() {
972 Self::const_new("")
973 } else if n == 1 {
974 self.clone()
975 } else {
976 let mut out = Self::with_capacity(self.len() * n);
977 (0..n).for_each(|_| out.push_str(self));
978 out
979 }
980 }
981
982 /// Truncate the [`CompactString`] to a shorter length.
983 ///
984 /// If the length of the [`CompactString`] is less or equal to `new_len`, the call is a no-op.
985 ///
986 /// Calling this function does not change the capacity of the [`CompactString`].
987 ///
988 /// # Panics
989 ///
990 /// Panics if the new end of the string does not lie on a [`char`] boundary.
991 ///
992 /// # Examples
993 ///
994 /// Basic usage:
995 ///
996 /// ```
997 /// # use compact_str::CompactString;
998 /// let mut s = CompactString::new("Hello, world!");
999 /// s.truncate(5);
1000 /// assert_eq!(s, "Hello");
1001 /// ```
1002 pub fn truncate(&mut self, new_len: usize) {
1003 let s = self.as_str();
1004 if new_len >= s.len() {
1005 return;
1006 }
1007
1008 assert!(
1009 s.is_char_boundary(new_len),
1010 "new_len must lie on char boundary",
1011 );
1012 unsafe { self.set_len(new_len) };
1013 }
1014
1015 /// Converts a [`CompactString`] to a raw pointer.
1016 #[inline]
1017 pub fn as_ptr(&self) -> *const u8 {
1018 self.0.as_slice().as_ptr()
1019 }
1020
1021 /// Converts a mutable [`CompactString`] to a raw pointer.
1022 #[inline]
1023 pub fn as_mut_ptr(&mut self) -> *mut u8 {
1024 unsafe { self.0.as_mut_buf().as_mut_ptr() }
1025 }
1026
1027 /// Insert string character at an index.
1028 ///
1029 /// # Examples
1030 ///
1031 /// Basic usage:
1032 ///
1033 /// ```
1034 /// # use compact_str::CompactString;
1035 /// let mut s = CompactString::new("Hello!");
1036 /// s.insert_str(5, ", world");
1037 /// assert_eq!(s, "Hello, world!");
1038 /// ```
1039 pub fn insert_str(&mut self, idx: usize, string: &str) {
1040 assert!(self.is_char_boundary(idx), "idx must lie on char boundary");
1041
1042 let new_len = self.len() + string.len();
1043 self.reserve(string.len());
1044
1045 // SAFETY: We just checked that we may split self at idx.
1046 // We set the length only after reserving the memory.
1047 // We fill the gap with valid UTF-8 data.
1048 unsafe {
1049 // first move the tail to the new back
1050 let data = self.as_mut_ptr();
1051 core::ptr::copy(
1052 data.add(idx),
1053 data.add(idx + string.len()),
1054 new_len - idx - string.len(),
1055 );
1056
1057 // then insert the new bytes
1058 core::ptr::copy_nonoverlapping(string.as_ptr(), data.add(idx), string.len());
1059
1060 // and lastly resize the string
1061 self.set_len(new_len);
1062 }
1063 }
1064
1065 /// Insert a character at an index.
1066 ///
1067 /// # Examples
1068 ///
1069 /// Basic usage:
1070 ///
1071 /// ```
1072 /// # use compact_str::CompactString;
1073 /// let mut s = CompactString::new("Hello world!");
1074 /// s.insert(5, ',');
1075 /// assert_eq!(s, "Hello, world!");
1076 /// ```
1077 pub fn insert(&mut self, idx: usize, ch: char) {
1078 self.insert_str(idx, ch.encode_utf8(&mut [0; 4]));
1079 }
1080
1081 /// Reduces the length of the [`CompactString`] to zero.
1082 ///
1083 /// Calling this function does not change the capacity of the [`CompactString`].
1084 ///
1085 /// ```
1086 /// # use compact_str::CompactString;
1087 /// let mut s = CompactString::new("Rust is the most loved language on Stackoverflow!");
1088 /// assert_eq!(s.capacity(), 49);
1089 ///
1090 /// s.clear();
1091 ///
1092 /// assert_eq!(s, "");
1093 /// assert_eq!(s.capacity(), 49);
1094 /// ```
1095 pub fn clear(&mut self) {
1096 unsafe { self.set_len(0) };
1097 }
1098
1099 /// Split the [`CompactString`] into at the given byte index.
1100 ///
1101 /// Calling this function does not change the capacity of the [`CompactString`], unless the
1102 /// [`CompactString`] is backed by a `&'static str`.
1103 ///
1104 /// # Panics
1105 ///
1106 /// Panics if `at` does not lie on a [`char`] boundary.
1107 ///
1108 /// Basic usage:
1109 ///
1110 /// ```
1111 /// # use compact_str::CompactString;
1112 /// let mut s = CompactString::const_new("Hello, world!");
1113 /// let w = s.split_off(5);
1114 ///
1115 /// assert_eq!(w, ", world!");
1116 /// assert_eq!(s, "Hello");
1117 /// ```
1118 pub fn split_off(&mut self, at: usize) -> Self {
1119 if let Some(s) = self.as_static_str() {
1120 let result = Self::const_new(&s[at..]);
1121 // SAFETY: the previous line `self[at...]` would have panicked if `at` was invalid
1122 unsafe { self.set_len(at) };
1123 result
1124 } else {
1125 let result = self[at..].into();
1126 // SAFETY: the previous line `self[at...]` would have panicked if `at` was invalid
1127 unsafe { self.set_len(at) };
1128 result
1129 }
1130 }
1131
1132 /// Remove a range from the [`CompactString`], and return it as an iterator.
1133 ///
1134 /// Calling this function does not change the capacity of the [`CompactString`].
1135 ///
1136 /// # Panics
1137 ///
1138 /// Panics if the start or end of the range does not lie on a [`char`] boundary.
1139 ///
1140 /// # Examples
1141 ///
1142 /// Basic usage:
1143 ///
1144 /// ```
1145 /// # use compact_str::CompactString;
1146 /// let mut s = CompactString::new("Hello, world!");
1147 ///
1148 /// let mut d = s.drain(5..12);
1149 /// assert_eq!(d.next(), Some(',')); // iterate over the extracted data
1150 /// assert_eq!(d.as_str(), " world"); // or get the whole data as &str
1151 ///
1152 /// // The iterator keeps a reference to `s`, so you have to drop() the iterator,
1153 /// // before you can access `s` again.
1154 /// drop(d);
1155 /// assert_eq!(s, "Hello!");
1156 /// ```
1157 pub fn drain(&mut self, range: impl RangeBounds<usize>) -> Drain<'_> {
1158 let (start, end) = self.ensure_range(range);
1159 Drain {
1160 compact_string: self as *mut Self,
1161 start,
1162 end,
1163 chars: self[start..end].chars(),
1164 }
1165 }
1166
1167 /// Shrinks the capacity of this [`CompactString`] with a lower bound.
1168 ///
1169 /// The resulting capactity is never less than the size of 3ร[`usize`],
1170 /// i.e. the capacity than can be inlined.
1171 ///
1172 /// # Examples
1173 ///
1174 /// Basic usage:
1175 ///
1176 /// ```
1177 /// # use compact_str::CompactString;
1178 /// let mut s = CompactString::with_capacity(100);
1179 /// assert_eq!(s.capacity(), 100);
1180 ///
1181 /// // if the capacity was already bigger than the argument, the call is a no-op
1182 /// s.shrink_to(100);
1183 /// assert_eq!(s.capacity(), 100);
1184 ///
1185 /// s.shrink_to(50);
1186 /// assert_eq!(s.capacity(), 50);
1187 ///
1188 /// // if the string can be inlined, it is
1189 /// s.shrink_to(10);
1190 /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
1191 /// ```
1192 #[inline]
1193 pub fn shrink_to(&mut self, min_capacity: usize) {
1194 self.0.shrink_to(min_capacity);
1195 }
1196
1197 /// Shrinks the capacity of this [`CompactString`] to match its length.
1198 ///
1199 /// The resulting capactity is never less than the size of 3ร[`usize`],
1200 /// i.e. the capacity than can be inlined.
1201 ///
1202 /// This method is effectively the same as calling [`string.shrink_to(0)`].
1203 ///
1204 /// # Examples
1205 ///
1206 /// Basic usage:
1207 ///
1208 /// ```
1209 /// # use compact_str::CompactString;
1210 /// let mut s = CompactString::from("This is a string with more than 24 characters.");
1211 ///
1212 /// s.reserve(100);
1213 /// assert!(s.capacity() >= 100);
1214 ///
1215 /// s.shrink_to_fit();
1216 /// assert_eq!(s.len(), s.capacity());
1217 /// ```
1218 ///
1219 /// ```
1220 /// # use compact_str::CompactString;
1221 /// let mut s = CompactString::from("short string");
1222 ///
1223 /// s.reserve(100);
1224 /// assert!(s.capacity() >= 100);
1225 ///
1226 /// s.shrink_to_fit();
1227 /// assert_eq!(s.capacity(), 3 * std::mem::size_of::<usize>());
1228 /// ```
1229 #[inline]
1230 pub fn shrink_to_fit(&mut self) {
1231 self.0.shrink_to(0);
1232 }
1233
1234 /// Retains only the characters specified by the predicate.
1235 ///
1236 /// The method iterates over the characters in the string and calls the `predicate`.
1237 ///
1238 /// If the `predicate` returns `false`, then the character gets removed.
1239 /// If the `predicate` returns `true`, then the character is kept.
1240 ///
1241 /// # Examples
1242 ///
1243 /// ```
1244 /// # use compact_str::CompactString;
1245 /// let mut s = CompactString::from("รคb๐dโฌ");
1246 ///
1247 /// let keep = [false, true, true, false, true];
1248 /// let mut iter = keep.iter();
1249 /// s.retain(|_| *iter.next().unwrap());
1250 ///
1251 /// assert_eq!(s, "b๐โฌ");
1252 /// ```
1253 pub fn retain(&mut self, mut predicate: impl FnMut(char) -> bool) {
1254 // We iterate over the string, and copy character by character.
1255
1256 struct SetLenOnDrop<'a> {
1257 self_: &'a mut CompactString,
1258 src_idx: usize,
1259 dst_idx: usize,
1260 }
1261
1262 let mut g = SetLenOnDrop {
1263 self_: self,
1264 src_idx: 0,
1265 dst_idx: 0,
1266 };
1267 let s = g.self_.as_mut_str();
1268 while let Some(ch) = s[g.src_idx..].chars().next() {
1269 let ch_len = ch.len_utf8();
1270 if predicate(ch) {
1271 // SAFETY: We know that both indices are valid, and that we don't split a char.
1272 unsafe {
1273 let p = s.as_mut_ptr();
1274 core::ptr::copy(p.add(g.src_idx), p.add(g.dst_idx), ch_len);
1275 }
1276 g.dst_idx += ch_len;
1277 }
1278 g.src_idx += ch_len;
1279 }
1280
1281 impl Drop for SetLenOnDrop<'_> {
1282 fn drop(&mut self) {
1283 // SAFETY: We know that the index is a valid position to break the string.
1284 unsafe { self.self_.set_len(self.dst_idx) };
1285 }
1286 }
1287 drop(g);
1288 }
1289
1290 /// Decode a bytes slice as UTF-8 string, replacing any illegal codepoints
1291 ///
1292 /// # Examples
1293 ///
1294 /// ```
1295 /// # use compact_str::CompactString;
1296 /// let chess_knight = b"\xf0\x9f\xa8\x84";
1297 ///
1298 /// assert_eq!(
1299 /// "๐จ",
1300 /// CompactString::from_utf8_lossy(chess_knight),
1301 /// );
1302 ///
1303 /// // For valid UTF-8 slices, this is the same as:
1304 /// assert_eq!(
1305 /// "๐จ",
1306 /// CompactString::new(std::str::from_utf8(chess_knight).unwrap()),
1307 /// );
1308 /// ```
1309 ///
1310 /// Incorrect bytes:
1311 ///
1312 /// ```
1313 /// # use compact_str::CompactString;
1314 /// let broken = b"\xf0\x9f\xc8\x84";
1315 ///
1316 /// assert_eq!(
1317 /// "๏ฟฝศ",
1318 /// CompactString::from_utf8_lossy(broken),
1319 /// );
1320 ///
1321 /// // For invalid UTF-8 slices, this is an optimized implemented for:
1322 /// assert_eq!(
1323 /// "๏ฟฝศ",
1324 /// CompactString::from(String::from_utf8_lossy(broken)),
1325 /// );
1326 /// ```
1327 pub fn from_utf8_lossy(v: &[u8]) -> Self {
1328 fn next_char<'a>(
1329 iter: &mut <&[u8] as IntoIterator>::IntoIter,
1330 buf: &'a mut [u8; 4],
1331 ) -> Option<&'a [u8]> {
1332 const REPLACEMENT: &[u8] = "\u{FFFD}".as_bytes();
1333
1334 macro_rules! ensure_range {
1335 ($idx:literal, $range:pat) => {{
1336 let mut i = iter.clone();
1337 match i.next() {
1338 Some(&c) if matches!(c, $range) => {
1339 buf[$idx] = c;
1340 *iter = i;
1341 }
1342 _ => return Some(REPLACEMENT),
1343 }
1344 }};
1345 }
1346
1347 macro_rules! ensure_cont {
1348 ($idx:literal) => {{
1349 ensure_range!($idx, 0x80..=0xBF);
1350 }};
1351 }
1352
1353 let c = *iter.next()?;
1354 buf[0] = c;
1355
1356 match c {
1357 0x00..=0x7F => {
1358 // simple ASCII: push as is
1359 Some(&buf[..1])
1360 }
1361 0xC2..=0xDF => {
1362 // two bytes
1363 ensure_cont!(1);
1364 Some(&buf[..2])
1365 }
1366 0xE0..=0xEF => {
1367 // three bytes
1368 match c {
1369 // 0x80..=0x9F encodes surrogate half
1370 0xE0 => ensure_range!(1, 0xA0..=0xBF),
1371 // 0xA0..=0xBF encodes surrogate half
1372 0xED => ensure_range!(1, 0x80..=0x9F),
1373 // all UTF-8 continuation bytes are valid
1374 _ => ensure_cont!(1),
1375 }
1376 ensure_cont!(2);
1377 Some(&buf[..3])
1378 }
1379 0xF0..=0xF4 => {
1380 // four bytes
1381 match c {
1382 // 0x80..=0x8F encodes overlong three byte codepoint
1383 0xF0 => ensure_range!(1, 0x90..=0xBF),
1384 // 0x90..=0xBF encodes codepoint > U+10FFFF
1385 0xF4 => ensure_range!(1, 0x80..=0x8F),
1386 // all UTF-8 continuation bytes are valid
1387 _ => ensure_cont!(1),
1388 }
1389 ensure_cont!(2);
1390 ensure_cont!(3);
1391 Some(&buf[..4])
1392 }
1393 | 0x80..=0xBF // unicode continuation, invalid
1394 | 0xC0..=0xC1 // overlong one byte character
1395 | 0xF5..=0xF7 // four bytes that encode > U+10FFFF
1396 | 0xF8..=0xFB // five bytes, invalid
1397 | 0xFC..=0xFD // six bytes, invalid
1398 | 0xFE..=0xFF => Some(REPLACEMENT), // always invalid
1399 }
1400 }
1401
1402 let mut buf = [0; 4];
1403 let mut result = Self::with_capacity(v.len());
1404 let mut iter = v.iter();
1405 while let Some(s) = next_char(&mut iter, &mut buf) {
1406 // SAFETY: next_char() only returns valid strings
1407 let s = unsafe { core::str::from_utf8_unchecked(s) };
1408 result.push_str(s);
1409 }
1410 result
1411 }
1412
1413 fn from_utf16x(
1414 v: &[u8],
1415 from_int: impl Fn(u16) -> u16,
1416 from_bytes: impl Fn([u8; 2]) -> u16,
1417 ) -> Result<Self, Utf16Error> {
1418 if v.len() % 2 != 0 {
1419 // Input had an odd number of bytes.
1420 return Err(Utf16Error(()));
1421 }
1422
1423 // Note: we don't use collect::<Result<_, _>>() because that fails to pre-allocate a buffer,
1424 // even though the size of our iterator, `v`, is known ahead of time.
1425 //
1426 // rustlang issue #48994 is tracking the fix
1427 let mut result = CompactString::with_capacity(v.len() / 2);
1428
1429 // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1430 // `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1431 match unsafe { v.align_to::<u16>() } {
1432 (&[], v, &[]) => {
1433 // Input is correctly aligned.
1434 for c in core::char::decode_utf16(v.iter().copied().map(from_int)) {
1435 result.push(c.map_err(|_| Utf16Error(()))?);
1436 }
1437 }
1438 _ => {
1439 // Input's alignment is off.
1440 // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1441 let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1442 for c in core::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1443 result.push(c.map_err(|_| Utf16Error(()))?);
1444 }
1445 }
1446 }
1447
1448 Ok(result)
1449 }
1450
1451 fn from_utf16x_lossy(
1452 v: &[u8],
1453 from_int: impl Fn(u16) -> u16,
1454 from_bytes: impl Fn([u8; 2]) -> u16,
1455 ) -> Self {
1456 // Notice: We write the string "๏ฟฝ" instead of the character '๏ฟฝ', so the character does not
1457 // have to be formatted before it can be appended.
1458
1459 let (trailing_extra_byte, v) = match v.len() % 2 != 0 {
1460 true => (true, &v[..v.len() - 1]),
1461 false => (false, v),
1462 };
1463 let mut result = CompactString::with_capacity(v.len() / 2);
1464
1465 // SAFETY: `u8` and `u16` are `Copy`, so if the alignment fits, we can transmute a
1466 // `[u8; 2*N]` to `[u16; N]`. `slice::align_to()` checks if the alignment is right.
1467 match unsafe { v.align_to::<u16>() } {
1468 (&[], v, &[]) => {
1469 // Input is correctly aligned.
1470 for c in core::char::decode_utf16(v.iter().copied().map(from_int)) {
1471 match c {
1472 Ok(c) => result.push(c),
1473 Err(_) => result.push_str("๏ฟฝ"),
1474 }
1475 }
1476 }
1477 _ => {
1478 // Input's alignment is off.
1479 // SAFETY: we can always reinterpret a `[u8; 2*N]` slice as `[[u8; 2]; N]`
1480 let v = unsafe { slice::from_raw_parts(v.as_ptr().cast(), v.len() / 2) };
1481 for c in core::char::decode_utf16(v.iter().copied().map(from_bytes)) {
1482 match c {
1483 Ok(c) => result.push(c),
1484 Err(_) => result.push_str("๏ฟฝ"),
1485 }
1486 }
1487 }
1488 }
1489
1490 if trailing_extra_byte {
1491 result.push_str("๏ฟฝ");
1492 }
1493 result
1494 }
1495
1496 /// Decode a slice of bytes as UTF-16 encoded string, in little endian.
1497 ///
1498 /// # Errors
1499 ///
1500 /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
1501 /// a [`Utf16Error`] is returned.
1502 ///
1503 /// # Examples
1504 ///
1505 /// ```
1506 /// # use compact_str::CompactString;
1507 /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xdc\x0d\x20\x42\x26\x0f\xfe";
1508 /// let dancing_men = CompactString::from_utf16le(DANCING_MEN).unwrap();
1509 /// assert_eq!(dancing_men, "๐ฏโโ๏ธ");
1510 /// ```
1511 #[inline]
1512 pub fn from_utf16le(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
1513 CompactString::from_utf16x(v.as_ref(), u16::from_le, u16::from_le_bytes)
1514 }
1515
1516 /// Decode a slice of bytes as UTF-16 encoded string, in big endian.
1517 ///
1518 /// # Errors
1519 ///
1520 /// If the slice has an odd number of bytes, or if it did not contain valid UTF-16 characters,
1521 /// a [`Utf16Error`] is returned.
1522 ///
1523 /// # Examples
1524 ///
1525 /// ```
1526 /// # use compact_str::CompactString;
1527 /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xfe\x0f";
1528 /// let dancing_women = CompactString::from_utf16be(DANCING_WOMEN).unwrap();
1529 /// assert_eq!(dancing_women, "๐ฏโโ๏ธ");
1530 /// ```
1531 #[inline]
1532 pub fn from_utf16be(v: impl AsRef<[u8]>) -> Result<Self, Utf16Error> {
1533 CompactString::from_utf16x(v.as_ref(), u16::from_be, u16::from_be_bytes)
1534 }
1535
1536 /// Lossy decode a slice of bytes as UTF-16 encoded string, in little endian.
1537 ///
1538 /// In this context "lossy" means that any broken characters in the input are replaced by the
1539 /// \<REPLACEMENT CHARACTER\> `'๏ฟฝ'`. Please notice that, unlike UTF-8, UTF-16 is not self
1540 /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1541 ///
1542 /// # Examples
1543 ///
1544 /// ```
1545 /// # use compact_str::CompactString;
1546 /// // A "random" bit was flipped in the 4th byte:
1547 /// const DANCING_MEN: &[u8] = b"\x3d\xd8\x6f\xfc\x0d\x20\x42\x26\x0f\xfe";
1548 /// let dancing_men = CompactString::from_utf16le_lossy(DANCING_MEN);
1549 /// assert_eq!(dancing_men, "๏ฟฝ\u{fc6f}\u{200d}โ๏ธ");
1550 /// ```
1551 #[inline]
1552 pub fn from_utf16le_lossy(v: impl AsRef<[u8]>) -> Self {
1553 CompactString::from_utf16x_lossy(v.as_ref(), u16::from_le, u16::from_le_bytes)
1554 }
1555
1556 /// Lossy decode a slice of bytes as UTF-16 encoded string, in big endian.
1557 ///
1558 /// In this context "lossy" means that any broken characters in the input are replaced by the
1559 /// \<REPLACEMENT CHARACTER\> `'๏ฟฝ'`. Please notice that, unlike UTF-8, UTF-16 is not self
1560 /// synchronizing. I.e. if a byte in the input is dropped, all following data is broken.
1561 ///
1562 /// # Examples
1563 ///
1564 /// ```
1565 /// # use compact_str::CompactString;
1566 /// // A "random" bit was flipped in the 9th byte:
1567 /// const DANCING_WOMEN: &[u8] = b"\xd8\x3d\xdc\x6f\x20\x0d\x26\x40\xde\x0f";
1568 /// let dancing_women = CompactString::from_utf16be_lossy(DANCING_WOMEN);
1569 /// assert_eq!(dancing_women, "๐ฏ\u{200d}โ๏ฟฝ");
1570 /// ```
1571 #[inline]
1572 pub fn from_utf16be_lossy(v: impl AsRef<[u8]>) -> Self {
1573 CompactString::from_utf16x_lossy(v.as_ref(), u16::from_be, u16::from_be_bytes)
1574 }
1575
1576 /// Convert the [`CompactString`] into a [`String`].
1577 ///
1578 /// # Examples
1579 ///
1580 /// ```
1581 /// # use compact_str::CompactString;
1582 /// let s = CompactString::new("Hello world");
1583 /// let s = s.into_string();
1584 /// assert_eq!(s, "Hello world");
1585 /// ```
1586 pub fn into_string(self) -> String {
1587 self.0.into_string()
1588 }
1589
1590 /// Convert a [`String`] into a [`CompactString`] _without inlining_.
1591 ///
1592 /// Note: You probably don't need to use this method, instead you should use `From<String>`
1593 /// which is implemented for [`CompactString`].
1594 ///
1595 /// This method exists incase your code is very sensitive to memory allocations. Normally when
1596 /// converting a [`String`] to a [`CompactString`] we'll inline short strings onto the stack.
1597 /// But this results in [`Drop`]-ing the original [`String`], which causes memory it owned on
1598 /// the heap to be deallocated. Instead when using this method, we always reuse the buffer that
1599 /// was previously owned by the [`String`], so no trips to the allocator are needed.
1600 ///
1601 /// # Examples
1602 ///
1603 /// ### Short Strings
1604 /// ```
1605 /// use compact_str::CompactString;
1606 ///
1607 /// let short = "hello world".to_string();
1608 /// let c_heap = CompactString::from_string_buffer(short);
1609 ///
1610 /// // using CompactString::from_string_buffer, we'll re-use the String's underlying buffer
1611 /// assert!(c_heap.is_heap_allocated());
1612 ///
1613 /// // note: when Clone-ing a short heap allocated string, we'll eagerly inline at that point
1614 /// let c_inline = c_heap.clone();
1615 /// assert!(!c_inline.is_heap_allocated());
1616 ///
1617 /// assert_eq!(c_heap, c_inline);
1618 /// ```
1619 ///
1620 /// ### Longer Strings
1621 /// ```
1622 /// use compact_str::CompactString;
1623 ///
1624 /// let x = "longer string that will be on the heap".to_string();
1625 /// let c1 = CompactString::from(x);
1626 ///
1627 /// let y = "longer string that will be on the heap".to_string();
1628 /// let c2 = CompactString::from_string_buffer(y);
1629 ///
1630 /// // for longer strings, we re-use the underlying String's buffer in both cases
1631 /// assert!(c1.is_heap_allocated());
1632 /// assert!(c2.is_heap_allocated());
1633 /// ```
1634 ///
1635 /// ### Buffer Re-use
1636 /// ```
1637 /// use compact_str::CompactString;
1638 ///
1639 /// let og = "hello world".to_string();
1640 /// let og_addr = og.as_ptr();
1641 ///
1642 /// let mut c = CompactString::from_string_buffer(og);
1643 /// let ex_addr = c.as_ptr();
1644 ///
1645 /// // When converting to/from String and CompactString with from_string_buffer we always re-use
1646 /// // the same underlying allocated memory/buffer
1647 /// assert_eq!(og_addr, ex_addr);
1648 ///
1649 /// let long = "this is a long string that will be on the heap".to_string();
1650 /// let long_addr = long.as_ptr();
1651 ///
1652 /// let mut long_c = CompactString::from(long);
1653 /// let long_ex_addr = long_c.as_ptr();
1654 ///
1655 /// // When converting to/from String and CompactString with From<String>, we'll also re-use the
1656 /// // underlying buffer, if the string is long, otherwise when converting to CompactString we
1657 /// // eagerly inline
1658 /// assert_eq!(long_addr, long_ex_addr);
1659 /// ```
1660 #[inline]
1661 #[track_caller]
1662 pub fn from_string_buffer(s: String) -> Self {
1663 let repr = Repr::from_string(s, false).unwrap_with_msg();
1664 CompactString(repr)
1665 }
1666
1667 /// Returns a copy of this string where each character is mapped to its
1668 /// ASCII lower case equivalent.
1669 ///
1670 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1671 /// but non-ASCII letters are unchanged.
1672 ///
1673 /// To lowercase the value in-place, use [`str::make_ascii_lowercase`].
1674 ///
1675 /// To lowercase ASCII characters in addition to non-ASCII characters, use
1676 /// [`CompactString::to_lowercase`].
1677 ///
1678 /// # Examples
1679 ///
1680 /// ```
1681 /// use compact_str::CompactString;
1682 /// let s = CompactString::new("Grรผรe, Jรผrgen โค");
1683 ///
1684 /// assert_eq!("grรผรe, jรผrgen โค", s.to_ascii_lowercase());
1685 /// ```
1686 #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1687 #[inline]
1688 pub fn to_ascii_lowercase(&self) -> Self {
1689 let mut s = self.clone();
1690 s.make_ascii_lowercase();
1691 s
1692 }
1693
1694 /// Returns a copy of this string where each character is mapped to its
1695 /// ASCII upper case equivalent.
1696 ///
1697 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1698 /// but non-ASCII letters are unchanged.
1699 ///
1700 /// To uppercase the value in-place, use [`str::make_ascii_uppercase`].
1701 ///
1702 /// To uppercase ASCII characters in addition to non-ASCII characters, use
1703 /// [`CompactString::to_uppercase`].
1704 ///
1705 /// # Examples
1706 ///
1707 /// ```
1708 /// use compact_str::CompactString;
1709 /// let s = CompactString::new("Grรผรe, Jรผrgen โค");
1710 ///
1711 /// assert_eq!("GRรผรE, JรผRGEN โค", s.to_ascii_uppercase());
1712 /// ```
1713 #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1714 #[inline]
1715 pub fn to_ascii_uppercase(&self) -> Self {
1716 let mut s = self.clone();
1717 s.make_ascii_uppercase();
1718 s
1719 }
1720
1721 /// Returns the lowercase equivalent of this string slice, as a new [`CompactString`].
1722 ///
1723 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1724 /// `Lowercase`.
1725 ///
1726 /// Since some characters can expand into multiple characters when changing
1727 /// the case, this function returns a [`CompactString`] instead of modifying the
1728 /// parameter in-place.
1729 ///
1730 /// # Examples
1731 ///
1732 /// Basic usage:
1733 ///
1734 /// ```
1735 /// use compact_str::CompactString;
1736 /// let s = CompactString::new("HELLO");
1737 ///
1738 /// assert_eq!("hello", s.to_lowercase());
1739 /// ```
1740 ///
1741 /// A tricky example, with sigma:
1742 ///
1743 /// ```
1744 /// use compact_str::CompactString;
1745 /// let sigma = CompactString::new("ฮฃ");
1746 ///
1747 /// assert_eq!("ฯ", sigma.to_lowercase());
1748 ///
1749 /// // but at the end of a word, it's ฯ, not ฯ:
1750 /// let odysseus = CompactString::new("แฝฮฮฅฮฃฮฃฮฮฮฃ");
1751 ///
1752 /// assert_eq!("แฝฮดฯ
ฯฯฮตฯฯ", odysseus.to_lowercase());
1753 /// ```
1754 ///
1755 /// Languages without case are not changed:
1756 ///
1757 /// ```
1758 /// use compact_str::CompactString;
1759 /// let new_year = CompactString::new("ๅๅๆฐๅนด");
1760 ///
1761 /// assert_eq!(new_year, new_year.to_lowercase());
1762 /// ```
1763 #[must_use = "this returns the lowercase string as a new CompactString, \
1764 without modifying the original"]
1765 pub fn to_lowercase(&self) -> Self {
1766 Self::from_str_to_lowercase(self.as_str())
1767 }
1768
1769 /// Returns the lowercase equivalent of this string slice, as a new [`CompactString`].
1770 ///
1771 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property
1772 /// `Lowercase`.
1773 ///
1774 /// Since some characters can expand into multiple characters when changing
1775 /// the case, this function returns a [`CompactString`] instead of modifying the
1776 /// parameter in-place.
1777 ///
1778 /// # Examples
1779 ///
1780 /// Basic usage:
1781 ///
1782 /// ```
1783 /// use compact_str::CompactString;
1784 ///
1785 /// assert_eq!("hello", CompactString::from_str_to_lowercase("HELLO"));
1786 /// ```
1787 ///
1788 /// A tricky example, with sigma:
1789 ///
1790 /// ```
1791 /// use compact_str::CompactString;
1792 ///
1793 /// assert_eq!("ฯ", CompactString::from_str_to_lowercase("ฮฃ"));
1794 ///
1795 /// // but at the end of a word, it's ฯ, not ฯ:
1796 /// assert_eq!("แฝฮดฯ
ฯฯฮตฯฯ", CompactString::from_str_to_lowercase("แฝฮฮฅฮฃฮฃฮฮฮฃ"));
1797 /// ```
1798 ///
1799 /// Languages without case are not changed:
1800 ///
1801 /// ```
1802 /// use compact_str::CompactString;
1803 ///
1804 /// let new_year = "ๅๅๆฐๅนด";
1805 /// assert_eq!(new_year, CompactString::from_str_to_lowercase(new_year));
1806 /// ```
1807 #[must_use = "this returns the lowercase string as a new CompactString, \
1808 without modifying the original"]
1809 pub fn from_str_to_lowercase(input: &str) -> Self {
1810 let mut s = convert_while_ascii(input.as_bytes(), u8::to_ascii_lowercase);
1811
1812 // Safety: we know this is a valid char boundary since
1813 // out.len() is only progressed if ascii bytes are found
1814 let rest = unsafe { input.get_unchecked(s.len()..) };
1815
1816 for (i, c) in rest.char_indices() {
1817 if c == 'ฮฃ' {
1818 // ฮฃ maps to ฯ, except at the end of a word where it maps to ฯ.
1819 // This is the only conditional (contextual) but language-independent mapping
1820 // in `SpecialCasing.txt`,
1821 // so hard-code it rather than have a generic "condition" mechanism.
1822 // See https://github.com/rust-lang/rust/issues/26035
1823 map_uppercase_sigma(rest, i, &mut s)
1824 } else {
1825 s.extend(c.to_lowercase());
1826 }
1827 }
1828 return s;
1829
1830 fn map_uppercase_sigma(from: &str, i: usize, to: &mut CompactString) {
1831 // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
1832 // for the definition of `Final_Sigma`.
1833 debug_assert!('ฮฃ'.len_utf8() == 2);
1834 let is_word_final = case_ignorable_then_cased(from[..i].chars().rev())
1835 && !case_ignorable_then_cased(from[i + 2..].chars());
1836 to.push_str(if is_word_final { "ฯ" } else { "ฯ" });
1837 }
1838
1839 fn case_ignorable_then_cased<I: Iterator<Item = char>>(mut iter: I) -> bool {
1840 use unicode_data::case_ignorable::lookup as Case_Ignorable;
1841 use unicode_data::cased::lookup as Cased;
1842 match iter.find(|&c| !Case_Ignorable(c)) {
1843 Some(c) => Cased(c),
1844 None => false,
1845 }
1846 }
1847 }
1848
1849 /// Returns the uppercase equivalent of this string slice, as a new [`CompactString`].
1850 ///
1851 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1852 /// `Uppercase`.
1853 ///
1854 /// Since some characters can expand into multiple characters when changing
1855 /// the case, this function returns a [`CompactString`] instead of modifying the
1856 /// parameter in-place.
1857 ///
1858 /// # Examples
1859 ///
1860 /// Basic usage:
1861 ///
1862 /// ```
1863 /// use compact_str::CompactString;
1864 /// let s = CompactString::new("hello");
1865 ///
1866 /// assert_eq!("HELLO", s.to_uppercase());
1867 /// ```
1868 ///
1869 /// Scripts without case are not changed:
1870 ///
1871 /// ```
1872 /// use compact_str::CompactString;
1873 /// let new_year = CompactString::new("ๅๅๆฐๅนด");
1874 ///
1875 /// assert_eq!(new_year, new_year.to_uppercase());
1876 /// ```
1877 ///
1878 /// One character can become multiple:
1879 /// ```
1880 /// use compact_str::CompactString;
1881 /// let s = CompactString::new("tschรผร");
1882 ///
1883 /// assert_eq!("TSCHรSS", s.to_uppercase());
1884 /// ```
1885 #[must_use = "this returns the uppercase string as a new CompactString, \
1886 without modifying the original"]
1887 pub fn to_uppercase(&self) -> Self {
1888 Self::from_str_to_uppercase(self.as_str())
1889 }
1890
1891 /// Returns the uppercase equivalent of this string slice, as a new [`CompactString`].
1892 ///
1893 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property
1894 /// `Uppercase`.
1895 ///
1896 /// Since some characters can expand into multiple characters when changing
1897 /// the case, this function returns a [`CompactString`] instead of modifying the
1898 /// parameter in-place.
1899 ///
1900 /// # Examples
1901 ///
1902 /// Basic usage:
1903 ///
1904 /// ```
1905 /// use compact_str::CompactString;
1906 ///
1907 /// assert_eq!("HELLO", CompactString::from_str_to_uppercase("hello"));
1908 /// ```
1909 ///
1910 /// Scripts without case are not changed:
1911 ///
1912 /// ```
1913 /// use compact_str::CompactString;
1914 ///
1915 /// let new_year = "ๅๅๆฐๅนด";
1916 /// assert_eq!(new_year, CompactString::from_str_to_uppercase(new_year));
1917 /// ```
1918 ///
1919 /// One character can become multiple:
1920 /// ```
1921 /// use compact_str::CompactString;
1922 ///
1923 /// assert_eq!("TSCHรSS", CompactString::from_str_to_uppercase("tschรผร"));
1924 /// ```
1925 #[must_use = "this returns the uppercase string as a new CompactString, \
1926 without modifying the original"]
1927 pub fn from_str_to_uppercase(input: &str) -> Self {
1928 let mut out = convert_while_ascii(input.as_bytes(), u8::to_ascii_uppercase);
1929
1930 // Safety: we know this is a valid char boundary since
1931 // out.len() is only progressed if ascii bytes are found
1932 let rest = unsafe { input.get_unchecked(out.len()..) };
1933
1934 for c in rest.chars() {
1935 out.extend(c.to_uppercase());
1936 }
1937
1938 out
1939 }
1940}
1941
/// Converts the leading bytes of `b` with `convert` for as long as they are ASCII.
///
/// For better average performance, this happens in chunks of `2 * size_of::<usize>()`
/// bytes. Conversion stops at the first chunk that contains a non-ASCII byte, or when
/// fewer than one full chunk of input remains, so the length of the returned string is
/// always a multiple of the chunk size and may be shorter than the ASCII prefix of `b`.
/// The caller is responsible for converting the remaining bytes.
///
/// Returns a [`CompactString`] holding the converted bytes.
///
/// Copied from https://doc.rust-lang.org/nightly/src/alloc/str.rs.html#623-666
#[inline]
fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> CompactString {
    // Reserve room for the whole input up front; the length stays 0 until `set_len` below.
    let mut out = CompactString::with_capacity(b.len());

    const USIZE_SIZE: usize = mem::size_of::<usize>();
    const MAGIC_UNROLL: usize = 2;
    // Number of bytes processed per loop iteration.
    const N: usize = USIZE_SIZE * MAGIC_UNROLL;
    // A `usize` with the high bit of every byte set; any overlap means a non-ASCII byte.
    const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; USIZE_SIZE]);

    // Number of input bytes that have been converted and written to `out` so far.
    let mut i = 0;
    unsafe {
        while i + N <= b.len() {
            // SAFETY: the loop condition guarantees `i + N <= b.len()`, so the range is in
            // bounds for `b`. `out` was requested with capacity for `b.len()` bytes and its
            // length is still 0, so the same range is assumed to be in bounds of the spare
            // capacity as well (relies on `with_capacity` reserving at least `b.len()`).
            let in_chunk = b.get_unchecked(i..i + N);
            let out_chunk = out.spare_capacity_mut().get_unchecked_mut(i..i + N);

            let mut bits = 0;
            for j in 0..MAGIC_UNROLL {
                // Read the bytes one `usize` at a time (unaligned, since the alignment
                // has not been checked).
                // SAFETY: `in_chunk` is `N = MAGIC_UNROLL * USIZE_SIZE` bytes long, so
                // every `usize`-sized read at offset `j` stays inside it.
                bits |= in_chunk.as_ptr().cast::<usize>().add(j).read_unaligned();
            }
            // If this chunk is not pure ASCII, stop; only the prior bytes count as init.
            if bits & NONASCII_MASK != 0 {
                break;
            }

            // Perform the case conversion on N bytes (gets heavily autovectorized).
            for j in 0..N {
                // SAFETY: `j < N` and both chunks are exactly `N` elements long.
                // Note: this `out` shadows the outer string for the rest of the loop body.
                let out = out_chunk.get_unchecked_mut(j);
                out.write(convert(in_chunk.get_unchecked(j)));
            }

            // Mark these bytes as initialised.
            i += N;
        }
        // SAFETY: the first `i` bytes were written above. They are the result of `convert`
        // applied to ASCII bytes; this is valid UTF-8 as long as `convert` maps ASCII to
        // ASCII (the caller visible in this file passes `u8::to_ascii_uppercase`).
        out.set_len(i);
    }

    out
}
1989
1990impl Clone for CompactString {
1991 #[inline]
1992 fn clone(&self) -> Self {
1993 Self(self.0.clone())
1994 }
1995
1996 #[inline]
1997 fn clone_from(&mut self, source: &Self) {
1998 self.0.clone_from(&source.0)
1999 }
2000}
2001
2002impl Default for CompactString {
2003 #[inline]
2004 fn default() -> Self {
2005 CompactString::new("")
2006 }
2007}
2008
2009impl Deref for CompactString {
2010 type Target = str;
2011
2012 #[inline]
2013 fn deref(&self) -> &str {
2014 self.as_str()
2015 }
2016}
2017
2018impl DerefMut for CompactString {
2019 #[inline]
2020 fn deref_mut(&mut self) -> &mut str {
2021 self.as_mut_str()
2022 }
2023}
2024
2025impl AsRef<str> for CompactString {
2026 #[inline]
2027 fn as_ref(&self) -> &str {
2028 self.as_str()
2029 }
2030}
2031
#[cfg(feature = "std")]
impl AsRef<OsStr> for CompactString {
    /// Borrows the contents as an [`OsStr`].
    #[inline]
    fn as_ref(&self) -> &OsStr {
        let text = self.as_str();
        OsStr::new(text)
    }
}
2039
2040impl AsRef<[u8]> for CompactString {
2041 #[inline]
2042 fn as_ref(&self) -> &[u8] {
2043 self.as_bytes()
2044 }
2045}
2046
2047impl Borrow<str> for CompactString {
2048 #[inline]
2049 fn borrow(&self) -> &str {
2050 self.as_str()
2051 }
2052}
2053
2054impl BorrowMut<str> for CompactString {
2055 #[inline]
2056 fn borrow_mut(&mut self) -> &mut str {
2057 self.as_mut_str()
2058 }
2059}
2060
// Marker impl: equality between two `CompactString`s is a full equivalence relation
// (it is implemented below by comparing the underlying string slices).
impl Eq for CompactString {}
2062
2063impl<T: AsRef<str> + ?Sized> PartialEq<T> for CompactString {
2064 fn eq(&self, other: &T) -> bool {
2065 self.as_str() == other.as_ref()
2066 }
2067}
2068
2069impl PartialEq<CompactString> for &CompactString {
2070 fn eq(&self, other: &CompactString) -> bool {
2071 self.as_str() == other.as_str()
2072 }
2073}
2074
2075impl PartialEq<CompactString> for String {
2076 fn eq(&self, other: &CompactString) -> bool {
2077 self.as_str() == other.as_str()
2078 }
2079}
2080
2081impl<'a> PartialEq<&'a CompactString> for String {
2082 fn eq(&self, other: &&CompactString) -> bool {
2083 self.as_str() == other.as_str()
2084 }
2085}
2086
2087impl PartialEq<CompactString> for &String {
2088 fn eq(&self, other: &CompactString) -> bool {
2089 self.as_str() == other.as_str()
2090 }
2091}
2092
2093impl PartialEq<CompactString> for str {
2094 fn eq(&self, other: &CompactString) -> bool {
2095 self == other.as_str()
2096 }
2097}
2098
2099impl<'a> PartialEq<&'a CompactString> for str {
2100 fn eq(&self, other: &&CompactString) -> bool {
2101 self == other.as_str()
2102 }
2103}
2104
2105impl PartialEq<CompactString> for &str {
2106 fn eq(&self, other: &CompactString) -> bool {
2107 *self == other.as_str()
2108 }
2109}
2110
2111impl PartialEq<CompactString> for &&str {
2112 fn eq(&self, other: &CompactString) -> bool {
2113 **self == other.as_str()
2114 }
2115}
2116
2117impl<'a> PartialEq<CompactString> for Cow<'a, str> {
2118 fn eq(&self, other: &CompactString) -> bool {
2119 *self == other.as_str()
2120 }
2121}
2122
2123impl<'a> PartialEq<CompactString> for &Cow<'a, str> {
2124 fn eq(&self, other: &CompactString) -> bool {
2125 *self == other.as_str()
2126 }
2127}
2128
2129impl PartialEq<String> for &CompactString {
2130 fn eq(&self, other: &String) -> bool {
2131 self.as_str() == other.as_str()
2132 }
2133}
2134
2135impl<'a> PartialEq<Cow<'a, str>> for &CompactString {
2136 fn eq(&self, other: &Cow<'a, str>) -> bool {
2137 self.as_str() == other
2138 }
2139}
2140
2141impl Ord for CompactString {
2142 fn cmp(&self, other: &Self) -> Ordering {
2143 self.as_str().cmp(other.as_str())
2144 }
2145}
2146
2147impl PartialOrd for CompactString {
2148 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
2149 Some(self.cmp(other))
2150 }
2151}
2152
2153impl Hash for CompactString {
2154 fn hash<H: Hasher>(&self, state: &mut H) {
2155 self.as_str().hash(state)
2156 }
2157}
2158
2159impl<'a> From<&'a str> for CompactString {
2160 #[inline]
2161 #[track_caller]
2162 fn from(s: &'a str) -> Self {
2163 CompactString::new(s)
2164 }
2165}
2166
2167impl From<String> for CompactString {
2168 #[inline]
2169 #[track_caller]
2170 fn from(s: String) -> Self {
2171 let repr = Repr::from_string(s, true).unwrap_with_msg();
2172 CompactString(repr)
2173 }
2174}
2175
2176impl<'a> From<&'a String> for CompactString {
2177 #[inline]
2178 #[track_caller]
2179 fn from(s: &'a String) -> Self {
2180 CompactString::new(s)
2181 }
2182}
2183
2184impl<'a> From<Cow<'a, str>> for CompactString {
2185 fn from(cow: Cow<'a, str>) -> Self {
2186 match cow {
2187 Cow::Borrowed(s) => s.into(),
2188 // we separate these two so we can re-use the underlying buffer in the owned case
2189 Cow::Owned(s) => s.into(),
2190 }
2191 }
2192}
2193
2194impl From<Box<str>> for CompactString {
2195 #[inline]
2196 #[track_caller]
2197 fn from(b: Box<str>) -> Self {
2198 let s = b.into_string();
2199 let repr = Repr::from_string(s, true).unwrap_with_msg();
2200 CompactString(repr)
2201 }
2202}
2203
2204impl From<CompactString> for String {
2205 #[inline]
2206 fn from(s: CompactString) -> Self {
2207 s.into_string()
2208 }
2209}
2210
2211impl From<CompactString> for Cow<'_, str> {
2212 #[inline]
2213 fn from(s: CompactString) -> Self {
2214 if let Some(s) = s.as_static_str() {
2215 Self::Borrowed(s)
2216 } else {
2217 Self::Owned(s.into_string())
2218 }
2219 }
2220}
2221
2222impl<'a> From<&'a CompactString> for Cow<'a, str> {
2223 #[inline]
2224 fn from(s: &'a CompactString) -> Self {
2225 Self::Borrowed(s)
2226 }
2227}
2228
2229#[cfg(target_has_atomic = "ptr")]
2230impl From<CompactString> for alloc::sync::Arc<str> {
2231 fn from(value: CompactString) -> Self {
2232 Self::from(value.as_str())
2233 }
2234}
2235
2236impl From<CompactString> for alloc::rc::Rc<str> {
2237 fn from(value: CompactString) -> Self {
2238 Self::from(value.as_str())
2239 }
2240}
2241
#[cfg(feature = "std")]
impl From<CompactString> for Box<dyn std::error::Error + Send + Sync> {
    /// Wraps the string in a private error type whose `Display` and `Debug` output is
    /// the output of the string itself.
    fn from(value: CompactString) -> Self {
        /// Private newtype that turns a `CompactString` into an error value.
        struct StringError(CompactString);

        // Purposefully skip printing "StringError(..)"
        impl fmt::Debug for StringError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let StringError(message) = self;
                fmt::Debug::fmt(message, f)
            }
        }

        impl fmt::Display for StringError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let StringError(message) = self;
                fmt::Display::fmt(message, f)
            }
        }

        impl std::error::Error for StringError {
            #[allow(deprecated)]
            fn description(&self) -> &str {
                self.0.as_str()
            }
        }

        let error = StringError(value);
        Box::new(error)
    }
}
2270
#[cfg(feature = "std")]
impl From<CompactString> for Box<dyn std::error::Error> {
    /// Builds the `Send + Sync` boxed error first and then drops the auto-trait bounds.
    fn from(value: CompactString) -> Self {
        let sendable = <Box<dyn std::error::Error + Send + Sync>>::from(value);
        // Unsizing coercion: forget `Send + Sync`.
        sendable
    }
}
2279
2280impl From<CompactString> for Box<str> {
2281 fn from(value: CompactString) -> Self {
2282 if value.is_heap_allocated() {
2283 value.into_string().into_boxed_str()
2284 } else {
2285 Box::from(value.as_str())
2286 }
2287 }
2288}
2289
#[cfg(feature = "std")]
impl From<CompactString> for std::ffi::OsString {
    /// Converts into an `OsString` by way of `String`.
    fn from(value: CompactString) -> Self {
        let owned: String = value.into_string();
        std::ffi::OsString::from(owned)
    }
}
2296
#[cfg(feature = "std")]
impl From<CompactString> for std::path::PathBuf {
    /// Converts into a `PathBuf` by way of `OsString`.
    fn from(value: CompactString) -> Self {
        let os_string: std::ffi::OsString = value.into();
        std::path::PathBuf::from(os_string)
    }
}
2303
#[cfg(feature = "std")]
impl AsRef<std::path::Path> for CompactString {
    /// Borrows the contents as a [`Path`](std::path::Path).
    fn as_ref(&self) -> &std::path::Path {
        let text = self.as_str();
        std::path::Path::new(text)
    }
}
2310
2311impl From<CompactString> for alloc::vec::Vec<u8> {
2312 fn from(value: CompactString) -> Self {
2313 if value.is_heap_allocated() {
2314 value.into_string().into_bytes()
2315 } else {
2316 value.as_bytes().to_vec()
2317 }
2318 }
2319}
2320
2321impl FromStr for CompactString {
2322 type Err = core::convert::Infallible;
2323 fn from_str(s: &str) -> Result<CompactString, Self::Err> {
2324 Ok(CompactString::from(s))
2325 }
2326}
2327
2328impl fmt::Debug for CompactString {
2329 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2330 fmt::Debug::fmt(self.as_str(), f)
2331 }
2332}
2333
2334impl fmt::Display for CompactString {
2335 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2336 fmt::Display::fmt(self.as_str(), f)
2337 }
2338}
2339
2340impl FromIterator<char> for CompactString {
2341 fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
2342 let repr = iter.into_iter().collect();
2343 CompactString(repr)
2344 }
2345}
2346
2347impl<'a> FromIterator<&'a char> for CompactString {
2348 fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
2349 let repr = iter.into_iter().collect();
2350 CompactString(repr)
2351 }
2352}
2353
2354impl<'a> FromIterator<&'a str> for CompactString {
2355 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
2356 let repr = iter.into_iter().collect();
2357 CompactString(repr)
2358 }
2359}
2360
2361impl FromIterator<Box<str>> for CompactString {
2362 fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
2363 let repr = iter.into_iter().collect();
2364 CompactString(repr)
2365 }
2366}
2367
2368impl<'a> FromIterator<Cow<'a, str>> for CompactString {
2369 fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
2370 let repr = iter.into_iter().collect();
2371 CompactString(repr)
2372 }
2373}
2374
2375impl FromIterator<String> for CompactString {
2376 fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
2377 let repr = iter.into_iter().collect();
2378 CompactString(repr)
2379 }
2380}
2381
2382impl FromIterator<CompactString> for CompactString {
2383 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2384 let repr = iter.into_iter().collect();
2385 CompactString(repr)
2386 }
2387}
2388
2389impl FromIterator<CompactString> for String {
2390 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2391 let mut iterator = iter.into_iter();
2392 match iterator.next() {
2393 None => String::new(),
2394 Some(buf) => {
2395 let mut buf = buf.into_string();
2396 buf.extend(iterator);
2397 buf
2398 }
2399 }
2400 }
2401}
2402
2403impl FromIterator<CompactString> for Cow<'_, str> {
2404 fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
2405 String::from_iter(iter).into()
2406 }
2407}
2408
2409impl Extend<char> for CompactString {
2410 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
2411 self.0.extend(iter)
2412 }
2413}
2414
2415impl<'a> Extend<&'a char> for CompactString {
2416 fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
2417 self.0.extend(iter)
2418 }
2419}
2420
2421impl<'a> Extend<&'a str> for CompactString {
2422 fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
2423 self.0.extend(iter)
2424 }
2425}
2426
2427impl Extend<Box<str>> for CompactString {
2428 fn extend<T: IntoIterator<Item = Box<str>>>(&mut self, iter: T) {
2429 self.0.extend(iter)
2430 }
2431}
2432
2433impl<'a> Extend<Cow<'a, str>> for CompactString {
2434 fn extend<T: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: T) {
2435 iter.into_iter().for_each(move |s| self.push_str(&s));
2436 }
2437}
2438
2439impl Extend<String> for CompactString {
2440 fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
2441 self.0.extend(iter)
2442 }
2443}
2444
2445impl Extend<CompactString> for String {
2446 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2447 for s in iter {
2448 self.push_str(&s);
2449 }
2450 }
2451}
2452
2453impl Extend<CompactString> for CompactString {
2454 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2455 for s in iter {
2456 self.push_str(&s);
2457 }
2458 }
2459}
2460
2461impl<'a> Extend<CompactString> for Cow<'a, str> {
2462 fn extend<T: IntoIterator<Item = CompactString>>(&mut self, iter: T) {
2463 self.to_mut().extend(iter);
2464 }
2465}
2466
2467impl fmt::Write for CompactString {
2468 fn write_str(&mut self, s: &str) -> fmt::Result {
2469 self.push_str(s);
2470 Ok(())
2471 }
2472
2473 fn write_fmt(mut self: &mut Self, args: fmt::Arguments<'_>) -> fmt::Result {
2474 match args.as_str() {
2475 Some(s) => {
2476 if self.is_empty() && !self.is_heap_allocated() {
2477 // Since self is currently an empty inline variant or
2478 // an empty `StaticStr` variant, constructing a new one
2479 // with `Self::const_new` is more efficient since
2480 // it is guaranteed to be O(1).
2481 *self = Self::const_new(s);
2482 } else {
2483 self.push_str(s);
2484 }
2485 Ok(())
2486 }
2487 None => fmt::write(&mut self, args),
2488 }
2489 }
2490}
2491
2492impl Add<&str> for CompactString {
2493 type Output = Self;
2494 fn add(mut self, rhs: &str) -> Self::Output {
2495 self.push_str(rhs);
2496 self
2497 }
2498}
2499
2500impl AddAssign<&str> for CompactString {
2501 fn add_assign(&mut self, rhs: &str) {
2502 self.push_str(rhs);
2503 }
2504}
2505
/// A possible error value when converting a [`CompactString`] from a UTF-16 byte slice.
///
/// This type is the error type for the [`from_utf16`] method on [`CompactString`].
///
/// [`from_utf16`]: CompactString::from_utf16
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// # use compact_str::CompactString;
/// // 𝄞mu<invalid>ic
/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
///           0xD800, 0x0069, 0x0063];
///
/// assert!(CompactString::from_utf16(v).is_err());
/// ```
// The unit field is private, so values cannot be constructed outside of this crate.
#[derive(Copy, Clone, Debug)]
pub struct Utf16Error(());
2525
2526impl fmt::Display for Utf16Error {
2527 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2528 fmt::Display::fmt("invalid utf-16: lone surrogate found", f)
2529 }
2530}
2531
/// An iterator over the extracted data by [`CompactString::drain()`].
///
/// The drained range is removed from the string when the `Drain` is dropped.
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Drain<'a> {
    // The string being drained; dereferenced in `Drop` to remove the range.
    compact_string: *mut CompactString,
    // Start of the range handed to `replace_range_shrink` on drop.
    start: usize,
    // End of the range handed to `replace_range_shrink` on drop.
    end: usize,
    // Iterator over the characters that have not been yielded yet.
    chars: core::str::Chars<'a>,
}
2540
// `Drain` stores a raw pointer, so `Send` and `Sync` are not derived automatically and
// have to be asserted by hand.
// SAFETY: Drain keeps the lifetime of the CompactString it belongs to.
unsafe impl Send for Drain<'_> {}
unsafe impl Sync for Drain<'_> {}
2544
2545impl fmt::Debug for Drain<'_> {
2546 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2547 f.debug_tuple("Drain").field(&self.as_str()).finish()
2548 }
2549}
2550
2551impl fmt::Display for Drain<'_> {
2552 #[inline]
2553 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2554 f.write_str(self.as_str())
2555 }
2556}
2557
2558impl Drop for Drain<'_> {
2559 #[inline]
2560 fn drop(&mut self) {
2561 // SAFETY: Drain keeps a mutable reference to compact_string, so one one else can access
2562 // the CompactString, but this function right now. CompactString::drain() ensured
2563 // that the new extracted range does not split a UTF-8 character.
2564 unsafe { (*self.compact_string).replace_range_shrink(self.start, self.end, "") };
2565 }
2566}
2567
2568impl Drain<'_> {
2569 /// The remaining, unconsumed characters of the extracted substring.
2570 #[inline]
2571 pub fn as_str(&self) -> &str {
2572 self.chars.as_str()
2573 }
2574}
2575
2576impl Deref for Drain<'_> {
2577 type Target = str;
2578
2579 #[inline]
2580 fn deref(&self) -> &Self::Target {
2581 self.as_str()
2582 }
2583}
2584
2585impl Iterator for Drain<'_> {
2586 type Item = char;
2587
2588 #[inline]
2589 fn next(&mut self) -> Option<char> {
2590 self.chars.next()
2591 }
2592
2593 #[inline]
2594 fn count(self) -> usize {
2595 // <Chars as Iterator>::count() is specialized, and cloning is trivial.
2596 self.chars.clone().count()
2597 }
2598
2599 fn size_hint(&self) -> (usize, Option<usize>) {
2600 self.chars.size_hint()
2601 }
2602
2603 #[inline]
2604 fn last(mut self) -> Option<char> {
2605 self.chars.next_back()
2606 }
2607}
2608
2609impl DoubleEndedIterator for Drain<'_> {
2610 #[inline]
2611 fn next_back(&mut self) -> Option<char> {
2612 self.chars.next_back()
2613 }
2614}
2615
// Marker impl: `next` forwards to the inner `Chars` iterator.
impl FusedIterator for Drain<'_> {}
2617
/// A possible error value if allocating or resizing a [`CompactString`] failed.
// The unit field is private, so values cannot be constructed outside of this crate.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ReserveError(());
2621
2622impl fmt::Display for ReserveError {
2623 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2624 f.write_str("Cannot allocate memory to hold CompactString")
2625 }
2626}
2627
// Only available with the `std` feature; all `Error` methods keep their defaults.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl std::error::Error for ReserveError {}
2631
/// A possible error value if [`ToCompactString::try_to_compact_string()`] failed.
///
/// The enum is `#[non_exhaustive]`, so a `match` on it outside of this crate needs a
/// wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq)]
#[non_exhaustive]
pub enum ToCompactStringError {
    /// Cannot allocate memory to hold CompactString
    Reserve(ReserveError),
    /// [`Display::fmt()`][core::fmt::Display::fmt] returned an error
    Fmt(fmt::Error),
}
2641
2642impl fmt::Display for ToCompactStringError {
2643 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2644 match self {
2645 ToCompactStringError::Reserve(err) => err.fmt(f),
2646 ToCompactStringError::Fmt(err) => err.fmt(f),
2647 }
2648 }
2649}
2650
2651impl From<ReserveError> for ToCompactStringError {
2652 #[inline]
2653 fn from(value: ReserveError) -> Self {
2654 Self::Reserve(value)
2655 }
2656}
2657
2658impl From<fmt::Error> for ToCompactStringError {
2659 #[inline]
2660 fn from(value: fmt::Error) -> Self {
2661 Self::Fmt(value)
2662 }
2663}
2664
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
impl std::error::Error for ToCompactStringError {
    /// Returns the wrapped error; every variant has one.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let inner: &(dyn std::error::Error + 'static) = match self {
            Self::Reserve(err) => err,
            Self::Fmt(err) => err,
        };
        Some(inner)
    }
}
2675
/// Crate-private helper for unwrapping a value by panicking with the error's message.
trait UnwrapWithMsg {
    /// The type of the unwrapped value.
    type T;

    /// Returns the contained value; the implementation for `Result` in this file panics
    /// with the error's `Display` output in the error case.
    fn unwrap_with_msg(self) -> Self::T;
}
2681
2682impl<T, E: fmt::Display> UnwrapWithMsg for Result<T, E> {
2683 type T = T;
2684
2685 #[inline(always)]
2686 #[track_caller]
2687 fn unwrap_with_msg(self) -> T {
2688 match self {
2689 Ok(value) => value,
2690 Err(err) => unwrap_with_msg_fail(err),
2691 }
2692 }
2693}
2694
/// Panics with the `Display` output of `error` as the panic message.
///
/// Kept out of line and marked cold so that the happy path of the callers stays small;
/// the panic location is attributed to the caller.
#[inline(never)]
#[cold]
#[track_caller]
fn unwrap_with_msg_fail<E: fmt::Display>(error: E) -> ! {
    panic!("{}", error)
}
2701
// Compile-time check that a `CompactString` has the same size as a `String`.
static_assertions::assert_eq_size!(CompactString, String);