compact_str/repr/
mod.rs

1use alloc::borrow::Cow;
2use alloc::boxed::Box;
3use core::str::Utf8Error;
4use core::{
5    mem,
6    ptr,
7};
8
9#[cfg(feature = "bytes")]
10mod bytes;
11#[cfg(feature = "smallvec")]
12mod smallvec;
13
14mod capacity;
15mod heap;
16mod inline;
17mod iter;
18mod last_utf8_char;
19mod num;
20mod static_str;
21mod traits;
22
23use alloc::string::String;
24
25use capacity::Capacity;
26use heap::HeapBuffer;
27use inline::InlineBuffer;
28use last_utf8_char::LastUtf8Char;
29use static_str::StaticStr;
30pub(crate) use traits::IntoRepr;
31
32use crate::{
33    ReserveError,
34    UnwrapWithMsg,
35};
36
/// Largest number of bytes a string may have while still being stored inline. This is the
/// in-memory size of a [`String`] on the target platform
pub const MAX_SIZE: usize = mem::size_of::<String>();
/// Value of the last byte of a [`Repr`] when the string is heap allocated. The last byte is
/// compared against this to tell the variants apart
pub const HEAP_MASK: u8 = LastUtf8Char::Heap as u8;
/// Value of the last byte of a [`Repr`] when it is the `StaticStr` variant
pub const STATIC_STR_MASK: u8 = LastUtf8Char::Static as u8;
/// Offset applied to the length of an inline string before it is stored in the last byte of the
/// buffer; subtracting `LENGTH_MASK` from that byte recovers the length
pub const LENGTH_MASK: u8 = 0b1100_0000;
46
/// An empty [`Repr`] stored inline, handed out wherever an empty string is requested
const EMPTY: Repr = Repr::const_new("");
48
/// Raw storage for a compact string.
///
/// The same bytes are reinterpreted as an [`InlineBuffer`], a [`HeapBuffer`] or a [`StaticStr`];
/// the last byte tells which of those is currently stored. The field order is load-bearing
/// (`#[repr(C)]`), so it must not be rearranged.
#[repr(C)]
pub struct Repr(
    // We have a pointer in the representation to properly carry provenance
    *const (),
    // Then we need two `usize`s (aka WORDs) of data, for the first we just define a `usize`...
    usize,
    // ...but the second we breakup into multiple pieces...
    #[cfg(target_pointer_width = "64")] u32,
    u16,
    u8,
    // ...so that the last byte can be a NonMax, which allows the compiler to see a niche value
    LastUtf8Char,
);
// Compile-time check that a `Repr` occupies exactly `MAX_SIZE` bytes
static_assertions::assert_eq_size!([u8; MAX_SIZE], Repr);
63
// The raw pointer field opts `Repr` out of the auto traits, so they are implemented manually.
// NOTE(review): soundness presumably rests on the pointer only ever referring to a buffer owned
// by this `Repr` or to a `&'static str` — confirm against `HeapBuffer` and `StaticStr`.
unsafe impl Send for Repr {}
unsafe impl Sync for Repr {}
66
67impl Repr {
68    #[inline]
69    pub fn new(text: &str) -> Result<Self, ReserveError> {
70        let len = text.len();
71
72        if len == 0 {
73            Ok(EMPTY)
74        } else if len <= MAX_SIZE {
75            // SAFETY: We checked that the length of text is less than or equal to MAX_SIZE
76            let inline = unsafe { InlineBuffer::new(text) };
77            Ok(Repr::from_inline(inline))
78        } else {
79            HeapBuffer::new(text).map(Repr::from_heap)
80        }
81    }
82
83    #[inline]
84    pub const fn const_new(text: &'static str) -> Self {
85        if text.len() <= MAX_SIZE {
86            let inline = InlineBuffer::new_const(text);
87            Repr::from_inline(inline)
88        } else {
89            let repr = StaticStr::new(text);
90            Repr::from_static(repr)
91        }
92    }
93
94    /// Create a [`Repr`] with the provided `capacity`
95    #[inline]
96    pub fn with_capacity(capacity: usize) -> Result<Self, ReserveError> {
97        if capacity <= MAX_SIZE {
98            Ok(EMPTY)
99        } else {
100            HeapBuffer::with_capacity(capacity).map(Repr::from_heap)
101        }
102    }
103
104    /// Create a [`Repr`] from a slice of bytes that is UTF-8
105    #[inline]
106    pub fn from_utf8<B: AsRef<[u8]>>(buf: B) -> Result<Self, Utf8Error> {
107        // Get a &str from the Vec, failing if it's not valid UTF-8
108        let s = core::str::from_utf8(buf.as_ref())?;
109        // Construct a Repr from the &str
110        Ok(Self::new(s).unwrap_with_msg())
111    }
112
113    /// Create a [`Repr`] from a slice of bytes that is UTF-8, without validating that it is indeed
114    /// UTF-8
115    ///
116    /// # Safety
117    /// * The caller must guarantee that `buf` is valid UTF-8
118    #[inline]
119    pub unsafe fn from_utf8_unchecked<B: AsRef<[u8]>>(buf: B) -> Result<Self, ReserveError> {
120        let bytes = buf.as_ref();
121        let bytes_len = bytes.len();
122
123        // Create a Repr with enough capacity for the entire buffer
124        let mut repr = Repr::with_capacity(bytes_len)?;
125
126        // There's an edge case where the final byte of this buffer == `HEAP_MASK`, which is
127        // invalid UTF-8, but would result in us creating an inline variant, that identifies as
128        // a heap variant. If a user ever tried to reference the data at all, we'd incorrectly
129        // try and read data from an invalid memory address, causing undefined behavior.
130        if bytes_len == MAX_SIZE {
131            let last_byte = bytes[bytes_len - 1];
132            // If we hit the edge case, reserve additional space to make the repr becomes heap
133            // allocated, which prevents us from writing this last byte inline
134            if last_byte >= 0b11000000 {
135                repr.reserve(MAX_SIZE + 1)?;
136            }
137        }
138
139        // SAFETY: The caller is responsible for making sure the provided buffer is UTF-8. This
140        // invariant is documented in the public API
141        let slice = repr.as_mut_buf();
142        // write the chunk into the Repr
143        slice[..bytes_len].copy_from_slice(bytes);
144
145        // Set the length of the Repr
146        // SAFETY: We just wrote the entire `buf` into the Repr
147        repr.set_len(bytes_len);
148
149        Ok(repr)
150    }
151
152    /// Create a [`Repr`] from a [`String`], in `O(1)` time. We'll attempt to inline the string
153    /// if `should_inline` is `true`
154    ///
155    /// Note: If the provided [`String`] is >16 MB and we're on a 32-bit arch, we'll copy the
156    /// `String`.
157    #[inline]
158    pub fn from_string(s: String, should_inline: bool) -> Result<Self, ReserveError> {
159        let og_cap = s.capacity();
160        let cap = Capacity::new(og_cap);
161
162        #[cold]
163        fn capacity_on_heap(s: String) -> Result<Repr, ReserveError> {
164            HeapBuffer::new(s.as_str()).map(Repr::from_heap)
165        }
166
167        #[cold]
168        fn empty() -> Result<Repr, ReserveError> {
169            Ok(EMPTY)
170        }
171
172        if cap.is_heap() {
173            // We only hit this case if the provided String is > 16MB and we're on a 32-bit arch. We
174            // expect it to be unlikely, thus we hint that to the compiler
175            capacity_on_heap(s)
176        } else if og_cap == 0 {
177            // We don't expect converting from an empty String often, so we make this code path cold
178            empty()
179        } else if should_inline && s.len() <= MAX_SIZE {
180            // SAFETY: Checked to make sure the string would fit inline
181            let inline = unsafe { InlineBuffer::new(s.as_str()) };
182            Ok(Repr::from_inline(inline))
183        } else {
184            let mut s = mem::ManuallyDrop::new(s.into_bytes());
185            let len = s.len();
186            let raw_ptr = s.as_mut_ptr();
187
188            let ptr = ptr::NonNull::new(raw_ptr).expect("string with capacity has null ptr?");
189            let heap = HeapBuffer { ptr, len, cap };
190
191            Ok(Repr::from_heap(heap))
192        }
193    }
194
195    /// Converts a [`Repr`] into a [`String`], in `O(1)` time, if possible
196    #[inline]
197    pub fn into_string(self) -> String {
198        #[cold]
199        fn into_string_heap(this: HeapBuffer) -> String {
200            // SAFETY: We know pointer is valid for `length` bytes
201            let slice = unsafe { core::slice::from_raw_parts(this.ptr.as_ptr(), this.len) };
202            // SAFETY: A `Repr` contains valid UTF-8
203            let s = unsafe { core::str::from_utf8_unchecked(slice) };
204
205            String::from(s)
206        }
207
208        if self.is_heap_allocated() {
209            // SAFETY: we just checked that the discriminant indicates we're a HeapBuffer
210            let heap_buffer = unsafe { self.into_heap() };
211
212            if heap_buffer.cap.is_heap() {
213                // We don't expect capacity to be on the heap often, so we mark it as cold
214                into_string_heap(heap_buffer)
215            } else {
216                // Wrap the BoxString in a ManuallyDrop so the underlying buffer doesn't get freed
217                let this = mem::ManuallyDrop::new(heap_buffer);
218
219                // SAFETY: We checked above to make sure capacity is valid
220                let cap = unsafe { this.cap.as_usize() };
221
222                // SAFETY:
223                // * The memory in `ptr` was previously allocated by the same allocator the standard
224                //   library uses, with a required alignment of exactly 1.
225                // * `length` is less than or equal to capacity, due to internal invaraints.
226                // * `capacity` is correctly maintained internally.
227                // * `BoxString` only ever contains valid UTF-8.
228                unsafe { String::from_raw_parts(this.ptr.as_ptr(), this.len, cap) }
229            }
230        } else {
231            String::from(self.as_str())
232        }
233    }
234
235    /// Reserves at least `additional` bytes. If there is already enough capacity to store
236    /// `additional` bytes this is a no-op
237    #[inline]
238    pub fn reserve(&mut self, additional: usize) -> Result<(), ReserveError> {
239        let len = self.len();
240        let needed_capacity = len.checked_add(additional).ok_or(ReserveError(()))?;
241
242        if !self.is_static_str() && needed_capacity <= self.capacity() {
243            // we already have enough space, no-op
244            // If self.is_static_str() is true, then we would have to convert
245            // it to other variants since static_str variant cannot be modified.
246            Ok(())
247        } else if needed_capacity <= MAX_SIZE {
248            // It's possible to have a `Repr` that is heap allocated with a capacity less than
249            // MAX_SIZE, if that `Repr` was created From a String or Box<str>
250            //
251            // SAFETY: Our needed_capacity is >= our length, which is <= than MAX_SIZE
252            let inline = unsafe { InlineBuffer::new(self.as_str()) };
253            *self = Repr::from_inline(inline);
254            Ok(())
255        } else if !self.is_heap_allocated() {
256            // We're not heap allocated, but need to be, create a HeapBuffer
257            let heap = HeapBuffer::with_additional(self.as_str(), additional)?;
258            *self = Repr::from_heap(heap);
259            Ok(())
260        } else {
261            // We're already heap allocated, but we need more capacity
262            //
263            // SAFETY: We checked above to see if we're heap allocated
264            let heap_buffer = unsafe { self.as_mut_heap() };
265
266            // To reduce allocations, we amortize our growth
267            let amortized_capacity = heap::amortized_growth(len, additional);
268            // Attempt to grow our capacity, allocating a new HeapBuffer on failure
269            if heap_buffer.realloc(amortized_capacity).is_err() {
270                // Create a new HeapBuffer
271                let heap = HeapBuffer::with_additional(self.as_str(), additional)?;
272                *self = Repr::from_heap(heap);
273            }
274
275            Ok(())
276        }
277    }
278
279    pub fn shrink_to(&mut self, min_capacity: usize) {
280        // Note: We can't shrink the inline variant since it's buffer is a fixed size
281        // or the static str variant since it is just a pointer, so we only
282        // take action here if our string is heap allocated
283        if !self.is_heap_allocated() {
284            return;
285        }
286
287        // SAFETY: We just checked the discriminant to make sure we're heap allocated
288        let heap = unsafe { self.as_mut_heap() };
289
290        let old_capacity = heap.capacity();
291        let new_capacity = heap.len.max(min_capacity);
292
293        if new_capacity <= MAX_SIZE {
294            // Inline string if possible.
295
296            let mut inline = InlineBuffer::empty();
297            // SAFETY: Our src is on the heap, so it does not overlap with our new inline
298            // buffer, and the src is a `Repr` so we can assume it's valid UTF-8
299            unsafe {
300                inline
301                    .0
302                    .as_mut_ptr()
303                    .copy_from_nonoverlapping(heap.ptr.as_ptr(), heap.len)
304            };
305            // SAFETY: The src we wrote from was a `Repr` which we can assume is valid UTF-8
306            unsafe { inline.set_len(heap.len) }
307            *self = Repr::from_inline(inline);
308            return;
309        }
310
311        // Return if the string cannot be strunk.
312        if new_capacity >= old_capacity {
313            return;
314        }
315
316        // Try to shrink in-place.
317        if heap.realloc(new_capacity).is_ok() {
318            return;
319        }
320
321        // Otherwise try to allocate a new, smaller chunk.
322        // We can ignore the error. The string keeps its old capacity, but that's okay.
323        if let Ok(mut new_this) = Repr::with_capacity(new_capacity) {
324            new_this.push_str(self.as_str());
325            *self = new_this;
326        }
327    }
328
329    #[inline]
330    pub fn push_str(&mut self, s: &str) {
331        // If `s` is empty, then there's no reason to reserve or push anything
332        // at all.
333        if s.is_empty() {
334            return;
335        }
336
337        let len = self.len();
338        let str_len = s.len();
339
340        // Reserve at least enough space to fit `s`
341        self.reserve(str_len).unwrap_with_msg();
342
343        // SAFETY: `s` which we're appending to the buffer, is valid UTF-8
344        let slice = unsafe { self.as_mut_buf() };
345        let push_buffer = &mut slice[len..len + str_len];
346
347        debug_assert_eq!(push_buffer.len(), s.as_bytes().len());
348
349        // Copy the string into our buffer
350        push_buffer.copy_from_slice(s.as_bytes());
351
352        // Increment the length of our string
353        //
354        // SAFETY: We appended `s` which is valid UTF-8, and if our size became greater than
355        // MAX_SIZE, our call to reserve would make us heap allocated
356        unsafe { self.set_len(len + str_len) };
357    }
358
359    #[inline]
360    pub fn pop(&mut self) -> Option<char> {
361        let ch = self.as_str().chars().next_back()?;
362
363        // SAFETY: We know this is is a valid length which falls on a char boundary
364        unsafe { self.set_len(self.len() - ch.len_utf8()) };
365
366        Some(ch)
367    }
368
    /// Returns the string content, and only the string content, as a slice of bytes.
    ///
    /// Both the pointer and the length start out as the inline (stack) values and are overwritten
    /// when the last byte is `>= HEAP_MASK`, which covers the heap and static str discriminants.
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        // initially has the value of the stack pointer, conditionally becomes the heap pointer
        let mut pointer = self as *const Self as *const u8;
        let heap_pointer = self.0 as *const u8;
        if self.last_byte() >= HEAP_MASK {
            pointer = heap_pointer;
        }

        // initially has the value of the stack length, conditionally becomes the heap length
        //
        // A last byte below `LENGTH_MASK` wraps around to a large value, which `min` clamps to
        // MAX_SIZE: in that case the inline buffer is completely full
        let mut length = core::cmp::min(
            self.last_byte().wrapping_sub(LENGTH_MASK) as usize,
            MAX_SIZE,
        );
        let heap_length = self.1;
        if self.last_byte() >= HEAP_MASK {
            length = heap_length;
        }

        // SAFETY: We know the data is valid, aligned, and part of the same contiguous allocated
        // chunk. It's also valid for the lifetime of self
        unsafe { core::slice::from_raw_parts(pointer, length) }
    }
393
394    #[inline]
395    pub fn as_str(&self) -> &str {
396        // SAFETY: A `Repr` contains valid UTF-8
397        unsafe { core::str::from_utf8_unchecked(self.as_slice()) }
398    }
399
    /// Returns the length, in bytes, of the string that we're storing
    #[inline]
    pub fn len(&self) -> usize {
        // This ugly looking code results in two conditional moves and only one comparison, without
        // branching. The outcome of a comparison is a tristate `{lt, eq, gt}`, but the compiler
        // won't use this optimization if you match on `len_inline.cmp(&MAX_SIZE)`, so we have to
        // do it manually.

        // Force the compiler to read the variable, so it won't put the reading in a branch.
        let len_heap = ensure_read(self.1);

        let last_byte = self.last_byte();

        // Extending the variable early results in fewer instructions, because loading and
        // extending can be done in one instruction.
        //
        // A last byte below `LENGTH_MASK` wraps around to a huge value, which `min` clamps to
        // MAX_SIZE: in that case the inline buffer is completely full.
        let mut len = (last_byte as usize)
            .wrapping_sub(LENGTH_MASK as usize)
            .min(MAX_SIZE);

        // our discriminant is stored in the last byte and denotes stack vs heap
        //
        // Note: We should never add an `else` statement here, keeping the conditional simple allows
        // the compiler to optimize this to a conditional-move instead of a branch
        if last_byte >= HEAP_MASK {
            len = len_heap;
        }

        len
    }
429
    /// Returns `true` if the length is 0, `false` otherwise
    #[inline]
    pub fn is_empty(&self) -> bool {
        // Read the heap length up front, like `len` does, so the read is not placed in a branch
        let len_heap = ensure_read(self.1);
        let last_byte = self.last_byte() as usize;
        // Only the comparison against zero matters here, so unlike `len` the inline length is
        // not clamped to MAX_SIZE
        let mut len = last_byte.wrapping_sub(LastUtf8Char::L0 as u8 as usize);
        // The heap and static str variants store their length in the second field
        if last_byte >= LastUtf8Char::Heap as u8 as usize {
            len = len_heap;
        }
        len == 0
    }
441
442    /// Returns the overall capacity of the underlying buffer
443    #[inline]
444    pub fn capacity(&self) -> usize {
445        #[cold]
446        fn heap_capacity(this: &Repr) -> usize {
447            // SAFETY: We just checked the discriminant to make sure we're heap allocated
448            let heap_buffer = unsafe { this.as_heap() };
449            heap_buffer.capacity()
450        }
451
452        if let Some(s) = self.as_static_str() {
453            s.len()
454        } else if self.is_heap_allocated() {
455            heap_capacity(self)
456        } else {
457            MAX_SIZE
458        }
459    }
460
461    #[inline(always)]
462    pub fn is_heap_allocated(&self) -> bool {
463        let last_byte = self.last_byte();
464        last_byte == HEAP_MASK
465    }
466
467    #[inline(always)]
468    const fn is_static_str(&self) -> bool {
469        let last_byte = self.last_byte();
470        last_byte == STATIC_STR_MASK
471    }
472
473    #[inline]
474    #[rustversion::attr(since(1.64), const)]
475    pub fn as_static_str(&self) -> Option<&'static str> {
476        if self.is_static_str() {
477            // SAFETY: A `Repr` is transmuted from `StaticStr`
478            let s: &StaticStr = unsafe { &*(self as *const Self as *const StaticStr) };
479            Some(s.get_text())
480        } else {
481            None
482        }
483    }
484
485    #[inline]
486    fn as_static_variant_mut(&mut self) -> Option<&mut StaticStr> {
487        if self.is_static_str() {
488            // SAFETY: A `Repr` is transmuted from `StaticStr`
489            let s: &mut StaticStr = unsafe { &mut *(self as *mut Self as *mut StaticStr) };
490            Some(s)
491        } else {
492            None
493        }
494    }
495
496    /// Return a mutable reference to the entirely underlying buffer
497    ///
498    /// # Safety
499    /// * Callers must guarantee that any modifications made to the buffer are valid UTF-8
500    pub unsafe fn as_mut_buf(&mut self) -> &mut [u8] {
501        #[cold]
502        fn inline_static_str(this: &mut Repr) {
503            if let Some(s) = this.as_static_str() {
504                *this = Repr::new(s).unwrap_with_msg();
505            }
506        }
507
508        if self.is_static_str() {
509            inline_static_str(self);
510        }
511
512        // the last byte stores our discriminant and stack length
513        let last_byte = self.last_byte();
514
515        let (ptr, cap) = if last_byte == HEAP_MASK {
516            // SAFETY: We just checked the discriminant to make sure we're heap allocated
517            let heap_buffer = self.as_heap();
518            let ptr = heap_buffer.ptr.as_ptr();
519            let cap = heap_buffer.capacity();
520
521            (ptr, cap)
522        } else {
523            let ptr = self as *mut Self as *mut u8;
524            (ptr, MAX_SIZE)
525        };
526
527        // SAFETY: Our data is valid for `cap` bytes, and is initialized
528        core::slice::from_raw_parts_mut(ptr, cap)
529    }
530
531    /// Sets the length of the string that our underlying buffer contains
532    ///
533    /// # Safety
534    /// * `len` bytes in the buffer must be valid UTF-8
535    /// * If the underlying buffer is stored inline, `len` must be <= MAX_SIZE
536    pub unsafe fn set_len(&mut self, len: usize) {
537        if let Some(s) = self.as_static_variant_mut() {
538            s.set_len(len);
539        } else if self.is_heap_allocated() {
540            // SAFETY: We just checked the discriminant to make sure we're heap allocated
541            let heap_buffer = self.as_mut_heap();
542            // SAFETY: The caller guarantees that `len` bytes is valid UTF-8
543            heap_buffer.set_len(len);
544        } else {
545            // SAFETY: We just checked the discriminant to make sure we're an InlineBuffer
546            let inline_buffer = self.as_mut_inline();
547            // SAFETY: The caller guarantees that len <= MAX_SIZE, and `len` bytes is valid UTF-8
548            inline_buffer.set_len(len);
549        }
550    }
551
    /// Returns the last byte that's on the stack.
    ///
    /// The last byte stores the discriminant that indicates whether the string is on the stack or
    /// on the heap. When the string is on the stack the last byte also stores the length
    #[inline(always)]
    const fn last_byte(&self) -> u8 {
        // `Repr` has an extra `u32` field on 64-bit targets, which shifts the index of the
        // trailing `LastUtf8Char` field from 4 to 5
        cfg_if::cfg_if! {
            if #[cfg(target_pointer_width = "64")] {
                let last_byte = self.5;
            } else if #[cfg(target_pointer_width = "32")] {
                let last_byte = self.4;
            } else {
                compile_error!("Unsupported target_pointer_width");
            }
        };
        last_byte as u8
    }
569
570    /// Reinterprets an [`InlineBuffer`] into a [`Repr`]
571    ///
572    /// Note: This is safe because [`InlineBuffer`] and [`Repr`] are the same size. We used to
573    /// define [`Repr`] as a `union` which implicitly transmuted between the two types, but that
574    /// prevented us from defining a "niche" value to make `Option<CompactString>` the same size as
575    /// just `CompactString`
576    #[inline(always)]
577    const fn from_inline(inline: InlineBuffer) -> Self {
578        // SAFETY: An `InlineBuffer` and `Repr` have the same size
579        unsafe { core::mem::transmute(inline) }
580    }
581
582    /// Reinterprets a [`HeapBuffer`] into a [`Repr`]
583    ///
584    /// Note: This is safe because [`HeapBuffer`] and [`Repr`] are the same size. We used to define
585    /// [`Repr`] as a `union` which implicitly transmuted between the two types, but that prevented
586    /// us from defining a "niche" value to make `Option<CompactString>` the same size as just
587    /// `CompactString`
588    #[inline(always)]
589    const fn from_heap(heap: HeapBuffer) -> Self {
590        // SAFETY: A `HeapBuffer` and `Repr` have the same size
591        unsafe { core::mem::transmute(heap) }
592    }
593
594    /// Reinterprets a [`StaticStr`] into a [`Repr`]
595    ///
596    /// Note: This is safe because [`StaticStr`] and [`Repr`] are the same size. We used to define
597    /// [`Repr`] as a `union` which implicitly transmuted between the two types, but that prevented
598    /// us from defining a "niche" value to make `Option<CompactString>` the same size as just
599    /// `CompactString`
600    #[inline(always)]
601    const fn from_static(heap: StaticStr) -> Self {
602        // SAFETY: A `StaticStr` and `Repr` have the same size
603        unsafe { core::mem::transmute(heap) }
604    }
605
606    /// Reinterprets a [`Repr`] as a [`HeapBuffer`]
607    ///
608    /// # SAFETY
609    /// * The caller must guarantee that the provided [`Repr`] is actually a [`HeapBuffer`] by
610    ///   checking the discriminant.
611    ///
612    /// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
613    /// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
614    /// the same size as just `CompactString`
615    #[inline(always)]
616    const unsafe fn into_heap(self) -> HeapBuffer {
617        core::mem::transmute(self)
618    }
619
620    /// Reinterprets a `&mut Repr` as a `&mut HeapBuffer`
621    ///
622    /// # SAFETY
623    /// * The caller must guarantee that the provided [`Repr`] is actually a [`HeapBuffer`] by
624    ///   checking the discriminant.
625    ///
626    /// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
627    /// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
628    /// the same size as just `CompactString`
629    #[inline(always)]
630    unsafe fn as_mut_heap(&mut self) -> &mut HeapBuffer {
631        // SAFETY: A `HeapBuffer` and `Repr` have the same size
632        &mut *(self as *mut _ as *mut HeapBuffer)
633    }
634
635    /// Reinterprets a `&Repr` as a `&HeapBuffer`
636    ///
637    /// # SAFETY
638    /// * The caller must guarantee that the provided [`Repr`] is actually a [`HeapBuffer`] by
639    ///   checking the discriminant.
640    ///
641    /// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
642    /// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
643    /// the same size as just `CompactString`
644    #[inline(always)]
645    unsafe fn as_heap(&self) -> &HeapBuffer {
646        // SAFETY: A `HeapBuffer` and `Repr` have the same size
647        &*(self as *const _ as *const HeapBuffer)
648    }
649
650    /// Reinterprets a [`Repr`] as an [`InlineBuffer`]
651    ///
652    /// # SAFETY
653    /// * The caller must guarantee that the provided [`Repr`] is actually an [`InlineBuffer`] by
654    ///   checking the discriminant.
655    ///
656    /// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
657    /// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
658    /// the same size as just `CompactString`
659    #[inline(always)]
660    #[cfg(feature = "smallvec")]
661    const unsafe fn into_inline(self) -> InlineBuffer {
662        core::mem::transmute(self)
663    }
664
665    /// Reinterprets a `&mut Repr` as an `&mut InlineBuffer`
666    ///
667    /// # SAFETY
668    /// * The caller must guarantee that the provided [`Repr`] is actually an [`InlineBuffer`] by
669    ///   checking the discriminant.
670    ///
671    /// Note: We used to define [`Repr`] as a `union` which implicitly transmuted between the two
672    /// types, but that prevented us from defining a "niche" value to make `Option<CompactString>`
673    /// the same size as just `CompactString`
674    #[inline(always)]
675    unsafe fn as_mut_inline(&mut self) -> &mut InlineBuffer {
676        // SAFETY: An `InlineBuffer` and `Repr` have the same size
677        &mut *(self as *mut _ as *mut InlineBuffer)
678    }
679}
680
681impl Clone for Repr {
682    #[inline]
683    fn clone(&self) -> Self {
684        #[inline(never)]
685        fn clone_heap(this: &Repr) -> Repr {
686            Repr::new(this.as_str()).unwrap_with_msg()
687        }
688
689        // There are only two cases we need to care about: If the string is allocated on the heap
690        // or not. If it is, then the data must be cloned properly, otherwise we can simply copy
691        // the `Repr`.
692        if self.is_heap_allocated() {
693            clone_heap(self)
694        } else {
695            // SAFETY: We just checked that `self` can be copied because it is an inline string or
696            // a reference to a `&'static str`.
697            unsafe { core::ptr::read(self) }
698        }
699    }
700
701    #[inline]
702    fn clone_from(&mut self, source: &Self) {
703        #[inline(never)]
704        fn clone_from_heap(this: &mut Repr, source: &Repr) {
705            unsafe { this.set_len(0) };
706            this.push_str(source.as_str());
707        }
708
709        // There are only two cases we need to care about: If the string is allocated on the heap
710        // or not. If it is, then the data must be cloned proberly, otherwise we can simply copy
711        // the `Repr`.
712        if source.is_heap_allocated() {
713            clone_from_heap(self, source)
714        } else {
715            // SAFETY: We just checked that `source` can be copied because it is an inline string or
716            // a reference to a `&'static str`.
717            *self = unsafe { core::ptr::read(source) }
718        }
719    }
720}
721
722impl Drop for Repr {
723    #[inline]
724    fn drop(&mut self) {
725        // By "outlining" the actual Drop code and only calling it if we're a heap variant, it
726        // allows dropping an inline variant to be as cheap as possible.
727        if self.is_heap_allocated() {
728            outlined_drop(self)
729        }
730
731        #[cold]
732        fn outlined_drop(this: &mut Repr) {
733            // SAFETY: We just checked the discriminant to make sure we're heap allocated
734            let heap_buffer = unsafe { this.as_mut_heap() };
735            heap_buffer.dealloc();
736        }
737    }
738}
739
740impl Extend<char> for Repr {
741    #[inline]
742    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
743        let iter = iter.into_iter();
744
745        let (lower_bound, _) = iter.size_hint();
746        if lower_bound > 0 {
747            // Ignore the error and hope that the lower_bound is incorrect.
748            let _: Result<(), ReserveError> = self.reserve(lower_bound);
749        }
750
751        for c in iter {
752            self.push_str(c.encode_utf8(&mut [0; 4]));
753        }
754    }
755}
756
757impl<'a> Extend<&'a char> for Repr {
758    fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
759        self.extend(iter.into_iter().copied());
760    }
761}
762
763impl<'a> Extend<&'a str> for Repr {
764    fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
765        iter.into_iter().for_each(|s| self.push_str(s));
766    }
767}
768
769impl Extend<Box<str>> for Repr {
770    fn extend<T: IntoIterator<Item = Box<str>>>(&mut self, iter: T) {
771        iter.into_iter().for_each(move |s| self.push_str(&s));
772    }
773}
774
775impl<'a> Extend<Cow<'a, str>> for Repr {
776    fn extend<T: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: T) {
777        iter.into_iter().for_each(move |s| self.push_str(&s));
778    }
779}
780
781impl Extend<String> for Repr {
782    fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
783        iter.into_iter().for_each(move |s| self.push_str(&s));
784    }
785}
786
/// Returns the supplied value, and ensures that the value is eagerly loaded into a register.
///
/// The value is passed as a register operand to an `asm!` block whose template is only an
/// assembly comment. Under miri, or on an architecture not listed below, the `asm!` block is
/// compiled out and the function just returns `value`.
#[inline(always)]
fn ensure_read(value: usize) -> usize {
    // SAFETY: This assembly instruction is a noop that only affects the instruction ordering.
    //
    // TODO(parkmycar): Re-add loongarch and riscv once we have CI coverage for them.
    #[cfg(all(
        not(miri),
        any(
            target_arch = "x86",
            target_arch = "x86_64",
            target_arch = "arm",
            target_arch = "aarch64",
        )
    ))]
    unsafe {
        core::arch::asm!(
            "/* {value} */",
            value = in(reg) value,
            options(nomem, nostack),
        );
    };

    value
}
812
813#[cfg(test)]
814mod tests {
815    use alloc::string::{
816        String,
817        ToString,
818    };
819    use alloc::vec::Vec;
820
821    use quickcheck_macros::quickcheck;
822    use test_case::test_case;
823
824    use super::{
825        Repr,
826        MAX_SIZE,
827    };
828    use crate::ReserveError;
829
830    const EIGHTEEN_MB: usize = 18 * 1024 * 1024;
831    const EIGHTEEN_MB_STR: &str = unsafe { core::str::from_utf8_unchecked(&[42; EIGHTEEN_MB]) };
832
833    #[test_case("hello world!"; "inline")]
834    #[test_case("this is a long string that should be stored on the heap"; "heap")]
835    fn test_create(s: &'static str) {
836        let repr = Repr::new(s).unwrap();
837        assert_eq!(repr.as_str(), s);
838        assert_eq!(repr.len(), s.len());
839
840        // test StaticStr variant
841        let repr = Repr::const_new(s);
842        assert_eq!(repr.as_str(), s);
843        assert_eq!(repr.len(), s.len());
844    }
845
846    #[quickcheck]
847    #[cfg_attr(miri, ignore)]
848    fn quickcheck_create(s: String) {
849        let repr = Repr::new(&s).unwrap();
850        assert_eq!(repr.as_str(), s);
851        assert_eq!(repr.len(), s.len());
852    }
853
854    #[test_case(0; "empty")]
855    #[test_case(10; "short")]
856    #[test_case(64; "long")]
857    #[test_case(EIGHTEEN_MB; "huge")]
858    fn test_with_capacity(cap: usize) {
859        let r = Repr::with_capacity(cap).unwrap();
860        assert!(r.capacity() >= MAX_SIZE);
861        assert_eq!(r.len(), 0);
862    }
863
864    #[test_case(""; "empty")]
865    #[test_case("abc"; "short")]
866    #[test_case("hello world! I am a longer string 🦀"; "long")]
867    fn test_from_utf8_valid(s: &'static str) {
868        let bytes = s.as_bytes();
869        let r = Repr::from_utf8(bytes).expect("valid UTF-8");
870
871        assert_eq!(r.as_str(), s);
872        assert_eq!(r.len(), s.len());
873    }
874
875    #[quickcheck]
876    #[cfg_attr(miri, ignore)]
877    fn quickcheck_from_utf8(buf: Vec<u8>) {
878        match (core::str::from_utf8(&buf), Repr::from_utf8(&buf)) {
879            (Ok(s), Ok(r)) => {
880                assert_eq!(r.as_str(), s);
881                assert_eq!(r.len(), s.len());
882            }
883            (Err(e), Err(r)) => assert_eq!(e, r),
884            _ => panic!("core::str and Repr differ on what is valid UTF-8!"),
885        }
886    }
887
888    #[test_case(String::new(), true; "empty should inline")]
889    #[test_case(String::new(), false; "empty not inline")]
890    #[test_case(String::with_capacity(10), true ; "empty with small capacity inline")]
891    #[test_case(String::with_capacity(10), false ; "empty with small capacity not inline")]
892    #[test_case(String::with_capacity(128), true ; "empty with large capacity inline")]
893    #[test_case(String::with_capacity(128), false ; "empty with large capacity not inline")]
894    #[test_case(String::from("nyc 🗽"), true; "short should inline")]
895    #[test_case(String::from("nyc 🗽"), false ; "short not inline")]
896    #[test_case(String::from("this is a really long string, which is intended"), true; "long")]
897    #[test_case(String::from("this is a really long string, which is intended"), false; "long not inline")]
898    #[test_case(EIGHTEEN_MB_STR.to_string(), true ; "huge should inline")]
899    #[test_case(EIGHTEEN_MB_STR.to_string(), false ; "huge not inline")]
900    fn test_from_string(s: String, try_to_inline: bool) {
901        // note: when cloning a String it truncates capacity, which is why we measure these values
902        // before cloning the string
903        let s_len = s.len();
904        let s_cap = s.capacity();
905        let s_str = s.clone();
906
907        let r = Repr::from_string(s, try_to_inline).unwrap();
908
909        assert_eq!(r.len(), s_len);
910        assert_eq!(r.as_str(), s_str.as_str());
911
912        if s_cap == 0 || (try_to_inline && s_len <= MAX_SIZE) {
913            // we should inline the string, if we were asked to, and the length of the string would
914            // fit inline, meaning we would truncate capacity
915            assert!(!r.is_heap_allocated());
916        } else {
917            assert!(r.is_heap_allocated());
918        }
919    }
920
921    #[quickcheck]
922    #[cfg_attr(miri, ignore)]
923    fn quickcheck_from_string(s: String, try_to_inline: bool) {
924        let r = Repr::from_string(s.clone(), try_to_inline).unwrap();
925
926        assert_eq!(r.len(), s.len());
927        assert_eq!(r.as_str(), s.as_str());
928
929        if s.capacity() == 0 {
930            // we should always inline the string, if the length of the source string is 0
931            assert!(!r.is_heap_allocated());
932        } else if s.capacity() <= MAX_SIZE {
933            // we should inline the string, if we were asked to
934            assert_eq!(!r.is_heap_allocated(), try_to_inline);
935        } else {
936            assert!(r.is_heap_allocated());
937        }
938    }
939
940    #[test_case(""; "empty")]
941    #[test_case("nyc 🗽"; "short")]
942    #[test_case("this is a really long string, which is intended"; "long")]
943    fn test_into_string(control: &'static str) {
944        let r = Repr::new(control).unwrap();
945        let s = r.into_string();
946
947        assert_eq!(control.len(), s.len());
948        assert_eq!(control, s.as_str());
949
950        // test StaticStr variant
951        let r = Repr::const_new(control);
952        let s = r.into_string();
953
954        assert_eq!(control.len(), s.len());
955        assert_eq!(control, s.as_str());
956    }
957
958    #[quickcheck]
959    #[cfg_attr(miri, ignore)]
960    fn quickcheck_into_string(control: String) {
961        let r = Repr::new(&control).unwrap();
962        let s = r.into_string();
963
964        assert_eq!(control.len(), s.len());
965        assert_eq!(control, s.as_str());
966    }
967
968    #[test_case("", "a", false; "empty")]
969    #[test_case("", "🗽", false; "empty_emoji")]
970    #[test_case("abc", "🗽🙂🦀🌈👏🐶", true; "inline_to_heap")]
971    #[test_case("i am a long string that will be on the heap", "extra", true; "heap_to_heap")]
972    fn test_push_str(control: &'static str, append: &'static str, is_heap: bool) {
973        let mut r = Repr::new(control).unwrap();
974        let mut c = String::from(control);
975
976        r.push_str(append);
977        c.push_str(append);
978
979        assert_eq!(r.as_str(), c.as_str());
980        assert_eq!(r.len(), c.len());
981
982        assert_eq!(r.is_heap_allocated(), is_heap);
983
984        // test StaticStr variant
985        let mut r = Repr::const_new(control);
986        let mut c = String::from(control);
987
988        r.push_str(append);
989        c.push_str(append);
990
991        assert_eq!(r.as_str(), c.as_str());
992        assert_eq!(r.len(), c.len());
993
994        assert_eq!(r.is_heap_allocated(), is_heap);
995    }
996
997    #[quickcheck]
998    #[cfg_attr(miri, ignore)]
999    fn quickcheck_push_str(control: String, append: String) {
1000        let mut r = Repr::new(&control).unwrap();
1001        let mut c = control;
1002
1003        r.push_str(&append);
1004        c.push_str(&append);
1005
1006        assert_eq!(r.as_str(), c.as_str());
1007        assert_eq!(r.len(), c.len());
1008    }
1009
1010    #[test_case(&[42; 0], &[42; EIGHTEEN_MB]; "empty_to_heap_capacity")]
1011    #[test_case(&[42; 8], &[42; EIGHTEEN_MB]; "inline_to_heap_capacity")]
1012    #[test_case(&[42; 128], &[42; EIGHTEEN_MB]; "heap_inline_to_heap_capacity")]
1013    #[test_case(&[42; EIGHTEEN_MB], &[42; 64]; "heap_capacity_to_heap_capacity")]
1014    fn test_push_str_from_buf(buf: &[u8], append: &[u8]) {
1015        // The goal of this test is to exercise the scenario when our capacity is stored on the heap
1016
1017        let control = unsafe { core::str::from_utf8_unchecked(buf) };
1018        let append = unsafe { core::str::from_utf8_unchecked(append) };
1019
1020        let mut r = Repr::new(control).unwrap();
1021        let mut c = String::from(control);
1022
1023        r.push_str(append);
1024        c.push_str(append);
1025
1026        assert_eq!(r.as_str(), c.as_str());
1027        assert_eq!(r.len(), c.len());
1028
1029        assert!(r.is_heap_allocated());
1030    }
1031
1032    #[test_case("", 0, false; "empty_zero")]
1033    #[test_case("", 10, false; "empty_small")]
1034    #[test_case("", 64, true; "empty_large")]
1035    #[test_case("abc", 0, false; "short_zero")]
1036    #[test_case("abc", 8, false; "short_small")]
1037    #[test_case("abc", 64, true; "short_large")]
1038    #[test_case("I am a long string that will be on the heap", 0, true; "large_zero")]
1039    #[test_case("I am a long string that will be on the heap", 10, true; "large_small")]
1040    #[test_case("I am a long string that will be on the heap", EIGHTEEN_MB, true; "large_huge")]
1041    fn test_reserve(initial: &'static str, additional: usize, is_heap: bool) {
1042        let mut r = Repr::new(initial).unwrap();
1043        r.reserve(additional).unwrap();
1044
1045        assert!(r.capacity() >= initial.len() + additional);
1046        assert_eq!(r.is_heap_allocated(), is_heap);
1047
1048        // Test static_str variant
1049        let mut r = Repr::const_new(initial);
1050        r.reserve(additional).unwrap();
1051
1052        assert!(r.capacity() >= initial.len() + additional);
1053        assert_eq!(r.is_heap_allocated(), is_heap);
1054    }
1055
1056    #[test]
1057    fn test_reserve_overflow() {
1058        let mut r = Repr::new("abc").unwrap();
1059        let err = r.reserve(usize::MAX).unwrap_err();
1060        assert_eq!(err, ReserveError(()));
1061    }
1062
1063    #[test_case(""; "empty")]
1064    #[test_case("abc"; "short")]
1065    #[test_case("i am a longer string that will be on the heap"; "long")]
1066    #[test_case(EIGHTEEN_MB_STR; "huge")]
1067    fn test_clone(initial: &'static str) {
1068        let r_a = Repr::new(initial).unwrap();
1069        let r_b = r_a.clone();
1070
1071        assert_eq!(r_a.as_str(), initial);
1072        assert_eq!(r_a.len(), initial.len());
1073
1074        assert_eq!(r_a.as_str(), r_b.as_str());
1075        assert_eq!(r_a.len(), r_b.len());
1076        assert_eq!(r_a.capacity(), r_b.capacity());
1077        assert_eq!(r_a.is_heap_allocated(), r_b.is_heap_allocated());
1078
1079        // test StaticStr variant
1080        let r_a = Repr::const_new(initial);
1081        let r_b = r_a.clone();
1082
1083        assert_eq!(r_a.as_str(), initial);
1084        assert_eq!(r_a.len(), initial.len());
1085
1086        assert_eq!(r_a.as_str(), r_b.as_str());
1087        assert_eq!(r_a.len(), r_b.len());
1088        assert_eq!(r_a.capacity(), r_b.capacity());
1089        assert_eq!(r_a.is_heap_allocated(), r_b.is_heap_allocated());
1090    }
1091
1092    #[test_case(Repr::const_new(""), Repr::const_new(""); "empty clone from static")]
1093    #[test_case(Repr::const_new("abc"), Repr::const_new("efg"); "short clone from static")]
1094    #[test_case(Repr::new("i am a longer string that will be on the heap").unwrap(), Repr::const_new(EIGHTEEN_MB_STR); "long clone from static")]
1095    #[test_case(Repr::const_new(""), Repr::const_new(""); "empty clone from inline")]
1096    #[test_case(Repr::const_new("abc"), Repr::const_new("efg"); "short clone from inline")]
1097    #[test_case(Repr::new("i am a longer string that will be on the heap").unwrap(), Repr::const_new("small"); "long clone from inline")]
1098    #[test_case(Repr::const_new(""), Repr::new(EIGHTEEN_MB_STR).unwrap(); "empty clone from heap")]
1099    #[test_case(Repr::const_new("abc"), Repr::new(EIGHTEEN_MB_STR).unwrap(); "short clone from heap")]
1100    #[test_case(Repr::new("i am a longer string that will be on the heap").unwrap(), Repr::new(EIGHTEEN_MB_STR).unwrap(); "long clone from heap")]
1101    fn test_clone_from(mut initial: Repr, source: Repr) {
1102        initial.clone_from(&source);
1103        assert_eq!(initial.as_str(), source.as_str());
1104        assert_eq!(initial.is_heap_allocated(), source.is_heap_allocated());
1105    }
1106
1107    #[quickcheck]
1108    #[cfg_attr(miri, ignore)]
1109    fn quickcheck_clone(initial: String) {
1110        let r_a = Repr::new(&initial).unwrap();
1111        let r_b = r_a.clone();
1112
1113        assert_eq!(r_a.as_str(), initial);
1114        assert_eq!(r_a.len(), initial.len());
1115
1116        assert_eq!(r_a.as_str(), r_b.as_str());
1117        assert_eq!(r_a.len(), r_b.len());
1118        assert_eq!(r_a.capacity(), r_b.capacity());
1119        assert_eq!(r_a.is_heap_allocated(), r_b.is_heap_allocated());
1120    }
1121}