compact_str/repr/
inline.rs

1use core::ptr;
2
3use super::{
4    Repr,
5    LENGTH_MASK,
6    MAX_SIZE,
7};
8
9/// A buffer stored on the stack whose size is equal to the stack size of `String`
10#[cfg(target_pointer_width = "64")]
11#[repr(C, align(8))]
12pub struct InlineBuffer(pub [u8; MAX_SIZE]);
13
14#[cfg(target_pointer_width = "32")]
15#[repr(C, align(4))]
16pub struct InlineBuffer(pub [u8; MAX_SIZE]);
17
// Compile-time layout checks: the build fails unless `InlineBuffer` has exactly the same size
// and the same alignment as `Repr`.
// NOTE(review): presumably this is what allows one to be reinterpreted as the other — confirm
// against the definition of `Repr`.
static_assertions::assert_eq_size!(InlineBuffer, Repr);
static_assertions::assert_eq_align!(InlineBuffer, Repr);
20
21impl InlineBuffer {
22    /// Construct a new [`InlineString`]. A string that lives in a small buffer on the stack
23    ///
24    /// SAFETY:
25    /// * The caller must guarantee that the length of `text` is less than [`MAX_SIZE`]
26    #[inline]
27    pub unsafe fn new(text: &str) -> Self {
28        debug_assert!(text.len() <= MAX_SIZE);
29
30        let len = text.len();
31        let mut buffer = InlineBuffer([0u8; MAX_SIZE]);
32
33        // set the length in the last byte
34        buffer.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;
35
36        // copy the string into our buffer
37        //
38        // note: in the case where len == MAX_SIZE, we'll overwrite the len, but that's okay because
39        // when reading the length we can detect that the last byte is part of UTF-8 and return a
40        // length of MAX_SIZE
41        //
42        // SAFETY:
43        // * src (`text`) is valid for `len` bytes because `len` comes from `text`
44        // * dst (`buffer`) is valid for `len` bytes because we assert src is less than MAX_SIZE
45        // * src and dst don't overlap because we created dst
46        //
47        ptr::copy_nonoverlapping(text.as_ptr(), buffer.0.as_mut_ptr(), len);
48
49        buffer
50    }
51
52    #[inline]
53    pub const fn new_const(text: &str) -> Self {
54        if text.len() > MAX_SIZE {
55            panic!("Provided string has a length greater than our MAX_SIZE");
56        }
57
58        let len = text.len();
59        let mut buffer = [0u8; MAX_SIZE];
60
61        // set the length
62        buffer[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;
63
64        // Note: for loops aren't allowed in `const fn`, hence the while.
65        // Note: Iterating forward results in badly optimized code, because the compiler tries to
66        //       unroll the loop.
67        let text = text.as_bytes();
68        let mut i = len;
69        while i > 0 {
70            buffer[i - 1] = text[i - 1];
71            i -= 1;
72        }
73
74        InlineBuffer(buffer)
75    }
76
77    /// Returns an empty [`InlineBuffer`]
78    #[inline(always)]
79    pub const fn empty() -> Self {
80        Self::new_const("")
81    }
82
83    /// Consumes the [`InlineBuffer`] returning the entire underlying array and the length of the
84    /// string that it contains
85    #[inline]
86    #[cfg(feature = "smallvec")]
87    pub fn into_array(self) -> ([u8; MAX_SIZE], usize) {
88        let mut buffer = self.0;
89
90        let length = core::cmp::min(
91            (buffer[MAX_SIZE - 1].wrapping_sub(LENGTH_MASK)) as usize,
92            MAX_SIZE,
93        );
94
95        let last_byte_ref = &mut buffer[MAX_SIZE - 1];
96
97        // unset the last byte of the buffer if it's just storing the length of the string
98        //
99        // Note: we should never add an `else` statement here, keeping the conditional simple allows
100        // the compiler to optimize this to a conditional-move instead of a branch
101        if length < MAX_SIZE {
102            *last_byte_ref = 0;
103        }
104
105        (buffer, length)
106    }
107
108    /// Set's the length of the content for this [`InlineBuffer`]
109    ///
110    /// # SAFETY:
111    /// * The caller must guarantee that `len` bytes in the buffer are valid UTF-8
112    #[inline]
113    pub unsafe fn set_len(&mut self, len: usize) {
114        debug_assert!(len <= MAX_SIZE);
115
116        // If `length` == MAX_SIZE, then we infer the length to be the capacity of the buffer. We
117        // can infer this because the way we encode length doesn't overlap with any valid UTF-8
118        // bytes
119        if len < MAX_SIZE {
120            self.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;
121        }
122    }
123}
124
125#[cfg(test)]
126mod tests {
127    #[rustversion::since(1.63)]
128    #[test]
129    #[ignore] // we run this in CI, but unless you're compiling in release, this takes a while
130    fn test_unused_utf8_bytes() {
131        use rayon::prelude::*;
132
133        // test to validate for all char the first and last bytes are never within a specified range
134        // note: according to the UTF-8 spec it shouldn't be, but we double check that here
135        (0..u32::MAX).into_par_iter().for_each(|i| {
136            if let Ok(c) = char::try_from(i) {
137                let mut buf = [0_u8; 4];
138                c.encode_utf8(&mut buf);
139
140                // check ranges for first byte
141                match buf[0] {
142                    x @ 128..=191 => panic!("first byte within 128..=191, {}", x),
143                    x @ 248..=255 => panic!("first byte within 248..=255, {}", x),
144                    _ => (),
145                }
146
147                // check ranges for last byte
148                if let x @ 192..=255 = buf[c.len_utf8() - 1] {
149                    panic!("last byte within 192..=255, {}", x)
150                }
151            }
152        })
153    }
154
155    #[cfg(feature = "smallvec")]
156    mod smallvec {
157        use alloc::string::String;
158
159        use quickcheck_macros::quickcheck;
160
161        use crate::repr::{
162            InlineBuffer,
163            MAX_SIZE,
164        };
165
166        #[test]
167        fn test_into_array() {
168            let s = "hello world!";
169
170            let inline = unsafe { InlineBuffer::new(s) };
171            let (array, length) = inline.into_array();
172
173            assert_eq!(s.len(), length);
174
175            // all bytes after the length should be 0
176            assert!(array[length..].iter().all(|b| *b == 0));
177
178            // taking a string slice should give back the same string as the original
179            let ex_s = unsafe { core::str::from_utf8_unchecked(&array[..length]) };
180            assert_eq!(s, ex_s);
181        }
182
183        #[quickcheck]
184        #[cfg_attr(miri, ignore)]
185        fn quickcheck_into_array(s: String) {
186            let mut total_length = 0;
187            let s: String = s
188                .chars()
189                .take_while(|c| {
190                    total_length += c.len_utf8();
191                    total_length < MAX_SIZE
192                })
193                .collect();
194
195            let inline = unsafe { InlineBuffer::new(&s) };
196            let (array, length) = inline.into_array();
197            assert_eq!(s.len(), length);
198
199            // all bytes after the length should be 0
200            assert!(array[length..].iter().all(|b| *b == 0));
201
202            // taking a string slice should give back the same string as the original
203            let ex_s = unsafe { core::str::from_utf8_unchecked(&array[..length]) };
204            assert_eq!(s, ex_s);
205        }
206    }
207}