compact_str/repr/inline.rs
1use core::ptr;
2
3use super::{
4 Repr,
5 LENGTH_MASK,
6 MAX_SIZE,
7};
8
/// A buffer stored on the stack whose size is equal to the stack size of `String`
///
/// This is the definition used on targets with 64-bit pointers, where the buffer is aligned to
/// 8 bytes. The wrapped array holds [`MAX_SIZE`] bytes.
#[cfg(target_pointer_width = "64")]
#[repr(C, align(8))]
pub struct InlineBuffer(pub [u8; MAX_SIZE]);
13
/// A buffer stored on the stack that wraps an array of [`MAX_SIZE`] bytes
///
/// This is the definition used on targets with 32-bit pointers, where the buffer is aligned to
/// 4 bytes.
#[cfg(target_pointer_width = "32")]
#[repr(C, align(4))]
pub struct InlineBuffer(pub [u8; MAX_SIZE]);
17
// Compile-time checks: `InlineBuffer` must have exactly the same size and alignment as `Repr`.
// NOTE(review): presumably this is what allows one to be reinterpreted as the other — confirm
// against the definition of `Repr`.
static_assertions::assert_eq_size!(InlineBuffer, Repr);
static_assertions::assert_eq_align!(InlineBuffer, Repr);
20
21impl InlineBuffer {
22 /// Construct a new [`InlineString`]. A string that lives in a small buffer on the stack
23 ///
24 /// SAFETY:
25 /// * The caller must guarantee that the length of `text` is less than [`MAX_SIZE`]
26 #[inline]
27 pub unsafe fn new(text: &str) -> Self {
28 debug_assert!(text.len() <= MAX_SIZE);
29
30 let len = text.len();
31 let mut buffer = InlineBuffer([0u8; MAX_SIZE]);
32
33 // set the length in the last byte
34 buffer.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;
35
36 // copy the string into our buffer
37 //
38 // note: in the case where len == MAX_SIZE, we'll overwrite the len, but that's okay because
39 // when reading the length we can detect that the last byte is part of UTF-8 and return a
40 // length of MAX_SIZE
41 //
42 // SAFETY:
43 // * src (`text`) is valid for `len` bytes because `len` comes from `text`
44 // * dst (`buffer`) is valid for `len` bytes because we assert src is less than MAX_SIZE
45 // * src and dst don't overlap because we created dst
46 //
47 ptr::copy_nonoverlapping(text.as_ptr(), buffer.0.as_mut_ptr(), len);
48
49 buffer
50 }
51
52 #[inline]
53 pub const fn new_const(text: &str) -> Self {
54 if text.len() > MAX_SIZE {
55 panic!("Provided string has a length greater than our MAX_SIZE");
56 }
57
58 let len = text.len();
59 let mut buffer = [0u8; MAX_SIZE];
60
61 // set the length
62 buffer[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;
63
64 // Note: for loops aren't allowed in `const fn`, hence the while.
65 // Note: Iterating forward results in badly optimized code, because the compiler tries to
66 // unroll the loop.
67 let text = text.as_bytes();
68 let mut i = len;
69 while i > 0 {
70 buffer[i - 1] = text[i - 1];
71 i -= 1;
72 }
73
74 InlineBuffer(buffer)
75 }
76
77 /// Returns an empty [`InlineBuffer`]
78 #[inline(always)]
79 pub const fn empty() -> Self {
80 Self::new_const("")
81 }
82
83 /// Consumes the [`InlineBuffer`] returning the entire underlying array and the length of the
84 /// string that it contains
85 #[inline]
86 #[cfg(feature = "smallvec")]
87 pub fn into_array(self) -> ([u8; MAX_SIZE], usize) {
88 let mut buffer = self.0;
89
90 let length = core::cmp::min(
91 (buffer[MAX_SIZE - 1].wrapping_sub(LENGTH_MASK)) as usize,
92 MAX_SIZE,
93 );
94
95 let last_byte_ref = &mut buffer[MAX_SIZE - 1];
96
97 // unset the last byte of the buffer if it's just storing the length of the string
98 //
99 // Note: we should never add an `else` statement here, keeping the conditional simple allows
100 // the compiler to optimize this to a conditional-move instead of a branch
101 if length < MAX_SIZE {
102 *last_byte_ref = 0;
103 }
104
105 (buffer, length)
106 }
107
108 /// Set's the length of the content for this [`InlineBuffer`]
109 ///
110 /// # SAFETY:
111 /// * The caller must guarantee that `len` bytes in the buffer are valid UTF-8
112 #[inline]
113 pub unsafe fn set_len(&mut self, len: usize) {
114 debug_assert!(len <= MAX_SIZE);
115
116 // If `length` == MAX_SIZE, then we infer the length to be the capacity of the buffer. We
117 // can infer this because the way we encode length doesn't overlap with any valid UTF-8
118 // bytes
119 if len < MAX_SIZE {
120 self.0[MAX_SIZE - 1] = len as u8 | LENGTH_MASK;
121 }
122 }
123}
124
#[cfg(test)]
mod tests {
    /// Checks, for every `char`, that the first and last byte of its UTF-8 encoding never fall
    /// into the byte ranges asserted below.
    #[rustversion::since(1.63)]
    #[test]
    #[ignore] // we run this in CI; left opt-in so existing test invocations are unchanged
    fn test_unused_utf8_bytes() {
        use rayon::prelude::*;

        // test to validate for all char the first and last bytes are never within a specified range
        // note: according to the UTF-8 spec it shouldn't be, but we double check that here
        //
        // Only values up to `char::MAX` can ever convert to a `char`, so walking the rest of the
        // `u32` range would add no coverage. `char::try_from` still filters out the values inside
        // this range that are not valid `char`s.
        (0..=u32::from(char::MAX)).into_par_iter().for_each(|i| {
            if let Ok(c) = char::try_from(i) {
                let mut buf = [0_u8; 4];
                c.encode_utf8(&mut buf);

                // check ranges for first byte
                match buf[0] {
                    x @ 128..=191 => panic!("first byte within 128..=191, {}", x),
                    x @ 248..=255 => panic!("first byte within 248..=255, {}", x),
                    _ => (),
                }

                // check ranges for last byte
                if let x @ 192..=255 = buf[c.len_utf8() - 1] {
                    panic!("last byte within 192..=255, {}", x)
                }
            }
        })
    }

    #[cfg(feature = "smallvec")]
    mod smallvec {
        use alloc::string::String;

        use quickcheck_macros::quickcheck;

        use crate::repr::{
            InlineBuffer,
            MAX_SIZE,
        };

        /// Builds an [`InlineBuffer`] from `s`, converts it with `into_array` and checks that the
        /// returned array and length describe the original string.
        ///
        /// `s` must be at most `MAX_SIZE` bytes long, as required by `InlineBuffer::new`.
        fn assert_into_array_roundtrip(s: &str) {
            // SAFETY: every caller passes a string that is at most MAX_SIZE bytes long
            let inline = unsafe { InlineBuffer::new(s) };
            let (array, length) = inline.into_array();

            assert_eq!(s.len(), length);

            // all bytes after the length should be 0
            assert!(array[length..].iter().all(|b| *b == 0));

            // taking a string slice should give back the same string as the original
            //
            // SAFETY: the first `length` bytes were copied from `s`, which is valid UTF-8
            let ex_s = unsafe { core::str::from_utf8_unchecked(&array[..length]) };
            assert_eq!(s, ex_s);
        }

        #[test]
        fn test_into_array() {
            assert_into_array_roundtrip("hello world!");
        }

        #[quickcheck]
        #[cfg_attr(miri, ignore)]
        fn quickcheck_into_array(s: String) {
            // keep only as many leading chars as fit in fewer than MAX_SIZE bytes
            let mut total_length = 0;
            let s: String = s
                .chars()
                .take_while(|c| {
                    total_length += c.len_utf8();
                    total_length < MAX_SIZE
                })
                .collect();

            assert_into_array_roundtrip(&s);
        }
    }
}