compact_str/repr/
heap.rs

1use core::alloc::Layout;
2use core::{
3    cmp,
4    mem,
5    ptr,
6};
7
8use super::capacity::Capacity;
9use super::{
10    Repr,
11    MAX_SIZE,
12};
13use crate::{
14    ReserveError,
15    UnwrapWithMsg,
16};
17
18/// The minimum size we'll allocate on the heap is one usize larger than our max inline size
19const MIN_HEAP_SIZE: usize = MAX_SIZE + mem::size_of::<usize>();
20
21const UNKNOWN: usize = 0;
22pub type StrBuffer = [u8; UNKNOWN];
23
24/// [`HeapBuffer`] grows at an amortized rates of 1.5x
25///
26/// Note: this is different than [`std::string::String`], which grows at a rate of 2x. It's debated
27/// which is better, for now we'll stick with a rate of 1.5x
28#[inline(always)]
29pub fn amortized_growth(cur_len: usize, additional: usize) -> usize {
30    let required = cur_len.saturating_add(additional);
31    let amortized = cur_len.saturating_mul(3) / 2;
32    amortized.max(required)
33}
34
35#[repr(C)]
36pub struct HeapBuffer {
37    pub ptr: ptr::NonNull<u8>,
38    pub len: usize,
39    pub cap: Capacity,
40}
41static_assertions::assert_eq_size!(HeapBuffer, Repr);
42static_assertions::assert_eq_align!(HeapBuffer, Repr);
43
44impl HeapBuffer {
45    /// Create a [`HeapBuffer`] with the provided text
46    #[inline]
47    pub fn new(text: &str) -> Result<Self, ReserveError> {
48        let len = text.len();
49        let (cap, ptr) = allocate_ptr(len)?;
50
51        // copy our string into the buffer we just allocated
52        //
53        // SAFETY: We know both `src` and `dest` are valid for respectively reads and writes of
54        // length `len` because `len` comes from `src`, and `dest` was allocated to be at least that
55        // length. We also know they're non-overlapping because `dest` is newly allocated
56        unsafe { ptr.as_ptr().copy_from_nonoverlapping(text.as_ptr(), len) };
57
58        Ok(HeapBuffer { ptr, len, cap })
59    }
60
61    /// Create an empty [`HeapBuffer`] with a specific capacity
62    #[inline]
63    pub fn with_capacity(capacity: usize) -> Result<Self, ReserveError> {
64        let len = 0;
65        let (cap, ptr) = allocate_ptr(capacity)?;
66
67        Ok(HeapBuffer { ptr, len, cap })
68    }
69
70    /// Create a [`HeapBuffer`] with `text` that has _at least_ `additional` bytes of capacity
71    ///
72    /// To prevent frequent re-allocations, this method will create a [`HeapBuffer`] with a capacity
73    /// of `text.len() + additional` or `text.len() * 1.5`, whichever is greater
74    #[inline]
75    pub fn with_additional(text: &str, additional: usize) -> Result<Self, ReserveError> {
76        let len = text.len();
77        let new_capacity = amortized_growth(len, additional);
78        let (cap, ptr) = allocate_ptr(new_capacity)?;
79
80        // copy our string into the buffer we just allocated
81        //
82        // SAFETY: We know both `src` and `dest` are valid for respectively reads and writes of
83        // length `len` because `len` comes from `src`, and `dest` was allocated to be at least that
84        // length. We also know they're non-overlapping because `dest` is newly allocated
85        unsafe { ptr.as_ptr().copy_from_nonoverlapping(text.as_ptr(), len) };
86
87        Ok(HeapBuffer { ptr, len, cap })
88    }
89
90    /// Return the capacity of the [`HeapBuffer`]
91    #[inline]
92    pub fn capacity(&self) -> usize {
93        #[cold]
94        fn read_capacity_from_heap(this: &HeapBuffer) -> usize {
95            // re-adjust the pointer to include the capacity that's on the heap
96            let adj_ptr: *const u8 = this.ptr.as_ptr().wrapping_sub(mem::size_of::<usize>());
97            let mut buf = [0u8; mem::size_of::<usize>()];
98            // SAFETY: `src` and `dst` don't overlap, and are valid for usize number of bytes
99            unsafe {
100                ptr::copy_nonoverlapping(adj_ptr, buf.as_mut_ptr(), mem::size_of::<usize>());
101            }
102            usize::from_ne_bytes(buf)
103        }
104
105        if self.cap.is_heap() {
106            read_capacity_from_heap(self)
107        } else {
108            // SAFETY: Checked above that the capacity is on the stack
109            unsafe { self.cap.as_usize() }
110        }
111    }
112
113    /// Try to grow the [`HeapBuffer`] by reallocating, returning an error if we fail
114    pub fn realloc(&mut self, new_capacity: usize) -> Result<usize, ()> {
115        let new_cap = Capacity::new(new_capacity);
116
117        // We can't reallocate to a size less than our length, or else we'd clip the string
118        if new_capacity < self.len {
119            return Err(());
120        }
121
122        // HeapBuffer doesn't support 0 byte heap sizes
123        if new_capacity == 0 {
124            return Err(());
125        }
126
127        // Always allocate at least MIN_HEAP_SIZE
128        let new_capacity = cmp::max(new_capacity, MIN_HEAP_SIZE);
129
130        let (new_cap, new_ptr) = match (self.cap.is_heap(), new_cap.is_heap()) {
131            // both current and new capacity can be stored inline
132            (false, false) => {
133                // SAFETY: checked above that our capacity is valid
134                let cap = unsafe { self.cap.as_usize() };
135
136                // current capacity is the same as the new, nothing to do!
137                if cap == new_capacity {
138                    return Ok(new_capacity);
139                }
140
141                let cur_layout = inline_capacity::layout(cap);
142                let new_layout = inline_capacity::layout(new_capacity);
143                let new_size = new_layout.size();
144
145                // It's possible `new_size` could overflow since inline_capacity::layout pads for
146                // alignment
147                if new_size < new_capacity {
148                    return Err(());
149                }
150
151                // SAFETY:
152                // * We're using the same allocator that we used for `ptr`
153                // * The layout is the same because we checked that the capacity is inline
154                // * `new_size` will be > 0, we return early if the requested capacity is 0
155                // * Checked above if `new_size` overflowed when rounding to alignment
156                match ptr::NonNull::new(unsafe {
157                    ::alloc::alloc::realloc(self.ptr.as_ptr(), cur_layout, new_size)
158                }) {
159                    Some(ptr) => (new_cap, ptr),
160                    None => return Err(()),
161                }
162            }
163            // both current and new capacity need to be stored on the heap
164            (true, true) => {
165                let cur_layout = heap_capacity::layout(self.capacity());
166                let new_layout = heap_capacity::layout(new_capacity);
167                let new_size = new_layout.size();
168
169                // alloc::realloc requires that size > 0
170                debug_assert!(new_size > 0);
171
172                // It's possible `new_size` could overflow since heap_capacity::layout requires a
173                // few additional bytes
174                if new_size < new_capacity {
175                    return Err(());
176                }
177
178                // move our pointer back one WORD since our capacity is behind it
179                let raw_ptr = self.ptr.as_ptr();
180                let adj_ptr = raw_ptr.wrapping_sub(mem::size_of::<usize>());
181
182                // SAFETY:
183                // * We're using the same allocator that we used for `ptr`
184                // * The layout is the same because we checked that the capacity is on the heap
185                // * `new_size` will be > 0, we return early if the requested capacity is 0
186                // * Checked above if `new_size` overflowed when rounding to alignment
187                let cap_ptr = unsafe { alloc::alloc::realloc(adj_ptr, cur_layout, new_size) };
188                // Check if reallocation succeeded
189                if cap_ptr.is_null() {
190                    return Err(());
191                }
192
193                // Our allocation succeeded! Write the new capacity
194                //
195                // SAFETY:
196                // * `src` and `dst` are both valid for reads of `usize` number of bytes
197                // * `src` and `dst` don't overlap because we created `src`
198                unsafe {
199                    ptr::copy_nonoverlapping(
200                        new_capacity.to_ne_bytes().as_ptr(),
201                        cap_ptr,
202                        mem::size_of::<usize>(),
203                    )
204                };
205
206                // Finally, adjust our pointer backup so it points at the string content
207                let str_ptr = cap_ptr.wrapping_add(mem::size_of::<usize>());
208                // SAFETY: We checked above to make sure the pointer was non-null
209                let ptr = unsafe { ptr::NonNull::new_unchecked(str_ptr) };
210
211                (new_cap, ptr)
212            }
213            // capacity is currently inline or on the heap, but needs to move, can't realloc because
214            // we'd need to change the layout!
215            (false, true) | (true, false) => return Err(()),
216        };
217
218        // set our new pointer and new capacity
219        self.ptr = new_ptr;
220        self.cap = new_cap;
221
222        Ok(new_capacity)
223    }
224
225    /// Set's the length of the content for this [`HeapBuffer`]
226    ///
227    /// # SAFETY:
228    /// * The caller must guarantee that `len` bytes in the buffer are valid UTF-8
229    #[inline]
230    pub unsafe fn set_len(&mut self, len: usize) {
231        self.len = len;
232    }
233
234    /// Deallocates the memory owned by the provided [`HeapBuffer`]
235    #[inline]
236    pub fn dealloc(&mut self) {
237        deallocate_ptr(self.ptr, self.cap);
238    }
239}
240
241impl Clone for HeapBuffer {
242    fn clone(&self) -> Self {
243        // Create a new HeapBuffer with the same capacity as the original
244        let mut new = Self::with_capacity(self.capacity()).unwrap_with_msg();
245
246        // SAFETY:
247        // * `src` and `dst` don't overlap because we just created `dst`
248        // * `src` and `dst` are both valid for `self.len` bytes because self.len < capacity
249        unsafe {
250            new.ptr
251                .as_ptr()
252                .copy_from_nonoverlapping(self.ptr.as_ptr(), self.len)
253        };
254        // SAFETY:
255        // * We copied the text from self, which is valid UTF-8
256        unsafe { new.set_len(self.len) };
257
258        new
259    }
260}
261
262impl Drop for HeapBuffer {
263    fn drop(&mut self) {
264        self.dealloc()
265    }
266}
267
268/// Allocates a buffer on the heap that we can use to store a string, optionally stores the capacity
269/// of said buffer on the heap.
270///
271/// Returns a [`Capacity`] that either indicates the capacity is stored on the heap, or is stored
272/// in the `Capacity` itself.
273#[inline]
274pub fn allocate_ptr(capacity: usize) -> Result<(Capacity, ptr::NonNull<u8>), ReserveError> {
275    // We allocate at least MIN_HEAP_SIZE bytes because we need to allocate at least one byte
276    let capacity = capacity.max(MIN_HEAP_SIZE);
277    let cap = Capacity::new(capacity);
278
279    // HeapBuffer doesn't support 0 sized allocations, we should always allocate at least
280    // MIN_HEAP_SIZE bytes
281    debug_assert!(capacity > 0);
282
283    #[cold]
284    fn allocate_with_capacity_on_heap(capacity: usize) -> Result<ptr::NonNull<u8>, ReserveError> {
285        // write our capacity onto the heap
286        // SAFETY: we know that the capacity is not zero
287        let ptr = unsafe { heap_capacity::alloc(capacity)? };
288        // SAFETY:
289        // * `src` and `dst` don't overlap and are both valid for `usize` bytes
290        unsafe {
291            ptr::copy_nonoverlapping(
292                capacity.to_ne_bytes().as_ptr(),
293                ptr.as_ptr(),
294                mem::size_of::<usize>(),
295            )
296        };
297        let raw_ptr = ptr.as_ptr().wrapping_add(core::mem::size_of::<usize>());
298        // SAFETY: We know `raw_ptr` is non-null because we just created it
299        Ok(unsafe { ptr::NonNull::new_unchecked(raw_ptr) })
300    }
301
302    let ptr = if cap.is_heap() {
303        allocate_with_capacity_on_heap(capacity)
304    } else {
305        unsafe { inline_capacity::alloc(capacity) }
306    };
307
308    Ok((cap, ptr?))
309}
310
311/// Deallocates a buffer on the heap, handling when the capacity is also stored on the heap
312#[inline]
313pub fn deallocate_ptr(ptr: ptr::NonNull<u8>, cap: Capacity) {
314    #[cold]
315    fn deallocate_with_capacity_on_heap(ptr: ptr::NonNull<u8>) {
316        // re-adjust the pointer to include the capacity that's on the heap
317        let adj_ptr = ptr.as_ptr().wrapping_sub(mem::size_of::<usize>());
318        // read the capacity from the heap so we know how much to deallocate
319        let mut buf = [0u8; mem::size_of::<usize>()];
320        // SAFETY: `src` and `dst` don't overlap, and are valid for usize number of bytes
321        unsafe {
322            ptr::copy_nonoverlapping(adj_ptr, buf.as_mut_ptr(), mem::size_of::<usize>());
323        }
324        let capacity = usize::from_ne_bytes(buf);
325        // SAFETY: We know the pointer is not null since we got it as a NonNull
326        let ptr = unsafe { ptr::NonNull::new_unchecked(adj_ptr) };
327        // SAFETY: We checked above that our capacity is on the heap, and we readjusted the
328        // pointer to reference the capacity
329        unsafe { heap_capacity::dealloc(ptr, capacity) }
330    }
331
332    if cap.is_heap() {
333        deallocate_with_capacity_on_heap(ptr);
334    } else {
335        // SAFETY: Our capacity is always inline on 64-bit archs
336        unsafe { inline_capacity::dealloc(ptr, cap.as_usize()) }
337    }
338}
339
340/// SAFETY: `layout` must not be zero sized
341#[inline]
342pub unsafe fn do_alloc(layout: Layout) -> Result<ptr::NonNull<u8>, ReserveError> {
343    debug_assert!(layout.size() > 0);
344
345    // SAFETY: `alloc(...)` has undefined behavior if the layout is zero-sized. We specify that
346    // `capacity` must be > 0 as a constraint to uphold the safety of this method. If capacity
347    // is greater than 0, then our layout will be non-zero-sized.
348    let raw_ptr = ::alloc::alloc::alloc(layout);
349
350    // Check to make sure our pointer is non-null.
351    // Implementations are encouraged to return null on memory exhaustion rather than aborting.
352    ptr::NonNull::new(raw_ptr).ok_or(ReserveError(()))
353}
354
355mod heap_capacity {
356    use core::{
357        alloc,
358        ptr,
359    };
360
361    use super::{
362        do_alloc,
363        StrBuffer,
364    };
365    use crate::ReserveError;
366
367    /// SAFETY: `capacity` must not be zero
368    pub unsafe fn alloc(capacity: usize) -> Result<ptr::NonNull<u8>, ReserveError> {
369        do_alloc(layout(capacity))
370    }
371
372    /// Deallocates a pointer which references a `HeapBuffer` whose capacity is on the heap
373    ///
374    /// # Safety
375    /// * `ptr` must point to the start of a `HeapBuffer` whose capacity is on the heap. i.e. we
376    ///   must have `ptr -> [cap<usize> ; string<bytes>]`
377    pub unsafe fn dealloc(ptr: ptr::NonNull<u8>, capacity: usize) {
378        let layout = layout(capacity);
379        ::alloc::alloc::dealloc(ptr.as_ptr(), layout);
380    }
381
382    #[repr(C)]
383    struct HeapBufferInnerHeapCapacity {
384        capacity: usize,
385        buffer: StrBuffer,
386    }
387
388    #[inline(always)]
389    pub fn layout(capacity: usize) -> alloc::Layout {
390        let buffer_layout = alloc::Layout::array::<u8>(capacity).expect("valid capacity");
391        alloc::Layout::new::<HeapBufferInnerHeapCapacity>()
392            .extend(buffer_layout)
393            .expect("valid layout")
394            .0
395            .pad_to_align()
396    }
397}
398
399mod inline_capacity {
400    use core::{
401        alloc,
402        ptr,
403    };
404
405    use super::{
406        do_alloc,
407        StrBuffer,
408    };
409    use crate::ReserveError;
410
411    /// # SAFETY:
412    /// * `capacity` must be > 0
413    pub unsafe fn alloc(capacity: usize) -> Result<ptr::NonNull<u8>, ReserveError> {
414        do_alloc(layout(capacity))
415    }
416
417    /// Deallocates a pointer which references a `HeapBuffer` whose capacity is stored inline
418    ///
419    /// # Safety
420    /// * `ptr` must point to the start of a `HeapBuffer` whose capacity is on the inline
421    pub unsafe fn dealloc(ptr: ptr::NonNull<u8>, capacity: usize) {
422        let layout = layout(capacity);
423        ::alloc::alloc::dealloc(ptr.as_ptr(), layout);
424    }
425
426    #[repr(C)]
427    struct HeapBufferInnerInlineCapacity {
428        buffer: StrBuffer,
429    }
430
431    #[inline(always)]
432    pub fn layout(capacity: usize) -> alloc::Layout {
433        let buffer_layout = alloc::Layout::array::<u8>(capacity).expect("valid capacity");
434        alloc::Layout::new::<HeapBufferInnerInlineCapacity>()
435            .extend(buffer_layout)
436            .expect("valid layout")
437            .0
438            .pad_to_align()
439    }
440}
441
442#[cfg(test)]
443mod test {
444    use test_case::test_case;
445
446    use super::{
447        HeapBuffer,
448        MIN_HEAP_SIZE,
449    };
450
451    const EIGHTEEN_MB: usize = 18 * 1024 * 1024;
452
453    #[test]
454    fn test_min_capacity() {
455        let h = HeapBuffer::new("short").unwrap();
456        assert_eq!(h.capacity(), MIN_HEAP_SIZE);
457    }
458
459    #[test_case(&[42; 8]; "short")]
460    #[test_case(&[42; 50]; "long")]
461    #[test_case(&[42; EIGHTEEN_MB]; "huge")]
462    fn test_capacity(buf: &[u8]) {
463        // we know the buffer is valid UTF-8
464        let s = unsafe { core::str::from_utf8_unchecked(buf) };
465        let h = HeapBuffer::new(s).unwrap();
466
467        assert_eq!(h.capacity(), core::cmp::max(s.len(), MIN_HEAP_SIZE));
468    }
469
470    #[test_case(&[42; 0], 0, Err(MIN_HEAP_SIZE); "empty_empty")]
471    #[test_case(&[42; 64], 0, Err(64); "short_empty")]
472    #[test_case(&[42; 64], 32, Err(64); "short_to_shorter")]
473    #[test_case(&[42; 64], 128, Ok(128); "short_to_longer")]
474    #[test_case(&[42; EIGHTEEN_MB], EIGHTEEN_MB + 128, Ok(EIGHTEEN_MB + 128); "heap_to_heap")]
475    fn test_realloc(buf: &[u8], realloc: usize, result: Result<usize, usize>) {
476        // we know the buffer is valid UTF-8
477        let s = unsafe { core::str::from_utf8_unchecked(buf) };
478        let mut h = HeapBuffer::new(s).unwrap();
479
480        // reallocate, asserting our result
481        let expected_cap = match result {
482            Ok(c) | Err(c) => c,
483        };
484        let expected_res = result.map_err(|_| ());
485        assert_eq!(h.realloc(realloc), expected_res);
486        assert_eq!(h.capacity(), expected_cap);
487    }
488
489    #[test]
490    fn test_realloc_inline_to_heap() {
491        // we know the buffer is valid UTF-8
492        let s = unsafe { core::str::from_utf8_unchecked(&[42; 128]) };
493        let mut h = HeapBuffer::new(s).unwrap();
494
495        cfg_if::cfg_if! {
496            if #[cfg(target_pointer_width = "64")] {
497                let expected_result = Ok(EIGHTEEN_MB);
498                let expected_capacity = EIGHTEEN_MB;
499            } else if #[cfg(target_pointer_width = "32")] {
500                // on 32-bit architectures we'd need to change the layout from capacity being inline
501                // to the capacity being on the heap, which isn't possible
502                let expected_result = Err(());
503                let expected_capacity = 128;
504            } else {
505                compile_error!("Unsupported pointer width!");
506            }
507        }
508        assert_eq!(h.realloc(EIGHTEEN_MB), expected_result);
509        assert_eq!(h.capacity(), expected_capacity);
510    }
511
512    #[test_case(&[42; 64], 128, 100, Ok(100); "sanity")]
513    fn test_realloc_shrink(
514        buf: &[u8],
515        realloc_one: usize,
516        realloc_two: usize,
517        exp_result: Result<usize, usize>,
518    ) {
519        // we know the buffer is valid UTF-8
520        let s = unsafe { core::str::from_utf8_unchecked(buf) };
521        let mut h = HeapBuffer::new(s).unwrap();
522
523        assert!(
524            realloc_one > realloc_two,
525            "we have to grow before we can shrink"
526        );
527
528        // grow our allocation
529        assert_eq!(h.realloc(realloc_one), Ok(realloc_one));
530
531        // shrink our allocation, asserting our result
532        let expected_cap = match exp_result {
533            Ok(c) | Err(c) => c,
534        };
535        let expected_res = exp_result.map_err(|_| ());
536        assert_eq!(h.realloc(realloc_two), expected_res);
537        assert_eq!(h.capacity(), expected_cap);
538    }
539
540    #[test]
541    fn test_realloc_shrink_heap_to_inline() {
542        // TODO: test this case
543        assert_eq!(1, 1);
544    }
545
546    #[test_case(&[42; 0]; "empty")]
547    #[test_case(&[42; 3]; "short")]
548    #[test_case(&[42; 64]; "long")]
549    #[test_case(&[42; EIGHTEEN_MB]; "huge")]
550    fn test_clone(buf: &[u8]) {
551        let s = unsafe { core::str::from_utf8_unchecked(buf) };
552        let h_a = HeapBuffer::new(s).unwrap();
553        let h_b = h_a.clone();
554
555        assert_eq!(h_a.capacity(), h_b.capacity());
556    }
557}