compact_str/repr/
capacity.rs

1use core::fmt;
2
3use crate::repr::LastUtf8Char;
4
// width of a `usize` on the target, measured in bytes
const USIZE_SIZE: usize = (usize::BITS / u8::BITS) as usize;
7
8/// Mask of bits in [`Capacity`] that encode the value.
9const VALID_MASK: usize = {
10    let mut bytes = [255; USIZE_SIZE];
11    bytes[USIZE_SIZE - 1] = 0;
12    usize::from_ne_bytes(bytes)
13};
14
15/// Mask of bits that are set in [`Capacity`] if the string data is stored on the heap.
16const HEAP_MARKER: usize = {
17    let mut bytes = [0; USIZE_SIZE];
18    bytes[USIZE_SIZE - 1] = LastUtf8Char::Heap as u8;
19    usize::from_ne_bytes(bytes)
20};
21
22/// State that describes the capacity as being stored on the heap.
23///
24/// All bytes `255`, with the last being [`LastUtf8Char::Heap`], using the same amount of bytes
25/// as `usize`. Example (64-bit): `[255, 255, 255, 255, 255, 255, 255, 216]`
26const CAPACITY_IS_ON_THE_HEAP: Capacity = Capacity(VALID_MASK | HEAP_MARKER);
27
28/// The maximum value we're able to store, e.g. on 64-bit arch this is 2^56 - 2.
29pub const MAX_VALUE: usize = {
30    let mut bytes = [255; USIZE_SIZE];
31    bytes[USIZE_SIZE - 1] = 0;
32    usize::from_le_bytes(bytes) - 1
33};
34
/// An integer type that uses `core::mem::size_of::<usize>() - 1` bytes to store the capacity of
/// a heap buffer.
///
/// Assuming a 64-bit arch, a [`super::BoxString`] uses 8 bytes for a pointer, 8 bytes for a
/// length, and then needs 1 byte for a discriminant. We need to store the capacity somewhere, and
/// we could store it on the heap, but we also have 7 unused bytes. [`Capacity`] handles storing a
/// value in these 7 bytes. When the value doesn't fit, [`Capacity::new`] produces a sentinel for
/// which [`Capacity::is_heap`] returns `true`, at which point we'll store the capacity on the
/// heap.
///
/// # Max Values
/// * __64-bit:__ `(2 ^ (7 * 8)) - 2 = 72_057_594_037_927_934 ~= 64 petabytes`
/// * __32-bit:__ `(2 ^ (3 * 8)) - 2 = 16_777_214             ~= 16 megabytes`
///
/// The maximum is `- 2` rather than `- 1` because the pattern with every value bit set is the
/// one used by the "capacity is on the heap" sentinel.
///
/// Practically speaking, on a 64-bit architecture we'll never need to store the capacity on the
/// heap, because it's impossible to create a string that is 64 petabytes or larger. But for
/// 32-bit architectures we need to be able to store a capacity larger than 16 megabytes, since a
/// string larger than 16 megabytes probably isn't that uncommon.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Capacity(usize);
55
// `Capacity` is a `#[repr(transparent)]` wrapper around a `usize`; assert that it has exactly
// the same size and alignment as one.
static_assertions::assert_eq_size!(Capacity, usize);
static_assertions::assert_eq_align!(Capacity, usize);
58
59impl fmt::Debug for Capacity {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        write!(f, "Capacity(0x{:x})", usize::from_le(self.0))
62    }
63}
64
65impl Capacity {
66    #[inline]
67    pub const fn new(capacity: usize) -> Self {
68        cfg_if::cfg_if! {
69            if #[cfg(target_pointer_width = "64")] {
70                // on 64-bit arches we can always fit the capacity inline
71                debug_assert!(capacity <= MAX_VALUE);
72
73                Capacity(capacity.to_le() | HEAP_MARKER)
74            } else if #[cfg(target_pointer_width = "32")] {
75                // on 32-bit arches we might need to store the capacity on the heap
76                if capacity > MAX_VALUE {
77                    // if we need the last byte to encode this capacity then we need to put the capacity on
78                    // the heap. return an Error so `BoxString` can do the right thing
79                    CAPACITY_IS_ON_THE_HEAP
80                } else {
81                    // otherwise, we can store this capacity inline! Set the last byte to be our `LastUtf8Char::Heap as u8`
82                    // for our discriminant, using the leading bytes to store the actual value
83                    Capacity(capacity.to_le() | HEAP_MARKER)
84                }
85            } else {
86                compile_error!("Unsupported target_pointer_width");
87            }
88        }
89    }
90
91    /// Re-interprets a [`Capacity`] as a `usize`
92    ///
93    /// # SAFETY:
94    /// * `self` must be less than or equal to [`MAX_VALUE`]
95    #[inline(always)]
96    pub unsafe fn as_usize(self) -> usize {
97        usize::from_le(self.0 & VALID_MASK)
98    }
99
100    /// Returns whether or not this [`Capacity`] has a value that indicates the capacity is being
101    /// stored on the heap
102    #[inline(always)]
103    pub fn is_heap(self) -> bool {
104        self == CAPACITY_IS_ON_THE_HEAP
105    }
106}
107
#[cfg(test)]
mod tests {
    use super::Capacity;

    /// Encodes `value` as a [`Capacity`] and decodes it straight back.
    fn roundtrip(value: usize) -> usize {
        let encoded = Capacity::new(value);
        // `as_usize` only masks and byte-swaps the stored word, so calling it on whatever `new`
        // produced is fine for these tests
        unsafe { encoded.as_usize() }
    }

    #[test]
    fn test_zero_roundtrips() {
        let original: usize = 0;

        assert_eq!(original, roundtrip(original));
    }

    #[test]
    fn test_max_value() {
        // every byte except the last one is available for the value
        let value_bits = 8 * (core::mem::size_of::<usize>() - 1) as u32;
        let max_value = (1usize << value_bits) - 2;

        #[cfg(target_pointer_width = "64")]
        assert_eq!(max_value, 72057594037927934);
        #[cfg(target_pointer_width = "32")]
        assert_eq!(max_value, 16777214);

        assert_eq!(max_value, roundtrip(max_value));
    }

    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_invalid_value() {
        // anything greater than or equal to 16777215, should "resolve" to 16777215
        assert_eq!(16777215, roundtrip(usize::MAX));
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_all_valid_32bit_values() {
        #[cfg(target_pointer_width = "32")]
        assert_eq!(16_777_214, super::MAX_VALUE);

        // exhaustively check every value that fits inline on a 32-bit arch
        (0..=16_777_214usize).for_each(|expected| {
            assert_eq!(roundtrip(expected), expected, "value roundtriped to wrong value?");
        });
    }
}