compact_str/repr/capacity.rs
1use core::fmt;
2
3use crate::repr::LastUtf8Char;
4
// Width of a `usize` in bytes; a `usize` is `usize::BITS` bits wide and a byte is 8 bits.
const USIZE_SIZE: usize = usize::BITS as usize / 8;

/// Mask selecting the bytes of a [`Capacity`] that carry the encoded value.
///
/// In native (in-memory) byte order every byte is `255` except the final one, which is `0`
/// because that byte is reserved for the discriminant.
const VALID_MASK: usize = {
    // start from an all-ones word and clear only the last byte in memory
    let mut mask_bytes = usize::MAX.to_ne_bytes();
    mask_bytes[USIZE_SIZE - 1] = 0;
    usize::from_ne_bytes(mask_bytes)
};
14
15/// Mask of bits that are set in [`Capacity`] if the string data is stored on the heap.
16const HEAP_MARKER: usize = {
17 let mut bytes = [0; USIZE_SIZE];
18 bytes[USIZE_SIZE - 1] = LastUtf8Char::Heap as u8;
19 usize::from_ne_bytes(bytes)
20};
21
22/// State that describes the capacity as being stored on the heap.
23///
24/// All bytes `255`, with the last being [`LastUtf8Char::Heap`], using the same amount of bytes
25/// as `usize`. Example (64-bit): `[255, 255, 255, 255, 255, 255, 255, 216]`
26const CAPACITY_IS_ON_THE_HEAP: Capacity = Capacity(VALID_MASK | HEAP_MARKER);
27
28/// The maximum value we're able to store, e.g. on 64-bit arch this is 2^56 - 2.
29pub const MAX_VALUE: usize = {
30 let mut bytes = [255; USIZE_SIZE];
31 bytes[USIZE_SIZE - 1] = 0;
32 usize::from_le_bytes(bytes) - 1
33};
34
/// An integer type that stores the capacity of a heap buffer in
/// `core::mem::size_of::<usize>() - 1` bytes, leaving the final byte for a discriminant.
///
/// Assuming a 64-bit arch, a [`super::BoxString`] uses 8 bytes for a pointer, 8 bytes for a
/// length, and then needs 1 byte for a discriminant. The capacity has to live somewhere; it could
/// go on the heap, but there are also 7 otherwise unused bytes. [`Capacity`] stores the value in
/// those 7 bytes. On 32-bit targets, where only 3 bytes are available, a value that does not fit
/// makes [`Capacity::new`] produce a sentinel (see [`Capacity::is_heap`]) that tells the caller to
/// store the capacity on the heap instead.
///
/// # Max Values
/// * __64-bit:__ `(2 ^ (7 * 8)) - 2 = 72_057_594_037_927_934 ~= 64 petabytes`
/// * __32-bit:__ `(2 ^ (3 * 8)) - 2 = 16_777_214 ~= 16 megabytes`
///
/// Practically speaking, on a 64-bit architecture we'll never need to store the capacity on the
/// heap, because it's impossible to create a string that is 64 petabytes or larger. On 32-bit
/// architectures, however, we need to be able to store a capacity larger than 16 megabytes, since
/// a string larger than 16 megabytes probably isn't that uncommon.
#[repr(transparent)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub struct Capacity(usize);
55
56static_assertions::assert_eq_size!(Capacity, usize);
57static_assertions::assert_eq_align!(Capacity, usize);
58
59impl fmt::Debug for Capacity {
60 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61 write!(f, "Capacity(0x{:x})", usize::from_le(self.0))
62 }
63}
64
65impl Capacity {
66 #[inline]
67 pub const fn new(capacity: usize) -> Self {
68 cfg_if::cfg_if! {
69 if #[cfg(target_pointer_width = "64")] {
70 // on 64-bit arches we can always fit the capacity inline
71 debug_assert!(capacity <= MAX_VALUE);
72
73 Capacity(capacity.to_le() | HEAP_MARKER)
74 } else if #[cfg(target_pointer_width = "32")] {
75 // on 32-bit arches we might need to store the capacity on the heap
76 if capacity > MAX_VALUE {
77 // if we need the last byte to encode this capacity then we need to put the capacity on
78 // the heap. return an Error so `BoxString` can do the right thing
79 CAPACITY_IS_ON_THE_HEAP
80 } else {
81 // otherwise, we can store this capacity inline! Set the last byte to be our `LastUtf8Char::Heap as u8`
82 // for our discriminant, using the leading bytes to store the actual value
83 Capacity(capacity.to_le() | HEAP_MARKER)
84 }
85 } else {
86 compile_error!("Unsupported target_pointer_width");
87 }
88 }
89 }
90
91 /// Re-interprets a [`Capacity`] as a `usize`
92 ///
93 /// # SAFETY:
94 /// * `self` must be less than or equal to [`MAX_VALUE`]
95 #[inline(always)]
96 pub unsafe fn as_usize(self) -> usize {
97 usize::from_le(self.0 & VALID_MASK)
98 }
99
100 /// Returns whether or not this [`Capacity`] has a value that indicates the capacity is being
101 /// stored on the heap
102 #[inline(always)]
103 pub fn is_heap(self) -> bool {
104 self == CAPACITY_IS_ON_THE_HEAP
105 }
106}
107
#[cfg(test)]
mod tests {
    use super::Capacity;

    /// Encodes `value` with [`Capacity::new`] and immediately decodes it with
    /// [`Capacity::as_usize`].
    fn roundtrip(value: usize) -> usize {
        let encoded = Capacity::new(value);
        // SAFETY: `as_usize` only masks and byte-swaps the stored integer, so the call itself is
        // sound for any `Capacity`; the tests decide whether the decoded number is meaningful.
        unsafe { encoded.as_usize() }
    }

    #[test]
    fn test_zero_roundtrips() {
        assert_eq!(0, roundtrip(0));
    }

    #[test]
    fn test_max_value() {
        // every bit of a `usize` except the final byte is available for the value
        let value_bits = usize::BITS - 8;
        let max_value = (1usize << value_bits) - 2;

        #[cfg(target_pointer_width = "64")]
        assert_eq!(max_value, 72057594037927934);
        #[cfg(target_pointer_width = "32")]
        assert_eq!(max_value, 16777214);

        assert_eq!(max_value, roundtrip(max_value));
    }

    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_invalid_value() {
        // anything greater than or equal to 16777215 becomes the heap sentinel, whose value
        // bytes decode to 16777215
        assert_eq!(16777215, roundtrip(usize::MAX));
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_all_valid_32bit_values() {
        #[cfg(target_pointer_width = "32")]
        assert_eq!(16_777_214, super::MAX_VALUE);

        // exhaustively check every value that fits inline on a 32-bit arch
        (0..=16_777_214usize).for_each(|i| {
            assert_eq!(roundtrip(i), i, "value roundtriped to wrong value?");
        });
    }
}
161}