compact_str/repr/
capacity.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
use core::fmt;

use crate::repr::LastUtf8Char;

// how many bytes a `usize` occupies
const USIZE_SIZE: usize = core::mem::size_of::<usize>();

/// Mask of bits in [`Capacity`] that encode the value.
const VALID_MASK: usize = {
    let mut bytes = [255; USIZE_SIZE];
    bytes[USIZE_SIZE - 1] = 0;
    usize::from_ne_bytes(bytes)
};

/// Mask of bits that are set in [`Capacity`] if the string data is stored on the heap.
const HEAP_MARKER: usize = {
    let mut bytes = [0; USIZE_SIZE];
    bytes[USIZE_SIZE - 1] = LastUtf8Char::Heap as u8;
    usize::from_ne_bytes(bytes)
};

/// State that describes the capacity as being stored on the heap.
///
/// All bytes `255`, with the last being [`LastUtf8Char::Heap`], using the same amount of bytes
/// as `usize`. Example (64-bit): `[255, 255, 255, 255, 255, 255, 255, 216]`
const CAPACITY_IS_ON_THE_HEAP: Capacity = Capacity(VALID_MASK | HEAP_MARKER);

/// The maximum value we're able to store, e.g. on 64-bit arch this is 2^56 - 2.
pub const MAX_VALUE: usize = {
    let mut bytes = [255; USIZE_SIZE];
    bytes[USIZE_SIZE - 1] = 0;
    usize::from_le_bytes(bytes) - 1
};

/// An integer type that uses `core::mem::size_of::<usize>() - 1` bytes to store the capacity of
/// a heap buffer.
///
/// Assuming a 64-bit arch, a [`super::BoxString`] uses 8 bytes for a pointer, 8 bytes for a
/// length, and then needs 1 byte for a discriminant. We need to store the capacity somewhere, and
/// we could store it on the heap, but we also have 7 unused bytes. [`Capacity`] handles storing a
/// value in these 7 bytes, returning an error if it's not possible, at which point we'll store the
/// capacity on the heap.
///
/// # Max Values
/// * __64-bit:__ `(2 ^ (7 * 8)) - 2 = 72_057_594_037_927_934 ~= 64 petabytes`
/// * __32-bit:__ `(2 ^ (3 * 8)) - 2 = 16_777_214             ~= 16 megabytes`
///
/// Practically speaking, on a 64-bit architecture we'll never need to store the capacity on the
/// heap, because with it's impossible to create a string that is 64 petabytes or larger. But for
/// 32-bit architectures we need to be able to store a capacity larger than 16 megabytes, since a
/// string larger than 16 megabytes probably isn't that uncommon.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Capacity(usize);

static_assertions::assert_eq_size!(Capacity, usize);
static_assertions::assert_eq_align!(Capacity, usize);

impl fmt::Debug for Capacity {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "Capacity(0x{:x})", usize::from_le(self.0))
    }
}

impl Capacity {
    #[inline]
    pub const fn new(capacity: usize) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(target_pointer_width = "64")] {
                // on 64-bit arches we can always fit the capacity inline
                debug_assert!(capacity <= MAX_VALUE);

                Capacity(capacity.to_le() | HEAP_MARKER)
            } else if #[cfg(target_pointer_width = "32")] {
                // on 32-bit arches we might need to store the capacity on the heap
                if capacity > MAX_VALUE {
                    // if we need the last byte to encode this capacity then we need to put the capacity on
                    // the heap. return an Error so `BoxString` can do the right thing
                    CAPACITY_IS_ON_THE_HEAP
                } else {
                    // otherwise, we can store this capacity inline! Set the last byte to be our `LastUtf8Char::Heap as u8`
                    // for our discriminant, using the leading bytes to store the actual value
                    Capacity(capacity.to_le() | HEAP_MARKER)
                }
            } else {
                compile_error!("Unsupported target_pointer_width");
            }
        }
    }

    /// Re-interprets a [`Capacity`] as a `usize`
    ///
    /// # SAFETY:
    /// * `self` must be less than or equal to [`MAX_VALUE`]
    #[inline(always)]
    pub unsafe fn as_usize(self) -> usize {
        usize::from_le(self.0 & VALID_MASK)
    }

    /// Returns whether or not this [`Capacity`] has a value that indicates the capacity is being
    /// stored on the heap
    #[inline(always)]
    pub fn is_heap(self) -> bool {
        self == CAPACITY_IS_ON_THE_HEAP
    }
}

#[cfg(test)]
mod tests {
    use super::Capacity;

    #[test]
    fn test_zero_roundtrips() {
        let og = 0;
        let cap = Capacity::new(og);
        let after = unsafe { cap.as_usize() };

        assert_eq!(og, after);
    }

    #[test]
    fn test_max_value() {
        let available_bytes = (core::mem::size_of::<usize>() - 1) as u32;
        let max_value = 2usize.pow(available_bytes * 8) - 2;

        #[cfg(target_pointer_width = "64")]
        assert_eq!(max_value, 72057594037927934);
        #[cfg(target_pointer_width = "32")]
        assert_eq!(max_value, 16777214);

        let cap = Capacity::new(max_value);
        let after = unsafe { cap.as_usize() };

        assert_eq!(max_value, after);
    }

    #[cfg(target_pointer_width = "32")]
    #[test]
    fn test_invalid_value() {
        let invalid_val = usize::MAX;
        let cap = Capacity::new(invalid_val);
        let after = unsafe { cap.as_usize() };

        // anything greater than or equal to 16777215, should "resolve" to 16777215
        assert_eq!(16777215, after);
    }

    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_all_valid_32bit_values() {
        #[cfg(target_pointer_width = "32")]
        assert_eq!(16_777_214, super::MAX_VALUE);

        for i in 0..=16_777_214 {
            let cap = Capacity::new(i);
            let val = unsafe { cap.as_usize() };

            assert_eq!(val, i, "value roundtriped to wrong value?");
        }
    }
}