// compact_str/repr/iter.rs
//! Implementations of the [`FromIterator`] trait to make building [`Repr`]s more ergonomic

use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::string::String;

use super::{
    InlineBuffer,
    Repr,
    EMPTY,
    MAX_SIZE,
};
use crate::{
    CompactString,
    UnwrapWithMsg,
};

/// Builds a [`Repr`] by appending every `char` produced by the iterator, in order.
impl FromIterator<char> for Repr {
    #[inline]
    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
        let chars = iter.into_iter();

        // Pre-size using the lower bound of the size hint. If that request fails we start from
        // `EMPTY` instead: the hint may simply be wrong, so the error is intentionally ignored.
        let (min_chars, _) = chars.size_hint();
        let mut repr = Repr::with_capacity(min_chars).unwrap_or_else(|_| EMPTY);

        // Scratch space for the UTF-8 encoding of a single `char` (at most 4 bytes).
        let mut utf8 = [0u8; 4];
        chars.for_each(|c| {
            repr.push_str(c.encode_utf8(&mut utf8));
        });

        repr
    }
}

/// Builds a [`Repr`] from borrowed `char`s by copying each one and delegating to the
/// by-value `FromIterator<char>` implementation.
impl<'a> FromIterator<&'a char> for Repr {
    fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
        let owned_chars = iter.into_iter().copied();
        <Self as FromIterator<char>>::from_iter(owned_chars)
    }
}

/// Concatenates every string yielded by `iter` into a single [`Repr`].
///
/// Pieces are copied into an [`InlineBuffer`] for as long as the running total stays within
/// [`MAX_SIZE`] bytes. The first piece that would push the total past [`MAX_SIZE`] switches the
/// function over to a heap [`String`], which receives the bytes gathered so far, that piece, and
/// everything still left in the iterator.
fn from_as_ref_str_iterator<S, I>(mut iter: I) -> Repr
where
    S: AsRef<str>,
    I: Iterator<Item = S>,
    String: core::iter::Extend<S>,
    String: FromIterator<S>,
{
    // Note: Unlike the character iterator, the size hint's lower bound is deliberately not
    // consulted up front. The iterator may be full of empty strings, in which case reserving
    // based on the item count could heap allocate when there is no need.

    let mut inline_buf = InlineBuffer::new_const("");
    let mut filled = 0;

    // `while let` rather than `for`: the iterator itself is still needed inside the loop body.
    while let Some(item) = iter.next() {
        let piece = item.as_ref();
        let piece_len = piece.len();
        let total = piece_len + filled;

        if total <= MAX_SIZE {
            // still fits inline: copy the piece right behind the bytes written so far
            inline_buf.0[filled..total].copy_from_slice(piece.as_bytes());
            filled = total;
            continue;
        }

        // This piece does not fit into the inline buffer anymore, move everything to the heap.
        // The remaining item count is used as a rough lower bound for the extra bytes needed.
        let (min_remaining, _) = iter.size_hint();
        let mut heap = String::with_capacity(total + min_remaining);

        // SAFETY: `inline_buf` has only been filled with whole `&str`s, which are valid UTF-8
        let gathered = unsafe { core::str::from_utf8_unchecked(&inline_buf.0[..filled]) };
        // what was collected inline goes first...
        heap.push_str(gathered);
        // ...followed by the piece that overflowed...
        heap.push_str(piece);
        // ...and finally whatever the iterator still has to offer
        heap.extend(iter);

        return Repr::from_string(heap, true).unwrap_with_msg();
    }

    // SAFETY: Everything written into the buffer came from a `str`, which is valid UTF-8
    unsafe { inline_buf.set_len(filled) }

    Repr::from_inline(inline_buf)
}

/// Builds a [`Repr`] by concatenating borrowed string slices.
impl<'a> FromIterator<&'a str> for Repr {
    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
        // All string-like item types share the same concatenation routine.
        let pieces = iter.into_iter();
        from_as_ref_str_iterator(pieces)
    }
}

/// Builds a [`Repr`] by concatenating boxed string slices.
impl FromIterator<Box<str>> for Repr {
    fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
        // All string-like item types share the same concatenation routine.
        let pieces = iter.into_iter();
        from_as_ref_str_iterator(pieces)
    }
}

/// Builds a [`Repr`] by concatenating owned [`String`]s.
impl FromIterator<String> for Repr {
    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
        // All string-like item types share the same concatenation routine.
        let pieces = iter.into_iter();
        from_as_ref_str_iterator(pieces)
    }
}

/// Builds a [`Repr`] by concatenating [`CompactString`]s.
impl FromIterator<CompactString> for Repr {
    fn from_iter<T: IntoIterator<Item = CompactString>>(iter: T) -> Self {
        // All string-like item types share the same concatenation routine.
        let pieces = iter.into_iter();
        from_as_ref_str_iterator(pieces)
    }
}

/// Builds a [`Repr`] by concatenating copy-on-write strings, borrowed or owned.
impl<'a> FromIterator<Cow<'a, str>> for Repr {
    fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
        // All string-like item types share the same concatenation routine.
        let pieces = iter.into_iter();
        from_as_ref_str_iterator(pieces)
    }
}

/// Tests for the `FromIterator` implementations on [`Repr`], covering both the results that are
/// expected to stay inline and the ones that are expected to end up on the heap.
#[cfg(test)]
mod tests {
    use alloc::string::String;

    use super::Repr;

    /// A few by-value `char`s should produce an inline `Repr`.
    #[test]
    fn short_char_iter() {
        let chars = ['a', 'b', 'c'];
        // `copied()` yields `char` by value, so this goes straight through the
        // `FromIterator<char>` impl instead of duplicating the `&char` test below.
        let repr: Repr = chars.iter().copied().collect();

        assert_eq!(repr.as_str(), "abc");
        assert!(!repr.is_heap_allocated());
    }

    /// A few borrowed `char`s should produce an inline `Repr`.
    #[test]
    fn short_char_ref_iter() {
        let chars = ['a', 'b', 'c'];
        let repr: Repr = chars.iter().collect();

        assert_eq!(repr.as_str(), "abc");
        assert!(!repr.is_heap_allocated());
    }

    /// Six 4-byte `char`s (24 bytes of UTF-8) should still be stored inline.
    /// Ignored on 32-bit targets.
    #[test]
    #[cfg_attr(target_pointer_width = "32", ignore)]
    fn packed_char_iter() {
        let chars = [
            '\u{92f01}',
            '\u{81515}',
            '\u{81515}',
            '\u{81515}',
            '\u{81515}',
            '\u{41515}',
        ];

        let repr: Repr = chars.iter().collect();
        let s: String = chars.iter().collect();

        assert_eq!(repr.as_str(), s.as_str());
        assert!(!repr.is_heap_allocated());
    }

    /// A long run of `char`s should spill onto the heap.
    #[test]
    fn long_char_iter() {
        let long = "This is supposed to be a really long string";
        let repr: Repr = long.chars().collect();

        assert_eq!(repr.as_str(), "This is supposed to be a really long string");
        assert!(repr.is_heap_allocated());
    }

    /// Two short strings should be concatenated inline.
    #[test]
    fn short_string_iter() {
        let strings = vec!["hello", "world"];
        let repr: Repr = strings.into_iter().collect();

        assert_eq!(repr.as_str(), "helloworld");
        assert!(!repr.is_heap_allocated());
    }

    /// Many short strings whose combined length is long should spill onto the heap.
    #[test]
    fn long_short_string_iter() {
        let strings = vec![
            "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16",
            "17", "18", "19", "20",
        ];
        let repr: Repr = strings.into_iter().collect();

        assert_eq!(repr.as_str(), "1234567891011121314151617181920");
        assert!(repr.is_heap_allocated());
    }
}