console/
ansi.rs

1#[cfg(feature = "alloc")]
2use alloc::{borrow::Cow, string::String};
3use core::{
4    fmt::Display,
5    iter::{FusedIterator, Peekable},
6    str::CharIndices,
7};
8
/// States of the hand-written DFA that recognizes a single ANSI escape code.
///
/// The automaton is fed one `char` at a time through [`State::transition`]. As laid out by
/// the transition table, the variants mean:
///
/// * `Start` - nothing has been consumed yet.
/// * `S1` - the introducer (`ESC`, U+001B, or `CSI`, U+009B) was consumed.
/// * `S2` - the introducer was followed by exactly one `(` or `)`.
/// * `S3` - *final*: a single parenthesis followed by `0`, `1` or `2`; nothing may follow.
/// * `S4` - inside the run of prefix characters (`[`, `(`, `)`, `#`, `;`, `?`), other than
///   the single leading parenthesis tracked by `S2`.
/// * `S5`..`S8` - *final*: one to four digits of the current run of digits were consumed.
/// * `S9` - *final*: a fifth consecutive digit was consumed; nothing may follow.
/// * `S10` - a `;` following a run of digits was consumed.
/// * `S11` - *final*: a terminating letter or symbol was consumed; nothing may follow.
/// * `Trap` - dead state: the consumed input can no longer be extended into a match.
#[derive(Debug, Clone, Copy, Default)]
enum State {
    #[default]
    Start,
    S1,
    S2,
    S3,
    S4,
    S5,
    S6,
    S7,
    S8,
    S9,
    S10,
    S11,
    Trap,
}

impl State {
    /// Returns `true` if the characters consumed so far form a complete escape code.
    fn is_final(&self) -> bool {
        matches!(
            self,
            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11
        )
    }

    /// Returns `true` once the automaton has entered the dead state, i.e. no further input
    /// can lead to a final state.
    fn is_trapped(&self) -> bool {
        matches!(self, Self::Trap)
    }

    /// Advances the automaton by one character, updating `self` in place.
    ///
    /// Any `(state, character)` pair that is not listed below leads to [`State::Trap`].
    fn transition(&mut self, c: char) {
        let current = *self;

        *self = match c {
            // The introducer is only valid as the very first character.
            '\u{1b}' | '\u{9b}' => match current {
                Self::Start => Self::S1,
                _ => Self::Trap,
            },
            // A single leading parenthesis is tracked separately (`S2`) because it may be
            // followed by `0`, `1` or `2` to form the short, non-extensible code `S3`.
            '(' | ')' => match current {
                Self::S1 => Self::S2,
                Self::S2 | Self::S4 => Self::S4,
                _ => Self::Trap,
            },
            // `;` is a prefix character before any digit and a separator afterwards.
            ';' => match current {
                Self::S1 | Self::S2 | Self::S4 => Self::S4,
                Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S10 => Self::S10,
                _ => Self::Trap,
            },
            '[' | '#' | '?' => match current {
                Self::S1 | Self::S2 | Self::S4 => Self::S4,
                _ => Self::Trap,
            },
            // The only place where the digit ranges `0..=2` and `3..=9` behave differently.
            '0'..='2' if matches!(current, Self::S2) => Self::S3,
            // Digits either open a run of digits (`S5`) or extend the current one by one.
            '0'..='9' => match current {
                Self::S1 | Self::S2 | Self::S4 | Self::S10 => Self::S5,
                Self::S5 => Self::S6,
                Self::S6 => Self::S7,
                Self::S7 => Self::S8,
                Self::S8 => Self::S9,
                _ => Self::Trap,
            },
            // Terminating characters complete the code from every non-final-only state.
            'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => {
                match current {
                    Self::S1
                    | Self::S2
                    | Self::S4
                    | Self::S5
                    | Self::S6
                    | Self::S7
                    | Self::S8
                    | Self::S10 => Self::S11,
                    _ => Self::Trap,
                }
            }
            _ => Self::Trap,
        };
    }
}
107
/// Scanner that walks a string and locates the ANSI escape codes inside it.
#[derive(Debug)]
struct Matches<'a> {
    /// The complete string being scanned; matches are reported as byte offsets into it.
    s: &'a str,
    /// Cursor over the characters of `s` together with their byte offsets.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates a scanner positioned at the first character of `s`.
    fn new(s: &'a str) -> Self {
        Self {
            s,
            it: s.char_indices().peekable(),
        }
    }
}
120
/// A single escape code located inside a string.
#[derive(Debug)]
struct Match<'a> {
    /// The string the match was found in.
    text: &'a str,
    /// Byte offset into `text` at which the match begins (inclusive).
    start: usize,
    /// Byte offset into `text` at which the match stops (exclusive).
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched portion of the underlying string.
    #[inline]
    pub(crate) fn as_str(&self) -> &'a str {
        let span = self.start..self.end;
        &self.text[span]
    }
}
134
135impl<'a> Iterator for Matches<'a> {
136    type Item = Match<'a>;
137
138    fn next(&mut self) -> Option<Self::Item> {
139        find_ansi_code_exclusive(&mut self.it).map(|(start, end)| Match {
140            text: self.s,
141            start,
142            end,
143        })
144    }
145}
146
147impl FusedIterator for Matches<'_> {}
148
149fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> {
150    'outer: loop {
151        if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? {
152            let start = *start;
153            let mut state = State::default();
154            let mut maybe_end = None;
155
156            loop {
157                let item = it.peek();
158
159                if let Some((idx, c)) = item {
160                    state.transition(*c);
161
162                    if state.is_final() {
163                        maybe_end = Some(*idx);
164                    }
165                }
166
167                // The match is greedy so run till we hit the trap state no matter what. A valid
168                // match is just one that was final at some point
169                if state.is_trapped() || item.is_none() {
170                    match maybe_end {
171                        Some(end) => {
172                            // All possible final characters are a single byte so it's safe to make
173                            // the end exclusive by just adding one
174                            return Some((start, end + 1));
175                        }
176                        // The character we are peeking right now might be the start of a match so
177                        // we want to continue the loop without popping off that char
178                        None => continue 'outer,
179                    }
180                }
181
182                it.next();
183            }
184        }
185
186        it.next();
187    }
188}
189
/// Removes all ANSI escape codes from `s`.
///
/// When `s` contains no escape code it is returned unchanged as [`Cow::Borrowed`]; otherwise a
/// new string holding only the non-ANSI parts is returned as [`Cow::Owned`].
#[cfg(feature = "alloc")]
pub fn strip_ansi_codes(s: &str) -> Cow<'_, str> {
    // Probe for a first escape code so that clean input is handed back without allocating.
    let first_code = find_ansi_code_exclusive(&mut s.char_indices().peekable());
    if first_code.is_none() {
        return Cow::Borrowed(s);
    }

    let stripped: String = AnsiCodeIterator::new(s)
        .filter(|&(_, is_ansi)| !is_ansi)
        .map(|(text, _)| text)
        .collect();

    Cow::Owned(stripped)
}
204
/// A wrapper struct that implements [`core::fmt::Display`], only displaying non-ansi parts.
///
/// The wrapper merely borrows the string, so constructing it performs no scanning and no
/// allocation.
#[derive(Debug)]
pub struct WithoutAnsi<'a> {
    /// The wrapped string, possibly containing ANSI escape codes.
    str: &'a str,
}

impl<'a> WithoutAnsi<'a> {
    /// Wraps `str` so that it can be formatted with its ANSI escape codes left out.
    pub fn new(str: &'a str) -> Self {
        Self { str }
    }
}
215
216impl Display for WithoutAnsi<'_> {
217    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
218        for (str, is_ansi) in AnsiCodeIterator::new(self.str) {
219            if !is_ansi {
220                f.write_str(str)?;
221            }
222        }
223        Ok(())
224    }
225}
226
227/// An iterator over ansi codes in a string.
228///
229/// This type can be used to scan over ansi codes in a string.
230/// It yields tuples in the form `(s, is_ansi)` where `s` is a slice of
231/// the original string and `is_ansi` indicates if the slice contains
232/// ansi codes or string values.
233pub struct AnsiCodeIterator<'a> {
234    s: &'a str,
235    pending_item: Option<(&'a str, bool)>,
236    last_idx: usize,
237    cur_idx: usize,
238    iter: Matches<'a>,
239}
240
241impl<'a> AnsiCodeIterator<'a> {
242    /// Creates a new ansi code iterator.
243    pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
244        AnsiCodeIterator {
245            s,
246            pending_item: None,
247            last_idx: 0,
248            cur_idx: 0,
249            iter: Matches::new(s),
250        }
251    }
252
253    /// Returns the string slice up to the current match.
254    pub fn current_slice(&self) -> &str {
255        &self.s[..self.cur_idx]
256    }
257
258    /// Returns the string slice from the current match to the end.
259    pub fn rest_slice(&self) -> &str {
260        &self.s[self.cur_idx..]
261    }
262}
263
264impl<'a> Iterator for AnsiCodeIterator<'a> {
265    type Item = (&'a str, bool);
266
267    fn next(&mut self) -> Option<(&'a str, bool)> {
268        if let Some(pending_item) = self.pending_item.take() {
269            self.cur_idx += pending_item.0.len();
270            Some(pending_item)
271        } else if let Some(m) = self.iter.next() {
272            let s = &self.s[self.last_idx..m.start];
273            self.last_idx = m.end;
274            if s.is_empty() {
275                self.cur_idx = m.end;
276                Some((m.as_str(), true))
277            } else {
278                self.cur_idx = m.start;
279                self.pending_item = Some((m.as_str(), true));
280                Some((s, false))
281            }
282        } else if self.last_idx < self.s.len() {
283            let rv = &self.s[self.last_idx..];
284            self.cur_idx = self.s.len();
285            self.last_idx = self.s.len();
286            Some((rv, false))
287        } else {
288            None
289        }
290    }
291}
292
293impl FusedIterator for AnsiCodeIterator<'_> {}
294
#[cfg(test)]
mod tests {
    use super::*;

    use core::fmt::Write;
    use once_cell::sync::Lazy;
    use proptest::prelude::*;
    use regex::Regex;

    // The manual dfa `State` is a handwritten translation from the previously used regex. That
    // regex is kept here and used to ensure that the new matches are the same as the old
    static STRIP_ANSI_RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap()
    });

    // Lets `assert_eq!` compare the regex matches with the dfa matches by their byte ranges.
    impl<'a> PartialEq<Match<'a>> for regex::Match<'_> {
        fn eq(&self, other: &Match<'a>) -> bool {
            self.start() == other.start && self.end() == other.end
        }
    }

    proptest! {
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
            let new_matches: Vec<_> = Matches::new(&s).collect();
            assert_eq!(old_matches, new_matches);
        }
    }

    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // To make sure the test runs in a reasonable time this is a slimmed down list of
        // characters to reduce the groups that are only used with each other along with one
        // arbitrarily chosen character not used in the regex (' ').
        //
        // The candidates are built from `char`s rather than raw bytes: a lone 0x9b byte is a
        // UTF-8 continuation byte, so byte-built candidates containing it are never valid
        // strings and the U+009B introducer would silently go untested.
        const POSSIBLE_CHARS: &[char] =
            &[' ', '\u{1b}', '\u{9b}', '(', '0', '[', ';', '3', 'C'];

        fn check_all_strings_of_len(len: usize) {
            // U+009B takes two bytes in UTF-8, hence twice the character count.
            _check_all_strings_of_len(len, &mut String::with_capacity(len * 2));
        }

        fn _check_all_strings_of_len(len: usize, chunk: &mut String) {
            if len == 0 {
                let s = chunk.as_str();
                let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
                let new_matches: Vec<_> = Matches::new(s).collect();
                assert_eq!(old_matches, new_matches);

                return;
            }

            for &c in POSSIBLE_CHARS {
                chunk.push(c);
                _check_all_strings_of_len(len - 1, chunk);
                chunk.pop();
            }
        }

        for str_len in 0..=6 {
            check_all_strings_of_len(str_len);
        }
    }

    #[test]
    fn complex_data() {
        let s = std::fs::read_to_string(
            std::path::Path::new("tests")
                .join("data")
                .join("sample_zellij_session.log"),
        )
        .unwrap();

        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
        let new_matches: Vec<_> = Matches::new(&s).collect();
        assert_eq!(old_matches, new_matches);
    }

    #[test]
    fn state_machine() {
        let ansi_code = "\x1b)B";
        let mut state = State::default();
        assert!(!state.is_final());

        for c in ansi_code.chars() {
            state.transition(c);
        }
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    #[test]
    fn back_to_back_entry_char() {
        let s = "\x1b\x1bf";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&["\x1bf"], matches.as_slice());
    }

    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn test_without_ansi() {
        let str_with_ansi = "\x1b[1;97;41mError\x1b[0m";
        let without_ansi = WithoutAnsi::new(str_with_ansi);
        // Formatting twice checks that displaying the wrapper does not consume it.
        for _ in 0..2 {
            let mut output = String::default();
            write!(output, "{without_ansi}").unwrap();
            assert_eq!(output, "Error");
        }
    }

    #[test]
    fn test_ansi_iter_re_vt100() {
        let s = "\x1b(0lpq\x1b)Benglish";
        let mut iter = AnsiCodeIterator::new(s);
        assert_eq!(iter.next(), Some(("\x1b(0", true)));
        assert_eq!(iter.next(), Some(("lpq", false)));
        assert_eq!(iter.next(), Some(("\x1b)B", true)));
        assert_eq!(iter.next(), Some(("english", false)));
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("Hello ", false)));
        assert_eq!(iter.current_slice(), "Hello ");
        assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31m");
        assert_eq!(iter.rest_slice(), "World\x1b[0m!");
        assert_eq!(iter.next(), Some(("World", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
        assert_eq!(iter.rest_slice(), "\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
        assert_eq!(iter.rest_slice(), "!");
        assert_eq!(iter.next(), Some(("!", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m");
        assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[1m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
        assert_eq!(iter.rest_slice(), "a\x1b[0m");
        assert_eq!(iter.next(), Some(("a", false)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
        assert_eq!(iter.rest_slice(), "\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }
}