1#[cfg(feature = "alloc")]
2use alloc::{borrow::Cow, string::String};
3use core::{
4 fmt::Display,
5 iter::{FusedIterator, Peekable},
6 str::CharIndices,
7};
8
/// States of the hand-written DFA that recognises a single ANSI escape code.
///
/// The machine begins in [`State::Start`], is advanced one `char` at a time
/// with [`State::transition`], and falls into [`State::Trap`] as soon as the
/// characters seen so far can no longer be extended into a valid code.
#[derive(Debug, Clone, Copy)]
enum State {
    /// Nothing consumed yet; only an introducer (`ESC` or `CSI`) leaves this state.
    Start,
    /// The introducer has been consumed.
    S1,
    /// The introducer was followed by exactly one `(` or `)`.
    S2,
    /// `(` / `)` followed by `0`, `1` or `2`. Accepting; no character may follow.
    S3,
    /// Inside a run of prefix characters (`[`, `(`, `)`, `#`, `;`, `?`).
    S4,
    /// One digit of the current digit run consumed. Accepting.
    S5,
    /// Two consecutive digits consumed. Accepting.
    S6,
    /// Three consecutive digits consumed. Accepting.
    S7,
    /// Four consecutive digits consumed. Accepting.
    S8,
    /// Five consecutive digits consumed. Accepting; no character may follow.
    S9,
    /// A `;` was consumed after a digit run (or after another such `;`).
    S10,
    /// A terminating character was consumed. Accepting; no character may follow.
    S11,
    /// Dead state: the input is not (or is no longer) a valid escape code.
    Trap,
}

impl Default for State {
    /// A fresh machine starts in [`State::Start`].
    fn default() -> Self {
        State::Start
    }
}

impl State {
    /// Returns `true` if the characters consumed so far form a complete code.
    fn is_final(&self) -> bool {
        matches!(
            self,
            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11
        )
    }

    /// Returns `true` once the machine has entered the dead state.
    fn is_trapped(&self) -> bool {
        matches!(self, Self::Trap)
    }

    /// Advances the machine by one character.
    ///
    /// Any `(state, character)` pair that is not listed below leads to
    /// [`State::Trap`], and `Trap` itself is never left.
    fn transition(&mut self, c: char) {
        let next = match (*self, c) {
            // Introducer: only valid as the very first character.
            (Self::Start, '\u{1b}' | '\u{9b}') => Self::S1,

            // Parentheses: the first one directly after the introducer is
            // tracked separately because of the `S3` special case below.
            (Self::S1, '(' | ')') => Self::S2,
            (Self::S2 | Self::S4, '(' | ')') => Self::S4,

            // Remaining prefix characters.
            (Self::S1 | Self::S2 | Self::S4, ';' | '[' | '#' | '?') => Self::S4,

            // `;` after a digit run of at most four digits (or after another `;`).
            (Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S10, ';') => Self::S10,

            // A single paren followed by 0/1/2 is a dead-end accepting state.
            // This arm must stay ahead of the general digit arm below.
            (Self::S2, '0'..='2') => Self::S3,

            // Digits: the first one opens a run, later ones extend it up to
            // five digits in total.
            (Self::S1 | Self::S2 | Self::S4 | Self::S10, '0'..='9') => Self::S5,
            (Self::S5, '0'..='9') => Self::S6,
            (Self::S6, '0'..='9') => Self::S7,
            (Self::S7, '0'..='9') => Self::S8,
            (Self::S8, '0'..='9') => Self::S9,

            // Terminating character.
            (
                Self::S1
                | Self::S2
                | Self::S4
                | Self::S5
                | Self::S6
                | Self::S7
                | Self::S8
                | Self::S10,
                'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<',
            ) => Self::S11,

            _ => Self::Trap,
        };

        *self = next;
    }
}
107
/// Scanner that walks a string looking for ANSI escape codes.
#[derive(Debug)]
struct Matches<'a> {
    /// The complete string being scanned.
    s: &'a str,
    /// Peekable cursor over the `(byte offset, char)` pairs of `s`.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates a scanner positioned at the first character of `s`.
    fn new(s: &'a str) -> Self {
        Matches {
            s,
            it: s.char_indices().peekable(),
        }
    }
}
120
/// Location of a single ANSI escape code inside a larger string.
#[derive(Debug)]
struct Match<'a> {
    /// The string the code was found in.
    text: &'a str,
    /// Byte offset in `text` at which the code begins.
    start: usize,
    /// Byte offset in `text` one past the last byte of the code (exclusive).
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched code as a slice of the searched string.
    ///
    /// The slice borrows from the searched string, not from this `Match`.
    #[inline]
    pub(crate) fn as_str(&self) -> &'a str {
        // All three fields are `Copy`, so this copies them out of `self`.
        let Match { text, start, end } = *self;
        &text[start..end]
    }
}
134
135impl<'a> Iterator for Matches<'a> {
136 type Item = Match<'a>;
137
138 fn next(&mut self) -> Option<Self::Item> {
139 find_ansi_code_exclusive(&mut self.it).map(|(start, end)| Match {
140 text: self.s,
141 start,
142 end,
143 })
144 }
145}
146
147impl FusedIterator for Matches<'_> {}
148
149fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices>) -> Option<(usize, usize)> {
150 'outer: loop {
151 if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? {
152 let start = *start;
153 let mut state = State::default();
154 let mut maybe_end = None;
155
156 loop {
157 let item = it.peek();
158
159 if let Some((idx, c)) = item {
160 state.transition(*c);
161
162 if state.is_final() {
163 maybe_end = Some(*idx);
164 }
165 }
166
167 if state.is_trapped() || item.is_none() {
170 match maybe_end {
171 Some(end) => {
172 return Some((start, end + 1));
175 }
176 None => continue 'outer,
179 }
180 }
181
182 it.next();
183 }
184 }
185
186 it.next();
187 }
188}
189
/// Removes every ANSI escape code from `s`.
///
/// When `s` contains no escape code it is returned as `Cow::Borrowed`
/// unchanged; a new `String` is only built when there is something to strip.
#[cfg(feature = "alloc")]
pub fn strip_ansi_codes(s: &str) -> Cow<'_, str> {
    // Probe for a first code so the borrowed fast path can be taken.
    let mut probe = s.char_indices().peekable();
    if find_ansi_code_exclusive(&mut probe).is_none() {
        return Cow::Borrowed(s);
    }

    // Keep only the non-code segments and concatenate them.
    let plain: String = AnsiCodeIterator::new(s)
        .filter(|&(_, is_ansi)| !is_ansi)
        .map(|(text, _)| text)
        .collect();

    Cow::Owned(plain)
}
204
/// Wrapper around a string slice whose [`Display`] output leaves out the ANSI
/// escape codes contained in the wrapped text.
pub struct WithoutAnsi<'a> {
    /// The wrapped text, possibly containing escape codes.
    str: &'a str,
}

impl<'a> WithoutAnsi<'a> {
    /// Wraps `str`; the text is only stored here, not scanned or copied.
    pub fn new(str: &'a str) -> Self {
        WithoutAnsi { str }
    }
}
215
216impl Display for WithoutAnsi<'_> {
217 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
218 for (str, is_ansi) in AnsiCodeIterator::new(self.str) {
219 if !is_ansi {
220 f.write_str(str)?;
221 }
222 }
223 Ok(())
224 }
225}
226
227pub struct AnsiCodeIterator<'a> {
234 s: &'a str,
235 pending_item: Option<(&'a str, bool)>,
236 last_idx: usize,
237 cur_idx: usize,
238 iter: Matches<'a>,
239}
240
241impl<'a> AnsiCodeIterator<'a> {
242 pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
244 AnsiCodeIterator {
245 s,
246 pending_item: None,
247 last_idx: 0,
248 cur_idx: 0,
249 iter: Matches::new(s),
250 }
251 }
252
253 pub fn current_slice(&self) -> &str {
255 &self.s[..self.cur_idx]
256 }
257
258 pub fn rest_slice(&self) -> &str {
260 &self.s[self.cur_idx..]
261 }
262}
263
264impl<'a> Iterator for AnsiCodeIterator<'a> {
265 type Item = (&'a str, bool);
266
267 fn next(&mut self) -> Option<(&'a str, bool)> {
268 if let Some(pending_item) = self.pending_item.take() {
269 self.cur_idx += pending_item.0.len();
270 Some(pending_item)
271 } else if let Some(m) = self.iter.next() {
272 let s = &self.s[self.last_idx..m.start];
273 self.last_idx = m.end;
274 if s.is_empty() {
275 self.cur_idx = m.end;
276 Some((m.as_str(), true))
277 } else {
278 self.cur_idx = m.start;
279 self.pending_item = Some((m.as_str(), true));
280 Some((s, false))
281 }
282 } else if self.last_idx < self.s.len() {
283 let rv = &self.s[self.last_idx..];
284 self.cur_idx = self.s.len();
285 self.last_idx = self.s.len();
286 Some((rv, false))
287 } else {
288 None
289 }
290 }
291}
292
293impl FusedIterator for AnsiCodeIterator<'_> {}
294
#[cfg(test)]
mod tests {
    use super::*;

    use core::fmt::Write;
    use once_cell::sync::Lazy;
    use proptest::prelude::*;
    use regex::Regex;

    // Reference regex for the escape codes the hand-written DFA (`State`) is expected to
    // recognise. Several tests below check that the DFA finds exactly the same matches.
    static STRIP_ANSI_RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap()
    });

    // Lets `assert_eq!` compare regex matches with DFA matches; only the byte range is compared.
    impl<'a> PartialEq<Match<'a>> for regex::Match<'_> {
        fn eq(&self, other: &Match<'a>) -> bool {
            self.start() == other.start && self.end() == other.end
        }
    }

    proptest! {
        // Random strings made of up to five chunks, each optionally starting with an introducer.
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
            let new_matches: Vec<_> = Matches::new(&s).collect();
            assert_eq!(old_matches, new_matches);
        }
    }

    // Exhaustively compares DFA and regex on every string of length 0..=6 over a small alphabet.
    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // A reduced alphabet keeps the number of generated strings manageable.
        // NOTE(review): the raw byte 0x9b is a UTF-8 continuation byte and no lead byte is in
        // this list, so every chunk containing it fails `from_utf8` below and is skipped.
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        fn check_all_strings_of_len(len: usize) {
            _check_all_strings_of_len(len, &mut Vec::with_capacity(len));
        }

        // Recursively appends `len` more bytes to `chunk`, checking each completed byte string.
        fn _check_all_strings_of_len(len: usize, chunk: &mut Vec<u8>) {
            if len == 0 {
                // Byte strings that are not valid UTF-8 are silently ignored.
                if let Ok(s) = core::str::from_utf8(chunk) {
                    let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
                    let new_matches: Vec<_> = Matches::new(s).collect();
                    assert_eq!(old_matches, new_matches);
                }

                return;
            }

            for b in POSSIBLE_BYTES {
                chunk.push(*b);
                _check_all_strings_of_len(len - 1, chunk);
                chunk.pop();
            }
        }

        for str_len in 0..=6 {
            check_all_strings_of_len(str_len);
        }
    }

    // Compares DFA and regex on a recorded log file read from a relative path.
    #[test]
    fn complex_data() {
        let s = std::fs::read_to_string(
            std::path::Path::new("tests")
                .join("data")
                .join("sample_zellij_session.log"),
        )
        .unwrap();

        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
        let new_matches: Vec<_> = Matches::new(&s).collect();
        assert_eq!(old_matches, new_matches);
    }

    // Drives `State` by hand: a full code ends in an accepting state, one more char traps it.
    #[test]
    fn state_machine() {
        let ansi_code = "\x1b)B";
        let mut state = State::default();
        assert!(!state.is_final());

        for c in ansi_code.chars() {
            state.transition(c);
        }
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    // The second introducer aborts the first attempt but must still start a match of its own.
    #[test]
    fn back_to_back_entry_char() {
        let s = "\x1b\x1bf";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&["\x1bf"], matches.as_slice());
    }

    // A paren right after the introducer may be followed by any terminating character.
    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    // Five digits in a row after the introducer are still one complete match.
    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn test_without_ansi() {
        let str_with_ansi = "\x1b[1;97;41mError\x1b[0m";
        let without_ansi = WithoutAnsi::new(str_with_ansi);
        // Format twice: `Display::fmt` takes `&self`, so the output must be the same each time.
        for _ in 0..2 {
            let mut output = String::default();
            write!(output, "{without_ansi}").unwrap();
            assert_eq!(output, "Error");
        }
    }

    // Character-set selection codes (`ESC ( 0`, `ESC ) B`) are split from the surrounding text.
    #[test]
    fn test_ansi_iter_re_vt100() {
        let s = "\x1b(0lpq\x1b)Benglish";
        let mut iter = AnsiCodeIterator::new(s);
        assert_eq!(iter.next(), Some(("\x1b(0", true)));
        assert_eq!(iter.next(), Some(("lpq", false)));
        assert_eq!(iter.next(), Some(("\x1b)B", true)));
        assert_eq!(iter.next(), Some(("english", false)));
    }

    // Walks a styled string item by item, checking `current_slice`/`rest_slice` after each step.
    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        // As the assertions below show, this renders as "Hello \x1b[31mWorld\x1b[0m!".
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("Hello ", false)));
        assert_eq!(iter.current_slice(), "Hello ");
        assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31m");
        assert_eq!(iter.rest_slice(), "World\x1b[0m!");
        assert_eq!(iter.next(), Some(("World", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
        assert_eq!(iter.rest_slice(), "\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
        assert_eq!(iter.rest_slice(), "!");
        assert_eq!(iter.next(), Some(("!", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }

    // Two escape codes directly next to each other are yielded as two separate items.
    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m");
        assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[1m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
        assert_eq!(iter.rest_slice(), "a\x1b[0m");
        assert_eq!(iter.next(), Some(("a", false)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
        assert_eq!(iter.rest_slice(), "\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }
}