From c51580960ddb14de229de7bf52630721113e5c83 Mon Sep 17 00:00:00 2001 From: Boaz Yaniv Date: Fri, 7 Mar 2025 16:30:58 +0900 Subject: [PATCH 1/5] Add support for custom parsing of APC, SOS and PM sequences Fixes #109 --- src/lib.rs | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 153 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1c69123..3b8b3c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -179,7 +179,9 @@ impl Parser { State::Escape => self.advance_esc(performer, byte), State::EscapeIntermediate => self.advance_esc_intermediate(performer, byte), State::OscString => self.advance_osc_string(performer, byte), - State::SosPmApcString => self.anywhere(performer, byte), + State::SosString => self.advance_opaque_string(SosDispatch(performer), byte), + State::ApcString => self.advance_opaque_string(ApcDispatch(performer), byte), + State::PmString => self.advance_opaque_string(PmDispatch(performer), byte), State::Ground => unreachable!(), } } @@ -356,7 +358,12 @@ impl Parser { performer.esc_dispatch(self.intermediates(), self.ignoring, byte); self.state = State::Ground }, - 0x58 => self.state = State::SosPmApcString, + 0x58 => { + self.state = { + performer.sos_start(); + State::SosString + } + }, 0x59..=0x5A => { performer.esc_dispatch(self.intermediates(), self.ignoring, byte); self.state = State::Ground @@ -374,7 +381,14 @@ impl Parser { self.osc_num_params = 0; self.state = State::OscString }, - 0x5E..=0x5F => self.state = State::SosPmApcString, + 0x5E => { + performer.pm_start(); + self.state = State::PmString + }, + 0x5F => { + performer.apc_start(); + self.state = State::ApcString + }, 0x60..=0x7E => { performer.esc_dispatch(self.intermediates(), self.ignoring, byte); self.state = State::Ground @@ -434,6 +448,41 @@ impl Parser { } } + #[inline(always)] + fn advance_opaque_string(&mut self, mut dispatcher: D, byte: u8) { + match byte { + 0x07 => { + // The standard only supports ST-terminated SOS/APC/PM strings, using either + // ESC-ST (ESC-\) and C1-ST (0x9C), but kitty (and probably some other + // terminals) also support bell-terminated strings. Some + // terminals (including Kitty), do not support C1-ST (0x9C) as a + // terminator, which means every character from 0x20-0xFF can be + // used with this sequence in theory. + dispatcher.opaque_end(); + self.state = State::Ground + }, + 0x18 | 0x1A => { + // XTerm terminates SOS/APC/PM strings on C1 CAN (^X) and SUB (^Z). This is also + // the same behavior we implement for OSC strings. + dispatcher.opaque_end(); + dispatcher.execute(byte); + self.state = State::Ground + }, + 0x1B => { + // Any escape code ends the SOS/APC/PM string. This is not standard behavior, + // but avoids having to keep additional state. + dispatcher.opaque_end(); + self.state = State::Escape + }, + 0x20..=0xFF => { + // Only dispatch valid characters. + dispatcher.opaque_put(byte) + }, + // Ignore all other control codes + _ => (), + } + } + #[inline(always)] fn anywhere(&mut self, performer: &mut P, byte: u8) { match byte { @@ -743,7 +792,9 @@ enum State { Escape, EscapeIntermediate, OscString, - SosPmApcString, + SosString, + ApcString, + PmString, #[default] Ground, } @@ -811,6 +862,41 @@ pub trait Perform { /// subsequent characters were ignored. fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {} + /// Invoked when the beginning of a new SOS (Start of String) sequence is + /// encountered. + fn sos_start(&mut self) {} + + /// Invoked when the beginning of a new APC (Application Program Command) + /// sequence is encountered. + fn apc_start(&mut self) {} + + /// Invoked when the beginning of a new PM (Privacy Message) sequence is + /// encountered. + fn pm_start(&mut self) {} + + /// Invoked for every valid character (0x20-0xFF) in a SOS (Start of String) + /// sequence. + fn sos_dispatch(&mut self, _byte: u8) {} + + /// Invoked for every valid character (0x20-0xFF) in an APC (Application + /// Program Command) sequence. + fn apc_dispatch(&mut self, _byte: u8) {} + + /// Invoked for every valid character (0x20-0xFF) in a PM (Privacy Message) + /// sequence. + fn pm_dispatch(&mut self, _byte: u8) {} + + /// Invoked when the end of an SOS (Start of String) sequence is + /// encountered. + fn sos_string_end(&mut self) {} + + /// Invoked when the end of an APC (Application Program Command) sequence is + /// encountered. + fn apc_string_end(&mut self) {} + + /// Invoked when the end of a PM (Privacy Message) sequence is encountered. + fn pm_string_end(&mut self) {} + /// Whether the parser should terminate prematurely. /// /// This can be used in conjunction with @@ -825,6 +911,69 @@ pub trait Perform { } } +trait OpaqueDispatch { + fn execute(&mut self, byte: u8); + fn opaque_put(&mut self, byte: u8); + fn opaque_end(&mut self); +} + +struct SosDispatch<'a, P: Perform>(&'a mut P); + +impl OpaqueDispatch for SosDispatch<'_, P> { + #[inline(always)] + fn execute(&mut self, byte: u8) { + self.0.execute(byte); + } + + #[inline(always)] + fn opaque_put(&mut self, byte: u8) { + self.0.sos_dispatch(byte); + } + + #[inline(always)] + fn opaque_end(&mut self) { + self.0.sos_string_end(); + } +} + +struct ApcDispatch<'a, P: Perform>(&'a mut P); + +impl OpaqueDispatch for ApcDispatch<'_, P> { + #[inline(always)] + fn execute(&mut self, byte: u8) { + self.0.execute(byte); + } + + #[inline(always)] + fn opaque_put(&mut self, byte: u8) { + self.0.apc_dispatch(byte); + } + + #[inline(always)] + fn opaque_end(&mut self) { + self.0.apc_string_end(); + } +} + +struct PmDispatch<'a, P: Perform>(&'a mut P); + +impl OpaqueDispatch for PmDispatch<'_, P> { + #[inline(always)] + fn execute(&mut self, byte: u8) { + self.0.execute(byte); + } + + #[inline(always)] + fn opaque_put(&mut self, byte: u8) { + self.0.pm_dispatch(byte); + } + + #[inline(always)] + fn opaque_end(&mut self) { + self.0.pm_string_end(); + } +} + #[cfg(all(test, not(feature = "std")))] #[macro_use] extern crate std; From 907eed357b1218c720aa52ffb43d118800466e0d Mon Sep 17 00:00:00 2001 From: Boaz Yaniv Date: Wed, 12 Mar 2025 11:28:02 +0900 Subject: [PATCH 2/5] Remove unnecessary block --- src/lib.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3b8b3c3..c94a224 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -359,10 +359,8 @@ impl Parser { self.state = State::Ground }, 0x58 => { - self.state = { - performer.sos_start(); - State::SosString - } + performer.sos_start(); + self.state = State::SosString }, 0x59..=0x5A => { performer.esc_dispatch(self.intermediates(), self.ignoring, byte); From 4d31609be0fcf6bb4b556eb7f1c27b296ed749e8 Mon Sep 17 00:00:00 2001 From: Boaz Yaniv Date: Wed, 12 Mar 2025 11:30:00 +0900 Subject: [PATCH 3/5] Rename and rearrange sos/pm/apc methods --- src/lib.rs | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c94a224..b4f1147 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -864,36 +864,35 @@ pub trait Perform { /// encountered. fn sos_start(&mut self) {} - /// Invoked when the beginning of a new APC (Application Program Command) - /// sequence is encountered. - fn apc_start(&mut self) {} + /// Invoked for every valid byte (0x20-0xFF) in a SOS (Start of String) + /// sequence. + fn sos_put(&mut self, _byte: u8) {} + + /// Invoked when the end of an SOS (Start of String) sequence is + /// encountered. + fn sos_end(&mut self) {} /// Invoked when the beginning of a new PM (Privacy Message) sequence is /// encountered. fn pm_start(&mut self) {} - /// Invoked for every valid character (0x20-0xFF) in a SOS (Start of String) + /// Invoked for every valid byte (0x20-0xFF) in a PM (Privacy Message) /// sequence. - fn sos_dispatch(&mut self, _byte: u8) {} + fn pm_put(&mut self, _byte: u8) {} - /// Invoked for every valid character (0x20-0xFF) in an APC (Application - /// Program Command) sequence. - fn apc_dispatch(&mut self, _byte: u8) {} - - /// Invoked for every valid character (0x20-0xFF) in a PM (Privacy Message) - /// sequence. - fn pm_dispatch(&mut self, _byte: u8) {} + /// Invoked when the end of a PM (Privacy Message) sequence is encountered. + fn pm_end(&mut self) {} - /// Invoked when the end of an SOS (Start of String) sequence is - /// encountered. - fn sos_string_end(&mut self) {} + /// Invoked when the beginning of a new APC (Application Program Command) + /// sequence is encountered. + fn apc_start(&mut self) {} + /// Invoked for every valid byte (0x20-0xFF) in an APC (Application Program + /// Command) sequence. + fn apc_put(&mut self, _byte: u8) {} /// Invoked when the end of an APC (Application Program Command) sequence is /// encountered. - fn apc_string_end(&mut self) {} - - /// Invoked when the end of a PM (Privacy Message) sequence is encountered. - fn pm_string_end(&mut self) {} + fn apc_end(&mut self) {} /// Whether the parser should terminate prematurely. /// @@ -925,12 +924,12 @@ impl OpaqueDispatch for SosDispatch<'_, P> { #[inline(always)] fn opaque_put(&mut self, byte: u8) { - self.0.sos_dispatch(byte); + self.0.sos_put(byte); } #[inline(always)] fn opaque_end(&mut self) { - self.0.sos_string_end(); + self.0.sos_end(); } } @@ -944,12 +943,12 @@ impl OpaqueDispatch for ApcDispatch<'_, P> { #[inline(always)] fn opaque_put(&mut self, byte: u8) { - self.0.apc_dispatch(byte); + self.0.apc_put(byte); } #[inline(always)] fn opaque_end(&mut self) { - self.0.apc_string_end(); + self.0.apc_end(); } } @@ -963,12 +962,12 @@ impl OpaqueDispatch for PmDispatch<'_, P> { #[inline(always)] fn opaque_put(&mut self, byte: u8) { - self.0.pm_dispatch(byte); + self.0.pm_put(byte); } #[inline(always)] fn opaque_end(&mut self) { - self.0.pm_string_end(); + self.0.pm_end(); } } From 4277fbce9b30a616c5190c3a769dc18648ae76b5 Mon Sep 17 00:00:00 2001 From: Boaz Yaniv Date: Wed, 12 Mar 2025 11:31:57 +0900 Subject: [PATCH 4/5] Remove and update comments --- src/lib.rs | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b4f1147..7eb1336 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -450,33 +450,20 @@ impl Parser { fn advance_opaque_string(&mut self, mut dispatcher: D, byte: u8) { match byte { 0x07 => { - // The standard only supports ST-terminated SOS/APC/PM strings, using either - // ESC-ST (ESC-\) and C1-ST (0x9C), but kitty (and probably some other - // terminals) also support bell-terminated strings. Some - // terminals (including Kitty), do not support C1-ST (0x9C) as a - // terminator, which means every character from 0x20-0xFF can be - // used with this sequence in theory. dispatcher.opaque_end(); self.state = State::Ground }, 0x18 | 0x1A => { - // XTerm terminates SOS/APC/PM strings on C1 CAN (^X) and SUB (^Z). This is also - // the same behavior we implement for OSC strings. dispatcher.opaque_end(); dispatcher.execute(byte); self.state = State::Ground }, 0x1B => { - // Any escape code ends the SOS/APC/PM string. This is not standard behavior, - // but avoids having to keep additional state. dispatcher.opaque_end(); self.state = State::Escape }, - 0x20..=0xFF => { - // Only dispatch valid characters. - dispatcher.opaque_put(byte) - }, - // Ignore all other control codes + 0x20..=0xFF => dispatcher.opaque_put(byte), + // Ignore all other control bytes. _ => (), } } @@ -908,6 +895,10 @@ pub trait Perform { } } +/// This trait is used internally to provide a common implementation for Opaque +/// Sequences (SOS, APC, PM). Implementations of this trait will just forward +/// calls to the equivalent method on [Perform]. Implementations of this trait +/// are always inlined to avoid overhead. trait OpaqueDispatch { fn execute(&mut self, byte: u8); fn opaque_put(&mut self, byte: u8); From 37413f013a27db77598772825e0d3a3b030fc9ac Mon Sep 17 00:00:00 2001 From: Boaz Yaniv Date: Wed, 12 Mar 2025 13:35:45 +0900 Subject: [PATCH 5/5] Add unit tests for SOS, PM and APC sequences --- src/lib.rs | 147 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 7eb1336..d79cbc7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -979,12 +979,21 @@ mod tests { b'c', b'r', b'i', b't', b't', b'y', 0x07, // End OSC ]; + const ST_ESC_SEQUENCE: &[Sequence] = &[Sequence::Esc(vec![], false, 0x5C)]; + #[derive(Default)] struct Dispatcher { dispatched: Vec, } - #[derive(Debug, PartialEq, Eq)] + #[derive(Copy, Clone, Debug, PartialEq, Eq)] + enum OpaqueSequenceKind { + Sos, + Pm, + Apc, + } + + #[derive(Clone, Debug, PartialEq, Eq)] enum Sequence { Osc(Vec>, bool), Csi(Vec>, Vec, bool, char), @@ -993,6 +1002,9 @@ mod tests { DcsPut(u8), Print(char), Execute(u8), + OpaqueStart(OpaqueSequenceKind), + OpaquePut(OpaqueSequenceKind, u8), + OpaqueEnd(OpaqueSequenceKind), DcsUnhook, } @@ -1034,6 +1046,42 @@ mod tests { fn execute(&mut self, byte: u8) { self.dispatched.push(Sequence::Execute(byte)); } + + fn sos_start(&mut self) { + self.dispatched.push(Sequence::OpaqueStart(OpaqueSequenceKind::Sos)); + } + + fn sos_put(&mut self, byte: u8) { + self.dispatched.push(Sequence::OpaquePut(OpaqueSequenceKind::Sos, byte)); + } + + fn sos_end(&mut self) { + self.dispatched.push(Sequence::OpaqueEnd(OpaqueSequenceKind::Sos)); + } + + fn pm_start(&mut self) { + self.dispatched.push(Sequence::OpaqueStart(OpaqueSequenceKind::Pm)); + } + + fn pm_put(&mut self, byte: u8) { + self.dispatched.push(Sequence::OpaquePut(OpaqueSequenceKind::Pm, byte)); + } + + fn pm_end(&mut self) { + self.dispatched.push(Sequence::OpaqueEnd(OpaqueSequenceKind::Pm)); + } + + fn apc_start(&mut self) { + self.dispatched.push(Sequence::OpaqueStart(OpaqueSequenceKind::Apc)); + } + + fn apc_put(&mut self, byte: u8) { + self.dispatched.push(Sequence::OpaquePut(OpaqueSequenceKind::Apc, byte)); + } + + fn apc_end(&mut self) { + self.dispatched.push(Sequence::OpaqueEnd(OpaqueSequenceKind::Apc)); + } } #[test] @@ -1523,6 +1571,103 @@ mod tests { } } + fn expect_opaque_sequence( + input: &[u8], + kind: OpaqueSequenceKind, + expected_payload: &[u8], + expected_trailer: &[Sequence], + ) { + let mut expected_dispatched: Vec = vec![Sequence::OpaqueStart(kind)]; + for byte in expected_payload { + expected_dispatched.push(Sequence::OpaquePut(kind, *byte)); + } + expected_dispatched.push(Sequence::OpaqueEnd(kind)); + for item in expected_trailer { + expected_dispatched.push(item.clone()); + } + + let mut dispatcher = Dispatcher::default(); + let mut parser = Parser::new(); + parser.advance(&mut dispatcher, input); + + assert_eq!(dispatcher.dispatched, expected_dispatched); + } + + #[test] + fn sos_c0_st_terminated() { + expect_opaque_sequence( + b"\x1bXTest\x20\xFF;xyz\x1b\\", + OpaqueSequenceKind::Sos, + b"Test\x20\xFF;xyz", + ST_ESC_SEQUENCE, + ); + } + + #[test] + fn sos_bell_terminated() { + expect_opaque_sequence( + b"\x1bXTest\x20\xFF;xyz\x07", + OpaqueSequenceKind::Sos, + b"Test\x20\xFF;xyz", + &[], + ); + } + + #[test] + fn sos_empty() { + expect_opaque_sequence(b"\x1bX\x1b\\", OpaqueSequenceKind::Sos, &[], ST_ESC_SEQUENCE); + } + + #[test] + fn pm_c0_st_terminated() { + expect_opaque_sequence( + b"\x1b^Test\x20\xFF;xyz\x1b\\", + OpaqueSequenceKind::Pm, + b"Test\x20\xFF;xyz", + ST_ESC_SEQUENCE, + ); + } + + #[test] + fn pm_bell_terminated() { + expect_opaque_sequence( + b"\x1b^Test\x20\xFF;xyz\x07", + OpaqueSequenceKind::Pm, + b"Test\x20\xFF;xyz", + &[], + ); + } + + #[test] + fn pm_empty() { + expect_opaque_sequence(b"\x1b^\x1b\\", OpaqueSequenceKind::Pm, &[], ST_ESC_SEQUENCE); + } + + #[test] + fn apc_c0_st_terminated() { + expect_opaque_sequence( + b"\x1b_Test\x20\xFF;xyz\x1b\\", + OpaqueSequenceKind::Apc, + b"Test\x20\xFF;xyz", + ST_ESC_SEQUENCE, + ); + } + + #[test] + fn apc_bell_terminated() { + expect_opaque_sequence( + b"\x1b_Test\x20\xFF;xyz\x07", + OpaqueSequenceKind::Apc, + b"Test\x20\xFF;xyz", + &[], + ); + } + + #[test] + fn apc_empty() { + expect_opaque_sequence(b"\x1b_\x1b\\", OpaqueSequenceKind::Apc, &[], ST_ESC_SEQUENCE); + } + #[test] fn unicode() { const INPUT: &[u8] = b"\xF0\x9F\x8E\x89_\xF0\x9F\xA6\x80\xF0\x9F\xA6\x80_\xF0\x9F\x8E\x89";