From 24f0abe78078a47e855de1d773e19119ee55619d Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Tue, 23 Apr 2024 00:04:05 +0200 Subject: [PATCH] Fix decoding mulitbyte characters after escape prefix Fixes #10457 --- CHANGELOG.rst | 2 +- src/input_common.rs | 43 +++++++++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index df57de808..3e2269960 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -162,9 +162,9 @@ Completions Improved terminal support ^^^^^^^^^^^^^^^^^^^^^^^^^ - Fish now marks the prompt and command-output regions (via OSC 133) to enable terminal shell integration (:issue:`10352`). + Shell integration shortcuts can scroll to the next/previous prompt or show the last command output in a pager. - Fish now reports the working directory (via OSC 7) unconditionally instead of only for some terminals (:issue:`9955`). - Fish now sets the terminal window title (via OSC 0) unconditionally instead of only for some terminals (:issue:`10037`). - Shell integration shortcuts can scroll to the next/previous prompt or show the last command output in a pager. - Focus reporting in tmux is no longer disabled on the first prompt. Other improvements diff --git a/src/input_common.rs b/src/input_common.rs index 6f5044ed6..d2a77d305 100644 --- a/src/input_common.rs +++ b/src/input_common.rs @@ -21,6 +21,7 @@ use crate::wutil::encoding::{mbrtowc, mbstate_t, zero_mbstate}; use crate::wutil::{fish_wcstol, write_to_fd}; use std::cell::RefCell; use std::collections::VecDeque; +use std::ops::ControlFlow; use std::os::fd::RawFd; use std::ptr; use std::sync::atomic::{AtomicUsize, Ordering}; @@ -363,6 +364,7 @@ fn readb(in_fd: RawFd, blocking: bool) -> ReadbResult { // The terminal has been closed. return ReadbResult::Eof; } + FLOG!(reader, "Read byte {}", arr[0]); // The common path is to return a u8. return ReadbResult::Byte(arr[0]); } @@ -557,7 +559,6 @@ pub trait InputEventQueuer { /// convert them to a wchar_t. Conversion is done using mbrtowc. If a character has previously /// been read and then 'unread' using \c input_common_unreadch, that character is returned. fn readch(&mut self) -> CharEvent { - let mut state = zero_mbstate(); loop { // Do we have something enqueued already? // Note this may be initially true, or it may become true through calls to @@ -610,8 +611,16 @@ pub trait InputEventQueuer { continue; } let mut consumed = 0; - for i in 0..buffer.len() { - self.parse_codepoint( + let mut state = zero_mbstate(); + let mut i = 0; + let ok = loop { + if i == buffer.len() { + buffer.push(match readb(self.get_in_fd(), /*blocking=*/ true) { + ReadbResult::Byte(b) => b, + _ => 0, + }); + } + match self.parse_codepoint( &mut state, &mut key, &mut seq, @@ -619,7 +628,20 @@ pub trait InputEventQueuer { i, &mut consumed, &mut have_escape_prefix, - ); + ) { + ControlFlow::Continue(codepoint_complete) => { + if codepoint_complete && i + 1 == buffer.len() { + break true; + } + } + ControlFlow::Break(()) => { + break false; + } + } + i += 1; + }; + if !ok { + continue; } return if let Some(key) = key { CharEvent::from_key_seq(key, seq) @@ -684,7 +706,7 @@ pub trait InputEventQueuer { i: usize, consumed: &mut usize, have_escape_prefix: &mut bool, - ) { + ) -> ControlFlow<(), bool> { let mut res: char = '\0'; let read_byte = buffer[i]; if crate::libc::MB_CUR_MAX() == 1 { @@ -693,7 +715,7 @@ pub trait InputEventQueuer { // the single-byte locale is compatible with Unicode upper-ASCII. res = read_byte.into(); out_seq.push(res); - return; + return ControlFlow::Continue(true); } let mut codepoint = u32::from(res); let sz = unsafe { @@ -709,17 +731,17 @@ pub trait InputEventQueuer { FLOG!(reader, "Illegal input"); *consumed += 1; self.push_front(CharEvent::from_check_exit()); - return; + return ControlFlow::Break(()); } -2 => { // Sequence not yet complete. - return; + return ControlFlow::Continue(false); } 0 => { // Actual nul char. *consumed += 1; out_seq.push('\0'); - return; + return ControlFlow::Continue(true); } _ => (), } @@ -732,13 +754,14 @@ pub trait InputEventQueuer { } *consumed += 1; out_seq.push(res); - return; + return ControlFlow::Continue(true); } } for &b in &buffer[*consumed..i] { out_seq.push(encode_byte_to_char(b)); *consumed += 1; } + ControlFlow::Continue(true) } fn parse_csi(&mut self, buffer: &mut Vec) -> Option {