mirror of
https://github.com/fish-shell/fish-shell.git
synced 2025-02-20 22:27:08 +08:00
Stop crashing on invalid Unicode input
Unlike C++, Rust requires "char" to be a valid Unicode code point. As a workaround, we take the raw (probably UTF-8-encoded) input and convert each input byte to a char representation from the private use area (see commit 3b15e995e (str2wcs: encode invalid Unicode characters in the private use area, 2023-04-01)). We convert back whenever we output the string, which is correct as long as the encoding didn't change since the data was input. We also need to convert keyboard input; do that. Quick testing shows that our reader drops PUA characters. Since this patch converts both invalid Unicode input as well as PUA input into a safe PUA representation, there's no longer a reason to not add PUA characters to the commandline, so let's do that to restore traditional behavior. Render them as � (REPLACEMENT CHARACTER); unfortunately we show one per input byte instead of one per code point. To fix this we probably need our own char type. While at it, remove some special cases that try to prevent insertion of control characters. I don't think they are necessary. Could be wrong..
This commit is contained in:
parent
f60b6e6cd4
commit
b77d1d0e2b
@ -1,11 +1,11 @@
|
||||
use crate::common::{is_windows_subsystem_for_linux, read_blocked};
|
||||
use crate::common::{fish_reserved_codepoint, is_windows_subsystem_for_linux, read_blocked};
|
||||
use crate::env::{EnvStack, Environment};
|
||||
use crate::fd_readable_set::FdReadableSet;
|
||||
use crate::flog::FLOG;
|
||||
use crate::reader::reader_current_data;
|
||||
use crate::threads::{iothread_port, iothread_service_main};
|
||||
use crate::universal_notifier::default_notifier;
|
||||
use crate::wchar::prelude::*;
|
||||
use crate::wchar::{encode_byte_to_char, prelude::*};
|
||||
use crate::wutil::encoding::{mbrtowc, zero_mbstate};
|
||||
use crate::wutil::fish_wcstol;
|
||||
use std::collections::VecDeque;
|
||||
@ -374,6 +374,8 @@ pub trait InputEventQueuer {
|
||||
fn readch(&mut self) -> CharEvent {
|
||||
let mut res: char = '\0';
|
||||
let mut state = zero_mbstate();
|
||||
let mut bytes = [0; 64 * 16];
|
||||
let mut num_bytes = 0;
|
||||
loop {
|
||||
// Do we have something enqueued already?
|
||||
// Note this may be initially true, or it may become true through calls to
|
||||
@ -415,9 +417,10 @@ pub trait InputEventQueuer {
|
||||
res = read_byte.into();
|
||||
return CharEvent::from_char(res);
|
||||
}
|
||||
let mut codepoint = u32::from(res);
|
||||
let sz = unsafe {
|
||||
mbrtowc(
|
||||
std::ptr::addr_of_mut!(res).cast(),
|
||||
std::ptr::addr_of_mut!(codepoint).cast(),
|
||||
std::ptr::addr_of!(read_byte).cast(),
|
||||
1,
|
||||
&mut state,
|
||||
@ -430,16 +433,30 @@ pub trait InputEventQueuer {
|
||||
}
|
||||
-2 => {
|
||||
// Sequence not yet complete.
|
||||
bytes[num_bytes] = read_byte;
|
||||
num_bytes += 1;
|
||||
continue;
|
||||
}
|
||||
0 => {
|
||||
// Actual nul char.
|
||||
return CharEvent::from_char('\0');
|
||||
}
|
||||
_ => {
|
||||
// Sequence complete.
|
||||
_ => (),
|
||||
}
|
||||
if let Some(res) = char::from_u32(codepoint) {
|
||||
// Sequence complete.
|
||||
if !fish_reserved_codepoint(res) {
|
||||
return CharEvent::from_char(res);
|
||||
}
|
||||
}
|
||||
bytes[num_bytes] = read_byte;
|
||||
num_bytes += 1;
|
||||
for &b in &bytes[1..num_bytes] {
|
||||
let c = CharEvent::from_char(encode_byte_to_char(b));
|
||||
self.push_back(c);
|
||||
}
|
||||
let res = CharEvent::from_char(encode_byte_to_char(bytes[0]));
|
||||
return res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -39,10 +39,10 @@ use crate::ast::{self, Ast, Category, Traversal};
|
||||
use crate::builtins::shared::STATUS_CMD_OK;
|
||||
use crate::color::RgbColor;
|
||||
use crate::common::{
|
||||
escape, escape_string, exit_without_destructors, fish_reserved_codepoint, get_ellipsis_char,
|
||||
get_obfuscation_read_char, redirect_tty_output, scoped_push_replacer, scoped_push_replacer_ctx,
|
||||
shell_modes, str2wcstring, wcs2string, write_loop, EscapeFlags, EscapeStringStyle, ScopeGuard,
|
||||
PROGRAM_NAME, UTF8_BOM_WCHAR,
|
||||
escape, escape_string, exit_without_destructors, get_ellipsis_char, get_obfuscation_read_char,
|
||||
redirect_tty_output, scoped_push_replacer, scoped_push_replacer_ctx, shell_modes, str2wcstring,
|
||||
wcs2string, write_loop, EscapeFlags, EscapeStringStyle, ScopeGuard, PROGRAM_NAME,
|
||||
UTF8_BOM_WCHAR,
|
||||
};
|
||||
use crate::complete::{
|
||||
complete, complete_load, sort_and_prioritize, CompleteFlags, Completion, CompletionList,
|
||||
@ -1844,10 +1844,7 @@ impl ReaderData {
|
||||
&& zelf.active_edit_line().1.position() == 0
|
||||
{
|
||||
// This character is skipped.
|
||||
} else if !fish_reserved_codepoint(c)
|
||||
&& (c >= ' ' || c == '\n' || c == '\r')
|
||||
&& c != '\x7F'
|
||||
{
|
||||
} else {
|
||||
// Regular character.
|
||||
let (elt, _el) = zelf.active_edit_line();
|
||||
zelf.insert_char(elt, c);
|
||||
@ -1857,10 +1854,6 @@ impl ReaderData {
|
||||
// We end history search. We could instead update the search string.
|
||||
zelf.history_search.reset();
|
||||
}
|
||||
} else {
|
||||
// This can happen if the user presses a control char we don't recognize. No
|
||||
// reason to report this to the user unless they've enabled debugging output.
|
||||
FLOG!(reader, wgettext_fmt!("Unknown key binding 0x%X", c));
|
||||
}
|
||||
rls.last_cmd = None;
|
||||
}
|
||||
@ -1976,7 +1969,7 @@ impl ReaderData {
|
||||
self.inputter
|
||||
.read_char(allow_commands.then_some(&mut command_handler))
|
||||
};
|
||||
if !event_is_normal_char(&evt) || !poll_fd_readable(self.conf.inputfd) {
|
||||
if !evt.is_char() || !poll_fd_readable(self.conf.inputfd) {
|
||||
event_needing_handling = Some(evt);
|
||||
break;
|
||||
} else if evt.input_style == CharInputStyle::NotFirst
|
||||
@ -5334,12 +5327,3 @@ impl ReaderData {
|
||||
self.set_buffer_maintaining_pager(&new_command_line, cursor, false);
|
||||
}
|
||||
}
|
||||
|
||||
/// \return true if an event is a normal character that should be inserted into the buffer.
|
||||
fn event_is_normal_char(evt: &CharEvent) -> bool {
|
||||
if !evt.is_char() {
|
||||
return false;
|
||||
}
|
||||
let c = evt.get_char();
|
||||
!fish_reserved_codepoint(c) && c > char::from(31) && c != char::from(127)
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ use std::sync::Mutex;
|
||||
use libc::{ONLCR, STDERR_FILENO, STDOUT_FILENO};
|
||||
|
||||
use crate::common::{
|
||||
get_ellipsis_char, get_omitted_newline_str, get_omitted_newline_width,
|
||||
fish_reserved_codepoint, get_ellipsis_char, get_omitted_newline_str, get_omitted_newline_width,
|
||||
has_working_tty_timestamps, shell_modes, str2wcstring, wcs2string, write_loop, ScopeGuard,
|
||||
ScopeGuarding,
|
||||
};
|
||||
@ -1825,6 +1825,9 @@ fn compute_layout(
|
||||
// \n.
|
||||
// See https://unicode-table.com/en/blocks/control-pictures/
|
||||
fn rendered_character(c: char) -> char {
|
||||
if fish_reserved_codepoint(c) {
|
||||
return '�'; // replacement character
|
||||
}
|
||||
if c <= '\x1F' {
|
||||
char::from_u32(u32::from(c) + 0x2400).unwrap()
|
||||
} else {
|
||||
|
Loading…
x
Reference in New Issue
Block a user