From 6569943cb82275b41854c9f268d1ec50f7a9f9cd Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Sat, 16 Sep 2023 08:36:39 +0200 Subject: [PATCH] Port builtin read --- CMakeLists.txt | 1 - fish-rust/src/builtins/read.rs | 797 ++++++++++++++++++++++++++++++++- fish-rust/src/ffi.rs | 2 - src/builtins/read.cpp | 672 --------------------------- src/builtins/read.h | 13 - src/ffi_baggage.h | 2 - 6 files changed, 795 insertions(+), 692 deletions(-) delete mode 100644 src/builtins/read.cpp delete mode 100644 src/builtins/read.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e209aadb2..30ffdc43d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,7 +101,6 @@ endif() set(FISH_BUILTIN_SRCS src/builtins/bind.cpp src/builtins/commandline.cpp - src/builtins/read.cpp src/builtins/ulimit.cpp ) # List of other sources. diff --git a/fish-rust/src/builtins/read.rs b/fish-rust/src/builtins/read.rs index 5f9c4c33a..8dd4c5543 100644 --- a/fish-rust/src/builtins/read.rs +++ b/fish-rust/src/builtins/read.rs @@ -1,7 +1,800 @@ //! Implementation of the read builtin. use super::prelude::*; +use crate::common::escape; +use crate::common::read_blocked; +use crate::common::scoped_push_replacer; +use crate::common::str2wcstring; +use crate::common::unescape_string; +use crate::common::valid_var_name; +use crate::common::UnescapeStringStyle; +use crate::common::EMPTY_STRING; +use crate::compat::MB_CUR_MAX; +use crate::env::EnvMode; +use crate::env::Environment; +use crate::env::READ_BYTE_LIMIT; +use crate::env::{EnvVar, EnvVarFlags}; +use crate::ffi; +use crate::reader::ReaderConfig; +use crate::reader::{reader_pop, reader_push, reader_readline}; +use crate::tokenizer::Tokenizer; +use crate::tokenizer::TOK_ACCEPT_UNFINISHED; +use crate::wcstringutil::split_about; +use crate::wcstringutil::split_string_tok; +use crate::wutil; +use crate::wutil::encoding::mbrtowc; +use crate::wutil::encoding::zero_mbstate; +use crate::wutil::perror; +use libc::SEEK_CUR; +use std::os::fd::RawFd; +use std::sync::atomic::Ordering; -pub fn read(parser: &Parser, streams: &mut IoStreams, args: &mut [&wstr]) -> Option { - run_builtin_ffi(crate::ffi::builtin_read, parser, streams, args) +#[derive(Default)] +struct Options { + print_help: bool, + place: EnvMode, + prompt_cmd: WString, + prompt: Option, + prompt_str: Option, + right_prompt: WString, + commandline: WString, + // If a delimiter was given. Used to distinguish between the default + // empty string and a given empty delimiter. + delimiter: Option, + tokenize: bool, + shell: bool, + array: bool, + silent: bool, + split_null: bool, + to_stdout: bool, + nchars: i32, + one_line: bool, +} + +impl Options { + fn new() -> Self { + Options { + place: EnvMode::USER, + ..Default::default() + } + } +} + +const SHORT_OPTIONS: &wstr = L!(":ac:d:fghiLln:p:sStuxzP:UR:L"); +const LONG_OPTIONS: &[woption] = &[ + wopt(L!("array"), woption_argument_t::no_argument, 'a'), + wopt(L!("command"), woption_argument_t::required_argument, 'c'), + wopt(L!("delimiter"), woption_argument_t::required_argument, 'd'), + wopt(L!("export"), woption_argument_t::no_argument, 'x'), + wopt(L!("function"), woption_argument_t::no_argument, 'f'), + wopt(L!("global"), woption_argument_t::no_argument, 'g'), + wopt(L!("help"), woption_argument_t::no_argument, 'h'), + wopt(L!("line"), woption_argument_t::no_argument, 'L'), + wopt(L!("list"), woption_argument_t::no_argument, 'a'), + wopt(L!("local"), woption_argument_t::no_argument, 'l'), + wopt(L!("nchars"), woption_argument_t::required_argument, 'n'), + wopt(L!("null"), woption_argument_t::no_argument, 'z'), + wopt(L!("prompt"), woption_argument_t::required_argument, 'p'), + wopt(L!("prompt-str"), woption_argument_t::required_argument, 'P'), + wopt( + L!("right-prompt"), + woption_argument_t::required_argument, + 'R', + ), + wopt(L!("shell"), woption_argument_t::no_argument, 'S'), + wopt(L!("silent"), woption_argument_t::no_argument, 's'), + wopt(L!("tokenize"), woption_argument_t::no_argument, 't'), + wopt(L!("unexport"), woption_argument_t::no_argument, 'u'), + wopt(L!("universal"), woption_argument_t::no_argument, 'U'), +]; + +fn parse_cmd_opts( + args: &mut [&wstr], + parser: &Parser, + streams: &mut IoStreams, +) -> Result<(Options, usize), Option> { + let cmd = args[0]; + let mut opts = Options::new(); + let mut w = wgetopter_t::new(SHORT_OPTIONS, LONG_OPTIONS, args); + while let Some(opt) = w.wgetopt_long() { + match opt { + 'a' => { + opts.array = true; + } + 'c' => { + opts.commandline = w.woptarg.unwrap().to_owned(); + } + 'd' => { + opts.delimiter = Some(w.woptarg.unwrap().to_owned()); + } + 'i' => { + streams.err.append(wgettext_fmt!( + concat!( + "%ls: usage of -i for --silent is deprecated. Please ", + "use -s or --silent instead.\n" + ), + cmd + )); + return Err(STATUS_INVALID_ARGS); + } + 'f' => { + opts.place |= EnvMode::FUNCTION; + } + 'g' => { + opts.place |= EnvMode::GLOBAL; + } + 'h' => { + opts.print_help = true; + } + 'L' => { + opts.one_line = true; + } + 'l' => { + opts.place |= EnvMode::LOCAL; + } + 'n' => { + opts.nchars = match fish_wcstoi(w.woptarg.unwrap()) { + Ok(n) => n, + Err(err) => { + if err == wutil::Error::Overflow { + streams.err.append(wgettext_fmt!( + "%ls: Argument '%ls' is out of range\n", + cmd, + w.woptarg.unwrap() + )); + builtin_print_error_trailer(parser, streams.err, cmd); + return Err(STATUS_INVALID_ARGS); + } + + streams.err.append(wgettext_fmt!( + BUILTIN_ERR_NOT_NUMBER, + cmd, + w.woptarg.unwrap() + )); + builtin_print_error_trailer(parser, streams.err, cmd); + return Err(STATUS_INVALID_ARGS); + } + } + } + 'P' => { + opts.prompt_str = Some(w.woptarg.unwrap().to_owned()); + } + 'p' => { + opts.prompt = Some(w.woptarg.unwrap().to_owned()); + } + 'R' => { + opts.right_prompt = w.woptarg.unwrap().to_owned(); + } + 's' => { + opts.silent = true; + } + 'S' => { + opts.shell = true; + } + 't' => { + opts.tokenize = true; + } + 'U' => { + opts.place |= EnvMode::UNIVERSAL; + } + 'u' => { + opts.place |= EnvMode::UNEXPORT; + } + 'x' => { + opts.place |= EnvMode::EXPORT; + } + 'z' => { + opts.split_null = true; + } + ':' => { + builtin_missing_argument(parser, streams, cmd, args[w.woptind - 1], true); + return Err(STATUS_INVALID_ARGS); + } + '?' => { + builtin_unknown_option(parser, streams, cmd, args[w.woptind - 1], true); + return Err(STATUS_INVALID_ARGS); + } + _ => { + panic!("unexpected retval from wgetopt_long"); + } + } + } + + Ok((opts, w.woptind)) +} + +/// Read from the tty. This is only valid when the stream is stdin and it is attached to a tty and +/// we weren't asked to split on null characters. +fn read_interactive( + parser: &Parser, + buff: &mut WString, + nchars: i32, + shell: bool, + silent: bool, + prompt: &wstr, + right_prompt: &wstr, + commandline: &wstr, + inputfd: RawFd, +) -> Option { + let mut exit_res = STATUS_CMD_OK; + + // Construct a configuration. + let mut conf = ReaderConfig::default(); + conf.complete_ok = shell; + conf.highlight_ok = shell; + conf.syntax_check_ok = shell; + + // No autosuggestions or abbreviations in builtin_read. + conf.autosuggest_ok = false; + conf.expand_abbrev_ok = false; + + conf.exit_on_interrupt = true; + conf.in_silent_mode = silent; + + conf.left_prompt_cmd = prompt.to_owned(); + conf.right_prompt_cmd = right_prompt.to_owned(); + conf.event = L!("fish_read"); + + conf.inputfd = inputfd; + + // Keep in-memory history only. + reader_push(parser, L!(""), conf); + ffi::commandline_set_buffer_ffi(&commandline.to_ffi(), usize::MAX); + + let mline = { + let _interactive = scoped_push_replacer( + |new_value| std::mem::replace(&mut parser.libdata_mut().pods.is_interactive, new_value), + true, + ); + + reader_readline(nchars) + }; + if let Some(line) = mline { + *buff = line; + if nchars > 0 && usize::try_from(nchars).unwrap() < buff.len() { + // Line may be longer than nchars if a keybinding used `commandline -i` + // note: we're deliberately throwing away the tail of the commandline. + // It shouldn't be unread because it was produced with `commandline -i`, + // not typed. + buff.truncate(usize::try_from(nchars).unwrap()); + } + } else { + exit_res = STATUS_CMD_ERROR; + } + reader_pop(); + exit_res +} + +/// Bash uses 128 bytes for its chunk size. Very informal testing I did suggested that a smaller +/// chunk size performed better. However, we're going to use the bash value under the assumption +/// they've done more extensive testing. +const READ_CHUNK_SIZE: usize = 128; + +/// Read from the fd in chunks until we see newline or null, as requested, is seen. This is only +/// used when the fd is seekable (so not from a tty or pipe) and we're not reading a specific number +/// of chars. +/// +/// Returns an exit status. +fn read_in_chunks(fd: RawFd, buff: &mut WString, split_null: bool, do_seek: bool) -> Option { + let mut exit_res = STATUS_CMD_OK; + let mut narrow_buff = vec![]; + let mut eof = false; + let mut finished = false; + + while !finished { + let mut inbuf = [0_u8; READ_CHUNK_SIZE]; + let bytes_read = read_blocked(fd, &mut inbuf); + + if bytes_read <= 0 { + eof = true; + break; + } + let bytes_read = bytes_read as usize; + + let bytes_consumed = inbuf[..bytes_read] + .iter() + .position(|c| *c == if split_null { b'\0' } else { b'\n' }) + .unwrap_or(bytes_read); + assert!(bytes_consumed <= bytes_read); + narrow_buff.extend_from_slice(&inbuf[..bytes_consumed]); + if bytes_consumed < bytes_read { + // We found a splitter. The +1 because we need to treat the splitter as consumed, but + // not append it to the string. + if do_seek + && unsafe { + libc::lseek( + fd, + libc::off_t::try_from( + isize::try_from(bytes_consumed).unwrap() - (bytes_read as isize) + 1, + ) + .unwrap(), + SEEK_CUR, + ) + } == -1 + { + perror("lseek"); + return STATUS_CMD_ERROR; + } + finished = true; + } else if narrow_buff.len() > READ_BYTE_LIMIT.load(Ordering::Relaxed) { + exit_res = STATUS_READ_TOO_MUCH; + finished = true; + } + } + + *buff = str2wcstring(&narrow_buff); + if buff.is_empty() && eof { + exit_res = STATUS_CMD_ERROR; + } + + exit_res +} + +/// Read from the fd on char at a time until we've read the requested number of characters or a +/// newline or null, as appropriate, is seen. This is inefficient so should only be used when the +/// fd is not seekable. +fn read_one_char_at_a_time( + fd: RawFd, + buff: &mut WString, + nchars: i32, + split_null: bool, +) -> Option { + let mut exit_res = STATUS_CMD_OK; + let mut eof = false; + let mut nbytes = 0; + + loop { + let mut finished = false; + let mut res = '\x00'; + let mut state = zero_mbstate(); + + while !finished { + let mut b = [0_u8; 1]; + if read_blocked(fd, &mut b) <= 0 { + eof = true; + break; + } + let b = b[0]; + + nbytes += 1; + if MB_CUR_MAX() == 1 { + res = char::from(b); + finished = true; + } else { + let sz = unsafe { + mbrtowc( + std::ptr::addr_of_mut!(res).cast(), + std::ptr::addr_of!(b).cast(), + 1, + &mut state, + ) + } as isize; + if sz == -1 { + state = zero_mbstate(); + } else if sz != -2 { + finished = true; + } + } + } + + if nbytes > READ_BYTE_LIMIT.load(Ordering::Relaxed) { + exit_res = STATUS_READ_TOO_MUCH; + break; + } + if eof { + break; + } + if !split_null && res == '\n' { + break; + } + if split_null && res == '\0' { + break; + } + + buff.push(res); + if nchars > 0 && usize::try_from(nchars).unwrap() <= buff.len() { + break; + } + } + + if buff.is_empty() && eof { + exit_res = STATUS_CMD_ERROR; + } + + exit_res +} + +/// Validate the arguments given to `read` and provide defaults where needed. +fn validate_read_args( + cmd: &wstr, + opts: &mut Options, + argv: &[&wstr], + parser: &Parser, + streams: &mut IoStreams, +) -> Option { + if opts.prompt.is_some() && opts.prompt_str.is_some() { + streams.err.append(wgettext_fmt!( + "%ls: Options %ls and %ls cannot be used together\n", + cmd, + "-p", + "-P", + )); + builtin_print_error_trailer(parser, streams.err, cmd); + return STATUS_INVALID_ARGS; + } + + if opts.delimiter.is_some() && opts.one_line { + streams.err.append(wgettext_fmt!( + "%ls: Options %ls and %ls cannot be used together\n", + cmd, + "--delimiter", + "--line" + )); + return STATUS_INVALID_ARGS; + } + if opts.one_line && opts.split_null { + streams.err.append(wgettext_fmt!( + "%ls: Options %ls and %ls cannot be used together\n", + cmd, + "-z", + "--line" + )); + return STATUS_INVALID_ARGS; + } + + if let Some(prompt_str) = opts.prompt_str.as_ref() { + opts.prompt_cmd = L!("echo ").to_owned() + &escape(prompt_str)[..]; + opts.prompt = Some(prompt_str.clone()); + } else if opts.prompt.is_none() { + opts.prompt = Some(DEFAULT_READ_PROMPT.to_owned()); + } + + if opts.place.contains(EnvMode::UNEXPORT) && opts.place.contains(EnvMode::EXPORT) { + streams.err.append(wgettext_fmt!(BUILTIN_ERR_EXPUNEXP, cmd)); + builtin_print_error_trailer(parser, streams.err, cmd); + return STATUS_INVALID_ARGS; + } + + if opts + .place + .intersection(EnvMode::LOCAL | EnvMode::FUNCTION | EnvMode::GLOBAL | EnvMode::UNIVERSAL) + .iter() + .count() + > 1 + { + streams.err.append(wgettext_fmt!(BUILTIN_ERR_GLOCAL, cmd)); + builtin_print_error_trailer(parser, streams.err, cmd); + return STATUS_INVALID_ARGS; + } + + let argc = argv.len(); + if !opts.array && argc < 1 && !opts.to_stdout { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_MIN_ARG_COUNT1, cmd, 1, argc)); + return STATUS_INVALID_ARGS; + } + + if opts.array && argc != 1 { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_ARG_COUNT1, cmd, 1, argc)); + return STATUS_INVALID_ARGS; + } + + if opts.to_stdout && argc > 0 { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_MAX_ARG_COUNT1, cmd, 0, argc)); + return STATUS_INVALID_ARGS; + } + + if opts.tokenize && opts.delimiter.is_some() { + streams.err.append(wgettext_fmt!( + BUILTIN_ERR_COMBO2_EXCLUSIVE, + cmd, + "--delimiter", + "--tokenize" + )); + return STATUS_INVALID_ARGS; + } + + if opts.tokenize && opts.one_line { + streams.err.append(wgettext_fmt!( + BUILTIN_ERR_COMBO2_EXCLUSIVE, + cmd, + "--line", + "--tokenize" + )); + return STATUS_INVALID_ARGS; + } + + // Verify all variable names. + for arg in argv { + if !valid_var_name(arg) { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_VARNAME, cmd, arg)); + builtin_print_error_trailer(parser, streams.err, cmd); + return STATUS_INVALID_ARGS; + } + if EnvVar::flags_for(arg).contains(EnvVarFlags::READ_ONLY) { + streams.err.append(wgettext_fmt!( + "%ls: %ls: cannot overwrite read-only variable", + cmd, + arg + )); + builtin_print_error_trailer(parser, streams.err, cmd); + return STATUS_INVALID_ARGS; + } + } + + STATUS_CMD_OK +} + +/// The read builtin. Reads from stdin and stores the values in environment variables. +pub fn read(parser: &Parser, streams: &mut IoStreams, argv: &mut [&wstr]) -> Option { + let mut buff = WString::new(); + let mut exit_res; + + let (mut opts, optind) = match parse_cmd_opts(argv, parser, streams) { + Ok(res) => res, + Err(retval) => return retval, + }; + let cmd = argv[0]; + let mut argv = &argv[..]; + if !opts.to_stdout { + argv = &argv[optind..]; + } + let argc = argv.len(); + + if argv.is_empty() { + opts.to_stdout = true; + } + + if opts.print_help { + builtin_print_help(parser, streams, cmd); + return STATUS_CMD_OK; + } + + let retval = validate_read_args(cmd, &mut opts, argv, parser, streams); + if retval != STATUS_CMD_OK { + return retval; + } + + // stdin may have been explicitly closed + if streams.stdin_fd < 0 { + streams + .err + .append(wgettext_fmt!("%ls: stdin is closed\n", cmd)); + return STATUS_CMD_ERROR; + } + + if opts.one_line { + // --line is the same as read -d \n repeated N times + opts.delimiter = Some(L!("\n").to_owned()); + opts.split_null = false; + opts.shell = false; + } + + let mut var_ptr = 0; + let vars_left = |var_ptr: usize| argc - var_ptr; + let clear_remaining_vars = |var_ptr: &mut usize| { + while vars_left(*var_ptr) != 0 { + parser.vars().set_empty(argv[*var_ptr], opts.place); + *var_ptr += 1; + } + }; + + // Normally, we either consume a line of input or all available input. But if we are reading a + // line at a time, we need a middle ground where we only consume as many lines as we need to + // fill the given vars. + loop { + buff.clear(); + + let stream_stdin_is_a_tty = unsafe { libc::isatty(streams.stdin_fd) } != 0; + if stream_stdin_is_a_tty && !opts.split_null { + // Read interactively using reader_readline(). This does not support splitting on null. + exit_res = read_interactive( + parser, + &mut buff, + opts.nchars, + opts.shell, + opts.silent, + opts.prompt.as_ref().unwrap_or(&EMPTY_STRING), + &opts.right_prompt, + &opts.commandline, + streams.stdin_fd, + ); + } else if opts.nchars == 0 && !stream_stdin_is_a_tty && + // "one_line" is implemented as reading n-times to a new line, + // if we're chunking we could get multiple lines so we would have to advance + // more than 1 per run through the loop. Let's skip that for now. + !opts.one_line && + ( + streams.stdin_is_directly_redirected || + unsafe {libc::lseek(streams.stdin_fd, 0, SEEK_CUR)} != -1) + { + // We read in chunks when we either can seek (so we put the bytes back), + // or we have the bytes to ourselves (because it's directly redirected). + // + // Note we skip seeking back even if we're directly redirected to a seekable stream, + // under the assumption that the stream will be closed soon anyway. + // You don't rewind VHS tapes before throwing them in the trash. + // TODO: Do this when nchars is set by seeking back. + exit_res = read_in_chunks( + streams.stdin_fd, + &mut buff, + opts.split_null, + !streams.stdin_is_directly_redirected, + ); + } else { + exit_res = + read_one_char_at_a_time(streams.stdin_fd, &mut buff, opts.nchars, opts.split_null); + } + + if exit_res != STATUS_CMD_OK { + clear_remaining_vars(&mut var_ptr); + return exit_res; + } + + if opts.to_stdout { + streams.out.append(buff); + return exit_res; + } + + if opts.tokenize { + let mut tok = Tokenizer::new(&buff, TOK_ACCEPT_UNFINISHED); + if opts.array { + // Array mode: assign each token as a separate element of the sole var. + let mut tokens = vec![]; + while let Some(t) = tok.next() { + let text = tok.text_of(&t); + if let Some(out) = unescape_string(text, UnescapeStringStyle::default()) { + tokens.push(out); + } else { + tokens.push(text.to_owned()); + } + } + + parser.set_var_and_fire(argv[var_ptr], opts.place, tokens); + var_ptr += 1; + } else { + while vars_left(var_ptr) - 1 > 0 { + let Some(t) = tok.next() else { + break; + }; + let text = tok.text_of(&t); + if let Some(out) = unescape_string(text, UnescapeStringStyle::default()) { + parser.set_var_and_fire(argv[var_ptr], opts.place, vec![out]); + } else { + parser.set_var_and_fire(argv[var_ptr], opts.place, vec![text.to_owned()]); + } + var_ptr += 1; + } + + // If we still have tokens, set the last variable to them. + if let Some(t) = tok.next() { + let rest = buff[t.offset()..].to_owned(); + parser.set_var_and_fire(argv[var_ptr], opts.place, vec![rest]); + var_ptr += 1; + } + } + // The rest of the loop is other split-modes, we don't care about those. + // Make sure to check the loop exit condition before continuing. + if !opts.one_line || vars_left(var_ptr) == 0 { + break; + } + continue; + } + + // todo!("don't clone") + let delimiter = opts + .delimiter + .clone() + .or_else(|| { + let ifs = parser.vars().get_unless_empty(L!("IFS")); + ifs.map(|ifs| ifs.as_string()) + }) + .unwrap_or_default(); + + if delimiter.is_empty() { + // Every character is a separate token with one wrinkle involving non-array mode where + // the final var gets the remaining characters as a single string. + let x = 1.max(buff.len()); + let n_splits = if opts.array || vars_left(var_ptr) > x { + x + } else { + vars_left(var_ptr) + }; + let mut chars = Vec::with_capacity(n_splits); + + for (i, c) in buff.chars().enumerate() { + if opts.array || i + 1 < vars_left(var_ptr) { + chars.push(WString::from_chars([c])); + } else { + chars.push(buff[i..].to_owned()); + break; + } + } + + if opts.array { + // Array mode: assign each char as a separate element of the sole var. + parser.set_var_and_fire(argv[var_ptr], opts.place, chars); + var_ptr += 1; + } else { + // Not array mode: assign each char to a separate var with the remainder being + // assigned to the last var. + for c in chars { + parser.set_var_and_fire(argv[var_ptr], opts.place, vec![c]); + var_ptr += 1; + } + } + } else if opts.array { + // The user has requested the input be split into a sequence of tokens and all the + // tokens assigned to a single var. How we do the tokenizing depends on whether the user + // specified the delimiter string or we're using IFS. + if opts.delimiter.is_none() { + // We're using IFS, so tokenize the buffer using each IFS char. This is for backward + // compatibility with old versions of fish. + let tokens = split_string_tok(&buff, &delimiter, None) + .into_iter() + .map(|s| s.to_owned()) + .collect(); + parser.set_var_and_fire(argv[var_ptr], opts.place, tokens); + var_ptr += 1; + } else { + // We're using a delimiter provided by the user so use the `string split` behavior. + let splits = split_about(&buff, &delimiter, usize::MAX, false) + .into_iter() + .map(|s| s.to_owned()) + .collect(); + parser.set_var_and_fire(argv[var_ptr], opts.place, splits); + var_ptr += 1; + } + } else { + // Not array mode. Split the input into tokens and assign each to the vars in sequence. + if opts.delimiter.is_none() { + // We're using IFS, so tokenize the buffer using each IFS char. This is for backward + // compatibility with old versions of fish. + // Note the final variable gets any remaining text. + let mut var_vals: Vec = + split_string_tok(&buff, &delimiter, Some(vars_left(var_ptr))) + .into_iter() + .map(|s| s.to_owned()) + .collect(); + let mut val_idx = 0; + while vars_left(var_ptr) != 0 { + let mut val = WString::new(); + if val_idx < var_vals.len() { + std::mem::swap(&mut val, &mut var_vals[val_idx]); + val_idx += 1; + } + parser.set_var_and_fire(argv[var_ptr], opts.place, vec![val]); + var_ptr += 1; + } + } else { + // We're using a delimiter provided by the user so use the `string split` behavior. + // We're making at most argc - 1 splits so the last variable + // is set to the remaining string. + let splits = split_about(&buff, &delimiter, argc - 1, false); + assert!(splits.len() <= vars_left(var_ptr)); + for split in splits { + parser.set_var_and_fire(argv[var_ptr], opts.place, vec![split.to_owned()]); + var_ptr += 1; + } + } + } + + if !opts.one_line || vars_left(var_ptr) == 0 { + break; + } + } + + if !opts.array { + // In case there were more args than splits + clear_remaining_vars(&mut var_ptr); + } + + exit_res } diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs index 1a5a0eeed..7f396dffe 100644 --- a/fish-rust/src/ffi.rs +++ b/fish-rust/src/ffi.rs @@ -44,7 +44,6 @@ include_cpp! { #include "builtins/bind.h" #include "builtins/commandline.h" - #include "builtins/read.h" #include "builtins/ulimit.h" safety!(unsafe_ffi) @@ -73,7 +72,6 @@ include_cpp! { generate!("builtin_bind") generate!("builtin_commandline") - generate!("builtin_read") generate!("builtin_ulimit") generate!("init_input") diff --git a/src/builtins/read.cpp b/src/builtins/read.cpp deleted file mode 100644 index 2f78dd409..000000000 --- a/src/builtins/read.cpp +++ /dev/null @@ -1,672 +0,0 @@ -// Implementation of the read builtin. -#include "config.h" // IWYU pragma: keep - -#include "read.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../builtin.h" -#include "../common.h" -#include "../env.h" -#include "../fallback.h" // IWYU pragma: keep -#include "../io.h" -#include "../maybe.h" -#include "../parser.h" -#include "../reader.h" -#include "../tokenizer.h" -#include "../wcstringutil.h" -#include "../wgetopt.h" -#include "../wutil.h" // IWYU pragma: keep -#include "builtins/shared.rs.h" - -namespace { -struct read_cmd_opts_t { - bool print_help = false; - int place = ENV_USER; - wcstring prompt_cmd; - const wchar_t *prompt = nullptr; - const wchar_t *prompt_str = nullptr; - const wchar_t *right_prompt = L""; - const wchar_t *commandline = L""; - // If a delimiter was given. Used to distinguish between the default - // empty string and a given empty delimiter. - bool have_delimiter = false; - wcstring delimiter; - bool tokenize = false; - bool shell = false; - bool array = false; - bool silent = false; - bool split_null = false; - bool to_stdout = false; - int nchars = 0; - bool one_line = false; -}; -} // namespace - -static const wchar_t *const short_options = L":ac:d:fghiLln:p:sStuxzP:UR:L"; -static const struct woption long_options[] = {{L"array", no_argument, 'a'}, - {L"command", required_argument, 'c'}, - {L"delimiter", required_argument, 'd'}, - {L"export", no_argument, 'x'}, - {L"function", no_argument, 'f'}, - {L"global", no_argument, 'g'}, - {L"help", no_argument, 'h'}, - {L"line", no_argument, 'L'}, - {L"list", no_argument, 'a'}, - {L"local", no_argument, 'l'}, - {L"nchars", required_argument, 'n'}, - {L"null", no_argument, 'z'}, - {L"prompt", required_argument, 'p'}, - {L"prompt-str", required_argument, 'P'}, - {L"right-prompt", required_argument, 'R'}, - {L"shell", no_argument, 'S'}, - {L"silent", no_argument, 's'}, - {L"tokenize", no_argument, 't'}, - {L"unexport", no_argument, 'u'}, - {L"universal", no_argument, 'U'}, - {}}; - -static int parse_cmd_opts(read_cmd_opts_t &opts, int *optind, //!OCLINT(high ncss method) - int argc, const wchar_t **argv, const parser_t &parser, - io_streams_t &streams) { - const wchar_t *cmd = argv[0]; - int opt; - wgetopter_t w; - while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) { - switch (opt) { - case 'a': { - opts.array = true; - break; - } - case L'c': { - opts.commandline = w.woptarg; - break; - } - case 'd': { - opts.have_delimiter = true; - opts.delimiter = w.woptarg; - break; - } - case 'i': { - streams.err()->append( - format_string(_(L"%ls: usage of -i for --silent is deprecated. Please " - L"use -s or --silent instead.\n"), - cmd)); - return STATUS_INVALID_ARGS; - } - case L'f': { - opts.place |= ENV_FUNCTION; - break; - } - case L'g': { - opts.place |= ENV_GLOBAL; - break; - } - case 'h': { - opts.print_help = true; - break; - } - case L'L': { - opts.one_line = true; - break; - } - case L'l': { - opts.place |= ENV_LOCAL; - break; - } - case L'n': { - opts.nchars = fish_wcstoi(w.woptarg); - if (errno) { - if (errno == ERANGE) { - streams.err()->append(format_string( - _(L"%ls: Argument '%ls' is out of range\n"), cmd, w.woptarg)); - builtin_print_error_trailer(parser, *streams.err(), cmd); - return STATUS_INVALID_ARGS; - } - - streams.err()->append(format_string(BUILTIN_ERR_NOT_NUMBER, cmd, w.woptarg)); - builtin_print_error_trailer(parser, *streams.err(), cmd); - return STATUS_INVALID_ARGS; - } - break; - } - case L'P': { - opts.prompt_str = w.woptarg; - break; - } - case L'p': { - opts.prompt = w.woptarg; - break; - } - case L'R': { - opts.right_prompt = w.woptarg; - break; - } - case 's': { - opts.silent = true; - break; - } - case L'S': { - opts.shell = true; - break; - } - case L't': { - opts.tokenize = true; - break; - } - case L'U': { - opts.place |= ENV_UNIVERSAL; - break; - } - case L'u': { - opts.place |= ENV_UNEXPORT; - break; - } - case L'x': { - opts.place |= ENV_EXPORT; - break; - } - case L'z': { - opts.split_null = true; - break; - } - case ':': { - builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1], true); - return STATUS_INVALID_ARGS; - } - case L'?': { - builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1], true); - return STATUS_INVALID_ARGS; - } - default: { - DIE("unexpected retval from wgetopt_long"); - } - } - } - - *optind = w.woptind; - return STATUS_CMD_OK; -} - -/// Read from the tty. This is only valid when the stream is stdin and it is attached to a tty and -/// we weren't asked to split on null characters. -static int read_interactive(const parser_t &parser, wcstring &buff, int nchars, bool shell, - bool silent, const wchar_t *prompt, const wchar_t *right_prompt, - const wchar_t *commandline, int in) { - int exit_res = STATUS_CMD_OK; - - // Construct a configuration. - reader_config_t conf; - conf.complete_ok = shell; - conf.highlight_ok = shell; - conf.syntax_check_ok = shell; - - // No autosuggestions or abbreviations in builtin_read. - conf.autosuggest_ok = false; - conf.expand_abbrev_ok = false; - - conf.exit_on_interrupt = true; - conf.in_silent_mode = silent; - - conf.left_prompt_cmd = prompt; - conf.right_prompt_cmd = right_prompt; - conf.event = L"fish_read"; - - conf.in = in; - - // Keep in-memory history only. - reader_push(parser, wcstring{}, std::move(conf)); - - commandline_set_buffer(commandline, std::wcslen(commandline)); - scoped_push interactive{&parser.libdata_pods_mut().is_interactive, true}; - - auto mline = reader_readline(nchars); - interactive.restore(); - if (mline) { - buff = mline.acquire(); - if (nchars > 0 && static_cast(nchars) < buff.size()) { - // Line may be longer than nchars if a keybinding used `commandline -i` - // note: we're deliberately throwing away the tail of the commandline. - // It shouldn't be unread because it was produced with `commandline -i`, - // not typed. - buff.resize(nchars); - } - } else { - exit_res = STATUS_CMD_ERROR; - } - reader_pop(); - return exit_res; -} - -/// Bash uses 128 bytes for its chunk size. Very informal testing I did suggested that a smaller -/// chunk size performed better. However, we're going to use the bash value under the assumption -/// they've done more extensive testing. -#define READ_CHUNK_SIZE 128 - -/// Read from the fd in chunks until we see newline or null, as requested, is seen. This is only -/// used when the fd is seekable (so not from a tty or pipe) and we're not reading a specific number -/// of chars. -/// -/// Returns an exit status. -static int read_in_chunks(int fd, wcstring &buff, bool split_null, bool do_seek) { - int exit_res = STATUS_CMD_OK; - std::string str; - bool eof = false; - bool finished = false; - - while (!finished) { - char inbuf[READ_CHUNK_SIZE]; - long bytes_read = read_blocked(fd, inbuf, READ_CHUNK_SIZE); - - if (bytes_read <= 0) { - eof = true; - break; - } - - const char *end = std::find(inbuf, inbuf + bytes_read, split_null ? L'\0' : L'\n'); - long bytes_consumed = end - inbuf; // must be signed for use in lseek - assert(bytes_consumed <= bytes_read); - str.append(inbuf, bytes_consumed); - if (bytes_consumed < bytes_read) { - // We found a splitter. The +1 because we need to treat the splitter as consumed, but - // not append it to the string. - if (do_seek && lseek(fd, bytes_consumed - bytes_read + 1, SEEK_CUR) == -1) { - wperror(L"lseek"); - return STATUS_CMD_ERROR; - } - finished = true; - } else if (str.size() > READ_BYTE_LIMIT) { - exit_res = STATUS_READ_TOO_MUCH; - finished = true; - } - } - - buff = str2wcstring(str); - if (buff.empty() && eof) { - exit_res = STATUS_CMD_ERROR; - } - - return exit_res; -} - -/// Read from the fd on char at a time until we've read the requested number of characters or a -/// newline or null, as appropriate, is seen. This is inefficient so should only be used when the -/// fd is not seekable. -static int read_one_char_at_a_time(int fd, wcstring &buff, int nchars, bool split_null) { - int exit_res = STATUS_CMD_OK; - bool eof = false; - size_t nbytes = 0; - - while (true) { - bool finished = false; - wchar_t res = 0; - mbstate_t state = {}; - - while (!finished) { - char b; - if (read_blocked(fd, &b, 1) <= 0) { - eof = true; - break; - } - - nbytes++; - if (MB_CUR_MAX == 1) { - res = static_cast(b); - finished = true; - } else { - size_t sz = std::mbrtowc(&res, &b, 1, &state); - if (sz == static_cast(-1)) { - std::memset(&state, 0, sizeof(state)); - } else if (sz != static_cast(-2)) { - finished = true; - } - } - } - - if (nbytes > READ_BYTE_LIMIT) { - exit_res = STATUS_READ_TOO_MUCH; - break; - } - if (eof) break; - if (!split_null && res == L'\n') break; - if (split_null && res == L'\0') break; - - buff.push_back(res); - if (nchars > 0 && static_cast(nchars) <= buff.size()) { - break; - } - } - - if (buff.empty() && eof) { - exit_res = STATUS_CMD_ERROR; - } - - return exit_res; -} - -/// Validate the arguments given to `read` and provide defaults where needed. -static int validate_read_args(const wchar_t *cmd, read_cmd_opts_t &opts, int argc, - const wchar_t *const *argv, const parser_t &parser, - io_streams_t &streams) { - if (opts.prompt && opts.prompt_str) { - streams.err()->append(format_string( - _(L"%ls: Options %ls and %ls cannot be used together\n"), cmd, L"-p", L"-P")); - builtin_print_error_trailer(parser, *streams.err(), cmd); - return STATUS_INVALID_ARGS; - } - - if (opts.have_delimiter && opts.one_line) { - streams.err()->append( - format_string(_(L"%ls: Options %ls and %ls cannot be used together\n"), cmd, - L"--delimiter", L"--line")); - return STATUS_INVALID_ARGS; - } - if (opts.one_line && opts.split_null) { - streams.err()->append(format_string( - _(L"%ls: Options %ls and %ls cannot be used together\n"), cmd, L"-z", L"--line")); - return STATUS_INVALID_ARGS; - } - - if (opts.prompt_str) { - opts.prompt_cmd = L"echo " + escape_string(opts.prompt_str); - opts.prompt = opts.prompt_cmd.c_str(); - } else if (!opts.prompt) { - opts.prompt = DEFAULT_READ_PROMPT; - } - - if ((opts.place & ENV_UNEXPORT) && (opts.place & ENV_EXPORT)) { - streams.err()->append(format_string(BUILTIN_ERR_EXPUNEXP, cmd)); - builtin_print_error_trailer(parser, *streams.err(), cmd); - return STATUS_INVALID_ARGS; - } - - if ((opts.place & ENV_LOCAL ? 1 : 0) + (opts.place & ENV_FUNCTION ? 1 : 0) + - (opts.place & ENV_GLOBAL ? 1 : 0) + (opts.place & ENV_UNIVERSAL ? 1 : 0) > - 1) { - streams.err()->append(format_string(BUILTIN_ERR_GLOCAL, cmd)); - builtin_print_error_trailer(parser, *streams.err(), cmd); - return STATUS_INVALID_ARGS; - } - - if (!opts.array && argc < 1 && !opts.to_stdout) { - streams.err()->append(format_string(BUILTIN_ERR_MIN_ARG_COUNT1, cmd, 1, argc)); - return STATUS_INVALID_ARGS; - } - - if (opts.array && argc != 1) { - streams.err()->append(format_string(BUILTIN_ERR_ARG_COUNT1, cmd, 1, argc)); - return STATUS_INVALID_ARGS; - } - - if (opts.to_stdout && argc > 0) { - streams.err()->append(format_string(BUILTIN_ERR_MAX_ARG_COUNT1, cmd, 0, argc)); - return STATUS_INVALID_ARGS; - } - - if (opts.tokenize && opts.have_delimiter) { - streams.err()->append( - format_string(BUILTIN_ERR_COMBO2_EXCLUSIVE, cmd, L"--delimiter", L"--tokenize")); - return STATUS_INVALID_ARGS; - } - - if (opts.tokenize && opts.one_line) { - streams.err()->append( - format_string(BUILTIN_ERR_COMBO2_EXCLUSIVE, cmd, L"--line", L"--tokenize")); - return STATUS_INVALID_ARGS; - } - - // Verify all variable names. - for (int i = 0; i < argc; i++) { - if (!valid_var_name(argv[i])) { - streams.err()->append(format_string(BUILTIN_ERR_VARNAME, cmd, argv[i])); - builtin_print_error_trailer(parser, *streams.err(), cmd); - return STATUS_INVALID_ARGS; - } - if (env_flags_for(argv[i]) & env_var_flag_read_only) { - streams.err()->append( - format_string(_(L"%ls: %ls: cannot overwrite read-only variable"), cmd, argv[i])); - builtin_print_error_trailer(parser, *streams.err(), cmd); - return STATUS_INVALID_ARGS; - } - } - - return STATUS_CMD_OK; -} - -/// The read builtin. Reads from stdin and stores the values in environment variables. -int builtin_read(const void *_parser, void *_streams, void *_argv) { - const auto &parser = *static_cast(_parser); - auto &streams = *static_cast(_streams); - auto argv = static_cast(_argv); - int argc = builtin_count_args(argv); - const wchar_t *cmd = argv[0]; - wcstring buff; - int exit_res = STATUS_CMD_OK; - read_cmd_opts_t opts; - - int optind; - int retval = parse_cmd_opts(opts, &optind, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - if (!opts.to_stdout) { - argc -= optind; - argv += optind; - } - - if (argc == 0) { - opts.to_stdout = true; - } - - if (opts.print_help) { - builtin_print_help(parser, streams, cmd); - return STATUS_CMD_OK; - } - - retval = validate_read_args(cmd, opts, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - // stdin may have been explicitly closed - if (streams.stdin_fd() < 0) { - streams.err()->append(format_string(_(L"%ls: stdin is closed\n"), cmd)); - return STATUS_CMD_ERROR; - } - - if (opts.one_line) { - // --line is the same as read -d \n repeated N times - opts.have_delimiter = true; - opts.delimiter = L"\n"; - opts.split_null = false; - opts.shell = false; - } - - const wchar_t *const *var_ptr = argv; - auto vars_left = [&]() { return argv + argc - var_ptr; }; - auto clear_remaining_vars = [&]() { - while (vars_left()) { - parser.vars().set(*var_ptr, opts.place, std::vector{}); - ++var_ptr; - } - }; - - // Normally, we either consume a line of input or all available input. But if we are reading a - // line at a time, we need a middle ground where we only consume as many lines as we need to - // fill the given vars. - do { - buff.clear(); - - int stream_stdin_is_a_tty = isatty(streams.stdin_fd()); - if (stream_stdin_is_a_tty && !opts.split_null) { - // Read interactively using reader_readline(). This does not support splitting on null. - exit_res = - read_interactive(parser, buff, opts.nchars, opts.shell, opts.silent, opts.prompt, - opts.right_prompt, opts.commandline, streams.stdin_fd()); - } else if (!opts.nchars && !stream_stdin_is_a_tty && - // "one_line" is implemented as reading n-times to a new line, - // if we're chunking we could get multiple lines so we would have to advance - // more than 1 per run through the loop. Let's skip that for now. - !opts.one_line && - (streams.stdin_is_directly_redirected() || - lseek(streams.stdin_fd(), 0, SEEK_CUR) != -1)) { - // We read in chunks when we either can seek (so we put the bytes back), - // or we have the bytes to ourselves (because it's directly redirected). - // - // Note we skip seeking back even if we're directly redirected to a seekable stream, - // under the assumption that the stream will be closed soon anyway. - // You don't rewind VHS tapes before throwing them in the trash. - // TODO: Do this when nchars is set by seeking back. - exit_res = read_in_chunks(streams.stdin_fd(), buff, opts.split_null, - !streams.stdin_is_directly_redirected()); - } else { - exit_res = - read_one_char_at_a_time(streams.stdin_fd(), buff, opts.nchars, opts.split_null); - } - - if (exit_res != STATUS_CMD_OK) { - clear_remaining_vars(); - return exit_res; - } - - if (opts.to_stdout) { - streams.out()->append(buff); - return exit_res; - } - - if (opts.tokenize) { - auto tok = new_tokenizer(buff.c_str(), TOK_ACCEPT_UNFINISHED); - if (opts.array) { - // Array mode: assign each token as a separate element of the sole var. - std::vector tokens; - while (auto t = tok->next()) { - auto text = *tok->text_of(*t); - if (auto out = unescape_string(text.c_str(), text.length(), UNESCAPE_DEFAULT, - STRING_STYLE_SCRIPT)) { - tokens.push_back(*out); - } else { - tokens.push_back(text); - } - } - - parser.set_var_and_fire(*var_ptr++, opts.place, std::move(tokens)); - } else { - std::unique_ptr t; - while ((vars_left() - 1 > 0) && (t = tok->next())) { - auto text = *tok->text_of(*t); - if (auto out = unescape_string(text.c_str(), text.length(), UNESCAPE_DEFAULT, - STRING_STYLE_SCRIPT)) { - parser.set_var_and_fire(*var_ptr++, opts.place, - std::vector{*out}); - } else { - parser.set_var_and_fire(*var_ptr++, opts.place, - std::vector{text}); - } - } - - // If we still have tokens, set the last variable to them. - if ((t = tok->next())) { - wcstring rest = wcstring(buff, t->offset); - parser.set_var_and_fire(*var_ptr++, opts.place, - std::vector{std::move(rest)}); - } - } - // The rest of the loop is other split-modes, we don't care about those. - continue; - } - - if (!opts.have_delimiter) { - auto ifs = parser.vars().get_unless_empty(L"IFS"); - if (ifs) opts.delimiter = *ifs->as_string(); - } - - if (opts.delimiter.empty()) { - // Every character is a separate token with one wrinkle involving non-array mode where - // the final var gets the remaining characters as a single string. - size_t x = std::max(static_cast(1), buff.size()); - size_t n_splits = - (opts.array || static_cast(vars_left()) > x) ? x : vars_left(); - std::vector chars; - chars.reserve(n_splits); - - int i = 0; - for (auto it = buff.begin(), end = buff.end(); it != end; ++i, ++it) { - if (opts.array || i + 1 < vars_left()) { - chars.emplace_back(1, *it); - } else { - chars.emplace_back(it, buff.end()); - break; - } - } - - if (opts.array) { - // Array mode: assign each char as a separate element of the sole var. - parser.set_var_and_fire(*var_ptr++, opts.place, chars); - } else { - // Not array mode: assign each char to a separate var with the remainder being - // assigned to the last var. - for (const auto &c : chars) { - parser.set_var_and_fire(*var_ptr++, opts.place, std::vector{c}); - } - } - } else if (opts.array) { - // The user has requested the input be split into a sequence of tokens and all the - // tokens assigned to a single var. How we do the tokenizing depends on whether the user - // specified the delimiter string or we're using IFS. - if (!opts.have_delimiter) { - // We're using IFS, so tokenize the buffer using each IFS char. This is for backward - // compatibility with old versions of fish. - std::vector tokens = split_string_tok(buff, opts.delimiter); - parser.set_var_and_fire(*var_ptr++, opts.place, std::move(tokens)); - } else { - // We're using a delimiter provided by the user so use the `string split` behavior. - std::vector splits; - split_about(buff.begin(), buff.end(), opts.delimiter.begin(), opts.delimiter.end(), - &splits); - - parser.set_var_and_fire(*var_ptr++, opts.place, splits); - } - } else { - // Not array mode. Split the input into tokens and assign each to the vars in sequence. - if (!opts.have_delimiter) { - // We're using IFS, so tokenize the buffer using each IFS char. This is for backward - // compatibility with old versions of fish. - // Note the final variable gets any remaining text. - std::vector var_vals = - split_string_tok(buff, opts.delimiter, vars_left()); - size_t val_idx = 0; - while (vars_left()) { - wcstring val; - if (val_idx < var_vals.size()) { - val = std::move(var_vals.at(val_idx++)); - } - parser.set_var_and_fire(*var_ptr++, opts.place, - std::vector{std::move(val)}); - } - } else { - // We're using a delimiter provided by the user so use the `string split` behavior. - std::vector splits; - // We're making at most argc - 1 splits so the last variable - // is set to the remaining string. - split_about(buff.begin(), buff.end(), opts.delimiter.begin(), opts.delimiter.end(), - &splits, argc - 1); - assert(splits.size() <= static_cast(vars_left())); - for (const auto &split : splits) { - parser.set_var_and_fire(*var_ptr++, opts.place, std::vector{split}); - } - } - } - } while (opts.one_line && vars_left()); - - if (!opts.array) { - // In case there were more args than splits - clear_remaining_vars(); - } - - return exit_res; -} diff --git a/src/builtins/read.h b/src/builtins/read.h deleted file mode 100644 index 2e2bc6262..000000000 --- a/src/builtins/read.h +++ /dev/null @@ -1,13 +0,0 @@ -// Prototypes for executing builtin_read function. -#ifndef FISH_BUILTIN_READ_H -#define FISH_BUILTIN_READ_H - -#include "../maybe.h" - -struct Parser; -struct IoStreams; -using parser_t = Parser; -using io_streams_t = IoStreams; - -int builtin_read(const void *parser, void *streams, void *argv); -#endif diff --git a/src/ffi_baggage.h b/src/ffi_baggage.h index 90fb825c6..e3a809599 100644 --- a/src/ffi_baggage.h +++ b/src/ffi_baggage.h @@ -1,7 +1,6 @@ #include "builtin.h" #include "builtins/bind.h" #include "builtins/commandline.h" -#include "builtins/read.h" #include "builtins/ulimit.h" #include "event.h" #include "fds.h" @@ -44,6 +43,5 @@ void mark_as_used(const parser_t& parser, env_stack_t& env_stack) { builtin_bind({}, {}, {}); builtin_commandline({}, {}, {}); - builtin_read({}, {}, {}); builtin_ulimit({}, {}, {}); }