Port echo builtin to Rust

This commit is contained in:
Xiretza 2023-02-05 22:08:32 +01:00 committed by Johannes Altmanninger
parent 4b85c2f6db
commit a16e2ecb1b
9 changed files with 268 additions and 257 deletions

View File

@ -103,7 +103,7 @@ set(FISH_BUILTIN_SRCS
src/builtins/bg.cpp src/builtins/bind.cpp src/builtins/block.cpp
src/builtins/builtin.cpp src/builtins/cd.cpp src/builtins/command.cpp
src/builtins/commandline.cpp src/builtins/complete.cpp src/builtins/contains.cpp
src/builtins/disown.cpp src/builtins/echo.cpp src/builtins/emit.cpp
src/builtins/disown.cpp src/builtins/emit.cpp
src/builtins/eval.cpp src/builtins/exit.cpp src/builtins/fg.cpp
src/builtins/function.cpp src/builtins/functions.cpp src/builtins/history.cpp
src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/printf.cpp src/builtins/path.cpp

View File

@ -0,0 +1,232 @@
//! Implementation of the echo builtin.
use libc::c_int;
use super::shared::{builtin_missing_argument, io_streams_t, STATUS_CMD_OK, STATUS_INVALID_ARGS};
use crate::ffi::parser_t;
use crate::wchar::{wchar_literal_byte, wstr, WString, L};
use crate::wgetopt::{wgetopter_t, woption};
/// Output-formatting switches for the echo builtin, as selected by its command-line flags.
#[derive(Clone, Copy, Debug)]
struct Options {
    /// Append a trailing newline to the output; cleared by `-n`.
    print_newline: bool,
    /// Separate the echoed arguments with a single space; cleared by `-s`.
    print_spaces: bool,
    /// Interpret backslash escape sequences; set by `-e`, cleared by `-E`.
    interpret_special_chars: bool,
}

impl Default for Options {
    /// The behavior of a plain `echo`: newline and spaces on, escape interpretation off.
    fn default() -> Options {
        Options {
            interpret_special_chars: false,
            print_spaces: true,
            print_newline: true,
        }
    }
}
/// Parse the leading option arguments of the echo builtin.
///
/// On success, returns the selected [`Options`] together with the index into `args` of the first
/// argument that should be echoed. If the option parser reports a missing option argument, an
/// error is reported through `builtin_missing_argument` and `Err(STATUS_INVALID_ARGS)` is
/// returned.
///
/// When the option parser reports an unknown option (`'?'`), parsing stops: the function returns
/// the last snapshot of the options rather than the live state, and `woptind - 1` as the index of
/// the first argument to echo.
fn parse_options(
    args: &mut [&wstr],
    parser: &mut parser_t,
    streams: &mut io_streams_t,
) -> Result<(Options, usize), Option<c_int>> {
    const SHORT_OPTS: &wstr = L!("+:Eens");
    const LONG_OPTS: &[woption] = &[];

    let cmd = args[0];

    // `current` tracks every flag seen so far. `committed` is a deliberately stale snapshot that
    // we fall back to when an unknown option shows up, so that flags set just before it can be
    // reverted (think of an argument like "-n foo").
    let mut current = Options::default();
    let mut committed = current;
    let mut committed_optind = 0;

    let mut w = wgetopter_t::new(SHORT_OPTS, LONG_OPTS, args);
    loop {
        let Some(flag) = w.wgetopt_long() else {
            // All options parsed cleanly; everything from `woptind` on is text to echo.
            return Ok((current, w.woptind));
        };

        match flag {
            'E' => current.interpret_special_chars = false,
            'e' => current.interpret_special_chars = true,
            'n' => current.print_newline = false,
            's' => current.print_spaces = false,
            ':' => {
                builtin_missing_argument(parser, streams, cmd, args[w.woptind - 1], true);
                return Err(STATUS_INVALID_ARGS);
            }
            '?' => return Ok((committed, w.woptind - 1)),
            _ => panic!("unexpected retval from wgetopter::wgetopt_long()"),
        }

        // Super cheesy: the snapshot has to stay one step out-of-date so that the *last* option
        // can be ignored. (This might be an issue in wgetopt, but that's a whole other can of
        // worms and really only occurs with our weird "put it back" option parsing.)
        if w.woptind == committed_optind + 2 {
            committed = current;
            committed_optind = w.woptind;
        }
    }
}
/// Parse a numeric escape sequence at the start of `chars` (the characters that follow the
/// backslash), returning the number of characters consumed and the resulting byte value.
///
/// Supported escape sequences:
///
/// - `0nnn`: octal value, zero to three digits
/// - `nnn`: octal value, one to three digits
/// - `xhh`: hex value, one to two digits
///
/// Returns `None` if `chars` is empty, if it starts with neither an octal digit nor `x`, or if an
/// `x` is not followed by at least one hex digit (the `x` alone is not a sequence).
///
/// The count of consumed characters includes the leading `x` of a hex sequence. Values that do
/// not fit into a byte (octal sequences above `377`) wrap around modulo 256, so `777` yields 255.
fn parse_numeric_sequence<I>(chars: I) -> Option<(usize, u8)>
where
    I: IntoIterator<Item = char>,
{
    let mut chars = chars.into_iter().peekable();
    let first = *chars.peek()?;

    // `start` is the index of the first character of the numeric part of the sequence.
    let (base, start, max_digits): (u8, usize, usize) = if first.is_digit(8) {
        // Octal escape. If the first digit is a 0, we allow four digits (including that zero);
        // otherwise, we allow 3.
        (8, 0, if first == '0' { 4 } else { 3 })
    } else if first == 'x' {
        // Hex escape; the x itself is skipped and is not a digit.
        (16, 1, 2)
    } else {
        return None;
    };

    let mut val: u8 = 0;
    let mut digits = 0;
    for digit in chars
        .skip(start)
        .take(max_digits)
        .map_while(|c| c.to_digit(base.into()))
    {
        let digit = u8::try_from(digit).expect("a digit is smaller than its base (at most 16)");
        // Wrap explicitly: a sequence such as `777` exceeds a byte, and plain arithmetic would
        // panic on overflow in builds with overflow checks enabled.
        val = val.wrapping_mul(base).wrapping_add(digit);
        digits += 1;
    }

    // We succeeded only if we consumed at least one actual digit; a lone `x` does not count.
    if digits == 0 {
        return None;
    }
    Some((start + digits, val))
}
/// The echo builtin.
///
/// Bash only respects `-n` if it's the first argument. We'll do the same. We also support a new,
/// fish specific, option `-s` to mean "no spaces".
///
/// The arguments that remain after option parsing are assembled into a single buffer, which is
/// appended to `streams.out` in one call. A failure status from option parsing is returned as-is;
/// otherwise the result is `STATUS_CMD_OK`.
pub fn echo(
    parser: &mut parser_t,
    streams: &mut io_streams_t,
    args: &mut [&wstr],
) -> Option<c_int> {
    let (opts, optind) = match parse_options(args, parser, streams) {
        Ok(parsed) => parsed,
        Err(err @ Some(_)) if err != STATUS_CMD_OK => return err,
        Err(err) => panic!("Illogical exit code from parse_options(): {err:?}"),
    };

    // Everything is collected here first so that it can be written in one go; this matters when
    // writing to an fd.
    let mut buffer = WString::new();

    // Set once the special escape \c is seen, which means "no more output" (not even the
    // trailing newline).
    let mut saw_stop = false;

    'args: for (position, word) in args[optind..].iter().enumerate() {
        if position > 0 && opts.print_spaces {
            buffer.push(' ');
        }

        let mut rest = word.chars().peekable();
        loop {
            let ch = match rest.next() {
                Some(ch) => ch,
                None => break,
            };

            if ch != '\\' || !opts.interpret_special_chars {
                // Ordinary character, copied through unchanged.
                buffer.push(ch);
                continue;
            }

            let selector = match rest.peek() {
                Some(&selector) => selector,
                None => {
                    // A backslash at the very end of the argument is echoed verbatim.
                    buffer.push('\\');
                    break;
                }
            };

            // Each arm yields the character to emit and how many characters after the backslash
            // belong to the escape: one for the simple escapes, possibly more for numeric
            // sequences, and none for an unrecognized escape (only the backslash is emitted).
            let (replacement, extra) = match selector {
                'a' => ('\x07', 1),
                'b' => ('\x08', 1),
                'e' => ('\x1B', 1),
                'f' => ('\x0C', 1),
                'n' => ('\n', 1),
                'r' => ('\r', 1),
                't' => ('\t', 1),
                'v' => ('\x0B', 1),
                '\\' => ('\\', 1),
                'c' => {
                    saw_stop = true;
                    break 'args;
                }
                // Octal and hex escape sequences.
                _ => match parse_numeric_sequence(rest.clone()) {
                    // The parsed value is a literal byte that we want to output (#1894).
                    Some((length, byte)) => (wchar_literal_byte(byte), length),
                    None => ('\\', 0),
                },
            };

            // Drop the characters that made up the escape; the backslash itself was already
            // taken by `rest.next()` above.
            // TODO: `Iterator::advance_by()`: https://github.com/rust-lang/rust/issues/77404
            for _ in 0..extra {
                let _ = rest.next();
            }
            buffer.push(replacement);
        }
    }

    if opts.print_newline && !saw_stop {
        buffer.push('\n');
    }

    if !buffer.is_empty() {
        streams.out.append(buffer);
    }

    STATUS_CMD_OK
}

View File

@ -1,2 +1,4 @@
pub mod shared;
pub mod echo;
pub mod wait;

View File

@ -108,6 +108,7 @@ pub fn run_builtin(
builtin: RustBuiltin,
) -> Option<c_int> {
match builtin {
RustBuiltin::Echo => super::echo::echo(parser, streams, args),
RustBuiltin::Wait => wait::wait(parser, streams, args),
}
}

View File

@ -33,3 +33,30 @@ pub use widestring_suffix::widestrs;
/// Pull in our extensions.
pub use crate::wchar_ext::{CharPrefixSuffix, WExt};
// These are in the Unicode private-use range. We really shouldn't use this
// range but have little choice in the matter given how our lexer/parser works.
// We can't use non-characters for these two ranges because there are only 66 of
// them and we need at least 256 + 64.
//
// If sizeof(wchar_t)==4 we could avoid using private-use chars; however, that
// would result in fish having different behavior on machines with 16 versus 32
// bit wchar_t. It's better that fish behave the same on both types of systems.
//
// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
/// First codepoint of the private-use block that stands in for literal bytes.
const ENCODE_DIRECT_BASE: u32 = 0xF600;
/// One past the last codepoint of that block; it has one slot per possible byte value.
const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256;

/// Wrap a raw byte in a character so that it can travel through a wide string.
///
/// The echo builtin, for example, lets users spell out arbitrary byte sequences with escape
/// sequences, and the terminal then decodes those bytes as e.g. UTF-8. Treating every such byte as
/// a codepoint of its own and encoding it as a UTF-32 character would print several characters
/// where a single UTF-8 character was intended. Instead, the byte is mapped to the codepoint
/// `ENCODE_DIRECT_BASE + byte`.
///
/// See https://github.com/fish-shell/fish-shell/issues/1894.
pub fn wchar_literal_byte(byte: u8) -> char {
    let codepoint = ENCODE_DIRECT_BASE + u32::from(byte);
    char::from_u32(codepoint).expect("private-use codepoint should be valid char")
}

View File

@ -41,7 +41,6 @@
#include "builtins/complete.h"
#include "builtins/contains.h"
#include "builtins/disown.h"
#include "builtins/echo.h"
#include "builtins/emit.h"
#include "builtins/eval.h"
#include "builtins/exit.h"
@ -384,7 +383,7 @@ static constexpr builtin_data_t builtin_datas[] = {
{L"continue", &builtin_break_continue, N_(L"Skip over remaining innermost loop")},
{L"count", &builtin_count, N_(L"Count the number of arguments")},
{L"disown", &builtin_disown, N_(L"Remove job from job list")},
{L"echo", &builtin_echo, N_(L"Print arguments")},
{L"echo", &implemented_in_rust, N_(L"Print arguments")},
{L"else", &builtin_generic, N_(L"Evaluate block if condition is false")},
{L"emit", &builtin_emit, N_(L"Emit an event")},
{L"end", &builtin_generic, N_(L"End a block of commands")},
@ -529,6 +528,9 @@ const wchar_t *builtin_get_desc(const wcstring &name) {
}
static maybe_t<RustBuiltin> try_get_rust_builtin(const wcstring &cmd) {
if (cmd == L"echo") {
return RustBuiltin::Echo;
}
if (cmd == L"wait") {
return RustBuiltin::Wait;
}

View File

@ -109,6 +109,7 @@ int parse_help_only_cmd_opts(help_only_cmd_opts_t &opts, int *optind, int argc,
/// An enum of the builtins implemented in Rust.
/// NOTE(review): the Rust side matches on a `RustBuiltin` with the same variant names, so the
/// two lists presumably have to be kept in sync - confirm how the binding is generated.
enum RustBuiltin : int32_t {
// The `echo` builtin.
Echo,
// The `wait` builtin.
Wait,
};
#endif

View File

@ -1,243 +0,0 @@
// Implementation of the echo builtin.
#include "config.h" // IWYU pragma: keep
#include "echo.h"
#include <cstddef>
#include "../builtin.h"
#include "../common.h"
#include "../fallback.h" // IWYU pragma: keep
#include "../io.h"
#include "../maybe.h"
#include "../wgetopt.h"
#include "../wutil.h" // IWYU pragma: keep
// Options of the echo builtin, as selected by its command-line flags.
struct echo_cmd_opts_t {
// Append a trailing newline to the output; cleared by -n.
bool print_newline = true;
// Separate the echoed arguments with a space; cleared by -s.
bool print_spaces = true;
// Interpret backslash escape sequences; set by -e, cleared by -E.
bool interpret_special_chars = false;
};
// Short options accepted by echo: -E, -e, -n and -s.
// NOTE(review): the leading "+" and ":" are directives for wgetopt rather than options;
// presumably "+" stops parsing at the first non-option and ":" enables the ':' return value for
// missing arguments - confirm against wgetopt.
static const wchar_t *const short_options = L"+:Eens";
// echo has no long options.
static const struct woption *const long_options = nullptr;
/// Parse the leading options of echo into \p opts and store in \p optind the index of the first
/// argument that should be echoed.
///
/// Returns STATUS_INVALID_ARGS if wgetopt reports a missing option argument, and STATUS_CMD_OK
/// otherwise. When wgetopt reports an unknown option, parsing stops, \p opts is reset to the last
/// snapshot of the options and \p optind is set to woptind - 1.
static int parse_cmd_opts(echo_cmd_opts_t &opts, int *optind, int argc, const wchar_t **argv,
parser_t &parser, io_streams_t &streams) {
UNUSED(parser);
UNUSED(streams);
const wchar_t *cmd = argv[0];
int opt;
wgetopter_t w;
// Snapshot of the options that is restored when an unknown option is encountered.
echo_cmd_opts_t oldopts = opts;
// Value of woptind at the time the snapshot was last refreshed.
int oldoptind = 0;
while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) {
switch (opt) {
case 'n': {
opts.print_newline = false;
break;
}
case 'e': {
opts.interpret_special_chars = true;
break;
}
case 's': {
opts.print_spaces = false;
break;
}
case 'E': {
opts.interpret_special_chars = false;
break;
}
case ':': {
builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]);
return STATUS_INVALID_ARGS;
}
case '?': {
// Unknown option: revert to the snapshot and report the previous index as the start of the
// text to echo.
opts = oldopts;
*optind = w.woptind - 1;
return STATUS_CMD_OK;
}
default: {
DIE("unexpected retval from wgetopt_long");
}
}
// Super cheesy: We keep an old copy of the option state around,
// so we can revert it in case we get an argument like
// "-n foo".
// We need to keep it one out-of-date so we can ignore the *last* option.
// (this might be an issue in wgetopt, but that's a whole other can of worms
// and really only occurs with our weird "put it back" option parsing)
if (w.woptind == oldoptind + 2) {
oldopts = opts;
oldoptind = w.woptind;
}
}
// All options were recognized; everything from woptind on is text to echo.
*optind = w.woptind;
return STATUS_CMD_OK;
}
/// Try to read a numeric escape sequence from the start of \p str (the text following the
/// backslash). On success, returns true and stores the number of characters that belong to the
/// sequence in \p consumed and the resulting byte in \p out_val; on failure, returns false and
/// leaves both untouched. Supported escape sequences:
///
/// \0nnn: octal value, zero to three digits
/// \nnn: octal value, one to three digits
/// \xhh: hex value, one to two digits
static bool builtin_echo_parse_numeric_sequence(const wchar_t *str, size_t *consumed,
                                                unsigned char *out_val) {
    unsigned int radix;
    unsigned int digit_limit;
    unsigned int first_digit;  // index of the first character of the numeric part
    if (convert_digit(str[0], 8) != -1) {
        // Octal escape. A leading 0 allows four digits (including that zero); otherwise three.
        radix = 8;
        first_digit = 0;
        digit_limit = (str[0] == L'0') ? 4 : 3;
    } else if (str[0] == L'x') {
        // Hex escape; the x itself is skipped.
        radix = 16;
        first_digit = 1;
        digit_limit = 2;
    } else {
        return false;
    }

    unsigned char value = 0;  // resulting character
    unsigned int pos = first_digit;
    while (pos < first_digit + digit_limit) {
        int digit = convert_digit(str[pos], radix);
        if (digit == -1) break;
        value = value * radix + digit;
        ++pos;
    }

    // Without at least one digit this is not a numeric sequence.
    if (pos == first_digit) {
        return false;
    }
    *consumed = pos;
    *out_val = value;
    return true;
}
/// The echo builtin.
///
/// Bash only respects -n if it's the first argument. We'll do the same. We also support a new,
/// fish specific, option -s to mean "no spaces".
///
/// The arguments left after option parsing are assembled into one string, which is appended to
/// streams.out in a single call. Returns the status of option parsing if that failed, and
/// STATUS_CMD_OK otherwise.
maybe_t<int> builtin_echo(parser_t &parser, io_streams_t &streams, const wchar_t **argv) {
const wchar_t *cmd = argv[0];
UNUSED(cmd);
int argc = builtin_count_args(argv);
echo_cmd_opts_t opts;
int optind;
int retval = parse_cmd_opts(opts, &optind, argc, argv, parser, streams);
if (retval != STATUS_CMD_OK) return retval;
// The special character \c can be used to indicate no more output.
bool continue_output = true;
const wchar_t *const *args_to_echo = argv + optind;
// We buffer output so we can write in one go,
// this matters when writing to an fd.
wcstring out;
// Both loops also stop as soon as \c has cleared continue_output.
for (size_t idx = 0; continue_output && args_to_echo[idx] != nullptr; idx++) {
if (opts.print_spaces && idx > 0) {
out.push_back(' ');
}
const wchar_t *str = args_to_echo[idx];
for (size_t j = 0; continue_output && str[j]; j++) {
if (!opts.interpret_special_chars || str[j] != L'\\') {
// Not an escape.
out.push_back(str[j]);
} else {
// Most escapes consume one character in addition to the backslash; the numeric
// sequences may consume more, while an unrecognized escape sequence consumes none.
wchar_t wc;
size_t consumed = 1;
// str[j + 1] is the terminating NUL when the backslash is the last character; that case
// ends up in the default branch and echoes the backslash verbatim.
switch (str[j + 1]) {
case L'a': {
wc = L'\a';
break;
}
case L'b': {
wc = L'\b';
break;
}
case L'e': {
wc = L'\x1B';
break;
}
case L'f': {
wc = L'\f';
break;
}
case L'n': {
wc = L'\n';
break;
}
case L'r': {
wc = L'\r';
break;
}
case L't': {
wc = L'\t';
break;
}
case L'v': {
wc = L'\v';
break;
}
case L'\\': {
wc = L'\\';
break;
}
case L'c': {
// wc is only a placeholder here: it is never pushed because continue_output is now false.
wc = 0;
continue_output = false;
break;
}
default: {
// Octal and hex escape sequences.
unsigned char narrow_val = 0;
if (builtin_echo_parse_numeric_sequence(str + j + 1, &consumed,
&narrow_val)) {
// Here consumed must have been set to something. The narrow_val is a
// literal byte that we want to output (#1894).
wc = ENCODE_DIRECT_BASE + narrow_val % 256;
} else {
// Not a recognized escape. We consume only the backslash.
wc = L'\\';
consumed = 0;
}
break;
}
}
// Skip over characters that were part of this escape sequence (but not the
// backslash, which will be handled by the loop increment.
j += consumed;
if (continue_output) {
out.push_back(wc);
}
}
}
}
// \c suppresses the trailing newline as well.
if (opts.print_newline && continue_output) {
out.push_back('\n');
}
if (!out.empty()) {
streams.out.append(out);
}
return STATUS_CMD_OK;
}

View File

@ -1,11 +0,0 @@
// Prototypes for executing builtin_echo function.
#ifndef FISH_BUILTIN_ECHO_H
#define FISH_BUILTIN_ECHO_H
#include "../maybe.h"
// Forward declarations: the prototype below only takes these types by reference.
class parser_t;
struct io_streams_t;
/// The echo builtin. \p argv holds the command name followed by its arguments.
/// NOTE(review): argv is presumably nullptr-terminated, as for the other builtins - confirm
/// against the caller.
maybe_t<int> builtin_echo(parser_t &parser, io_streams_t &streams, const wchar_t **argv);
#endif