mirror of
https://github.com/fish-shell/fish-shell.git
synced 2024-11-29 05:03:46 +08:00
Port echo builtin to Rust
This commit is contained in:
parent
4b85c2f6db
commit
a16e2ecb1b
|
@ -103,7 +103,7 @@ set(FISH_BUILTIN_SRCS
|
|||
src/builtins/bg.cpp src/builtins/bind.cpp src/builtins/block.cpp
|
||||
src/builtins/builtin.cpp src/builtins/cd.cpp src/builtins/command.cpp
|
||||
src/builtins/commandline.cpp src/builtins/complete.cpp src/builtins/contains.cpp
|
||||
src/builtins/disown.cpp src/builtins/echo.cpp src/builtins/emit.cpp
|
||||
src/builtins/disown.cpp src/builtins/emit.cpp
|
||||
src/builtins/eval.cpp src/builtins/exit.cpp src/builtins/fg.cpp
|
||||
src/builtins/function.cpp src/builtins/functions.cpp src/builtins/history.cpp
|
||||
src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/printf.cpp src/builtins/path.cpp
|
||||
|
|
232
fish-rust/src/builtins/echo.rs
Normal file
232
fish-rust/src/builtins/echo.rs
Normal file
|
@ -0,0 +1,232 @@
|
|||
//! Implementation of the echo builtin.
|
||||
|
||||
use libc::c_int;
|
||||
|
||||
use super::shared::{builtin_missing_argument, io_streams_t, STATUS_CMD_OK, STATUS_INVALID_ARGS};
|
||||
use crate::ffi::parser_t;
|
||||
use crate::wchar::{wchar_literal_byte, wstr, WString, L};
|
||||
use crate::wgetopt::{wgetopter_t, woption};
|
||||
|
||||
/// The switches that control how `echo` renders its arguments.
#[derive(Debug, Clone, Copy)]
struct Options {
    /// Append a trailing `'\n'` to the output (cleared by `-n`).
    print_newline: bool,
    /// Separate consecutive arguments with a single space (cleared by `-s`).
    print_spaces: bool,
    /// Interpret backslash escapes in the arguments (set by `-e`, cleared by `-E`).
    interpret_special_chars: bool,
}

impl Default for Options {
    /// The state of the switches before any option has been seen: newline and spaces on,
    /// escape interpretation off.
    fn default() -> Self {
        Options {
            interpret_special_chars: false,
            print_spaces: true,
            print_newline: true,
        }
    }
}
|
||||
|
||||
fn parse_options(
|
||||
args: &mut [&wstr],
|
||||
parser: &mut parser_t,
|
||||
streams: &mut io_streams_t,
|
||||
) -> Result<(Options, usize), Option<c_int>> {
|
||||
let cmd = args[0];
|
||||
|
||||
const SHORT_OPTS: &wstr = L!("+:Eens");
|
||||
const LONG_OPTS: &[woption] = &[];
|
||||
|
||||
let mut opts = Options::default();
|
||||
|
||||
let mut oldopts = opts;
|
||||
let mut oldoptind = 0;
|
||||
|
||||
let mut w = wgetopter_t::new(SHORT_OPTS, LONG_OPTS, args);
|
||||
while let Some(c) = w.wgetopt_long() {
|
||||
match c {
|
||||
'n' => opts.print_newline = false,
|
||||
'e' => opts.interpret_special_chars = true,
|
||||
's' => opts.print_spaces = false,
|
||||
'E' => opts.interpret_special_chars = false,
|
||||
':' => {
|
||||
builtin_missing_argument(parser, streams, cmd, args[w.woptind - 1], true);
|
||||
return Err(STATUS_INVALID_ARGS);
|
||||
}
|
||||
'?' => {
|
||||
return Ok((oldopts, w.woptind - 1));
|
||||
}
|
||||
_ => {
|
||||
panic!("unexpected retval from wgetopter::wgetopt_long()");
|
||||
}
|
||||
}
|
||||
|
||||
// Super cheesy: We keep an old copy of the option state around,
|
||||
// so we can revert it in case we get an argument like
|
||||
// "-n foo".
|
||||
// We need to keep it one out-of-date so we can ignore the *last* option.
|
||||
// (this might be an issue in wgetopt, but that's a whole other can of worms
|
||||
// and really only occurs with our weird "put it back" option parsing)
|
||||
if w.woptind == oldoptind + 2 {
|
||||
oldopts = opts;
|
||||
oldoptind = w.woptind;
|
||||
}
|
||||
}
|
||||
|
||||
Ok((opts, w.woptind))
|
||||
}
|
||||
|
||||
/// Parse a numeric escape sequence from `chars` (the characters following the backslash),
/// returning the number of characters consumed and the resulting byte value.
///
/// Supported escape sequences:
///
/// - `0nnn`: octal value, zero to three digits after the leading `0`
/// - `nnn`: octal value, one to three digits
/// - `xhh`: hex value, one to two digits
///
/// The returned count includes the leading `x` of a hex escape. Returns `None` if `chars` is
/// empty, does not start with an octal digit or `x`, or if an `x` is not followed by at least
/// one hex digit.
///
/// Values that do not fit in a byte (e.g. `777`) are truncated modulo 256 instead of
/// overflowing.
fn parse_numeric_sequence<I>(chars: I) -> Option<(usize, u8)>
where
    I: IntoIterator<Item = char>,
{
    let mut chars = chars.into_iter().peekable();

    let first = *chars.peek()?;
    // (radix, maximum number of digits, length of the non-digit prefix)
    let (base, max_digits, prefix_len): (u8, usize, usize) = if first.is_digit(8) {
        // Octal escape. If the first digit is a 0, we allow four digits (including that zero);
        // otherwise, we allow 3.
        (8, if first == '0' { 4 } else { 3 }, 0)
    } else if first == 'x' {
        // Hex escape; the `x` itself is skipped before reading digits.
        (16, 2, 1)
    } else {
        return None;
    };

    let mut val: u8 = 0;
    let mut digits = 0;
    for digit in chars
        .skip(prefix_len)
        .take(max_digits)
        .map_while(|c| c.to_digit(base.into()))
    {
        let digit = u8::try_from(digit).expect("a digit in base 8 or 16 fits in a u8");

        // Wrap rather than overflow: three octal digits can encode up to 511.
        val = val.wrapping_mul(base).wrapping_add(digit);
        digits += 1;
    }

    // We succeeded only if at least one *digit* was read; a lone `x` is not an escape.
    (digits > 0).then_some((prefix_len + digits, val))
}
|
||||
|
||||
/// The echo builtin.
|
||||
///
|
||||
/// Bash only respects `-n` if it's the first argument. We'll do the same. We also support a new,
|
||||
/// fish specific, option `-s` to mean "no spaces".
|
||||
pub fn echo(
|
||||
parser: &mut parser_t,
|
||||
streams: &mut io_streams_t,
|
||||
args: &mut [&wstr],
|
||||
) -> Option<c_int> {
|
||||
let (opts, optind) = match parse_options(args, parser, streams) {
|
||||
Ok((opts, optind)) => (opts, optind),
|
||||
Err(err @ Some(_)) if err != STATUS_CMD_OK => return err,
|
||||
Err(err) => panic!("Illogical exit code from parse_options(): {err:?}"),
|
||||
};
|
||||
|
||||
// The special character \c can be used to indicate no more output.
|
||||
let mut output_stopped = false;
|
||||
|
||||
// We buffer output so we can write in one go,
|
||||
// this matters when writing to an fd.
|
||||
let mut out = WString::new();
|
||||
let args_to_echo = &args[optind..];
|
||||
'outer: for (idx, arg) in args_to_echo.iter().enumerate() {
|
||||
if opts.print_spaces && idx > 0 {
|
||||
out.push(' ');
|
||||
}
|
||||
|
||||
let mut chars = arg.chars().peekable();
|
||||
while let Some(c) = chars.next() {
|
||||
if !opts.interpret_special_chars || c != '\\' {
|
||||
// Not an escape.
|
||||
out.push(c);
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(next_char) = chars.peek() else {
|
||||
// Incomplete escape sequence is echoed verbatim
|
||||
out.push('\\');
|
||||
break;
|
||||
};
|
||||
|
||||
// Most escapes consume one character in addition to the backslash; the numeric
|
||||
// sequences may consume more, while an unrecognized escape sequence consumes none.
|
||||
let mut consumed = 1;
|
||||
|
||||
let escaped = match next_char {
|
||||
'a' => '\x07',
|
||||
'b' => '\x08',
|
||||
'e' => '\x1B',
|
||||
'f' => '\x0C',
|
||||
'n' => '\n',
|
||||
'r' => '\r',
|
||||
't' => '\t',
|
||||
'v' => '\x0B',
|
||||
'\\' => '\\',
|
||||
'c' => {
|
||||
output_stopped = true;
|
||||
break 'outer;
|
||||
}
|
||||
_ => {
|
||||
// Octal and hex escape sequences.
|
||||
if let Some((digits_consumed, narrow_val)) =
|
||||
parse_numeric_sequence(chars.clone())
|
||||
{
|
||||
consumed = digits_consumed;
|
||||
// The narrow_val is a literal byte that we want to output (#1894).
|
||||
wchar_literal_byte(narrow_val)
|
||||
} else {
|
||||
consumed = 0;
|
||||
'\\'
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Skip over characters that were part of this escape sequence (after the backslash
|
||||
// that was consumed by the `while` loop).
|
||||
// TODO: `Iterator::advance_by()`: https://github.com/rust-lang/rust/issues/77404
|
||||
for _ in 0..consumed {
|
||||
let _ = chars.next();
|
||||
}
|
||||
|
||||
out.push(escaped);
|
||||
}
|
||||
}
|
||||
|
||||
if opts.print_newline && !output_stopped {
|
||||
out.push('\n');
|
||||
}
|
||||
|
||||
if !out.is_empty() {
|
||||
streams.out.append(out);
|
||||
}
|
||||
|
||||
STATUS_CMD_OK
|
||||
}
|
|
@ -1,2 +1,4 @@
|
|||
pub mod shared;
|
||||
|
||||
pub mod echo;
|
||||
pub mod wait;
|
||||
|
|
|
@ -108,6 +108,7 @@ pub fn run_builtin(
|
|||
builtin: RustBuiltin,
|
||||
) -> Option<c_int> {
|
||||
match builtin {
|
||||
RustBuiltin::Echo => super::echo::echo(parser, streams, args),
|
||||
RustBuiltin::Wait => wait::wait(parser, streams, args),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,3 +33,30 @@ pub use widestring_suffix::widestrs;
|
|||
|
||||
/// Pull in our extensions.
|
||||
pub use crate::wchar_ext::{CharPrefixSuffix, WExt};
|
||||
|
||||
// The values below live in the Unicode private-use area. Ideally that range would be left
// alone, but the way the lexer/parser works leaves little choice. Non-characters are not an
// alternative for these two ranges: only 66 of them exist, and at least 256 + 64 are needed.
//
// With a 4-byte wchar_t the private-use area could be avoided altogether, but fish would then
// behave differently on machines with 16-bit and 32-bit wchar_t. Identical behavior on both
// kinds of system is preferable.
//
// Note: the highest 8-bit range (0xF800 - 0xF8FF) is deliberately not used, because at least
// one codepoint in it is known to be in use: the Apple symbol (0xF8FF) on Mac OS X.
// See http://www.unicode.org/faq/private_use.html.
const ENCODE_DIRECT_BASE: u32 = 0xF600;
const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256;

/// Wrap a literal byte in a UTF-32 character by offsetting it from `ENCODE_DIRECT_BASE`.
///
/// The echo builtin, for example, needs this: its escape sequences can spell out raw byte
/// sequences that the terminal then interprets as e.g. UTF-8. Treating each such byte as a
/// codepoint of its own and encoding it as a UTF-32 character would print several characters
/// where a single UTF-8 character was intended.
///
/// See https://github.com/fish-shell/fish-shell/issues/1894.
pub fn wchar_literal_byte(byte: u8) -> char {
    let codepoint = ENCODE_DIRECT_BASE + u32::from(byte);
    char::from_u32(codepoint).expect("private-use codepoint should be valid char")
}
|
||||
|
|
|
@ -41,7 +41,6 @@
|
|||
#include "builtins/complete.h"
|
||||
#include "builtins/contains.h"
|
||||
#include "builtins/disown.h"
|
||||
#include "builtins/echo.h"
|
||||
#include "builtins/emit.h"
|
||||
#include "builtins/eval.h"
|
||||
#include "builtins/exit.h"
|
||||
|
@ -384,7 +383,7 @@ static constexpr builtin_data_t builtin_datas[] = {
|
|||
{L"continue", &builtin_break_continue, N_(L"Skip over remaining innermost loop")},
|
||||
{L"count", &builtin_count, N_(L"Count the number of arguments")},
|
||||
{L"disown", &builtin_disown, N_(L"Remove job from job list")},
|
||||
{L"echo", &builtin_echo, N_(L"Print arguments")},
|
||||
{L"echo", &implemented_in_rust, N_(L"Print arguments")},
|
||||
{L"else", &builtin_generic, N_(L"Evaluate block if condition is false")},
|
||||
{L"emit", &builtin_emit, N_(L"Emit an event")},
|
||||
{L"end", &builtin_generic, N_(L"End a block of commands")},
|
||||
|
@ -529,6 +528,9 @@ const wchar_t *builtin_get_desc(const wcstring &name) {
|
|||
}
|
||||
|
||||
static maybe_t<RustBuiltin> try_get_rust_builtin(const wcstring &cmd) {
|
||||
if (cmd == L"echo") {
|
||||
return RustBuiltin::Echo;
|
||||
}
|
||||
if (cmd == L"wait") {
|
||||
return RustBuiltin::Wait;
|
||||
}
|
||||
|
|
|
@ -109,6 +109,7 @@ int parse_help_only_cmd_opts(help_only_cmd_opts_t &opts, int *optind, int argc,
|
|||
|
||||
/// Identifies each builtin whose implementation lives on the Rust side.
enum RustBuiltin : int32_t {
    Echo,  // the `echo` builtin
    Wait,  // the `wait` builtin
};
|
||||
#endif
|
||||
|
|
|
@ -1,243 +0,0 @@
|
|||
// Implementation of the echo builtin.
|
||||
#include "config.h" // IWYU pragma: keep
|
||||
|
||||
#include "echo.h"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "../builtin.h"
|
||||
#include "../common.h"
|
||||
#include "../fallback.h" // IWYU pragma: keep
|
||||
#include "../io.h"
|
||||
#include "../maybe.h"
|
||||
#include "../wgetopt.h"
|
||||
#include "../wutil.h" // IWYU pragma: keep
|
||||
|
||||
/// Switches controlling how echo renders its arguments, with their initial values.
struct echo_cmd_opts_t {
    // Append a trailing newline (cleared by -n).
    bool print_newline{true};
    // Put a space between consecutive arguments (cleared by -s).
    bool print_spaces{true};
    // Interpret backslash escapes (set by -e, cleared by -E).
    bool interpret_special_chars{false};
};
|
||||
static const wchar_t *const short_options = L"+:Eens";
|
||||
static const struct woption *const long_options = nullptr;
|
||||
|
||||
/// Parse the leading option arguments of echo into \p opts and store in \p optind the index of
/// the first argument to echo. On an unknown option ('?'), \p opts is rolled back to the most
/// recent snapshot and \p optind is set to woptind - 1. Returns STATUS_INVALID_ARGS when an
/// option argument is missing, STATUS_CMD_OK otherwise.
static int parse_cmd_opts(echo_cmd_opts_t &opts, int *optind, int argc, const wchar_t **argv,
                          parser_t &parser, io_streams_t &streams) {
    UNUSED(parser);
    UNUSED(streams);
    const wchar_t *cmd = argv[0];
    wgetopter_t w;

    // Snapshot of the options that intentionally lags behind `opts`.
    echo_cmd_opts_t saved_opts = opts;
    int saved_optind = 0;

    for (;;) {
        const int opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr);
        if (opt == -1) break;

        if (opt == 'n') {
            opts.print_newline = false;
        } else if (opt == 'e') {
            opts.interpret_special_chars = true;
        } else if (opt == 's') {
            opts.print_spaces = false;
        } else if (opt == 'E') {
            opts.interpret_special_chars = false;
        } else if (opt == ':') {
            builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]);
            return STATUS_INVALID_ARGS;
        } else if (opt == '?') {
            // Restore the snapshot rather than keeping the live options.
            opts = saved_opts;
            *optind = w.woptind - 1;
            return STATUS_CMD_OK;
        } else {
            DIE("unexpected retval from wgetopt_long");
        }

        // Hack: the snapshot lets us revert when we get an argument like "-n foo". It has to
        // stay one step out of date so that the *last* option can be ignored. (This may be a
        // wgetopt quirk, but it only shows up with this "put it back" style of parsing.)
        if (w.woptind == saved_optind + 2) {
            saved_opts = opts;
            saved_optind = w.woptind;
        }
    }

    *optind = w.woptind;
    return STATUS_CMD_OK;
}
|
||||
|
||||
/// Parse a numeric escape sequence in str, returning whether we succeeded. Also return the number
|
||||
/// of characters consumed and the resulting value. Supported escape sequences:
|
||||
///
|
||||
/// \0nnn: octal value, zero to three digits
|
||||
/// \nnn: octal value, one to three digits
|
||||
/// \xhh: hex value, one to two digits
|
||||
static bool builtin_echo_parse_numeric_sequence(const wchar_t *str, size_t *consumed,
|
||||
unsigned char *out_val) {
|
||||
bool success = false;
|
||||
unsigned int start = 0; // the first character of the numeric part of the sequence
|
||||
|
||||
unsigned int base = 0, max_digits = 0;
|
||||
if (convert_digit(str[0], 8) != -1) {
|
||||
// Octal escape
|
||||
base = 8;
|
||||
|
||||
// If the first digit is a 0, we allow four digits (including that zero); otherwise, we
|
||||
// allow 3.
|
||||
max_digits = (str[0] == L'0' ? 4 : 3);
|
||||
} else if (str[0] == L'x') {
|
||||
// Hex escape
|
||||
base = 16;
|
||||
max_digits = 2;
|
||||
|
||||
// Skip the x
|
||||
start = 1;
|
||||
}
|
||||
|
||||
if (base == 0) {
|
||||
return success;
|
||||
}
|
||||
|
||||
unsigned int idx;
|
||||
unsigned char val = 0; // resulting character
|
||||
for (idx = start; idx < start + max_digits; idx++) {
|
||||
int digit = convert_digit(str[idx], base);
|
||||
if (digit == -1) break;
|
||||
val = val * base + digit;
|
||||
}
|
||||
|
||||
// We succeeded if we consumed at least one digit.
|
||||
if (idx > start) {
|
||||
*consumed = idx;
|
||||
*out_val = val;
|
||||
success = true;
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
/// The echo builtin.
|
||||
///
|
||||
/// Bash only respects -n if it's the first argument. We'll do the same. We also support a new,
|
||||
/// fish specific, option -s to mean "no spaces".
|
||||
maybe_t<int> builtin_echo(parser_t &parser, io_streams_t &streams, const wchar_t **argv) {
|
||||
const wchar_t *cmd = argv[0];
|
||||
UNUSED(cmd);
|
||||
int argc = builtin_count_args(argv);
|
||||
echo_cmd_opts_t opts;
|
||||
int optind;
|
||||
int retval = parse_cmd_opts(opts, &optind, argc, argv, parser, streams);
|
||||
if (retval != STATUS_CMD_OK) return retval;
|
||||
|
||||
// The special character \c can be used to indicate no more output.
|
||||
bool continue_output = true;
|
||||
|
||||
const wchar_t *const *args_to_echo = argv + optind;
|
||||
// We buffer output so we can write in one go,
|
||||
// this matters when writing to an fd.
|
||||
wcstring out;
|
||||
for (size_t idx = 0; continue_output && args_to_echo[idx] != nullptr; idx++) {
|
||||
if (opts.print_spaces && idx > 0) {
|
||||
out.push_back(' ');
|
||||
}
|
||||
|
||||
const wchar_t *str = args_to_echo[idx];
|
||||
for (size_t j = 0; continue_output && str[j]; j++) {
|
||||
if (!opts.interpret_special_chars || str[j] != L'\\') {
|
||||
// Not an escape.
|
||||
out.push_back(str[j]);
|
||||
} else {
|
||||
// Most escapes consume one character in addition to the backslash; the numeric
|
||||
// sequences may consume more, while an unrecognized escape sequence consumes none.
|
||||
wchar_t wc;
|
||||
size_t consumed = 1;
|
||||
switch (str[j + 1]) {
|
||||
case L'a': {
|
||||
wc = L'\a';
|
||||
break;
|
||||
}
|
||||
case L'b': {
|
||||
wc = L'\b';
|
||||
break;
|
||||
}
|
||||
case L'e': {
|
||||
wc = L'\x1B';
|
||||
break;
|
||||
}
|
||||
case L'f': {
|
||||
wc = L'\f';
|
||||
break;
|
||||
}
|
||||
case L'n': {
|
||||
wc = L'\n';
|
||||
break;
|
||||
}
|
||||
case L'r': {
|
||||
wc = L'\r';
|
||||
break;
|
||||
}
|
||||
case L't': {
|
||||
wc = L'\t';
|
||||
break;
|
||||
}
|
||||
case L'v': {
|
||||
wc = L'\v';
|
||||
break;
|
||||
}
|
||||
case L'\\': {
|
||||
wc = L'\\';
|
||||
break;
|
||||
}
|
||||
case L'c': {
|
||||
wc = 0;
|
||||
continue_output = false;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// Octal and hex escape sequences.
|
||||
unsigned char narrow_val = 0;
|
||||
if (builtin_echo_parse_numeric_sequence(str + j + 1, &consumed,
|
||||
&narrow_val)) {
|
||||
// Here consumed must have been set to something. The narrow_val is a
|
||||
// literal byte that we want to output (#1894).
|
||||
wc = ENCODE_DIRECT_BASE + narrow_val % 256;
|
||||
} else {
|
||||
// Not a recognized escape. We consume only the backslash.
|
||||
wc = L'\\';
|
||||
consumed = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip over characters that were part of this escape sequence (but not the
|
||||
// backslash, which will be handled by the loop increment.
|
||||
j += consumed;
|
||||
|
||||
if (continue_output) {
|
||||
out.push_back(wc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (opts.print_newline && continue_output) {
|
||||
out.push_back('\n');
|
||||
}
|
||||
|
||||
if (!out.empty()) {
|
||||
streams.out.append(out);
|
||||
}
|
||||
|
||||
return STATUS_CMD_OK;
|
||||
}
|
|
@ -1,11 +0,0 @@
|
|||
// Prototypes for executing builtin_echo function.
|
||||
#ifndef FISH_BUILTIN_ECHO_H
|
||||
#define FISH_BUILTIN_ECHO_H
|
||||
|
||||
#include "../maybe.h"
|
||||
|
||||
class parser_t;
|
||||
struct io_streams_t;
|
||||
|
||||
maybe_t<int> builtin_echo(parser_t &parser, io_streams_t &streams, const wchar_t **argv);
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user