mirror of
https://github.com/fish-shell/fish-shell.git
synced 2024-11-23 17:06:59 +08:00
Rename tokenizer_t::buff to token_cursor
That should clarify its role vis-a-vis 'start'.
This commit is contained in:
parent
41c42c86e3
commit
eb2386f3e3
|
@ -63,13 +63,13 @@ tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token
|
|||
const wchar_t *error_loc, maybe_t<size_t> token_length) {
|
||||
assert(error_type != tokenizer_error_t::none && "tokenizer_error_t::none passed to call_error");
|
||||
assert(error_loc >= token_start && "Invalid error location");
|
||||
assert(this->buff >= token_start && "Invalid buff location");
|
||||
assert(this->token_cursor >= token_start && "Invalid buff location");
|
||||
|
||||
// If continue_after_error is set and we have a real token length, then skip past it.
|
||||
// Otherwise give up.
|
||||
if (token_length.has_value() && continue_after_error) {
|
||||
assert(this->buff < error_loc + *token_length && "Unable to continue past error");
|
||||
this->buff = error_loc + *token_length;
|
||||
assert(this->token_cursor < error_loc + *token_length && "Unable to continue past error");
|
||||
this->token_cursor = error_loc + *token_length;
|
||||
} else {
|
||||
this->has_next = false;
|
||||
}
|
||||
|
@ -78,12 +78,13 @@ tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token
|
|||
result.error = error_type;
|
||||
result.offset = token_start - this->start;
|
||||
// If we are passed a token_length, then use it; otherwise infer it from the buffer.
|
||||
result.length = token_length ? *token_length : this->buff - token_start;
|
||||
result.length = token_length ? *token_length : this->token_cursor - token_start;
|
||||
result.error_offset = error_loc - token_start;
|
||||
return result;
|
||||
}
|
||||
|
||||
tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) : buff(start), start(start) {
|
||||
tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags)
|
||||
: token_cursor(start), start(start) {
|
||||
assert(start != nullptr && "Invalid start");
|
||||
|
||||
this->accept_unfinished = static_cast<bool>(flags & TOK_ACCEPT_UNFINISHED);
|
||||
|
@ -145,11 +146,11 @@ tok_t tokenizer_t::read_string() {
|
|||
std::vector<int> brace_offsets;
|
||||
std::vector<char> expecting;
|
||||
int slice_offset = 0;
|
||||
const wchar_t *const buff_start = this->buff;
|
||||
const wchar_t *const buff_start = this->token_cursor;
|
||||
bool is_first = true;
|
||||
|
||||
while (true) {
|
||||
wchar_t c = *this->buff;
|
||||
wchar_t c = *this->token_cursor;
|
||||
#if false
|
||||
wcstring msg = L"Handling 0x%x (%lc)";
|
||||
tok_mode mode_begin = mode;
|
||||
|
@ -173,22 +174,22 @@ tok_t tokenizer_t::read_string() {
|
|||
else if (c == L'\\') {
|
||||
mode |= tok_modes::char_escape;
|
||||
} else if (c == L'(') {
|
||||
paran_offsets.push_back(this->buff - this->start);
|
||||
paran_offsets.push_back(this->token_cursor - this->start);
|
||||
expecting.push_back(L')');
|
||||
mode |= tok_modes::subshell;
|
||||
} else if (c == L'{') {
|
||||
brace_offsets.push_back(this->buff - this->start);
|
||||
brace_offsets.push_back(this->token_cursor - this->start);
|
||||
expecting.push_back(L'}');
|
||||
mode |= tok_modes::curly_braces;
|
||||
} else if (c == L')') {
|
||||
if (expecting.size() > 0 && expecting.back() == L'}') {
|
||||
return this->call_error(tokenizer_error_t::expected_bclose_found_pclose,
|
||||
this->start, this->buff, 1);
|
||||
this->start, this->token_cursor, 1);
|
||||
}
|
||||
switch (paran_offsets.size()) {
|
||||
case 0:
|
||||
return this->call_error(tokenizer_error_t::closing_unopened_subshell,
|
||||
this->start, this->buff, 1);
|
||||
this->start, this->token_cursor, 1);
|
||||
case 1:
|
||||
mode &= ~(tok_modes::subshell);
|
||||
default:
|
||||
|
@ -198,12 +199,12 @@ tok_t tokenizer_t::read_string() {
|
|||
} else if (c == L'}') {
|
||||
if (expecting.size() > 0 && expecting.back() == L')') {
|
||||
return this->call_error(tokenizer_error_t::expected_pclose_found_bclose,
|
||||
this->start, this->buff, 1);
|
||||
this->start, this->token_cursor, 1);
|
||||
}
|
||||
switch (brace_offsets.size()) {
|
||||
case 0:
|
||||
return this->call_error(tokenizer_error_t::closing_unopened_brace, this->buff,
|
||||
this->start + wcslen(this->start));
|
||||
return this->call_error(tokenizer_error_t::closing_unopened_brace,
|
||||
this->token_cursor, this->start + wcslen(this->start));
|
||||
case 1:
|
||||
mode &= ~(tok_modes::curly_braces);
|
||||
default:
|
||||
|
@ -211,9 +212,9 @@ tok_t tokenizer_t::read_string() {
|
|||
}
|
||||
expecting.pop_back();
|
||||
} else if (c == L'[') {
|
||||
if (this->buff != buff_start) {
|
||||
if (this->token_cursor != buff_start) {
|
||||
mode |= tok_modes::array_brackets;
|
||||
slice_offset = this->buff - this->start;
|
||||
slice_offset = this->token_cursor - this->start;
|
||||
} else {
|
||||
// This is actually allowed so the test operator `[` can be used as the head of a
|
||||
// command
|
||||
|
@ -225,12 +226,12 @@ tok_t tokenizer_t::read_string() {
|
|||
else if (c == L']' && ((mode & tok_modes::array_brackets) == tok_modes::array_brackets)) {
|
||||
mode &= ~(tok_modes::array_brackets);
|
||||
} else if (c == L'\'' || c == L'"') {
|
||||
const wchar_t *end = quote_end(this->buff);
|
||||
const wchar_t *end = quote_end(this->token_cursor);
|
||||
if (end) {
|
||||
this->buff = end;
|
||||
this->token_cursor = end;
|
||||
} else {
|
||||
const wchar_t *error_loc = this->buff;
|
||||
this->buff += std::wcslen(this->buff);
|
||||
const wchar_t *error_loc = this->token_cursor;
|
||||
this->token_cursor += std::wcslen(this->token_cursor);
|
||||
if ((!this->accept_unfinished)) {
|
||||
return this->call_error(tokenizer_error_t::unterminated_quote, buff_start,
|
||||
error_loc);
|
||||
|
@ -250,14 +251,14 @@ tok_t tokenizer_t::read_string() {
|
|||
FLOGF(error, msg.c_str(), c, c, int(mode_begin), int(mode));
|
||||
#endif
|
||||
|
||||
this->buff++;
|
||||
this->token_cursor++;
|
||||
is_first = false;
|
||||
}
|
||||
|
||||
if ((!this->accept_unfinished) && (mode != tok_modes::regular_text)) {
|
||||
if (mode & tok_modes::char_escape) {
|
||||
return this->call_error(tokenizer_error_t::unterminated_escape, buff_start,
|
||||
this->buff - 1, 1);
|
||||
this->token_cursor - 1, 1);
|
||||
} else if (mode & tok_modes::array_brackets) {
|
||||
return this->call_error(tokenizer_error_t::unterminated_slice, buff_start,
|
||||
this->start + slice_offset);
|
||||
|
@ -280,7 +281,7 @@ tok_t tokenizer_t::read_string() {
|
|||
|
||||
tok_t result(token_type_t::string);
|
||||
result.offset = buff_start - this->start;
|
||||
result.length = this->buff - buff_start;
|
||||
result.length = this->token_cursor - buff_start;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -492,25 +493,27 @@ maybe_t<tok_t> tokenizer_t::next() {
|
|||
// it.
|
||||
bool preceding_escaped_nl = false;
|
||||
for (;;) {
|
||||
if (this->buff[0] == L'\\' && this->buff[1] == L'\n') {
|
||||
this->buff += 2;
|
||||
if (this->token_cursor[0] == L'\\' && this->token_cursor[1] == L'\n') {
|
||||
this->token_cursor += 2;
|
||||
this->continue_line_after_comment = true;
|
||||
preceding_escaped_nl = true;
|
||||
} else if (iswspace_not_nl(this->buff[0])) {
|
||||
this->buff++;
|
||||
} else if (iswspace_not_nl(this->token_cursor[0])) {
|
||||
this->token_cursor++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
while (*this->buff == L'#') {
|
||||
while (*this->token_cursor == L'#') {
|
||||
// We have a comment, walk over the comment.
|
||||
const wchar_t *comment_start = this->buff;
|
||||
while (this->buff[0] != L'\n' && this->buff[0] != L'\0') this->buff++;
|
||||
size_t comment_len = this->buff - comment_start;
|
||||
const wchar_t *comment_start = this->token_cursor;
|
||||
while (this->token_cursor[0] != L'\n' && this->token_cursor[0] != L'\0')
|
||||
this->token_cursor++;
|
||||
size_t comment_len = this->token_cursor - comment_start;
|
||||
|
||||
// If we are going to continue after the comment, skip any trailing newline.
|
||||
if (this->buff[0] == L'\n' && this->continue_line_after_comment) this->buff++;
|
||||
if (this->token_cursor[0] == L'\n' && this->continue_line_after_comment)
|
||||
this->token_cursor++;
|
||||
|
||||
// Maybe return the comment.
|
||||
if (this->show_comments) {
|
||||
|
@ -520,15 +523,15 @@ maybe_t<tok_t> tokenizer_t::next() {
|
|||
result.preceding_escaped_nl = preceding_escaped_nl;
|
||||
return result;
|
||||
}
|
||||
while (iswspace_not_nl(this->buff[0])) this->buff++;
|
||||
while (iswspace_not_nl(this->token_cursor[0])) this->token_cursor++;
|
||||
}
|
||||
|
||||
// We made it past the comments and ate any trailing newlines we wanted to ignore.
|
||||
this->continue_line_after_comment = false;
|
||||
const size_t start_pos = this->buff - this->start;
|
||||
const size_t start_pos = this->token_cursor - this->start;
|
||||
|
||||
maybe_t<tok_t> result{};
|
||||
switch (*this->buff) {
|
||||
switch (*this->token_cursor) {
|
||||
case L'\0': {
|
||||
this->has_next = false;
|
||||
return none();
|
||||
|
@ -539,60 +542,60 @@ maybe_t<tok_t> tokenizer_t::next() {
|
|||
result.emplace(token_type_t::end);
|
||||
result->offset = start_pos;
|
||||
result->length = 1;
|
||||
this->buff++;
|
||||
this->token_cursor++;
|
||||
// Hack: when we get a newline, swallow as many as we can. This compresses multiple
|
||||
// subsequent newlines into a single one.
|
||||
if (!this->show_blank_lines) {
|
||||
while (*this->buff == L'\n' || *this->buff == 13 /* CR */ || *this->buff == ' ' ||
|
||||
*this->buff == '\t') {
|
||||
this->buff++;
|
||||
while (*this->token_cursor == L'\n' || *this->token_cursor == 13 /* CR */ ||
|
||||
*this->token_cursor == ' ' || *this->token_cursor == '\t') {
|
||||
this->token_cursor++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'&': {
|
||||
if (this->buff[1] == L'&') {
|
||||
if (this->token_cursor[1] == L'&') {
|
||||
// && is and.
|
||||
result.emplace(token_type_t::andand);
|
||||
result->offset = start_pos;
|
||||
result->length = 2;
|
||||
this->buff += 2;
|
||||
} else if (this->buff[1] == L'>' || this->buff[1] == L'|') {
|
||||
this->token_cursor += 2;
|
||||
} else if (this->token_cursor[1] == L'>' || this->token_cursor[1] == L'|') {
|
||||
// &> and &| redirect both stdout and stderr.
|
||||
auto redir = pipe_or_redir_t::from_string(buff);
|
||||
auto redir = pipe_or_redir_t::from_string(this->token_cursor);
|
||||
assert(redir.has_value() &&
|
||||
"Should always succeed to parse a &> or &| redirection");
|
||||
result.emplace(redir->token_type());
|
||||
result->offset = start_pos;
|
||||
result->length = redir->consumed;
|
||||
this->buff += redir->consumed;
|
||||
this->token_cursor += redir->consumed;
|
||||
} else {
|
||||
result.emplace(token_type_t::background);
|
||||
result->offset = start_pos;
|
||||
result->length = 1;
|
||||
this->buff++;
|
||||
this->token_cursor++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case L'|': {
|
||||
if (this->buff[1] == L'|') {
|
||||
if (this->token_cursor[1] == L'|') {
|
||||
// || is or.
|
||||
result.emplace(token_type_t::oror);
|
||||
result->offset = start_pos;
|
||||
result->length = 2;
|
||||
this->buff += 2;
|
||||
} else if (this->buff[1] == L'&') {
|
||||
this->token_cursor += 2;
|
||||
} else if (this->token_cursor[1] == L'&') {
|
||||
// |& is a bashism; in fish it's &|.
|
||||
return this->call_error(tokenizer_error_t::invalid_pipe_ampersand, this->buff,
|
||||
this->buff, 2);
|
||||
return this->call_error(tokenizer_error_t::invalid_pipe_ampersand,
|
||||
this->token_cursor, this->token_cursor, 2);
|
||||
} else {
|
||||
auto pipe = pipe_or_redir_t::from_string(buff);
|
||||
auto pipe = pipe_or_redir_t::from_string(this->token_cursor);
|
||||
assert(pipe.has_value() && pipe->is_pipe &&
|
||||
"Should always succeed to parse a | pipe");
|
||||
result.emplace(pipe->token_type());
|
||||
result->offset = start_pos;
|
||||
result->length = pipe->consumed;
|
||||
this->buff += pipe->consumed;
|
||||
this->token_cursor += pipe->consumed;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -601,23 +604,24 @@ maybe_t<tok_t> tokenizer_t::next() {
|
|||
// There's some duplication with the code in the default case below. The key
|
||||
// difference here is that we must never parse these as a string; a failed
|
||||
// redirection is an error!
|
||||
auto redir_or_pipe = pipe_or_redir_t::from_string(this->buff);
|
||||
auto redir_or_pipe = pipe_or_redir_t::from_string(this->token_cursor);
|
||||
if (!redir_or_pipe || redir_or_pipe->fd < 0) {
|
||||
return this->call_error(tokenizer_error_t::invalid_redirect, this->buff, this->buff,
|
||||
return this->call_error(tokenizer_error_t::invalid_redirect, this->token_cursor,
|
||||
this->token_cursor,
|
||||
redir_or_pipe ? redir_or_pipe->consumed : 0);
|
||||
}
|
||||
result.emplace(redir_or_pipe->token_type());
|
||||
result->offset = start_pos;
|
||||
result->length = redir_or_pipe->consumed;
|
||||
this->buff += redir_or_pipe->consumed;
|
||||
this->token_cursor += redir_or_pipe->consumed;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string.
|
||||
const wchar_t *error_location = this->buff;
|
||||
const wchar_t *error_location = this->token_cursor;
|
||||
maybe_t<pipe_or_redir_t> redir_or_pipe{};
|
||||
if (iswdigit(*this->buff) || (*this->buff == L'^' && caret_redirs())) {
|
||||
redir_or_pipe = pipe_or_redir_t::from_string(this->buff);
|
||||
if (iswdigit(*this->token_cursor) || (*this->token_cursor == L'^' && caret_redirs())) {
|
||||
redir_or_pipe = pipe_or_redir_t::from_string(this->token_cursor);
|
||||
}
|
||||
|
||||
if (redir_or_pipe) {
|
||||
|
@ -631,7 +635,7 @@ maybe_t<tok_t> tokenizer_t::next() {
|
|||
result.emplace(redir_or_pipe->token_type());
|
||||
result->offset = start_pos;
|
||||
result->length = redir_or_pipe->consumed;
|
||||
this->buff += redir_or_pipe->consumed;
|
||||
this->token_cursor += redir_or_pipe->consumed;
|
||||
} else {
|
||||
// Not a redirection or pipe, so just a string.
|
||||
result = this->read_string();
|
||||
|
|
|
@ -103,7 +103,7 @@ class tokenizer_t {
|
|||
void operator=(const tokenizer_t &) = delete;
|
||||
|
||||
/// A pointer into the original string, showing where the next token begins.
|
||||
const wchar_t *buff;
|
||||
const wchar_t *token_cursor;
|
||||
/// The start of the original string.
|
||||
const wchar_t *const start;
|
||||
/// Whether we have additional tokens.
|
||||
|
|
Loading…
Reference in New Issue
Block a user