Mirror of https://github.com/fish-shell/fish-shell.git, synced 2025-02-14 03:22:46 +08:00
Johannes Altmanninger
For compound commands we already have begin/end, but

> it is long, which is not convenient for the command line
> it is different than {} which shell users have been using for >50 years

The difference from {} can break muscle memory and add extra steps when I'm trying to write simple commands that work in any shell. Fix that by embracing the traditional style too.

---

Since { and } have always been special syntax in fish, we can also allow

    { }
    { echo }

which I find intuitive even without having used a shell that supports this (like zsh). The downside is that this doesn't work in some other shells. The upside is in aesthetics and convenience (this is for interactive use). Not completely sure about this.

---

This implementation adds a hack to the tokenizer: '{' is usually a brace expansion. Make it a compound command when in command position (not something the tokenizer would normally know). We need to disable this when parsing freestanding argument lists (as in complete somecmd -a "{true,false}").

It's not really clear what "read -t" should do. For now, keep the existing behavior (don't parse compound statements).

Add another hack to increase backwards compatibility: parse something like "{ foo }" as a brace statement only if it has a space after the opening brace. This style is less likely to be used for brace expansion. Perhaps we can change this in future (I'll make a PR).

Use separate terminal token types for braces; we could make the left brace an ordinary string token, but since string tokens undergo unescaping during expansion etc., every such place would need to know whether it's dealing with a command or an argument. Certainly possible, but it seems simpler (especially for tab-completions) to strip braces in the parser. We could change this.

---

In future we could allow the following alternative syntax (which is invalid today):

    if true {
    }
    if true; {
    }

Closes #10895
Closes #10898
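As a quick illustration of the rules described above (my own example, not from the commit), with the space-after-brace heuristic:

    { echo hello }    # parsed as a compound command in command position
    echo {a,b}        # still a brace expansion, since there is no space after {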
doc_src/fish_synopsis.py (159 lines, 5.5 KiB, Python)
# Pygments lexer for a fish command synopsis.
#
# Example usage:
# echo 'string match [OPTIONS] [STRING]' | pygmentize -f terminal256 -l doc_src/fish_synopsis.py:FishSynopsisLexer -x

from docutils import nodes
from pygments.lexer import Lexer
from pygments.token import (
    Generic,
    Name,
    Operator,
    Punctuation,
    Text,
)
import re
from sphinx.directives.code import CodeBlock


class FishSynopsisDirective(CodeBlock):
    """A custom directive that describes a command's grammar."""

    has_content = True
    required_arguments = 0

    def run(self):
        if self.env.app.builder.name != "man":
            self.arguments = ["fish-synopsis"]
            return CodeBlock.run(self)
        lexer = FishSynopsisLexer()
        result = nodes.line_block()
        for start, tok, text in lexer.get_tokens_unprocessed("\n".join(self.content)):
            if (  # Literal text.
                (tok in (Name.Function, Name.Constant) and not text.isupper())
                or text.startswith("-")  # Literal option, even if it's uppercase.
                or tok in (Operator, Punctuation)
                or text
                == " ]"  # Tiny hack: the closing bracket of the test(1) alias is a literal.
            ):
                node = nodes.strong(text=text)
            elif (
                tok in (Name.Constant, Name.Function) and text.isupper()
            ):  # Placeholder parameter.
                node = nodes.emphasis(text=text)
            else:  # Grammar metacharacter or whitespace.
                node = nodes.inline(text=text)
            result.append(node)
        return [result]
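
# Illustrative sketch (mine, not part of the original file): a directive and lexer
# defined like this are typically registered from the docs' conf.py, for example
# roughly:
#
#     def setup(app):
#         app.add_directive("synopsis", FishSynopsisDirective)  # directive name assumed
#         app.add_lexer("fish-synopsis", FishSynopsisLexer)
#
# The exact wiring in fish's doc_src/conf.py may differ.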


lexer_rules = [
    (re.compile(pattern), token)
    for pattern, token in (
        # Hack: treat the "[ expr ]" alias of builtin test as command token (not as grammar
        # metacharacter). This works because we write it without spaces in the grammar (like
        # "[OPTIONS]").
        (r"\. |! |\[ | \]|\{ | \}", Name.Constant),
        # Statement separators.
        (r"\n", Text.Whitespace),
        (r";", Punctuation),
        (r" +", Text.Whitespace),
        # Operators have different highlighting than commands or parameters.
        (r"\b(and|not|or|time)\b", Operator),
        # Keywords that are not in command position.
        (r"\b(if|in)\b", Name.Function),
        # Grammar metacharacters.
        (r"[()[\]|]", Generic.Other),
        (r"\.\.\.", Generic.Other),
        # Parameters.
        (r"[\w-]+", Name.Constant),
        (r"[=%]", Name.Constant),
        (
            r"[<>]",
            Name.Constant,
        ),  # Redirections are highlighted like parameters by default.
    )
]
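
# Illustrative note (mine, not part of the original file): with these rules, the
# test(1) alias "[ STRING ]" is caught by the first pattern above (because of the
# spaces next to the brackets) and renders as literal command text, while
# "[OPTIONS]" falls through to the grammar-metacharacter and parameter rules.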


class FishSynopsisLexer(Lexer):
    name = "FishSynopsisLexer"
    aliases = ["fish-synopsis"]

    is_before_command_token = None

    def next_token(self, rule: str, offset: int, has_continuation_line: bool):
        for pattern, token_kind in lexer_rules:
            m = pattern.match(rule, pos=offset)
            if m is None:
                continue
            if token_kind is Name.Constant and self.is_before_command_token:
                token_kind = Name.Function

            if has_continuation_line:
                # Traditional case: rules with continuation lines only have a single command.
                self.is_before_command_token = False
            else:
                if m.group() in ("\n", ";") or token_kind is Operator:
                    self.is_before_command_token = True
                elif token_kind in (Name.Constant, Name.Function):
                    self.is_before_command_token = False

            return m, token_kind, m.end()
        return None, None, offset

    def get_tokens_unprocessed(self, input_text):
        """Return a list of (start, tok, value) tuples.

        start is the index into the string
        tok is the token type (as above)
        value is the string contents of the token
        """
        """
        A synopsis consists of multiple rules. Each rule can have continuation lines, which
        are expected to be indented:

            cmd foo [--quux]
                    [ARGUMENT] ...
            cmd bar

        We'll split the input into rules. This is easy for a traditional synopsis because each
        non-indented line starts a new rule. However, we also want to support code blocks:

            switch VALUE
               [case [GLOB ...]
                   [COMMAND ...]]
            end

        which makes this format ambiguous. Hack around this by always adding "end" to the
        current rule, which is enough in practice.
        """
        rules = []
        rule = []
        for line in list(input_text.splitlines()) + [""]:
            if rule and not line.startswith(" "):
                rules.append(rule)
                rule = []
                if line == "end":
                    rules[-1].append(line)
                    continue
            rule.append(line)
        result = []
        for rule in rules:
            offset = 0
            self.is_before_command_token = True
            has_continuation_line = rule[-1].startswith(" ")
            rule = "\n".join(rule) + "\n"
            while True:
                match, token_kind, offset = self.next_token(
                    rule, offset, has_continuation_line
                )
                if match is None:
                    break
                text = match.group()
                result.append((match.start(), token_kind, text))
            assert offset == len(rule), "cannot tokenize leftover text: '{}'".format(
                rule[offset:]
            )
        return result
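
A quick way to exercise the lexer outside of Sphinx (a sketch of mine, not part of the file; it assumes Pygments, docutils and Sphinx are importable, mirroring the pygmentize one-liner in the header comment):

    if __name__ == "__main__":
        # Tokenize a sample synopsis and print the raw tokens.
        lexer = FishSynopsisLexer()
        for start, tok, text in lexer.get_tokens_unprocessed(
            "string match [OPTIONS] [STRING]"
        ):
            print(start, tok, repr(text))

Here "string" and "match" come out as command tokens, "[" and "]" as grammar metacharacters, and the uppercase "OPTIONS" and "STRING" as placeholder parameters.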