fish-shell/doc_src/fish_indent_lexer.py

# This is a plugin for pygments that shells out to fish_indent.

# Example of how to use this:
# env PATH="/dir/containing/fish/indent/:$PATH" pygmentize -f terminal256 -l /path/to/fish_indent_lexer.py:FishIndentLexer -x ~/test.fish

import os
from pygments.lexer import Lexer
from pygments.token import (
    Keyword,
    Name,
    Comment,
    String,
    Error,
    Number,
    Operator,
    Other,
    Generic,
    Whitespace,
    String,
    Text,
    Punctuation,
)
import re
import subprocess

# The token type representing output to the console.
OUTPUT_TOKEN = Text

# A fallback token type.
DEFAULT = Text

# Mapping from fish token types to Pygments types.
ROLE_TO_TOKEN = {
    "normal": Name.Variable,
    "error": Generic.Error,
    "command": Name.Function,
    "keyword": Keyword,
    "statement_terminator": Punctuation,
    "param": Name.Constant,
    "comment": Comment,
    "match": DEFAULT,
    "search_match": DEFAULT,
    "operat": Operator,
    "escape": String.Escape,
    "quote": String.Single,  # note, may be changed to double dynamically
    "redirection": Punctuation,  # ?
    "autosuggestion": Other,  # in practice won't be generated
    "selection": DEFAULT,
    "pager_progress": DEFAULT,
    "pager_background": DEFAULT,
    "pager_prefix": DEFAULT,
    "pager_completion": DEFAULT,
    "pager_description": DEFAULT,
    "pager_secondary_background": DEFAULT,
    "pager_secondary_prefix": DEFAULT,
    "pager_secondary_completion": DEFAULT,
    "pager_secondary_description": DEFAULT,
    "pager_selected_background": DEFAULT,
    "pager_selected_prefix": DEFAULT,
    "pager_selected_completion": DEFAULT,
    "pager_selected_description": DEFAULT,
}


def token_for_text_and_role(text, role):
    """Return the pygments token for some input text and a fish role

    This applies any special cases of ROLE_TO_TOKEN.
    """
    if text.isspace():
        # Here fish will return 'normal' or 'statement_terminator' for newline.
        return Text.Whitespace
    elif role == "quote":
        # Check for single or double.
        return String.Single if text.startswith("'") else String.Double
    else:
        return ROLE_TO_TOKEN[role]


def tokenize_fish_command(code, offset):
    """Tokenize some fish code, offset in a parent string, by shelling
    out to fish_indent.

    fish_indent will output a list of csv lines: start,end,type.

    This function returns a list of (start, tok, value) tuples, as
    Pygments expects.
    """
    proc = subprocess.Popen(
        ["fish_indent", "--pygments"],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        universal_newlines=False,
    )
    stdout, _ = proc.communicate(code.encode("utf-8"))
    result = []
    for line in stdout.decode("utf-8").splitlines():
        start, end, role = line.split(",")
        start, end = int(start), int(end)
        value = code[start:end]
        tok = token_for_text_and_role(value, role)
        result.append((start + offset, tok, value))
    return result


class FishIndentLexer(Lexer):
    name = "FishIndentLexer"
    aliases = ["fish", "fish-docs-samples"]
    filenames = ["*.fish"]

    def get_tokens_unprocessed(self, input_text):
        """Return a list of (start, tok, value) tuples.

        start is the index into the string
        tok is the token type (as above)
        value is the string contents of the token
        """
        result = []
        if not any(s.startswith(">") for s in input_text.splitlines()):
            # No prompt, just tokenize everything.
            result = tokenize_fish_command(input_text, 0)
        else:
            # We have a prompt line.
            # Use a regexp because it will maintain string indexes for us.
            regex = re.compile(r"^(>_?\s*)?(.*\n?)", re.MULTILINE)
            for m in regex.finditer(input_text):
                if m.group(1):
                    # Prompt line; highlight via fish syntax.
                    result.append((m.start(1), Generic.Prompt, m.group(1)))
                    result.extend(tokenize_fish_command(m.group(2), m.start(2)))
                else:
                    # Non-prompt line representing output from a command.
                    result.append((m.start(2), OUTPUT_TOKEN, m.group(2)))
        return result
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`# This is a plugin for pygments that shells out to fish_indent.`

			`# Example of how to use this:`
			`# env PATH="/dir/containing/fish/indent/:$PATH" pygmentize -f terminal256 -l /path/to/fish_indent_lexer.py:FishIndentLexer -x ~/test.fish`

			`import os`
			`from pygments.lexer import Lexer`
			`from pygments.token import (`
			`Keyword,`
			`Name,`
			`Comment,`
			`String,`
			`Error,`
			`Number,`
			`Operator,`
			`Other,`
			`Generic,`
			`Whitespace,`
			`String,`
			`Text,`
			`Punctuation,`
			`)`
			`import re`
			`import subprocess`

			`# The token type representing output to the console.`
			`OUTPUT_TOKEN = Text`

			`# A fallback token type.`
			`DEFAULT = Text`

			`# Mapping from fish token types to Pygments types.`
			`ROLE_TO_TOKEN = {`
			`"normal": Name.Variable,`
			`"error": Generic.Error,`
			`"command": Name.Function,`
Highlight keywords differently This introduces a new variable $fish_color_keyword that will be used to highlight keywords. If it's not defined, we fall back on $fish_color_command as before. An issue here is that most of our keywords have this weird duality of also being builtins if executed without an argument or with `--help`. This means that e.g. if is highlighted as a command until you start typing if t and then it turns keyword. 2021-02-05 03:22:32 +08:00			`"keyword": Keyword,`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`"statement_terminator": Punctuation,`
			`"param": Name.Constant,`
			`"comment": Comment,`
			`"match": DEFAULT,`
			`"search_match": DEFAULT,`
			`"operat": Operator,`
			`"escape": String.Escape,`
			`"quote": String.Single, # note, may be changed to double dynamically`
			`"redirection": Punctuation, # ?`
			`"autosuggestion": Other, # in practice won't be generated`
			`"selection": DEFAULT,`
			`"pager_progress": DEFAULT,`
			`"pager_background": DEFAULT,`
			`"pager_prefix": DEFAULT,`
			`"pager_completion": DEFAULT,`
			`"pager_description": DEFAULT,`
			`"pager_secondary_background": DEFAULT,`
			`"pager_secondary_prefix": DEFAULT,`
			`"pager_secondary_completion": DEFAULT,`
			`"pager_secondary_description": DEFAULT,`
			`"pager_selected_background": DEFAULT,`
			`"pager_selected_prefix": DEFAULT,`
			`"pager_selected_completion": DEFAULT,`
			`"pager_selected_description": DEFAULT,`
			`}`


			`def token_for_text_and_role(text, role):`
Reformat 2020-11-22 21:39:48 +08:00			`"""Return the pygments token for some input text and a fish role`

			`This applies any special cases of ROLE_TO_TOKEN.`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`"""`
			`if text.isspace():`
			`# Here fish will return 'normal' or 'statement_terminator' for newline.`
			`return Text.Whitespace`
			`elif role == "quote":`
			`# Check for single or double.`
			`return String.Single if text.startswith("'") else String.Double`
			`else:`
			`return ROLE_TO_TOKEN[role]`


			`def tokenize_fish_command(code, offset):`
Reformat 2020-11-22 21:39:48 +08:00			`"""Tokenize some fish code, offset in a parent string, by shelling`
			`out to fish_indent.`

			`fish_indent will output a list of csv lines: start,end,type.`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00
Reformat 2020-11-22 21:39:48 +08:00			`This function returns a list of (start, tok, value) tuples, as`
			`Pygments expects.`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`"""`
			`proc = subprocess.Popen(`
			`["fish_indent", "--pygments"],`
			`stdin=subprocess.PIPE,`
			`stdout=subprocess.PIPE,`
fish_indent_lexer: explicitly encode/decode bytes over pipe Universal newlines behaves differently between Python 2.7 and 3.x, leading to problems when running Sphinx with Python 2.7. fish_indent always uses \n, so there's no need to use universal newline detection. This also allows full UTF-8 in documentation sources. Closes #5808. 2019-04-12 23:07:41 +08:00			`universal_newlines=False,`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`)`
Reformat all files This runs build_tools/style.fish, which runs clang-format on C++, fish_indent on fish and (new) black on python. If anything is wrong with the formatting, we should fix the tools, but automated formatting is worth it. 2019-05-05 18:09:25 +08:00			`stdout, _ = proc.communicate(code.encode("utf-8"))`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`result = []`
Reformat all files This runs build_tools/style.fish, which runs clang-format on C++, fish_indent on fish and (new) black on python. If anything is wrong with the formatting, we should fix the tools, but automated formatting is worth it. 2019-05-05 18:09:25 +08:00			`for line in stdout.decode("utf-8").splitlines():`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`start, end, role = line.split(",")`
			`start, end = int(start), int(end)`
			`value = code[start:end]`
			`tok = token_for_text_and_role(value, role)`
			`result.append((start + offset, tok, value))`
			`return result`


			`class FishIndentLexer(Lexer):`
			`name = "FishIndentLexer"`
			`aliases = ["fish", "fish-docs-samples"]`
			`filenames = ["*.fish"]`

			`def get_tokens_unprocessed(self, input_text):`
Reformat 2020-11-22 21:39:48 +08:00			`"""Return a list of (start, tok, value) tuples.`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00
Reformat 2020-11-22 21:39:48 +08:00			`start is the index into the string`
			`tok is the token type (as above)`
			`value is the string contents of the token`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`"""`
			`result = []`
			`if not any(s.startswith(">") for s in input_text.splitlines()):`
			`# No prompt, just tokenize everything.`
			`result = tokenize_fish_command(input_text, 0)`
			`else:`
			`# We have a prompt line.`
			`# Use a regexp because it will maintain string indexes for us.`
docs/fish_indent_lexer: Support `>_` as prompt indicator This was widely used in the old docs, and currently it just allowed `>`. See #5696. [ci skip] 2019-04-30 18:16:41 +08:00			`regex = re.compile(r"^(>_?\s)?(.\n?)", re.MULTILINE)`
Add fish_indent_lexer.py This is a pygments lexer that shells out to fish_indent 2019-03-13 14:42:36 +08:00			`for m in regex.finditer(input_text):`
			`if m.group(1):`
			`# Prompt line; highlight via fish syntax.`
			`result.append((m.start(1), Generic.Prompt, m.group(1)))`
			`result.extend(tokenize_fish_command(m.group(2), m.start(2)))`
			`else:`
			`# Non-prompt line representing output from a command.`
			`result.append((m.start(2), OUTPUT_TOKEN, m.group(2)))`
			`return result`