mirror of
https://github.com/fish-shell/fish-shell.git
synced 2024-11-23 18:30:20 +08:00
1152 lines
32 KiB
Python
Executable File
1152 lines
32 KiB
Python
Executable File
# -*- coding: utf-8 -*-
|
||
|
||
""" Deroff.py, ported to Python from the venerable deroff.c """
|
||
|
||
import sys, re, string
|
||
|
||
# True when running under Python 3 or newer (major version check only).
IS_PY3 = sys.version_info >= (3,)
|
||
|
||
|
||
class Deroffer:
    """Strip roff markup from man-page source, keeping the readable text.

    Feed source text to :meth:`deroff`; the plain text accumulates in
    ``self.output`` and is retrieved with :meth:`get_output` or written out
    with :meth:`flush_output`.  The parser works on ``self.s``, the not yet
    consumed remainder of the current input line.
    """

    g_specs_specletter = {
        # Output composed latin1 letters
        "-D": "\320",
        "Sd": "\360",
        "Tp": "\376",
        "TP": "\336",
        "AE": "\306",
        "ae": "\346",
        "OE": "OE",
        "oe": "oe",
        ":a": "\344",
        ":A": "\304",
        ":e": "\353",
        ":E": "\313",
        ":i": "\357",
        ":I": "\317",
        ":o": "\366",
        ":O": "\326",
        ":u": "\374",
        ":U": "\334",
        ":y": "\377",
        "ss": "\337",
        "'A": "\301",
        "'E": "\311",
        "'I": "\315",
        "'O": "\323",
        "'U": "\332",
        "'Y": "\335",
        "'a": "\341",
        "'e": "\351",
        "'i": "\355",
        "'o": "\363",
        "'u": "\372",
        "'y": "\375",
        "^A": "\302",
        "^E": "\312",
        "^I": "\316",
        "^O": "\324",
        "^U": "\333",
        "^a": "\342",
        "^e": "\352",
        "^i": "\356",
        "^o": "\364",
        "^u": "\373",
        "`A": "\300",
        "`E": "\310",
        "`I": "\314",
        "`O": "\322",
        "`U": "\331",
        "`a": "\340",
        "`e": "\350",
        "`i": "\354",
        "`o": "\362",
        "`u": "\371",
        "~A": "\303",
        "~N": "\321",
        "~O": "\325",
        "~a": "\343",
        "~n": "\361",
        "~o": "\365",
        ",C": "\307",
        ",c": "\347",
        "/l": "/l",
        "/L": "/L",
        "/o": "\370",
        "/O": "\330",
        "oA": "\305",
        "oa": "\345",
        # Ligatures
        "fi": "fi",
        "ff": "ff",
        "fl": "fl",
        "Fi": "ffi",
        "Ff": "fff",
        "Fl": "ffl",
    }

    g_specs = {
        "mi": "-",
        "en": "-",
        "hy": "-",
        "em": "--",
        "lq": "“",
        "rq": "”",
        "Bq": ",,",
        "oq": "`",
        "cq": "'",
        "aq": "'",
        "dq": '"',
        "or": "|",
        "at": "@",
        "sh": "#",
        "Eu": "\244",
        "eu": "\244",
        "Do": "$",
        "ct": "\242",
        "Fo": "\253",
        "Fc": "\273",
        "fo": "<",
        "fc": ">",
        "r!": "\241",
        "r?": "\277",
        "Of": "\252",
        "Om": "\272",
        "pc": "\267",
        "S1": "\271",
        "S2": "\262",
        "S3": "\263",
        "<-": "<-",
        "->": "->",
        "<>": "<->",
        "ua": "^",
        "da": "v",
        "lA": "<=",
        "rA": "=>",
        "hA": "<=>",
        "uA": "^^",
        "dA": "vv",
        "ba": "|",
        "bb": "|",
        "br": "|",
        "bv": "|",
        "ru": "_",
        "ul": "_",
        "ci": "O",
        "bu": "o",
        "co": "\251",
        "rg": "\256",
        "tm": "(TM)",
        "dd": "||",
        "dg": "|",
        "ps": "\266",
        "sc": "\247",
        "de": "\260",
        "%0": "0/00",
        "14": "\274",
        "12": "\275",
        "34": "\276",
        "f/": "/",
        "sl": "/",
        "rs": "\\",
        "sq": "[]",
        "fm": "'",
        "ha": "^",
        "ti": "~",
        "lB": "[",
        "rB": "]",
        "lC": "{",
        "rC": "}",
        "la": "<",
        "ra": ">",
        "lh": "<=",
        "rh": "=>",
        "tf": "therefore",
        "~~": "~~",
        "~=": "~=",
        "!=": "!=",
        "**": "*",
        "+-": "\261",
        "<=": "<=",
        "==": "==",
        "=~": "=~",
        ">=": ">=",
        "AN": "\\/",
        "OR": "/\\",
        "no": "\254",
        "te": "there exists",
        "fa": "for all",
        "Ah": "aleph",
        "Im": "imaginary",
        "Re": "real",
        "if": "infinity",
        "md": "\267",
        "mo": "member of",
        "mu": "\327",
        "nm": "not member of",
        "pl": "+",
        "eq": "=",
        "pt": "oc",
        "pp": "perpendicular",
        "sb": "(=",
        "sp": "=)",
        "ib": "(-",
        "ip": "-)",
        "ap": "~",
        "is": "I",
        "sr": "root",
        "pd": "d",
        "c*": "(x)",
        "c+": "(+)",
        "ca": "cap",
        "cu": "U",
        "di": "\367",
        "gr": "V",
        "es": "{}",
        "CR": "_|",
        "st": "such that",
        "/_": "/_",
        "lz": "<>",
        "an": "-",
        # Output Greek
        "*A": "Alpha",
        "*B": "Beta",
        "*C": "Xi",
        "*D": "Delta",
        "*E": "Epsilon",
        "*F": "Phi",
        "*G": "Gamma",
        "*H": "Theta",
        "*I": "Iota",
        "*K": "Kappa",
        "*L": "Lambda",
        "*M": "Mu",
        "*N": "Nu",
        "*O": "Omicron",
        "*P": "Pi",
        "*Q": "Psi",
        "*R": "Rho",
        "*S": "Sigma",
        "*T": "Tau",
        "*U": "Upsilon",
        "*W": "Omega",
        "*X": "Chi",
        "*Y": "Eta",
        "*Z": "Zeta",
        "*a": "alpha",
        "*b": "beta",
        "*c": "xi",
        "*d": "delta",
        "*e": "epsilon",
        "*f": "phi",
        "+f": "phi",
        "*g": "gamma",
        "*h": "theta",
        "+h": "theta",
        "*i": "iota",
        "*k": "kappa",
        "*l": "lambda",
        "*m": "\265",
        "*n": "nu",
        "*o": "omicron",
        "*p": "pi",
        "+p": "omega",
        "*q": "psi",
        "*r": "rho",
        "*s": "sigma",
        "*t": "tau",
        "*u": "upsilon",
        "*w": "omega",
        "*x": "chi",
        "*y": "eta",
        "*z": "zeta",
        "ts": "sigma",
    }

    g_re_word = re.compile(r"[a-zA-Z_]+")  # equivalent to the word() method
    g_re_number = re.compile(r"[+-]?\d+")  # equivalent to the number() method
    g_re_esc_char = re.compile(
        r"""([a-zA-Z_]) |    # Word
            ([+-]?\d)   |    # Number
            \\               # Backslash (for escape seq)
        """,
        re.VERBOSE,
    )

    g_re_not_backslash_or_whitespace = re.compile(
        r"[^ \t\n\r\f\v\\]+"
    )  # Match a sequence of not backslash or whitespace

    g_re_newline_collapse = re.compile(r"\n{3,}")

    g_re_font = re.compile(
        r"""\\f(         # Starts with backslash f
            (\(\S{2})  | # Open paren, then two printable chars
            (\[\S*?\]) | # Open bracket, zero or more printable characters, then close bracket
            \S)          # Any printable character
        """,
        re.VERBOSE,
    )

    # This gets filled in in __init__ below
    g_macro_dict = False

    def __init__(self):
        """Create a parser with empty output and all mode flags cleared."""
        self.reg_table = {}  # string registers defined by `.ds`
        self.tr_from = ""
        self.tr_to = ""
        self.tr = ""
        self.nls = 2
        self.specletter = False
        self.refer = False
        # Set by `.R1` / `.R2`; nothing in this class reads it.
        self.refer2 = False
        # Truthy while output is suppressed for a macro definition; numreq()
        # also uses it as a nesting counter.
        self.macro = 0
        self.nobody = False
        self.inlist = False
        self.inheader = False
        self.pic = False
        self.tbl = False
        self.tblstate = 0
        self.tblTab = ""
        self.eqn = False
        self.skipheaders = False
        self.skiplists = False
        self.ignore_sonx = False
        self.output = []
        self.name = ""

        # States of the table parser (see do_tbl)
        self.OPTIONS = 0
        self.FORMAT = 1
        self.DATA = 2

        # words is uninteresting and should be treated as false

        # The dispatch table is built once and shared by all instances; it
        # maps the first two characters after the control character to the
        # handler for that request or macro.
        if not Deroffer.g_macro_dict:
            Deroffer.g_macro_dict = {
                "SH": Deroffer.macro_sh,
                "SS": Deroffer.macro_ss_ip,
                "IP": Deroffer.macro_ss_ip,
                "H ": Deroffer.macro_ss_ip,
                "I ": Deroffer.macro_i_ir,
                "IR": Deroffer.macro_i_ir,
                "IB": Deroffer.macro_i_ir,
                "B ": Deroffer.macro_i_ir,
                "BR": Deroffer.macro_i_ir,
                "BI": Deroffer.macro_i_ir,
                "R ": Deroffer.macro_i_ir,
                "RB": Deroffer.macro_i_ir,
                "RI": Deroffer.macro_i_ir,
                "AB": Deroffer.macro_i_ir,
                "Nm": Deroffer.macro_Nm,
                "] ": Deroffer.macro_close_bracket,
                "PS": Deroffer.macro_ps,
                "PE": Deroffer.macro_pe,
                "TS": Deroffer.macro_ts,
                "T&": Deroffer.macro_t_and,
                "TE": Deroffer.macro_te,
                "EQ": Deroffer.macro_eq,
                "EN": Deroffer.macro_en,
                "R1": Deroffer.macro_r1,
                "R2": Deroffer.macro_r2,
                "de": Deroffer.macro_de,
                "BL": Deroffer.macro_bl_vl,
                "VL": Deroffer.macro_bl_vl,
                "AL": Deroffer.macro_bl_vl,
                "LB": Deroffer.macro_bl_vl,
                "RL": Deroffer.macro_bl_vl,
                "ML": Deroffer.macro_bl_vl,
                "DL": Deroffer.macro_bl_vl,
                "BV": Deroffer.macro_bv,
                "LE": Deroffer.macro_le,
                "LP": Deroffer.macro_lp_pp,
                "PP": Deroffer.macro_lp_pp,
                "P\n": Deroffer.macro_lp_pp,
                "ds": Deroffer.macro_ds,
                "so": Deroffer.macro_so_nx,
                "nx": Deroffer.macro_so_nx,
                "tr": Deroffer.macro_tr,
                "sp": Deroffer.macro_sp,
            }

    # ------------------------------------------------------------------
    # Output handling
    # ------------------------------------------------------------------

    def flush_output(self, where):
        """Write the collected text to `where` (if truthy) and clear it."""
        if where:
            where.write(self.get_output())
        self.output[:] = []

    def get_output(self):
        """Return the collected text with runs of 3+ newlines collapsed to one."""
        res = "".join(self.output)
        return Deroffer.g_re_newline_collapse.sub("\n", res)

    def putchar(self, c):
        """Append `c` to the output unconditionally and return it."""
        self.output.append(c)
        return c

    def _suppressed(self):
        """Return True while text must not be emitted (pic, eqn, refer, ...)."""
        return bool(
            self.pic
            or self.eqn
            or self.refer
            or self.macro
            or (self.skiplists and self.inlist)
            or (self.skipheaders and self.inheader)
        )

    # This gets swapped in in place of condputs the first time tr gets modified
    def condputs_tr(self, str):
        """Like condputs, but first applies the `.tr` translation table."""
        if not self._suppressed():
            self.output.append(str.translate(self.tr))

    def condputs(self, str):
        """Append `str` to the output unless output is currently suppressed."""
        if not self._suppressed():
            self.output.append(str)

    # ------------------------------------------------------------------
    # Low-level access to the pending input `self.s`
    # ------------------------------------------------------------------

    def str_at(self, idx):
        """Return the character at `idx`, or "" when past the end."""
        return self.s[idx : idx + 1]

    def skip_char(self, amt=1):
        """Drop the first `amt` characters of the pending input."""
        self.s = self.s[amt:]

    def skip_leading_whitespace(self):
        """Drop all leading whitespace (including newlines) from the input."""
        self.s = self.s.lstrip()

    def is_white(self, idx):
        """Return True if the character at `idx` is whitespace."""
        # Note this returns false for empty strings (idx >= len(self.s))
        return self.s[idx : idx + 1].isspace()

    def str_eq(self, offset, other, len):
        """Compare `len` chars of the input at `offset` with the start of `other`.

        The original definition lacked `self` and so could never be called
        successfully as a method.
        """
        return self.s[offset : offset + len] == other[:len]

    def prch(self, idx):
        """Return True if the character at `idx` exists and is not space/tab/newline."""
        # Note that this return False for the empty string (idx >= len(self.s))
        ch = self.s[idx : idx + 1]
        return ch not in " \t\n"

    def letter(self, idx):
        """Return True if the character at `idx` is alphabetic or "_"."""
        ch = self.str_at(idx)
        return ch.isalpha() or ch == "_"  # underscore is used in C identifiers

    def digit(self, idx):
        """Return True if the character at `idx` is a digit."""
        ch = self.str_at(idx)
        return ch.isdigit()

    # ------------------------------------------------------------------
    # Escape sequences
    # ------------------------------------------------------------------

    def font(self):
        """Consume a font escape (backslash-f ...); return whether one was found."""
        match = Deroffer.g_re_font.match(self.s)
        if not match:
            return False
        self.skip_char(match.end())
        return True

    def font2(self):
        """Hand-written alternative to font(); not called within this class.

        NOTE(review): the "[" branch consumes input but falls through to
        `return False`; kept as-is because no caller here defines what is
        expected.
        """
        if self.s[0:2] == "\\f":
            c = self.str_at(2)
            if c == "(" and self.prch(3) and self.prch(4):
                self.skip_char(5)
                return True
            elif c == "[":
                self.skip_char(2)
                while self.prch(0) and self.str_at(0) != "]":
                    self.skip_char()
                if self.str_at(0) == "]":
                    self.skip_char()
            elif self.prch(2):
                self.skip_char(3)
                return True
        return False

    def comment(self):
        """Consume a comment up to (not including) the newline; always True."""
        # Here we require that the string start with \"
        while self.str_at(0) and self.str_at(0) != "\n":
            self.skip_char()
        return True

    def numreq(self):
        """Consume a quoted numeric escape such as backslash-h'...'.

        Output is suppressed (via the `macro` counter) while the quoted
        argument is scanned.  Returns True if such an escape was consumed.
        """
        # We require that the string starts with backslash
        if self.str_at(1) in "hvwud" and self.str_at(2) == "'":
            self.macro += 1
            self.skip_char(3)
            while self.str_at(0) != "'" and self.esc_char():
                pass  # Weird
            if self.str_at(0) == "'":
                self.skip_char()
            self.macro -= 1
            return True
        return False

    def var(self):
        """Consume a number-register or string-register interpolation.

        Number registers (backslash-n) are consumed and produce no output.
        String registers (backslash-*) defined earlier with `.ds` have their
        value emitted.  Returns True if something was handled; False if the
        escape is malformed or names an unknown string register.
        """
        reg = ""
        s0s1 = self.s[0:2]
        if s0s1 == "\\n":
            if self.s[3:5] == "dy":
                self.skip_char(5)
                return True
            elif self.str_at(2) == "(" and self.prch(3) and self.prch(4):
                self.skip_char(5)
                return True
            elif self.str_at(2) == "[" and self.prch(3):
                self.skip_char(3)
                while self.str_at(0) and self.str_at(0) != "]":
                    self.skip_char()
                # Also consume the closing bracket, as the string-register
                # branch below does; otherwise a stray "]" reaches the output.
                if self.str_at(0) == "]":
                    self.skip_char()
                return True
            elif self.prch(2):
                self.skip_char(3)
                return True
        elif s0s1 == "\\*":
            if self.str_at(2) == "(" and self.prch(3) and self.prch(4):
                reg = self.s[3:5]
                self.skip_char(5)
            elif self.str_at(2) == "[" and self.prch(3):
                self.skip_char(3)
                while self.str_at(0) and self.str_at(0) != "]":
                    reg = reg + self.str_at(0)
                    self.skip_char()
                if self.s[0:1] == "]":
                    self.skip_char()
                else:
                    return False
            elif self.prch(2):
                reg = self.str_at(2)
                self.skip_char(3)
            else:
                return False

        if reg in self.reg_table:
            # Expand the register value, then resume with the rest of the
            # line.  Without restoring the saved input, everything after the
            # interpolation would be dropped.
            old_s = self.s
            self.s = self.reg_table[reg]
            self.text_arg()
            self.s = old_s
            return True
        return False

    def size(self):
        """Consume a point-size escape (backslash-s followed by a number)."""
        # We require that the string starts with \s
        if self.digit(2) or (self.str_at(2) in "-+" and self.digit(3)):
            self.skip_char(3)
            while self.digit(0):
                self.skip_char()
            return True
        return False

    def spec(self):
        """Consume a two-character special-character escape or a hyphenation mark.

        Emits the replacement text from the lookup tables when the name is
        known.  Sets `self.specletter` when the result counts as part of a
        word (a composed letter, ligature, or hyphenation mark).
        """
        self.specletter = False
        if self.s[0:2] == "\\(" and self.prch(2) and self.prch(3):
            key = self.s[2:4]
            if key in Deroffer.g_specs_specletter:
                self.condputs(Deroffer.g_specs_specletter[key])
                self.specletter = True
            elif key in Deroffer.g_specs:
                self.condputs(Deroffer.g_specs[key])
            self.skip_char(4)
            return True
        elif self.s.startswith("\\%"):
            self.specletter = True
            self.skip_char(2)
            return True
        else:
            return False

    def esc(self):
        """Consume a generic two-character escape, emitting its plain-text form."""
        # We require that the string start with backslash
        c = self.s[1:2]
        if not c:
            return False
        if c in "eE":
            self.condputs("\\")
        elif c in "t":
            self.condputs("\t")
        elif c in "0~":
            self.condputs(" ")
        elif c in "|^&:":
            pass  # these escapes produce no text
        else:
            self.condputs(c)
        self.skip_char(2)
        return True

    def esc_char_backslash(self):
        """Dispatch on the character after a leading backslash."""
        # Like esc_char, but we know the string starts with a backslash
        c = self.s[1:2]
        if c == '"':
            return self.comment()
        elif c == "f":
            return self.font()
        elif c == "s":
            return self.size()
        elif c in "hvwud":
            return self.numreq()
        elif c in "n*":
            return self.var()
        elif c == "(":
            return self.spec()
        else:
            return self.esc()

    def esc_char(self):
        """Consume one escape, word, or number; return whether anything matched."""
        if self.s[0:1] == "\\":
            return self.esc_char_backslash()
        return self.word() or self.number()

    # ------------------------------------------------------------------
    # Words, numbers, text and arguments
    # ------------------------------------------------------------------

    def word(self):
        """Emit consecutive word pieces joined by letter-like specials."""
        got_something = False
        while True:
            match = Deroffer.g_re_word.match(self.s)
            if not match:
                break
            got_something = True
            self.condputs(match.group(0))
            self.skip_char(match.end(0))

            # Consume all specials
            while self.spec():
                if not self.specletter:
                    break

        return got_something

    def text(self):
        """Emit the rest of the input as body text, interpreting escapes."""
        while True:
            idx = self.s.find("\\")
            if idx == -1:
                self.condputs(self.s)
                self.s = ""
                break
            self.condputs(self.s[:idx])
            self.skip_char(idx)
            if not self.esc_char_backslash():
                # Unrecognised escape: pass one character through literally.
                self.condputs(self.str_at(0))
                self.skip_char()
        return True

    def number(self):
        """Emit an optionally signed integer at the start of the input."""
        match = Deroffer.g_re_number.match(self.s)
        if not match:
            return False
        self.condputs(match.group(0))
        self.skip_char(match.end())
        return True

    def quoted_arg(self):
        """Emit a double-quoted macro argument; the closing quote is left unread."""
        if self.str_at(0) != '"':
            return False
        self.skip_char()
        while self.s and self.str_at(0) != '"':
            if not self.esc_char():
                if self.s:
                    self.condputs(self.str_at(0))
                    self.skip_char()
        return True

    def text_arg(self):
        """Emit one whitespace-delimited argument; return whether any was consumed."""
        # PCA: The deroff.c textArg() disallowed quotes at the start of an argument
        # I'm not sure if this was a bug or not
        got_something = False
        while True:
            match = Deroffer.g_re_not_backslash_or_whitespace.match(self.s)
            if match:
                # Output the characters in the match
                self.condputs(match.group(0))
                self.skip_char(match.end(0))
                got_something = True

            # Next is either an escape, or whitespace, or the end
            # If it's the whitespace or the end, we're done
            if not self.s or self.is_white(0):
                return got_something

            # Try an escape
            if not self.esc_char():
                # Some busted escape? Just output it
                self.condputs(self.str_at(0))
                self.skip_char()
            got_something = True

    def text_arg2(self):
        """Character-at-a-time alternative to text_arg(); not called within this class."""
        if not self.esc_char():
            if self.s and not self.is_white(0):
                self.condputs(self.str_at(0))
                self.skip_char()
            else:
                return False
        while True:
            if not self.esc_char():
                if self.s and not self.is_white(0):
                    self.condputs(self.str_at(0))
                    self.skip_char()
                else:
                    return True

    # ------------------------------------------------------------------
    # Macro functions
    #
    # Each handler is called with self.s positioned at the macro name.  A
    # truthy return means the line is fully handled; a falsy return makes
    # request_or_macro() go on to emit the macro's arguments as text.
    # ------------------------------------------------------------------

    def macro_sh(self):
        """`.SH`: note whether this is the synopsis section header."""
        # "\xdc" is U-umlaut (German "UBERSICHT" with umlaut); the original
        # literal was mojibake and could never match.
        headers = (
            " SYNOPSIS",
            ' "SYNOPSIS',
            " \xdcBERSICHT",
            ' "\xdcBERSICHT',
        )
        self.inheader = self.s[2:].startswith(headers)
        self.nobody = True
        return False

    def macro_ss_ip(self):
        """`.SS`, `.IP`, `.H`: heading-like macros with no body of their own."""
        self.nobody = True
        return False

    def macro_i_ir(self):
        """Font macros (`.I`, `.B`, `.BR`, ...): emit the arguments as text."""
        return False

    def macro_Nm(self):
        """`.Nm`: with an argument remember the name; bare, emit the remembered name."""
        if self.s == "Nm\n":
            self.condputs(self.name)
        else:
            self.name = self.s[3:].strip() + " "
        return True

    def macro_close_bracket(self):
        """`.] `: leave refer mode."""
        self.refer = False
        return False

    def macro_ps(self):
        """`.PS`: enter pic mode."""
        if self.is_white(2):
            self.pic = True
        self.condputs("\n")
        return True

    def macro_pe(self):
        """`.PE`: leave pic mode."""
        if self.is_white(2):
            self.pic = False
        self.condputs("\n")
        return True

    def macro_ts(self):
        """`.TS`: start a table, expecting the options line next."""
        if self.is_white(2):
            self.tbl, self.tblstate = True, self.OPTIONS
        self.condputs("\n")
        return True

    def macro_t_and(self):
        """`.T&`: continue a table with a new format section."""
        if self.is_white(2):
            self.tbl, self.tblstate = True, self.FORMAT
        self.condputs("\n")
        return True

    def macro_te(self):
        """`.TE`: end the table."""
        if self.is_white(2):
            self.tbl = False
        self.condputs("\n")
        return True

    def macro_eq(self):
        """`.EQ`: enter eqn mode."""
        if self.is_white(2):
            self.eqn = True
        self.condputs("\n")
        return True

    def macro_en(self):
        """`.EN`: leave eqn mode."""
        if self.is_white(2):
            self.eqn = False
        self.condputs("\n")
        return True

    def macro_r1(self):
        """`.R1`: set the `refer2` flag."""
        if self.is_white(2):
            self.refer2 = True
        self.condputs("\n")
        return True

    def macro_r2(self):
        """`.R2`: clear the `refer2` flag."""
        if self.is_white(2):
            self.refer2 = False
        self.condputs("\n")
        return True

    def macro_de(self):
        """`.de`: start a macro definition; output is suppressed until `..`.

        The original assigned a throwaway local named `macro`, so definition
        bodies were never suppressed.
        """
        self.macro = True
        self.condputs("\n")
        return True

    def macro_bl_vl(self):
        """List-begin macros (`.BL`, `.VL`, ...): enter list mode."""
        if self.is_white(2):
            self.inlist = True
        self.condputs("\n")
        return True

    def macro_bv(self):
        """`.BVL`: enter list mode."""
        # The original called a non-existent self.white() here and raised
        # AttributeError whenever the third character was "L".
        if self.str_at(2) == "L" and self.is_white(3):
            self.inlist = True
        self.condputs("\n")
        return True

    def macro_le(self):
        """`.LE`: leave list mode."""
        if self.is_white(2):
            self.inlist = False
        self.condputs("\n")
        return True

    def macro_lp_pp(self):
        """Paragraph macros (`.LP`, `.PP`, `.P`): emit a newline."""
        self.condputs("\n")
        return True

    def macro_ds(self):
        """`.ds name value`: define a string register."""
        self.skip_char(2)
        self.skip_leading_whitespace()
        if self.str_at(0):
            # Split at whitespace
            # NOTE(review): with maxsplit=2 and the length check below, only
            # single-word values are stored; multi-word definitions are
            # silently ignored.  Confirm whether that is intended.
            comps = self.s.split(None, 2)
            if len(comps) == 2:
                name, value = comps
                value = value.rstrip()
                self.reg_table[name] = value
        self.condputs("\n")
        return True

    def macro_so_nx(self):
        """`.so` / `.nx`: include directives are ignored."""
        # We always ignore include directives
        # deroff.c for some reason allowed this to fall through to the 'tr' case
        # I think that was just a bug so I won't replicate it
        return True

    def macro_tr(self):
        """`.tr abcd...`: register character translations (a->b, c->d, ...)."""
        self.skip_char(2)
        self.skip_leading_whitespace()
        while self.s and self.str_at(0) != "\n":
            c = self.str_at(0)
            ns = self.str_at(1)
            self.skip_char(2)
            if not ns or ns == "\n":
                ns = " "  # an unpaired character maps to a space
            self.tr_from += c
            self.tr_to += ns

        # Update our table, then swap in the slower tr-savvy condputs
        try:  # Python2
            self.tr = string.maketrans(self.tr_from, self.tr_to)
        except AttributeError:  # Python3
            self.tr = "".maketrans(self.tr_from, self.tr_to)
        self.condputs = self.condputs_tr
        return True

    def macro_sp(self):
        """`.sp`: emit a newline."""
        self.condputs("\n")
        return True

    def macro_other(self):
        """Any unrecognised request: emit a newline and drop its arguments."""
        self.condputs("\n")
        return True

    # ------------------------------------------------------------------
    # Line-level drivers
    # ------------------------------------------------------------------

    def request_or_macro(self):
        """Handle a control line (one starting with "." or "'"); returns True."""
        # s[0] is period or open single quote
        self.skip_char()
        # Inspect the first character *after* the control character.  The
        # original looked one character too far, which made the comment,
        # refer and end-of-macro branches below unreachable or wrong.
        s0 = self.s[0:1]
        if s0 == "\\":
            if self.str_at(1) == '"':
                # Comment line
                self.condputs("\n")
                return True
        elif s0 == "[":
            self.refer = True
            self.condputs("\n")
            return True
        elif s0 == "]":
            self.refer = False
            self.skip_char()
            return self.text()
        elif s0 == ".":
            # ".." ends a macro definition started by `.de`
            self.macro = False
            self.condputs("\n")
            return True

        self.nobody = False
        s0s1 = self.s[0:2]

        macro_func = Deroffer.g_macro_dict.get(s0s1, Deroffer.macro_other)
        if macro_func(self):
            return True

        if self.skipheaders and self.nobody:
            return True

        # Skip the macro name, then emit each argument as text.
        self.skip_leading_whitespace()
        while self.s and not self.is_white(0):
            self.skip_char()
        self.skip_leading_whitespace()
        while True:
            if not self.quoted_arg() and not self.text_arg():
                if self.s:
                    self.condputs(self.str_at(0))
                    self.skip_char()
                else:
                    return True

    def request_or_macro2(self):
        """Legacy entry point kept for compatibility.

        This used to be an inline if/elif copy of the dispatch performed by
        request_or_macro(); it now delegates so the two cannot drift apart.
        """
        return self.request_or_macro()

    def do_tbl(self):
        """Handle one line inside a `.TS` ... `.TE` table; returns True."""
        if self.tblstate == self.OPTIONS:
            while self.s and self.str_at(0) != ";" and self.str_at(0) != "\n":
                self.skip_leading_whitespace()
                if not self.str_at(0).isalpha():
                    # deroff.c has a bug where it can loop forever here...we try to work around it
                    self.skip_char()
                else:  # Parse option
                    option = self.s
                    arg = ""

                    idx = 0
                    while option[idx : idx + 1].isalpha():
                        idx += 1

                    if option[idx : idx + 1] == "(":
                        option = option[:idx]
                        self.s = self.s[idx + 1 :]
                        arg = self.s
                    else:
                        self.s = ""

                    if arg:
                        idx = arg.find(")")
                        if idx != -1:
                            arg = arg[:idx]
                        # When no ")" was found idx is -1 and this is a no-op.
                        self.s = self.s[idx + 1 :]

                    if option.lower() == "tab":
                        self.tblTab = arg[0:1]

            self.tblstate = self.FORMAT
            self.condputs("\n")

        elif self.tblstate == self.FORMAT:
            while self.s and self.str_at(0) != "." and self.str_at(0) != "\n":
                self.skip_leading_whitespace()
                if self.str_at(0):
                    self.skip_char()

            # A "." terminates the format section; data lines follow.
            if self.str_at(0) == ".":
                self.tblstate = self.DATA
            self.condputs("\n")

        elif self.tblstate == self.DATA:
            if self.tblTab:
                self.s = self.s.replace(self.tblTab, "\t")
            self.text()
        return True

    def do_line(self):
        """Process the line currently held in self.s; False stops processing."""
        if self.s[0:1] in ".'":
            if not self.request_or_macro():
                return False
        elif self.tbl:
            self.do_tbl()
        else:
            self.text()
        return True

    def deroff(self, str):
        """Process a whole document given as one string, line by line."""
        for line in str.split("\n"):
            self.s = line + "\n"
            if not self.do_line():
                break
|
||
|
||
|
||
def deroff_files(files):
    """Deroff each file in `files` and write the plain text to stdout.

    Each path is echoed to stderr before it is processed.  Paths ending in
    ".gz" are read through gzip and, on Python 3, decoded as latin-1; all
    other paths are opened in text mode.
    """
    # Imported locally so this function also works when the module is
    # imported rather than run as a script (the only other import of gzip
    # sits under the __main__ guard).
    import gzip

    for arg in files:
        sys.stderr.write(arg + "\n")
        # `with` guarantees the handle is closed even if reading fails.
        if arg.endswith(".gz"):
            with gzip.open(arg, "r") as f:
                data = f.read()
            if IS_PY3:
                data = data.decode("latin-1")
        else:
            with open(arg, "r") as f:
                data = f.read()
        d = Deroffer()
        d.deroff(data)
        d.flush_output(sys.stdout)
|
||
|
||
|
||
if __name__ == "__main__":
    # gzip is referenced by deroff_files() when it is given ".gz" inputs.
    import gzip

    # Every command-line argument is treated as a man page to convert.
    deroff_files(sys.argv[1:])

    # Profiling recipe (disabled):
    # import cProfile, profile, pstats
    # profile.run("deroff_files(paths)", "fooprof")
    # p = pstats.Stats("fooprof")
    # p.sort_stats("time").print_stats(100)
    # p.sort_stats('calls').print_callers(.5, 'startswith')
|