fish-shell/share/tools/deroff.py

1152 lines
32 KiB
Python
Raw Permalink Normal View History

2012-04-08 16:43:30 +08:00
# -*- coding: utf-8 -*-
""" Deroff.py, ported to Python from the venerable deroff.c """
2012-04-15 19:41:20 +08:00
import sys, re, string
2012-04-08 16:43:30 +08:00
# True when running under Python 3 or later; deroff_files() uses it to decide
# whether bytes read from a gzip file must be decoded to text.
IS_PY3 = sys.version_info >= (3,)
2012-04-08 16:43:30 +08:00
class Deroffer:
    """Strip roff markup from text, collecting the plain text that remains.

    Usage: create an instance, call ``deroff(text)`` one or more times, then
    read the result with ``get_output()`` or write it with ``flush_output()``.

    The parser is a cursor over ``self.s``, the not-yet-consumed remainder of
    the current input line.  The small helper methods (``str_at``,
    ``skip_char``, ...) inspect or advance that cursor; the recogniser
    methods (``font``, ``var``, ``spec``, ...) return True when they consumed
    something from the front of ``self.s``.
    """

    g_specs_specletter = {
        # Output composed latin1 letters
        "-D": "\320",
        "Sd": "\360",
        "Tp": "\376",
        "TP": "\336",
        "AE": "\306",
        "ae": "\346",
        "OE": "OE",
        "oe": "oe",
        ":a": "\344",
        ":A": "\304",
        ":e": "\353",
        ":E": "\313",
        ":i": "\357",
        ":I": "\317",
        ":o": "\366",
        ":O": "\326",
        ":u": "\374",
        ":U": "\334",
        ":y": "\377",
        "ss": "\337",
        "'A": "\301",
        "'E": "\311",
        "'I": "\315",
        "'O": "\323",
        "'U": "\332",
        "'Y": "\335",
        "'a": "\341",
        "'e": "\351",
        "'i": "\355",
        "'o": "\363",
        "'u": "\372",
        "'y": "\375",
        "^A": "\302",
        "^E": "\312",
        "^I": "\316",
        "^O": "\324",
        "^U": "\333",
        "^a": "\342",
        "^e": "\352",
        "^i": "\356",
        "^o": "\364",
        "^u": "\373",
        "`A": "\300",
        "`E": "\310",
        "`I": "\314",
        "`O": "\322",
        "`U": "\331",
        "`a": "\340",
        "`e": "\350",
        "`i": "\354",
        "`o": "\362",
        "`u": "\371",
        "~A": "\303",
        "~N": "\321",
        "~O": "\325",
        "~a": "\343",
        "~n": "\361",
        "~o": "\365",
        ",C": "\307",
        ",c": "\347",
        "/l": "/l",
        "/L": "/L",
        "/o": "\370",
        "/O": "\330",
        "oA": "\305",
        "oa": "\345",
        # Ligatures
        "fi": "fi",
        "ff": "ff",
        "fl": "fl",
        "Fi": "ffi",
        "Ff": "fff",
        "Fl": "ffl",
    }

    g_specs = {
        "mi": "-",
        "en": "-",
        "hy": "-",
        "em": "--",
        "lq": "",
        "rq": "",
        "Bq": ",,",
        "oq": "`",
        "cq": "'",
        "aq": "'",
        "dq": '"',
        "or": "|",
        "at": "@",
        "sh": "#",
        "Eu": "\244",
        "eu": "\244",
        "Do": "$",
        "ct": "\242",
        "Fo": "\253",
        "Fc": "\273",
        "fo": "<",
        "fc": ">",
        "r!": "\241",
        "r?": "\277",
        "Of": "\252",
        "Om": "\272",
        "pc": "\267",
        "S1": "\271",
        "S2": "\262",
        "S3": "\263",
        "<-": "<-",
        "->": "->",
        "<>": "<->",
        "ua": "^",
        "da": "v",
        "lA": "<=",
        "rA": "=>",
        "hA": "<=>",
        "uA": "^^",
        "dA": "vv",
        "ba": "|",
        "bb": "|",
        "br": "|",
        "bv": "|",
        "ru": "_",
        "ul": "_",
        "ci": "O",
        "bu": "o",
        "co": "\251",
        "rg": "\256",
        "tm": "(TM)",
        "dd": "||",
        "dg": "|",
        "ps": "\266",
        "sc": "\247",
        "de": "\260",
        "%0": "0/00",
        "14": "\274",
        "12": "\275",
        "34": "\276",
        "f/": "/",
        "sl": "/",
        "rs": "\\",
        "sq": "[]",
        "fm": "'",
        "ha": "^",
        "ti": "~",
        "lB": "[",
        "rB": "]",
        "lC": "{",
        "rC": "}",
        "la": "<",
        "ra": ">",
        "lh": "<=",
        "rh": "=>",
        "tf": "therefore",
        "~~": "~~",
        "~=": "~=",
        "!=": "!=",
        "**": "*",
        "+-": "\261",
        "<=": "<=",
        "==": "==",
        "=~": "=~",
        ">=": ">=",
        "AN": "\\/",
        "OR": "/\\",
        "no": "\254",
        "te": "there exists",
        "fa": "for all",
        "Ah": "aleph",
        "Im": "imaginary",
        "Re": "real",
        "if": "infinity",
        "md": "\267",
        "mo": "member of",
        "mu": "\327",
        "nm": "not member of",
        "pl": "+",
        "eq": "=",
        "pt": "oc",
        "pp": "perpendicular",
        "sb": "(=",
        "sp": "=)",
        "ib": "(-",
        "ip": "-)",
        "ap": "~",
        "is": "I",
        "sr": "root",
        "pd": "d",
        "c*": "(x)",
        "c+": "(+)",
        "ca": "cap",
        "cu": "U",
        "di": "\367",
        "gr": "V",
        "es": "{}",
        "CR": "_|",
        "st": "such that",
        "/_": "/_",
        "lz": "<>",
        "an": "-",
        # Output Greek
        "*A": "Alpha",
        "*B": "Beta",
        "*C": "Xi",
        "*D": "Delta",
        "*E": "Epsilon",
        "*F": "Phi",
        "*G": "Gamma",
        "*H": "Theta",
        "*I": "Iota",
        "*K": "Kappa",
        "*L": "Lambda",
        "*M": "Mu",
        "*N": "Nu",
        "*O": "Omicron",
        "*P": "Pi",
        "*Q": "Psi",
        "*R": "Rho",
        "*S": "Sigma",
        "*T": "Tau",
        "*U": "Upsilon",
        "*W": "Omega",
        "*X": "Chi",
        "*Y": "Eta",
        "*Z": "Zeta",
        "*a": "alpha",
        "*b": "beta",
        "*c": "xi",
        "*d": "delta",
        "*e": "epsilon",
        "*f": "phi",
        "+f": "phi",
        "*g": "gamma",
        "*h": "theta",
        "+h": "theta",
        "*i": "iota",
        "*k": "kappa",
        "*l": "lambda",
        "*m": "\265",
        "*n": "nu",
        "*o": "omicron",
        "*p": "pi",
        "+p": "omega",
        "*q": "psi",
        "*r": "rho",
        "*s": "sigma",
        "*t": "tau",
        "*u": "upsilon",
        "*w": "omega",
        "*x": "chi",
        "*y": "eta",
        "*z": "zeta",
        "ts": "sigma",
    }

    g_re_word = re.compile(r"[a-zA-Z_]+")  # equivalent to the word() method
    g_re_number = re.compile(r"[+-]?\d+")  # equivalent to the number() method
    g_re_esc_char = re.compile(
        r"""([a-zA-Z_]) |     # Word
            ([+-]?\d)   |     # Number
            \\                # Backslash (for escape seq)
        """,
        re.VERBOSE,
    )

    g_re_not_backslash_or_whitespace = re.compile(
        r"[^ \t\n\r\f\v\\]+"
    )  # Match a sequence of not backslash or whitespace

    g_re_newline_collapse = re.compile(r"\n{3,}")

    g_re_font = re.compile(
        r"""\\f(         # Starts with backslash f
            (\(\S{2})  | # Open paren, then two printable chars
            (\[\S*?\]) | # Open bracket, zero or more printable characters, then close bracket
            \S)          # Any printable character
        """,
        re.VERBOSE,
    )

    # This gets filled in in __init__ below
    g_macro_dict = False

    def __init__(self):
        """Set up an empty parser state and build the shared macro table."""
        self.reg_table = {}  # string registers defined by .ds, name -> value
        self.tr_from = ""
        self.tr_to = ""
        self.tr = ""

        self.s = ""  # unconsumed remainder of the current line
        self.nls = 2
        self.specletter = False
        self.refer = False
        self.refer2 = False  # written by the R1/R2 macros
        self.macro = 0
        self.nobody = False
        self.inlist = False
        self.inheader = False
        self.pic = False
        self.tbl = False
        self.tblstate = 0
        self.tblTab = ""

        self.eqn = False
        self.skipheaders = False
        self.skiplists = False
        self.ignore_sonx = False
        self.output = []
        self.name = ""

        # Values of self.tblstate
        self.OPTIONS = 0
        self.FORMAT = 1
        self.DATA = 2

        # words is uninteresting and should be treated as false

        # The table is built lazily, once, because it refers to methods that
        # are only defined further down in the class body.
        if not Deroffer.g_macro_dict:
            Deroffer.g_macro_dict = {
                "SH": Deroffer.macro_sh,
                "SS": Deroffer.macro_ss_ip,
                "IP": Deroffer.macro_ss_ip,
                "H ": Deroffer.macro_ss_ip,
                "I ": Deroffer.macro_i_ir,
                "IR": Deroffer.macro_i_ir,
                "IB": Deroffer.macro_i_ir,
                "B ": Deroffer.macro_i_ir,
                "BR": Deroffer.macro_i_ir,
                "BI": Deroffer.macro_i_ir,
                "R ": Deroffer.macro_i_ir,
                "RB": Deroffer.macro_i_ir,
                "RI": Deroffer.macro_i_ir,
                "AB": Deroffer.macro_i_ir,
                "Nm": Deroffer.macro_Nm,
                "] ": Deroffer.macro_close_bracket,
                "PS": Deroffer.macro_ps,
                "PE": Deroffer.macro_pe,
                "TS": Deroffer.macro_ts,
                "T&": Deroffer.macro_t_and,
                "TE": Deroffer.macro_te,
                "EQ": Deroffer.macro_eq,
                "EN": Deroffer.macro_en,
                "R1": Deroffer.macro_r1,
                "R2": Deroffer.macro_r2,
                "de": Deroffer.macro_de,
                "BL": Deroffer.macro_bl_vl,
                "VL": Deroffer.macro_bl_vl,
                "AL": Deroffer.macro_bl_vl,
                "LB": Deroffer.macro_bl_vl,
                "RL": Deroffer.macro_bl_vl,
                "ML": Deroffer.macro_bl_vl,
                "DL": Deroffer.macro_bl_vl,
                "BV": Deroffer.macro_bv,
                "LE": Deroffer.macro_le,
                "LP": Deroffer.macro_lp_pp,
                "PP": Deroffer.macro_lp_pp,
                "P\n": Deroffer.macro_lp_pp,
                "ds": Deroffer.macro_ds,
                "so": Deroffer.macro_so_nx,
                "nx": Deroffer.macro_so_nx,
                "tr": Deroffer.macro_tr,
                "sp": Deroffer.macro_sp,
            }

    # ------------------------------------------------------------------
    # Output handling
    # ------------------------------------------------------------------

    def flush_output(self, where):
        """Write the collected output to ``where`` (if truthy), then clear it."""
        if where:
            where.write(self.get_output())
        self.output[:] = []

    def get_output(self):
        """Return the collected output with runs of 3+ newlines collapsed to one."""
        res = "".join(self.output)
        return Deroffer.g_re_newline_collapse.sub("\n", res)

    def putchar(self, c):
        """Append ``c`` to the output unconditionally and return it."""
        self.output.append(c)
        return c

    # This gets swapped in in place of condputs the first time tr gets modified
    def condputs_tr(self, str):
        """Like condputs(), but applies the .tr translation table first."""
        special = (
            self.pic
            or self.eqn
            or self.refer
            or self.macro
            or (self.skiplists and self.inlist)
            or (self.skipheaders and self.inheader)
        )
        if not special:
            self.output.append(str.translate(self.tr))

    def condputs(self, str):
        """Append ``str`` to the output unless a suppressing mode is active."""
        special = (
            self.pic
            or self.eqn
            or self.refer
            or self.macro
            or (self.skiplists and self.inlist)
            or (self.skipheaders and self.inheader)
        )
        if not special:
            self.output.append(str)

    # ------------------------------------------------------------------
    # Cursor helpers
    # ------------------------------------------------------------------

    def str_at(self, idx):
        """Return the character at ``idx`` in self.s, or "" when out of range."""
        return self.s[idx : idx + 1]

    def skip_char(self, amt=1):
        """Drop the first ``amt`` characters of self.s."""
        self.s = self.s[amt:]

    def skip_leading_whitespace(self):
        """Drop all leading whitespace (including newlines) from self.s."""
        self.s = self.s.lstrip()

    def is_white(self, idx):
        """Return True if the character at ``idx`` is whitespace."""
        # Note this returns false for empty strings (idx >= len(self.s))
        return self.s[idx : idx + 1].isspace()

    def str_eq(self, offset, other, len):
        """Return True if ``len`` chars of self.s at ``offset`` equal ``other[:len]``."""
        # The original definition lacked ``self`` and so could never be called
        # successfully on an instance.
        return self.s[offset : offset + len] == other[:len]

    def prch(self, idx):
        """Return True if the character at ``idx`` exists and is not space/tab/newline."""
        # Note that this return False for the empty string (idx >= len(self.s)),
        # because "" is a substring of every string.
        ch = self.s[idx : idx + 1]
        return ch not in " \t\n"

    def letter(self, idx):
        """Return True if the character at ``idx`` is alphabetic or an underscore."""
        ch = self.str_at(idx)
        return ch.isalpha() or ch == "_"  # underscore is used in C identifiers

    def digit(self, idx):
        """Return True if the character at ``idx`` is a digit."""
        ch = self.str_at(idx)
        return ch.isdigit()

    # ------------------------------------------------------------------
    # Escape-sequence recognisers
    # ------------------------------------------------------------------

    def font(self):
        """Consume a leading font escape (\\fX, \\f(XX, \\f[...]); True if found."""
        match = Deroffer.g_re_font.match(self.s)
        if not match:
            return False
        self.skip_char(match.end())
        return True

    def font2(self):
        """Hand-written equivalent of font() that does not use the regex."""
        if self.s[0:2] == "\\f":
            c = self.str_at(2)
            if c == "(" and self.prch(3) and self.prch(4):
                self.skip_char(5)
                return True
            elif c == "[":
                self.skip_char(2)
                while self.prch(0) and self.str_at(0) != "]":
                    self.skip_char()
                if self.str_at(0) == "]":
                    self.skip_char()
                # The escape has been consumed, so report success; the
                # original fell through to ``return False`` here.
                return True
            elif self.prch(2):
                self.skip_char(3)
                return True
        return False

    def comment(self):
        """Consume everything up to (not including) the newline; always True."""
        # Here we require that the string start with \"
        while self.str_at(0) and self.str_at(0) != "\n":
            self.skip_char()
        return True

    def numreq(self):
        """Consume a quoted numeric request such as \\h'...'; True if found."""
        # We require that the string starts with backslash
        if self.str_at(1) in "hvwud" and self.str_at(2) == "'":
            # Bump self.macro so that nothing inside the quotes is output.
            self.macro += 1
            self.skip_char(3)
            while self.str_at(0) != "'" and self.esc_char():
                pass  # Weird
            if self.str_at(0) == "'":
                self.skip_char()
            self.macro -= 1
            return True
        return False

    def var(self):
        """Consume a register reference (\\n...) or string reference (\\*...).

        Number registers are dropped.  A string reference whose name was
        defined with .ds is replaced by its value.  Returns True if a
        reference was handled, False otherwise (input may still have been
        consumed for an unknown string name).
        """
        reg = ""
        s0s1 = self.s[0:2]
        if s0s1 == "\\n":
            if self.s[3:5] == "dy":
                self.skip_char(5)
                return True
            elif self.str_at(2) == "(" and self.prch(3) and self.prch(4):
                self.skip_char(5)
                return True
            elif self.str_at(2) == "[" and self.prch(3):
                self.skip_char(3)
                while self.str_at(0) and self.str_at(0) != "]":
                    self.skip_char()
                # Also consume the closing bracket, as the \*[...] branch
                # does; otherwise a stray "]" would leak into the output.
                if self.str_at(0) == "]":
                    self.skip_char()
                return True
            elif self.prch(2):
                self.skip_char(3)
                return True
        elif s0s1 == "\\*":
            if self.str_at(2) == "(" and self.prch(3) and self.prch(4):
                reg = self.s[3:5]
                self.skip_char(5)
            elif self.str_at(2) == "[" and self.prch(3):
                self.skip_char(3)
                while self.str_at(0) and self.str_at(0) != "]":
                    reg = reg + self.str_at(0)
                    self.skip_char()
                if self.s[0:1] == "]":
                    self.skip_char()
                else:
                    return False
            elif self.prch(2):
                reg = self.str_at(2)
                self.skip_char(3)
            else:
                return False

        if reg in self.reg_table:
            # Temporarily point the cursor at the stored value, expand it,
            # then restore the rest of the line.  The original saved old_s
            # but never restored it, losing everything after the reference.
            old_s = self.s
            self.s = self.reg_table[reg]
            # text() rather than text_arg() so that a multi-word value is
            # expanded in full instead of stopping at the first space.
            self.text()
            self.s = old_s
            return True
        return False

    def size(self):
        """Consume a point-size escape (\\sN, \\s+N, \\s-N); True if found."""
        # We require that the string starts with \s
        if self.digit(2) or (self.str_at(2) in "-+" and self.digit(3)):
            self.skip_char(3)
            while self.digit(0):
                self.skip_char()
            return True
        return False

    def spec(self):
        """Consume a special character (\\(xx) or hyphenation mark (\\%).

        Known \\(xx names are replaced using the g_specs tables; unknown ones
        are dropped.  self.specletter records whether the result counts as
        part of a word.  Returns True if something was consumed.
        """
        self.specletter = False
        if self.s[0:2] == "\\(" and self.prch(2) and self.prch(3):
            key = self.s[2:4]
            if key in Deroffer.g_specs_specletter:
                self.condputs(Deroffer.g_specs_specletter[key])
                self.specletter = True
            elif key in Deroffer.g_specs:
                self.condputs(Deroffer.g_specs[key])
            self.skip_char(4)
            return True
        elif self.s.startswith("\\%"):
            self.specletter = True
            self.skip_char(2)
            return True
        else:
            return False

    def esc(self):
        """Consume a generic two-character escape; False only at end of input."""
        # We require that the string start with backslash
        c = self.s[1:2]
        if not c:
            return False
        if c in "eE":
            self.condputs("\\")
        elif c in "t":
            self.condputs("\t")
        elif c in "0~":
            self.condputs(" ")
        elif c in "|^&:":
            pass  # these escapes produce no output
        else:
            self.condputs(c)
        self.skip_char(2)
        return True

    def word(self):
        """Output a run of words interleaved with specials; True if any word matched."""
        got_something = False
        while True:
            match = Deroffer.g_re_word.match(self.s)
            if not match:
                break
            got_something = True
            self.condputs(match.group(0))
            self.skip_char(match.end(0))

            # Consume all specials
            while self.spec():
                if not self.specletter:
                    break

        return got_something

    def text(self):
        """Output the whole remainder of self.s, interpreting escapes; always True."""
        while True:
            idx = self.s.find("\\")
            if idx == -1:
                self.condputs(self.s)
                self.s = ""
                break
            else:
                self.condputs(self.s[:idx])
                self.skip_char(idx)
                if not self.esc_char_backslash():
                    # Not a recognised escape: emit the character as-is.
                    self.condputs(self.str_at(0))
                    self.skip_char()
        return True

    def number(self):
        """Output a leading (optionally signed) integer; True if one matched."""
        match = Deroffer.g_re_number.match(self.s)
        if not match:
            return False
        else:
            self.condputs(match.group(0))
            self.skip_char(match.end())
            return True

    def esc_char_backslash(self):
        """Dispatch on the character following a leading backslash."""
        # Like esc_char, but we know the string starts with a backslash
        c = self.s[1:2]
        if c == '"':
            return self.comment()
        elif c == "f":
            return self.font()
        elif c == "s":
            return self.size()
        elif c in "hvwud":
            return self.numreq()
        elif c in "n*":
            return self.var()
        elif c == "(":
            return self.spec()
        else:
            return self.esc()

    def esc_char(self):
        """Consume one escape, word or number from the front of self.s."""
        if self.s[0:1] == "\\":
            return self.esc_char_backslash()
        return self.word() or self.number()

    def quoted_arg(self):
        """Output the contents of a double-quoted argument; True if one started here.

        The closing quote itself is left in self.s.
        """
        if self.str_at(0) == '"':
            self.skip_char()
            while self.s and self.str_at(0) != '"':
                if not self.esc_char():
                    if self.s:
                        self.condputs(self.str_at(0))
                        self.skip_char()
            return True
        else:
            return False

    def text_arg(self):
        """Output one whitespace-delimited argument; True if anything was output."""
        # PCA: The deroff.c textArg() disallowed quotes at the start of an argument
        # I'm not sure if this was a bug or not
        got_something = False
        while True:
            match = Deroffer.g_re_not_backslash_or_whitespace.match(self.s)
            if match:
                # Output the characters in the match
                self.condputs(match.group(0))
                self.skip_char(match.end(0))
                got_something = True

            # Next is either an escape, or whitespace, or the end
            # If it's the whitespace or the end, we're done
            if not self.s or self.is_white(0):
                return got_something

            # Try an escape
            if not self.esc_char():
                # Some busted escape? Just output it
                self.condputs(self.str_at(0))
                self.skip_char()
                got_something = True

    def text_arg2(self):
        """Regex-free variant of text_arg(); True if at least one item was consumed."""
        if not self.esc_char():
            if self.s and not self.is_white(0):
                self.condputs(self.str_at(0))
                self.skip_char()
            else:
                return False
        while True:
            if not self.esc_char():
                if self.s and not self.is_white(0):
                    self.condputs(self.str_at(0))
                    self.skip_char()
                else:
                    return True

    # ------------------------------------------------------------------
    # Macro functions
    #
    # Each is called by request_or_macro() with self.s positioned at the
    # macro name.  A truthy return means the line is fully handled; a falsy
    # return lets request_or_macro() go on to output the macro's arguments.
    # ------------------------------------------------------------------

    def macro_sh(self):
        """.SH: track whether we are inside a SYNOPSIS-style header section."""
        # NOTE(review): the " BERSICHT" entries look like a mangled
        # "UBERSICHT" with an umlaut (German for synopsis) - confirm the
        # intended bytes.
        for header_str in [" SYNOPSIS", ' "SYNOPSIS', " BERSICHT", ' "BERSICHT']:
            if self.s[2:].startswith(header_str):
                self.inheader = True
                break
        else:
            # Did not find a header string
            self.inheader = False
            self.nobody = True
        return False

    def macro_ss_ip(self):
        """.SS / .IP / .H: mark the line as a heading; arguments are still output."""
        self.nobody = True
        return False

    def macro_i_ir(self):
        """Font macros (.I, .B, .BR, ...): no state change, arguments are output."""
        return False

    def macro_Nm(self):
        """.Nm: remember the given name, or output the remembered one if bare."""
        if self.s == "Nm\n":
            self.condputs(self.name)
        else:
            self.name = self.s[3:].strip() + " "
        return True

    def macro_close_bracket(self):
        """'] ': leave refer mode; the rest of the line is processed normally."""
        self.refer = False
        return False

    def macro_ps(self):
        """.PS: enter pic mode."""
        if self.is_white(2):
            self.pic = True
        self.condputs("\n")
        return True

    def macro_pe(self):
        """.PE: leave pic mode."""
        if self.is_white(2):
            self.pic = False
        self.condputs("\n")
        return True

    def macro_ts(self):
        """.TS: enter table mode, expecting the options line next."""
        if self.is_white(2):
            self.tbl, self.tblstate = True, self.OPTIONS
        self.condputs("\n")
        return True

    def macro_t_and(self):
        """.T&: stay in table mode, expecting format lines next."""
        if self.is_white(2):
            self.tbl, self.tblstate = True, self.FORMAT
        self.condputs("\n")
        return True

    def macro_te(self):
        """.TE: leave table mode."""
        if self.is_white(2):
            self.tbl = False
        self.condputs("\n")
        return True

    def macro_eq(self):
        """.EQ: enter eqn mode."""
        if self.is_white(2):
            self.eqn = True
        self.condputs("\n")
        return True

    def macro_en(self):
        """.EN: leave eqn mode."""
        if self.is_white(2):
            self.eqn = False
        self.condputs("\n")
        return True

    def macro_r1(self):
        """.R1: set the refer2 flag."""
        if self.is_white(2):
            self.refer2 = True
        self.condputs("\n")
        return True

    def macro_r2(self):
        """.R2: clear the refer2 flag."""
        if self.is_white(2):
            self.refer2 = False
        self.condputs("\n")
        return True

    def macro_de(self):
        """.de: start a macro definition; output is suppressed until '..'."""
        # The original assigned a local ``macro`` here, so definitions were
        # never suppressed.  The whitespace check keeps longer request names
        # that merely begin with "de" from opening a definition.
        if self.is_white(2):
            self.macro = True
        self.condputs("\n")
        return True

    def macro_bl_vl(self):
        """List-start macros (.BL, .VL, ...): enter list mode."""
        if self.is_white(2):
            self.inlist = True
        self.condputs("\n")
        return True

    def macro_bv(self):
        """.BVL: enter list mode."""
        # The original called a non-existent self.white() and raised
        # AttributeError whenever the third character was "L".
        if self.str_at(2) == "L" and self.is_white(3):
            self.inlist = True
        self.condputs("\n")
        return True

    def macro_le(self):
        """.LE: leave list mode."""
        if self.is_white(2):
            self.inlist = False
        self.condputs("\n")
        return True

    def macro_lp_pp(self):
        """Paragraph macros (.LP, .PP, .P): output a newline."""
        self.condputs("\n")
        return True

    def macro_ds(self):
        """.ds NAME VALUE: store VALUE (rest of line, right-stripped) under NAME."""
        self.skip_char(2)
        self.skip_leading_whitespace()
        if self.str_at(0):
            # Split into the name and the rest of the line.  maxsplit must be
            # 1: with 2, any value containing a space produced three parts
            # and was silently discarded.
            comps = self.s.split(None, 1)
            if len(comps) == 2:
                name, value = comps
                value = value.rstrip()
                self.reg_table[name] = value
        self.condputs("\n")
        return True

    def macro_so_nx(self):
        """.so / .nx: ignored."""
        # We always ignore include directives
        # deroff.c for some reason allowed this to fall through to the 'tr' case
        # I think that was just a bug so I won't replicate it
        return True

    def macro_tr(self):
        """.tr: extend the character translation table from pairs on this line."""
        self.skip_char(2)
        self.skip_leading_whitespace()
        while self.s and self.str_at(0) != "\n":
            c = self.str_at(0)
            ns = self.str_at(1)
            self.skip_char(2)
            # An unpaired final character maps to a space.
            if not ns or ns == "\n":
                ns = " "
            self.tr_from += c
            self.tr_to += ns

        # Update our table, then swap in the slower tr-savvy condputs
        try:  # Python2
            self.tr = string.maketrans(self.tr_from, self.tr_to)
        except AttributeError:  # Python3
            self.tr = "".maketrans(self.tr_from, self.tr_to)
        self.condputs = self.condputs_tr
        return True

    def macro_sp(self):
        """.sp: output a newline."""
        self.condputs("\n")
        return True

    def macro_other(self):
        """Any unrecognised request: output a newline and drop the line."""
        self.condputs("\n")
        return True

    # ------------------------------------------------------------------
    # Line-level processing
    # ------------------------------------------------------------------

    def request_or_macro(self):
        """Process a control line (one starting with '.' or "'"); returns True."""
        # s[0] is period or open single quote
        self.skip_char()
        # After the skip the first character of the request is at index 0.
        # The original read index 1, so "..", ".[" and ".]" were never seen.
        s0 = self.s[0:1]
        if s0 == "\\":
            if self.str_at(1) == '"':
                # Comment line
                self.condputs("\n")
                return True
        elif s0 == "[":
            self.refer = True
            self.condputs("\n")
            return True
        elif s0 == "]":
            self.refer = False
            self.skip_char()
            return self.text()
        elif s0 == ".":
            # ".." ends a macro definition
            self.macro = False
            self.condputs("\n")
            return True

        self.nobody = False
        s0s1 = self.s[0:2]

        macro_func = Deroffer.g_macro_dict.get(s0s1, Deroffer.macro_other)
        if macro_func(self):
            return True

        if self.skipheaders and self.nobody:
            return True

        # Skip the macro name itself, then output each of its arguments.
        self.skip_leading_whitespace()
        while self.s and not self.is_white(0):
            self.skip_char()
        self.skip_leading_whitespace()
        while True:
            if not self.quoted_arg() and not self.text_arg():
                if self.s:
                    self.condputs(self.str_at(0))
                    self.skip_char()
                else:
                    return True

    def request_or_macro2(self):
        """Older name for request_or_macro(), kept for compatibility.

        This used to be a separate if/elif implementation of the same
        dispatch.  It now delegates so the two cannot drift apart.
        """
        return self.request_or_macro()

    def do_tbl(self):
        """Process one line inside a .TS/.TE table according to self.tblstate."""
        if self.tblstate == self.OPTIONS:
            while self.s and self.str_at(0) != ";" and self.str_at(0) != "\n":
                self.skip_leading_whitespace()
                if not self.str_at(0).isalpha():
                    # deroff.c has a bug where it can loop forever here...we try to work around it
                    self.skip_char()
                    continue

                # Parse one option: a run of letters, optionally "(arg)".
                idx = 0
                while self.s[idx : idx + 1].isalpha():
                    idx += 1
                option = self.s[:idx]
                arg = ""

                if self.s[idx : idx + 1] == "(":
                    self.s = self.s[idx + 1 :]
                    arg = self.s
                    close = arg.find(")")
                    if close != -1:
                        arg = arg[:close]
                        self.s = self.s[close + 1 :]
                else:
                    # Skip just this option word.  The original emptied the
                    # whole line here, so in "center tab(:);" the tab option
                    # was never reached.
                    self.s = self.s[idx:]

                if option.lower() == "tab":
                    self.tblTab = arg[0:1]

            self.tblstate = self.FORMAT
            self.condputs("\n")

        elif self.tblstate == self.FORMAT:
            while self.s and self.str_at(0) != "." and self.str_at(0) != "\n":
                self.skip_leading_whitespace()
                if self.str_at(0):
                    self.skip_char()

            # A "." ends the format section; data rows follow.
            if self.str_at(0) == ".":
                self.tblstate = self.DATA
            self.condputs("\n")

        elif self.tblstate == self.DATA:
            if self.tblTab:
                self.s = self.s.replace(self.tblTab, "\t")
            self.text()
        return True

    def do_line(self):
        """Process the line currently in self.s; False asks the caller to stop."""
        if self.s[0:1] in ".'":
            if not self.request_or_macro():
                return False
        elif self.tbl:
            self.do_tbl()
        else:
            self.text()
        return True

    def deroff(self, str):
        """Process ``str`` line by line, appending plain text to self.output."""
        lines = str.split("\n")
        for line in lines:
            self.s = line + "\n"
            if not self.do_line():
                break
            # self.putchar('\n')
2012-04-08 16:43:30 +08:00
def deroff_files(files):
    """Deroff each file named in ``files`` and write the result to stdout.

    Each path is echoed to stderr before it is processed.  Paths ending in
    ".gz" are read through gzip and, on Python 3, decoded as latin-1; other
    paths are opened in text mode.  A fresh Deroffer is used for every file.
    """
    # Imported here rather than at module level so that this function also
    # works when the module is imported (the only other gzip import lives
    # under the __main__ guard).
    import gzip

    for arg in files:
        sys.stderr.write(arg + "\n")
        # ``with`` guarantees the file is closed even if reading raises.
        if arg.endswith(".gz"):
            with gzip.open(arg, "r") as f:
                data = f.read()
            if IS_PY3:
                data = data.decode("latin-1")
        else:
            with open(arg, "r") as f:
                data = f.read()

        d = Deroffer()
        d.deroff(data)
        d.flush_output(sys.stdout)
2012-04-08 16:43:30 +08:00
if __name__ == "__main__":
    # Script entry point: deroff every path given on the command line.
    import gzip

    input_paths = sys.argv[1:]
    deroff_files(input_paths)
    # Profiling recipe, left disabled:
    # import cProfile, profile, pstats
    # profile.run("deroff_files(input_paths)", "fooprof")
    # p = pstats.Stats("fooprof")
    # p.sort_stats("time").print_stats(100)
    # p.sort_stats('calls').print_callers(.5, 'startswith')