mirror of
https://github.com/fish-shell/fish-shell.git
synced 2024-11-24 11:37:10 +08:00
92e14d7e4a
The Linux kernel only splits on the first whitespace in the shebang line (unlike BSD which splits on all whitespace). Which means there can be only one argument after the path to the program.
1103 lines
31 KiB
Python
Executable File
1103 lines
31 KiB
Python
Executable File
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
|
||
""" Deroff.py, ported to Python from the venerable deroff.c """
|
||
|
||
|
||
import sys, re, string
|
||
|
||
sys.dont_write_bytecode = True
|
||
IS_PY3 = sys.version_info[0] >= 3
|
||
|
||
class Deroffer:
|
||
|
||
g_specs_specletter = {
|
||
# Output composed latin1 letters
|
||
'-D': '\320',
|
||
'Sd': '\360',
|
||
'Tp': '\376',
|
||
'TP': '\336',
|
||
'AE': '\306',
|
||
'ae': '\346',
|
||
'OE': "OE",
|
||
'oe': "oe",
|
||
':a': '\344',
|
||
':A': '\304',
|
||
':e': '\353',
|
||
':E': '\313',
|
||
':i': '\357',
|
||
':I': '\317',
|
||
':o': '\366',
|
||
':O': '\326',
|
||
':u': '\374',
|
||
':U': '\334',
|
||
':y': '\377',
|
||
'ss': '\337',
|
||
'\'A': '\301',
|
||
'\'E': '\311',
|
||
'\'I': '\315',
|
||
'\'O': '\323',
|
||
'\'U': '\332',
|
||
'\'Y': '\335',
|
||
'\'a': '\341',
|
||
'\'e': '\351',
|
||
'\'i': '\355',
|
||
'\'o': '\363',
|
||
'\'u': '\372',
|
||
'\'y': '\375',
|
||
'^A': '\302',
|
||
'^E': '\312',
|
||
'^I': '\316',
|
||
'^O': '\324',
|
||
'^U': '\333',
|
||
'^a': '\342',
|
||
'^e': '\352',
|
||
'^i': '\356',
|
||
'^o': '\364',
|
||
'^u': '\373',
|
||
'`A': '\300',
|
||
'`E': '\310',
|
||
'`I': '\314',
|
||
'`O': '\322',
|
||
'`U': '\331',
|
||
'`a': '\340',
|
||
'`e': '\350',
|
||
'`i': '\354',
|
||
'`o': '\362',
|
||
'`u': '\371',
|
||
'~A': '\303',
|
||
'~N': '\321',
|
||
'~O': '\325',
|
||
'~a': '\343',
|
||
'~n': '\361',
|
||
'~o': '\365',
|
||
',C': '\307',
|
||
',c': '\347',
|
||
'/l': "/l",
|
||
'/L': "/L",
|
||
'/o': '\370',
|
||
'/O': '\330',
|
||
'oA': '\305',
|
||
'oa': '\345',
|
||
|
||
# Ligatures
|
||
'fi': 'fi',
|
||
'ff': 'ff',
|
||
'fl': 'fl',
|
||
|
||
'Fi': 'ffi',
|
||
'Ff': 'fff',
|
||
'Fl': 'ffl'
|
||
}
|
||
|
||
g_specs = {
|
||
'mi': '-',
|
||
'en': '-',
|
||
'hy': '-',
|
||
'em': "--",
|
||
'lq': '“',
|
||
'rq': '”',
|
||
'Bq': ",,",
|
||
'oq': '`',
|
||
'cq': '\'',
|
||
'aq': '\'',
|
||
'dq': '"',
|
||
'or': '|',
|
||
'at': '@',
|
||
'sh': '#',
|
||
'Eu': '\244',
|
||
'eu': '\244',
|
||
'Do': '$',
|
||
'ct': '\242',
|
||
'Fo': '\253',
|
||
'Fc': '\273',
|
||
'fo': '<',
|
||
'fc': '>',
|
||
'r!': '\241',
|
||
'r?': '\277',
|
||
'Of': '\252',
|
||
'Om': '\272',
|
||
'pc': '\267',
|
||
'S1': '\271',
|
||
'S2': '\262',
|
||
'S3': '\263',
|
||
'<-': "<-",
|
||
'->': "->",
|
||
'<>': "<->",
|
||
'ua': '^',
|
||
'da': 'v',
|
||
'lA': "<=",
|
||
'rA': "=>",
|
||
'hA': "<=>",
|
||
'uA': "^^",
|
||
'dA': "vv",
|
||
'ba': '|',
|
||
'bb': '|',
|
||
'br': '|',
|
||
'bv': '|',
|
||
'ru': '_',
|
||
'ul': '_',
|
||
'ci': 'O',
|
||
'bu': 'o',
|
||
'co': '\251',
|
||
'rg': '\256',
|
||
'tm': "(TM)",
|
||
'dd': "||",
|
||
'dg': '|',
|
||
'ps': '\266',
|
||
'sc': '\247',
|
||
'de': '\260',
|
||
'%0': "0/00",
|
||
'14': '\274',
|
||
'12': '\275',
|
||
'34': '\276',
|
||
'f/': '/',
|
||
'sl': '/',
|
||
'rs': '\\',
|
||
'sq': "[]",
|
||
'fm': '\'',
|
||
'ha': '^',
|
||
'ti': '~',
|
||
'lB': '[',
|
||
'rB': ']',
|
||
'lC': '{',
|
||
'rC': '}',
|
||
'la': '<',
|
||
'ra': '>',
|
||
'lh': "<=",
|
||
'rh': "=>",
|
||
'tf': "therefore",
|
||
'~~': "~~",
|
||
'~=': "~=",
|
||
'!=': "!=",
|
||
'**': '*',
|
||
'+-': '\261',
|
||
'<=': "<=",
|
||
'==': "==",
|
||
'=~': "=~",
|
||
'>=': ">=",
|
||
'AN': "\\/",
|
||
'OR': "/\\",
|
||
'no': '\254',
|
||
'te': "there exists",
|
||
'fa': "for all",
|
||
'Ah': "aleph",
|
||
'Im': "imaginary",
|
||
'Re': "real",
|
||
'if': "infinity",
|
||
'md': "\267",
|
||
'mo': "member of",
|
||
'mu': '\327',
|
||
'nm': "not member of",
|
||
'pl': '+',
|
||
'eq': '=',
|
||
'pt': "oc",
|
||
'pp': "perpendicular",
|
||
'sb': "(=",
|
||
'sp': "=)",
|
||
'ib': "(-",
|
||
'ip': "-)",
|
||
'ap': '~',
|
||
'is': 'I',
|
||
'sr': "root",
|
||
'pd': 'd',
|
||
'c*': "(x)",
|
||
'c+': "(+)",
|
||
'ca': "cap",
|
||
'cu': 'U',
|
||
'di': '\367',
|
||
'gr': 'V',
|
||
'es': "{}",
|
||
'CR': "_|",
|
||
'st': "such that",
|
||
'/_': "/_",
|
||
'lz': "<>",
|
||
'an': '-',
|
||
|
||
# Output Greek
|
||
'*A': "Alpha",
|
||
'*B': "Beta",
|
||
'*C': "Xi",
|
||
'*D': "Delta",
|
||
'*E': "Epsilon",
|
||
'*F': "Phi",
|
||
'*G': "Gamma",
|
||
'*H': "Theta",
|
||
'*I': "Iota",
|
||
'*K': "Kappa",
|
||
'*L': "Lambda",
|
||
'*M': "Mu",
|
||
'*N': "Nu",
|
||
'*O': "Omicron",
|
||
'*P': "Pi",
|
||
'*Q': "Psi",
|
||
'*R': "Rho",
|
||
'*S': "Sigma",
|
||
'*T': "Tau",
|
||
'*U': "Upsilon",
|
||
'*W': "Omega",
|
||
'*X': "Chi",
|
||
'*Y': "Eta",
|
||
'*Z': "Zeta",
|
||
'*a': "alpha",
|
||
'*b': "beta",
|
||
'*c': "xi",
|
||
'*d': "delta",
|
||
'*e': "epsilon",
|
||
'*f': "phi",
|
||
'+f': "phi",
|
||
'*g': "gamma",
|
||
'*h': "theta",
|
||
'+h': "theta",
|
||
'*i': "iota",
|
||
'*k': "kappa",
|
||
'*l': "lambda",
|
||
'*m': "\265",
|
||
'*n': "nu",
|
||
'*o': "omicron",
|
||
'*p': "pi",
|
||
'+p': "omega",
|
||
'*q': "psi",
|
||
'*r': "rho",
|
||
'*s': "sigma",
|
||
'*t': "tau",
|
||
'*u': "upsilon",
|
||
'*w': "omega",
|
||
'*x': "chi",
|
||
'*y': "eta",
|
||
'*z': "zeta",
|
||
'ts': "sigma",
|
||
}
|
||
|
||
g_re_word = re.compile(r'[a-zA-Z_]+') # equivalent to the word() method
|
||
g_re_number = re.compile(r'[+-]?\d+') # equivalent to the number() method
|
||
g_re_esc_char = re.compile(r"""([a-zA-Z_]) | # Word
|
||
([+-]?\d) | # Number
|
||
\\ # Backslash (for escape seq)
|
||
""", re.VERBOSE)
|
||
|
||
g_re_not_backslash_or_whitespace = re.compile(r'[^ \t\n\r\f\v\\]+') # Match a sequence of not backslash or whitespace
|
||
|
||
g_re_newline_collapse = re.compile(r'\n{3,}')
|
||
|
||
g_re_font = re.compile(r"""\\f( # Starts with backslash f
|
||
(\(\S{2}) | # Open paren, then two printable chars
|
||
(\[\S*?\]) | # Open bracket, zero or more printable characters, then close bracket
|
||
\S) # Any printable character
|
||
""", re.VERBOSE)
|
||
|
||
# This gets filled in in __init__ below
|
||
g_macro_dict = False
|
||
|
||
def __init__(self):
|
||
self.reg_table = {}
|
||
self.tr_from = ''
|
||
self.tr_to = ''
|
||
self.tr = ''
|
||
self.nls = 2
|
||
self.specletter = False
|
||
self.refer = False
|
||
self.macro = 0
|
||
self.nobody = False
|
||
self.inlist = False
|
||
self.inheader = False
|
||
self.pic = False
|
||
self.tbl = False
|
||
self.tblstate = 0
|
||
self.tblTab = ''
|
||
self.eqn = False
|
||
self.skipheaders = False
|
||
self.skiplists = False
|
||
self.ignore_sonx = False
|
||
self.output = []
|
||
self.name = ''
|
||
|
||
self.OPTIONS = 0
|
||
self.FORMAT = 1
|
||
self.DATA = 2
|
||
|
||
# words is uninteresting and should be treated as false
|
||
|
||
if not Deroffer.g_macro_dict:
|
||
Deroffer.g_macro_dict = {
|
||
'SH': Deroffer.macro_sh,
|
||
'SS': Deroffer.macro_ss_ip,
|
||
'IP': Deroffer.macro_ss_ip,
|
||
'H ': Deroffer.macro_ss_ip,
|
||
'I ': Deroffer.macro_i_ir,
|
||
'IR': Deroffer.macro_i_ir,
|
||
'IB': Deroffer.macro_i_ir,
|
||
'B ': Deroffer.macro_i_ir,
|
||
'BR': Deroffer.macro_i_ir,
|
||
'BI': Deroffer.macro_i_ir,
|
||
'R ': Deroffer.macro_i_ir,
|
||
'RB': Deroffer.macro_i_ir,
|
||
'RI': Deroffer.macro_i_ir,
|
||
'AB': Deroffer.macro_i_ir,
|
||
'Nm': Deroffer.macro_Nm,
|
||
'] ': Deroffer.macro_close_bracket,
|
||
'PS': Deroffer.macro_ps,
|
||
'PE': Deroffer.macro_pe,
|
||
'TS': Deroffer.macro_ts,
|
||
'T&': Deroffer.macro_t_and,
|
||
'TE': Deroffer.macro_te,
|
||
'EQ': Deroffer.macro_eq,
|
||
'EN': Deroffer.macro_en,
|
||
'R1': Deroffer.macro_r1,
|
||
'R2': Deroffer.macro_r2,
|
||
'de': Deroffer.macro_de,
|
||
'BL': Deroffer.macro_bl_vl,
|
||
'VL': Deroffer.macro_bl_vl,
|
||
'AL': Deroffer.macro_bl_vl,
|
||
'LB': Deroffer.macro_bl_vl,
|
||
'RL': Deroffer.macro_bl_vl,
|
||
'ML': Deroffer.macro_bl_vl,
|
||
'DL': Deroffer.macro_bl_vl,
|
||
'BV': Deroffer.macro_bv,
|
||
'LE': Deroffer.macro_le,
|
||
'LP': Deroffer.macro_lp_pp,
|
||
'PP': Deroffer.macro_lp_pp,
|
||
'P\n': Deroffer.macro_lp_pp,
|
||
'ds': Deroffer.macro_ds,
|
||
'so': Deroffer.macro_so_nx,
|
||
'nx': Deroffer.macro_so_nx,
|
||
'tr': Deroffer.macro_tr,
|
||
'sp': Deroffer.macro_sp
|
||
}
|
||
|
||
def flush_output(self, where):
|
||
if where:
|
||
where.write(self.get_output())
|
||
self.output[:] = []
|
||
|
||
def get_output(self):
|
||
res = ''.join(self.output)
|
||
clean_res = Deroffer.g_re_newline_collapse.sub('\n', res)
|
||
return clean_res
|
||
|
||
def putchar(self, c):
|
||
self.output.append(c)
|
||
return c
|
||
|
||
# This gets swapped in in place of condputs the first time tr gets modified
|
||
def condputs_tr(self, str):
|
||
special = self.pic or self.eqn or self.refer or self.macro or (self.skiplists and self.inlist) or (self.skipheaders and self.inheader)
|
||
if not special:
|
||
self.output.append(str.translate(self.tr))
|
||
|
||
def condputs(self, str):
|
||
special = self.pic or self.eqn or self.refer or self.macro or (self.skiplists and self.inlist) or (self.skipheaders and self.inheader)
|
||
if not special:
|
||
self.output.append(str)
|
||
|
||
def str_at(self, idx):
|
||
return self.s[idx:idx+1]
|
||
|
||
def skip_char(self, amt=1):
|
||
self.s = self.s[amt:]
|
||
|
||
def skip_leading_whitespace(self):
|
||
self.s = self.s.lstrip()
|
||
|
||
def is_white(self, idx):
|
||
# Note this returns false for empty strings (idx >= len(self.s))
|
||
return self.s[idx:idx+1].isspace()
|
||
|
||
def str_eq(offset, other, len):
|
||
return self.s[offset:offset+len] == other[:len]
|
||
|
||
def prch(self, idx):
|
||
# Note that this return False for the empty string (idx >= len(self.s))
|
||
ch = self.s[idx:idx+1]
|
||
return ch not in ' \t\n'
|
||
|
||
def font(self):
|
||
match = Deroffer.g_re_font.match(self.s)
|
||
if not match: return False
|
||
self.skip_char(match.end())
|
||
return True
|
||
|
||
def font2(self):
|
||
if self.s[0:2] == '\\f':
|
||
c = self.str_at(2)
|
||
if c == '(' and self.prch(3) and self.prch(4):
|
||
self.skip_char(5)
|
||
return True
|
||
elif c == '[':
|
||
self.skip_char(2)
|
||
while self.prch(0) and self.str_at(0) != ']': self.skip_char()
|
||
if self.str_at(0) == ']': self.skip_char()
|
||
elif self.prch(2):
|
||
self.skip_char(3)
|
||
return True
|
||
return False
|
||
|
||
def comment(self):
|
||
# Here we require that the string start with \"
|
||
while self.str_at(0) and self.str_at(0) != '\n': self.skip_char()
|
||
return True
|
||
|
||
def numreq(self):
|
||
# We require that the string starts with backslash
|
||
if self.str_at(1) in 'hvwud' and self.str_at(2) == '\'':
|
||
self.macro += 1
|
||
self.skip_char(3)
|
||
while self.str_at(0) != '\'' and self.esc_char():
|
||
pass # Weird
|
||
if self.str_at(0) == '\'':
|
||
self.skip_char()
|
||
self.macro -= 1
|
||
return True
|
||
return False
|
||
|
||
def var(self):
|
||
reg = ''
|
||
s0s1 = self.s[0:2]
|
||
if s0s1 == '\\n':
|
||
if self.s[3:5] == 'dy':
|
||
self.skip_char(5)
|
||
return True
|
||
elif self.str_at(2) == '(' and self.prch(3) and self.prch(4):
|
||
self.skip_char(5)
|
||
return True
|
||
elif self.str_at(2) == '[' and self.prch(3):
|
||
self.skip_char(3)
|
||
while self.str_at(0) and self.str_at(0) != ']':
|
||
self.skip_char()
|
||
return True
|
||
elif self.prch(2):
|
||
self.skip_char(3)
|
||
return True
|
||
elif s0s1 == '\\*':
|
||
if self.str_at(2) == '(' and self.prch(3) and self.prch(4):
|
||
reg = self.s[3:5]
|
||
self.skip_char(5)
|
||
elif self.str_at(2) == '[' and self.prch(3):
|
||
self.skip_char(3)
|
||
while self.str_at(0) and self.str_at(0) != ']':
|
||
reg = reg + self.str_at(0)
|
||
self.skip_char()
|
||
if self.s[0:1] == ']':
|
||
self.skip_char()
|
||
else:
|
||
return False
|
||
elif self.prch(2):
|
||
reg = self.str_at(2)
|
||
self.skip_char(3)
|
||
else:
|
||
return False
|
||
|
||
if reg in self.reg_table:
|
||
old_s = self.s
|
||
self.s = self.reg_table[reg]
|
||
self.text_arg()
|
||
return True
|
||
return False
|
||
|
||
def size(self):
|
||
# We require that the string starts with \s
|
||
if self.digit(2) or (self.str_at(2) in '-+' and self.digit(3)):
|
||
self.skip_char(3)
|
||
while self.digit(0): self.skip_char()
|
||
return True
|
||
return False
|
||
|
||
def spec(self):
|
||
self.specletter = False
|
||
if self.s[0:2] == '\\(' and self.prch(2) and self.prch(3):
|
||
key = self.s[2:4]
|
||
if key in Deroffer.g_specs_specletter:
|
||
self.condputs(Deroffer.g_specs_specletter[key])
|
||
self.specletter = True
|
||
elif key in Deroffer.g_specs:
|
||
self.condputs(Deroffer.g_specs[key])
|
||
self.skip_char(4)
|
||
return True
|
||
elif self.s.startswith('\\%'):
|
||
self.specletter = True
|
||
self.skip_char(2)
|
||
return True
|
||
else:
|
||
return False
|
||
|
||
def esc(self):
|
||
# We require that the string start with backslash
|
||
c = self.s[1:2]
|
||
if not c: return False
|
||
if c in 'eE':
|
||
self.condputs('\\')
|
||
elif c in 't':
|
||
self.condputs('\t')
|
||
elif c in '0~':
|
||
self.condputs(' ')
|
||
elif c in '|^&:':
|
||
pass
|
||
else:
|
||
self.condputs(c)
|
||
self.skip_char(2)
|
||
return True
|
||
|
||
def word(self):
|
||
got_something = False
|
||
while True:
|
||
match = Deroffer.g_re_word.match(self.s)
|
||
if not match: break
|
||
got_something = True
|
||
self.condputs(match.group(0))
|
||
self.skip_char(match.end(0))
|
||
|
||
# Consume all specials
|
||
while self.spec():
|
||
if not self.specletter: break
|
||
|
||
return got_something
|
||
|
||
|
||
def text(self):
|
||
while True:
|
||
idx = self.s.find('\\')
|
||
if idx == -1:
|
||
self.condputs(self.s)
|
||
self.s = ''
|
||
break
|
||
else:
|
||
self.condputs(self.s[:idx])
|
||
self.skip_char(idx)
|
||
if not self.esc_char_backslash():
|
||
self.condputs(self.str_at(0))
|
||
self.skip_char()
|
||
return True
|
||
|
||
def letter(self, idx):
|
||
ch = self.str_at(idx)
|
||
return ch.isalpha() or ch == '_' # underscore is used in C identifiers
|
||
|
||
|
||
def digit(self, idx):
|
||
ch = self.str_at(idx)
|
||
return ch.isdigit()
|
||
|
||
def number(self):
|
||
match = Deroffer.g_re_number.match(self.s)
|
||
if not match:
|
||
return False
|
||
else:
|
||
self.condputs(match.group(0))
|
||
self.skip_char(match.end())
|
||
return True
|
||
|
||
def esc_char_backslash(self):
|
||
# Like esc_char, but we know the string starts with a backslash
|
||
c = self.s[1:2]
|
||
if c == '"':
|
||
return self.comment()
|
||
elif c == 'f':
|
||
return self.font()
|
||
elif c == 's':
|
||
return self.size()
|
||
elif c in 'hvwud':
|
||
return self.numreq()
|
||
elif c in 'n*':
|
||
return self.var()
|
||
elif c == '(':
|
||
return self.spec()
|
||
else:
|
||
return self.esc()
|
||
|
||
|
||
def esc_char(self):
|
||
if self.s[0:1] == '\\':
|
||
return self.esc_char_backslash()
|
||
return self.word() or self.number()
|
||
|
||
def quoted_arg(self):
|
||
if self.str_at(0) == '"':
|
||
self.skip_char()
|
||
while self.s and self.str_at(0) != '"':
|
||
if not self.esc_char():
|
||
if self.s:
|
||
self.condputs(self.str_at(0))
|
||
self.skip_char()
|
||
return True
|
||
else:
|
||
return False
|
||
|
||
def text_arg(self):
|
||
# PCA: The deroff.c textArg() disallowed quotes at the start of an argument
|
||
# I'm not sure if this was a bug or not
|
||
got_something = False
|
||
while True:
|
||
match = Deroffer.g_re_not_backslash_or_whitespace.match(self.s)
|
||
if match:
|
||
# Output the characters in the match
|
||
self.condputs(match.group(0))
|
||
self.skip_char(match.end(0))
|
||
got_something = True
|
||
|
||
# Next is either an escape, or whitespace, or the end
|
||
# If it's the whitespace or the end, we're done
|
||
if not self.s or self.is_white(0):
|
||
return got_something
|
||
|
||
# Try an escape
|
||
if not self.esc_char():
|
||
# Some busted escape? Just output it
|
||
self.condputs(self.str_at(0))
|
||
self.skip_char()
|
||
got_something = True
|
||
|
||
|
||
|
||
def text_arg2(self):
|
||
if not self.esc_char():
|
||
if self.s and not self.is_white(0):
|
||
self.condputs(self.str_at(0))
|
||
self.skip_char()
|
||
else:
|
||
return False
|
||
while True:
|
||
if not self.esc_char():
|
||
if self.s and not self.is_white(0):
|
||
self.condputs(self.str_at(0))
|
||
self.skip_char()
|
||
else:
|
||
return True
|
||
|
||
|
||
# Macro functions
|
||
def macro_sh(self):
|
||
for header_str in [' SYNOPSIS', ' "SYNOPSIS', ' ‹BERSICHT', ' "‹BERSICHT']:
|
||
if self.s[2:].startswith(header_str):
|
||
self.inheader = True
|
||
break
|
||
else:
|
||
# Did not find a header string
|
||
self.inheader = False
|
||
self.nobody = True
|
||
|
||
def macro_ss_ip(self):
|
||
self.nobody = True
|
||
return False
|
||
|
||
def macro_i_ir(self):
|
||
pass
|
||
return False
|
||
|
||
def macro_Nm(self):
|
||
if self.s == 'Nm\n':
|
||
self.condputs(self.name)
|
||
else:
|
||
self.name = self.s[3:].strip() + ' '
|
||
return True
|
||
|
||
def macro_close_bracket(self):
|
||
self.refer = False
|
||
return False
|
||
|
||
def macro_ps(self):
|
||
if self.is_white(2): self.pic = True
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_pe(self):
|
||
if self.is_white(2): self.pic = False
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_ts(self):
|
||
if self.is_white(2): self.tbl, self.tblstate = True, self.OPTIONS
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_t_and(self):
|
||
if self.is_white(2): self.tbl, self.tblstate = True, self.FORMAT
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_te(self):
|
||
if self.is_white(2): self.tbl = False
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_eq(self):
|
||
if self.is_white(2): self.eqn = True
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_en(self):
|
||
if self.is_white(2): self.eqn = False
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_r1(self):
|
||
if self.is_white(2): self.refer2 = True
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_r2(self):
|
||
if self.is_white(2): self.refer2 = False
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_de(self):
|
||
macro=True
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_bl_vl(self):
|
||
if self.is_white(2): self.inlist = True
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_bv(self):
|
||
if self.str_at(2) == 'L' and self.white(self.str_at(3)): self.inlist = True
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_le(self):
|
||
if self.is_white(2): self.inlist = False
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_lp_pp(self):
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_ds(self):
|
||
self.skip_char(2)
|
||
self.skip_leading_whitespace()
|
||
if self.str_at(0):
|
||
# Split at whitespace
|
||
comps = self.s.split(None, 2)
|
||
if len(comps) is 2:
|
||
name, value = comps
|
||
value = value.rstrip()
|
||
self.reg_table[name] = value
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_so_nx(self):
|
||
# We always ignore include directives
|
||
# deroff.c for some reason allowed this to fall through to the 'tr' case
|
||
# I think that was just a bug so I won't replicate it
|
||
return True
|
||
|
||
def macro_tr(self):
|
||
self.skip_char(2)
|
||
self.skip_leading_whitespace()
|
||
while self.s and self.str_at(0) != '\n':
|
||
c = self.str_at(0)
|
||
ns = self.str_at(1)
|
||
self.skip_char(2)
|
||
if not ns or ns == '\n': ns = ' '
|
||
self.tr_from += c
|
||
self.tr_to += ns
|
||
|
||
# Update our table, then swap in the slower tr-savvy condputs
|
||
try: #Python2
|
||
self.tr = string.maketrans(self.tr_from, self.tr_to)
|
||
except AttributeError: #Python3
|
||
self.tr = "".maketrans(self.tr_from, self.tr_to)
|
||
self.condputs = self.condputs_tr
|
||
return True
|
||
|
||
def macro_sp(self):
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def macro_other(self):
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
def request_or_macro(self):
|
||
# s[0] is period or open single quote
|
||
self.skip_char()
|
||
s0 = self.s[1:2]
|
||
if s0 == '\\':
|
||
if self.str_at(1) == '"':
|
||
self.condputs('\n')
|
||
return True
|
||
else:
|
||
pass
|
||
elif s0 == '[':
|
||
self.refer = True
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0 == ']':
|
||
self.refer = False
|
||
self.skip_char()
|
||
return self.text()
|
||
elif s0 == '.':
|
||
self.macro = False
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
self.nobody = False
|
||
s0s1 = self.s[0:2]
|
||
|
||
macro_func = Deroffer.g_macro_dict.get(s0s1, Deroffer.macro_other)
|
||
if macro_func(self):
|
||
return True
|
||
|
||
if self.skipheaders and self.nobody: return True
|
||
|
||
self.skip_leading_whitespace()
|
||
while self.s and not self.is_white(0): self.skip_char()
|
||
self.skip_leading_whitespace()
|
||
while True:
|
||
if not self.quoted_arg() and not self.text_arg():
|
||
if self.s:
|
||
self.condputs(self.str_at(0))
|
||
self.skip_char()
|
||
else:
|
||
return True
|
||
|
||
|
||
def request_or_macro2(self):
|
||
self.skip_char()
|
||
s0 = self.s[0:1]
|
||
if s0 == '\\':
|
||
if self.str_at(1) == '"':
|
||
self.condputs('\n')
|
||
return True
|
||
else:
|
||
pass
|
||
elif s0 == '[':
|
||
self.refer = True
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0 == ']':
|
||
self.refer = False
|
||
self.skip_char()
|
||
return self.text()
|
||
elif s0 == '.':
|
||
self.macro = False
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
self.nobody = False
|
||
s0s1 = self.s[0:2]
|
||
if s0s1 == 'SH':
|
||
for header_str in [' SYNOPSIS', ' "SYNOPSIS', ' ‹BERSICHT', ' "‹BERSICHT']:
|
||
if self.s[2:].startswith(header_str):
|
||
self.inheader = True
|
||
break
|
||
else:
|
||
# Did not find a header string
|
||
self.inheader = False
|
||
self.nobody = True
|
||
elif s0s1 in ['SS', 'IP', 'H ']:
|
||
self.nobody = True
|
||
elif s0s1 in ['I ', 'IR', 'IB', 'B ', 'BR', 'BI', 'R ', 'RB', 'RI', 'AB']:
|
||
pass
|
||
elif s0s1 in ['] ']:
|
||
self.refer = False
|
||
elif s0s1 in ['PS']:
|
||
if self.is_white(2): self.pic = True
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['PE']:
|
||
if self.is_white(2): self.pic = False
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['TS']:
|
||
if self.is_white(2): self.tbl, self.tblstate = True, self.OPTIONS
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['T&']:
|
||
if self.is_white(2): self.tbl, self.tblstate = True, self.FORMAT
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['TE']:
|
||
if self.is_white(2): self.tbl = False
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['EQ']:
|
||
if self.is_white(2): self.eqn = True
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['EN']:
|
||
if self.is_white(2): self.eqn = False
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['R1']:
|
||
if self.is_white(2): self.refer2 = True
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['R2']:
|
||
if self.is_white(2): self.refer2 = False
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['de']:
|
||
macro=True
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['BL', 'VL', 'AL', 'LB', 'RL', 'ML', 'DL']:
|
||
if self.is_white(2): self.inlist = True
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['BV']:
|
||
if self.str_at(2) == 'L' and self.white(self.str_at(3)): self.inlist = True
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['LE']:
|
||
if self.is_white(2): self.inlist = False
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['LP', 'PP', 'P\n']:
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['ds']:
|
||
self.skip_char(2)
|
||
self.skip_leading_whitespace()
|
||
if self.str_at(0):
|
||
# Split at whitespace
|
||
comps = self.s.split(None, 2)
|
||
if len(comps) is 2:
|
||
name, value = comps
|
||
value = value.rstrip()
|
||
self.reg_table[name] = value
|
||
self.condputs('\n')
|
||
return True
|
||
elif s0s1 in ['so', 'nx']:
|
||
# We always ignore include directives
|
||
# deroff.c for some reason allowed this to fall through to the 'tr' case
|
||
# I think that was just a bug so I won't replicate it
|
||
return True
|
||
elif s0s1 in ['tr']:
|
||
self.skip_char(2)
|
||
self.skip_leading_whitespace()
|
||
while self.s and self.str_at(0) != '\n':
|
||
c = self.str_at(0)
|
||
ns = self.str_at(1)
|
||
self.skip_char(2)
|
||
if not ns or ns == '\n': ns = ' '
|
||
self.tr_from += c
|
||
self.tr_to += ns
|
||
|
||
# Update our table, then swap in the slower tr-savvy condputs
|
||
try: #Python2
|
||
self.tr = string.maketrans(self.tr_from, self.tr_to)
|
||
except AttributeError: #Python3
|
||
self.tr = "".maketrans(self.tr_from, self.tr_to)
|
||
self.condputs = self.condputs_tr
|
||
|
||
return True
|
||
elif s0s1 in ['sp']:
|
||
self.condputs('\n')
|
||
return True
|
||
else:
|
||
self.condputs('\n')
|
||
return True
|
||
|
||
if self.skipheaders and self.nobody: return True
|
||
|
||
self.skip_leading_whitespace()
|
||
while self.s and not self.is_white(0): self.skip_char()
|
||
self.skip_leading_whitespace()
|
||
while True:
|
||
if not self.quoted_arg() and not self.text_arg():
|
||
if self.s:
|
||
self.condputs(self.str_at(0))
|
||
self.skip_char()
|
||
else:
|
||
return True
|
||
|
||
|
||
def do_tbl(self):
|
||
if self.tblstate == self.OPTIONS:
|
||
while self.s and self.str_at(0) != ';' and self.str_at(0) != '\n':
|
||
self.skip_leading_whitespace()
|
||
if not self.str_at(0).isalpha():
|
||
# deroff.c has a bug where it can loop forever here...we try to work around it
|
||
self.skip_char()
|
||
else: # Parse option
|
||
|
||
option = self.s
|
||
arg = ''
|
||
|
||
idx = 0
|
||
while option[idx:idx+1].isalpha():
|
||
idx += 1
|
||
|
||
if option[idx:idx+1] == '(':
|
||
option = option[:idx]
|
||
self.s = self.s[idx+1:]
|
||
arg = self.s
|
||
else:
|
||
self.s = ''
|
||
|
||
if arg:
|
||
idx = arg.find(')')
|
||
if idx != -1:
|
||
arg = arg[:idx]
|
||
self.s = self.s[idx+1:]
|
||
else:
|
||
#self.skip_char()
|
||
pass
|
||
|
||
if option.lower() == 'tab':
|
||
self.tblTab = arg[0:1]
|
||
|
||
self.tblstate = self.FORMAT
|
||
self.condputs('\n')
|
||
|
||
elif self.tblstate == self.FORMAT:
|
||
while self.s and self.str_at(0) != '.' and self.str_at(0) != '\n':
|
||
self.skip_leading_whitespace()
|
||
if self.str_at(0): self.skip_char()
|
||
|
||
if self.str_at(0) == '.': self.tblstate = self.DATA
|
||
self.condputs('\n')
|
||
elif self.tblstate == self.DATA:
|
||
if self.tblTab:
|
||
self.s = self.s.replace(self.tblTab, '\t')
|
||
self.text()
|
||
return True
|
||
|
||
def do_line(self):
|
||
if self.s[0:1] in ".'":
|
||
if not self.request_or_macro(): return False
|
||
elif self.tbl:
|
||
self.do_tbl()
|
||
else:
|
||
self.text()
|
||
return True
|
||
|
||
def deroff(self, str):
|
||
lines = str.split('\n')
|
||
for line in lines:
|
||
self.s = line + '\n'
|
||
if not self.do_line():
|
||
break
|
||
#self.putchar('\n')
|
||
|
||
def deroff_files(files):
    """Deroff every file named in *files* and write the text to stdout.

    Each file name is echoed to stderr first.  Names ending in ``.gz`` are
    read through gzip.  On Python 3 the content is decoded as Latin-1 in
    both cases, so a compressed and an uncompressed copy of the same page
    give the same result.
    """
    # Imported here because the module itself only imports gzip when run as
    # a script; calling this function after a plain import used to fail.
    import gzip

    for arg in files:
        sys.stderr.write(arg + '\n')
        if arg.endswith('.gz'):
            with gzip.open(arg, 'r') as f:
                text = f.read()
            if IS_PY3:
                text = text.decode('latin-1')
        elif IS_PY3:
            # Without an explicit encoding the locale's would be used, and
            # Latin-1 pages could fail to decode.
            with open(arg, 'r', encoding='latin-1') as f:
                text = f.read()
        else:
            with open(arg, 'r') as f:
                text = f.read()
        d = Deroffer()
        d.deroff(text)
        d.flush_output(sys.stdout)
|
||
|
||
|
||
|
||
if __name__ == "__main__":
    # Kept as a module-level import: deroff_files() may look `gzip` up in
    # the module globals.
    import gzip

    # The unreachable profiling branch (behind `if True:`) was removed.  To
    # profile a run, use `python -m cProfile -s time <this script> FILE...`.
    deroff_files(sys.argv[1:])
|