From 02e9486559d0a49568691b7a3c1daac12d240275 Mon Sep 17 00:00:00 2001 From: Bjorn Neergaard Date: Thu, 7 May 2020 13:01:02 -0600 Subject: [PATCH] Fix manpath handling in create_manpage_completions.py (#6879) * Fix manpath handling in create_manpage_completions.py ...as well as do some (very!) light cleanup. Currently, `create_manpage_completions.py` does not properly understand/respect the `$MANPATH` variable. One important feature of `$MANPATH` is that an empty component (i.e. the trailing : in `foo:bar:`) expands to the 'default' or 'system' path -- that is to say, the path that would be used if `$MANPATH` was unset. This allows the user to extend the manpath without clobbering it, and has been a feature many Unices have included for years. The current implementation blindly uses the `$MANPATH` variable if it exists, which does not allow for this behaviour -- to expand the variable correctly, an external program must be invoked. Therefore, we first shell out to the 'proper' (read: best guess) external program. If that fails, we can then try to use `$MANPATH` directly/literally. Finally, if both of those are impossible, we can fall back to some common paths from widely used operating systems. Note that the `man.conf` parsing has been removed: this is because while many 'traditional' Unices (BSDs, Solaris, macOS) support this file, only macOS actually ships a file -- most other Unices use a `conf.d`-style layout and supporting that from our Python is impractical and silly at best. On GNU (read: Linux) systems, `mandb` uses `/etc/man_db.conf` with slightly different syntax and sematics. As this code-path has bitrotted (and likely never worked, anyway), just remove it. `create_manpage_completions.py` looks like it has suffered a lot of confusion and bitrot in general over the last few years -- and is overdue for a major refactoring. I am quite interested in tackling this, but I plan to wait until the go-ahead to drop support for Python 2 is given, as a major refactor/rewrite that still supports Python 2 (and thus ignores the ergonomic/API/syntax improvements of Python 3) does not make sense to me. Related: #5657 It would probably be good to revisit `man.fish` once again when a comprehensive refactor happens: hopefully every permutation of `man`/`$MANPATH` could be documented as part of that effort. * Restore /etc/man.conf parsing I was not aware that this codepath was used -- since it appeared that it would throw an error when it was reached. Redo it, using regex, and support parsing NetBSD man.conf as well (untested). * Fix create_manpage_completions.py under Python 2 --- share/tools/create_manpage_completions.py | 93 ++++++++++++++--------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/share/tools/create_manpage_completions.py b/share/tools/create_manpage_completions.py index cd999331f..4816a8e93 100755 --- a/share/tools/create_manpage_completions.py +++ b/share/tools/create_manpage_completions.py @@ -16,9 +16,19 @@ Redistributions in binary form must reproduce the above copyright notice, this l THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ -import string, sys, re, os.path, bz2, gzip, traceback, errno, codecs -import argparse +from __future__ import print_function from deroff import Deroffer +import argparse +import bz2 +import codecs +import errno +import gzip +import os +import re +import string +import subprocess +import sys +import traceback lzma_available = True try: @@ -29,6 +39,11 @@ try: except ImportError: lzma_available = False +try: + from subprocess import DEVNULL +except ImportError: + DEVNULL = open(os.devnull, 'wb') + # Whether we're Python 3 IS_PY3 = sys.version_info[0] >= 3 @@ -61,8 +76,8 @@ def add_diagnostic(dgn, msg_verbosity=VERY_VERBOSE): def flush_diagnostics(where): if diagnostic_output: - output_str = "\n".join(diagnostic_output) + "\n" - where.write(output_str) + output_str = "\n".join(diagnostic_output) + print(output_str, file=where) diagnostic_output[:] = [] @@ -496,7 +511,7 @@ class Type4ManParser(ManParser): add_diagnostic("Command is %r" % CMDNAME) if options_matched == None: - print >>sys.stderr, "Unable to find options section" + print("Unable to find options section", file=sys.stderr) return False while options_matched != None: @@ -949,47 +964,49 @@ def parse_and_output_man_pages(paths, output_directory, show_progress): def get_paths_from_man_locations(): # Return all the paths to man(1) and man(8) files in the manpath - import subprocess, os - proc = None - # $MANPATH takes precedence, just like with `man` on the CLI. - if os.getenv("MANPATH"): + parent_paths = [] + + # Most (GNU, macOS, Haiku) modern implementations of man support being called with `--path`. + # Traditional implementations require a second `manpath` program: examples include FreeBSD and Solaris. + # Prefer an external program first because these programs return a superset of the $MANPATH variable. + for prog in [["man", "--path"], ["manpath"]]: + try: + output = subprocess.check_output(prog, stderr=DEVNULL) + if IS_PY3: + output = output.decode("latin-1") + parent_paths = output.strip().split(":") + except (OSError, subprocess.CalledProcessError): + continue + # If we can't have the OS interpret $MANPATH, just use it as-is (gulp). + if not parent_paths and os.getenv("MANPATH"): parent_paths = os.getenv("MANPATH").strip().split(":") - else: - # Some systems have manpath, others have `man --path` (like Haiku). - for prog in [["manpath"], ["man", "--path"]]: - try: - proc = subprocess.Popen( - prog, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - except OSError: # Command does not exist, keep trying - continue - break # Command exists, use it. - manpath, err_data = proc.communicate() - parent_paths = manpath.decode().strip().split(":") - if (not parent_paths) or (proc and proc.returncode > 0): - # HACK: Use some fallbacks in case we can't get anything else. - # `mandoc` does not provide `manpath` or `man --path` and $MANPATH might not be set. - # The alternative is reading its config file (/etc/man.conf) - if os.path.isfile("/etc/man.conf"): - data = open("/etc/man.conf", "r") - for line in data: - if "manpath" in line or "MANPATH" in line: - p = line.split(" ")[1] - p = p.split()[0] - parent_paths.append(p) - if not parent_paths: - sys.stderr.write( - "Unable to get the manpath, falling back to /usr/share/man:/usr/local/share/man. Please set $MANPATH if that is not correct.\n" - ) - parent_paths = ["/usr/share/man", "/usr/local/share/man"] + # Fallback: With mandoc (OpenBSD, embedded Linux) and NetBSD man, the only way to get the default manpath is by reading /etc. + if not parent_paths: + try: + with open("/etc/man.conf", "r") as file: + data = file.read() + for key in ["MANPATH", "_default"]: + for match in re.findall(r"^%s\s+(.*)$" % key, data, re.I | re.M): + parent_paths.append(match) + except FileNotFoundError: + pass + # Fallback: hard-code some common paths. These should be likely for FHS Linux distros, BSDs, and macOS. + if not parent_paths: + parent_paths = ["/usr/share/man", "/usr/local/man", "/usr/local/share/man"] + print( + "Unable to get the manpath, falling back to %s." % ":".join(parent_paths), + "Explictly set $MANPATH to fix this error.", + file=sys.stderr + ) + result = [] for parent_path in parent_paths: for section in ["man1", "man6", "man8"]: directory_path = os.path.join(parent_path, section) try: names = os.listdir(directory_path) - except OSError as e: + except OSError: names = [] names.sort() for name in names: