From eb71e4555f29948573be4d5f4da764e70fc5e85e Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 27 Mar 2021 16:07:57 -0700 Subject: [PATCH] Clean up and relnote shebangless script support This adds a test for shebangless support from #7802, cleans up some of its tricks, and includes it in the changelog. --- CHANGELOG.rst | 1 + src/exec.cpp | 24 +++++++++++--- src/postfork.cpp | 14 ++++---- tests/checks/noshebang.fish | 66 +++++++++++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 12 deletions(-) create mode 100644 tests/checks/noshebang.fish diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b357d3643..572d2900b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -13,6 +13,7 @@ Deprecations and removed features Scripting improvements ---------------------- - ``math`` now has a ``log2`` function like the documentation already claimed. +- Shebang lines are no longer required within shell scripts, improving support for scripts with concatenated binary contents. If a file fails to execute and passes a binary safety check, fish will re-invoke the file using ``/bin/sh`` (:issue:`7802`). Interactive improvements ------------------------- diff --git a/src/exec.cpp b/src/exec.cpp index 7118802c6..ff84cb698 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -63,6 +63,9 @@ enum class launch_result_t { failed, } __warn_unused_type; +/// This is a 'looks like text' check. +/// \return true if either there is no NUL byte, or there is a line containing a lowercase letter +/// before the first NUL byte. static bool is_thompson_shell_payload(const char *p, size_t n) { if (!memchr(p, '\0', n)) return true; bool haslower = false; @@ -87,15 +90,16 @@ static bool is_thompson_shell_payload(const char *p, size_t n) { /// is usually uppercase, e.g. PNG, JFIF, MZ, etc. These rules are also /// flexible enough to permit scripts with concatenated binary content, /// such as Actually Portable Executable. +/// N.B.: this is called after fork, it must not allocate heap memory. 
bool is_thompson_shell_script(const char *path) { int e = errno; bool res = false; - int fd = open(path, O_RDONLY | O_NOCTTY); + int fd = open_cloexec(path, O_RDONLY | O_NOCTTY); if (fd != -1) { char buf[256]; ssize_t got = read(fd, buf, sizeof(buf)); close(fd); - if (got != -1 && is_thompson_shell_payload(buf, got)) { + if (got >= 0 && is_thompson_shell_payload(buf, static_cast<size_t>(got))) { res = true; } } @@ -119,12 +123,22 @@ bool is_thompson_shell_script(const char *path) { err = errno; // The shebang wasn't introduced until UNIX Seventh Edition, so if - // the kernel won't run the binary we hand it off to the intpreter + // the kernel won't run the binary we hand it off to the interpreter // after performing a binary safety check, recommended by POSIX: a // line needs to exist before the first \0 with a lowercase letter if (err == ENOEXEC && is_thompson_shell_script(actual_cmd)) { - *--argv = const_cast<char *>(_PATH_BSHELL); - execve(_PATH_BSHELL, argv, envv); + // Construct new argv. + // We must not allocate memory, so only 128 args are supported. + constexpr size_t maxargs = 128; + size_t nargs = 0; + while (argv[nargs]) nargs++; + if (nargs <= maxargs) { + char *argv2[1 + maxargs + 1]; // +1 for /bin/sh, +1 for terminating nullptr + char interp[] = _PATH_BSHELL; + argv2[0] = interp; + std::copy_n(argv, 1 + nargs, &argv2[1]); // +1 to copy terminating nullptr + execve(_PATH_BSHELL, argv2, envv); + } } errno = err; diff --git a/src/postfork.cpp b/src/postfork.cpp index 754c3a161..148840da6 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -309,15 +309,15 @@ maybe_t<pid_t> posix_spawner_t::spawn(const char *cmd, char *const argv[], char // line needs to exist before the first \0 with a lowercase letter if (error_ == ENOEXEC && is_thompson_shell_script(cmd)) { error_ = 0; - size_t n = 0; - while (argv[n]) ++n; - std::unique_ptr<char *[]> argv2(new char *[1 + n + 1]); + // Create a new argv with /bin/sh prepended. 
+ std::vector<char *> argv2; char interp[] = _PATH_BSHELL; - argv2[0] = interp; - for (size_t i = 0; i < n + 1; ++i) { - argv2[i + 1] = argv[i]; + argv2.push_back(interp); + for (size_t i = 0; argv[i] != nullptr; i++) { + argv2.push_back(argv[i]); } - if (check_fail(posix_spawn(&pid, interp, &*actions_, &*attr_, argv2.get(), envp))) { + argv2.push_back(nullptr); + if (check_fail(posix_spawn(&pid, interp, &*actions_, &*attr_, &argv2[0], envp))) { return none(); } } else { diff --git a/tests/checks/noshebang.fish b/tests/checks/noshebang.fish new file mode 100644 index 000000000..c58111144 --- /dev/null +++ b/tests/checks/noshebang.fish @@ -0,0 +1,66 @@ +# RUN: %fish %s + +# Test for shebangless scripts - see 7802. + +set testdir (mktemp -d) +cd $testdir + +touch file +chmod a+x file + +function runfile + # Run our file twice, printing status. + # Arguments are passed to exercise the re-execve code paths; they have no other effect. + set -g fish_use_posix_spawn 0 + ./file arg1 arg2 arg3 + echo $status + + set -g fish_use_posix_spawn 1 + ./file arg1 arg2 arg3 arg4 arg5 + echo $status +end + +# Empty executable files are 'true'. +true >file +runfile +#CHECK: 0 +#CHECK: 0 + +# Files without NUL are 'true' as well. +echo -e -n '#COMMENT\n#COMMENT' >file +runfile +#CHECK: 0 +#CHECK: 0 + +# On to NUL bytes. +# The heuristic is that there must be a line containing a lowercase letter before the first NUL byte. +echo -n -e 'true\n\x00' >file +runfile +#CHECK: 0 +#CHECK: 0 + +# Doesn't meet our heuristic as there is no newline. +echo -n -e 'true\x00' >file +runfile +#CHECK: 125 +#CHECKERR: Failed {{.*}} +#CHECKERR: exec: {{.*}} +#CHECKERR: {{.*}} + +#CHECK: 125 +#CHECKERR: Failed {{.*}} +#CHECKERR: exec: {{.*}} +#CHECKERR: {{.*}} + +# Doesn't meet our heuristic as there is no lowercase before newline. 
+echo -n -e 'NOPE\n\x00' >file +runfile +#CHECK: 125 +#CHECKERR: Failed {{.*}} +#CHECKERR: exec: {{.*}} +#CHECKERR: {{.*}} + +#CHECK: 125 +#CHECKERR: Failed {{.*}} +#CHECKERR: exec: {{.*}} +#CHECKERR: {{.*}}