From cef39cdcc077653354a037ef9e9b0cc832081142 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 24 Dec 2017 16:29:12 -0800 Subject: [PATCH 1/8] Add is_windows_subsystem_for_linux to detect WSL --- src/common.cpp | 19 +++++++++++++++++++ src/common.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/src/common.cpp b/src/common.cpp index 4896c2d7a..17da93408 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1997,6 +1997,25 @@ void assert_is_locked(void *vmutex, const char *who, const char *caller) { } } +/// Detect if we are Windows Subsystem for Linux by inspecting /proc/sys/kernel/osrelease +/// and checking if "Microsoft" is in the first line. +/// See https://github.com/Microsoft/WSL/issues/423 +bool is_windows_subsystem_for_linux() { + ASSERT_IS_NOT_FORKED_CHILD(); + static bool s_is_wsl = false; + static std::once_flag oflag; + std::call_once(oflag, []() { + // 'e' sets CLOEXEC if possible. + FILE *fp = fopen("/proc/sys/kernel/osrelease", "re"); + if (fp) { + char buff[256]; + if (fgets(buff, sizeof buff, fp)) s_is_wsl = (strstr(buff, "Microsoft") != NULL); + fclose(fp); + } + }); + return s_is_wsl; +} + template static CharType_t **make_null_terminated_array_helper( const std::vector > &argv) { diff --git a/src/common.h b/src/common.h index 1809eab1c..04a53ce76 100644 --- a/src/common.h +++ b/src/common.h @@ -728,6 +728,9 @@ void assert_is_not_forked_child(const char *who); #define ASSERT_IS_NOT_FORKED_CHILD_TRAMPOLINE(x) assert_is_not_forked_child(x) #define ASSERT_IS_NOT_FORKED_CHILD() ASSERT_IS_NOT_FORKED_CHILD_TRAMPOLINE(__FUNCTION__) +/// Return whether we are running in Windows Subsystem for Linux. +bool is_windows_subsystem_for_linux(); + extern "C" { __attribute__((noinline)) void debug_thread_error(void); } From 1b1fd5ab9b7c7ac1b4152f636c63fc425dc3cf5f Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 24 Dec 2017 16:48:00 -0800 Subject: [PATCH 2/8] Mark needs_keepalive more often for WSL Have WSL use a keepalive whenever the first process is external. This works around the fact that WSL prohibits setting an exited process as the group leader. --- src/exec.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/exec.cpp b/src/exec.cpp index 73bab322b..15d8f2660 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -576,6 +576,12 @@ void exec_job(parser_t &parser, job_t *j) { } } + // When running under WSL, create a keepalive process unconditionally if our first process is external. + // This is because WSL does not permit joining the pgrp of an exited process. + // (see https://github.com/Microsoft/WSL/issues/2786), also fish PR #4676 + if (j->processes.front()->type == EXTERNAL && is_windows_subsystem_for_linux()) + needs_keepalive = true; + if (needs_keepalive) { // Call fork. No need to wait for threads since our use is confined and simple. keepalive.pid = execute_fork(false); From e9f676a7f4be1d06c5750ae47ff9b794b783647d Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 24 Dec 2017 12:33:02 -0800 Subject: [PATCH 3/8] Provide a way to stop blocking children via s_block_children This is to investigate alternatives to the existing kill(SIGSTOP) WSL compatibility thing. --- src/exec.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/exec.cpp b/src/exec.cpp index 15d8f2660..a83e3bdf5 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -53,6 +53,8 @@ /// Base open mode to pass to calls to open. #define OPEN_MASK 0666 +static bool s_block_children = false; + /// Called in a forked child. static void exec_write_and_exit(int fd, const char *buff, size_t count, int status) { if (write_loop(fd, buff, count) == -1) { @@ -750,7 +752,7 @@ void exec_job(parser_t &parser, job_t *j) { // SIGCONT before they've actually stopped, and they'll remain suspended indefinitely. if (blocked_pid != -1) { debug(2, L"Unblocking process %d.\n", blocked_pid); - kill(blocked_pid, SIGCONT); + if (s_block_children) kill(blocked_pid, SIGCONT); blocked_pid = -1; } }; @@ -772,7 +774,7 @@ void exec_job(parser_t &parser, job_t *j) { // down-chain commands in the job a chance to join their process group and read // their pipes. The process will be resumed when the next command in the chain is // started. - if (block_child) { + if (block_child && s_block_children) { kill(p->pid, SIGSTOP); } // The parent will wake us up when we're ready to execute. @@ -1118,7 +1120,7 @@ void exec_job(parser_t &parser, job_t *j) { } bool child_blocked = block_child && child_forked; - if (child_blocked) { + if (child_blocked && s_block_children) { // We have to wait to ensure the child has set their progress group and is in SIGSTOP // state otherwise, we can later call SIGCONT before they call SIGSTOP and they'll be // blocked indefinitely. The child is SIGSTOP'd and is guaranteed to have called From 14880ce7d1ab5ba94811beb12aa2681a25366538 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Tue, 26 Dec 2017 01:41:04 -0800 Subject: [PATCH 4/8] Resume setting group ID in both parent and child --- src/postfork.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/postfork.cpp b/src/postfork.cpp index f6d597ded..cdb040af0 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -72,24 +72,12 @@ static void debug_safe_int(int level, const char *format, int val) { /// Returns true on sucess, false on failiure. bool child_set_group(job_t *j, process_t *p) { bool retval = true; - if (j->get_flag(JOB_CONTROL)) { // New jobs have the pgid set to -2 if (j->pgid == -2) { j->pgid = p->pid; } - // Retry on EPERM because there's no way that a child cannot join an existing progress group - // because we are SIGSTOPing the previous job in the chain. Sometimes we have to try a few - // times to get the kernel to see the new group. (Linux 4.4.0) - int failure = setpgid(p->pid, j->pgid); - while (failure == -1 && (errno == EPERM || errno == EINTR)) { - debug_safe(4, "Retrying setpgid in child process"); - failure = setpgid(p->pid, j->pgid); - } - // TODO: Figure out why we're testing whether the pgid is correct after attempting to - // set it failed. This was added in commit 4e912ef8 from 2012-02-27. - failure = failure && getpgid(p->pid) != j->pgid; - if (failure) { //!OCLINT(collapsible if statements) + if (setpgid(p->pid, j->pgid) < 0) { char pid_buff[128]; char job_id_buff[128]; char getpgid_buff[128]; @@ -136,6 +124,13 @@ bool set_child_group(job_t *j, pid_t child_pid) { if (j->pgid == -2) { j->pgid = child_pid; } + // The parent sets the child's group. This incurs the well-known unavoidable race with the + // child exiting, so ignore ESRCH and EPERM (in case the pid was recycled). + if (setpgid(child_pid, j->pgid) < 0) { + if (errno != ESRCH && errno != EPERM) { + safe_perror("setpgid"); + } + } } else { j->pgid = getpgrp(); } From 080521071fee874f2b9945b696d0051e8e873d30 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 20 Jan 2018 23:43:48 -0800 Subject: [PATCH 5/8] Teach keepalives to exit when their parent dies keepalive processes are typically killed by the main shell process. However if the main shell exits the keepalive may linger. In WSL keepalives are used more often, and the lingering keepalives are both leaks and prevent the tests from finishing. Have keepalives poll for their parent process ID and exit when it changes, so they can clean themselves up. The polling frequency can be low. --- src/exec.cpp | 3 ++- src/postfork.cpp | 12 ++++++++++++ src/postfork.h | 3 +++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/exec.cpp b/src/exec.cpp index a83e3bdf5..da3a31ee1 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -586,12 +586,13 @@ void exec_job(parser_t &parser, job_t *j) { if (needs_keepalive) { // Call fork. No need to wait for threads since our use is confined and simple. + pid_t parent_pid = getpid(); keepalive.pid = execute_fork(false); if (keepalive.pid == 0) { // Child keepalive.pid = getpid(); child_set_group(j, &keepalive); - pause(); + run_as_keepalive(parent_pid); exit_without_destructors(0); } else { // Parent diff --git a/src/postfork.cpp b/src/postfork.cpp index cdb040af0..c8b4d1028 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -546,3 +546,15 @@ bool do_builtin_io(const char *out, size_t outlen, const char *err, size_t errle errno = saved_errno; return success; } + +void run_as_keepalive(pid_t parent_pid) { + // Run this process as a keepalive. In typical usage the parent process will kill us. However + // this may not happen if the parent process exits abruptly, either via kill or exec. What we do + // is poll our ppid() and exit when it differs from parent_pid. We can afford to do this with + // low frequency because in the great majority of cases, fish will kill(9) us. + for (;;) { + // Note sleep is async-safe. + if (sleep(1)) break; + if (getppid() != parent_pid) break; + } +} diff --git a/src/postfork.h b/src/postfork.h index c397e342e..a4119418b 100644 --- a/src/postfork.h +++ b/src/postfork.h @@ -47,6 +47,9 @@ bool do_builtin_io(const char *out, size_t outlen, const char *err, size_t errle void safe_report_exec_error(int err, const char *actual_cmd, const char *const *argv, const char *const *envv); +/// Runs the process as a keepalive, until the parent process given by parent_pid exits. +void run_as_keepalive(pid_t parent_pid); + #if FISH_USE_POSIX_SPAWN /// Initializes and fills in a posix_spawnattr_t; on success, the caller should destroy it via /// posix_spawnattr_destroy. From 0c18f68cc2f595b2fe6f001a77c81a668fb7ba79 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sun, 21 Jan 2018 00:20:01 -0800 Subject: [PATCH 6/8] Remove support for blocking children This removes support for blocking children via signals, which was used to orchestrate processes on WSL. Now we use the keepalive mechanism instead. --- src/exec.cpp | 104 ++++------------------------------------------- src/postfork.cpp | 12 +----- 2 files changed, 11 insertions(+), 105 deletions(-) diff --git a/src/exec.cpp b/src/exec.cpp index da3a31ee1..de9c24d9b 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -53,8 +53,6 @@ /// Base open mode to pass to calls to open. #define OPEN_MASK 0666 -static bool s_block_children = false; - /// Called in a forked child. static void exec_write_and_exit(int fd, const char *buff, size_t count, int status) { if (write_loop(fd, buff, count) == -1) { @@ -399,13 +397,11 @@ void internal_exec(job_t *j, const io_chain_t &&all_ios) { /// Execute an internal builtin. Given a parser, a job within that parser, and a process within that /// job corresponding to a builtin, execute the builtin with the given streams. If pipe_read is set, -/// assign stdin to it; otherwise infer stdin from the IO chain. unblock_previous is a hack used to -/// prevent jobs from finishing; see commit cdb72b7024. +/// assign stdin to it; otherwise infer stdin from the IO chain. /// return true on success, false if there is an exec error. static bool exec_internal_builtin_proc(parser_t &parser, job_t *j, process_t *p, const io_pipe_t *pipe_read, const io_chain_t &proc_io_chain, - io_streams_t &streams, - const std::function &unblock_previous) { + io_streams_t &streams) { assert(p->type == INTERNAL_BUILTIN && "Process must be a builtin"); int local_builtin_stdin = STDIN_FILENO; bool close_stdin = false; @@ -493,10 +489,6 @@ static bool exec_internal_builtin_proc(parser_t &parser, job_t *j, process_t *p, // make exec handle things. const int fg = j->get_flag(JOB_FOREGROUND); j->set_flag(JOB_FOREGROUND, false); - - // Main loop may need to perform a blocking read from previous command's output. - // Make sure read source is not blocked. - unblock_previous(); p->status = builtin_run(parser, p->get_argv(), streams); // Restore the fg flag, which is temporarily set to false during builtin @@ -616,9 +608,6 @@ void exec_job(parser_t &parser, job_t *j) { // We are careful to set these to -1 when closed, so if we exit the loop abruptly, we can still // close them. bool pgrp_set = false; - // This is static since processes can block on input/output across jobs the main exec_job loop - // is only ever run in a single thread, so this is OK. - static pid_t blocked_pid = -1; int pipe_current_read = -1, pipe_current_write = -1, pipe_next_read = -1; for (std::unique_ptr &unique_p : j->processes) { if (exec_error) { @@ -639,7 +628,6 @@ void exec_job(parser_t &parser, job_t *j) { // Set to true if we end up forking for this process. bool child_forked = false; bool child_spawned = false; - bool block_child = true; // The pipes the current process write to and read from. Unfortunately these can't be just // allocated on the stack, since j->io wants shared_ptr. @@ -745,23 +733,10 @@ void exec_job(parser_t &parser, job_t *j) { // a pipeline. shared_ptr block_output_io_buffer; - // Used to SIGCONT the previously SIGSTOP'd process so the main loop or next command in job - // can read from its output. - auto unblock_previous = [&j]() { - // We've already called waitpid after forking the child, so we've guaranteed that - // they're already SIGSTOP'd. Otherwise we'd be risking a deadlock because we can call - // SIGCONT before they've actually stopped, and they'll remain suspended indefinitely. - if (blocked_pid != -1) { - debug(2, L"Unblocking process %d.\n", blocked_pid); - if (s_block_children) kill(blocked_pid, SIGCONT); - blocked_pid = -1; - } - }; - // This is the io_streams we pass to internal builtins. std::unique_ptr builtin_io_streams(new io_streams_t(stdout_read_limit)); - auto do_fork = [&j, &p, &pid, &exec_error, &process_net_io_chain, &block_child, + auto do_fork = [&j, &p, &pid, &exec_error, &process_net_io_chain, &child_forked](bool drain_threads, const char *fork_type, std::function child_action) -> bool { pid = execute_fork(drain_threads); @@ -769,16 +744,7 @@ void exec_job(parser_t &parser, job_t *j) { // This is the child process. Setup redirections, print correct output to // stdout and stderr, and then exit. p->pid = getpid(); - blocked_pid = -1; child_set_group(j, p); - // Make child processes pause after executing setup_child_process() to give - // down-chain commands in the job a chance to join their process group and read - // their pipes. The process will be resumed when the next command in the chain is - // started. - if (block_child && s_block_children) { - kill(p->pid, SIGSTOP); - } - // The parent will wake us up when we're ready to execute. setup_child_process(p, process_net_io_chain); child_action(); DIE("Child process returned control to do_fork lambda!"); @@ -794,10 +760,8 @@ void exec_job(parser_t &parser, job_t *j) { debug(2, L"Fork #%d, pid %d: %s for '%ls'", g_fork_count, pid, fork_type, p->argv0()); child_forked = true; - if (block_child) { - debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); - } p->pid = pid; + set_child_group(j, p->pid); } return true; @@ -867,7 +831,7 @@ void exec_job(parser_t &parser, job_t *j) { case INTERNAL_BUILTIN: { if (!exec_internal_builtin_proc(parser, j, p, pipe_read.get(), process_net_io_chain, - *builtin_io_streams, unblock_previous)) { + *builtin_io_streams)) { exec_error = true; } break; @@ -953,9 +917,8 @@ void exec_job(parser_t &parser, job_t *j) { bool must_fork = redirection_is_to_real_file(stdout_io.get()) || redirection_is_to_real_file(stderr_io.get()); if (!must_fork && p->is_last_in_job) { - // We are handling reads directly in the main loop. Make sure source is - // unblocked. Note that we may still end up forking. - unblock_previous(); + // We are handling reads directly in the main loop. Note that we may still end + // up forking. const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER; const bool no_stdout_output = stdout_buffer.empty(); const bool no_stderr_output = stderr_buffer.empty(); @@ -1108,8 +1071,9 @@ void exec_job(parser_t &parser, job_t *j) { } // This is the parent process. Store away information on the child, and possibly - // fice it control over the terminal. + // give it control over the terminal. p->pid = pid; + set_child_group(j, p->pid); break; } @@ -1120,51 +1084,6 @@ void exec_job(parser_t &parser, job_t *j) { } } - bool child_blocked = block_child && child_forked; - if (child_blocked && s_block_children) { - // We have to wait to ensure the child has set their progress group and is in SIGSTOP - // state otherwise, we can later call SIGCONT before they call SIGSTOP and they'll be - // blocked indefinitely. The child is SIGSTOP'd and is guaranteed to have called - // child_set_group() at this point. but we only need to call set_child_group for the - // first process in the group. If needs_keepalive is set, this has already been called - // for the keepalive process. - int result; - int pid_status; - while ((result = waitpid(p->pid, &pid_status, WUNTRACED)) == -1 && errno == EINTR) { - // This could be a superfluous interrupt or Ctrl+C at the terminal In all cases, it - // is OK to retry since the forking code above is specifically designed to never, - // ever hang/block in a child process before the SIGSTOP call is reached. - ; // do nothing - } - if (result == -1) { - exec_error = true; - debug(1, L"waitpid(%d) call in unblock_pid failed:!\n", p->pid); - wperror(L"waitpid"); - break; - } - // We are not unblocking the child via SIGCONT just yet to give the next process a - // chance to open the pipes and join the process group. We only unblock the last process - // in the job chain because no one awaits it. - } - // Regardless of whether the child blocked or not: only once per job, and only once we've - // actually executed an external command. - if ((child_spawned || child_forked) && !pgrp_set) { - // This should be called after waitpid if child_forked and pipes_to_next_command it can - // be called at any time if child_spawned. - set_child_group(j, p->pid); - // we can't rely on p->is_first_in_job because a builtin may have been the first. - pgrp_set = true; - } - // If the command we ran _before_ this one was SIGSTOP'd to let this one catch up, unblock - // it now. this must be after the wait_pid on the process we just started, if any. - unblock_previous(); - - if (child_blocked) { - // Store the newly-blocked command's PID so that it can be SIGCONT'd once the next - // process in the chain is started. That may be in this job or in another job. - blocked_pid = p->pid; - } - // Close the pipe the current process uses to read from the previous process_t. if (pipe_current_read >= 0) { exec_close(pipe_current_read); @@ -1176,11 +1095,6 @@ void exec_job(parser_t &parser, job_t *j) { exec_close(pipe_current_write); pipe_current_write = -1; } - - // Unblock the last process because there's no need for it to stay SIGSTOP'd for anything. - if (p->is_last_in_job) { - unblock_previous(); - } } // Clean up any file descriptors we left open. diff --git a/src/postfork.cpp b/src/postfork.cpp index c8b4d1028..91becfa17 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -60,15 +60,7 @@ static void debug_safe_int(int level, const char *format, int val) { } /// Called only by the child to set its own process group (possibly creating a new group in the -/// process if it is the first in a JOB_CONTROL job. The parent will wait for this to finish. -/// A process that isn't already in control of the terminal can't give itself control of the -/// terminal without hanging, but it's not right for the child to try and give itself control -/// from the very beginning because the parent may not have gotten around to doing so yet. Let -/// the parent figure it out; if the child doesn't have terminal control and it later tries to -/// read from the terminal, the kernel will send it SIGTTIN and it'll hang anyway. -/// The key here is that the parent should transfer control of the terminal (if appropriate) -/// prior to sending the child SIGCONT to wake it up to exec. -/// +/// process if it is the first in a JOB_CONTROL job. /// Returns true on sucess, false on failiure. bool child_set_group(job_t *j, process_t *p) { bool retval = true; @@ -100,7 +92,7 @@ bool child_set_group(job_t *j, process_t *p) { retval = false; } } else { - // This is probably stays unused in the child. + // The child does not actualyl use this field. j->pgid = getpgrp(); } From 8de266afb45d6a35a82c912e64d0b8cfcb6782cd Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Fri, 2 Feb 2018 14:31:10 -0800 Subject: [PATCH 7/8] Improve commenting regarding process groups and builtins. --- src/exec.cpp | 2 ++ src/postfork.cpp | 6 +----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/exec.cpp b/src/exec.cpp index de9c24d9b..4e2f3ed95 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -489,6 +489,8 @@ static bool exec_internal_builtin_proc(parser_t &parser, job_t *j, process_t *p, // make exec handle things. const int fg = j->get_flag(JOB_FOREGROUND); j->set_flag(JOB_FOREGROUND, false); + + // Note this call may block for a long time, while the builtin performs I/O. p->status = builtin_run(parser, p->get_argv(), streams); // Restore the fg flag, which is temporarily set to false during builtin diff --git a/src/postfork.cpp b/src/postfork.cpp index 91becfa17..766681ff5 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -103,11 +103,7 @@ bool child_set_group(job_t *j, process_t *p) { /// guaranteeing the job control process group has been created and that the child belongs to the /// correct process group. Here we can update our job_t structure to reflect the correct process /// group in the case of JOB_CONTROL, and we can give the new process group control of the terminal -/// if it's to run in the foreground. Note that we can guarantee the child won't try to read from -/// the terminal before we've had a chance to run this code, because we haven't woken them up with a -/// SIGCONT yet. This musn't be called as a part of setup_child_process because that can hang -/// indefinitely until data is available to read/write in the case of IO_FILE, which means we'll -/// never reach our SIGSTOP and everything hangs. +/// if it's to run in the foreground. bool set_child_group(job_t *j, pid_t child_pid) { bool retval = true; From cb03be9fe634aed8247f10bea4b64fde7a3d5c22 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 7 Feb 2018 12:54:26 -0800 Subject: [PATCH 8/8] Remove unused 'pgrp_set' variable --- src/exec.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/exec.cpp b/src/exec.cpp index 4e2f3ed95..91cf99cd6 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -609,7 +609,6 @@ void exec_job(parser_t &parser, job_t *j) { // // We are careful to set these to -1 when closed, so if we exit the loop abruptly, we can still // close them. - bool pgrp_set = false; int pipe_current_read = -1, pipe_current_write = -1, pipe_next_read = -1; for (std::unique_ptr &unique_p : j->processes) { if (exec_error) {