diff --git a/src/common.cpp b/src/common.cpp index 4896c2d7a..17da93408 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -1997,6 +1997,25 @@ void assert_is_locked(void *vmutex, const char *who, const char *caller) { } } +/// Detect if we are Windows Subsystem for Linux by inspecting /proc/sys/kernel/osrelease +/// and checking if "Microsoft" is in the first line. +/// See https://github.com/Microsoft/WSL/issues/423 +bool is_windows_subsystem_for_linux() { + ASSERT_IS_NOT_FORKED_CHILD(); + static bool s_is_wsl = false; + static std::once_flag oflag; + std::call_once(oflag, []() { + // 'e' sets CLOEXEC if possible. + FILE *fp = fopen("/proc/sys/kernel/osrelease", "re"); + if (fp) { + char buff[256]; + if (fgets(buff, sizeof buff, fp)) s_is_wsl = (strstr(buff, "Microsoft") != NULL); + fclose(fp); + } + }); + return s_is_wsl; +} + template static CharType_t **make_null_terminated_array_helper( const std::vector > &argv) { diff --git a/src/common.h b/src/common.h index 1809eab1c..04a53ce76 100644 --- a/src/common.h +++ b/src/common.h @@ -728,6 +728,9 @@ void assert_is_not_forked_child(const char *who); #define ASSERT_IS_NOT_FORKED_CHILD_TRAMPOLINE(x) assert_is_not_forked_child(x) #define ASSERT_IS_NOT_FORKED_CHILD() ASSERT_IS_NOT_FORKED_CHILD_TRAMPOLINE(__FUNCTION__) +/// Return whether we are running in Windows Subsystem for Linux. +bool is_windows_subsystem_for_linux(); + extern "C" { __attribute__((noinline)) void debug_thread_error(void); } diff --git a/src/exec.cpp b/src/exec.cpp index 73bab322b..91cf99cd6 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -397,13 +397,11 @@ void internal_exec(job_t *j, const io_chain_t &&all_ios) { /// Execute an internal builtin. Given a parser, a job within that parser, and a process within that /// job corresponding to a builtin, execute the builtin with the given streams. If pipe_read is set, -/// assign stdin to it; otherwise infer stdin from the IO chain. unblock_previous is a hack used to -/// prevent jobs from finishing; see commit cdb72b7024. +/// assign stdin to it; otherwise infer stdin from the IO chain. /// return true on success, false if there is an exec error. static bool exec_internal_builtin_proc(parser_t &parser, job_t *j, process_t *p, const io_pipe_t *pipe_read, const io_chain_t &proc_io_chain, - io_streams_t &streams, - const std::function &unblock_previous) { + io_streams_t &streams) { assert(p->type == INTERNAL_BUILTIN && "Process must be a builtin"); int local_builtin_stdin = STDIN_FILENO; bool close_stdin = false; @@ -492,9 +490,7 @@ static bool exec_internal_builtin_proc(parser_t &parser, job_t *j, process_t *p, const int fg = j->get_flag(JOB_FOREGROUND); j->set_flag(JOB_FOREGROUND, false); - // Main loop may need to perform a blocking read from previous command's output. - // Make sure read source is not blocked. - unblock_previous(); + // Note this call may block for a long time, while the builtin performs I/O. p->status = builtin_run(parser, p->get_argv(), streams); // Restore the fg flag, which is temporarily set to false during builtin @@ -576,14 +572,21 @@ void exec_job(parser_t &parser, job_t *j) { } } + // When running under WSL, create a keepalive process unconditionally if our first process is external. + // This is because WSL does not permit joining the pgrp of an exited process. + // (see https://github.com/Microsoft/WSL/issues/2786), also fish PR #4676 + if (j->processes.front()->type == EXTERNAL && is_windows_subsystem_for_linux()) + needs_keepalive = true; + if (needs_keepalive) { // Call fork. No need to wait for threads since our use is confined and simple. + pid_t parent_pid = getpid(); keepalive.pid = execute_fork(false); if (keepalive.pid == 0) { // Child keepalive.pid = getpid(); child_set_group(j, &keepalive); - pause(); + run_as_keepalive(parent_pid); exit_without_destructors(0); } else { // Parent @@ -606,10 +609,6 @@ void exec_job(parser_t &parser, job_t *j) { // // We are careful to set these to -1 when closed, so if we exit the loop abruptly, we can still // close them. - bool pgrp_set = false; - // This is static since processes can block on input/output across jobs the main exec_job loop - // is only ever run in a single thread, so this is OK. - static pid_t blocked_pid = -1; int pipe_current_read = -1, pipe_current_write = -1, pipe_next_read = -1; for (std::unique_ptr &unique_p : j->processes) { if (exec_error) { @@ -630,7 +629,6 @@ void exec_job(parser_t &parser, job_t *j) { // Set to true if we end up forking for this process. bool child_forked = false; bool child_spawned = false; - bool block_child = true; // The pipes the current process write to and read from. Unfortunately these can't be just // allocated on the stack, since j->io wants shared_ptr. @@ -736,23 +734,10 @@ void exec_job(parser_t &parser, job_t *j) { // a pipeline. shared_ptr block_output_io_buffer; - // Used to SIGCONT the previously SIGSTOP'd process so the main loop or next command in job - // can read from its output. - auto unblock_previous = [&j]() { - // We've already called waitpid after forking the child, so we've guaranteed that - // they're already SIGSTOP'd. Otherwise we'd be risking a deadlock because we can call - // SIGCONT before they've actually stopped, and they'll remain suspended indefinitely. - if (blocked_pid != -1) { - debug(2, L"Unblocking process %d.\n", blocked_pid); - kill(blocked_pid, SIGCONT); - blocked_pid = -1; - } - }; - // This is the io_streams we pass to internal builtins. std::unique_ptr builtin_io_streams(new io_streams_t(stdout_read_limit)); - auto do_fork = [&j, &p, &pid, &exec_error, &process_net_io_chain, &block_child, + auto do_fork = [&j, &p, &pid, &exec_error, &process_net_io_chain, &child_forked](bool drain_threads, const char *fork_type, std::function child_action) -> bool { pid = execute_fork(drain_threads); @@ -760,16 +745,7 @@ void exec_job(parser_t &parser, job_t *j) { // This is the child process. Setup redirections, print correct output to // stdout and stderr, and then exit. p->pid = getpid(); - blocked_pid = -1; child_set_group(j, p); - // Make child processes pause after executing setup_child_process() to give - // down-chain commands in the job a chance to join their process group and read - // their pipes. The process will be resumed when the next command in the chain is - // started. - if (block_child) { - kill(p->pid, SIGSTOP); - } - // The parent will wake us up when we're ready to execute. setup_child_process(p, process_net_io_chain); child_action(); DIE("Child process returned control to do_fork lambda!"); @@ -785,10 +761,8 @@ void exec_job(parser_t &parser, job_t *j) { debug(2, L"Fork #%d, pid %d: %s for '%ls'", g_fork_count, pid, fork_type, p->argv0()); child_forked = true; - if (block_child) { - debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); - } p->pid = pid; + set_child_group(j, p->pid); } return true; @@ -858,7 +832,7 @@ void exec_job(parser_t &parser, job_t *j) { case INTERNAL_BUILTIN: { if (!exec_internal_builtin_proc(parser, j, p, pipe_read.get(), process_net_io_chain, - *builtin_io_streams, unblock_previous)) { + *builtin_io_streams)) { exec_error = true; } break; @@ -944,9 +918,8 @@ void exec_job(parser_t &parser, job_t *j) { bool must_fork = redirection_is_to_real_file(stdout_io.get()) || redirection_is_to_real_file(stderr_io.get()); if (!must_fork && p->is_last_in_job) { - // We are handling reads directly in the main loop. Make sure source is - // unblocked. Note that we may still end up forking. - unblock_previous(); + // We are handling reads directly in the main loop. Note that we may still end + // up forking. const bool stdout_is_to_buffer = stdout_io && stdout_io->io_mode == IO_BUFFER; const bool no_stdout_output = stdout_buffer.empty(); const bool no_stderr_output = stderr_buffer.empty(); @@ -1099,8 +1072,9 @@ void exec_job(parser_t &parser, job_t *j) { } // This is the parent process. Store away information on the child, and possibly - // fice it control over the terminal. + // give it control over the terminal. p->pid = pid; + set_child_group(j, p->pid); break; } @@ -1111,51 +1085,6 @@ void exec_job(parser_t &parser, job_t *j) { } } - bool child_blocked = block_child && child_forked; - if (child_blocked) { - // We have to wait to ensure the child has set their progress group and is in SIGSTOP - // state otherwise, we can later call SIGCONT before they call SIGSTOP and they'll be - // blocked indefinitely. The child is SIGSTOP'd and is guaranteed to have called - // child_set_group() at this point. but we only need to call set_child_group for the - // first process in the group. If needs_keepalive is set, this has already been called - // for the keepalive process. - int result; - int pid_status; - while ((result = waitpid(p->pid, &pid_status, WUNTRACED)) == -1 && errno == EINTR) { - // This could be a superfluous interrupt or Ctrl+C at the terminal In all cases, it - // is OK to retry since the forking code above is specifically designed to never, - // ever hang/block in a child process before the SIGSTOP call is reached. - ; // do nothing - } - if (result == -1) { - exec_error = true; - debug(1, L"waitpid(%d) call in unblock_pid failed:!\n", p->pid); - wperror(L"waitpid"); - break; - } - // We are not unblocking the child via SIGCONT just yet to give the next process a - // chance to open the pipes and join the process group. We only unblock the last process - // in the job chain because no one awaits it. - } - // Regardless of whether the child blocked or not: only once per job, and only once we've - // actually executed an external command. - if ((child_spawned || child_forked) && !pgrp_set) { - // This should be called after waitpid if child_forked and pipes_to_next_command it can - // be called at any time if child_spawned. - set_child_group(j, p->pid); - // we can't rely on p->is_first_in_job because a builtin may have been the first. - pgrp_set = true; - } - // If the command we ran _before_ this one was SIGSTOP'd to let this one catch up, unblock - // it now. this must be after the wait_pid on the process we just started, if any. - unblock_previous(); - - if (child_blocked) { - // Store the newly-blocked command's PID so that it can be SIGCONT'd once the next - // process in the chain is started. That may be in this job or in another job. - blocked_pid = p->pid; - } - // Close the pipe the current process uses to read from the previous process_t. if (pipe_current_read >= 0) { exec_close(pipe_current_read); @@ -1167,11 +1096,6 @@ void exec_job(parser_t &parser, job_t *j) { exec_close(pipe_current_write); pipe_current_write = -1; } - - // Unblock the last process because there's no need for it to stay SIGSTOP'd for anything. - if (p->is_last_in_job) { - unblock_previous(); - } } // Clean up any file descriptors we left open. diff --git a/src/postfork.cpp b/src/postfork.cpp index f6d597ded..766681ff5 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -60,36 +60,16 @@ static void debug_safe_int(int level, const char *format, int val) { } /// Called only by the child to set its own process group (possibly creating a new group in the -/// process if it is the first in a JOB_CONTROL job. The parent will wait for this to finish. -/// A process that isn't already in control of the terminal can't give itself control of the -/// terminal without hanging, but it's not right for the child to try and give itself control -/// from the very beginning because the parent may not have gotten around to doing so yet. Let -/// the parent figure it out; if the child doesn't have terminal control and it later tries to -/// read from the terminal, the kernel will send it SIGTTIN and it'll hang anyway. -/// The key here is that the parent should transfer control of the terminal (if appropriate) -/// prior to sending the child SIGCONT to wake it up to exec. -/// +/// process if it is the first in a JOB_CONTROL job. /// Returns true on sucess, false on failiure. bool child_set_group(job_t *j, process_t *p) { bool retval = true; - if (j->get_flag(JOB_CONTROL)) { // New jobs have the pgid set to -2 if (j->pgid == -2) { j->pgid = p->pid; } - // Retry on EPERM because there's no way that a child cannot join an existing progress group - // because we are SIGSTOPing the previous job in the chain. Sometimes we have to try a few - // times to get the kernel to see the new group. (Linux 4.4.0) - int failure = setpgid(p->pid, j->pgid); - while (failure == -1 && (errno == EPERM || errno == EINTR)) { - debug_safe(4, "Retrying setpgid in child process"); - failure = setpgid(p->pid, j->pgid); - } - // TODO: Figure out why we're testing whether the pgid is correct after attempting to - // set it failed. This was added in commit 4e912ef8 from 2012-02-27. - failure = failure && getpgid(p->pid) != j->pgid; - if (failure) { //!OCLINT(collapsible if statements) + if (setpgid(p->pid, j->pgid) < 0) { char pid_buff[128]; char job_id_buff[128]; char getpgid_buff[128]; @@ -112,7 +92,7 @@ bool child_set_group(job_t *j, process_t *p) { retval = false; } } else { - // This is probably stays unused in the child. + // The child does not actualyl use this field. j->pgid = getpgrp(); } @@ -123,11 +103,7 @@ bool child_set_group(job_t *j, process_t *p) { /// guaranteeing the job control process group has been created and that the child belongs to the /// correct process group. Here we can update our job_t structure to reflect the correct process /// group in the case of JOB_CONTROL, and we can give the new process group control of the terminal -/// if it's to run in the foreground. Note that we can guarantee the child won't try to read from -/// the terminal before we've had a chance to run this code, because we haven't woken them up with a -/// SIGCONT yet. This musn't be called as a part of setup_child_process because that can hang -/// indefinitely until data is available to read/write in the case of IO_FILE, which means we'll -/// never reach our SIGSTOP and everything hangs. +/// if it's to run in the foreground. bool set_child_group(job_t *j, pid_t child_pid) { bool retval = true; @@ -136,6 +112,13 @@ bool set_child_group(job_t *j, pid_t child_pid) { if (j->pgid == -2) { j->pgid = child_pid; } + // The parent sets the child's group. This incurs the well-known unavoidable race with the + // child exiting, so ignore ESRCH and EPERM (in case the pid was recycled). + if (setpgid(child_pid, j->pgid) < 0) { + if (errno != ESRCH && errno != EPERM) { + safe_perror("setpgid"); + } + } } else { j->pgid = getpgrp(); } @@ -551,3 +534,15 @@ bool do_builtin_io(const char *out, size_t outlen, const char *err, size_t errle errno = saved_errno; return success; } + +void run_as_keepalive(pid_t parent_pid) { + // Run this process as a keepalive. In typical usage the parent process will kill us. However + // this may not happen if the parent process exits abruptly, either via kill or exec. What we do + // is poll our ppid() and exit when it differs from parent_pid. We can afford to do this with + // low frequency because in the great majority of cases, fish will kill(9) us. + for (;;) { + // Note sleep is async-safe. + if (sleep(1)) break; + if (getppid() != parent_pid) break; + } +} diff --git a/src/postfork.h b/src/postfork.h index c397e342e..a4119418b 100644 --- a/src/postfork.h +++ b/src/postfork.h @@ -47,6 +47,9 @@ bool do_builtin_io(const char *out, size_t outlen, const char *err, size_t errle void safe_report_exec_error(int err, const char *actual_cmd, const char *const *argv, const char *const *envv); +/// Runs the process as a keepalive, until the parent process given by parent_pid exits. +void run_as_keepalive(pid_t parent_pid); + #if FISH_USE_POSIX_SPAWN /// Initializes and fills in a posix_spawnattr_t; on success, the caller should destroy it via /// posix_spawnattr_destroy.