diff --git a/src/exec.cpp b/src/exec.cpp index ab13a5513..a16d6ac28 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -383,21 +384,6 @@ void exec_job(parser_t &parser, job_t *j) { return; } - auto unblock_pid = [] (pid_t blocked_pid) { - //this is correct, except there's a race condition if the child hasn't yet SIGSTOP'd - //in that case, they never receive the SIGCONT - pid_t pid_status{}; - if (waitpid(blocked_pid, &pid_status, WUNTRACED) != -1) { - // if (WIFSTOPPED(pid_status)) { - debug(2, L"Unblocking process %d.\n", blocked_pid); - kill(blocked_pid, SIGCONT); - return true; - // } - } - debug(2, L"waitpid call in unblock_pid failed!\n"); - return false; - }; - debug(4, L"Exec job '%ls' with id %d", j->command_wcstr(), j->job_id); // Verify that all IO_BUFFERs are output. We used to support a (single, hacked-in) magical input @@ -485,14 +471,14 @@ void exec_job(parser_t &parser, job_t *j) { if (keepalive.pid == 0) { // Child keepalive.pid = getpid(); - set_child_group(j, &keepalive, 1); + child_set_group(j, &keepalive); pause(); exit_without_destructors(0); } else { // Parent debug(2, L"Fork #%d, pid %d: keepalive fork for '%ls'", g_fork_count, keepalive.pid, j->command_wcstr()); - set_child_group(j, &keepalive, 0); + set_child_group(j, keepalive.pid); } } @@ -527,7 +513,8 @@ void exec_job(parser_t &parser, job_t *j) { // See if we need a pipe. const bool pipes_to_next_command = !p->is_last_in_job; - bool command_blocked = false; + //set to true if we end up forking for this process + bool child_forked = false; // The pipes the current process write to and read from. Unfortunately these can't be just // allocated on the stack, since j->io wants shared_ptr. @@ -632,12 +619,16 @@ void exec_job(parser_t &parser, job_t *j) { // This is the IO buffer we use for storing the output of a block or function when it is in // a pipeline. shared_ptr block_output_io_buffer; - auto unblock_previous = [&] () { + + auto unblock_previous = [&j] () { + //we've already called waitpid after forking the child, so we've guaranteed that they're + //already SIGSTOP'd. Otherwise we'd be risking a deadlock because we can call SIGCONT before + //they've actually stopped, and they'll remain suspended indefinitely. if (blocked_pid != -1) { - //now that next command in the chain has been started, unblock the previous command - unblock_pid(blocked_pid); - blocked_pid = -1; + debug(2, L"Unblocking process %d.\n", blocked_pid); + kill(blocked_pid, SIGCONT); } + blocked_pid = -1; }; // This is the io_streams we pass to internal builtins. @@ -893,14 +884,10 @@ void exec_job(parser_t &parser, job_t *j) { // This is the child process. Write out the contents of the pipeline. p->pid = getpid(); setup_child_process(j, p, process_net_io_chain); - // Start child processes that are part of a chain in a stopped state - // to ensure that they are still running when the next command in the - // chain is started. + // Make child processes pause after executing setup_child_process() to give down-chain + // commands in the job a chance to join their process group and read their pipes. // The process will be resumed when the next command in the chain is started. - // Note that this may span multiple jobs, as jobs can read from each other. - if (pipes_to_next_command) { - kill(p->pid, SIGSTOP); - } + kill(p->pid, SIGSTOP); exec_write_and_exit(block_output_io_buffer->fd, buffer, count, status); } else { @@ -908,14 +895,9 @@ void exec_job(parser_t &parser, job_t *j) { // possibly give it control over the terminal. debug(2, L"Fork #%d, pid %d: internal block or function for '%ls'", g_fork_count, pid, p->argv0()); - if (pipes_to_next_command) { - //it actually blocked itself after forking above, but print in here for output - //synchronization & so we can assign command_blocked in the correct address space - debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); - command_blocked = true; - } + child_forked = true; + debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); p->pid = pid; - set_child_group(j, p, 0); } } else { @@ -1030,14 +1012,11 @@ void exec_job(parser_t &parser, job_t *j) { // stdout and stderr, and then exit. p->pid = getpid(); setup_child_process(j, p, process_net_io_chain); - // Start child processes that are part of a chain in a stopped state - // to ensure that they are still running when the next command in the - // chain is started. + // Make child processes pause after executing setup_child_process() to give down-chain + // commands in the job a chance to join their process group and read their pipes. // The process will be resumed when the next command in the chain is started. - // Note that this may span multiple jobs, as jobs can read from each other. - if (pipes_to_next_command) { - kill(p->pid, SIGSTOP); - } + kill(p->pid, SIGSTOP); + do_builtin_io(outbuff, outbuff_len, errbuff, errbuff_len); exit_without_destructors(p->status); } else { @@ -1045,15 +1024,9 @@ void exec_job(parser_t &parser, job_t *j) { // possibly give it control over the terminal. debug(2, L"Fork #%d, pid %d: internal builtin for '%ls'", g_fork_count, pid, p->argv0()); - if (pipes_to_next_command) { - //it actually blocked itself after forking above, but print in here for output - //synchronization & so we can assign command_blocked in the correct address space - debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); - command_blocked = true; - } + child_forked = true; + debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); p->pid = pid; - - set_child_group(j, p, 0); } } @@ -1127,15 +1100,10 @@ void exec_job(parser_t &parser, job_t *j) { // This is the child process. p->pid = getpid(); setup_child_process(j, p, process_net_io_chain); - - // Start child processes that are part of a chain in a stopped state - // to ensure that they are still running when the next command in the - // chain is started. + // Make child processes pause after executing setup_child_process() to give down-chain + // commands in the job a chance to join their process group and read their pipes. // The process will be resumed when the next command in the chain is started. - // Note that this may span multiple jobs, as jobs can read from each other. - if (pipes_to_next_command) { - kill(p->pid, SIGSTOP); - } + kill(p->pid, SIGSTOP); safe_launch_process(p, actual_cmd, argv, envv); // safe_launch_process _never_ returns... @@ -1147,19 +1115,14 @@ void exec_job(parser_t &parser, job_t *j) { job_mark_process_as_failed(j, p); exec_error = true; } - if (pipes_to_next_command) { - //it actually blocked itself after forking above, but print in here for output - //synchronization & so we can assign command_blocked in the correct address space - debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); - command_blocked = true; - } + child_forked = true; + debug(2, L"Blocking process %d waiting for next command in chain.\n", pid); } } // This is the parent process. Store away information on the child, and possibly // fice it control over the terminal. p->pid = pid; - set_child_group(j, p, 0); break; } @@ -1170,10 +1133,31 @@ void exec_job(parser_t &parser, job_t *j) { } } - - //if the command we ran before this one was SIGSTOP'd to let this one catch up, unblock it now. + if (child_forked) { + //we have to wait to ensure the child has set their progress group and is in SIGSTOP state + //otherwise, we can later call SIGCONT before they call SIGSTOP and they'll be blocked indefinitely. + pid_t pid_status{}; + if (waitpid(p->pid, &pid_status, WUNTRACED) != -1) { + //the child is SIGSTOP'd and is guaranteed to have called child_set_group() at this point. + set_child_group(j, p->pid); //update our own process group info to match + //we are not unblocking the child via SIGCONT just yet to give the next process a chance to open + //the pipes and join the process group. We only unblock the last process in the job chain because + //no one awaits it. + if (!pipes_to_next_command) + { + kill(p->pid, SIGCONT); + } + } + else { + debug(2, L"waitpid(%d) call in unblock_pid failed:!\n", p->pid); + wperror(L"waitpid"); + } + } + //if the command we ran _before_ this one was SIGSTOP'd to let this one catch up, unblock it now. + //this must be after the wait_pid on the process we just started, if any. unblock_previous(); - if (command_blocked) { + + if (child_forked) { //store the newly-blocked command's PID so that it can be SIGCONT'd once the next process //in the chain is started. That may be in this job or in another job. blocked_pid = p->pid; diff --git a/src/postfork.cpp b/src/postfork.cpp index 1270dfee0..6832d748e 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -59,14 +59,18 @@ static void debug_safe_int(int level, const char *format, int val) { debug_safe(level, format, buff); } -/// This function should be called by both the parent process and the child right after fork() has -/// been called. If job control is enabled, the child is put in the jobs group, and if the child is -/// also in the foreground, it is also given control of the terminal. When called in the parent -/// process, this function may fail, since the child might have already finished and called exit. -/// The parent process may safely ignore the exit status of this call. +/// Called only by the child to set its own process group (possibly creating a new group in the +/// process if it is the first in a JOB_CONTROL job. The parent will wait for this to finish. +/// A process that isn't already in control of the terminal can't give itself control of the +/// terminal without hanging, but it's not right for the child to try and give itself control +/// from the very beginning because the parent may not have gotten around to doing so yet. Let +/// the parent figure it out; if the child doesn't have terminal control and it later tries to +/// read from the terminal, the kernel will send it SIGTTIN and it'll hang anyway. +/// The key here is that the parent should transfer control of the terminal (if appropriate) +/// prior to sending the child SIGCONT to wake it up to exec. /// /// Returns true on sucess, false on failiure. -bool set_child_group(job_t *j, process_t *p, int print_errors) { +bool child_set_group(job_t *j, process_t *p) { bool retval = true; if (j->get_flag(JOB_CONTROL)) { @@ -74,32 +78,53 @@ bool set_child_group(job_t *j, process_t *p, int print_errors) { if (j->pgid == -2) { j->pgid = p->pid; } + int failure = setpgid(p->pid, j->pgid); + // TODO: Figure out why we're testing whether the pgid is correct after attempting to + // set it failed. This was added in commit 4e912ef8 from 2012-02-27. + failure = failure && getpgid(p->pid) != j->pgid; + if (failure) { //!OCLINT(collapsible if statements) + char pid_buff[128]; + char job_id_buff[128]; + char getpgid_buff[128]; + char job_pgid_buff[128]; + char argv0[64]; + char command[64]; - if (setpgid(p->pid, j->pgid)) { //!OCLINT(collapsible if statements) - // TODO: Figure out why we're testing whether the pgid is correct after attempting to - // set it failed. This was added in commit 4e912ef8 from 2012-02-27. - if (getpgid(p->pid) != j->pgid && print_errors) { - char pid_buff[128]; - char job_id_buff[128]; - char getpgid_buff[128]; - char job_pgid_buff[128]; - char argv0[64]; - char command[64]; + format_long_safe(pid_buff, p->pid); + format_long_safe(job_id_buff, j->job_id); + format_long_safe(getpgid_buff, getpgid(p->pid)); + format_long_safe(job_pgid_buff, j->pgid); + narrow_string_safe(argv0, p->argv0()); + narrow_string_safe(command, j->command_wcstr()); - format_long_safe(pid_buff, p->pid); - format_long_safe(job_id_buff, j->job_id); - format_long_safe(getpgid_buff, getpgid(p->pid)); - format_long_safe(job_pgid_buff, j->pgid); - narrow_string_safe(argv0, p->argv0()); - narrow_string_safe(command, j->command_wcstr()); + debug_safe( + 1, "Could not send own process %s, '%s' in job %s, '%s' from group %s to group %s", + pid_buff, argv0, job_id_buff, command, getpgid_buff, job_pgid_buff); - debug_safe( - 1, "Could not send process %s, '%s' in job %s, '%s' from group %s to group %s", - pid_buff, argv0, job_id_buff, command, getpgid_buff, job_pgid_buff); + safe_perror("setpgid"); + retval = false; + } + } else { + //this is probably stays unused in the child + j->pgid = getpgrp(); + } - safe_perror("setpgid"); - retval = false; - } + return retval; +} + +/// Called only by the parent only after a child forks and successfully calls child_set_group, guaranteeing +/// the job control process group has been created and that the child belongs to the correct process group. +/// Here we can update our job_t structure to reflect the correct process group in the case of JOB_CONTROL, +/// and we can give the new process group control of the terminal if it's to run in the foreground. Note that +/// we can guarantee the child won't try to read from the terminal before we've had a chance to run this code, +/// because we haven't woken them up with a SIGCONT yet. +bool set_child_group(job_t *j, pid_t child_pid) { + bool retval = true; + + if (j->get_flag(JOB_CONTROL)) { + // New jobs have the pgid set to -2 + if (j->pgid == -2) { + j->pgid = child_pid; } } else { j->pgid = getpgrp(); @@ -107,33 +132,16 @@ bool set_child_group(job_t *j, process_t *p, int print_errors) { if (j->get_flag(JOB_TERMINAL) && j->get_flag(JOB_FOREGROUND)) { //!OCLINT(early exit) if (tcgetpgrp(STDIN_FILENO) == j->pgid) { + //we've already assigned the process group control of the terminal when the first process in the job + //was started. There's no need to do so again, and on some platforms this can cause an EPERM error. + //In addition, if we've given control of the terminal to a process group, attempting to call tcsetpgrp + //from the background will cause SIGTTOU to be sent to everything in our process group (unless we + //handle it).. debug(4, L"Process group %d already has control of terminal\n", j->pgid); } else { - debug(4, L"Attempting bring process group to foreground via tcsetpgrp for job->pgid %d\n", j->pgid); - debug(4, L"caller session id: %d, pgid %d has session id: %d\n", getsid(0), j->pgid, getsid(j->pgid)); - int result = -1; - errno = EINTR; - while (result == -1 && errno == EINTR) { - signal_block(true); - result = tcsetpgrp(STDIN_FILENO, j->pgid); - signal_unblock(true); - } - if (result == -1) { - if (errno == ENOTTY) redirect_tty_output(); - if (print_errors) { - char job_id_buff[64]; - char command_buff[64]; - char job_pgid_buff[128]; - format_long_safe(job_id_buff, j->job_id); - narrow_string_safe(command_buff, j->command_wcstr()); - format_long_safe(job_pgid_buff, j->pgid); - debug_safe(1, "Could not send job %s ('%s') with pgid %s to foreground", job_id_buff, - command_buff, job_pgid_buff); - safe_perror("tcsetpgrp"); - retval = false; - } - } + //no need to duplicate the code here, a function already exists that does just this + retval = terminal_give_to_job(j, false /*new job, so not continuing*/); } } @@ -242,10 +250,10 @@ static int handle_child_io(const io_chain_t &io_chain) { } int setup_child_process(job_t *j, process_t *p, const io_chain_t &io_chain) { - bool ok = true; + bool ok = false; if (p) { - ok = set_child_group(j, p, 1); + ok = child_set_group(j, p); } if (ok) { diff --git a/src/postfork.h b/src/postfork.h index 0a82a4553..a90fd377b 100644 --- a/src/postfork.h +++ b/src/postfork.h @@ -18,7 +18,8 @@ class io_chain_t; class job_t; class process_t; -bool set_child_group(job_t *j, process_t *p, int print_errors); +bool set_child_group(job_t *j, pid_t child_pid); //called by parent +bool child_set_group(job_t *j, process_t *p); //called by child /// Initialize a new child process. This should be called right away after forking in the child /// process. If job control is enabled for this job, the process is put in the process group of the diff --git a/src/proc.cpp b/src/proc.cpp index 061aa2c96..f6d338977 100644 --- a/src/proc.cpp +++ b/src/proc.cpp @@ -782,7 +782,7 @@ static void read_try(job_t *j) { /// \param j The job to give the terminal to. /// \param cont If this variable is set, we are giving back control to a job that has previously /// been stopped. In that case, we need to set the terminal attributes to those saved in the job. -static bool terminal_give_to_job(job_t *j, int cont) { +bool terminal_give_to_job(job_t *j, int cont) { errno = 0; if (j->pgid == 0) { debug(2, "terminal_give_to_job() returning early due to no process group"); diff --git a/src/proc.h b/src/proc.h index 874fb8230..b57b89c25 100644 --- a/src/proc.h +++ b/src/proc.h @@ -365,3 +365,5 @@ void proc_pop_interactive(); int proc_format_status(int status); #endif + +bool terminal_give_to_job(job_t *j, int cont);