Overhaul continue_job() and try_select()

Convert `select_try()` to return a well-defined enum describing its
state, and handle each of the three possible cases with clear reasons
why we are blocking or not blocking in each subsequent call to
`process_mark_finished_children()`.
This commit is contained in:
Mahmoud Al-Qudsi 2018-10-02 15:10:42 -05:00
parent 1bfbed94ae
commit 39a05a359a
2 changed files with 105 additions and 86 deletions

View File

@ -218,22 +218,25 @@ void job_t::set_flag(job_flag_t flag, bool set) { this->flags.set(flag, set); }
bool job_t::get_flag(job_flag_t flag) const { return this->flags.get(flag); } bool job_t::get_flag(job_flag_t flag) const { return this->flags.get(flag); }
int job_t::signal(int signal) { bool job_t::signal(int signal) {
pid_t my_pgid = getpgrp(); // Presumably we are distinguishing between the two cases below because we do
int res = 0; // not want to send ourselves the signal in question in case the job shares
// a pgid with the shell.
if (pgid != my_pgid) { if (pgid != getpgrp()) {
res = killpg(pgid, signal); if (killpg(pgid, signal) == -1) {
wperror(L"killpg");
return false;
}
} else { } else {
for (const process_ptr_t &p : processes) { for (const auto &p : processes) {
if (!p->completed && p->pid && kill(p->pid, signal)) { if (!p->completed && p->pid && kill(p->pid, signal) == -1) {
res = -1; return false;
break;
} }
} }
} }
return res; return true;
} }
static void mark_job_complete(const job_t *j) { static void mark_job_complete(const job_t *j) {
@ -681,19 +684,17 @@ void proc_update_jiffies() {
/// Check if there are buffers associated with the job, and select on them for a while if available. /// Check if there are buffers associated with the job, and select on them for a while if available.
/// ///
/// \param j the job to test /// \param j the job to test
/// /// \return the status of the select operation
/// \return 1 if buffers were available, zero otherwise static select_try_t select_try(job_t *j) {
static int select_try(job_t *j) {
fd_set fds; fd_set fds;
int maxfd = -1; int maxfd = -1;
FD_ZERO(&fds); FD_ZERO(&fds);
const io_chain_t chain = j->all_io_redirections(); const io_chain_t chain = j->all_io_redirections();
for (size_t idx = 0; idx < chain.size(); idx++) { for (const auto &io : chain) {
const io_data_t *io = chain.at(idx).get();
if (io->io_mode == IO_BUFFER) { if (io->io_mode == IO_BUFFER) {
const io_pipe_t *io_pipe = static_cast<const io_pipe_t *>(io); auto io_pipe = static_cast<const io_pipe_t *>(io.get());
int fd = io_pipe->pipe_fd[0]; int fd = io_pipe->pipe_fd[0];
FD_SET(fd, &fds); FD_SET(fd, &fds);
maxfd = std::max(maxfd, fd); maxfd = std::max(maxfd, fd);
@ -702,20 +703,20 @@ static int select_try(job_t *j) {
} }
if (maxfd >= 0) { if (maxfd >= 0) {
int retval; struct timeval timeout;
struct timeval tv;
tv.tv_sec = 0; timeout.tv_sec = 0;
tv.tv_usec = 10000; timeout.tv_usec = 10000;
retval = select(maxfd + 1, &fds, 0, 0, &tv); int retval = select(maxfd + 1, &fds, 0, 0, &timeout);
if (retval == 0) { if (retval == 0) {
debug(4, L"select_try hit timeout"); debug(4, L"select_try hit timeout");
return select_try_t::TIMEOUT;
} }
return retval > 0; return select_try_t::DATA_READY;
} }
return -1; return select_try_t::IO_ERROR;
} }
/// Read from descriptors until they are empty. /// Read from descriptors until they are empty.
@ -915,39 +916,41 @@ static bool terminal_return_from_job(job_t *j) {
return true; return true;
} }
void job_t::continue_job(bool cont) { void job_t::continue_job(bool send_sigcont) {
// Put job first in the job list. // Put job first in the job list.
promote(); promote();
set_flag(job_flag_t::NOTIFIED, false); set_flag(job_flag_t::NOTIFIED, false);
debug(4, L"%ls job %d, gid %d (%ls), %ls, %ls", cont ? L"Continue" : L"Start", job_id, debug(4, L"%ls job %d, gid %d (%ls), %ls, %ls", send_sigcont ? L"Continue" : L"Start", job_id,
pgid, command_wcstr(), is_completed() ? L"COMPLETED" : L"UNCOMPLETED", pgid, command_wcstr(), is_completed() ? L"COMPLETED" : L"UNCOMPLETED",
is_interactive ? L"INTERACTIVE" : L"NON-INTERACTIVE"); is_interactive ? L"INTERACTIVE" : L"NON-INTERACTIVE");
if (!is_completed()) { if (!is_completed()) {
if (get_flag(job_flag_t::TERMINAL) && is_foreground()) { if (get_flag(job_flag_t::TERMINAL) && is_foreground()) {
// Put the job into the foreground. // Put the job into the foreground and give it control of the terminal.
// Hack: ensure that stdin is marked as blocking first (issue #176). // Hack: ensure that stdin is marked as blocking first (issue #176).
make_fd_blocking(STDIN_FILENO); make_fd_blocking(STDIN_FILENO);
if (!terminal_give_to_job(this, cont)) return; if (!terminal_give_to_job(this, send_sigcont)) {
// This returns without bubbling up the error. Presumably that is OK.
return;
}
} }
// Send the job a continue signal, if necessary. // If both requested and necessary, send the job a continue signal
if (cont) { if (send_sigcont) {
for (process_ptr_t &p : processes) p->stopped = false; // reset the status of each process instance
for (auto &p : processes) {
p->stopped = false;
}
if (get_flag(job_flag_t::JOB_CONTROL)) { // This code used to check for JOB_CONTROL to decide between using killpg to signal
if (killpg(pgid, SIGCONT)) { // all processes in the group or iterating over each process in the group and sending
wperror(L"killpg (SIGCONT)"); // the signal individually. job_t::signal() does the same, but uses the shell's own
return; // pgroup to make that distinction.
} if (!signal(SIGCONT)) {
} else { // This returns without bubbling up the error. Presumably that is OK.
for (const process_ptr_t &p : processes) { debug(2, "Failed to send SIGCONT to any processes in pgroup %d!", pgid);
if (kill(p->pid, SIGCONT) < 0) { return;
wperror(L"kill (SIGCONT)");
return;
}
}
} }
} }
@ -955,36 +958,45 @@ void job_t::continue_job(bool cont) {
// Look for finished processes first, to avoid select() if it's already done. // Look for finished processes first, to avoid select() if it's already done.
process_mark_finished_children(false); process_mark_finished_children(false);
// Wait for job to report. // sigcont is NOT sent only when a job is first created. In the event of "nested jobs"
// (i.e. new jobs started due to the evaluation of functions), we can start a process
// that feeds its output to a child job (e.g. `some_func | some_builtin`, which is two
// jobs), and if the first job in that expression generates enough data that it fills
// its pipe buffer, it'll hang waiting for the second job to read from it. If we block
// on foreground jobs in this case, we'll keep waiting forever.
bool block_on_fg = send_sigcont;
// Wait for data to become available or the status of our own job to change
while (!reader_exit_forced() && !is_stopped() && !is_completed()) { while (!reader_exit_forced() && !is_stopped() && !is_completed()) {
switch (select_try(this)) { auto result = select_try(this);
case 1: {
// debug(1, L"select_try() 1" ); if (result == select_try_t::DATA_READY) {
read_try(this); // Read the data that we know is now available, then scan for finished processes
process_mark_finished_children(false); // but do not block. We don't block so long as we have IO to process, once the
break; // fd buffers are empty we'll block in the second case below.
} read_try(this);
case 0: { process_mark_finished_children(false);
// debug(1, L"select_try() 0" ); }
// No FDs are ready. Look for finished processes. else if (result == select_try_t::TIMEOUT) {
process_mark_finished_children(true); // Our select_try() timeout is ~10ms, so this can be EXTREMELY chatty but this is very useful
break; // if trying to debug an unknown hang in fish. Uncomment to see if we're stuck here.
} // debug(1, L"select_try: no fds returned valid data within the timeout" );
case -1: {
// debug(1, L"select_try() -1" ); // No FDs are ready. Look for finished processes instead.
// If there is no funky IO magic, we can use waitpid instead of handling process_mark_finished_children(block_on_fg);
// child deaths through signals. This gives a rather large speed boost (A }
// factor 3 startup time improvement on my 300 MHz machine) on short-lived else {
// jobs. // This is easily encountered by simply transferring control of the terminal to another process,
// // then suspending it. For example, `nvim`, then `ctrl+z`. Since we are not the foreground process
// This will return early if we get a signal, like SIGHUP. debug(3, L"select_try: interrupted read from job file descriptors" );
process_mark_finished_children(true);
break; // This tends to happen when the foreground process has changed, e.g. it was suspended and control
} // has returned to the shell. It's a good time to block on foreground processes.
default: { process_mark_finished_children(block_on_fg);
DIE("unexpected return value from select_try()");
break; // If it turns out that we encountered this because the file descriptor we were reading from has
} // died, process_mark_finished_children() should take care of changing the status of our is_completed()
// (assuming it is appropriate to do so), in which case we will break out of this loop.
} }
} }
} }
@ -992,19 +1004,15 @@ void job_t::continue_job(bool cont) {
if (is_foreground()) { if (is_foreground()) {
if (is_completed()) { if (is_completed()) {
// It's possible that the job will produce output and exit before we've even read from // It's possible that the job will produce output and exit before we've even read from it.
// it. // In that case, make sure we read that output now, before we've executed any subsequent calls.
// // This is why prompt colors were getting screwed up - the builtin `echo` calls were sometimes
// We'll eventually read the output, but it may be after we've executed subsequent calls // having their output combined with the `set_color` calls in the wrong order!
// This is why my prompt colors kept getting screwed up - the builtin echo calls
// were sometimes having their output combined with the set_color calls in the wrong
// order!
read_try(this); read_try(this);
const std::unique_ptr<process_t> &p = processes.back(); // Set $status only if we are in the foreground and the last process in the job has finished
// and is not a short-circuited builtin.
// Mark process status only if we are in the foreground and the last process in a pipe, auto &p = processes.back();
// and it is not a short circuited builtin.
if ((WIFEXITED(p->status) || WIFSIGNALED(p->status)) && p->pid) { if ((WIFEXITED(p->status) || WIFSIGNALED(p->status)) && p->pid) {
int status = proc_format_status(p->status); int status = proc_format_status(p->status);
proc_set_last_status(get_flag(job_flag_t::NEGATE) ? !status : status); proc_set_last_status(get_flag(job_flag_t::NEGATE) ? !status : status);

View File

@ -41,6 +41,16 @@ enum {
JOB_CONTROL_NONE, JOB_CONTROL_NONE,
}; };
/// The return value of select_try(), indicating IO readiness or an error
enum class select_try_t {
/// One or more fds have data ready for read
DATA_READY,
/// The timeout elapsed without any data becoming available for read
TIMEOUT,
/// The select operation was aborted due to an interrupt or IO error
IO_ERROR,
};
/// A structure representing a single fish process. Contains variables for tracking process state /// A structure representing a single fish process. Contains variables for tracking process state
/// and the process argument list. Actually, a fish process can be either a regular external /// and the process argument list. Actually, a fish process can be either a regular external
/// process, an internal builtin which may or may not spawn a fake IO process during execution, a /// process, an internal builtin which may or may not spawn a fake IO process during execution, a
@ -239,19 +249,20 @@ class job_t {
/// The job is in a stopped state /// The job is in a stopped state
bool is_stopped() const; bool is_stopped() const;
// (This function would just be called `continue` but that's obviously a reserved keyword)
/// Resume a (possibly) stopped job. Puts job in the foreground. If cont is true, restore the /// Resume a (possibly) stopped job. Puts job in the foreground. If cont is true, restore the
/// saved terminal modes and send the process group a SIGCONT signal to wake it up before we /// saved terminal modes and send the process group a SIGCONT signal to wake it up before we
/// block. /// block.
/// ///
/// \param cont Whether the function should wait for the job to complete before returning /// \param send_sigcont Whether SIGCONT should be sent to the job if it is in the foreground.
// (This would just be called `continue` but that's obviously a reserved keyword) void continue_job(bool send_sigcont);
void continue_job(bool cont);
/// Promotes the job to the front of the job list. /// Promotes the job to the front of the job list.
void promote(); void promote();
/// Send the specified signal to all processes in this job. /// Send the specified signal to all processes in this job.
int signal(int signal); /// \return true on success, false on failure.
bool signal(int signal);
/// Return the job instance matching this unique job id. /// Return the job instance matching this unique job id.
/// If id is 0 or less, return the last job used. /// If id is 0 or less, return the last job used.