Refactor tty transfer to be more deliberate

This is a big cleanup to how tty transfer works. Recall that when job
control is active, we transfer the tty to jobs via tcsetpgrp().

Previously, transferring was done "as needed" in continue_job. That is, if
we are running a job, and the job wants the terminal and does not have it,
we will transfer the tty at that point.

This got pretty weird when running mixed pipelines. For example:

    cmd1 | func1 | cmd2

Here we would run `func1` before calling continue_job. Thus the tty
would be transferred by the nested function invocation, and also restored
by that invocation, potentially racing with tty manipulation from cmd1 or
cmd2.

In the new model, migrate the tty transfer responsibility outside of
continue_job. The caller of continue_job is then responsible for setting up
the tty. There's two places where this gets done:

1. In `exec_job`, where we run a job for the first time.

2. In `builtin_fg` where we continue a stopped job in the foreground.

Fixes #8699
This commit is contained in:
ridiculousfish 2022-02-13 13:12:18 -08:00
parent 3f585cddfc
commit df2cbe321c
8 changed files with 163 additions and 167 deletions

View File

@ -24,6 +24,7 @@ Scripting improvements
Interactive improvements
------------------------
- The default command-not-found handler now reports a special error if there is a non-executable file (:issue:`8804`)
- `less` and other interactive commands would occasionally be stopped when run in a pipeline with fish functions; this has been fixed (:issue:`8699`).
New or improved bindings
^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -30,9 +30,11 @@ static int send_to_bg(parser_t &parser, io_streams_t &streams, job_t *j) {
streams.err.append_format(_(L"Send job %d '%ls' to background\n"), j->job_id(),
j->command_wcstr());
parser.job_promote(j);
j->group->set_is_foreground(false);
j->continue_job(parser, false /* not in_foreground */);
if (!j->resume()) {
return STATUS_CMD_ERROR;
}
parser.job_promote(j);
return STATUS_CMD_OK;
}

View File

@ -104,9 +104,21 @@ maybe_t<int> builtin_fg(parser_t &parser, io_streams_t &streams, const wchar_t *
if (!ft.empty()) parser.set_var_and_fire(L"_", ENV_EXPORT, std::move(ft));
reader_write_title(job->command(), parser);
// Note if tty transfer fails, we still try running the job.
parser.job_promote(job);
make_fd_blocking(STDIN_FILENO);
job->group->set_is_foreground(true);
if (job->group->wants_terminal() && job->group->tmodes) {
int res = tcsetattr(STDIN_FILENO, TCSADRAIN, &job->group->tmodes.value());
if (res < 0) wperror(L"tcsetattr");
}
tty_transfer_t transfer;
transfer.to_job_group(job->group);
bool resumed = job->resume();
if (resumed) {
job->continue_job(parser);
return STATUS_CMD_OK;
}
if (job->is_stopped()) transfer.save_tty_modes();
transfer.reclaim();
return resumed ? STATUS_CMD_OK : STATUS_CMD_ERROR;
}

View File

@ -240,7 +240,8 @@ static void internal_exec(env_stack_t &vars, job_t *j, const io_chain_t &block_i
// child_setup_process makes sure signals are properly set up.
dup2_list_t redirs = dup2_list_t::resolve_chain(all_ios);
if (child_setup_process(INVALID_PID, INVALID_PID, *j, false, redirs) == 0) {
if (child_setup_process(false /* not claim_tty */, *j, false /* not is_forked */, redirs) ==
0) {
// Decrement SHLVL as we're removing ourselves from the shell "stack".
if (is_interactive_session()) {
auto shlvl_var = vars.get(L"SHLVL", ENV_GLOBAL | ENV_EXPORT);
@ -397,10 +398,9 @@ bool blocked_signals_for_job(const job_t &job, sigset_t *sigmask) {
static launch_result_t fork_child_for_process(const std::shared_ptr<job_t> &job, process_t *p,
const dup2_list_t &dup2s, const char *fork_type,
const std::function<void()> &child_action) {
// Decide if we want to job to control the tty.
// If so we need to get our pgroup; if not we don't need the pgroup.
bool claim_tty = job->group->wants_terminal();
pid_t fish_pgrp = claim_tty ? getpgrp() : INVALID_PID;
// Claim the tty from fish, if the job wants it and we are the pgroup leader.
pid_t claim_tty_from =
(p->leads_pgrp && job->group->wants_terminal()) ? getpgrp() : INVALID_PID;
pid_t pid = execute_fork();
if (pid < 0) {
@ -422,8 +422,7 @@ static launch_result_t fork_child_for_process(const std::shared_ptr<job_t> &job,
if (!is_parent) {
// Child process.
child_setup_process(claim_tty ? *job->group->get_pgid() : INVALID_PID, fish_pgrp, *job,
true, dup2s);
child_setup_process(claim_tty_from, *job, true, dup2s);
child_action();
DIE("Child process returned control to fork_child lambda!");
}
@ -431,7 +430,6 @@ static launch_result_t fork_child_for_process(const std::shared_ptr<job_t> &job,
s_fork_count++;
FLOGF(exec_fork, L"Fork #%d, pid %d: %s for '%ls'", int(s_fork_count), pid, fork_type,
p->argv0());
terminal_maybe_give_to_job_group(job->group.get(), false);
return launch_result_t::ok;
}
@ -514,11 +512,8 @@ static launch_result_t exec_external_command(parser_t &parser, const std::shared
// Convert our IO chain to a dup2 sequence.
auto dup2s = dup2_list_t::resolve_chain(proc_io_chain);
// Ensure that stdin is blocking before we hand it off (see issue #176). It's a
// little strange that we only do this with stdin and not with stdout or stderr.
// However in practice, setting or clearing O_NONBLOCK on stdin also sets it for the
// other two fds, presumably because they refer to the same underlying file
// (/dev/tty?).
// Ensure that stdin is blocking before we hand it off (see issue #176).
// Note this will also affect stdout and stderr if they refer to the same tty.
make_fd_blocking(STDIN_FILENO);
auto export_arr = parser.vars().export_arr();
@ -561,7 +556,6 @@ static launch_result_t exec_external_command(parser_t &parser, const std::shared
// Ensure it gets set. See #4715, also https://github.com/Microsoft/WSL/issues/2997.
execute_setpgid(p->pid, p->pid, true /* is parent */);
}
terminal_maybe_give_to_job_group(j->group.get(), false);
return launch_result_t::ok;
} else
#endif
@ -1026,6 +1020,9 @@ bool exec_job(parser_t &parser, const shared_ptr<job_t> &j, const io_chain_t &bl
autoclose_pipes_t deferred_pipes;
process_t *const deferred_process = get_deferred_process(j);
// We may want to transfer tty ownership to the pgroup leader.
tty_transfer_t transfer{};
// This loop loops over every process_t in the job, starting it as appropriate. This turns out
// to be rather complex, since a process_t can be one of many rather different things.
//
@ -1080,6 +1077,11 @@ bool exec_job(parser_t &parser, const shared_ptr<job_t> &j, const io_chain_t &bl
break;
}
procs_launched += 1;
// Transfer tty?
if (p->leads_pgrp && j->group->wants_terminal()) {
transfer.to_job_group(j->group);
}
}
pipe_next_read.close();
@ -1119,7 +1121,12 @@ bool exec_job(parser_t &parser, const shared_ptr<job_t> &j, const io_chain_t &bl
}
}
j->continue_job(parser, !j->is_initially_background());
if (!j->is_initially_background()) {
j->continue_job(parser);
}
if (j->is_stopped()) transfer.save_tty_modes();
transfer.reclaim();
return true;
}

View File

@ -137,7 +137,7 @@ int execute_setpgid(pid_t pid, pid_t pgroup, bool is_parent) {
}
}
int child_setup_process(pid_t new_termowner, pid_t fish_pgrp, const job_t &job, bool is_forked,
int child_setup_process(pid_t claim_tty_from, const job_t &job, bool is_forked,
const dup2_list_t &dup2s) {
// Note we are called in a forked child.
for (const auto &act : dup2s.get_actions()) {
@ -161,7 +161,7 @@ int child_setup_process(pid_t new_termowner, pid_t fish_pgrp, const job_t &job,
return err;
}
}
if (new_termowner != INVALID_PID && new_termowner != fish_pgrp) {
if (claim_tty_from >= 0 && tcgetpgrp(STDIN_FILENO) == claim_tty_from) {
// Assign the terminal within the child to avoid the well-known race between tcsetgrp() in
// the parent and the child executing. We are not interested in error handling here, except
// we try to avoid this for non-terminals; in particular pipelines often make non-terminal
@ -170,12 +170,10 @@ int child_setup_process(pid_t new_termowner, pid_t fish_pgrp, const job_t &job,
// another process which may happen if we are run in the background with job control
// enabled. Note if stdin is not a tty, then tcgetpgrp() will return -1 and we will not
// enter this.
if (tcgetpgrp(STDIN_FILENO) == fish_pgrp) {
// Ensure this doesn't send us to the background (see #5963)
signal(SIGTTIN, SIG_IGN);
signal(SIGTTOU, SIG_IGN);
(void)tcsetpgrp(STDIN_FILENO, new_termowner);
}
(void)tcsetpgrp(STDIN_FILENO, getpid());
}
sigset_t sigmask;
sigemptyset(&sigmask);

View File

@ -34,16 +34,12 @@ void report_setpgid_error(int err, bool is_parent, pid_t desired_pgid, const job
const process_t *p);
/// Initialize a new child process. This should be called right away after forking in the child
/// process. If job control is enabled for this job, the process is put in the process group of the
/// job, all signal handlers are reset, signals are unblocked (this function may only be called
/// inside the exec function, which blocks all signals), and all IO redirections and other file
/// descriptor actions are performed.
/// process. This resets signal handlers and applies IO redirections.
///
/// Assign the terminal to new_termowner unless it is INVALID_PID.
/// If \p claim_tty_from is >= 0 and owns the tty, use tcsetpgrp() to claim it.
///
/// \return 0 on success, -1 on failure. When this function returns, signals are always unblocked.
/// On failure, signal handlers, io redirections and process group of the process is undefined.
int child_setup_process(pid_t new_termowner, pid_t fish_pgrp, const job_t &job, bool is_forked,
/// \return 0 on success, -1 on failure, in which case an error will be printed.
int child_setup_process(pid_t claim_tty_from, const job_t &job, bool is_forked,
const dup2_list_t &dup2s);
/// Call fork(), retrying on failure a few times.

View File

@ -137,6 +137,7 @@ bool job_t::signal(int signal) {
return false;
}
} else {
// This job lives in fish's pgroup and we need to signal procs individually.
for (const auto &p : processes) {
if (!p->completed && p->pid && kill(p->pid, signal) == -1) {
return false;
@ -788,61 +789,48 @@ void proc_update_jiffies(parser_t &parser) {
}
}
// Return control of the terminal to a job's process group. restore_attrs is true if we are
// restoring a previously-stopped job, in which case we need to restore terminal attributes.
int terminal_maybe_give_to_job_group(const job_group_t *jg, bool continuing_from_stopped) {
enum { notneeded = 0, success = 1, error = -1 };
if (!jg->wants_terminal() || !jg->get_pgid()) {
// The job doesn't want the terminal, or doesn't have a pgroup yet.
return notneeded;
// static
bool tty_transfer_t::try_transfer(const job_group_ref_t &jg) {
assert(jg && "Null job group");
if (!jg->wants_terminal()) {
// The job doesn't want the terminal.
return false;
}
// Get the pgid.
// Get the pgid; we must have one if we want the terminal.
pid_t pgid = *jg->get_pgid();
assert(pgid >= 0 && "Invalid pgid");
// If we are continuing, ensure that stdin is marked as blocking first (issue #176).
// Also restore tty modes.
if (continuing_from_stopped) {
make_fd_blocking(STDIN_FILENO);
if (jg->tmodes.has_value()) {
int res = tcsetattr(STDIN_FILENO, TCSADRAIN, &jg->tmodes.value());
if (res < 0) wperror(L"tcsetattr");
}
}
// It should never be fish's pgroup.
pid_t fish_pgrp = getpgrp();
assert(pgid != fish_pgrp && "Job should not have fish's pgroup");
// Ok, we want to transfer to the child.
// Note it is important to be very careful about calling tcsetpgrp()!
// fish ignores SIGTTOU which means that it has the power to reassign the tty even if it doesn't
// own it. This means that other processes may get SIGTTOU and become zombies.
// Check who own the tty now. Thre's five cases of interest:
// 1. The process's pgrp is the same as fish. In that case there is nothing to do.
// 2. There is no tty at all (tcgetpgrp() returns -1). For example running from a pure script.
// Check who own the tty now. There's four cases of interest:
// 1. There is no tty at all (tcgetpgrp() returns -1). For example running from a pure script.
// Of course do not transfer it in that case.
// 3. The tty is owned by the process. This comes about often, as the process will call
// 2. The tty is owned by the process. This comes about often, as the process will call
// tcsetpgrp() on itself between fork ane exec. This is the essential race inherent in
// tcsetpgrp(). In this case we want to reclaim the tty, but do not need to transfer it
// ourselves since the child won the race.
// 4. The tty is owned by a different process. This may come about if fish is running in the
// 3. The tty is owned by a different process. This may come about if fish is running in the
// background with job control enabled. Do not transfer it.
// 5. The tty is owned by fish. In that case we want to transfer the pgid.
pid_t fish_pgrp = getpgrp();
if (fish_pgrp == pgid) {
// Case 1.
return notneeded;
}
// 4. The tty is owned by fish. In that case we want to transfer the pgid.
pid_t current_owner = tcgetpgrp(STDIN_FILENO);
if (current_owner < 0) {
// Case 2.
return notneeded;
// Case 1.
return false;
} else if (current_owner == pgid) {
// Case 3.
return success;
// Case 2.
return true;
} else if (current_owner != pgid && current_owner != fish_pgrp) {
// Case 4.
return notneeded;
// Case 3.
return false;
}
// Case 5 - we do want to transfer it.
// Case 4 - we do want to transfer it.
// The tcsetpgrp(2) man page says that EPERM is thrown if "pgrp has a supported value, but
// is not the process group ID of a process in the same session as the calling process."
@ -866,19 +854,19 @@ int terminal_maybe_give_to_job_group(const job_group_t *jg, bool continuing_from
case ENOTTY:
// stdin is not a tty. This may come about if job control is enabled but we are
// not a tty - see #6573.
return notneeded;
return false;
case EBADF:
// stdin has been closed. Workaround a glibc bug - see #3644.
redirect_tty_output();
return notneeded;
return false;
default:
wperror(L"tcgetpgrp");
return error;
return false;
}
}
if (getpgrp_res == pgid) {
FLOGF(proc_termowner, L"Process group %d already has control of terminal", pgid);
return notneeded;
return true;
}
bool pgroup_terminated = false;
@ -906,12 +894,12 @@ int terminal_maybe_give_to_job_group(const job_group_t *jg, bool continuing_from
} else if (errno == ENOTTY) {
// stdin is not a TTY. In general we expect this to be caught via the tcgetpgrp
// call's EBADF handler above.
return notneeded;
return false;
} else {
FLOGF(warning, _(L"Could not send job %d ('%ls') with pgid %d to foreground"),
jg->get_job_id(), jg->get_command().c_str(), pgid);
wperror(L"tcsetpgrp");
return error;
return false;
}
if (pgroup_terminated) {
@ -921,33 +909,12 @@ int terminal_maybe_give_to_job_group(const job_group_t *jg, bool continuing_from
// process in the group terminated and didn't need to access the terminal, otherwise
// it would have hung waiting for terminal IO (SIGTTIN). We can safely ignore this.
FLOGF(proc_termowner, L"tcsetpgrp called but process group %d has terminated.\n", pgid);
return notneeded;
return false;
}
break;
}
return success;
}
/// Returns control of the terminal to the shell, and saves the terminal attribute state to the job
/// group, so that we can restore the terminal ownership to the job at a later time.
static void terminal_return_from_job_group(job_group_t *jg) {
errno = 0;
FLOG(proc_pgroup, "fish reclaiming terminal");
if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1) {
FLOGF(warning, _(L"Could not return shell to foreground"));
wperror(L"tcsetpgrp");
return;
}
// Save jobs terminal modes.
struct termios tmodes {};
if (tcgetattr(STDIN_FILENO, &tmodes) == 0) {
jg->tmodes = tmodes;
} else if (errno != ENOTTY) {
wperror(L"tcgetattr");
}
return true;
}
bool job_t::is_foreground() const { return group->is_foreground(); }
@ -964,66 +931,30 @@ maybe_t<pid_t> job_t::get_last_pid() const {
job_id_t job_t::job_id() const { return group->get_job_id(); }
void job_t::continue_job(parser_t &parser, bool in_foreground) {
// Put job first in the job list.
parser.job_promote(this);
bool job_t::resume() {
mut_flags().notified_of_stop = false;
int pgid = -2;
if (auto tmp = get_pgid()) pgid = *tmp;
// We must send_sigcont if the job is stopped.
bool send_sigcont = this->is_stopped();
FLOGF(proc_job_run, L"%ls job %d, gid %d (%ls), %ls, %ls",
send_sigcont ? L"Continue" : L"Start", job_id(), pgid, command_wcstr(),
is_completed() ? L"COMPLETED" : L"UNCOMPLETED",
parser.libdata().is_interactive ? L"INTERACTIVE" : L"NON-INTERACTIVE");
// Make sure we retake control of the terminal before leaving this function.
bool term_transferred = false;
cleanup_t take_term_back([&] {
if (term_transferred) {
// Issues of interest include #121 and #2114.
terminal_return_from_job_group(this->group.get());
}
});
if (!is_completed()) {
int transfer = terminal_maybe_give_to_job_group(this->group.get(), send_sigcont);
if (transfer < 0) {
// terminal_maybe_give_to_job prints an error.
return;
}
term_transferred = (transfer > 0);
// If both requested and necessary, send the job a continue signal.
if (send_sigcont) {
// This code used to check for JOB_CONTROL to decide between using killpg to signal all
// processes in the group or iterating over each process in the group and sending the
// signal individually. job_t::signal() does the same, but uses the shell's own pgroup
// to make that distinction.
if (!signal(SIGCONT)) {
FLOGF(proc_pgroup, "Failed to send SIGCONT to any processes in pgroup %d!", pgid);
// This returns without bubbling up the error. Presumably that is OK.
return;
if (!this->signal(SIGCONT)) {
FLOGF(proc_pgroup, "Failed to send SIGCONT to procs in job %ls", this->command_wcstr());
return false;
}
// reset the status of each process instance
for (auto &p : processes) {
for (auto &p : this->processes) {
p->stopped = false;
}
}
return true;
}
void job_t::continue_job(parser_t &parser) {
FLOGF(proc_job_run, L"Run job %d, gid %d (%ls), %ls, %ls",
is_completed() ? L"COMPLETED" : L"UNCOMPLETED",
parser.libdata().is_interactive ? L"INTERACTIVE" : L"NON-INTERACTIVE");
if (in_foreground) {
// Wait for the status of our own job to change.
while (!check_cancel_from_fish_signal() && !is_stopped() && !is_completed()) {
process_mark_finished_children(parser, true);
}
}
}
if (in_foreground && is_completed()) {
if (is_completed()) {
// Set $status only if we are in the foreground and the last process in the job has
// finished.
const auto &p = processes.back();
@ -1055,6 +986,37 @@ void hup_jobs(const job_list_t &jobs) {
}
}
void tty_transfer_t::to_job_group(const job_group_ref_t &jg) {
assert(!owner_ && "Terminal already transferred");
if (tty_transfer_t::try_transfer(jg)) {
owner_ = jg;
}
}
void tty_transfer_t::save_tty_modes() {
if (owner_) {
struct termios tmodes {};
if (tcgetattr(STDIN_FILENO, &tmodes) == 0) {
owner_->tmodes = tmodes;
} else if (errno != ENOTTY) {
wperror(L"tcgetattr");
}
}
}
void tty_transfer_t::reclaim() {
if (this->owner_) {
FLOG(proc_pgroup, "fish reclaiming terminal");
if (tcsetpgrp(STDIN_FILENO, getpgrp()) == -1) {
FLOGF(warning, _(L"Could not return shell to foreground"));
wperror(L"tcsetpgrp");
}
this->owner_ = nullptr;
}
}
tty_transfer_t::~tty_transfer_t() { assert(!this->owner_ && "Forgot to reclaim() the tty"); }
static std::atomic<bool> s_is_within_fish_initialization{false};
void set_is_within_fish_initialization(bool flag) { s_is_within_fish_initialization.store(flag); }

View File

@ -176,6 +176,32 @@ class internal_proc_t {
/// function
enum { INVALID_PID = -2 };
// Allows transferring the tty to a job group, while it runs.
class tty_transfer_t : nonmovable_t, noncopyable_t {
public:
tty_transfer_t() = default;
/// Transfer to the given job group, if it wants to own the terminal.
void to_job_group(const job_group_ref_t &jg);
/// Reclaim the tty if we transferred it.
void reclaim();
/// Save the current tty modes into the owning job group, if we are transferred.
void save_tty_modes();
/// The destructor will assert if reclaim() has not been called.
~tty_transfer_t();
private:
// Try transferring the tty to the given job group.
// \return true if we should reclaim it.
static bool try_transfer(const job_group_ref_t &jg);
// The job group which owns the tty, or empty if none.
job_group_ref_t owner_;
};
/// A structure representing a single fish process. Contains variables for tracking process state
/// and the process argument list. Actually, a fish process can be either a regular external
/// process, an internal builtin which may or may not spawn a fake IO process during execution, a
@ -466,11 +492,12 @@ class job_t : noncopyable_t {
/// \return whether this job and its parent chain are fully constructed.
bool job_chain_is_fully_constructed() const;
/// Continues running a job, which may be stopped, or may just have started.
/// This will send SIGCONT if the job is stopped.
/// If \p in_foreground is set, then wait for the job to stop or complete;
/// otherwise do not wait for the job.
void continue_job(parser_t &parser, bool in_foreground = true);
/// Run ourselves. Returning once we complete or stop.
void continue_job(parser_t &parser);
/// Prepare to resume a stopped job by sending SIGCONT and clearing the stopped flag.
/// \return true on success, false if we failed to send the signal.
bool resume();
/// Send the specified signal to all processes in this job.
/// \return true on success, false on failure.
@ -545,15 +572,6 @@ bool is_within_fish_initialization();
/// Send SIGHUP to the list \p jobs, excepting those which are in fish's pgroup.
void hup_jobs(const job_list_t &jobs);
/// Give ownership of the terminal to the specified job group, if it wants it.
///
/// \param jg The job group to give the terminal to.
/// \param continuing_from_stopped If this variable is set, we are giving back control to a job that
/// was previously stopped. In that case, we need to set the terminal attributes to those saved in
/// the job.
/// \return 1 if transferred, 0 if no transfer was necessary, -1 on error.
int terminal_maybe_give_to_job_group(const job_group_t *jg, bool continuing_from_stopped);
/// Add a job to the list of PIDs/PGIDs we wait on even though they are not associated with any
/// jobs. Used to avoid zombie processes after disown.
void add_disowned_job(const job_t *j);