diff --git a/src/exec.cpp b/src/exec.cpp
index 793f6b52f..cb86c56e4 100644
--- a/src/exec.cpp
+++ b/src/exec.cpp
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include <stack>
 #include
 #include
@@ -610,7 +611,8 @@ static bool handle_builtin_output(job_t *j, process_t *p, io_chain_t *io_chain,
     if (fork_was_skipped) {
         p->completed = 1;
         if (p->is_last_in_job) {
-            debug(4, L"Set status of job %d (%ls) to %d using short circuit", j->job_id, j->preview().c_str(), p->status);
+            debug(4, L"Set status of job %d (%ls) to %d using short circuit", j->job_id,
+                  j->preview().c_str(), p->status);

             int status = p->status;
             proc_set_last_status(j->get_flag(job_flag_t::NEGATE) ? (!status) : status);
@@ -996,6 +998,47 @@ void exec_job(parser_t &parser, shared_ptr<job_t> j) {
         return;
     }

+    // Unfortunately `exec_job()` is called recursively when functions are encountered, with a new
+    // job id (and therefore pgrp) each time, but always from the main thread. This breaks terminal
+    // control since new pgrps take terminal control away from commands upstream in a different pgrp.
+    // We try to work around this with a heuristic to determine whether to reuse the same pgrp as the
+    // last-spawned pgrp if we are part of an existing job pipeline (keeping in mind that new jobs are
+    // recursively started for both foreground and background jobs, and that a function can expand
+    // to more than one external command, one (or more?) of which may want to read from upstream or
+    // write to downstream of a pipe).
+    // By keeping track of (select) "jobs in flight" we can try to marshal newly-created external
+    // processes into existing pgrps. Fixes #3952.
+    // This is a HACK and the correct solution would be to pass the active job through the pipeline
+    // to the newly established parser context so that the function as parsed and evaluated can be
+    // directly associated with this job and not a new one, BUT sometimes functions NEED to start a
+    // new job. This HACK seeks a compromise by letting functions trigger the unilateral creation of
+    // a new job, but reusing the "parent" job's existing pgrp in case of terminal control.
+    static std::stack<shared_ptr<job_t>> active_jobs;
+    // There's an assumption that there's a one-to-one mapping between jobs under job control and
+    // pgrps. When we share a parent job's pgrp, we risk reaping its processes before it is fully
+    // constructed, causing later setpgrp(2) calls to fail (#5219). While the parent job is still
+    // under construction, child jobs have job_flag_t::WAIT_BY_PROCESS set to prevent early reaping.
+    // We store them here until the parent job is constructed, at which point it unsets this flag.
+    static std::stack<shared_ptr<job_t>> child_jobs;
+
+    auto parent_job = active_jobs.empty() ? nullptr : active_jobs.top();
+    bool job_pushed = false;
+    if (j->get_flag(job_flag_t::TERMINAL) && j->get_flag(job_flag_t::JOB_CONTROL)) {
+        // This will be popped before this job leaves exec_job.
+        active_jobs.push(j);
+        job_pushed = true;
+    }
+
+    if (parent_job && j->processes.front()->type == EXTERNAL) {
+        if (parent_job->pgid != INVALID_PID) {
+            j->pgid = parent_job->pgid;
+            j->set_flag(job_flag_t::JOB_CONTROL, true);
+            j->set_flag(job_flag_t::NESTED, true);
+            j->set_flag(job_flag_t::WAIT_BY_PROCESS, true);
+            child_jobs.push(j);
+        }
+    }
+
     // Verify that all IO_BUFFERs are output. We used to support a (single, hacked-in) magical input
     // IO_BUFFER used by fish_pager, but now the claim is that there are no more clients and it is
     // removed. This assertion double-checks that.
@@ -1053,8 +1096,8 @@ void exec_job(parser_t &parser, shared_ptr<job_t> j) {
     }
     pipe_next_read.close();
-
-    debug(3, L"Created job %d from command '%ls' with pgrp %d", j->job_id, j->command_wcstr(), j->pgid);
+    debug(3, L"Created job %d from command '%ls' with pgrp %d", j->job_id, j->command_wcstr(),
+          j->pgid);
     j->set_flag(job_flag_t::CONSTRUCTED, true);

     if (!j->is_foreground()) {
@@ -1062,6 +1105,21 @@ void exec_job(parser_t &parser, shared_ptr<job_t> j) {
         env_set(L"last_pid", ENV_GLOBAL, { to_string(proc_last_bg_pid) });
     }

+    if (job_pushed) {
+        active_jobs.pop();
+    }
+
+    if (!parent_job) {
+        // Unset WAIT_BY_PROCESS on all child jobs. We could leave it, but this speeds up the
+        // execution of `process_mark_finished_children()`.
+        while (!child_jobs.empty()) {
+            auto child = child_jobs.top();
+            child_jobs.pop();
+
+            child->set_flag(job_flag_t::WAIT_BY_PROCESS, false);
+        }
+    }
+
     if (!exec_error) {
         j->continue_job(false);
     } else {
diff --git a/src/proc.cpp b/src/proc.cpp
index d38782ae6..c729c46da 100644
--- a/src/proc.cpp
+++ b/src/proc.cpp
@@ -395,7 +395,10 @@ static bool process_mark_finished_children(bool block_on_fg) {
         }

         if (j != job_fg && j->is_foreground() && !j->is_stopped() && !j->is_completed()) {
-            assert(job_fg == nullptr && "More than one active, fully-constructed foreground job!");
+            // Ignore jobs created via function evaluation in this sanity check
+            if (!job_fg || (!job_fg->get_flag(job_flag_t::NESTED) && !j->get_flag(job_flag_t::NESTED))) {
+                assert(job_fg == nullptr && "More than one active, fully-constructed foreground job!");
+            }
             job_fg = j;
         }
@@ -421,15 +424,36 @@ static bool process_mark_finished_children(bool block_on_fg) {
            options &= ~WNOHANG;
        }

+        bool wait_by_process = j->get_flag(job_flag_t::WAIT_BY_PROCESS);
+        process_list_t::iterator process = j->processes.begin();
        // waitpid(2) returns 1 process each time, we need to keep calling it until we've reaped all
        // children of the pgrp in question or else we can't reset the dirty_state flag. In all
        // cases, calling waitpid(2) is faster than potentially calling select_try() on a process
        // that has exited, which will force us to wait the full timeout before coming back here and
        // calling waitpid() again.
        while (true) {
-            int status = -1;
-            // A negative PID passed in to `waitpid()` means wait on any child in that process group
-            auto pid = waitpid(-1 * j->pgid, &status, options);
+            int status;
+            pid_t pid;
+
+            if (wait_by_process) {
+                // If the evaluation of a function resulted in the sharing of a pgroup between the
+                // real job and the job that shouldn't have been created as a separate job AND the
+                // parent job is still under construction (which is the case when continue_job() is
+                // first called on the child job during the recursive call to exec_job() before the
+                // parent job has been fully constructed), we need to call waitpid(2) on the
+                // individual processes of the child job instead of using a catch-all waitpid(2)
+                // call on the job's process group.
+                if (process == j->processes.end()) {
+                    break;
+                }
+                assert((*process)->pid != INVALID_PID && "Waiting by process on an invalid PID!");
+                pid = waitpid((*process)->pid, &status, options);
+                process++;
+            } else {
+                // A negative PID passed in to `waitpid()` means wait on any child in that process group
+                pid = waitpid(-1 * j->pgid, &status, options);
+            }
+
            // Never make two calls to waitpid(2) without WNOHANG (i.e. with "HANG") in a row,
with "HANG") in a row, // because we might wait on a non-stopped job that becomes stopped, but we don't refresh // our view of the process state before calling waitpid(2) again here. @@ -439,19 +463,19 @@ static bool process_mark_finished_children(bool block_on_fg) { // A child process has been reaped handle_child_status(pid, status); } - else if (pid == 0) { + else if (pid == 0 || errno == ECHILD) { // No killed/dead children in this particular process group - break; + if (!wait_by_process) { + break; + } } else { // pid < 0 indicates an error. One likely failure is ECHILD (no children), which is not // an error and is ignored. The other likely failure is EINTR, which means we got a // signal, which is considered an error. We absolutely do not break or return on error, // as we need to iterate over all constructed jobs but we only call waitpid for one pgrp // at a time. We do bypass future waits in case of error, however. - if (errno != ECHILD) { - has_error = true; - wperror(L"waitpid in process_mark_finished_children"); - } + has_error = true; + wperror(L"waitpid in process_mark_finished_children"); break; } } @@ -1034,13 +1058,13 @@ void job_t::continue_job(bool send_sigcont) { // discussion in #5219). process_mark_finished_children(false); - // sigcont is NOT sent only when a job is first created. In the event of "nested jobs" - // (i.e. new jobs started due to the evaluation of functions), we can start a process - // that feeds its output to a child job (e.g. `some_func | some_builtin`, which is two - // jobs), and if the first job in that expression generates enough data that it fills - // its pipe buffer, it'll hang waiting for the second job to read from it. If we block - // on foreground jobs in this case, we'll keep waiting forever. - bool block_on_fg = send_sigcont; + // If this is a child job and the parent job is still under construction (i.e. job1 | + // some_func), we can't block on execution of the nested job for `some_func`. Doing + // so can cause hangs if job1 emits more data than fits in the OS pipe buffer. + // The test for block_on_fg is this->parent_job.is_constructed(), which coincides + // with WAIT_BY_PROCESS (which will have to do since we don't store a reference to + // the parent job in the job_t structure). + bool block_on_fg = !get_flag(job_flag_t::WAIT_BY_PROCESS); // Wait for data to become available or the status of our own job to change while (!reader_exit_forced() && !is_stopped() && !is_completed()) { diff --git a/src/proc.h b/src/proc.h index b46ac034f..0d238a73e 100644 --- a/src/proc.h +++ b/src/proc.h @@ -163,10 +163,17 @@ enum class job_flag_t { SKIP_NOTIFICATION, /// Whether the exit status should be negated. This flag can only be set by the not builtin. NEGATE, - /// Whether the job is under job control. + /// Whether the job is under job control, i.e. has its own pgrp. JOB_CONTROL, /// Whether the job wants to own the terminal when in the foreground. TERMINAL, + /// This job was created via a recursive call to exec_job upon evaluation of a function. + /// Ideally it should not be a top-level job, and there are places where it won't be treated + /// as such. + NESTED, + /// This job shares a pgrp with a not-yet-constructed job, so we can't waitpid on its pgid + /// directly. Hack to work around fucntions starting new jobs. See `exec_job()`. + WAIT_BY_PROCESS }; typedef int job_id_t;