Correct a race in topic monitor

This fixes a race condition in the topic monitor. A thread may decide to enter the wait queue, but the generation list changes before the thread actually enters it, and so our thread will wait forever, resulting in a hang. This commit also simplifies the implementation of the topic monitor considerably; on reflection the whole "metagen" thing isn't providing any value, and we should just compare generations directly. In the new design, we have a lock-protected list of current generations, along with a boolean indicating whether some thread is currently reading from the pipe. The reader (only one at a time) is responsible for broadcasting notifications via a condition variable.
2025-02-22 01:13:10 +08:00 · 2019-05-31 09:13:21 -07:00 · 2019-05-31 09:13:21 -07:00 · 4e03d3c264
commit 4e03d3c264
parent d920a618de
3 changed files with 111 additions and 89 deletions
--- a/src/common.h
+++ b/src/common.h
@ -635,6 +635,9 @@ class acquired_lock {
    /// Create an acquired_lock from a global lock.
    /// This is used in weird cases where a global lock protects more than one piece of data.
    /// \param lk the global mutex that guards the data.
    /// \param v pointer to the piece of data guarded by \p lk.
    /// \return an acquired_lock constructed from \p lk and \p v.
    /// NOTE(review): presumably the constructor takes ownership of \p lk for the lifetime of the
    /// returned object; the constructor is not visible here - confirm.
    static acquired_lock from_global(std::mutex &lk, Data *v) { return acquired_lock{lk, v}; }
    /// Expose the lock object stored in this acquired_lock.
    /// This exists so that the lock can be passed to std::condition_variable::wait(), which needs a
    /// std::unique_lock<std::mutex> to release while blocking.
    /// \return a mutable reference to the lock, for use with a condition variable.
    std::unique_lock<std::mutex> &get_lock() { return lock; }
 };
 // A lock that owns a piece of data
--- a/src/topic_monitor.cpp
+++ b/src/topic_monitor.cpp
@ -26,6 +26,12 @@
 /// pointless at-exit handler for the dtor.
 static topic_monitor_t *const s_principal = new topic_monitor_t();
 /// Compute the metagen of a topic generation list.
 /// The metagen is simply the sum of all topic generations in \p lst. Note it is monotone.
 /// \param lst the generation list to summarize.
 /// \return the sum of every generation in \p lst, starting from zero.
 static generation_t metagen_for(const generation_list_t &lst) {
    generation_t total{0};
    for (const auto &gen : lst) {
        total = total + gen;
    }
    return total;
 }
 topic_monitor_t &topic_monitor_t::principal() {
    // Do not attempt to move s_principal to a function-level static, it needs to be accessed from a
    // signal handler so it must not be lazily created.
@ -71,9 +77,7 @@ void topic_monitor_t::post(topic_t topic) {
    // Ignore EAGAIN and other errors (which conceivably could occur during shutdown).
 }
-generation_list_t topic_monitor_t::updated_gens() {
+generation_list_t topic_monitor_t::updated_gens_in_data(acquired_lock<data_t> &data) {
    auto current_gens = current_gen_.acquire();
    // Atomically acquire the pending updates, swapping in 0.
    // If there are no pending updates (likely), just return.
    // Otherwise CAS in 0 and update our topics.
@ -82,7 +86,7 @@ generation_list_t topic_monitor_t::updated_gens() {
    bool cas_success;
    do {
        raw = pending_updates_.load(relaxed);
-        if (raw == 0) return *current_gens;
+        if (raw == 0) return data->current_gens;
        cas_success = pending_updates_.compare_exchange_weak(raw, 0, relaxed, relaxed);
    } while (!cas_success);
@ -90,76 +94,88 @@ generation_list_t topic_monitor_t::updated_gens() {
    auto topics = topic_set_t::from_raw(raw);
    for (topic_t topic : topic_iter_t{}) {
        if (topics.get(topic)) {
-            current_gens->at(topic) += 1;
+            data->current_gens.at(topic) += 1;
-            FLOG(topic_monitor, "Updating topic", (int)topic, "to", current_gens->at(topic));
+            FLOG(topic_monitor, "Updating topic", (int)topic, "to", data->current_gens.at(topic));
        }
    }
-    return *current_gens;
+    // Report our change.
    data_notifier_.notify_all();
    return data->current_gens;
 }
-void topic_monitor_t::await_metagen(generation_t mgen) {
+generation_list_t topic_monitor_t::updated_gens() {
-    // Fast check of the metagen before taking the lock. If it's changed we're done.
+    auto data = data_.acquire();
-    generation_t current = current_metagen();
+    return updated_gens_in_data(data);
-    FLOG(topic_monitor, "TID", thread_id(), "local mgen", mgen, ": current", current);
+}
    if (mgen != current) return;
-    // Take the lock (which may take a long time) and then check again.
+bool topic_monitor_t::try_update_gens_maybe_becoming_reader(generation_list_t *gens) {
-    std::unique_lock<std::mutex> locker{wait_queue_lock_};
+    bool become_reader = false;
-    current = current_metagen();
+    auto data = data_.acquire();
-    FLOG(topic_monitor, "TID", thread_id(), "local mgen", mgen, ": current", current,
+    for (;;) {
-         "acquired lock");
+        // See if the updated gen list has changed. If so we don't need to become the reader.
-    if (mgen != current) return;
+        auto current = updated_gens_in_data(data);
        FLOG(topic_monitor, "TID", thread_id(), "local mgen", metagen_for(*gens), ": current",
             metagen_for(current));
        if (*gens != current) {
            *gens = current;
            break;
        }
-    // Our metagen hasn't changed. Push our metagen onto the queue, then wait until we're the
+        // The generations haven't changed. Perhaps we become the reader.
-    // lowest. If multiple waiters are the lowest, then anyone can be the observer.
+        if (!data->has_reader) {
-    // Note the reason for picking the lowest metagen is to avoid a priority inversion where a lower
+            become_reader = true;
-    // metagen (therefore someone who should see changes) is blocked waiting for a higher metagen
+            data->has_reader = true;
-    // (who has already seen the changes).
+            break;
-    wait_queue_.push(mgen);
+        }
-    while (wait_queue_.top() != mgen) {
+        // Not the reader, wait until the reader notifies us and loop again.
-        FLOG(topic_monitor, "TID", thread_id(), "local mgen", mgen, "releasing lock for",
+        data_notifier_.wait(data.get_lock());
             wait_queue_.top());
        wait_queue_notifier_.wait(locker);
    }
-    wait_queue_.pop();
+    return become_reader;
 }
-    // We now have the lowest metagen in the wait queue. Notice we still hold the lock.
+generation_list_t topic_monitor_t::await_gens(const generation_list_t &input_gens) {
-    // Read until the metagen changes. It may already have changed.
+    generation_list_t gens = input_gens;
-    // Note because changes are coalesced, we can read a lot, potentially draining the pipe.
+    while (gens == input_gens) {
-    current = current_metagen();
+        bool become_reader = try_update_gens_maybe_becoming_reader(&gens);
-    FLOG(topic_monitor, "TID", thread_id(), "local mgen", mgen, "considering waiting for mgen",
+        if (become_reader) {
-         current);
+            // Now we are the reader. Read from the pipe, and then update with any changes.
-    while (mgen == current) {
+            // Note we no longer hold the lock.
-        int fd = pipes_.read.fd();
+            assert(gens == input_gens &&
                   "Generations should not have changed if we are the reader.");
            int fd = pipes_.read.fd();
 #if TOPIC_MONITOR_TSAN_WORKAROUND
-        // Under tsan our notifying pipe is non-blocking, so we would busy-loop on the read() call
+            // Under tsan our notifying pipe is non-blocking, so we would busy-loop on the read()
-        // until data is available (that is, fish would use 100% cpu while waiting for processes).
+            // call until data is available (that is, fish would use 100% cpu while waiting for
-        // The select prevents that.
+            // processes). The select prevents that.
-        fd_set fds;
+            fd_set fds;
-        FD_ZERO(&fds);
+            FD_ZERO(&fds);
-        FD_SET(fd, &fds);
+            FD_SET(fd, &fds);
-        (void)select(fd + 1, &fds, nullptr, nullptr, nullptr /* timeout */);
+            (void)select(fd + 1, &fds, nullptr, nullptr, nullptr /* timeout */);
 #endif
-        uint8_t ignored[PIPE_BUF];
+            uint8_t ignored[PIPE_BUF];
-        (void)read(fd, ignored, sizeof ignored);
+            (void)read(fd, ignored, sizeof ignored);
        current = current_metagen();
        FLOG(topic_monitor, "TID", thread_id(), "local mgen", mgen,
             "read() complete, current mgen is", current);
    }
-    // Release the lock and wake up the remaining waiters.
+            // We are finished reading. We must stop being the reader, and post on the condition
-    FLOG(topic_monitor, "TID", thread_id(), "local mgen", mgen, "awakening all waiters");
+            // variable to wake up any other threads waiting for us to finish reading.
-    locker.unlock();
+            auto data = data_.acquire();
-    wait_queue_notifier_.notify_all();
+            gens = data->current_gens;
            FLOG(topic_monitor, "TID", thread_id(), "local mgen", metagen_for(input_gens),
                 "read() complete, current mgen is", metagen_for(gens));
            assert(data->has_reader && "We should be the reader");
            data->has_reader = false;
            data_notifier_.notify_all();
        }
    }
    return gens;
 }
 topic_set_t topic_monitor_t::check(generation_list_t *gens, topic_set_t topics, bool wait) {
    if (topics.none()) return topics;
    generation_list_t current = updated_gens();
    topic_set_t changed{};
    for (;;) {
        // Load the topic list and see if anything has changed.
        generation_list_t current = updated_gens();
        for (topic_t topic : topic_iter_t{}) {
            if (topics.get(topic)) {
                assert(gens->at(topic) <= current.at(topic) &&
@ -176,9 +192,8 @@ topic_set_t topic_monitor_t::check(generation_list_t *gens, topic_set_t topics,
            break;
        }
-        // Try again. Note that we use the metagen corresponding to the topic list we just
+        // Wait until our gens change.
-        // inspected, not the current one (which may have updates since we checked).
+        current = await_gens(current);
        await_metagen(metagen_for(current));
    }
    return changed;
 }
--- a/src/topic_monitor.h
+++ b/src/topic_monitor.h
@ -11,7 +11,6 @@
 #include <condition_variable>
 #include <limits>
 #include <numeric>
 #include <queue>
 /** Topic monitoring support. Topics are conceptually "a thing that can happen." For example,
 delivery of a SIGINT, a child process exits, etc. It is possible to post to a topic, which means
@ -73,47 +72,52 @@ class topic_monitor_t {
    static_assert(sizeof(topic_set_raw_t) * CHAR_BIT >= enum_count<topic_t>(),
                  "topic_set_raw is too small");
-    /// The current topic generation list, protected by a mutex. Note this may be opportunistically
+    // Some stuff that needs to be protected by the same lock.
-    /// updated at the point it is queried.
+    struct data_t {
-    owning_lock<generation_list_t> current_gen_{{}};
+        /// The current generation list.
        generation_list_t current_gens{};
        /// Whether there is a thread currently reading from the notifier pipe.
        bool has_reader{false};
    };
    owning_lock<data_t> data_{};
    /// Condition variable for broadcasting notifications.
    /// This is associated with data_'s mutex.
    std::condition_variable data_notifier_{};
    /// The set of topics which have pending increments.
    /// This is managed via atomics.
    std::atomic<topic_set_raw_t> pending_updates_{};
-    /// When a topic set is queried in a blocking way, the waiters are put into a queue. The waiter
+    /// Self-pipes used to communicate changes.
-    /// with the smallest metagen is responsible for announcing the change to the rest of the
+    /// The writer is a signal handler.
-    /// waiters. (The metagen is just the sum of the current generations.) Note that this is a
+    /// "The reader" refers to a thread that wants to wait for changes. Only one thread can be the
-    /// max-heap that defaults to std::less; by using std::greater it becomes a min heap. This is
+    /// reader at a given time.
    /// protected by wait_queue_lock_.
    std::priority_queue<generation_t, std::vector<generation_t>, std::greater<generation_t>>
        wait_queue_;
    /// Mutex guarding the wait queue.
    std::mutex wait_queue_lock_{};
    /// Condition variable for broadcasting notifications.
    std::condition_variable wait_queue_notifier_{};
    /// Pipes used to communicate changes from the signal handler.
    autoclose_pipes_t pipes_;
-    /// \return the metagen for a topic generation list.
+    /// Apply any pending updates to the data.
-    /// The metagen is simply the sum of topic generations. Note it is monotone.
+    /// This accepts data because it must be locked.
-    static inline generation_t metagen_for(const generation_list_t &lst) {
+    /// \return the updated generation list.
-        return std::accumulate(lst.begin(), lst.end(), generation_t{0});
+    generation_list_t updated_gens_in_data(acquired_lock<data_t> &data);
    }
-    /// Wait for the current metagen to become different from \p gen.
+    /// Given a list of input generations, attempt to update them to something newer.
-    /// If it is already different, return immediately.
+    /// If \p gens is older, then just return those by reference, and directly return false (not
-    void await_metagen(generation_t gen);
+    /// becoming the reader).
    /// If \p gens is current and there is not a reader, then do not update \p gens and return true,
    /// indicating we should become the reader. Now it is our responsibility to read from the pipes
    /// and notify on a change via the condition variable.
    /// If \p gens is current, and there is already a reader, then wait until the reader notifies us
    /// and try again.
    bool try_update_gens_maybe_becoming_reader(generation_list_t *gens);
-    /// Return the current generation list, opportunistically applying any pending updates.
+    /// Wait for some entry in the list of generations to change.
    /// \return the new gens.
    generation_list_t await_gens(const generation_list_t &input_gens);
    /// \return the current generation list, opportunistically applying any pending updates.
    generation_list_t updated_gens();
    /// \return the metagen for the current topic generation list.
    inline generation_t current_metagen() { return metagen_for(updated_gens()); }
   public:
    topic_monitor_t();
    ~topic_monitor_t();