Do not lock the history file on remote filesystems

This avoids using locks for the history file if the file appears to be on
a remote file system, like NFS. This is to avoid hangs if the filesystem
does not support locking.

If locking is not enabled, then in rare cases, history items may be
dropped if multiple sessions try to write to the history file at once.
This is thought to be better than hanging. Hopefully the recent change to
require a trailing newline will avoid propagating partial items.
This commit is contained in:
ridiculousfish 2021-05-09 14:47:10 -07:00
parent d1fd3d5825
commit 1af441b4cc
4 changed files with 54 additions and 38 deletions

View File

@ -122,27 +122,6 @@ maybe_t<wcstring> history_filename(const wcstring &session_id, const wcstring &s
return result; return result;
} }
/// Lock the history file.
/// Returns true on success, false on failure.
bool history_file_lock(int fd, int lock_type) {
static std::atomic<bool> do_locking(true);
if (!do_locking) return false;
double start_time = timef();
int retval = flock(fd, lock_type);
double duration = timef() - start_time;
if (duration > 0.25) {
FLOGF(warning, _(L"Locking the history file took too long (%.3f seconds)."), duration);
// we've decided to stop doing any locking behavior
// but make sure we don't leave the file locked!
if (retval == 0 && lock_type != LOCK_UN) {
flock(fd, LOCK_UN);
}
do_locking = false;
return false;
}
return retval != -1;
}
} // anonymous namespace } // anonymous namespace
@ -288,6 +267,10 @@ struct history_impl_t {
// List of old items, as offsets into out mmap data. // List of old items, as offsets into out mmap data.
std::deque<size_t> old_item_offsets{}; std::deque<size_t> old_item_offsets{};
// If set, we gave up on file locking because it took too long.
// Note this is shared among all history instances.
static relaxed_atomic_bool_t abandoned_locking;
/// \return a timestamp for new items - see the implementation for a subtlety. /// \return a timestamp for new items - see the implementation for a subtlety.
time_t timestamp_now() const; time_t timestamp_now() const;
@ -377,8 +360,38 @@ struct history_impl_t {
// Return the number of history entries. // Return the number of history entries.
size_t size(); size_t size();
// Maybe lock a history file.
// \return true if successful, false if locking was skipped.
static bool maybe_lock_file(int fd, int lock_type);
static void unlock_file(int fd);
}; };
relaxed_atomic_bool_t history_impl_t::abandoned_locking{false};
// static
bool history_impl_t::maybe_lock_file(int fd, int lock_type) {
assert(!(lock_type & LOCK_UN) && "Do not use lock_file to unlock");
// Don't lock if it took too long before, if we are simulating a failing lock, or if our history
// is on a remote filesystem.
if (abandoned_locking) return false;
if (history_t::chaos_mode) return false;
if (path_get_data_is_remote() == 1) return false;
double start_time = timef();
int retval = flock(fd, lock_type);
double duration = timef() - start_time;
if (duration > 0.25) {
FLOGF(warning, _(L"Locking the history file took too long (%.3f seconds)."), duration);
abandoned_locking = true;
}
return retval != -1;
}
// static
void history_impl_t::unlock_file(int fd) { flock(fd, LOCK_UN); }
void history_impl_t::add(history_item_t &&item, bool pending, bool do_save) { void history_impl_t::add(history_item_t &&item, bool pending, bool do_save) {
assert(item.timestamp() != 0 && "Should not add an item with a 0 timestamp"); assert(item.timestamp() != 0 && "Should not add an item with a 0 timestamp");
// We use empty items as sentinels to indicate the end of history. // We use empty items as sentinels to indicate the end of history.
@ -592,20 +605,18 @@ void history_impl_t::load_old_if_needed() {
autoclose_fd_t file{wopen_cloexec(*filename, O_RDONLY)}; autoclose_fd_t file{wopen_cloexec(*filename, O_RDONLY)};
int fd = file.fd(); int fd = file.fd();
if (fd >= 0) { if (fd >= 0) {
// Take a read lock to guard against someone else appending. This is released when the // Take a read lock to guard against someone else appending. This is released after
// file is closed (below). We will read the file after releasing the lock, but that's // getting the file's length. We will read the file after releasing the lock, but that's
// not a problem, because we never modify already written data. In short, the purpose of // not a problem, because we never modify already written data. In short, the purpose of
// this lock is to ensure we don't see the file size change mid-update. // this lock is to ensure we don't see the file size change mid-update.
// //
// We may fail to lock (e.g. on lockless NFS - see issue #685. In that case, we proceed // We may fail to lock (e.g. on lockless NFS - see issue #685. In that case, we proceed
// as if it did not fail. The risk is that we may get an incomplete history item; this // as if it did not fail. The risk is that we may get an incomplete history item; this
// is unlikely because we only treat an item as valid if it has a terminating newline. // is unlikely because we only treat an item as valid if it has a terminating newline.
// bool locked = maybe_lock_file(fd, LOCK_SH);
// Simulate a failing lock in chaos_mode.
if (!history_t::chaos_mode) history_file_lock(fd, LOCK_SH);
file_contents = history_file_contents_t::create(fd); file_contents = history_file_contents_t::create(fd);
this->history_file_id = file_contents ? file_id_for_fd(fd) : kInvalidFileID; this->history_file_id = file_contents ? file_id_for_fd(fd) : kInvalidFileID;
if (!history_t::chaos_mode) history_file_lock(fd, LOCK_UN); if (locked) unlock_file(fd);
time_profiler_t profiler("populate_from_file_contents"); //!OCLINT(side-effect) time_profiler_t profiler("populate_from_file_contents"); //!OCLINT(side-effect)
this->populate_from_file_contents(); this->populate_from_file_contents();
@ -816,10 +827,11 @@ bool history_impl_t::save_internal_via_rewrite() {
autoclose_fd_t target_fd_after{wopen_cloexec(target_name, O_RDONLY)}; autoclose_fd_t target_fd_after{wopen_cloexec(target_name, O_RDONLY)};
if (target_fd_after.valid()) { if (target_fd_after.valid()) {
// critical to take the lock before checking file IDs, // critical to take the lock before checking file IDs,
// and hold it until after we are done replacing // and hold it until after we are done replacing.
// Also critical to check the file at the path, NOT based on our fd // Also critical to check the file at the path, NOT based on our fd.
// It's only OK to replace the file while holding the lock // It's only OK to replace the file while holding the lock.
history_file_lock(target_fd_after.fd(), LOCK_EX); // Note any lock is released when target_fd_after is closed.
(void)maybe_lock_file(target_fd_after.fd(), LOCK_EX);
new_file_id = file_id_for_path(target_name); new_file_id = file_id_for_path(target_name);
} }
bool can_replace_file = (new_file_id == orig_file_id || new_file_id == kInvalidFileID); bool can_replace_file = (new_file_id == orig_file_id || new_file_id == kInvalidFileID);
@ -913,9 +925,7 @@ bool history_impl_t::save_internal_via_appending() {
// we may get interleaved history items, which is considered better than no history, or // we may get interleaved history items, which is considered better than no history, or
// forcing everything through the slow copy-move mode. We try to minimize this possibility // forcing everything through the slow copy-move mode. We try to minimize this possibility
// by writing with O_APPEND. // by writing with O_APPEND.
// maybe_lock_file(fd.fd(), LOCK_EX);
// Simulate a failing lock in chaos_mode
if (!history_t::chaos_mode) history_file_lock(fd.fd(), LOCK_EX);
const file_id_t file_id = file_id_for_fd(fd.fd()); const file_id_t file_id = file_id_for_fd(fd.fd());
if (file_id_for_path(history_path) == file_id) { if (file_id_for_path(history_path) == file_id) {
// File IDs match, so the file we opened is still at that path // File IDs match, so the file we opened is still at that path

View File

@ -6,6 +6,7 @@
#include "fds.h" #include "fds.h"
#include "history.h" #include "history.h"
#include "path.h"
// Some forward declarations. // Some forward declarations.
static history_item_t decode_item_fish_2_0(const char *base, size_t len); static history_item_t decode_item_fish_2_0(const char *base, size_t len);
@ -18,12 +19,11 @@ static maybe_t<size_t> offset_of_next_item_fish_1_x(const char *begin, size_t mm
// Check if we should mmap the fd. // Check if we should mmap the fd.
// Don't try mmap() on non-local filesystems. // Don't try mmap() on non-local filesystems.
static bool should_mmap(int fd) { static bool should_mmap() {
if (history_t::never_mmap) return false; if (history_t::never_mmap) return false;
// mmap only if we are known not-remote (return is 0). // mmap only if we are known not-remote (return is 0).
int ret = fd_check_is_remote(fd); return path_get_data_is_remote() == 0;
return ret == 0;
} }
// Read up to len bytes from fd into address, zeroing the rest. // Read up to len bytes from fd into address, zeroing the rest.
@ -163,7 +163,7 @@ std::unique_ptr<history_file_contents_t> history_file_contents_t::create(int fd)
off_t len = lseek(fd, 0, SEEK_END); off_t len = lseek(fd, 0, SEEK_END);
if (len <= 0 || static_cast<unsigned long>(len) >= SIZE_MAX) return nullptr; if (len <= 0 || static_cast<unsigned long>(len) >= SIZE_MAX) return nullptr;
bool mmap_file_directly = should_mmap(fd); bool mmap_file_directly = should_mmap();
std::unique_ptr<mmap_region_t> region = std::unique_ptr<mmap_region_t> region =
mmap_file_directly ? mmap_region_t::map_file(fd, len) : mmap_region_t::map_anon(len); mmap_file_directly ? mmap_region_t::map_file(fd, len) : mmap_region_t::map_anon(len);
if (!region) return nullptr; if (!region) return nullptr;

View File

@ -417,6 +417,8 @@ bool path_get_data(wcstring &path) {
return dir.success(); return dir.success();
} }
int path_get_data_is_remote() { return get_data_directory().is_remote; }
void path_make_canonical(wcstring &path) { void path_make_canonical(wcstring &path) {
// Ignore trailing slashes, unless it's the first character. // Ignore trailing slashes, unless it's the first character.
size_t len = path.size(); size_t len = path.size();

View File

@ -29,6 +29,10 @@ bool path_get_config(wcstring &path);
/// \return whether the directory was returned successfully /// \return whether the directory was returned successfully
bool path_get_data(wcstring &path); bool path_get_data(wcstring &path);
/// \return if the data directory is remote (eg. NFS).
/// -1 means unknown, 0 means known local, 1 means known remote.
int path_get_data_is_remote();
/// Emit any errors if config directories are missing. /// Emit any errors if config directories are missing.
/// Use the given environment stack to ensure this only occurs once. /// Use the given environment stack to ensure this only occurs once.
class env_stack_t; class env_stack_t;