readfailover: add new 'readfailover' feature

This commit is contained in:
Antonio SJ Musumeci 2020-06-28 23:51:42 -04:00
parent 562295be90
commit 7d436cd231
15 changed files with 510 additions and 115 deletions

View File

@ -103,6 +103,7 @@ See the mergerfs [wiki for real world deployments](https://github.com/trapexit/m
* **symlinkify=BOOL**: When enabled and a file is not writable and its mtime or ctime is older than **symlinkify_timeout** files will be reported as symlinks to the original files. Please read more below before using. (default: false)
* **symlinkify_timeout=INT**: Time to wait, in seconds, to activate the **symlinkify** behavior. (default: 3600)
* **nullrw=BOOL**: Turns reads and writes into no-ops. The request will succeed but do nothing. Useful for benchmarking mergerfs. (default: false)
* **readfailover=BOOL**: If a `read` fails, search the other branches for the same file and, if one is found and readable, use it going forward. (default: false)
* **ignorepponrename=BOOL**: Ignore path preserving on rename. Typically rename and link act differently depending on the policy of `create` (read below). Enabling this will cause rename and link to always use the non-path preserving behavior. This means files, when renamed or linked, will stay on the same drive. (default: false)
* **security_capability=BOOL**: If false return ENOATTR when xattr security.capability is queried. (default: true)
* **xattr=passthrough|noattr|nosys**: Runtime control of xattrs. Default is to passthrough xattr requests. 'noattr' will short circuit as if nothing exists. 'nosys' will respond with ENOSYS as if xattrs are not supported or disabled. (default: passthrough)
@ -173,6 +174,11 @@ To have the pool mounted at boot or otherwise accessible from related tools use
**NOTE:** for mounting via **fstab** to work you must have **mount.fuse** installed. For Ubuntu/Debian it is included in the **fuse** package.
### readfailover
One use case of mergerfs is to create a union of mirrored files as a means to provide reliability, often with networked filesystems such as NFS, sshfs, or rclone. This feature helps with situations where the device or mount of the file currently in use fails. If `read` returns `EIO` or `ENOTCONN`, mergerfs will cycle through the branches searching for the same relative path. It will `open` the file and attempt a `read`. Should that succeed, that file will be used going forward. Otherwise the original error will be returned. Sometimes when an error occurs the `read` will still succeed but be a "short read." In those cases the kernel will issue an `fstat` to find out the length of the file. As a result, this feature also performs a similar search and replacement should `fstat` fail with the same errors.
### inodecalc
Inodes (st_ino) are unique identifiers within a filesystem. Each mounted filesystem has device ID (st_dev) as well and together they can uniquely identify a file on the whole of the system. Entries on the same device with the same inode are in fact references to the same underlying file. It is a many to one relationship between names and an inode. Directories, however, do not have multiple links on most systems due to the complexity they add.

View File

@ -165,20 +165,6 @@ struct fuse_operations
*/
int (*open) (const char *, struct fuse_file_info *);
/** Read data from an open file
*
* Read should return exactly the number of bytes requested except
* on EOF or error, otherwise the rest of the data will be
* substituted with zeroes. An exception to this is when the
* 'direct_io' mount option is specified, in which case the return
* value of the read system call will reflect the return value of
* this operation.
*
* Changed in version 2.2
*/
int (*read) (char *, size_t, off_t,
struct fuse_file_info *);
/** Write data to an open file
*
* Write should return exactly the number of bytes requested
@ -791,8 +777,6 @@ int fuse_fs_release(struct fuse_fs *fs,
struct fuse_file_info *fi);
int fuse_fs_open(struct fuse_fs *fs, const char *path,
struct fuse_file_info *fi);
int fuse_fs_read(struct fuse_fs *fs, char *buf, size_t size,
off_t off, struct fuse_file_info *fi);
int fuse_fs_read_buf(struct fuse_fs *fs,
struct fuse_bufvec **bufp, size_t size, off_t off,
struct fuse_file_info *fi);

View File

@ -1617,77 +1617,41 @@ static void fuse_free_buf(struct fuse_bufvec *buf)
}
}
/*
 * Read up to `size` bytes at offset `off` from the open file `fi` and
 * hand the result back through `*bufp`.
 *
 * Uses the filesystem's `read_buf` operation when it is set; otherwise
 * falls back to the plain `read` operation with a freshly allocated
 * buffer vector.
 *
 * Returns 0 on success, the negative value returned by the operation on
 * failure, -ENOMEM if the fallback allocations fail, and -ENOSYS when
 * neither `read` nor `read_buf` is set.
 */
int fuse_fs_read_buf(struct fuse_fs *fs,
struct fuse_bufvec **bufp, size_t size, off_t off,
struct fuse_file_info *fi)
{
fuse_get_context()->private_data = fs->user_data;
if (fs->op.read || fs->op.read_buf) {
int res;
if (fs->debug)
fprintf(stderr,
"read[%llu] %zu bytes from %llu flags: 0x%x\n",
(unsigned long long) fi->fh,
size, (unsigned long long) off, fi->flags);
if (fs->op.read_buf) {
res = fs->op.read_buf(bufp, size, off, fi);
} else {
/* Legacy path: allocate a one-element buffer vector backed by
 * `size` bytes of heap memory and fill it through op.read. */
struct fuse_bufvec *buf;
void *mem;
buf = malloc(sizeof(struct fuse_bufvec));
if (buf == NULL)
return -ENOMEM;
mem = malloc(size);
if (mem == NULL) {
free(buf);
return -ENOMEM;
}
*buf = FUSE_BUFVEC_INIT(size);
buf->buf[0].mem = mem;
/* *bufp is published before the read, so it stays set to the
 * allocated vector even when op.read fails below. */
*bufp = buf;
res = fs->op.read(mem, size, off, fi);
/* Shrink the reported size to the number of bytes actually read. */
if (res >= 0)
buf->buf[0].size = res;
}
if (fs->debug && res >= 0)
fprintf(stderr, " read[%llu] %zu bytes from %llu\n",
(unsigned long long) fi->fh,
fuse_buf_size(*bufp),
(unsigned long long) off);
/* Only a diagnostic: an oversized result is reported, not rejected. */
if (res >= 0 && fuse_buf_size(*bufp) > (int) size)
fprintf(stderr, "fuse: read too many bytes\n");
if (res < 0)
return res;
return 0;
} else {
return -ENOSYS;
}
}
int fuse_fs_read(struct fuse_fs *fs, char *mem, size_t size,
off_t off, struct fuse_file_info *fi)
int
fuse_fs_read_buf(struct fuse_fs *fs_,
struct fuse_bufvec **bufp_,
size_t size_,
off_t off_,
struct fuse_file_info *ffi_)
{
int res;
struct fuse_bufvec *buf = NULL;
res = fuse_fs_read_buf(fs, &buf, size, off, fi);
if (res == 0) {
struct fuse_bufvec dst = FUSE_BUFVEC_INIT(size);
fuse_get_context()->private_data = fs_->user_data;
if(fs_->op.read_buf == NULL)
return -ENOSYS;
dst.buf[0].mem = mem;
res = fuse_buf_copy(&dst, buf, 0);
}
fuse_free_buf(buf);
if(fs_->debug)
fprintf(stderr,
"read[%llu] %zu bytes from %llu flags: 0x%x\n",
(unsigned long long)ffi_->fh,
size_,
(unsigned long long)off_,
ffi_->flags);
return res;
res = fs_->op.read_buf(bufp_,size_,off_,ffi_);
if(fs_->debug && (res >= 0))
fprintf(stderr, " read[%llu] %zu bytes from %llu\n",
(unsigned long long)ffi_->fh,
fuse_buf_size(*bufp_),
(unsigned long long)off_);
if((res >= 0) && (fuse_buf_size(*bufp_) > (int)size_))
fprintf(stderr,"fuse: read too many bytes\n");
if(res < 0)
return res;
return 0;
}
int fuse_fs_write_buf(struct fuse_fs *fs,

View File

@ -171,6 +171,10 @@ The request will succeed but do nothing.
Useful for benchmarking mergerfs.
(default: false)
.IP \[bu] 2
\f[B]readfailover=BOOL\f[]: If a \f[C]read\f[] fails, search the other
branches for the same file and, if one is found and readable, use it
going forward.
(default: false)
.IP \[bu] 2
\f[B]ignorepponrename=BOOL\f[]: Ignore path preserving on rename.
Typically rename and link act differently depending on the policy of
\f[C]create\f[] (read below).
@ -381,6 +385,23 @@ be automatically included.
\f[B]NOTE:\f[] for mounting via \f[B]fstab\f[] to work you must have
\f[B]mount.fuse\f[] installed.
For Ubuntu/Debian it is included in the \f[B]fuse\f[] package.
.SS readfailover
.PP
One use case of mergerfs is to create a union of mirrored files as a
means to provide reliability, often with networked filesystems such as
NFS, sshfs, or rclone.
This feature helps with situations where the device or mount of the file
currently in use fails.
If \f[C]read\f[] returns \f[C]EIO\f[] or \f[C]ENOTCONN\f[] mergerfs will
cycle through branches searching for the same relative path.
It will \f[C]open\f[] and attempt a \f[C]read\f[].
Should that succeed the file will be used going forward.
Otherwise the original error will be returned.
Sometimes when an error occurs the \f[C]read\f[] will still succeed but
be a "short read." In those cases the kernel will issue a \f[C]fstat\f[]
to find out the length of the file.
As a result this feature also performs a similar search and replacement
behavior should \f[C]fstat\f[] fail with the same errors.
.SS inodecalc
.PP
Inodes (st_ino) are unique identifiers within a filesystem.

View File

@ -93,6 +93,7 @@ Config::Config()
posix_acl(false),
readdir(ReadDir::ENUM::POSIX),
readdirplus(false),
readfailover(false),
security_capability(true),
srcmounts(branches),
statfs(StatFS::ENUM::BASE),
@ -155,6 +156,7 @@ Config::Config()
_map["posix_acl"] = &posix_acl;
// _map["readdir"] = &readdir;
_map["readdirplus"] = &readdirplus;
_map["readfailover"] = &readfailover;
_map["security_capability"] = &security_capability;
_map["srcmounts"] = &srcmounts;
_map["statfs"] = &statfs;

View File

@ -88,6 +88,7 @@ public:
ConfigBOOL posix_acl;
ReadDir readdir;
ConfigBOOL readdirplus;
ConfigBOOL readfailover;
ConfigBOOL security_capability;
SrcMounts srcmounts;
StatFS statfs;

View File

@ -24,12 +24,15 @@ class FileInfo : public FH
{
public:
FileInfo(const int fd_,
const char *fusepath_)
const char *fusepath_,
const int flags_)
: FH(fusepath_),
fd(fd_)
fd(fd_),
flags(flags_)
{
}
public:
int fd;
int flags;
};

View File

@ -118,7 +118,7 @@ namespace l
if(rv == -1)
return -errno;
*fh_ = reinterpret_cast<uint64_t>(new FileInfo(rv,fusepath_));
*fh_ = reinterpret_cast<uint64_t>(new FileInfo(rv,fusepath_,flags_));
return 0;
}

View File

@ -17,26 +17,141 @@
#include "config.hpp"
#include "errno.hpp"
#include "fileinfo.hpp"
#include "fs_close.hpp"
#include "fs_fstat.hpp"
#include "fs_inode.hpp"
#include "fs_open.hpp"
#include "fs_path.hpp"
#include "fs_read.hpp"
#include "ugid.hpp"
#include <fuse.h>
namespace l
{
// Reports whether `error_` is one of the errno values (EIO, ENOTCONN)
// for which a failover is attempted.
static
bool
failoverable_error(const int error_)
{
  return ((error_ == EIO) || (error_ == ENOTCONN));
}
// Reports whether a file opened with `flags_` may be failed over:
// true only when neither O_CREAT nor O_TRUNC is set.
static
bool
failoverable_flags(const int flags_)
{
  const int disqualifying = (O_CREAT|O_TRUNC);

  return ((flags_ & disqualifying) == 0);
}
// Installs `fd_` as the descriptor held by `fi_`, then closes the
// descriptor it replaced. The swap happens before the close.
static
void
update_fi_fd(FileInfo  *fi_,
             const int  fd_)
{
  const int previous_fd = fi_->fd;

  fi_->fd = fd_;

  fs::close(previous_fd);
}
static
int
fgetattr(const int fd_,
const std::string &fusepath_,
struct stat *st_)
failover_fstat_loop(FileInfo *fi_,
const BranchVec &branches_,
struct stat *st_,
const int error_)
{
int fd;
int rv;
char buf;
std::string fullpath;
for(size_t i = 0, ei = branches_.size(); i != ei; i++)
{
fullpath = fs::path::make(branches_[i].path,fi_->fusepath);
fd = fs::open(fullpath,fi_->flags);
if(fd == -1)
continue;
rv = fs::pread(fd,&buf,1,0);
if(rv == -1)
{
fs::close(fd);
continue;
}
rv = fs::fstat(fd,st_);
if(rv == -1)
{
fs::close(fd);
continue;
}
l::update_fi_fd(fi_,fd);
return rv;
}
return (errno=error_,-1);
}
static
int
failover_fstat(const Branches &branches_,
FileInfo *fi_,
struct stat *st_,
const int error_)
{
const fuse_context *fc = fuse_get_context();
const Config &config = Config::ro();
const ugid::Set ugid(fc->uid,fc->gid);
rwlock::ReadGuard guard(config.branches.lock);
return l::failover_fstat_loop(fi_,config.branches.vec,st_,error_);
}
// Decides whether an fstat failure qualifies for failover and, if so,
// starts the search. A failover is attempted only when the
// `readfailover` option is on, the open flags allow it and `error_` is
// a failoverable errno. Otherwise errno is set to `error_` and -1 is
// returned.
static
int
maybe_failover_fstat(FileInfo    *fi_,
                     struct stat *st_,
                     const int    error_)
{
  const Config &config = Config::ro();

  if((config.readfailover == false)               ||
     (l::failoverable_flags(fi_->flags) == false) ||
     (l::failoverable_error(error_) == false))
    return (errno=error_,-1);

  return l::failover_fstat(config.branches,fi_,st_,error_);
}
static
int
fgetattr(FileInfo *fi_,
struct stat *st_)
{
int rv;
rv = fs::fstat(fd_,st_);
rv = fs::fstat(fi_->fd,st_);
if(rv == -1)
rv = l::maybe_failover_fstat(fi_,st_,errno);
if(rv == -1)
return -errno;
fs::inode::calc(fusepath_,st_);
fs::inode::calc(fi_->fusepath,st_);
return 0;
}
@ -53,7 +168,7 @@ namespace FUSE
const Config &config = Config::ro();
FileInfo *fi = reinterpret_cast<FileInfo*>(ffi_->fh);
rv = l::fgetattr(fi->fd,fi->fusepath,st_);
rv = l::fgetattr(fi,st_);
timeout_->entry = ((rv >= 0) ?
config.cache_entry :

View File

@ -173,7 +173,7 @@ namespace l
if(fd == -1)
return -errno;
*fh_ = reinterpret_cast<uint64_t>(new FileInfo(fd,fusepath_));
*fh_ = reinterpret_cast<uint64_t>(new FileInfo(fd,fusepath_,flags_));
return 0;
}

View File

@ -14,25 +14,101 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "branch.hpp"
#include "config.hpp"
#include "errno.hpp"
#include "fileinfo.hpp"
#include "fs_close.hpp"
#include "fs_open.hpp"
#include "fs_path.hpp"
#include "fs_read.hpp"
#include "ugid.hpp"
#include <fuse.h>
#include <string>
#include <vector>
using std::string;
using std::vector;
namespace l
{
// Reports whether a failed read may be failed over: the open flags must
// contain neither O_CREAT nor O_TRUNC and `error_` must be EIO or
// ENOTCONN.
static
bool
can_failover(const int error_,
             const int flags_)
{
  if(flags_ & (O_CREAT|O_TRUNC))
    return false;

  switch(error_)
    {
    case EIO:
    case ENOTCONN:
      return true;
    default:
      return false;
    }
}
// Walks `branches_` in order looking for another copy of the file at
// fi_->fusepath. Each candidate is opened with the original open flags
// and the failed read is retried on it. On the first successful read
// the old descriptor is closed, the new one is stored in `fi_` and the
// read result is returned. If no branch works, errno is set to
// `error_` and -1 is returned.
static
int
failover_read_loop(FileInfo        *fi_,
                   const BranchVec &branches_,
                   void            *buf_,
                   const size_t     count_,
                   const off_t      offset_,
                   const int        error_)
{
  const size_t branch_count = branches_.size();

  for(size_t idx = 0; idx != branch_count; idx++)
    {
      std::string candidate;

      candidate = fs::path::make(branches_[idx].path,fi_->fusepath);

      const int newfd = fs::open(candidate,fi_->flags);
      if(newfd == -1)
        continue;

      const int nread = fs::pread(newfd,buf_,count_,offset_);
      if(nread < 0)
        {
          fs::close(newfd);
          continue;
        }

      fs::close(fi_->fd);
      fi_->fd = newfd;

      return nread;
    }

  return (errno=error_,-1);
}
// Attempts to satisfy a failed read from another branch.
//
// fi_     : open-file record whose descriptor may be replaced
// buf_    : destination buffer of the original read
// count_  : number of bytes requested
// offset_ : file offset of the original read
// error_  : errno of the original failure
//
// Returns the result of l::failover_read_loop, or -1 with errno set to
// `error_` when the `readfailover` option is disabled.
//
// NOTE(review): ugid::Set presumably applies the caller's uid/gid for
// the duration of this scope -- confirm against ugid.hpp.
static
int
failover_read(FileInfo     *fi_,
              void         *buf_,
              const size_t  count_,
              const off_t   offset_,
              const int     error_)
{
  const Config &config = Config::ro();

  // Honor the `readfailover` option: without this check the read path
  // would fail over even when the feature is switched off. errno is
  // restored so the caller's `-errno` reports the original failure.
  if(config.readfailover == false)
    return (errno=error_,-1);

  const fuse_context *fc = fuse_get_context();
  const ugid::Set     ugid(fc->uid,fc->gid);

  rwlock::ReadGuard guard(config.branches.lock);

  return l::failover_read_loop(fi_,config.branches.vec,buf_,count_,offset_,error_);
}
static
inline
int
read_regular(const int fd_,
read_regular(FileInfo *fi_,
void *buf_,
const size_t count_,
const off_t offset_)
{
int rv;
rv = fs::pread(fd_,buf_,count_,offset_);
rv = fs::pread(fi_->fd,buf_,count_,offset_);
if((rv == -1) && l::can_failover(errno,fi_->flags))
rv = l::failover_read(fi_,buf_,count_,offset_,errno);
if(rv == -1)
return -errno;
if(rv == 0)
@ -44,14 +120,17 @@ namespace l
static
inline
int
read_direct_io(const int fd_,
read_direct_io(FileInfo *fi_,
void *buf_,
const size_t count_,
const off_t offset_)
{
int rv;
rv = fs::pread(fd_,buf_,count_,offset_);
rv = fs::pread(fi_->fd,buf_,count_,offset_);
if((rv == -1) && l::can_failover(errno,fi_->flags))
rv = l::failover_read(fi_,buf_,count_,offset_,errno);
if(rv == -1)
return -errno;
@ -72,8 +151,8 @@ namespace FUSE
fi = reinterpret_cast<FileInfo*>(ffi_->fh);
if(ffi_->direct_io)
return l::read_direct_io(fi->fd,buf_,count_,offset_);
return l::read_regular(fi->fd,buf_,count_,offset_);
return l::read_direct_io(fi,buf_,count_,offset_);
return l::read_regular(fi,buf_,count_,offset_);
}
int

View File

@ -1,5 +1,5 @@
/*
Copyright (c) 2016, Antonio SJ Musumeci <trapexit@spawn.link>
Copyright (c) 2020, Antonio SJ Musumeci <trapexit@spawn.link>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
@ -14,36 +14,198 @@
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "branch.hpp"
#include "config.hpp"
#include "errno.hpp"
#include "fileinfo.hpp"
#include "fs_close.hpp"
#include "fs_open.hpp"
#include "fs_path.hpp"
#include "fs_read.hpp"
#include "ugid.hpp"
#include <fuse.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
typedef struct fuse_bufvec fuse_bufvec;
namespace l
{
static
int
read_buf(const int fd_,
fuse_bufvec **bufp_,
const size_t size_,
const off_t offset_)
// Reports whether `error_` is one of the errno values (EIO, ENOTCONN)
// for which a failover is attempted.
bool
failoverable_error(const int error_)
{
  return ((error_ == EIO) || (error_ == ENOTCONN));
}
// Reports whether a file opened with `flags_` may be failed over:
// true only when neither O_CREAT nor O_TRUNC is set.
static
bool
failoverable_flags(const int flags_)
{
  if(flags_ & O_CREAT)
    return false;
  if(flags_ & O_TRUNC)
    return false;

  return true;
}
// Installs `fd_` as the descriptor held by `fi_`, then closes the
// descriptor it replaced. The swap happens before the close.
static
void
update_fi_fd(FileInfo  *fi_,
             const int  fd_)
{
  const int stale_fd = fi_->fd;

  fi_->fd = fd_;

  fs::close(stale_fd);
}
static
int
failover_read_loop(const BranchVec &branches_,
FileInfo *fi_,
void *buf_,
const size_t count_,
const off_t offset_,
const int error_)
{
int fd;
int rv;
std::string fullpath;
for(size_t i = 0, ei = branches_.size(); i != ei; i++)
{
fullpath = fs::path::make(branches_[i].path,fi_->fusepath);
fd = fs::open(fullpath,fi_->flags);
if(fd == -1)
continue;
rv = fs::pread(fd,buf_,count_,offset_);
if(rv == -1)
{
fs::close(fd);
continue;
}
l::update_fi_fd(fi_,fd);
return rv;
}
return (errno=error_,-1);
}
// Runs the read failover search over `branches_` while holding its read
// lock, and returns whatever l::failover_read_loop returns.
//
// NOTE(review): ugid::Set presumably applies the caller's uid/gid for
// the duration of this scope -- confirm against ugid.hpp.
static
int
failover_read(const Branches &branches_,
              FileInfo       *fi_,
              void           *buf_,
              const size_t    count_,
              const off_t     offset_,
              const int       error_)
{
  const fuse_context *ctx = fuse_get_context();
  const ugid::Set     caller_ids(ctx->uid,ctx->gid);

  rwlock::ReadGuard branches_guard(branches_.lock);

  return l::failover_read_loop(branches_.vec,
                               fi_,
                               buf_,
                               count_,
                               offset_,
                               error_);
}
// Decides whether a read failure qualifies for failover and, if so,
// starts the search. A failover is attempted only when the
// `readfailover` option is on, the open flags allow it and `error_` is
// a failoverable errno. Otherwise errno is set to `error_` and -1 is
// returned.
static
int
maybe_failover_read(FileInfo     *fi_,
                    void         *buf_,
                    const size_t  count_,
                    const off_t   offset_,
                    const int     error_)
{
  const Config &config = Config::ro();

  if((config.readfailover == false)               ||
     (l::failoverable_flags(fi_->flags) == false) ||
     (l::failoverable_error(error_) == false))
    return (errno=error_,-1);

  return l::failover_read(config.branches,fi_,buf_,count_,offset_,error_);
}
// Builds a read result without touching any file: a single-buffer
// fuse_bufvec backed by `count_` zero-filled bytes is stored in
// `*bufp_`. `offset_` is not used.
//
// Returns 0 on success or -ENOMEM if either allocation fails.
static
int
read_buf_null(fuse_bufvec  **bufp_,
              const size_t   count_,
              const off_t    offset_)
{
  void *zeros = (void*)calloc(count_,1);
  if(zeros == NULL)
    return -ENOMEM;

  fuse_bufvec *vec = (fuse_bufvec*)malloc(sizeof(fuse_bufvec));
  if(vec == NULL)
    {
      free(zeros);
      return -ENOMEM;
    }

  *vec = FUSE_BUFVEC_INIT(count_);

  vec->buf[0].mem  = zeros;
  vec->buf[0].size = count_;

  *bufp_ = vec;

  return 0;
}
static
int
read_buf(FileInfo *fi_,
fuse_bufvec **bufp_,
const size_t count_,
const off_t offset_)
{
int rv;
void *buf;
fuse_bufvec *src;
buf = (void*)malloc(count_);
if(buf == NULL)
return -ENOMEM;
*src = FUSE_BUFVEC_INIT(size_);
src = (fuse_bufvec*)malloc(sizeof(fuse_bufvec));
if(src == NULL)
{
free(buf);
return -ENOMEM;
}
src->buf->flags = (fuse_buf_flags)(FUSE_BUF_IS_FD|FUSE_BUF_FD_SEEK|FUSE_BUF_FD_RETRY);
src->buf->fd = fd_;
src->buf->pos = offset_;
*src = FUSE_BUFVEC_INIT(count_);
rv = fs::pread(fi_->fd,buf,count_,offset_);
if(rv == -1)
rv = l::maybe_failover_read(fi_,buf,count_,offset_,errno);
if(rv == -1)
{
free(buf);
free(src);
return -errno;
}
src->buf->mem = buf;
src->buf->size = rv;
*bufp_ = src;
@ -53,17 +215,28 @@ namespace l
namespace FUSE
{
// FUSE entry point used in place of read_buf when reads are no-ops.
// `ffi_` is ignored; the work is delegated to l::read_buf_null.
int
read_buf_null(fuse_bufvec    **bufp_,
              size_t           count_,
              off_t            offset_,
              fuse_file_info  *ffi_)
{
  const int rv = l::read_buf_null(bufp_,count_,offset_);

  return rv;
}
int
read_buf(fuse_bufvec **bufp_,
size_t size_,
size_t count_,
off_t offset_,
fuse_file_info *ffi_)
{
FileInfo *fi = reinterpret_cast<FileInfo*>(ffi_->fh);
return l::read_buf(fi->fd,
bufp_,
size_,
offset_);
return l::read_buf(fi,
bufp_,
count_,
offset_);
}
}

View File

@ -27,4 +27,10 @@ namespace FUSE
size_t size,
off_t offset,
fuse_file_info *ffi);
// No-op variant of read_buf: takes the same arguments and reports its
// result through `*buf`.
// NOTE(review): the definition is expected to return a zero-filled
// buffer of `size` bytes without reading any file -- confirm against
// the implementation.
int
read_buf_null(struct fuse_bufvec **buf,
size_t size,
off_t offset,
fuse_file_info *ffi);
}

View File

@ -111,8 +111,7 @@ namespace l
ops_.opendir = FUSE::opendir;
ops_.poll = NULL;
ops_.prepare_hide = FUSE::prepare_hide;
ops_.read = (nullrw_ ? FUSE::read_null : FUSE::read);
ops_.read_buf = (nullrw_ ? NULL : FUSE::read_buf);
ops_.read_buf = (nullrw_ ? FUSE::read_buf_null : FUSE::read_buf);
ops_.readdir = FUSE::readdir;
ops_.readdir_plus = FUSE::readdir_plus;
ops_.readlink = FUSE::readlink;

42
src/mutex.hpp Normal file
View File

@ -0,0 +1,42 @@
/*
ISC License
Copyright (c) 2020, Antonio SJ Musumeci <trapexit@spawn.link>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#pragma once
#include <pthread.h>
namespace mutex
{
  // Scoped lock for a pthread mutex: locks `lock_` on construction and
  // unlocks it on destruction. The guard only stores a reference, so
  // the mutex must outlive it.
  class Guard
  {
  public:
    // `explicit` prevents a pthread_mutex_t from silently converting
    // into a guard (and so being locked) by accident.
    explicit
    Guard(pthread_mutex_t &lock_)
      : _lock(lock_)
    {
      pthread_mutex_lock(&_lock);
    }

    ~Guard()
    {
      pthread_mutex_unlock(&_lock);
    }

  private:
    // Non-copyable: a copied guard would unlock the same mutex twice.
    // Declared but never defined.
    Guard(const Guard&);
    Guard& operator=(const Guard&);

  private:
    pthread_mutex_t &_lock;
  };
}