Add support for 'direct-io-allow-mmap' if supported by kernel

This commit is contained in:
Antonio SJ Musumeci 2024-03-17 00:52:26 -05:00
parent 977d04229f
commit f0444a1ca9
8 changed files with 152 additions and 114 deletions

View File

@ -91,6 +91,11 @@ start with one of the following option sets.
`cache.files=auto-full,dropcacheonclose=true,category.create=mfs`
or, if you are on a Linux kernel >= 6.6.x, mergerfs will enable a mode
that allows shared mmap when `cache.files=off`. To find out which of
`cache.files=off` and `cache.files=auto-full` performs better for your
workload you'll need to do your own benchmarking, but `off` is often faster.
#### You don't need `mmap`
`cache.files=off,dropcacheonclose=true,category.create=mfs`
@ -162,6 +167,11 @@ These options are the same regardless of whether you use them with the
longer need the data and it can drop its cache. Recommended when
**cache.files=partial|full|auto-full|per-process** to limit double
caching. (default: false)
* **direct-io-allow-mmap=BOOL**: On newer kernels (>= 6.6) it is
possible to disable file page caching while still allowing for
shared mmap support. mergerfs will enable this feature if available
but an option is provided to turn it off for testing and debugging
purposes. (default: true)
* **symlinkify=BOOL**: When enabled and a file is not writable and its
mtime or ctime is older than **symlinkify_timeout** files will be
reported as symlinks to the original files. Please read more below

View File

@ -108,23 +108,26 @@ struct fuse_file_info_t
* FUSE_CAP_IOCTL_DIR: ioctl support on directories
* FUSE_CAP_CACHE_SYMLINKS: cache READLINK responses
*/
#define FUSE_CAP_ASYNC_READ (1 << 0)
#define FUSE_CAP_POSIX_LOCKS (1 << 1)
#define FUSE_CAP_ATOMIC_O_TRUNC (1 << 3)
#define FUSE_CAP_EXPORT_SUPPORT (1 << 4)
#define FUSE_CAP_BIG_WRITES (1 << 5)
#define FUSE_CAP_DONT_MASK (1 << 6)
#define FUSE_CAP_FLOCK_LOCKS (1 << 10)
#define FUSE_CAP_IOCTL_DIR (1 << 11)
#define FUSE_CAP_READDIR_PLUS (1 << 13)
#define FUSE_CAP_READDIR_PLUS_AUTO (1 << 14)
#define FUSE_CAP_ASYNC_DIO (1 << 15)
#define FUSE_CAP_WRITEBACK_CACHE (1 << 16)
#define FUSE_CAP_PARALLEL_DIROPS (1 << 18)
#define FUSE_CAP_POSIX_ACL (1 << 19)
#define FUSE_CAP_CACHE_SYMLINKS (1 << 20)
#define FUSE_CAP_MAX_PAGES (1 << 21)
#define FUSE_CAP_SETXATTR_EXT (1 << 22)
#define FUSE_CAP_ASYNC_READ (1ULL << 0)
#define FUSE_CAP_POSIX_LOCKS (1ULL << 1)
#define FUSE_CAP_ATOMIC_O_TRUNC (1ULL << 3)
#define FUSE_CAP_EXPORT_SUPPORT (1ULL << 4)
#define FUSE_CAP_BIG_WRITES (1ULL << 5)
#define FUSE_CAP_DONT_MASK (1ULL << 6)
#define FUSE_CAP_FLOCK_LOCKS (1ULL << 10)
#define FUSE_CAP_IOCTL_DIR (1ULL << 11)
#define FUSE_CAP_READDIR_PLUS (1ULL << 13)
#define FUSE_CAP_READDIR_PLUS_AUTO (1ULL << 14)
#define FUSE_CAP_ASYNC_DIO (1ULL << 15)
#define FUSE_CAP_WRITEBACK_CACHE (1ULL << 16)
#define FUSE_CAP_PARALLEL_DIROPS (1ULL << 18)
#define FUSE_CAP_POSIX_ACL (1ULL << 19)
#define FUSE_CAP_CACHE_SYMLINKS (1ULL << 20)
#define FUSE_CAP_MAX_PAGES (1ULL << 21)
#define FUSE_CAP_SETXATTR_EXT (1ULL << 22)
#define FUSE_CAP_DIRECT_IO_ALLOW_MMAP (1ULL << 23)
#define FUSE_CAP_CREATE_SUPP_GROUP (1ULL << 24)
/**
* Ioctl flags
@ -150,56 +153,17 @@ struct fuse_file_info_t
* indicate the value requested by the filesystem. The requested
* value must usually be smaller than the indicated value.
*/
struct fuse_conn_info {
/**
* Major version of the protocol (read-only)
*/
struct fuse_conn_info
{
unsigned proto_major;
/**
* Minor version of the protocol (read-only)
*/
unsigned proto_minor;
/**
* Maximum size of the write buffer
*/
unsigned max_write;
/**
* Maximum readahead
*/
unsigned max_readahead;
/**
* Capability flags, that the kernel supports
*/
unsigned capable;
/**
* Capability flags, that the filesystem wants to enable
*/
unsigned want;
/**
* Maximum number of backgrounded requests
*/
uint64_t capable;
uint64_t want;
unsigned max_background;
/**
* Kernel congestion threshold parameter
*/
unsigned congestion_threshold;
/**
* Max pages
*/
uint16_t max_pages;
/**
* For future use.
*/
unsigned reserved[22];
};
struct fuse_session;

View File

@ -158,9 +158,9 @@ open_flag_to_str(const uint64_t offset_)
static
const
char*
fuse_flag_to_str(const uint32_t offset_)
fuse_flag_to_str(const uint64_t offset_)
{
switch(1 << offset_)
switch(1ULL << offset_)
{
FUSE_INIT_FLAG_CASE(ASYNC_READ);
FUSE_INIT_FLAG_CASE(POSIX_LOCKS);
@ -189,6 +189,16 @@ fuse_flag_to_str(const uint32_t offset_)
FUSE_INIT_FLAG_CASE(NO_OPENDIR_SUPPORT);
FUSE_INIT_FLAG_CASE(EXPLICIT_INVAL_DATA);
FUSE_INIT_FLAG_CASE(MAP_ALIGNMENT);
FUSE_INIT_FLAG_CASE(SUBMOUNTS);
FUSE_INIT_FLAG_CASE(HANDLE_KILLPRIV_V2);
FUSE_INIT_FLAG_CASE(SETXATTR_EXT);
FUSE_INIT_FLAG_CASE(INIT_EXT);
FUSE_INIT_FLAG_CASE(INIT_RESERVED);
FUSE_INIT_FLAG_CASE(SECURITY_CTX);
FUSE_INIT_FLAG_CASE(HAS_INODE_DAX);
FUSE_INIT_FLAG_CASE(CREATE_SUPP_GROUP);
FUSE_INIT_FLAG_CASE(HAS_EXPIRE_ONLY);
FUSE_INIT_FLAG_CASE(DIRECT_IO_ALLOW_MMAP);
}
return NULL;
@ -200,7 +210,7 @@ static
void
debug_open_flags(const uint32_t flags_)
{
fprintf(stderr,"%s,",open_accmode_to_str(flags_));
fprintf(stderr,"%s, ",open_accmode_to_str(flags_));
for(int i = 0; i < (sizeof(flags_) * 8); i++)
{
const char *str;
@ -212,7 +222,7 @@ debug_open_flags(const uint32_t flags_)
if(str == NULL)
continue;
fprintf(stderr,"%s,",str);
fprintf(stderr,"%s, ",str);
}
}
@ -717,28 +727,31 @@ debug_fuse_fallocate_in(const void *arg_)
void
debug_fuse_init_in(const struct fuse_init_in *arg_)
{
uint64_t flags;
flags = (((uint64_t)arg_->flags) | ((uint64_t)arg_->flags2) << 32);
fprintf(g_OUTPUT,
"FUSE_INIT_IN: "
" major=%u;"
" minor=%u;"
" max_readahead=%u;"
" flags=0x%08X (",
" flags=0x%016lx (",
arg_->major,
arg_->minor,
arg_->max_readahead,
arg_->flags);
for(uint64_t i = 0; i < (sizeof(arg_->flags)*8); i++)
flags);
for(uint64_t i = 0; i < (sizeof(flags)*8); i++)
{
const char *str;
if(!(arg_->flags & (1ULL << i)))
if(!(flags & (1ULL << i)))
continue;
str = fuse_flag_to_str(i);
if(str == NULL)
continue;
fprintf(g_OUTPUT,"%s,",str);
fprintf(g_OUTPUT,"%s, ",str);
}
fprintf(g_OUTPUT,")\n");
}
@ -748,7 +761,10 @@ debug_fuse_init_out(const uint64_t unique_,
const struct fuse_init_out *arg_,
const uint64_t argsize_)
{
uint64_t flags;
const struct fuse_init_out *arg = arg_;
flags = (((uint64_t)arg->flags) | ((uint64_t)arg->flags2) << 32);
fprintf(g_OUTPUT,
/* "unique=0x%016"PRIx64";" */
/* " opcode=RESPONSE;" */
@ -758,27 +774,27 @@ debug_fuse_init_out(const uint64_t unique_,
" major=%u;"
" minor=%u;"
" max_readahead=%u;"
" flags=0x%08X ("
" flags=0x%016lx ("
,
/* unique_, */
/* sizeof(struct fuse_out_header) + argsize_, */
arg->major,
arg->minor,
arg->max_readahead,
arg->flags);
flags);
for(uint64_t i = 0; i < (sizeof(arg->flags)*8); i++)
for(uint64_t i = 0; i < (sizeof(flags)*8); i++)
{
const char *str;
if(!(arg->flags & (1ULL << i)))
if(!(flags & (1ULL << i)))
continue;
str = fuse_flag_to_str(i);
if(str == NULL)
continue;
fprintf(g_OUTPUT,"%s,",str);
fprintf(g_OUTPUT,"%s, ",str);
}
fprintf(g_OUTPUT,

View File

@ -1097,6 +1097,11 @@ do_init(fuse_req_t req,
struct fuse_init_in *arg = (struct fuse_init_in *) &hdr_[1];
struct fuse_ll *f = req->f;
size_t bufsize = fuse_chan_bufsize(req->ch);
uint64_t inargflags;
uint64_t outargflags;
inargflags = 0;
outargflags = 0;
if(f->debug)
debug_fuse_init_in(arg);
@ -1127,40 +1132,49 @@ do_init(fuse_req_t req,
if(arg->minor >= 6)
{
inargflags = arg->flags;
if(inargflags & FUSE_INIT_EXT)
inargflags |= (((uint64_t)arg->flags2) << 32);
if(arg->max_readahead < f->conn.max_readahead)
f->conn.max_readahead = arg->max_readahead;
if(arg->flags & FUSE_ASYNC_READ)
if(inargflags & FUSE_ASYNC_READ)
f->conn.capable |= FUSE_CAP_ASYNC_READ;
if(arg->flags & FUSE_POSIX_LOCKS)
if(inargflags & FUSE_POSIX_LOCKS)
f->conn.capable |= FUSE_CAP_POSIX_LOCKS;
if(arg->flags & FUSE_ATOMIC_O_TRUNC)
if(inargflags & FUSE_ATOMIC_O_TRUNC)
f->conn.capable |= FUSE_CAP_ATOMIC_O_TRUNC;
if(arg->flags & FUSE_EXPORT_SUPPORT)
if(inargflags & FUSE_EXPORT_SUPPORT)
f->conn.capable |= FUSE_CAP_EXPORT_SUPPORT;
if(arg->flags & FUSE_BIG_WRITES)
if(inargflags & FUSE_BIG_WRITES)
f->conn.capable |= FUSE_CAP_BIG_WRITES;
if(arg->flags & FUSE_DONT_MASK)
if(inargflags & FUSE_DONT_MASK)
f->conn.capable |= FUSE_CAP_DONT_MASK;
if(arg->flags & FUSE_FLOCK_LOCKS)
if(inargflags & FUSE_FLOCK_LOCKS)
f->conn.capable |= FUSE_CAP_FLOCK_LOCKS;
if(arg->flags & FUSE_POSIX_ACL)
if(inargflags & FUSE_POSIX_ACL)
f->conn.capable |= FUSE_CAP_POSIX_ACL;
if(arg->flags & FUSE_CACHE_SYMLINKS)
if(inargflags & FUSE_CACHE_SYMLINKS)
f->conn.capable |= FUSE_CAP_CACHE_SYMLINKS;
if(arg->flags & FUSE_ASYNC_DIO)
if(inargflags & FUSE_ASYNC_DIO)
f->conn.capable |= FUSE_CAP_ASYNC_DIO;
if(arg->flags & FUSE_PARALLEL_DIROPS)
if(inargflags & FUSE_PARALLEL_DIROPS)
f->conn.capable |= FUSE_CAP_PARALLEL_DIROPS;
if(arg->flags & FUSE_MAX_PAGES)
if(inargflags & FUSE_MAX_PAGES)
f->conn.capable |= FUSE_CAP_MAX_PAGES;
if(arg->flags & FUSE_WRITEBACK_CACHE)
if(inargflags & FUSE_WRITEBACK_CACHE)
f->conn.capable |= FUSE_CAP_WRITEBACK_CACHE;
if(arg->flags & FUSE_DO_READDIRPLUS)
if(inargflags & FUSE_DO_READDIRPLUS)
f->conn.capable |= FUSE_CAP_READDIR_PLUS;
if(arg->flags & FUSE_READDIRPLUS_AUTO)
if(inargflags & FUSE_READDIRPLUS_AUTO)
f->conn.capable |= FUSE_CAP_READDIR_PLUS_AUTO;
if(arg->flags & FUSE_SETXATTR_EXT)
if(inargflags & FUSE_SETXATTR_EXT)
f->conn.capable |= FUSE_CAP_SETXATTR_EXT;
if(inargflags & FUSE_DIRECT_IO_ALLOW_MMAP)
f->conn.capable |= FUSE_CAP_DIRECT_IO_ALLOW_MMAP;
if(inargflags & FUSE_CREATE_SUPP_GROUP)
f->conn.capable |= FUSE_CAP_CREATE_SUPP_GROUP;
}
else
{
@ -1191,44 +1205,57 @@ do_init(fuse_req_t req,
if(f->op.init)
f->op.init(f->userdata, &f->conn);
if((arg->flags & FUSE_MAX_PAGES) && (f->conn.want & FUSE_CAP_MAX_PAGES))
outargflags = outarg.flags;
if((inargflags & FUSE_MAX_PAGES) && (f->conn.want & FUSE_CAP_MAX_PAGES))
{
outarg.flags |= FUSE_MAX_PAGES;
outargflags |= FUSE_MAX_PAGES;
outarg.max_pages = f->conn.max_pages;
msgbuf_set_bufsize(outarg.max_pages + 1);
}
if(f->conn.want & FUSE_CAP_ASYNC_READ)
outarg.flags |= FUSE_ASYNC_READ;
outargflags |= FUSE_ASYNC_READ;
if(f->conn.want & FUSE_CAP_POSIX_LOCKS)
outarg.flags |= FUSE_POSIX_LOCKS;
outargflags |= FUSE_POSIX_LOCKS;
if(f->conn.want & FUSE_CAP_ATOMIC_O_TRUNC)
outarg.flags |= FUSE_ATOMIC_O_TRUNC;
outargflags |= FUSE_ATOMIC_O_TRUNC;
if(f->conn.want & FUSE_CAP_EXPORT_SUPPORT)
outarg.flags |= FUSE_EXPORT_SUPPORT;
outargflags |= FUSE_EXPORT_SUPPORT;
if(f->conn.want & FUSE_CAP_BIG_WRITES)
outarg.flags |= FUSE_BIG_WRITES;
outargflags |= FUSE_BIG_WRITES;
if(f->conn.want & FUSE_CAP_DONT_MASK)
outarg.flags |= FUSE_DONT_MASK;
outargflags |= FUSE_DONT_MASK;
if(f->conn.want & FUSE_CAP_FLOCK_LOCKS)
outarg.flags |= FUSE_FLOCK_LOCKS;
outargflags |= FUSE_FLOCK_LOCKS;
if(f->conn.want & FUSE_CAP_POSIX_ACL)
outarg.flags |= FUSE_POSIX_ACL;
outargflags |= FUSE_POSIX_ACL;
if(f->conn.want & FUSE_CAP_CACHE_SYMLINKS)
outarg.flags |= FUSE_CACHE_SYMLINKS;
outargflags |= FUSE_CACHE_SYMLINKS;
if(f->conn.want & FUSE_CAP_ASYNC_DIO)
outarg.flags |= FUSE_ASYNC_DIO;
outargflags |= FUSE_ASYNC_DIO;
if(f->conn.want & FUSE_CAP_PARALLEL_DIROPS)
outarg.flags |= FUSE_PARALLEL_DIROPS;
outargflags |= FUSE_PARALLEL_DIROPS;
if(f->conn.want & FUSE_CAP_WRITEBACK_CACHE)
outarg.flags |= FUSE_WRITEBACK_CACHE;
outargflags |= FUSE_WRITEBACK_CACHE;
if(f->conn.want & FUSE_CAP_READDIR_PLUS)
outarg.flags |= FUSE_DO_READDIRPLUS;
outargflags |= FUSE_DO_READDIRPLUS;
if(f->conn.want & FUSE_CAP_READDIR_PLUS_AUTO)
outarg.flags |= FUSE_READDIRPLUS_AUTO;
outargflags |= FUSE_READDIRPLUS_AUTO;
if(f->conn.want & FUSE_CAP_SETXATTR_EXT)
outarg.flags |= FUSE_SETXATTR_EXT;
outargflags |= FUSE_SETXATTR_EXT;
if(f->conn.want & FUSE_CAP_CREATE_SUPP_GROUP)
outargflags |= FUSE_CREATE_SUPP_GROUP;
if(f->conn.want & FUSE_CAP_DIRECT_IO_ALLOW_MMAP)
outargflags |= FUSE_DIRECT_IO_ALLOW_MMAP;
if(inargflags & FUSE_INIT_EXT)
{
outargflags |= FUSE_INIT_EXT;
outarg.flags2 = (outargflags >> 32);
}
outarg.flags = outargflags;
outarg.max_readahead = f->conn.max_readahead;
outarg.max_write = f->conn.max_write;
@ -1247,6 +1274,9 @@ do_init(fuse_req_t req,
outarg.congestion_threshold = f->conn.congestion_threshold;
}
if(f->conn.proto_minor >= 23)
outarg.time_gran = 1;
size_t outargsize;
if(arg->minor < 5)
outargsize = FUSE_COMPAT_INIT_OUT_SIZE;

View File

@ -106,16 +106,22 @@ If you don\[cq]t already know that you have a special use case then just
start with one of the following option sets.
.SS You need \f[C]mmap\f[R] (used by rtorrent and much sqlite3-based software)
.PP
\f[C]cache.files=partial,dropcacheonclose=true,category.create=mfs\f[R]
\f[C]cache.files=auto-full,dropcacheonclose=true,category.create=mfs\f[R]
.PP
or, if you are on a Linux kernel >= 6.6.x, mergerfs will enable a mode
that allows shared mmap when \f[C]cache.files=off\f[R].
To find out which of \f[C]cache.files=off\f[R] and
\f[C]cache.files=auto-full\f[R] performs better for your workload
you\[cq]ll need to do your own benchmarking, but \f[C]off\f[R] is often faster.
.SS You don\[cq]t need \f[C]mmap\f[R]
.PP
\f[C]cache.files=off,dropcacheonclose=true,category.create=mfs\f[R]
.SS Command Line
.PP
\f[C]mergerfs -o cache.files=partial,dropcacheonclose=true,category.create=mfs /mnt/hdd0:/mnt/hdd1 /media\f[R]
\f[C]mergerfs -o cache.files=auto-full,dropcacheonclose=true,category.create=mfs /mnt/hdd0:/mnt/hdd1 /media\f[R]
.SS /etc/fstab
.PP
\f[C]/mnt/hdd0:/mnt/hdd1 /media mergerfs cache.files=partial,dropcacheonclose=true,category.create=mfs 0 0\f[R]
\f[C]/mnt/hdd0:/mnt/hdd1 /media mergerfs cache.files=auto-full,dropcacheonclose=true,category.create=mfs 0 0\f[R]
.SS systemd mount
.PP
https://github.com/trapexit/mergerfs/wiki/systemd
@ -130,7 +136,7 @@ Type=simple
KillMode=none
ExecStart=/usr/bin/mergerfs \[rs]
-f \[rs]
-o cache.files=partial \[rs]
-o cache.files=auto-full \[rs]
-o dropcacheonclose=true \[rs]
-o category.create=mfs \[rs]
/mnt/hdd0:/mnt/hdd1 \[rs]
@ -185,6 +191,13 @@ Recommended when
caching.
(default: false)
.IP \[bu] 2
\f[B]direct-io-allow-mmap=BOOL\f[R]: On newer kernels (>= 6.6) it is
possible to disable file page caching while still allowing for shared
mmap support.
mergerfs will enable this feature if available but an option is provided
to turn it off for testing and debugging purposes.
(default: true)
.IP \[bu] 2
\f[B]symlinkify=BOOL\f[R]: When enabled and a file is not writable and
its mtime or ctime is older than \f[B]symlinkify_timeout\f[R] files will
be reported as symlinks to the original files.
@ -2396,8 +2409,8 @@ Take a look at the section on NFS in the #remote-filesystems for more
details.
.SS rtorrent fails with ENODEV (No such device)
.PP
Be sure to set \f[C]cache.files=partial|full|auto-full|per-process\f[R]
or turn off \f[C]direct_io\f[R].
Be sure to set
\f[C]cache.files=partial|full|auto-full|per-process\f[R].
rtorrent and some other applications use
mmap (http://linux.die.net/man/2/mmap) to read and write to files and
offer no fallback to traditional methods.

View File

@ -58,6 +58,7 @@ namespace l
IFERT("branches-mount-timeout");
IFERT("cache.symlinks");
IFERT("cache.writeback");
IFERT("direct-io-allow-mmap");
IFERT("export-support");
IFERT("fsname");
IFERT("fuse_msg_size");
@ -94,6 +95,7 @@ Config::Config()
cache_symlinks(false),
category(func),
direct_io(false),
direct_io_allow_mmap(true),
dropcacheonclose(false),
export_support(true),
flushonclose(FlushOnClose::ENUM::OPENED_FOR_WRITE),
@ -150,6 +152,7 @@ Config::Config()
_map["category.create"] = &category.create;
_map["category.search"] = &category.search;
_map["direct_io"] = &direct_io;
_map["direct-io-allow-mmap"] = &direct_io_allow_mmap;
_map["dropcacheonclose"] = &dropcacheonclose;
_map["export-support"] = &export_support;
_map["flush-on-close"] = &flushonclose;

View File

@ -117,6 +117,7 @@ public:
ConfigBOOL cache_symlinks;
Categories category;
ConfigBOOL direct_io;
ConfigBOOL direct_io_allow_mmap;
ConfigBOOL dropcacheonclose;
ConfigBOOL export_support;
FlushOnClose flushonclose;

View File

@ -144,14 +144,15 @@ namespace FUSE
l::want_if_capable(conn_,FUSE_CAP_ATOMIC_O_TRUNC);
l::want_if_capable(conn_,FUSE_CAP_BIG_WRITES);
l::want_if_capable(conn_,FUSE_CAP_CACHE_SYMLINKS,&cfg->cache_symlinks);
l::want_if_capable(conn_,FUSE_CAP_DIRECT_IO_ALLOW_MMAP,&cfg->direct_io_allow_mmap);
l::want_if_capable(conn_,FUSE_CAP_DONT_MASK);
l::want_if_capable(conn_,FUSE_CAP_EXPORT_SUPPORT,&cfg->export_support);
l::want_if_capable(conn_,FUSE_CAP_IOCTL_DIR);
l::want_if_capable(conn_,FUSE_CAP_PARALLEL_DIROPS);
l::want_if_capable(conn_,FUSE_CAP_POSIX_ACL,&cfg->posix_acl);
l::want_if_capable(conn_,FUSE_CAP_READDIR_PLUS,&cfg->readdirplus);
// l::want_if_capable(conn_,FUSE_CAP_READDIR_PLUS_AUTO);
l::want_if_capable(conn_,FUSE_CAP_WRITEBACK_CACHE,&cfg->writeback_cache);
// l::want_if_capable(conn_,FUSE_CAP_READDIR_PLUS_AUTO);
l::want_if_capable_max_pages(conn_,cfg);
conn_->want &= ~FUSE_CAP_POSIX_LOCKS;
conn_->want &= ~FUSE_CAP_FLOCK_LOCKS;