mirror of
https://github.com/trapexit/mergerfs.git
synced 2025-02-02 08:07:03 +08:00
Merge pull request #798 from trapexit/inodecalc-32bit
inodecalc: add 32bit versions of hashes
This commit is contained in:
commit
58a8f8326d
12
README.md
12
README.md
|
@ -1,6 +1,6 @@
|
|||
% mergerfs(1) mergerfs user manual
|
||||
% Antonio SJ Musumeci <trapexit@spawn.link>
|
||||
% 2020-08-02
|
||||
% 2020-08-03
|
||||
|
||||
# NAME
|
||||
|
||||
|
@ -183,8 +183,13 @@ mergerfs offers multiple ways to calculate the inode in hopes of covering differ
|
|||
|
||||
* passthrough: Passes through the underlying inode value. Mostly intended for testing as using this does not address any of the problems mentioned above and could confuse file deduplication software as inodes from different filesystems can be the same.
|
||||
* path-hash: Hashes the relative path of the entry in question. The underlying file's values are completely ignored. This means the inode value will always be the same for that file path. This is useful when using NFS and you make changes out of band such as copying data between branches. This also means that entries that do point to the same file will not be recognizable via inodes. That **does not** mean hard links don't work. They will.
|
||||
* path-hash32: 32bit version of path-hash.
|
||||
* devino-hash: Hashes the device id and inode of the underlying entry. This won't prevent issues with NFS should the policy pick a different file or files move out of band but will present the same inode for underlying files that do too.
|
||||
* devino-hash32: 32bit version of devino-hash.
|
||||
* hybrid-hash: Performs `path-hash` on directories and `devino-hash` on other file types. Since directories can't have hard links the static value won't make a difference and the files will get values useful for finding duplicates. Probably the best to use if not using NFS. As such it is the default.
|
||||
* hybrid-hash32: 32bit version of hybrid-hash.
|
||||
|
||||
32bit versions are provided as there is some software which does not handle 64bit inodes well.
|
||||
|
||||
While there is a risk of hash collision, in tests of a couple million entries there were zero collisions. Unlike a typical filesystem, FUSE filesystems can reuse inodes and not refer to the same entry. The internal identifier used to reference a file in FUSE is different from the inode value presented. The former is the `nodeid` and is actually a tuple of 2 64bit values: `nodeid` and `generation`. This tuple is not client facing. The inode that is presented to the client is passed through the kernel uninterpreted.
|
||||
|
||||
|
@ -894,6 +899,11 @@ The problem is that many applications do not properly handle `EXDEV` errors whic
|
|||
Ideally the offending software would be fixed and it is recommended that if you run into this problem you contact the software's author and request proper handling of `EXDEV` errors.
|
||||
|
||||
|
||||
#### my 32bit software has problems
|
||||
|
||||
Some software has problems with 64bit inode values. The symptoms can include EOVERFLOW errors when trying to list files. You can address this by setting `inodecalc` to one of the 32bit based algos as described in the relevant section.
|
||||
|
||||
|
||||
#### Samba: Moving files / directories fails
|
||||
|
||||
Workaround: Copy the file/directory and then remove the original rather than move.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
.\"t
|
||||
.\" Automatically generated by Pandoc 1.19.2.4
|
||||
.\"
|
||||
.TH "mergerfs" "1" "2020\-08\-02" "mergerfs user manual" ""
|
||||
.TH "mergerfs" "1" "2020\-08\-03" "mergerfs user manual" ""
|
||||
.hy
|
||||
.SH NAME
|
||||
.PP
|
||||
|
@ -426,11 +426,15 @@ recognizable via inodes.
|
|||
That \f[B]does not\f[] mean hard links don\[aq]t work.
|
||||
They will.
|
||||
.IP \[bu] 2
|
||||
path\-hash32: 32bit version of path\-hash.
|
||||
.IP \[bu] 2
|
||||
devino\-hash: Hashes the device id and inode of the underlying entry.
|
||||
This won\[aq]t prevent issues with NFS should the policy pick a
|
||||
different file or files move out of band but will present the same inode
|
||||
for underlying files that do too.
|
||||
.IP \[bu] 2
|
||||
devino\-hash32: 32bit version of devino\-hash.
|
||||
.IP \[bu] 2
|
||||
hybrid\-hash: Performs \f[C]path\-hash\f[] on directories and
|
||||
\f[C]devino\-hash\f[] on other file types.
|
||||
Since directories can\[aq]t have hard links the static value won\[aq]t
|
||||
|
@ -438,6 +442,11 @@ make a difference and the files will get values useful for finding
|
|||
duplicates.
|
||||
Probably the best to use if not using NFS.
|
||||
As such it is the default.
|
||||
.IP \[bu] 2
|
||||
hybrid\-hash32: 32bit version of hybrid\-hash.
|
||||
.PP
|
||||
32bit versions are provided as there is some software which does not
|
||||
handle 64bit inodes well.
|
||||
.PP
|
||||
While there is a risk of hash collision, in tests of a couple million
|
||||
entries there were zero collisions.
|
||||
|
@ -1967,6 +1976,12 @@ For example: \f[C]\-o\ category.create=mfs\f[]
|
|||
Ideally the offending software would be fixed and it is recommended that
|
||||
if you run into this problem you contact the software\[aq]s author and
|
||||
request proper handling of \f[C]EXDEV\f[] errors.
|
||||
.SS my 32bit software has problems
|
||||
.PP
|
||||
Some software has problems with 64bit inode values.
|
||||
The symptoms can include EOVERFLOW errors when trying to list files.
|
||||
You can address this by setting \f[C]inodecalc\f[] to one of the 32bit
|
||||
based algos as described in the relevant section.
|
||||
.SS Samba: Moving files / directories fails
|
||||
.PP
|
||||
Workaround: Copy the file/directory and then remove the original rather
|
||||
|
|
|
@ -34,6 +34,14 @@ static uint64_t hybrid_hash(const char*,const uint64_t,const mode_t,const dev_t,
|
|||
|
||||
static inodefunc_t g_func = hybrid_hash;
|
||||
|
||||
|
||||
// Folds a 64bit hash value down to 32 bits.
//
// The upper 32 bits of h_ are subtracted from the full 64bit value and
// the low 32 bits of that difference are returned.
static
uint32_t
h64_to_h32(uint64_t h_)
{
  const uint64_t upper  = (h_ >> 32);
  const uint64_t folded = (h_ - upper);

  // Narrowing to 32 bits is intentional: only the low word is kept.
  return static_cast<uint32_t>(folded);
}
|
||||
|
||||
static
|
||||
uint64_t
|
||||
passthrough(const char *fusepath_,
|
||||
|
@ -58,6 +66,25 @@ path_hash(const char *fusepath_,
|
|||
fs::inode::MAGIC);
|
||||
}
|
||||
|
||||
static
|
||||
uint64_t
|
||||
path_hash32(const char *fusepath_,
|
||||
const uint64_t fusepath_len_,
|
||||
const mode_t mode_,
|
||||
const dev_t dev_,
|
||||
const ino_t ino_)
|
||||
{
|
||||
uint64_t h;
|
||||
|
||||
h = path_hash(fusepath_,
|
||||
fusepath_len_,
|
||||
mode_,
|
||||
dev_,
|
||||
ino_);
|
||||
|
||||
return h64_to_h32(h);
|
||||
}
|
||||
|
||||
static
|
||||
uint64_t
|
||||
devino_hash(const char *fusepath_,
|
||||
|
@ -76,6 +103,25 @@ devino_hash(const char *fusepath_,
|
|||
fs::inode::MAGIC);
|
||||
}
|
||||
|
||||
static
|
||||
uint64_t
|
||||
devino_hash32(const char *fusepath_,
|
||||
const uint64_t fusepath_len_,
|
||||
const mode_t mode_,
|
||||
const dev_t dev_,
|
||||
const ino_t ino_)
|
||||
{
|
||||
uint64_t h;
|
||||
|
||||
h = devino_hash(fusepath_,
|
||||
fusepath_len_,
|
||||
mode_,
|
||||
dev_,
|
||||
ino_);
|
||||
|
||||
return h64_to_h32(h);
|
||||
}
|
||||
|
||||
static
|
||||
uint64_t
|
||||
hybrid_hash(const char *fusepath_,
|
||||
|
@ -89,6 +135,18 @@ hybrid_hash(const char *fusepath_,
|
|||
devino_hash(fusepath_,fusepath_len_,mode_,dev_,ino_));
|
||||
}
|
||||
|
||||
static
|
||||
uint64_t
|
||||
hybrid_hash32(const char *fusepath_,
|
||||
const uint64_t fusepath_len_,
|
||||
const mode_t mode_,
|
||||
const dev_t dev_,
|
||||
const ino_t ino_)
|
||||
{
|
||||
return (S_ISDIR(mode_) ?
|
||||
path_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_) :
|
||||
devino_hash32(fusepath_,fusepath_len_,mode_,dev_,ino_));
|
||||
}
|
||||
|
||||
namespace fs
|
||||
{
|
||||
|
@ -101,10 +159,16 @@ namespace fs
|
|||
g_func = passthrough;
|
||||
ef(algo_ == "path-hash")
|
||||
g_func = path_hash;
|
||||
ef(algo_ == "path-hash32")
|
||||
g_func = path_hash32;
|
||||
ef(algo_ == "devino-hash")
|
||||
g_func = devino_hash;
|
||||
ef(algo_ == "devino-hash32")
|
||||
g_func = devino_hash32;
|
||||
ef(algo_ == "hybrid-hash")
|
||||
g_func = hybrid_hash;
|
||||
ef(algo_ == "hybrid-hash32")
|
||||
g_func = hybrid_hash32;
|
||||
else
|
||||
return -EINVAL;
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user