fs: allow setting a write buffer for multithread

when multi-thread downloading is enabled, rclone used
to send a write to disk after every read, resulting in a lot
of small writes to different locations of the file.

depending on the underlying filesystem or device, it can be more
efficient to send bigger writes.
This commit is contained in:
Paulo Schreiner 2023-06-02 14:00:06 +02:00 committed by Nick Craig-Wood
parent 5f938fb9ed
commit fcb912a664
4 changed files with 160 additions and 100 deletions

View File

@ -1511,6 +1511,25 @@ if you are reading and writing to an OS X filing system this will be
This command line flag allows you to override that computed default.
### --multi-thread-write-buffer-size=SIZE ###
When downloading with multiple threads, rclone will buffer SIZE bytes in
memory before writing to disk for each thread.
This can improve performance if the underlying filesystem does not deal
well with a lot of small writes in different positions of the file, so
if you see downloads being limited by disk write speed, you might want
to experiment with different values. Specially for magnetic drives and
remote file systems a higher value can be useful.
Nevertheless, the default of `128k` should be fine for almost all use
cases, so before changing it ensure that network is not really your
bottleneck.
As a final hint, size is not the only factor: block size (or similar
concept) can have an impact. In one case, we observed that exact
multiples of 16k performed much better than other values.
### --multi-thread-cutoff=SIZE ###
When downloading files to the local backend above this size, rclone

View File

@ -94,6 +94,7 @@ type ConfigInfo struct {
SuffixKeepExtension bool
UseListR bool
BufferSize SizeSuffix
MultiThreadWriteBufferSize SizeSuffix
BwLimit BwTimetable
BwLimitFile BwTimetable
TPSLimit float64
@ -170,6 +171,7 @@ func NewConfig() *ConfigInfo {
c.MaxDepth = -1
c.DataRateUnit = "bytes"
c.BufferSize = SizeSuffix(16 << 20)
c.MultiThreadWriteBufferSize = SizeSuffix(128 * 1024)
c.UserAgent = "rclone/" + Version
c.StreamingUploadCutoff = SizeSuffix(100 * 1024)
c.MaxStatsGroups = 1000

View File

@ -126,6 +126,7 @@ func AddFlags(ci *fs.ConfigInfo, flagSet *pflag.FlagSet) {
flags.StringVarP(flagSet, &ci.ClientKey, "client-key", "", ci.ClientKey, "Client SSL private key (PEM) for mutual TLS auth")
flags.FVarP(flagSet, &ci.MultiThreadCutoff, "multi-thread-cutoff", "", "Use multi-thread downloads for files above this size")
flags.IntVarP(flagSet, &ci.MultiThreadStreams, "multi-thread-streams", "", ci.MultiThreadStreams, "Max number of streams to use for multi-thread downloads")
flags.FVarP(flagSet, &ci.MultiThreadWriteBufferSize, "multi-thread-write-buffer-size", "", "In memory buffer size for writing when in multi-thread mode")
flags.BoolVarP(flagSet, &ci.UseJSONLog, "use-json-log", "", ci.UseJSONLog, "Use json log format")
flags.StringVarP(flagSet, &ci.OrderBy, "order-by", "", ci.OrderBy, "Instructions on how to order the transfers, e.g. 'size,descending'")
flags.StringArrayVarP(flagSet, &uploadHeaders, "header-upload", "", nil, "Set HTTP header for upload transactions")

View File

@ -1,6 +1,7 @@
package operations
import (
"bufio"
"context"
"errors"
"fmt"
@ -14,9 +15,30 @@ import (
const (
multithreadChunkSize = 64 << 10
multithreadChunkSizeMask = multithreadChunkSize - 1
multithreadBufferSize = 32 * 1024
multithreadReadBufferSize = 32 * 1024
)
// An offsetWriter maps writes at offset base to offset base+off in the underlying writer.
//
// Modified from the go source code. Can be replaced with
// io.OffsetWriter when we no longer need to support go1.19
type offsetWriter struct {
w io.WriterAt
off int64 // the current offset
}
// newOffsetWriter returns an offsetWriter that writes to w
// starting at offset off.
func newOffsetWriter(w io.WriterAt, off int64) *offsetWriter {
return &offsetWriter{w, off}
}
func (o *offsetWriter) Write(p []byte) (n int, err error) {
n, err = o.w.WriteAt(p, o.off)
o.off += int64(n)
return
}
// Return a boolean as to whether we should use multi thread copy for
// this transfer
func doMultiThreadCopy(ctx context.Context, f fs.Fs, src fs.Object) bool {
@ -62,6 +84,7 @@ type multiThreadCopyState struct {
// Copy a single stream into place
func (mc *multiThreadCopyState) copyStream(ctx context.Context, stream int) (err error) {
ci := fs.GetConfig(ctx)
defer func() {
if err != nil {
fs.Debugf(mc.src, "multi-thread copy: stream %d/%d failed: %v", stream+1, mc.streams, err)
@ -84,8 +107,13 @@ func (mc *multiThreadCopyState) copyStream(ctx context.Context, stream int) (err
}
defer fs.CheckClose(rc, &err)
var writer io.Writer = newOffsetWriter(mc.wc, start)
if ci.MultiThreadWriteBufferSize > 0 {
writer = bufio.NewWriterSize(writer, int(ci.MultiThreadWriteBufferSize))
fs.Debugf(mc.src, "multi-thread copy: write buffer set to %v", ci.MultiThreadWriteBufferSize)
}
// Copy the data
buf := make([]byte, multithreadBufferSize)
buf := make([]byte, multithreadReadBufferSize)
offset := start
for {
// Check if context cancelled and exit if so
@ -98,7 +126,7 @@ func (mc *multiThreadCopyState) copyStream(ctx context.Context, stream int) (err
if err != nil {
return fmt.Errorf("multipart copy: accounting failed: %w", err)
}
nw, ew := mc.wc.WriteAt(buf[0:nr], offset)
nw, ew := writer.Write(buf[0:nr])
if nw > 0 {
offset += int64(nw)
}
@ -113,6 +141,16 @@ func (mc *multiThreadCopyState) copyStream(ctx context.Context, stream int) (err
if er != io.EOF {
return fmt.Errorf("multipart copy: read failed: %w", er)
}
// if we were buffering, flush do disk
switch w := writer.(type) {
case *bufio.Writer:
er2 := w.Flush()
if er2 != nil {
return fmt.Errorf("multipart copy: flush failed: %w", er2)
}
}
break
}
}