fs: allow setting a write buffer for multithread

When multi-thread downloading is enabled, rclone used to send a write to disk after every read, resulting in a lot of small writes to different locations of the file. Depending on the underlying filesystem or device, it can be more efficient to send bigger writes.
2024-11-29 03:48:27 +08:00 · 2023-06-02 14:00:06 +02:00 · 2023-06-02 14:00:06 +02:00 · fcb912a664
commit fcb912a664
parent 5f938fb9ed
4 changed files with 160 additions and 100 deletions
--- a/docs/content/docs.md
+++ b/docs/content/docs.md
@ -1511,6 +1511,25 @@ if you are reading and writing to an OS X filing system this will be

 This command line flag allows you to override that computed default.

+### --multi-thread-write-buffer-size=SIZE ###
+
+When downloading with multiple threads, rclone will buffer SIZE bytes in
+memory before writing to disk for each thread.
+
+This can improve performance if the underlying filesystem does not deal
+well with a lot of small writes in different positions of the file, so
+if you see downloads being limited by disk write speed, you might want
+to experiment with different values. Especially for magnetic drives and
+remote file systems, a higher value can be useful.
+
+Nevertheless, the default of `128k` should be fine for almost all use
+cases, so before changing it make sure that the network is not actually
+your bottleneck.
+
+As a final hint, size is not the only factor: block size (or a similar
+concept) can have an impact. In one case, we observed that exact
+multiples of 16k performed much better than other values.
+
 ### --multi-thread-cutoff=SIZE ###

 When downloading files to the local backend above this size, rclone
--- a/fs/config.go
+++ b/fs/config.go
@ -94,6 +94,7 @@ type ConfigInfo struct {
 	SuffixKeepExtension        bool
 	UseListR                   bool
 	BufferSize                 SizeSuffix
+	MultiThreadWriteBufferSize SizeSuffix
 	BwLimit                    BwTimetable
 	BwLimitFile                BwTimetable
 	TPSLimit                   float64
@ -170,6 +171,7 @@ func NewConfig() *ConfigInfo {
 	c.MaxDepth = -1
 	c.DataRateUnit = "bytes"
 	c.BufferSize = SizeSuffix(16 << 20)
+	c.MultiThreadWriteBufferSize = SizeSuffix(128 * 1024)
 	c.UserAgent = "rclone/" + Version
 	c.StreamingUploadCutoff = SizeSuffix(100 * 1024)
 	c.MaxStatsGroups = 1000
--- a/fs/config/configflags/configflags.go
+++ b/fs/config/configflags/configflags.go
@ -126,6 +126,7 @@ func AddFlags(ci *fs.ConfigInfo, flagSet *pflag.FlagSet) {
 	flags.StringVarP(flagSet, &ci.ClientKey, "client-key", "", ci.ClientKey, "Client SSL private key (PEM) for mutual TLS auth")
 	flags.FVarP(flagSet, &ci.MultiThreadCutoff, "multi-thread-cutoff", "", "Use multi-thread downloads for files above this size")
 	flags.IntVarP(flagSet, &ci.MultiThreadStreams, "multi-thread-streams", "", ci.MultiThreadStreams, "Max number of streams to use for multi-thread downloads")
+	flags.FVarP(flagSet, &ci.MultiThreadWriteBufferSize, "multi-thread-write-buffer-size", "", "In memory buffer size for writing when in multi-thread mode")
 	flags.BoolVarP(flagSet, &ci.UseJSONLog, "use-json-log", "", ci.UseJSONLog, "Use json log format")
 	flags.StringVarP(flagSet, &ci.OrderBy, "order-by", "", ci.OrderBy, "Instructions on how to order the transfers, e.g. 'size,descending'")
 	flags.StringArrayVarP(flagSet, &uploadHeaders, "header-upload", "", nil, "Set HTTP header for upload transactions")
--- a/fs/operations/multithread.go
+++ b/fs/operations/multithread.go
@ -1,6 +1,7 @@
 package operations

 import (
+	"bufio"
 	"context"
 	"errors"
 	"fmt"
@ -14,9 +15,30 @@ import (
 const (
 	multithreadChunkSize      = 64 << 10
 	multithreadChunkSizeMask  = multithreadChunkSize - 1
-	multithreadBufferSize    = 32 * 1024
+	multithreadReadBufferSize = 32 * 1024
 )

+// An offsetWriter maps writes at offset base to offset base+off in the underlying writer.
+//
+// Modified from the go source code. Can be replaced with
+// io.OffsetWriter when we no longer need to support go1.19
+type offsetWriter struct {
+	w   io.WriterAt
+	off int64 // the current offset
+}
+
+// newOffsetWriter returns an offsetWriter that writes to w
+// starting at offset off.
+func newOffsetWriter(w io.WriterAt, off int64) *offsetWriter {
+	return &offsetWriter{w, off}
+}
+
+func (o *offsetWriter) Write(p []byte) (n int, err error) {
+	n, err = o.w.WriteAt(p, o.off)
+	o.off += int64(n)
+	return
+}
+
 // Return a boolean as to whether we should use multi thread copy for
 // this transfer
 func doMultiThreadCopy(ctx context.Context, f fs.Fs, src fs.Object) bool {
@ -62,6 +84,7 @@ type multiThreadCopyState struct {

 // Copy a single stream into place
 func (mc *multiThreadCopyState) copyStream(ctx context.Context, stream int) (err error) {
+	ci := fs.GetConfig(ctx)
 	defer func() {
 		if err != nil {
 			fs.Debugf(mc.src, "multi-thread copy: stream %d/%d failed: %v", stream+1, mc.streams, err)
@ -84,8 +107,13 @@ func (mc *multiThreadCopyState) copyStream(ctx context.Context, stream int) (err
 	}
 	defer fs.CheckClose(rc, &err)

+	var writer io.Writer = newOffsetWriter(mc.wc, start)
+	if ci.MultiThreadWriteBufferSize > 0 {
+		writer = bufio.NewWriterSize(writer, int(ci.MultiThreadWriteBufferSize))
+		fs.Debugf(mc.src, "multi-thread copy: write buffer set to %v", ci.MultiThreadWriteBufferSize)
+	}
 	// Copy the data
-	buf := make([]byte, multithreadBufferSize)
+	buf := make([]byte, multithreadReadBufferSize)
 	offset := start
 	for {
 		// Check if context cancelled and exit if so
@ -98,7 +126,7 @@ func (mc *multiThreadCopyState) copyStream(ctx context.Context, stream int) (err
 			if err != nil {
 				return fmt.Errorf("multipart copy: accounting failed: %w", err)
 			}
-			nw, ew := mc.wc.WriteAt(buf[0:nr], offset)
+			nw, ew := writer.Write(buf[0:nr])
 			if nw > 0 {
 				offset += int64(nw)
 			}
@ -113,6 +141,16 @@ func (mc *multiThreadCopyState) copyStream(ctx context.Context, stream int) (err
 			if er != io.EOF {
 				return fmt.Errorf("multipart copy: read failed: %w", er)
 			}
+
+			// if we were buffering, flush to disk
+			switch w := writer.(type) {
+			case *bufio.Writer:
+				er2 := w.Flush()
+				if er2 != nil {
+					return fmt.Errorf("multipart copy: flush failed: %w", er2)
+				}
+			}
+
 			break
 		}
 	}