diff --git a/README.md b/README.md index 01776699e..7f2302f89 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ Rclone *("rsync for cloud storage")* is a command-line program to sync files and * Google Drive [:page_facing_up:](https://rclone.org/drive/) * Google Photos [:page_facing_up:](https://rclone.org/googlephotos/) * HDFS (Hadoop Distributed Filesystem) [:page_facing_up:](https://rclone.org/hdfs/) + * HiDrive [:page_facing_up:](https://rclone.org/hidrive/) * HTTP [:page_facing_up:](https://rclone.org/http/) * Huawei Cloud Object Storage Service(OBS) [:page_facing_up:](https://rclone.org/s3/#huawei-obs) * Hubic [:page_facing_up:](https://rclone.org/hubic/) diff --git a/backend/all/all.go b/backend/all/all.go index 44414b287..dc17911e4 100644 --- a/backend/all/all.go +++ b/backend/all/all.go @@ -21,6 +21,7 @@ import ( _ "github.com/rclone/rclone/backend/googlephotos" _ "github.com/rclone/rclone/backend/hasher" _ "github.com/rclone/rclone/backend/hdfs" + _ "github.com/rclone/rclone/backend/hidrive" _ "github.com/rclone/rclone/backend/http" _ "github.com/rclone/rclone/backend/hubic" _ "github.com/rclone/rclone/backend/internetarchive" diff --git a/backend/hidrive/api/queries.go b/backend/hidrive/api/queries.go new file mode 100644 index 000000000..57a1477c1 --- /dev/null +++ b/backend/hidrive/api/queries.go @@ -0,0 +1,81 @@ +package api + +import ( + "encoding/json" + "net/url" + "path" + "strings" + "time" +) + +// Some presets for different amounts of information that can be requested for fields; +// it is recommended to only request the information that is actually needed. +var ( + HiDriveObjectNoMetadataFields = []string{"name", "type"} + HiDriveObjectWithMetadataFields = append(HiDriveObjectNoMetadataFields, "id", "size", "mtime", "chash") + HiDriveObjectWithDirectoryMetadataFields = append(HiDriveObjectWithMetadataFields, "nmembers") + DirectoryContentFields = []string{"nmembers"} +) + +// QueryParameters represents the parameters passed to an API-call. +type QueryParameters struct { + url.Values +} + +// NewQueryParameters initializes an instance of QueryParameters and +// returns a pointer to it. +func NewQueryParameters() *QueryParameters { + return &QueryParameters{url.Values{}} +} + +// SetFileInDirectory sets the appropriate parameters +// to specify a path to a file in a directory. +// This is used by requests that work with paths for files that do not exist yet. +// (For example when creating a file). +// Most requests use the format produced by SetPath(...). +func (p *QueryParameters) SetFileInDirectory(filePath string) { + directory, file := path.Split(path.Clean(filePath)) + p.Set("dir", path.Clean(directory)) + p.Set("name", file) + // NOTE: It would be possible to switch to pid-based requests + // by modifying this function. +} + +// SetPath sets the appropriate parameters to access the given path. +func (p *QueryParameters) SetPath(objectPath string) { + p.Set("path", path.Clean(objectPath)) + // NOTE: It would be possible to switch to pid-based requests + // by modifying this function. +} + +// SetTime sets the key to the time-value. It replaces any existing values. +func (p *QueryParameters) SetTime(key string, value time.Time) error { + valueAPI := Time(value) + valueBytes, err := json.Marshal(&valueAPI) + if err != nil { + return err + } + p.Set(key, string(valueBytes)) + return nil +} + +// AddList adds the given values as a list +// with each value separated by the separator. +// It appends to any existing values associated with key. 
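+//
+// A brief usage sketch (hypothetical key and values) showing how repeated
+// calls accumulate entries:
+//
+//	p := NewQueryParameters()
+//	p.AddList("sort", ",", "name", "type")
+//	p.AddList("sort", ",", "size")
+//	// p.Get("sort") is now "name,type,size"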
+func (p *QueryParameters) AddList(key string, separator string, values ...string) { + original := p.Get(key) + p.Set(key, strings.Join(values, separator)) + if original != "" { + p.Set(key, original+separator+p.Get(key)) + } +} + +// AddFields sets the appropriate parameter to access the given fields. +// The given fields will be appended to any other existing fields. +func (p *QueryParameters) AddFields(prefix string, fields ...string) { + modifiedFields := make([]string, len(fields)) + for i, field := range fields { + modifiedFields[i] = prefix + field + } + p.AddList("fields", ",", modifiedFields...) +} diff --git a/backend/hidrive/api/types.go b/backend/hidrive/api/types.go new file mode 100644 index 000000000..4cc912a72 --- /dev/null +++ b/backend/hidrive/api/types.go @@ -0,0 +1,135 @@ +// Package api has type definitions and code related to API-calls for the HiDrive-API. +package api + +import ( + "encoding/json" + "fmt" + "net/url" + "strconv" + "time" +) + +// Time represents date and time information for the API. +type Time time.Time + +// MarshalJSON turns Time into JSON (in Unix-time/UTC). +func (t *Time) MarshalJSON() ([]byte, error) { + secs := time.Time(*t).Unix() + return []byte(strconv.FormatInt(secs, 10)), nil +} + +// UnmarshalJSON turns JSON into Time. +func (t *Time) UnmarshalJSON(data []byte) error { + secs, err := strconv.ParseInt(string(data), 10, 64) + if err != nil { + return err + } + *t = Time(time.Unix(secs, 0)) + return nil +} + +// Error is returned from the API when things go wrong. +type Error struct { + Code json.Number `json:"code"` + ContextInfo json.RawMessage + Message string `json:"msg"` +} + +// Error returns a string for the error and satisfies the error interface. +func (e *Error) Error() string { + out := fmt.Sprintf("Error %q", e.Code.String()) + if e.Message != "" { + out += ": " + e.Message + } + if e.ContextInfo != nil { + out += fmt.Sprintf(" (%+v)", e.ContextInfo) + } + return out +} + +// Check Error satisfies the error interface. +var _ error = (*Error)(nil) + +// possible types for HiDriveObject +const ( + HiDriveObjectTypeDirectory = "dir" + HiDriveObjectTypeFile = "file" + HiDriveObjectTypeSymlink = "symlink" +) + +// HiDriveObject describes a folder, a symlink or a file. +// Depending on the type and content, not all fields are present. +type HiDriveObject struct { + Type string `json:"type"` + ID string `json:"id"` + ParentID string `json:"parent_id"` + Name string `json:"name"` + Path string `json:"path"` + Size int64 `json:"size"` + MemberCount int64 `json:"nmembers"` + ModifiedAt Time `json:"mtime"` + ChangedAt Time `json:"ctime"` + MetaHash string `json:"mhash"` + MetaOnlyHash string `json:"mohash"` + NameHash string `json:"nhash"` + ContentHash string `json:"chash"` + IsTeamfolder bool `json:"teamfolder"` + Readable bool `json:"readable"` + Writable bool `json:"writable"` + Shareable bool `json:"shareable"` + MIMEType string `json:"mime_type"` +} + +// ModTime returns the modification time of the HiDriveObject. +func (i *HiDriveObject) ModTime() time.Time { + t := time.Time(i.ModifiedAt) + if t.IsZero() { + t = time.Time(i.ChangedAt) + } + return t +} + +// UnmarshalJSON turns JSON into HiDriveObject and +// introduces specific default-values where necessary. 
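+//
+// For example, a (hypothetical) response of {"name":"file.txt","type":"file"}
+// leaves Size and MemberCount at -1, so fields missing from the response can be
+// told apart from legitimate zero values.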
+func (i *HiDriveObject) UnmarshalJSON(data []byte) error { + type objectAlias HiDriveObject + defaultObject := objectAlias{ + Size: -1, + MemberCount: -1, + } + + err := json.Unmarshal(data, &defaultObject) + if err != nil { + return err + } + name, err := url.PathUnescape(defaultObject.Name) + if err == nil { + defaultObject.Name = name + } + + *i = HiDriveObject(defaultObject) + return nil +} + +// DirectoryContent describes the content of a directory. +type DirectoryContent struct { + TotalCount int64 `json:"nmembers"` + Entries []HiDriveObject `json:"members"` +} + +// UnmarshalJSON turns JSON into DirectoryContent and +// introduces specific default-values where necessary. +func (d *DirectoryContent) UnmarshalJSON(data []byte) error { + type directoryContentAlias DirectoryContent + defaultDirectoryContent := directoryContentAlias{ + TotalCount: -1, + } + + err := json.Unmarshal(data, &defaultDirectoryContent) + if err != nil { + return err + } + + *d = DirectoryContent(defaultDirectoryContent) + return nil +} diff --git a/backend/hidrive/helpers.go b/backend/hidrive/helpers.go new file mode 100644 index 000000000..7b925e1b3 --- /dev/null +++ b/backend/hidrive/helpers.go @@ -0,0 +1,888 @@ +package hidrive + +// This file is for helper-functions which may provide more general and +// specialized functionality than the generic interfaces. +// There are two sections: +// 1. methods bound to Fs +// 2. other functions independent from Fs used throughout the package + +// NOTE: Functions accessing paths expect any relative paths +// to be resolved prior to execution with resolvePath(...). + +import ( + "bytes" + "context" + "errors" + "io" + "net/http" + "path" + "strconv" + "sync" + "time" + + "github.com/rclone/rclone/backend/hidrive/api" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/accounting" + "github.com/rclone/rclone/fs/fserrors" + "github.com/rclone/rclone/lib/ranges" + "github.com/rclone/rclone/lib/readers" + "github.com/rclone/rclone/lib/rest" + "golang.org/x/sync/errgroup" + "golang.org/x/sync/semaphore" +) + +const ( + // MaximumUploadBytes represents the maximum amount of bytes + // a single upload-operation will support. + MaximumUploadBytes = 2147483647 // = 2GiB - 1 + // iterationChunkSize represents the chunk size used to iterate directory contents. + iterationChunkSize = 5000 +) + +var ( + // retryErrorCodes is a slice of error codes that we will always retry. + retryErrorCodes = []int{ + 429, // Too Many Requests + 500, // Internal Server Error + 502, // Bad Gateway + 503, // Service Unavailable + 504, // Gateway Timeout + 509, // Bandwidth Limit Exceeded + } + // ErrorFileExists is returned when a query tries to create a file + // that already exists. + ErrorFileExists = errors.New("destination file already exists") +) + +// MemberType represents the possible types of entries a directory can contain. +type MemberType string + +// possible values for MemberType +const ( + AllMembers MemberType = "all" + NoMembers MemberType = "none" + DirectoryMembers MemberType = api.HiDriveObjectTypeDirectory + FileMembers MemberType = api.HiDriveObjectTypeFile + SymlinkMembers MemberType = api.HiDriveObjectTypeSymlink +) + +// SortByField represents possible fields to sort entries of a directory by. 
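+// Values prefixed with "-" select descending order,
+// e.g. SortByNameDescending is the string "-name".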
+type SortByField string + +// possible values for SortByField +const ( + descendingSort string = "-" + SortByName SortByField = "name" + SortByModTime SortByField = "mtime" + SortByObjectType SortByField = "type" + SortBySize SortByField = "size" + SortByNameDescending SortByField = SortByField(descendingSort) + SortByName + SortByModTimeDescending SortByField = SortByField(descendingSort) + SortByModTime + SortByObjectTypeDescending SortByField = SortByField(descendingSort) + SortByObjectType + SortBySizeDescending SortByField = SortByField(descendingSort) + SortBySize +) + +var ( + // Unsorted disables sorting and can therefore not be combined with other values. + Unsorted = []SortByField{"none"} + // DefaultSorted does not specify how to sort and + // therefore implies the default sort order. + DefaultSorted = []SortByField{} +) + +// CopyOrMoveOperationType represents the possible types of copy- and move-operations. +type CopyOrMoveOperationType int + +// possible values for CopyOrMoveOperationType +const ( + MoveOriginal CopyOrMoveOperationType = iota + CopyOriginal + CopyOriginalPreserveModTime +) + +// OnExistAction represents possible actions the API should take, +// when a request tries to create a path that already exists. +type OnExistAction string + +// possible values for OnExistAction +const ( + // IgnoreOnExist instructs the API not to execute + // the request in case of a conflict, but to return an error. + IgnoreOnExist OnExistAction = "ignore" + // AutoNameOnExist instructs the API to automatically rename + // any conflicting request-objects. + AutoNameOnExist OnExistAction = "autoname" + // OverwriteOnExist instructs the API to overwrite any conflicting files. + // This can only be used, if the request operates on files directly. + // (For example when moving/copying a file.) + // For most requests this action will simply be ignored. + OverwriteOnExist OnExistAction = "overwrite" +) + +// shouldRetry returns a boolean as to whether this resp and err deserve to be retried. +// It tries to expire/invalidate the token, if necessary. +// It returns the err as a convenience. +func (f *Fs) shouldRetry(ctx context.Context, resp *http.Response, err error) (bool, error) { + if fserrors.ContextError(ctx, &err) { + return false, err + } + if resp != nil && (resp.StatusCode == 401 || isHTTPError(err, 401)) && len(resp.Header["Www-Authenticate"]) > 0 { + fs.Debugf(f, "Token might be invalid: %v", err) + if f.tokenRenewer != nil { + iErr := f.tokenRenewer.Expire() + if iErr == nil { + return true, err + } + } + } + return fserrors.ShouldRetry(err) || fserrors.ShouldRetryHTTP(resp, retryErrorCodes), err +} + +// resolvePath resolves the given (relative) path and +// returns a path suitable for API-calls. +// This will consider the root-path of the fs and any needed prefixes. +// +// Any relative paths passed to functions that access these paths should +// be resolved with this first! +func (f *Fs) resolvePath(objectPath string) string { + resolved := path.Join(f.opt.RootPrefix, f.root, f.opt.Enc.FromStandardPath(objectPath)) + return resolved +} + +// iterateOverDirectory calls the given function callback +// on each item found in a given directory. +// +// If callback ever returns true then this exits early with found = true. 
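+//
+// A minimal usage sketch (directory and callback are hypothetical):
+//
+//	found, err := f.iterateOverDirectory(ctx, f.resolvePath("backups"), FileMembers,
+//		func(item *api.HiDriveObject) bool { return item.Name == "target.txt" },
+//		api.HiDriveObjectWithMetadataFields, Unsorted)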
+func (f *Fs) iterateOverDirectory(ctx context.Context, directory string, searchOnly MemberType, callback func(*api.HiDriveObject) bool, fields []string, sortBy []SortByField) (found bool, err error) { + parameters := api.NewQueryParameters() + parameters.SetPath(directory) + parameters.AddFields("members.", fields...) + parameters.AddFields("", api.DirectoryContentFields...) + parameters.Set("members", string(searchOnly)) + for _, v := range sortBy { + // The explicit conversion is necessary for each element. + parameters.AddList("sort", ",", string(v)) + } + + opts := rest.Opts{ + Method: "GET", + Path: "/dir", + Parameters: parameters.Values, + } + + iterateContent := func(result *api.DirectoryContent, err error) (bool, error) { + if err != nil { + return false, err + } + for _, item := range result.Entries { + item.Name = f.opt.Enc.ToStandardName(item.Name) + if callback(&item) { + return true, nil + } + } + return false, nil + } + return f.paginateDirectoryAccess(ctx, &opts, iterationChunkSize, 0, iterateContent) +} + +// paginateDirectoryAccess executes requests specified via ctx and opts +// which should produce api.DirectoryContent. +// This will paginate the requests using limit starting at the given offset. +// +// The given function callback is called on each api.DirectoryContent found +// along with any errors that occurred. +// If callback ever returns true then this exits early with found = true. +// If callback ever returns an error then this exits early with that error. +func (f *Fs) paginateDirectoryAccess(ctx context.Context, opts *rest.Opts, limit int64, offset int64, callback func(*api.DirectoryContent, error) (bool, error)) (found bool, err error) { + for { + opts.Parameters.Set("limit", strconv.FormatInt(offset, 10)+","+strconv.FormatInt(limit, 10)) + + var result api.DirectoryContent + var resp *http.Response + err = f.pacer.Call(func() (bool, error) { + resp, err = f.srv.CallJSON(ctx, opts, nil, &result) + return f.shouldRetry(ctx, resp, err) + }) + + found, err = callback(&result, err) + if found || err != nil { + return found, err + } + + offset += int64(len(result.Entries)) + if offset >= result.TotalCount || limit > int64(len(result.Entries)) { + break + } + } + return false, nil +} + +// fetchMetadataForPath reads the metadata from the path. +func (f *Fs) fetchMetadataForPath(ctx context.Context, path string, fields []string) (*api.HiDriveObject, error) { + parameters := api.NewQueryParameters() + parameters.SetPath(path) + parameters.AddFields("", fields...) + + opts := rest.Opts{ + Method: "GET", + Path: "/meta", + Parameters: parameters.Values, + } + + var result api.HiDriveObject + var resp *http.Response + var err error + err = f.pacer.Call(func() (bool, error) { + resp, err = f.srv.CallJSON(ctx, &opts, nil, &result) + return f.shouldRetry(ctx, resp, err) + }) + if err != nil { + return nil, err + } + return &result, nil +} + +// copyOrMove copies or moves a directory or file +// from the source-path to the destination-path. +// +// The operation will only be successful +// if the parent-directory of the destination-path exists. +// +// NOTE: Use the explicit methods instead of directly invoking this method. +// (Those are: copyDirectory, moveDirectory, copyFile, moveFile.) 
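+// For example, copyFile uses CopyOriginalPreserveModTime and results in a
+// POST to /file/copy with preserve_mtime=true, while moveDirectory uses
+// MoveOriginal and results in a POST to /dir/move.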
+func (f *Fs) copyOrMove(ctx context.Context, isDirectory bool, operationType CopyOrMoveOperationType, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) { + parameters := api.NewQueryParameters() + parameters.Set("src", source) + parameters.Set("dst", destination) + if onExist == AutoNameOnExist || + (onExist == OverwriteOnExist && !isDirectory) { + parameters.Set("on_exist", string(onExist)) + } + + endpoint := "/" + if isDirectory { + endpoint += "dir" + } else { + endpoint += "file" + } + switch operationType { + case MoveOriginal: + endpoint += "/move" + case CopyOriginalPreserveModTime: + parameters.Set("preserve_mtime", strconv.FormatBool(true)) + fallthrough + case CopyOriginal: + endpoint += "/copy" + } + + opts := rest.Opts{ + Method: "POST", + Path: endpoint, + Parameters: parameters.Values, + } + + var result api.HiDriveObject + var resp *http.Response + var err error + err = f.pacer.Call(func() (bool, error) { + resp, err = f.srv.CallJSON(ctx, &opts, nil, &result) + return f.shouldRetry(ctx, resp, err) + }) + if err != nil { + return nil, err + } + return &result, nil +} + +// copyDirectory moves the directory at the source-path to the destination-path and +// returns the resulting api-object if successful. +// +// The operation will only be successful +// if the parent-directory of the destination-path exists. +func (f *Fs) copyDirectory(ctx context.Context, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) { + return f.copyOrMove(ctx, true, CopyOriginalPreserveModTime, source, destination, onExist) +} + +// moveDirectory moves the directory at the source-path to the destination-path and +// returns the resulting api-object if successful. +// +// The operation will only be successful +// if the parent-directory of the destination-path exists. +func (f *Fs) moveDirectory(ctx context.Context, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) { + return f.copyOrMove(ctx, true, MoveOriginal, source, destination, onExist) +} + +// copyFile copies the file at the source-path to the destination-path and +// returns the resulting api-object if successful. +// +// The operation will only be successful +// if the parent-directory of the destination-path exists. +// +// NOTE: This operation will expand sparse areas in the content of the source-file +// to blocks of 0-bytes in the destination-file. +func (f *Fs) copyFile(ctx context.Context, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) { + return f.copyOrMove(ctx, false, CopyOriginalPreserveModTime, source, destination, onExist) +} + +// moveFile moves the file at the source-path to the destination-path and +// returns the resulting api-object if successful. +// +// The operation will only be successful +// if the parent-directory of the destination-path exists. +// +// NOTE: This operation may expand sparse areas in the content of the source-file +// to blocks of 0-bytes in the destination-file. +func (f *Fs) moveFile(ctx context.Context, source string, destination string, onExist OnExistAction) (*api.HiDriveObject, error) { + return f.copyOrMove(ctx, false, MoveOriginal, source, destination, onExist) +} + +// createDirectory creates the directory at the given path and +// returns the resulting api-object if successful. +// +// The directory will only be created if its parent-directory exists. +// This returns fs.ErrorDirNotFound if the parent-directory is not found. 
+// This returns fs.ErrorDirExists if the directory already exists. +func (f *Fs) createDirectory(ctx context.Context, directory string, onExist OnExistAction) (*api.HiDriveObject, error) { + parameters := api.NewQueryParameters() + parameters.SetPath(directory) + if onExist == AutoNameOnExist { + parameters.Set("on_exist", string(onExist)) + } + + opts := rest.Opts{ + Method: "POST", + Path: "/dir", + Parameters: parameters.Values, + } + + var result api.HiDriveObject + var resp *http.Response + var err error + err = f.pacer.Call(func() (bool, error) { + resp, err = f.srv.CallJSON(ctx, &opts, nil, &result) + return f.shouldRetry(ctx, resp, err) + }) + + switch { + case err == nil: + return &result, nil + case isHTTPError(err, 404): + return nil, fs.ErrorDirNotFound + case isHTTPError(err, 409): + return nil, fs.ErrorDirExists + } + return nil, err +} + +// createDirectories creates the directory at the given path +// along with any missing parent directories and +// returns the resulting api-object (of the created directory) if successful. +// +// This returns fs.ErrorDirExists if the directory already exists. +// +// If an error occurs while the parent directories are being created, +// any directories already created will NOT be deleted again. +func (f *Fs) createDirectories(ctx context.Context, directory string, onExist OnExistAction) (*api.HiDriveObject, error) { + result, err := f.createDirectory(ctx, directory, onExist) + if err == nil { + return result, nil + } + if err != fs.ErrorDirNotFound { + return nil, err + } + parentDirectory := path.Dir(directory) + _, err = f.createDirectories(ctx, parentDirectory, onExist) + if err != nil && err != fs.ErrorDirExists { + return nil, err + } + // NOTE: Ignoring fs.ErrorDirExists does no harm, + // since it does not mean the child directory cannot be created. + return f.createDirectory(ctx, directory, onExist) +} + +// deleteDirectory deletes the directory at the given path. +// +// If recursive is false, the directory will only be deleted if it is empty. +// If recursive is true, the directory will be deleted regardless of its content. +// This returns fs.ErrorDirNotFound if the directory is not found. +// This returns fs.ErrorDirectoryNotEmpty if the directory is not empty and +// recursive is false. +func (f *Fs) deleteDirectory(ctx context.Context, directory string, recursive bool) error { + parameters := api.NewQueryParameters() + parameters.SetPath(directory) + parameters.Set("recursive", strconv.FormatBool(recursive)) + + opts := rest.Opts{ + Method: "DELETE", + Path: "/dir", + Parameters: parameters.Values, + NoResponse: true, + } + + var resp *http.Response + var err error + err = f.pacer.Call(func() (bool, error) { + resp, err = f.srv.Call(ctx, &opts) + return f.shouldRetry(ctx, resp, err) + }) + + switch { + case isHTTPError(err, 404): + return fs.ErrorDirNotFound + case isHTTPError(err, 409): + return fs.ErrorDirectoryNotEmpty + } + return err +} + +// deleteObject deletes the object/file at the given path. +// +// This returns fs.ErrorObjectNotFound if the object is not found. 
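+// Note that this issues a DELETE request against the /file endpoint and
+// therefore only removes files; directories are removed via deleteDirectory.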
+func (f *Fs) deleteObject(ctx context.Context, path string) error { + parameters := api.NewQueryParameters() + parameters.SetPath(path) + + opts := rest.Opts{ + Method: "DELETE", + Path: "/file", + Parameters: parameters.Values, + NoResponse: true, + } + + var resp *http.Response + var err error + err = f.pacer.Call(func() (bool, error) { + resp, err = f.srv.Call(ctx, &opts) + return f.shouldRetry(ctx, resp, err) + }) + + if isHTTPError(err, 404) { + return fs.ErrorObjectNotFound + } + return err +} + +// createFile creates a file at the given path +// with the content of the io.ReadSeeker. +// This guarantees that existing files will not be overwritten. +// The maximum size of the content is limited by MaximumUploadBytes. +// The io.ReadSeeker should be resettable by seeking to its start. +// If modTime is not the zero time instant, +// it will be set as the file's modification time after the operation. +// +// This returns fs.ErrorDirNotFound +// if the parent directory of the file is not found. +// This returns ErrorFileExists if a file already exists at the specified path. +func (f *Fs) createFile(ctx context.Context, path string, content io.ReadSeeker, modTime time.Time, onExist OnExistAction) (*api.HiDriveObject, error) { + parameters := api.NewQueryParameters() + parameters.SetFileInDirectory(path) + if onExist == AutoNameOnExist { + parameters.Set("on_exist", string(onExist)) + } + + var err error + if !modTime.IsZero() { + err = parameters.SetTime("mtime", modTime) + if err != nil { + return nil, err + } + } + + opts := rest.Opts{ + Method: "POST", + Path: "/file", + Body: content, + ContentType: "application/octet-stream", + Parameters: parameters.Values, + } + + var result api.HiDriveObject + var resp *http.Response + err = f.pacer.Call(func() (bool, error) { + // Reset the reading index (in case this is a retry). + if _, err = content.Seek(0, io.SeekStart); err != nil { + return false, err + } + resp, err = f.srv.CallJSON(ctx, &opts, nil, &result) + return f.shouldRetry(ctx, resp, err) + }) + + switch { + case err == nil: + return &result, nil + case isHTTPError(err, 404): + return nil, fs.ErrorDirNotFound + case isHTTPError(err, 409): + return nil, ErrorFileExists + } + return nil, err +} + +// overwriteFile updates the content of the file at the given path +// with the content of the io.ReadSeeker. +// If the file does not exist it will be created. +// The maximum size of the content is limited by MaximumUploadBytes. +// The io.ReadSeeker should be resettable by seeking to its start. +// If modTime is not the zero time instant, +// it will be set as the file's modification time after the operation. +// +// This returns fs.ErrorDirNotFound +// if the parent directory of the file is not found. +func (f *Fs) overwriteFile(ctx context.Context, path string, content io.ReadSeeker, modTime time.Time) (*api.HiDriveObject, error) { + parameters := api.NewQueryParameters() + parameters.SetFileInDirectory(path) + + var err error + if !modTime.IsZero() { + err = parameters.SetTime("mtime", modTime) + if err != nil { + return nil, err + } + } + + opts := rest.Opts{ + Method: "PUT", + Path: "/file", + Body: content, + ContentType: "application/octet-stream", + Parameters: parameters.Values, + } + + var result api.HiDriveObject + var resp *http.Response + err = f.pacer.Call(func() (bool, error) { + // Reset the reading index (in case this is a retry). 
+		if _, err = content.Seek(0, io.SeekStart); err != nil {
+			return false, err
+		}
+		resp, err = f.srv.CallJSON(ctx, &opts, nil, &result)
+		return f.shouldRetry(ctx, resp, err)
+	})
+
+	switch {
+	case err == nil:
+		return &result, nil
+	case isHTTPError(err, 404):
+		return nil, fs.ErrorDirNotFound
+	}
+	return nil, err
+}
+
+// uploadFileChunked updates the content of the existing file at the given path
+// with the content of the io.Reader.
+// Returns the position of the last successfully written byte, stopping before the first failed write.
+// If nothing was written this will be 0.
+// Returns the resulting api-object if successful.
+//
+// Replaces the file contents by uploading multiple chunks of the given size in parallel.
+// Therefore this can be used to upload files of any size efficiently.
+// The number of parallel transfers is limited by transferLimit, which should be larger than 0.
+// If modTime is not the zero time instant,
+// it will be set as the file's modification time after the operation.
+//
+// NOTE: This method uses updateFileChunked and may create sparse files,
+// if the upload of a chunk fails unexpectedly.
+// See note about sparse files in patchFile.
+// If any of the uploads fail, the process will be aborted and
+// the first error that occurred will be returned.
+// This is not an atomic operation,
+// therefore if the upload fails the file may be partially modified.
+//
+// This returns fs.ErrorObjectNotFound if the object is not found.
+func (f *Fs) uploadFileChunked(ctx context.Context, path string, content io.Reader, modTime time.Time, chunkSize int, transferLimit int64) (okSize uint64, info *api.HiDriveObject, err error) {
+	okSize, err = f.updateFileChunked(ctx, path, content, 0, chunkSize, transferLimit)
+
+	if err == nil {
+		info, err = f.resizeFile(ctx, path, okSize, modTime)
+	}
+	return okSize, info, err
+}
+
+// updateFileChunked updates the content of the existing file at the given path
+// starting at the given offset.
+// Returns the position of the last successfully written byte, stopping before the first failed write.
+// If nothing was written this will be 0.
+//
+// Replaces the file contents starting from the given byte offset
+// with the content of the io.Reader.
+// If the offset is beyond the file end, the file is extended up to the offset.
+//
+// The upload is done in multiple chunks of the given size in parallel.
+// Therefore this can be used to upload files of any size efficiently.
+// The number of parallel transfers is limited by transferLimit, which should be larger than 0.
+//
+// NOTE: Because it is inefficient to set the modification time with every chunk,
+// setting it to a specific value must be done in a separate request
+// after this operation finishes.
+//
+// NOTE: This method uses patchFile and may create sparse files,
+// especially if the upload of a chunk fails unexpectedly.
+// See note about sparse files in patchFile.
+// If any of the uploads fail, the process will be aborted and
+// the first error that occurred will be returned.
+// This is not an atomic operation,
+// therefore if the upload fails the file may be partially modified.
+//
+// This returns fs.ErrorObjectNotFound if the object is not found.
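+//
+// For example (hypothetical numbers): with offset 0, a chunkSize of 48 MiB and
+// 100 MiB of content, three chunks are uploaded at offsets 0, 48 MiB and 96 MiB.
+// If only the second chunk fails, okSize is 48 MiB, since the returned size is
+// the contiguous prefix of successfully written bytes.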
+func (f *Fs) updateFileChunked(ctx context.Context, path string, content io.Reader, offset uint64, chunkSize int, transferLimit int64) (okSize uint64, err error) { + var ( + okChunksMu sync.Mutex // protects the variables below + okChunks []ranges.Range + ) + g, gCtx := errgroup.WithContext(ctx) + transferSemaphore := semaphore.NewWeighted(transferLimit) + + var readErr error + startMoreTransfers := true + zeroTime := time.Time{} + for chunk := uint64(0); startMoreTransfers; chunk++ { + // Acquire semaphore to limit number of transfers in parallel. + readErr = transferSemaphore.Acquire(gCtx, 1) + if readErr != nil { + break + } + + // Read a chunk of data. + chunkReader, bytesRead, readErr := readerForChunk(content, chunkSize) + if bytesRead < chunkSize { + startMoreTransfers = false + } + if readErr != nil || bytesRead <= 0 { + break + } + + // Transfer the chunk. + chunkOffset := uint64(chunkSize)*chunk + offset + g.Go(func() error { + // After this upload is done, + // signal that another transfer can be started. + defer transferSemaphore.Release(1) + uploadErr := f.patchFile(gCtx, path, cachedReader(chunkReader), chunkOffset, zeroTime) + if uploadErr == nil { + // Remember successfully written chunks. + okChunksMu.Lock() + okChunks = append(okChunks, ranges.Range{Pos: int64(chunkOffset), Size: int64(bytesRead)}) + okChunksMu.Unlock() + fs.Debugf(f, "Done uploading chunk of size %v at offset %v.", bytesRead, chunkOffset) + } else { + fs.Infof(f, "Error while uploading chunk at offset %v. Error is %v.", chunkOffset, uploadErr) + } + return uploadErr + }) + } + + if readErr != nil { + // Log the error in case it is later ignored because of an upload-error. + fs.Infof(f, "Error while reading/preparing to upload a chunk. Error is %v.", readErr) + } + + err = g.Wait() + + // Compute the first continuous range of the file content, + // which does not contain any failed chunks. + // Do not forget to add the file content up to the starting offset, + // which is presumed to be already correct. + rs := ranges.Ranges{} + rs.Insert(ranges.Range{Pos: 0, Size: int64(offset)}) + for _, chunkRange := range okChunks { + rs.Insert(chunkRange) + } + if len(rs) > 0 && rs[0].Pos == 0 { + okSize = uint64(rs[0].Size) + } + + if err != nil { + return okSize, err + } + if readErr != nil { + return okSize, readErr + } + + return okSize, nil +} + +// patchFile updates the content of the existing file at the given path +// starting at the given offset. +// +// Replaces the file contents starting from the given byte offset +// with the content of the io.ReadSeeker. +// If the offset is beyond the file end, the file is extended up to the offset. +// The maximum size of the update is limited by MaximumUploadBytes. +// The io.ReadSeeker should be resettable by seeking to its start. +// If modTime is not the zero time instant, +// it will be set as the file's modification time after the operation. +// +// NOTE: By extending the file up to the offset this may create sparse files, +// which allocate less space on the file system than their apparent size indicates, +// since holes between data chunks are "real" holes +// and not regions made up of consecutive 0-bytes. +// Subsequent operations (such as copying data) +// usually expand the holes into regions of 0-bytes. +// +// This returns fs.ErrorObjectNotFound if the object is not found. 
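+// Note that requests answered with HTTP status 423 (Locked) are always treated
+// as retryable, in addition to the usual retry conditions of the pacer.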
+func (f *Fs) patchFile(ctx context.Context, path string, content io.ReadSeeker, offset uint64, modTime time.Time) error { + parameters := api.NewQueryParameters() + parameters.SetPath(path) + parameters.Set("offset", strconv.FormatUint(offset, 10)) + + if !modTime.IsZero() { + err := parameters.SetTime("mtime", modTime) + if err != nil { + return err + } + } + + opts := rest.Opts{ + Method: "PATCH", + Path: "/file", + Body: content, + ContentType: "application/octet-stream", + Parameters: parameters.Values, + NoResponse: true, + } + + var resp *http.Response + var err error + err = f.pacer.Call(func() (bool, error) { + // Reset the reading index (in case this is a retry). + _, err = content.Seek(0, io.SeekStart) + if err != nil { + return false, err + } + resp, err = f.srv.Call(ctx, &opts) + if isHTTPError(err, 423) { + return true, err + } + return f.shouldRetry(ctx, resp, err) + }) + + if isHTTPError(err, 404) { + return fs.ErrorObjectNotFound + } + return err +} + +// resizeFile updates the existing file at the given path to be of the given size +// and returns the resulting api-object if successful. +// +// If the given size is smaller than the current filesize, +// the file is cut/truncated at that position. +// If the given size is larger, the file is extended up to that position. +// If modTime is not the zero time instant, +// it will be set as the file's modification time after the operation. +// +// NOTE: By extending the file this may create sparse files, +// which allocate less space on the file system than their apparent size indicates, +// since holes between data chunks are "real" holes +// and not regions made up of consecutive 0-bytes. +// Subsequent operations (such as copying data) +// usually expand the holes into regions of 0-bytes. +// +// This returns fs.ErrorObjectNotFound if the object is not found. +func (f *Fs) resizeFile(ctx context.Context, path string, size uint64, modTime time.Time) (*api.HiDriveObject, error) { + parameters := api.NewQueryParameters() + parameters.SetPath(path) + parameters.Set("size", strconv.FormatUint(size, 10)) + + if !modTime.IsZero() { + err := parameters.SetTime("mtime", modTime) + if err != nil { + return nil, err + } + } + + opts := rest.Opts{ + Method: "POST", + Path: "/file/truncate", + Parameters: parameters.Values, + } + + var result api.HiDriveObject + var resp *http.Response + var err error + err = f.pacer.Call(func() (bool, error) { + resp, err = f.srv.CallJSON(ctx, &opts, nil, &result) + return f.shouldRetry(ctx, resp, err) + }) + + switch { + case err == nil: + return &result, nil + case isHTTPError(err, 404): + return nil, fs.ErrorObjectNotFound + } + return nil, err +} + +// ------------------------------------------------------------ + +// isHTTPError compares the numerical status code +// of an api.Error to the given HTTP status. +// +// If the given error is not an api.Error or +// a numerical status code could not be determined, this returns false. +// Otherwise this returns whether the status code of the error is equal to the given status. +func isHTTPError(err error, status int64) bool { + if apiErr, ok := err.(*api.Error); ok { + errStatus, decodeErr := apiErr.Code.Int64() + if decodeErr == nil && errStatus == status { + return true + } + } + return false +} + +// createHiDriveScopes creates oauth-scopes +// from the given user-role and access-permissions. +// +// If the arguments are empty, they will not be included in the result. 
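+//
+// For example, createHiDriveScopes("user", "rw") returns []string{"rw,user"},
+// while createHiDriveScopes("", "ro") returns []string{"ro"}.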
+func createHiDriveScopes(role string, access string) []string { + switch { + case role != "" && access != "": + return []string{access + "," + role} + case role != "": + return []string{role} + case access != "": + return []string{access} + } + return []string{} +} + +// cachedReader returns a version of the reader that caches its contents and +// can therefore be reset using Seek. +func cachedReader(reader io.Reader) io.ReadSeeker { + bytesReader, ok := reader.(*bytes.Reader) + if ok { + return bytesReader + } + + repeatableReader, ok := reader.(*readers.RepeatableReader) + if ok { + return repeatableReader + } + + return readers.NewRepeatableReader(reader) +} + +// readerForChunk reads a chunk of bytes from reader (after handling any accounting). +// Returns a new io.Reader (chunkReader) for that chunk +// and the number of bytes that have been read from reader. +func readerForChunk(reader io.Reader, length int) (chunkReader io.Reader, bytesRead int, err error) { + // Unwrap any accounting from the input if present. + reader, wrap := accounting.UnWrap(reader) + + // Read a chunk of data. + buffer := make([]byte, length) + bytesRead, err = io.ReadFull(reader, buffer) + if err == io.EOF || err == io.ErrUnexpectedEOF { + err = nil + } + if err != nil { + return nil, bytesRead, err + } + // Truncate unused capacity. + buffer = buffer[:bytesRead] + + // Use wrap to put any accounting back for chunkReader. + return wrap(bytes.NewReader(buffer)), bytesRead, nil +} diff --git a/backend/hidrive/hidrive.go b/backend/hidrive/hidrive.go new file mode 100644 index 000000000..b0f56844a --- /dev/null +++ b/backend/hidrive/hidrive.go @@ -0,0 +1,1002 @@ +// Package hidrive provides an interface to the HiDrive object storage system. +package hidrive + +// FIXME HiDrive only supports file or folder names of 255 characters or less. +// Operations that create files oder folder with longer names will throw a HTTP error: +// - 422 Unprocessable Entity +// A more graceful way for rclone to handle this may be desirable. + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "path" + "strconv" + "time" + + "github.com/rclone/rclone/lib/encoder" + + "github.com/rclone/rclone/backend/hidrive/api" + "github.com/rclone/rclone/backend/hidrive/hidrivehash" + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/config" + "github.com/rclone/rclone/fs/config/configmap" + "github.com/rclone/rclone/fs/config/configstruct" + "github.com/rclone/rclone/fs/config/obscure" + "github.com/rclone/rclone/fs/fserrors" + "github.com/rclone/rclone/fs/hash" + "github.com/rclone/rclone/lib/oauthutil" + "github.com/rclone/rclone/lib/pacer" + "github.com/rclone/rclone/lib/rest" + "golang.org/x/oauth2" +) + +const ( + rcloneClientID = "6b0258fdda630d34db68a3ce3cbf19ae" + rcloneEncryptedClientSecret = "GC7UDZ3Ra4jLcmfQSagKCDJ1JEy-mU6pBBhFrS3tDEHILrK7j3TQHUrglkO5SgZ_" + minSleep = 10 * time.Millisecond + maxSleep = 2 * time.Second + decayConstant = 2 // bigger for slower decay, exponential + defaultUploadChunkSize = 48 * fs.Mebi + defaultUploadCutoff = 2 * defaultUploadChunkSize + defaultUploadConcurrency = 4 +) + +// Globals +var ( + // Description of how to auth for this app. 
+ oauthConfig = &oauth2.Config{ + Endpoint: oauth2.Endpoint{ + AuthURL: "https://my.hidrive.com/client/authorize", + TokenURL: "https://my.hidrive.com/oauth2/token", + }, + ClientID: rcloneClientID, + ClientSecret: obscure.MustReveal(rcloneEncryptedClientSecret), + RedirectURL: oauthutil.TitleBarRedirectURL, + } + // hidrivehashType is the hash.Type for HiDrive hashes. + hidrivehashType hash.Type +) + +// Register the backend with Fs. +func init() { + hidrivehashType = hash.RegisterHash("hidrive", "HiDriveHash", 40, hidrivehash.New) + fs.Register(&fs.RegInfo{ + Name: "hidrive", + Description: "HiDrive", + NewFs: NewFs, + Config: func(ctx context.Context, name string, m configmap.Mapper, config fs.ConfigIn) (*fs.ConfigOut, error) { + // Parse config into Options struct + opt := new(Options) + err := configstruct.Set(m, opt) + if err != nil { + return nil, fmt.Errorf("couldn't parse config into struct: %w", err) + } + + //fs.Debugf(nil, "hidrive: configuring oauth-token.") + oauthConfig.Scopes = createHiDriveScopes(opt.ScopeRole, opt.ScopeAccess) + return oauthutil.ConfigOut("", &oauthutil.Options{ + OAuth2Config: oauthConfig, + }) + }, + Options: append(oauthutil.SharedOptions, []fs.Option{{ + Name: "scope_access", + Help: "Access permissions that rclone should use when requesting access from HiDrive.", + Default: "rw", + Examples: []fs.OptionExample{{ + Value: "rw", + Help: "Read and write access to resources.", + }, { + Value: "ro", + Help: "Read-only access to resources.", + }}, + }, { + Name: "scope_role", + Help: "User-level that rclone should use when requesting access from HiDrive.", + Default: "user", + Examples: []fs.OptionExample{{ + Value: "user", + Help: `User-level access to management permissions. +This will be sufficient in most cases.`, + }, { + Value: "admin", + Help: "Extensive access to management permissions.", + }, { + Value: "owner", + Help: "Full access to management permissions.", + }}, + Advanced: true, + }, { + Name: "root_prefix", + Help: `The root/parent folder for all paths. + +Fill in to use the specified folder as the parent for all paths given to the remote. +This way rclone can use any folder as its starting point.`, + Default: "/", + Examples: []fs.OptionExample{{ + Value: "/", + Help: `The topmost directory accessible by rclone. +This will be equivalent with "root" if rclone uses a regular HiDrive user account.`, + }, { + Value: "root", + Help: `The topmost directory of the HiDrive user account`, + }, { + Value: "", + Help: `This specifies that there is no root-prefix for your paths. +When using this you will always need to specify paths to this remote with a valid parent e.g. "remote:/path/to/dir" or "remote:root/path/to/dir".`, + }}, + Advanced: true, + }, { + Name: "endpoint", + Help: `Endpoint for the service. + +This is the URL that API-calls will be made to.`, + Default: "https://api.hidrive.strato.com/2.1", + Advanced: true, + }, { + Name: "disable_fetching_member_count", + Help: `Do not fetch number of objects in directories unless it is absolutely necessary. + +Requests may be faster if the number of objects in subdirectories is not fetched.`, + Default: false, + Advanced: true, + }, { + Name: "chunk_size", + Help: fmt.Sprintf(`Chunksize for chunked uploads. + +Any files larger than the configured cutoff (or files of unknown size) will be uploaded in chunks of this size. + +The upper limit for this is %v bytes (about %v). +That is the maximum amount of bytes a single upload-operation will support. 
+Setting this above the upper limit or to a negative value will cause uploads to fail.
+
+Setting this to larger values may increase the upload speed at the cost of using more memory.
+It can be set to smaller values to save on memory.`, MaximumUploadBytes, fs.SizeSuffix(MaximumUploadBytes)),
+			Default:  defaultUploadChunkSize,
+			Advanced: true,
+		}, {
+			Name: "upload_cutoff",
+			Help: fmt.Sprintf(`Cutoff/Threshold for chunked uploads.
+
+Any files larger than this will be uploaded in chunks of the configured chunksize.
+
+The upper limit for this is %v bytes (about %v).
+That is the maximum amount of bytes a single upload-operation will support.
+Setting this above the upper limit will cause uploads to fail.`, MaximumUploadBytes, fs.SizeSuffix(MaximumUploadBytes)),
+			Default:  defaultUploadCutoff,
+			Advanced: true,
+		}, {
+			Name: "upload_concurrency",
+			Help: `Concurrency for chunked uploads.
+
+This is the upper limit for how many transfers for the same file are running concurrently.
+Setting this to a value smaller than 1 will cause uploads to deadlock.
+
+If you are uploading small numbers of large files over high-speed links
+and these uploads do not fully utilize your bandwidth, then increasing
+this may help to speed up the transfers.`,
+			Default:  defaultUploadConcurrency,
+			Advanced: true,
+		}, {
+			Name:     config.ConfigEncoding,
+			Help:     config.ConfigEncodingHelp,
+			Advanced: true,
+			// HiDrive only supports file or folder names of 255 characters or less.
+			// Names containing "/" are not supported.
+			// The special names "." and ".." are not supported.
+			Default: (encoder.EncodeZero |
+				encoder.EncodeSlash |
+				encoder.EncodeDot),
+		}}...),
+	})
+}
+
+// Options defines the configuration for this backend.
+type Options struct {
+	EndpointAPI                 string               `config:"endpoint"`
+	OptionalMemberCountDisabled bool                 `config:"disable_fetching_member_count"`
+	UploadChunkSize             fs.SizeSuffix        `config:"chunk_size"`
+	UploadCutoff                fs.SizeSuffix        `config:"upload_cutoff"`
+	UploadConcurrency           int64                `config:"upload_concurrency"`
+	Enc                         encoder.MultiEncoder `config:"encoding"`
+	RootPrefix                  string               `config:"root_prefix"`
+	ScopeAccess                 string               `config:"scope_access"`
+	ScopeRole                   string               `config:"scope_role"`
+}
+
+// Fs represents a remote hidrive.
+type Fs struct {
+	name     string       // name of this remote
+	root     string       // the path we are working on
+	opt      Options      // parsed options
+	features *fs.Features // optional features
+	srv      *rest.Client // the connection to the server
+	pacer    *fs.Pacer    // pacer for API calls
+	// retryOnce is NOT intended as a pacer for API calls.
+	// The intended use case is to repeat an action that failed because
+	// some preconditions were not previously fulfilled.
+	// Code using this should then establish these preconditions
+	// and let the pacer retry the operation.
+	retryOnce    *pacer.Pacer     // pacer with no delays to retry certain operations once
+	tokenRenewer *oauthutil.Renew // renew the token on expiry
+}
+
+// Object describes a hidrive object.
+//
+// Will definitely have the remote-path but may lack meta-information.
+type Object struct {
+	fs          *Fs       // what this object is part of
+	remote      string    // The remote path
+	hasMetadata bool      // whether info below has been set
+	size        int64     // size of the object
+	modTime     time.Time // modification time of the object
+	id          string    // ID of the object
+	hash        string    // content-hash of the object
+}
+
+// ------------------------------------------------------------
+
+// Name returns the name of the remote (as passed into NewFs).
+func (f *Fs) Name() string { + return f.name +} + +// Root returns the name of the remote (as passed into NewFs). +func (f *Fs) Root() string { + return f.root +} + +// String returns a string-representation of this Fs. +func (f *Fs) String() string { + return fmt.Sprintf("HiDrive root '%s'", f.root) +} + +// Precision returns the precision of this Fs. +func (f *Fs) Precision() time.Duration { + return time.Second +} + +// Hashes returns the supported hash sets. +func (f *Fs) Hashes() hash.Set { + return hash.Set(hidrivehashType) +} + +// Features returns the optional features of this Fs. +func (f *Fs) Features() *fs.Features { + return f.features +} + +// errorHandler parses a non 2xx error response into an error. +func errorHandler(resp *http.Response) error { + // Decode error response. + errResponse := new(api.Error) + err := rest.DecodeJSON(resp, &errResponse) + if err != nil { + fs.Debugf(nil, "Couldn't decode error response: %v", err) + } + _, err = errResponse.Code.Int64() + if err != nil { + errResponse.Code = json.Number(strconv.Itoa(resp.StatusCode)) + } + return errResponse +} + +// NewFs creates a new file system from the path. +func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) { + //fs.Debugf(nil, "hidrive: creating new Fs.") + // Parse config into Options struct. + opt := new(Options) + err := configstruct.Set(m, opt) + if err != nil { + return nil, err + } + + // Clean root-prefix and root-path. + // NOTE: With the default-encoding "." and ".." will be encoded, + // but with custom encodings without encoder.EncodeDot + // "." and ".." will be interpreted as paths. + if opt.RootPrefix != "" { + opt.RootPrefix = path.Clean(opt.Enc.FromStandardPath(opt.RootPrefix)) + } + root = path.Clean(opt.Enc.FromStandardPath(root)) + + client, ts, err := oauthutil.NewClient(ctx, name, m, oauthConfig) + if err != nil { + return nil, fmt.Errorf("failed to configure HiDrive: %w", err) + } + + f := &Fs{ + name: name, + root: root, + opt: *opt, + srv: rest.NewClient(client).SetRoot(opt.EndpointAPI), + pacer: fs.NewPacer(ctx, pacer.NewDefault(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))), + retryOnce: pacer.New(pacer.RetriesOption(2), pacer.MaxConnectionsOption(-1), pacer.CalculatorOption(&pacer.ZeroDelayCalculator{})), + } + f.features = (&fs.Features{ + CanHaveEmptyDirectories: true, + }).Fill(ctx, f) + f.srv.SetErrorHandler(errorHandler) + + if ts != nil { + transaction := func() error { + resolvedRoot := f.resolvePath("") + _, err := f.fetchMetadataForPath(ctx, resolvedRoot, api.HiDriveObjectNoMetadataFields) + return err + } + f.tokenRenewer = oauthutil.NewRenew(f.String(), ts, transaction) + } + + // Do not allow the root-prefix to be non-existent nor a directory, + // but it can be empty. + if f.opt.RootPrefix != "" { + item, err := f.fetchMetadataForPath(ctx, f.opt.RootPrefix, api.HiDriveObjectNoMetadataFields) + if err != nil { + return nil, fmt.Errorf("could not access root-prefix: %w", err) + } + if item.Type != api.HiDriveObjectTypeDirectory { + return nil, errors.New("The root-prefix needs to point to a valid directory or be empty") + } + } + + resolvedRoot := f.resolvePath("") + item, err := f.fetchMetadataForPath(ctx, resolvedRoot, api.HiDriveObjectNoMetadataFields) + if err != nil { + if isHTTPError(err, 404) { + // NOTE: NewFs needs to work with paths that do not exist, + // in case they will be created later (see mkdir). 
+ return f, nil + } + return nil, fmt.Errorf("could not access root-path: %w", err) + } + if item.Type != api.HiDriveObjectTypeDirectory { + fs.Debugf(f, "The root is not a directory. Setting its parent-directory as the new root.") + // NOTE: There is no need to check + // if the parent-directory is inside the root-prefix: + // If the parent-directory was outside, + // then the resolved path would be the root-prefix, + // therefore the root-prefix would point to a file, + // which has already been checked for. + // In case the root-prefix is empty, this needs not be checked, + // because top-level files cannot exist. + f.root = path.Dir(f.root) + return f, fs.ErrorIsFile + } + + return f, nil +} + +// newObject constructs an Object by calling the given function metaFiller +// on an Object with no metadata. +// +// metaFiller should set the metadata of the object or +// return an appropriate error. +func (f *Fs) newObject(remote string, metaFiller func(*Object) error) (fs.Object, error) { + o := &Object{ + fs: f, + remote: remote, + } + var err error + if metaFiller != nil { + err = metaFiller(o) + } + if err != nil { + return nil, err + } + return o, nil +} + +// newObjectFromHiDriveObject constructs an Object from the given api.HiDriveObject. +func (f *Fs) newObjectFromHiDriveObject(remote string, info *api.HiDriveObject) (fs.Object, error) { + metaFiller := func(o *Object) error { + return o.setMetadata(info) + } + return f.newObject(remote, metaFiller) +} + +// NewObject finds the Object at remote. +// +// If remote points to a directory then it returns fs.ErrorIsDir. +// If it can not be found it returns the error fs.ErrorObjectNotFound. +func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { + //fs.Debugf(f, "executing NewObject(%s).", remote) + metaFiller := func(o *Object) error { + return o.readMetadata(ctx) + } + return f.newObject(remote, metaFiller) +} + +// List the objects and directories in dir into entries. +// The entries can be returned in any order, +// but should be for a complete directory. +// +// dir should be "" to list the root, and should not have trailing slashes. +// +// This returns fs.ErrorDirNotFound if the directory is not found. +func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { + //fs.Debugf(f, "executing List(%s).", dir) + var iErr error + addEntry := func(info *api.HiDriveObject) bool { + fs.Debugf(f, "found directory-element with name %s", info.Name) + remote := path.Join(dir, info.Name) + if info.Type == api.HiDriveObjectTypeDirectory { + d := fs.NewDir(remote, info.ModTime()) + d.SetID(info.ID) + d.SetSize(info.Size) + d.SetItems(info.MemberCount) + entries = append(entries, d) + } else if info.Type == api.HiDriveObjectTypeFile { + o, err := f.newObjectFromHiDriveObject(remote, info) + if err != nil { + iErr = err + return true + } + entries = append(entries, o) + } + return false + } + + var fields []string + if f.opt.OptionalMemberCountDisabled { + fields = api.HiDriveObjectWithMetadataFields + } else { + fields = api.HiDriveObjectWithDirectoryMetadataFields + } + resolvedDir := f.resolvePath(dir) + _, err = f.iterateOverDirectory(ctx, resolvedDir, AllMembers, addEntry, fields, Unsorted) + + if err != nil { + if isHTTPError(err, 404) { + return nil, fs.ErrorDirNotFound + } + return nil, err + } + if iErr != nil { + return nil, iErr + } + return entries, nil +} + +// Put the contents of the io.Reader into the remote path +// with the modTime given of the given size. 
+// The existing or new object is returned. +// +// A new object may have been created or +// an existing one accessed even if an error is returned, +// in which case both the object and the error will be returned. +func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + remote := src.Remote() + //fs.Debugf(f, "executing Put(%s, %v).", remote, options) + + existingObj, err := f.NewObject(ctx, remote) + switch err { + case nil: + return existingObj, existingObj.Update(ctx, in, src, options...) + case fs.ErrorObjectNotFound: + // Object was not found, so create a new one. + return f.PutUnchecked(ctx, in, src, options...) + } + return nil, err +} + +// PutStream uploads the contents of the io.Reader to the remote path +// with the modTime given of indeterminate size. +// The existing or new object is returned. +// +// A new object may have been created or +// an existing one accessed even if an error is returned, +// in which case both the object and the error will be returned. +func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + //fs.Debugf(f, "executing PutStream(%s, %v).", src.Remote(), options) + + return f.Put(ctx, in, src, options...) +} + +// PutUnchecked the contents of the io.Reader into the remote path +// with the modTime given of the given size. +// This guarantees that existing objects will not be overwritten. +// The new object is returned. +// +// This will produce an error if an object already exists at that path. +// +// In case the upload fails and an object has been created, +// this will try to delete the object at that path. +// In case the failed upload could not be deleted, +// both the object and the (upload-)error will be returned. +func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + remote := src.Remote() + modTime := src.ModTime(ctx) + //fs.Debugf(f, "executing PutUnchecked(%s, %v).", remote, options) + resolvedPath := f.resolvePath(remote) + + // NOTE: The file creation operation is a single atomic operation. + // Thus uploading as much content as is reasonable + // (i.e. everything up to the cutoff) in the first request, + // avoids files being created on upload failure for small files. + // (As opposed to creating an empty file and then uploading the content.) + tmpReader, bytesRead, err := readerForChunk(in, int(f.opt.UploadCutoff)) + cutoffReader := cachedReader(tmpReader) + if err != nil { + return nil, err + } + + var info *api.HiDriveObject + err = f.retryOnce.Call(func() (bool, error) { + var createErr error + // Reset the reading index (in case this is a retry). + if _, createErr = cutoffReader.Seek(0, io.SeekStart); createErr != nil { + return false, createErr + } + info, createErr = f.createFile(ctx, resolvedPath, cutoffReader, modTime, IgnoreOnExist) + + if createErr == fs.ErrorDirNotFound { + // Create the parent-directory for the object and repeat request. 
+			_, parentErr := f.createDirectories(ctx, path.Dir(resolvedPath), IgnoreOnExist)
+			if parentErr != nil && parentErr != fs.ErrorDirExists {
+				fs.Errorf(f, "Tried to create parent-directory for '%s', but failed.", resolvedPath)
+				return false, parentErr
+			}
+			return true, createErr
+		}
+		return false, createErr
+	})
+
+	if err != nil {
+		return nil, err
+	}
+
+	o, err := f.newObjectFromHiDriveObject(remote, info)
+
+	if err != nil {
+		return nil, err
+	}
+
+	if fs.SizeSuffix(bytesRead) < f.opt.UploadCutoff {
+		return o, nil
+	}
+	// If there is more left to write, o.Update needs to skip ahead.
+	// Use a fs.SeekOption with the current offset to do this.
+	options = append(options, &fs.SeekOption{Offset: int64(bytesRead)})
+	err = o.Update(ctx, in, src, options...)
+
+	if err == nil {
+		return o, nil
+	}
+
+	// Try to remove the object at the path after its content could not be uploaded.
+	deleteErr := f.pacer.Call(func() (bool, error) {
+		deleteErr := o.Remove(ctx)
+		return deleteErr == fs.ErrorObjectNotFound, deleteErr
+	})
+
+	if deleteErr == nil {
+		return nil, err
+	}
+
+	fs.Errorf(f, "Tried to delete failed upload at path '%s', but failed: %v", resolvedPath, deleteErr)
+	return o, err
+}
+
+// Mkdir creates the directory if it does not exist.
+//
+// This will create any missing parent directories.
+//
+// NOTE: If an error occurs while the parent directories are being created,
+// any directories already created will NOT be deleted again.
+func (f *Fs) Mkdir(ctx context.Context, dir string) error {
+	//fs.Debugf(f, "executing Mkdir(%s).", dir)
+	resolvedDir := f.resolvePath(dir)
+	_, err := f.createDirectories(ctx, resolvedDir, IgnoreOnExist)
+
+	if err == fs.ErrorDirExists {
+		// NOTE: The conflict is caused by the directory already existing,
+		// which should be ignored here.
+		return nil
+	}
+
+	return err
+}
+
+// Rmdir removes the directory if empty.
+//
+// This returns fs.ErrorDirNotFound if the directory is not found.
+// This returns fs.ErrorDirectoryNotEmpty if the directory is not empty.
+func (f *Fs) Rmdir(ctx context.Context, dir string) error {
+	//fs.Debugf(f, "executing Rmdir(%s).", dir)
+	resolvedDir := f.resolvePath(dir)
+	return f.deleteDirectory(ctx, resolvedDir, false)
+}
+
+// Purge removes the directory and all of its contents.
+//
+// This returns fs.ErrorDirNotFound if the directory is not found.
+func (f *Fs) Purge(ctx context.Context, dir string) error {
+	//fs.Debugf(f, "executing Purge(%s).", dir)
+	resolvedDir := f.resolvePath(dir)
+	return f.deleteDirectory(ctx, resolvedDir, true)
+}
+
+// shouldRetryAndCreateParents returns a boolean as to whether the operation
+// should be retried after the parent-directories of the destination have been created.
+// If so, it will create the parent-directories.
+//
+// If any errors arise while finding the source or
+// creating the parent-directory, those will be returned.
+// Otherwise it returns the originalError.
+func (f *Fs) shouldRetryAndCreateParents(ctx context.Context, destinationPath string, sourcePath string, originalError error) (bool, error) {
+	if fserrors.ContextError(ctx, &originalError) {
+		return false, originalError
+	}
+	if isHTTPError(originalError, 404) {
+		// Check if source is missing.
+		_, srcErr := f.fetchMetadataForPath(ctx, sourcePath, api.HiDriveObjectNoMetadataFields)
+		if srcErr != nil {
+			return false, srcErr
+		}
+		// Source exists, so the parent of the destination must have been missing.
+		// Create the parent-directory and repeat request.
+		_, parentErr := f.createDirectories(ctx, path.Dir(destinationPath), IgnoreOnExist)
+		if parentErr != nil && parentErr != fs.ErrorDirExists {
+			fs.Errorf(f, "Tried to create parent-directory for '%s', but failed.", destinationPath)
+			return false, parentErr
+		}
+		return true, originalError
+	}
+	return false, originalError
+}
+
+// Copy src to this remote using server-side copy operations.
+//
+// It returns the destination Object and a possible error.
+//
+// This returns fs.ErrorCantCopy if the operation cannot be performed.
+//
+// NOTE: If an error occurs when copying the Object,
+// any parent-directories already created will NOT be deleted again.
+//
+// NOTE: This operation will expand sparse areas in the content of the source-Object
+// to blocks of 0-bytes in the destination-Object.
+func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
+	srcObj, ok := src.(*Object)
+	if !ok {
+		fs.Debugf(src, "Can't copy - not same remote type")
+		return nil, fs.ErrorCantCopy
+	}
+	// Get the absolute path to the source.
+	srcPath := srcObj.fs.resolvePath(srcObj.Remote())
+	//fs.Debugf(f, "executing Copy(%s, %s).", srcPath, remote)
+	dstPath := f.resolvePath(remote)
+
+	var info *api.HiDriveObject
+	err := f.retryOnce.Call(func() (bool, error) {
+		var copyErr error
+		info, copyErr = f.copyFile(ctx, srcPath, dstPath, OverwriteOnExist)
+		return f.shouldRetryAndCreateParents(ctx, dstPath, srcPath, copyErr)
+	})
+
+	if err != nil {
+		return nil, err
+	}
+	dstObj, err := f.newObjectFromHiDriveObject(remote, info)
+	if err != nil {
+		return nil, err
+	}
+	return dstObj, nil
+}
+
+// Move src to this remote using server-side move operations.
+//
+// It returns the destination Object and a possible error.
+//
+// This returns fs.ErrorCantMove if the operation cannot be performed.
+//
+// NOTE: If an error occurs when moving the Object,
+// any parent-directories already created will NOT be deleted again.
+//
+// NOTE: This operation will expand sparse areas in the content of the source-Object
+// to blocks of 0-bytes in the destination-Object.
+func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
+	srcObj, ok := src.(*Object)
+	if !ok {
+		fs.Debugf(src, "Can't move - not same remote type")
+		return nil, fs.ErrorCantMove
+	}
+	// Get the absolute path to the source.
+	srcPath := srcObj.fs.resolvePath(srcObj.Remote())
+	//fs.Debugf(f, "executing Move(%s, %s).", srcPath, remote)
+	dstPath := f.resolvePath(remote)
+
+	var info *api.HiDriveObject
+	err := f.retryOnce.Call(func() (bool, error) {
+		var moveErr error
+		info, moveErr = f.moveFile(ctx, srcPath, dstPath, OverwriteOnExist)
+		return f.shouldRetryAndCreateParents(ctx, dstPath, srcPath, moveErr)
+	})
+
+	if err != nil {
+		return nil, err
+	}
+	dstObj, err := f.newObjectFromHiDriveObject(remote, info)
+	if err != nil {
+		return nil, err
+	}
+	return dstObj, nil
+}
+
+// DirMove moves from src at srcRemote to this remote at dstRemote
+// using server-side move operations.
+//
+// This returns fs.ErrorCantDirMove if the operation cannot be performed.
+// This returns fs.ErrorDirExists if the destination already exists.
+//
+// NOTE: If an error occurs when moving the directory,
+// any parent-directories already created will NOT be deleted again.
+func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) error { + srcFs, ok := src.(*Fs) + if !ok { + fs.Debugf(srcFs, "Can't move directory - not same remote type") + return fs.ErrorCantDirMove + } + + // Get the absolute path to the source. + srcPath := srcFs.resolvePath(srcRemote) + //fs.Debugf(f, "executing DirMove(%s, %s).", srcPath, dstRemote) + dstPath := f.resolvePath(dstRemote) + + err := f.retryOnce.Call(func() (bool, error) { + var moveErr error + _, moveErr = f.moveDirectory(ctx, srcPath, dstPath, IgnoreOnExist) + return f.shouldRetryAndCreateParents(ctx, dstPath, srcPath, moveErr) + }) + + if err != nil { + if isHTTPError(err, 409) { + return fs.ErrorDirExists + } + return err + } + return nil +} + +// ------------------------------------------------------------ + +// Fs returns the parent Fs. +func (o *Object) Fs() fs.Info { + return o.fs +} + +// String returns a string-representation of this Object. +func (o *Object) String() string { + if o == nil { + return "" + } + return o.remote +} + +// Remote returns the remote path. +func (o *Object) Remote() string { + return o.remote +} + +// ID returns the ID of the Object if known, or "" if not. +func (o *Object) ID() string { + err := o.readMetadata(context.TODO()) + if err != nil { + fs.Logf(o, "Failed to read metadata: %v", err) + return "" + } + return o.id +} + +// Hash returns the selected checksum of the file. +// If no checksum is available it returns "". +func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) { + err := o.readMetadata(ctx) + if err != nil { + return "", fmt.Errorf("failed to read hash from metadata: %w", err) + } + switch t { + case hidrivehashType: + return o.hash, nil + default: + return "", hash.ErrUnsupported + } +} + +// Size returns the size of an object in bytes. +func (o *Object) Size() int64 { + err := o.readMetadata(context.TODO()) + if err != nil { + fs.Logf(o, "Failed to read metadata: %v", err) + return -1 + } + return o.size +} + +// setMetadata sets the metadata from info. +func (o *Object) setMetadata(info *api.HiDriveObject) error { + if info.Type == api.HiDriveObjectTypeDirectory { + return fs.ErrorIsDir + } + if info.Type != api.HiDriveObjectTypeFile { + return fmt.Errorf("%q is %q: %w", o.remote, info.Type, fs.ErrorNotAFile) + } + o.hasMetadata = true + o.size = info.Size + o.modTime = info.ModTime() + o.id = info.ID + o.hash = info.ContentHash + return nil +} + +// readMetadata fetches the metadata if it has not already been fetched. +func (o *Object) readMetadata(ctx context.Context) error { + if o.hasMetadata { + return nil + } + resolvedPath := o.fs.resolvePath(o.remote) + info, err := o.fs.fetchMetadataForPath(ctx, resolvedPath, api.HiDriveObjectWithMetadataFields) + if err != nil { + if isHTTPError(err, 404) { + return fs.ErrorObjectNotFound + } + return err + } + return o.setMetadata(info) +} + +// ModTime returns the modification time of the object. +func (o *Object) ModTime(ctx context.Context) time.Time { + err := o.readMetadata(ctx) + if err != nil { + fs.Logf(o, "Failed to read metadata: %v", err) + return time.Now() + } + return o.modTime +} + +// SetModTime sets the metadata on the object to set the modification date. 
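+// This is done with a single PATCH request to the "/meta" endpoint.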
+func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
+	parameters := api.NewQueryParameters()
+	resolvedPath := o.fs.resolvePath(o.remote)
+	parameters.SetPath(resolvedPath)
+	err := parameters.SetTime("mtime", modTime)
+
+	if err != nil {
+		return err
+	}
+
+	opts := rest.Opts{
+		Method:     "PATCH",
+		Path:       "/meta",
+		Parameters: parameters.Values,
+		NoResponse: true,
+	}
+
+	var resp *http.Response
+	err = o.fs.pacer.Call(func() (bool, error) {
+		resp, err = o.fs.srv.Call(ctx, &opts)
+		return o.fs.shouldRetry(ctx, resp, err)
+	})
+	if err != nil {
+		return err
+	}
+	o.modTime = modTime
+	return nil
+}
+
+// Storable says whether this object can be stored.
+func (o *Object) Storable() bool {
+	return true
+}
+
+// Open an object for reading.
+func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (io.ReadCloser, error) {
+	parameters := api.NewQueryParameters()
+	resolvedPath := o.fs.resolvePath(o.remote)
+	parameters.SetPath(resolvedPath)
+
+	fs.FixRangeOption(options, o.Size())
+	opts := rest.Opts{
+		Method:     "GET",
+		Path:       "/file",
+		Parameters: parameters.Values,
+		Options:    options,
+	}
+	var resp *http.Response
+	var err error
+	err = o.fs.pacer.Call(func() (bool, error) {
+		resp, err = o.fs.srv.Call(ctx, &opts)
+		return o.fs.shouldRetry(ctx, resp, err)
+	})
+	if err != nil {
+		return nil, err
+	}
+	return resp.Body, err
+}
+
+// Update the existing object
+// with the contents of the io.Reader, modTime and size.
+//
+// For unknown-sized contents (indicated by src.Size() == -1)
+// this will try to properly upload it in multiple chunks.
+func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
+	//fs.Debugf(o.fs, "executing Update(%s, %v).", o.remote, options)
+	modTime := src.ModTime(ctx)
+	resolvedPath := o.fs.resolvePath(o.remote)
+
+	if o.fs.tokenRenewer != nil {
+		o.fs.tokenRenewer.Start()
+		defer o.fs.tokenRenewer.Stop()
+	}
+
+	// PutUnchecked can pass a valid SeekOption to skip ahead.
+	var offset uint64
+	for _, option := range options {
+		if seekoption, ok := option.(*fs.SeekOption); ok {
+			offset = uint64(seekoption.Offset)
+			break
+		}
+	}
+
+	var info *api.HiDriveObject
+	var err, metaErr error
+	if offset > 0 || src.Size() == -1 || src.Size() >= int64(o.fs.opt.UploadCutoff) {
+		fs.Debugf(o.fs, "Uploading with chunks of size %v and %v transfers in parallel at path '%s'.", int(o.fs.opt.UploadChunkSize), o.fs.opt.UploadConcurrency, resolvedPath)
+		// NOTE: o.fs.opt.UploadChunkSize should always
+		// be between 0 and MaximumUploadBytes,
+		// so the conversion to an int does not cause problems for valid inputs.
+		if offset > 0 {
+			// NOTE: The offset is only set
+			// when the file was newly created,
+			// therefore the file does not need truncating.
+			_, err = o.fs.updateFileChunked(ctx, resolvedPath, in, offset, int(o.fs.opt.UploadChunkSize), o.fs.opt.UploadConcurrency)
+			if err == nil {
+				err = o.SetModTime(ctx, modTime)
+			}
+		} else {
+			_, _, err = o.fs.uploadFileChunked(ctx, resolvedPath, in, modTime, int(o.fs.opt.UploadChunkSize), o.fs.opt.UploadConcurrency)
+		}
+		// Try to check if the object was updated, either way.
+		// Metadata should be updated even if the upload fails.
+		info, metaErr = o.fs.fetchMetadataForPath(ctx, resolvedPath, api.HiDriveObjectWithMetadataFields)
+	} else {
+		info, err = o.fs.overwriteFile(ctx, resolvedPath, cachedReader(in), modTime)
+		metaErr = err
+	}
+
+	// Update metadata of this object,
+	// if there was no error with getting the metadata.
+ if metaErr == nil { + metaErr = o.setMetadata(info) + } + + // Errors with the upload-process are more relevant, return those first. + if err != nil { + return err + } + return metaErr +} + +// Remove an object. +func (o *Object) Remove(ctx context.Context) error { + resolvedPath := o.fs.resolvePath(o.remote) + return o.fs.deleteObject(ctx, resolvedPath) +} + +// Check the interfaces are satisfied. +var ( + _ fs.Fs = (*Fs)(nil) + _ fs.Purger = (*Fs)(nil) + _ fs.PutStreamer = (*Fs)(nil) + _ fs.PutUncheckeder = (*Fs)(nil) + _ fs.Copier = (*Fs)(nil) + _ fs.Mover = (*Fs)(nil) + _ fs.DirMover = (*Fs)(nil) + _ fs.Object = (*Object)(nil) + _ fs.IDer = (*Object)(nil) +) diff --git a/backend/hidrive/hidrive_test.go b/backend/hidrive/hidrive_test.go new file mode 100644 index 000000000..30e1ef2f1 --- /dev/null +++ b/backend/hidrive/hidrive_test.go @@ -0,0 +1,45 @@ +// Test HiDrive filesystem interface +package hidrive + +import ( + "testing" + + "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fstest/fstests" +) + +// TestIntegration runs integration tests against the remote. +func TestIntegration(t *testing.T) { + name := "TestHiDrive" + fstests.Run(t, &fstests.Opt{ + RemoteName: name + ":", + NilObject: (*Object)(nil), + ChunkedUpload: fstests.ChunkedUploadConfig{ + MinChunkSize: 1, + MaxChunkSize: MaximumUploadBytes, + CeilChunkSize: nil, + NeedMultipleChunks: false, + }, + }) +} + +// Change the configured UploadChunkSize. +// Will only be called while no transfer is in progress. +func (f *Fs) SetUploadChunkSize(chunksize fs.SizeSuffix) (fs.SizeSuffix, error) { + var old fs.SizeSuffix + old, f.opt.UploadChunkSize = f.opt.UploadChunkSize, chunksize + return old, nil +} + +// Change the configured UploadCutoff. +// Will only be called while no transfer is in progress. +func (f *Fs) SetUploadCutoff(cutoff fs.SizeSuffix) (fs.SizeSuffix, error) { + var old fs.SizeSuffix + old, f.opt.UploadCutoff = f.opt.UploadCutoff, cutoff + return old, nil +} + +var ( + _ fstests.SetUploadChunkSizer = (*Fs)(nil) + _ fstests.SetUploadCutoffer = (*Fs)(nil) +) diff --git a/backend/hidrive/hidrivehash/hidrivehash.go b/backend/hidrive/hidrivehash/hidrivehash.go new file mode 100644 index 000000000..092663d42 --- /dev/null +++ b/backend/hidrive/hidrivehash/hidrivehash.go @@ -0,0 +1,410 @@ +// Package hidrivehash implements the HiDrive hashing algorithm which combines SHA-1 hashes hierarchically to a single top-level hash. +// +// Note: This implementation does not grant access to any partial hashes generated. +// +// See: https://developer.hidrive.com/wp-content/uploads/2021/07/HiDrive_Synchronization-v3.3-rev28.pdf +// (link to newest version: https://static.hidrive.com/dev/0001) +package hidrivehash + +import ( + "bytes" + "crypto/sha1" + "encoding" + "encoding/binary" + "errors" + "fmt" + "hash" + "io" + + "github.com/rclone/rclone/backend/hidrive/hidrivehash/internal" +) + +const ( + // BlockSize of the checksum in bytes. + BlockSize = 4096 + // Size of the checksum in bytes. + Size = sha1.Size + // sumsPerLevel is the number of checksums + sumsPerLevel = 256 +) + +var ( + // zeroSum is a special hash consisting of 20 null-bytes. + // This will be the hash of any empty file (or ones containing only null-bytes). + zeroSum = [Size]byte{} + // ErrorInvalidEncoding is returned when a hash should be decoded from a binary form that is invalid. 
+ ErrorInvalidEncoding = errors.New("encoded binary form is invalid for this hash") + // ErrorHashFull is returned when a hash reached its capacity and cannot accept any more input. + ErrorHashFull = errors.New("hash reached its capacity") +) + +// writeByBlock writes len(p) bytes from p to the io.Writer in blocks of size blockSize. +// It returns the number of bytes written from p (0 <= n <= len(p)) +// and any error encountered that caused the write to stop early. +// +// A pointer bytesInBlock to a counter needs to be supplied, +// that is used to keep track how many bytes have been written to the writer already. +// A pointer onlyNullBytesInBlock to a boolean needs to be supplied, +// that is used to keep track whether the block so far only consists of null-bytes. +// The callback onBlockWritten is called whenever a full block has been written to the writer +// and is given as input the number of bytes that still need to be written. +func writeByBlock(p []byte, writer io.Writer, blockSize uint32, bytesInBlock *uint32, onlyNullBytesInBlock *bool, onBlockWritten func(remaining int) error) (n int, err error) { + total := len(p) + nullBytes := make([]byte, blockSize) + for len(p) > 0 { + toWrite := int(blockSize - *bytesInBlock) + if toWrite > len(p) { + toWrite = len(p) + } + c, err := writer.Write(p[:toWrite]) + *bytesInBlock += uint32(c) + *onlyNullBytesInBlock = *onlyNullBytesInBlock && bytes.Equal(nullBytes[:toWrite], p[:toWrite]) + // Discard data written through a reslice + p = p[c:] + if err != nil { + return total - len(p), err + } + if *bytesInBlock == blockSize { + err = onBlockWritten(len(p)) + if err != nil { + return total - len(p), err + } + *bytesInBlock = 0 + *onlyNullBytesInBlock = true + } + } + return total, nil +} + +// level is a hash.Hash that is used to aggregate the checksums produced by the level hierarchically beneath it. +// It is used to represent any level-n hash, except for level-0. +type level struct { + checksum [Size]byte // aggregated checksum of this level + sumCount uint32 // number of sums contained in this level so far + bytesInHasher uint32 // number of bytes written into hasher so far + onlyNullBytesInHasher bool // whether the hasher only contains null-bytes so far + hasher hash.Hash +} + +// NewLevel returns a new hash.Hash computing any level-n hash, except level-0. +func NewLevel() hash.Hash { + l := &level{} + l.Reset() + return l +} + +// Add takes a position-embedded SHA-1 checksum and adds it to the level. +func (l *level) Add(sha1sum []byte) { + var tmp uint + var carry bool + for i := Size - 1; i >= 0; i-- { + tmp = uint(sha1sum[i]) + uint(l.checksum[i]) + if carry { + tmp++ + } + carry = tmp > 255 + l.checksum[i] = byte(tmp) + } +} + +// IsFull returns whether the number of checksums added to this level reached its capacity. +func (l *level) IsFull() bool { + return l.sumCount >= sumsPerLevel +} + +// Write (via the embedded io.Writer interface) adds more data to the running hash. +// Contrary to the specification from hash.Hash, this DOES return an error, +// specifically ErrorHashFull if and only if IsFull() returns true. 
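+//
+// A minimal usage sketch (illustrative only; blockSum stands for a Size-byte
+// checksum coming from the level below):
+//
+//	l := NewLevel()
+//	if _, err := l.Write(blockSum[:]); err != nil {
+//		// the level has reached its capacity of sumsPerLevel checksums
+//	}
+//	aggregated := l.Sum(nil)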
+func (l *level) Write(p []byte) (n int, err error) { + if l.IsFull() { + return 0, ErrorHashFull + } + onBlockWritten := func(remaining int) error { + if !l.onlyNullBytesInHasher { + c, err := l.hasher.Write([]byte{byte(l.sumCount)}) + l.bytesInHasher += uint32(c) + if err != nil { + return err + } + l.Add(l.hasher.Sum(nil)) + } + l.sumCount++ + l.hasher.Reset() + if remaining > 0 && l.IsFull() { + return ErrorHashFull + } + return nil + } + return writeByBlock(p, l.hasher, uint32(l.BlockSize()), &l.bytesInHasher, &l.onlyNullBytesInHasher, onBlockWritten) +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (l *level) Sum(b []byte) []byte { + return append(b, l.checksum[:]...) +} + +// Reset resets the Hash to its initial state. +func (l *level) Reset() { + l.checksum = zeroSum // clear the current checksum + l.sumCount = 0 + l.bytesInHasher = 0 + l.onlyNullBytesInHasher = true + l.hasher = sha1.New() +} + +// Size returns the number of bytes Sum will return. +func (l *level) Size() int { + return Size +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (l *level) BlockSize() int { + return Size +} + +// MarshalBinary encodes the hash into a binary form and returns the result. +func (l *level) MarshalBinary() ([]byte, error) { + b := make([]byte, Size+4+4+1) + copy(b, l.checksum[:]) + binary.BigEndian.PutUint32(b[Size:], l.sumCount) + binary.BigEndian.PutUint32(b[Size+4:], l.bytesInHasher) + if l.onlyNullBytesInHasher { + b[Size+4+4] = 1 + } + encodedHasher, err := l.hasher.(encoding.BinaryMarshaler).MarshalBinary() + if err != nil { + return nil, err + } + b = append(b, encodedHasher...) + return b, nil +} + +// UnmarshalBinary decodes the binary form generated by MarshalBinary. +// The hash will replace its internal state accordingly. +func (l *level) UnmarshalBinary(b []byte) error { + if len(b) < Size+4+4+1 { + return ErrorInvalidEncoding + } + copy(l.checksum[:], b) + l.sumCount = binary.BigEndian.Uint32(b[Size:]) + l.bytesInHasher = binary.BigEndian.Uint32(b[Size+4:]) + switch b[Size+4+4] { + case 0: + l.onlyNullBytesInHasher = false + case 1: + l.onlyNullBytesInHasher = true + default: + return ErrorInvalidEncoding + } + err := l.hasher.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[Size+4+4+1:]) + return err +} + +// hidriveHash is the hash computing the actual checksum used by HiDrive by combining multiple level-hashes. +type hidriveHash struct { + levels []*level // collection of level-hashes, one for each level starting at level-1 + lastSumWritten [Size]byte // the last checksum written to any of the levels + bytesInBlock uint32 // bytes written into blockHash so far + onlyNullBytesInBlock bool // whether the hasher only contains null-bytes so far + blockHash hash.Hash +} + +// New returns a new hash.Hash computing the HiDrive checksum. +func New() hash.Hash { + h := &hidriveHash{} + h.Reset() + return h +} + +// aggregateToLevel writes the checksum to the level at the given index +// and if necessary propagates any changes to levels above. 
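+//
+// This behaves like carrying in a positional number system:
+// once the level at the given index has accepted sumsPerLevel checksums,
+// its aggregated sum is written into the next-higher level
+// (which is created on demand) and the full level is reset.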
+func (h *hidriveHash) aggregateToLevel(index int, sum []byte) { + for i := index; ; i++ { + if i >= len(h.levels) { + h.levels = append(h.levels, NewLevel().(*level)) + } + _, err := h.levels[i].Write(sum) + copy(h.lastSumWritten[:], sum) + if err != nil { + panic(fmt.Errorf("level-hash should not have produced an error: %w", err)) + } + if !h.levels[i].IsFull() { + break + } + sum = h.levels[i].Sum(nil) + h.levels[i].Reset() + } +} + +// Write (via the embedded io.Writer interface) adds more data to the running hash. +// It never returns an error. +func (h *hidriveHash) Write(p []byte) (n int, err error) { + onBlockWritten := func(remaining int) error { + var sum []byte + if h.onlyNullBytesInBlock { + sum = zeroSum[:] + } else { + sum = h.blockHash.Sum(nil) + } + h.blockHash.Reset() + h.aggregateToLevel(0, sum) + return nil + } + return writeByBlock(p, h.blockHash, uint32(BlockSize), &h.bytesInBlock, &h.onlyNullBytesInBlock, onBlockWritten) +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (h *hidriveHash) Sum(b []byte) []byte { + // Save internal state. + state, err := h.MarshalBinary() + if err != nil { + panic(fmt.Errorf("saving the internal state should not have produced an error: %w", err)) + } + + if h.bytesInBlock > 0 { + // Fill remainder of block with null-bytes. + filler := make([]byte, h.BlockSize()-int(h.bytesInBlock)) + _, err = h.Write(filler) + if err != nil { + panic(fmt.Errorf("filling with null-bytes should not have an error: %w", err)) + } + } + + checksum := zeroSum + for i := 0; i < len(h.levels); i++ { + level := h.levels[i] + if i < len(h.levels)-1 { + // Aggregate non-empty non-final levels. + if level.sumCount >= 1 { + h.aggregateToLevel(i+1, level.Sum(nil)) + level.Reset() + } + } else { + // Determine sum of final level. + if level.sumCount > 1 { + copy(checksum[:], level.Sum(nil)) + } else { + // This is needed, otherwise there is no way to return + // the non-position-embedded checksum. + checksum = h.lastSumWritten + } + } + } + + // Restore internal state. + err = h.UnmarshalBinary(state) + if err != nil { + panic(fmt.Errorf("restoring the internal state should not have produced an error: %w", err)) + } + + return append(b, checksum[:]...) +} + +// Reset resets the Hash to its initial state. +func (h *hidriveHash) Reset() { + h.levels = nil + h.lastSumWritten = zeroSum // clear the last written checksum + h.bytesInBlock = 0 + h.onlyNullBytesInBlock = true + h.blockHash = sha1.New() +} + +// Size returns the number of bytes Sum will return. +func (h *hidriveHash) Size() int { + return Size +} + +// BlockSize returns the hash's underlying block size. +// The Write method must be able to accept any amount +// of data, but it may operate more efficiently if all writes +// are a multiple of the block size. +func (h *hidriveHash) BlockSize() int { + return BlockSize +} + +// MarshalBinary encodes the hash into a binary form and returns the result. 
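+//
+// Together with UnmarshalBinary this allows a partially fed hash to be
+// persisted and resumed later, for example (illustrative sketch only):
+//
+//	state, _ := h.(encoding.BinaryMarshaler).MarshalBinary()
+//	resumed := New()
+//	_ = resumed.(encoding.BinaryUnmarshaler).UnmarshalBinary(state)
+//	// resumed now continues from the same internal state as h.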
+func (h *hidriveHash) MarshalBinary() ([]byte, error) { + b := make([]byte, Size+4+1+8) + copy(b, h.lastSumWritten[:]) + binary.BigEndian.PutUint32(b[Size:], h.bytesInBlock) + if h.onlyNullBytesInBlock { + b[Size+4] = 1 + } + + binary.BigEndian.PutUint64(b[Size+4+1:], uint64(len(h.levels))) + for _, level := range h.levels { + encodedLevel, err := level.MarshalBinary() + if err != nil { + return nil, err + } + encodedLength := make([]byte, 8) + binary.BigEndian.PutUint64(encodedLength, uint64(len(encodedLevel))) + b = append(b, encodedLength...) + b = append(b, encodedLevel...) + } + encodedBlockHash, err := h.blockHash.(encoding.BinaryMarshaler).MarshalBinary() + if err != nil { + return nil, err + } + b = append(b, encodedBlockHash...) + return b, nil +} + +// UnmarshalBinary decodes the binary form generated by MarshalBinary. +// The hash will replace its internal state accordingly. +func (h *hidriveHash) UnmarshalBinary(b []byte) error { + if len(b) < Size+4+1+8 { + return ErrorInvalidEncoding + } + copy(h.lastSumWritten[:], b) + h.bytesInBlock = binary.BigEndian.Uint32(b[Size:]) + switch b[Size+4] { + case 0: + h.onlyNullBytesInBlock = false + case 1: + h.onlyNullBytesInBlock = true + default: + return ErrorInvalidEncoding + } + + amount := binary.BigEndian.Uint64(b[Size+4+1:]) + h.levels = make([]*level, int(amount)) + offset := Size + 4 + 1 + 8 + for i := range h.levels { + length := int(binary.BigEndian.Uint64(b[offset:])) + offset += 8 + h.levels[i] = NewLevel().(*level) + err := h.levels[i].UnmarshalBinary(b[offset : offset+length]) + if err != nil { + return err + } + offset += length + } + err := h.blockHash.(encoding.BinaryUnmarshaler).UnmarshalBinary(b[offset:]) + return err +} + +// Sum returns the HiDrive checksum of the data. +func Sum(data []byte) [Size]byte { + h := New().(*hidriveHash) + _, _ = h.Write(data) + var result [Size]byte + copy(result[:], h.Sum(nil)) + return result +} + +// Check the interfaces are satisfied. 
+var ( + _ hash.Hash = (*level)(nil) + _ encoding.BinaryMarshaler = (*level)(nil) + _ encoding.BinaryUnmarshaler = (*level)(nil) + _ internal.LevelHash = (*level)(nil) + _ hash.Hash = (*hidriveHash)(nil) + _ encoding.BinaryMarshaler = (*hidriveHash)(nil) + _ encoding.BinaryUnmarshaler = (*hidriveHash)(nil) +) diff --git a/backend/hidrive/hidrivehash/hidrivehash_test.go b/backend/hidrive/hidrivehash/hidrivehash_test.go new file mode 100644 index 000000000..d27970c34 --- /dev/null +++ b/backend/hidrive/hidrivehash/hidrivehash_test.go @@ -0,0 +1,395 @@ +package hidrivehash_test + +import ( + "crypto/sha1" + "encoding" + "encoding/hex" + "fmt" + "io" + "testing" + + "github.com/rclone/rclone/backend/hidrive/hidrivehash" + "github.com/rclone/rclone/backend/hidrive/hidrivehash/internal" + "github.com/stretchr/testify/assert" +) + +// helper functions to set up test-tables + +func sha1ArrayAsSlice(sum [sha1.Size]byte) []byte { + return sum[:] +} + +func mustDecode(hexstring string) []byte { + result, err := hex.DecodeString(hexstring) + if err != nil { + panic(err) + } + return result +} + +// ------------------------------------------------------------ + +var testTableLevelPositionEmbedded = []struct { + ins [][]byte + outs [][]byte + name string +}{ + { + [][]byte{ + sha1ArrayAsSlice([20]byte{245, 202, 195, 223, 121, 198, 189, 112, 138, 202, 222, 2, 146, 156, 127, 16, 208, 233, 98, 88}), + sha1ArrayAsSlice([20]byte{78, 188, 156, 219, 173, 54, 81, 55, 47, 220, 222, 207, 201, 21, 57, 252, 255, 239, 251, 186}), + }, + [][]byte{ + sha1ArrayAsSlice([20]byte{245, 202, 195, 223, 121, 198, 189, 112, 138, 202, 222, 2, 146, 156, 127, 16, 208, 233, 98, 88}), + sha1ArrayAsSlice([20]byte{68, 135, 96, 187, 38, 253, 14, 167, 186, 167, 188, 210, 91, 177, 185, 13, 208, 217, 94, 18}), + }, + "documentation-v3.2rev27-example L0 (position-embedded)", + }, + { + [][]byte{ + sha1ArrayAsSlice([20]byte{68, 254, 92, 166, 52, 37, 104, 180, 22, 123, 249, 144, 182, 78, 64, 74, 57, 117, 225, 195}), + sha1ArrayAsSlice([20]byte{75, 211, 153, 190, 125, 179, 67, 49, 60, 149, 98, 246, 142, 20, 11, 254, 159, 162, 129, 237}), + sha1ArrayAsSlice([20]byte{150, 2, 9, 153, 97, 153, 189, 104, 147, 14, 77, 203, 244, 243, 25, 212, 67, 48, 111, 107}), + }, + [][]byte{ + sha1ArrayAsSlice([20]byte{68, 254, 92, 166, 52, 37, 104, 180, 22, 123, 249, 144, 182, 78, 64, 74, 57, 117, 225, 195}), + sha1ArrayAsSlice([20]byte{144, 209, 246, 100, 177, 216, 171, 229, 83, 17, 92, 135, 68, 98, 76, 72, 217, 24, 99, 176}), + sha1ArrayAsSlice([20]byte{38, 211, 255, 254, 19, 114, 105, 77, 230, 31, 170, 83, 57, 85, 102, 29, 28, 72, 211, 27}), + }, + "documentation-example L0 (position-embedded)", + }, + { + [][]byte{ + sha1ArrayAsSlice([20]byte{173, 123, 132, 245, 176, 172, 43, 183, 121, 40, 66, 252, 101, 249, 188, 193, 160, 189, 2, 116}), + sha1ArrayAsSlice([20]byte{40, 34, 8, 238, 37, 5, 237, 184, 79, 105, 10, 167, 171, 254, 13, 229, 132, 112, 254, 8}), + sha1ArrayAsSlice([20]byte{39, 112, 26, 86, 190, 35, 100, 101, 28, 131, 122, 191, 254, 144, 239, 107, 253, 124, 104, 203}), + }, + [][]byte{ + sha1ArrayAsSlice([20]byte{173, 123, 132, 245, 176, 172, 43, 183, 121, 40, 66, 252, 101, 249, 188, 193, 160, 189, 2, 116}), + sha1ArrayAsSlice([20]byte{213, 157, 141, 227, 213, 178, 25, 111, 200, 145, 77, 164, 17, 247, 202, 167, 37, 46, 0, 124}), + sha1ArrayAsSlice([20]byte{253, 13, 168, 58, 147, 213, 125, 212, 229, 20, 200, 100, 16, 136, 186, 19, 34, 170, 105, 71}), + }, + "documentation-example L1 (position-embedded)", + }, +} + +var testTableLevel = []struct { + 
ins [][]byte + outs [][]byte + name string +}{ + { + [][]byte{ + mustDecode("09f077820a8a41f34a639f2172f1133b1eafe4e6"), + mustDecode("09f077820a8a41f34a639f2172f1133b1eafe4e6"), + mustDecode("09f077820a8a41f34a639f2172f1133b1eafe4e6"), + }, + [][]byte{ + mustDecode("44fe5ca6342568b4167bf990b64e404a3975e1c3"), + mustDecode("90d1f664b1d8abe553115c8744624c48d91863b0"), + mustDecode("26d3fffe1372694de61faa533955661d1c48d31b"), + }, + "documentation-example L0", + }, + { + [][]byte{ + mustDecode("75a9f88fb219ef1dd31adf41c93e2efaac8d0245"), + mustDecode("daedc425199501b1e86b5eaba5649cbde205e6ae"), + mustDecode("286ac5283f99c4e0f11683900a3e39661c375dd6"), + }, + [][]byte{ + mustDecode("ad7b84f5b0ac2bb7792842fc65f9bcc1a0bd0274"), + mustDecode("d59d8de3d5b2196fc8914da411f7caa7252e007c"), + mustDecode("fd0da83a93d57dd4e514c8641088ba1322aa6947"), + }, + "documentation-example L1", + }, + { + [][]byte{ + mustDecode("0000000000000000000000000000000000000000"), + mustDecode("0000000000000000000000000000000000000000"), + mustDecode("75a9f88fb219ef1dd31adf41c93e2efaac8d0245"), + mustDecode("0000000000000000000000000000000000000000"), + mustDecode("daedc425199501b1e86b5eaba5649cbde205e6ae"), + mustDecode("0000000000000000000000000000000000000000"), + mustDecode("0000000000000000000000000000000000000000"), + mustDecode("0000000000000000000000000000000000000000"), + mustDecode("286ac5283f99c4e0f11683900a3e39661c375dd6"), + mustDecode("0000000000000000000000000000000000000000"), + }, + [][]byte{ + mustDecode("0000000000000000000000000000000000000000"), + mustDecode("0000000000000000000000000000000000000000"), + mustDecode("a197464ec19f2b2b2bc6b21f6c939c7e57772843"), + mustDecode("a197464ec19f2b2b2bc6b21f6c939c7e57772843"), + mustDecode("b04769357aa4eb4b52cd5bec6935bc8f977fa3a1"), + mustDecode("b04769357aa4eb4b52cd5bec6935bc8f977fa3a1"), + mustDecode("b04769357aa4eb4b52cd5bec6935bc8f977fa3a1"), + mustDecode("b04769357aa4eb4b52cd5bec6935bc8f977fa3a1"), + mustDecode("8f56351897b4e1d100646fa122c924347721b2f5"), + mustDecode("8f56351897b4e1d100646fa122c924347721b2f5"), + }, + "mixed-with-empties", + }, +} + +var testTable = []struct { + data []byte + // pattern describes how to use data to construct the hash-input. + // For every entry n at even indices this repeats the data n times. + // For every entry m at odd indices this repeats a null-byte m times. + // The input-data is constructed by concatinating the results in order. 
+ pattern []int64 + out []byte + name string +}{ + { + []byte("#ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz\n"), + []int64{64}, + mustDecode("09f077820a8a41f34a639f2172f1133b1eafe4e6"), + "documentation-example L0", + }, + { + []byte("#ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz\n"), + []int64{64 * 256}, + mustDecode("75a9f88fb219ef1dd31adf41c93e2efaac8d0245"), + "documentation-example L1", + }, + { + []byte("#ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz\n"), + []int64{64 * 256, 0, 64 * 128, 4096 * 128, 64*2 + 32}, + mustDecode("fd0da83a93d57dd4e514c8641088ba1322aa6947"), + "documentation-example L2", + }, + { + []byte("hello rclone\n"), + []int64{316}, + mustDecode("72370f9c18a2c20b31d71f3f4cee7a3cd2703737"), + "not-block-aligned", + }, + { + []byte("hello rclone\n"), + []int64{13, 4096 * 3, 4}, + mustDecode("a6990b81791f0d2db750b38f046df321c975aa60"), + "not-block-aligned-with-null-bytes", + }, + { + []byte{}, + []int64{}, + mustDecode("0000000000000000000000000000000000000000"), + "empty", + }, + { + []byte{}, + []int64{0, 4096 * 256 * 256}, + mustDecode("0000000000000000000000000000000000000000"), + "null-bytes", + }, +} + +// ------------------------------------------------------------ + +func TestLevelAdd(t *testing.T) { + for _, test := range testTableLevelPositionEmbedded { + l := hidrivehash.NewLevel().(internal.LevelHash) + t.Run(test.name, func(t *testing.T) { + for i := range test.ins { + l.Add(test.ins[i]) + assert.Equal(t, test.outs[i], l.Sum(nil)) + } + }) + } +} + +func TestLevelWrite(t *testing.T) { + for _, test := range testTableLevel { + l := hidrivehash.NewLevel() + t.Run(test.name, func(t *testing.T) { + for i := range test.ins { + l.Write(test.ins[i]) + assert.Equal(t, test.outs[i], l.Sum(nil)) + } + }) + } +} + +func TestLevelIsFull(t *testing.T) { + content := [hidrivehash.Size]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19} + l := hidrivehash.NewLevel() + for i := 0; i < 256; i++ { + assert.False(t, l.(internal.LevelHash).IsFull()) + written, err := l.Write(content[:]) + assert.Equal(t, len(content), written) + if !assert.NoError(t, err) { + t.FailNow() + } + } + assert.True(t, l.(internal.LevelHash).IsFull()) + written, err := l.Write(content[:]) + assert.True(t, l.(internal.LevelHash).IsFull()) + assert.Equal(t, 0, written) + assert.ErrorIs(t, err, hidrivehash.ErrorHashFull) +} + +func TestLevelReset(t *testing.T) { + l := hidrivehash.NewLevel() + zeroHash := l.Sum(nil) + _, err := l.Write([]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}) + if assert.NoError(t, err) { + assert.NotEqual(t, zeroHash, l.Sum(nil)) + l.Reset() + assert.Equal(t, zeroHash, l.Sum(nil)) + } +} + +func TestLevelSize(t *testing.T) { + l := hidrivehash.NewLevel() + assert.Equal(t, 20, l.Size()) +} + +func TestLevelBlockSize(t *testing.T) { + l := hidrivehash.NewLevel() + assert.Equal(t, 20, l.BlockSize()) +} + +func TestLevelBinaryMarshaler(t *testing.T) { + content := []byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19} + l := hidrivehash.NewLevel().(internal.LevelHash) + l.Write(content[:10]) + encoded, err := l.MarshalBinary() + if assert.NoError(t, err) { + d := hidrivehash.NewLevel().(internal.LevelHash) + err = d.UnmarshalBinary(encoded) + if assert.NoError(t, err) { + assert.Equal(t, l.Sum(nil), d.Sum(nil)) + l.Write(content[10:]) + d.Write(content[10:]) + assert.Equal(t, l.Sum(nil), d.Sum(nil)) + } + } +} + +func TestLevelInvalidEncoding(t 
*testing.T) { + l := hidrivehash.NewLevel().(internal.LevelHash) + err := l.UnmarshalBinary([]byte{}) + assert.ErrorIs(t, err, hidrivehash.ErrorInvalidEncoding) +} + +// ------------------------------------------------------------ + +type infiniteReader struct { + source []byte + offset int +} + +func (m *infiniteReader) Read(b []byte) (int, error) { + count := copy(b, m.source[m.offset:]) + m.offset += count + m.offset %= len(m.source) + return count, nil +} + +func writeInChunks(writer io.Writer, chunkSize int64, data []byte, pattern []int64) error { + readers := make([]io.Reader, len(pattern)) + nullBytes := [4096]byte{} + for i, n := range pattern { + if i%2 == 0 { + readers[i] = io.LimitReader(&infiniteReader{data, 0}, n*int64(len(data))) + } else { + readers[i] = io.LimitReader(&infiniteReader{nullBytes[:], 0}, n) + } + } + reader := io.MultiReader(readers...) + for { + _, err := io.CopyN(writer, reader, chunkSize) + if err != nil { + if err == io.EOF { + err = nil + } + return err + } + } +} + +func TestWrite(t *testing.T) { + for _, test := range testTable { + t.Run(test.name, func(t *testing.T) { + h := hidrivehash.New() + err := writeInChunks(h, int64(h.BlockSize()), test.data, test.pattern) + if assert.NoError(t, err) { + normalSum := h.Sum(nil) + assert.Equal(t, test.out, normalSum) + // Test if different block-sizes produce differing results. + for _, blockSize := range []int64{397, 512, 4091, 8192, 10000} { + t.Run(fmt.Sprintf("block-size %v", blockSize), func(t *testing.T) { + h := hidrivehash.New() + err := writeInChunks(h, blockSize, test.data, test.pattern) + if assert.NoError(t, err) { + assert.Equal(t, normalSum, h.Sum(nil)) + } + }) + } + } + }) + } +} + +func TestReset(t *testing.T) { + h := hidrivehash.New() + zeroHash := h.Sum(nil) + _, err := h.Write([]byte{1}) + if assert.NoError(t, err) { + assert.NotEqual(t, zeroHash, h.Sum(nil)) + h.Reset() + assert.Equal(t, zeroHash, h.Sum(nil)) + } +} + +func TestSize(t *testing.T) { + h := hidrivehash.New() + assert.Equal(t, 20, h.Size()) +} + +func TestBlockSize(t *testing.T) { + h := hidrivehash.New() + assert.Equal(t, 4096, h.BlockSize()) +} + +func TestBinaryMarshaler(t *testing.T) { + for _, test := range testTable { + h := hidrivehash.New() + d := hidrivehash.New() + half := len(test.pattern) / 2 + t.Run(test.name, func(t *testing.T) { + err := writeInChunks(h, int64(h.BlockSize()), test.data, test.pattern[:half]) + assert.NoError(t, err) + encoded, err := h.(encoding.BinaryMarshaler).MarshalBinary() + if assert.NoError(t, err) { + err = d.(encoding.BinaryUnmarshaler).UnmarshalBinary(encoded) + if assert.NoError(t, err) { + assert.Equal(t, h.Sum(nil), d.Sum(nil)) + err = writeInChunks(h, int64(h.BlockSize()), test.data, test.pattern[half:]) + assert.NoError(t, err) + err = writeInChunks(d, int64(d.BlockSize()), test.data, test.pattern[half:]) + assert.NoError(t, err) + assert.Equal(t, h.Sum(nil), d.Sum(nil)) + } + } + }) + } +} + +func TestInvalidEncoding(t *testing.T) { + h := hidrivehash.New() + err := h.(encoding.BinaryUnmarshaler).UnmarshalBinary([]byte{}) + assert.ErrorIs(t, err, hidrivehash.ErrorInvalidEncoding) +} + +func TestSum(t *testing.T) { + assert.Equal(t, [hidrivehash.Size]byte{}, hidrivehash.Sum([]byte{})) + content := []byte{1} + h := hidrivehash.New() + h.Write(content) + sum := hidrivehash.Sum(content) + assert.Equal(t, h.Sum(nil), sum[:]) +} diff --git a/backend/hidrive/hidrivehash/internal/internal.go b/backend/hidrive/hidrivehash/internal/internal.go new file mode 100644 index 000000000..f1596a9e6 
--- /dev/null +++ b/backend/hidrive/hidrivehash/internal/internal.go @@ -0,0 +1,17 @@ +package internal + +import ( + "encoding" + "hash" +) + +// LevelHash is an internal interface for level-hashes. +type LevelHash interface { + encoding.BinaryMarshaler + encoding.BinaryUnmarshaler + hash.Hash + // Add takes a position-embedded checksum and adds it to the level. + Add(sum []byte) + // IsFull returns whether the number of checksums added to this level reached its capacity. + IsFull() bool +} diff --git a/bin/make_manual.py b/bin/make_manual.py index 335f4c814..fa14b36a3 100755 --- a/bin/make_manual.py +++ b/bin/make_manual.py @@ -47,6 +47,7 @@ docs = [ "googlephotos.md", "hasher.md", "hdfs.md", + "hidrive.md", "http.md", "hubic.md", "internetarchive.md", diff --git a/docs/content/_index.md b/docs/content/_index.md index b6e2ac4d1..9d0f86bf4 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -128,6 +128,7 @@ WebDAV or S3, that work out of the box.) {{< provider name="Google Photos" home="https://www.google.com/photos/about/" config="/googlephotos/" >}} {{< provider name="HDFS" home="https://hadoop.apache.org/" config="/hdfs/" >}} {{< provider name="Hetzner Storage Box" home="https://www.hetzner.com/storage/storage-box" config="/sftp/#hetzner-storage-box" >}} +{{< provider name="HiDrive" home="https://www.strato.de/cloud-speicher/" config="/hidrive/" >}} {{< provider name="HTTP" home="https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol" config="/http/" >}} {{< provider name="Hubic" home="https://hubic.com/" config="/hubic/" >}} {{< provider name="Internet Archive" home="https://archive.org/" config="/internetarchive/" >}} diff --git a/docs/content/docs.md b/docs/content/docs.md index a2bc38b07..a71728cc7 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -49,6 +49,7 @@ See the following for detailed instructions for * [Google Photos](/googlephotos/) * [Hasher](/hasher/) - to handle checksums for other remotes * [HDFS](/hdfs/) + * [HiDrive](/hidrive/) * [HTTP](/http/) * [Hubic](/hubic/) * [Internet Archive](/internetarchive/) diff --git a/docs/content/hidrive.md b/docs/content/hidrive.md new file mode 100644 index 000000000..2d667a9e6 --- /dev/null +++ b/docs/content/hidrive.md @@ -0,0 +1,461 @@ +--- +title: "HiDrive" +description: "Rclone docs for HiDrive" +--- + +# {{< icon "fa fa-cloud" >}} HiDrive + +Paths are specified as `remote:path` + +Paths may be as deep as required, e.g. `remote:directory/subdirectory`. + +The initial setup for hidrive involves getting a token from HiDrive +which you need to do in your browser. +`rclone config` walks you through it. + +## Configuration + +Here is an example of how to make a remote called `remote`. First run: + + rclone config + +This will guide you through an interactive setup process: + +``` +No remotes found - make a new one +n) New remote +s) Set configuration password +q) Quit config +n/s/q> n +name> remote +Type of storage to configure. +Choose a number from below, or type in your own value +[snip] +XX / HiDrive + \ "hidrive" +[snip] +Storage> hidrive +OAuth Client Id - Leave blank normally. +client_id> +OAuth Client Secret - Leave blank normally. +client_secret> +Access permissions that rclone should use when requesting access from HiDrive. +Leave blank normally. +scope_access> +Edit advanced config? +y/n> n +Use auto config? 
+y/n> y
+If your browser doesn't open automatically go to the following link: http://127.0.0.1:53682/auth?state=xxxxxxxxxxxxxxxxxxxxxx
+Log in and authorize rclone for access
+Waiting for code...
+Got code
+--------------------
+[remote]
+type = hidrive
+token = {"access_token":"xxxxxxxxxxxxxxxxxxxx","token_type":"Bearer","refresh_token":"xxxxxxxxxxxxxxxxxxxxxxx","expiry":"xxxxxxxxxxxxxxxxxxxxxxx"}
+--------------------
+y) Yes this is OK (default)
+e) Edit this remote
+d) Delete this remote
+y/e/d> y
+```
+
+**You should be aware that OAuth-tokens can be used to access your account
+and hence should not be shared with other people.**
+See the [below section](#keeping-your-tokens-safe) for more information.
+
+See the [remote setup docs](/remote_setup/) for how to set it up on a
+machine with no Internet browser available.
+
+Note that rclone runs a webserver on your local machine to collect the
+token as returned from HiDrive. This only runs from the moment it opens
+your browser to the moment you get back the verification code.
+The webserver runs on `http://127.0.0.1:53682/`.
+If local port `53682` is protected by a firewall you may need to temporarily
+unblock the firewall to complete authorization.
+
+Once configured you can then use `rclone` like this,
+
+List directories in top level of your HiDrive root folder
+
+    rclone lsd remote:
+
+List all the files in your HiDrive filesystem
+
+    rclone ls remote:
+
+To copy a local directory to a HiDrive directory called backup
+
+    rclone copy /home/source remote:backup
+
+### Keeping your tokens safe
+
+Any OAuth-tokens will be stored by rclone in the remote's configuration file as unencrypted text.
+Anyone can use a valid refresh-token to access your HiDrive filesystem without knowing your password.
+Therefore you should make sure no one else can access your configuration.
+
+It is possible to encrypt rclone's configuration file.
+You can find information on securing your configuration file by viewing the [configuration encryption docs](/docs/#configuration-encryption).
+
+### Invalid refresh token
+
+As can be verified [here](https://developer.hidrive.com/basics-flows/),
+each `refresh_token` (for Native Applications) is valid for 60 days.
+If used to access HiDrive, its validity will be automatically extended.
+
+This means that if you
+
+  * Don't use the HiDrive remote for 60 days
+
+then rclone will return an error which indicates
+that the refresh token is *invalid* or *expired*.
+
+To fix this you will need to authorize rclone to access your HiDrive account again.
+
+Using
+
+    rclone config reconnect remote:
+
+the process is very similar to the initial setup described above.
+
+### Modified time and hashes
+
+HiDrive allows modification times to be set on objects accurate to 1 second.
+
+HiDrive supports [its own hash type](https://static.hidrive.com/dev/0001)
+which is used to verify the integrity of file contents after successful transfers.
+
+### Restricted filename characters
+
+HiDrive cannot store files or folders that include
+`/` (0x2F) or null-bytes (0x00) in their name.
+Any other characters can be used in the names of files or folders.
+Additionally, files or folders cannot be named either of the following: `.` or `..`
+
+Therefore rclone will automatically replace these characters,
+if files or folders are stored or accessed with such names.
+
+You can read about how this filename encoding works in general
+[here](overview/#restricted-filenames).
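+
+The exact replacements are controlled by the `--hidrive-encoding` option
+(see the advanced options below).
+As a sketch of the syntax, you could extend the default encoding
+to additionally replace invalid UTF-8 sequences:
+
+    rclone copy /home/source remote:backup --hidrive-encoding "Slash,Dot,InvalidUtf8"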
+ +Keep in mind that HiDrive only supports file or folder names +with a length of 255 characters or less. + +### Transfers + +HiDrive limits file sizes per single request to a maximum of 2 GiB. +To allow storage of larger files and allow for better upload performance, +the hidrive backend will use a chunked transfer for files larger than 96 MiB. +Rclone will upload multiple parts/chunks of the file at the same time. +Chunks in the process of being uploaded are buffered in memory, +so you may want to restrict this behaviour on systems with limited resources. + +You can customize this behaviour using the following options: + +* `chunk_size`: size of file parts +* `upload_cutoff`: files larger or equal to this in size will use a chunked transfer +* `upload_concurrency`: number of file-parts to upload at the same time + +See the below section about configuration options for more details. + +### Root folder + +You can set the root folder for rclone. +This is the directory that rclone considers to be the root of your HiDrive. + +Usually, you will leave this blank, and rclone will use the root of the account. + +However, you can set this to restrict rclone to a specific folder hierarchy. + +This works by prepending the contents of the `root_prefix` option +to any paths accessed by rclone. +For example, the following two ways to access the home directory are equivalent: + + rclone lsd --hidrive-root-prefix="/users/test/" remote:path + + rclone lsd remote:/users/test/path + +See the below section about configuration options for more details. + +### Directory member count + +By default, rclone will know the number of directory members contained in a directory. +For example, `rclone lsd` uses this information. + +The acquisition of this information will result in additional time costs for HiDrive's API. +When dealing with large directory structures, it may be desirable to circumvent this time cost, +especially when this information is not explicitly needed. +For this, the `disable_fetching_member_count` option can be used. + +See the below section about configuration options for more details. + +{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/hidrive/hidrive.go then run make backenddocs" >}} +### Standard options + +Here are the standard options specific to hidrive (HiDrive). + +#### --hidrive-client-id + +OAuth Client Id. + +Leave blank normally. + +Properties: + +- Config: client_id +- Env Var: RCLONE_HIDRIVE_CLIENT_ID +- Type: string +- Required: false + +#### --hidrive-client-secret + +OAuth Client Secret. + +Leave blank normally. + +Properties: + +- Config: client_secret +- Env Var: RCLONE_HIDRIVE_CLIENT_SECRET +- Type: string +- Required: false + +#### --hidrive-scope-access + +Access permissions that rclone should use when requesting access from HiDrive. + +Properties: + +- Config: scope_access +- Env Var: RCLONE_HIDRIVE_SCOPE_ACCESS +- Type: string +- Default: "rw" +- Examples: + - "rw" + - Read and write access to resources. + - "ro" + - Read-only access to resources. + +### Advanced options + +Here are the advanced options specific to hidrive (HiDrive). + +#### --hidrive-token + +OAuth Access Token as a JSON blob. + +Properties: + +- Config: token +- Env Var: RCLONE_HIDRIVE_TOKEN +- Type: string +- Required: false + +#### --hidrive-auth-url + +Auth server URL. + +Leave blank to use the provider defaults. 
+ +Properties: + +- Config: auth_url +- Env Var: RCLONE_HIDRIVE_AUTH_URL +- Type: string +- Required: false + +#### --hidrive-token-url + +Token server url. + +Leave blank to use the provider defaults. + +Properties: + +- Config: token_url +- Env Var: RCLONE_HIDRIVE_TOKEN_URL +- Type: string +- Required: false + +#### --hidrive-scope-role + +User-level that rclone should use when requesting access from HiDrive. + +Properties: + +- Config: scope_role +- Env Var: RCLONE_HIDRIVE_SCOPE_ROLE +- Type: string +- Default: "user" +- Examples: + - "user" + - User-level access to management permissions. + - This will be sufficient in most cases. + - "admin" + - Extensive access to management permissions. + - "owner" + - Full access to management permissions. + +#### --hidrive-root-prefix + +The root/parent folder for all paths. + +Fill in to use the specified folder as the parent for all paths given to the remote. +This way rclone can use any folder as its starting point. + +Properties: + +- Config: root_prefix +- Env Var: RCLONE_HIDRIVE_ROOT_PREFIX +- Type: string +- Default: "/" +- Examples: + - "/" + - The topmost directory accessible by rclone. + - This will be equivalent with "root" if rclone uses a regular HiDrive user account. + - "root" + - The topmost directory of the HiDrive user account + - "" + - This specifies that there is no root-prefix for your paths. + - When using this you will always need to specify paths to this remote with a valid parent e.g. "remote:/path/to/dir" or "remote:root/path/to/dir". + +#### --hidrive-endpoint + +Endpoint for the service. + +This is the URL that API-calls will be made to. + +Properties: + +- Config: endpoint +- Env Var: RCLONE_HIDRIVE_ENDPOINT +- Type: string +- Default: "https://api.hidrive.strato.com/2.1" + +#### --hidrive-disable-fetching-member-count + +Do not fetch number of objects in directories unless it is absolutely necessary. + +Requests may be faster if the number of objects in subdirectories is not fetched. + +Properties: + +- Config: disable_fetching_member_count +- Env Var: RCLONE_HIDRIVE_DISABLE_FETCHING_MEMBER_COUNT +- Type: bool +- Default: false + +#### --hidrive-disable-unicode-normalization + +Do not apply Unicode "Normalization Form C" to remote paths. + +In Unicode there are multiple valid representations for the same abstract character. +They (should) result in the same visual appearance, but are represented by different byte-sequences. +This is known as canonical equivalence. + +In HiDrive paths are always represented as byte-sequences. +This means that two paths that are canonically equivalent (and therefore look the same) are treated as two distinct paths. +As this behaviour may be undesired, by default rclone will apply unicode normalization to paths it will access. + +Properties: + +- Config: disable_unicode_normalization +- Env Var: RCLONE_HIDRIVE_DISABLE_UNICODE_NORMALIZATION +- Type: bool +- Default: false + +#### --hidrive-chunk-size + +Chunksize for chunked uploads. + +Any files larger than the configured cutoff (or files of unknown size) will be uploaded in chunks of this size. + +The upper limit for this is 2147483647 bytes (about 2.000Gi). +That is the maximum amount of bytes a single upload-operation will support. +Setting this above the upper limit or to a negative value will cause uploads to fail. + +Setting this to larger values may increase the upload speed at the cost of using more memory. +It can be set to smaller values smaller to save on memory. 
+ +Properties: + +- Config: chunk_size +- Env Var: RCLONE_HIDRIVE_CHUNK_SIZE +- Type: SizeSuffix +- Default: 48Mi + +#### --hidrive-upload-cutoff + +Cutoff/Threshold for chunked uploads. + +Any files larger than this will be uploaded in chunks of the configured chunksize. + +The upper limit for this is 2147483647 bytes (about 2.000Gi). +That is the maximum amount of bytes a single upload-operation will support. +Setting this above the upper limit will cause uploads to fail. + +Properties: + +- Config: upload_cutoff +- Env Var: RCLONE_HIDRIVE_UPLOAD_CUTOFF +- Type: SizeSuffix +- Default: 96Mi + +#### --hidrive-upload-concurrency + +Concurrency for chunked uploads. + +This is the upper limit for how many transfers for the same file are running concurrently. +Setting this above to a value smaller than 1 will cause uploads to deadlock. + +If you are uploading small numbers of large files over high-speed links +and these uploads do not fully utilize your bandwidth, then increasing +this may help to speed up the transfers. + +Properties: + +- Config: upload_concurrency +- Env Var: RCLONE_HIDRIVE_UPLOAD_CONCURRENCY +- Type: int +- Default: 4 + +#### --hidrive-encoding + +The encoding for the backend. + +See the [encoding section in the overview](/overview/#encoding) for more info. + +Properties: + +- Config: encoding +- Env Var: RCLONE_HIDRIVE_ENCODING +- Type: MultiEncoder +- Default: Slash,Dot + +{{< rem autogenerated options stop >}} + +## Limitations + +### Symbolic links + +HiDrive is able to store symbolic links (*symlinks*) by design, +for example, when unpacked from a zip archive. + +There exists no direct mechanism to manage native symlinks in remotes. +As such this implementation has chosen to ignore any native symlinks present in the remote. +rclone will not be able to access or show any symlinks stored in the hidrive-remote. +This means symlinks cannot be individually removed, copied, or moved, +except when removing, copying, or moving the parent folder. + +*This does not affect the `.rclonelink`-files +that rclone uses to encode and store symbolic links.* + +### Sparse files + +It is possible to store sparse files in HiDrive. + +Note that copying a sparse file will expand the holes +into null-byte (0x00) regions that will then consume disk space. +Likewise, when downloading a sparse file, +the resulting file will have null-byte regions in the place of file holes. diff --git a/docs/content/overview.md b/docs/content/overview.md index 74194f28f..3f298c1ba 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -30,6 +30,7 @@ Here is an overview of the major features of each cloud storage system. | Google Drive | MD5 | R/W | No | Yes | R/W | - | | Google Photos | - | - | No | Yes | R | - | | HDFS | - | R/W | No | No | - | - | +| HiDrive | HiDrive ¹² | R/W | No | No | - | - | | HTTP | - | R | No | No | R | - | | Hubic | MD5 | R/W | No | No | R/W | - | | Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - | - | @@ -93,6 +94,11 @@ for more details. ¹¹ Internet Archive requires option `wait_archive` to be set to a non-zero value for full modtime support. +¹² HiDrive supports [its own custom +hash](https://static.hidrive.com/dev/0001). +It combines SHA1 sums for each 4 KiB block hierarchically to a single +top-level sum. + ### Hash ### The cloud storage system supports various hash types of the objects. @@ -475,6 +481,7 @@ upon backend-specific capabilities. 
| Google Drive | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | | Google Photos | No | No | No | No | No | No | No | No | No | No | | HDFS | Yes | No | Yes | Yes | No | No | Yes | No | Yes | Yes | +| HiDrive | Yes | Yes | Yes | Yes | No | No | Yes | No | No | Yes | | HTTP | No | No | No | No | No | No | No | No | No | Yes | | Hubic | Yes † | Yes | No | No | No | Yes | Yes | No | Yes | No | | Internet Archive | No | Yes | No | No | Yes | Yes | No | Yes | Yes | No | diff --git a/docs/layouts/chrome/navbar.html b/docs/layouts/chrome/navbar.html index 32c05d3e0..f732756e3 100644 --- a/docs/layouts/chrome/navbar.html +++ b/docs/layouts/chrome/navbar.html @@ -72,6 +72,7 @@ Google Photos Hasher (better checksums for others) HDFS (Hadoop Distributed Filesystem) + HiDrive HTTP Hubic Internet Archive diff --git a/fstest/test_all/config.yaml b/fstest/test_all/config.yaml index 68f17c2dd..23cdee6db 100644 --- a/fstest/test_all/config.yaml +++ b/fstest/test_all/config.yaml @@ -133,6 +133,9 @@ backends: remote: "TestGooglePhotos:" tests: - backend + - backend: "hidrive" + remote: "TestHiDrive:" + fastlist: false - backend: "hubic" remote: "TestHubic:" fastlist: false