From bb88b8499b8f529b5beaf6baf939bfaa7f46cd21 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sat, 9 Sep 2023 12:40:06 +0100 Subject: [PATCH] box: fix performance problem reading metadata for single files Before this change the backend used to list the directory to find the metadata for a single file. For lots of files in a directory this caused a serious performance problem. This change uses the preflight check to check for a files existence and find its ID. See: https://forum.rclone.org/t/psa-box-com-has-serious-performance-issues-in-directories-with-thousands-of-files/41128/10 See: https://forum.box.com/t/is-there-an-api-to-find-a-file-by-leaf-name-given-a-folder-id/997/ See: https://developer.box.com/guides/uploads/check/ --- backend/box/api/types.go | 14 +---------- backend/box/box.go | 50 ++++++++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/backend/box/api/types.go b/backend/box/api/types.go index b96349903..275dea950 100644 --- a/backend/box/api/types.go +++ b/backend/box/api/types.go @@ -167,19 +167,7 @@ type PreUploadCheckResponse struct { // PreUploadCheckConflict is returned in the ContextInfo error field // from PreUploadCheck when the error code is "item_name_in_use" type PreUploadCheckConflict struct { - Conflicts struct { - Type string `json:"type"` - ID string `json:"id"` - FileVersion struct { - Type string `json:"type"` - ID string `json:"id"` - Sha1 string `json:"sha1"` - } `json:"file_version"` - SequenceID string `json:"sequence_id"` - Etag string `json:"etag"` - Sha1 string `json:"sha1"` - Name string `json:"name"` - } `json:"conflicts"` + Conflicts ItemMini `json:"conflicts"` } // UpdateFileModTime is used in Update File Info diff --git a/backend/box/box.go b/backend/box/box.go index ae2d168b2..26e2cb356 100644 --- a/backend/box/box.go +++ b/backend/box/box.go @@ -380,7 +380,7 @@ func shouldRetry(ctx context.Context, resp *http.Response, err error) (bool, err // readMetaDataForPath reads the metadata from the path func (f *Fs) readMetaDataForPath(ctx context.Context, path string) (info *api.Item, err error) { - // defer fs.Trace(f, "path=%q", path)("info=%+v, err=%v", &info, &err) + // defer log.Trace(f, "path=%q", path)("info=%+v, err=%v", &info, &err) leaf, directoryID, err := f.dirCache.FindPath(ctx, path, false) if err != nil { if err == fs.ErrorDirNotFound { @@ -389,20 +389,30 @@ func (f *Fs) readMetaDataForPath(ctx context.Context, path string) (info *api.It return nil, err } - found, err := f.listAll(ctx, directoryID, false, true, true, func(item *api.Item) bool { - if strings.EqualFold(item.Name, leaf) { - info = item - return true - } - return false + // Use preupload to find the ID + itemMini, err := f.preUploadCheck(ctx, leaf, directoryID, -1) + if err != nil { + return nil, err + } + if itemMini == nil { + return nil, fs.ErrorObjectNotFound + } + + // Now we have the ID we can look up the object proper + opts := rest.Opts{ + Method: "GET", + Path: "/files/" + itemMini.ID, + Parameters: fieldsValue(), + } + var item api.Item + err = f.pacer.Call(func() (bool, error) { + resp, err := f.srv.CallJSON(ctx, &opts, nil, &item) + return shouldRetry(ctx, resp, err) }) if err != nil { return nil, err } - if !found { - return nil, fs.ErrorObjectNotFound - } - return info, nil + return &item, nil } // errorHandler parses a non 2xx error response into an error @@ -762,7 +772,7 @@ func (f *Fs) createObject(ctx context.Context, remote string, modTime time.Time, // // It returns "", nil if the file is good to go // It returns "ID", nil if the file must be updated -func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size int64) (ID string, err error) { +func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size int64) (item *api.ItemMini, err error) { check := api.PreUploadCheck{ Name: f.opt.Enc.FromStandardName(leaf), Parent: api.Parent{ @@ -787,16 +797,16 @@ func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size var conflict api.PreUploadCheckConflict err = json.Unmarshal(apiErr.ContextInfo, &conflict) if err != nil { - return "", fmt.Errorf("pre-upload check: JSON decode failed: %w", err) + return nil, fmt.Errorf("pre-upload check: JSON decode failed: %w", err) } if conflict.Conflicts.Type != api.ItemTypeFile { - return "", fmt.Errorf("pre-upload check: can't overwrite non file with file: %w", err) + return nil, fs.ErrorIsDir } - return conflict.Conflicts.ID, nil + return &conflict.Conflicts, nil } - return "", fmt.Errorf("pre-upload check: %w", err) + return nil, fmt.Errorf("pre-upload check: %w", err) } - return "", nil + return nil, nil } // Put the object @@ -817,11 +827,11 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options . // Preflight check the upload, which returns the ID if the // object already exists - ID, err := f.preUploadCheck(ctx, leaf, directoryID, src.Size()) + item, err := f.preUploadCheck(ctx, leaf, directoryID, src.Size()) if err != nil { return nil, err } - if ID == "" { + if item == nil { return f.PutUnchecked(ctx, in, src, options...) } @@ -829,7 +839,7 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options . o := &Object{ fs: f, remote: remote, - id: ID, + id: item.ID, } return o, o.Update(ctx, in, src, options...) }