box: fix performance problem reading metadata for single files

Before this change the backend used to list the directory to find the
metadata for a single file. For lots of files in a directory this
caused a serious performance problem.

This change uses the preflight check to check for a files existence
and find its ID.

See: https://forum.rclone.org/t/psa-box-com-has-serious-performance-issues-in-directories-with-thousands-of-files/41128/10
See: https://forum.box.com/t/is-there-an-api-to-find-a-file-by-leaf-name-given-a-folder-id/997/
See: https://developer.box.com/guides/uploads/check/
This commit is contained in:
Nick Craig-Wood 2023-09-09 12:40:06 +01:00
parent 4ac5cb07ca
commit bb88b8499b
2 changed files with 31 additions and 33 deletions

View File

@ -167,19 +167,7 @@ type PreUploadCheckResponse struct {
// PreUploadCheckConflict is returned in the ContextInfo error field // PreUploadCheckConflict is returned in the ContextInfo error field
// from PreUploadCheck when the error code is "item_name_in_use" // from PreUploadCheck when the error code is "item_name_in_use"
type PreUploadCheckConflict struct { type PreUploadCheckConflict struct {
Conflicts struct { Conflicts ItemMini `json:"conflicts"`
Type string `json:"type"`
ID string `json:"id"`
FileVersion struct {
Type string `json:"type"`
ID string `json:"id"`
Sha1 string `json:"sha1"`
} `json:"file_version"`
SequenceID string `json:"sequence_id"`
Etag string `json:"etag"`
Sha1 string `json:"sha1"`
Name string `json:"name"`
} `json:"conflicts"`
} }
// UpdateFileModTime is used in Update File Info // UpdateFileModTime is used in Update File Info

View File

@ -380,7 +380,7 @@ func shouldRetry(ctx context.Context, resp *http.Response, err error) (bool, err
// readMetaDataForPath reads the metadata from the path // readMetaDataForPath reads the metadata from the path
func (f *Fs) readMetaDataForPath(ctx context.Context, path string) (info *api.Item, err error) { func (f *Fs) readMetaDataForPath(ctx context.Context, path string) (info *api.Item, err error) {
// defer fs.Trace(f, "path=%q", path)("info=%+v, err=%v", &info, &err) // defer log.Trace(f, "path=%q", path)("info=%+v, err=%v", &info, &err)
leaf, directoryID, err := f.dirCache.FindPath(ctx, path, false) leaf, directoryID, err := f.dirCache.FindPath(ctx, path, false)
if err != nil { if err != nil {
if err == fs.ErrorDirNotFound { if err == fs.ErrorDirNotFound {
@ -389,20 +389,30 @@ func (f *Fs) readMetaDataForPath(ctx context.Context, path string) (info *api.It
return nil, err return nil, err
} }
found, err := f.listAll(ctx, directoryID, false, true, true, func(item *api.Item) bool { // Use preupload to find the ID
if strings.EqualFold(item.Name, leaf) { itemMini, err := f.preUploadCheck(ctx, leaf, directoryID, -1)
info = item if err != nil {
return true return nil, err
} }
return false if itemMini == nil {
return nil, fs.ErrorObjectNotFound
}
// Now we have the ID we can look up the object proper
opts := rest.Opts{
Method: "GET",
Path: "/files/" + itemMini.ID,
Parameters: fieldsValue(),
}
var item api.Item
err = f.pacer.Call(func() (bool, error) {
resp, err := f.srv.CallJSON(ctx, &opts, nil, &item)
return shouldRetry(ctx, resp, err)
}) })
if err != nil { if err != nil {
return nil, err return nil, err
} }
if !found { return &item, nil
return nil, fs.ErrorObjectNotFound
}
return info, nil
} }
// errorHandler parses a non 2xx error response into an error // errorHandler parses a non 2xx error response into an error
@ -762,7 +772,7 @@ func (f *Fs) createObject(ctx context.Context, remote string, modTime time.Time,
// //
// It returns "", nil if the file is good to go // It returns "", nil if the file is good to go
// It returns "ID", nil if the file must be updated // It returns "ID", nil if the file must be updated
func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size int64) (ID string, err error) { func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size int64) (item *api.ItemMini, err error) {
check := api.PreUploadCheck{ check := api.PreUploadCheck{
Name: f.opt.Enc.FromStandardName(leaf), Name: f.opt.Enc.FromStandardName(leaf),
Parent: api.Parent{ Parent: api.Parent{
@ -787,16 +797,16 @@ func (f *Fs) preUploadCheck(ctx context.Context, leaf, directoryID string, size
var conflict api.PreUploadCheckConflict var conflict api.PreUploadCheckConflict
err = json.Unmarshal(apiErr.ContextInfo, &conflict) err = json.Unmarshal(apiErr.ContextInfo, &conflict)
if err != nil { if err != nil {
return "", fmt.Errorf("pre-upload check: JSON decode failed: %w", err) return nil, fmt.Errorf("pre-upload check: JSON decode failed: %w", err)
} }
if conflict.Conflicts.Type != api.ItemTypeFile { if conflict.Conflicts.Type != api.ItemTypeFile {
return "", fmt.Errorf("pre-upload check: can't overwrite non file with file: %w", err) return nil, fs.ErrorIsDir
} }
return conflict.Conflicts.ID, nil return &conflict.Conflicts, nil
} }
return "", fmt.Errorf("pre-upload check: %w", err) return nil, fmt.Errorf("pre-upload check: %w", err)
} }
return "", nil return nil, nil
} }
// Put the object // Put the object
@ -817,11 +827,11 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
// Preflight check the upload, which returns the ID if the // Preflight check the upload, which returns the ID if the
// object already exists // object already exists
ID, err := f.preUploadCheck(ctx, leaf, directoryID, src.Size()) item, err := f.preUploadCheck(ctx, leaf, directoryID, src.Size())
if err != nil { if err != nil {
return nil, err return nil, err
} }
if ID == "" { if item == nil {
return f.PutUnchecked(ctx, in, src, options...) return f.PutUnchecked(ctx, in, src, options...)
} }
@ -829,7 +839,7 @@ func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options .
o := &Object{ o := &Object{
fs: f, fs: f,
remote: remote, remote: remote,
id: ID, id: item.ID,
} }
return o, o.Update(ctx, in, src, options...) return o, o.Update(ctx, in, src, options...)
} }