diff --git a/backend/http/http.go b/backend/http/http.go
index 15ea2744c..e2a993a44 100644
--- a/backend/http/http.go
+++ b/backend/http/http.go
@@ -73,8 +73,9 @@ directories.`,
 			Advanced: true,
 		}, {
 			Name: "no_head",
-			Help: `Don't use HEAD requests to find file sizes in dir listing.
+			Help: `Don't use HEAD requests.
 
+HEAD requests are mainly used to find file sizes in dir listing.
 If your site is being very slow to load then you can try this option.
 Normally rclone does a HEAD request for each potential file in a
 directory listing to:
@@ -134,6 +135,97 @@ func statusError(res *http.Response, err error) error {
 	return nil
 }
 
+// getFsEndpoint decides if url is to be considered a file or directory,
+// and returns a proper endpoint url to use for the fs.
+//
+// The returned bool is true if url is to be treated as a file, in which
+// case the returned endpoint is the parent of url as given by path.Split.
+// Otherwise url is treated as a directory and the returned endpoint is
+// url itself, with a trailing '/' appended if it was missing.
+//
+// A url ending with '/' is always a directory and no request is sent. For
+// any other url a single HEAD request is sent, without following
+// redirects, unless opt.NoHead is set. A 404 response, or a redirect to a
+// location ending with '/', means directory. Every other outcome, including
+// a request that could not be created or sent, means file.
+func getFsEndpoint(ctx context.Context, client *http.Client, url string, opt *Options) (string, bool) {
+	// If url ends with '/' it is already a proper url always assumed to be a directory.
+	// HasSuffix is used instead of indexing the last byte so an empty url cannot panic.
+	if strings.HasSuffix(url, "/") {
+		return url, false
+	}
+
+	// If url does not end with '/' we send a HEAD request to decide
+	// if it is directory or file, and if directory appends the missing
+	// '/', or if file returns the directory url to parent instead.
+	createFileResult := func() (string, bool) {
+		fs.Debugf(nil, "If path is a directory you must add a trailing '/'")
+		parent, _ := path.Split(url)
+		return parent, true
+	}
+	createDirResult := func() (string, bool) {
+		fs.Debugf(nil, "To avoid the initial HEAD request add a trailing '/' to the path")
+		return url + "/", false
+	}
+
+	// If HEAD requests are not allowed we just have to assume it is a file.
+	if opt.NoHead {
+		fs.Debugf(nil, "Assuming path is a file as --http-no-head is set")
+		return createFileResult()
+	}
+
+	// Use a client which doesn't follow redirects so the server
+	// doesn't redirect http://host/dir to http://host/dir/
+	noRedir := *client
+	noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
+		return http.ErrUseLastResponse
+	}
+	req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
+	if err != nil {
+		fs.Debugf(nil, "Assuming path is a file as HEAD request could not be created: %v", err)
+		return createFileResult()
+	}
+	addHeaders(req, opt)
+	res, err := noRedir.Do(req)
+	if err != nil {
+		fs.Debugf(nil, "Assuming path is a file as HEAD request could not be sent: %v", err)
+		return createFileResult()
+	}
+	// Only the status and headers are inspected, but net/http still requires
+	// the body to be closed, otherwise the connection is leaked.
+	defer func() { _ = res.Body.Close() }()
+
+	if res.StatusCode == http.StatusNotFound {
+		fs.Debugf(nil, "Assuming path is a directory as HEAD response is it does not exist as a file (%s)", res.Status)
+		return createDirResult()
+	}
+	if res.StatusCode == http.StatusMovedPermanently ||
+		res.StatusCode == http.StatusFound ||
+		res.StatusCode == http.StatusSeeOther ||
+		res.StatusCode == http.StatusTemporaryRedirect ||
+		res.StatusCode == http.StatusPermanentRedirect {
+		redir := res.Header.Get("Location")
+		if redir == "" {
+			fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) but no location header", res.Status)
+			return createFileResult()
+		}
+		if strings.HasSuffix(redir, "/") {
+			fs.Debugf(nil, "Assuming path is a directory as HEAD response is redirect (%s) to a path that ends with '/': %s", res.Status, redir)
+			return createDirResult()
+		}
+		fs.Debugf(nil, "Assuming path is a file as HEAD response is redirect (%s) to a path that does not end with '/': %s", res.Status, redir)
+		return createFileResult()
+	}
+	if res.StatusCode < 200 || res.StatusCode > 299 {
+		// Example is 403 (http.StatusForbidden) for servers not allowing HEAD requests.
+		fs.Debugf(nil, "Assuming path is a file as HEAD response is an error (%s)", res.Status)
+		return createFileResult()
+	}
+
+	fs.Debugf(nil, "Assuming path is a file as HEAD response is success (%s)", res.Status)
+	return createFileResult()
+}
+
 // NewFs creates a new Fs object from the name and root. It connects to
 // the host specified in the config file.
 func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
@@ -164,37 +256,9 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
 
 	client := fshttp.NewClient(ctx)
 
-	var isFile = false
-	if !strings.HasSuffix(u.String(), "/") {
-		// Make a client which doesn't follow redirects so the server
-		// doesn't redirect http://host/dir to http://host/dir/
-		noRedir := *client
-		noRedir.CheckRedirect = func(req *http.Request, via []*http.Request) error {
-			return http.ErrUseLastResponse
-		}
-		// check to see if points to a file
-		req, err := http.NewRequestWithContext(ctx, "HEAD", u.String(), nil)
-		if err == nil {
-			addHeaders(req, opt)
-			res, err := noRedir.Do(req)
-			err = statusError(res, err)
-			if err == nil {
-				isFile = true
-			}
-		}
-	}
-
-	newRoot := u.String()
-	if isFile {
-		// Point to the parent if this is a file
-		newRoot, _ = path.Split(u.String())
-	} else {
-		if !strings.HasSuffix(newRoot, "/") {
-			newRoot += "/"
-		}
-	}
-
-	u, err = url.Parse(newRoot)
+	endpoint, isFile := getFsEndpoint(ctx, client, u.String(), opt)
+	fs.Debugf(nil, "Root: %s", endpoint)
+	u, err = url.Parse(endpoint)
 	if err != nil {
 		return nil, err
 	}
@@ -212,12 +276,16 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
 	f.features = (&fs.Features{
 		CanHaveEmptyDirectories: true,
 	}).Fill(ctx, f)
+
 	if isFile {
+		// return an error with an fs which points to the parent
 		return f, fs.ErrorIsFile
 	}
+
 	if !strings.HasSuffix(f.endpointURL, "/") {
 		return nil, errors.New("internal error: url doesn't end with /")
 	}
+
 	return f, nil
 }
 
diff --git 
a/backend/http/http_internal_test.go b/backend/http/http_internal_test.go
index bd49503c5..17c58737c 100644
--- a/backend/http/http_internal_test.go
+++ b/backend/http/http_internal_test.go
@@ -8,8 +8,10 @@ import (
 	"net/http/httptest"
 	"net/url"
 	"os"
+	"path"
 	"path/filepath"
 	"sort"
+	"strconv"
 	"strings"
 	"testing"
 	"time"
@@ -374,3 +376,117 @@ func TestParseCaddy(t *testing.T) {
 		"v1.36-22-g06ea13a-ssh-agentβ/",
 	})
 }
+
+func TestFsNoSlashRoots(t *testing.T) {
+	// Test Fs with roots that do not end with '/': the logic that
+	// decides if url is to be considered a file or directory, based
+	// on result from a HEAD request.
+
+	// Handler for faking HEAD responses with different status codes.
+	// It runs on the server's goroutine, not the test goroutine, so it
+	// only uses assert: require calls t.FailNow, which must be called
+	// from the goroutine running the test.
+	headCount := 0
+	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method == "HEAD" {
+			headCount++
+			responseCode, err := strconv.Atoi(path.Base(r.URL.String()))
+			if !assert.NoError(t, err) {
+				http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
+				return
+			}
+			if strings.HasPrefix(r.URL.String(), "/redirect/") {
+				var redir string
+				if strings.HasPrefix(r.URL.String(), "/redirect/file/") {
+					redir = "/redirected"
+				} else if strings.HasPrefix(r.URL.String(), "/redirect/dir/") {
+					redir = "/redirected/"
+				} else {
+					assert.Fail(t, "Redirect test requests must start with '/redirect/file/' or '/redirect/dir/'")
+					http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
+					return
+				}
+				http.Redirect(w, r, redir, responseCode)
+			} else {
+				http.Error(w, http.StatusText(responseCode), responseCode)
+			}
+		}
+	})
+
+	// Make the test server
+	ts := httptest.NewServer(handler)
+	defer ts.Close()
+
+	// Configure the remote
+	configfile.Install()
+	m := configmap.Simple{
+		"type": "http",
+		"url":  ts.URL,
+	}
+
+	// Test
+	for i, test := range []struct {
+		root   string
+		isFile bool
+	}{
+		// 2xx success
+		{"parent/200", true},
+		{"parent/204", true},
+
+		// 3xx redirection Redirect status 301, 302, 303, 307, 308
+		{"redirect/file/301", true}, // Request is redirected to "/redirected"
+		{"redirect/dir/301", false}, // Request is redirected to "/redirected/"
+		{"redirect/file/302", true}, // Request is redirected to "/redirected"
+		{"redirect/dir/302", false}, // Request is redirected to "/redirected/"
+		{"redirect/file/303", true}, // Request is redirected to "/redirected"
+		{"redirect/dir/303", false}, // Request is redirected to "/redirected/"
+
+		{"redirect/file/304", true}, // Not really a redirect, handled like 4xx errors (below)
+		{"redirect/file/305", true}, // Not really a redirect, handled like 4xx errors (below)
+		{"redirect/file/306", true}, // Not really a redirect, handled like 4xx errors (below)
+
+		{"redirect/file/307", true}, // Request is redirected to "/redirected"
+		{"redirect/dir/307", false}, // Request is redirected to "/redirected/"
+		{"redirect/file/308", true}, // Request is redirected to "/redirected"
+		{"redirect/dir/308", false}, // Request is redirected to "/redirected/"
+
+		// 4xx client errors
+		{"parent/403", true},  // Forbidden status (head request blocked)
+		{"parent/404", false}, // Not found status
+	} {
+		for _, noHead := range []bool{false, true} {
+			var isFile bool
+			if noHead {
+				m.Set("no_head", "true")
+				isFile = true
+			} else {
+				m.Set("no_head", "false")
+				isFile = test.isFile
+			}
+			headCount = 0
+			f, err := NewFs(context.Background(), remoteName, test.root, m)
+			if noHead {
+				assert.Equal(t, 0, headCount)
+			} else {
+				assert.Equal(t, 1, headCount)
+			}
+			if isFile {
+				assert.ErrorIs(t, err, fs.ErrorIsFile)
+			} else {
+				assert.NoError(t, err)
+			}
+			var endpoint string
+			if isFile {
+				parent, _ := path.Split(test.root)
+				endpoint = "/" + parent
+			} else {
+				endpoint = "/" + test.root + "/"
+			}
+			what := fmt.Sprintf("i=%d, root=%q, isFile=%v, noHead=%v", i, test.root, isFile, noHead)
+			// NewFs returns a nil Fs on unexpected errors; stop here rather
+			// than panic on f.String() below.
+			require.NotNil(t, f, what)
+			assert.Equal(t, ts.URL+endpoint, f.String(), what)
+		}
+	}
+}
diff --git a/docs/content/http.md b/docs/content/http.md
index 9bf3e0bec..d4c3e55b8 100644
--- a/docs/content/http.md
+++ b/docs/content/http.md
@@ -12,7 +12,26 @@ webservers such as Apache/Nginx/Caddy and will likely work with file
 listings from most web servers. 
(If it doesn't then please file an issue, or send a pull request!) -Paths are specified as `remote:` or `remote:path/to/dir`. +Paths are specified as `remote:` or `remote:path`. + +The `remote:` represents the configured [url](#http-url), and any path following +it will be resolved relative to this url, according to the URL standard. This +means with remote url `https://beta.rclone.org/branch` and path `fix`, the +resolved URL will be `https://beta.rclone.org/branch/fix`, while with path +`/fix` the resolved URL will be `https://beta.rclone.org/fix` as the absolute +path is resolved from the root of the domain. + +If the path following the `remote:` ends with `/` it will be assumed to point +to a directory. If the path does not end with `/`, then a HEAD request is sent +and the response used to decide if it is treated as a file or a directory +(run with `-vv` to see details). When [--http-no-head](#http-no-head) is +specified, a path without ending `/` is always assumed to be a file. If rclone +incorrectly assumes the path is a file, the solution is to specify the path with +ending `/`. When you know the path is a directory, ending it with `/` is always +better as it avoids the initial HEAD request. + +To just download a single file it is easier to use +[copyurl](/commands/rclone_copyurl/). ## Configuration @@ -81,25 +100,29 @@ Sync the remote `directory` to `/home/local/directory`, deleting any excess file rclone sync -i remote:directory /home/local/directory -### Read only ### +### Read only This remote is read only - you can't upload files to an HTTP server. -### Modified time ### +### Modified time Most HTTP servers store time accurate to 1 second. -### Checksum ### +### Checksum No checksums are stored. 
-### Usage without a config file ### +### Usage without a config file Since the http remote only has one config parameter it is easy to use without a config file: rclone lsd --http-url https://beta.rclone.org :http: +or: + + rclone lsd :http,url='https://beta.rclone.org': + {{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/http/http.go then run make backenddocs" >}} ### Standard options