Introduce normalize_path

This new function normalizes paths, including dropping /./ segments and resolving /../ segments, in preparation for switching fish to a "virtual" PWD.
2025-02-01 07:18:30 +08:00 · 2018-09-16 17:55:15 -07:00 · 2018-09-16 17:55:15 -07:00 · 6ad4d94e12
commit 6ad4d94e12
parent 767b7a2180
3 changed files with 57 additions and 0 deletions
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@ -4664,6 +4664,22 @@ void test_layout_cache() {
    do_test(seqs.find_prompt_layout(L"whatever")->line_count == 100);
 }

+void test_normalize_path() {  // Checks normalize_path() against fixed input/expected pairs.
+    say(L"Testing path normalization");
+    do_test(normalize_path(L"") == L".");  // nothing survives, so the result falls back to "."
+    do_test(normalize_path(L"..") == L"..");  // a ".." with nothing before it is retained
+    do_test(normalize_path(L"./") == L".");  // "." and the empty trailing component are dropped
+    do_test(normalize_path(L"////abc") == L"//abc");  // more than two leading slashes become two
+    do_test(normalize_path(L"/abc") == L"/abc");  // a single leading slash is preserved
+    do_test(normalize_path(L"/abc/") == L"/abc");  // a trailing slash is removed
+    do_test(normalize_path(L"/abc/..def/") == L"/abc/..def");  // "..def" is a name, not ".."
+    do_test(normalize_path(L"//abc/../def/") == L"//def");  // ".." removes "abc"; "//" is kept
+    do_test(normalize_path(L"abc/../abc/../abc/../abc") == L"abc");  // repeated cancellation
+    do_test(normalize_path(L"../../") == L"../..");  // consecutive unmatched ".."s all stay
+    do_test(normalize_path(L"foo/./bar") == L"foo/bar");  // "." in the middle is removed
+    do_test(normalize_path(L"foo/././bar/.././baz") == L"foo/baz");  // "." and ".." combined
+}
+
 /// Main test.
 int main(int argc, char **argv) {
    UNUSED(argc);
@ -4762,6 +4778,7 @@ int main(int argc, char **argv) {
    if (should_test_function("illegal_command_exit_code")) test_illegal_command_exit_code();
    if (should_test_function("maybe")) test_maybe();
    if (should_test_function("layout_cache")) test_layout_cache();
+    if (should_test_function("normalize")) test_normalize_path();
    // history_tests_t::test_history_speed();

    say(L"Encountered %d errors in low-level tests", err_count);
--- a/src/wutil.cpp
+++ b/src/wutil.cpp
@ -433,6 +433,40 @@ maybe_t<wcstring> wrealpath(const wcstring &pathname) {
    return str2wcstring(real_path);
 }

+wcstring normalize_path(const wcstring &path) {
+    // Normalization is textual: only the components of \p path are examined and rearranged.
+    // Count the leading slashes; up to 2 are preserved.
+    const wchar_t sep = L'/';
+    size_t leading_slashes = 0;
+    for (wchar_t c : path) {
+        if (c != sep) break;
+        leading_slashes++;
+    }
+
+    wcstring_list_t comps = split_string(path, sep);
+    wcstring_list_t new_comps;
+    for (wcstring &comp : comps) {
+        if (comp.empty() || comp == L".") {
+            continue;
+        } else if (comp != L"..") {
+            new_comps.push_back(std::move(comp));
+        } else if (!new_comps.empty() && new_comps.back() != L"..") {
+            new_comps.pop_back();  // ".." cancels the preceding real component
+        } else if (leading_slashes == 0) {
+            // A relative path underflowed its components: retain the "..".
+            new_comps.push_back(L"..");
+        }
+        // Otherwise this is ".." at the root of an absolute path, which is still the root.
+    }
+
+    // Emit the slashes as a string prefix, not as empty joined components: joining a lone
+    // empty component yields "", which made "/" normalize to "" and "//" normalize to "/".
+    wcstring result(leading_slashes > 2 ? 2 : leading_slashes, sep);
+    result += join_strings(new_comps, sep);
+    if (result.empty()) result.push_back(L'.');  // e.g. "./" normalizes to ".", not ""
+    return result;
+}
+
 wcstring wdirname(const wcstring &path) {
    char *tmp = wcs2str(path);
    char *narrow_res = dirname(tmp);
--- a/src/wutil.h
+++ b/src/wutil.h
@ -68,6 +68,12 @@ int wchdir(const wcstring &dir);
 /// \returns the canonicalized path, or none if the path is invalid.
 maybe_t<wcstring> wrealpath(const wcstring &pathname);

+/// Given an input path, "normalize" it by operating on its text:
+/// 1. Collapse runs of /s into a single / and drop any trailing /; up to two leading /s are kept.
+/// 2. A .. component removes the component before it; leading ..s of a relative path are kept.
+/// 3. Remove . components (e.g. /./ in the middle). An empty or "./" path normalizes to ".".
+wcstring normalize_path(const wcstring &path);
+
 /// Wide character version of readdir().
 bool wreaddir(DIR *dir, wcstring &out_name);
 bool wreaddir_resolving(DIR *dir, const std::wstring &dir_path, wcstring &out_name,