From 70548f7cc75ea9dcf325d8f2a87126a845e9dc46 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Sat, 1 Aug 2015 16:02:46 -0700 Subject: [PATCH] Initial implementation of wildcard match refactoring Not yet turned on. --- src/expand.cpp | 2 +- src/fish.cpp | 2 - src/fish_tests.cpp | 1 - src/wildcard.cpp | 327 ++++++++++++++++++++++++++++++++++++++++++++- src/wutil.cpp | 8 -- src/wutil.h | 13 +- 6 files changed, 324 insertions(+), 29 deletions(-) diff --git a/src/expand.cpp b/src/expand.cpp index eeeb7bac6..aca6d97a3 100644 --- a/src/expand.cpp +++ b/src/expand.cpp @@ -1793,7 +1793,7 @@ int expand_string(const wcstring &input, std::vector *output, expa int wc_res; remove_internal_separator(next, (EXPAND_SKIP_WILDCARDS & flags) ? true : false); - const bool has_wildcard = wildcard_has(next, 1); + const bool has_wildcard = wildcard_has(next, true /* internal, i.e. ANY_CHAR */); if (has_wildcard && (flags & EXECUTABLES_ONLY)) { diff --git a/src/fish.cpp b/src/fish.cpp index a50e08869..2d44b01f8 100644 --- a/src/fish.cpp +++ b/src/fish.cpp @@ -523,7 +523,6 @@ int main(int argc, char **argv) proc_init(); event_init(); - wutil_init(); builtin_init(); function_init(); env_init(&paths); @@ -636,7 +635,6 @@ int main(int argc, char **argv) proc_destroy(); builtin_destroy(); reader_destroy(); - wutil_destroy(); event_destroy(); if (g_log_forks) diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index b245dc607..999282906 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -4009,7 +4009,6 @@ int main(int argc, char **argv) reader_destroy(); builtin_destroy(); - wutil_destroy(); event_destroy(); proc_destroy(); diff --git a/src/wildcard.cpp b/src/wildcard.cpp index a10978291..96fba5a76 100644 --- a/src/wildcard.cpp +++ b/src/wildcard.cpp @@ -145,7 +145,10 @@ bool wildcard_has(const wcstring &str, bool internal) static bool wildcard_match_internal(const wchar_t *str, const wchar_t *wc, bool leading_dots_fail_to_match, bool is_first) { if (*str == 0 && *wc==0) + { + /* 
We're done */ return true; + } /* Hackish fix for https://github.com/fish-shell/fish-shell/issues/270 . Prevent wildcards from matching . or .., but we must still allow literal matches. */ if (leading_dots_fail_to_match && is_first && contains(str, L".", L"..")) @@ -161,6 +164,12 @@ static bool wildcard_match_internal(const wchar_t *str, const wchar_t *wc, bool { return false; } + + /* Common case of * at the end. In that case we can early out since we know it will match. */ + if (wc[1] == L'\0') + { + return true; + } /* Try all submatches */ do @@ -179,8 +188,7 @@ static bool wildcard_match_internal(const wchar_t *str, const wchar_t *wc, bool */ return false; } - - if (*wc == ANY_CHAR) + else if (*wc == ANY_CHAR) { if (is_first && *str == L'.') { @@ -189,9 +197,10 @@ static bool wildcard_match_internal(const wchar_t *str, const wchar_t *wc, bool return wildcard_match_internal(str+1, wc+1, leading_dots_fail_to_match, false); } - - if (*wc == *str) + else if (*wc == *str) + { return wildcard_match_internal(str+1, wc+1, leading_dots_fail_to_match, false); + } return false; } @@ -603,7 +612,7 @@ static void wildcard_completion_allocate(std::vector *list, expansion flags specified. flags can be a combination of EXECUTABLES_ONLY and DIRECTORIES_ONLY. */ -static bool test_flags(const wchar_t *filename, expand_flags_t flags) +static bool test_flags(const wcstring &filename, expand_flags_t flags) { if (flags & DIRECTORIES_ONLY) { @@ -646,6 +655,301 @@ static void insert_completion_if_missing(const wcstring &str, std::vector completion_set; + + /* the set of file IDs we have visited, used to avoid symlink loops */ + std::set visited_files; + + /* flags controlling expansion */ + const expand_flags_t flags; + + /* resolved items get inserted into here. This is transient of course. 
*/ + std::vector *resolved; + + /* whether we have been interrupted */ + bool did_interrupt; + + /* whether we have successfully added any completions */ + bool did_add; + + /* We are a trailing slash - expand at the end */ + void expand_trailing_slash(const wcstring &base_dir); + + /* Given a directory base_dir, which is opened as base_dir_fp, expand an intermediate segment of the wildcard. + Treat ANY_STRING_RECURSIVE as ANY_STRING. + wc_segment is the wildcard segment for this directory + wc_remainder is the wildcard for subdirectories + */ + void expand_intermediate_segment(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &wc_segment, const wchar_t *wc_remainder); + + /* Given a directory base_dir, which is opened as base_dir_fp, expand the last segment of the wildcard. + Treat ANY_STRING_RECURSIVE as ANY_STRING. + wc is the wildcard segment to use for matching + There is no wildcard remainder here, because this is the final segment + */ + void expand_last_segment(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &wc); + + /* Given a directory base_dir, which is opened as base_dir_fp, call expand() recursively + on matching subdirectories. + head_wc is the portion before the recursive match + wc_remainder is the portion after it, and starts with ANY_STRING_RECURSIVE + */ + void recurse_to_subdirectories(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &head_wc, const wchar_t *wc_remainder); + + /* Helper to resolve an empty base directory */ + static DIR *open_dir(const wcstring &base_dir) + { + return wopendir(base_dir.empty() ? L"."
: base_dir); + } + +public: + + wildcard_expander_t(expand_flags_t f, std::vector *r) : flags(f), resolved(r), did_interrupt(false), did_add(false) + { + assert(resolved != NULL); + + /* Insert initial completions into our set to avoid duplicates */ + for (std::vector::const_iterator iter = resolved->begin(); iter != resolved->end(); ++iter) + { + this->completion_set.insert(iter->completion); + } + } + + /* Do wildcard expansion. This is recursive. */ + void expand(const wcstring &base_dir, const wchar_t *wc); + + + /* Indicate whether we should cancel wildcard expansion. This latches 'interrupt' */ + bool interrupted() + { + if (! did_interrupt) + { + did_interrupt = (is_main_thread() ? reader_interrupted() : reader_thread_job_is_stale()); + } + return did_interrupt; + } + + /* Indicates whether something was added */ + bool added() const + { + return this->did_add; + } +}; + +void wildcard_expander_t::expand_trailing_slash(const wcstring &base_dir) +{ + if (interrupted()) + { + return; + } + + if (! (flags & ACCEPT_INCOMPLETE)) + { + /* Trailing slash and not accepting incomplete, e.g. `echo /tmp/`. Insert this file only if it exists, i.e. the access check returns 0. */ + if (waccess(base_dir, F_OK) == 0) + { + append_completion(this->resolved, base_dir); + this->did_add = true; + } + } + else + { + /* Trailing slash and accepting incomplete, e.g. completing `echo /tmp/`. Every entry that does not start with a dot and passes test_flags is added. */ + DIR *dir = open_dir(base_dir); + if (dir) + { + wcstring next; + while (wreaddir(dir, next) && ! interrupted()) + { + if (! 
next.empty() && next.at(0) != L'.') + { + const wcstring abs_path = base_dir + next; + if (test_flags(abs_path, this->flags)) + { + wildcard_completion_allocate(this->resolved, abs_path, next, L"", flags); + this->did_add = true; + } + } + } + closedir(dir); + } + } +} + +void wildcard_expander_t::expand_intermediate_segment(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &wc_segment, const wchar_t *wc_remainder) +{ + wcstring name_str; + while (!interrupted() && wreaddir(base_dir_fp, name_str)) + { + /* Note that it's critical we ignore leading dots here, else we may descend into . and .. */ + if (! wildcard_match(name_str, wc_segment, true)) + { + /* Doesn't match the wildcard for this segment, skip it */ + continue; + } + + wcstring full_path = base_dir + name_str; + struct stat buf; + if (0 != wstat(full_path, &buf) || !S_ISDIR(buf.st_mode)) + { + /* We either can't stat it, or we did but it's not a directory */ + continue; + } + + const file_id_t file_id = file_id_t::file_id_from_stat(&buf); + if (!this->visited_files.insert(file_id).second) + { + /* Symlink loop! This directory was already visited, so skip it */ + continue; + } + + /* We made it through. Perform normal wildcard expansion on this new directory, starting at our tail_wc, which includes the ANY_STRING_RECURSIVE guy. */ + full_path.push_back(L'/'); + this->expand(full_path, wc_remainder); + } +} + +void wildcard_expander_t::expand_last_segment(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &wc) +{ + wcstring name_str; + while (wreaddir(base_dir_fp, name_str)) + { + if (flags & ACCEPT_INCOMPLETE) + { + /* Test for matches before stating file, so as to minimize the number of calls to the much slower stat function. The only expand flag we care about is EXPAND_FUZZY_MATCH; we have no complete flags. 
*/ + std::vector local_matches; + if (wildcard_complete(name_str, wc.c_str(), L"", NULL, &local_matches, flags & EXPAND_FUZZY_MATCH, 0)) + { + const wcstring abs_path = base_dir + name_str; + if (test_flags(abs_path, flags)) + { + wildcard_completion_allocate(this->resolved, abs_path, name_str, wc.c_str(), flags); + this->did_add = true; + } + } + } + else + { + if (wildcard_match(name_str, wc, true /* skip files with leading dots */)) + { + const wcstring abs_path = base_dir + name_str; + if (this->completion_set.insert(abs_path).second) + { + append_completion(this->resolved, abs_path); + this->did_add = true; + } + } + } + } +} + +void wildcard_expander_t::recurse_to_subdirectories(const wcstring &base_dir, DIR *base_dir_fp, const wcstring &head_wc, const wchar_t *wc_remainder) +{ + assert(! base_dir.empty()); + assert(wc_remainder[0] == ANY_STRING_RECURSIVE); + // note head_wc may be empty + + /* Construct a "head + any" wildcard for matching stuff in this directory. Then just match this segment with that, then future segments with the remainder of the wildcard. */ + wcstring head_any = head_wc; + head_any.push_back(ANY_STRING); + this->expand_intermediate_segment(base_dir, base_dir_fp, head_any, wc_remainder); +} + +/** + The real implementation of wildcard expansion is in this + function. Other functions are just wrappers around this one. + + This function traverses the relevant directory tree looking for + matches, and recurses when needed to handle wildcards spanning + multiple components and recursive wildcards. + + Because this function calls itself recursively with substrings, + it's important that the parameters be raw pointers instead of wcstring, + which would be too expensive to construct for all substrings. + + Args: + base_dir: the "working directory" against which the wildcard is to be resolved + wc: the wildcard string itself, e.g. 
foo*bar/baz (where * is actually ANY_STRING) +*/ +void wildcard_expander_t::expand(const wcstring &base_dir, const wchar_t *wc) +{ + assert(wc != NULL); + + if (interrupted()) + { + return; + } + + /* Get the current segment and compute interesting properties about it. */ + const size_t wc_len = wcslen(wc); + const wchar_t * const next_slash = wcschr(wc, L'/'); + const bool is_last_segment = (next_slash == NULL); + const size_t wc_segment_len = next_slash ? next_slash - wc : wc_len; + const wcstring wc_segment = wcstring(wc, wc_segment_len); + const bool segment_has_wildcards = wildcard_has(wc_segment, true /* internal, i.e. look for ANY_CHAR instead of ? */); + + if (wc_segment.empty()) + { + /* Handle empty segment */ + assert(! segment_has_wildcards); + if (is_last_segment) + { + this->expand_trailing_slash(base_dir); + } + else + { + /* Multiple adjacent slashes in the wildcard. Just skip them. */ + this->expand(base_dir, next_slash + 1); + } + } + else if (! segment_has_wildcards && ! is_last_segment) + { + /* Literal intermediate match. Note that we may not be able to actually read the directory (#2099) */ + assert(next_slash != NULL); + /* This just trumps everything */ + this->expand(base_dir + wc_segment + L'/', next_slash + 1); + } + else + { + assert(! 
wc_segment.empty() && (segment_has_wildcards || is_last_segment)); + DIR *dir = open_dir(base_dir); + if (dir) + { + if (is_last_segment) + { + /* Last wildcard segment, nonempty wildcard */ + this->expand_last_segment(base_dir, dir, wc_segment); + } + else + { + /* Not the last segment, nonempty wildcard */ + assert(next_slash != NULL); + const wchar_t *wc_remainder = next_slash; + while (*wc_remainder == L'/') + { + wc_remainder++; + } + this->expand_intermediate_segment(base_dir, dir, wc_segment, wc_remainder); + } + + /* Recursive wildcards require special handling */ + size_t asr_idx = wc_segment.find(ANY_STRING_RECURSIVE); + if (asr_idx != wcstring::npos) + { + const wcstring head(wc_segment, 0, asr_idx); + const wchar_t *tail = wc + asr_idx; // starts at the ASR wildcard + assert(*tail == ANY_STRING_RECURSIVE); + rewinddir(dir); + this->recurse_to_subdirectories(base_dir, dir, head, tail); + } + closedir(dir); + } + } +} + /** The real implementation of wildcard expansion is in this function. Other functions are just wrappers around this one. @@ -657,6 +961,17 @@ static void insert_completion_if_missing(const wcstring &str, std::vector *output) { assert(output != NULL); - /* Hackish fix for 1631. We are about to call c_str(), which will produce a string truncated at any embedded nulls. We could fix this by passing around the size, etc. However embedded nulls are never allowed in a filename, so we just check for them and return 0 (no matches) if there is an embedded null. This isn't quite right, e.g. it will fail for \0?, but that is an edge case. */ + /* Hackish fix for 1631. We are about to call c_str(), which will produce a string truncated at any embedded nulls. We could fix this by passing around the size, etc. However embedded nulls are never allowed in a filename, so we just check for them and return 0 (no matches) if there is an embedded null. 
*/ if (wc.find(L'\0') != wcstring::npos) { return 0; diff --git a/src/wutil.cpp b/src/wutil.cpp index f429b07aa..b5689eecc 100644 --- a/src/wutil.cpp +++ b/src/wutil.cpp @@ -53,14 +53,6 @@ static pthread_mutex_t wgettext_lock; typedef std::map wgettext_map_t; static wgettext_map_t wgettext_map; -void wutil_init() -{ -} - -void wutil_destroy() -{ -} - bool wreaddir_resolving(DIR *dir, const std::wstring &dir_path, std::wstring &out_name, bool *out_is_dir) { struct dirent *d = readdir(dir); diff --git a/src/wutil.h b/src/wutil.h index 011f34779..351846c5a 100644 --- a/src/wutil.h +++ b/src/wutil.h @@ -15,17 +15,6 @@ #include #include "common.h" -/** - Call this function on startup to create internal wutil - resources. This function doesn't do anything. -*/ -void wutil_init(); - -/** - Call this function on exit to free internal wutil resources -*/ -void wutil_destroy(); - /** Wide character version of fopen(). This sets CLO_EXEC. */ @@ -105,6 +94,8 @@ wchar_t *wrealpath(const wcstring &pathname, wchar_t *resolved_path); bool wreaddir(DIR *dir, std::wstring &out_name); bool wreaddir_resolving(DIR *dir, const std::wstring &dir_path, std::wstring &out_name, bool *out_is_dir); +bool wreaddir_resolving(DIR *dir, const std::wstring &dir_path, std::wstring *out_name, bool *out_is_dir); + /** Wide character version of dirname() */