mirror of
https://github.com/fish-shell/fish-shell.git
synced 2025-02-21 05:06:27 +08:00
Port the easy part of wildcard.{h,cpp}
- wildcard_match is now closer to the original that is linked in a comment, as pointer-arithmetic translates very poorly. The act of calling wildcard patterns wc or wildcard is kinda confusing when wc elsewhere is widechar.
This commit is contained in:
parent
e1f5751ba0
commit
80d92dcc6d
@ -87,6 +87,7 @@ fn main() {
|
||||
"fish-rust/src/trace.rs",
|
||||
"fish-rust/src/util.rs",
|
||||
"fish-rust/src/wait_handle.rs",
|
||||
"fish-rust/src/wildcard.rs",
|
||||
];
|
||||
cxx_build::bridges(&source_files)
|
||||
.flag_if_supported("-std=c++11")
|
||||
|
@ -6,8 +6,7 @@ use super::*;
|
||||
use crate::env::{EnvMode, EnvVar, EnvVarFlags};
|
||||
use crate::flog::FLOG;
|
||||
use crate::parse_util::parse_util_unescape_wildcards;
|
||||
use crate::wchar_ffi::WCharToFFI;
|
||||
use crate::wildcard::ANY_STRING;
|
||||
use crate::wildcard::{wildcard_match, ANY_STRING};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Match<'args> {
|
||||
@ -380,13 +379,11 @@ impl<'opts, 'args> WildCardMatcher<'opts, 'args> {
|
||||
fn report_matches(&mut self, arg: &wstr, streams: &mut io_streams_t) {
|
||||
// Note: --all is a no-op for glob matching since the pattern is always matched
|
||||
// against the entire argument.
|
||||
use crate::ffi::wildcard_match;
|
||||
|
||||
let subject = match self.opts.ignore_case {
|
||||
true => arg.to_lowercase(),
|
||||
false => arg.to_owned(),
|
||||
};
|
||||
let m = wildcard_match(&subject.to_ffi(), &self.pattern.to_ffi(), false);
|
||||
let m = wildcard_match(subject, &self.pattern, false);
|
||||
|
||||
if m ^ self.opts.invert_match {
|
||||
self.total_matched += 1;
|
||||
|
@ -45,7 +45,6 @@ include_cpp! {
|
||||
#include "reader.h"
|
||||
#include "screen.h"
|
||||
#include "tokenizer.h"
|
||||
#include "wildcard.h"
|
||||
#include "wutil.h"
|
||||
|
||||
// We need to block these types so when exposing C++ to Rust.
|
||||
@ -92,7 +91,6 @@ include_cpp! {
|
||||
|
||||
generate!("log_extra_to_flog_file")
|
||||
|
||||
generate!("wildcard_match")
|
||||
generate!("wgettext_ptr")
|
||||
|
||||
generate!("block_t")
|
||||
|
@ -1,7 +1,6 @@
|
||||
use crate::ffi::wildcard_match;
|
||||
use crate::parse_util::parse_util_unescape_wildcards;
|
||||
use crate::wchar::prelude::*;
|
||||
use crate::wchar_ffi::WCharToFFI;
|
||||
use crate::wildcard::wildcard_match;
|
||||
use libc::c_int;
|
||||
use std::io::Write;
|
||||
use std::os::unix::prelude::*;
|
||||
@ -212,7 +211,7 @@ fn apply_one_wildcard(wc_esc: &wstr, sense: bool) {
|
||||
let wc = parse_util_unescape_wildcards(wc_esc);
|
||||
let mut match_found = false;
|
||||
for cat in categories::all_categories() {
|
||||
if wildcard_match(&cat.name.to_ffi(), &wc.to_ffi(), false) {
|
||||
if wildcard_match(cat.name, &wc, false) {
|
||||
cat.enabled.store(sense, Ordering::Relaxed);
|
||||
match_found = true;
|
||||
}
|
||||
|
@ -1,6 +1,14 @@
|
||||
// Enumeration of all wildcard types.
|
||||
|
||||
use crate::common::{char_offset, WILDCARD_RESERVED_BASE};
|
||||
use cxx::CxxWString;
|
||||
|
||||
use crate::common::{
|
||||
char_offset, unescape_string, UnescapeFlags, UnescapeStringStyle, WILDCARD_RESERVED_BASE,
|
||||
};
|
||||
use crate::future_feature_flags::feature_test;
|
||||
use crate::future_feature_flags::FeatureFlag;
|
||||
use crate::wchar::prelude::*;
|
||||
use crate::wchar_ffi::WCharFromFFI;
|
||||
|
||||
/// Character representing any character except '/' (slash).
|
||||
pub const ANY_CHAR: char = char_offset(WILDCARD_RESERVED_BASE, 0);
|
||||
@ -11,3 +19,210 @@ pub const ANY_STRING_RECURSIVE: char = char_offset(WILDCARD_RESERVED_BASE, 2);
|
||||
/// This is a special pseudo-char that is not used other than to mark the
|
||||
/// end of the special characters so we can sanity check the enum range.
|
||||
pub const ANY_SENTINEL: char = char_offset(WILDCARD_RESERVED_BASE, 3);
|
||||
|
||||
/// Outcome of a wildcard expansion.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum WildcardResult {
    /// The wildcard did not match.
    NoMatch,
    /// The wildcard did match.
    Match,
    /// Expansion was cancelled (e.g. control-C).
    Cancel,
    /// Expansion produced too many results.
    Overflow,
}

// Not ported yet. The notes below describe the function sketched in the commented-out signature;
// they are plain comments so that rustdoc does not attach them to an unrelated item.
//
// Expand the wildcard by matching against the filesystem.
//
// wildcard_expand works by dividing the wildcard into segments at each directory boundary. Each
// segment is processed separately. All except the last segment are handled by matching the
// wildcard segment against all subdirectories of matching directories, and recursively calling
// wildcard_expand for matches. On the last segment, matching is made to any file, and all matches
// are inserted to the list.
//
// If wildcard_expand encounters any errors (such as insufficient privileges) during matching, no
// error messages will be printed and wildcard_expand will continue the matching process.
//
// \param wc The wildcard string
// \param working_directory The working directory
// \param flags flags for the search. Can be any combination of for_completions and
// executables_only
// \param output The list in which to put the output
//
// pub fn wildcard_expand_string(wc: &wstr, working_directory: &wstr, flags: ExpandFlags, cancel_checker: impl CancelChecker, output: *mut completion_receiver_t) -> WildcardResult {
//     todo!()
// }
|
||||
|
||||
/// Test whether the given wildcard matches the string. Does not perform any I/O.
|
||||
///
|
||||
/// \param str The string to test
|
||||
/// \param wc The wildcard to test against
|
||||
/// \param leading_dots_fail_to_match if set, strings with leading dots are assumed to be hidden
|
||||
/// files and are not matched (default was false)
|
||||
///
|
||||
/// \return true if the wildcard matched
|
||||
#[must_use]
|
||||
pub fn wildcard_match(
|
||||
name: impl AsRef<wstr>,
|
||||
pattern: impl AsRef<wstr>,
|
||||
leading_dots_fail_to_match: bool,
|
||||
) -> bool {
|
||||
let name = name.as_ref();
|
||||
let pattern = pattern.as_ref();
|
||||
// Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow
|
||||
// literal matches.
|
||||
if leading_dots_fail_to_match && (name == L!(".") || name == L!("..")) {
|
||||
// The string is '.' or '..' so the only possible match is an exact match.
|
||||
return name == pattern;
|
||||
}
|
||||
|
||||
// Near Linear implementation as proposed here https://research.swtch.com/glob.
|
||||
let mut px = 0;
|
||||
let mut nx = 0;
|
||||
let mut next_px = 0;
|
||||
let mut next_nx = 0;
|
||||
|
||||
while px < pattern.len() || nx < name.len() {
|
||||
if px < pattern.len() {
|
||||
match pattern.char_at(px) {
|
||||
ANY_STRING | ANY_STRING_RECURSIVE => {
|
||||
// Ignore hidden file
|
||||
if leading_dots_fail_to_match && nx == 0 && name.char_at(0) == '.' {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Common case of * at the end. In that case we can early out since we know it will
|
||||
// match.
|
||||
if px == pattern.len() - 1 {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Try to match at nx.
|
||||
// If that doesn't work out, restart at nx+1 next.
|
||||
next_px = px;
|
||||
next_nx = nx + 1;
|
||||
px += 1;
|
||||
continue;
|
||||
}
|
||||
ANY_CHAR => {
|
||||
if nx < name.len() {
|
||||
if nx == 0 && name.char_at(nx) == '.' {
|
||||
return false;
|
||||
}
|
||||
|
||||
px += 1;
|
||||
nx += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
c => {
|
||||
// ordinary char
|
||||
if nx < name.len() && name.char_at(nx) == c {
|
||||
px += 1;
|
||||
nx += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mismatch. Maybe restart.
|
||||
if 0 < next_nx && next_nx <= name.len() {
|
||||
px = next_px;
|
||||
nx = next_nx;
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Matched all of pattern to all of name. Success.
|
||||
true
|
||||
}
|
||||
|
||||
// Check if the string has any unescaped wildcards (e.g. ANY_STRING).
|
||||
#[inline]
|
||||
#[must_use]
|
||||
fn wildcard_has_internal(s: impl AsRef<wstr>) -> bool {
|
||||
s.as_ref()
|
||||
.chars()
|
||||
.any(|c| matches!(c, ANY_STRING | ANY_STRING_RECURSIVE | ANY_CHAR))
|
||||
}
|
||||
|
||||
/// Check if the specified string contains wildcards (e.g. *).
|
||||
#[must_use]
|
||||
fn wildcard_has(s: impl AsRef<wstr>) -> bool {
|
||||
let s = s.as_ref();
|
||||
let qmark_is_wild = !feature_test(FeatureFlag::qmark_noglob);
|
||||
// Fast check for * or ?; if none there is no wildcard.
|
||||
// Note some strings contain * but no wildcards, e.g. if they are quoted.
|
||||
if !s.contains('*') && (!qmark_is_wild || !s.contains('?')) {
|
||||
return false;
|
||||
}
|
||||
let unescaped =
|
||||
unescape_string(s, UnescapeStringStyle::Script(UnescapeFlags::SPECIAL)).unwrap_or_default();
|
||||
return wildcard_has_internal(unescaped);
|
||||
}
|
||||
|
||||
// Test wildcard completion (not yet ported).
|
||||
// pub fn wildcard_complete(str: &wstr, wc: &wstr, desc_func: impl Fn(&wstr) -> WString, out: *mut completion_receiver_t, expand_flags: ExpandFlags, flags: CompleteFlags) -> bool {
|
||||
// todo!()
|
||||
// }
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::future_feature_flags::scoped_test;

    /// Exercises `wildcard_has` on escaped input and `wildcard_has_internal` on unescaped input,
    /// including the `qmark_noglob`-dependent treatment of `?`.
    // NOTE(review): the C++ test this was ported from also asserted that a quoted `"*"` is not a
    // wildcard; consider porting that case as well.
    #[test]
    fn test_wildcards() {
        assert!(!wildcard_has(L!("")));
        assert!(wildcard_has(L!("*")));
        assert!(!wildcard_has(L!("\\*")));

        // Escaped form: seen by wildcard_has, invisible to wildcard_has_internal.
        let escaped = L!("foo*bar");
        assert!(wildcard_has(escaped));
        assert!(!wildcard_has_internal(escaped));

        // Unescaped form: the other way around.
        let unescaped =
            unescape_string(escaped, UnescapeStringStyle::Script(UnescapeFlags::SPECIAL)).unwrap();
        assert!(!wildcard_has(&unescaped));
        assert!(wildcard_has_internal(&unescaped));

        // A bare ? is a wildcard only while qmark_noglob is off; an escaped ? never is.
        for (noglob, bare_qmark_is_wild) in [(false, true), (true, false)] {
            scoped_test(FeatureFlag::qmark_noglob, noglob, || {
                assert_eq!(wildcard_has(L!("?")), bare_qmark_is_wild);
                assert!(!wildcard_has(L!("\\?")));
            });
        }
    }
}
|
||||
|
||||
#[cxx::bridge]
|
||||
mod ffi {
|
||||
extern "C++" {
|
||||
include!("wutil.h");
|
||||
}
|
||||
extern "Rust" {
|
||||
#[cxx_name = "wildcard_match_ffi"]
|
||||
fn wildcard_match_ffi(
|
||||
str: &CxxWString,
|
||||
wc: &CxxWString,
|
||||
leading_dots_fail_to_match: bool,
|
||||
) -> bool;
|
||||
|
||||
#[cxx_name = "wildcard_has"]
|
||||
fn wildcard_has_ffi(s: &CxxWString) -> bool;
|
||||
|
||||
#[cxx_name = "wildcard_has_internal"]
|
||||
fn wildcard_has_internal_ffi(s: &CxxWString) -> bool;
|
||||
}
|
||||
}
|
||||
|
||||
fn wildcard_match_ffi(str: &CxxWString, wc: &CxxWString, leading_dots_fail_to_match: bool) -> bool {
|
||||
wildcard_match(str.from_ffi(), wc.from_ffi(), leading_dots_fail_to_match)
|
||||
}
|
||||
|
||||
fn wildcard_has_ffi(s: &CxxWString) -> bool {
|
||||
wildcard_has(s.from_ffi())
|
||||
}
|
||||
|
||||
fn wildcard_has_internal_ffi(s: &CxxWString) -> bool {
|
||||
wildcard_has_internal(s.from_ffi())
|
||||
}
|
||||
|
@ -2444,28 +2444,6 @@ static void test_autoload() {
|
||||
autoload_tester_t::run_test();
|
||||
}
|
||||
|
||||
static void test_wildcards() {
|
||||
say(L"Testing wildcards");
|
||||
do_test(!wildcard_has(L""));
|
||||
do_test(wildcard_has(L"*"));
|
||||
do_test(!wildcard_has(L"\\*"));
|
||||
do_test(!wildcard_has(L"\"*\""));
|
||||
|
||||
wcstring wc = L"foo*bar";
|
||||
do_test(wildcard_has(wc) && !wildcard_has_internal(wc));
|
||||
unescape_string_in_place(&wc, UNESCAPE_SPECIAL);
|
||||
do_test(!wildcard_has(wc) && wildcard_has_internal(wc));
|
||||
|
||||
auto saved = feature_test(feature_flag_t::qmark_noglob);
|
||||
feature_set(feature_flag_t::qmark_noglob, false);
|
||||
do_test(wildcard_has(L"?"));
|
||||
do_test(!wildcard_has(L"\\?"));
|
||||
feature_set(feature_flag_t::qmark_noglob, true);
|
||||
do_test(!wildcard_has(L"?"));
|
||||
do_test(!wildcard_has(L"\\?"));
|
||||
feature_set(feature_flag_t::qmark_noglob, saved);
|
||||
}
|
||||
|
||||
static void test_complete() {
|
||||
say(L"Testing complete");
|
||||
|
||||
@ -5595,7 +5573,6 @@ static const test_t s_tests[]{
|
||||
{TEST_GROUP("word_motion"), test_word_motion},
|
||||
{TEST_GROUP("is_potential_path"), test_is_potential_path},
|
||||
{TEST_GROUP("colors"), test_colors},
|
||||
{TEST_GROUP("wildcard"), test_wildcards},
|
||||
{TEST_GROUP("complete"), test_complete},
|
||||
{TEST_GROUP("autoload"), test_autoload},
|
||||
{TEST_GROUP("input"), test_input},
|
||||
|
@ -39,101 +39,6 @@ static size_t wildcard_find(const wchar_t *wc) {
|
||||
return wcstring::npos;
|
||||
}
|
||||
|
||||
bool wildcard_has_internal(const wchar_t *s, size_t len) {
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
wchar_t c = s[i];
|
||||
if (c == ANY_CHAR || c == ANY_STRING || c == ANY_STRING_RECURSIVE) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Note we want to handle embedded nulls (issue #1631).
|
||||
bool wildcard_has(const wchar_t *str, size_t len) {
|
||||
assert(str != nullptr);
|
||||
const wchar_t *end = str + len;
|
||||
bool qmark_is_wild = !feature_test(feature_flag_t::qmark_noglob);
|
||||
// Fast check for * or ?; if none there is no wildcard.
|
||||
// Note some strings contain * but no wildcards, e.g. if they are quoted.
|
||||
if (std::find(str, end, L'*') == end && (!qmark_is_wild || std::find(str, end, L'?') == end)) {
|
||||
return false;
|
||||
}
|
||||
wcstring unescaped;
|
||||
if (auto tmp = unescape_string(wcstring{str, len}, UNESCAPE_SPECIAL)) {
|
||||
unescaped = *tmp;
|
||||
}
|
||||
return wildcard_has_internal(unescaped);
|
||||
}
|
||||
|
||||
/// Check whether the string str matches the wildcard string wc.
|
||||
///
|
||||
/// \param str String to be matched.
|
||||
/// \param wc The wildcard.
|
||||
/// \param leading_dots_fail_to_match Whether files beginning with dots should not be matched
|
||||
/// against wildcards.
|
||||
bool wildcard_match(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match) {
|
||||
// Hackish fix for issue #270. Prevent wildcards from matching . or .., but we must still allow
|
||||
// literal matches.
|
||||
if (leading_dots_fail_to_match && (str == L"." || str == L"..")) {
|
||||
// The string is '.' or '..' so the only possible match is an exact match.
|
||||
return str == wc;
|
||||
}
|
||||
|
||||
// Near Linear implementation as proposed here https://research.swtch.com/glob.
|
||||
const wchar_t *const str_start = str.c_str();
|
||||
const wchar_t *wc_x = wc.c_str();
|
||||
const wchar_t *str_x = str_start;
|
||||
const wchar_t *restart_wc_x = wc.c_str();
|
||||
const wchar_t *restart_str_x = str_start;
|
||||
|
||||
bool restart_is_out_of_str = false;
|
||||
for (; *wc_x != 0 || *str_x != 0;) {
|
||||
bool is_first = (str_x == str_start);
|
||||
if (*wc_x != 0) {
|
||||
if (*wc_x == ANY_STRING || *wc_x == ANY_STRING_RECURSIVE) {
|
||||
// Ignore hidden file
|
||||
if (leading_dots_fail_to_match && is_first && str[0] == L'.') {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Common case of * at the end. In that case we can early out since we know it will
|
||||
// match.
|
||||
if (wc_x[1] == L'\0') {
|
||||
return true;
|
||||
}
|
||||
// Try to match at str_x.
|
||||
// If that doesn't work out, restart at str_x+1 next.
|
||||
restart_wc_x = wc_x;
|
||||
restart_str_x = str_x + 1;
|
||||
restart_is_out_of_str = (*str_x == 0);
|
||||
wc_x++;
|
||||
continue;
|
||||
} else if (*wc_x == ANY_CHAR && *str_x != 0) {
|
||||
if (is_first && *str_x == L'.') {
|
||||
return false;
|
||||
}
|
||||
wc_x++;
|
||||
str_x++;
|
||||
continue;
|
||||
} else if (*str_x != 0 && *str_x == *wc_x) { // ordinary character
|
||||
wc_x++;
|
||||
str_x++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Mismatch. Maybe restart.
|
||||
if (restart_str_x != str.c_str() && !restart_is_out_of_str) {
|
||||
wc_x = restart_wc_x;
|
||||
str_x = restart_str_x;
|
||||
continue;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Matched all of pattern to all of name. Success.
|
||||
return true;
|
||||
}
|
||||
|
||||
// This does something horrible refactored from an even more horrible function.
|
||||
static wcstring resolve_description(const wcstring &full_completion, wcstring *completion,
|
||||
expand_flags_t expand_flags,
|
||||
|
@ -75,6 +75,11 @@ wildcard_result_t wildcard_expand_string(const wcstring &wc, const wcstring &wor
|
||||
const cancel_checker_t &cancel_checker,
|
||||
completion_receiver_t *output);
|
||||
|
||||
#if INCLUDE_RUST_HEADERS
|
||||
|
||||
#include "wildcard.rs.h"
|
||||
|
||||
#else
|
||||
/// Test whether the given wildcard matches the string. Does not perform any I/O.
|
||||
///
|
||||
/// \param str The string to test
|
||||
@ -83,18 +88,24 @@ wildcard_result_t wildcard_expand_string(const wcstring &wc, const wcstring &wor
|
||||
/// files and are not matched
|
||||
///
|
||||
/// \return true if the wildcard matched
|
||||
bool wildcard_match(const wcstring &str, const wcstring &wc,
|
||||
bool leading_dots_fail_to_match = false);
|
||||
bool wildcard_match_ffi(const wcstring &str, const wcstring &wc, bool leading_dots_fail_to_match);
|
||||
|
||||
// Check if the string has any unescaped wildcards (e.g. ANY_STRING).
|
||||
bool wildcard_has_internal(const wchar_t *s, size_t len);
|
||||
inline bool wildcard_has_internal(const wcstring &s) {
|
||||
return wildcard_has_internal(s.c_str(), s.size());
|
||||
}
|
||||
bool wildcard_has_internal(const wcstring &s);
|
||||
|
||||
/// Check if the specified string contains wildcards (e.g. *).
|
||||
bool wildcard_has(const wchar_t *s, size_t len);
|
||||
inline bool wildcard_has(const wcstring &s) { return wildcard_has(s.c_str(), s.size()); }
|
||||
bool wildcard_has(const wcstring &s);
|
||||
|
||||
#endif
|
||||
|
||||
inline bool wildcard_match(const wcstring &str, const wcstring &wc,
|
||||
bool leading_dots_fail_to_match = false) {
|
||||
return wildcard_match_ffi(str, wc, leading_dots_fail_to_match);
|
||||
}
|
||||
|
||||
inline bool wildcard_has(const wchar_t *s, size_t len) {
|
||||
return wildcard_has(wcstring(s, len));
|
||||
};
|
||||
|
||||
/// Test wildcard completion.
|
||||
wildcard_result_t wildcard_complete(const wcstring &str, const wchar_t *wc,
|
||||
|
Loading…
x
Reference in New Issue
Block a user