mirror of
https://github.com/fish-shell/fish-shell.git
synced 2025-02-16 23:02:45 +08:00
re: port regex make anchored to rust and helper ffi functions for regex
This commit is contained in:
parent
6851d52924
commit
e384e63b24
|
@ -1,7 +1,13 @@
|
|||
use crate::wchar;
|
||||
use crate::wchar_ffi::WCharToFFI;
|
||||
#[rustfmt::skip]
|
||||
use ::std::fmt::{self, Debug, Formatter};
|
||||
#[rustfmt::skip]
|
||||
use ::std::pin::Pin;
|
||||
#[rustfmt::skip]
|
||||
use ::std::slice;
|
||||
use crate::wchar::wstr;
|
||||
use autocxx::prelude::*;
|
||||
use core::pin::Pin;
|
||||
use core::slice;
|
||||
use cxx::SharedPtr;
|
||||
|
||||
// autocxx has been hacked up to know about this.
|
||||
|
@ -10,14 +16,17 @@ pub type wchar_t = u32;
|
|||
include_cpp! {
|
||||
#include "builtin.h"
|
||||
#include "common.h"
|
||||
#include "env.h"
|
||||
#include "event.h"
|
||||
#include "fallback.h"
|
||||
#include "fds.h"
|
||||
#include "flog.h"
|
||||
#include "io.h"
|
||||
#include "parse_constants.h"
|
||||
#include "parser.h"
|
||||
#include "parse_util.h"
|
||||
#include "proc.h"
|
||||
#include "re.h"
|
||||
#include "tokenizer.h"
|
||||
#include "wildcard.h"
|
||||
#include "wutil.h"
|
||||
|
@ -74,6 +83,12 @@ include_cpp! {
|
|||
generate!("signal_get_desc")
|
||||
|
||||
generate!("fd_event_signaller_t")
|
||||
|
||||
generate_pod!("re::flags_t")
|
||||
generate_pod!("re::re_error_t")
|
||||
generate!("re::regex_t")
|
||||
generate!("re::regex_result_ffi")
|
||||
generate!("re::try_compile_ffi")
|
||||
}
|
||||
|
||||
impl parser_t {
|
||||
|
@ -89,6 +104,10 @@ impl parser_t {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn try_compile(anchored: &wstr, flags: &re::flags_t) -> Pin<Box<re::regex_result_ffi>> {
|
||||
re::try_compile_ffi(&anchored.to_ffi(), flags).within_box()
|
||||
}
|
||||
|
||||
impl job_t {
|
||||
#[allow(clippy::mut_from_ref)]
|
||||
pub fn get_procs(&self) -> &mut [UniquePtr<process_t>] {
|
||||
|
@ -115,6 +134,12 @@ impl From<wcharz_t> for wchar::WString {
|
|||
}
|
||||
}
|
||||
|
||||
impl Debug for re::regex_t {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
f.write_str("regex_t")
|
||||
}
|
||||
}
|
||||
|
||||
/// A bogus trait for turning &mut Foo into Pin<&mut Foo>.
|
||||
/// autocxx enforces that non-const methods must be called through Pin,
|
||||
/// but this means we can't pass around mutable references to types like parser_t.
|
||||
|
@ -133,11 +158,15 @@ pub trait Repin {
|
|||
}
|
||||
|
||||
// Implement Repin for our types.
|
||||
impl Repin for env_stack_t {}
|
||||
impl Repin for io_streams_t {}
|
||||
impl Repin for job_t {}
|
||||
impl Repin for output_stream_t {}
|
||||
impl Repin for parser_t {}
|
||||
impl Repin for process_t {}
|
||||
impl Repin for re::regex_result_ffi {}
|
||||
|
||||
// SAFETY: NOTE(review) — no justification is visible at this site. Presumably a regex_t
// exclusively owns its compiled code and has no thread affinity, so moving it to another
// thread is sound; confirm against the C++ definition in re.h before relying on this.
unsafe impl Send for re::regex_t {}
|
||||
|
||||
pub use autocxx::c_int;
|
||||
pub use ffi::*;
|
||||
|
|
46
fish-rust/src/re.rs
Normal file
46
fish-rust/src/re.rs
Normal file
|
@ -0,0 +1,46 @@
|
|||
use crate::wchar::{wstr, WString, L};
|
||||
|
||||
/// Adjust a pattern so that it is anchored at both beginning and end.
|
||||
/// This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on pre-2017 PCRE2
|
||||
/// (e.g. 10.21, on Xenial).
|
||||
pub fn regex_make_anchored(pattern: &wstr) -> WString {
|
||||
let mut anchored = pattern.to_owned();
|
||||
// PATTERN -> ^(:?PATTERN)$.
|
||||
let prefix = L!("^(?:");
|
||||
let suffix = L!(")$");
|
||||
anchored.reserve(pattern.len() + prefix.len() + suffix.len());
|
||||
anchored.insert_utfstr(0, prefix);
|
||||
anchored.push_utfstr(suffix);
|
||||
anchored
|
||||
}
|
||||
|
||||
use crate::ffi_tests::add_test;
|
||||
add_test!("test_regex_make_anchored", || {
|
||||
use crate::ffi;
|
||||
use crate::wchar::L;
|
||||
use crate::wchar_ffi::WCharToFFI;
|
||||
|
||||
let flags = ffi::re::flags_t { icase: false };
|
||||
let mut result = ffi::try_compile(&regex_make_anchored(L!("ab(.+?)")), &flags);
|
||||
assert!(!result.has_error());
|
||||
|
||||
let re = result.as_mut().get_regex();
|
||||
|
||||
assert!(!re.is_null());
|
||||
assert!(!re.matches_ffi(&L!("").to_ffi()));
|
||||
assert!(!re.matches_ffi(&L!("ab").to_ffi()));
|
||||
assert!(re.matches_ffi(&L!("abcd").to_ffi()));
|
||||
assert!(!re.matches_ffi(&L!("xabcd").to_ffi()));
|
||||
assert!(re.matches_ffi(&L!("abcdefghij").to_ffi()));
|
||||
|
||||
let mut result = ffi::try_compile(&regex_make_anchored(L!("(a+)|(b+)")), &flags);
|
||||
assert!(!result.has_error());
|
||||
|
||||
let re = result.as_mut().get_regex();
|
||||
assert!(!re.is_null());
|
||||
assert!(!re.matches_ffi(&L!("").to_ffi()));
|
||||
assert!(!re.matches_ffi(&L!("aabb").to_ffi()));
|
||||
assert!(re.matches_ffi(&L!("aaaa").to_ffi()));
|
||||
assert!(re.matches_ffi(&L!("bbbb").to_ffi()));
|
||||
assert!(!re.matches_ffi(&L!("aaaax").to_ffi()));
|
||||
});
|
|
@ -6828,23 +6828,6 @@ static void test_re_basic() {
|
|||
}
|
||||
do_test(join_strings(matches, L',') == L"AA,CC,11");
|
||||
do_test(join_strings(captures, L',') == L"A,C,1");
|
||||
|
||||
// Test make_anchored
|
||||
re = regex_t::try_compile(make_anchored(L"ab(.+?)"));
|
||||
do_test(re.has_value());
|
||||
do_test(!re->match(L""));
|
||||
do_test(!re->match(L"ab"));
|
||||
do_test((re->match(L"abcd") == match_range_t{0, 4}));
|
||||
do_test(!re->match(L"xabcd"));
|
||||
do_test((re->match(L"abcdefghij") == match_range_t{0, 10}));
|
||||
|
||||
re = regex_t::try_compile(make_anchored(L"(a+)|(b+)"));
|
||||
do_test(re.has_value());
|
||||
do_test(!re->match(L""));
|
||||
do_test(!re->match(L"aabb"));
|
||||
do_test((re->match(L"aaaa") == match_range_t{0, 4}));
|
||||
do_test((re->match(L"bbbb") == match_range_t{0, 4}));
|
||||
do_test(!re->match(L"aaaax"));
|
||||
}
|
||||
|
||||
static void test_re_reset() {
|
||||
|
|
26
src/re.cpp
26
src/re.cpp
|
@ -135,6 +135,10 @@ maybe_t<match_range_t> regex_t::match(const wcstring &subject) const {
|
|||
return this->match(md, subject);
|
||||
}
|
||||
|
||||
bool regex_t::matches_ffi(const wcstring &subject) const {
|
||||
return this->match(subject).has_value();
|
||||
}
|
||||
|
||||
maybe_t<match_range_t> regex_t::group(const match_data_t &md, size_t group_idx) const {
|
||||
if (group_idx >= md.max_capture || group_idx >= pcre2_get_ovector_count(get_md(md.data))) {
|
||||
return none();
|
||||
|
@ -295,12 +299,18 @@ regex_t::regex_t(adapters::bytecode_ptr_t &&code) : code_(std::move(code)) {
|
|||
|
||||
wcstring re_error_t::message() const { return message_for_code(this->code); }
|
||||
|
||||
wcstring re::make_anchored(wcstring pattern) {
|
||||
// PATTERN -> ^(?:PATTERN)$.
|
||||
const wchar_t *prefix = L"^(?:";
|
||||
const wchar_t *suffix = L")$";
|
||||
pattern.reserve(pattern.size() + wcslen(prefix) + wcslen(suffix));
|
||||
pattern.insert(0, prefix);
|
||||
pattern.append(suffix);
|
||||
return pattern;
|
||||
re::regex_result_ffi re::try_compile_ffi(const wcstring &pattern, const flags_t &flags) {
|
||||
re_error_t error{};
|
||||
auto regex = regex_t::try_compile(pattern, flags, &error);
|
||||
|
||||
if (regex) {
|
||||
return regex_result_ffi{std::make_unique<re::regex_t>(regex.acquire()), error};
|
||||
}
|
||||
|
||||
return re::regex_result_ffi{nullptr, error};
|
||||
}
|
||||
|
||||
bool re::regex_result_ffi::has_error() const { return error.code != 0; }
|
||||
re::re_error_t re::regex_result_ffi::get_error() const { return error; };
|
||||
|
||||
std::unique_ptr<re::regex_t> re::regex_result_ffi::get_regex() { return std::move(regex); }
|
||||
|
|
17
src/re.h
17
src/re.h
|
@ -114,6 +114,9 @@ class regex_t : noncopyable_t {
|
|||
/// A convenience function which calls prepare() for you.
|
||||
maybe_t<match_range_t> match(const wcstring &subject) const;
|
||||
|
||||
/// A convenience function for FFI callers: \return whether the subject matches, discarding the matched range.
|
||||
bool matches_ffi(const wcstring &subject) const;
|
||||
|
||||
/// \return the matched range for an indexed or named capture group. 0 means the entire match.
|
||||
maybe_t<match_range_t> group(const match_data_t &md, size_t group_idx) const;
|
||||
maybe_t<match_range_t> group(const match_data_t &md, const wcstring &name) const;
|
||||
|
@ -148,10 +151,16 @@ class regex_t : noncopyable_t {
|
|||
adapters::bytecode_ptr_t code_;
|
||||
};
|
||||
|
||||
/// Adjust a pattern so that it is anchored at both beginning and end.
|
||||
/// This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on pre-2017 PCRE2
|
||||
/// (e.g. 10.21, on Xenial).
|
||||
wcstring make_anchored(wcstring pattern);
|
||||
struct regex_result_ffi {
|
||||
std::unique_ptr<re::regex_t> regex;
|
||||
re::re_error_t error;
|
||||
|
||||
bool has_error() const;
|
||||
std::unique_ptr<re::regex_t> get_regex();
|
||||
re::re_error_t get_error() const;
|
||||
};
|
||||
|
||||
regex_result_ffi try_compile_ffi(const wcstring &pattern, const flags_t &flags);
|
||||
|
||||
} // namespace re
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue
Block a user