re: port regex make_anchored to Rust and add helper FFI functions for regex

This commit is contained in:
Neeraj Jaiswal 2023-02-24 21:25:49 +05:30 committed by Johannes Altmanninger
parent 6851d52924
commit e384e63b24
5 changed files with 108 additions and 31 deletions

View File

@ -1,7 +1,13 @@
use crate::wchar;
use crate::wchar_ffi::WCharToFFI;
#[rustfmt::skip]
use ::std::fmt::{self, Debug, Formatter};
#[rustfmt::skip]
use ::std::pin::Pin;
#[rustfmt::skip]
use ::std::slice;
use crate::wchar::wstr;
use autocxx::prelude::*;
use core::pin::Pin;
use core::slice;
use cxx::SharedPtr;
// autocxx has been hacked up to know about this.
@ -10,14 +16,17 @@ pub type wchar_t = u32;
include_cpp! {
#include "builtin.h"
#include "common.h"
#include "env.h"
#include "event.h"
#include "fallback.h"
#include "fds.h"
#include "flog.h"
#include "io.h"
#include "parse_constants.h"
#include "parser.h"
#include "parse_util.h"
#include "proc.h"
#include "re.h"
#include "tokenizer.h"
#include "wildcard.h"
#include "wutil.h"
@ -74,6 +83,12 @@ include_cpp! {
generate!("signal_get_desc")
generate!("fd_event_signaller_t")
generate_pod!("re::flags_t")
generate_pod!("re::re_error_t")
generate!("re::regex_t")
generate!("re::regex_result_ffi")
generate!("re::try_compile_ffi")
}
impl parser_t {
@ -89,6 +104,10 @@ impl parser_t {
}
}
/// Compile a regex pattern through the C++ `re::try_compile_ffi` binding.
///
/// The pattern is converted to its FFI representation and passed through unchanged;
/// despite the parameter name, no anchoring is applied here.
/// The returned result object is boxed and pinned so that it can be handed around on the
/// Rust side.
pub fn try_compile(anchored: &wstr, flags: &re::flags_t) -> Pin<Box<re::regex_result_ffi>> {
    let ffi_pattern = anchored.to_ffi();
    let compiled = re::try_compile_ffi(&ffi_pattern, flags);
    compiled.within_box()
}
impl job_t {
#[allow(clippy::mut_from_ref)]
pub fn get_procs(&self) -> &mut [UniquePtr<process_t>] {
@ -115,6 +134,12 @@ impl From<wcharz_t> for wchar::WString {
}
}
/// Opaque `Debug` output for the C++ regex type: only the type name is printed,
/// nothing about the compiled pattern.
impl Debug for re::regex_t {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(f, "regex_t")
    }
}
/// A bogus trait for turning &mut Foo into Pin<&mut Foo>.
/// autocxx enforces that non-const methods must be called through Pin,
/// but this means we can't pass around mutable references to types like parser_t.
@ -133,11 +158,15 @@ pub trait Repin {
}
// Implement Repin for our types.
// Each impl body is empty, so every type relies solely on what the Repin trait itself provides.
impl Repin for env_stack_t {}
impl Repin for io_streams_t {}
impl Repin for job_t {}
impl Repin for output_stream_t {}
impl Repin for parser_t {}
impl Repin for process_t {}
impl Repin for re::regex_result_ffi {}
// SAFETY: NOTE(review): this asserts that a re::regex_t may be moved to another thread.
// Nothing visible here establishes that; confirm against the C++ regex_t implementation.
unsafe impl Send for re::regex_t {}
pub use autocxx::c_int;
pub use ffi::*;

46
fish-rust/src/re.rs Normal file
View File

@ -0,0 +1,46 @@
use crate::wchar::{wstr, WString, L};
/// Adjust a pattern so that it is anchored at both beginning and end.
/// This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on pre-2017 PCRE2
/// (e.g. 10.21, on Xenial).
pub fn regex_make_anchored(pattern: &wstr) -> WString {
let mut anchored = pattern.to_owned();
// PATTERN -> ^(:?PATTERN)$.
let prefix = L!("^(?:");
let suffix = L!(")$");
anchored.reserve(pattern.len() + prefix.len() + suffix.len());
anchored.insert_utfstr(0, prefix);
anchored.push_utfstr(suffix);
anchored
}
use crate::ffi_tests::add_test;
// Checks that patterns wrapped by regex_make_anchored() only match whole subjects.
add_test!("test_regex_make_anchored", || {
    use crate::ffi;
    use crate::wchar::L;
    use crate::wchar_ffi::WCharToFFI;

    let flags = ffi::re::flags_t { icase: false };

    // Each entry is an unanchored pattern with (subject, should-match) expectations.
    let cases = [
        (
            L!("ab(.+?)"),
            [
                (L!(""), false),
                (L!("ab"), false),
                (L!("abcd"), true),
                (L!("xabcd"), false),
                (L!("abcdefghij"), true),
            ],
        ),
        (
            L!("(a+)|(b+)"),
            [
                (L!(""), false),
                (L!("aabb"), false),
                (L!("aaaa"), true),
                (L!("bbbb"), true),
                (L!("aaaax"), false),
            ],
        ),
    ];

    for (pattern, expectations) in cases {
        let mut compiled = ffi::try_compile(&regex_make_anchored(pattern), &flags);
        assert!(!compiled.has_error());
        let regex = compiled.as_mut().get_regex();
        assert!(!regex.is_null());
        for (subject, should_match) in expectations {
            assert_eq!(regex.matches_ffi(&subject.to_ffi()), should_match);
        }
    }
});

View File

@ -6828,23 +6828,6 @@ static void test_re_basic() {
}
do_test(join_strings(matches, L',') == L"AA,CC,11");
do_test(join_strings(captures, L',') == L"A,C,1");
// Test make_anchored
re = regex_t::try_compile(make_anchored(L"ab(.+?)"));
do_test(re.has_value());
do_test(!re->match(L""));
do_test(!re->match(L"ab"));
do_test((re->match(L"abcd") == match_range_t{0, 4}));
do_test(!re->match(L"xabcd"));
do_test((re->match(L"abcdefghij") == match_range_t{0, 10}));
re = regex_t::try_compile(make_anchored(L"(a+)|(b+)"));
do_test(re.has_value());
do_test(!re->match(L""));
do_test(!re->match(L"aabb"));
do_test((re->match(L"aaaa") == match_range_t{0, 4}));
do_test((re->match(L"bbbb") == match_range_t{0, 4}));
do_test(!re->match(L"aaaax"));
}
static void test_re_reset() {

View File

@ -135,6 +135,10 @@ maybe_t<match_range_t> regex_t::match(const wcstring &subject) const {
return this->match(md, subject);
}
/// FFI helper: reports only whether match() found something in \p subject,
/// discarding the matched range.
bool regex_t::matches_ffi(const wcstring &subject) const {
    const auto range = this->match(subject);
    return range.has_value();
}
maybe_t<match_range_t> regex_t::group(const match_data_t &md, size_t group_idx) const {
if (group_idx >= md.max_capture || group_idx >= pcre2_get_ovector_count(get_md(md.data))) {
return none();
@ -295,12 +299,18 @@ regex_t::regex_t(adapters::bytecode_ptr_t &&code) : code_(std::move(code)) {
wcstring re_error_t::message() const { return message_for_code(this->code); }
wcstring re::make_anchored(wcstring pattern) {
// PATTERN -> ^(:?PATTERN)$.
const wchar_t *prefix = L"^(?:";
const wchar_t *suffix = L")$";
pattern.reserve(pattern.size() + wcslen(prefix) + wcslen(suffix));
pattern.insert(0, prefix);
pattern.append(suffix);
return pattern;
/// FFI adapter around regex_t::try_compile(): packages the outcome as a regex_result_ffi
/// holding either a heap-allocated regex or a null pointer, together with the error record.
re::regex_result_ffi re::try_compile_ffi(const wcstring &pattern, const flags_t &flags) {
    re_error_t err{};
    auto compiled = regex_t::try_compile(pattern, flags, &err);
    if (!compiled) {
        // Compilation failed: no regex to hand out, only the error.
        return re::regex_result_ffi{nullptr, err};
    }
    return regex_result_ffi{std::make_unique<re::regex_t>(compiled.acquire()), err};
}
bool re::regex_result_ffi::has_error() const { return error.code != 0; }
re::re_error_t re::regex_result_ffi::get_error() const { return error; };
/// Hands ownership of the stored regex to the caller. The member is moved from, so a
/// second call returns a null pointer.
std::unique_ptr<re::regex_t> re::regex_result_ffi::get_regex() {
    std::unique_ptr<re::regex_t> taken = std::move(regex);
    return taken;
}

View File

@ -114,6 +114,9 @@ class regex_t : noncopyable_t {
/// A convenience function which calls prepare() for you.
maybe_t<match_range_t> match(const wcstring &subject) const;
/// FFI-friendly variant of match(): \return whether match() finds a match in \p subject,
/// without exposing the matched range.
bool matches_ffi(const wcstring &subject) const;
/// \return the matched range for an indexed or named capture group. 0 means the entire match.
maybe_t<match_range_t> group(const match_data_t &md, size_t group_idx) const;
maybe_t<match_range_t> group(const match_data_t &md, const wcstring &name) const;
@ -148,10 +151,16 @@ class regex_t : noncopyable_t {
adapters::bytecode_ptr_t code_;
};
/// Adjust a pattern so that it is anchored at both beginning and end.
/// This is a workaround for the fact that PCRE2_ENDANCHORED is unavailable on pre-2017 PCRE2
/// (e.g. 10.21, on Xenial).
wcstring make_anchored(wcstring pattern);
/// Result of try_compile_ffi(), shaped for consumption over the FFI boundary.
/// Member order matters: instances are built by aggregate initialization as {regex, error}.
struct regex_result_ffi {
    /// The compiled regex; null when compilation failed or after get_regex() was called.
    std::unique_ptr<re::regex_t> regex;
    /// Error record filled in by compilation; a nonzero code indicates failure.
    re::re_error_t error;
    /// \return whether error.code is nonzero.
    bool has_error() const;
    /// Moves the regex out of this result, transferring ownership to the caller.
    std::unique_ptr<re::regex_t> get_regex();
    /// \return a copy of the error record.
    re::re_error_t get_error() const;
};
/// FFI wrapper around regex_t::try_compile(): compiles \p pattern with \p flags and returns
/// the regex (or null) together with the error record in a regex_result_ffi.
regex_result_ffi try_compile_ffi(const wcstring &pattern, const flags_t &flags);
} // namespace re
#endif