2023-03-19 23:54:07 +08:00
|
|
|
use std::{iter, slice};
|
|
|
|
|
2023-01-15 06:56:24 +08:00
|
|
|
use crate::wchar::{wstr, WString};
|
|
|
|
use widestring::utfstr::CharsUtf32;
|
|
|
|
|
2023-03-19 11:11:18 +08:00
|
|
|
/// Helpers to convert things to widestring.
|
|
|
|
/// This is like std::string::ToString.
|
|
|
|
pub trait ToWString {
|
|
|
|
fn to_wstring(&self) -> WString;
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
fn to_wstring_impl(mut val: u64, neg: bool) -> WString {
|
|
|
|
// 20 digits max in u64: 18446744073709551616.
|
|
|
|
let mut digits = [0; 24];
|
|
|
|
let mut ndigits = 0;
|
|
|
|
while val > 0 {
|
|
|
|
digits[ndigits] = (val % 10) as u8;
|
|
|
|
val /= 10;
|
|
|
|
ndigits += 1;
|
|
|
|
}
|
|
|
|
if ndigits == 0 {
|
|
|
|
digits[0] = 0;
|
|
|
|
ndigits = 1;
|
|
|
|
}
|
|
|
|
let mut result = WString::with_capacity(ndigits + neg as usize);
|
|
|
|
if neg {
|
|
|
|
result.push('-');
|
|
|
|
}
|
|
|
|
for i in (0..ndigits).rev() {
|
|
|
|
result.push((digits[i] + b'0') as char);
|
|
|
|
}
|
|
|
|
result
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Implement to_wstring() for signed types.
|
|
|
|
macro_rules! impl_to_wstring_signed {
|
2023-03-20 15:15:21 +08:00
|
|
|
($($t:ty), *) => {
|
|
|
|
$(
|
2023-03-19 11:11:18 +08:00
|
|
|
impl ToWString for $t {
|
|
|
|
fn to_wstring(&self) -> WString {
|
|
|
|
let val = *self as i64;
|
|
|
|
to_wstring_impl(val.unsigned_abs(), val < 0)
|
|
|
|
}
|
|
|
|
}
|
2023-03-20 15:15:21 +08:00
|
|
|
)*
|
2023-03-19 11:11:18 +08:00
|
|
|
};
|
|
|
|
}
|
2023-03-20 15:15:21 +08:00
|
|
|
impl_to_wstring_signed!(i8, i16, i32, i64, isize);
|
2023-03-19 11:11:18 +08:00
|
|
|
|
|
|
|
/// Implement to_wstring() for unsigned types.
|
|
|
|
macro_rules! impl_to_wstring_unsigned {
|
2023-03-20 15:15:21 +08:00
|
|
|
($($t:ty), *) => {
|
|
|
|
$(
|
2023-03-19 11:11:18 +08:00
|
|
|
impl ToWString for $t {
|
|
|
|
fn to_wstring(&self) -> WString {
|
|
|
|
to_wstring_impl(*self as u64, false)
|
|
|
|
}
|
|
|
|
}
|
2023-03-20 15:15:21 +08:00
|
|
|
)*
|
2023-03-19 11:11:18 +08:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2023-03-20 15:15:21 +08:00
|
|
|
impl_to_wstring_unsigned!(u8, u16, u32, u64, usize);
|
2023-03-19 11:11:18 +08:00
|
|
|
|
|
|
|
/// Checks `to_wstring()` on small values, on a sweep of alternating-sign
/// powers of three, and on the extremes of u64 and i64.
#[test]
fn test_to_wstring() {
    // Small unsigned and signed values.
    assert_eq!(0_u64.to_wstring(), "0");
    assert_eq!(1_u64.to_wstring(), "1");
    assert_eq!(0_i64.to_wstring(), "0");
    assert_eq!(1_i64.to_wstring(), "1");
    assert_eq!((-1_i64).to_wstring(), "-1");
    assert_eq!((-5_i64).to_wstring(), "-5");

    // 1, -3, 9, -27, ... until the multiplication would overflow i64;
    // each value must format exactly as the standard library formats it.
    for val in std::iter::successors(Some(1_i64), |prev| prev.checked_mul(-3)) {
        assert_eq!(val.to_wstring(), val.to_string());
    }

    // Extremes.
    assert_eq!(u64::MAX.to_wstring(), "18446744073709551615");
    assert_eq!(i64::MIN.to_wstring(), "-9223372036854775808");
    assert_eq!(i64::MAX.to_wstring(), "9223372036854775807");
}
|
|
|
|
|
2023-03-06 11:52:11 +08:00
|
|
|
/// Something that can be turned into an iterator over `char`s, where that
/// iterator can be walked from either end and cloned.
/// Typical implementors are char, &str, &wstr and &WString.
pub trait IntoCharIter {
    /// The concrete iterator returned by `chars()`.
    type Iter: Clone + DoubleEndedIterator<Item = char>;

    /// Consume `self` and return an iterator over its chars.
    fn chars(self) -> Self::Iter;
}
|
|
|
|
|
2023-03-06 11:52:11 +08:00
|
|
|
impl IntoCharIter for char {
|
2023-01-15 06:56:24 +08:00
|
|
|
type Iter = std::iter::Once<char>;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
std::iter::once(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 11:52:11 +08:00
|
|
|
impl<'a> IntoCharIter for &'a str {
|
2023-01-15 06:56:24 +08:00
|
|
|
type Iter = std::str::Chars<'a>;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
str::chars(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-19 23:54:07 +08:00
|
|
|
impl<'a> IntoCharIter for &'a [char] {
|
|
|
|
type Iter = iter::Copied<slice::Iter<'a, char>>;
|
|
|
|
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
self.iter().copied()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 11:52:11 +08:00
|
|
|
impl<'a> IntoCharIter for &'a wstr {
|
2023-01-15 06:56:24 +08:00
|
|
|
type Iter = CharsUtf32<'a>;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
wstr::chars(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 11:52:11 +08:00
|
|
|
impl<'a> IntoCharIter for &'a WString {
|
2023-01-15 06:56:24 +08:00
|
|
|
type Iter = CharsUtf32<'a>;
|
|
|
|
fn chars(self) -> Self::Iter {
|
2023-02-05 07:45:25 +08:00
|
|
|
wstr::chars(self)
|
2023-01-15 06:56:24 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-06 11:52:12 +08:00
|
|
|
// Also support `str.chars()` itself.
|
|
|
|
impl<'a> IntoCharIter for std::str::Chars<'a> {
|
|
|
|
type Iter = Self;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Also support `wstr.chars()` itself.
|
|
|
|
impl<'a> IntoCharIter for CharsUtf32<'a> {
|
|
|
|
type Iter = Self;
|
|
|
|
fn chars(self) -> Self::Iter {
|
|
|
|
self
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-15 06:56:24 +08:00
|
|
|
/// Returns true if `prefix` is a prefix of `contents`.
///
/// Items are compared pairwise in order. The result is false as soon as an item
/// differs or `contents` runs out first; an empty `prefix` always matches.
fn iter_prefixes_iter<Prefix, Contents>(mut prefix: Prefix, mut contents: Contents) -> bool
where
    Prefix: Iterator,
    Contents: Iterator,
    Prefix::Item: PartialEq<Contents::Item>,
{
    // `all` stops at the first mismatch, so `contents` is advanced only as far
    // as is needed to decide.
    prefix.all(|wanted| contents.next().map_or(false, |found| wanted == found))
}
|
|
|
|
|
2023-04-10 04:41:04 +08:00
|
|
|
/// Iterator produced when a wide string is split on a single char.
pub struct WStrCharSplitIter<'a> {
    /// The separator char.
    split: char,
    /// The chars not yet handed out, or `None` when there is nothing left to yield.
    chars: Option<&'a [char]>,
}
|
|
|
|
|
|
|
|
impl<'a> Iterator for WStrCharSplitIter<'a> {
|
|
|
|
type Item = &'a wstr;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
2023-04-24 10:33:10 +08:00
|
|
|
let chars = self.chars?;
|
|
|
|
if let Some(idx) = chars.iter().position(|c| *c == self.split) {
|
|
|
|
let (prefix, rest) = chars.split_at(idx);
|
|
|
|
self.chars = Some(&rest[1..]);
|
2023-04-10 04:41:04 +08:00
|
|
|
return Some(wstr::from_char_slice(prefix));
|
|
|
|
} else {
|
2023-04-24 10:33:10 +08:00
|
|
|
self.chars = None;
|
|
|
|
return Some(wstr::from_char_slice(chars));
|
2023-04-10 04:41:04 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-15 06:56:24 +08:00
|
|
|
/// Convenience functions for WString.
|
|
|
|
pub trait WExt {
|
|
|
|
/// Access the chars of a WString or wstr.
|
|
|
|
fn as_char_slice(&self) -> &[char];
|
|
|
|
|
2023-04-02 01:17:49 +08:00
|
|
|
/// Return a char slice from a *char index*.
|
|
|
|
/// This is different from Rust string slicing, which takes a byte index.
|
|
|
|
fn slice_from(&self, start: usize) -> &wstr {
|
|
|
|
let chars = self.as_char_slice();
|
|
|
|
wstr::from_char_slice(&chars[start..])
|
|
|
|
}
|
|
|
|
|
2023-05-01 03:38:06 +08:00
|
|
|
/// Return a char slice up to a *char index*.
|
|
|
|
/// This is different from Rust string slicing, which takes a byte index.
|
|
|
|
fn slice_to(&self, end: usize) -> &wstr {
|
|
|
|
let chars = self.as_char_slice();
|
|
|
|
wstr::from_char_slice(&chars[..end])
|
|
|
|
}
|
|
|
|
|
2023-04-24 10:33:10 +08:00
|
|
|
/// Return the number of chars.
|
|
|
|
/// This is different from Rust string len, which returns the number of bytes.
|
|
|
|
fn char_count(&self) -> usize {
|
|
|
|
self.as_char_slice().len()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the char at an index.
|
2023-01-15 06:56:24 +08:00
|
|
|
/// If the index is equal to the length, return '\0'.
|
|
|
|
/// If the index exceeds the length, then panic.
|
|
|
|
fn char_at(&self, index: usize) -> char {
|
|
|
|
let chars = self.as_char_slice();
|
|
|
|
if index == chars.len() {
|
|
|
|
'\0'
|
|
|
|
} else {
|
|
|
|
chars[index]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-10 04:41:04 +08:00
|
|
|
/// \return an iterator over substrings, split by a given char.
|
|
|
|
/// The split char is not included in the substrings.
|
|
|
|
fn split(&self, c: char) -> WStrCharSplitIter {
|
|
|
|
WStrCharSplitIter {
|
|
|
|
split: c,
|
2023-04-24 10:33:10 +08:00
|
|
|
chars: Some(self.as_char_slice()),
|
2023-04-10 04:41:04 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-15 06:56:24 +08:00
|
|
|
/// \return the index of the first occurrence of the given char, or None.
|
|
|
|
fn find_char(&self, c: char) -> Option<usize> {
|
|
|
|
self.as_char_slice().iter().position(|&x| x == c)
|
|
|
|
}
|
|
|
|
|
2023-04-18 17:53:48 +08:00
|
|
|
fn contains(&self, c: char) -> bool {
|
|
|
|
self.as_char_slice().iter().any(|&x| x == c)
|
|
|
|
}
|
|
|
|
|
2023-01-15 06:56:24 +08:00
|
|
|
/// \return whether we start with a given Prefix.
|
|
|
|
/// The Prefix can be a char, a &str, a &wstr, or a &WString.
|
2023-03-06 11:52:11 +08:00
|
|
|
fn starts_with<Prefix: IntoCharIter>(&self, prefix: Prefix) -> bool {
|
2023-01-15 06:56:24 +08:00
|
|
|
iter_prefixes_iter(prefix.chars(), self.as_char_slice().iter().copied())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// \return whether we end with a given Suffix.
|
|
|
|
/// The Suffix can be a char, a &str, a &wstr, or a &WString.
|
2023-03-06 11:52:11 +08:00
|
|
|
fn ends_with<Suffix: IntoCharIter>(&self, suffix: Suffix) -> bool {
|
2023-01-15 06:56:24 +08:00
|
|
|
iter_prefixes_iter(
|
|
|
|
suffix.chars().rev(),
|
|
|
|
self.as_char_slice().iter().copied().rev(),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl WExt for WString {
|
|
|
|
fn as_char_slice(&self) -> &[char] {
|
|
|
|
self.as_utfstr().as_char_slice()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl WExt for wstr {
|
|
|
|
fn as_char_slice(&self) -> &[char] {
|
|
|
|
wstr::as_char_slice(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Unit tests for the `WExt` convenience methods.
#[cfg(test)]
mod tests {
    use super::WExt;
    use crate::wchar::{wstr, WString, L};

    /// `find_char` reports the index of the first match, or None.
    // Marked `#[test]` (not `#[cfg(test)]`) so the harness actually runs it.
    #[test]
    fn test_find_char() {
        assert_eq!(Some(0), L!("abc").find_char('a'));
        assert_eq!(Some(1), L!("abc").find_char('b'));
        assert_eq!(None, L!("abc").find_char('X'));
        assert_eq!(None, L!("").find_char('X'));
    }

    /// `starts_with` accepts a char, &str, &wstr or &WString prefix.
    // Marked `#[test]` (not `#[cfg(test)]`) so the harness actually runs it.
    #[test]
    fn test_prefix() {
        assert!(L!("").starts_with(L!("")));
        assert!(L!("abc").starts_with(L!("")));
        assert!(L!("abc").starts_with('a'));
        assert!(L!("abc").starts_with("ab"));
        assert!(L!("abc").starts_with(L!("ab")));
        assert!(L!("abc").starts_with(&WString::from_str("abc")));
    }

    /// `ends_with` accepts a char, &str, &wstr or &WString suffix.
    // Marked `#[test]` (not `#[cfg(test)]`) so the harness actually runs it.
    #[test]
    fn test_suffix() {
        assert!(L!("").ends_with(L!("")));
        assert!(L!("abc").ends_with(L!("")));
        assert!(L!("abc").ends_with('c'));
        assert!(L!("abc").ends_with("bc"));
        assert!(L!("abc").ends_with(L!("bc")));
        assert!(L!("abc").ends_with(&WString::from_str("abc")));
    }

    /// `split` yields every piece between separators, including empty ones.
    #[test]
    fn test_split() {
        fn do_split(s: &wstr, c: char) -> Vec<&wstr> {
            s.split(c).collect()
        }
        assert_eq!(do_split(L!(""), 'b'), &[""]);
        assert_eq!(do_split(L!("abc"), 'b'), &["a", "c"]);
        assert_eq!(do_split(L!("xxb"), 'x'), &["", "", "b"]);
        assert_eq!(do_split(L!("bxxxb"), 'x'), &["b", "", "", "b"]);
        assert_eq!(do_split(L!(""), 'x'), &[""]);
        assert_eq!(do_split(L!("foo,bar,baz"), ','), &["foo", "bar", "baz"]);
        assert_eq!(do_split(L!("foobar"), ','), &["foobar"]);
        assert_eq!(do_split(L!("1,2,3,4,5"), ','), &["1", "2", "3", "4", "5"]);
        assert_eq!(
            do_split(L!("1,2,3,4,5,"), ','),
            &["1", "2", "3", "4", "5", ""]
        );
        assert_eq!(
            do_split(L!("Hello\nworld\nRust"), '\n'),
            &["Hello", "world", "Rust"]
        );
    }
}
|