fish-shell/fish-rust/src/wchar.rs

67 lines
2.2 KiB
Rust

//! Support for wide strings.
//!
//! There are two wide string types that are commonly used:
//! - wstr: a string slice without a nul terminator. Like `&str` but wide chars.
//! - WString: an owning string without a nul terminator. Like `String` but wide chars.
use crate::common::{ENCODE_DIRECT_BASE, ENCODE_DIRECT_END};
pub use widestring::{Utf32Str as wstr, Utf32String as WString};
/// Pull in our extensions.
pub use crate::wchar_ext::IntoCharIter;
pub(crate) mod prelude {
pub(crate) use crate::{
wchar::{wstr, IntoCharIter, WString, L},
wchar_ext::{ToWString, WExt},
wutil::{sprintf, wgettext, wgettext_fmt, wgettext_maybe_fmt, wgettext_str},
};
pub(crate) use widestring_suffix::widestrs;
}
/// Creates a wstr string slice, like the "L" prefix of C++.
/// The result is of type wstr.
/// It is NOT nul-terminated.
macro_rules! L {
($string:expr) => {
widestring::utf32str!($string)
};
}
pub(crate) use L;
/// A proc-macro for creating wide string literals using an L *suffix*.
/// Example usage:
/// ```
/// #[widestrs]
/// pub fn func() {
/// let s = "hello"L; // type &'static wstr
/// }
/// ```
/// Note: the resulting string is NOT nul-terminated.
pub use widestring_suffix::widestrs;
/// Encode a literal byte in a UTF-32 character. This is required for e.g. the echo builtin, whose
/// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g.
/// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it
/// as a UTF-32 character, printing them would result in several characters instead of one UTF-8
/// character.
///
/// See https://github.com/fish-shell/fish-shell/issues/1894.
pub fn encode_byte_to_char(byte: u8) -> char {
char::from_u32(u32::from(ENCODE_DIRECT_BASE) + u32::from(byte))
.expect("private-use codepoint should be valid char")
}
/// Decode a literal byte from a UTF-32 character.
pub fn decode_byte_from_char(c: char) -> Option<u8> {
if c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END {
Some(
(u32::from(c) - u32::from(ENCODE_DIRECT_BASE))
.try_into()
.unwrap(),
)
} else {
None
}
}