mirror of
https://github.com/fish-shell/fish-shell.git
synced 2025-02-01 22:04:16 +08:00
Fix comparison warnings on UTF-16 platforms
Without true handling of UTF-16 surrogate pairs, all we can do is properly detect the BMP range in UTF-16 environments and bail if the input is in a non-BMP region. There isn't much else we can do, as it is incorrect to encode the surrogate pairs themselves (fish doesn't know what to do with them and they're illegal in both UTF-8 and UTF-32). (I'm not aware of fish being used on any UTF-16 platform other than Cygwin.)
This commit is contained in:
parent
215df7eec6
commit
1305a0899c
|
@ -146,11 +146,34 @@ static wchar_t *char_to_symbol(wchar_t wc, bool bind_friendly) {
|
|||
del_to_symbol(buf, sizeof(buf) / sizeof(*buf), wc, bind_friendly);
|
||||
} else if (wc < 0x80) { // ASCII characters that are not control characters
|
||||
ascii_printable_to_symbol(buf, sizeof(buf) / sizeof(*buf), wc, bind_friendly);
|
||||
} else if (wc <= 0xFFFF) { // BMP Unicode character
|
||||
}
|
||||
// Conditional handling of BMP Unicode characters depends on the encoding. Assume width of wchar_t
|
||||
// corresponds to the encoding, i.e. WCHAR_T_BITS == 16 implies UTF-16 and WCHAR_T_BITS == 32 implies UTF-32,
|
||||
// because there's no other sane way of handling the input.
|
||||
#if WCHAR_T_BITS == 16
|
||||
else if (wc <= 0xD7FF || (wc >= 0xE000 && wc <= 0xFFFD)) {
|
||||
// UTF-16 encoding of Unicode character in BMP range
|
||||
std::swprintf(buf, sizeof(buf) / sizeof(*buf), L"\\u%04X", wc);
|
||||
} else {
|
||||
// Our support for UTF-16 surrogate pairs is non-existent.
|
||||
// See https://github.com/fish-shell/fish-shell/issues/6585#issuecomment-783669903 for what
|
||||
// correct handling of surrogate pairs would look like - except it would need to be done
|
||||
// everywhere.
|
||||
|
||||
// 0xFFFD is the unicode codepoint for "symbol doesn't exist in codepage" and is the most
|
||||
// correct thing we can do given the byte-by-byte parsing without any support for surrogate
|
||||
// pairs.
|
||||
std::swprintf(buf, sizeof(buf) / sizeof(*buf), L"\\uFFFD");
|
||||
}
|
||||
#elif WCHAR_T_BITS == 32
|
||||
else if (wc <= 0xFFFF) { // BMP Unicode character
|
||||
std::swprintf(buf, sizeof(buf) / sizeof(*buf), L"\\u%04X", wc);
|
||||
} else { // Non-BMP Unicode character
|
||||
std::swprintf(buf, sizeof(buf) / sizeof(*buf), L"\\U%06X", wc);
|
||||
}
|
||||
#else
|
||||
static_assert(false, "Unsupported WCHAR_T size; unknown encoding!");
|
||||
#endif
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user