// printf - format and print data
// Copyright (C) 1990-2007 Free Software Foundation, Inc.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2, or (at your option)
// any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software Foundation,
// Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

// Usage: printf format [argument...]
//
// A front end to the printf function that lets it be used from the shell.
//
// Backslash escapes:
//
// \" = double quote
// \\ = backslash
// \a = alert (bell)
// \b = backspace
// \c = produce no further output
// \e = escape
// \f = form feed
// \n = new line
// \r = carriage return
// \t = horizontal tab
// \v = vertical tab
// \ooo = octal number (ooo is 1 to 3 digits)
// \xhh = hexadecimal number (hh is 1 to 2 digits)
// \uhhhh = 16-bit Unicode character (hhhh is 4 digits)
// \Uhhhhhhhh = 32-bit Unicode character (hhhhhhhh is 8 digits)
//
// Additional directive:
//
// %b = print an argument string, interpreting backslash escapes,
//   except that octal escapes are of the form \0 or \0ooo.
//
// The `format' argument is re-used as many times as necessary
// to convert all of the given arguments.
//
// David MacKenzie

// This file has been imported from source code of printf command in GNU Coreutils version 6.9.
#include "config.h"  // IWYU pragma: keep

#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>

#include "builtin.h"
#include "common.h"
#include "io.h"
#include "proc.h"
#include "wutil.h"  // IWYU pragma: keep

class parser_t;

/// State carried through one invocation of the printf builtin: the streams to write to, the exit
/// status accumulated so far, and a flag that suppresses all further output.
struct builtin_printf_state_t {
    // Out and err streams. Note this is a captured reference!
    io_streams_t &streams;

    // The status of the operation.
    int exit_code;

    // Whether we should stop outputting. This gets set in the case of an error, and also with the
    // \c escape.
    bool early_exit;

    explicit builtin_printf_state_t(io_streams_t &s)
        : streams(s), exit_code(0), early_exit(false) {}

    void verify_numeric(const wchar_t *s, const wchar_t *end, int errcode);

    void print_direc(const wchar_t *start, size_t length, wchar_t conversion,
                     bool have_field_width, int field_width, bool have_precision, int precision,
                     wchar_t const *argument);

    int print_formatted(const wchar_t *format, int argc, wchar_t **argv);

    void fatal_error(const wchar_t *format, ...);

    long print_esc(const wchar_t *escstart, bool octal_0);
    void print_esc_string(const wchar_t *str);
    void print_esc_char(wchar_t c);

    void append_output(wchar_t c);
    void append_output(const wchar_t *c);
    void append_format_output(const wchar_t *fmt, ...);
};

/// Return true if C is one of the characters '0'..'7'. The NUL check is needed because wcschr()
/// would otherwise match the terminator of the digit string.
static bool is_octal_digit(wchar_t c) { return c != L'\0' && wcschr(L"01234567", c) != NULL; }

/// Return true if C is a hexadecimal digit (either case). NUL is rejected for the same reason as
/// in is_octal_digit().
static bool is_hex_digit(wchar_t c) {
    return c != L'\0' && wcschr(L"0123456789ABCDEFabcdef", c) != NULL;
}

/// Return the numeric value (0-15) of the hexadecimal digit C, or -1 if C is not a hex digit.
static int hex_to_bin(wchar_t c) {
    switch (c) {
        case L'0': return 0;
        case L'1': return 1;
        case L'2': return 2;
        case L'3': return 3;
        case L'4': return 4;
        case L'5': return 5;
        case L'6': return 6;
        case L'7': return 7;
        case L'8': return 8;
        case L'9': return 9;
        case L'a':
        case L'A': return 10;
        case L'b':
        case L'B': return 11;
        case L'c':
        case L'C': return 12;
        case L'd':
        case L'D': return 13;
        case L'e':
        case L'E': return 14;
        case L'f':
        case L'F': return 15;
        default: return -1;
    }
}

/// Return the numeric value (0-7) of the octal digit C, or -1 if C is not an octal digit.
static int octal_to_bin(wchar_t c) {
    switch (c) {
        case L'0': return 0;
        case L'1': return 1;
        case L'2': return 2;
        case L'3': return 3;
        case L'4': return 4;
        case L'5': return 5;
        case L'6': return 6;
        case L'7': return 7;
        default: return -1;
    }
}

/// Format an error message, append it to the error stream (adding a trailing newline if the
/// message does not already end in one), set the exit code to STATUS_BUILTIN_ERROR and suppress
/// all further output. Only the first error is reported; later calls are no-ops.
void builtin_printf_state_t::fatal_error(const wchar_t *fmt, ...) {
    // Don't error twice.
    if (early_exit) return;

    va_list va;
    va_start(va, fmt);
    wcstring errstr = vformat_string(fmt, va);
    va_end(va);
    streams.err.append(errstr);
    if (!string_suffixes_string(L"\n", errstr)) streams.err.push_back(L'\n');

    this->exit_code = STATUS_BUILTIN_ERROR;
    this->early_exit = true;
}

/// Append the single character C to the output stream unless output has been stopped.
void builtin_printf_state_t::append_output(wchar_t c) {
    // Don't output if we're done.
    if (early_exit) return;

    streams.out.push_back(c);
}

/// Append the NUL-terminated string C to the output stream unless output has been stopped.
void builtin_printf_state_t::append_output(const wchar_t *c) {
    // Don't output if we're done.
    if (early_exit) return;

    streams.out.append(c);
}

/// Format FMT with the trailing arguments and append the result to the output stream unless
/// output has been stopped.
void builtin_printf_state_t::append_format_output(const wchar_t *fmt, ...) {
    // Don't output if we're done.
    if (early_exit) return;

    va_list va;
    va_start(va, fmt);
    wcstring tmp = vformat_string(fmt, va);
    va_end(va);
    streams.out.append(tmp);
}

/// Report a fatal error if the numeric conversion of S was not clean. ERRCODE is the errno value
/// left by the conversion and END is where the conversion stopped: a non-zero ERRCODE is reported
/// via strerror(), otherwise unconverted trailing characters are reported.
void builtin_printf_state_t::verify_numeric(const wchar_t *s, const wchar_t *end, int errcode) {
    if (errcode != 0) {
        this->fatal_error(L"%ls: %s", s, strerror(errcode));
    } else if (*end) {
        if (s == end)
            this->fatal_error(_(L"%ls: expected a numeric value"), s);
        else
            this->fatal_error(_(L"%ls: value not completely converted"), s);
    }
}

/// Convert the leading part of S to a value of type T, storing a pointer to the first unconverted
/// character in *END. Only the specializations below are defined.
template <typename T>
static T raw_string_to_scalar_type(const wchar_t *s, wchar_t **end);

// we use wcstoll instead of wcstoimax because FreeBSD 8 has busted wcstoumax and wcstoimax - see
// #626
template <>
intmax_t raw_string_to_scalar_type<intmax_t>(const wchar_t *s, wchar_t **end) {
    return wcstoll(s, end, 0);
}

template <>
uintmax_t raw_string_to_scalar_type<uintmax_t>(const wchar_t *s, wchar_t **end) {
    return wcstoull(s, end, 0);
}

template <>
long double raw_string_to_scalar_type<long double>(const wchar_t *s, wchar_t **end) {
    double val = wcstod(s, end);
    if (**end == L'\0') return val;

    // The conversion using the user's locale failed. That may be due to the string not being a
    // valid floating point value. It could also be due to the locale using different separator
    // characters than the normal english convention. So try again by forcing the use of a locale
    // that employs the english convention for writing floating point numbers.
    //
    // TODO: switch to the wcstod_l() function to avoid changing the global locale.
    const char *current_locale = setlocale(LC_NUMERIC, NULL);
    char *saved_locale = current_locale ? strdup(current_locale) : NULL;
    // If the current locale cannot be queried or copied we could not restore it afterwards, so
    // keep the result of the first attempt rather than leaving the global locale changed.
    if (saved_locale == NULL) return val;

    setlocale(LC_NUMERIC, "C");
    val = wcstod(s, end);
    setlocale(LC_NUMERIC, saved_locale);
    free(saved_locale);
    return val;
}

/// Convert the argument string S to a value of type T. If S starts with a single or double quote
/// the result is the numeric value of the character following the quote; otherwise S is parsed as
/// a number and any conversion problem is reported through STATE->verify_numeric().
template <typename T>
static T string_to_scalar_type(const wchar_t *s, builtin_printf_state_t *state) {
    T val;
    if (*s == L'\"' || *s == L'\'') {
        wchar_t ch = *++s;
        val = ch;
    } else {
        wchar_t *end = NULL;
        errno = 0;
        // T only appears in the return type, so it must be named explicitly here.
        val = raw_string_to_scalar_type<T>(s, &end);
        state->verify_numeric(s, end, errno);
    }
    return val;
}

/// Output a single-character \ escape.
void builtin_printf_state_t::print_esc_char(wchar_t c) { switch (c) { case L'a': { // alert this->append_output(L'\a'); break; } case L'b': { // backspace this->append_output(L'\b'); break; } case L'c': { // cancel the rest of the output this->early_exit = true; break; } case L'e': { // escape this->append_output(L'\x1B'); break; } case L'f': { // form feed this->append_output(L'\f'); break; } case L'n': { // new line this->append_output(L'\n'); break; } case L'r': { // carriage return this->append_output(L'\r'); break; } case L't': { // horizontal tab this->append_output(L'\t'); break; } case L'v': { // vertical tab this->append_output(L'\v'); break; } default: { this->append_output(c); break; } } } /// Print a \ escape sequence starting at ESCSTART. /// Return the number of characters in the escape sequence besides the backslash.. /// If OCTAL_0 is nonzero, octal escapes are of the form \0ooo, where o /// is an octal digit; otherwise they are of the form \ooo. long builtin_printf_state_t::print_esc(const wchar_t *escstart, bool octal_0) { const wchar_t *p = escstart + 1; int esc_value = 0; /* Value of \nnn escape. */ int esc_length; /* Length of \nnn escape. */ if (*p == L'x') { // A hexadecimal \xhh escape sequence must have 1 or 2 hex. digits. for (esc_length = 0, ++p; esc_length < 2 && is_hex_digit(*p); ++esc_length, ++p) esc_value = esc_value * 16 + hex_to_bin(*p); if (esc_length == 0) this->fatal_error(_(L"missing hexadecimal number in escape")); this->append_output(ENCODE_DIRECT_BASE + esc_value % 256); } else if (is_octal_digit(*p)) { // Parse \0ooo (if octal_0 && *p == L'0') or \ooo (otherwise). Allow \ooo if octal_0 && *p // != L'0'; this is an undocumented extension to POSIX that is compatible with Bash 2.05b. // Wrap mod 256, which matches historic behavior. 
for (esc_length = 0, p += octal_0 && *p == L'0'; esc_length < 3 && is_octal_digit(*p); ++esc_length, ++p) esc_value = esc_value * 8 + octal_to_bin(*p); this->append_output(ENCODE_DIRECT_BASE + esc_value % 256); } else if (*p && wcschr(L"\"\\abcefnrtv", *p)) { print_esc_char(*p++); } else if (*p == L'u' || *p == L'U') { wchar_t esc_char = *p; p++; uint32_t uni_value = 0; for (size_t esc_length = 0; esc_length < (esc_char == L'u' ? 4 : 8); esc_length++) { if (!is_hex_digit(*p)) { // Escape sequence must be done. Complain if we didn't get anything. if (esc_length == 0) { this->fatal_error(_(L"Missing hexadecimal number in Unicode escape")); } break; } uni_value = uni_value * 16 + hex_to_bin(*p); p++; } // PCA GNU printf respects the limitations described in ISO N717, about which universal // characters "shall not" be specified. I believe this limitation is for the benefit of // compilers; I see no reason to impose it in builtin_printf. // // If __STDC_ISO_10646__ is defined, then it means wchar_t can and does hold Unicode code // points, so just use that. If not defined, use the %lc printf conversion; this probably // won't do anything good if your wide character set is not Unicode, but such platforms are // exceedingly rare. if (uni_value > 0x10FFFF) { this->fatal_error(_(L"Unicode character out of range: \\%c%0*x"), esc_char, (esc_char == L'u' ? 4 : 8), uni_value); } else { #if defined(__STDC_ISO_10646__) this->append_output(uni_value); #else this->append_format_output(L"%lc", uni_value); #endif } } else { this->append_output(L'\\'); if (*p) { this->append_output(*p); p++; } } return p - escstart - 1; } /// Print string STR, evaluating \ escapes. void builtin_printf_state_t::print_esc_string(const wchar_t *str) { for (; *str; str++) if (*str == L'\\') str += print_esc(str, true); else this->append_output(*str); } /// Evaluate a printf conversion specification. 
START is the start of the directive, LENGTH is its /// length, and CONVERSION specifies the type of conversion. LENGTH does not include any length /// modifier or the conversion specifier itself. FIELD_WIDTH and PRECISION are the field width and /// precision for '*' values, if HAVE_FIELD_WIDTH and HAVE_PRECISION are true, respectively. /// ARGUMENT is the argument to be formatted. void builtin_printf_state_t::print_direc(const wchar_t *start, size_t length, wchar_t conversion, bool have_field_width, int field_width, bool have_precision, int precision, wchar_t const *argument) { // Start with everything except the conversion specifier. wcstring fmt(start, length); // Create a copy of the % directive, with an intmax_t-wide width modifier substituted for any // existing integer length modifier. switch (conversion) { case L'd': case L'i': case L'u': { fmt.append(L"ll"); break; } case L'a': case L'e': case L'f': case L'g': case L'A': case L'E': case L'F': case L'G': { fmt.append(L"L"); break; } case L's': case L'c': { fmt.append(L"l"); break; } default: { break; } } // Append the conversion itself. 
fmt.push_back(conversion); switch (conversion) { case L'd': case L'i': { intmax_t arg = string_to_scalar_type(argument, this); if (!have_field_width) { if (!have_precision) this->append_format_output(fmt.c_str(), arg); else this->append_format_output(fmt.c_str(), precision, arg); } else { if (!have_precision) this->append_format_output(fmt.c_str(), field_width, arg); else this->append_format_output(fmt.c_str(), field_width, precision, arg); } break; } case L'o': case L'u': case L'x': case L'X': { uintmax_t arg = string_to_scalar_type(argument, this); if (!have_field_width) { if (!have_precision) this->append_format_output(fmt.c_str(), arg); else this->append_format_output(fmt.c_str(), precision, arg); } else { if (!have_precision) this->append_format_output(fmt.c_str(), field_width, arg); else this->append_format_output(fmt.c_str(), field_width, precision, arg); } break; } case L'a': case L'A': case L'e': case L'E': case L'f': case L'F': case L'g': case L'G': { long double arg = string_to_scalar_type(argument, this); if (!have_field_width) { if (!have_precision) { this->append_format_output(fmt.c_str(), arg); } else { this->append_format_output(fmt.c_str(), precision, arg); } } else { if (!have_precision) { this->append_format_output(fmt.c_str(), field_width, arg); } else { this->append_format_output(fmt.c_str(), field_width, precision, arg); } } break; } case L'c': { if (!have_field_width) { this->append_format_output(fmt.c_str(), *argument); } else { this->append_format_output(fmt.c_str(), field_width, *argument); } break; } case L's': { if (!have_field_width) { if (!have_precision) { this->append_format_output(fmt.c_str(), argument); } else { this->append_format_output(fmt.c_str(), precision, argument); } } else { if (!have_precision) { this->append_format_output(fmt.c_str(), field_width, argument); } else { this->append_format_output(fmt.c_str(), field_width, precision, argument); } } break; } default: { DIE("unexpected opt"); break; } } } /// For each 
character in str, set the corresponding boolean in the array to the given flag. static inline void modify_allowed_format_specifiers(bool ok[UCHAR_MAX + 1], const char *str, bool flag) { for (const char *c = str; *c != '\0'; c++) { unsigned char idx = static_cast(*c); ok[idx] = flag; } } /// Print the text in FORMAT, using ARGV (with ARGC elements) for arguments to any `%' directives. /// Return the number of elements of ARGV used. int builtin_printf_state_t::print_formatted(const wchar_t *format, int argc, wchar_t **argv) { int save_argc = argc; /* Preserve original value. */ const wchar_t *f; /* Pointer into `format'. */ const wchar_t *direc_start; /* Start of % directive. */ size_t direc_length; /* Length of % directive. */ bool have_field_width; /* True if FIELD_WIDTH is valid. */ int field_width = 0; /* Arg to first '*'. */ bool have_precision; /* True if PRECISION is valid. */ int precision = 0; /* Arg to second '*'. */ bool ok[UCHAR_MAX + 1] = {}; /* ok['x'] is true if %x is allowed. */ for (f = format; *f != L'\0'; ++f) { switch (*f) { case L'%': { direc_start = f++; direc_length = 1; have_field_width = have_precision = false; if (*f == L'%') { this->append_output(L'%'); break; } if (*f == L'b') { // FIXME: Field width and precision are not supported for %b, even though POSIX // requires it. 
if (argc > 0) { print_esc_string(*argv); ++argv; --argc; } break; } modify_allowed_format_specifiers(ok, "aAcdeEfFgGiosuxX", true); for (bool continue_looking_for_flags = true; continue_looking_for_flags;) { switch (*f) { case L'I': case L'\'': { modify_allowed_format_specifiers(ok, "aAceEosxX", false); break; } case '-': case '+': case ' ': { break; } case L'#': { modify_allowed_format_specifiers(ok, "cdisu", false); break; } case '0': { modify_allowed_format_specifiers(ok, "cs", false); break; } default: { continue_looking_for_flags = false; break; } } if (continue_looking_for_flags) { f++; direc_length++; } } if (*f == L'*') { ++f; ++direc_length; if (argc > 0) { intmax_t width = string_to_scalar_type(*argv, this); if (INT_MIN <= width && width <= INT_MAX) field_width = static_cast(width); else this->fatal_error(_(L"invalid field width: %ls"), *argv); ++argv; --argc; } else { field_width = 0; } have_field_width = true; } else { while (iswdigit(*f)) { ++f; ++direc_length; } } if (*f == L'.') { ++f; ++direc_length; modify_allowed_format_specifiers(ok, "c", false); if (*f == L'*') { ++f; ++direc_length; if (argc > 0) { intmax_t prec = string_to_scalar_type(*argv, this); if (prec < 0) { // A negative precision is taken as if the precision were omitted, // so -1 is safe here even if prec < INT_MIN. precision = -1; } else if (INT_MAX < prec) this->fatal_error(_(L"invalid precision: %ls"), *argv); else { precision = static_cast(prec); } ++argv; --argc; } else { precision = 0; } have_precision = true; } else { while (iswdigit(*f)) { ++f; ++direc_length; } } } while (*f == L'l' || *f == L'L' || *f == L'h' || *f == L'j' || *f == L't' || *f == L'z') { ++f; } wchar_t conversion = *f; if (conversion > 0xFF || !ok[conversion]) { this->fatal_error(_(L"%.*ls: invalid conversion specification"), (int)(f + 1 - direc_start), direc_start); return 0; } print_direc(direc_start, direc_length, *f, have_field_width, field_width, have_precision, precision, (argc <= 0 ? 
L"" : (argc--, *argv++))); break; } case L'\\': { f += print_esc(f, false); break; } default: { this->append_output(*f); break; } } } return save_argc - argc; } /// The printf builtin. int builtin_printf(parser_t &parser, io_streams_t &streams, wchar_t **argv) { UNUSED(parser); builtin_printf_state_t state(streams); wchar_t *format; int args_used; int argc = builtin_count_args(argv); if (argc <= 1) { state.fatal_error(_(L"printf: not enough arguments")); return STATUS_BUILTIN_ERROR; } format = argv[1]; argc -= 2; argv += 2; do { args_used = state.print_formatted(format, argc, argv); argc -= args_used; argv += args_used; } while (args_used > 0 && argc > 0 && !state.early_exit); return state.exit_code; }