2005-09-20 21:26:39 +08:00
/** \file common.c
2012-11-19 08:30:30 +08:00
2005-09-20 21:26:39 +08:00
Various functions , mostly string utilities , that are used by most
parts of fish .
*/
2005-10-05 17:58:00 +08:00
# include "config.h"
2006-02-28 21:17:16 +08:00
2006-07-28 20:50:57 +08:00
# include <unistd.h>
2006-08-01 08:35:56 +08:00
# ifdef HAVE_STROPTS_H
2006-07-28 20:50:57 +08:00
# include <stropts.h>
# endif
2006-07-31 04:26:59 +08:00
# ifdef HAVE_SIGINFO_H
# include <siginfo.h>
# endif
2005-09-20 21:26:39 +08:00
# include <stdlib.h>
# include <termios.h>
# include <wchar.h>
# include <string.h>
# include <stdio.h>
# include <dirent.h>
# include <sys/types.h>
2006-08-10 06:53:38 +08:00
# ifdef HAVE_SYS_IOCTL_H
2005-11-28 22:42:02 +08:00
# include <sys/ioctl.h>
2006-08-10 06:53:38 +08:00
# endif
2005-09-20 21:26:39 +08:00
# include <sys/stat.h>
# include <unistd.h>
# include <wctype.h>
# include <errno.h>
# include <limits.h>
2012-11-19 08:30:30 +08:00
# include <stdarg.h>
2005-09-20 21:26:39 +08:00
# include <locale.h>
2005-09-21 07:42:00 +08:00
# include <time.h>
2005-09-28 09:43:09 +08:00
# include <sys/time.h>
# include <fcntl.h>
2011-12-27 11:18:46 +08:00
# include <algorithm>
2005-09-20 21:26:39 +08:00
2007-01-20 10:36:49 +08:00
# ifdef HAVE_EXECINFO_H
# include <execinfo.h>
# endif
2005-10-02 03:18:52 +08:00
2005-09-20 21:26:39 +08:00
# if HAVE_NCURSES_H
# include <ncurses.h>
# else
# include <curses.h>
# endif
2006-01-19 20:22:07 +08:00
# if HAVE_TERM_H
2005-09-20 21:26:39 +08:00
# include <term.h>
2006-01-19 20:22:07 +08:00
# elif HAVE_NCURSES_TERM_H
# include <ncurses/term.h>
# endif
2005-09-20 21:26:39 +08:00
2006-02-28 21:17:16 +08:00
# include "fallback.h"
2005-09-20 21:26:39 +08:00
# include "util.h"
2006-02-28 21:17:16 +08:00
2005-09-20 21:26:39 +08:00
# include "wutil.h"
# include "common.h"
# include "expand.h"
# include "proc.h"
# include "wildcard.h"
# include "parser.h"
2012-01-17 00:56:47 +08:00
# include "complete.h"
2005-09-20 21:26:39 +08:00
2011-12-27 11:18:46 +08:00
# include "util.cpp"
# include "fallback.cpp"
2006-06-13 21:43:28 +08:00
2013-11-25 14:57:49 +08:00
/* Sentinel meaning "no character"; string_last_char() in this file returns it for an empty string. */
# define NOT_A_WCHAR WEOF
2012-03-02 16:27:40 +08:00
2012-11-19 08:30:30 +08:00
struct termios shell_modes ;
2005-09-20 21:26:39 +08:00
2012-05-14 11:19:02 +08:00
// Note we foolishly assume that pthread_t is just a primitive. But it might be a struct.
2012-01-06 05:58:48 +08:00
static pthread_t main_thread_id = 0 ;
2012-05-14 11:19:02 +08:00
static bool thread_assertions_configured_for_testing = false ;
2012-01-06 05:58:48 +08:00
2012-11-05 16:05:42 +08:00
/* Character used as an ellipsis; assigned by wsetlocale() depending on whether the locale looks like UTF-8. */
wchar_t ellipsis_char ;
2012-12-02 07:44:09 +08:00
/* Character used to mark an omitted newline; assigned by wsetlocale() alongside ellipsis_char. */
wchar_t omitted_newline_char ;
2012-11-05 16:05:42 +08:00
2005-09-20 21:26:39 +08:00
char * profile = 0 ;
2011-12-27 11:18:46 +08:00
/* Name prefixed to debug output; the special value "(ignore)" silences debug messages and backtraces. */
const wchar_t * program_name ;
2005-09-20 21:26:39 +08:00
2005-09-25 03:31:17 +08:00
/* Messages whose level is greater than this value are not printed by debug() / debug_safe(). */
int debug_level = 1 ;
2005-10-14 19:40:33 +08:00
/**
This struct should be continually updated by signals as the term resizes , and as such always contain the correct current size .
*/
static struct winsize termsize ;
2012-12-13 07:44:01 +08:00
/* Forward declaration: wcs2str() uses this before its definition further down. */
static char * wcs2str_internal ( const wchar_t * in , char * out ) ;
2007-01-20 10:36:49 +08:00
2012-11-19 08:30:30 +08:00
void show_stackframe ( )
2007-01-20 10:36:49 +08:00
{
2013-01-12 07:09:33 +08:00
ASSERT_IS_NOT_FORKED_CHILD ( ) ;
2013-01-13 04:55:23 +08:00
2012-07-11 11:30:54 +08:00
/* Hack to avoid showing backtraces in the tester */
if ( program_name & & ! wcscmp ( program_name , L " (ignore) " ) )
return ;
2007-01-20 10:36:49 +08:00
2012-11-19 08:30:30 +08:00
void * trace [ 32 ] ;
int i , trace_size = 0 ;
trace_size = backtrace ( trace , 32 ) ;
2013-02-16 16:02:40 +08:00
char * * messages = backtrace_symbols ( trace , trace_size ) ;
2007-01-20 10:36:49 +08:00
2012-11-19 08:30:30 +08:00
if ( messages )
{
debug ( 0 , L " Backtrace: " ) ;
for ( i = 0 ; i < trace_size ; i + + )
{
fwprintf ( stderr , L " %s \n " , messages [ i ] ) ;
}
free ( messages ) ;
}
2007-01-20 10:36:49 +08:00
}
2012-07-17 03:05:36 +08:00
/**
   Read one line of wide characters from \c f and append it to \c s.

   Reading stops at end of file, a newline or a null character; the
   terminator is not stored. Carriage returns are dropped. Reads that
   leave errno set to EILSEQ or EINTR are retried.

   \return the number of characters appended to \c s
*/
int fgetws2(wcstring *s, FILE *f)
{
    int appended = 0;
    for (;;)
    {
        errno = 0;
        const wint_t ch = getwc(f);

        if (errno == EILSEQ || errno == EINTR)
        {
            continue;
        }

        /* End of line */
        if (ch == WEOF || ch == L'\n' || ch == L'\0')
        {
            return appended;
        }

        /* Ignore carriage returns */
        if (ch == L'\r')
        {
            continue;
        }

        s->push_back((wchar_t)ch);
        appended++;
    }
}
2012-12-20 05:31:06 +08:00
/**
2013-01-08 18:39:22 +08:00
Converts the narrow character string \c in into its wide
equivalent, and returns it.
2012-12-20 05:31:06 +08:00
The string may contain embedded nulls .
This function encodes illegal character sequences in a reversible
way using the private use area .
*/
2012-11-19 08:30:30 +08:00
2012-12-21 04:25:35 +08:00
static wcstring str2wcs_internal(const char *in, const size_t in_len)
{
    wcstring result;
    if (in_len == 0)
    {
        return result;
    }

    assert(in != NULL);
    result.reserve(in_len);

    mbstate_t state = {};
    for (size_t in_pos = 0; in_pos < in_len;)
    {
        const size_t remaining = in_len - in_pos;
        wchar_t wc = 0;
        const size_t ret = mbrtowc(&wc, in + in_pos, remaining, &state);

        /* A decoded character that lands in our private encoding range (or is the
           internal separator) must itself be encoded byte-wise so the conversion
           stays reversible */
        const bool collides_with_encoding =
            (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) ||
            (wc == INTERNAL_SEPARATOR);

        /* (size_t)-2: incomplete sequence, (size_t)-1: invalid data,
           anything larger than the remaining input: should never happen */
        const bool conversion_failed =
            (ret == (size_t)(-2)) ||
            (ret == (size_t)(-1)) ||
            (ret > remaining);

        if (collides_with_encoding || conversion_failed)
        {
            /* Encode the single offending byte and restart decoding after it */
            const wchar_t encoded = ENCODE_DIRECT_BASE + (unsigned char)in[in_pos];
            result.push_back(encoded);
            in_pos++;
            memset(&state, 0, sizeof state);
        }
        else if (ret == 0)
        {
            /* Embedded null byte! */
            result.push_back(L'\0');
            in_pos++;
            memset(&state, 0, sizeof state);
        }
        else
        {
            /* Normal case: consume the whole multibyte sequence */
            result.push_back(wc);
            in_pos += ret;
        }
    }
    return result;
}
/* Convert the first \c len bytes of \c in to a wide string via str2wcs_internal. */
wcstring str2wcstring(const char *in, size_t len)
{
    wcstring converted = str2wcs_internal(in, len);
    return converted;
}
2012-11-19 08:30:30 +08:00
2012-12-21 04:25:35 +08:00
/* Convert the null terminated narrow string \c in to a wide string. */
wcstring str2wcstring(const char *in)
{
    const size_t byte_count = strlen(in);
    return str2wcs_internal(in, byte_count);
}
/* Convert a std::string to a wide string. Uses data()/size(), so embedded nulls are handled. */
wcstring str2wcstring(const std::string &in)
{
    const char *bytes = in.data();
    const size_t byte_count = in.size();
    return str2wcs_internal(bytes, byte_count);
}
char * wcs2str ( const wchar_t * in )
2005-09-20 21:26:39 +08:00
{
2012-03-09 15:21:07 +08:00
if ( ! in )
return NULL ;
2012-11-19 08:30:30 +08:00
char * out ;
2012-03-09 15:21:07 +08:00
size_t desired_size = MAX_UTF8_BYTES * wcslen ( in ) + 1 ;
char local_buff [ 512 ] ;
2012-11-19 08:30:30 +08:00
if ( desired_size < = sizeof local_buff / sizeof * local_buff )
{
2012-03-09 15:21:07 +08:00
// convert into local buff, then use strdup() so we don't waste malloc'd space
char * result = wcs2str_internal ( in , local_buff ) ;
2012-11-19 08:30:30 +08:00
if ( result )
{
2012-03-09 15:21:07 +08:00
// It converted into the local buffer, so copy it
result = strdup ( result ) ;
2012-11-19 08:30:30 +08:00
if ( ! result )
{
2012-03-09 15:21:07 +08:00
DIE_MEM ( ) ;
}
}
return result ;
2012-11-19 08:30:30 +08:00
}
else
{
2012-03-09 15:21:07 +08:00
// here we fall into the bad case of allocating a buffer probably much larger than necessary
2012-11-19 08:30:30 +08:00
out = ( char * ) malloc ( MAX_UTF8_BYTES * wcslen ( in ) + 1 ) ;
if ( ! out )
{
2012-03-09 15:21:07 +08:00
DIE_MEM ( ) ;
}
2012-11-19 08:30:30 +08:00
return wcs2str_internal ( in , out ) ;
2012-03-09 15:21:07 +08:00
}
2005-09-20 21:26:39 +08:00
2012-11-19 08:30:30 +08:00
return wcs2str_internal ( in , out ) ;
2006-02-08 22:58:47 +08:00
}
2013-01-13 06:18:34 +08:00
char * wcs2str ( const wcstring & in )
{
return wcs2str ( in . c_str ( ) ) ;
}
2012-12-13 07:44:01 +08:00
/* This function is distinguished from wcs2str_internal in that it allows embedded null bytes */
2011-12-27 11:18:46 +08:00
std : : string wcs2string ( const wcstring & input )
{
2012-12-13 07:44:01 +08:00
std : : string result ;
result . reserve ( input . size ( ) ) ;
2012-12-20 05:31:06 +08:00
2012-12-13 07:44:01 +08:00
mbstate_t state ;
memset ( & state , 0 , sizeof ( state ) ) ;
2012-12-20 05:31:06 +08:00
2012-12-13 13:09:42 +08:00
char converted [ MB_LEN_MAX + 1 ] ;
2012-12-20 05:31:06 +08:00
2012-12-13 07:44:01 +08:00
for ( size_t i = 0 ; i < input . size ( ) ; i + + )
{
wchar_t wc = input [ i ] ;
if ( wc = = INTERNAL_SEPARATOR )
{
}
else if ( ( wc > = ENCODE_DIRECT_BASE ) & &
( wc < ENCODE_DIRECT_BASE + 256 ) )
{
result . push_back ( wc - ENCODE_DIRECT_BASE ) ;
}
else
{
2012-12-13 13:09:42 +08:00
bzero ( converted , sizeof converted ) ;
2012-12-13 07:44:01 +08:00
size_t len = wcrtomb ( converted , wc , & state ) ;
if ( len = = ( size_t ) ( - 1 ) )
{
debug ( 1 , L " Wide character %d has no narrow representation " , wc ) ;
memset ( & state , 0 , sizeof ( state ) ) ;
}
else
{
result . append ( converted , len ) ;
}
}
}
2012-12-20 05:31:06 +08:00
2011-12-27 11:18:46 +08:00
return result ;
}
2012-12-13 07:44:01 +08:00
/**
Converts the wide character string \c in into its narrow
equivalent , stored in \ c out . \ c out must have enough space to fit
the entire string .
This function decodes illegal character sequences in a reversible
way using the private use area .
*/
static char * wcs2str_internal ( const wchar_t * in , char * out )
2012-11-19 08:30:30 +08:00
{
size_t res = 0 ;
size_t in_pos = 0 ;
size_t out_pos = 0 ;
mbstate_t state ;
CHECK ( in , 0 ) ;
CHECK ( out , 0 ) ;
memset ( & state , 0 , sizeof ( state ) ) ;
while ( in [ in_pos ] )
{
if ( in [ in_pos ] = = INTERNAL_SEPARATOR )
{
}
else if ( ( in [ in_pos ] > = ENCODE_DIRECT_BASE ) & &
( in [ in_pos ] < ENCODE_DIRECT_BASE + 256 ) )
{
out [ out_pos + + ] = in [ in_pos ] - ENCODE_DIRECT_BASE ;
}
else
{
res = wcrtomb ( & out [ out_pos ] , in [ in_pos ] , & state ) ;
if ( res = = ( size_t ) ( - 1 ) )
{
debug ( 1 , L " Wide character %d has no narrow representation " , in [ in_pos ] ) ;
memset ( & state , 0 , sizeof ( state ) ) ;
}
else
{
out_pos + = res ;
}
}
in_pos + + ;
}
out [ out_pos ] = 0 ;
return out ;
}
/**
   Convert a null terminated array of wide strings into a newly
   allocated, null terminated array of narrow strings. Each element is
   produced by wcs2str(); the array itself comes from malloc().
*/
char **wcsv2strv(const wchar_t * const *in)
{
    /* Count the entries up to (not including) the terminating null pointer */
    size_t count = 0;
    for (const wchar_t * const *it = in; *it != 0; it++)
    {
        count++;
    }

    char **res = (char **)malloc(sizeof *res * (count + 1));
    if (res == 0)
    {
        DIE_MEM();
    }

    char **dst = res;
    for (size_t idx = 0; idx < count; idx++)
    {
        *dst++ = wcs2str(in[idx]);
    }
    *dst = 0;

    return res;
}
2011-12-27 11:18:46 +08:00
/* printf-style formatting into a wcstring; forwards the variadic arguments to vformat_string. */
wcstring format_string(const wchar_t *format, ...)
{
    va_list args;
    va_start(args, format);
    wcstring formatted = vformat_string(format, args);
    va_end(args);
    return formatted;
}
/**
   va_list flavour of format_string. Formats into a stack buffer first and
   falls back to progressively larger heap buffers. errno is restored to its
   value on entry before returning.
*/
wcstring vformat_string(const wchar_t *format, va_list va_orig)
{
    const int saved_err = errno;

    /*
      As far as I know, there is no way to tell whether a vswprintf
      call failed because of a badly formatted string option or
      because the destination buffer was too small. In GLIBC, errno
      seems to be set to EINVAL either way.

      Because of this, on failure we keep doubling the buffer. Once
      the size reaches max_size we conclude that the error was
      probably due to a bad format string, null terminate the buffer
      and give up, which yields an empty result.
    */
    const size_t max_size = (128 * 1024 * 1024);
    wchar_t static_buff[256];

    /* size is tracked in bytes; vswprintf wants a count of wchar_t */
    wchar_t *buff = static_buff;
    size_t size = sizeof static_buff;

    for (;;)
    {
        /* Try printing; each attempt needs a fresh copy of the argument list */
        va_list va;
        va_copy(va, va_orig);
        const int status = vswprintf(buff, size / sizeof(wchar_t), format, va);
        va_end(va);

        if (status >= 0)
        {
            break;
        }

        /* Grow, unless that would hit the cap */
        size *= 2;
        if (size >= max_size)
        {
            buff[0] = '\0';
            break;
        }

        /* The stack buffer must never be handed to realloc */
        buff = (wchar_t *)realloc((buff == static_buff ? NULL : buff), size);
        if (buff == NULL)
        {
            DIE_MEM();
        }
    }

    wcstring result = wcstring(buff);

    if (buff != static_buff)
    {
        free(buff);
    }

    errno = saved_err;
    return result;
}
2013-03-25 06:24:29 +08:00
/* Format according to \c format / \c ap and append the result to \c str, leaving errno as it was on entry. */
void append_formatv(wcstring &str, const wchar_t *format, va_list ap)
{
    /* Preserve errno across this call since it likes to stomp on it */
    const int saved_errno = errno;
    const wcstring formatted = vformat_string(format, ap);
    str.append(formatted);
    errno = saved_errno;
}
void append_format ( wcstring & str , const wchar_t * format , . . . )
{
2012-11-19 08:30:30 +08:00
va_list va ;
va_start ( va , format ) ;
2013-03-25 06:24:29 +08:00
append_formatv ( str , format , va ) ;
2012-11-19 08:30:30 +08:00
va_end ( va ) ;
2012-02-23 02:51:06 +08:00
}
2012-02-10 10:43:36 +08:00
2012-11-19 08:30:30 +08:00
/**
   Scan \c str for the first character that is neither alphanumeric nor
   an underscore.

   \return a pointer to that character, or null if every character passes
*/
wchar_t *wcsvarname(const wchar_t *str)
{
    for (const wchar_t *cursor = str; *cursor; cursor++)
    {
        if (iswalnum(*cursor) || *cursor == L'_')
        {
            continue;
        }
        return (wchar_t *)cursor;
    }
    return 0;
}
2012-11-19 08:30:30 +08:00
/* Return a pointer to the first '/' in \c str, or null if there is none. */
const wchar_t *wcsfuncname(const wchar_t *str)
{
    const wchar_t *first_slash = wcschr(str, L'/');
    return first_slash;
}
2012-11-19 08:30:30 +08:00
/* Return 1 if \c chr is alphanumeric or an underscore, 0 otherwise. */
int wcsvarchr(wchar_t chr)
{
    if (chr == L'_')
    {
        return 1;
    }
    return iswalnum(chr) ? 1 : 0;
}
2005-09-20 21:26:39 +08:00
2012-11-19 08:30:30 +08:00
/**
2005-09-20 21:26:39 +08:00
The glibc version of wcswidth seems to hang on some strings . fish uses this replacement .
*/
2012-11-19 08:30:30 +08:00
int my_wcswidth(const wchar_t *c)
{
    /* Measure the whole null terminated string */
    const size_t len = wcslen(c);
    return fish_wcswidth(c, len);
}
/**
   Find the quote that closes the one \c pos points at.

   The character at \c pos is taken as the quote character. A backslash
   makes the scan skip the character that follows it.

   \return a pointer to the matching quote, or null if the string ends first
*/
wchar_t *quote_end(const wchar_t *pos)
{
    const wchar_t quote = *pos;

    for (const wchar_t *cursor = pos + 1; *cursor; cursor++)
    {
        if (*cursor == L'\\')
        {
            /* Step over the escaped character; a trailing backslash means no match */
            cursor++;
            if (!*cursor)
            {
                return 0;
            }
        }
        else if (*cursor == quote)
        {
            return (wchar_t *)cursor;
        }
    }
    return 0;
}
2012-02-01 13:06:52 +08:00
/**
   Wide character front end for setlocale().

   Besides changing (or, when \c locale is NULL, querying) the locale, this
   refreshes ellipsis_char and omitted_newline_char according to whether the
   current LC_CTYPE name contains ".UTF" or ".utf".

   \param category the locale category, as for setlocale()
   \param locale the locale name, or NULL to query
   \return the string setlocale() returned, or an empty string if it failed
*/
wcstring wsetlocale(int category, const wchar_t *locale)
{
    char *lang = locale ? wcs2str(locale) : NULL;
    const char *res = setlocale(category, lang);
    free(lang);

    /*
      Copy the answer right away: the string setlocale() returns may be
      invalidated or overwritten by the next setlocale() call, and we are
      about to make one.
    */
    wcstring result;
    if (res)
    {
        result = format_string(L"%s", res);
    }

    /*
      Use ellipsis if on known unicode system, otherwise use $
    */
    const char *ctype = setlocale(LC_CTYPE, NULL);
    const bool unicode = ctype != NULL && (strstr(ctype, ".UTF") || strstr(ctype, ".utf"));

    ellipsis_char = unicode ? L'\x2026' : L'$';

    // U+23CE is the "return" character
    omitted_newline_char = unicode ? L'\x23CE' : L'~';

    return result;
}
2012-11-19 08:30:30 +08:00
bool contains_internal ( const wchar_t * a , . . . )
2005-09-20 21:26:39 +08:00
{
2012-11-19 08:30:30 +08:00
const wchar_t * arg ;
va_list va ;
bool res = false ;
CHECK ( a , 0 ) ;
2006-05-03 00:28:30 +08:00
2012-11-19 08:30:30 +08:00
va_start ( va , a ) ;
while ( ( arg = va_arg ( va , const wchar_t * ) ) ! = 0 )
{
if ( wcscmp ( a , arg ) = = 0 )
{
res = true ;
break ;
}
}
va_end ( va ) ;
return res ;
2012-01-31 01:46:33 +08:00
}
/* wcstring variant of contains_internal. The first parameter is a wcstring, the rest are const wchar_t* */
2012-11-19 08:30:30 +08:00
__sentinel bool contains_internal(const wcstring &needle, ...)
{
    va_list va;
    va_start(va, needle);

    /* Walk the null terminated argument list until a match is found */
    bool found = false;
    for (;;)
    {
        const wchar_t *candidate = va_arg(va, const wchar_t *);
        if (candidate == 0)
        {
            break;
        }
        if (needle == candidate)
        {
            found = true;
            break;
        }
    }

    va_end(va);
    return found;
}
2012-08-05 06:11:43 +08:00
long read_blocked ( int fd , void * buf , size_t count )
2005-09-20 21:26:39 +08:00
{
2012-11-19 08:30:30 +08:00
ssize_t res ;
sigset_t chldset , oldset ;
2005-09-20 21:26:39 +08:00
2012-11-19 08:30:30 +08:00
sigemptyset ( & chldset ) ;
sigaddset ( & chldset , SIGCHLD ) ;
VOMIT_ON_FAILURE ( pthread_sigmask ( SIG_BLOCK , & chldset , & oldset ) ) ;
res = read ( fd , buf , count ) ;
VOMIT_ON_FAILURE ( pthread_sigmask ( SIG_SETMASK , & oldset , NULL ) ) ;
return res ;
2005-09-20 21:26:39 +08:00
}
2012-01-14 19:41:50 +08:00
/**
   Write all \c count bytes of \c buff to \c fd, retrying after short
   writes and after EAGAIN / EINTR.

   \return \c count on success, -1 if write() fails with any other error
*/
ssize_t write_loop(int fd, const char *buff, size_t count)
{
    size_t written = 0;
    while (written < count)
    {
        const ssize_t amount = write(fd, buff + written, count - written);
        if (amount >= 0)
        {
            written += (size_t)amount;
            continue;
        }

        /* Transient errors are simply retried */
        if (errno == EAGAIN || errno == EINTR)
        {
            continue;
        }
        return -1;
    }
    return (ssize_t)written;
}
2012-03-01 09:55:28 +08:00
/**
   Call read() once, retrying only while it fails with EAGAIN or EINTR.

   \return the result of the final read() call (possibly a short read,
   0 at end of file, or -1 on any other error)
*/
ssize_t read_loop(int fd, void *buff, size_t count)
{
    for (;;)
    {
        const ssize_t amount = read(fd, buff, count);
        if (amount >= 0)
        {
            return amount;
        }
        if (errno != EAGAIN && errno != EINTR)
        {
            return amount;
        }
    }
}
2012-07-18 03:47:01 +08:00
static bool should_debug ( int level )
2005-09-20 21:26:39 +08:00
{
2012-11-19 08:30:30 +08:00
if ( level > debug_level )
return false ;
2005-09-20 21:26:39 +08:00
2012-07-18 03:47:01 +08:00
/* Hack to not print error messages in the tests */
2012-11-19 08:30:30 +08:00
if ( program_name & & ! wcscmp ( program_name , L " (ignore) " ) )
2012-07-18 03:47:01 +08:00
return false ;
2012-11-19 08:30:30 +08:00
2012-07-18 03:47:01 +08:00
return true ;
}
2006-05-31 23:40:28 +08:00
2012-11-19 08:30:30 +08:00
/*
  Common back end of the debug() overloads: prefixes the message with
  program_name, wraps it with write_screen() and prints the result to stderr.
  NOTE(review): program_name is used here without a null check, although
  should_debug() treats it as possibly null - confirm it is always set
  before the first debug() call.
*/
static void debug_shared ( const wcstring & msg )
2012-07-18 03:47:01 +08:00
{
const wcstring sb = wcstring ( program_name ) + L " : " + msg ;
2012-11-19 08:30:30 +08:00
wcstring sb2 ;
/* write_screen appends the wrapped text (plus a trailing newline) to sb2 */
write_screen ( sb , sb2 ) ;
fwprintf ( stderr , L " %ls " , sb2 . c_str ( ) ) ;
2012-07-18 03:47:01 +08:00
}
2006-05-31 23:40:28 +08:00
2012-11-19 08:30:30 +08:00
void debug ( int level , const wchar_t * msg , . . . )
2012-07-18 03:47:01 +08:00
{
if ( ! should_debug ( level ) )
return ;
int errno_old = errno ;
va_list va ;
2012-11-19 08:30:30 +08:00
va_start ( va , msg ) ;
2012-07-18 03:47:01 +08:00
wcstring local_msg = vformat_string ( msg , va ) ;
2012-11-19 08:30:30 +08:00
va_end ( va ) ;
2012-07-18 03:47:01 +08:00
debug_shared ( local_msg ) ;
errno = errno_old ;
}
2012-11-19 08:30:30 +08:00
void debug ( int level , const char * msg , . . . )
2012-07-18 03:47:01 +08:00
{
if ( ! should_debug ( level ) )
return ;
int errno_old = errno ;
char local_msg [ 512 ] ;
va_list va ;
2012-11-19 08:30:30 +08:00
va_start ( va , msg ) ;
2012-07-18 03:47:01 +08:00
vsnprintf ( local_msg , sizeof local_msg , msg , va ) ;
2012-11-19 08:30:30 +08:00
va_end ( va ) ;
2012-07-18 03:47:01 +08:00
debug_shared ( str2wcstring ( local_msg ) ) ;
errno = errno_old ;
2012-02-23 02:51:06 +08:00
}
2006-01-15 19:58:05 +08:00
2012-07-18 03:47:01 +08:00
2012-03-09 15:21:07 +08:00
void debug_safe ( int level , const char * msg , const char * param1 , const char * param2 , const char * param3 , const char * param4 , const char * param5 , const char * param6 , const char * param7 , const char * param8 , const char * param9 , const char * param10 , const char * param11 , const char * param12 )
2012-03-01 03:27:14 +08:00
{
2012-03-09 15:21:07 +08:00
const char * const params [ ] = { param1 , param2 , param3 , param4 , param5 , param6 , param7 , param8 , param9 , param10 , param11 , param12 } ;
2012-03-01 03:27:14 +08:00
if ( ! msg )
return ;
2012-11-19 08:30:30 +08:00
2012-03-01 03:27:14 +08:00
/* Can't call printf, that may allocate memory Just call write() over and over. */
if ( level > debug_level )
return ;
int errno_old = errno ;
2012-11-19 08:30:30 +08:00
2012-03-01 03:27:14 +08:00
size_t param_idx = 0 ;
const char * cursor = msg ;
2012-11-19 08:30:30 +08:00
while ( * cursor ! = ' \0 ' )
{
2012-03-01 03:27:14 +08:00
const char * end = strchr ( cursor , ' % ' ) ;
if ( end = = NULL )
end = cursor + strlen ( cursor ) ;
2012-11-19 08:30:30 +08:00
2012-03-01 03:27:14 +08:00
write ( STDERR_FILENO , cursor , end - cursor ) ;
2012-11-19 08:30:30 +08:00
if ( end [ 0 ] = = ' % ' & & end [ 1 ] = = ' s ' )
{
2012-03-01 03:27:14 +08:00
/* Handle a format string */
2012-03-09 15:21:07 +08:00
assert ( param_idx < sizeof params / sizeof * params ) ;
const char * format = params [ param_idx + + ] ;
2012-11-19 08:30:30 +08:00
if ( ! format )
2012-03-01 03:27:14 +08:00
format = " (null) " ;
write ( STDERR_FILENO , format , strlen ( format ) ) ;
cursor = end + 2 ;
2012-11-19 08:30:30 +08:00
}
else if ( end [ 0 ] = = ' \0 ' )
{
2012-03-01 03:27:14 +08:00
/* Must be at the end of the string */
cursor = end ;
2012-11-19 08:30:30 +08:00
}
else
{
2012-03-01 03:27:14 +08:00
/* Some other format specifier, just skip it */
cursor = end + 1 ;
}
}
2012-11-19 08:30:30 +08:00
2012-03-01 03:27:14 +08:00
// We always append a newline
write ( STDERR_FILENO , " \n " , 1 ) ;
2012-11-19 08:30:30 +08:00
2012-03-01 03:27:14 +08:00
errno = errno_old ;
}
2012-11-19 08:30:30 +08:00
/**
   Write the decimal representation of \c val into \c buff as a null
   terminated string, without calling printf.
*/
void format_long_safe(char buff[128], long val)
{
    if (val == 0)
    {
        strcpy(buff, "0");
        return;
    }

    const bool negative = (val < 0);

    /* Generate the digits least significant first. We can't just negate val
       when it is negative, because it may be the most negative value, so the
       sign is stripped from each remainder instead. This relies on
       round-towards-zero division. */
    size_t len = 0;
    do
    {
        long digit = val % 10;
        if (digit < 0)
        {
            digit = -digit;
        }
        buff[len++] = '0' + digit;
        val /= 10;
    }
    while (val != 0);

    if (negative)
    {
        buff[len++] = '-';
    }
    buff[len] = 0;

    /* Reverse in place to get the conventional order */
    for (size_t lo = 0, hi = len - 1; lo < hi; lo++, hi--)
    {
        const char swapped = buff[lo];
        buff[lo] = buff[hi];
        buff[hi] = swapped;
    }
}
2012-11-19 08:30:30 +08:00
/**
   Wide character flavour of format_long_safe: write the decimal
   representation of \c val into \c buff as a null terminated wide
   string, without calling printf.
*/
void format_long_safe(wchar_t buff[128], long val)
{
    if (val == 0)
    {
        wcscpy(buff, L"0");
        return;
    }

    /* Generate the string in reverse */
    size_t idx = 0;
    const bool negative = (val < 0);

    /* Loop until val reaches zero from either side. Testing val > 0 would skip
       every digit of a negative number and leave only the sign. val is not
       negated up front because it may be the most negative value; the sign is
       stripped from each remainder instead (relies on round-towards-zero
       division). */
    while (val != 0)
    {
        const long rem = val % 10;
        /* Here we're assuming that wide character digits are contiguous */
        buff[idx++] = L'0' + (wchar_t)(rem < 0 ? -rem : rem);
        val /= 10;
    }
    if (negative)
    {
        buff[idx++] = L'-';
    }
    buff[idx] = 0;

    /* Reverse in place to get the conventional order */
    size_t left = 0, right = idx - 1;
    while (left < right)
    {
        const wchar_t tmp = buff[left];
        buff[left++] = buff[right];
        buff[right--] = tmp;
    }
}
2012-11-19 08:30:30 +08:00
/*
  Word-wrap msg to the width reported by common_get_width() and append the
  result to buff, always followed by a newline. When the reported width is
  zero, msg is appended unchanged.
  NOTE(review): when the current position is the terminating null and no token
  was read (e.g. an empty msg), the zero-length branch advances past it before
  the end-of-string test - confirm callers never pass such input.
*/
void write_screen ( const wcstring & msg , wcstring & buff )
{
/* start marks the beginning of the current token, pos the scan position */
const wchar_t * start , * pos ;
/* Width already used on the current output line, and width of the current token */
int line_width = 0 ;
int tok_width = 0 ;
int screen_width = common_get_width ( ) ;
if ( screen_width )
{
start = pos = msg . c_str ( ) ;
while ( 1 )
{
int overflow = 0 ;
tok_width = 0 ;
/*
Tokenize on whitespace , and also calculate the width of the token
*/
while ( * pos & & ( ! wcschr ( L " \n \r \t " , * pos ) ) )
{
/*
Check is token is wider than one line .
If so we mark it as an overflow and break the token .
*/
if ( ( tok_width + fish_wcwidth ( * pos ) ) > ( screen_width - 1 ) )
{
overflow = 1 ;
break ;
}
tok_width + = fish_wcwidth ( * pos ) ;
pos + + ;
}
/*
If token is zero character long , we don ' t do anything
*/
if ( pos = = start )
{
/* Skip the single separator character we are standing on */
start = pos = pos + 1 ;
}
else if ( overflow )
{
/*
In case of overflow , we print a newline , except if we already are at position 0
*/
wchar_t * token = wcsndup ( start , pos - start ) ;
if ( line_width ! = 0 )
2012-02-23 02:51:06 +08:00
buff . push_back ( L ' \n ' ) ;
/* The partial token is followed by a hyphen and a line break */
buff . append ( format_string ( L " %ls- \n " , token ) ) ;
2012-11-19 08:30:30 +08:00
free ( token ) ;
line_width = 0 ;
}
else
{
/*
Print the token
*/
wchar_t * token = wcsndup ( start , pos - start ) ;
/* Start a new line if the token (plus a separator when the line is not empty) does not fit */
if ( ( line_width + ( line_width ! = 0 ? 1 : 0 ) + tok_width ) > screen_width )
{
2012-02-23 02:51:06 +08:00
buff . push_back ( L ' \n ' ) ;
2012-11-19 08:30:30 +08:00
line_width = 0 ;
}
buff . append ( format_string ( L " %ls%ls " , line_width ? L " " : L " " , token ) ) ;
free ( token ) ;
line_width + = ( line_width ! = 0 ? 1 : 0 ) + tok_width ;
}
/*
Break on end of string
*/
if ( ! * pos )
{
break ;
}
start = pos ;
}
}
else
{
2012-02-23 02:51:06 +08:00
buff . append ( msg ) ;
2012-11-19 08:30:30 +08:00
}
2012-02-23 02:51:06 +08:00
buff . push_back ( L ' \n ' ) ;
2006-01-15 19:58:05 +08:00
}
2008-01-14 00:47:47 +08:00
/**
Perform string escaping of a string by only quoting it. Assumes
the string has already been checked for characters that can not be
escaped this way .
*/
2012-11-19 08:30:30 +08:00
static wchar_t * escape_simple ( const wchar_t * in )
2005-09-20 21:26:39 +08:00
{
2012-11-19 08:30:30 +08:00
wchar_t * out ;
size_t len = wcslen ( in ) ;
out = ( wchar_t * ) malloc ( sizeof ( wchar_t ) * ( len + 3 ) ) ;
if ( ! out )
DIE_MEM ( ) ;
2011-12-27 11:18:46 +08:00
2012-11-19 08:30:30 +08:00
out [ 0 ] = L ' \' ' ;
wcscpy ( & out [ 1 ] , in ) ;
out [ len + 1 ] = L ' \' ' ;
out [ len + 2 ] = 0 ;
return out ;
2005-09-20 21:26:39 +08:00
}
2012-11-19 08:30:30 +08:00
/*
  Return a newly malloc'd, escaped copy of in_orig; the caller frees it
  (escape_string() below does so with free()).

  flags is tested for ESCAPE_ALL, ESCAPE_NO_QUOTED and ESCAPE_NO_TILDE.
  A null input is reported through debug() and FATAL_EXIT(). An empty input
  yields a fixed quoted literal unless ESCAPE_NO_QUOTED is set. If only
  "simple" escapes were needed, the backslash form is discarded at the end
  and escape_simple() is used instead.
*/
wchar_t * escape ( const wchar_t * in_orig , escape_flags_t flags )
2005-10-14 19:40:33 +08:00
{
2012-11-19 08:30:30 +08:00
const wchar_t * in = in_orig ;
2005-10-14 19:40:33 +08:00
2012-11-19 08:30:30 +08:00
bool escape_all = ! ! ( flags & ESCAPE_ALL ) ;
bool no_quoted = ! ! ( flags & ESCAPE_NO_QUOTED ) ;
bool no_tilde = ! ! ( flags & ESCAPE_NO_TILDE ) ;
2005-10-14 19:40:33 +08:00
2012-11-19 08:30:30 +08:00
wchar_t * out ;
wchar_t * pos ;
2005-10-14 19:40:33 +08:00
2012-11-19 08:30:30 +08:00
/* need_escape: some character needed escaping. need_complex_escape: at least one of them cannot be expressed by plain quoting. */
int need_escape = 0 ;
int need_complex_escape = 0 ;
2005-10-14 19:40:33 +08:00
2012-11-19 08:30:30 +08:00
if ( ! in )
{
debug ( 0 , L " %s called with null input " , __func__ ) ;
FATAL_EXIT ( ) ;
2011-12-27 15:13:05 +08:00
}
2012-11-19 08:30:30 +08:00
if ( ! no_quoted & & ( wcslen ( in ) = = 0 ) )
{
out = wcsdup ( L " '' " ) ;
if ( ! out )
DIE_MEM ( ) ;
return out ;
}
2012-05-09 17:33:42 +08:00
2012-11-19 08:30:30 +08:00
/* Every input character expands to at most four output characters, plus the terminator */
out = ( wchar_t * ) malloc ( sizeof ( wchar_t ) * ( wcslen ( in ) * 4 + 1 ) ) ;
pos = out ;
2006-05-29 19:13:42 +08:00
2012-11-19 08:30:30 +08:00
if ( ! out )
DIE_MEM ( ) ;
2012-03-02 09:31:45 +08:00
2012-11-19 08:30:30 +08:00
while ( * in ! = 0 )
{
2012-05-14 11:49:14 +08:00
2012-11-19 08:30:30 +08:00
/* Directly encoded bytes are written as a backslash, 'X' and two hex digits */
if ( ( * in > = ENCODE_DIRECT_BASE ) & &
( * in < ENCODE_DIRECT_BASE + 256 ) )
{
int val = * in - ENCODE_DIRECT_BASE ;
int tmp ;
2012-05-14 11:49:14 +08:00
2012-11-19 08:30:30 +08:00
* ( pos + + ) = L ' \\ ' ;
* ( pos + + ) = L ' X ' ;
2012-02-27 12:11:34 +08:00
2012-11-19 08:30:30 +08:00
tmp = val / 16 ;
* pos + + = tmp > 9 ? L ' a ' + ( tmp - 10 ) : L ' 0 ' + tmp ;
2006-10-19 19:50:23 +08:00
2012-11-19 08:30:30 +08:00
tmp = val % 16 ;
* pos + + = tmp > 9 ? L ' a ' + ( tmp - 10 ) : L ' 0 ' + tmp ;
need_escape = need_complex_escape = 1 ;
2006-11-17 22:58:25 +08:00
2012-11-19 08:30:30 +08:00
}
else
{
wchar_t c = * in ;
switch ( c )
{
2012-11-19 16:31:03 +08:00
/* Control characters with a dedicated backslash letter */
case L ' \t ' :
* ( pos + + ) = L ' \\ ' ;
* ( pos + + ) = L ' t ' ;
need_escape = need_complex_escape = 1 ;
break ;
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' \n ' :
* ( pos + + ) = L ' \\ ' ;
* ( pos + + ) = L ' n ' ;
need_escape = need_complex_escape = 1 ;
break ;
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' \b ' :
* ( pos + + ) = L ' \\ ' ;
* ( pos + + ) = L ' b ' ;
need_escape = need_complex_escape = 1 ;
break ;
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' \r ' :
* ( pos + + ) = L ' \\ ' ;
* ( pos + + ) = L ' r ' ;
need_escape = need_complex_escape = 1 ;
break ;
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' \x1b ' :
* ( pos + + ) = L ' \\ ' ;
* ( pos + + ) = L ' e ' ;
need_escape = need_complex_escape = 1 ;
break ;
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/* Backslash and single quote always count as complex; the backslash prefix is only added under ESCAPE_ALL */
case L ' \\ ' :
case L ' \' ' :
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
need_escape = need_complex_escape = 1 ;
2012-11-19 08:30:30 +08:00
if ( escape_all )
* pos + + = L ' \\ ' ;
2012-11-19 16:31:03 +08:00
* pos + + = * in ;
break ;
2012-11-19 08:30:30 +08:00
}
2012-11-19 16:31:03 +08:00
/* Characters that need escaping but can be handled by plain quoting */
case L ' & ' :
case L ' $ ' :
case L ' ' :
case L ' # ' :
case L ' ^ ' :
case L ' < ' :
case L ' > ' :
case L ' ( ' :
case L ' ) ' :
case L ' [ ' :
case L ' ] ' :
case L ' { ' :
case L ' } ' :
case L ' ? ' :
case L ' * ' :
case L ' | ' :
case L ' ; ' :
case L ' " ' :
case L ' % ' :
case L ' ~ ' :
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
/* A tilde is left untouched when ESCAPE_NO_TILDE is set */
if ( ! no_tilde | | c ! = L ' ~ ' )
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
need_escape = 1 ;
if ( escape_all )
* pos + + = L ' \\ ' ;
}
* pos + + = * in ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
default :
{
if ( * in < 32 )
{
/* Values 1..26 are written as a backslash, 'c' and a letter */
if ( * in < 27 & & * in > 0 )
{
* ( pos + + ) = L ' \\ ' ;
* ( pos + + ) = L ' c ' ;
* ( pos + + ) = L ' a ' + * in - 1 ;
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
need_escape = need_complex_escape = 1 ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/* Remaining values below 32 are written as a backslash, 'x' and two hex digits */
int tmp = ( * in ) % 16 ;
* pos + + = L ' \\ ' ;
* pos + + = L ' x ' ;
* pos + + = ( ( * in > 15 ) ? L ' 1 ' : L ' 0 ' ) ;
* pos + + = tmp > 9 ? L ' a ' + ( tmp - 10 ) : L ' 0 ' + tmp ;
need_escape = need_complex_escape = 1 ;
}
else
{
* pos + + = * in ;
}
break ;
2012-11-19 08:30:30 +08:00
}
}
}
in + + ;
}
* pos = 0 ;
/*
Use quoted escaping if possible , since most people find it
easier to read .
*/
if ( ! no_quoted & & need_escape & & ! need_complex_escape & & escape_all )
{
free ( out ) ;
out = escape_simple ( in_orig ) ;
}
return out ;
}
/* wcstring wrapper around escape(): copies the malloc'd result into a wcstring and frees the original. */
wcstring escape_string(const wcstring &in, escape_flags_t flags)
{
    wchar_t *escaped = escape(in.c_str(), flags);
    wcstring result = escaped;
    free(escaped);
    return result;
}
2013-11-25 14:57:49 +08:00
/* Helper to return the last character in a string, or NOT_A_WCHAR */
static wint_t string_last_char ( const wcstring & str )
{
size_t len = str . size ( ) ;
return len = = 0 ? NOT_A_WCHAR : str . at ( len - 1 ) ;
}
/* Given a null terminated string starting with a backslash, read the escape as if it is unquoted, appending to result. Return the number of characters consumed, or 0 on error */
static size_t read_unquoted_escape ( const wchar_t * input , wcstring * result , bool allow_incomplete , bool unescape_special )
{
if ( input [ 0 ] ! = L ' \\ ' )
{
// not an escape
return 0 ;
}
/* Here's the character we'll ultimately append. Note that L'\0' is a valid thing to append. */
wchar_t result_char = NOT_A_WCHAR ;
bool errored = false ;
size_t in_pos = 1 ; //in_pos always tracks the next character to read (and therefore the number of characters read so far)
const wchar_t c = input [ in_pos + + ] ;
switch ( c )
{
/* A null character after a backslash is an error */
case L ' \0 ' :
{
/* Adjust in_pos to only include the backslash */
assert ( in_pos > 0 ) ;
in_pos - - ;
/* It's an error, unless we're allowing incomplete escapes */
if ( ! allow_incomplete )
errored = true ;
break ;
}
/* Numeric escape sequences. No prefix means octal escape, otherwise hexadecimal. */
case L ' 0 ' :
case L ' 1 ' :
case L ' 2 ' :
case L ' 3 ' :
case L ' 4 ' :
case L ' 5 ' :
case L ' 6 ' :
case L ' 7 ' :
case L ' u ' :
case L ' U ' :
case L ' x ' :
case L ' X ' :
{
long long res = 0 ;
size_t chars = 2 ;
int base = 16 ;
bool byte_literal = false ;
wchar_t max_val = ASCII_MAX ;
switch ( c )
{
case L ' u ' :
{
chars = 4 ;
max_val = UCS2_MAX ;
break ;
}
case L ' U ' :
{
chars = 8 ;
max_val = WCHAR_MAX ;
break ;
}
case L ' x ' :
{
chars = 2 ;
max_val = ASCII_MAX ;
break ;
}
case L ' X ' :
{
byte_literal = true ;
max_val = BYTE_MAX ;
break ;
}
default :
{
base = 8 ;
chars = 3 ;
// note that in_pos currently is just after the first post-backslash character; we want to start our escape from there
assert ( in_pos > 0 ) ;
in_pos - - ;
break ;
}
}
for ( size_t i = 0 ; i < chars ; i + + )
{
long d = convert_digit ( input [ in_pos ] , base ) ;
if ( d < 0 )
{
break ;
}
res = ( res * base ) + d ;
in_pos + + ;
}
if ( res < = max_val )
{
result_char = ( wchar_t ) ( ( byte_literal ? ENCODE_DIRECT_BASE : 0 ) + res ) ;
}
else
{
errored = true ;
}
break ;
}
/* \a means bell (alert) */
case L ' a ' :
{
result_char = L ' \a ' ;
break ;
}
/* \b means backspace */
case L ' b ' :
{
result_char = L ' \b ' ;
break ;
}
/* \cX means control sequence X */
case L ' c ' :
{
const wchar_t sequence_char = input [ in_pos + + ] ;
if ( sequence_char > = L ' a ' & & sequence_char < = ( L ' a ' + 32 ) )
{
result_char = sequence_char - L ' a ' + 1 ;
}
else if ( sequence_char > = L ' A ' & & sequence_char < = ( L ' A ' + 32 ) )
{
result_char = sequence_char - L ' A ' + 1 ;
}
else
{
errored = true ;
}
break ;
}
/* \x1b means escape */
case L ' e ' :
{
result_char = L ' \x1b ' ;
break ;
}
/*
\ f means form feed
*/
case L ' f ' :
{
result_char = L ' \f ' ;
break ;
}
/*
\ n means newline
*/
case L ' n ' :
{
result_char = L ' \n ' ;
break ;
}
/*
\ r means carriage return
*/
case L ' r ' :
{
result_char = L ' \r ' ;
break ;
}
/*
\ t means tab
*/
case L ' t ' :
{
result_char = L ' \t ' ;
break ;
}
/*
\ v means vertical tab
*/
case L ' v ' :
{
result_char = L ' \v ' ;
break ;
}
/* If a backslash is followed by an actual newline, swallow them both */
case L ' \n ' :
{
result_char = NOT_A_WCHAR ;
break ;
}
default :
{
if ( unescape_special )
result - > push_back ( INTERNAL_SEPARATOR ) ;
result_char = c ;
break ;
}
}
if ( ! errored & & result_char ! = NOT_A_WCHAR )
{
result - > push_back ( result_char ) ;
}
return errored ? 0 : in_pos ;
}
/* Returns the unescaped version of input_str into output_str (by reference). Returns true if successful. If false, the contents of output_str are undefined (!) */
static bool unescape_string_internal ( const wchar_t * const input , const size_t input_len , wcstring * output_str , unescape_flags_t flags )
{
/* Set up result string, which we'll swap with the output on success */
wcstring result ;
result . reserve ( input_len ) ;
const bool unescape_special = ! ! ( flags & UNESCAPE_SPECIAL ) ;
const bool allow_incomplete = ! ! ( flags & UNESCAPE_INCOMPLETE ) ;
int bracket_count = 0 ;
bool errored = false ;
enum
{
mode_unquoted ,
mode_single_quotes ,
mode_double_quotes
} mode = mode_unquoted ;
for ( size_t input_position = 0 ; input_position < input_len & & ! errored ; input_position + + )
{
const wchar_t c = input [ input_position ] ;
/* Here's the character we'll append to result, or NOT_A_WCHAR to suppress it */
wchar_t to_append = c ;
if ( mode = = mode_unquoted )
{
switch ( c )
{
case L ' \\ ' :
{
/* Backslashes (escapes) are complicated and may result in errors, or appending INTERNAL_SEPARATORs, so we have to handle them specially */
size_t escape_chars = read_unquoted_escape ( input + input_position , & result , allow_incomplete , unescape_special ) ;
if ( escape_chars = = 0 )
{
/* A 0 return indicates an error */
errored = true ;
}
else
{
/* Skip over the characters we read, minus one because the outer loop will increment it */
assert ( escape_chars > 0 ) ;
input_position + = escape_chars - 1 ;
}
/* We've already appended, don't append anything else */
to_append = NOT_A_WCHAR ;
break ;
}
case L ' ~ ' :
{
if ( unescape_special & & ( input_position = = 0 ) )
{
to_append = HOME_DIRECTORY ;
}
break ;
}
case L ' % ' :
{
if ( unescape_special & & ( input_position = = 0 ) )
{
to_append = PROCESS_EXPAND ;
}
break ;
}
case L ' * ' :
{
if ( unescape_special )
{
/* In general, this is ANY_STRING. But as a hack, if the last appended char is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to reflect the fact that ** is the recursive wildcard. */
if ( string_last_char ( result ) = = ANY_STRING )
{
assert ( result . size ( ) > 0 ) ;
result . resize ( result . size ( ) - 1 ) ;
to_append = ANY_STRING_RECURSIVE ;
}
else
{
to_append = ANY_STRING ;
}
}
break ;
}
case L ' ? ' :
{
if ( unescape_special )
{
to_append = ANY_CHAR ;
}
break ;
}
case L ' $ ' :
{
if ( unescape_special )
{
to_append = VARIABLE_EXPAND ;
}
break ;
}
case L ' { ' :
{
if ( unescape_special )
{
bracket_count + + ;
to_append = BRACKET_BEGIN ;
}
break ;
}
case L ' } ' :
{
if ( unescape_special )
{
bracket_count - - ;
to_append = BRACKET_END ;
}
break ;
}
case L ' , ' :
{
/* If the last character was a separator, then treat this as a literal comma */
if ( unescape_special & & bracket_count > 0 & & string_last_char ( result ) ! = BRACKET_SEP )
{
to_append = BRACKET_SEP ;
}
break ;
}
case L ' \' ' :
{
mode = mode_single_quotes ;
to_append = unescape_special ? INTERNAL_SEPARATOR : NOT_A_WCHAR ;
break ;
}
case L ' \" ' :
{
mode = mode_double_quotes ;
to_append = unescape_special ? INTERNAL_SEPARATOR : NOT_A_WCHAR ;
break ;
}
}
}
else if ( mode = = mode_single_quotes )
{
if ( c = = L ' \\ ' )
{
/* A backslash may or may not escape something in single quotes */
switch ( input [ input_position + 1 ] )
{
case ' \\ ' :
case L ' \' ' :
{
to_append = input [ input_position + 1 ] ;
input_position + = 1 ; /* Skip over the backslash */
break ;
}
case L ' \0 ' :
{
if ( ! allow_incomplete )
{
errored = true ;
}
else
{
// PCA this line had the following cryptic comment:
// 'We may ever escape a NULL character, but still appending a \ in case I am wrong.'
// Not sure what it means or the importance of this
input_position + = 1 ; /* Skip over the backslash */
to_append = L ' \\ ' ;
}
}
break ;
default :
{
/* Literal backslash that doesn't escape anything! Leave things alone; we'll append the backslash itself */
break ;
}
}
}
else if ( c = = L ' \' ' )
{
to_append = unescape_special ? INTERNAL_SEPARATOR : NOT_A_WCHAR ;
mode = mode_unquoted ;
}
}
else if ( mode = = mode_double_quotes )
{
switch ( c )
{
case L ' " ' :
{
mode = mode_unquoted ;
to_append = unescape_special ? INTERNAL_SEPARATOR : NOT_A_WCHAR ;
break ;
}
case ' \\ ' :
{
switch ( input [ input_position + 1 ] )
{
case L ' \0 ' :
{
if ( ! allow_incomplete )
{
errored = true ;
}
else
{
to_append = L ' \0 ' ;
}
}
break ;
case ' \\ ' :
case L ' $ ' :
case ' " ' :
{
to_append = input [ input_position + 1 ] ;
input_position + = 1 ; /* Skip over the backslash */
break ;
}
case ' \n ' :
{
/* Swallow newline */
to_append = NOT_A_WCHAR ;
break ;
}
default :
{
/* Literal backslash that doesn't escape anything! Leave things alone; we'll append the backslash itself */
break ;
}
}
break ;
}
case ' $ ' :
{
if ( unescape_special )
{
to_append = VARIABLE_EXPAND_SINGLE ;
}
break ;
}
}
}
/* Now maybe append the char */
if ( to_append ! = NOT_A_WCHAR )
{
result . push_back ( to_append ) ;
}
}
/* Return the string by reference, and then success */
if ( ! errored )
{
output_str - > swap ( result ) ;
}
return ! errored ;
}
2012-11-19 08:30:30 +08:00
wchar_t * unescape ( const wchar_t * orig , int flags )
{
int out_pos ;
size_t in_pos ;
size_t len ;
int c ;
int bracket_count = 0 ;
wchar_t prev = 0 ;
wchar_t * in ;
2012-11-24 03:12:22 +08:00
bool unescape_special = ! ! ( flags & UNESCAPE_SPECIAL ) ;
bool allow_incomplete = ! ! ( flags & UNESCAPE_INCOMPLETE ) ;
2012-11-19 08:30:30 +08:00
CHECK ( orig , 0 ) ;
len = wcslen ( orig ) ;
in = wcsdup ( orig ) ;
if ( ! in )
DIE_MEM ( ) ;
2012-11-24 03:12:22 +08:00
enum
{
2012-11-22 17:09:07 +08:00
mode_unquoted ,
mode_single_quotes ,
mode_double_quotes
} mode = mode_unquoted ;
2012-11-19 08:30:30 +08:00
for ( in_pos = 0 , out_pos = 0 ;
in_pos < len ;
( prev = ( out_pos > = 0 ) ? in [ out_pos ] : 0 ) , out_pos + + , in_pos + + )
{
c = in [ in_pos ] ;
switch ( mode )
{
2012-11-24 03:12:22 +08:00
/*
Mode 0 means unquoted string
*/
2012-11-22 17:09:07 +08:00
case mode_unquoted :
2012-11-19 16:31:03 +08:00
{
if ( c = = L ' \\ ' )
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
switch ( in [ + + in_pos ] )
{
2012-11-19 08:30:30 +08:00
2012-11-24 03:12:22 +08:00
/*
A null character after a backslash is an
error , return null
*/
2012-11-19 16:31:03 +08:00
case L ' \0 ' :
{
if ( ! allow_incomplete )
{
free ( in ) ;
return 0 ;
}
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
Numeric escape sequences . No prefix means
octal escape , otherwise hexadecimal .
*/
case L ' 0 ' :
case L ' 1 ' :
case L ' 2 ' :
case L ' 3 ' :
case L ' 4 ' :
case L ' 5 ' :
case L ' 6 ' :
case L ' 7 ' :
case L ' u ' :
case L ' U ' :
case L ' x ' :
case L ' X ' :
{
int i ;
long long res = 0 ;
int chars = 2 ;
int base = 16 ;
int byte = 0 ;
wchar_t max_val = ASCII_MAX ;
switch ( in [ in_pos ] )
{
case L ' u ' :
{
chars = 4 ;
max_val = UCS2_MAX ;
break ;
}
case L ' U ' :
{
chars = 8 ;
max_val = WCHAR_MAX ;
break ;
}
case L ' x ' :
{
break ;
}
case L ' X ' :
{
byte = 1 ;
max_val = BYTE_MAX ;
break ;
}
default :
{
base = 8 ;
chars = 3 ;
// note in_pod must be larger than 0 since we incremented it above
assert ( in_pos > 0 ) ;
in_pos - - ;
break ;
}
}
for ( i = 0 ; i < chars ; i + + )
{
long d = convert_digit ( in [ + + in_pos ] , base ) ;
if ( d < 0 )
{
in_pos - - ;
break ;
}
2012-12-30 08:55:48 +08:00
res = ( res * base ) + d ;
2012-11-19 16:31:03 +08:00
}
if ( ( res < = max_val ) )
{
in [ out_pos ] = ( wchar_t ) ( ( byte ? ENCODE_DIRECT_BASE : 0 ) + res ) ;
}
else
{
free ( in ) ;
return 0 ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
\ a means bell ( alert )
*/
case L ' a ' :
{
in [ out_pos ] = L ' \a ' ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
\ b means backspace
*/
case L ' b ' :
{
in [ out_pos ] = L ' \b ' ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
\ cX means control sequence X
*/
case L ' c ' :
{
in_pos + + ;
if ( in [ in_pos ] > = L ' a ' & &
in [ in_pos ] < = ( L ' a ' + 32 ) )
{
in [ out_pos ] = in [ in_pos ] - L ' a ' + 1 ;
}
else if ( in [ in_pos ] > = L ' A ' & &
in [ in_pos ] < = ( L ' A ' + 32 ) )
{
in [ out_pos ] = in [ in_pos ] - L ' A ' + 1 ;
}
else
{
free ( in ) ;
return 0 ;
}
break ;
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
\ x1b means escape
*/
case L ' e ' :
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
in [ out_pos ] = L ' \x1b ' ;
2012-11-19 08:30:30 +08:00
break ;
}
2012-11-19 16:31:03 +08:00
/*
\ f means form feed
*/
case L ' f ' :
{
in [ out_pos ] = L ' \f ' ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
\ n means newline
*/
case L ' n ' :
{
in [ out_pos ] = L ' \n ' ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
\ r means carriage return
*/
case L ' r ' :
{
in [ out_pos ] = L ' \r ' ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
\ t means tab
*/
case L ' t ' :
{
in [ out_pos ] = L ' \t ' ;
break ;
}
2012-11-24 03:12:22 +08:00
2012-11-19 16:31:03 +08:00
/*
\ v means vertical tab
*/
case L ' v ' :
{
in [ out_pos ] = L ' \v ' ;
break ;
}
2012-11-24 03:12:22 +08:00
2012-11-22 17:09:07 +08:00
/* If a backslash is followed by an actual newline, swallow them both */
case L ' \n ' :
out_pos - - ;
break ;
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
default :
{
if ( unescape_special )
in [ out_pos + + ] = INTERNAL_SEPARATOR ;
in [ out_pos ] = in [ in_pos ] ;
break ;
}
2012-11-19 08:30:30 +08:00
}
}
2012-11-19 16:31:03 +08:00
else
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
switch ( in [ in_pos ] )
{
case L ' ~ ' :
{
if ( unescape_special & & ( in_pos = = 0 ) )
{
in [ out_pos ] = HOME_DIRECTORY ;
}
else
{
in [ out_pos ] = L ' ~ ' ;
}
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' % ' :
{
if ( unescape_special & & ( in_pos = = 0 ) )
{
in [ out_pos ] = PROCESS_EXPAND ;
}
else
{
in [ out_pos ] = in [ in_pos ] ;
}
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' * ' :
{
if ( unescape_special )
{
if ( out_pos > 0 & & in [ out_pos - 1 ] = = ANY_STRING )
{
out_pos - - ;
in [ out_pos ] = ANY_STRING_RECURSIVE ;
}
else
in [ out_pos ] = ANY_STRING ;
}
else
{
in [ out_pos ] = in [ in_pos ] ;
}
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' ? ' :
{
if ( unescape_special )
{
in [ out_pos ] = ANY_CHAR ;
}
else
{
in [ out_pos ] = in [ in_pos ] ;
}
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' $ ' :
{
if ( unescape_special )
{
in [ out_pos ] = VARIABLE_EXPAND ;
}
else
{
in [ out_pos ] = in [ in_pos ] ;
}
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' { ' :
{
if ( unescape_special )
{
bracket_count + + ;
in [ out_pos ] = BRACKET_BEGIN ;
}
else
{
in [ out_pos ] = in [ in_pos ] ;
}
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' } ' :
{
if ( unescape_special )
{
bracket_count - - ;
in [ out_pos ] = BRACKET_END ;
}
else
{
in [ out_pos ] = in [ in_pos ] ;
}
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' , ' :
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
if ( unescape_special & & bracket_count & & prev ! = BRACKET_SEP )
{
in [ out_pos ] = BRACKET_SEP ;
}
else
{
in [ out_pos ] = in [ in_pos ] ;
}
break ;
2012-11-19 08:30:30 +08:00
}
2012-11-19 16:31:03 +08:00
case L ' \' ' :
{
2012-11-22 17:09:07 +08:00
mode = mode_single_quotes ;
2012-11-19 16:31:03 +08:00
if ( unescape_special )
in [ out_pos ] = INTERNAL_SEPARATOR ;
else
out_pos - - ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
case L ' \" ' :
{
2012-11-22 17:09:07 +08:00
mode = mode_double_quotes ;
2012-11-19 16:31:03 +08:00
if ( unescape_special )
in [ out_pos ] = INTERNAL_SEPARATOR ;
else
out_pos - - ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
default :
{
in [ out_pos ] = in [ in_pos ] ;
break ;
}
2012-11-19 08:30:30 +08:00
}
}
2012-11-19 16:31:03 +08:00
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-19 16:31:03 +08:00
/*
2012-11-22 17:09:07 +08:00
Mode 1 means single quoted string , i . e ' foo ' .
A backslash at the end of a line in a single quoted string does not swallow the backslash or newline .
2012-11-19 16:31:03 +08:00
*/
2012-11-22 17:09:07 +08:00
case mode_single_quotes :
2012-11-19 16:31:03 +08:00
{
if ( c = = L ' \\ ' )
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
switch ( in [ + + in_pos ] )
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
case ' \\ ' :
case L ' \' ' :
{
in [ out_pos ] = in [ in_pos ] ;
break ;
}
2012-11-19 08:30:30 +08:00
2012-11-22 17:09:07 +08:00
case L ' \0 ' :
2012-11-19 16:31:03 +08:00
{
if ( ! allow_incomplete )
{
free ( in ) ;
return 0 ;
}
else
{
//We may ever escape a NULL character, but still appending a \ in case I am wrong.
in [ out_pos ] = L ' \\ ' ;
}
}
break ;
2012-11-24 03:12:22 +08:00
2012-11-19 16:31:03 +08:00
default :
{
in [ out_pos + + ] = L ' \\ ' ;
in [ out_pos ] = in [ in_pos ] ;
}
2012-11-19 08:30:30 +08:00
}
}
2012-11-19 16:31:03 +08:00
if ( c = = L ' \' ' )
2012-11-19 08:30:30 +08:00
{
if ( unescape_special )
in [ out_pos ] = INTERNAL_SEPARATOR ;
else
out_pos - - ;
2012-11-22 17:09:07 +08:00
mode = mode_unquoted ;
2012-11-19 08:30:30 +08:00
}
2012-11-19 16:31:03 +08:00
else
2012-11-19 08:30:30 +08:00
{
in [ out_pos ] = in [ in_pos ] ;
}
2012-11-19 16:31:03 +08:00
break ;
2012-11-19 08:30:30 +08:00
}
2012-11-19 16:31:03 +08:00
/*
Mode 2 means double quoted string , i . e . " foo "
*/
2012-11-22 17:09:07 +08:00
case mode_double_quotes :
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
switch ( c )
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
case ' " ' :
2012-11-19 08:30:30 +08:00
{
2012-11-22 17:09:07 +08:00
mode = mode_unquoted ;
2012-11-19 16:31:03 +08:00
if ( unescape_special )
in [ out_pos ] = INTERNAL_SEPARATOR ;
else
out_pos - - ;
break ;
2012-11-19 08:30:30 +08:00
}
2012-11-19 16:31:03 +08:00
case ' \\ ' :
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
switch ( in [ + + in_pos ] )
{
case L ' \0 ' :
{
if ( ! allow_incomplete )
{
free ( in ) ;
return 0 ;
}
else
{
//We probably don't need it since NULL character is always appended before ending this function.
in [ out_pos ] = in [ in_pos ] ;
}
}
break ;
case ' \\ ' :
case L ' $ ' :
case ' " ' :
{
in [ out_pos ] = in [ in_pos ] ;
break ;
}
2012-11-24 03:12:22 +08:00
2012-11-22 17:09:07 +08:00
case ' \n ' :
{
out_pos - - ;
break ;
}
2012-11-19 16:31:03 +08:00
default :
{
in [ out_pos + + ] = L ' \\ ' ;
in [ out_pos ] = in [ in_pos ] ;
break ;
}
}
break ;
2012-11-19 08:30:30 +08:00
}
2012-11-19 16:31:03 +08:00
case ' $ ' :
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
if ( unescape_special )
{
in [ out_pos ] = VARIABLE_EXPAND_SINGLE ;
}
else
{
in [ out_pos ] = in [ in_pos ] ;
}
break ;
2012-11-19 08:30:30 +08:00
}
2012-11-19 16:31:03 +08:00
default :
2012-11-19 08:30:30 +08:00
{
2012-11-19 16:31:03 +08:00
in [ out_pos ] = in [ in_pos ] ;
break ;
2012-11-19 08:30:30 +08:00
}
}
break ;
}
}
}
if ( ! allow_incomplete & & mode )
{
free ( in ) ;
return 0 ;
}
in [ out_pos ] = L ' \0 ' ;
return in ;
}
2013-11-25 14:57:49 +08:00
/* Unescapes *str and stores the result back into it. Returns true on success; when unescaping fails, *str keeps its previous contents and false is returned. */
bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special)
{
    assert(str != NULL);
    wcstring unescaped;
    if (!unescape_string_internal(str->c_str(), str->size(), &unescaped, escape_special))
    {
        return false;
    }
    str->swap(unescaped);
    return true;
}
2013-11-25 14:57:49 +08:00
bool unescape_string ( const wchar_t * input , wcstring * output , unescape_flags_t escape_special )
{
bool success = unescape_string_internal ( input , wcslen ( input ) , output , escape_special ) ;
if ( ! success )
output - > clear ( ) ;
return success ;
}
bool unescape_string ( const wcstring & input , wcstring * output , unescape_flags_t escape_special )
{
bool success = unescape_string_internal ( input . c_str ( ) , input . size ( ) , output , escape_special ) ;
if ( ! success )
output - > clear ( ) ;
return success ;
}
2012-11-19 08:30:30 +08:00
/* Refreshes termsize. With HAVE_WINSIZE the size is queried from file descriptor 1 via the TIOCGWINSZ ioctl; without it, a fixed 80x24 is stored. The signal argument is not used. */
void common_handle_winch(int signal)
{
#ifndef HAVE_WINSIZE
    termsize.ws_col = 80;
    termsize.ws_row = 24;
#else
    const int ioctl_result = ioctl(1, TIOCGWINSZ, &termsize);
    if (ioctl_result != 0)
    {
        /* The query failed; there is nothing further to do */
        return;
    }
#endif
}
/* Returns the column count currently stored in termsize */
int common_get_width()
{
    const int columns = termsize.ws_col;
    return columns;
}
/* Returns the row count currently stored in termsize */
int common_get_height()
{
    const int rows = termsize.ws_row;
    return rows;
}
void tokenize_variable_array ( const wcstring & val , std : : vector < wcstring > & out )
{
size_t pos = 0 , end = val . size ( ) ;
while ( pos < end )
{
size_t next_pos = val . find ( ARRAY_SEP , pos ) ;
if ( next_pos = = wcstring : : npos ) break ;
out . push_back ( val . substr ( pos , next_pos - pos ) ) ;
pos = next_pos + 1 ; //skip the separator
}
out . push_back ( val . substr ( pos , end - pos ) ) ;
}
/* Returns true if value begins with the null terminated string proposed_prefix */
bool string_prefixes_string(const wchar_t *proposed_prefix, const wcstring &value)
{
    const size_t prefix_len = wcslen(proposed_prefix);
    if (prefix_len > value.size())
    {
        return false;
    }
    return value.compare(0, prefix_len, proposed_prefix) == 0;
}
/* Returns true if value begins with proposed_prefix */
bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value)
{
    const size_t prefix_len = proposed_prefix.size();
    if (prefix_len > value.size())
    {
        return false;
    }
    return value.compare(0, prefix_len, proposed_prefix) == 0;
}
/* Returns true if value begins with proposed_prefix, comparing with wcsncasecmp (i.e. ignoring case) */
bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix, const wcstring &value)
{
    const size_t prefix_len = proposed_prefix.size();
    if (prefix_len > value.size())
    {
        return false;
    }
    return wcsncasecmp(proposed_prefix.c_str(), value.c_str(), prefix_len) == 0;
}
/* Returns true if value ends with proposed_suffix */
bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value)
{
    const size_t suffix_len = proposed_suffix.size();
    if (suffix_len > value.size())
    {
        return false;
    }
    const size_t tail_start = value.size() - suffix_len;
    return value.compare(tail_start, suffix_len, proposed_suffix) == 0;
}
/* Returns true if value ends with the null terminated string proposed_suffix */
bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value)
{
    const size_t suffix_len = wcslen(proposed_suffix);
    if (suffix_len > value.size())
    {
        return false;
    }
    const size_t tail_start = value.size() - suffix_len;
    return value.compare(tail_start, suffix_len, proposed_suffix) == 0;
}
2013-05-26 06:41:18 +08:00
// Returns true if seq, represented as a subsequence, is contained within string
static bool subsequence_in_string ( const wcstring & seq , const wcstring & str )
{
/* Impossible if seq is larger than string */
if ( seq . size ( ) > str . size ( ) )
{
return false ;
}
2013-06-02 16:14:26 +08:00
2013-05-26 06:41:18 +08:00
/* Empty strings are considered to be subsequences of everything */
if ( seq . empty ( ) )
{
return true ;
}
2013-06-02 16:14:26 +08:00
2013-05-26 06:41:18 +08:00
size_t str_idx , seq_idx ;
for ( seq_idx = str_idx = 0 ; seq_idx < seq . size ( ) & & str_idx < str . size ( ) ; seq_idx + + )
{
wchar_t c = seq . at ( seq_idx ) ;
size_t char_loc = str . find ( c , str_idx ) ;
if ( char_loc = = wcstring : : npos )
{
/* Didn't find this character */
break ;
}
else
{
/* We found it. Continue the search just after it. */
str_idx = char_loc + 1 ;
}
}
2013-06-02 16:14:26 +08:00
2013-05-26 06:41:18 +08:00
/* We succeeded if we exhausted our sequence */
assert ( seq_idx < = seq . size ( ) ) ;
return seq_idx = = seq . size ( ) ;
}
/* Constructs a match result: t is stored as type, distance_first as match_distance_first and distance_second as match_distance_second */
string_fuzzy_match_t : : string_fuzzy_match_t ( enum fuzzy_match_type_t t , size_t distance_first , size_t distance_second ) :
2013-06-02 16:14:26 +08:00
type ( t ) ,
match_distance_first ( distance_first ) ,
match_distance_second ( distance_second )
2013-05-26 06:41:18 +08:00
{
}
/* Classifies how string matches match_against. The match kinds are tried in a fixed order (exact, prefix, case insensitive, case insensitive prefix, substring, subsequence) and the first that applies is reported; a kind is only tried when limit_type is at least that kind. If nothing applies the result has type fuzzy_match_none. Distances are generally the amount of text not matched. */
string_fuzzy_match_t string_fuzzy_match_string(const wcstring &string, const wcstring &match_against, fuzzy_match_type_t limit_type)
{
    string_fuzzy_match_t result(fuzzy_match_none, 0, 0);

    if (limit_type >= fuzzy_match_exact && string == match_against)
    {
        result.type = fuzzy_match_exact;
        return result;
    }

    if (limit_type >= fuzzy_match_prefix && string_prefixes_string(string, match_against))
    {
        result.type = fuzzy_match_prefix;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        return result;
    }

    if (limit_type >= fuzzy_match_case_insensitive && wcscasecmp(string.c_str(), match_against.c_str()) == 0)
    {
        result.type = fuzzy_match_case_insensitive;
        return result;
    }

    if (limit_type >= fuzzy_match_prefix_case_insensitive && string_prefixes_string_case_insensitive(string, match_against))
    {
        result.type = fuzzy_match_prefix_case_insensitive;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        return result;
    }

    if (limit_type >= fuzzy_match_substring)
    {
        const size_t location = match_against.find(string);
        if (location != wcstring::npos)
        {
            // string occurs somewhere inside match_against
            result.type = fuzzy_match_substring;
            assert(match_against.size() >= string.size());
            result.match_distance_first = match_against.size() - string.size();
            result.match_distance_second = location; //prefer earlier matches
            return result;
        }
    }

    if (limit_type >= fuzzy_match_subsequence_insertions_only && subsequence_in_string(string, match_against))
    {
        result.type = fuzzy_match_subsequence_insertions_only;
        assert(match_against.size() >= string.size());
        result.match_distance_first = match_against.size() - string.size();
        // it would be nice to prefer matches with greater matching runs here
    }

    return result;
}
/* Three-way comparison: 0 if a equals b, -1 if a is less than b, 1 otherwise */
template<typename T>
static inline int compare_ints(T a, T b)
{
    if (a == b)
    {
        return 0;
    }
    return (a < b) ? -1 : 1;
}
// Compare types; if the types match, compare distances
int string_fuzzy_match_t : : compare ( const string_fuzzy_match_t & rhs ) const
{
if ( this - > type ! = rhs . type )
{
return compare_ints ( this - > type , rhs . type ) ;
}
else if ( this - > match_distance_first ! = rhs . match_distance_first )
{
return compare_ints ( this - > match_distance_first , rhs . match_distance_first ) ;
}
else if ( this - > match_distance_second ! = rhs . match_distance_second )
{
return compare_ints ( this - > match_distance_second , rhs . match_distance_second ) ;
}
return 0 ; //equal
}
2012-11-19 08:30:30 +08:00
bool list_contains_string ( const wcstring_list_t & list , const wcstring & str )
{
return std : : find ( list . begin ( ) , list . end ( ) , str ) ! = list . end ( ) ;
}
int create_directory ( const wcstring & d )
{
int ok = 0 ;
struct stat buf ;
int stat_res = 0 ;
while ( ( stat_res = wstat ( d , & buf ) ) ! = 0 )
{
if ( errno ! = EAGAIN )
break ;
}
if ( stat_res = = 0 )
{
if ( S_ISDIR ( buf . st_mode ) )
{
ok = 1 ;
}
}
else
{
if ( errno = = ENOENT )
{
wcstring dir = wdirname ( d ) ;
if ( ! create_directory ( dir ) )
{
if ( ! wmkdir ( d , 0700 ) )
{
ok = 1 ;
}
}
}
}
return ok ? 0 : - 1 ;
}
/* Emits, via debug() at level 1, a message asking the user to report a bug to PACKAGE_BUGREPORT. Marked noinline; the message itself suggests setting a breakpoint on this function. */
__attribute__ ( ( noinline ) )
void bugreport ( )
{
debug ( 1 ,
_ ( L " This is a bug. Break on bugreport to debug. "
L " If you can reproduce it, please send a bug report to %s. " ) ,
PACKAGE_BUGREPORT ) ;
}
/* Formats a byte count for display.
   Negative sizes give "unknown", zero gives the translated "empty", and sizes below 1024 are printed as a plain byte count. Larger sizes are scaled by powers of 1024 and given a unit suffix from kB up to YB; values below 10 in the chosen unit are printed with one decimal place, larger ones as whole numbers. */
wcstring format_size(long long sz)
{
    wcstring result;
    const wchar_t *sz_name[] =
    {
        L"kB", L"MB", L"GB", L"TB", L"PB", L"EB", L"ZB", L"YB", 0
    };

    if (sz < 0)
    {
        result.append(L"unknown");
    }
    else if (sz < 1)
    {
        result.append(_(L"empty"));
    }
    else if (sz < 1024)
    {
        result.append(format_string(L"%lldB", sz));
    }
    else
    {
        for (int i = 0; sz_name[i]; i++)
        {
            /* Stop at the first unit in which the value is below 1024, or at the largest unit available */
            if (sz < (1024*1024) || !sz_name[i+1])
            {
                /* Keep the full long long width and use the matching %lld conversion: a narrower type would truncate large values, and a mismatched conversion is undefined behavior */
                const long long isz = sz / 1024;
                if (isz > 9)
                    result.append(format_string(L"%lld%ls", isz, sz_name[i]));
                else
                    result.append(format_string(L"%.1f%ls", (double)sz/1024, sz_name[i]));
                break;
            }
            sz /= 1024;
        }
    }
    return result;
}
2009-02-03 06:46:45 +08:00
2012-03-01 03:27:14 +08:00
/* Removes the leading decimal digit from *xp and returns it as an ASCII character. Since the remainder is stored as a number, any zeros that directly followed the removed digit are lost: 105 yields '1' and leaves 5. A value of 0 yields '0' and stays 0. */
static char extract_most_significant_digit(unsigned long long *xp)
{
    unsigned long long leading = *xp;
    unsigned long long scale = 1;
    /* Strip low digits until one is left, tracking the place value of that digit */
    for (; leading >= 10; leading /= 10)
    {
        scale *= 10;
    }
    *xp -= leading * scale;
    return leading + '0';
}
2012-11-19 08:30:30 +08:00
/* Writes the decimal digits of val into buff starting at index *inout_idx, most significant digit first, and advances *inout_idx past them. Writing stops once the index reaches max_len, so an over-long number keeps only its leading digits. A val of 0 writes nothing. No terminator is written and no library functions are called. */
void append_ull(char *buff, unsigned long long val, size_t *inout_idx, size_t max_len)
{
    /* Gather the digits least significant first. Three decimal digits per byte is more than any unsigned long long needs. Peeling off the leading digit numerically instead would lose zero digits (105 would come out as "15"). */
    char digits[sizeof(unsigned long long) * 3];
    size_t digit_count = 0;
    while (val > 0)
    {
        digits[digit_count++] = (char)('0' + (val % 10));
        val /= 10;
    }

    /* Copy them out in reverse, i.e. most significant first, as far as the buffer allows */
    size_t idx = *inout_idx;
    while (digit_count > 0 && idx < max_len)
    {
        buff[idx++] = digits[--digit_count];
    }
    *inout_idx = idx;
}
2012-11-19 08:30:30 +08:00
/* Copies the characters of str into buff starting at index *inout_idx and advances *inout_idx past them. Copying stops at the end of str or once the index reaches max_len, whichever comes first. No terminator is written. */
void append_str(char *buff, const char *str, size_t *inout_idx, size_t max_len)
{
    size_t cursor = *inout_idx;
    for (const char *src = str; *src != '\0' && cursor < max_len; src++)
    {
        buff[cursor] = *src;
        cursor++;
    }
    *inout_idx = cursor;
}
2012-11-19 08:30:30 +08:00
/* Formats a byte count into the caller's 128 byte buffer using only memset and the append_* helpers in this file. The buffer is zeroed first and at most 127 characters are written, so the result is always null terminated.
   Zero gives "empty" and sizes below 1024 a plain byte count. Larger sizes are scaled by powers of 1024 with a unit suffix from kB up to YB; values below 10 in the chosen unit get one (truncated, not rounded) decimal digit unless they are an exact multiple of the unit. */
void format_size_safe(char buff[128], unsigned long long sz)
{
    const size_t buff_size = 128;
    const size_t max_len = buff_size - 1; //need to leave room for a null terminator
    memset(buff, 0, buff_size);
    size_t idx = 0;
    const char * const sz_name[] =
    {
        "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL
    };

    if (sz < 1)
    {
        append_str(buff, "empty", &idx, max_len);
    }
    else if (sz < 1024)
    {
        append_ull(buff, sz, &idx, max_len);
        append_str(buff, "B", &idx, max_len);
    }
    else
    {
        for (size_t i = 0; sz_name[i]; i++)
        {
            /* Stop at the first unit in which the value is below 1024, or at the largest unit available */
            if (sz < (1024*1024) || !sz_name[i+1])
            {
                unsigned long long isz = sz / 1024;
                if (isz > 9)
                {
                    append_ull(buff, isz, &idx, max_len);
                }
                else
                {
                    if (isz == 0)
                    {
                        /* append_ull is not relied on to print a zero */
                        append_str(buff, "0", &idx, max_len);
                    }
                    else
                    {
                        append_ull(buff, isz, &idx, max_len);
                    }

                    // Maybe append a single fraction digit
                    unsigned long long remainder = sz % 1024;
                    if (remainder > 0)
                    {
                        /* Tenths of the unit: remainder is below 1024, so this is a single digit 0..9. (The leading digit of the remainder itself would be wrong: 10/1024 is 0.0, not 0.1.) */
                        const unsigned long long tenths = (remainder * 10) / 1024;
                        char tmp[3] = {'.', (char)('0' + tenths), 0};
                        append_str(buff, tmp, &idx, max_len);
                    }
                }
                append_str(buff, sz_name[i], &idx, max_len);
                break;
            }
            sz /= 1024;
        }
    }
}
2009-02-03 06:46:45 +08:00
/* Returns the time reported by gettimeofday() as seconds with a fractional microsecond part, or NaN if gettimeofday() fails. */
double timef()
{
    struct timeval now;
    if (gettimeofday(&now, 0) != 0)
    {
        /*
          Fixme: it is unclear what argument nan() should be given. Its
          meaning is implementation defined, and Gcc warns when a null
          pointer is passed, so an empty string is used.
        */
        return nan("");
    }
    return (double)now.tv_sec + 0.000001 * now.tv_usec;
}
/* Ends the process with the given status by calling _exit() directly */
void exit_without_destructors(int code)
{
    const int status = code;
    _exit(status);
}
2012-03-01 03:27:14 +08:00
/* Helper function to convert from a null_terminated_array_t<wchar_t> to a null_terminated_array_t<char_t> */
2013-02-23 08:22:56 +08:00
void convert_wide_array_to_narrow ( const null_terminated_array_t < wchar_t > & wide_arr , null_terminated_array_t < char > * output )
2012-11-19 08:30:30 +08:00
{
2012-03-01 03:27:14 +08:00
const wchar_t * const * arr = wide_arr . get ( ) ;
if ( ! arr )
2013-02-23 08:22:56 +08:00
{
output - > clear ( ) ;
return ;
}
2012-11-19 08:30:30 +08:00
2012-03-01 03:27:14 +08:00
std : : vector < std : : string > list ;
2012-11-19 08:30:30 +08:00
for ( size_t i = 0 ; arr [ i ] ; i + + )
{
2012-03-01 03:27:14 +08:00
list . push_back ( wcs2string ( arr [ i ] ) ) ;
}
2013-02-23 08:22:56 +08:00
output - > set ( list ) ;
2012-03-01 03:27:14 +08:00
}
2011-12-27 11:18:46 +08:00
/**
   Append a path component to a path, making sure that exactly one slash
   separates the two when both are non-empty. If either one is empty the
   component is appended as is.

   \param path the path to extend in place
   \param component the component to append
*/
void append_path_component(wcstring &path, const wcstring &component)
{
    if (!path.empty() && !component.empty())
    {
        const size_t last_idx = path.size() - 1;
        const bool path_ends_with_slash = (path.at(last_idx) == L'/');
        const bool comp_starts_with_slash = (component.at(0) == L'/');

        if (path_ends_with_slash && comp_starts_with_slash)
        {
            // Too many slashes: drop the one at the end of path
            path.erase(last_idx, 1);
        }
        else if (!path_ends_with_slash && !comp_starts_with_slash)
        {
            // Neither side supplies a slash, so add one
            path.push_back(L'/');
        }
    }
    path.append(component);
}
extern "C" {
    /**
       Never returns: sleeps in an endless loop. The warnings printed by the
       thread and fork assertions in this file tell the user to break on
       this function to debug.
    */
    __attribute__((noinline)) void debug_thread_error(void)
    {
        for (;;)
        {
            sleep(9999999);
        }
    }
}
2012-11-19 08:30:30 +08:00
/**
   Record the calling thread as the main thread by storing pthread_self()
   in main_thread_id. is_main_thread() compares against this value.
*/
void set_main_thread()
{
    main_thread_id = pthread_self();
}
2012-11-19 08:30:30 +08:00
void configure_thread_assertions_for_testing ( void )
{
2012-05-14 11:19:02 +08:00
thread_assertions_configured_for_testing = true ;
}
2012-02-28 10:43:24 +08:00
/* Notice when we've forked: this holds the pid stashed by setup_fork_guards(), which is_forked_child() compares against getpid(). While it is 0, is_forked_child() always returns false. */
2012-06-17 12:25:33 +08:00
static pid_t initial_pid = 0 ;
2012-03-07 07:12:37 +08:00
2012-11-18 18:16:14 +08:00
/* Be able to restore the term's foreground process group: this holds the value stored by save_term_foreground_process_group(). While it is -1, restore_term_foreground_process_group() does nothing. */
static pid_t initial_foreground_process_group = - 1 ;
2012-11-19 08:30:30 +08:00
bool is_forked_child ( void )
{
2012-06-17 12:25:33 +08:00
/* Just bail if nobody's called setup_fork_guards - e.g. fishd */
if ( ! initial_pid ) return false ;
2012-11-19 08:30:30 +08:00
2012-03-07 06:34:18 +08:00
bool is_child_of_fork = ( getpid ( ) ! = initial_pid ) ;
2012-11-19 08:30:30 +08:00
if ( is_child_of_fork )
{
2012-03-01 03:27:14 +08:00
printf ( " Uh-oh: %d \n " , getpid ( ) ) ;
while ( 1 ) sleep ( 10000 ) ;
}
2012-02-28 10:43:24 +08:00
return is_child_of_fork ;
}
2012-11-18 18:16:14 +08:00
void setup_fork_guards ( void )
{
2012-03-07 06:34:18 +08:00
/* Notice when we fork by stashing our pid. This seems simpler than pthread_atfork(). */
initial_pid = getpid ( ) ;
2012-02-28 10:43:24 +08:00
}
2012-11-18 18:16:14 +08:00
/**
   Store the result of tcgetpgrp() on standard input in
   initial_foreground_process_group, so that
   restore_term_foreground_process_group() can put it back later.
*/
void save_term_foreground_process_group(void)
{
    const pid_t foreground_group = tcgetpgrp(STDIN_FILENO);
    initial_foreground_process_group = foreground_group;
}
/**
   Hand the value stored in initial_foreground_process_group back to
   tcsetpgrp() on standard input. Does nothing while that value is -1.
   The result of tcsetpgrp() is not checked.
*/
void restore_term_foreground_process_group(void)
{
    if (initial_foreground_process_group == -1)
    {
        return;
    }
    tcsetpgrp(STDIN_FILENO, initial_foreground_process_group);
}
2012-11-19 08:30:30 +08:00
/**
   Check whether the calling thread is the one recorded by
   set_main_thread(). Asserts that main_thread_id is non-zero first.

   \return true if pthread_self() equals main_thread_id
*/
bool is_main_thread()
{
    assert(main_thread_id != 0);
    const bool on_main_thread = (main_thread_id == pthread_self());
    return on_main_thread;
}
2011-12-27 11:18:46 +08:00
void assert_is_main_thread ( const char * who )
{
2012-11-19 08:30:30 +08:00
if ( ! is_main_thread ( ) & & ! thread_assertions_configured_for_testing )
{
2011-12-27 11:18:46 +08:00
fprintf ( stderr , " Warning: %s called off of main thread. Break on debug_thread_error to debug. \n " , who ) ;
debug_thread_error ( ) ;
2012-02-28 10:43:24 +08:00
}
}
void assert_is_not_forked_child ( const char * who )
{
2012-11-19 08:30:30 +08:00
if ( is_forked_child ( ) )
{
2012-02-28 10:43:24 +08:00
fprintf ( stderr , " Warning: %s called in a forked child. Break on debug_thread_error to debug. \n " , who ) ;
debug_thread_error ( ) ;
2011-12-27 11:18:46 +08:00
}
}
void assert_is_background_thread ( const char * who )
{
2012-11-19 08:30:30 +08:00
if ( is_main_thread ( ) & & ! thread_assertions_configured_for_testing )
{
2011-12-27 11:18:46 +08:00
fprintf ( stderr , " Warning: %s called on the main thread (may block!). Break on debug_thread_error to debug. \n " , who ) ;
debug_thread_error ( ) ;
2012-02-25 04:13:35 +08:00
}
}
2012-04-22 11:08:08 +08:00
void assert_is_locked ( void * vmutex , const char * who , const char * caller )
2012-02-25 04:13:35 +08:00
{
pthread_mutex_t * mutex = static_cast < pthread_mutex_t * > ( vmutex ) ;
2012-11-19 08:30:30 +08:00
if ( 0 = = pthread_mutex_trylock ( mutex ) )
{
2012-04-22 11:08:08 +08:00
fprintf ( stderr , " Warning: %s is not locked when it should be in '%s'. Break on debug_thread_error to debug. \n " , who , caller ) ;
2012-02-25 04:13:35 +08:00
debug_thread_error ( ) ;
pthread_mutex_unlock ( mutex ) ;
}
2011-12-27 11:18:46 +08:00
}
2012-02-28 10:43:24 +08:00
2012-11-19 08:30:30 +08:00
void scoped_lock : : lock ( void )
{
2012-02-28 10:43:24 +08:00
assert ( ! locked ) ;
assert ( ! is_forked_child ( ) ) ;
VOMIT_ON_FAILURE ( pthread_mutex_lock ( lock_obj ) ) ;
locked = true ;
}
2012-11-19 08:30:30 +08:00
void scoped_lock : : unlock ( void )
{
2012-02-28 10:43:24 +08:00
assert ( locked ) ;
assert ( ! is_forked_child ( ) ) ;
VOMIT_ON_FAILURE ( pthread_mutex_unlock ( lock_obj ) ) ;
locked = false ;
}
2012-11-19 08:30:30 +08:00
/**
   Construct a scoped_lock for the given mutex and acquire it
   immediately.

   \param mutex the mutex to lock; its address is kept in lock_obj
*/
scoped_lock::scoped_lock(pthread_mutex_t &mutex)
    : lock_obj(&mutex),
      locked(false)
{
    lock();
}
2012-11-19 08:30:30 +08:00
/**
   Release the mutex on destruction if this object still holds it.
*/
scoped_lock::~scoped_lock()
{
    if (locked)
    {
        unlock();
    }
}
2012-07-21 05:33:08 +08:00
2012-08-06 04:24:33 +08:00
/**
   Construct a tokenizer over a private copy of s.

   \param s the string to split; it is duplicated with wcsdup() because
   wcstok() is handed a non-const buffer
   \param separator the separator string passed to wcstok()
*/
wcstokenizer::wcstokenizer(const wcstring &s, const wcstring &separator)
    : buffer(),
      str(),
      state(),
      sep(separator)
{
    /* The copy is released by free() in the destructor */
    this->buffer = wcsdup(s.c_str());
    this->str = this->buffer;
    this->state = NULL;
}
2012-11-19 08:30:30 +08:00
bool wcstokenizer : : next ( wcstring & result )
{
2012-07-21 05:33:08 +08:00
wchar_t * tmp = wcstok ( str , sep . c_str ( ) , & state ) ;
str = NULL ;
if ( tmp ) result = tmp ;
return tmp ! = NULL ;
}
2012-11-19 08:30:30 +08:00
/**
   Release the copy of the input string made by the constructor.
*/
wcstokenizer::~wcstokenizer()
{
    free(this->buffer);
}
2013-02-23 08:22:56 +08:00
/**
   Build a NULL-terminated array of null-terminated strings from a vector
   of strings, using one single allocation.

   The block is laid out as N + 1 pointers (the last one NULL), optional
   padding so that the character data starts at a multiple of
   sizeof(CharType_t), and then the N strings back to back, each followed
   by its own terminator.

   \param argv the strings to copy
   \return the array, obtained from a single malloc() call so that one
   free() of the returned pointer releases everything; NULL if malloc() fails
*/
template <typename CharType_t>
static CharType_t **make_null_terminated_array_helper(const std::vector<std::basic_string<CharType_t> > &argv)
{
    const size_t count = argv.size();

    /* We allocate everything in one giant block. First compute how much space we need. */
    /* N + 1 pointers */
    const size_t pointers_len = (count + 1) * sizeof(CharType_t *);

    /* In the very unlikely event that CharType_t has stricter alignment requirements than does a pointer, start the strings at the next multiple of the size of a CharType_t */
    size_t strings_offset = pointers_len + sizeof(CharType_t) - 1;
    strings_offset -= strings_offset % sizeof(CharType_t);

    /* N null terminated strings */
    size_t strings_allocation_len = 0;
    for (size_t i = 0; i < count; i++)
    {
        /* The size of the string, plus a null terminator */
        strings_allocation_len += (argv.at(i).size() + 1) * sizeof(CharType_t);
    }

    /* Now allocate their sum */
    unsigned char *base = static_cast<unsigned char *>(malloc(strings_offset + strings_allocation_len));
    if (! base) return NULL;

    /* Divvy it up into the pointers and strings */
    CharType_t **pointers = reinterpret_cast<CharType_t **>(base);
    CharType_t *strings_start = reinterpret_cast<CharType_t *>(base + strings_offset);
    CharType_t *strings = strings_start;

    /* Start copying */
    for (size_t i = 0; i < count; i++)
    {
        const std::basic_string<CharType_t> &str = argv.at(i);
        // store the current string pointer into the pointer table
        *pointers++ = strings;

        // copy the string into strings
        strings = std::copy(str.begin(), str.end(), strings);
        // each string needs a null terminator
        *strings++ = (CharType_t)(0);
    }
    // array of pointers needs a null terminator
    *pointers++ = NULL;

    // Make sure we know what we're doing. The pointer table is checked
    // against its unpadded size and the strings against where they
    // actually start, so these stay true even when padding was inserted.
    assert((unsigned char *)pointers - base == (std::ptrdiff_t)pointers_len);
    assert((unsigned char *)strings - (unsigned char *)strings_start == (std::ptrdiff_t)strings_allocation_len);
    assert((unsigned char *)strings - base == (std::ptrdiff_t)(strings_offset + strings_allocation_len));

    // Return what we did
    return reinterpret_cast<CharType_t **>(base);
}
wchar_t * * make_null_terminated_array ( const wcstring_list_t & lst )
{
return make_null_terminated_array_helper ( lst ) ;
}
char * * make_null_terminated_array ( const std : : vector < std : : string > & lst )
{
return make_null_terminated_array_helper ( lst ) ;
}