fish-shell/common.c
axel f643a4e200 Fix minor bugs in parser
darcs-hash:20050924193117-ac50b-eb8ecfe4fec3bca6f89356a9079977802eb4a2c8.gz
2005-09-25 05:31:17 +10:00

1065 lines
18 KiB
C

/** \file common.c
Various functions, mostly string utilities, that are used by most
parts of fish.
*/
#include <stdlib.h>
#include <termios.h>
#include <wchar.h>
#include <string.h>
#include <stdio.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <wctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <signal.h>
#include <locale.h>
#include <time.h>
#if HAVE_NCURSES_H
#include <ncurses.h>
#else
#include <curses.h>
#endif
#if HAVE_TERMIO_H
#include <termio.h>
#endif
#include <term.h>
#include "config.h"
#include "util.h"
#include "wutil.h"
#include "common.h"
#include "expand.h"
#include "proc.h"
#include "wildcard.h"
#include "parser.h"
/**
Error message to show on string convertion error
*/
#define STR2WCS_MSG "fish: Invalid multibyte sequence \'"
/**
The maximum number of minor errors to report. Further errors will be omitted.
*/
#define ERROR_MAX_COUNT 1
struct termios shell_modes;
/**
Number of error encountered. This is reset after each command, and
used to limit the number of error messages on commands with many
string convertion problems.
*/
static int error_count=0;
int error_max=1;
wchar_t ellipsis_char;
static int c1=0, c2=0, c3=0, c4=0, c5;
char *profile=0;
wchar_t *program_name;
int debug_level=1;
static int block_count=0;
void common_destroy()
{
debug( 3, L"Calls: wcsdupcat %d, wcsdupcat2 %d, wcsndup %d, str2wcs %d, wcs2str %d", c1, c2, c3, c4, c5 );
}
wchar_t **list_to_char_arr( array_list_t *l )
{
wchar_t ** res = malloc( sizeof(wchar_t *)*(al_get_count( l )+1) );
int i;
if( res == 0 )
{
die_mem();
}
for( i=0; i<al_get_count( l ); i++ )
res[i] = (wchar_t *)al_get(l,i);
res[i]='\0';
return res;
}
void block()
{
block_count++;
if( block_count == 1 )
{
sigset_t chldset;
sigemptyset( &chldset );
sigaddset( &chldset, SIGCHLD );
sigprocmask(SIG_BLOCK, &chldset, 0);
}
}
void unblock()
{
block_count--;
if( block_count == 0 )
{
sigset_t chldset;
sigemptyset( &chldset );
sigaddset( &chldset, SIGCHLD );
sigprocmask(SIG_UNBLOCK, &chldset, 0);
}
}
int fgetws2( wchar_t **b, int *len, FILE *f )
{
int i=0;
wint_t c;
wchar_t *buff = *b;
/*
This is a kludge: We block SIGCHLD while reading, since I can't
get getwc to perform reliably when signals are flying. Even when
watching for EINTR errors, bytes are lost.
*/
block();
while( 1 )
{
/* Reallocate the buffer if necessary */
if( i+1 >= *len )
{
int new_len = maxi( 128, (*len)*2);
buff = realloc( buff, sizeof(wchar_t)*new_len );
if( buff == 0 )
{
die_mem();
}
else
{
*len = new_len;
*b = buff;
}
}
errno=0;
c = getwc( f );
if( errno == EILSEQ )
{
getc( f );
continue;
}
//fwprintf( stderr, L"b\n" );
switch( c )
{
/* End of line */
case WEOF:
case L'\n':
case L'\0':
buff[i]=L'\0';
unblock();
return i;
/* Ignore carriage returns */
case L'\r':
break;
default:
buff[i++]=c;
break;
}
}
unblock();
}
/**
Wrapper for wcsfilecmp
*/
static int completion_cmp( const void *a, const void *b )
{
wchar_t *c= *((wchar_t **)a);
wchar_t *d= *((wchar_t **)b);
return wcsfilecmp( c, d );
}
void sort_list( array_list_t *comp )
{
qsort( comp->arr,
al_get_count( comp ),
sizeof( void*),
&completion_cmp );
}
wchar_t *str2wcs( const char *in )
{
c4++;
wchar_t *res;
res = malloc( sizeof(wchar_t)*(strlen(in)+1) );
if( !res )
{
die_mem();
}
if( (size_t)-1 == mbstowcs( res, in, sizeof(wchar_t)*(strlen(in)) +1) )
{
error_count++;
if( error_count <=error_max )
{
fflush( stderr );
write( 2,
STR2WCS_MSG,
strlen(STR2WCS_MSG) );
write( 2,
in,
strlen(in ));
write( 2,
"\'\n",
2 );
}
free(res);
return 0;
}
return res;
}
void error_reset()
{
error_count=0;
}
char *wcs2str( const wchar_t *in )
{
c5++;
char *res = malloc( MAX_UTF8_BYTES*wcslen(in)+1 );
if( res == 0 )
{
die_mem();
}
wcstombs( res,
in,
MAX_UTF8_BYTES*wcslen(in)+1 );
res = realloc( res, strlen( res )+1 );
return res;
}
char **wcsv2strv( const wchar_t **in )
{
int count =0;
int i;
while( in[count] != 0 )
count++;
char **res = malloc( sizeof( char *)*(count+1));
if( res == 0 )
{
die_mem();
}
for( i=0; i<count; i++ )
{
res[i]=wcs2str(in[i]);
}
res[count]=0;
return res;
}
wchar_t *wcsdupcat( const wchar_t *a, const wchar_t *b )
{
c1++;
return wcsdupcat2( a, b, 0 );
}
wchar_t *wcsdupcat2( const wchar_t *a, ... )
{
c2++;
int len=wcslen(a);
int pos;
va_list va, va2;
wchar_t *arg;
va_start( va, a );
va_copy( va2, va );
while( (arg=va_arg(va, wchar_t *) )!= 0 )
{
len += wcslen( arg );
}
va_end( va );
wchar_t *res = malloc( sizeof(wchar_t)*(len +1 ));
if( res == 0 )
return 0;
wcscpy( res, a );
pos = wcslen(a);
while( (arg=va_arg(va2, wchar_t *) )!= 0 )
{
wcscpy( res+pos, arg );
pos += wcslen(arg);
}
va_end( va2 );
return res;
}
wchar_t **strv2wcsv( const char **in )
{
int count =0;
int i;
while( in[count] != 0 )
count++;
wchar_t **res = malloc( sizeof( wchar_t *)*(count+1));
if( res == 0 )
{
die_mem();
}
for( i=0; i<count; i++ )
{
res[i]=str2wcs(in[i]);
}
res[count]=0;
return res;
}
wchar_t *wcsndup( const wchar_t *in, int c )
{
c3++;
wchar_t *res = malloc( sizeof(wchar_t)*(c+1) );
if( res == 0 )
{
die_mem();
}
wcsncpy( res, in, c );
res[c] = L'\0';
return res;
}
long convert_digit( wchar_t d, int base )
{
long res=-1;
if( (d <= L'9') && (d >= L'0') )
{
res = d - L'0';
}
else if( (d <= L'z') && (d >= L'a') )
{
res = d + 10 - L'a';
}
else if( (d <= L'Z') && (d >= L'A') )
{
res = d + 10 - L'A';
}
if( res >= base )
{
res = -1;
}
return res;
}
long wcstol(const wchar_t *nptr,
wchar_t **endptr,
int base)
{
long long res=0;
int is_set=0;
if( base > 36 )
{
errno = EINVAL;
return 0;
}
while( 1 )
{
long nxt = convert_digit( *nptr, base );
if( endptr != 0 )
*endptr = (wchar_t *)nptr;
if( nxt < 0 )
{
if( !is_set )
{
errno = EINVAL;
}
return res;
}
res = (res*base)+nxt;
is_set = 1;
if( res > LONG_MAX )
{
errno = ERANGE;
return LONG_MAX;
}
if( res < LONG_MIN )
{
errno = ERANGE;
return LONG_MIN;
}
nptr++;
}
}
/*$OpenBSD: strlcat.c,v 1.11 2003/06/17 21:56:24 millert Exp $*/
/*
* Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/**
Appends src to string dst of size siz (unlike wcsncat, siz is the
full size of dst, not space left). At most siz-1 characters will be
copied. Always NUL terminates (unless siz <= wcslen(dst)). Returns
wcslen(src) + MIN(siz, wcslen(initial dst)). If retval >= siz,
truncation occurred.
This is the OpenBSD strlcat function, modified for wide characters,
and renamed to reflect this change.
*/
size_t
wcslcat(wchar_t *dst, const wchar_t *src, size_t siz)
{
register wchar_t *d = dst;
register const wchar_t *s = src;
register size_t n = siz;
size_t dlen;
/* Find the end of dst and adjust bytes left but don't go past end */
while (n-- != 0 && *d != '\0')
d++;
dlen = d - dst;
n = siz - dlen;
if (n == 0)
return(dlen + wcslen(s));
while (*s != '\0')
{
if (n != 1)
{
*d++ = *s;
n--;
}
s++;
}
*d = '\0';
return(dlen + (s - src));
/* count does not include NUL */
}
/*$OpenBSD: strlcpy.c,v 1.8 2003/06/17 21:56:24 millert Exp $*/
/*
* Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/**
Copy src to string dst of size siz. At most siz-1 characters will
be copied. Always NUL terminates (unless siz == 0). Returns
wcslen(src); if retval >= siz, truncation occurred.
This is the OpenBSD strlcpy function, modified for wide characters,
and renamed to reflect this change.
*/
size_t
wcslcpy(wchar_t *dst, const wchar_t *src, size_t siz)
{
register wchar_t *d = dst;
register const wchar_t *s = src;
register size_t n = siz;
/* Copy as many bytes as will fit */
if (n != 0 && --n != 0)
{
do
{
if ((*d++ = *s++) == 0)
break;
}
while (--n != 0);
}
/* Not enough room in dst, add NUL and traverse rest of src */
if (n == 0)
{
if (siz != 0)
*d = '\0';
/* NUL-terminate dst */
while (*s++)
;
}
return(s - src - 1);
/* count does not include NUL */
}
wchar_t *wcsdup( const wchar_t *in )
{
wchar_t *out = malloc( sizeof( wchar_t)*(wcslen(in)+1));
if( out == 0 )
{
die_mem();
}
wcscpy( out, in );
return out;
}
int wcscasecmp( const wchar_t *a, const wchar_t *b )
{
if( *a == 0 )
{
return (*b==0)?0:-1;
}
else if( *b == 0 )
{
return 1;
}
int diff = towlower(*a)-towlower(*b);
if( diff != 0 )
return diff;
else
return wcscasecmp( a+1,b+1);
}
int wcsvarname( wchar_t *str )
{
while( *str )
{
if( (!iswalnum(*str)) && (*str != L'_' ) )
{
return 0;
}
str++;
}
return 1;
}
#if !HAVE_WCWIDTH
/**
Return the number of columns used by a character.
In locales without a native wcwidth, Unicode is probably so broken
that it isn't worth trying to implement a real wcwidth. This
wcwidth assumes any printing character takes up one column.
*/
int wcwidth( wchar_t c )
{
if( c < 32 )
return 0;
if ( c == 127 )
return 0;
return 1;
}
#endif
/**
The glibc version of wcswidth seems to hang on some strings. fish uses this replacement.
*/
int my_wcswidth( const wchar_t *c )
{
int res=0;
while( *c )
{
int w = wcwidth( *c++ );
if( w < 0 )
w = 1;
if( w > 2 )
w=1;
res += w;
}
return res;
}
wchar_t *quote_end( const wchar_t *in )
{
int level=1;
int offset = (*in != L'\"');
in++;
while(1)
{
/* fwprintf( stderr, L"Check %c\n", *tok->buff );*/
switch( *in )
{
case L'\\':
in++;
if( *in == L'\0' )
{
return 0;
}
break;
case L'\"':
case L'\'':
if( (((level+offset) % 2)?L'\"':L'\'') == *in )
{
level--;
}
else
{
level++;
}
break;
}
if( (*in == L'\0') ||(level==0))
break;
in++;
}
return level?0:(wchar_t *)in;
}
void fish_setlocale(int category, const wchar_t *locale)
{
char *lang = wcs2str( locale );
setlocale(category,lang);
free( lang );
/*
Use ellipsis if on known unicode system, otherwise use $
*/
if( wcslen( locale ) )
{
ellipsis_char = wcsstr( locale, L".UTF")?L'\u2026':L'$';
}
else
{
char *lang = getenv( "LANG" );
if( lang )
ellipsis_char = strstr( lang, ".UTF")?L'\u2026':L'$';
else
ellipsis_char = L'$';
}
}
int contains_str( const wchar_t *a, ... )
{
wchar_t *arg;
va_list va;
int res = 0;
va_start( va, a );
while( (arg=va_arg(va, wchar_t *) )!= 0 )
{
if( wcscmp( a,arg) == 0 )
{
res=1;
break;
}
}
va_end( va );
return res;
}
int read_blocked(int fd, void *buf, size_t count)
{
int res;
sigset_t chldset, oldset;
sigemptyset( &chldset );
sigaddset( &chldset, SIGCHLD );
sigprocmask(SIG_BLOCK, &chldset, &oldset);
res = read( fd, buf, count );
sigprocmask( SIG_SETMASK, &oldset, 0 );
return res;
}
int writeb( tputs_arg_t b )
{
write( 1, &b, 1 );
// putc( b, stdout );
return 0;
}
void die_mem()
{
/*
int *foo=0;
*foo = 6;
*/
debug( 0, L"Out of memory, shutting down fish." );
exit(1);
}
void debug( int level, wchar_t *msg, ... )
{
va_list va;
if( level > debug_level )
return;
va_start( va, msg );
fwprintf( stderr, L"%ls: ", program_name );
vfwprintf( stderr, msg, va );
va_end( va );
fwprintf( stderr, L"\n" );
}
wchar_t *escape( wchar_t *in,
int escape_all )
{
wchar_t *killme=in;
wchar_t *out = malloc( sizeof(wchar_t)*(wcslen(in)*4 + 1));
if( out != 0 )
{
wchar_t *pos=out;
while( *in != 0 )
{
switch( *in )
{
case L'\t':
*(pos++) = L'\\';
*(pos++) = L't';
break;
case L'\n':
*(pos++) = L'\\';
*(pos++) = L'n';
break;
case L'\b':
*(pos++) = L'\\';
*(pos++) = L'b';
break;
case L'\r':
*(pos++) = L'\\';
*(pos++) = L'r';
break;
case L'\e':
*(pos++) = L'\\';
*(pos++) = L'e';
break;
case L'\\':
case L'&':
case L'$':
case L' ':
case L'#':
case L'^':
case L'<':
case L'>':
case L'@':
case L'(':
case L')':
case L'{':
case L'}':
case L'?':
case L'*':
case L'|':
case L';':
case L':':
case L'\'':
case L'\"':
if( escape_all )
*pos++ = L'\\';
*pos++ = *in;
break;
default:
if( *in < 32 )
{
int tmp = (*in)%16;
*pos++ = L'\\';
*pos++ = L'x';
*pos++ = ((*in>15)? L'1' : L'0');
*pos++ = tmp > 9? L'a'+(tmp-10):L'0'+tmp;
}
else
*pos++ = *in;
break;
}
in++;
}
*pos = 0;
free(killme);
}
return out;
}
wchar_t *unescape( wchar_t * in, int escape_special )
{
int in_pos, out_pos, len = wcslen( in );
int c;
int bracket_count=0;
wchar_t prev=0;
for( in_pos=0, out_pos=0; in_pos<len; prev=in[out_pos], out_pos++, in_pos++ )
{
c = in[in_pos];
if( c == L'\\' )
{
switch( in[++in_pos] )
{
case L'\0':
error( SYNTAX_ERROR, L"Unexpected end of string", -1 );
return in;
break;
case L'n':
in[out_pos]=L'\n';
break;
case L'r':
in[out_pos]=L'\r';
break;
case L't':
in[out_pos]=L'\t';
break;
case L'b':
in[out_pos]=L'\b';
break;
case L'e':
in[out_pos]=L'\e';
break;
case L'u':
case L'U':
case L'x':
case L'o':
{
int i;
wchar_t res=0;
int chars=2;
int base=16;
switch( in[in_pos] )
{
case L'u':
base=16;
chars=4;
break;
case L'U':
base=16;
chars=8;
break;
case L'x':
base=16;
chars=2;
break;
case L'o':
base=8;
chars=3;
break;
}
for( i=0; i<chars; i++ )
{
int d = convert_digit( in[++in_pos],base);
if( d < 0 )
{
in_pos--;
break;
}
res=(res*base)|d;
}
in[out_pos] = res;
break;
}
default:
in[out_pos]=in[in_pos];
break;
}
}
else
{
switch( in[in_pos]){
case L'~':
if( escape_special && (in_pos == 0) )
in[out_pos]=HOME_DIRECTORY;
else
in[out_pos] = L'~';
break;
case L'%':
if( escape_special && (in_pos == 0) )
in[out_pos]=PROCESS_EXPAND;
else
in[out_pos]=in[in_pos];
break;
case L'*':
if( escape_special )
{
if( out_pos > 0 && in[out_pos-1]==ANY_STRING )
{
out_pos--;
in[out_pos] = ANY_STRING_RECURSIVE;
}
else
in[out_pos]=ANY_STRING;
}
else
in[out_pos]=in[in_pos];
break;
case L'?':
if( escape_special )
in[out_pos]=ANY_CHAR;
else
in[out_pos]=in[in_pos];
break;
case L'$':
if( escape_special )
in[out_pos]=VARIABLE_EXPAND;
else
in[out_pos]=in[in_pos];
break;
case L'{':
if( escape_special )
{
bracket_count++;
in[out_pos]=BRACKET_BEGIN;
}
else
in[out_pos]=in[in_pos];
break;
case L'}':
if( escape_special )
{
bracket_count--;
in[out_pos]=BRACKET_END;
}
else
in[out_pos]=in[in_pos];
break;
case L',':
if( escape_special && bracket_count && prev!=BRACKET_SEP)
{
in[out_pos]=BRACKET_SEP;
}
else
in[out_pos]=in[in_pos];
break;
case L'\'':
case L'\"':
{
wchar_t *end = quote_end( &in[in_pos] );
int len;
if( end == 0 )
{
error( SYNTAX_ERROR, L"Unexpected end of string", -1 );
return in;
}
len = end- &in[in_pos]-1;
if( escape_special)
in[out_pos++]=INTERNAL_SEPARATOR;
memmove( &in[out_pos], &in[in_pos+1], sizeof(wchar_t)*(len) );
in_pos += len+1;
out_pos += len-1;
if( escape_special)
in[++out_pos]=INTERNAL_SEPARATOR;
break;
}
default:
in[out_pos] = in[in_pos];
break;
}
}
}
in[out_pos]=L'\0';
return in;
}