Properly handle character sets in universal variables

darcs-hash:20061017211129-ac50b-6aecd04b74a9122ff0e1b320a2cf48db36dd2194.gz
This commit is contained in:
axel 2006-10-18 07:11:29 +10:00
parent d563e428e8
commit f86f80fcd6
6 changed files with 237 additions and 51 deletions

View File

@ -1320,7 +1320,6 @@ g++, javac, java, gcj, lpr, doxygen, whois, find)
- Completion for gcc -\#\#\# option doesn't work.
- Suspending and then resuming pipelines containing a builtin is broken. How should this be handled?
- fishd should use utf-8, not whatever is the default character set
- screen handling code can't handle tabs in input
If you think you have found a bug not described here, please send a

View File

@ -17,13 +17,14 @@ When no clients are connected, fishd will automatically shut down.
\subsection fishd-files Files
~/.fishd.HOSTNAME permanent storage location for universal variable
data. The data is stored as a set of \c set and \c set_export commands
such as would be parsed by fishd. If an instance of fishd is running
(which is generally the case), manual modifications to ~/.fishd.HOSTNAME
will be lost. Do NOT edit this file manually!
\c ~/.config/fish/fishd.HOSTNAME permanent storage location for universal
variable data. The data is stored as a set of \c set and \c set_export
commands such as would be parsed by fishd. The file must always be
stored in ASCII format. If an instance of fishd is running (which is
generally the case), manual modifications to
~/.config/fish/fishd.HOSTNAME will be lost. Do NOT edit this file manually!
/tmp/fishd.socket.USERNAME the socket which fishd uses to communicate
\c /tmp/fishd.socket.USERNAME the socket which fishd uses to communicate
with all clients.
/tmp/fishd.log.USERNAME the fishd log file

View File

@ -236,10 +236,9 @@ void env_universal_init( wchar_t * p,
env_universal_server.fd = -1;
env_universal_server.killme = 0;
env_universal_server.fd = get_socket(1);
memset (&env_universal_server.wstate, '\0', sizeof (mbstate_t));
q_init( &env_universal_server.unsent );
env_universal_common_init( &callback );
sb_init( &env_universal_server.input );
b_init( &env_universal_server.input );
env_universal_read_all();
init = 1;
if( env_universal_server.fd >= 0 )

View File

@ -21,6 +21,7 @@
#include <sys/stat.h>
#include <dirent.h>
#include <wctype.h>
#include <iconv.h>
#include <errno.h>
#include <locale.h>
@ -125,6 +126,161 @@ static int get_names_show_exported;
static int get_names_show_unexported;
/**
   Convert a nul-terminated UTF-8 encoded string into a newly allocated,
   nul-terminated wide character string using iconv.

   \param in the UTF-8 encoded input string. Must not be null.

   \return the converted string, which the caller must free(), or 0 if
   memory could not be allocated, no suitable iconv converter could be
   opened, or the input could not be converted.
*/
wchar_t *utf2wcs( char *in )
{
	iconv_t cd = (iconv_t) -1;
	int i, j;
	int open_errno = 0;
	wchar_t *out;

	/*
	  The same encoding is tried under several spellings; the first
	  combination that iconv_open accepts is used.
	*/
	char *to_name[]=
		{
			"wchar_t", "WCHAR_T", "wchar", "WCHAR", 0
		}
	;

	char *from_name[]=
		{
			"utf-8", "UTF-8", "utf8", "UTF8", 0
		}
	;

	size_t in_len = strlen( in );

	/*
	  Every output character consumes at least one input byte, so
	  in_len wide characters is enough room. The slot for the
	  terminator is allocated separately and never offered to iconv,
	  which guarantees that writing the terminator stays in bounds.
	*/
	size_t out_len = sizeof( wchar_t )*in_len;
	size_t nconv;
	char *nout;

	out = malloc( out_len + sizeof( wchar_t ) );
	if( !out )
	{
		return 0;
	}
	nout = (char *)out;

	for( i=0; to_name[i] && cd == (iconv_t) -1; i++ )
	{
		for( j=0; from_name[j]; j++ )
		{
			cd = iconv_open( to_name[i], from_name[j] );
			if( cd != (iconv_t) -1 )
			{
				break;
			}
			/* Remember why it failed before any other call clobbers errno */
			open_errno = errno;
		}
	}

	if( cd == (iconv_t) -1 )
	{
		/* Something went wrong. */
		debug( 0, L"Could not perform utf-8 conversion" );
		if( open_errno != EINVAL )
		{
			errno = open_errno;
			wperror( L"iconv_open" );
		}
		free( out );
		return 0;
	}

	nconv = iconv( cd, &in, &in_len, &nout, &out_len );

	if( nconv == (size_t) -1 )
	{
		debug( 0, L"Error while converting from utf string" );
		/* Release the converter and the output buffer on failure */
		iconv_close( cd );
		free( out );
		return 0;
	}

	/* Terminate the output string. */
	*((wchar_t *) nout) = L'\0';

	if( iconv_close( cd ) != 0 )
	{
		wperror( L"iconv_close" );
	}

	return out;
}
/**
   Convert a nul-terminated wide character string into a newly
   allocated, nul-terminated UTF-8 encoded string using iconv.

   \param in the wide character input string. Must not be null.

   \return the converted string, which the caller must free(), or 0 if
   memory could not be allocated, no suitable iconv converter could be
   opened, or the input could not be converted.
*/
char *wcs2utf( wchar_t *in )
{
	iconv_t cd = (iconv_t) -1;
	int i, j;
	int open_errno = 0;

	char *char_in = (char *)in;
	char *out;

	/*
	  The same encoding is tried under several spellings; the first
	  combination that iconv_open accepts is used.
	*/
	char *from_name[]=
		{
			"wchar_t", "WCHAR_T", "wchar", "WCHAR", 0
		}
	;

	char *to_name[]=
		{
			"utf-8", "UTF-8", "utf8", "UTF8", 0
		}
	;

	size_t in_len = wcslen( in );

	/*
	  Room for MAX_UTF8_BYTES bytes per input character. The byte for
	  the terminator is allocated separately and never offered to
	  iconv, which guarantees that writing the terminator stays in
	  bounds.
	*/
	size_t out_len = sizeof( char )*( MAX_UTF8_BYTES*in_len );
	size_t nconv;
	char *nout;

	out = malloc( out_len + 1 );
	if( !out )
	{
		return 0;
	}
	nout = out;

	/* iconv counts input in bytes, not in wide characters */
	in_len *= sizeof( wchar_t );

	for( i=0; to_name[i] && cd == (iconv_t) -1; i++ )
	{
		for( j=0; from_name[j]; j++ )
		{
			cd = iconv_open( to_name[i], from_name[j] );
			if( cd != (iconv_t) -1 )
			{
				break;
			}
			/* Remember why it failed before any other call clobbers errno */
			open_errno = errno;
		}
	}

	if( cd == (iconv_t) -1 )
	{
		/* Something went wrong. */
		debug( 0, L"Could not perform utf-8 conversion" );
		if( open_errno != EINVAL )
		{
			errno = open_errno;
			wperror( L"iconv_open" );
		}
		free( out );
		return 0;
	}

	nconv = iconv( cd, &char_in, &in_len, &nout, &out_len );

	if( nconv == (size_t) -1 )
	{
		/* Cast so that the arguments match the %d conversions */
		debug( 0, L"%d %d", (int)in_len, (int)out_len );
		debug( 0, L"Error while converting from to string" );
		/* Release the converter and the output buffer on failure */
		iconv_close( cd );
		free( out );
		return 0;
	}

	/* Terminate the output string. */
	*nout = '\0';

	if( iconv_close( cd ) != 0 )
	{
		wperror( L"iconv_close" );
	}

	return out;
}
void env_universal_common_init( void (*cb)(int type, const wchar_t *key, const wchar_t *val ) )
{
callback = cb;
@ -195,8 +351,6 @@ void read_message( connection_t *src )
int ib = read_byte( src );
char b;
wchar_t res=0;
switch( ib )
{
case ENV_UNIVERSAL_AGAIN:
@ -218,8 +372,10 @@ void read_message( connection_t *src )
debug( 3, L"Fd %d has reached eof, set killme flag", src->fd );
if( src->input.used > 0 )
{
char c = 0;
b_append( &src->input, &c, 1 );
debug( 1,
L"Universal variable connection closed while reading command. Partial command recieved: '%ls'",
L"Universal variable connection closed while reading command. Partial command recieved: '%s'",
(wchar_t *)src->input.buff );
}
return;
@ -228,36 +384,37 @@ void read_message( connection_t *src )
b = (char)ib;
int sz = mbrtowc( &res, &b, 1, &src->wstate );
if( sz == -1 )
{
debug( 1, L"Error while reading universal variable after '%ls'", (wchar_t *)src->input.buff );
wperror( L"mbrtowc" );
}
else if( sz > 0 )
if( b == '\n' )
{
if( res == L'\n' )
wchar_t *msg;
b = 0;
b_append( &src->input, &b, 1 );
msg = utf2wcs( src->input.buff );
/*
Before calling parse_message, we must reset the input
buffer, since the callback function could potentially
call read_message.
*/
src->input.used=0;
if( msg )
{
/*
Before calling parse_message, we must empty reset
everything, since the callback function could
potentially call read_message.
*/
wchar_t *msg = wcsdup( (wchar_t *)src->input.buff );
sb_clear( &src->input );
memset (&src->wstate, '\0', sizeof (mbstate_t));
parse_message( msg, src );
free( msg );
}
else
{
sb_append_char( &src->input, res );
debug( 0, _(L"Could not convert message '%s' to wide character string"), src->input.buff );
}
free( msg );
}
else
{
b_append( &src->input, &b, 1 );
}
}
}
@ -464,6 +621,33 @@ void try_send_all( connection_t *c )
}
}
/**
   Escape a wide character string so that it consists solely of
   printable ASCII characters, using C-style backslash escapes.

   Characters below 32 and the backslash itself are written as \xHH,
   characters from 128 up to 65535 as \uHHHH and everything above that
   as \UHHHHHHHH. All other characters are copied unchanged. The
   backslash must be escaped because the receiving side interprets
   backslash sequences; copying it verbatim would make a value such as
   a literal backslash followed by 'n' read back as a newline.

   \param in the string to escape. Must not be null.

   \return the buffer of the string_buffer_t holding the escaped
   string. The caller is expected to free() it.
   NOTE(review): for an empty input nothing is appended, so the result
   is whatever sb_init leaves in out.buff - confirm this is non-null,
   since callers treat a null return as failure.
*/
static wchar_t *full_escape( const wchar_t *in )
{
	string_buffer_t out;
	sb_init( &out );

	for( ; *in; in++ )
	{
		/*
		  Work on an unsigned copy: wchar_t may be signed, and the
		  %x conversion expects an unsigned int argument.
		*/
		unsigned int c = (unsigned int)*in;

		if( c < 32 || c == (unsigned int)L'\\' )
		{
			sb_printf( &out, L"\\x%.2x", c );
		}
		else if( c < 128 )
		{
			sb_append_char( &out, *in );
		}
		else if( c < 65536 )
		{
			sb_printf( &out, L"\\u%.4x", c );
		}
		else
		{
			sb_printf( &out, L"\\U%.8x", c );
		}
	}

	return (wchar_t *)out.buff;
}
message_t *create_message( int type,
const wchar_t *key_in,
const wchar_t *val_in )
@ -477,7 +661,13 @@ message_t *create_message( int type,
if( key_in )
{
key = wcs2str(key_in);
if( wcsvarname( key_in ) )
{
debug( 0, L"Illegal variable name: '%ls'", key_in );
return 0;
}
key = wcs2utf(key_in);
if( !key )
{
debug( 0,
@ -498,11 +688,11 @@ message_t *create_message( int type,
val_in=L"";
}
wchar_t *esc = escape(val_in,1);
wchar_t *esc = full_escape( val_in );
if( !esc )
break;
char *val = wcs2str(esc );
char *val = wcs2utf(esc );
free(esc);
sz = strlen(type==SET?SET_MBS:SET_EXPORT_MBS) + strlen(key) + strlen(val) + 4;

View File

@ -71,15 +71,11 @@ typedef struct connection
Set to one when this connection should be killed
*/
int killme;
/**
The state used for character conversions
*/
mbstate_t wstate;
/**
The input string. Input from the socket goes here. When a
newline is encountered, the buffer is parsed and cleared.
*/
string_buffer_t input;
buffer_t input;
/**
The read buffer.

17
fishd.c
View File

@ -17,10 +17,12 @@ set_export KEY:VALUE
These commands update the value of a variable. The only difference
between the two is that <tt>set_export</tt>-variables should be
exported to children of the process using them. The variable value may
be escaped using C-style backslash escapes. In fact, this is required
for newline characters, which would otherwise be interpreted as end of
command.
exported to children of the process using them. When sending messages,
all values below 32 or above 127 must be escaped using C-style
backslash escapes. This means that the over-the-wire protocol is
ASCII. However, any conforming reader must also accept non-ASCII
characters and interpret them as UTF-8. Lines containing invalid UTF-8
sequences must be ignored entirely.
<pre>erase KEY
</pre>
@ -426,8 +428,7 @@ void load_or_save( int save)
}
debug( 4, L"File open on fd %d", c.fd );
sb_init( &c.input );
memset (&c.wstate, '\0', sizeof (mbstate_t));
b_init( &c.input );
q_init( &c.unsent );
if( save )
@ -609,8 +610,7 @@ int main( int argc, char ** argv )
new->next = conn;
q_init( &new->unsent );
new->killme=0;
sb_init( &new->input );
memset (&new->wstate, '\0', sizeof (mbstate_t));
b_init( &new->input );
send( new->fd, GREETING, strlen(GREETING), MSG_DONTWAIT );
enqueue_all( new );
conn=new;
@ -686,6 +686,7 @@ int main( int argc, char ** argv )
c=c->next;
}
}
if( !conn )
{
debug( 0, L"No more clients. Quitting" );