Add fallback implementations of wcstok, putwc and getwc. Move all fallbacks from common.c to wutil.c.

darcs-hash:20060120142721-ac50b-4b9850d889e2210e1d545339e29dcc038a3f2b04.gz
2025-03-23 03:05:13 +08:00 · 2006-01-21 00:27:21 +10:00 · 2006-01-21 00:27:21 +10:00 · a47065f648
commit a47065f648
parent 70351b0e4f
12 changed files with 431 additions and 241 deletions
--- a/builtin_commandline.c
+++ b/builtin_commandline.c
@ -13,6 +13,7 @@ Functions used for implementing the commandline builtin.

 #include "config.h"
 #include "util.h"
+#include "wutil.h"
 #include "builtin.h"
 #include "common.h"
 #include "wgetopt.h"
--- a/builtin_set.c
+++ b/builtin_set.c
@ -13,6 +13,7 @@ Functions used for implementing the set builtin.

 #include "config.h"
 #include "util.h"
+#include "wutil.h"
 #include "builtin.h"
 #include "env.h"
 #include "expand.h"
--- a/common.c
+++ b/common.c
@ -404,87 +404,6 @@ wchar_t **strv2wcsv( const char **in )
 }


-#ifndef HAVE_WCSNDUP
-wchar_t *wcsndup( const wchar_t *in, int c )
-{
-	c3++;
-	
-	wchar_t *res = malloc( sizeof(wchar_t)*(c+1) );
-	if( res == 0 )
-	{
-		die_mem();
-	}
-	wcsncpy( res, in, c );
-	res[c] = L'\0';	
-	return res;	
-}
-#endif
-
-long convert_digit( wchar_t d, int base )
-{
-	long res=-1;
-	if( (d <= L'9') && (d >= L'0') )
-	{
-		res = d - L'0';
-	}
-	else if( (d <= L'z') && (d >= L'a') )
-	{
-		res = d + 10 - L'a';		
-	}
-	else if( (d <= L'Z') && (d >= L'A') )
-	{
-		res = d + 10 - L'A';		
-	}
-	if( res >= base )
-	{
-		res = -1;
-	}
-	
-	return res;
-}
-
-
-long wcstol(const wchar_t *nptr, 
-			wchar_t **endptr,
-			int base)
-{
-	long long res=0;
-	int is_set=0;
-	if( base > 36 )
-	{
-		errno = EINVAL;
-		return 0;
-	}
-
-	while( 1 )
-	{
-		long nxt = convert_digit( *nptr, base );
-		if( endptr != 0 )
-			*endptr = (wchar_t *)nptr;
-		if( nxt < 0 )
-		{
-			if( !is_set )
-			{
-				errno = EINVAL;
-			}
-			return res;			
-		}
-		res = (res*base)+nxt;
-		is_set = 1;
-		if( res > LONG_MAX )
-		{
-			errno = ERANGE;
-			return LONG_MAX;
-		}
-		if( res < LONG_MIN )
-		{
-			errno = ERANGE;
-			return LONG_MIN;
-		}
-		nptr++;
-	}
-}
-
 /*$OpenBSD: strlcat.c,v 1.11 2003/06/17 21:56:24 millert Exp $*/

 /*
@ -586,75 +505,6 @@ wcslcpy(wchar_t *dst, const wchar_t *src, size_t siz)
 	/* count does not include NUL */
 }

-#ifndef HAVE_WCSDUP
-wchar_t *wcsdup( const wchar_t *in )
-{
-	size_t len=wcslen(in);
-	wchar_t *out = malloc( sizeof( wchar_t)*(len+1));
-	if( out == 0 )
-	{
-		die_mem();
-	}
-
-	memcpy( out, in, sizeof( wchar_t)*(len+1));
-	return out;
-	
-}
-#endif
-
-#ifndef HAVE_WCSLEN
-size_t wcslen(const wchar_t *in)
-{
-	const wchar_t *end=in;
-	while( *end )
-		end++;
-	return end-in;
-}
-#endif
-
-
-#ifndef HAVE_WCSCASECMP
-int wcscasecmp( const wchar_t *a, const wchar_t *b )
-{
-	if( *a == 0 )
-	{
-		return (*b==0)?0:-1;
-	}
-	else if( *b == 0 )
-	{
-		return 1;
-	}
-	int diff = towlower(*a)-towlower(*b);
-	if( diff != 0 )
-		return diff;
-	else
-		return wcscasecmp( a+1,b+1);
-}
-#endif
-
-
-#ifndef HAVE_WCSNCASECMP
-int wcsncasecmp( const wchar_t *a, const wchar_t *b, int count )
-{
-	if( count == 0 )
-		return 0;
-	
-	if( *a == 0 )
-	{
-		return (*b==0)?0:-1;
-	}
-	else if( *b == 0 )
-	{
-		return 1;
-	}
-	int diff = towlower(*a)-towlower(*b);
-	if( diff != 0 )
-		return diff;
-	else
-		return wcsncasecmp( a+1,b+1, count-1);
-}
-#endif
-
 int wcsvarname( wchar_t *str )
 {
 	while( *str )
@ -670,23 +520,6 @@ int wcsvarname( wchar_t *str )
 	
 }

-#if !HAVE_WCWIDTH
-/**
-   Return the number of columns used by a character. 
-
-   In locales without a native wcwidth, Unicode is probably so broken
-   that it isn't worth trying to implement a real wcwidth. This
-   wcwidth assumes any printing character takes up one column.
-*/
-int wcwidth( wchar_t c )
-{
-	if( c < 32 )
-		return 0;
-	if ( c == 127 )
-		return 0;
-	return 1;
-}
-#endif

 /** 
 	The glibc version of wcswidth seems to hang on some strings. fish uses this replacement.
--- a/common.h
+++ b/common.h
@ -134,31 +134,6 @@ wchar_t *wcsdupcat( const wchar_t *a, const wchar_t *b );
 */
 wchar_t *wcsdupcat2( const wchar_t *a, ... );

-/**
-   Returns a newly allocated wide character string wich is a copy of
-   the string in, but of length c or shorter. The returned string is
-   always null terminated, and the null is not included in the string
-   length.
-*/
-wchar_t *wcsndup( const wchar_t *in, int c );
-
-/**
-   Converts from wide char to digit in the specified base. If d is not
-   a valid digit in the specified base, return -1.
-*/
-long convert_digit( wchar_t d, int base );
-
-
-/**
-   Convert a wide character string to a number in the specified
-   base. This functions is the wide character string equivalent of
-   strtol. For bases of 10 or lower, 0..9 are used to represent
-   numbers. For bases below 36, a-z and A-Z are used to represent
-   numbers higher than 9. Higher bases than 36 are not supported.
-*/
-long wcstol(const wchar_t *nptr,
-			wchar_t **endptr,
-			int base);

 /**
   Appends src to string dst of size siz (unlike wcsncat, siz is the
@ -183,53 +158,12 @@ size_t wcslcat( wchar_t *dst, const wchar_t *src, size_t siz );
 */
 size_t wcslcpy( wchar_t *dst, const wchar_t *src, size_t siz );

-/**
-   Create a duplicate string. Wide string version of strdup. Will
-   automatically exit if out of memory.
-*/
-wchar_t *wcsdup(const wchar_t *in);
-
-size_t wcslen(const wchar_t *in);
-
-/**
-   Case insensitive string compare function. Wide string version of
-   strcasecmp.
-
-   This implementation of wcscasecmp does not take into account
-   esoteric locales where uppercase and lowercase do not cleanly
-   transform between each other. Hopefully this should be fine since
-   fish only uses this function with one of the strings supplied by
-   fish and guaranteed to be a sane, english word. Using wcscasecmp on
-   a user-supplied string should be considered a bug.
-*/
-int wcscasecmp( const wchar_t *a, const wchar_t *b );
-
-/**
-   Case insensitive string compare function. Wide string version of
-   strncasecmp.
-
-   This implementation of wcsncasecmp does not take into account
-   esoteric locales where uppercase and lowercase do not cleanly
-   transform between each other. Hopefully this should be fine since
-   fish only uses this function with one of the strings supplied by
-   fish and guaranteed to be a sane, english word. Using wcsncasecmp on
-   a user-supplied string should be considered a bug.
-*/
-int wcsncasecmp( const wchar_t *a, const wchar_t *b, int count );
-
 /**
   Test if the given string is a valid variable name
 */

 int wcsvarname( wchar_t *str );

-/**
-   The prototype for this function is missing in some libc
-   implementations. Fish has a fallback implementation in case the
-   implementation is missing altogether.
-*/
-int wcwidth( wchar_t c );
-

 /**
   A wcswidth workalike. Fish uses this since the regular wcswidth seems flaky.
--- a/configure.ac
+++ b/configure.ac
@ -152,7 +152,8 @@ AC_CHECK_LIB(intl, gettext)
 AC_CHECK_HEADERS([getopt.h termio.h sys/resource.h term.h ncurses/term.h libintl.h])

 # Check for various functions, and insert results into config.h
-AC_CHECK_FUNCS(wcsdup wcsndup wcslen wcscasecmp wcsncasecmp gettext wprintf futimes wcwidth wcswidth getopt_long ) 
+AC_CHECK_FUNCS( wcsdup wcsndup wcslen wcscasecmp wcsncasecmp gettext fwprintf )
+AC_CHECK_FUNCS( futimes wcwidth wcswidth getopt_long wcstok fputwc fgetwc wcstol )

 # Check again for gettext library, and insert results into the Makefile
 AC_CHECK_FUNC(gettext, AC_SUBST(HAVE_GETTEXT,1), AC_SUBST(HAVE_GETTEXT,0) )
--- a/event.c
+++ b/event.c
@ -13,6 +13,7 @@

 #include "config.h"
 #include "util.h"
+#include "wutil.h"
 #include "function.h"
 #include "proc.h"
 #include "parser.h"
--- a/function.c
+++ b/function.c
@ -9,6 +9,7 @@
 #include <signal.h>

 #include "config.h"
+#include "wutil.h"
 #include "util.h"
 #include "function.h"
 #include "proc.h"
--- a/intern.c
+++ b/intern.c
@ -11,6 +11,7 @@
 #include <unistd.h>

 #include "util.h"
+#include "wutil.h"
 #include "common.h"
 #include "intern.h"

--- a/proc.c
+++ b/proc.c
@ -650,12 +650,15 @@ int job_reap( int interactive )


 #ifdef HAVE__PROC_SELF_STAT
+
+#define FN_SIZE 256
+
 /**
   Get the CPU time for the specified process
 */
 unsigned long proc_get_jiffies( process_t *p )
 {
-	wchar_t fn[256];
+	wchar_t fn[FN_SIZE];
 	//char stat_line[1024];

 	char state;
@ -678,7 +681,7 @@ unsigned long proc_get_jiffies( process_t *p )
 	if( p->pid <= 0 )
 		return 0;
 	
-	swprintf( fn, 512, L"/proc/%d/stat", p->pid );
+	swprintf( fn, FN_SIZE, L"/proc/%d/stat", p->pid );
 	
 	FILE *f = wfopen( fn, "r" );
 	if( !f )
--- a/proc.h
+++ b/proc.h
@ -14,6 +14,7 @@
 #include <wchar.h>
 #include <signal.h>
 #include <unistd.h>
+#include <sys/time.h>

 #include "util.h"
 #include "io.h"
--- a/wutil.c
+++ b/wutil.c
@ -1,5 +1,7 @@
 /** \file wutil.c
-	Wide character equivalents of various standard unix functions. 
+	Wide character equivalents of various standard unix
+	functions. Also contains fallback implementations of a large number
+	of wide character unix functions.
 */
 #include "config.h"

@ -218,7 +220,7 @@ void wperror(const wchar_t *s)
 }


-#if !HAVE_WPRINTF
+#if !HAVE_FWPRINTF

 void pad( void (*writer)(wchar_t), int count)
 {
@ -707,3 +709,318 @@ int wprintf( const wchar_t *filter, ... )
 }

 #endif
+
+#ifndef HAVE_FGETWC
+
+wint_t fgetwc(FILE *stream)
+{
+	/* Fallback fgetwc: read bytes from stream until they form one complete
+	   multibyte character. Returns it, or WEOF on EOF or invalid input. */
+	wchar_t res=0;
+	mbstate_t state;
+	/* mbstate_t may be a struct, so zero it with memset, not '=0'. */
+	memset (&state, '\0', sizeof (state));
+
+	while(1)
+	{
+		int b = fgetc( stream );
+		char bb;
+		int sz;
+
+		if( b == EOF )
+			return WEOF;
+
+		bb=b;
+		/* Feed one byte at a time; state carries the partial character. */
+		sz = mbrtowc( &res, &bb, 1, &state );
+		switch( sz )
+		{
+			case -1: /* invalid multibyte sequence */
+				memset (&state, '\0', sizeof (state));
+				return WEOF;
+
+			case -2: /* incomplete character: read another byte */
+				break;
+			case 0: /* decoded the null wide character */
+				return 0;
+			default:
+				return res;
+		}
+	}
+}
+
+
+wint_t getwc(FILE *stream)
+{
+	return fgetwc( stream );
+}
+
+
+#endif
+
+#ifndef HAVE_FPUTWC
+
+wint_t fputwc(wchar_t wc, FILE *stream)
+{
+	/* Fallback fputwc: convert wc to its multibyte form and write it to
+	   stream. Returns wc on success, WEOF on conversion or write failure. */
+	char s[MB_CUR_MAX];
+	int len = wctomb( s, wc );
+	/* fwrite rather than fputs: exactly len bytes go out, even for L'\0'. */
+	return ( len >= 0 && fwrite( s, 1, len, stream ) == (size_t)len ) ? wc : WEOF;
+}
+
+wint_t putwc(wchar_t wc, FILE *stream)
+{
+	return fputwc( wc, stream );
+}
+
+#endif
+
+#ifndef HAVE_WCSTOK
+
+/*
+  Used by fallback wcstok. Borrowed from glibc
+*/
+static size_t wcsspn (const wchar_t *wcs,
+					  const wchar_t *accept )
+{
+	register const wchar_t *p;
+	register const wchar_t *a;
+	register size_t count = 0;
+
+	for (p = wcs; *p != L'\0'; ++p)
+    {
+		for (a = accept; *a != L'\0'; ++a)
+			if (*p == *a)
+				break;
+		
+		if (*a == L'\0')
+			return count;
+		else
+			++count;
+    }
+	return count;	
+}
+
+/*
+  Used by fallback wcstok. Borrowed from glibc
+*/
+static wchar_t *wcspbrk (const wchar_t *wcs, const wchar_t *accept)
+{
+	while (*wcs != L'\0')
+		if (wcschr (accept, *wcs) == NULL)
+			++wcs;
+		else
+			return (wchar_t *) wcs;
+	return NULL;	
+}
+
+/*
+  Fallback wcstok, borrowed from glibc. Splits wcs at any character in delim; *ptr holds the scan position between calls.
+*/
+wchar_t *wcstok(wchar_t *wcs, const wchar_t *delim, wchar_t **ptr)
+{
+	wchar_t *result;
+
+	if (wcs == NULL)
+    {
+		if (*ptr == NULL)
+        {
+			errno = EINVAL;
+			return NULL;
+        }
+		else
+			wcs = *ptr;
+    }
+
+	/* Scan leading delimiters.  */
+	wcs += wcsspn (wcs, delim);
+	
+	if (*wcs == L'\0')
+    {
+		*ptr = NULL;		
+		return NULL;
+    }
+
+	/* Find the end of the token.  */
+	result = wcs;
+	
+	wcs = wcspbrk (result, delim);
+	
+	if (wcs == NULL)
+	{
+		/* This token finishes the string.  */
+		*ptr = NULL;
+	}
+	else
+    {
+		/* Terminate the token and make *ptr point past it.  */
+		*wcs = L'\0';
+		*ptr = wcs + 1;
+    }
+	return result;
+}
+
+#endif
+
+#ifndef HAVE_WCSDUP
+wchar_t *wcsdup( const wchar_t *in )
+{
+	size_t len=wcslen(in);
+	wchar_t *out = malloc( sizeof( wchar_t)*(len+1));
+	if( out == 0 )
+	{
+		die_mem();
+	}
+
+	memcpy( out, in, sizeof( wchar_t)*(len+1));
+	return out;
+	
+}
+#endif
+
+#ifndef HAVE_WCSLEN
+size_t wcslen(const wchar_t *in)
+{
+	const wchar_t *end=in;
+	while( *end )
+		end++;
+	return end-in;
+}
+#endif
+
+
+#ifndef HAVE_WCSCASECMP
+int wcscasecmp( const wchar_t *a, const wchar_t *b )
+{
+	if( *a == 0 )
+	{
+		return (*b==0)?0:-1;
+	}
+	else if( *b == 0 )
+	{
+		return 1;
+	}
+	int diff = towlower(*a)-towlower(*b);
+	if( diff != 0 )
+		return diff;
+	else
+		return wcscasecmp( a+1,b+1);
+}
+#endif
+
+
+#ifndef HAVE_WCSNCASECMP
+int wcsncasecmp( const wchar_t *a, const wchar_t *b, int count )
+{
+	if( count == 0 )
+		return 0;
+	
+	if( *a == 0 )
+	{
+		return (*b==0)?0:-1;
+	}
+	else if( *b == 0 )
+	{
+		return 1;
+	}
+	int diff = towlower(*a)-towlower(*b);
+	if( diff != 0 )
+		return diff;
+	else
+		return wcsncasecmp( a+1,b+1, count-1);
+}
+#endif
+
+#ifndef HAVE_WCWIDTH
+int wcwidth( wchar_t c )
+{
+	if( c < 32 )
+		return 0;
+	if ( c == 127 )
+		return 0;
+	return 1;
+}
+#endif
+
+#ifndef HAVE_WCSNDUP
+wchar_t *wcsndup( const wchar_t *in, int c )
+{
+	wchar_t *res = malloc( sizeof(wchar_t)*(c+1) );
+	if( res == 0 )
+	{
+		die_mem();
+	}
+	wcsncpy( res, in, c );
+	res[c] = L'\0';	
+	return res;	
+}
+#endif
+
+long convert_digit( wchar_t d, int base )
+{
+	long res=-1;
+	if( (d <= L'9') && (d >= L'0') )
+	{
+		res = d - L'0';
+	}
+	else if( (d <= L'z') && (d >= L'a') )
+	{
+		res = d + 10 - L'a';		
+	}
+	else if( (d <= L'Z') && (d >= L'A') )
+	{
+		res = d + 10 - L'A';		
+	}
+	if( res >= base )
+	{
+		res = -1;
+	}
+	
+	return res;
+}
+
+#ifndef HAVE_WCSTOL
+long wcstol(const wchar_t *nptr, 
+			wchar_t **endptr,
+			int base)
+{
+	long long res=0;
+	int is_set=0;
+	if( base > 36 )
+	{
+		errno = EINVAL;
+		return 0;
+	}
+
+	while( 1 )
+	{
+		long nxt = convert_digit( *nptr, base );
+		if( endptr != 0 )
+			*endptr = (wchar_t *)nptr;
+		if( nxt < 0 )
+		{
+			if( !is_set )
+			{
+				errno = EINVAL;
+			}
+			return res;			
+		}
+		res = (res*base)+nxt;
+		is_set = 1;
+		if( res > LONG_MAX )
+		{
+			errno = ERANGE;
+			return LONG_MAX;
+		}
+		if( res < LONG_MIN )
+		{
+			errno = ERANGE;
+			return LONG_MIN;
+		}
+		nptr++;
+	}
+}
+#endif
+
--- a/wutil.h
+++ b/wutil.h
@ -1,8 +1,8 @@
 /** \file wutil.h

  Prototypes for wide character equivalents of various standard unix
-  functions. 
-
+  functions. Also contains fallback implementations of a large number
+  of wide character unix functions.
 */
 #ifndef FISH_WUTIL_H
 #define FISH_WUTIL_H
@ -82,7 +82,7 @@ wchar_t *wgetcwd( wchar_t *buff, size_t sz );
 int wchdir( const wchar_t * dir );


-#if !HAVE_WPRINTF
+#if !HAVE_FWPRINTF

 /**
   Print formated string. Some operating systems (Like NetBSD) do not
@ -112,7 +112,103 @@ int swprintf( wchar_t *str, size_t l, const wchar_t *format, ... );
 */
 int wprintf( const wchar_t *format, ... );

+int vwprintf( const wchar_t *filter, va_list va );
+int vfwprintf( FILE *f, const wchar_t *filter, va_list va );
+int vswprintf( wchar_t *out, size_t n, const wchar_t *filter, va_list va );

 #endif

+#ifndef HAVE_FGETWC
+
+wint_t fgetwc(FILE *stream);
+wint_t getwc(FILE *stream);
+
+#endif
+
+#ifndef HAVE_FPUTWC
+
+wint_t fputwc(wchar_t wc, FILE *stream);
+wint_t putwc(wchar_t wc, FILE *stream);
+
+#endif
+
+#ifndef HAVE_WCSTOK
+
+wchar_t *wcstok(wchar_t *wcs, const wchar_t *delim, wchar_t **ptr);
+
+#endif
+
+/**
+   Return the number of columns used by a character. This is a libc
+   function, but the prototype for this function is missing in some libc
+   implementations. 
+
+   Fish has a fallback implementation in case the implementation is
+   missing altogether.  In locales without a native wcwidth, Unicode
+   is probably so broken that it isn't worth trying to implement a
+   real wcwidth. Therefore, the fallback wcwidth assumes any printing
+   character takes up one column and anything else uses 0 columns.
+*/
+int wcwidth( wchar_t c );
+
+/**
+   Create a duplicate string. Wide string version of strdup. Will
+   automatically exit if out of memory.
+*/
+wchar_t *wcsdup(const wchar_t *in);
+
+size_t wcslen(const wchar_t *in);
+
+/**
+   Case insensitive string compare function. Wide string version of
+   strcasecmp.
+
+   This implementation of wcscasecmp does not take into account
+   esoteric locales where uppercase and lowercase do not cleanly
+   transform between each other. Hopefully this should be fine since
+   fish only uses this function with one of the strings supplied by
+   fish and guaranteed to be a sane, english word. Using wcscasecmp on
+   a user-supplied string should be considered a bug.
+*/
+int wcscasecmp( const wchar_t *a, const wchar_t *b );
+
+/**
+   Case insensitive string compare function. Wide string version of
+   strncasecmp.
+
+   This implementation of wcsncasecmp does not take into account
+   esoteric locales where uppercase and lowercase do not cleanly
+   transform between each other. Hopefully this should be fine since
+   fish only uses this function with one of the strings supplied by
+   fish and guaranteed to be a sane, english word. Using wcsncasecmp on
+   a user-supplied string should be considered a bug.
+*/
+int wcsncasecmp( const wchar_t *a, const wchar_t *b, int count );
+
+/**
+   Returns a newly allocated wide character string which is a copy of
+   the string in, but of length c or shorter. The returned string is
+   always null terminated, and the null is not included in the string
+   length.
+*/
+wchar_t *wcsndup( const wchar_t *in, int c );
+
+/**
+   Converts from wide char to digit in the specified base. If d is not
+   a valid digit in the specified base, return -1.
+*/
+long convert_digit( wchar_t d, int base );
+
+/**
+   Fallback implementation. Convert a wide character string to a
+   number in the specified base. This function is the wide character
+   string equivalent of strtol. For bases of 10 or lower, 0..9 are
+   used to represent numbers. For bases below 36, a-z and A-Z are used
+   to represent numbers higher than 9. Higher bases than 36 are not
+   supported.
+*/
+long wcstol(const wchar_t *nptr,
+			wchar_t **endptr,
+			int base);
+
 #endif