/**
 * @file
 * $Id$
 * $Revision$
 * $Author$
 * $Date$
 *
 * This file is part of The iWear Framework.
 *
 * The iWear Framework is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by the
 * Free Software Foundation as in version 2 of the License.

 * 
 * The iWear Framework is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * The iWear Framework; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef __IWEAR_UNICODE_H
#define __IWEAR_UNICODE_H

#include <stdint.h>
#include <wctype.h>

/**
 * @file
 * @brief Helper functions for Unicode/UTF-8 support.
 */

/**
 * Decodes the UTF-8 sequence starting at @p p into a 32 bit UCS code point.
 *
 * Sequences of one to six bytes are accepted, i.e. the original ISO 10646
 * definition of UTF-8 covering values up to 0x7FFFFFFF, @see
 * http://www.cl.cam.ac.uk/~mgk25/ucs/ISO-10646-UTF-8.html
 *
 * A sequence is rejected when
 *  - the first byte is not a valid lead byte (a stray continuation byte
 *    0x80..0xBF, or 0xFE/0xFF),
 *  - one of the trailing bytes is not a continuation byte of the form
 *    10xxxxxx (this includes hitting the terminating '\0' too early), or
 *  - the value is encoded with more bytes than necessary (overlong form).
 *
 * @param p  Zero terminated string, decoding starts at its first byte. Must
 *           not be a null pointer.
 * @param n  Receives the number of bytes that make up the character, or 0 if
 *           the sequence was rejected. May be a null pointer if the caller
 *           is not interested in the length.
 * @return The decoded code point, or 0 if the sequence was rejected. Note
 *         that the terminating '\0' itself decodes to 0 with a length of 1.
 */
inline int32_t utf8_2_ucs( const char* p, int* n)
{
    int ignored;
    if( ! n ) n = &ignored; // Lets the code below store the length unconditionally
    *n = 0;                 // Every early return below reports a rejected sequence

    const unsigned char lead = *p;
    if( lead <= 0x7F )
    {
        *n = 1;
        return lead;
    }

    int len;          // Total number of bytes announced by the lead byte
    int32_t ucs;      // Payload bits collected so far
    int32_t smallest; // Smallest value that really needs len bytes
    if( lead >= 0xC0 && lead <= 0xDF )
    {
        len = 2; ucs = lead & 0x1F; smallest = 0x80;
    }
    else if( lead >= 0xE0 && lead <= 0xEF )
    {
        len = 3; ucs = lead & 0x0F; smallest = 0x800;
    }
    else if( lead >= 0xF0 && lead <= 0xF7 )
    {
        len = 4; ucs = lead & 0x07; smallest = 0x10000;
    }
    else if( lead >= 0xF8 && lead <= 0xFB )
    {
        len = 5; ucs = lead & 0x03; smallest = 0x200000;
    }
    else if( lead >= 0xFC && lead <= 0xFD )
    {
        len = 6; ucs = lead & 0x01; smallest = 0x4000000;
    }
    else
    {
        return 0; // Stray continuation byte or 0xFE/0xFF
    }

    for( int i = 1; i < len; ++i )
    {
        const unsigned char c = p[i];
        // The terminating '\0' fails this test as well, so we never read
        // past the end of the string.
        if( ( c & 0xC0 ) != 0x80 ) return 0;
        // At most 31 payload bits are collected, so this cannot overflow.
        ucs = ucs * 64 + ( c & 0x3F );
    }

    if( ucs < smallest ) return 0; // Overlong encoding

    *n = len;
    return ucs;
}
/**
 * Tests whether the UTF-8 encoded character starting at @p p is alphabetic.
 *
 * The character is decoded with utf8_2_ucs() and the resulting code point is
 * classified with iswalpha(), so the answer depends on the LC_CTYPE category
 * of the current locale.
 *
 * NOTE(review): this assumes that wchar_t values are ISO 10646 code points
 * (the case when the implementation defines __STDC_ISO_10646__) - confirm
 * for the target platforms.
 *
 * @param p  Zero terminated string, the character to test starts at its
 *           first byte.
 * @return <0 if the character is not an alpha or >0 if it is. The absolute
 * value of the return denotes the number of bytes in the multibyte character.
 * It returns 0 if the character sequence is invalid multibyte and therefore
 * could not be properly tested.
 */
inline int isalpha_utf8( const char* p )
{
    int n = 0;
    const int32_t ucs = utf8_2_ucs( p, &n );
    if( n == 0 )
    {
        return 0;
    }

    // isalpha() is only defined for unsigned char values and EOF, so a
    // multibyte character has to be classified through its decoded code
    // point with the wide character API instead.
    const wchar_t wc = static_cast<wchar_t>( ucs );

    // Where wchar_t is narrower than 32 bits, large code points do not
    // survive the conversion; report them as "not alpha" instead of testing
    // some unrelated truncated value.
    const bool representable = ( static_cast<int32_t>( wc ) == ucs );

    if( representable && iswalpha( static_cast<wint_t>( wc ) ) )
    {
        return n;
    }
    return -n;
}

#endif
