判斷字串是否為 UTF-8 編碼（注意：純 ASCII 或空字串會回傳 FALSE）
參考「自动辨别文本是不是utf-8的c#程序」改寫成 C 程式碼
//0000 0080-0000 07FF - 110xxxxx 10xxxxxx ( 2 octet format)
//0000 0800-0000 FFFF - 1110xxxx 10xxxxxx 10xxxxxx (3 octet format)
BOOL IsUTF8(const char *str)
{
int i;
BYTE cOctets; // octets to go in this UTF-8 encoded character
BYTE chr;
BOOL bAllAscii= TRUE;
long iLen = strlen(str);
cOctets= 0;
for( i=0; i <iLen; i++ )
{
chr = (BYTE)str[i];
if( (chr & 0x80) != 0 ) bAllAscii= FALSE;
if( cOctets == 0 )
{
if( chr>= 0x80 )
{
do {
chr <<= 1;
cOctets++;
}
while( (chr & 0x80) != 0 );
cOctets--;
if( cOctets == 0 ) return FALSE;
}
}
else
{
if( (chr & 0xC0) != 0x80 )
return FALSE;
cOctets--;
}
}
if( cOctets> 0 )
return FALSE;
if( bAllAscii )
return FALSE;
return TRUE;
}