Add a function for UTF-8 encoding
git-svn-id: https://warzone2100.svn.sourceforge.net/svnroot/warzone2100/trunk@7394 4a71c877-e1ca-e34f-864e-861f7616d084master
parent
e5e90e5121
commit
aaa1efe4bb
|
@ -110,6 +110,90 @@ utf_32_char UTF8DecodeChar(const char *utf8_char, const char **next_char)
|
|||
return decoded;
|
||||
}
|
||||
|
||||
/* Encode the single character c as a NUL terminated UTF-8 sequence in
 * utf8_char and return utf8_char.
 *
 * Values below 0x80 are stored as a single octet. Larger values get a lead
 * octet, whose high bits mark the sequence length, followed by one
 * continuation octet (10xxxxxx) per remaining group of 6 bits. Values of
 * 0x80000000 and above use the lead octet 0xfe followed by six continuation
 * octets, so at most 7 octets plus the terminating NUL are written.
 */
char* UTF8EncodeChar(char utf8_char[MAX_UTF8_LEN], utf_32_char c)
{
	// Lead octet marker bits, indexed by the number of continuation octets
	static const unsigned char leadMarker[] =
	{
		0x00, // 0xxxxxxx: plain 7 bit character
		0xc0, // 110xxxxx
		0xe0, // 1110xxxx
		0xf0, // 11110xxx
		0xf8, // 111110xx
		0xfc, // 1111110x
		0xfe, // 11111110
	};

	char* out = utf8_char;
	unsigned int trailing;

	// Work out how many continuation octets are required for this value
	if (c < 0x00000080)
	{
		trailing = 0; // 7 bits
	}
	else if (c < 0x00000800)
	{
		trailing = 1; // 11 bits
	}
	else if (c < 0x00010000)
	{
		trailing = 2; // 16 bits
	}
	else if (c < 0x00200000)
	{
		trailing = 3; // 21 bits
	}
	else if (c < 0x04000000)
	{
		trailing = 4; // 26 bits
	}
	else if (c < 0x80000000)
	{
		trailing = 5; // 31 bits
	}
	else
	{
		trailing = 6; // everything above 31 bits
	}

	if (trailing < 6)
	{
		// Marker bits followed by the most significant payload bits
		*(out++) = leadMarker[trailing] | (c >> (6 * trailing));
	}
	else
	{
		// The 0xfe lead octet carries no payload bits at all (and a shift
		// by 36 would exceed the width of a 32 bit value).
		*(out++) = leadMarker[trailing];
	}

	// Emit the remaining payload 6 bits at a time, most significant first
	while (trailing > 0)
	{
		--trailing;
		*(out++) = 0x80 | ((c >> (6 * trailing)) & 0x3f);
	}

	// NUL terminate the string
	*out = '\0';

	return utf8_char;
}
|
||||
|
||||
size_t UTF8CharacterCount(const char *utf8_string)
|
||||
{
|
||||
size_t length = 0;
|
||||
|
|
|
@ -30,6 +30,11 @@
|
|||
|
||||
#include "types.h"
|
||||
|
||||
/**
|
||||
* The maximum size (in octets) a single UTF-8 encoded codepoint can use.
|
||||
*/
|
||||
#define MAX_UTF8_LEN 8
|
||||
|
||||
/** Used to store a single UTF-32 character.
|
||||
*/
|
||||
typedef uint32_t utf_32_char;
|
||||
|
@ -61,6 +66,14 @@ utf_32_char UTF16DecodeChar(const utf_16_char *utf16_char, const utf_16_char **n
|
|||
*/
|
||||
utf_32_char UTF8DecodeChar(const char *utf8_char, const char **next_char);
|
||||
|
||||
/** Encode a single Unicode character as UTF-8.
|
||||
* \param[out] utf8_char Points to a character buffer at least \c MAX_UTF8_LEN octets large. Will be used to store a NUL terminated, UTF-8 encoded version of \c c.
|
||||
* \param c The Unicode character to encode.
|
||||
*
|
||||
* \return \c utf8_char on success, NULL otherwise (the current implementation always succeeds and thus never returns NULL).
|
||||
*/
|
||||
char* UTF8EncodeChar(char utf8_char[MAX_UTF8_LEN], utf_32_char c);
|
||||
|
||||
/** Determines the amount of unicode codepoints in a UTF-8 encoded string
|
||||
* \param utf8_string the UTF-8 encoded string to count
|
||||
* \return the amount of codepoints found in the UTF-8 string
|
||||
|
|
Loading…
Reference in New Issue