Don't accept out-of-range Unicode codepoints in function unicode_utf8_char_length either

git-svn-id: svn+ssh://svn.gna.org/svn/warzone/trunk@5915 4a71c877-e1ca-e34f-864e-861f7616d084
master
Giel van Schijndel 2008-09-01 19:16:00 +00:00
parent aabf08d6fa
commit 99fe22e0a4
1 changed file with 9 additions and 7 deletions

View File

@ -136,14 +136,16 @@ static size_t unicode_utf8_char_length(const utf_32_char unicode_char)
return 2; // stores 11 bits
else if (unicode_char < 0x00010000)
return 3; // stores 16 bits
else if (unicode_char < 0x00200000)
/* This encoder can deal with < 0x00200000, but Unicode only ranges
* from 0x0 to 0x10FFFF. Thus we don't accept anything else.
*/
else if (unicode_char < 0x00110000)
return 4; // stores 21 bits
else if (unicode_char < 0x04000000)
return 5; // stores 26 bits
else if (unicode_char < 0x80000000)
return 6; // stores 31 bits
else // if (unicode_char < 0x1000000000)
return 7; // stores 36 bits
else
/* Apparently this character lies outside the 0x0 - 0x10FFFF
* Unicode range, so don't accept it.
*/
ASSERT(!"out-of-range Unicode codepoint", "This Unicode codepoint is too large (%u > 0x10FFFF) to be a valid Unicode codepoint", (unsigned int)unicode_char);
}
size_t utf32_utf8_buffer_length(const utf_32_char* unicode_string)