@@ -219,16 +219,22 @@ namespace utf { | |||||
if(BOOSTER_LOCALE_UNLIKELY(p==e)) | if(BOOSTER_LOCALE_UNLIKELY(p==e)) | ||||
return incomplete; | return incomplete; | ||||
tmp = *p++; | tmp = *p++; | ||||
if (!is_trail(tmp)) | |||||
return illegal; | |||||
c = (c << 6) | ( tmp & 0x3F); | c = (c << 6) | ( tmp & 0x3F); | ||||
case 2: | case 2: | ||||
if(BOOSTER_LOCALE_UNLIKELY(p==e)) | if(BOOSTER_LOCALE_UNLIKELY(p==e)) | ||||
return incomplete; | return incomplete; | ||||
tmp = *p++; | tmp = *p++; | ||||
if (!is_trail(tmp)) | |||||
return illegal; | |||||
c = (c << 6) | ( tmp & 0x3F); | c = (c << 6) | ( tmp & 0x3F); | ||||
case 1: | case 1: | ||||
if(BOOSTER_LOCALE_UNLIKELY(p==e)) | if(BOOSTER_LOCALE_UNLIKELY(p==e)) | ||||
return incomplete; | return incomplete; | ||||
tmp = *p++; | tmp = *p++; | ||||
if (!is_trail(tmp)) | |||||
return illegal; | |||||
c = (c << 6) | ( tmp & 0x3F); | c = (c << 6) | ( tmp & 0x3F); | ||||
} | } | ||||
@@ -140,6 +140,20 @@ int main() | |||||
TEST_TO("\xf8\x90\x80\x80\x80",illegal); // 400 0000 | TEST_TO("\xf8\x90\x80\x80\x80",illegal); // 400 0000 | ||||
TEST_TO("\xfd\xbf\xbf\xbf\xbf\xbf",illegal); // 7fff ffff | TEST_TO("\xfd\xbf\xbf\xbf\xbf\xbf",illegal); // 7fff ffff | ||||
std::cout << "-- Invalid trail" << std::endl; | |||||
TEST_TO("\xC2\x7F",illegal); | |||||
TEST_TO("\xdf\x7F",illegal); | |||||
TEST_TO("\xe0\x7F\x80",illegal); | |||||
TEST_TO("\xef\xbf\x7F",illegal); | |||||
TEST_TO("\xe0\x7F\x80",illegal); | |||||
TEST_TO("\xef\xbf\x7F",illegal); | |||||
TEST_TO("\xf0\x7F\x80\x80",illegal); | |||||
TEST_TO("\xf4\x7f\xbf\xbf",illegal); | |||||
TEST_TO("\xf0\x90\x7F\x80",illegal); | |||||
TEST_TO("\xf4\x8f\x7F\xbf",illegal); | |||||
TEST_TO("\xf0\x90\x80\x7F",illegal); | |||||
TEST_TO("\xf4\x8f\xbf\x7F",illegal); | |||||
std::cout << "-- Invalid length" << std::endl; | std::cout << "-- Invalid length" << std::endl; | ||||
/// Test that this actually works | /// Test that this actually works | ||||
@@ -25,7 +25,12 @@ namespace utf { | |||||
} | } | ||||
namespace utf8 { | namespace utf8 { | ||||
inline int trail_length(unsigned char c) | |||||
/// Tell whether \a ci is a UTF-8 continuation (trail) byte.
///
/// A byte qualifies when its two most significant bits are `10`, i.e. its
/// unsigned value lies in the range 0x80..0xBF.
///
/// \param ci byte to test, passed as a plain char
/// \return true if (byte & 0xC0) == 0x80, false otherwise
inline bool is_trail(char ci)
{
    // Test the unsigned byte value so the check reads the same whether
    // plain char is signed or unsigned on the target platform.
    unsigned char const c = static_cast<unsigned char>(ci);
    return (c & 0xC0) == 0x80;
}
inline int trail_length(unsigned char c) | |||||
{ | { | ||||
if(c < 128) | if(c < 128) | ||||
return 0; | return 0; | ||||
@@ -91,16 +96,22 @@ namespace utf8 { | |||||
if(p==e) | if(p==e) | ||||
return illegal; | return illegal; | ||||
tmp = *p++; | tmp = *p++; | ||||
if (!is_trail(tmp)) | |||||
return illegal; | |||||
c = (c << 6) | ( tmp & 0x3F); | c = (c << 6) | ( tmp & 0x3F); | ||||
case 2: | case 2: | ||||
if(p==e) | if(p==e) | ||||
return illegal; | return illegal; | ||||
tmp = *p++; | tmp = *p++; | ||||
if (!is_trail(tmp)) | |||||
return illegal; | |||||
c = (c << 6) | ( tmp & 0x3F); | c = (c << 6) | ( tmp & 0x3F); | ||||
case 1: | case 1: | ||||
if(p==e) | if(p==e) | ||||
return illegal; | return illegal; | ||||
tmp = *p++; | tmp = *p++; | ||||
if (!is_trail(tmp)) | |||||
return illegal; | |||||
c = (c << 6) | ( tmp & 0x3F); | c = (c << 6) | ( tmp & 0x3F); | ||||
} | } | ||||
@@ -127,6 +127,10 @@ test_valid('text5','/text','_1=%d7%a9%d6%b8%d7%9c%d7%95%d7%9d','valid\nשָלו | |||||
test_valid('text6','/text','_1=%d7%a9%d7%9c','valid\nשל') | test_valid('text6','/text','_1=%d7%a9%d7%9c','valid\nשל') | ||||
test_valid('text7','/text','_1=%FF%FF','invalid\n\xFF\xFF') | test_valid('text7','/text','_1=%FF%FF','invalid\n\xFF\xFF') | ||||
test_valid('text8','/text','_1=%01%01','invalid\n\x01\x01') | test_valid('text8','/text','_1=%01%01','invalid\n\x01\x01') | ||||
test_valid('text9.1','/text','_1=xx%DF%7F','invalid\nxx\xDF\x7F') | |||||
test_valid('text9.2','/text','_1=xx%C2%7F','invalid\nxx\xC2\x7F') | |||||
test_valid('text9.3','/text','_1=xx%e0%7F%80','invalid\nxx\xe0\x7F\x80') | |||||
test_valid('text9.4','/text','_1=xx%f0%7F%80%80','invalid\nxx\xf0\x7F\x80\x80') | |||||
test_valid('number','/number','_1=','invalid\n') | test_valid('number','/number','_1=','invalid\n') | ||||
test_valid('number1','/number','_1=10','valid\n10') | test_valid('number1','/number','_1=10','valid\n10') | ||||