diff --git a/booster/booster/locale/utf.h b/booster/booster/locale/utf.h index 069cd19..c7dc024 100644 --- a/booster/booster/locale/utf.h +++ b/booster/booster/locale/utf.h @@ -219,16 +219,22 @@ namespace utf { if(BOOSTER_LOCALE_UNLIKELY(p==e)) return incomplete; tmp = *p++; + if (!is_trail(tmp)) + return illegal; c = (c << 6) | ( tmp & 0x3F); case 2: if(BOOSTER_LOCALE_UNLIKELY(p==e)) return incomplete; tmp = *p++; + if (!is_trail(tmp)) + return illegal; c = (c << 6) | ( tmp & 0x3F); case 1: if(BOOSTER_LOCALE_UNLIKELY(p==e)) return incomplete; tmp = *p++; + if (!is_trail(tmp)) + return illegal; c = (c << 6) | ( tmp & 0x3F); } diff --git a/booster/lib/locale/test/test_codepage_converter.cpp b/booster/lib/locale/test/test_codepage_converter.cpp index aabab73..d48b5bd 100644 --- a/booster/lib/locale/test/test_codepage_converter.cpp +++ b/booster/lib/locale/test/test_codepage_converter.cpp @@ -140,6 +140,20 @@ int main() TEST_TO("\xf8\x90\x80\x80\x80",illegal); // 400 0000 TEST_TO("\xfd\xbf\xbf\xbf\xbf\xbf",illegal); // 7fff ffff + std::cout << "-- Invalid trail" << std::endl; + TEST_TO("\xC2\x7F",illegal); + TEST_TO("\xdf\x7F",illegal); + TEST_TO("\xe0\x7F\x80",illegal); + TEST_TO("\xef\xbf\x7F",illegal); + TEST_TO("\xe0\xa0\x7F",illegal); + TEST_TO("\xef\x7F\xbf",illegal); + TEST_TO("\xf0\x7F\x80\x80",illegal); + TEST_TO("\xf4\x7f\xbf\xbf",illegal); + TEST_TO("\xf0\x90\x7F\x80",illegal); + TEST_TO("\xf4\x8f\x7F\xbf",illegal); + TEST_TO("\xf0\x90\x80\x7F",illegal); + TEST_TO("\xf4\x8f\xbf\x7F",illegal); + std::cout << "-- Invalid length" << std::endl; /// Test that this actually works diff --git a/private/utf_iterator.h b/private/utf_iterator.h index f15c4eb..f8205b3 100644 --- a/private/utf_iterator.h +++ b/private/utf_iterator.h @@ -25,7 +25,12 @@ namespace utf { } namespace utf8 { - inline int trail_length(unsigned char c) + inline bool is_trail(char ci) + { + unsigned char c=ci; + return (c & 0xC0)==0x80; + } + inline int trail_length(unsigned char c) { if(c < 
128) return 0; @@ -91,16 +96,22 @@ namespace utf8 { if(p==e) return illegal; tmp = *p++; + if (!is_trail(tmp)) + return illegal; c = (c << 6) | ( tmp & 0x3F); case 2: if(p==e) return illegal; tmp = *p++; + if (!is_trail(tmp)) + return illegal; c = (c << 6) | ( tmp & 0x3F); case 1: if(p==e) return illegal; tmp = *p++; + if (!is_trail(tmp)) + return illegal; c = (c << 6) | ( tmp & 0x3F); } diff --git a/tests/form_test.py b/tests/form_test.py index 2f83bf6..c85c524 100755 --- a/tests/form_test.py +++ b/tests/form_test.py @@ -127,6 +127,10 @@ test_valid('text5','/text','_1=%d7%a9%d6%b8%d7%9c%d7%95%d7%9d','valid\nשָלו test_valid('text6','/text','_1=%d7%a9%d7%9c','valid\nשל') test_valid('text7','/text','_1=%FF%FF','invalid\n\xFF\xFF') test_valid('text8','/text','_1=%01%01','invalid\n\x01\x01') +test_valid('text9.1','/text','_1=xx%DF%7F','invalid\nxx\xDF\x7F') +test_valid('text9.2','/text','_1=xx%C2%7F','invalid\nxx\xC2\x7F') +test_valid('text9.3','/text','_1=xx%e0%7F%80','invalid\nxx\xe0\x7F\x80') +test_valid('text9.4','/text','_1=xx%f0%7F%80%80','invalid\nxx\xf0\x7F\x80\x80') test_valid('number','/number','_1=','invalid\n') test_valid('number1','/number','_1=10','valid\n10')