ChipMaster's trial hacks on C++CMS starting with v1.2.1. Not sure I'll follow on with the v2 since it looks to be breaking and mostly frivolous.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

518 lines
17 KiB

  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. #include <booster/locale/encoding.h>
  9. #include <booster/locale/generator.h>
  10. #include <booster/locale/localization_backend.h>
  11. #include <booster/locale/info.h>
  12. #include <booster/config.h>
  13. #include <fstream>
  14. #include "test_locale.h"
  15. #include "test_locale_tools.h"
  16. #ifndef BOOSTER_LOCALE_NO_POSIX_BACKEND
  17. # ifdef __APPLE__
  18. # include <xlocale.h>
  19. # endif
  20. # include <locale.h>
  21. #endif
  22. #if !defined(BOOSTER_LOCALE_WITH_ICU) && !defined(BOOSTER_LOCALE_WITH_ICONV) && (defined(BOOSTER_WIN_NATIVE) || defined(__CYGWIN__))
  23. #ifndef NOMINMAX
  24. # define NOMINMAX
  25. #endif
  26. #include <windows.h>
  27. #endif
  // Capability switches consulted by the test routines below.  main()
  // recomputes test_iso, test_utf and test_sjis for every backend it selects;
  // test_iso_8859_8 stays true unless main() clears it on Windows/Cygwin
  // builds (no ICU, no iconv) where code page 28598 is not valid.
  bool test_iso = false;
  bool test_iso_8859_8 = true;
  bool test_utf = false;
  bool test_sjis = false;

  // Locale names passed to the generator for the 8-bit and Shift-JIS tests;
  // main() assigns them for each backend.
  std::string he_il_8bit;
  std::string en_us_8bit;
  std::string ja_jp_shiftjis;
  // Reads every remaining character from `in`, one get() at a time, and
  // returns them as a single string.  Reading stops at the first get() that
  // fails (end of input or a stream error).
  template<typename Char>
  std::basic_string<Char> read_file(std::basic_istream<Char> &in)
  {
      std::basic_string<Char> contents;
      for(Char ch; in.get(ch); )
          contents.push_back(ch);
      return contents;
  }
  44. template<typename Char>
  45. void test_ok(std::string file,std::locale const &l,std::basic_string<Char> cmp=std::basic_string<Char>())
  46. {
  47. if(cmp.empty())
  48. cmp=to<Char>(file);
  49. std::ofstream test("testi.txt");
  50. test << file;
  51. test.close();
  52. typedef std::basic_fstream<Char> stream_type;
  53. stream_type f1("testi.txt",stream_type::in);
  54. f1.imbue(l);
  55. TEST(read_file<Char>(f1) == cmp);
  56. f1.close();
  57. stream_type f2("testo.txt",stream_type::out);
  58. f2.imbue(l);
  59. f2 << cmp;
  60. f2.close();
  61. std::ifstream testo("testo.txt");
  62. TEST(read_file<char>(testo) == file);
  63. }
  // Checks that decoding `file` through a Char stream imbued with `l` fails.
  //
  // The bytes are written to "testi.txt" and read back one character at a
  // time.  A failure within the first `pos` reads is accepted silently (the
  // error was detected ahead of time); if all `pos` reads succeed, the next
  // read is required to fail.
  template<typename Char>
  void test_rfail(std::string file,std::locale const &l,int pos)
  {
      typedef std::basic_fstream<Char> stream_type;

      {
          std::ofstream raw_input("testi.txt");
          raw_input << file;
      }

      stream_type f1("testi.txt",stream_type::in);
      f1.imbue(l);

      Char c;
      for(int left=pos;left>0;--left) {
          f1.get(c);
          if(f1.fail())
              return; // error already detected while reading ahead
          TEST(f1);
      }
      // every read up to pos succeeded, so this one MUST fail
      TEST(f1.get(c).fail());
  }
  85. template<typename Char>
  86. void test_wfail(std::string file,std::locale const &l,int pos)
  87. {
  88. typedef std::basic_fstream<Char> stream_type;
  89. stream_type f1("testo.txt",stream_type::out);
  90. f1.imbue(l);
  91. std::basic_string<Char> out=to<Char>(file);
  92. int i;
  93. for(i=0;i<pos;i++) {
  94. f1 << out.at(i);
  95. f1<<std::flush;
  96. TEST(f1.good());
  97. }
  98. f1 << out.at(i);
  99. TEST(f1.fail() || (f1<<std::flush).fail());
  100. }
  101. template<typename Char>
  102. void test_for_char()
  103. {
  104. booster::locale::generator g;
  105. if(test_utf) {
  106. std::cout << " UTF-8" << std::endl;
  107. test_ok<Char>("grüße\nn i",g("en_US.UTF-8"));
  108. test_rfail<Char>("abc\xFF\xFF",g("en_US.UTF-8"),3);
  109. std::cout << " Testing codepoints above 0xFFFF" << std::endl;
  110. std::cout << " Single U+2008A" << std::endl;
  111. test_ok<Char>("\xf0\xa0\x82\x8a",g("en_US.UTF-8")); // U+2008A
  112. std::cout << " Single U+2008A withing text" << std::endl;
  113. test_ok<Char>("abc\"\xf0\xa0\x82\x8a\"",g("en_US.UTF-8")); // U+2008A
  114. std::string one = "\xf0\xa0\x82\x8a";
  115. std::string res;
  116. for(unsigned i=0;i<1000;i++)
  117. res+=one;
  118. std::cout << " U+2008A x 1000" << std::endl;
  119. test_ok<Char>(res.c_str(),g("en_US.UTF-8")); // U+2008A
  120. }
  121. else {
  122. std::cout << " UTF-8 Not supported " << std::endl;
  123. }
  124. if(test_iso) {
  125. if(test_iso_8859_8) {
  126. std::cout << " ISO8859-8" << std::endl;
  127. test_ok<Char>("hello \xf9\xec\xe5\xed",g(he_il_8bit),to<Char>("hello שלום"));
  128. }
  129. std::cout << " ISO8859-1" << std::endl;
  130. test_ok<Char>(to<char>("grüße\nn i"),g(en_us_8bit),to<Char>("grüße\nn i"));
  131. test_wfail<Char>("grüßen שלום",g(en_us_8bit),7);
  132. }
  133. if(test_sjis) {
  134. std::cout << " Shift-JIS" << std::endl;
  135. test_ok<Char>("\x93\xfa\x96\x7b",g(ja_jp_shiftjis),
  136. booster::locale::conv::to_utf<Char>("\xe6\x97\xa5\xe6\x9c\xac","UTF-8")); // Japan
  137. }
  138. }
  139. void test_wide_io()
  140. {
  141. std::cout << " wchar_t" << std::endl;
  142. test_for_char<wchar_t>();
  143. #if defined BOOSTER_HAS_CHAR16_T && !defined(BOOSTER_NO_CHAR16_T_CODECVT)
  144. std::cout << " char16_t" << std::endl;
  145. test_for_char<char16_t>();
  146. #endif
  147. #if defined BOOSTER_HAS_CHAR32_T && !defined(BOOSTER_NO_CHAR32_T_CODECVT)
  148. std::cout << " char32_t" << std::endl;
  149. test_for_char<char32_t>();
  150. #endif
  151. }
  152. template<typename Char>
  153. void test_pos(std::string source,std::basic_string<Char> target,std::string encoding)
  154. {
  155. using namespace booster::locale::conv;
  156. booster::locale::generator g;
  157. std::locale l= encoding == "ISO8859-8" ? g("he_IL."+encoding) : g("en_US."+encoding);
  158. TEST(to_utf<Char>(source,encoding)==target);
  159. TEST(to_utf<Char>(source.c_str(),encoding)==target);
  160. TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
  161. TEST(to_utf<Char>(source,l)==target);
  162. TEST(to_utf<Char>(source.c_str(),l)==target);
  163. TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
  164. TEST(from_utf<Char>(target,encoding)==source);
  165. TEST(from_utf<Char>(target.c_str(),encoding)==source);
  166. TEST(from_utf<Char>(target.c_str(),target.c_str()+target.size(),encoding)==source);
  167. TEST(from_utf<Char>(target,l)==source);
  168. TEST(from_utf<Char>(target.c_str(),l)==source);
  169. TEST(from_utf<Char>(target.c_str(),target.c_str()+target.size(),l)==source);
  170. }
  171. #define TESTF(X) TEST_THROWS(X,booster::locale::conv::conversion_error)
  172. template<typename Char>
  173. void test_to_neg(std::string source,std::basic_string<Char> target,std::string encoding)
  174. {
  175. using namespace booster::locale::conv;
  176. booster::locale::generator g;
  177. std::locale l=g("en_US."+encoding);
  178. TEST(to_utf<Char>(source,encoding)==target);
  179. TEST(to_utf<Char>(source.c_str(),encoding)==target);
  180. TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
  181. TEST(to_utf<Char>(source,l)==target);
  182. TEST(to_utf<Char>(source.c_str(),l)==target);
  183. TEST(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
  184. TESTF(to_utf<Char>(source,encoding,stop));
  185. TESTF(to_utf<Char>(source.c_str(),encoding,stop));
  186. TESTF(to_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding,stop));
  187. TESTF(to_utf<Char>(source,l,stop));
  188. TESTF(to_utf<Char>(source.c_str(),l,stop));
  189. TESTF(to_utf<Char>(source.c_str(),source.c_str()+source.size(),l,stop));
  190. }
  191. template<typename Char>
  192. void test_from_neg(std::basic_string<Char> source,std::string target,std::string encoding)
  193. {
  194. using namespace booster::locale::conv;
  195. booster::locale::generator g;
  196. std::locale l=g("en_US."+encoding);
  197. TEST(from_utf<Char>(source,encoding)==target);
  198. TEST(from_utf<Char>(source.c_str(),encoding)==target);
  199. TEST(from_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding)==target);
  200. TEST(from_utf<Char>(source,l)==target);
  201. TEST(from_utf<Char>(source.c_str(),l)==target);
  202. TEST(from_utf<Char>(source.c_str(),source.c_str()+source.size(),l)==target);
  203. TESTF(from_utf<Char>(source,encoding,stop));
  204. TESTF(from_utf<Char>(source.c_str(),encoding,stop));
  205. TESTF(from_utf<Char>(source.c_str(),source.c_str()+source.size(),encoding,stop));
  206. TESTF(from_utf<Char>(source,l,stop));
  207. TESTF(from_utf<Char>(source.c_str(),l,stop));
  208. TESTF(from_utf<Char>(source.c_str(),source.c_str()+source.size(),l,stop));
  209. }
  210. template<typename Char>
  211. std::basic_string<Char> utf(char const *s)
  212. {
  213. return to<Char>(s);
  214. }
  215. template<>
  216. std::basic_string<char> utf(char const *s)
  217. {
  218. return s;
  219. }
  220. template<typename Char>
  221. void test_with_0()
  222. {
  223. std::string a("abc\0\0 yz\0",3+2+3+1);
  224. TEST(booster::locale::conv::from_utf<Char>(booster::locale::conv::to_utf<Char>(a,"UTF-8"),"UTF-8") == a);
  225. TEST(booster::locale::conv::from_utf<Char>(booster::locale::conv::to_utf<Char>(a,"ISO8859-1"),"ISO8859-1") == a);
  226. }
  // Sample strings for the utf_to_utf tests, selected by character type and
  // its size in bytes.  ok() is well-formed text; bad() is the same text
  // with invalid code units inserted.
  template<typename Char,int n=sizeof(Char)>
  struct utfutf;

  // Narrow samples.
  template<>
  struct utfutf<char,1> {
      static char const *ok()
      {
          return "grüßen";
      }
      static char const *bad()
      {
          // literal split into 2 parts to make SunCC happy
          return "gr\xFF" "üßen";
      }
  };

  // Samples for a 2-byte wchar_t.
  template<>
  struct utfutf<wchar_t,2> {
      static wchar_t const *ok()
      {
          return L"\x67\x72\xfc\xdf\x65\x6e";
      }
      static wchar_t const *bad()
      {
          // Slots 2, 4 and 5 are placeholders overwritten below.
          static wchar_t buf[256] = L"\x67\x72\xFF\xfc\xFE\xFD\xdf\x65\x6e";
          buf[2]=0xDC01; // a trail surrogate with no lead surrogate before it
          buf[4]=0xD801; // a lead surrogate ...
          buf[5]=0xD801; // ... followed by another lead instead of a trail
          return buf;
      }
  };

  // Samples for a 4-byte wchar_t.
  template<>
  struct utfutf<wchar_t,4> {
      static wchar_t const *ok()
      {
          return L"\x67\x72\xfc\xdf\x65\x6e";
      }
      static wchar_t const *bad()
      {
          // Slot 2 is a placeholder overwritten below.
          static wchar_t buf[256] = L"\x67\x72\xFF\xfc\xdf\x65\x6e";
          buf[2]=static_cast<wchar_t>(0x1000000); // above the 0x10FFFF limit
          return buf;
      }
  };
  255. template<typename CharOut,typename CharIn>
  256. void test_combinations()
  257. {
  258. using booster::locale::conv::utf_to_utf;
  259. typedef utfutf<CharOut> out;
  260. typedef utfutf<CharIn> in;
  261. TEST( (utf_to_utf<CharOut,CharIn>(in::ok())==out::ok()) );
  262. TESTF( (utf_to_utf<CharOut,CharIn>(in::bad(),booster::locale::conv::stop)) );
  263. TEST( (utf_to_utf<CharOut,CharIn>(in::bad())==out::ok()) );
  264. }
  265. void test_all_combinations()
  266. {
  267. std::cout << "Testing utf_to_utf" << std::endl;
  268. std::cout <<" char<-char"<<std::endl;
  269. test_combinations<char,char>();
  270. std::cout <<" char<-wchar"<<std::endl;
  271. test_combinations<char,wchar_t>();
  272. std::cout <<" wchar<-char"<<std::endl;
  273. test_combinations<wchar_t,char>();
  274. std::cout <<" wchar<-wchar"<<std::endl;
  275. test_combinations<wchar_t,wchar_t>();
  276. }
  277. template<typename Char>
  278. void test_to()
  279. {
  280. test_pos<Char>(to<char>("grüßen"),utf<Char>("grüßen"),"ISO8859-1");
  281. if(test_iso_8859_8)
  282. test_pos<Char>("\xf9\xec\xe5\xed",utf<Char>("שלום"),"ISO8859-8");
  283. test_pos<Char>("grüßen",utf<Char>("grüßen"),"UTF-8");
  284. test_pos<Char>("abc\"\xf0\xa0\x82\x8a\"",utf<Char>("abc\"\xf0\xa0\x82\x8a\""),"UTF-8");
  285. test_to_neg<Char>("g\xFFrüßen",utf<Char>("grüßen"),"UTF-8");
  286. test_from_neg<Char>(utf<Char>("hello שלום"),"hello ","ISO8859-1");
  287. test_with_0<Char>();
  288. }
  289. void test_skip(char const *enc,char const *utf,char const *name,char const *opt=0)
  290. {
  291. if(opt!=0) {
  292. if(booster::locale::conv::to_utf<char>(enc,name) == opt) {
  293. test_skip(enc,opt,name);
  294. return;
  295. }
  296. }
  297. TEST(booster::locale::conv::to_utf<char>(enc,name) == utf);
  298. TEST(booster::locale::conv::to_utf<wchar_t>(enc,name) == booster::locale::conv::utf_to_utf<wchar_t>(utf));
  299. #ifdef BOOSTER_HAS_CHAR16_T
  300. TEST(booster::locale::conv::to_utf<char16_t>(enc,name) == booster::locale::conv::utf_to_utf<char16_t>(utf));
  301. #endif
  302. #ifdef BOOSTER_HAS_CHAR32_T
  303. TEST(booster::locale::conv::to_utf<char32_t>(enc,name) == booster::locale::conv::utf_to_utf<char32_t>(utf));
  304. #endif
  305. }
  306. void test_simple_conversions()
  307. {
  308. namespace blc=booster::locale::conv;
  309. std::cout << "- Testing correct invalid bytes skipping" << std::endl;
  310. try {
  311. std::cout << "-- ISO-8859-8" << std::endl;
  312. test_skip("test \xE0\xE1\xFB-","test \xd7\x90\xd7\x91-","ISO-8859-8");
  313. test_skip("\xFB","","ISO-8859-8");
  314. test_skip("test \xE0\xE1\xFB","test \xd7\x90\xd7\x91","ISO-8859-8");
  315. test_skip("\xFB-","-","ISO-8859-8");
  316. }
  317. catch(blc::invalid_charset_error const &) {
  318. std::cout <<"--- not supported" << std::endl;
  319. }
  320. try {
  321. std::cout << "-- cp932" << std::endl;
  322. test_skip("test\xE0\xA0 \x83\xF8-","test\xe7\x87\xbf -","cp932","test\xe7\x87\xbf ");
  323. test_skip("\x83\xF8","","cp932");
  324. test_skip("test\xE0\xA0 \x83\xF8","test\xe7\x87\xbf ","cp932");
  325. test_skip("\x83\xF8-","-","cp932","");
  326. }
  327. catch(blc::invalid_charset_error const &) {
  328. std::cout <<"--- not supported" << std::endl;
  329. }
  330. }
  331. int main()
  332. {
  333. try {
  334. std::vector<std::string> def;
  335. #ifdef BOOSTER_LOCALE_WITH_ICU
  336. def.push_back("icu");
  337. #endif
  338. #ifndef BOOSTER_LOCALE_NO_STD_BACKEND
  339. def.push_back("std");
  340. #endif
  341. #ifndef BOOSTER_LOCALE_NO_WINAPI_BACKEND
  342. def.push_back("winapi");
  343. #endif
  344. #ifndef BOOSTER_LOCALE_NO_POSIX_BACKEND
  345. def.push_back("posix");
  346. #endif
  347. #if !defined(BOOSTER_LOCALE_WITH_ICU) && !defined(BOOSTER_LOCALE_WITH_ICONV) && (defined(BOOSTER_WIN_NATIVE) || defined(__CYGWIN__))
  348. test_iso_8859_8 = IsValidCodePage(28598)!=0;
  349. #endif
  350. test_simple_conversions();
  351. for(int type = 0; type < int(def.size()); type ++ ) {
  352. booster::locale::localization_backend_manager tmp_backend = booster::locale::localization_backend_manager::global();
  353. tmp_backend.select(def[type]);
  354. booster::locale::localization_backend_manager::global(tmp_backend);
  355. std::string bname = def[type];
  356. if(bname=="std") {
  357. en_us_8bit = get_std_name("en_US.ISO8859-1");
  358. he_il_8bit = get_std_name("he_IL.ISO8859-8");
  359. ja_jp_shiftjis = get_std_name("ja_JP.SJIS");
  360. if(!test_std_supports_SJIS_codecvt(ja_jp_shiftjis))
  361. {
  362. std::cout << "Warning: detected unproper support of " << ja_jp_shiftjis << " locale, disableling it" << std::endl;
  363. ja_jp_shiftjis = "";
  364. }
  365. }
  366. else {
  367. en_us_8bit = "en_US.ISO8859-1";
  368. he_il_8bit = "he_IL.ISO8859-8";
  369. ja_jp_shiftjis = "ja_JP.SJIS";
  370. }
  371. std::cout << "Testing for backend " << def[type] << std::endl;
  372. test_iso = true;
  373. if(bname=="std" && (he_il_8bit.empty() || en_us_8bit.empty())) {
  374. std::cout << "no iso locales availible, passing" << std::endl;
  375. test_iso = false;
  376. }
  377. test_sjis = true;
  378. if(bname=="std" && ja_jp_shiftjis.empty()) {
  379. test_sjis = false;
  380. }
  381. if(bname=="winapi") {
  382. test_iso = false;
  383. test_sjis = false;
  384. }
  385. test_utf = true;
  386. #ifndef BOOSTER_LOCALE_NO_POSIX_BACKEND
  387. if(bname=="posix") {
  388. {
  389. locale_t l = newlocale(LC_ALL_MASK,he_il_8bit.c_str(),0);
  390. if(!l)
  391. test_iso = false;
  392. else
  393. freelocale(l);
  394. }
  395. {
  396. locale_t l = newlocale(LC_ALL_MASK,en_us_8bit.c_str(),0);
  397. if(!l)
  398. test_iso = false;
  399. else
  400. freelocale(l);
  401. }
  402. {
  403. locale_t l = newlocale(LC_ALL_MASK,"en_US.UTF-8",0);
  404. if(!l)
  405. test_utf = false;
  406. else
  407. freelocale(l);
  408. }
  409. #ifdef BOOSTER_LOCALE_WITH_ICONV
  410. {
  411. locale_t l = newlocale(LC_ALL_MASK,ja_jp_shiftjis.c_str(),0);
  412. if(!l)
  413. test_sjis = false;
  414. else
  415. freelocale(l);
  416. }
  417. #else
  418. test_sjis = false;
  419. #endif
  420. }
  421. #endif
  422. if(def[type]=="std" && (get_std_name("en_US.UTF-8").empty() || get_std_name("he_IL.UTF-8").empty()))
  423. {
  424. test_utf = false;
  425. }
  426. std::cout << "Testing wide I/O" << std::endl;
  427. test_wide_io();
  428. std::cout << "Testing charset to/from UTF conversion functions" << std::endl;
  429. std::cout << " char" << std::endl;
  430. test_to<char>();
  431. std::cout << " wchar_t" << std::endl;
  432. test_to<wchar_t>();
  433. #ifdef BOOSTER_HAS_CHAR16_T
  434. if(bname == "icu" || bname == "std") {
  435. std::cout << " char16_t" << std::endl;
  436. test_to<char16_t>();
  437. }
  438. #endif
  439. #ifdef BOOSTER_HAS_CHAR32_T
  440. if(bname == "icu" || bname == "std") {
  441. std::cout << " char32_t" << std::endl;
  442. test_to<char32_t>();
  443. }
  444. #endif
  445. test_all_combinations();
  446. }
  447. }
  448. catch(std::exception const &e) {
  449. std::cerr << "Failed " << e.what() << std::endl;
  450. return EXIT_FAILURE;
  451. }
  452. FINALIZE();
  453. }
  454. // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
  455. // boostinspect:noascii