ChipMaster's trial hacks on C++CMS starting with v1.2.1. Not sure I'll follow on with the v2 since it looks to be breaking and mostly frivolous.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

317 lines
9.9 KiB

  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See
  5. // accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. #ifndef BOOSTER_SRC_LOCALE_ICU_UCONV_HPP
  9. #define BOOSTER_SRC_LOCALE_ICU_UCONV_HPP
  10. #include <unicode/unistr.h>
  11. #include <unicode/ucnv.h>
  12. #include <unicode/ustring.h>
  13. #include <unicode/utf.h>
  14. #include <unicode/utf16.h>
  15. #include <booster/locale/encoding.h>
  16. #include <string>
  17. #include <booster/auto_ptr_inc.h>
  18. #include "icu_util.h"
  19. namespace booster {
  20. namespace locale {
  21. namespace impl_icu {
  22. typedef enum {
  23. cvt_skip,
  24. cvt_stop
  25. } cpcvt_type;
  26. template<typename CharType,int char_size = sizeof(CharType) >
  27. class icu_std_converter {
  28. public:
  29. typedef CharType char_type;
  30. typedef std::basic_string<char_type> string_type;
  31. icu_std_converter(std::string charset,cpcvt_type cv=cvt_skip);
  32. icu::UnicodeString icu(char_type const *begin,char_type const *end) const;
  33. string_type std(icu::UnicodeString const &str) const;
  34. size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end,size_t n,size_t from_u=0,size_t from_c=0) const;
  35. };
  36. template<typename CharType>
  37. class icu_std_converter<CharType,1> {
  38. public:
  39. typedef CharType char_type;
  40. typedef std::basic_string<char_type> string_type;
  41. icu::UnicodeString icu_checked(char_type const *vb,char_type const *ve) const
  42. {
  43. return icu(vb,ve); // Already done
  44. }
  45. icu::UnicodeString icu(char_type const *vb,char_type const *ve) const
  46. {
  47. char const *begin=reinterpret_cast<char const *>(vb);
  48. char const *end=reinterpret_cast<char const *>(ve);
  49. uconv cvt(charset_,cvt_type_);
  50. UErrorCode err=U_ZERO_ERROR;
  51. icu::UnicodeString tmp(begin,end-begin,cvt.cvt(),err);
  52. check_and_throw_icu_error(err);
  53. return tmp;
  54. }
  55. string_type std(icu::UnicodeString const &str) const
  56. {
  57. uconv cvt(charset_,cvt_type_);
  58. return cvt.go(str.getBuffer(),str.length(),max_len_);
  59. }
  60. icu_std_converter(std::string charset,cpcvt_type cvt_type = cvt_skip) :
  61. charset_(charset),
  62. cvt_type_(cvt_type)
  63. {
  64. uconv cvt(charset_,cvt_type);
  65. max_len_=cvt.max_char_size();
  66. }
  67. size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end,
  68. size_t n,size_t from_u=0,size_t from_char=0) const
  69. {
  70. size_t code_points = str.countChar32(from_u,n);
  71. uconv cvt(charset_,cvt_type_);
  72. return cvt.cut(code_points,begin+from_char,end);
  73. }
  74. struct uconv {
  75. uconv(uconv const &other);
  76. void operator=(uconv const &other);
  77. public:
  78. uconv(std::string const &charset,cpcvt_type cvt_type=cvt_skip)
  79. {
  80. UErrorCode err=U_ZERO_ERROR;
  81. cvt_ = ucnv_open(charset.c_str(),&err);
  82. if(!cvt_ || U_FAILURE(err)) {
  83. if(cvt_)
  84. ucnv_close(cvt_);
  85. throw conv::invalid_charset_error(charset);
  86. }
  87. try {
  88. if(cvt_type==cvt_skip) {
  89. ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_SKIP,0,0,0,&err);
  90. check_and_throw_icu_error(err);
  91. err=U_ZERO_ERROR;
  92. ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_SKIP,0,0,0,&err);
  93. check_and_throw_icu_error(err);
  94. }
  95. else {
  96. ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_STOP,0,0,0,&err);
  97. check_and_throw_icu_error(err);
  98. err=U_ZERO_ERROR;
  99. ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_STOP,0,0,0,&err);
  100. check_and_throw_icu_error(err);
  101. }
  102. }
  103. catch(...) { ucnv_close(cvt_) ; throw; }
  104. }
  105. int max_char_size()
  106. {
  107. return ucnv_getMaxCharSize(cvt_);
  108. }
  109. string_type go(UChar const *buf,int length,int max_size)
  110. {
  111. string_type res;
  112. res.resize(UCNV_GET_MAX_BYTES_FOR_STRING(length,max_size));
  113. char *ptr=reinterpret_cast<char *>(&res[0]);
  114. UErrorCode err=U_ZERO_ERROR;
  115. int n = ucnv_fromUChars(cvt_,ptr,res.size(),buf,length,&err);
  116. check_and_throw_icu_error(err);
  117. res.resize(n);
  118. return res;
  119. }
  120. size_t cut(size_t n,char_type const *begin,char_type const *end)
  121. {
  122. char_type const *saved = begin;
  123. while(n > 0 && begin < end) {
  124. UErrorCode err=U_ZERO_ERROR;
  125. ucnv_getNextUChar(cvt_,&begin,end,&err);
  126. if(U_FAILURE(err))
  127. return 0;
  128. n--;
  129. }
  130. return begin - saved;
  131. }
  132. UConverter *cvt() { return cvt_; }
  133. ~uconv()
  134. {
  135. ucnv_close(cvt_);
  136. }
  137. private:
  138. UConverter *cvt_;
  139. };
  140. private:
  141. int max_len_;
  142. std::string charset_;
  143. cpcvt_type cvt_type_;
  144. };
  145. template<typename CharType>
  146. class icu_std_converter<CharType,2> {
  147. public:
  148. typedef CharType char_type;
  149. typedef std::basic_string<char_type> string_type;
  150. icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const
  151. {
  152. icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
  153. while(begin!=end) {
  154. UChar cl = *begin++;
  155. if(U16_IS_SINGLE(cl))
  156. tmp.append(static_cast<UChar32>(cl));
  157. else if(U16_IS_LEAD(cl)) {
  158. if(begin==end) {
  159. throw_if_needed();
  160. }
  161. else {
  162. UChar ct=*begin++;
  163. if(!U16_IS_TRAIL(ct))
  164. throw_if_needed();
  165. else {
  166. UChar32 c=U16_GET_SUPPLEMENTARY(cl,ct);
  167. tmp.append(c);
  168. }
  169. }
  170. }
  171. else
  172. throw_if_needed();
  173. }
  174. return tmp;
  175. }
  176. void throw_if_needed() const
  177. {
  178. if(mode_ == cvt_stop)
  179. throw conv::conversion_error();
  180. }
  181. icu::UnicodeString icu(char_type const *vb,char_type const *ve) const
  182. {
  183. UChar const *begin=reinterpret_cast<UChar const *>(vb);
  184. UChar const *end=reinterpret_cast<UChar const *>(ve);
  185. icu::UnicodeString tmp(begin,end-begin);
  186. return tmp;
  187. }
  188. string_type std(icu::UnicodeString const &str) const
  189. {
  190. char_type const *ptr=reinterpret_cast<char_type const *>(str.getBuffer());
  191. return string_type(ptr,str.length());
  192. }
  193. size_t cut(icu::UnicodeString const &/*str*/,char_type const * /*begin*/,char_type const * /*end*/,size_t n,
  194. size_t /*from_u*/=0,size_t /*from_c*/=0) const
  195. {
  196. return n;
  197. }
  198. icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) :
  199. mode_(mode)
  200. {
  201. }
  202. private:
  203. cpcvt_type mode_;
  204. };
  205. template<typename CharType>
  206. class icu_std_converter<CharType,4> {
  207. public:
  208. typedef CharType char_type;
  209. typedef std::basic_string<char_type> string_type;
  210. icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const
  211. {
  212. icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
  213. while(begin!=end) {
  214. UChar32 c = static_cast<UChar32>(*begin++);
  215. if(U_IS_UNICODE_CHAR(c))
  216. tmp.append(c);
  217. else
  218. throw_if_needed();
  219. }
  220. return tmp;
  221. }
  222. void throw_if_needed() const
  223. {
  224. if(mode_ == cvt_stop)
  225. throw conv::conversion_error();
  226. }
  227. icu::UnicodeString icu(char_type const *begin,char_type const *end) const
  228. {
  229. icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
  230. while(begin!=end) {
  231. UChar32 c=static_cast<UChar32>(*begin++);
  232. tmp.append(c);
  233. }
  234. return tmp;
  235. }
  236. string_type std(icu::UnicodeString const &str) const
  237. {
  238. string_type tmp;
  239. tmp.resize(str.length());
  240. UChar32 *ptr=reinterpret_cast<UChar32 *>(&tmp[0]);
  241. #ifdef __SUNPRO_CC
  242. int len=0;
  243. #else
  244. ::int32_t len=0;
  245. #endif
  246. UErrorCode code=U_ZERO_ERROR;
  247. u_strToUTF32(ptr,tmp.size(),&len,str.getBuffer(),str.length(),&code);
  248. check_and_throw_icu_error(code);
  249. tmp.resize(len);
  250. return tmp;
  251. }
  252. size_t cut(icu::UnicodeString const &str,char_type const * /*begin*/,char_type const * /*end*/,size_t n,
  253. size_t from_u=0,size_t /*from_c*/=0) const
  254. {
  255. return str.countChar32(from_u,n);
  256. }
  257. icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) :
  258. mode_(mode)
  259. {
  260. }
  261. private:
  262. cpcvt_type mode_;
  263. };
  264. } /// impl_icu
  265. } // locale
  266. } // booster
  267. #endif
  268. // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4