ChipMaster's trial hacks on C++CMS starting with v1.2.1. Not sure I'll follow on with the v2 since it looks to be breaking and mostly frivolous.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

121 lines
5.5 KiB

  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. // Copyright (C) 2008-2012 Artyom Beilis (Tonkikh) <artyomtnk@yahoo.com>
  4. //
  5. // See accompanying file COPYING.TXT file for licensing details.
  6. //
  7. ///////////////////////////////////////////////////////////////////////////////
  8. #ifndef CPPCMS_ENCODING_H
  9. #define CPPCMS_ENCODING_H
  10. #include <string>
  11. #include <map>
  12. #include <locale>
  13. #include <cppcms/defs.h>
  14. #include <cppcms/config.h>
  15. namespace cppcms {
  16. ///
  17. /// \brief This namespace holds various functions for dealing with encoding
  18. ///
  19. ///
  20. namespace encoding {
  21. /// Note: all these functions assume that control characters that are invalid in HTML are illegal.
  22. /// For example, NUL is a legal UTF-8 code point but it is illegal in terms of HTML validity, thus
  23. /// valid_utf8 would return false.
  24. ///
  25. /// Check if the string in range [begin,end) is valid in the locale \a loc and does not include
  26. /// HTML illegal characters. The number of code points is stored in \a count.
  27. ///
  28. bool CPPCMS_API valid(std::locale const &loc,char const *begin,char const *end,size_t &count);
  29. ///
  30. /// Check if the string in range [begin,end) is valid UTF-8 and does not include
  31. /// HTML illegal characters. The number of code points is stored in \a count.
  32. ///
  33. bool CPPCMS_API valid_utf8(char const *begin,char const *end,size_t &count);
  34. ///
  35. /// Check if the string in range [begin,end) is valid in the encoding \a encoding and does not include
  36. /// HTML illegal characters. The number of code points is stored in \a count.
  37. ///
  38. bool CPPCMS_API valid(char const *encoding,char const *begin,char const *end,size_t &count);
  39. ///
  40. /// Check if the string in range [begin,end) is valid in the encoding \a encoding and does not include
  41. /// HTML illegal characters. The number of code points is stored in \a count.
  42. ///
  43. bool CPPCMS_API valid(std::string const &encoding,char const *begin,char const *end,size_t &count);
  44. ///
  45. /// Returns true if ASCII is a strict subset of the encoding, i.e. all non-ASCII characters
  46. /// are encoded using bytes >= 0x80.
  47. ///
  48. /// This is very important for XML or HTML parsing to prevent invalid detection of HTML specific
  49. /// characters. So filters that work with encodings that are not ASCII compatible should convert
  50. /// the text to UTF-8 and then convert them back.
  51. ///
  52. /// These are the UTF-8, ISO-8859-*, windows-12* and koi encoding families.
  53. ///
  54. ///
  55. bool CPPCMS_API is_ascii_compatible(std::string const &encoding);
  56. ///
  57. /// Check if the text in range [\a begin, \a end) is valid in the encoding \a encoding. If it is valid,
  58. /// returns true; otherwise removes all invalid characters (if replace == 0) or replaces them with \a replace
  59. /// and saves the result to \a output, returning false.
  60. ///
  61. /// \note the replace functionality is not supported for all encodings, only UTF-8, ISO-8859-*,
  62. /// single byte windows-12XX and the koi family
  63. ///
  64. bool CPPCMS_API validate_or_filter( std::string const &encoding,
  65. char const *begin,char const *end,
  66. std::string &output,
  67. char replace = 0);
  68. ///
  69. /// Convert the string in range [begin,end) from local 8 bit encoding according to locale \a loc to UTF-8.
  70. /// If illegal characters are found, the conversion is aborted and only the successfully converted part is returned.
  71. ///
  72. std::string CPPCMS_API to_utf8(std::locale const &loc,char const *begin,char const *end);
  73. ///
  74. /// Convert the string in range [begin,end) from local 8 bit encoding \a encoding to UTF-8.
  75. /// If illegal characters are found, the conversion is aborted and only the successfully converted part is returned.
  76. ///
  77. std::string CPPCMS_API to_utf8(char const *encoding,char const *begin,char const *end);
  78. ///
  79. /// Convert the string \a str from local 8 bit encoding according to locale \a loc to UTF-8.
  80. /// If illegal characters are found, the conversion is aborted and only the successfully converted part is returned.
  81. ///
  82. std::string CPPCMS_API to_utf8(std::locale const &loc,std::string const &str);
  83. ///
  84. /// Convert the string \a str from local 8 bit encoding \a encoding to UTF-8.
  85. /// If illegal characters are found, the conversion is aborted and only the successfully converted part is returned.
  86. ///
  87. std::string CPPCMS_API to_utf8(char const *encoding,std::string const &str);
  88. ///
  89. /// Convert the UTF-8 string in range [begin,end) to local 8 bit encoding according to locale \a loc.
  90. /// If non-convertible characters are found, the conversion is aborted and only the successfully converted part is returned.
  91. ///
  92. std::string CPPCMS_API from_utf8(std::locale const &loc,char const *begin,char const *end);
  93. ///
  94. /// Convert the UTF-8 string in range [begin,end) to local 8 bit encoding \a encoding.
  95. /// If non-convertible characters are found, the conversion is aborted and only the successfully converted part is returned.
  96. ///
  97. std::string CPPCMS_API from_utf8(char const *encoding,char const *begin,char const *end);
  98. ///
  99. /// Convert the UTF-8 string \a str to local 8 bit encoding according to locale \a loc.
  100. /// If non-convertible characters are found, the conversion is aborted and only the successfully converted part is returned.
  101. ///
  102. std::string CPPCMS_API from_utf8(std::locale const &loc,std::string const &str);
  103. ///
  104. /// Convert the UTF-8 string \a str to local 8 bit encoding \a encoding.
  105. /// If non-convertible characters are found, the conversion is aborted and only the successfully converted part is returned.
  106. ///
  107. std::string CPPCMS_API from_utf8(char const *encoding,std::string const &str);
  108. } // encoding
  109. } // cppcms
  110. #endif