ChipMaster's trial hacks on C++CMS starting with v1.2.1. Not sure I'll follow on with the v2 since it looks to be breaking and mostly frivolous.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

460 lines
17 KiB

  1. ///////////////////////////////////////////////////////////////////////////////
  2. //
  3. // Copyright (C) 2008-2012 Artyom Beilis (Tonkikh) <artyomtnk@yahoo.com>
  4. //
  5. // See accompanying file COPYING.TXT file for licensing details.
  6. //
  7. ///////////////////////////////////////////////////////////////////////////////
  8. #ifndef CPPCMS_XSS_H
  9. #define CPPCMS_XSS_H
  10. #include <booster/copy_ptr.h>
  11. #include <booster/regex.h>
  12. #include <booster/function.h>
  13. #include <cppcms/defs.h>
  14. #include <string.h>
  15. #include <string>
  16. #include <algorithm>
  17. namespace cppcms {
  18. namespace json {
  19. class value;
  20. }
  21. ///
  22. /// \brief Namespace that holds Anti-Cross Site Scripting Filter support
  23. ///
  24. /// The classes in this namespace are provided for filtering and safe
  25. /// handling of HTML and for preventing XSS attacks
  26. ///
  27. namespace xss {
  28. /// \cond INTERNAL
  29. namespace details {
  30. class c_string;
  31. }
  32. struct basic_rules_holder;
  33. /// \endcond
  34. ///
  35. /// \brief The class that holds XSS filter rules
  36. ///
  37. /// This is the main class that defines the white list rules for handling
  38. /// correct HTML input.
  39. ///
  40. /// When using these rules you should be very strict about what you need and what you
  41. /// allow.
  42. ///
  43. /// Basically you need to specify:
  44. ///
  45. /// -# The XHTML or HTML parsing rules - should be done first
  46. /// -# The encoding of the text. If you do not specify the encoding
  47. /// it would be assumed that it is ASCII compatible.
  48. /// You may omit the encoding only if you know that the text was already validated,
  49. /// for example by using widgets::text; otherwise \b always specify
  50. /// the encoding
  51. /// -# Provide the list of tags that should be used. Specify only those you need.
  52. /// .
  53. /// Never allow tags like style, object, embed or of course script as they can be easily
  54. /// used for XSS attacks
  55. /// -# Provide essential HTML attributes - properties for tags you need.
  56. /// Use add_uri_property for links like src for img or href for a.
  57. /// It would check correctness of URI syntax and ensure that only white-listed
  58. /// schemas are allowed (i.e. no javascript would be allowed).
  59. /// .
  60. /// Never allow the style attribute unless you specify a very strict white list of the styles
  61. /// really used. Styles can be easily exploited for both XSS and click-jacking. For example
  62. /// \code
  63. /// <p style="width: expression(alert('XSS'));"></p>
  64. /// \endcode
  65. /// .
  66. /// If you want to use styles specify very strict list of things you need like:
  67. /// \code
  68. /// add_property("p","style",booster::regex("text-align:(left|right|center)"));
  69. /// \endcode
  70. /// -# Do not allow comments unless you need them. Note not all comments are allowed. Comments
  71. /// containing "<", ">" or "&" would be considered invalid as some exploits use them.
  72. ///
  73. /// Remember: the stricter you are, the harder it is to mount an attack. Read about XSS, study existing
  74. /// attacks to understand how they work and then decide what you allow.
  75. ///
  76. /// The rules class can be treated as a value for thread-safe access, i.e. you can safely use
  77. /// const references and const member functions as long as you don't change the rules
  78. /// under the hood.
  79. ///
  80. /// The simplest way: define a global rules object at application startup, configure
  81. /// it, and use it for filtering and validation - and make your attackers cry :-).
  82. ///
  83. ///
  84. class CPPCMS_API rules {
  85. public:
  86. rules();
  87. rules(rules const &);
  88. rules const &operator=(rules const &);
  89. ~rules();
  90. /// Create rules from the JSON object \a r
  91. ///
  92. /// The JSON object defines the XSS prevention rules.
  93. /// This object has the following properties:
  94. ///
  95. /// - "xhtml" - boolean; default true - use XHTML (true) or HTML input
  96. /// - "comments" - boolean; setting it to true allows comments, default false
  97. /// - "numeric_entities" - boolean; setting it to true allows numeric entities, default false
  98. /// - "entities" - array of strings: list of allowed HTML entities besides lt, gt and amp
  99. /// - "encoding" - string; the encoding of the text to validate, by default not checked and the
  100. /// input is assumed to be ASCII compatible. Always specify it for multibyte encodings
  101. /// like Shift-JIS or GBK as they are not ASCII compatible.
  102. /// - "tags" - object with 3 properties of type array of string:
  103. /// - "opening_and_closing" - the tags that should come in pair like "<b></b>"
  104. /// - "stand_alone" - the tags that should appear stand alone like "<br/>"
  105. /// - "any_tag" - the tags that can be both like "<input>"
  106. /// - "attributes" - array of objects that define HTML attributes. Each object consists
  107. /// of the following properties:
  108. /// - "type" - string - the type of the attribute one of: "boolean", "uri", "relative_uri",
  109. /// "absolute_uri", "integer", "regex".
  110. /// - "scheme" - string the allowed URI scheme - regular expression like "(http|ftp)". Used with
  111. /// "uri" and "absolute_uri" type
  112. /// - "expression" - string the regular expression that defines the value that the attribute
  113. /// should match.
  114. /// - "tags" - array of strings - list of tags that this attribute is allowed for.
  115. /// - "attributes" - array of strings - list of names of the attribute (NOTE(review): the example below spells this key "attr" - confirm which name the parser reads)
  116. /// - "pairs" - array of objects that consist of two properties "tag" and "attr" of
  117. /// type string that define the tag and attribute that such type of property
  118. /// should be allowed for.
  119. ///
  120. /// The extra properties that are not defined by this scheme are ignored
  121. ///
  122. /// For example:
  123. /// \code
  124. /// {
  125. /// "xhtml" : true,
  126. /// "encoding" : "UTF-8",
  127. /// "entities" : [ "nbsp" , "copy" ],
  128. /// "comments" : false,
  129. /// "numeric_entities" : false,
  130. /// "tags" : {
  131. /// "opening_and_closing" : [
  132. /// "p", "b", "i", "tt",
  133. /// "a",
  134. /// "strong", "em",
  135. /// "sub", "sup",
  136. /// "ol", "ul", "li",
  137. /// "dd", "dt", "dl",
  138. /// "blockquote","code", "pre",
  139. /// "span", "div"
  140. /// ],
  141. /// "stand_alone" : [ "br", "hr", "img" ]
  142. /// },
  143. /// "attributes": [
  144. /// {
  145. /// "tags" : [ "p", "li", "ul" ],
  146. /// "attr" : [ "style" ],
  147. /// "type" : "regex",
  148. /// "expression" : "\\s*text-align:\\s*(center|left|right|justify);?\\s*"
  149. /// },
  150. /// {
  151. /// "tags" : [ "span", "div" ],
  152. /// "attr" : [ "class", "id" ],
  153. /// "type" : "regex",
  154. /// "expression" : "[a-zA-Z_0-9]+"
  155. /// },
  156. /// {
  157. /// "pairs" : [
  158. /// { "tag" : "a", "attr" : "href" },
  159. /// { "tag" : "img", "attr" : "src" }
  160. /// ],
  161. /// "type" : "absolute_uri",
  162. /// "scheme" : "(http|https|ftp)"
  163. /// },
  164. /// {
  165. /// "tags" : [ "img" ],
  166. /// "attr" : [ "alt" ],
  167. /// "type" : "regex",
  168. /// "expression" : ".*"
  169. /// }
  170. /// ]
  171. /// }
  172. /// \endcode
  173. ///
  174. rules(json::value const &r);
  175. ///
  176. /// Create rules from the JSON object stored in the file \a file_name
  177. ///
  178. /// \see rules(json::value const&)
  179. ///
  180. rules(std::string const &file_name);
  181. ///
  182. /// How to treat the input
  183. ///
  184. typedef enum {
  185. xhtml_input, ///< Assume that the input is XHTML
  186. html_input ///< Assume that the input is HTML
  187. } html_type;
  188. ///
  189. /// The type of tag
  190. ///
  191. typedef enum {
  192. invalid_tag = 0, ///< This tag is invalid (returned by validate)
  193. opening_and_closing = 1, ///< This tag should be opened and closed, like em or strong
  194. stand_alone = 2, ///< This tag should stand alone (like hr or br)
  195. any_tag = 3, ///< This tag can be used in both roles (like input)
  196. } tag_type;
  197. ///
  198. /// Get how to treat input - HTML or XHTML
  199. ///
  200. html_type html() const;
  201. ///
  202. /// Set how to treat input - HTML or XHTML, it should be called first before you add any other
  203. /// rules
  204. ///
  205. void html(html_type t);
  206. ///
  207. /// Add the tag that should be allowed to appear in the text, for HTML the name is case
  208. /// insensitive, i.e. "br", "Br", "bR" and "BR" are valid tags for name "br".
  209. ///
  210. /// The \a name should be ASCII only
  211. ///
  212. void add_tag(std::string const &name,tag_type = any_tag);
  213. ///
  214. /// Add allowed HTML entity, by default only "lt", "gt", "quot" and "amp" are allowed
  215. ///
  216. void add_entity(std::string const &name);
  217. ///
  218. /// Get if numeric entities are allowed, default is false
  219. ///
  220. bool numeric_entities_allowed() const;
  221. ///
  222. /// Set if numeric entities are allowed
  223. ///
  224. void numeric_entities_allowed(bool v);
  225. ///
  226. /// Functor that allows providing custom validations for different properties
  227. ///
  228. typedef booster::function<bool(char const *begin,char const *end)> validator_type;
  229. ///
  230. /// Add the property that should be allowed to appear for a specific tag as a boolean property like
  231. /// checked="checked". When the type
  232. /// is HTML it is case insensitive.
  233. ///
  234. /// The \a property should be ASCII only
  235. ///
  236. void add_boolean_property(std::string const &tag_name,std::string const &property);
  237. ///
  238. /// Add the property that should be checked using a custom functor
  239. ///
  240. void add_property(std::string const &tag_name,std::string const &property,validator_type const &val);
  241. ///
  242. /// Add the property that should be checked using a regular expression.
  243. ///
  244. void add_property(std::string const &tag_name,std::string const &property,booster::regex const &r);
  245. ///
  246. /// Add numeric property, same as add_property(tag_name,property,booster::regex("-?[0-9]+")) but
  247. /// a little bit more efficient
  248. ///
  249. void add_integer_property(std::string const &tag_name,std::string const &property);
  250. ///
  251. /// Add URI property.
  252. /// It should be used for properties like "href" or "src".
  253. /// It is a very good idea to use it in order to prevent urls like javascript:alert('XSS')
  254. ///
  255. /// Its behavior is the same as add_property(tag_name,property,rules::uri_validator());
  256. ///
  257. void add_uri_property(std::string const &tag_name,std::string const &property);
  258. ///
  259. /// Add URI property, using a regular expression that matches allowed schemas.
  260. /// It should be used for properties like "href" or "src".
  261. /// It is a very good idea to use it in order to prevent urls like javascript:alert('XSS')
  262. ///
  263. /// Its behavior is the same as add_property(tag_name,property,rules::uri_validator(schema));
  264. ///
  265. void add_uri_property(std::string const &tag_name,std::string const &property,std::string const &schema);
  266. ///
  267. /// \deprecated use uri_validator
  268. ///
  269. /// Create a regular expression that checks URI for safe inclusion in the property.
  270. /// By default it allows only: http, https, ftp, mailto, news, nntp.
  271. ///
  272. /// If you need finer control over allowed schemas, use uri_matcher(std::string const&).
  273. ///
  274. CPPCMS_DEPRECATED static booster::regex uri_matcher();
  275. ///
  276. /// \deprecated use uri_validator
  277. ///
  278. /// Create a regular expression that checks URI for safe inclusion in the text, where
  279. /// \a schema is a regular expression that matches specific protocols that can be used.
  280. ///
  281. /// \note Don't add "^" or "$" tags as this expression would be used in the construction of
  282. /// another regular expression.
  283. ///
  284. /// For example:
  285. /// \code
  286. /// booster::regex uri = uri_matcher("(http|https)");
  287. /// \endcode
  288. ///
  289. CPPCMS_DEPRECATED static booster::regex uri_matcher(std::string const &schema);
  290. ///
  291. /// Create a validator that checks URI for safe inclusion in the property.
  292. /// By default it allows only: http, https, ftp, mailto, news, nntp.
  293. ///
  294. /// If you need finer control over allowed schemas, use uri_validator(std::string const&).
  295. ///
  296. static validator_type uri_validator();
  297. ///
  298. /// Create a validator that checks URI for safe inclusion in the property.
  299. /// - \a scheme is a regular expression that matches specific protocols that can be used.
  300. /// - \a absolute_only - set to true to prevent accepting relative URIs like "/files/img.png" or "test.html"
  301. ///
  302. /// \note You don't need to add "^" or "$" tags to \a scheme
  303. ///
  304. /// For example:
  305. /// \code
  306. /// uri_validator("(http|https)");
  307. /// \endcode
  308. ///
  309. ///
  310. /// This is the overload to use when you need finer control over the allowed schemes.
  311. ///
  312. static validator_type uri_validator(std::string const &scheme,bool absolute_only = false);
  313. ///
  314. /// Create a validator that checks that this URI is relative and it is safe for inclusion
  315. /// in URI property like href or src
  316. ///
  317. static validator_type relative_uri_validator();
  318. ///
  319. /// Check if the comments are allowed in the text
  320. ///
  321. bool comments_allowed() const;
  322. ///
  323. /// Set to true if the comments are allowed in the text
  324. ///
  325. void comments_allowed(bool comments);
  326. ///
  327. /// Set the character encoding of the source; otherwise the encoding is not checked and is
  328. /// assumed to be valid. Invalid characters are removed from the text or replaced with the default character
  329. ///
  330. /// It is very important to specify this option. You may skip it if you are sure that
  331. /// the input encoding was already validated using cppcms::form::text widget that handles
  332. /// character encoding validation by default.
  333. ///
  334. /// In any case it is generally better to always specify this option.
  335. ///
  336. ///
  337. /// \note the replace functionality is not supported for all encodings, only UTF-8, ISO-8859-* and single byte windows-12XX
  338. /// encodings support such replacement with default character, for all other encodings like Shift-JIS, the invalid
  339. /// characters or characters that are invalid for use in HTML are removed.
  340. ///
  341. void encoding(std::string const &enc);
  342. /// \cond INTERNAL
  343. ///
  344. /// Test if the tag is valid.
  345. /// \a tag should be lower case for HTML or unchanged for XHTML
  346. ///
  347. tag_type valid_tag(details::c_string const &tag) const;
  348. ///
  349. /// Test if the property is valid (without value)
  350. /// \a tag and \a property should be lower case for HTML or unchanged for XHTML
  351. ///
  352. bool valid_boolean_property(details::c_string const &tag,details::c_string const &property) const;
  353. ///
  354. /// Test if the property and its \a value are valid;
  355. ///
  356. /// \a tag and \a property should be lower case for HTML or unchanged for XHTML
  357. ///
  358. bool valid_property(details::c_string const &tag,details::c_string const &property,details::c_string const &value) const;
  359. ///
  360. /// Test if specific html entity is valid
  361. ///
  362. bool valid_entity(details::c_string const &val) const;
  363. ///
  364. /// Get the encoding, returns an empty string if no encoding testing
  365. /// is required
  366. ///
  367. std::string encoding() const;
  368. /// \endcond
  369. private:
  370. basic_rules_holder &impl();
  371. basic_rules_holder const &impl() const;
  372. struct data;
  373. booster::copy_ptr<data> d;
  374. };
  375. ///
  376. /// \brief The enumeration that defines the method used to filter invalid HTML
  377. ///
  378. typedef enum {
  379. remove_invalid, ///< Remove all invalid HTML from the input
  380. escape_invalid ///< Escape (convert to text) all invalid HTML in the input
  381. } filtering_method_type;
  382. ///
  383. /// \brief Check the input in range [\a begin, \a end) according to the rules \a r.
  384. ///
  385. /// It does not filter the input, it only checks its validity; it would be faster than validate_and_filter_if_invalid
  386. /// or filter functions but it does not correct errors.
  387. ///
  388. CPPCMS_API bool validate(char const *begin,char const *end,rules const &r);
  389. ///
  390. /// \brief Validate the input in range [\a begin, \a end) according to the rules \a r and if it is not valid filter it
  391. /// and save the filtered text into the \a filtered string using the filtering method \a method.
  392. ///
  393. /// If the data was valid, \a filtered remains unchanged and the function returns true, otherwise it returns false
  394. /// and the filtered data is saved.
  395. /// NOTE(review): \a replacement_char is not described here; presumably it is the character substituted for invalid characters, with 0 meaning they are removed - confirm against the implementation.
  396. CPPCMS_API bool validate_and_filter_if_invalid( char const *begin,
  397. char const *end,
  398. rules const &r,
  399. std::string &filtered,
  400. filtering_method_type method=remove_invalid,
  401. char replacement_char = 0);
  402. ///
  403. /// \brief Filter the input in range [\a begin, \a end) according to the rules \a r using filtering
  404. /// method \a method
  405. /// \return the filtered text. NOTE(review): \a replacement_char is not described here; presumably the character substituted for invalid characters - confirm against the implementation.
  406. CPPCMS_API std::string filter(char const *begin,
  407. char const *end,
  408. rules const &r,
  409. filtering_method_type method=remove_invalid,
  410. char replacement_char = 0);
  411. ///
  412. /// \brief Filter the input text \a input according to the rules \a r using filtering method \a method
  413. /// \return the filtered text. NOTE(review): \a replacement_char is not described here; presumably the character substituted for invalid characters - confirm against the implementation.
  414. CPPCMS_API std::string filter(std::string const &input,
  415. rules const &r,
  416. filtering_method_type method=remove_invalid,
  417. char replacement_char = 0);
  418. } // xss
  419. }
  420. #endif