|
- ///////////////////////////////////////////////////////////////////////////////
- //
- // Copyright (C) 2008-2012 Artyom Beilis (Tonkikh) <artyomtnk@yahoo.com>
- //
- // See accompanying file COPYING.TXT file for licensing details.
- //
- ///////////////////////////////////////////////////////////////////////////////
- #ifndef CPPCMS_XSS_H
- #define CPPCMS_XSS_H
-
- #include <booster/copy_ptr.h>
- #include <booster/regex.h>
- #include <booster/function.h>
- #include <cppcms/defs.h>
-
- #include <string.h>
- #include <string>
- #include <algorithm>
-
- namespace cppcms {
- namespace json {
- class value;
- }
- ///
- /// \brief Namespace that holds Anti-Cross Site Scripting Filter support
- ///
- /// The classes in this namespace are designed to provide filtering for safe
- /// handling of HTML and to prevent XSS attacks
- ///
- namespace xss {
-
- /// \cond INTERNAL
- namespace details {
- class c_string;
- }
- struct basic_rules_holder;
-
- /// \endcond
-
- ///
- /// \brief The class that holds XSS filter rules
- ///
- /// This is the major class that defines the white list rules to handle
- /// correct HTML input.
- ///
- /// When using these rules you should be very strict about what you need and what you
- /// allow.
- ///
- /// Basically you need to specify:
- ///
- /// -# The XHTML or HTML parsing rules - should be done first
- /// -# The encoding of the text. If you do not specify the encoding
- /// it would be assumed that it is ASCII compatible.
- /// You may not specify encoding only if you know that it was validated
- /// for example by using widgets::text, otherwise \b always specify
- /// encoding
- /// -# Provide the list of tags that should be used. Specify only those you need.
- /// .
- /// Never allow tags like style, object, embed or of course script as they can be easily
- /// used for XSS attacks
- /// -# Provide essential HTML attributes - properties for tags you need.
- /// Use add_uri_property for links like src for img or href for a.
- /// It would check correctness of URI syntax and ensure that only white-listed
- /// schemas are allowed (i.e. no javascript would be allowed).
- /// .
- /// Never allow the style attribute unless you specify a very strict white list of the styles that are
- /// really used. Styles can be easily exploited for both XSS and click-jacking. For example:
- /// \code
- /// <p style="width: expression(alert('XSS'));"></p>
- /// \endcode
- /// .
- /// If you want to use styles specify very strict list of things you need like:
- /// \code
- /// add_property("p","style",booster::regex("text-align:(left|right|center)"));
- /// \endcode
- /// -# Do not allow comments unless you need them. Note not all comments are allowed. Comments
- /// containing "<", ">" or "&" would be considered invalid as some exploits use them.
- ///
- /// Remember: the stricter you are, the harder it is to mount an attack. Read about XSS, study existing
- /// attacks to understand how they work and then decide what you allow.
- ///
- /// rules class can be treated as value for thread safe access, i.e. you can safely use
- /// const reference and const member functions as long as you don't change the rules
- /// under the hood.
- ///
- /// The simplest way: define at application startup some global rules object configure
- /// it and use it for filtering and validation - and make your attackers cry :-).
- ///
- ///
- class CPPCMS_API rules {
- public:
- /// Default constructor
- rules();
- /// Copy constructor, rules can be treated as a value
- rules(rules const &);
- /// Assignment operator
- rules const &operator=(rules const &);
- /// Destructor
- ~rules();
-
- /// Create rules from JSON object \a r
- ///
- /// The JSON object that defines the XSS prevention rules.
- /// This object has the following properties:
- ///
- /// - "xhtml" - boolean; default true - use XHTML (true) or HTML input
- /// - "comments" - boolean; setting it to true allows comments, default false
- /// - "numeric_entities" - boolean; setting it to true allows numeric_entities, default false
- /// - "entities" - array of strings: list of allowed HTML entities besides lt, gt and amp
- /// - "encoding" - string; the encoding of the text to validate, by default not checked and the
- /// input is assumed to be ASCII compatible. Always specify it for multibyte encodings
- /// like Shift-JIS or GBK as they are not ASCII compatible.
- /// - "tags" - object with 3 properties of type array of string:
- /// - "opening_and_closing" - the tags that should come in pair like "<b></b>"
- /// - "stand_alone" - the tags that should appear stand alone like "<br/>"
- /// - "any_tag" - the tags that can be both like "<input>"
- /// - "attributes" - array of objects that define HTML attributes. Each object consists
- /// of following properties:
- /// - "type" - string - the type of the attribute one of: "boolean", "uri", "relative_uri",
- /// "absolute_uri", "integer", "regex".
- /// - "scheme" - string the allowed URI scheme - regular expression like "(http|ftp)". Used with
- /// "uri" and "absolute_uri" type
- /// - "expression" - string the regular expression that defines the value that the attribute
- /// should match.
- /// - "tags" - array of strings - list of tags that this attribute is allowed for.
- /// - "attributes" - array of strings - list of names of the attribute
- /// - "pairs" - array of objects that consists of two properties "tag" and "attr" of
- /// type string that define tag and attribute that such type of property
- /// should be allowed for.
- ///
- /// The extra properties that are not defined by this scheme are ignored
- ///
- /// For example:
- /// \code
- /// {
- /// "xhtml" : true,
- /// "encoding" : "UTF-8",
- /// "entities" : [ "nbsp" , "copy" ],
- /// "comments" : false,
- /// "numeric_entities" : false,
- /// "tags" : {
- /// "opening_and_closing" : [
- /// "p", "b", "i", "tt",
- /// "a",
- /// "strong", "em",
- /// "sub", "sup",
- /// "ol", "ul", "li",
- /// "dd", "dt", "dl",
- /// "blockquote","code", "pre",
- /// "span", "div"
- /// ],
- /// "stand_alone" : [ "br", "hr", "img" ]
- /// },
- /// "attributes": [
- /// {
- /// "tags" : [ "p", "li", "ul" ],
- /// "attr" : [ "style" ],
- /// "type" : "regex",
- /// "expression" : "\\s*text-align:\\s*(center|left|right|justify);?\\s*"
- /// },
- /// {
- /// "tags" : [ "span", "div" ],
- /// "attr" : [ "class", "id" ],
- /// "type" : "regex",
- /// "expression" : "[a-zA-Z_0-9]+"
- /// },
- /// {
- /// "pairs" : [
- /// { "tag" : "a", "attr" : "href" },
- /// { "tag" : "img", "attr" : "src" }
- /// ],
- /// "type" : "absolute_uri",
- /// "scheme" : "(http|https|ftp)"
- /// },
- /// {
- /// "tags" : [ "img" ],
- /// "attr" : [ "alt" ],
- /// "type" : "regex",
- /// "expression" : ".*"
- /// }
- /// ]
- /// }
- /// \endcode
- ///
- rules(json::value const &r);
-
- ///
- /// Create rules from the JSON object stored in the file \a file_name
- ///
- /// \see rules(json::value const&)
- ///
- rules(std::string const &file_name);
-
- ///
- /// How to treat the input
- ///
- typedef enum {
- xhtml_input, ///< Assume that the input is XHTML
- html_input ///< Assume that the input is HTML
- } html_type;
-
- ///
- /// The type of tag
- ///
- typedef enum {
- invalid_tag = 0, ///< This tag is invalid (returned by valid_tag)
- opening_and_closing = 1, ///< This tag should be opened and closed like em or strong
- stand_alone = 2, ///< This tag should stand alone (like hr or br)
- any_tag = 3 ///< This tag can be used in both roles (like input)
- } tag_type;
-
- ///
- /// Get how to treat input - HTML or XHTML
- ///
- html_type html() const;
- ///
- /// Set how to treat input - HTML or XHTML, it should be called first before you add any other
- /// rules
- ///
- void html(html_type t);
-
- ///
- /// Add the tag that should be allowed to appear in the text, for HTML the name is case
- /// insensitive, i.e. "br", "Br", "bR" and "BR" are valid tags for name "br".
- ///
- /// The \a name should be ASCII only
- ///
- void add_tag(std::string const &name,tag_type = any_tag);
-
- ///
- /// Add allowed HTML entity, by default only "lt", "gt", "quot" and "amp" are allowed
- ///
- void add_entity(std::string const &name);
-
-
- ///
- /// Get if numeric entities are allowed, default is false
- ///
- bool numeric_entities_allowed() const;
-
- ///
- /// Set if numeric entities are allowed
- ///
- void numeric_entities_allowed(bool v);
-
- ///
- /// Functor that allows to provide custom validations for different properties
- ///
- typedef booster::function<bool(char const *begin,char const *end)> validator_type;
-
- ///
- /// Add the property that should be allowed to appear for specific tag as boolean property like
- /// checked="checked", when the type
- /// is HTML it is case insensitive.
- ///
- /// The \a property should be ASCII only
- ///
- void add_boolean_property(std::string const &tag_name,std::string const &property);
- ///
- /// Add the property that should be checked using custom functor
- ///
- void add_property(std::string const &tag_name,std::string const &property,validator_type const &val);
- ///
- /// Add the property that should be checked using regular expression.
- ///
- void add_property(std::string const &tag_name,std::string const &property,booster::regex const &r);
- ///
- /// Add numeric property, same as add_property(tag_name,property,booster::regex("-?[0-9]+")) but
- /// a little bit more efficient
- ///
- void add_integer_property(std::string const &tag_name,std::string const &property);
-
- ///
- /// Add URI property.
- /// It should be used for properties like "href" or "src".
- /// It is very good idea to use it in order to prevent urls like javascript:alert('XSS')
- ///
- /// Its behavior is the same as add_property(tag_name,property,rules::uri_validator());
- ///
- void add_uri_property(std::string const &tag_name,std::string const &property);
- ///
- /// Add URI property, using regular expression that matches allowed schemas.
- /// It should be used for properties like "href" or "src".
- /// It is very good idea to use it in order to prevent urls like javascript:alert('XSS')
- ///
- /// Its behavior is the same as add_property(tag_name,property,rules::uri_validator(schema));
- ///
- void add_uri_property(std::string const &tag_name,std::string const &property,std::string const &schema);
-
- ///
- /// \deprecated use uri_validator
- ///
- /// Create a regular expression that checks URI for safe inclusion in the property.
- /// By default it allows only: http, https, ftp, mailto, news, nntp.
- ///
- /// If you need finer control over allowed schemas, use uri_matcher(std::string const&).
- ///
- CPPCMS_DEPRECATED static booster::regex uri_matcher();
- ///
- /// \deprecated use uri_validator
- ///
- /// Create a regular expression that checks URI for safe inclusion in the text, where
- /// schema is a regular expression that matches specific protocols that can be used.
- ///
- /// \note Don't add "^" or "$" tags as this expression would be used in construction of
- /// another regular expression.
- ///
- /// For example:
- /// \code
- /// booster::regex uri = uri_matcher("(http|https)");
- /// \endcode
- ///
- CPPCMS_DEPRECATED static booster::regex uri_matcher(std::string const &schema);
-
- ///
- /// Create a validator that checks URI for safe inclusion in the property.
- /// By default it allows only: http, https, ftp, mailto, news, nntp.
- ///
- /// If you need finer control over allowed schemas, use uri_validator(std::string const&).
- ///
- static validator_type uri_validator();
- ///
- /// Create a validator that checks URI for safe inclusion in the property.
- /// - \a scheme is a regular expression that matches specific protocols that can be used.
- /// - \a absolute_only - set to true to prevent accepting relative URIs like "/files/img.png" or "test.html"
- ///
- /// \note You don't need to add "^" or "$" tags to \a scheme
- ///
- /// For example:
- /// \code
- /// uri_validator("(http|https)");
- /// \endcode
- ///
- ///
- /// \see relative_uri_validator() for a validator of relative URIs.
- ///
- static validator_type uri_validator(std::string const &scheme,bool absolute_only = false);
-
- ///
- /// Create a validator that checks that this URI is relative and it is safe for inclusion
- /// in URI property like href or src
- ///
- static validator_type relative_uri_validator();
-
- ///
- /// Check if the comments are allowed in the text
- ///
- bool comments_allowed() const;
- ///
- /// Set to true if the comments are allowed in the text
- ///
- void comments_allowed(bool comments);
-
- ///
- /// Set the character encoding of the source, otherwise encoding is not checked and is
- /// assumed valid; invalid characters are removed from the text or replaced with default character
- ///
- /// It is very important to specify this option. You may skip it if you are sure that
- /// the input encoding was already validated using cppcms::form::text widget that handles
- /// character encoding validation by default.
- ///
- /// In any case it is generally better to always specify this option.
- ///
- ///
- /// \note the replace functionality is not supported for all encodings, only UTF-8, ISO-8859-* and single byte windows-12XX
- /// encodings support such replacement with default character, for all other encodings like Shift-JIS, the invalid
- /// characters or characters that are invalid for use in HTML are removed.
- ///
- void encoding(std::string const &enc);
-
-
- /// \cond INTERNAL
-
- ///
- /// Test if the tag is valid.
- /// \a tag should be lower case for HTML or unchanged for XHTML
- ///
- tag_type valid_tag(details::c_string const &tag) const;
-
- ///
- /// Test if the property is valid (without value)
- /// \a tag and \a property should be lower case for HTML or unchanged for XHTML
- ///
- bool valid_boolean_property(details::c_string const &tag,details::c_string const &property) const;
- ///
- /// Test if the property and its \a value are valid;
- ///
- /// \a tag and \a property should be lower case for HTML or unchanged for XHTML
- ///
- bool valid_property(details::c_string const &tag,details::c_string const &property,details::c_string const &value) const;
-
- ///
- /// Test if specific html entity is valid
- ///
- bool valid_entity(details::c_string const &val) const;
-
- ///
- /// Get the encoding, returns empty string if no encoding testing
- /// is required
- ///
- std::string encoding() const;
-
- /// \endcond
-
-
- private:
- // Accessors for the underlying basic_rules_holder object
- basic_rules_holder &impl();
- basic_rules_holder const &impl() const;
-
- // Opaque implementation data; its definition is not visible in this header
- struct data;
- booster::copy_ptr<data> d;
-
- };
-
- ///
- /// \brief The enumeration that defines the method used for filtering invalid HTML
- ///
- typedef enum {
- remove_invalid, ///< Remove all invalid HTML from the input
- escape_invalid ///< Escape (convert to text) all invalid HTML in the input
- } filtering_method_type;
-
- ///
- /// \brief Check the input in range [\a begin, \a end) according to the rules \a r.
- ///
- /// It does not filter the input, it only checks its validity; it would be faster than validate_and_filter_if_invalid
- /// or the filter functions but it does not correct errors.
- ///
- CPPCMS_API bool validate(char const *begin,char const *end,rules const &r);
- ///
- /// \brief Validate the input in range [\a begin, \a end) according to the rules \a r and if it is not valid filter it
- /// and save the filtered text into the \a filtered string using the filtering method \a method.
- ///
- /// If the data was valid, \a filtered remains unchanged and the function returns true, otherwise it returns false
- /// and the filtered data is saved.
- /// NOTE(review): \a replacement_char is not documented here; presumably it is the substitute for invalid characters - confirm.
- CPPCMS_API bool validate_and_filter_if_invalid( char const *begin,
- char const *end,
- rules const &r,
- std::string &filtered,
- filtering_method_type method=remove_invalid,
- char replacement_char = 0);
-
- ///
- /// \brief Filter the input in range [\a begin, \a end) according to the rules \a r using the filtering
- /// method \a method and return the filtered text
- /// NOTE(review): \a replacement_char is not documented here; presumably it is the substitute for invalid characters - confirm.
- CPPCMS_API std::string filter(char const *begin,
- char const *end,
- rules const &r,
- filtering_method_type method=remove_invalid,
- char replacement_char = 0);
- ///
- /// \brief Filter the input text \a input according to the rules \a r using the filtering method \a method
- /// NOTE(review): \a replacement_char is not documented here; presumably it is the substitute for invalid characters - confirm.
- CPPCMS_API std::string filter(std::string const &input,
- rules const &r,
- filtering_method_type method=remove_invalid,
- char replacement_char = 0);
-
- } // xss
- }
- #endif
|