/////////////////////////////////////////////////////////////////////////////// // // Copyright (C) 2008-2012 Artyom Beilis (Tonkikh) // // See accompanying file COPYING.TXT file for licensing details. // /////////////////////////////////////////////////////////////////////////////// #ifndef CPPCMS_XSS_H #define CPPCMS_XSS_H #include #include #include #include #include #include #include namespace cppcms { namespace json { class value; } /// /// \brief Namespace that holds Anti-Cross Site Scripting Filter support /// /// The classes in this namespace created to provide a filtering for a save /// handing of HTML and preventing XSS attacks /// namespace xss { /// \cond INTERNAL namespace details { class c_string; } struct basic_rules_holder; /// \endcond /// /// \brief The class that holds XSS filter rules /// /// This is the major class the defines the white list rules to handle the /// Correct HTML input. /// /// When using these rules you should be very strict about what you need and what you /// allow. /// /// Basically you need to specify: /// /// -# The XHTML or HTML parsing rules - should be done first /// -# The encoding of the text. If you do not specify the encoding /// it would be assumed that it is ASCII compatible. /// You may not specify encoding only if you know that it was validated /// for example by using widgets::text, otherwise \b always specify /// encoding /// -# Provide the list of tags that should be used. Specify only thous you need. /// . /// Never allow tags like style, object, embed or of course script as they can be easily /// used for XSS attacks /// -# Provide essential HTML attributes - properties for tags you need. /// Use add_uri_property for links like src for img or href for a. /// It would check correctness of URI syntax and ensure that only white-listed /// schemas are allowed (i.e. no javascript would be allowed). /// . /// Never allow style tags unless you specify very strict white list of really used /// styles. Styles can be easily exploited for both XSS and click-jacking. For example /// \code ///

/// \endcode /// . /// If you want to use styles specify very strict list of things you need like: /// \code /// add_property("p","style",booster::regex("text-align:(left|right|center)")); /// \endcode /// -# Do not allow comments unless you need them. Note not all comments are allowed. Comments /// containing "<", ">" or "&" would be considered invalid as some exploits use them. /// /// Remember more strict you are it is harder to make attack. Read about XSS, see existing /// attacks to understand how they work and then decide what you allow. /// /// rules class can be treated as value for thread safe access, i.e. you can safely use /// const reference and const member functions as long as you don't change the rules /// under the hood. /// /// The simplest way: define at application startup some global rules object configure /// it and use it for filtering and validation - and make your attackers cry :-). /// /// class CPPCMS_API rules { public: rules(); rules(rules const &); rules const &operator=(rules const &); ~rules(); /// Create rules from JSON object \a r /// /// The json object the defines the XSS prevention rules. /// This object has following properties: /// /// - "xhtml" - boolean; default true - use XHTML (true) or HTML input /// - "comments" - boolean; setting it to true allows comments, default false /// - "numeric_entities" - boolean; setting it to true allows numeric_entities, default false /// - "entities" - array of strings: list of allowed HTML entities besides lt, gt and amp /// - "encoding" - string; the encoding of the text to validate, by default not checked and the /// input is assumed to be ASCII compatible. Always specifiy it for multibyte encodings /// like Shift-JIS or GBK as they are not ASCII compatible. /// - "tags" - object with 3 properties of type array of string: /// - "opening_and_closing" - the tags that should come in pair like "" /// - "stand_alone" - the tags that should appear stand alone like "
" /// - "any_tag" - the tags that can be both like "" /// - "attributes" - array of objects that define HTML attributes. Each object consists /// of following properties: /// - "type" - string - the type of the attribute one of: "boolean", "uri", "relative_uri", /// "absolute_uri", "integer", "regex". /// - "scheme" - string the allowed URI scheme - regular expression like "(http|ftp)". Used with /// "uri" and "absolute_uri" type /// - "expression" - string the regular expression that defines the value that the attribute /// should match. /// - "tags" - array of strings - list of tags that this attribute is allowed for. /// - "attributes" - array of strings - lisf of names of the attribute /// - "pairs" - array of objects that consists of two properities "tag" and "attr" of /// type string that define tag and attributed that such type of property /// should be allowed for. /// /// The extra properties that are not defined by this scheme are ingored /// /// For example: /// \code /// { /// "xhtml" : true, /// "encoding" : "UTF-8", /// "entities" : [ "nbsp" , "copy" ], /// "comments" : false, /// "numeric_entities" : false, /// "tags" : { /// "opening_and_closing" : [ /// "p", "b", "i", "tt", /// "a", /// "strong", "em", /// "sub", "sup", /// "ol", "ul", "li", /// "dd", "dt", "dl", /// "blockquote","code", "pre", /// "span", "div" /// ], /// "stand_alone" : [ "br", "hr", "img" ] /// ], /// "attributes": [ /// { /// "tags" : [ "p", "li", "ul" ] /// "attr" : [ "style" ], /// "type" : "regex", /// "expression" : "\\s*text-algin:\\s*(center|left|right|justify);?\\s*" /// }, /// { /// "tags" : [ "span", "div" ] /// "attr" : [ "class", "id" ], /// "type" : "regex", /// "expression" : "[a-zA-Z_0-9]+" /// }, /// { /// "pairs" : [ /// { "tag" : "a", "attr" : "href" }, /// { "tag" : "img", "attr" : "src" } /// ], /// "type" : "absolute_uri", /// "scheme" : "(http|https|ftp)" /// }, /// { /// "tags" : [ "img" ], /// "attr" : [ "alt" ], /// "type" : "regex", /// "expression" : ".*" /// } /// ] /// } /// \endcode /// rules(json::value const &r); /// /// Create rules from the JSON object stored in the file \a file_name /// /// \see rules(json::value const&) /// rules(std::string const &file_name); /// /// How to treat in input /// typedef enum { xhtml_input, ///< Assume that the input is XHTML html_input ///< Assume that the input is HTML } html_type; /// /// The type of tag /// typedef enum { invalid_tag = 0, ///< This tag is invalid (returned by validate) opening_and_closing = 1, ///< This tag should be opened and closed like em , or strong stand_alone = 2, ///< This tag should stand alone (like hr or br) any_tag = 3, ///< This tag can be used in both roles (like input) } tag_type; /// /// Get how to treat input - HTML or XHTML /// html_type html() const; /// /// Set how to treat input - HTML or XHTML, it should be called first before you add any other /// rules /// void html(html_type t); /// /// Add the tag that should be allowed to appear in the text, for HTML the name is case /// insensitive, i.e. "br", "Br", "bR" and "BR" are valid tags for name "br". /// /// The \a name should be ASCII only /// void add_tag(std::string const &name,tag_type = any_tag); /// /// Add allowed HTML entity, by default only "lt", "gt", "quot" and "amp" are allowed /// void add_entity(std::string const &name); /// /// Get if numeric entities are allowed, default is false /// bool numeric_entities_allowed() const; /// /// Set if numeric entities are allowed /// void numeric_entities_allowed(bool v); /// /// Functor that allows to provide custom validations for different properties /// typedef booster::function validator_type; /// /// Add the property that should be allowed to appear for specific tag as boolean property like /// checked="checked", when the type /// is HTML it is case insensitive. /// /// The \a property should be ASCII only /// void add_boolean_property(std::string const &tag_name,std::string const &property); /// /// Add the property that should be checked using custom functor /// void add_property(std::string const &tag_name,std::string const &property,validator_type const &val); /// /// Add the property that should be checked using regular expression. /// void add_property(std::string const &tag_name,std::string const &property,booster::regex const &r); /// /// Add numeric property, same as add_property(tag_name,property,booster::regex("-?[0-9]+") but /// little bit more efficient /// void add_integer_property(std::string const &tag_name,std::string const &property); /// /// Add URI property. /// It should be used for properties like like "href" or "src". /// It is very good idea to use it in order to prevent urls like javascript:alert('XSS') /// /// It's behavior is same as add_property(tag_name,property,rules::uri_validator()); /// void add_uri_property(std::string const &tag_name,std::string const &property); /// /// Add URI property, using regular expression that matches allowed schemas. /// It should be used for properties like like "href" or "src". /// It is very good idea to use it in order to prevent urls like javascript:alert('XSS') /// /// It's behavior is same as add_property(tag_name,property,rules::uri_validator(schema)); /// void add_uri_property(std::string const &tag_name,std::string const &property,std::string const &schema); /// /// \deprecated use uri_validator /// /// Create a regular expression that checks URI for safe inclusion in the property. /// By default it allows only: http, https, ftp, mailto, news, nntp. /// /// If you need finer control over allowed schemas, use uri_matcher(std::string const&). /// CPPCMS_DEPRECATED static booster::regex uri_matcher(); /// /// \deprecated use uri_validator /// /// Create a regular expression that checks URI for safe inclusion in the text, where /// schema is a regular expression that matches specific protocols that can be used. /// /// \note Don't add "^" or "$" tags as this expression would be used in construction of regular /// other expression. /// /// For example: /// \code /// booster::regex uri = uri_matcher("(http|https)"); /// \endcode /// CPPCMS_DEPRECATED static booster::regex uri_matcher(std::string const &schema); //// /// Create a validator that checks URI for safe inclusion in the property. /// By default it allows only: http, https, ftp, mailto, news, nntp. /// /// If you need finer control over allowed schemas, use uri_validator(std::string const&). /// static validator_type uri_validator(); //// /// Create a validator that checks URI for safe inclusion in the property. /// - schema is a regular expression that matches specific protocols that can be used. /// - absolute_only - set to true to prevent accepting relative URIs like "/files/img.png" or "test.html" /// /// \note You don't need to add "^" or "$" tags to \a scheme /// /// For example: /// \code /// uri_validator("(http|https)"); /// \endcode /// /// /// If you need finer control over allowed schemas, use uri_validator(std::string const&). /// static validator_type uri_validator(std::string const &scheme,bool absolute_only = false); /// /// Create a validator that checks that this URI is relative and it is safe for inclusion /// in URI property like href or src /// static validator_type relative_uri_validator(); /// /// Check if the comments are allowed in the text /// bool comments_allowed() const; /// /// Set to true if the comments are allowed in the text /// void comments_allowed(bool comments); /// /// Set the character encoding of the source, otherwise encoding is not checked and /// assumed valid all invalid characters are removed from the text or replaced with default character /// /// It is very important to specify this option. You may skip it if you are sure that the /// the input encoding was already validated using cppcms::form::text widget that handles /// character encoding validation by default. /// /// In any case it is generally better to always specify this option. /// /// /// \note the replace functionality is not supported for all encoding, only UTF-8, ISO-8859-* and single byte windows-12XX /// encodings support such replacement with default character, for all other encodings like Shift-JIS, the invalid /// characters or characters that are invalid for use in HTML are removed. /// void encoding(std::string const &enc); /// \cond INTERNAL /// /// Test if the tag is valid. /// \a tag should be lower case for HTML or unchanged for XHTML /// tag_type valid_tag(details::c_string const &tag) const; /// /// Test if the property is valid (without value) or unchanged for XHTML /// \a tag and \a property should be lower case for HTML or unchanged for XHTML /// bool valid_boolean_property(details::c_string const &tag,details::c_string const &property) const; /// /// Test if the property and its \a value are valid; /// /// \a tag and \a property should be lower case for HTML or unchanged for XHTML /// bool valid_property(details::c_string const &tag,details::c_string const &property,details::c_string const &value) const; /// /// Test if specific html entity is valid /// bool valid_entity(details::c_string const &val) const; /// /// Get the encoding, returns empty string if not encoding testing /// is required /// std::string encoding() const; /// \endcond private: basic_rules_holder &impl(); basic_rules_holder const &impl() const; struct data; booster::copy_ptr d; }; /// /// \brief The enumerator that defines filtering invalid HTML method /// typedef enum { remove_invalid, ///< Remove all invalid HTML form the input escape_invalid ///< Escape (convert to text) all invalid HTML in the input } filtering_method_type; /// /// \brief Check the input in range [\a begin, \a end) according to the rules \a r. /// /// It does not filters the input it only checks its validity, it would be faster then validate_and_filter_if_invalid /// or filter functions but it does not correct errors. /// CPPCMS_API bool validate(char const *begin,char const *end,rules const &r); /// /// \brief Validate the input in range [\a begin, \a end) according to the rules \a r and if it is not valid filter it /// and save filtered text into \a filtered string using a filtering method \a method. /// /// If the data was valid, \a filtered remains unchanged and the function returns true, otherwise it returns false /// and the filtered data is saved. /// CPPCMS_API bool validate_and_filter_if_invalid( char const *begin, char const *end, rules const &r, std::string &filtered, filtering_method_type method=remove_invalid, char replacement_char = 0); /// /// \brief Filter the input in range [\a begin, \a end) according to the rules \a r using filtering /// method \a method /// CPPCMS_API std::string filter(char const *begin, char const *end, rules const &r, filtering_method_type method=remove_invalid, char replacement_char = 0); /// /// \brief Filter the input text \a input according to the rules \a r using filtering method \a method /// CPPCMS_API std::string filter(std::string const &input, rules const &r, filtering_method_type method=remove_invalid, char replacement_char = 0); } // xss } #endif