<?php /** * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes * Copyright (C) 2002, 2003, 2005 Ulf Harnhammar * * This program is free software and open source software; you can redistribute * it and/or modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the License, * or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA * http://www.gnu.org/licenses/gpl.html * * [kses strips evil scripts!] * * Added wp_ prefix to avoid conflicts with existing kses users * * @version 0.2.2 * @copyright (C) 2002, 2003, 2005 * @author Ulf Harnhammar <http://advogato.org/person/metaur/> * * @package External * @subpackage KSES */
/**
 * Default for the `CUSTOM_TAGS` override switch.
 *
 * Relying on `CUSTOM_TAGS` is discouraged and should be treated as deprecated;
 * the {@see 'wp_kses_allowed_html'} filter is more powerful and supplies context.
 *
 * A site that does define this constant must also set every one of the
 * following globals to an array:
 *
 *  - `$allowedposttags`
 *  - `$allowedtags`
 *  - `$allowedentitynames`
 *  - `$allowedxmlentitynames`
 *
 * @see wp_kses_allowed_html()
 * @since 1.2.0
 *
 * @var array[]|false Array of default allowable HTML tags, or false to use the defaults.
 */
defined( 'CUSTOM_TAGS' ) || define( 'CUSTOM_TAGS', false );
/*
 * Ensure that these variables are added to the global namespace
 * (e.g. if using namespaces / autoload in the current PHP environment).
 *
 * The statement must sit on its own line: when it shares a line with a `//`
 * comment it becomes part of that comment and is never executed.
 */
global $allowedposttags, $allowedtags, $allowedentitynames, $allowedxmlentitynames;
/*
 * Report a developer notice when some of the required KSES globals were not
 * provided as arrays. `$missing_kses_globals` is populated outside of this
 * statement; it is joined with ", " to build the second placeholder.
 */
if ( $missing_kses_globals ) {
	_doing_it_wrong(
		'wp_kses_allowed_html',
		sprintf(
			/* translators: 1: CUSTOM_TAGS, 2: Global variable names. */
			__( 'When using the %1$s constant, make sure to set these globals to an array: %2$s.' ),
			'<code>CUSTOM_TAGS</code>',
			implode( ', ', $missing_kses_globals )
		),
		'6.2.0'
	);
}
/** * Filters text content and strips out disallowed HTML. * * This function makes sure that only the allowed HTML element names, attribute * names, attribute values, and HTML entities will occur in the given text string. * * This function expects unslashed data. * * @see wp_kses_post() for specifically filtering post content and fields. * @see wp_allowed_protocols() for the default allowed protocols in link URLs. * * @since 1.0.0 * * @param string $content Text content to filter. * @param array[]|string $allowed_html An array of allowed HTML elements and attributes, * or a context name such as 'post'. See wp_kses_allowed_html() * for the list of accepted context names. * @param string[] $allowed_protocols Optional. Array of allowed URL protocols. * Defaults to the result of wp_allowed_protocols(). * @return string Filtered content containing only the allowed HTML. */ function wp_kses( $content, $allowed_html, $allowed_protocols = array() ) { if ( empty( $allowed_protocols ) ) { $allowed_protocols = wp_allowed_protocols(); }
/** * Filters one HTML attribute and ensures its value is allowed. * * This function can escape data in some situations where `wp_kses()` must strip the whole attribute. * * @since 4.2.3 * * @param string $attr The 'whole' attribute, including name and value. * @param string $element The HTML element name to which the attribute belongs. * @return string Filtered attribute. */ function wp_kses_one_attr( $attr, $element ) { $uris = wp_kses_uri_attributes(); $allowed_html = wp_kses_allowed_html( 'post' ); $allowed_protocols = wp_allowed_protocols(); $attr = wp_kses_no_null( $attr, array( 'slash_zero' => 'keep' ) );
/**
 * Returns an array of allowed HTML tags and attributes for a given context.
 *
 * Every return path passes its result through the {@see 'wp_kses_allowed_html'}
 * filter together with the context name. When `$context` is itself an array it
 * is treated as the allow list and filtered under the context name 'explicit'.
 *
 * @since 3.5.0
 * @since 5.0.1 `form` removed as allowable HTML tag.
 *
 * @global array $allowedposttags
 * @global array $allowedtags
 * @global array $allowedentitynames
 *
 * @param string|array $context The context for which to retrieve tags. Allowed values are 'post',
 *                              'strip', 'data', 'entities', or the name of a field filter such as
 *                              'pre_user_description', or an array of allowed HTML elements and attributes.
 * @return array Array of allowed HTML tags and their allowed attributes.
 */
function wp_kses_allowed_html( $context = '' ) {
	global $allowedposttags, $allowedtags, $allowedentitynames;

	if ( is_array( $context ) ) {
		// When `$context` is an array it's actually an array of allowed HTML elements and attributes.
		$html    = $context;
		$context = 'explicit';

		/**
		 * Filters the HTML tags that are allowed for a given context.
		 *
		 * HTML tags and attribute names are case-insensitive in HTML but must be
		 * added to the KSES allow list in lowercase. An item added to the allow list
		 * in upper or mixed case will not be recognized as permitted by KSES.
		 *
		 * @since 3.5.0
		 *
		 * @param array[] $html    Allowed HTML tags.
		 * @param string  $context Context name.
		 */
		return apply_filters( 'wp_kses_allowed_html', $html, $context );
	}

	switch ( $context ) {
		case 'post':
			/** This filter is documented in wp-includes/kses.php */
			$tags = apply_filters( 'wp_kses_allowed_html', $allowedposttags, $context );

			// 5.0.1 removed the `<form>` tag, allow it if a filter is allowing its sub-elements `<input>` or `<select>`.
			if ( ! CUSTOM_TAGS && ! isset( $tags['form'] ) && ( isset( $tags['input'] ) || isset( $tags['select'] ) ) ) {
				// Start again from the unfiltered defaults so `form` is present before the filter runs.
				$tags = $allowedposttags;

				$tags['form'] = array(
					'action'         => true,
					'accept'         => true,
					'accept-charset' => true,
					'enctype'        => true,
					'method'         => true,
					'name'           => true,
					'target'         => true,
				);

				/** This filter is documented in wp-includes/kses.php */
				$tags = apply_filters( 'wp_kses_allowed_html', $tags, $context );
			}

			return $tags;

		case 'user_description':
		case 'pre_user_description':
			$tags             = $allowedtags;
			$tags['a']['rel'] = true;
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', $tags, $context );

		case 'strip':
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', array(), $context );

		case 'entities':
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', $allowedentitynames, $context );

		case 'data':
		default:
			/** This filter is documented in wp-includes/kses.php */
			return apply_filters( 'wp_kses_allowed_html', $allowedtags, $context );
	}
}
/**
 * Runs the KSES pre-processing hook.
 *
 * The only hook fired here is {@see 'pre_kses'}. It receives every argument
 * given to this function and its return value is handed back to the caller.
 *
 * @since 1.0.0
 *
 * @param string         $content           Content to filter through KSES.
 * @param array[]|string $allowed_html      An array of allowed HTML elements and attributes,
 *                                          or a context name such as 'post'. See wp_kses_allowed_html()
 *                                          for the list of accepted context names.
 * @param string[]       $allowed_protocols Array of allowed URL protocols.
 * @return string Filtered content through {@see 'pre_kses'} hook.
 */
function wp_kses_hook( $content, $allowed_html, $allowed_protocols ) {
	/**
	 * Filters content to be run through KSES.
	 *
	 * @since 2.3.0
	 *
	 * @param string         $content           Content to filter through KSES.
	 * @param array[]|string $allowed_html      An array of allowed HTML elements and attributes,
	 *                                          or a context name such as 'post'. See wp_kses_allowed_html()
	 *                                          for the list of accepted context names.
	 * @param string[]       $allowed_protocols Array of allowed URL protocols.
	 */
	$hooked_content = apply_filters( 'pre_kses', $content, $allowed_html, $allowed_protocols );

	return $hooked_content;
}
/**
 * Reports which KSES release this implementation is based on.
 *
 * @since 1.0.0
 *
 * @return string KSES version number.
 */
function wp_kses_version() {
	$kses_version = '0.2.2';

	return $kses_version;
}
/**
 * Searches for HTML tags, no matter how malformed.
 *
 * It also matches stray `>` characters.
 *
 * Every matched span is handed to `_wp_kses_split_callback()`. That callback
 * only receives the regex matches, so the allow list and protocol list are
 * published through the two `$pass_*` globals before matching starts.
 *
 * @since 1.0.0
 * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments.
 *
 * @global array[]|string $pass_allowed_html      An array of allowed HTML elements and attributes,
 *                                                or a context name such as 'post'.
 * @global string[]       $pass_allowed_protocols Array of allowed URL protocols.
 *
 * @param string         $content           Content to filter.
 * @param array[]|string $allowed_html      An array of allowed HTML elements and attributes,
 *                                          or a context name such as 'post'. See wp_kses_allowed_html()
 *                                          for the list of accepted context names.
 * @param string[]       $allowed_protocols Array of allowed URL protocols.
 * @return string Content with fixed HTML tags
 */
function wp_kses_split( $content, $allowed_html, $allowed_protocols ) {
	global $pass_allowed_html, $pass_allowed_protocols;

	// Hand the current settings to the callback; without this it would see stale or unset values.
	$pass_allowed_html      = $allowed_html;
	$pass_allowed_protocols = $allowed_protocols;

	/*
	 * The pattern uses the `x` modifier, so it must stay on separate lines:
	 * each `#` comment runs to the end of its line. Nowdoc is used so that no
	 * `$` in the pattern can ever be treated as a variable.
	 */
	$token_pattern = <<<'REGEX'
~
	(                      # Detect comments of various flavors before attempting to find tags.
		(<!--.*?(-->|$))   #  - Normative HTML comments.
		|
		</[^a-zA-Z][^>]*>  #  - Closing tags with invalid tag names.
		|
		<![^>]*>           #  - Invalid markup declaration nodes. Not all invalid nodes
		                   #    are matched so as to avoid breaking legacy behaviors.
	)
	|
	(<[^>]*(>|$)|>)        # Tag-like spans of text.
~x
REGEX;

	return preg_replace_callback( $token_pattern, '_wp_kses_split_callback', $content );
}
/**
 * Lists the HTML attribute names whose values hold a URL.
 *
 * The list covers the attributes that must contain a URL according to the
 * HTML specification, whether or not KSES itself allows them.
 *
 * @link https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
 *
 * @since 5.0.1
 *
 * @return string[] HTML attribute names whose value contains a URL.
 */
function wp_kses_uri_attributes() {
	/**
	 * Filters the list of attributes that are required to contain a URL.
	 *
	 * Use this filter to add any `data-` attributes that are required to be
	 * validated as a URL.
	 *
	 * @since 5.0.1
	 *
	 * @param string[] $uri_attributes HTML attribute names whose value contains a URL.
	 */
	return apply_filters(
		'wp_kses_uri_attributes',
		array(
			'action',
			'archive',
			'background',
			'cite',
			'classid',
			'codebase',
			'data',
			'formaction',
			'href',
			'icon',
			'longdesc',
			'manifest',
			'poster',
			'profile',
			'src',
			'usemap',
			'xmlns',
		)
	);
}
/** * Callback for `wp_kses_split()`. * * @since 3.1.0 * @access private * @ignore * * @global array[]|string $pass_allowed_html An array of allowed HTML elements and attributes, * or a context name such as 'post'. * @global string[] $pass_allowed_protocols Array of allowed URL protocols. * * @param array $matches preg_replace regexp matches * @return string */ function _wp_kses_split_callback( $matches ) { global $pass_allowed_html, $pass_allowed_protocols;
/** * Callback for `wp_kses_split()` for fixing malformed HTML tags. * * This function does a lot of work. It rejects some very malformed things like * `<:::>`. It returns an empty string, if the element isn't allowed (look ma, no * `strip_tags()`!). Otherwise it splits the tag into an element and an attribute * list. * * After the tag is split into an element and an attribute list, it is run * through another filter which will remove illegal attributes and once that is * completed, will be returned. * * @access private * @ignore * @since 1.0.0 * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments. * * @param string $content Content to filter. * @param array[]|string $allowed_html An array of allowed HTML elements and attributes, * or a context name such as 'post'. See wp_kses_allowed_html() * for the list of accepted context names. * @param string[] $allowed_protocols Array of allowed URL protocols. * * @return string Fixed HTML element */ function wp_kses_split2( $content, $allowed_html, $allowed_protocols ) { $content = wp_kses_stripslashes( $content );
/* * The regex pattern used to split HTML into chunks attempts * to split on HTML token boundaries. This function should * thus receive chunks that _either_ start with meaningful * syntax tokens, like a tag `<div>` or a comment `<!-- ... -->`. * * If the first character of the `$content` chunk _isn't_ one * of these syntax elements, which always starts with `<`, then * the match had to be for the final alternation of `>`. In such * case, it's probably standing on its own and could be encoded * with a character reference to remove ambiguity. * * In other words, if this chunk isn't from a match of a syntax * token, it's just a plaintext greater-than (`>`) sign. */ if ( ! str_starts_with( $content, '<' ) ) { return '>'; }
/* * When certain invalid syntax constructs appear, the HTML parser * shifts into what's called the "bogus comment state." This is a * plaintext state that consumes everything until the nearest `>` * and then transforms the entire span into an HTML comment. * * Preserve these comments and do not treat them like tags. * * @see https://html.spec.whatwg.org/#bogus-comment-state */ if ( 1 === preg_match( '~^(?:</[^a-zA-Z][^>]*>|<![a-z][^>]*>)$~', $content ) ) { /** * Since the pattern matches `</…>` and also `<!…>`, this will * preserve the type of the cleaned-up token in the output. */ $opener = $content[1]; $content = substr( $content, 2, -1 );
do { $prev = $content; $content = wp_kses( $content, $allowed_html, $allowed_protocols ); } while ( $prev !== $content );
// Recombine the modified inner content with the original token structure. return "<{$opener}{$content}>"; }
/* * Normative HTML comments should be handled separately as their * parsing rules differ from those for tags and text nodes. */ if ( str_starts_with( $content, '<!--' ) ) { $content = str_replace( array( '<!--', '-->' ), '', $content );
/** * Removes all attributes, if none are allowed for this element. * * If some are allowed it calls `wp_kses_hair()` to split them further, and then * it builds up new HTML code from the data that `wp_kses_hair()` returns. It also * removes `<` and `>` characters, if there are any left. One more thing it does * is to check if the tag has a closing XHTML slash, and if it does, it puts one * in the returned code as well. * * An array of allowed values can be defined for attributes. If the attribute value * doesn't fall into the list, the attribute will be removed from the tag. * * Attributes can be marked as required. If a required attribute is not present, * KSES will remove all attributes from the tag. As KSES doesn't match opening and * closing tags, it's not possible to safely remove the tag itself, the safest * fallback is to strip all attributes from the tag, instead. * * @since 1.0.0 * @since 5.9.0 Added support for an array of allowed values for attributes. * Added support for required attributes. * * @param string $element HTML element/tag. * @param string $attr HTML attributes from HTML element to closing HTML element tag. * @param array[]|string $allowed_html An array of allowed HTML elements and attributes, * or a context name such as 'post'. See wp_kses_allowed_html() * for the list of accepted context names. * @param string[] $allowed_protocols Array of allowed URL protocols. * @return string Sanitized HTML element. */ function wp_kses_attr( $element, $attr, $allowed_html, $allowed_protocols ) { if ( ! is_array( $allowed_html ) ) { $allowed_html = wp_kses_allowed_html( $allowed_html ); }
// Is there a closing XHTML slash at the end of the attributes? $xhtml_slash = ''; if ( preg_match( '%\s*/\s*$%', $attr ) ) { $xhtml_slash = ' /'; }
// Are any attributes allowed at all for this element? $element_low = strtolower( $element ); if ( empty( $allowed_html[ $element_low ] ) || true === $allowed_html[ $element_low ] ) { return "<$element$xhtml_slash>"; }
// Split it. $attrarr = wp_kses_hair( $attr, $allowed_protocols );
// Check if there are attributes that are required. $required_attrs = array_filter( $allowed_html[ $element_low ], static function ( $required_attr_limits ) { return isset( $required_attr_limits['required'] ) && true === $required_attr_limits['required']; } );
/* * If a required attribute check fails, we can return nothing for a self-closing tag, * but for a non-self-closing tag the best option is to return the element with attributes, * as KSES doesn't handle matching the relevant closing tag. */ $stripped_tag = ''; if ( empty( $xhtml_slash ) ) { $stripped_tag = "<$element>"; }
// Go through $attrarr, and save the allowed attributes for this element in $attr2. $attr2 = ''; foreach ( $attrarr as $arreach ) { // Check if this attribute is required. $required = isset( $required_attrs[ strtolower( $arreach['name'] ) ] );
// If this was a required attribute, we can mark it as found. if ( $required ) { unset( $required_attrs[ strtolower( $arreach['name'] ) ] ); } } elseif ( $required ) { // This attribute was required, but didn't pass the check. The entire tag is not allowed. return $stripped_tag; } }
// If some required attributes weren't set, the entire tag is not allowed. if ( ! empty( $required_attrs ) ) { return $stripped_tag; }
// Remove any "<" or ">" characters. $attr2 = preg_replace( '/[<>]/', '', $attr2 );
return "<$element$attr2$xhtml_slash>"; }
/**
 * Determines whether an attribute is allowed.
 *
 * The element's entry in `$allowed_html` is looked up first; the attribute is
 * then checked against that entry. On rejection `$name`, `$value` and `$whole`
 * are all reset to an empty string.
 *
 * @since 4.2.3
 * @since 5.0.0 Added support for `data-*` wildcard attributes.
 *
 * @param string $name         The attribute name. Passed by reference. Returns empty string when not allowed.
 * @param string $value        The attribute value. Passed by reference. Returns a filtered value.
 * @param string $whole        The `name=value` input. Passed by reference. Returns filtered input.
 * @param string $vless        Whether the attribute is valueless. Use 'y' or 'n'.
 * @param string $element      The name of the element to which this attribute belongs.
 * @param array  $allowed_html The full list of allowed elements and attributes.
 * @return bool Whether or not the attribute is allowed.
 */
function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowed_html ) {
	$name_low    = strtolower( $name );
	$element_low = strtolower( $element );

	// An element that is not in the allow list cannot have any allowed attributes.
	if ( ! isset( $allowed_html[ $element_low ] ) ) {
		$name  = '';
		$value = '';
		$whole = '';
		return false;
	}

	$allowed_attr = $allowed_html[ $element_low ];

	if ( ! isset( $allowed_attr[ $name_low ] ) || '' === $allowed_attr[ $name_low ] ) {
		/*
		 * Allow `data-*` attributes.
		 *
		 * When specifying `$allowed_html`, the attribute name should be set as
		 * `data-*` (not to be mixed with the HTML 4.0 `data` attribute, see
		 * https://www.w3.org/TR/html40/struct/objects.html#adef-data).
		 *
		 * Note: the attribute name should only contain `A-Za-z0-9_-` chars.
		 */
		if ( str_starts_with( $name_low, 'data-' ) && ! empty( $allowed_attr['data-*'] )
			&& preg_match( '/^data-[a-z0-9_-]+$/', $name_low, $match )
		) {
			/*
			 * Add the whole attribute name to the allowed attributes and set any restrictions
			 * for the `data-*` attribute values for the current element.
			 */
			$allowed_attr[ $match[0] ] = $allowed_attr['data-*'];
		} else {
			$name  = '';
			$value = '';
			$whole = '';
			return false;
		}
	}

	if ( is_array( $allowed_attr[ $name_low ] ) ) {
		// There are some checks.
		foreach ( $allowed_attr[ $name_low ] as $currkey => $currval ) {
			if ( ! wp_kses_check_attr_val( $value, $vless, $currkey, $currval ) ) {
				$name  = '';
				$value = '';
				$whole = '';
				return false;
			}
		}
	}

	return true;
}
/** * Builds an attribute list from string containing attributes. * * This function does a lot of work. It parses an attribute list into an array * with attribute data, and tries to do the right thing even if it gets weird * input. It will add quotes around attribute values that don't have any quotes * or apostrophes around them, to make it easier to produce HTML code that will * conform to W3C's HTML specification. It will also remove bad URL protocols * from attribute values. It also reduces duplicate attributes by using the * attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`). * * @since 1.0.0 * * @param string $attr Attribute list from HTML element to closing HTML element tag. * @param string[] $allowed_protocols Array of allowed URL protocols. * @return array[] Array of attribute information after parsing. */ function wp_kses_hair( $attr, $allowed_protocols ) { $attrarr = array(); $mode = 0; $attrname = ''; $uris = wp_kses_uri_attributes();
// Loop through the whole attribute list.
while ( strlen( $attr ) !== 0 ) { $working = 0; // Was the last operation successful?
// We add quotes to conform to W3C's HTML spec. $working = 1; $mode = 0; $attr = preg_replace( "%^[^\s\"']+(\s+|$)%", '', $attr ); }
break; } // End switch.
if ( 0 === $working ) { // Not well-formed, remove and try again. $attr = wp_kses_html_error( $attr ); $mode = 0; } } // End while.
if ( 1 === $mode && false === array_key_exists( $attrname, $attrarr ) ) { /* * Special case, for when the attribute list ends with a valueless * attribute like "selected". */ $attrarr[ $attrname ] = array( 'name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y', ); }
return $attrarr; }
/**
 * Finds all attributes of an HTML element.
 *
 * Does not modify input. May return "evil" output.
 *
 * Based on `wp_kses_split2()` and `wp_kses_attr()`.
 *
 * The returned list starts with the opening part of the tag (`<` plus the
 * element name and any whitespace after it) and ends with the closing part
 * (an optional XHTML slash plus `>`), with the attributes from
 * `wp_kses_hair_parse()` in between.
 *
 * @since 4.2.3
 *
 * @param string $element HTML element.
 * @return array|false List of attributes found in the element. Returns false on failure.
 */
function wp_kses_attr_parse( $element ) {
	$valid = preg_match( '%^(<\s*)(/\s*)?([a-zA-Z0-9]+\s*)([^>]*)(>?)$%', $element, $matches );
	if ( 1 !== $valid ) {
		return false;
	}

	// Name the capture groups; the optional slash group is '' when absent.
	$begin  = $matches[1];
	$slash  = $matches[2];
	$elname = $matches[3];
	$attr   = $matches[4];
	$end    = $matches[5];

	if ( '' !== $slash ) {
		// Closing elements do not get parsed.
		return false;
	}

	// Is there a closing XHTML slash at the end of the attributes?
	if ( 1 === preg_match( '%\s*/\s*$%', $attr, $matches ) ) {
		$xhtml_slash = $matches[0];
		$attr        = substr( $attr, 0, -strlen( $xhtml_slash ) );
	} else {
		$xhtml_slash = '';
	}

	// Split it.
	$attrarr = wp_kses_hair_parse( $attr );
	if ( false === $attrarr ) {
		return false;
	}

	// Make sure all input is returned by adding front and back matter.
	array_unshift( $attrarr, $begin . $slash . $elname );
	array_push( $attrarr, $xhtml_slash . $end );

	return $attrarr;
}
/** * Builds an attribute list from string containing attributes. * * Does not modify input. May return "evil" output. * In case of unexpected input, returns false instead of stripping things. * * Based on `wp_kses_hair()` but does not return a multi-dimensional array. * * @since 4.2.3 * * @param string $attr Attribute list from HTML element to closing HTML element tag. * @return array|false List of attributes found in $attr. Returns false on failure. */ function wp_kses_hair_parse( $attr ) { if ( '' === $attr ) { return array(); }
$regex = '(?: [_a-zA-Z][-_a-zA-Z0-9:.]* # Attribute name. | \[\[?[^\[\]]+\]\]? # Shortcode in the name position implies unfiltered_html. ) (?: # Attribute value. \s*=\s* # All values begin with "=". (?: "[^"]*" # Double-quoted. | \'[^\']*\' # Single-quoted. | [^\s"\']+ # Non-quoted. (?:\s|$) # Must have a space. ) | (?:\s|$) # If attribute has no value, space is required. ) \s* # Trailing space is optional except as mentioned above. ';
/* * Although it is possible to reduce this procedure to a single regexp, * we must run that regexp twice to get exactly the expected result. * * Note: do NOT remove the `x` modifiers as they are essential for the above regex! */
/**
 * Performs different checks for attribute values.
 *
 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval",
 * "valueless", "values" and "value_callback". The check name is matched
 * case-insensitively. Any other check name is not a value check and passes.
 *
 * @since 1.0.0
 *
 * @param string $value      Attribute value.
 * @param string $vless      Whether the attribute is valueless. Use 'y' or 'n'.
 * @param string $checkname  What $checkvalue is checking for.
 * @param mixed  $checkvalue What constraint the value should pass.
 * @return bool Whether check passes.
 */
function wp_kses_check_attr_val( $value, $vless, $checkname, $checkvalue ) {
	// Non-negative integer of at most six digits, with at most six whitespace characters either side.
	$bounded_integer = '/^\s{0,6}[0-9]{1,6}\s{0,6}$/';

	switch ( strtolower( $checkname ) ) {
		case 'maxlen':
			/*
			 * The maxlen check makes sure that the attribute value has a length not
			 * greater than the given value. This can be used to avoid Buffer Overflows
			 * in WWW clients and various Internet servers.
			 */
			return strlen( $value ) <= $checkvalue;

		case 'minlen':
			// The minlen check makes sure that the attribute value has a length not smaller than the given value.
			return strlen( $value ) >= $checkvalue;

		case 'maxval':
			/*
			 * The maxval check does two things: it checks that the attribute value is
			 * an integer from 0 and up, without an excessive amount of zeroes or
			 * whitespace (to avoid Buffer Overflows). It also checks that the attribute
			 * value is not greater than the given value.
			 * This check can be used to avoid Denial of Service attacks.
			 */
			if ( ! preg_match( $bounded_integer, $value ) ) {
				return false;
			}
			return (int) $value <= $checkvalue;

		case 'minval':
			// The minval check works like maxval, except that the value must not be smaller than the given value.
			if ( ! preg_match( $bounded_integer, $value ) ) {
				return false;
			}
			return (int) $value >= $checkvalue;

		case 'valueless':
			/*
			 * The valueless check makes sure if the attribute has a value
			 * (like `<a href="blah">`) or not (`<option selected>`). If the given value
			 * is a "y" or a "Y", the attribute must not have a value.
			 * If the given value is an "n" or an "N", the attribute must have a value.
			 */
			return strtolower( $checkvalue ) === $vless;

		case 'values':
			// The values check makes sure that the lowercased attribute value is one of the listed values.
			return in_array( strtolower( $value ), (array) $checkvalue, true );

		case 'value_callback':
			/*
			 * The value_callback check is used when you want to make sure that the attribute
			 * value is accepted by the callback function.
			 */
			return (bool) call_user_func( $checkvalue, $value );
	}

	// Unknown keys (for example 'required') are not value checks.
	return true;
}
/** * Sanitizes a string and removes disallowed URL protocols. * * This function removes all non-allowed protocols from the beginning of the * string. It ignores whitespace and the case of the letters, and it does * understand HTML entities. It does its work recursively, so it won't be * fooled by a string like `javascript:javascript:alert(57)`. * * @since 1.0.0 * * @param string $content Content to filter bad protocols from. * @param string[] $allowed_protocols Array of allowed URL protocols. * @return string Filtered content. */ function wp_kses_bad_protocol( $content, $allowed_protocols ) { $content = wp_kses_no_null( $content );
// Short-circuit if the string starts with `https://` or `http://`. Most common cases. if ( ( str_starts_with( $content, 'https://' ) && in_array( 'https', $allowed_protocols, true ) ) || ( str_starts_with( $content, 'http://' ) && in_array( 'http', $allowed_protocols, true ) ) ) { return $content; }
/**
 * Removes the backslash from every backslash-escaped double quote.
 *
 * The two-character sequence `\"` becomes `"`; every other backslash is left
 * untouched. The quoting from `preg_replace(//e)` requires this.
 *
 * @since 1.0.0
 *
 * @param string $content String to strip slashes from.
 * @return string Fixed string with quoted slashes.
 */
function wp_kses_stripslashes( $content ) {
	$escaped_quote = '\\"';

	return str_replace( $escaped_quote, '"', $content );
}
/** * Converts the keys of an array to lowercase. * * @since 1.0.0 * * @param array $inarray Unfiltered array. * @return array Fixed array with all lowercase keys. */ function wp_kses_array_lc( $inarray ) { $outarray = array();
/**
 * Recovers from a parsing error in `wp_kses_hair()`.
 *
 * Drops the leading run of the attribute string up to and including the
 * whitespace that follows it. Quoted and apostrophe-delimited sections count
 * as part of that run, even when the closing delimiter is missing.
 *
 * @since 1.0.0
 *
 * @param string $attr
 * @return string
 */
function wp_kses_html_error( $attr ) {
	$malformed_prefix = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

	return preg_replace( $malformed_prefix, '', $attr );
}
/** * Sanitizes content from bad protocols and other characters. * * This function searches for URL protocols at the beginning of the string, while * handling whitespace and HTML entities. * * @since 1.0.0 * * @param string $content Content to check for bad protocols. * @param string[] $allowed_protocols Array of allowed URL protocols. * @param int $count Depth of call recursion to this function. * @return string Sanitized content. */ function wp_kses_bad_protocol_once( $content, $allowed_protocols, $count = 1 ) { $content = preg_replace( '/(�*58(?![;0-9])|�*3a(?![;a-f0-9]))/i', '$1;', $content ); $content2 = preg_split( '/:|�*58;|�*3a;|:/i', $content, 2 );
/** * Callback for `wp_kses_bad_protocol_once()` regular expression. * * This function processes URL protocols, checks to see if they're in the * list of allowed protocols or not, and returns different data depending * on the answer. * * @access private * @ignore * @since 1.0.0 * * @param string $scheme URI scheme to check against the list of allowed protocols. * @param string[] $allowed_protocols Array of allowed URL protocols. * @return string Sanitized content. */ function wp_kses_bad_protocol_once2( $scheme, $allowed_protocols ) { $scheme = wp_kses_decode_entities( $scheme ); $scheme = preg_replace( '/\s/', '', $scheme ); $scheme = wp_kses_no_null( $scheme ); $scheme = strtolower( $scheme );
/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. Every `&` is first escaped to
 * `&amp;`, so `AT&T` becomes the correct `AT&amp;T` and a malformed reference
 * such as `&#XYZZY;` becomes `&amp;#XYZZY;`. Anything that looks like a named,
 * decimal or hexadecimal character reference is then handed to a callback,
 * which decides whether to restore it.
 *
 * When `$context` is set to 'xml', named references are handled by
 * `wp_kses_xml_named_entities()` instead of `wp_kses_named_entities()`.
 *
 * @since 1.0.0
 * @since 5.5.0 Added `$context` parameter.
 *
 * @param string $content Content to normalize entities.
 * @param string $context Context for normalization. Can be either 'html' or 'xml'.
 *                        Default 'html'.
 * @return string Content with normalized entities.
 */
function wp_kses_normalize_entities( $content, $context = 'html' ) {
	// Disarm all entities by converting & to &amp;
	$content = str_replace( '&', '&amp;', $content );

	// Change back the allowed entities in our list of allowed entities.
	if ( 'xml' === $context ) {
		$content = preg_replace_callback( '/&amp;([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_xml_named_entities', $content );
	} else {
		$content = preg_replace_callback( '/&amp;([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $content );
	}
	$content = preg_replace_callback( '/&amp;#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $content );
	$content = preg_replace_callback( '/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $content );

	return $content;
}
/** * Callback for `wp_kses_normalize_entities()` regular expression. * * This function only accepts valid named entity references, which are finite, * case-sensitive, and highly scrutinized by HTML and XML validators. * * @since 3.0.0 * * @global array $allowedentitynames * * @param array $matches preg_replace_callback() matches array. * @return string Correctly encoded entity. */ function wp_kses_named_entities( $matches ) { global $allowedentitynames;
/** * Callback for `wp_kses_normalize_entities()` regular expression. * * This function only accepts valid named entity references, which are finite, * case-sensitive, and highly scrutinized by XML validators. HTML named entity * references are converted to their code points. * * @since 5.5.0 * * @global array $allowedentitynames * @global array $allowedxmlentitynames * * @param array $matches preg_replace_callback() matches array. * @return string Correctly encoded entity. */ function wp_kses_xml_named_entities( $matches ) { global $allowedentitynames, $allowedxmlentitynames;
/** * Determines if a Unicode codepoint is valid. * * @since 2.7.0 * * @param int $i Unicode codepoint. * @return bool Whether or not the codepoint is a valid Unicode codepoint. */ function valid_unicode( $i ) { $i = (int) $i;
/** * Converts all numeric HTML entities to their named counterparts. * * This function decodes numeric HTML entities (`&#65;` and `&#x41;`). * It doesn't do anything with named entities like `&auml;`, but we don't * need them in the allowed URL protocols system anyway. * * @since 1.0.0 * * @param string $content Content to change entities. * @return string Content after decoded entities. */ function wp_kses_decode_entities( $content ) { $content = preg_replace_callback( '/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $content ); $content = preg_replace_callback( '/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $content );
/**
 * Runs slashed content through KSES using the current filter as context.
 *
 * The data is unslashed, filtered with `wp_kses()` under the name returned by
 * `current_filter()`, and slashed again before it is returned.
 *
 * This function expects slashed data.
 *
 * @since 1.0.0
 *
 * @param string $data Content to filter, expected to be escaped with slashes.
 * @return string Filtered content.
 */
function wp_filter_kses( $data ) {
	$unslashed = stripslashes( $data );
	$filtered  = wp_kses( $unslashed, current_filter() );

	return addslashes( $filtered );
}
/**
 * Runs unslashed content through KSES using the current filter as context.
 *
 * This function expects unslashed data.
 *
 * @since 2.9.0
 *
 * @param string $data Content to filter, expected to not be escaped.
 * @return string Filtered content.
 */
function wp_kses_data( $data ) {
	$context = current_filter();

	return wp_kses( $data, $context );
}
/**
 * Filters slashed post content down to the HTML allowed in posts.
 *
 * Post content refers to the page contents of the 'post' type and not `$_POST`
 * data from forms. The data is unslashed, filtered with `wp_kses()` in the
 * 'post' context, and slashed again.
 *
 * This function expects slashed data.
 *
 * @since 2.0.0
 *
 * @param string $data Post content to filter, expected to be escaped with slashes.
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_filter_post_kses( $data ) {
	$unslashed = stripslashes( $data );
	$filtered  = wp_kses( $unslashed, 'post' );

	return addslashes( $filtered );
}
/** * Sanitizes global styles user content removing unsafe rules. * * @since 5.9.0 * * @param string $data Post content to filter. * @return string Filtered post content with unsafe rules removed. */ function wp_filter_global_styles_post( $data ) { $decoded_data = json_decode( wp_unslash( $data ), true ); $json_decoding_error = json_last_error(); if ( JSON_ERROR_NONE === $json_decoding_error && is_array( $decoded_data ) && isset( $decoded_data['isGlobalStylesUserThemeJSON'] ) && $decoded_data['isGlobalStylesUserThemeJSON'] ) { unset( $decoded_data['isGlobalStylesUserThemeJSON'] );
/**
 * Filters unslashed post content down to the HTML allowed in posts.
 *
 * Post content refers to the page contents of the 'post' type and not `$_POST`
 * data from forms.
 *
 * This function expects unslashed data.
 *
 * @since 2.9.0
 *
 * @param string $data Post content to filter.
 * @return string Filtered post content with allowed HTML tags and attributes intact.
 */
function wp_kses_post( $data ) {
	$context = 'post';

	return wp_kses( $data, $context );
}
/**
 * Applies `wp_kses_post()` to a value by way of `map_deep()`.
 *
 * Accepts an array, object, or scalar; the traversal itself is delegated to
 * `map_deep()`.
 *
 * @since 4.4.2
 *
 * @see map_deep()
 *
 * @param mixed $data The array, object, or scalar value to inspect.
 * @return mixed The filtered content.
 */
function wp_kses_post_deep( $data ) {
	$sanitizer = 'wp_kses_post';

	return map_deep( $data, $sanitizer );
}
/**
 * Removes every HTML tag from slashed text.
 *
 * The data is unslashed, filtered with `wp_kses()` in the 'strip' context,
 * and slashed again.
 *
 * This function expects slashed data.
 *
 * @since 2.1.0
 *
 * @param string $data Content to strip all HTML from.
 * @return string Filtered content without any HTML.
 */
function wp_filter_nohtml_kses( $data ) {
	$unslashed = stripslashes( $data );
	$stripped  = wp_kses( $unslashed, 'strip' );

	return addslashes( $stripped );
}
/** * Adds all KSES input form content filters. * * All hooks have default priority. The `wp_filter_kses()` function is added to * the 'pre_comment_content' and 'title_save_pre' hooks. * * The `wp_filter_post_kses()` function is added to the 'content_save_pre', * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks. * * @since 2.0.0 */ function kses_init_filters() { // Normal filtering. add_filter( 'title_save_pre', 'wp_filter_kses' );
// Global Styles filtering: Global Styles filters should be executed before normal post_kses HTML filters. add_filter( 'content_save_pre', 'wp_filter_global_styles_post', 9 ); add_filter( 'content_filtered_save_pre', 'wp_filter_global_styles_post', 9 );
/** * Removes all KSES input form content filters. * * A quick procedural method to removing all of the filters that KSES uses for * content in WordPress Loop. * * Does not remove the `kses_init()` function from {@see 'init'} hook (priority is * default). Also does not remove `kses_init()` function from {@see 'set_current_user'} * hook (priority is also default). * * @since 2.0.6 */ function kses_remove_filters() { // Normal filtering. remove_filter( 'title_save_pre', 'wp_filter_kses' );
/** * Sets up most of the KSES filters for input form content. * * First removes all of the KSES filters in case the current user does not need * to have KSES filter the content. If the user does not have `unfiltered_html` * capability, then KSES filters are added. * * @since 2.0.0 */ function kses_init() { kses_remove_filters();
/**
 * Filters an inline style attribute and removes disallowed rules.
 *
 * Each item of the split CSS input is checked individually. Items that pass
 * every check are re-joined with `;` (no trailing semicolon) and returned;
 * items that fail are silently dropped.
 *
 * @since 2.8.1
 * @since 4.4.0 Added support for `min-height`, `max-height`, `min-width`, and `max-width`.
 * @since 4.6.0 Added support for `list-style-type`.
 * @since 5.0.0 Added support for `background-image`.
 * @since 5.1.0 Added support for `text-transform`.
 * @since 5.2.0 Added support for `background-position` and `grid-template-columns`.
 * @since 5.3.0 Added support for `grid`, `flex` and `column` layout properties.
 *              Extended `background-*` support for individual properties.
 * @since 5.3.1 Added support for gradient backgrounds.
 * @since 5.7.1 Added support for `object-position`.
 * @since 5.8.0 Added support for `calc()` and `var()` values.
 * @since 6.1.0 Added support for `min()`, `max()`, `minmax()`, `clamp()`,
 *              nested `var()` values, and assigning values to CSS variables.
 *              Added support for `object-fit`, `gap`, `column-gap`, `row-gap`, and `flex-wrap`.
 *              Extended `margin-*` and `padding-*` support for logical properties.
 * @since 6.2.0 Added support for `aspect-ratio`, `position`, `top`, `right`, `bottom`, `left`,
 *              and `z-index` CSS properties.
 * @since 6.3.0 Extended support for `filter` to accept a URL and added support for repeat().
 *              Added support for `box-shadow`.
 * @since 6.4.0 Added support for `writing-mode`.
 * @since 6.5.0 Added support for `background-repeat`.
 * @since 6.6.0 Added support for `grid-column`, `grid-row`, and `container-type`.
 *
 * @param string $css        A string of CSS rules.
 * @param string $deprecated Not used.
 * @return string Filtered string of CSS rules.
 */
function safecss_filter_attr( $css, $deprecated = '' ) {
	// Any non-empty second argument only triggers a deprecation notice; it has no other effect here.
	if ( ! empty( $deprecated ) ) {
		_deprecated_argument( __FUNCTION__, '2.8.1' ); // Never implemented.
	}

	/*
	 * CSS attributes that accept URL data types.
	 *
	 * This is in accordance to the CSS spec and unrelated to
	 * the sub-set of supported attributes above.
	 *
	 * See: https://developer.mozilla.org/en-US/docs/Web/CSS/url
	 */
	$css_url_data_types = array(
		'background',
		'background-image',

		'cursor',
		'filter',

		'list-style',
		'list-style-image',
	);

	/*
	 * CSS attributes that accept gradient data types.
	 */
	$css_gradient_data_types = array(
		'background',
		'background-image',
	);

	/*
	 * With an empty allow-list the input is returned as-is.
	 *
	 * NOTE(review): `$allowed_attr` is not assigned in the lines visible here;
	 * presumably it holds the permitted CSS property names. Confirm where it is built.
	 */
	if ( empty( $allowed_attr ) ) {
		return $css;
	}

	/*
	 * Rebuild the output from scratch: only items that pass the checks below are appended.
	 *
	 * NOTE(review): `$css_array`, `$parts`, `$found`, `$url_attr`, `$gradient_attr`,
	 * `$css_test_string`, `$url` and `$allowed_protocols` are read below but set outside
	 * the lines visible here. Verify their setup against the full function.
	 */
	$css = '';
	foreach ( $css_array as $css_item ) {
		// Skip empty items.
		if ( '' === $css_item ) {
			continue;
		}

		if ( $found && $url_attr ) {
			// Simplified: matches the sequence `url(*)`.
			preg_match_all( '/url\([^)]+\)/', $parts[1], $url_matches );

			foreach ( $url_matches[0] as $url_match ) {
				/*
				 * Clean up the URL from each of the matches above.
				 * Group 1 is the optional opening quote, group 2 the raw URL, and
				 * `\g1` requires the same quote character (or none) to close it.
				 */
				preg_match( '/^url\(\s*([\'\"]?)(.*)(\g1)\s*\)$/', $url_match, $url_pieces );

				/*
				 * An empty URL, or one that wp_kses_bad_protocol() would alter,
				 * rejects the whole item; remaining matches are not inspected.
				 */
				if ( empty( $url ) || wp_kses_bad_protocol( $url, $allowed_protocols ) !== $url ) {
					$found = false;
					break;
				} else {
					// Remove the whole `url(*)` bit that was matched above from the CSS.
					$css_test_string = str_replace( $url_match, '', $css_test_string );
				}
			}
		}

		if ( $found && $gradient_attr ) {
			$css_value = trim( $parts[1] );
			/*
			 * Accepts a value that is exactly one (optionally `repeating-`) linear, radial,
			 * or conic gradient whose arguments contain no parentheses other than
			 * `rgb(...)`/`rgba(...)` calls.
			 */
			if ( preg_match( '/^(repeating-)?(linear|radial|conic)-gradient\(([^()]|rgb[a]?\([^()]*\))*\)$/', $css_value ) ) {
				// Remove the whole `gradient` bit that was matched above from the CSS.
				$css_test_string = str_replace( $css_value, '', $css_test_string );
			}
		}

		if ( $found ) {
			/*
			 * Allow CSS functions like var(), calc(), etc. by removing them from the test string.
			 * Nested functions and parentheses are also removed, so long as the parentheses are balanced.
			 * The `(?1)` construct recurses into capture group 1 to match the nested parentheses.
			 */
			$css_test_string = preg_replace(
				'/\b(?:var|calc|min|max|minmax|clamp|repeat)(\((?:[^()]|(?1))*\))/',
				'',
				$css_test_string
			);

			/*
			 * Disallow CSS containing \ ( & } = or comments, except for within url(), var(), calc(), etc.
			 * which were removed from the test string above.
			 */
			$allow_css = ! preg_match( '%[\\\(&=}]|/\*%', $css_test_string );

			/**
			 * Filters the check for unsafe CSS in `safecss_filter_attr`.
			 *
			 * Enables developers to determine whether a section of CSS should be allowed or discarded.
			 * By default, the value will be false if the part contains \ ( & } = or comments.
			 * Return true to allow the CSS part to be included in the output.
			 *
			 * @since 5.5.0
			 *
			 * @param bool   $allow_css       Whether the CSS in the test string is considered safe.
			 * @param string $css_test_string The CSS string to test.
			 */
			$allow_css = apply_filters( 'safecss_filter_attr_allow_css', $allow_css, $css_test_string );

			// Only add the CSS part if it passes the regex check.
			if ( $allow_css ) {
				// Items are separated by `;`, so no separator precedes the first one.
				if ( '' !== $css ) {
					$css .= ';';
				}

				// The original item is appended, not the reduced test string.
				$css .= $css_item;
			}
		}
	}

	return $css;
}
/** * Helper function to add global attributes to a tag in the allowed HTML list. * * @since 3.5.0 * @since 5.0.0 Added support for `data-*` wildcard attributes. * @since 6.0.0 Added `dir`, `lang`, and `xml:lang` to global attributes. * @since 6.3.0 Added `aria-controls`, `aria-current`, and `aria-expanded` attributes. * @since 6.4.0 Added `aria-live` and `hidden` attributes. * * @access private * @ignore * * @param array $value An array of attributes. * @return array The array of attributes with global attributes added. */ function _wp_add_global_attributes( $value ) { $global_attributes = array( 'aria-controls' => true, 'aria-current' => true, 'aria-describedby' => true, 'aria-details' => true, 'aria-expanded' => true, 'aria-hidden' => true, 'aria-label' => true, 'aria-labelledby' => true, 'aria-live' => true, 'class' => true, 'data-*' => true, 'dir' => true, 'hidden' => true, 'id' => true, 'lang' => true, 'style' => true, 'title' => true, 'role' => true, 'xml:lang' => true, );
/** * Helper function to check if this is a safe PDF URL. * * @since 5.9.0 * @access private * @ignore * * @param string $url The URL to check. * @return bool True if the URL is safe, false otherwise. */ function _wp_kses_allow_pdf_objects( $url ) { // We're not interested in URLs that contain query strings or fragments. if ( str_contains( $url, '?' ) || str_contains( $url, '#' ) ) { return false; }
// If it doesn't have a PDF extension, it's not safe. if ( ! str_ends_with( $url, '.pdf' ) ) { return false; }
// If the URL host matches the current site's media URL, it's safe. $upload_info = wp_upload_dir( null, false ); $parsed_url = wp_parse_url( $upload_info['url'] ); $upload_host = isset( $parsed_url['host'] ) ? $parsed_url['host'] : ''; $upload_port = isset( $parsed_url['port'] ) ? ':' . $parsed_url['port'] : '';