package.xml0000644000175000017500000014722313512700112012550 0ustar ezyangezyang HTMLPurifier htmlpurifier.org Standards-compliant HTML filter HTML Purifier is an HTML filter that will remove all malicious code (better known as XSS) with a thoroughly audited, secure yet permissive whitelist and will also make sure your documents are standards compliant. Edward Z. Yang ezyang admin@htmlpurifier.org yes 2019-07-14 4.11.0 4.11 stable stable LGPL HTML Purifier 4.11.x is a maintenance release, collecting a year and a half of accumulated bug fixes. Most notable fixes are compatibility with PHP 7.3, and case-sensitive matching for the SafeScripting whitelist. There are a number small feature enhancements, including an expanded supported color list, initial and inherit support for {min-,max-,}{width,height} and multidimensional array support for purifyArray. 5.0.0 1.4.3 4.11.0 4.11 stable stable 2019-07-14 LGPL HTML Purifier 4.11.x is a maintenance release, collecting a year and a half of accumulated bug fixes. Most notable fixes are compatibility with PHP 7.3, and case-sensitive matching for the SafeScripting whitelist. There are a number small feature enhancements, including an expanded supported color list, initial and inherit support for {min-,max-,}{width,height} and multidimensional array support for purifyArray. 
HTMLPurifier-4.11.0/HTMLPurifier/Arborize.php0000644000175000017500000000476613512700112020170 0ustar ezyangezyanggetHTMLDefinition(); $parent = new HTMLPurifier_Token_Start($definition->info_parent); $stack = array($parent->toNode()); foreach ($tokens as $token) { $token->skip = null; // [MUT] $token->carryover = null; // [MUT] if ($token instanceof HTMLPurifier_Token_End) { $token->start = null; // [MUT] $r = array_pop($stack); //assert($r->name === $token->name); //assert(empty($token->attr)); $r->endCol = $token->col; $r->endLine = $token->line; $r->endArmor = $token->armor; continue; } $node = $token->toNode(); $stack[count($stack)-1]->children[] = $node; if ($token instanceof HTMLPurifier_Token_Start) { $stack[] = $node; } } //assert(count($stack) == 1); return $stack[0]; } public static function flatten($node, $config, $context) { $level = 0; $nodes = array($level => new HTMLPurifier_Queue(array($node))); $closingTokens = array(); $tokens = array(); do { while (!$nodes[$level]->isEmpty()) { $node = $nodes[$level]->shift(); // FIFO list($start, $end) = $node->toTokenPair(); if ($level > 0) { $tokens[] = $start; } if ($end !== NULL) { $closingTokens[$level][] = $end; } if ($node instanceof HTMLPurifier_Node_Element) { $level++; $nodes[$level] = new HTMLPurifier_Queue(); foreach ($node->children as $childNode) { $nodes[$level]->push($childNode); } } } $level--; if ($level && isset($closingTokens[$level])) { while ($token = array_pop($closingTokens[$level])) { $tokens[] = $token; } } } while ($level > 0); return $tokens; } } HTMLPurifier-4.11.0/HTMLPurifier/AttrCollections.php0000644000175000017500000001137613512700112021517 0ustar ezyangezyangdoConstruct($attr_types, $modules); } public function doConstruct($attr_types, $modules) { // load extensions from the modules foreach ($modules as $module) { foreach ($module->attr_collections as $coll_i => $coll) { if (!isset($this->info[$coll_i])) { $this->info[$coll_i] = array(); } foreach ($coll as $attr_i => $attr) { 
if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) { // merge in includes $this->info[$coll_i][$attr_i] = array_merge( $this->info[$coll_i][$attr_i], $attr ); continue; } $this->info[$coll_i][$attr_i] = $attr; } } } // perform internal expansions and inclusions foreach ($this->info as $name => $attr) { // merge attribute collections that include others $this->performInclusions($this->info[$name]); // replace string identifiers with actual attribute objects $this->expandIdentifiers($this->info[$name], $attr_types); } } /** * Takes a reference to an attribute associative array and performs * all inclusions specified by the zero index. * @param array &$attr Reference to attribute array */ public function performInclusions(&$attr) { if (!isset($attr[0])) { return; } $merge = $attr[0]; $seen = array(); // recursion guard // loop through all the inclusions for ($i = 0; isset($merge[$i]); $i++) { if (isset($seen[$merge[$i]])) { continue; } $seen[$merge[$i]] = true; // foreach attribute of the inclusion, copy it over if (!isset($this->info[$merge[$i]])) { continue; } foreach ($this->info[$merge[$i]] as $key => $value) { if (isset($attr[$key])) { continue; } // also catches more inclusions $attr[$key] = $value; } if (isset($this->info[$merge[$i]][0])) { // recursion $merge = array_merge($merge, $this->info[$merge[$i]][0]); } } unset($attr[0]); } /** * Expands all string identifiers in an attribute array by replacing * them with the appropriate values inside HTMLPurifier_AttrTypes * @param array &$attr Reference to attribute array * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance */ public function expandIdentifiers(&$attr, $attr_types) { // because foreach will process new elements we add, make sure we // skip duplicates $processed = array(); foreach ($attr as $def_i => $def) { // skip inclusions if ($def_i === 0) { continue; } if (isset($processed[$def_i])) { continue; } // determine whether or not attribute is required if ($required = 
(strpos($def_i, '*') !== false)) { // rename the definition unset($attr[$def_i]); $def_i = trim($def_i, '*'); $attr[$def_i] = $def; } $processed[$def_i] = true; // if we've already got a literal object, move on if (is_object($def)) { // preserve previous required $attr[$def_i]->required = ($required || $attr[$def_i]->required); continue; } if ($def === false) { unset($attr[$def_i]); continue; } if ($t = $attr_types->get($def)) { $attr[$def_i] = $t; $attr[$def_i]->required = $required; } else { unset($attr[$def_i]); } } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrDef.php0000644000175000017500000001211313512700112017725 0ustar ezyangezyang by removing * leading and trailing whitespace, ignoring line feeds, and replacing * carriage returns and tabs with spaces. While most useful for HTML * attributes specified as CDATA, it can also be applied to most CSS * values. * * @note This method is not entirely standards compliant, as trim() removes * more types of whitespace than specified in the spec. In practice, * this is rarely a problem, as those extra characters usually have * already been removed by HTMLPurifier_Encoder. * * @warning This processing is inconsistent with XML's whitespace handling * as specified by section 3.3.3 and referenced XHTML 1.0 section * 4.7. However, note that we are NOT necessarily * parsing XML, thus, this behavior may still be correct. We * assume that newlines have been normalized. */ public function parseCDATA($string) { $string = trim($string); $string = str_replace(array("\n", "\t", "\r"), ' ', $string); return $string; } /** * Factory method for creating this class from a string. * @param string $string String construction info * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string */ public function make($string) { // default implementation, return a flyweight of this object. // If $string has an effect on the returned object (i.e. 
you // need to overload this method), it is best // to clone or instantiate new copies. (Instantiation is safer.) return $this; } /** * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work * properly. THIS IS A HACK! * @param string $string a CSS colour definition * @return string */ protected function mungeRgb($string) { $p = '\s*(\d+(\.\d+)?([%]?))\s*'; if (preg_match('/(rgba|hsla)\(/', $string)) { return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string); } return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string); } /** * Parses a possibly escaped CSS string and returns the "pure" * version of it. */ protected function expandCSSEscape($string) { // flexibly parse it $ret = ''; for ($i = 0, $c = strlen($string); $i < $c; $i++) { if ($string[$i] === '\\') { $i++; if ($i >= $c) { $ret .= '\\'; break; } if (ctype_xdigit($string[$i])) { $code = $string[$i]; for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { if (!ctype_xdigit($string[$i])) { break; } $code .= $string[$i]; } // We have to be extremely careful when adding // new characters, to make sure we're not breaking // the encoding. 
$char = HTMLPurifier_Encoder::unichr(hexdec($code)); if (HTMLPurifier_Encoder::cleanUTF8($char) === '') { continue; } $ret .= $char; if ($i < $c && trim($string[$i]) !== '') { $i--; } continue; } if ($string[$i] === "\n") { continue; } } $ret .= $string[$i]; } return $ret; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform.php0000644000175000017500000000370513512700112021211 0ustar ezyangezyanginfo['Enum'] = new HTMLPurifier_AttrDef_Enum(); $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool(); $this->info['CDATA'] = new HTMLPurifier_AttrDef_Text(); $this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID(); $this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length(); $this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength(); $this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens(); $this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels(); $this->info['Text'] = new HTMLPurifier_AttrDef_Text(); $this->info['URI'] = new HTMLPurifier_AttrDef_URI(); $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang(); $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color(); $this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right'); $this->info['LAlign'] = self::makeEnum('top,bottom,left,right'); $this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget(); // unimplemented aliases $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text(); $this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text(); $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text(); $this->info['Character'] = new HTMLPurifier_AttrDef_Text(); // "proprietary" types $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class(); // number is really a positive integer (one or more digits) // FIXME: ^^ not always, see start and value of list items $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); } private static function makeEnum($in) { return new HTMLPurifier_AttrDef_Clone(new 
HTMLPurifier_AttrDef_Enum(explode(',', $in))); } /** * Retrieves a type * @param string $type String type name * @return HTMLPurifier_AttrDef Object AttrDef for type */ public function get($type) { // determine if there is any extra info tacked on if (strpos($type, '#') !== false) { list($type, $string) = explode('#', $type, 2); } else { $string = ''; } if (!isset($this->info[$type])) { trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR); return; } return $this->info[$type]->make($string); } /** * Sets a new implementation for a type * @param string $type String type name * @param HTMLPurifier_AttrDef $impl Object AttrDef for type */ public function set($type, $impl) { $this->info[$type] = $impl; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrValidator.php0000644000175000017500000001465413512700112021170 0ustar ezyangezyanggetHTMLDefinition(); $e =& $context->get('ErrorCollector', true); // initialize IDAccumulator if necessary $ok =& $context->get('IDAccumulator', true); if (!$ok) { $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); $context->register('IDAccumulator', $id_accumulator); } // initialize CurrentToken if necessary $current_token =& $context->get('CurrentToken', true); if (!$current_token) { $context->register('CurrentToken', $token); } if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty ) { return; } // create alias to global definition array, see also $defs // DEFINITION CALL $d_defs = $definition->info_global_attr; // don't update token until the very end, to ensure an atomic update $attr = $token->attr; // do global transformations (pre) // nothing currently utilizes this foreach ($definition->info_attr_transform_pre as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // do local transformations only 
applicable to this element (pre) // ex.

to

foreach ($definition->info[$token->name]->attr_transform_pre as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // create alias to this element's attribute definition array, see // also $d_defs (global attribute definition array) // DEFINITION CALL $defs = $definition->info[$token->name]->attr; $attr_key = false; $context->register('CurrentAttr', $attr_key); // iterate through all the attribute keypairs // Watch out for name collisions: $key has previously been used foreach ($attr as $attr_key => $value) { // call the definition if (isset($defs[$attr_key])) { // there is a local definition defined if ($defs[$attr_key] === false) { // We've explicitly been told not to allow this element. // This is usually when there's a global definition // that must be overridden. // Theoretically speaking, we could have a // AttrDef_DenyAll, but this is faster! $result = false; } else { // validate according to the element's definition $result = $defs[$attr_key]->validate( $value, $config, $context ); } } elseif (isset($d_defs[$attr_key])) { // there is a global definition defined, validate according // to the global definition $result = $d_defs[$attr_key]->validate( $value, $config, $context ); } else { // system never heard of the attribute? DELETE! $result = false; } // put the results into effect if ($result === false || $result === null) { // this is a generic error message that should replaced // with more specific ones when possible if ($e) { $e->send(E_ERROR, 'AttrValidator: Attribute removed'); } // remove the attribute unset($attr[$attr_key]); } elseif (is_string($result)) { // generally, if a substitution is happening, there // was some sort of implicit correction going on. We'll // delegate it to the attribute classes to say exactly what. 
// simple substitution $attr[$attr_key] = $result; } else { // nothing happens } // we'd also want slightly more complicated substitution // involving an array as the return value, // although we're not sure how colliding attributes would // resolve (certain ones would be completely overriden, // others would prepend themselves). } $context->destroy('CurrentAttr'); // post transforms // global (error reporting untested) foreach ($definition->info_attr_transform_post as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // local (error reporting untested) foreach ($definition->info[$token->name]->attr_transform_post as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } $token->attr = $attr; // destroy CurrentToken if we made it ourselves if (!$current_token) { $context->destroy('CurrentToken'); } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Bootstrap.php0000644000175000017500000001100513512700112020350 0ustar ezyangezyang if (!defined('PHP_EOL')) { switch (strtoupper(substr(PHP_OS, 0, 3))) { case 'WIN': define('PHP_EOL', "\r\n"); break; case 'DAR': define('PHP_EOL', "\r"); break; default: define('PHP_EOL', "\n"); } } /** * Bootstrap class that contains meta-functionality for HTML Purifier such as * the autoload function. * * @note * This class may be used without any other files from HTML Purifier. 
*/ class HTMLPurifier_Bootstrap { /** * Autoload function for HTML Purifier * @param string $class Class to load * @return bool */ public static function autoload($class) { $file = HTMLPurifier_Bootstrap::getPath($class); if (!$file) { return false; } // Technically speaking, it should be ok and more efficient to // just do 'require', but Antonio Parraga reports that with // Zend extensions such as Zend debugger and APC, this invariant // may be broken. Since we have efficient alternatives, pay // the cost here and avoid the bug. require_once HTMLPURIFIER_PREFIX . '/' . $file; return true; } /** * Returns the path for a specific class. * @param string $class Class path to get * @return string */ public static function getPath($class) { if (strncmp('HTMLPurifier', $class, 12) !== 0) { return false; } // Custom implementations if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) { $code = str_replace('_', '-', substr($class, 22)); $file = 'HTMLPurifier/Language/classes/' . $code . '.php'; } else { $file = str_replace('_', '/', $class) . '.php'; } if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) { return false; } return $file; } /** * "Pre-registers" our autoloader on the SPL stack. 
*/ public static function registerAutoload() { $autoload = array('HTMLPurifier_Bootstrap', 'autoload'); if (($funcs = spl_autoload_functions()) === false) { spl_autoload_register($autoload); } elseif (function_exists('spl_autoload_unregister')) { if (version_compare(PHP_VERSION, '5.3.0', '>=')) { // prepend flag exists, no need for shenanigans spl_autoload_register($autoload, true, true); } else { $buggy = version_compare(PHP_VERSION, '5.2.11', '<'); $compat = version_compare(PHP_VERSION, '5.1.2', '<=') && version_compare(PHP_VERSION, '5.1.0', '>='); foreach ($funcs as $func) { if ($buggy && is_array($func)) { // :TRICKY: There are some compatibility issues and some // places where we need to error out $reflector = new ReflectionMethod($func[0], $func[1]); if (!$reflector->isStatic()) { throw new Exception( 'HTML Purifier autoloader registrar is not compatible with non-static object methods due to PHP Bug #44144; Please do not use HTMLPurifier.autoload.php (or any file that includes this file); instead, place the code: spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\')) after your own autoloaders.' ); } // Suprisingly, spl_autoload_register supports the // Class::staticMethod callback format, although call_user_func doesn't if ($compat) { $func = implode('::', $func); } } spl_autoload_unregister($func); } spl_autoload_register($autoload); foreach ($funcs as $func) { spl_autoload_register($func); } } } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ChildDef.php0000644000175000017500000000302713512700112020042 0ustar ezyangezyangelements; } /** * Validates nodes according to definition and returns modification. 
* * @param HTMLPurifier_Node[] $children Array of HTMLPurifier_Node * @param HTMLPurifier_Config $config HTMLPurifier_Config object * @param HTMLPurifier_Context $context HTMLPurifier_Context object * @return bool|array true to leave nodes as is, false to remove parent node, array of replacement children */ abstract public function validateChildren($children, $config, $context); } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Config.php0000644000175000017500000007560413512700112017617 0ustar ezyangezyangdefaultPlist; $this->plist = new HTMLPurifier_PropertyList($parent); $this->def = $definition; // keep a copy around for checking $this->parser = new HTMLPurifier_VarParser_Flexible(); } /** * Convenience constructor that creates a config object based on a mixed var * @param mixed $config Variable that defines the state of the config * object. Can be: a HTMLPurifier_Config() object, * an array of directives based on loadArray(), * or a string filename of an ini file. * @param HTMLPurifier_ConfigSchema $schema Schema object * @return HTMLPurifier_Config Configured object */ public static function create($config, $schema = null) { if ($config instanceof HTMLPurifier_Config) { // pass-through return $config; } if (!$schema) { $ret = HTMLPurifier_Config::createDefault(); } else { $ret = new HTMLPurifier_Config($schema); } if (is_string($config)) { $ret->loadIni($config); } elseif (is_array($config)) $ret->loadArray($config); return $ret; } /** * Creates a new config object that inherits from a previous one. * @param HTMLPurifier_Config $config Configuration object to inherit from. * @return HTMLPurifier_Config object with $config as its parent. */ public static function inherit(HTMLPurifier_Config $config) { return new HTMLPurifier_Config($config->def, $config->plist); } /** * Convenience constructor that creates a default configuration object. * @return HTMLPurifier_Config default object. 
*/ public static function createDefault() { $definition = HTMLPurifier_ConfigSchema::instance(); $config = new HTMLPurifier_Config($definition); return $config; } /** * Retrieves a value from the configuration. * * @param string $key String key * @param mixed $a * * @return mixed */ public function get($key, $a = null) { if ($a !== null) { $this->triggerError( "Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING ); $key = "$key.$a"; } if (!$this->finalized) { $this->autoFinalize(); } if (!isset($this->def->info[$key])) { // can't add % due to SimpleTest bug $this->triggerError( 'Cannot retrieve value of undefined directive ' . htmlspecialchars($key), E_USER_WARNING ); return; } if (isset($this->def->info[$key]->isAlias)) { $d = $this->def->info[$key]; $this->triggerError( 'Cannot get value from aliased directive, use real name ' . $d->key, E_USER_ERROR ); return; } if ($this->lock) { list($ns) = explode('.', $key); if ($ns !== $this->lock) { $this->triggerError( 'Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method ' . 'is accessing directives that are not within its namespace', E_USER_ERROR ); return; } } return $this->plist->get($key); } /** * Retrieves an array of directives to values from a given namespace * * @param string $namespace String namespace * * @return array */ public function getBatch($namespace) { if (!$this->finalized) { $this->autoFinalize(); } $full = $this->getAll(); if (!isset($full[$namespace])) { $this->triggerError( 'Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), E_USER_WARNING ); return; } return $full[$namespace]; } /** * Returns a SHA-1 signature of a segment of the configuration object * that uniquely identifies that particular configuration * * @param string $namespace Namespace to get serial for * * @return string * @note Revision is handled specially and is removed from the batch * before processing! 
*/ public function getBatchSerial($namespace) { if (empty($this->serials[$namespace])) { $batch = $this->getBatch($namespace); unset($batch['DefinitionRev']); $this->serials[$namespace] = sha1(serialize($batch)); } return $this->serials[$namespace]; } /** * Returns a SHA-1 signature for the entire configuration object * that uniquely identifies that particular configuration * * @return string */ public function getSerial() { if (empty($this->serial)) { $this->serial = sha1(serialize($this->getAll())); } return $this->serial; } /** * Retrieves all directives, organized by namespace * * @warning This is a pretty inefficient function, avoid if you can */ public function getAll() { if (!$this->finalized) { $this->autoFinalize(); } $ret = array(); foreach ($this->plist->squash() as $name => $value) { list($ns, $key) = explode('.', $name, 2); $ret[$ns][$key] = $value; } return $ret; } /** * Sets a value to configuration. * * @param string $key key * @param mixed $value value * @param mixed $a */ public function set($key, $value, $a = null) { if (strpos($key, '.') === false) { $namespace = $key; $directive = $value; $value = $a; $key = "$key.$directive"; $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE); } else { list($namespace) = explode('.', $key); } if ($this->isFinalized('Cannot set directive after finalization')) { return; } if (!isset($this->def->info[$key])) { $this->triggerError( 'Cannot set undefined directive ' . htmlspecialchars($key) . ' to value', E_USER_WARNING ); return; } $def = $this->def->info[$key]; if (isset($def->isAlias)) { if ($this->aliasMode) { $this->triggerError( 'Double-aliases not allowed, please fix '. 'ConfigSchema bug with' . 
$key, E_USER_ERROR ); return; } $this->aliasMode = true; $this->set($def->key, $value); $this->aliasMode = false; $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE); return; } // Raw type might be negative when using the fully optimized form // of stdClass, which indicates allow_null == true $rtype = is_int($def) ? $def : $def->type; if ($rtype < 0) { $type = -$rtype; $allow_null = true; } else { $type = $rtype; $allow_null = isset($def->allow_null); } try { $value = $this->parser->parse($value, $type, $allow_null); } catch (HTMLPurifier_VarParserException $e) { $this->triggerError( 'Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING ); return; } if (is_string($value) && is_object($def)) { // resolve value alias if defined if (isset($def->aliases[$value])) { $value = $def->aliases[$value]; } // check to see if the value is allowed if (isset($def->allowed) && !isset($def->allowed[$value])) { $this->triggerError( 'Value not supported, valid values are: ' . $this->_listify($def->allowed), E_USER_WARNING ); return; } } $this->plist->set($key, $value); // reset definitions if the directives they depend on changed // this is a very costly process, so it's discouraged // with finalization if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') { $this->definitions[$namespace] = null; } $this->serials[$namespace] = false; } /** * Convenience function for error reporting * * @param array $lookup * * @return string */ private function _listify($lookup) { $list = array(); foreach ($lookup as $name => $b) { $list[] = $name; } return implode(', ', $list); } /** * Retrieves object reference to the HTML definition. * * @param bool $raw Return a copy that has not been setup yet. Must be * called before it's been setup, otherwise won't work. 
* @param bool $optimized If true, this method may return null, to * indicate that a cached version of the modified * definition object is available and no further edits * are necessary. Consider using * maybeGetRawHTMLDefinition, which is more explicitly * named, instead. * * @return HTMLPurifier_HTMLDefinition */ public function getHTMLDefinition($raw = false, $optimized = false) { return $this->getDefinition('HTML', $raw, $optimized); } /** * Retrieves object reference to the CSS definition * * @param bool $raw Return a copy that has not been setup yet. Must be * called before it's been setup, otherwise won't work. * @param bool $optimized If true, this method may return null, to * indicate that a cached version of the modified * definition object is available and no further edits * are necessary. Consider using * maybeGetRawCSSDefinition, which is more explicitly * named, instead. * * @return HTMLPurifier_CSSDefinition */ public function getCSSDefinition($raw = false, $optimized = false) { return $this->getDefinition('CSS', $raw, $optimized); } /** * Retrieves object reference to the URI definition * * @param bool $raw Return a copy that has not been setup yet. Must be * called before it's been setup, otherwise won't work. * @param bool $optimized If true, this method may return null, to * indicate that a cached version of the modified * definition object is available and no further edits * are necessary. Consider using * maybeGetRawURIDefinition, which is more explicitly * named, instead. * * @return HTMLPurifier_URIDefinition */ public function getURIDefinition($raw = false, $optimized = false) { return $this->getDefinition('URI', $raw, $optimized); } /** * Retrieves a definition * * @param string $type Type of definition: HTML, CSS, etc * @param bool $raw Whether or not definition should be returned raw * @param bool $optimized Only has an effect when $raw is true. Whether * or not to return null if the result is already present in * the cache. 
This is off by default for backwards
     *        compatibility reasons, but you need to do things this
     *        way in order to ensure that caching is done properly.
     *        Check out enduser-customize.html for more details.
     *        We probably won't ever change this default, as much as the
     *        maybe semantics is the "right thing to do."
     *
     * @throws HTMLPurifier_Exception
     * @return HTMLPurifier_Definition|null Null is only returned for an
     *         optimized raw request whose definition is already set up in
     *         memory or was found in the cache.
     */
    public function getDefinition($type, $raw = false, $optimized = false)
    {
        if ($optimized && !$raw) {
            throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
        }
        if (!$this->finalized) {
            $this->autoFinalize();
        }
        // temporarily suspend locks, so we can handle recursive definition calls
        $lock = $this->lock;
        $this->lock = null;
        $factory = HTMLPurifier_DefinitionCacheFactory::instance();
        $cache = $factory->create($type, $this);
        $this->lock = $lock;
        if (!$raw) {
            // full definition
            // ---------------
            // check if definition is in memory
            if (!empty($this->definitions[$type])) {
                $def = $this->definitions[$type];
                // check if the definition is setup
                if ($def->setup) {
                    return $def;
                } else {
                    $def->setup($this);
                    if ($def->optimized) {
                        $cache->add($def, $this);
                    }
                    return $def;
                }
            }
            // check if definition is in cache
            $def = $cache->get($this);
            if ($def) {
                // definition in cache, save to memory and return it
                $this->definitions[$type] = $def;
                return $def;
            }
            // initialize it
            $def = $this->initDefinition($type);
            // set it up
            $this->lock = $type;
            $def->setup($this);
            $this->lock = null;
            // save in cache
            $cache->add($def, $this);
            // return it
            return $def;
        } else {
            // raw definition
            // --------------
            // check preconditions
            $def = null;
            if ($optimized) {
                if (is_null($this->get($type . '.DefinitionID'))) {
                    // fatally error out if definition ID not set
                    throw new HTMLPurifier_Exception(
                        "Cannot retrieve raw version without specifying %$type.DefinitionID"
                    );
                }
            }
            if (!empty($this->definitions[$type])) {
                $def = $this->definitions[$type];
                if ($def->setup && !$optimized) {
                    $extra = $this->chatty ?
                        " (try moving this code block earlier in your initialization)" :
                        "";
                    throw new HTMLPurifier_Exception(
                        "Cannot retrieve raw definition after it has already been setup" .
                        $extra
                    );
                }
                if ($def->optimized === null) {
                    $extra = $this->chatty ? " (try flushing your cache)" : "";
                    throw new HTMLPurifier_Exception(
                        "Optimization status of definition is unknown" . $extra
                    );
                }
                if ($def->optimized !== $optimized) {
                    $msg = $optimized ? "optimized" : "unoptimized";
                    $extra = $this->chatty ?
                        " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)"
                        : "";
                    throw new HTMLPurifier_Exception(
                        "Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra
                    );
                }
            }
            // check if definition was in memory
            if ($def) {
                if ($def->setup) {
                    // invariant: $optimized === true (checked above)
                    return null;
                } else {
                    return $def;
                }
            }
            // if optimized, check if definition was in cache
            // (because we do the memory check first, this formulation
            // is prone to cache slamming, but I think
            // guaranteeing that either /all/ of the raw
            // setup code or /none/ of it is run is more important.)
            if ($optimized) {
                // This code path only gets run once; once we put
                // something in $definitions (which is guaranteed by the
                // trailing code), we always short-circuit above.
                $def = $cache->get($this);
                if ($def) {
                    // save the full definition for later, but don't
                    // return it yet
                    $this->definitions[$type] = $def;
                    return null;
                }
            }
            // check invariants for creation
            if (!$optimized) {
                if (!is_null($this->get($type . '.DefinitionID'))) {
                    if ($this->chatty) {
                        // The directive names are double-quoted so that $type is
                        // interpolated, matching the exception message above; in
                        // single quotes the literal text "$type" was emitted.
                        $this->triggerError(
                            'Due to a documentation error in previous version of HTML Purifier, your ' .
                            'definitions are not being cached. If this is OK, you can remove the ' .
                            "%$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, " .
                            'modify your code to use maybeGetRawDefinition, and test if the returned ' .
                            'value is null before making any edits (if it is null, that means that a ' .
                            'cached version is available, and no raw operations are necessary). See ' .
                            '' . 'Customize for more details',
                            E_USER_WARNING
                        );
                    } else {
                        $this->triggerError(
                            "Useless DefinitionID declaration",
                            E_USER_WARNING
                        );
                    }
                }
            }
            // initialize it
            $def = $this->initDefinition($type);
            $def->optimized = $optimized;
            return $def;
        }
        throw new HTMLPurifier_Exception("The impossible happened!");
    }

    /**
     * Initialise definition
     *
     * @param string $type What type of definition to create
     *
     * @return HTMLPurifier_CSSDefinition|HTMLPurifier_HTMLDefinition|HTMLPurifier_URIDefinition
     * @throws HTMLPurifier_Exception
     */
    private function initDefinition($type)
    {
        // quick checks failed, let's create the object
        if ($type == 'HTML') {
            $def = new HTMLPurifier_HTMLDefinition();
        } elseif ($type == 'CSS') {
            $def = new HTMLPurifier_CSSDefinition();
        } elseif ($type == 'URI') {
            $def = new HTMLPurifier_URIDefinition();
        } else {
            throw new HTMLPurifier_Exception(
                "Definition of $type type not supported"
            );
        }
        // remember the fresh object so later lookups hit the in-memory copy
        $this->definitions[$type] = $def;
        return $def;
    }

    /**
     * Shorthand for getDefinition($name, true, true): an optimized raw
     * definition request.
     *
     * @param string $name Definition type, passed through as $type
     */
    public function maybeGetRawDefinition($name)
    {
        return $this->getDefinition($name, true, true);
    }

    /**
     * @return HTMLPurifier_HTMLDefinition
     */
    public function maybeGetRawHTMLDefinition()
    {
        return $this->getDefinition('HTML', true, true);
    }

    /**
     * @return HTMLPurifier_CSSDefinition
     */
    public function maybeGetRawCSSDefinition()
    {
        return $this->getDefinition('CSS', true, true);
    }

    /**
     * @return HTMLPurifier_URIDefinition
     */
    public function maybeGetRawURIDefinition()
    {
        return $this->getDefinition('URI', true, true);
    }

    /**
     * Loads configuration values from an array with the following structure:
     * Namespace.Directive => Value
     *
     * @param array $config_array Configuration associative array
     */
    public function loadArray($config_array)
    {
        if ($this->isFinalized('Cannot load directives after finalization')) {
            return;
        }
        foreach ($config_array as $key => $value) {
            $key = str_replace('_', '.', $key);
            if (strpos($key, '.') !== false) {
                $this->set($key, $value);
            } else {
$namespace = $key; $namespace_values = $value; foreach ($namespace_values as $directive => $value2) { $this->set($namespace .'.'. $directive, $value2); } } } } /** * Returns a list of array(namespace, directive) for all directives * that are allowed in a web-form context as per an allowed * namespaces/directives list. * * @param array $allowed List of allowed namespaces/directives * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy * * @return array */ public static function getAllowedDirectivesForForm($allowed, $schema = null) { if (!$schema) { $schema = HTMLPurifier_ConfigSchema::instance(); } if ($allowed !== true) { if (is_string($allowed)) { $allowed = array($allowed); } $allowed_ns = array(); $allowed_directives = array(); $blacklisted_directives = array(); foreach ($allowed as $ns_or_directive) { if (strpos($ns_or_directive, '.') !== false) { // directive if ($ns_or_directive[0] == '-') { $blacklisted_directives[substr($ns_or_directive, 1)] = true; } else { $allowed_directives[$ns_or_directive] = true; } } else { // namespace $allowed_ns[$ns_or_directive] = true; } } } $ret = array(); foreach ($schema->info as $key => $def) { list($ns, $directive) = explode('.', $key, 2); if ($allowed !== true) { if (isset($blacklisted_directives["$ns.$directive"])) { continue; } if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) { continue; } } if (isset($def->isAlias)) { continue; } if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') { continue; } $ret[] = array($ns, $directive); } return $ret; } /** * Loads configuration values from $_GET/$_POST that were posted * via ConfigForm * * @param array $array $_GET or $_POST array to import * @param string|bool $index Index/name that the config variables are in * @param array|bool $allowed List of allowed namespaces/directives * @param bool $mq_fix Boolean whether or not to enable magic quotes fix * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not 
global copy * * @return mixed */ public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema); $config = HTMLPurifier_Config::create($ret, $schema); return $config; } /** * Merges in configuration values from $_GET/$_POST to object. NOT STATIC. * * @param array $array $_GET or $_POST array to import * @param string|bool $index Index/name that the config variables are in * @param array|bool $allowed List of allowed namespaces/directives * @param bool $mq_fix Boolean whether or not to enable magic quotes fix */ public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) { $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def); $this->loadArray($ret); } /** * Prepares an array from a form into something usable for the more * strict parts of HTMLPurifier_Config * * @param array $array $_GET or $_POST array to import * @param string|bool $index Index/name that the config variables are in * @param array|bool $allowed List of allowed namespaces/directives * @param bool $mq_fix Boolean whether or not to enable magic quotes fix * @param HTMLPurifier_ConfigSchema $schema Schema to use, if not global copy * * @return array */ public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { if ($index !== false) { $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); } $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); $ret = array(); foreach ($allowed as $key) { list($ns, $directive) = $key; $skey = "$ns.$directive"; if (!empty($array["Null_$skey"])) { $ret[$ns][$directive] = null; continue; } if (!isset($array[$skey])) { continue; } $value = $mq ? 
stripslashes($array[$skey]) : $array[$skey];
            $ret[$ns][$directive] = $value;
        }
        return $ret;
    }

    /**
     * Loads configuration values from an ini file
     *
     * Nothing is loaded if the configuration is already finalized or if the
     * file cannot be parsed; the latter raises an E_USER_WARNING.
     *
     * @param string $filename Name of ini file
     */
    public function loadIni($filename)
    {
        if ($this->isFinalized('Cannot load directives after finalization')) {
            return;
        }
        $array = parse_ini_file($filename, true);
        if ($array === false) {
            // parse_ini_file() returns false on failure; handing that to
            // loadArray() would iterate a boolean, so report it and stop here.
            $this->triggerError("Could not parse ini file '$filename'", E_USER_WARNING);
            return;
        }
        $this->loadArray($array);
    }

    /**
     * Checks whether or not the configuration object is finalized.
     *
     * @param string|bool $error String error message, or false for no error
     *
     * @return bool
     */
    public function isFinalized($error = false)
    {
        if ($this->finalized && $error) {
            $this->triggerError($error, E_USER_ERROR);
        }
        return $this->finalized;
    }

    /**
     * Finalizes configuration only if auto finalize is on and not
     * already finalized
     */
    public function autoFinalize()
    {
        if ($this->autoFinalize) {
            $this->finalize();
        } else {
            $this->plist->squash(true);
        }
    }

    /**
     * Finalizes a configuration object, prohibiting further change
     */
    public function finalize()
    {
        $this->finalized = true;
        $this->parser = null;
    }

    /**
     * Produces a nicely formatted error message by supplying the
     * stack frame information OUTSIDE of HTMLPurifier_Config.
     *
     * @param string $msg An error message
     * @param int $no An error number
     */
    protected function triggerError($msg, $no)
    {
        // determine previous stack frame
        $extra = '';
        if ($this->chatty) {
            $trace = debug_backtrace();
            // zip(tail(trace), trace) -- but PHP is not Haskell har har
            for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
                // XXX this is not correct on some versions of HTML Purifier
                if (isset($trace[$i + 1]['class']) && $trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
                    continue;
                }
                $frame = $trace[$i];
                $extra = " invoked on line {$frame['line']} in file {$frame['file']}";
                break;
            }
        }
        trigger_error($msg . $extra, $no);
    }

    /**
     * Returns a serialized form of the configuration object that can
     * be reconstituted.
* * @return string */ public function serialize() { $this->getDefinition('HTML'); $this->getDefinition('CSS'); $this->getDefinition('URI'); return serialize($this); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema.php0000644000175000017500000001341713512700112020732 0ustar ezyangezyang array( * 'Directive' => new stdClass(), * ) * ) * * The stdClass may have the following properties: * * - If isAlias isn't set: * - type: Integer type of directive, see HTMLPurifier_VarParser for definitions * - allow_null: If set, this directive allows null values * - aliases: If set, an associative array of value aliases to real values * - allowed: If set, a lookup array of allowed (string) values * - If isAlias is set: * - namespace: Namespace this directive aliases to * - name: Directive name this directive aliases to * * In certain degenerate cases, stdClass will actually be an integer. In * that case, the value is equivalent to an stdClass with the type * property set to the integer. If the integer is negative, type is * equal to the absolute value of integer, and allow_null is true. * * This class is friendly with HTMLPurifier_Config. If you need introspection * about the schema, you're better of using the ConfigSchema_Interchange, * which uses more memory but has much richer information. * @type array */ public $info = array(); /** * Application-wide singleton * @type HTMLPurifier_ConfigSchema */ protected static $singleton; public function __construct() { $this->defaultPlist = new HTMLPurifier_PropertyList(); } /** * Unserializes the default ConfigSchema. * @return HTMLPurifier_ConfigSchema */ public static function makeFromSerial() { $contents = file_get_contents(HTMLPURIFIER_PREFIX . 
'/HTMLPurifier/ConfigSchema/schema.ser');
        $r = unserialize($contents);
        if (!$r) {
            $hash = sha1($contents);
            trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
        }
        return $r;
    }

    /**
     * Retrieves an instance of the application-wide configuration definition.
     *
     * - null (default): return the singleton, building it via
     *   makeFromSerial() on first use.
     * - true: discard the current singleton and rebuild it via
     *   makeFromSerial().
     * - anything else: install the given value as the new singleton.
     *
     * @param HTMLPurifier_ConfigSchema|bool|null $prototype
     * @return HTMLPurifier_ConfigSchema
     */
    public static function instance($prototype = null)
    {
        // The reload case must be tested before the generic "prototype given"
        // case: true is also !== null, so with the checks the other way round
        // the reload branch was unreachable and the literal true was stored
        // as the singleton.
        if ($prototype === true ||
            ($prototype === null && HTMLPurifier_ConfigSchema::$singleton === null)
        ) {
            HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial();
        } elseif ($prototype !== null) {
            HTMLPurifier_ConfigSchema::$singleton = $prototype;
        }
        return HTMLPurifier_ConfigSchema::$singleton;
    }

    /**
     * Defines a directive for configuration
     * @warning Will fail if directive's namespace is defined.
     * @warning This method's signature is slightly different from the legacy
     *          define() static method! Beware!
     * @param string $key Name of directive
     * @param mixed $default Default value of directive
     * @param string $type Allowed type of the directive. See
     *      HTMLPurifier_VarParser::$types for allowed values
     * @param bool $allow_null Whether or not to allow null values
     */
    public function add($key, $default, $type, $allow_null)
    {
        $obj = new stdClass();
        $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
        if ($allow_null) {
            $obj->allow_null = true;
        }
        $this->info[$key] = $obj;
        $this->defaults[$key] = $default;
        $this->defaultPlist->set($key, $default);
    }

    /**
     * Defines a directive value alias.
     *
     * Directive value aliases are convenient for developers because it lets
     * them set a directive to several values and get the same result.
* @param string $key Name of Directive * @param array $aliases Hash of aliased values to the real alias */ public function addValueAliases($key, $aliases) { if (!isset($this->info[$key]->aliases)) { $this->info[$key]->aliases = array(); } foreach ($aliases as $alias => $real) { $this->info[$key]->aliases[$alias] = $real; } } /** * Defines a set of allowed values for a directive. * @warning This is slightly different from the corresponding static * method definition. * @param string $key Name of directive * @param array $allowed Lookup array of allowed values */ public function addAllowedValues($key, $allowed) { $this->info[$key]->allowed = $allowed; } /** * Defines a directive alias for backwards compatibility * @param string $key Directive that will be aliased * @param string $new_key Directive that the alias will be to */ public function addAlias($key, $new_key) { $obj = new stdClass; $obj->key = $new_key; $obj->isAlias = true; $this->info[$key] = $obj; } /** * Replaces any stdClass that only has the type property with type integer. */ public function postProcess() { foreach ($this->info as $key => $v) { if (count((array) $v) == 1) { $this->info[$key] = $v->type; } elseif (count((array) $v) == 2 && isset($v->allow_null)) { $this->info[$key] = -$v->type; } } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ContentSets.php0000644000175000017500000001301013512700112020642 0ustar ezyangezyang true) indexed by name. * @type array * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets */ public $lookup = array(); /** * Synchronized list of defined content sets (keys of info). * @type array */ protected $keys = array(); /** * Synchronized list of defined content values (values of info). * @type array */ protected $values = array(); /** * Merges in module's content sets, expands identifiers in the content * sets and populates the keys, values and lookup member variables. 
* @param HTMLPurifier_HTMLModule[] $modules List of HTMLPurifier_HTMLModule
     */
    public function __construct($modules)
    {
        if (!is_array($modules)) {
            $modules = array($modules);
        }
        // populate content_sets based on module hints
        // sorry, no way of overloading
        foreach ($modules as $module) {
            foreach ($module->content_sets as $key => $value) {
                $temp = $this->convertToLookup($value);
                if (isset($this->lookup[$key])) {
                    // add it into the existing content set
                    $this->lookup[$key] = array_merge($this->lookup[$key], $temp);
                } else {
                    $this->lookup[$key] = $temp;
                }
            }
        }
        $old_lookup = false;
        while ($old_lookup !== $this->lookup) {
            $old_lookup = $this->lookup;
            foreach ($this->lookup as $i => $set) {
                $add = array();
                foreach ($set as $element => $x) {
                    if (isset($this->lookup[$element])) {
                        $add += $this->lookup[$element];
                        unset($this->lookup[$i][$element]);
                    }
                }
                $this->lookup[$i] += $add;
            }
        }

        foreach ($this->lookup as $key => $lookup) {
            $this->info[$key] = implode(' | ', array_keys($lookup));
        }
        $this->keys = array_keys($this->info);
        $this->values = array_values($this->info);
    }

    /**
     * Accepts a definition; generates and assigns a ChildDef for it
     *
     * When the content model is a string, every whole-word occurrence of a
     * known content set name is replaced by that set's expansion before the
     * ChildDef is instantiated. Does nothing if $def->child is already set.
     *
     * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef reference
     * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
     */
    public function generateChildDef(&$def, $module)
    {
        if (!empty($def->child)) { // already done!
            return;
        }
        $content_model = $def->content_model;
        // With no content sets defined the alternation would be empty and
        // match empty strings, sending '' to the callback; skip expansion.
        if (is_string($content_model) && !empty($this->keys)) {
            // Quote each name so a key containing regex metacharacters is
            // matched literally instead of changing the pattern.
            $alternatives = array();
            foreach ($this->keys as $key) {
                $alternatives[] = preg_quote((string) $key, '/');
            }
            $def->content_model = preg_replace_callback(
                '/\b(' . implode('|', $alternatives) . ')\b/',
                array($this, 'generateChildDefCallback'),
                $content_model
            );
        }
        $def->child = $this->getChildDef($def, $module);
    }

    public function generateChildDefCallback($matches)
    {
        return $this->info[$matches[0]];
    }

    /**
     * Instantiates a ChildDef based on content_model and content_model_type
     * member variables in HTMLPurifier_ElementDef
     * @note This will also defer to modules for custom HTMLPurifier_ChildDef
     *       subclasses that need content set expansion
     * @param HTMLPurifier_ElementDef $def HTMLPurifier_ElementDef to have ChildDef extracted
     * @param HTMLPurifier_HTMLModule $module Module that defined the ElementDef
     * @return HTMLPurifier_ChildDef corresponding to ElementDef
     */
    public function getChildDef($def, $module)
    {
        $value = $def->content_model;
        if (is_object($value)) {
            trigger_error(
                'Literal object child definitions should be stored in '.
                'ElementDef->child not ElementDef->content_model',
                E_USER_NOTICE
            );
            return $value;
        }
        switch ($def->content_model_type) {
            case 'required':
                return new HTMLPurifier_ChildDef_Required($value);
            case 'optional':
                return new HTMLPurifier_ChildDef_Optional($value);
            case 'empty':
                return new HTMLPurifier_ChildDef_Empty();
            case 'custom':
                return new HTMLPurifier_ChildDef_Custom($value);
        }
        // defer to its module
        $return = false;
        if ($module->defines_child_def) { // save a func call
            $return = $module->getChildDef($def);
        }
        if ($return !== false) {
            return $return;
        }
        // error-out
        trigger_error(
            'Could not determine which ChildDef class to instantiate',
            E_USER_ERROR
        );
        return false;
    }

    /**
     * Converts a string list of elements separated by pipes into
     * a lookup array.
* @param string $string List of elements * @return array Lookup array of elements */ protected function convertToLookup($string) { $array = explode('|', str_replace(' ', '', $string)); $ret = array(); foreach ($array as $k) { $ret[$k] = true; } return $ret; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Context.php0000644000175000017500000000511213512700112020021 0ustar ezyangezyang_storage)) { trigger_error( "Name $name produces collision, cannot re-register", E_USER_ERROR ); return; } $this->_storage[$name] =& $ref; } /** * Retrieves a variable reference from the context. * @param string $name String name * @param bool $ignore_error Boolean whether or not to ignore error * @return mixed */ public function &get($name, $ignore_error = false) { if (!array_key_exists($name, $this->_storage)) { if (!$ignore_error) { trigger_error( "Attempted to retrieve non-existent variable $name", E_USER_ERROR ); } $var = null; // so we can return by reference return $var; } return $this->_storage[$name]; } /** * Destroys a variable in the context. * @param string $name String name */ public function destroy($name) { if (!array_key_exists($name, $this->_storage)) { trigger_error( "Attempted to destroy non-existent variable $name", E_USER_ERROR ); return; } unset($this->_storage[$name]); } /** * Checks whether or not the variable exists. 
* @param string $name String name * @return bool */ public function exists($name) { return array_key_exists($name, $this->_storage); } /** * Loads a series of variables from an associative array * @param array $context_array Assoc array of variables to load */ public function loadArray($context_array) { foreach ($context_array as $key => $discard) { $this->register($key, $context_array[$key]); } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/CSSDefinition.php0000644000175000017500000004517513512700112021053 0ustar ezyangezyanginfo['text-align'] = new HTMLPurifier_AttrDef_Enum( array('left', 'right', 'center', 'justify'), false ); $border_style = $this->info['border-bottom-style'] = $this->info['border-right-style'] = $this->info['border-left-style'] = $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum( array( 'none', 'hidden', 'dotted', 'dashed', 'solid', 'double', 'groove', 'ridge', 'inset', 'outset' ), false ); $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style); $this->info['clear'] = new HTMLPurifier_AttrDef_Enum( array('none', 'left', 'right', 'both'), false ); $this->info['float'] = new HTMLPurifier_AttrDef_Enum( array('none', 'left', 'right'), false ); $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum( array('normal', 'italic', 'oblique'), false ); $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum( array('normal', 'small-caps'), false ); $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('none')), new HTMLPurifier_AttrDef_CSS_URI() ) ); $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum( array('inside', 'outside'), false ); $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum( array( 'disc', 'circle', 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha', 'upper-alpha', 'none' ), false ); $this->info['list-style-image'] = $uri_or_none; $this->info['list-style'] = new 
HTMLPurifier_AttrDef_CSS_ListStyle($config); $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum( array('capitalize', 'uppercase', 'lowercase', 'none'), false ); $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['background-image'] = $uri_or_none; $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum( array('repeat', 'repeat-x', 'repeat-y', 'no-repeat') ); $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum( array('scroll', 'fixed') ); $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition(); $border_color = $this->info['border-top-color'] = $this->info['border-bottom-color'] = $this->info['border-left-color'] = $this->info['border-right-color'] = $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('transparent')), new HTMLPurifier_AttrDef_CSS_Color() ) ); $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config); $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color); $border_width = $this->info['border-top-width'] = $this->info['border-bottom-width'] = $this->info['border-left-width'] = $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')), new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative ) ); $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width); $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum( array( 'xx-small', 'x-small', 'small', 'medium', 
'large', 'x-large', 'xx-large', 'larger', 'smaller' ) ), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true) ) ); $margin = $this->info['margin-top'] = $this->info['margin-bottom'] = $this->info['margin-left'] = $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_Enum(array('auto')) ) ); $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin); // non-negative $padding = $this->info['padding-top'] = $this->info['padding-bottom'] = $this->info['padding-left'] = $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true) ) ); $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding); $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage() ) ); $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('auto', 'initial', 'inherit')) ) ); $trusted_min_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit')) ) ); $trusted_max_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit')) ) ); $max = 
$config->get('CSS.MaxImgLength'); $this->info['width'] = $this->info['height'] = $max === null ? $trusted_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('auto')) ) ), // For everyone else: $trusted_wh ); $this->info['min-width'] = $this->info['min-height'] = $max === null ? $trusted_min_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit')) ) ), // For everyone else: $trusted_min_wh ); $this->info['max-width'] = $this->info['max-height'] = $max === null ? $trusted_max_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit')) ) ), // For everyone else: $trusted_max_wh ); $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily(); // this could use specialized code $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum( array( 'normal', 'bold', 'bolder', 'lighter', '100', '200', '300', '400', '500', '600', '700', '800', '900' ), false ); // MUST be called after other font properties, as it references // a CSSDefinition object $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config); // same here $this->info['border'] = $this->info['border-bottom'] = $this->info['border-top'] = $this->info['border-left'] = $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config); $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum( array('collapse', 'separate') ); $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum( array('top', 'bottom') ); $this->info['table-layout'] = new 
HTMLPurifier_AttrDef_Enum( array('auto', 'fixed') ); $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum( array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' ) ), new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage() ) ); $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2); // These CSS properties don't work on many browsers, but we live // in THE FUTURE! $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum( array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line') ); if ($config->get('CSS.Proprietary')) { $this->doSetupProprietary($config); } if ($config->get('CSS.AllowTricky')) { $this->doSetupTricky($config); } if ($config->get('CSS.Trusted')) { $this->doSetupTrusted($config); } $allow_important = $config->get('CSS.AllowImportant'); // wrap all attr-defs with decorator that handles !important foreach ($this->info as $k => $v) { $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important); } $this->setupConfigStuff($config); } /** * @param HTMLPurifier_Config $config */ protected function doSetupProprietary($config) { // Internet Explorer only scrollbar colors $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); // vendor specific prefixes of opacity $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); // only opacity, for now $this->info['filter'] = 
new HTMLPurifier_AttrDef_CSS_Filter(); // more CSS3 $this->info['page-break-after'] = $this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum( array( 'auto', 'always', 'avoid', 'left', 'right' ) ); $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid')); $border_radius = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative )); $this->info['border-top-left-radius'] = $this->info['border-top-right-radius'] = $this->info['border-bottom-right-radius'] = $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2); // TODO: support SLASH syntax $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4); } /** * @param HTMLPurifier_Config $config */ protected function doSetupTricky($config) { $this->info['display'] = new HTMLPurifier_AttrDef_Enum( array( 'inline', 'block', 'list-item', 'run-in', 'compact', 'marker', 'table', 'inline-block', 'inline-table', 'table-row-group', 'table-header-group', 'table-footer-group', 'table-row', 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none' ) ); $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum( array('visible', 'hidden', 'collapse') ); $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll')); $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); } /** * @param HTMLPurifier_Config $config */ protected function doSetupTrusted($config) { $this->info['position'] = new HTMLPurifier_AttrDef_Enum( array('static', 'relative', 'absolute', 'fixed') ); $this->info['top'] = $this->info['left'] = $this->info['right'] = $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_Enum(array('auto')), ) ); 
$this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Integer(), new HTMLPurifier_AttrDef_Enum(array('auto')), ) ); } /** * Performs extra config-based processing. Based off of * HTMLPurifier_HTMLDefinition. * @param HTMLPurifier_Config $config * @todo Refactor duplicate elements into common class (probably using * composition, not inheritance). */ protected function setupConfigStuff($config) { // setup allowed elements $support = "(for information on implementing this, see the " . "support forums) "; $allowed_properties = $config->get('CSS.AllowedProperties'); if ($allowed_properties !== null) { foreach ($this->info as $name => $d) { if (!isset($allowed_properties[$name])) { unset($this->info[$name]); } unset($allowed_properties[$name]); } // emit errors foreach ($allowed_properties as $name => $d) { // :TODO: Is this htmlspecialchars() call really necessary? $name = htmlspecialchars($name); trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING); } } $forbidden_properties = $config->get('CSS.ForbiddenProperties'); if ($forbidden_properties !== null) { foreach ($this->info as $name => $d) { if (isset($forbidden_properties[$name])) { unset($this->info[$name]); } } } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Definition.php0000644000175000017500000000252113512700112020466 0ustar ezyangezyangsetup) { return; } $this->setup = true; $this->doSetup($config); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/DefinitionCache.php0000644000175000017500000000751113512700112021416 0ustar ezyangezyangtype = $type; } /** * Generates a unique identifier for a particular configuration * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config * @return string */ public function generateKey($config) { return $config->version . ',' . // possibly replace with function calls $config->getBatchSerial($this->type) . ',' . $config->get($this->type . 
'.DefinitionRev'); } /** * Tests whether or not a key is old with respect to the configuration's * version and revision number. * @param string $key Key to test * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config to test against * @return bool */ public function isOld($key, $config) { if (substr_count($key, ',') < 2) { return true; } list($version, $hash, $revision) = explode(',', $key, 3); $compare = version_compare($version, $config->version); // version mismatch, is always old if ($compare != 0) { return true; } // versions match, ids match, check revision number if ($hash == $config->getBatchSerial($this->type) && $revision < $config->get($this->type . '.DefinitionRev')) { return true; } return false; } /** * Checks if a definition's type jives with the cache's type * @note Throws an error on failure * @param HTMLPurifier_Definition $def Definition object to check * @return bool true if good, false if not */ public function checkDefType($def) { if ($def->type !== $this->type) { trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}"); return false; } return true; } /** * Adds a definition object to the cache * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config */ abstract public function add($def, $config); /** * Unconditionally saves a definition object to the cache * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config */ abstract public function set($def, $config); /** * Replace an object in the cache * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config */ abstract public function replace($def, $config); /** * Retrieves a definition object from the cache * @param HTMLPurifier_Config $config */ abstract public function get($config); /** * Removes a definition object to the cache * @param HTMLPurifier_Config $config */ abstract public function remove($config); /** * Clears all objects from cache * @param HTMLPurifier_Config $config */ abstract public 
function flush($config); /** * Clears all expired (older version or revision) objects from cache * @note Be careful implementing this method as flush. Flush must * not interfere with other Definition types, and cleanup() * should not be repeatedly called by userland code. * @param HTMLPurifier_Config $config */ abstract public function cleanup($config); } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/DefinitionCacheFactory.php0000644000175000017500000000620113512700112022741 0ustar ezyangezyang array()); /** * @type array */ protected $implementations = array(); /** * @type HTMLPurifier_DefinitionCache_Decorator[] */ protected $decorators = array(); /** * Initialize default decorators */ public function setup() { $this->addDecorator('Cleanup'); } /** * Retrieves an instance of global definition cache factory. * @param HTMLPurifier_DefinitionCacheFactory $prototype * @return HTMLPurifier_DefinitionCacheFactory */ public static function instance($prototype = null) { static $instance; if ($prototype !== null) { $instance = $prototype; } elseif ($instance === null || $prototype === true) { $instance = new HTMLPurifier_DefinitionCacheFactory(); $instance->setup(); } return $instance; } /** * Registers a new definition cache object * @param string $short Short name of cache object, for reference * @param string $long Full class name of cache object, for construction */ public function register($short, $long) { $this->implementations[$short] = $long; } /** * Factory method that creates a cache object based on configuration * @param string $type Name of definitions handled by cache * @param HTMLPurifier_Config $config Config instance * @return mixed */ public function create($type, $config) { $method = $config->get('Cache.DefinitionImpl'); if ($method === null) { return new HTMLPurifier_DefinitionCache_Null($type); } if (!empty($this->caches[$method][$type])) { return $this->caches[$method][$type]; } if (isset($this->implementations[$method]) && 
class_exists($class = $this->implementations[$method], false)) { $cache = new $class($type); } else { if ($method != 'Serializer') { trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING); } $cache = new HTMLPurifier_DefinitionCache_Serializer($type); } foreach ($this->decorators as $decorator) { $new_cache = $decorator->decorate($cache); // prevent infinite recursion in PHP 4 unset($cache); $cache = $new_cache; } $this->caches[$method][$type] = $cache; return $this->caches[$method][$type]; } /** * Registers a decorator to add to all new cache objects * @param HTMLPurifier_DefinitionCache_Decorator|string $decorator An instance or the name of a decorator */ public function addDecorator($decorator) { if (is_string($decorator)) { $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator"; $decorator = new $class; } $this->decorators[$decorator->name] = $decorator; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Doctype.php0000644000175000017500000000305613512700112020011 0ustar ezyangezyangrenderDoctype. * If structure changes, please update that function. */ class HTMLPurifier_Doctype { /** * Full name of doctype * @type string */ public $name; /** * List of standard modules (string identifiers or literal objects) * that this doctype uses * @type array */ public $modules = array(); /** * List of modules to use for tidying up code * @type array */ public $tidyModules = array(); /** * Is the language derived from XML (i.e. XHTML)? 
* @type bool */ public $xml = true; /** * List of aliases for this doctype * @type array */ public $aliases = array(); /** * Public DTD identifier * @type string */ public $dtdPublic; /** * System DTD identifier * @type string */ public $dtdSystem; public function __construct( $name = null, $xml = true, $modules = array(), $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null ) { $this->name = $name; $this->xml = $xml; $this->modules = $modules; $this->tidyModules = $tidyModules; $this->aliases = $aliases; $this->dtdPublic = $dtd_public; $this->dtdSystem = $dtd_system; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/DoctypeRegistry.php0000644000175000017500000001020013512700112021527 0ustar ezyangezyangdoctypes[$doctype->name] = $doctype; $name = $doctype->name; // hookup aliases foreach ($doctype->aliases as $alias) { if (isset($this->doctypes[$alias])) { continue; } $this->aliases[$alias] = $name; } // remove old aliases if (isset($this->aliases[$name])) { unset($this->aliases[$name]); } return $doctype; } /** * Retrieves reference to a doctype of a certain name * @note This function resolves aliases * @note When possible, use the more fully-featured make() * @param string $doctype Name of doctype * @return HTMLPurifier_Doctype Editable doctype object */ public function get($doctype) { if (isset($this->aliases[$doctype])) { $doctype = $this->aliases[$doctype]; } if (!isset($this->doctypes[$doctype])) { trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR); $anon = new HTMLPurifier_Doctype($doctype); return $anon; } return $this->doctypes[$doctype]; } /** * Creates a doctype based on a configuration object, * will perform initialization on the doctype * @note Use this function to get a copy of doctype that config * can hold on to (this is necessary in order to tell * Generator whether or not the current document is XML * based or not). 
* @param HTMLPurifier_Config $config * @return HTMLPurifier_Doctype */ public function make($config) { return clone $this->get($this->getDoctypeFromConfig($config)); } /** * Retrieves the doctype from the configuration object * @param HTMLPurifier_Config $config * @return string */ public function getDoctypeFromConfig($config) { // recommended test $doctype = $config->get('HTML.Doctype'); if (!empty($doctype)) { return $doctype; } $doctype = $config->get('HTML.CustomDoctype'); if (!empty($doctype)) { return $doctype; } // backwards-compatibility if ($config->get('HTML.XHTML')) { $doctype = 'XHTML 1.0'; } else { $doctype = 'HTML 4.01'; } if ($config->get('HTML.Strict')) { $doctype .= ' Strict'; } else { $doctype .= ' Transitional'; } return $doctype; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ElementDef.php0000644000175000017500000001654313512700112020417 0ustar ezyangezyangsetup(), this array may also * contain an array at index 0 that indicates which attribute * collections to load into the full array. It may also * contain string indentifiers in lieu of HTMLPurifier_AttrDef, * see HTMLPurifier_AttrTypes on how they are expanded during * HTMLPurifier_HTMLDefinition->setup() processing. */ public $attr = array(); // XXX: Design note: currently, it's not possible to override // previously defined AttrTransforms without messing around with // the final generated config. This is by design; a previous version // used an associated list of attr_transform, but it was extremely // easy to accidentally override other attribute transforms by // forgetting to specify an index (and just using 0.) While we // could check this by checking the index number and complaining, // there is a second problem which is that it is not at all easy to // tell when something is getting overridden. Combine this with a // codebase where this isn't really being used, and it's perfect for // nuking. /** * List of tags HTMLPurifier_AttrTransform to be done before validation. 
* @type array */ public $attr_transform_pre = array(); /** * List of tags HTMLPurifier_AttrTransform to be done after validation. * @type array */ public $attr_transform_post = array(); /** * HTMLPurifier_ChildDef of this tag. * @type HTMLPurifier_ChildDef */ public $child; /** * Abstract string representation of internal ChildDef rules. * @see HTMLPurifier_ContentSets for how this is parsed and then transformed * into an HTMLPurifier_ChildDef. * @warning This is a temporary variable that is not available after * being processed by HTMLDefinition * @type string */ public $content_model; /** * Value of $child->type, used to determine which ChildDef to use, * used in combination with $content_model. * @warning This must be lowercase * @warning This is a temporary variable that is not available after * being processed by HTMLDefinition * @type string */ public $content_model_type; /** * Does the element have a content model (#PCDATA | Inline)*? This * is important for chameleon ins and del processing in * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't * have to worry about this one. * @type bool */ public $descendants_are_inline = false; /** * List of the names of required attributes this element has. * Dynamically populated by HTMLPurifier_HTMLDefinition::getElement() * @type array */ public $required_attr = array(); /** * Lookup table of tags excluded from all descendants of this tag. * @type array * @note SGML permits exclusions for all descendants, but this is * not possible with DTDs or XML Schemas. W3C has elected to * use complicated compositions of content_models to simulate * exclusion for children, but we go the simpler, SGML-style * route of flat-out exclusions, which correctly apply to * all descendants and not just children. Note that the XHTML * Modularization Abstract Modules are blithely unaware of such * distinctions. */ public $excludes = array(); /** * This tag is explicitly auto-closed by the following tags. 
* @type array */ public $autoclose = array(); /** * If a foreign element is found in this element, test if it is * allowed by this sub-element; if it is, instead of closing the * current element, place it inside this element. * @type string */ public $wrap; /** * Whether or not this is a formatting element affected by the * "Active Formatting Elements" algorithm. * @type bool */ public $formatting; /** * Low-level factory constructor for creating new standalone element defs */ public static function create($content_model, $content_model_type, $attr) { $def = new HTMLPurifier_ElementDef(); $def->content_model = $content_model; $def->content_model_type = $content_model_type; $def->attr = $attr; return $def; } /** * Merges the values of another element definition into this one. * Values from the new element def take precedence if a value is * not mergeable. * @param HTMLPurifier_ElementDef $def */ public function mergeIn($def) { // later keys takes precedence foreach ($def->attr as $k => $v) { if ($k === 0) { // merge in the includes // sorry, no way to override an include foreach ($v as $v2) { $this->attr[0][] = $v2; } continue; } if ($v === false) { if (isset($this->attr[$k])) { unset($this->attr[$k]); } continue; } $this->attr[$k] = $v; } $this->_mergeAssocArray($this->excludes, $def->excludes); $this->attr_transform_pre = array_merge($this->attr_transform_pre, $def->attr_transform_pre); $this->attr_transform_post = array_merge($this->attr_transform_post, $def->attr_transform_post); if (!empty($def->content_model)) { $this->content_model = str_replace("#SUPER", $this->content_model, $def->content_model); $this->child = false; } if (!empty($def->content_model_type)) { $this->content_model_type = $def->content_model_type; $this->child = false; } if (!is_null($def->child)) { $this->child = $def->child; } if (!is_null($def->formatting)) { $this->formatting = $def->formatting; } if ($def->descendants_are_inline) { $this->descendants_are_inline = 
$def->descendants_are_inline; } } /** * Merges one array into another, removes values which equal false * @param $a1 Array by reference that is merged into * @param $a2 Array that merges into $a1 */ private function _mergeAssocArray(&$a1, $a2) { foreach ($a2 as $k => $v) { if ($v === false) { if (isset($a1[$k])) { unset($a1[$k]); } continue; } $a1[$k] = $v; } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Encoder.php0000644000175000017500000006205713512700112017767 0ustar ezyangezyang= $c) { $r .= self::unsafeIconv($in, $out, substr($text, $i)); break; } // wibble the boundary if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) { $chunk_size = $max_chunk_size; } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) { $chunk_size = $max_chunk_size - 1; } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) { $chunk_size = $max_chunk_size - 2; } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) { $chunk_size = $max_chunk_size - 3; } else { return false; // rather confusing UTF-8... } $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths $r .= self::unsafeIconv($in, $out, $chunk); $i += $chunk_size; } return $r; } else { return false; } } else { return false; } } /** * Cleans a UTF-8 string for well-formedness and SGML validity * * It will parse according to UTF-8 and return a valid UTF8 string, with * non-SGML codepoints excluded. * * Specifically, it will permit: * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF} * Source: https://www.w3.org/TR/REC-xml/#NT-Char * Arguably this function should be modernized to the HTML5 set * of allowed characters: * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream * which simultaneously expand and restrict the set of allowed characters. 
*
* @param string $str The string to clean
* @param bool $force_php Accepted for interface compatibility; this
*                        implementation never reads it.
* @return string
*
* @note Just for reference, the non-SGML code points are 0 to 31 and
*       127 to 159, inclusive.  However, we allow code points 9, 10
*       and 13, which are the tab, line feed and carriage return
*       respectively. 128 and above the code points map to multibyte
*       UTF-8 representations.
*
* @note Fallback code adapted from utf8ToUnicode by Henri Sivonen
*       (hsivonen@iki.fi), under the LGPL license.  Notes on what
*       changed are inside, but in general, the original code
*       transformed UTF-8 text into an array of integer Unicode
*       codepoints. Understandably, transforming that back to a string
*       would be somewhat expensive, so the function was modded to
*       directly operate on the string.  However, this discourages code
*       reuse, and the logic enumerated here would be useful for any
*       function that needs to be able to understand UTF-8 characters.
*       As of right now, only smart lossless character encoding
*       converters would need that, and I'm probably not going to
*       implement them.
*/
public static function cleanUTF8($str, $force_php = false)
{
    // UTF-8 validity is checked since PHP 4.3.5
    // This is an optimization: if the string is already valid UTF-8, no
    // need to do PHP stuff. 99% of the time, this will be the case.
    if (preg_match(
        '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du',
        $str
    )) {
        return $str;
    }

    $mState = 0; // cached expected number of octets after the current octet
                 // until the beginning of the next UTF8 character sequence
    $mUcs4  = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    // original code involved an $out that was an array of Unicode
    // codepoints.  Instead of having to convert back into UTF-8, we've
    // decided to directly append valid UTF-8 characters onto a string
    // $out once they're done.  $char accumulates raw bytes, while $mUcs4
    // turns into the Unicode code point, so there's some redundancy.

    $out = '';
    $char = '';

    $len = strlen($str);
    for ($i = 0; $i < $len; $i++) {
        // Square-bracket offset: the curly-brace form ($str{$i}) is
        // deprecated in PHP 7.4 and a parse error in PHP 8.
        $in = ord($str[$i]);
        $char .= $str[$i]; // append byte to char
        if (0 == $mState) {
            // When mState is zero we expect either a US-ASCII character
            // or a multi-octet sequence.
            if (0 == (0x80 & ($in))) {
                // US-ASCII, pass straight through.
                if (($in <= 31 || $in == 127) &&
                    !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
                ) {
                    // control characters, remove
                } else {
                    $out .= $char;
                }
                // reset
                $char = '';
                $mBytes = 1;
            } elseif (0xC0 == (0xE0 & ($in))) {
                // First octet of 2 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x1F) << 6;
                $mState = 1;
                $mBytes = 2;
            } elseif (0xE0 == (0xF0 & ($in))) {
                // First octet of 3 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x0F) << 12;
                $mState = 2;
                $mBytes = 3;
            } elseif (0xF0 == (0xF8 & ($in))) {
                // First octet of 4 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x07) << 18;
                $mState = 3;
                $mBytes = 4;
            } elseif (0xF8 == (0xFC & ($in))) {
                // First octet of 5 octet sequence.
                //
                // This is illegal because the encoded codepoint must be
                // either:
                // (a) not the shortest form or
                // (b) outside the Unicode range of 0-0x10FFFF.
                // Rather than trying to resynchronize, we will carry on
                // until the end of the sequence and let the later error
                // handling code catch it.
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x03) << 24;
                $mState = 4;
                $mBytes = 5;
            } elseif (0xFC == (0xFE & ($in))) {
                // First octet of 6 octet sequence, see comments for 5
                // octet sequence.
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 1) << 30;
                $mState = 5;
                $mBytes = 6;
            } else {
                // Current octet is neither in the US-ASCII range nor a
                // legal first octet of a multi-octet sequence.
                $mState = 0;
                $mUcs4  = 0;
                $mBytes = 1;
                $char = '';
            }
        } else {
            // When mState is non-zero, we expect a continuation of the
            // multi-octet sequence
            if (0x80 == (0xC0 & ($in))) {
                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;

                if (0 == --$mState) {
                    // End of the multi-octet sequence. mUcs4 now contains
                    // the final Unicode codepoint to be output

                    // Check for illegal sequences and codepoints.

                    // From Unicode 3.1, non-shortest form is illegal
                    if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                        ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                        ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                        (4 < $mBytes) ||
                        // From Unicode 3.2, surrogate characters = illegal
                        (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                        // Codepoints outside the Unicode range are illegal
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        // illegal sequence: emit nothing for it
                    } elseif (0xFEFF != $mUcs4 && // omit BOM
                        // check for valid Char unicode codepoints
                        (
                            0x9 == $mUcs4 ||
                            0xA == $mUcs4 ||
                            0xD == $mUcs4 ||
                            (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
                            // 7F-9F is not strictly prohibited by XML,
                            // but it is non-SGML, and thus we don't allow it
                            (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
                            (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) ||
                            (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
                        )
                    ) {
                        $out .= $char;
                    }
                    // initialize UTF8 cache (reset)
                    $mState = 0;
                    $mUcs4  = 0;
                    $mBytes = 1;
                    $char = '';
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                // used to result in complete fail, but we'll reset
                $mState = 0;
                $mUcs4  = 0;
                $mBytes = 1;
                $char ='';
            }
        }
    }
    return $out;
}

/**
 * Translates a Unicode codepoint into its corresponding UTF-8 character.
 * @note Based on Feyd's function, which is in public domain.
 * @note While we're going to do code point parsing anyway, a good
 *       optimization would be to refuse to translate code points that
 *       are non-SGML characters.  However, this could lead to duplication.
 * @note This is very similar to the unichr function in
 *       maintenance/generate-entity-file.php (although this is superior,
 *       due to its sanity checks).
*/

// +----------+----------+----------+----------+
// | 33222222 | 22221111 | 111111   |          |
// | 10987654 | 32109876 | 54321098 | 76543210 | bit
// +----------+----------+----------+----------+
// |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
// |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
// |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
// +----------+----------+----------+----------+
// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
// +----------+----------+----------+----------+

public static function unichr($code)
{
    // 1114111 = 0x10FFFF; 55296..57343 = 0xD800..0xDFFF (surrogates)
    if ($code > 1114111 or $code < 0 or
      ($code >= 55296 and $code <= 57343) ) {
        // bits are set outside the "valid" range as defined
        // by UNICODE 4.1.0
        return '';
    }

    // $x is the last (lowest) byte; $y, $z, $w are the preceding bytes,
    // left at 0 when the encoding does not need them.
    $x = $y = $z = $w = 0;
    if ($code < 128) {
        // regular ASCII character
        $x = $code;
    } else {
        // set up bits for UTF-8
        $x = ($code & 63) | 128;
        if ($code < 2048) {
            $y = (($code & 2047) >> 6) | 192;
        } else {
            $y = (($code & 4032) >> 6) | 128;
            if ($code < 65536) {
                $z = (($code >> 12) & 15) | 224;
            } else {
                $z = (($code >> 12) & 63) | 128;
                $w = (($code >> 18) & 7)  | 240;
            }
        }
    }
    // set up the actual character
    $ret = '';
    if ($w) {
        $ret .= chr($w);
    }
    if ($z) {
        $ret .= chr($z);
    }
    if ($y) {
        $ret .= chr($y);
    }
    $ret .= chr($x);

    return $ret;
}

/**
 * Reports whether iconv can be used: the iconv() function must exist and
 * testIconvTruncateBug() must not report ICONV_UNUSABLE. The answer is
 * computed once and cached in a static variable.
 * @return bool
 */
public static function iconvAvailable()
{
    static $iconv = null;
    if ($iconv === null) {
        $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
    }
    return $iconv;
}

/**
 * Convert a string to UTF-8 based on configuration.
* @param string $str The string to convert
* @param HTMLPurifier_Config $config Read for %Core.Encoding and
*        %Test.ForceNoIconv
* @param HTMLPurifier_Context $context Not read by this method
* @return string The converted string; '' when iconv rejects the
*         encoding. When no conversion path is available an
*         E_USER_ERROR is triggered and nothing is returned.
*/
public static function convertToUTF8($str, $config, $context)
{
    $encoding = $config->get('Core.Encoding');
    if ($encoding === 'utf-8') {
        return $str;
    }
    static $iconv = null;
    if ($iconv === null) {
        $iconv = self::iconvAvailable();
    }
    if ($iconv && !$config->get('Test.ForceNoIconv')) {
        // unaffected by bugs, since UTF-8 support all characters
        $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
        if ($str === false) {
            // $encoding is not a valid encoding
            trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
            return '';
        }
        // If the string is bjorked by Shift_JIS or a similar encoding
        // that doesn't support all of ASCII, convert the naughty
        // characters to their true byte-wise ASCII/UTF-8 equivalents.
        $str = strtr($str, self::testEncodingSupportsASCII($encoding));
        return $str;
    } elseif ($encoding === 'iso-8859-1') {
        // utf8_encode() is deprecated as of PHP 8.2. Prefer the mbstring
        // conversion when it is available, and keep the original call
        // as a fallback so behaviour without mbstring is unchanged.
        if (function_exists('mb_convert_encoding')) {
            $str = mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        } else {
            $str = utf8_encode($str);
        }
        return $str;
    }
    // No usable conversion path: report why.
    $bug = HTMLPurifier_Encoder::testIconvTruncateBug();
    if ($bug == self::ICONV_OK) {
        trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
    } else {
        trigger_error(
            'You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 ' .
            'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541',
            E_USER_ERROR
        );
    }
}

/**
 * Converts a string from UTF-8 based on configuration.
 * @param string $str The string to convert
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return string
 * @note Currently, this is a lossy conversion, with inexpressible
 *       characters being omitted.
*/
public static function convertFromUTF8($str, $config, $context)
{
    $encoding = $config->get('Core.Encoding');
    // When %Core.EscapeNonASCIICharacters is on, non-ASCII characters are
    // turned into numeric entities first; $escape is reused below.
    if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
        $str = self::convertToASCIIDumbLossless($str);
    }
    if ($encoding === 'utf-8') {
        return $str;
    }
    static $iconv = null;
    if ($iconv === null) {
        $iconv = self::iconvAvailable();
    }
    if ($iconv && !$config->get('Test.ForceNoIconv')) {
        // Undo our previous fix in convertToUTF8, otherwise iconv will barf
        $ascii_fix = self::testEncodingSupportsASCII($encoding);
        if (!$escape && !empty($ascii_fix)) {
            // Strip the UTF-8 characters that map to the fix table's keys
            $clear_fix = array();
            foreach ($ascii_fix as $utf8 => $native) {
                $clear_fix[$utf8] = '';
            }
            $str = strtr($str, $clear_fix);
        }
        $str = strtr($str, array_flip($ascii_fix));
        // Normal stuff
        $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
        return $str;
    } elseif ($encoding === 'iso-8859-1') {
        // utf8_decode() is deprecated as of PHP 8.2. Prefer the mbstring
        // conversion when it is available, and keep the original call
        // as a fallback so behaviour without mbstring is unchanged.
        if (function_exists('mb_convert_encoding')) {
            $str = mb_convert_encoding($str, 'ISO-8859-1', 'UTF-8');
        } else {
            $str = utf8_decode($str);
        }
        return $str;
    }
    trigger_error('Encoding not supported', E_USER_ERROR);
    // You might be tempted to assume that the ASCII representation
    // might be OK, however, this is *not* universally true over all
    // encodings.  So we take the conservative route here, rather
    // than forcibly turn on %Core.EscapeNonASCIICharacters
}

/**
 * Lossless (character-wise) conversion of HTML to ASCII
 * @param string $str UTF-8 string to be converted to ASCII
 * @return string ASCII encoded string with non-ASCII character entity-ized
 * @warning Adapted from MediaWiki, claiming fair use: this is a common
 *       algorithm. If you disagree with this license fudgery,
 *       implement it yourself.
 * @note Uses decimal numeric entities since they are best supported.
 * @note This is a DUMB function: it has no concept of keeping
 *       character entities that the projected character encoding
 *       can allow. We could possibly implement a smart version
 *       but that would require it to also know which Unicode
 *       codepoints the charset supported (not an easy task).
* @note Similar in approach to cleanUTF8(), but it assumes that $str is
*       well-formed UTF-8
*/
public static function convertToASCIIDumbLossless($str)
{
    $bytesleft = 0; // continuation bytes still expected for the current character
    $result = '';
    $working = 0;   // code point being assembled
    $len = strlen($str);
    for ($i = 0; $i < $len; $i++) {
        $bytevalue = ord($str[$i]);
        if ($bytevalue <= 0x7F) { //0xxx xxxx
            $result .= chr($bytevalue);
            $bytesleft = 0;
        } elseif ($bytevalue <= 0xBF) { //10xx xxxx
            $working = $working << 6;
            $working += ($bytevalue & 0x3F);
            $bytesleft--;
            if ($bytesleft <= 0) {
                // character complete: emit it as a decimal entity
                $result .= "&#" . $working . ";";
            }
        } elseif ($bytevalue <= 0xDF) { //110x xxxx
            $working = $bytevalue & 0x1F;
            $bytesleft = 1;
        } elseif ($bytevalue <= 0xEF) { //1110 xxxx
            $working = $bytevalue & 0x0F;
            $bytesleft = 2;
        } else { //1111 0xxx
            $working = $bytevalue & 0x07;
            $bytesleft = 3;
        }
    }
    return $result;
}

/** No bugs detected in iconv. */
const ICONV_OK = 0;

/** Iconv truncates output if converting from UTF-8 to another
 *  character set with //IGNORE, and a non-encodable character is found */
const ICONV_TRUNCATES = 1;

/** Iconv does not support //IGNORE, making it unusable for
 *  transcoding purposes */
const ICONV_UNUSABLE = 2;

/**
 * glibc iconv has a known bug where it doesn't handle the magic
 * //IGNORE stanza correctly.  In particular, rather than ignore
 * characters, it will return an EILSEQ after consuming some number
 * of characters, and expect you to restart iconv as if it were
 * an E2BIG.  Old versions of PHP did not respect the errno, and
 * returned the fragment, so as a result you would see iconv
 * mysteriously truncating output. We can work around this by
 * manually chopping our input into segments of about 8000
 * characters, as long as PHP ignores the error code.  If PHP starts
 * paying attention to the error code, iconv becomes unusable.
 *
 * @return int Error code indicating severity of bug. The result is
 *         cached in a static variable. If the converted output is
 *         longer than expected, an E_USER_ERROR is triggered and no
 *         code is stored, so null is returned if execution continues.
 */
public static function testIconvTruncateBug()
{
    static $code = null;
    if ($code === null) {
        // better not use iconv, otherwise infinite loop!
        $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
        if ($r === false) {
            $code = self::ICONV_UNUSABLE;
        } elseif (($c = strlen($r)) < 9000) {
            $code = self::ICONV_TRUNCATES;
        } elseif ($c > 9000) {
            trigger_error(
                'Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: ' .
                'include your iconv version as per phpversion()',
                E_USER_ERROR
            );
        } else {
            $code = self::ICONV_OK;
        }
    }
    return $code;
}

/**
 * This expensive function tests whether or not a given character
 * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
 * fail this test, and require special processing. Variable width
 * encodings shouldn't ever fail.
 *
 * @param string $encoding Encoding name to test, as per iconv format
 * @param bool $bypass Whether or not to bypass the precompiled arrays.
 * @return array|false Array of UTF-8 characters to their corresponding
 *         ASCII, which can be used to "undo" any overzealous iconv
 *         action; false when iconv cannot convert to $encoding at all.
 */
public static function testEncodingSupportsASCII($encoding, $bypass = false)
{
    // All calls to iconv here are unsafe, proof by case analysis:
    // If ICONV_OK, no difference.
    // If ICONV_TRUNCATE, all calls involve one character inputs,
    // so bug is not triggered.
// If ICONV_UNUSABLE, this call is irrelevant static $encodings = array(); if (!$bypass) { if (isset($encodings[$encoding])) { return $encodings[$encoding]; } $lenc = strtolower($encoding); switch ($lenc) { case 'shift_jis': return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'); case 'johab': return array("\xE2\x82\xA9" => '\\'); } if (strpos($lenc, 'iso-8859-') === 0) { return array(); } } $ret = array(); if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) { return false; } for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars $c = chr($i); // UTF-8 char $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion if ($r === '' || // This line is needed for iconv implementations that do not // omit characters that do not exist in the target character set ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c) ) { // Reverse engineer: what's the UTF-8 equiv of this byte // sequence? This assumes that there's no variable width // encoding that doesn't support ASCII. $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c; } } $encodings[$encoding] = $ret; return $ret; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/EntityLookup.php0000644000175000017500000000262213512700112021046 0ustar ezyangezyangtable = unserialize(file_get_contents($file)); } /** * Retrieves sole instance of the object. * @param bool|HTMLPurifier_EntityLookup $prototype Optional prototype of custom lookup table to overload with. 
* @return HTMLPurifier_EntityLookup */ public static function instance($prototype = false) { // no references, since PHP doesn't copy unless modified static $instance = null; if ($prototype) { $instance = $prototype; } elseif (!$instance) { $instance = new HTMLPurifier_EntityLookup(); $instance->setup(); } return $instance; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/EntityParser.php0000644000175000017500000002337413512700112021040 0ustar ezyangezyang_semiOptionalPrefixRegex = "/&()()()($semi_optional)/"; $this->_textEntitiesRegex = '/&(?:'. // hex '[#]x([a-fA-F0-9]+);?|'. // dec '[#]0*(\d+);?|'. // string (mandatory semicolon) // NB: order matters: match semicolon preferentially '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. // string (optional semicolon) "($semi_optional)". ')/'; $this->_attrEntitiesRegex = '/&(?:'. // hex '[#]x([a-fA-F0-9]+);?|'. // dec '[#]0*(\d+);?|'. // string (mandatory semicolon) // NB: order matters: match semicolon preferentially '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. // string (optional semicolon) // don't match if trailing is equals or alphanumeric (URL // like) "($semi_optional)(?![=;A-Za-z0-9])". ')/'; } /** * Substitute entities with the parsed equivalents. Use this on * textual data in an HTML document (as opposed to attributes.) * * @param string $string String to have entities parsed. * @return string Parsed string. */ public function substituteTextEntities($string) { return preg_replace_callback( $this->_textEntitiesRegex, array($this, 'entityCallback'), $string ); } /** * Substitute entities with the parsed equivalents. Use this on * attribute contents in documents. * * @param string $string String to have entities parsed. * @return string Parsed string. */ public function substituteAttrEntities($string) { return preg_replace_callback( $this->_attrEntitiesRegex, array($this, 'entityCallback'), $string ); } /** * Callback function for substituteNonSpecialEntities() that does the work. 
*
     * Handler passed to preg_replace_callback(); it also re-enters itself
     * once, with the semicolon-optional prefix regex, when a named entity
     * written with a semicolon is not found in the lookup table.
     *
     * @param array $matches  PCRE matches array, with 0 the entire match, and
     *                  index 1 (hex digits), 2 (decimal digits), 3 (named
     *                  entity) or 4 (semicolon-optional named entity) set.
     * @return string Replacement string; the untouched match when the entity
     *                  cannot be resolved.
     */
    protected function entityCallback($matches)
    {
        $entity = $matches[0];

        // Capture groups after the last one that matched may be missing from
        // $matches, so probe with isset() instead of silencing the notice
        // with the @ operator.
        $hex_part = isset($matches[1]) ? $matches[1] : null;
        $dec_part = isset($matches[2]) ? $matches[2] : null;

        // Group 3 wins over group 4; an empty string means "no name".
        $named_part = '';
        if (!empty($matches[3])) {
            $named_part = $matches[3];
        } elseif (!empty($matches[4])) {
            $named_part = $matches[4];
        }

        if ($hex_part !== null && $hex_part !== '') {
            return HTMLPurifier_Encoder::unichr(hexdec($hex_part));
        }
        if ($dec_part !== null && $dec_part !== '') {
            return HTMLPurifier_Encoder::unichr((int) $dec_part);
        }

        // Named entity: the lookup table is fetched lazily on first use.
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$named_part])) {
            return $this->_entity_lookup->table[$named_part];
        }

        // The exact name is unknown, so test whether one of the
        // semicolon-optional names is a prefix of it. Retrying only when
        // group 3 produced the name is what prevents infinite recursion:
        // the prefix regex leaves groups 1-3 empty.
        if (!empty($matches[3])) {
            return preg_replace_callback(
                $this->_semiOptionalPrefixRegex,
                array($this, 'entityCallback'),
                $entity
            );
        }
        return $entity;
    }

    // LEGACY CODE BELOW

    /**
     * Callback regex string for parsing entities.
     * @type string
     */
    protected $_substituteEntitiesRegex =
        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
        // 1. hex 2. dec 3. string (XML style)

    /**
     * Decimal to parsed string conversion table for special entities.
     * @type array
     */
    protected $_special_dec2str = array(
        34 => '"',
        38 => '&',
        39 => "'",
        60 => '<',
        62 => '>'
    );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     * @type array
     */
    protected $_special_ent2dec = array(
        'quot' => 34,
        'amp' => 38,
        'lt' => 60,
        'gt' => 62
    );

    /**
     * Substitutes non-special entities with their parsed equivalents. Since
     * running this whenever you have parsed character data is wasteful, we
     * run it before everything else.
*
     * @param string $string String to have non-special entities parsed.
     * @return string Parsed string.
     */
    public function substituteNonSpecialEntities($string)
    {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * Entities listed in $_special_dec2str (numeric form) or
     * $_special_ent2dec (named form) are returned unchanged, as are named
     * entities missing from the lookup table.
     *
     * @param array $matches  PCRE matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return string Replacement string.
     */
    protected function nonSpecialEntityCallback($matches)
    {
        $entity = $matches[0];

        // Numeric references look like "&#..." or "&#x..."; check the
        // offsets explicitly instead of reading them under @ suppression.
        if (isset($entity[1]) && $entity[1] === '#') {
            $is_hex = isset($entity[2]) && $entity[2] === 'x';
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

            // abort for special characters
            if (isset($this->_special_dec2str[$code])) {
                return $entity;
            }
            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $entity;
        }

        // The lookup table is fetched lazily on first use.
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $entity;
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param string $string String to have special entities parsed.
     * @return string Parsed string.
     */
    public function substituteSpecialEntities($string)
    {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
* * @param array $matches PCRE-style matches array, with 0 the entire match, and * either index 1, 2 or 3 set with a hex value, dec value, * or string (respectively). * @return string Replacement string. */ protected function specialEntityCallback($matches) { $entity = $matches[0]; $is_num = (@$matches[0][1] === '#'); if ($is_num) { $is_hex = (@$entity[2] === 'x'); $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; return isset($this->_special_dec2str[$int]) ? $this->_special_dec2str[$int] : $entity; } else { return isset($this->_special_ent2dec[$matches[3]]) ? $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] : $entity; } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ErrorCollector.php0000644000175000017500000001671113512700112021344 0ustar ezyangezyanglocale =& $context->get('Locale'); $this->context = $context; $this->_current =& $this->_stacks[0]; $this->errors =& $this->_stacks[0]; } /** * Sends an error message to the collector for later use * @param int $severity Error severity, PHP error style (don't use E_USER_) * @param string $msg Error message text */ public function send($severity, $msg) { $args = array(); if (func_num_args() > 2) { $args = func_get_args(); array_shift($args); unset($args[0]); } $token = $this->context->get('CurrentToken', true); $line = $token ? $token->line : $this->context->get('CurrentLine', true); $col = $token ? 
$token->col : $this->context->get('CurrentCol', true); $attr = $this->context->get('CurrentAttr', true); // perform special substitutions, also add custom parameters $subst = array(); if (!is_null($token)) { $args['CurrentToken'] = $token; } if (!is_null($attr)) { $subst['$CurrentAttr.Name'] = $attr; if (isset($token->attr[$attr])) { $subst['$CurrentAttr.Value'] = $token->attr[$attr]; } } if (empty($args)) { $msg = $this->locale->getMessage($msg); } else { $msg = $this->locale->formatMessage($msg, $args); } if (!empty($subst)) { $msg = strtr($msg, $subst); } // (numerically indexed) $error = array( self::LINENO => $line, self::SEVERITY => $severity, self::MESSAGE => $msg, self::CHILDREN => array() ); $this->_current[] = $error; // NEW CODE BELOW ... // Top-level errors are either: // TOKEN type, if $value is set appropriately, or // "syntax" type, if $value is null $new_struct = new HTMLPurifier_ErrorStruct(); $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; if ($token) { $new_struct->value = clone $token; } if (is_int($line) && is_int($col)) { if (isset($this->lines[$line][$col])) { $struct = $this->lines[$line][$col]; } else { $struct = $this->lines[$line][$col] = $new_struct; } // These ksorts may present a performance problem ksort($this->lines[$line], SORT_NUMERIC); } else { if (isset($this->lines[-1])) { $struct = $this->lines[-1]; } else { $struct = $this->lines[-1] = $new_struct; } } ksort($this->lines, SORT_NUMERIC); // Now, check if we need to operate on a lower structure if (!empty($attr)) { $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr); if (!$struct->value) { $struct->value = array($attr, 'PUT VALUE HERE'); } } if (!empty($cssprop)) { $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop); if (!$struct->value) { // if we tokenize CSS this might be a little more difficult to do $struct->value = array($cssprop, 'PUT VALUE HERE'); } } // Ok, structs are all setup, now time to register the error 
$struct->addError($severity, $msg); } /** * Retrieves raw error data for custom formatter to use */ public function getRaw() { return $this->errors; } /** * Default HTML formatting implementation for error messages * @param HTMLPurifier_Config $config Configuration, vital for HTML output nature * @param array $errors Errors array to display; used for recursion. * @return string */ public function getHTMLFormatted($config, $errors = null) { $ret = array(); $this->generator = new HTMLPurifier_Generator($config, $this->context); if ($errors === null) { $errors = $this->errors; } // 'At line' message needs to be removed // generation code for new structure goes here. It needs to be recursive. foreach ($this->lines as $line => $col_array) { if ($line == -1) { continue; } foreach ($col_array as $col => $struct) { $this->_renderStruct($ret, $struct, $line, $col); } } if (isset($this->lines[-1])) { $this->_renderStruct($ret, $this->lines[-1]); } if (empty($errors)) { return '

' . $this->locale->getMessage('ErrorCollector: No errors') . '

'; } else { return ''; } } private function _renderStruct(&$ret, $struct, $line = null, $col = null) { $stack = array($struct); $context_stack = array(array()); while ($current = array_pop($stack)) { $context = array_pop($context_stack); foreach ($current->errors as $error) { list($severity, $msg) = $error; $string = ''; $string .= '
'; // W3C uses an icon to indicate the severity of the error. $error = $this->locale->getErrorName($severity); $string .= "$error "; if (!is_null($line) && !is_null($col)) { $string .= "Line $line, Column $col: "; } else { $string .= 'End of Document: '; } $string .= '' . $this->generator->escape($msg) . ' '; $string .= '
'; // Here, have a marker for the character on the column appropriate. // Be sure to clip extremely long lines. //$string .= '
';
                //$string .= '';
                //$string .= '
'; $ret[] = $string; } foreach ($current->children as $array) { $context[] = $current; $stack = array_merge($stack, array_reverse($array, true)); for ($i = count($array); $i > 0; $i--) { $context_stack[] = $context; } } } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ErrorStruct.php0000644000175000017500000000354513512700112020703 0ustar ezyangezyangchildren[$type][$id])) { $this->children[$type][$id] = new HTMLPurifier_ErrorStruct(); $this->children[$type][$id]->type = $type; } return $this->children[$type][$id]; } /** * @param int $severity * @param string $message */ public function addError($severity, $message) { $this->errors[] = array($severity, $message); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Exception.php0000644000175000017500000000026113512700112020333 0ustar ezyangezyangpreFilter, * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter, * 1->postFilter. * * @note Methods are not declared abstract as it is perfectly legitimate * for an implementation not to want anything to happen on a step */ class HTMLPurifier_Filter { /** * Name of the filter for identification purposes. * @type string */ public $name; /** * Pre-processor function, handles HTML before HTML Purifier * @param string $html * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return string */ public function preFilter($html, $config, $context) { return $html; } /** * Post-processor function, handles HTML after HTML Purifier * @param string $html * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return string */ public function postFilter($html, $config, $context) { return $html; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Generator.php0000644000175000017500000002401313512700112020324 0ustar ezyangezyang tags. * @type bool */ private $_scriptFix = false; /** * Cache of HTMLDefinition during HTML output to determine whether or * not attributes should be minimized. 
* @type HTMLPurifier_HTMLDefinition */ private $_def; /** * Cache of %Output.SortAttr. * @type bool */ private $_sortAttr; /** * Cache of %Output.FlashCompat. * @type bool */ private $_flashCompat; /** * Cache of %Output.FixInnerHTML. * @type bool */ private $_innerHTMLFix; /** * Stack for keeping track of object information when outputting IE * compatibility code. * @type array */ private $_flashStack = array(); /** * Configuration for the generator * @type HTMLPurifier_Config */ protected $config; /** * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context */ public function __construct($config, $context) { $this->config = $config; $this->_scriptFix = $config->get('Output.CommentScriptContents'); $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); $this->_sortAttr = $config->get('Output.SortAttr'); $this->_flashCompat = $config->get('Output.FlashCompat'); $this->_def = $config->getHTMLDefinition(); $this->_xhtml = $this->_def->doctype->xml; } /** * Generates HTML from an array of tokens. * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token * @return string Generated HTML */ public function generateFromTokens($tokens) { if (!$tokens) { return ''; } // Basic algorithm $html = ''; for ($i = 0, $size = count($tokens); $i < $size; $i++) { if ($this->_scriptFix && $tokens[$i]->name === 'script' && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { // script special case // the contents of the script block must be ONE token // for this to work. 
$html .= $this->generateFromToken($tokens[$i++]); $html .= $this->generateScriptFromToken($tokens[$i++]); } $html .= $this->generateFromToken($tokens[$i]); } // Tidy cleanup if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { $tidy = new Tidy; $tidy->parseString( $html, array( 'indent'=> true, 'output-xhtml' => $this->_xhtml, 'show-body-only' => true, 'indent-spaces' => 2, 'wrap' => 68, ), 'utf8' ); $tidy->cleanRepair(); $html = (string) $tidy; // explicit cast necessary } // Normalize newlines to system defined value if ($this->config->get('Core.NormalizeNewlines')) { $nl = $this->config->get('Output.Newline'); if ($nl === null) { $nl = PHP_EOL; } if ($nl !== "\n") { $html = str_replace("\n", $nl, $html); } } return $html; } /** * Generates HTML from a single token. * @param HTMLPurifier_Token $token HTMLPurifier_Token object. * @return string Generated HTML */ public function generateFromToken($token) { if (!$token instanceof HTMLPurifier_Token) { trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); return ''; } elseif ($token instanceof HTMLPurifier_Token_Start) { $attr = $this->generateAttributes($token->attr, $token->name); if ($this->_flashCompat) { if ($token->name == "object") { $flash = new stdClass(); $flash->attr = $token->attr; $flash->param = array(); $this->_flashStack[] = $flash; } } return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; } elseif ($token instanceof HTMLPurifier_Token_End) { $_extra = ''; if ($this->_flashCompat) { if ($token->name == "object" && !empty($this->_flashStack)) { // doesn't do anything for now } } return $_extra . 'name . '>'; } elseif ($token instanceof HTMLPurifier_Token_Empty) { if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; } $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . 
($attr ? ' ' : '') . $attr . ( $this->_xhtml ? ' /': '' ) //
v.
. '>'; } elseif ($token instanceof HTMLPurifier_Token_Text) { return $this->escape($token->data, ENT_NOQUOTES); } elseif ($token instanceof HTMLPurifier_Token_Comment) { return ''; } else { return ''; } } /** * Special case processor for the contents of script tags * @param HTMLPurifier_Token $token HTMLPurifier_Token object. * @return string * @warning This runs into problems if there's already a literal * --> somewhere inside the script contents. */ public function generateScriptFromToken($token) { if (!$token instanceof HTMLPurifier_Token_Text) { return $this->generateFromToken($token); } // Thanks $data = preg_replace('#//\s*$#', '', $token->data); return ''; } /** * Generates attribute declarations from attribute array. * @note This does not include the leading or trailing space. * @param array $assoc_array_of_attributes Attribute array * @param string $element Name of element attributes are for, used to check * attribute minimization. * @return string Generated HTML fragment for insertion. */ public function generateAttributes($assoc_array_of_attributes, $element = '') { $html = ''; if ($this->_sortAttr) { ksort($assoc_array_of_attributes); } foreach ($assoc_array_of_attributes as $key => $value) { if (!$this->_xhtml) { // Remove namespaced attributes if (strpos($key, ':') !== false) { continue; } // Check if we should minimize the attribute: val="val" -> val if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { $html .= $key . ' '; continue; } } // Workaround for Internet Explorer innerHTML bug. // Essentially, Internet Explorer, when calculating // innerHTML, omits quotes if there are no instances of // angled brackets, quotes or spaces. However, when parsing // HTML (for example, when you assign to innerHTML), it // treats backticks as quotes. Thus, // `` // becomes // `` // becomes // // Fortunately, all we need to do is trigger an appropriate // quoting style, which we do by adding an extra space. 
// This also is consistent with the W3C spec, which states // that user agents may ignore leading or trailing // whitespace (in fact, most don't, at least for attributes // like alt, but an extra space at the end is barely // noticeable). Still, we have a configuration knob for // this, since this transformation is not necesary if you // don't process user input with innerHTML or you don't plan // on supporting Internet Explorer. if ($this->_innerHTMLFix) { if (strpos($value, '`') !== false) { // check if correct quoting style would not already be // triggered if (strcspn($value, '"\' <>') === strlen($value)) { // protect! $value .= ' '; } } } $html .= $key.'="'.$this->escape($value).'" '; } return rtrim($html); } /** * Escapes raw text data. * @todo This really ought to be protected, but until we have a facility * for properly generating HTML here w/o using tokens, it stays * public. * @param string $string String data to escape for HTML. * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is * permissible for non-attribute output. * @return string escaped data. */ public function escape($string, $quote = null) { // Workaround for APC bug on Mac Leopard reported by sidepodcast // http://htmlpurifier.org/phorum/read.php?3,4823,4846 if ($quote === null) { $quote = ENT_COMPAT; } return htmlspecialchars($string, $quote, 'UTF-8'); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLDefinition.php0000644000175000017500000004252313512700112021161 0ustar ezyangezyanggetAnonymousModule(); if (!isset($module->info[$element_name])) { $element = $module->addBlankElement($element_name); } else { $element = $module->info[$element_name]; } $element->attr[$attr_name] = $def; } /** * Adds a custom element to your HTML definition * @see HTMLPurifier_HTMLModule::addElement() for detailed * parameter and return value descriptions. 
*/
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
    {
        // Delegates straight to the anonymous module. We assume that an
        // element the user registers explicitly is safe; this may not be
        // a good idea.
        return $this->getAnonymousModule()->addElement(
            $element_name,
            $type,
            $contents,
            $attr_collections,
            $attributes
        );
    }

    /**
     * Registers an empty element definition on the anonymous module so
     * that existing behavior can be overridden.
     * @param string $element_name
     * @return HTMLPurifier_ElementDef
     * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addBlankElement($element_name)
    {
        return $this->getAnonymousModule()->addBlankElement($element_name);
    }

    /**
     * Returns the anonymous module, creating it (named 'Anonymous') on
     * first use, so advanced features can be used without writing a
     * dedicated module.
     * @return HTMLPurifier_HTMLModule
     */
    public function getAnonymousModule()
    {
        if ($this->_anonModule) {
            return $this->_anonModule;
        }
        $anonymous = new HTMLPurifier_HTMLModule();
        $anonymous->name = 'Anonymous';
        $this->_anonModule = $anonymous;
        return $this->_anonModule;
    }

    private $_anonModule = null;

    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    /**
     * @type string
     */
    public $type = 'HTML';

    /**
     * @type HTMLPurifier_HTMLModuleManager
     */
    public $manager;

    /**
     * Performs low-cost, preliminary initialization.
*/
    public function __construct()
    {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Runs module processing and config-driven setup, then discards the
     * manager and the content-model fields of every element definition.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $this->processModules($config);
        $this->setupConfigStuff($config);
        unset($this->manager);

        // the content model fields are dropped from each element definition
        foreach ($this->info as $element_def) {
            unset($element_def->content_model);
            unset($element_def->content_model_type);
        }
    }

    /**
     * Extract out the information from the manager
     * @param HTMLPurifier_Config $config
     */
    protected function processModules($config)
    {
        if ($this->_anonModule) {
            // for user specific changes; the anonymous module is registered
            // this late so we don't have to deal with PHP4 reference
            // wonky-ness
            $this->manager->addModule($this->_anonModule);
            unset($this->_anonModule);
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Per-module tables merged into the definition, in this order.
        // A value of false removes the entry, anything else overwrites it.
        $merged_fields = array(
            'info_tag_transform',
            'info_attr_transform_pre',
            'info_attr_transform_post',
            'info_injector',
        );
        foreach ($this->manager->modules as $module) {
            foreach ($merged_fields as $field) {
                foreach ($module->$field as $key => $value) {
                    if ($value !== false) {
                        $this->{$field}[$key] = $value;
                        continue;
                    }
                    unset($this->{$field}[$key]);
                }
            }
        }

        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;
    }

    /**
     * Sets up stuff based on config. We need a better way of doing this.
*
     * Stages, in order: block wrapper, parent element, allowed elements,
     * allowed attributes, forbidden elements/attributes, and finally
     * pruning of injectors. Problems are reported through trigger_error().
     *
     * @param HTMLPurifier_Config $config
     */
    protected function setupConfigStuff($config)
    {
        // The block wrapper must be a member of the 'Block' content set.
        $block_wrapper = $config->get('HTML.BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            trigger_error(
                'Cannot use non-block element as block wrapper',
                E_USER_ERROR
            );
        }

        // The parent element must be known to the module manager.
        $parent = $config->get('HTML.Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error(
                'Cannot use unrecognized element as parent',
                E_USER_ERROR
            );
            // falls back to whatever $this->info_parent currently holds
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        $support = "(for information on implementing this, see the support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML.AllowedElements');
        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither specific list is set.
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML.Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) {
                    unset($this->info[$name]);
                }
                // consume the entry; whatever is left over afterwards
                // named an element that does not exist in $this->info
                unset($allowed_elements[$name]);
            }
            // emit errors
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // The mutable copy is whittled down as keys are matched; its
        // leftovers are the entries reported as unsupported below.
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {
            // This actually doesn't do anything, since we went away from
            // global attributes. It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    unset($this->info_global_attr[$attr]);
                }
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    // every spelling under which this attribute may have
                    // been allowed
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        if (isset($allowed_attributes_mutable[$key])) {
                            unset($allowed_attributes_mutable[$key]);
                        }
                    }
                    if ($delete) {
                        if ($this->info[$tag]->attr[$attr]->required) {
                            trigger_error(
                                "Required attribute '$attr' in element '$tag' " .
                                "was not allowed, which means '$tag' will not be allowed either",
                                E_USER_WARNING
                            );
                        }
                        unset($this->info[$tag]->attr[$attr]);
                    }
                }
            }
            // emit errors
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                // split "element.attr" / "element@attr" into at most two parts
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                switch ($c) {
                    case 2:
                        if ($bits[0] !== '*') {
                            $element = htmlspecialchars($bits[0]);
                            $attribute = htmlspecialchars($bits[1]);
                            if (!isset($this->info[$element])) {
                                // NOTE(review): no error level is passed
                                // here, so trigger_error() uses its default
                                // rather than E_USER_WARNING like the
                                // sibling branches; confirm this is intended.
                                trigger_error(
                                    "Cannot allow attribute '$attribute' if element " .
                                    "'$element' is not allowed/supported $support"
                                );
                            } else {
                                trigger_error(
                                    "Attribute '$attribute' in element '$element' not supported $support",
                                    E_USER_WARNING
                                );
                            }
                            break;
                        }
                        // otherwise fall through
                    case 1:
                        $attribute = htmlspecialchars($bits[0]);
                        trigger_error(
                            "Global attribute '$attribute' is not ".
                            "supported in any elements $support",
                            E_USER_WARNING
                        );
                        break;
                }
            }
        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements = $config->get('HTML.ForbiddenElements');
        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error(
                        "Error with $tag.$attr: tag.attr syntax not supported for " .
                        "HTML.ForbiddenAttributes; use tag@attr instead",
                        E_USER_WARNING
                    );
                }
            }
        }
        // warn about "*.attr" keys, which are not honoured above
        foreach ($forbidden_attributes as $key => $v) {
            if (strlen($key) < 2) {
                continue;
            }
            if ($key[0] != '*') {
                continue;
            }
            if ($key[1] == '.') {
                trigger_error(
                    "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
                    E_USER_WARNING
                );
            }
        }

        // setup injectors -----------------------------------------------------
        foreach ($this->info_injector as $i => $injector) {
            // any result other than false from checkNeeded() drops the injector
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }

    /**
     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
     * separate lists for processing. Format is element[attr1|attr2],element2...
* @warning Although it's largely drawn from TinyMCE's implementation, * it is different, and you'll probably have to modify your lists * @param array $list String list to parse * @return array * @todo Give this its own class, probably static interface */ public function parseTinyMCEAllowedList($list) { $list = str_replace(array(' ', "\t"), '', $list); $elements = array(); $attributes = array(); $chunks = preg_split('/(,|[\n\r]+)/', $list); foreach ($chunks as $chunk) { if (empty($chunk)) { continue; } // remove TinyMCE element control characters if (!strpos($chunk, '[')) { $element = $chunk; $attr = false; } else { list($element, $attr) = explode('[', $chunk); } if ($element !== '*') { $elements[$element] = true; } if (!$attr) { continue; } $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] $attr = explode('|', $attr); foreach ($attr as $key) { $attributes["$element.$key"] = true; } } return array($elements, $attributes); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule.php0000644000175000017500000002363713512700112020323 0ustar ezyangezyanginfo, since the object's data is only info, * with extra behavior associated with it. * @type array */ public $attr_collections = array(); /** * Associative array of deprecated tag name to HTMLPurifier_TagTransform. * @type array */ public $info_tag_transform = array(); /** * List of HTMLPurifier_AttrTransform to be performed before validation. * @type array */ public $info_attr_transform_pre = array(); /** * List of HTMLPurifier_AttrTransform to be performed after validation. * @type array */ public $info_attr_transform_post = array(); /** * List of HTMLPurifier_Injector to be performed during well-formedness fixing. * An injector will only be invoked if all of it's pre-requisites are met; * if an injector fails setup, there will be no error; it will simply be * silently disabled. 
* @type array
     */
    public $info_injector = array();

    /**
     * Flag stating whether this module implements getChildDef().
     * It exists for optimization reasons (it may save a function call).
     * Set it whenever you implement getChildDef(); otherwise the
     * implementation will have no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Flag stating whether this module is safe. When it is false, all of
     * the module's members are considered unsafe. Modules are safe by
     * default (slightly dangerous, but it makes little sense to force
     * HTML Purifier, which is built around safe HTML, to spell out
     * "this is safe" everywhere just because some unsafe modules exist).
     *
     * @type bool
     * @note Safety used to be configurable per element. That ability was
     *       removed, so "unsafe" elements or attributes must live in a
     *       dedicated module that sets this property to false.
     */
    public $safe = true;

    /**
     * Hook for producing a HTMLPurifier_ChildDef subclass from the
     * content_model and content_model_type members of an
     * HTMLPurifier_ElementDef. A similar function exists in
     * HTMLPurifier_HTMLDefinition. This base implementation always
     * returns false.
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef subclass
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     * @param array $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;

        // split "$type: $model" (yields two nulls for a non-string $contents)
        list($model_type, $model) = $this->parseContents($contents);

        // attribute inclusions are stored under key 0 of $attr
        $this->mergeInAttrIncludes($attr, $attr_includes);

        if ($type) {
            $this->addElementToContentSet($element, $type);
        }

        $def = HTMLPurifier_ElementDef::create($model, $model_type, $attr);
        $this->info[$element] = $def;

        // a non-string $contents is used directly as the child definition
        if (!is_string($contents)) {
            $def->child = $contents;
        }

        return $def;
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. Raises an error if the element is already defined in this
     * module; the existing definition is returned in that case.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created element
     */
    public function addBlankElement($element)
    {
        if (isset($this->info[$element])) {
            trigger_error("Definition for $element already exists in module, cannot redefine");
            return $this->info[$element];
        }

        $this->elements[] = $element;
        $blank = new HTMLPurifier_ElementDef();
        $blank->standalone = false;
        $this->info[$element] = $blank;

        return $blank;
    }

    /**
     * Convenience function that registers an element to a content set
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        // members of a content set are joined with ' | '
        $existing = isset($this->content_sets[$type])
            ? $this->content_sets[$type] . ' | '
            : '';
        $this->content_sets[$type] = $existing . $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     * @return array
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the callee needs to take the original $contents
     *       and use it directly.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null); // defer
        }

        // shorthand content model forms
        $shorthand = array(
            'Empty' => array('empty', ''),
            'Inline' => array('optional', 'Inline | #PCDATA'),
            'Flow' => array('optional', 'Flow | #PCDATA'),
        );
        if (isset($shorthand[$contents])) {
            return $shorthand[$contents];
        }

        $parts = explode(':', $contents);
        $model_type = strtolower(trim($parts[0]));
        $model = trim($parts[1]);

        return array($model_type, $model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array.
     * @param array $attr Reference to attr array to modify
     * @param array $attr_includes Array of includes / string include to merge in
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            // a lone include becomes a one-item list; an empty value, no list
            $attr_includes = empty($attr_includes) ? array() : array($attr_includes);
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param string $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @return array array equivalent of list
     */
    public function makeLookup($list)
    {
        $values = $list;
        if (is_string($list)) {
            // called variadically: every argument is a value
            $values = func_get_args();
        }

        $lookup = array();
        foreach ($values as $value) {
            if ($value !== null) {
                $lookup[$value] = true;
            }
        }

        return $lookup;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available.
* @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/HTMLModuleManager.php0000644000175000017500000003711213512700112021607 0ustar ezyangezyangattrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional',
            false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict',
            false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional',
            true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Strict',
            true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );
    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     * @param $module Mixed: string module name, with or without
     *                HTMLPurifier_HTMLModule prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @param $overload Boolean whether or not to overload previous modules.
     *                  If this is not set, and you do overload a module,
     *                  HTML Purifier will complain with a warning.
     * @note A string name is resolved by trying every registered prefix
     *       (in the order they were added) followed by $name, and then
     *       the literal name itself. If nothing resolves, an
     *       E_USER_ERROR is raised and nothing is registered.
     * @note If your object name collides with an internal class, specify
     *       your module manually. All modules must have been included
     *       externally: registerModule will not perform inclusions for you!
     */
    public function registerModule($module, $overload = false)
    {
        if (is_string($module)) {
            // resolve the name to a class: prefixed variants first
            $class = null;
            foreach ($this->prefixes as $prefix) {
                if (class_exists($prefix . $module)) {
                    $class = $prefix . $module;
                    break;
                }
            }
            // ...then the literal name
            if ($class === null) {
                if (!class_exists($module)) {
                    trigger_error(
                        $module . ' module does not exist',
                        E_USER_ERROR
                    );
                    return;
                }
                $class = $module;
            }
            $module = new $class();
        }

        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }

        // replacing an existing module is allowed, but warned about unless
        // the caller asked for it
        if (!$overload && isset($this->registeredModules[$module->name])) {
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking it on to the active doctype
     */
    public function addModule($module)
    {
        $this->registerModule($module);
        // user modules are tracked by name
        $this->userModules[] = is_object($module) ? $module->name : $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     */
    public function addPrefix($prefix)
    {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $allowed = $config->get('HTML.AllowedModules');
        $core = $config->get('HTML.CoreModules');
        if (is_array($allowed)) {
            foreach ($modules as $k => $m) {
                // core modules are kept regardless of the allow list
                if (!isset($core[$m]) && !isset($allowed[$m])) {
                    unset($modules[$k]);
                }
            }
        }

        // custom modules, switched on by a truthy directive
        $leading = array(
            'HTML.Proprietary' => 'Proprietary',
            'HTML.SafeObject' => 'SafeObject',
            'HTML.SafeEmbed' => 'SafeEmbed',
        );
        foreach ($leading as $directive => $module_name) {
            if ($config->get($directive)) {
                $modules[] = $module_name;
            }
        }
        // SafeScripting is enabled by any value other than an empty array
        if ($config->get('HTML.SafeScripting') !== array()) {
            $modules[] = 'SafeScripting';
        }
        // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank
        // so that its post-attr-transform gets run afterwards.
        $trailing = array(
            'HTML.Nofollow' => 'Nofollow',
            'HTML.TargetBlank' => 'TargetBlank',
            'HTML.TargetNoreferrer' => 'TargetNoreferrer',
            'HTML.TargetNoopener' => 'TargetNoopener',
        );
        foreach ($trailing as $directive => $module_name) {
            if ($config->get($directive)) {
                $modules[] = $module_name;
            }
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors: instantiate those given by name and
        // key all of them by injector name
        foreach ($this->modules as $module) {
            $by_name = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $by_name[$injector->name] = $injector;
            }
            $module->info_injector = $by_name;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     */
    public function processModule($module)
    {
        $needs_registration = is_object($module) || !isset($this->registeredModules[$module]);
        if ($needs_registration) {
            $this->registerModule($module);
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Retrieves merged element definitions.
* @return Array of HTMLPurifier_ElementDef
     */
    public function getElements()
    {
        $elements = array();
        foreach ($this->modules as $module) {
            // unsafe modules only contribute in trusted mode
            if ($this->trusted || $module->safe) {
                foreach ($module->info as $name => $unused) {
                    if (!isset($elements[$name])) {
                        $elements[$name] = $this->getElement($name);
                    }
                }
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach (array_keys($elements, false, true) as $dud) {
            unset($elements[$dud]);
        }

        return $elements;
    }

    /**
     * Retrieves a single merged element definition
     * @param string $name Name of element
     * @param bool $trusted Boolean trusted overriding parameter: set to true
     *                       if you want the full version of an element
     * @return HTMLPurifier_ElementDef Merged HTMLPurifier_ElementDef, or
     *         false when no usable definition exists
     * @note Modules are iterated over twice: once in getElements() and
     *       once here.
     */
    public function getElement($name, $trusted = null)
    {
        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // fall back to the manager-wide trust setting
        if ($trusted === null) {
            $trusted = $this->trusted;
        }

        $def = false;

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {
            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // work on a clone so the module's own definition is never
            // modified
            $candidate = clone $module->info[$name];

            if ($def) {
                // This will occur even if $candidate is standalone. In
                // practice, this will usually result in a full replacement.
                $def->mergeIn($candidate);
            } elseif ($candidate->standalone) {
                $def = $candidate;
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place.  In particular, this might
                // be the case for trusted elements.  WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            // (ins/del are deliberately left out)
            if (is_string($def->content_model)
                && strpos($def->content_model, 'Inline') !== false
                && $name != 'del'
                && $name != 'ins'
            ) {
                $def->descendants_are_inline = true;
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }

        return $def;
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/IDAccumulator.php0000644000175000017500000000315713512700112021100 0ustar ezyangezyangload($config->get('Attr.IDBlacklist'));
        return $id_accumulator;
    }

    /**
     * Add an ID to the lookup table.
     * @param string $id ID to be added.
* @return bool status, true if success, false if there's a dupe
     */
    public function add($id)
    {
        if (isset($this->ids[$id])) {
            return false;
        }
        $this->ids[$id] = true;
        return true;
    }

    /**
     * Load a list of IDs into the lookup table
     * @param $array_of_ids Array of IDs to load
     * @note This function doesn't care about duplicates
     */
    public function load($array_of_ids)
    {
        foreach ($array_of_ids as $known_id) {
            $this->ids[$known_id] = true;
        }
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/Injector.php0000644000175000017500000002145613512700112020163 0ustar ezyangezyangprocessToken()
 * documentation.
 *
 * @todo Allow injectors to request a re-run on their output. This
 *       would help if an operation is recursive.
 */
abstract class HTMLPurifier_Injector
{

    /**
     * Advisory name of injector, this is for friendly error messages.
     * @type string
     */
    public $name;

    /**
     * @type HTMLPurifier_HTMLDefinition
     */
    protected $htmlDefinition;

    /**
     * Reference to CurrentNesting variable in Context. This is an array
     * list of tokens that we are currently "inside"
     * @type array
     */
    protected $currentNesting;

    /**
     * Reference to current token.
     * @type HTMLPurifier_Token
     */
    protected $currentToken;

    /**
     * Reference to InputZipper variable in Context.
     * @type HTMLPurifier_Zipper
     */
    protected $inputZipper;

    /**
     * Array of elements and attributes this injector creates and therefore
     * need to be allowed by the definition. Takes form of
     * array('element' => array('attr', 'attr2'), 'element2')
     * @type array
     */
    public $needed = array();

    /**
     * Number of elements to rewind backwards (relative).
     * @type bool|int
     */
    protected $rewindOffset = false;

    /**
     * Rewind to a spot to re-perform processing. This is useful if you
     * deleted a node, and now need to see if this change affected any
     * earlier nodes. Rewinding does not affect other injectors, and can
     * result in infinite loops if not used carefully.
     * @param bool|int $offset
     * @warning HTML Purifier will prevent you from fast-forwarding with this
     *          function.
*/
    public function rewindOffset($offset)
    {
        $this->rewindOffset = $offset;
    }

    /**
     * Returns the pending rewind offset and clears it, so a request is
     * handed out only once.
     * @return bool|int
     */
    public function getRewindOffset()
    {
        $pending = $this->rewindOffset;
        $this->rewindOffset = false;
        return $pending;
    }

    /**
     * Prepares the injector by giving it the config and context objects:
     * this allows references to important variables to be made within
     * the injector. This function also checks if the HTML environment
     * will work with the Injector (see checkNeeded()).
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function prepare($config, $context)
    {
        $this->htmlDefinition = $config->getHTMLDefinition();

        // Even though this might fail, some unit tests ignore this and
        // still test checkNeeded, so be careful. Maybe get rid of that
        // dependency.
        $missing = $this->checkNeeded($config);
        if ($missing === false) {
            // environment is fine: bind to the live context variables
            $this->currentNesting =& $context->get('CurrentNesting');
            $this->currentToken =& $context->get('CurrentToken');
            $this->inputZipper =& $context->get('InputZipper');
        }

        return $missing;
    }

    /**
     * This function checks if the HTML environment
     * will work with the Injector: if p tags are not allowed, the
     * Auto-Paragraphing injector should not be enabled.
     * @param HTMLPurifier_Config $config
     * @return bool|string Boolean false if success, string of missing needed element/attribute if failure
     */
    public function checkNeeded($config)
    {
        $def = $config->getHTMLDefinition();
        foreach ($this->needed as $element => $attributes) {
            // a list-style entry names an element without attributes
            if (is_int($element)) {
                $element = $attributes;
            }
            if (!isset($def->info[$element])) {
                return $element;
            }
            if (is_array($attributes)) {
                foreach ($attributes as $name) {
                    if (!isset($def->info[$element]->attr[$name])) {
                        return $element . '.' . $name;
                    }
                }
            }
        }
        return false;
    }

    /**
     * Tests if the context node allows a certain element
     * @param string $name Name of element to test for
     * @return bool True if element is allowed, false if it is not
     */
    public function allowsElement($name)
    {
        $depth = empty($this->currentNesting) ? 0 : count($this->currentNesting);

        // the innermost open element is the parent; with nothing open,
        // the definition's parent element is
        if ($depth > 0) {
            $parent_token = $this->currentNesting[$depth - 1];
            $parent = $this->htmlDefinition->info[$parent_token->name];
        } else {
            $parent = $this->htmlDefinition->info_parent_def;
        }

        $permitted = isset($parent->child->elements[$name]) && !isset($parent->excludes[$name]);
        if (!$permitted) {
            return false;
        }

        // check for exclusion by any of the outer ancestors
        for ($i = $depth - 2; $i >= 0; $i--) {
            $ancestor = $this->currentNesting[$i];
            $ancestor_def = $this->htmlDefinition->info[$ancestor->name];
            if (isset($ancestor_def->excludes[$name])) {
                return false;
            }
        }

        return true;
    }

    /**
     * Iterator function, which starts with the next token and continues until
     * you reach the end of the input tokens.
     * @warning Please prevent previous references from interfering with this
     *          functions by setting $i = null beforehand!
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @return bool
     */
    protected function forward(&$i, &$current)
    {
        // a null index starts at the top of the zipper's back list,
        // afterwards the index counts down
        $i = ($i === null) ? count($this->inputZipper->back) - 1 : $i - 1;
        if ($i < 0) {
            return false;
        }
        $current = $this->inputZipper->back[$i];
        return true;
    }

    /**
     * Similar to forward(), but accepts a third parameter $nesting (which
     * should be initialized at 0) and stops when we hit the end tag
     * for the node $this->inputIndex starts in.
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
     *          Do NOT use $token, as that variable is also a reference
     * @param int $nesting
     * @return bool
     */
    protected function forwardUntilEndToken(&$i, &$current, &$nesting)
    {
        if (!$this->forward($i, $current)) {
            return false;
        }
        if ($nesting === null) {
            $nesting = 0;
        }
        if ($current instanceof HTMLPurifier_Token_Start) {
            $nesting++;
            return true;
        }
        if ($current instanceof HTMLPurifier_Token_End) {
            // an end tag with nothing open closes the starting node
            if ($nesting <= 0) {
                return false;
            }
            $nesting--;
        }
        return true;
    }

    /**
     * Iterator function, starts with the previous token and continues until
     * you reach the beginning of input tokens.
     * @warning Please prevent previous references from interfering with this
     *          functions by setting $i = null beforehand!
     * @param int $i Current integer index variable for inputTokens
     * @param HTMLPurifier_Token $current Current token variable.
*          Do NOT use $token, as that variable is also a reference
     * @return bool
     */
    protected function backward(&$i, &$current)
    {
        // a null index starts at the top of the zipper's front list,
        // afterwards the index counts down
        $i = ($i === null) ? count($this->inputZipper->front) - 1 : $i - 1;
        if ($i < 0) {
            return false;
        }
        $current = $this->inputZipper->front[$i];
        return true;
    }

    /**
     * Handler that is called when a text token is processed
     */
    public function handleText(&$token)
    {
    }

    /**
     * Handler that is called when a start or empty token is processed
     */
    public function handleElement(&$token)
    {
    }

    /**
     * Handler that is called when an end token is processed.
     * The default implementation forwards to notifyEnd().
     */
    public function handleEnd(&$token)
    {
        $this->notifyEnd($token);
    }

    /**
     * Notifier that is called when an end token is processed
     * @param HTMLPurifier_Token $token Current token variable.
     * @note This differs from handlers in that the token is read-only
     * @deprecated
     */
    public function notifyEnd($token)
    {
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/Language.php0000644000175000017500000001365613512700112020134 0ustar ezyangezyangconfig = $config;
        $this->context = $context;
    }

    /**
     * Loads language object with necessary info from factory cache
     * @note This is a lazy loader
     */
    public function load()
    {
        if ($this->_loaded) {
            return;
        }

        $factory = HTMLPurifier_LanguageFactory::instance();
        $factory->loadLanguage($this->code);

        // copy every key the factory knows about onto this object
        $loaded = $factory->cache[$this->code];
        foreach ($factory->keys as $key) {
            $this->$key = $loaded[$key];
        }

        $this->_loaded = true;
    }

    /**
     * Retrieves a localised message.
     * @param string $key string identifier of message
     * @return string localised message, or "[$key]" when none is defined
     */
    public function getMessage($key)
    {
        if (!$this->_loaded) {
            $this->load();
        }
        return isset($this->messages[$key]) ? $this->messages[$key] : "[$key]";
    }

    /**
     * Retrieves a localised error name.
* @param int $int error number, corresponding to PHP's error reporting
     * @return string localised message
     */
    public function getErrorName($int)
    {
        if (!$this->_loaded) {
            $this->load();
        }
        return isset($this->errorNames[$int]) ? $this->errorNames[$int] : "[Error: $int]";
    }

    /**
     * Converts an array list into a string readable representation
     * @param array $array
     * @return string
     */
    public function listify($array)
    {
        $sep = $this->getMessage('Item separator');
        $sep_last = $this->getMessage('Item separator last');

        $ret = '';
        $c = count($array);
        for ($i = 0; $i < $c; $i++) {
            if ($i > 0) {
                // the final pair of items gets the "last" separator
                $ret .= ($i + 1 < $c) ? $sep : $sep_last;
            }
            $ret .= $array[$i];
        }
        return $ret;
    }

    /**
     * Formats a localised message with passed parameters
     * @param string $key string identifier of message
     * @param array $args Parameters to substitute in
     * @return string localised message
     * @todo Implement conditionals? Right now, some messages make
     *     reference to line numbers, but those aren't always available
     */
    public function formatMessage($key, $args = array())
    {
        if (!$this->_loaded) {
            $this->load();
        }
        if (!isset($this->messages[$key])) {
            return "[$key]";
        }
        $raw = $this->messages[$key];

        $subst = array();
        $generator = false;
        foreach ($args as $i => $value) {
            $placeholder = '$' . $i;

            if (is_object($value)) {
                // only tokens produce placeholders, other objects are skipped
                if ($value instanceof HTMLPurifier_Token) {
                    // factor this out some time
                    if (!$generator) {
                        $generator = $this->context->get('Generator');
                    }
                    if (isset($value->name)) {
                        $subst[$placeholder . '.Name'] = $value->name;
                    }
                    if (isset($value->data)) {
                        $subst[$placeholder . '.Data'] = $value->data;
                    }

                    $serialized = $generator->generateFromToken($value);
                    $subst[$placeholder . '.Serialized'] = $serialized;
                    $subst[$placeholder . '.Compact'] = $serialized;

                    // a more complex algorithm for compact representation
                    // could be introduced for all types of tokens. This
                    // may need to be factored out into a dedicated class
                    if (!empty($value->attr)) {
                        $stripped_token = clone $value;
                        $stripped_token->attr = array();
                        $subst[$placeholder . '.Compact'] = $generator->generateFromToken($stripped_token);
                    }

                    $subst[$placeholder . '.Line'] = $value->line ? $value->line : 'unknown';
                }
            } elseif (is_array($value)) {
                $keys = array_keys($value);
                if (array_keys($keys) === $keys) {
                    // list
                    $subst[$placeholder] = $this->listify($value);
                } else {
                    // associative array
                    // no $i implementation yet, sorry
                    $subst[$placeholder . '.Keys'] = $this->listify($keys);
                    $subst[$placeholder . '.Values'] = $this->listify(array_values($value));
                }
            } else {
                $subst[$placeholder] = $value;
            }
        }

        return strtr($raw, $subst);
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/LanguageFactory.php0000644000175000017500000001473213512700112021460 0ustar ezyangezyangcache[$language_code][$key] = $value
     * @type array
     */
    public $cache;

    /**
     * Valid keys in the HTMLPurifier_Language object. Designates which
     * variables to slurp out of a message file.
     * @type array
     */
    public $keys = array('fallback', 'messages', 'errorNames');

    /**
     * Instance to validate language codes.
     * @type HTMLPurifier_AttrDef_Lang
     *
     */
    protected $validator;

    /**
     * Cached copy of dirname(__FILE__), directory of current file without
     * trailing slash.
     * @type string
     */
    protected $dir;

    /**
     * Keys whose contents are a hash map and can be merged.
     * @type array
     */
    protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true);

    /**
     * Keys whose contents are a list and can be merged.
     * @value array lookup
     */
    protected $mergeable_keys_list = array();

    /**
     * Retrieve sole instance of the factory.
     * @param HTMLPurifier_LanguageFactory $prototype Optional prototype to overload sole instance with,
     *                   or bool true to reset to default factory.
* @return HTMLPurifier_LanguageFactory
     */
    public static function instance($prototype = null)
    {
        static $instance = null;
        if ($prototype === true || ($prototype === null && $instance === null)) {
            // boolean true asks for a fresh default factory; it must be
            // tested before the generic prototype branch, which would
            // otherwise store the literal true as the "instance"
            $instance = new HTMLPurifier_LanguageFactory();
            $instance->setup();
        } elseif ($prototype !== null) {
            $instance = $prototype;
        }
        return $instance;
    }

    /**
     * Sets up the singleton, much like a constructor
     * @note Prevents people from getting this outside of the singleton
     */
    public function setup()
    {
        $this->validator = new HTMLPurifier_AttrDef_Lang();
        $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier';
    }

    /**
     * Creates a language object, handles class fallbacks
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @param bool|string $code Code to override configuration with. Private parameter.
     * @return HTMLPurifier_Language
     */
    public function create($config, $context, $code = false)
    {
        // validate language code
        if ($code === false) {
            $code = $this->validator->validate(
                $config->get('Core.Language'),
                $config,
                $context
            );
        } else {
            $code = $this->validator->validate($code, $config, $context);
        }
        if ($code === false) {
            $code = 'en'; // malformed code becomes English
        }

        $pcode = str_replace('-', '_', $code); // make valid PHP classname
        // recursion depth counter; it is maintained below but not
        // consulted anywhere in this method
        static $depth = 0;

        if ($code == 'en') {
            $lang = new HTMLPurifier_Language($config, $context);
        } else {
            $class = 'HTMLPurifier_Language_' . $pcode;
            $file = $this->dir . '/Language/classes/' . $code . '.php';
            if (file_exists($file) || class_exists($class, false)) {
                $lang = new $class($config, $context);
            } else {
                // Go fallback
                $raw_fallback = $this->getFallbackFor($code);
                $fallback = $raw_fallback ? $raw_fallback : 'en';
                $depth++;
                $lang = $this->create($config, $context, $fallback);
                if (!$raw_fallback) {
                    // no fallback was declared: English was forced
                    $lang->error = true;
                }
                $depth--;
            }
        }
        // the object always reports the code that was asked for, even
        // when a fallback language object is returned
        $lang->code = $code;
        return $lang;
    }

    /**
     * Returns the fallback language for language
     * @note Loads the original language into cache
     * @param string $code language code
     * @return string|bool
     */
    public function getFallbackFor($code)
    {
        $this->loadLanguage($code);
        return $this->cache[$code]['fallback'];
    }

    /**
     * Loads language into the cache, handles message file and fallbacks
     * @param string $code language code
     */
    public function loadLanguage($code)
    {
        // recursion guard: codes whose fallback chain is being resolved
        // right now. An entry is removed again once its chain is done,
        // because this static outlives any single factory object.
        static $languages_seen = array();

        // abort if we've already loaded it
        if (isset($this->cache[$code])) {
            return;
        }

        // generate filename
        $filename = $this->dir . '/Language/messages/' . $code . '.php';

        // default fallback : may be overwritten by the ensuing include
        $fallback = ($code != 'en') ? 'en' : false;

        // load primary localisation
        if (!file_exists($filename)) {
            // skip the include: will rely solely on fallback
            $cache = array();
        } else {
            include $filename;
            $cache = compact($this->keys);
        }

        // load fallback localisation
        if (!empty($fallback)) {

            // infinite recursion guard
            if (isset($languages_seen[$code])) {
                trigger_error(
                    'Circular fallback reference in language ' .
                    $code,
                    E_USER_ERROR
                );
                $fallback = 'en';
            }
            $languages_seen[$code] = true;

            // load the fallback recursively
            $this->loadLanguage($fallback);
            unset($languages_seen[$code]);
            $fallback_cache = $this->cache[$fallback];

            // merge fallback with current language
            foreach ($this->keys as $key) {
                if (isset($cache[$key]) && isset($fallback_cache[$key])) {
                    if (isset($this->mergeable_keys_map[$key])) {
                        // hash maps: the language's own entries win
                        $cache[$key] = $cache[$key] + $fallback_cache[$key];
                    } elseif (isset($this->mergeable_keys_list[$key])) {
                        $cache[$key] = array_merge($fallback_cache[$key], $cache[$key]);
                    }
                } else {
                    $cache[$key] = $fallback_cache[$key];
                }
            }
        }

        // save to cache for later retrieval
        $this->cache[$code] = $cache;
        return;
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/Length.php0000644000175000017500000000743313512700112017626 0ustar ezyangezyang true, 'ex' => true, 'px' => true, 'in' => true, 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true, 'ch' => true, 'rem' => true, 'vw' => true, 'vh' => true, 'vmin' => true, 'vmax' => true );

    /**
     * @param string $n Magnitude
     * @param bool|string $u Unit
     */
    public function __construct($n = '0', $u = false)
    {
        $this->n = (string) $n;
        $this->unit = $u !== false ? (string) $u : false;
    }

    /**
     * @param string $s Unit string, like '2em' or '3.4in'
     * @return HTMLPurifier_Length
     * @warning Does not perform validation.
     */
    public static function make($s)
    {
        if ($s instanceof HTMLPurifier_Length) {
            return $s;
        }
        // the leading run of digits, dots and signs is the magnitude,
        // whatever follows is the unit
        $n_length = strspn($s, '1234567890.+-');
        $n = substr($s, 0, $n_length);
        $unit = substr($s, $n_length);
        if ($unit === '') {
            $unit = false;
        }
        return new HTMLPurifier_Length($n, $unit);
    }

    /**
     * Validates the number and unit.
* @return bool
     * @note Normalizes $this->n and lower-cases $this->unit as a side effect.
     */
    protected function validate()
    {
        // Special case:
        if ($this->n === '+0' || $this->n === '-0') {
            $this->n = '0';
        }
        if ($this->n === '0' && $this->unit === false) {
            // a bare zero is the only length allowed to omit its unit
            return true;
        }
        // ctype_lower() must only be given strings (passing a bool is
        // deprecated as of PHP 8.1), so a missing unit is routed straight to
        // the normalization below, which turns it into '' as before.
        if ($this->unit === false || !ctype_lower($this->unit)) {
            $this->unit = strtolower((string) $this->unit);
        }
        if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) {
            return false;
        }
        // Hack: reuse the CSS number validator to check and normalize the
        // magnitude
        $def = new HTMLPurifier_AttrDef_CSS_Number();
        $result = $def->validate($this->n, false, false);
        if ($result === false) {
            return false;
        }
        $this->n = $result;
        return true;
    }

    /**
     * Returns string representation of number.
     * @return string|bool Magnitude followed by unit, or false if this
     *                     length is not valid.
     */
    public function toString()
    {
        if (!$this->isValid()) {
            return false;
        }
        return $this->n . $this->unit;
    }

    /**
     * Retrieves string numeric magnitude.
     * @return string
     */
    public function getN()
    {
        return $this->n;
    }

    /**
     * Retrieves string unit.
     * @return string|bool Unit, or false if none was given.
     */
    public function getUnit()
    {
        return $this->unit;
    }

    /**
     * Returns true if this length unit is valid.
     * The result of validate() is computed once and then reused.
     * @return bool
     */
    public function isValid()
    {
        if ($this->isValid === null) {
            $this->isValid = $this->validate();
        }
        return $this->isValid;
    }

    /**
     * Compares two lengths, and returns 1 if greater, -1 if less and 0 if equal.
* @param HTMLPurifier_Length $l * @return int * @warning If both values are too large or small, this calculation will * not work properly */ public function compareTo($l) { if ($l === false) { return false; } if ($l->unit !== $this->unit) { $converter = new HTMLPurifier_UnitConverter(); $l = $converter->convert($l, $this->unit); if ($l === false) { return false; } } return $this->n - $l->n; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Lexer.php0000644000175000017500000003217213512700112017462 0ustar ezyangezyangget('Core.LexerImpl'); } $needs_tracking = $config->get('Core.MaintainLineNumbers') || $config->get('Core.CollectErrors'); $inst = null; if (is_object($lexer)) { $inst = $lexer; } else { if (is_null($lexer)) { do { // auto-detection algorithm if ($needs_tracking) { $lexer = 'DirectLex'; break; } if (class_exists('DOMDocument', false) && method_exists('DOMDocument', 'loadHTML') && !extension_loaded('domxml') ) { // check for DOM support, because while it's part of the // core, it can be disabled compile time. Also, the PECL // domxml extension overrides the default DOM, and is evil // and nasty and we shan't bother to support it $lexer = 'DOMLex'; } else { $lexer = 'DirectLex'; } } while (0); } // do..while so we can break // instantiate recognized string names switch ($lexer) { case 'DOMLex': $inst = new HTMLPurifier_Lexer_DOMLex(); break; case 'DirectLex': $inst = new HTMLPurifier_Lexer_DirectLex(); break; case 'PH5P': $inst = new HTMLPurifier_Lexer_PH5P(); break; default: throw new HTMLPurifier_Exception( "Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer) ); } } if (!$inst) { throw new HTMLPurifier_Exception('No lexer was instantiated'); } // once PHP DOM implements native line numbers, or we // hack out something using XSLT, remove this stipulation if ($needs_tracking && !$inst->tracksLineNumbers) { throw new HTMLPurifier_Exception( 'Cannot use lexer that does not support line numbers with ' . 
'Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)' ); } return $inst; } // -- CONVENIENCE MEMBERS --------------------------------------------- public function __construct() { $this->_entity_parser = new HTMLPurifier_EntityParser(); } /** * Most common entity to raw value conversion table for special entities. * @type array */ protected $_special_entity2str = array( '"' => '"', '&' => '&', '<' => '<', '>' => '>', ''' => "'", ''' => "'", ''' => "'" ); public function parseText($string, $config) { return $this->parseData($string, false, $config); } public function parseAttr($string, $config) { return $this->parseData($string, true, $config); } /** * Parses special entities into the proper characters. * * This string will translate escaped versions of the special characters * into the correct ones. * * @param string $string String character data to be parsed. * @return string Parsed character data. */ public function parseData($string, $is_attr, $config) { // following functions require at least one character if ($string === '') { return ''; } // subtracts amps that cannot possibly be escaped $num_amp = substr_count($string, '&') - substr_count($string, '& ') - ($string[strlen($string) - 1] === '&' ? 1 : 0); if (!$num_amp) { return $string; } // abort if no entities $num_esc_amp = substr_count($string, '&'); $string = strtr($string, $this->_special_entity2str); // code duplication for sake of optimization, see above $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - ($string[strlen($string) - 1] === '&' ? 1 : 0); if ($num_amp_2 <= $num_esc_amp) { return $string; } // hmm... now we have some uncommon entities. Use the callback. 
if ($config->get('Core.LegacyEntityDecoder')) { $string = $this->_entity_parser->substituteSpecialEntities($string); } else { if ($is_attr) { $string = $this->_entity_parser->substituteAttrEntities($string); } else { $string = $this->_entity_parser->substituteTextEntities($string); } } return $string; } /** * Lexes an HTML string into tokens. * @param $string String HTML. * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return HTMLPurifier_Token[] array representation of HTML. */ public function tokenizeHTML($string, $config, $context) { trigger_error('Call to abstract class', E_USER_ERROR); } /** * Translates CDATA sections into regular sections (through escaping). * @param string $string HTML string to process. * @return string HTML with CDATA sections escaped. */ protected static function escapeCDATA($string) { return preg_replace_callback( '//s', array('HTMLPurifier_Lexer', 'CDATACallback'), $string ); } /** * Special CDATA case that is especially convoluted for #i', '', $html); } return $html; } /** * Takes a string of HTML (fragment or document) and returns the content * @todo Consider making protected */ public function extractBody($html) { $matches = array(); $result = preg_match('|(.*?)]*>(.*)|is', $html, $matches); if ($result) { // Make sure it's not in a comment $comment_start = strrpos($matches[1], ''); if ($comment_start === false || ($comment_end !== false && $comment_end > $comment_start)) { return $matches[2]; } } return $html; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Node.php0000644000175000017500000000240013512700112017257 0ustar ezyangezyangpreserve[$i] = true; } for ($i = 65; $i <= 90; $i++) { // upper-case $this->preserve[$i] = true; } for ($i = 97; $i <= 122; $i++) { // lower-case $this->preserve[$i] = true; } $this->preserve[45] = true; // Dash - $this->preserve[46] = true; // Period . 
$this->preserve[95] = true; // Underscore _ $this->preserve[126]= true; // Tilde ~ // extra letters not to escape if ($preserve !== false) { for ($i = 0, $c = strlen($preserve); $i < $c; $i++) { $this->preserve[ord($preserve[$i])] = true; } } } /** * Our replacement for urlencode, it encodes all non-reserved characters, * as well as any extra characters that were instructed to be preserved. * @note * Assumes that the string has already been normalized, making any * and all percent escape sequences valid. Percents will not be * re-escaped, regardless of their status in $preserve * @param string $string String to be encoded * @return string Encoded string. */ public function encode($string) { $ret = ''; for ($i = 0, $c = strlen($string); $i < $c; $i++) { if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])])) { $ret .= '%' . sprintf('%02X', $int); } else { $ret .= $string[$i]; } } return $ret; } /** * Fix up percent-encoding by decoding unreserved characters and normalizing. * @warning This function is affected by $preserve, even though the * usual desired behavior is for this not to preserve those * characters. Be careful when reusing instances of PercentEncoder! * @param string $string String to normalize * @return string */ public function normalize($string) { if ($string == '') { return ''; } $parts = explode('%', $string); $ret = array_shift($parts); foreach ($parts as $part) { $length = strlen($part); if ($length < 2) { $ret .= '%25' . $part; continue; } $encoding = substr($part, 0, 2); $text = substr($part, 2); if (!ctype_xdigit($encoding)) { $ret .= '%25' . $part; continue; } $int = hexdec($encoding); if (isset($this->preserve[$int])) { $ret .= chr($int) . $text; continue; } $encoding = strtoupper($encoding); $ret .= '%' . $encoding . 
$text; } return $ret; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Printer.php0000644000175000017500000001341113512700112020021 0ustar ezyangezyanggetAll(); $context = new HTMLPurifier_Context(); $this->generator = new HTMLPurifier_Generator($config, $context); } /** * Main function that renders object or aspect of that object * @note Parameters vary depending on printer */ // function render() {} /** * Returns a start tag * @param string $tag Tag name * @param array $attr Attribute array * @return string */ protected function start($tag, $attr = array()) { return $this->generator->generateFromToken( new HTMLPurifier_Token_Start($tag, $attr ? $attr : array()) ); } /** * Returns an end tag * @param string $tag Tag name * @return string */ protected function end($tag) { return $this->generator->generateFromToken( new HTMLPurifier_Token_End($tag) ); } /** * Prints a complete element with content inside * @param string $tag Tag name * @param string $contents Element contents * @param array $attr Tag attributes * @param bool $escape whether or not to escape contents * @return string */ protected function element($tag, $contents, $attr = array(), $escape = true) { return $this->start($tag, $attr) . ($escape ? $this->escape($contents) : $contents) . $this->end($tag); } /** * @param string $tag * @param array $attr * @return string */ protected function elementEmpty($tag, $attr = array()) { return $this->generator->generateFromToken( new HTMLPurifier_Token_Empty($tag, $attr) ); } /** * @param string $text * @return string */ protected function text($text) { return $this->generator->generateFromToken( new HTMLPurifier_Token_Text($text) ); } /** * Prints a simple key/value row in a table. * @param string $name Key * @param mixed $value Value * @return string */ protected function row($name, $value) { if (is_bool($value)) { $value = $value ? 'On' : 'Off'; } return $this->start('tr') . "\n" . $this->element('th', $name) . "\n" . $this->element('td', $value) . 
"\n" . $this->end('tr'); } /** * Escapes a string for HTML output. * @param string $string String to escape * @return string */ protected function escape($string) { $string = HTMLPurifier_Encoder::cleanUTF8($string); $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); return $string; } /** * Takes a list of strings and turns them into a single list * @param string[] $array List of strings * @param bool $polite Bool whether or not to add an end before the last * @return string */ protected function listify($array, $polite = false) { if (empty($array)) { return 'None'; } $ret = ''; $i = count($array); foreach ($array as $value) { $i--; $ret .= $value; if ($i > 0 && !($polite && $i == 1)) { $ret .= ', '; } if ($polite && $i == 1) { $ret .= 'and '; } } return $ret; } /** * Retrieves the class of an object without prefixes, as well as metadata * @param object $obj Object to determine class of * @param string $sec_prefix Further prefix to remove * @return string */ protected function getClass($obj, $sec_prefix = '') { static $five = null; if ($five === null) { $five = version_compare(PHP_VERSION, '5', '>='); } $prefix = 'HTMLPurifier_' . $sec_prefix; if (!$five) { $prefix = strtolower($prefix); } $class = str_replace($prefix, '', get_class($obj)); $lclass = strtolower($class); $class .= '('; switch ($lclass) { case 'enum': $values = array(); foreach ($obj->valid_values as $value => $bool) { $values[] = $value; } $class .= implode(', ', $values); break; case 'css_composite': $values = array(); foreach ($obj->defs as $def) { $values[] = $this->getClass($def, $sec_prefix); } $class .= implode(', ', $values); break; case 'css_multiple': $class .= $this->getClass($obj->single, $sec_prefix) . ', '; $class .= $obj->max; break; case 'css_denyelementdecorator': $class .= $this->getClass($obj->def, $sec_prefix) . 
', ';
                $class .= $obj->element;
                break;
            case 'css_importantdecorator':
                $class .= $this->getClass($obj->def, $sec_prefix);
                if ($obj->allow) {
                    $class .= ', !important';
                }
                break;
        }
        $class .= ')';
        return $class;
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/PropertyList.php0000644000175000017500000000533713512700112021066 0ustar ezyangezyangparent = $parent; }

    /**
     * Recursively retrieves the value for a key: this plist is consulted
     * first, then its parent chain.
     * @param string $name
     * @return mixed
     * @throws HTMLPurifier_Exception if neither this plist nor any ancestor
     *         has the key
     */
    public function get($name)
    {
        if ($this->has($name)) {
            return $this->data[$name];
        }
        // possible performance bottleneck, convert to iterative if necessary
        if ($this->parent) {
            return $this->parent->get($name);
        }
        throw new HTMLPurifier_Exception("Key '$name' not found");
    }

    /**
     * Sets the value of a key, for this plist
     * @param string $name
     * @param mixed $value
     */
    public function set($name, $value)
    {
        $this->data[$name] = $value;
    }

    /**
     * Returns true if a given key exists in this plist itself (parents are
     * not consulted). A key whose value is null still counts as existing.
     * @param string $name
     * @return bool
     */
    public function has($name)
    {
        return array_key_exists($name, $this->data);
    }

    /**
     * Resets a value to the value of its parent, usually the default. If
     * no value is specified, the entire plist is reset.
     * @param string $name Key to reset, or null (the default) to reset
     *                     every key of this plist.
     */
    public function reset($name = null)
    {
        // Strict check: with a loose comparison, keys such as '' or 0 were
        // indistinguishable from "no key given" and wiped the whole plist.
        if ($name === null) {
            $this->data = array();
        } else {
            unset($this->data[$name]);
        }
    }

    /**
     * Squashes this property list and all of its property lists into a single
     * array, and returns the array. This value is cached by default.
     * @note set() and reset() do not clear the cached array; pass
     *       $force = true to see changes made after an earlier squash().
     * @param bool $force If true, ignores the cache and regenerates the array.
     * @return array
     */
    public function squash($force = false)
    {
        if ($this->cache !== null && !$force) {
            return $this->cache;
        }
        if ($this->parent) {
            // own values override those inherited from the parent chain
            return $this->cache = array_merge($this->parent->squash($force), $this->data);
        } else {
            return $this->cache = $this->data;
        }
    }

    /**
     * Returns the parent plist.
     * @return HTMLPurifier_PropertyList
     */
    public function getParent()
    {
        return $this->parent;
    }

    /**
     * Sets the parent plist.
* @param HTMLPurifier_PropertyList $plist Parent plist
     */
    public function setParent($plist)
    {
        $this->parent = $plist;
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/PropertyListIterator.php0000644000175000017500000000154113512700112022571 0ustar ezyangezyangl = strlen($filter); $this->filter = $filter; }

    /**
     * Accepts only entries whose key begins with the filter string
     * (the first $this->l bytes of the key are compared to it).
     * @return bool
     */
    public function accept()
    {
        $key = $this->getInnerIterator()->key();
        if (strncmp($key, $this->filter, $this->l) !== 0) {
            return false;
        }
        return true;
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/Queue.php0000644000175000017500000000301713512700112017463 0ustar ezyangezyanginput = $input; $this->output = array(); }

    /**
     * Shifts an element off the front of the queue.
     * Returns NULL when the queue is empty.
     */
    public function shift()
    {
        if (empty($this->output)) {
            // refill the output stack with the pending input in reverse, so
            // that popping it yields the oldest element first
            $this->output = array_reverse($this->input);
            $this->input = array();
        }
        if (empty($this->output)) {
            return NULL;
        }
        return array_pop($this->output);
    }

    /**
     * Pushes an element onto the back of the queue.
     */
    public function push($x)
    {
        array_push($this->input, $x);
    }

    /**
     * Checks if it's empty.
     */
    public function isEmpty()
    {
        return empty($this->input) && empty($this->output);
    }
}
HTMLPurifier-4.11.0/HTMLPurifier/Strategy.php0000644000175000017500000000137213512700112020203 0ustar ezyangezyangaccessed[$index] = true; return parent::offsetGet($index); }

    /**
     * Returns a lookup array of all array indexes that have been accessed.
     * @return array in form array($index => true).
     */
    public function getAccessed()
    {
        return $this->accessed;
    }

    /**
     * Resets the access array.
     */
    public function resetAccessed()
    {
        $this->accessed = array();
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/StringHashParser.php0000644000175000017500000000707113512700112021632 0ustar ezyangezyang 'DefaultKeyValue',
 *      'KEY' => 'Value',
 *      'KEY2' => 'Value2',
 *      'MULTILINE-KEY' => "Multiline\nvalue.\n",
 *  )
 *
 * We use this as an easy to use file-format for configuration schema
 * files, but the class itself is usage agnostic.
*
 * You can use ---- to forcibly terminate parsing of a single string-hash;
 * this marker is used in multi string-hashes to delimit boundaries.
 */
class HTMLPurifier_StringHashParser
{

    /**
     * Key used for a line that carries no explicit "KEY:" prefix.
     * @type string
     */
    public $default = 'ID';

    /**
     * Reads one string-hash from the given file.
     * @param string $file
     * @return array|bool Parsed string-hash, or false when the file does not
     *                    exist or cannot be opened.
     */
    public function parseFile($file)
    {
        // only attempt to open files that are actually there
        $handle = file_exists($file) ? fopen($file, 'r') : false;
        if (!$handle) {
            return false;
        }
        $hash = $this->parseHandle($handle);
        fclose($handle);
        return $hash;
    }

    /**
     * Reads every string-hash from a file in which the hashes are separated
     * by '----' lines.
     * @param string $file
     * @return array|bool List of parsed string-hashes, or false when the file
     *                    does not exist or cannot be opened.
     */
    public function parseMultiFile($file)
    {
        $handle = file_exists($file) ? fopen($file, 'r') : false;
        if (!$handle) {
            return false;
        }
        $hashes = array();
        // each call consumes one string-hash, up to its '----' terminator
        while (!feof($handle)) {
            $hashes[] = $this->parseHandle($handle);
        }
        fclose($handle);
        return $hashes;
    }

    /**
     * Internal parser that accepts a file handle.
     * @note While it's possible to simulate in-memory parsing by using
     *       custom stream wrappers, if such a use-case arises we should
     *       factor out the file handle into its own class.
     * @param resource $fh File handle with pointer at start of valid string-hash
     *            block.
* @return array
     */
    protected function parseHandle($fh)
    {
        // $state: key currently being filled (false when between entries)
        // $single: true while handling a one-line "KEY: value" entry
        $state = false;
        $single = false;
        $ret = array();
        do {
            $line = fgets($fh);
            if ($line === false) {
                break;
            }
            $line = rtrim($line, "\n\r");
            // blank lines are only significant inside a multiline value
            if (!$state && $line === '') {
                continue;
            }
            // explicit end-of-hash marker
            if ($line === '----') {
                break;
            }
            if (strncmp('--#', $line, 3) === 0) {
                // Comment
                continue;
            } elseif (strncmp('--', $line, 2) === 0) {
                // Multiline declaration: the key is the line with its
                // surrounding dashes and spaces removed
                $state = trim($line, '- ');
                if (!isset($ret[$state])) {
                    $ret[$state] = '';
                }
                continue;
            } elseif (!$state) {
                $single = true;
                if (strpos($line, ':') !== false) {
                    // Single-line declaration, split on the first colon only
                    list($state, $line) = explode(':', $line, 2);
                    $line = trim($line);
                } else {
                    // Use default declaration
                    $state = $this->default;
                }
            }
            if ($single) {
                // one-line entry: store it and go back to "between entries"
                $ret[$state] = $line;
                $single = false;
                $state = false;
            } else {
                // multiline entry: accumulate, keeping a newline per line
                $ret[$state] .= "$line\n";
            }
        } while (!feof($fh));
        return $ret;
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/TagTransform.php0000644000175000017500000000211213512700112021001 0ustar ezyangezyangline = $l; $this->col = $c; }

    /**
     * Convenience function for DirectLex settings line/col position.
     * A column of -1 moves the position to the following line.
     * @param int $l
     * @param int $c
     */
    public function rawPosition($l, $c)
    {
        if ($c === -1) {
            $l++;
        }
        $this->line = $l;
        $this->col = $c;
    }

    /**
     * Converts a token into its corresponding node.
     */
    abstract public function toNode();
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/TokenFactory.php0000644000175000017500000000603313512700112021010 0ustar ezyangezyangp_start = new HTMLPurifier_Token_Start('', array());
        $this->p_end = new HTMLPurifier_Token_End('');
        $this->p_empty = new HTMLPurifier_Token_Empty('', array());
        $this->p_text = new HTMLPurifier_Token_Text('');
        $this->p_comment = new HTMLPurifier_Token_Comment('');
    }

    /**
     * Creates a HTMLPurifier_Token_Start.
* @param string $name Tag name
     * @param array $attr Associative array of attributes
     * @return HTMLPurifier_Token_Start Generated HTMLPurifier_Token_Start
     */
    public function createStart($name, $attr = array())
    {
        // copy the prototype, then re-run its constructor with the real data
        $token = clone $this->p_start;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Builds an end-tag token from the stored prototype.
     * @param string $name Tag name
     * @return HTMLPurifier_Token_End Generated HTMLPurifier_Token_End
     */
    public function createEnd($name)
    {
        $token = clone $this->p_end;
        $token->__construct($name);
        return $token;
    }

    /**
     * Builds an empty-tag token from the stored prototype.
     * @param string $name Tag name
     * @param array $attr Associative array of attributes
     * @return HTMLPurifier_Token_Empty Generated HTMLPurifier_Token_Empty
     */
    public function createEmpty($name, $attr = array())
    {
        $token = clone $this->p_empty;
        $token->__construct($name, $attr);
        return $token;
    }

    /**
     * Builds a text token from the stored prototype.
     * @param string $data Data of text token
     * @return HTMLPurifier_Token_Text Generated HTMLPurifier_Token_Text
     */
    public function createText($data)
    {
        $token = clone $this->p_text;
        $token->__construct($data);
        return $token;
    }

    /**
     * Builds a comment token from the stored prototype.
     * @param string $data Data of comment token
     * @return HTMLPurifier_Token_Comment Generated HTMLPurifier_Token_Comment
     */
    public function createComment($data)
    {
        $token = clone $this->p_comment;
        $token->__construct($data);
        return $token;
    }
}

// vim: et sw=4 sts=4
HTMLPurifier-4.11.0/HTMLPurifier/UnitConverter.php0000644000175000017500000002362213512700112021212 0ustar ezyangezyang array(
            'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
            'pt' => 4,
            'pc' => 48,
            'in' => 288,
            self::METRIC => array('pt', '0.352777778', 'mm'),
        ),
        self::METRIC => array(
            'mm' => 1,
            'cm' => 10,
            self::ENGLISH => array('mm', '2.83464567', 'pt'),
        ),
    );

    /**
     * Minimum bcmath precision for output.
     * @type int
     */
    protected $outputPrecision;

    /**
     * Bcmath precision for internal calculations.
* @type int */ protected $internalPrecision; /** * Whether or not BCMath is available. * @type bool */ private $bcmath; public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) { $this->outputPrecision = $output_precision; $this->internalPrecision = $internal_precision; $this->bcmath = !$force_no_bcmath && function_exists('bcmul'); } /** * Converts a length object of one unit into another unit. * @param HTMLPurifier_Length $length * Instance of HTMLPurifier_Length to convert. You must validate() * it before passing it here! * @param string $to_unit * Unit to convert to. * @return HTMLPurifier_Length|bool * @note * About precision: This conversion function pays very special * attention to the incoming precision of values and attempts * to maintain a number of significant figure. Results are * fairly accurate up to nine digits. Some caveats: * - If a number is zero-padded as a result of this significant * figure tracking, the zeroes will be eliminated. * - If a number contains less than four sigfigs ($outputPrecision) * and this causes some decimals to be excluded, those * decimals will be added on. */ public function convert($length, $to_unit) { if (!$length->isValid()) { return false; } $n = $length->getN(); $unit = $length->getUnit(); if ($n === '0' || $unit === false) { return new HTMLPurifier_Length('0', false); } $state = $dest_state = false; foreach (self::$units as $k => $x) { if (isset($x[$unit])) { $state = $k; } if (isset($x[$to_unit])) { $dest_state = $k; } } if (!$state || !$dest_state) { return false; } // Some calculations about the initial precision of the number; // this will be useful when we need to do final rounding. $sigfigs = $this->getSigFigs($n); if ($sigfigs < $this->outputPrecision) { $sigfigs = $this->outputPrecision; } // BCMath's internal precision deals only with decimals. 
Use // our default if the initial number has no decimals, or increase // it by how ever many decimals, thus, the number of guard digits // will always be greater than or equal to internalPrecision. $log = (int)floor(log(abs($n), 10)); $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision for ($i = 0; $i < 2; $i++) { // Determine what unit IN THIS SYSTEM we need to convert to if ($dest_state === $state) { // Simple conversion $dest_unit = $to_unit; } else { // Convert to the smallest unit, pending a system shift $dest_unit = self::$units[$state][$dest_state][0]; } // Do the conversion if necessary if ($dest_unit !== $unit) { $factor = $this->div(self::$units[$state][$unit], self::$units[$state][$dest_unit], $cp); $n = $this->mul($n, $factor, $cp); $unit = $dest_unit; } // Output was zero, so bail out early. Shouldn't ever happen. if ($n === '') { $n = '0'; $unit = $to_unit; break; } // It was a simple conversion, so bail out if ($dest_state === $state) { break; } if ($i !== 0) { // Conversion failed! Apparently, the system we forwarded // to didn't have this unit. This should never happen! return false; } // Pre-condition: $i == 0 // Perform conversion to next system of units $n = $this->mul($n, self::$units[$state][$dest_state][1], $cp); $unit = self::$units[$state][$dest_state][2]; $state = $dest_state; // One more loop around to convert the unit in the new system. } // Post-condition: $unit == $to_unit if ($unit !== $to_unit) { return false; } // Useful for debugging: //echo "
n";
        //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n
\n"; $n = $this->round($n, $sigfigs); if (strpos($n, '.') !== false) { $n = rtrim($n, '0'); } $n = rtrim($n, '.'); return new HTMLPurifier_Length($n, $unit); } /** * Returns the number of significant figures in a string number. * @param string $n Decimal number * @return int number of sigfigs */ public function getSigFigs($n) { $n = ltrim($n, '0+-'); $dp = strpos($n, '.'); // decimal position if ($dp === false) { $sigfigs = strlen(rtrim($n, '0')); } else { $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character if ($dp !== 0) { $sigfigs--; } } return $sigfigs; } /** * Adds two numbers, using arbitrary precision when available. * @param string $s1 * @param string $s2 * @param int $scale * @return string */ private function add($s1, $s2, $scale) { if ($this->bcmath) { return bcadd($s1, $s2, $scale); } else { return $this->scale((float)$s1 + (float)$s2, $scale); } } /** * Multiples two numbers, using arbitrary precision when available. * @param string $s1 * @param string $s2 * @param int $scale * @return string */ private function mul($s1, $s2, $scale) { if ($this->bcmath) { return bcmul($s1, $s2, $scale); } else { return $this->scale((float)$s1 * (float)$s2, $scale); } } /** * Divides two numbers, using arbitrary precision when available. * @param string $s1 * @param string $s2 * @param int $scale * @return string */ private function div($s1, $s2, $scale) { if ($this->bcmath) { return bcdiv($s1, $s2, $scale); } else { return $this->scale((float)$s1 / (float)$s2, $scale); } } /** * Rounds a number according to the number of sigfigs it should have, * using arbitrary precision when available. * @param float $n * @param int $sigfigs * @return string */ private function round($n, $sigfigs) { $new_log = (int)floor(log(abs($n), 10)); // Number of digits left of decimal - 1 $rp = $sigfigs - $new_log - 1; // Number of decimal places needed $neg = $n < 0 ? '-' : ''; // Negative sign if ($this->bcmath) { if ($rp >= 0) { $n = bcadd($n, $neg . '0.' . 
str_repeat('0', $rp) . '5', $rp + 1); $n = bcdiv($n, '1', $rp); } else { // This algorithm partially depends on the standardized // form of numbers that comes out of bcmath. $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0); $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1); } return $n; } else { return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1); } } /** * Scales a float to $scale digits right of decimal point, like BCMath. * @param float $r * @param int $scale * @return string */ private function scale($r, $scale) { if ($scale < 0) { // The f sprintf type doesn't support negative numbers, so we // need to cludge things manually. First get the string. $r = sprintf('%.0f', (float)$r); // Due to floating point precision loss, $r will more than likely // look something like 4652999999999.9234. We grab one more digit // than we need to precise from $r and then use that to round // appropriately. $precise = (string)round(substr($r, 0, strlen($r) + $scale), -1); // Now we return it, truncating the zero that was rounded off. return substr($precise, 0, -1) . str_repeat('0', -$scale + 1); } return sprintf('%.' . $scale . 'f', (float)$r); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/URI.php0000644000175000017500000002454413512700112017046 0ustar ezyangezyangscheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme); $this->userinfo = $userinfo; $this->host = $host; $this->port = is_null($port) ? 
/**
 * Generic validation method applicable for all schemes. May modify
 * this URI in order to get it into a compliant form.
 *
 * Components are processed in this order: host, scheme, userinfo, port,
 * path, then query and fragment. Components that fail validation are
 * nulled out (host, port) or percent-encoded; as written, this method
 * always returns true.
 *
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool True if validation/filtering succeeds, false if failure
 */
public function validate($config, $context)
{
    // ABNF definitions from RFC 3986
    // (gen-delims ':/?#[]@' are never allowed unencoded here, so no
    // variable is kept for them)
    $chars_sub_delims = '!$&\'()*+,;=';
    $chars_pchar = $chars_sub_delims . ':@';

    // validate host
    if (!is_null($this->host)) {
        $host_def = new HTMLPurifier_AttrDef_URI_Host();
        $this->host = $host_def->validate($this->host, $config, $context);
        if ($this->host === false) {
            $this->host = null;
        }
    }

    // validate scheme
    // NOTE: It's not appropriate to check whether or not this
    // scheme is in our registry, since a URIFilter may convert a
    // URI that we don't allow into one we do.  So instead, we just
    // check if the scheme can be dropped because there is no host
    // and it is our default scheme.
    //
    // The grouping is explicit: "scheme present AND (host null OR host
    // empty)". The previous ungrouped form parsed as
    // "(scheme present AND host null) OR host empty"; the only extra
    // case it admitted (no scheme, empty host) could do nothing but
    // assign null to an already-null scheme.
    if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
        // support for relative paths is pretty abysmal when the
        // scheme is present, so axe it when possible
        $def = $config->getDefinition('URI');
        if ($def->defaultScheme === $this->scheme) {
            $this->scheme = null;
        }
    }

    // validate username
    if (!is_null($this->userinfo)) {
        $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
        $this->userinfo = $encoder->encode($this->userinfo);
    }

    // validate port
    if (!is_null($this->port)) {
        if ($this->port < 1 || $this->port > 65535) {
            $this->port = null;
        }
    }

    // validate path
    $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
    if (!is_null($this->host)) { // this catches $this->host === ''
        // path-abempty (hier and relative)
        // http://www.example.com/my/path
        // //www.example.com/my/path (looks odd, but works, and
        //                            recognized by most browsers)
        // (this set is valid or invalid on a scheme by scheme
        // basis, so we'll deal with it later)
        // file:///my/path
        // ///my/path
        $this->path = $segments_encoder->encode($this->path);
    } elseif ($this->path !== '') {
        if ($this->path[0] === '/') {
            // path-absolute (hier and relative)
            // http:/my/path
            // /my/path
            if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                // This could happen if both the host gets stripped
                // out
                // http://my/path
                // //my/path
                $this->path = '';
            } else {
                $this->path = $segments_encoder->encode($this->path);
            }
        } elseif (!is_null($this->scheme)) {
            // path-rootless (hier)
            // http:my/path
            // Short circuit evaluation means we don't need to check nz
            $this->path = $segments_encoder->encode($this->path);
        } else {
            // path-noscheme (relative)
            // my/path
            // (once again, not checking nz)
            // The first segment is encoded without ':' in the allowed
            // set; the remainder uses the normal segment encoder.
            $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
            $c = strpos($this->path, '/');
            if ($c !== false) {
                $this->path =
                    $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                    $segments_encoder->encode(substr($this->path, $c));
            } else {
                $this->path = $segment_nc_encoder->encode($this->path);
            }
        }
    } else {
        // path-empty (hier and relative)
        $this->path = ''; // just to be safe
    }

    // qf = query and fragment
    $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

    if (!is_null($this->query)) {
        $this->query = $qf_encoder->encode($this->query);
    }

    if (!is_null($this->fragment)) {
        $this->fragment = $qf_encoder->encode($this->fragment);
    }
    return true;
}
/**
 * Returns true if this URL should be considered a 'benign' URL,
 * that is:
 *
 *      - It is a local URL (isLocal), and
 *      - It has a equal or better level of security
 *
 * The security comparison is against the default scheme of the URI
 * definition: when that scheme is marked secure, this URI's scheme
 * must be secure too.
 *
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool
 */
public function isBenign($config, $context)
{
    if (!$this->isLocal($config, $context)) {
        return false;
    }

    $scheme_obj = $this->getSchemeObj($config, $context);
    if (!$scheme_obj) {
        return false; // conservative approach
    }

    $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
    // getDefaultScheme() may yield no object (getSchemeObj() guards
    // against the same thing). Reading ->secure on a non-object only
    // raised a notice/warning and evaluated as falsy, so skipping the
    // comparison keeps the previous result without the error.
    if ($current_scheme_obj && $current_scheme_obj->secure && !$scheme_obj->secure) {
        return false;
    }
    return true;
}
/**
 * Populates host, base and defaultScheme from configuration.
 *
 * URI.Host is read first. When URI.Base is set it is parsed; its scheme
 * becomes the default scheme and its host is used if URI.Host was null.
 * If no default scheme results, URI.DefaultScheme is used.
 *
 * @param HTMLPurifier_Config $config
 */
protected function setupMemberVariables($config)
{
    $this->host = $config->get('URI.Host');
    $base_uri = $config->get('URI.Base');
    if (!is_null($base_uri)) {
        $parser = new HTMLPurifier_URIParser();
        $this->base = $parser->parse($base_uri);
        // parse() returns false for a URI it cannot match at all; do
        // not dereference it in that case. The resulting values (null
        // scheme, host left untouched) are the same as before, minus
        // the "property of non-object" error.
        $parsed = ($this->base !== false);
        $this->defaultScheme = $parsed ? $this->base->scheme : null;
        if ($parsed && is_null($this->host)) {
            $this->host = $this->base->host;
        }
    }
    if (is_null($this->defaultScheme)) {
        $this->defaultScheme = $config->get('URI.DefaultScheme');
    }
}
/**
 * Parses a URI.
 * @param $uri string URI to parse
 * @return HTMLPurifier_URI representation of URI. This representation has
 *         not been validated yet and may not conform to RFC. Returns
 *         false if the URI could not be matched at all.
 */
public function parse($uri)
{
    $uri = $this->percentEncoder->normalize($uri);

    // Regexp is as per Appendix B.
    // Note that ["<>] are an addition to the RFC's recommended
    // characters, because they represent external delimiters.
    $r_URI = '!'.
        '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
        '(//([^/?#"<>]*))?'. // 4. Authority
        '([^?#"<>]*)'.       // 5. Path
        '(\?([^#"<>]*))?'.   // 7. Query
        '(#([^"<>]*))?'.     // 9. Fragment
        '!';

    $matches = array();
    $result = preg_match($r_URI, $uri, $matches);

    if (!$result) {
        return false; // *really* invalid URI
    }

    // separate out parts
    // The outer groups (1, 3, 6, 8) include their delimiter, so a
    // non-empty outer group means the component was present even when
    // its inner text is empty.
    $scheme     = !empty($matches[1]) ? $matches[2] : null;
    $authority  = !empty($matches[3]) ? $matches[4] : null;
    $path       = $matches[5]; // always present, can be empty
    $query      = !empty($matches[6]) ? $matches[7] : null;
    $fragment   = !empty($matches[8]) ? $matches[9] : null;

    // further parse authority
    if ($authority !== null) {
        $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
        $matches = array();
        preg_match($r_authority, $authority, $matches);
        $userinfo   = !empty($matches[1]) ? $matches[2] : null;
        // Group 3 is the bare host text. empty() treats the string "0"
        // as empty, which used to turn the host "0" into ''; only a
        // missing group falls back to '' now.
        $host       = isset($matches[3]) ? $matches[3] : '';
        $port       = !empty($matches[4]) ? (int) $matches[5] : null;
    } else {
        $port = $host = $userinfo = null;
    }

    return new HTMLPurifier_URI(
        $scheme,
        $userinfo,
        $host,
        $port,
        $path,
        $query,
        $fragment
    );
}
/**
 * Public interface for validating components of a URI.  Performs a
 * bunch of default actions. Don't overload this method.
 *
 * Default actions, in order: drop the port when it equals this scheme's
 * default port; repair or reject URIs whose host is missing or blank;
 * then delegate to doValidate() for the scheme-specific checks.
 *
 * @param HTMLPurifier_URI $uri Reference to a HTMLPurifier_URI object
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool success or failure
 */
public function validate(&$uri, $config, $context)
{
    // loose comparison: the port is redundant when it is the default
    if ($this->default_port == $uri->port) {
        $uri->port = null;
    }
    // kludge: browsers do funny things when the scheme but not the
    // authority is set
    // NOTE: && binds tighter than ||, so this condition reads as
    //   (!may_omit_host && <scheme set, host missing>) || <no scheme, blank host>
    // i.e. may_omit_host only exempts the first clause.
    if (!$this->may_omit_host &&
        // if the scheme is present, a missing host is always in error
        (!is_null($uri->scheme) && ($uri->host === '' || is_null($uri->host))) ||
        // if the scheme is not present, a *blank* host is in error,
        // since this translates into '///path' which most browsers
        // interpret as being 'http://path'.
        (is_null($uri->scheme) && $uri->host === '')
    ) {
        // do { ... } while (false) is used as a breakable block
        do {
            if (is_null($uri->scheme)) {
                if (substr($uri->path, 0, 2) != '//') {
                    // dropping the blank host is safe: the path cannot
                    // be re-read as an authority
                    $uri->host = null;
                    break;
                }
                // URI is '////path', so we cannot nullify the
                // host to preserve semantics.  Try expanding the
                // hostname instead (fall through)
            }
            // first see if we can manually insert a hostname
            $host = $config->get('URI.Host');
            if (!is_null($host)) {
                $uri->host = $host;
            } else {
                // we can't do anything sensible, reject the URL.
                return false;
            }
        } while (false);
    }
    return $this->doValidate($uri, $config, $context);
}
/**
 * Validate a variable according to type.
 * It may return NULL as a valid type if $allow_null is true.
 *
 * The value is first handed to parseImplementation(); the result is
 * then sanity-checked against the requested type.
 *
 * @param mixed $var Variable to validate
 * @param int $type Type of variable, see HTMLPurifier_VarParser->types
 * @param bool $allow_null Whether or not to permit null as a value
 * @return string Validated and type-coerced variable
 * @throws HTMLPurifier_VarParserException
 */
final public function parse($var, $type, $allow_null = false)
{
    // A string type is a symbolic name; translate it to its constant.
    if (is_string($type)) {
        if (isset(HTMLPurifier_VarParser::$types[$type])) {
            $type = HTMLPurifier_VarParser::$types[$type];
        } else {
            throw new HTMLPurifier_VarParserException("Invalid type '$type'");
        }
    }

    $var = $this->parseImplementation($var, $type, $allow_null);
    if ($var === null && $allow_null) {
        return null;
    }

    // These are basic checks, to make sure nothing horribly wrong
    // happened in our implementations. Each case returns on success
    // and breaks out to the generic error on a type mismatch.
    switch ($type) {
        case (self::C_STRING):
        case (self::ISTRING):
        case (self::TEXT):
        case (self::ITEXT):
            if (is_string($var)) {
                $case_insensitive = ($type == self::ISTRING || $type == self::ITEXT);
                return $case_insensitive ? strtolower($var) : $var;
            }
            break;
        case (self::C_INT):
            if (is_int($var)) {
                return $var;
            }
            break;
        case (self::C_FLOAT):
            if (is_float($var)) {
                return $var;
            }
            break;
        case (self::C_BOOL):
            if (is_bool($var)) {
                return $var;
            }
            break;
        case (self::LOOKUP):
        case (self::ALIST):
        case (self::HASH):
            if (is_array($var)) {
                if ($type === self::LOOKUP) {
                    // every value of a lookup table must be exactly true
                    foreach ($var as $flag) {
                        if ($flag !== true) {
                            $this->error('Lookup table contains value other than true');
                        }
                    }
                } elseif ($type === self::ALIST) {
                    // a list's keys must be 0..n-1 in order
                    $indices = array_keys($var);
                    if ($indices !== array_keys($indices)) {
                        $this->error('Indices for list are not uniform');
                    }
                }
                return $var;
            }
            break;
        case (self::C_MIXED):
            return $var;
        default:
            $this->errorInconsistent(get_class($this), $type);
    }
    $this->errorGeneric($var, $type);
}
/**
 * Maps a type constant back to its symbolic name.
 * @param int $type
 * @return string Name from HTMLPurifier_VarParser::$types, or 'unknown'
 */
public static function getTypeName($type)
{
    static $names = null;
    if (!$names) {
        // Lazy load the alternative lookup table
        $names = array_flip(HTMLPurifier_VarParser::$types);
    }
    return isset($names[$type]) ? $names[$type] : 'unknown';
}
/**
 * Convert zipper back into a normal array, optionally filling in
 * the hole with a value. (Usually you should supply a $t, unless you
 * are at the end of the array.)
 * @param $t Element to place in the hole; NULL leaves the hole out.
 * @return Array of the front elements, then $t, then the back
 *         elements read from highest index to lowest.
 */
public function toArray($t = NULL) {
    $result = $this->front;
    if ($t !== NULL) {
        $result[] = $t;
    }
    // back is stored reversed, so walk it from the end
    $idx = count($this->back);
    while ($idx-- > 0) {
        $result[] = $this->back[$idx];
    }
    return $result;
}
/**
 * Splice in multiple elements at hole: remove $delete elements
 * starting with the hole's current contents, then insert
 * $replacement in their place.
 *
 * @param $t Current contents of hole.
 * @param $delete Number of elements to remove.
 * @param $replacement Array of elements to insert.
 * @return Tuple of (removed elements, new contents of hole).
 */
public function splice($t, $delete, $replacement) {
    // delete: collect the hole, then pull the next element into it
    $removed = array();
    $hole = $t;
    $remaining = $delete;
    while ($remaining > 0) {
        $removed[] = $hole;
        $hole = $this->delete();
        $remaining--;
    }
    // insert: walk the replacement backwards, pushing the current hole
    // behind us so the first replacement element ends up in the hole
    $idx = count($replacement);
    while ($idx-- > 0) {
        $this->insertAfter($hole);
        $hole = $replacement[$idx];
    }
    return array($removed, $hole);
}
$len = strlen($css); $accum = ""; $declarations = array(); $quoted = false; for ($i = 0; $i < $len; $i++) { $c = strcspn($css, ";'\"", $i); $accum .= substr($css, $i, $c); $i += $c; if ($i == $len) break; $d = $css[$i]; if ($quoted) { $accum .= $d; if ($d == $quoted) { $quoted = false; } } else { if ($d == ";") { $declarations[] = $accum; $accum = ""; } else { $accum .= $d; $quoted = $d; } } } if ($accum != "") $declarations[] = $accum; $propvalues = array(); $new_declarations = ''; /** * Name of the current CSS property being validated. */ $property = false; $context->register('CurrentCSSProperty', $property); foreach ($declarations as $declaration) { if (!$declaration) { continue; } if (!strpos($declaration, ':')) { continue; } list($property, $value) = explode(':', $declaration, 2); $property = trim($property); $value = trim($value); $ok = false; do { if (isset($definition->info[$property])) { $ok = true; break; } if (ctype_lower($property)) { break; } $property = strtolower($property); if (isset($definition->info[$property])) { $ok = true; break; } } while (0); if (!$ok) { continue; } // inefficient call, since the validator will do this again if (strtolower(trim($value)) !== 'inherit') { // inherit works for everything (but only on the base property) $result = $definition->info[$property]->validate( $value, $config, $context ); } else { $result = 'inherit'; } if ($result === false) { continue; } if ($allow_duplicates) { $new_declarations .= "$property:$result;"; } else { $propvalues[$property] = $result; } } $context->destroy('CurrentCSSProperty'); // procedure does not write the new CSS simultaneously, so it's // slightly inefficient, but it's the only way of getting rid of // duplicates. Perhaps config to optimize it, but not now. foreach ($propvalues as $prop => $value) { $new_declarations .= "$prop:$value;"; } return $new_declarations ? 
/**
 * Validates an integer literal with an optional leading sign.
 *
 * A '-' is only recognised when negatives are enabled and a '+' only
 * when positives are enabled; a recognised '+' is removed and '-0'
 * becomes '0'.
 *
 * @param string $integer
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($integer, $config, $context)
{
    $integer = $this->parseCDATA($integer);
    if ($integer === '') {
        return false;
    }

    // we could possibly simply typecast it to integer, but there are
    // certain fringe cases that must not return an integer.

    // clip leading sign
    $sign = $integer[0];
    if ($this->negative && $sign === '-') {
        $digits = substr($integer, 1);
        if ($digits === '0') {
            // rm minus sign for zero
            $integer = '0';
        }
    } elseif ($this->positive && $sign === '+') {
        // rm unnecessary plus
        $integer = substr($integer, 1);
        $digits = $integer;
    } else {
        $digits = $integer;
    }

    // test if it's numeric
    if (!ctype_digit($digits)) {
        return false;
    }

    // perform scope tests
    $out_of_scope =
        (!$this->zero && $integer == 0) ||
        (!$this->positive && $integer > 0) ||
        (!$this->negative && $integer < 0);

    return $out_of_scope ? false : $integer;
}
/**
 * Delegates to one of two validators depending on whether the token
 * currently being processed has the configured tag name.
 *
 * @param string $string
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($string, $config, $context)
{
    $token = $context->get('CurrentToken', true);
    $on_tag = ($token && $token->name === $this->tag);
    $delegate = $on_tag ? $this->withTag : $this->withoutTag;
    return $delegate->validate($string, $config, $context);
}
/**
 * Validates the `background` shorthand.
 *
 * Each space-separated bit is offered to the color, image, repeat and
 * attachment validators in that order, skipping any that already have
 * a value. Bits nobody accepts are collected as position text, which
 * is validated as a whole at the end.
 *
 * @param string $string
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($string, $config, $context)
{
    // regular pre-processing
    $string = $this->parseCDATA($string);
    if ($string === '') {
        return false;
    }

    // munge rgb() decl if necessary
    $string = $this->mungeRgb($string);

    // key order here is also the output order
    $caught = array(
        'color' => false,
        'image' => false,
        'repeat' => false,
        'attachment' => false,
        'position' => false,
    );
    $single_valued = array('color', 'image', 'repeat', 'attachment');

    $catches = 0;

    // assumes URI doesn't have spaces in it
    foreach (explode(' ', $string) as $bit) {
        if ($bit === '') {
            continue;
        }
        $claimed = false;
        foreach ($single_valued as $key) {
            if ($caught[$key] !== false) {
                continue; // already have one of these
            }
            $r = $this->info['background-' . $key]->validate($bit, $config, $context);
            if ($r === false) {
                continue;
            }
            $caught[$key] = $r;
            $claimed = true;
            break;
        }
        if (!$claimed) {
            // position takes whatever is left, unvalidated for now
            if ($caught['position'] === false) {
                $caught['position'] = '';
            }
            $caught['position'] .= $bit . ' ';
        }
        $catches++;
    }

    if (!$catches) {
        return false;
    }
    if ($caught['position'] !== false) {
        $caught['position'] = $this->info['background-position']->
            validate($caught['position'], $config, $context);
    }

    $ret = array();
    foreach ($caught as $value) {
        if ($value !== false) {
            $ret[] = $value;
        }
    }

    if (empty($ret)) {
        return false;
    }
    return implode(' ', $ret);
}
/**
 * Validates the value of background-position.
 */
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{

    /**
     * @type HTMLPurifier_AttrDef_CSS_Length
     */
    protected $length;

    /**
     * @type HTMLPurifier_AttrDef_CSS_Percentage
     */
    protected $percentage;

    public function __construct()
    {
        $this->length = new HTMLPurifier_AttrDef_CSS_Length();
        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
    }

    /**
     * Collects keywords and measures from the space-separated value and
     * emits at most two of them: the horizontal component first, then
     * the vertical one.
     *
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    public function validate($string, $config, $context)
    {
        $string = $this->parseCDATA($string);

        $horizontal = false;    // left, right
        $vertical = false;      // top, bottom
        $center_first = false;  // center (first word)
        $center_second = false; // center (second word)
        $measures = array();

        $caught = 0;

        $axis_of = array(
            'top' => 'v',
            'bottom' => 'v',
            'left' => 'h',
            'right' => 'h',
            'center' => 'c'
        );

        foreach (explode(' ', $string) as $bit) {
            if ($bit === '') {
                continue;
            }

            // test for keyword
            $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
            if (isset($axis_of[$lbit])) {
                switch ($axis_of[$lbit]) {
                    case 'h':
                        $horizontal = $lbit;
                        break;
                    case 'v':
                        $vertical = $lbit;
                        break;
                    default:
                        // 'center' counts as horizontal only when it is
                        // the very first thing caught
                        if ($caught == 0) {
                            $center_first = $lbit;
                        } else {
                            $center_second = $lbit;
                        }
                }
                $caught++;
            }

            // test for length, then for percentage
            foreach (array($this->length, $this->percentage) as $measure_def) {
                $r = $measure_def->validate($bit, $config, $context);
                if ($r !== false) {
                    $measures[] = $r;
                    $caught++;
                }
            }
        }

        if (!$caught) {
            return false;
        } // no valid values were caught

        $ret = array();

        // first keyword
        if ($horizontal) {
            $ret[] = $horizontal;
        } elseif ($center_first) {
            $ret[] = $center_first;
            $center_second = false; // prevent re-use: center = center center
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }

        // second keyword
        if ($vertical) {
            $ret[] = $vertical;
        } elseif ($center_second) {
            $ret[] = $center_second;
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }

        if (empty($ret)) {
            return false;
        }
        return implode(' ', $ret);
    }
}
' '; $done[$propname] = true; break; } } } return rtrim($ret); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrDef/CSS/Color.php0000644000175000017500000001111013512700112021427 0ustar ezyangezyangalpha = new HTMLPurifier_AttrDef_CSS_AlphaValue(); } /** * @param string $color * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($color, $config, $context) { static $colors = null; if ($colors === null) { $colors = $config->get('Core.ColorKeywords'); } $color = trim($color); if ($color === '') { return false; } $lower = strtolower($color); if (isset($colors[$lower])) { return $colors[$lower]; } if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) { $length = strlen($color); if (strpos($color, ')') !== $length - 1) { return false; } // get used function : rgb, rgba, hsl or hsla $function = $matches[1]; $parameters_size = 3; $alpha_channel = false; if (substr($function, -1) === 'a') { $parameters_size = 4; $alpha_channel = true; } /* * Allowed types for values : * parameter_position => [type => max_value] */ $allowed_types = array( 1 => array('percentage' => 100, 'integer' => 255), 2 => array('percentage' => 100, 'integer' => 255), 3 => array('percentage' => 100, 'integer' => 255), ); $allow_different_types = false; if (strpos($function, 'hsl') !== false) { $allowed_types = array( 1 => array('integer' => 360), 2 => array('percentage' => 100), 3 => array('percentage' => 100), ); $allow_different_types = true; } $values = trim(str_replace($function, '', $color), ' ()'); $parts = explode(',', $values); if (count($parts) !== $parameters_size) { return false; } $type = false; $new_parts = array(); $i = 0; foreach ($parts as $part) { $i++; $part = trim($part); if ($part === '') { return false; } // different check for alpha channel if ($alpha_channel === true && $i === count($parts)) { $result = $this->alpha->validate($part, $config, $context); if ($result === false) { return 
/**
 * Tries each wrapped definition in order and returns the first result
 * that is not false.
 *
 * @param string $string
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($string, $config, $context)
{
    foreach ($this->defs as $candidate) {
        $outcome = $candidate->validate($string, $config, $context);
        if ($outcome === false) {
            // this definition rejected the value; try the next one
            continue;
        }
        return $outcome;
    }
    // no definition accepted the value
    return false;
}
/**
 * Validates a CSS "filter" value, accepting only 'none' or a single
 * alpha function (alpha, Alpha or progid:DXImageTransform.Microsoft.Alpha)
 * whose only retained parameter is an integer opacity clamped to 0..100.
 *
 * @param string $value
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($value, $config, $context)
{
    $value = $this->parseCDATA($value);
    if ($value === 'none') {
        return $value;
    }
    // if we looped this we could support multiple filters
    $function_length = strcspn($value, '(');
    $function = trim(substr($value, 0, $function_length));
    if ($function !== 'alpha' &&
        $function !== 'Alpha' &&
        $function !== 'progid:DXImageTransform.Microsoft.Alpha'
    ) {
        return false;
    }
    $cursor = $function_length + 1;
    if ($cursor < strlen($value)) {
        $parameters_length = strcspn($value, ')', $cursor);
        $parameters = substr($value, $cursor, $parameters_length);
    } else {
        // No opening parenthesis, or nothing after it. Calling strcspn()
        // with an offset past the end of the string raises a ValueError on
        // PHP 8, so treat the parameter list as empty instead.
        $parameters = '';
    }
    $ret_params = array();
    $lookup = array();
    foreach (explode(',', $parameters) as $param) {
        $pair = explode('=', $param);
        if (count($pair) < 2) {
            // parameter without '=' carries no value; nothing to keep
            continue;
        }
        $key = trim($pair[0]);
        $raw = trim($pair[1]);
        if (isset($lookup[$key])) {
            // only the first occurrence of a parameter is kept
            continue;
        }
        if ($key !== 'opacity') {
            continue;
        }
        $opacity = $this->intValidator->validate($raw, $config, $context);
        if ($opacity === false) {
            continue;
        }
        // clamp to the 0..100 range
        $int = (int)$opacity;
        if ($int > 100) {
            $opacity = '100';
        }
        if ($int < 0) {
            $opacity = '0';
        }
        $ret_params[] = "$key=$opacity";
        $lookup[$key] = true;
    }
    $ret_parameters = implode(',', $ret_params);
    return "$function($ret_parameters)";
}
$def->info['font-weight']; $this->info['font-size'] = $def->info['font-size']; $this->info['line-height'] = $def->info['line-height']; $this->info['font-family'] = $def->info['font-family']; } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { static $system_fonts = array( 'caption' => true, 'icon' => true, 'menu' => true, 'message-box' => true, 'small-caption' => true, 'status-bar' => true ); // regular pre-processing $string = $this->parseCDATA($string); if ($string === '') { return false; } // check if it's one of the keywords $lowercase_string = strtolower($string); if (isset($system_fonts[$lowercase_string])) { return $lowercase_string; } $bits = explode(' ', $string); // bits to process $stage = 0; // this indicates what we're looking for $caught = array(); // which stage 0 properties have we caught? $stage_1 = array('font-style', 'font-variant', 'font-weight'); $final = ''; // output for ($i = 0, $size = count($bits); $i < $size; $i++) { if ($bits[$i] === '') { continue; } switch ($stage) { case 0: // attempting to catch font-style, font-variant or font-weight foreach ($stage_1 as $validator_name) { if (isset($caught[$validator_name])) { continue; } $r = $this->info[$validator_name]->validate( $bits[$i], $config, $context ); if ($r !== false) { $final .= $r . ' '; $caught[$validator_name] = true; break; } } // all three caught, continue on if (count($caught) >= 3) { $stage = 1; } if ($r !== false) { break; } case 1: // attempting to catch font-size and perhaps line-height $found_slash = false; if (strpos($bits[$i], '/') !== false) { list($font_size, $line_height) = explode('/', $bits[$i]); if ($line_height === '') { // ooh, there's a space after the slash! 
$line_height = false; $found_slash = true; } } else { $font_size = $bits[$i]; $line_height = false; } $r = $this->info['font-size']->validate( $font_size, $config, $context ); if ($r !== false) { $final .= $r; // attempt to catch line-height if ($line_height === false) { // we need to scroll forward for ($j = $i + 1; $j < $size; $j++) { if ($bits[$j] === '') { continue; } if ($bits[$j] === '/') { if ($found_slash) { return false; } else { $found_slash = true; continue; } } $line_height = $bits[$j]; break; } } else { // slash already found $found_slash = true; $j = $i; } if ($found_slash) { $i = $j; $r = $this->info['line-height']->validate( $line_height, $config, $context ); if ($r !== false) { $final .= '/' . $r; } } $final .= ' '; $stage = 2; break; } return false; case 2: // attempting to catch font-family $font_family = implode(' ', array_slice($bits, $i, $size - $i)); $r = $this->info['font-family']->validate( $font_family, $config, $context ); if ($r !== false) { $final .= $r . ' '; // processing completed successfully return rtrim($final); } return false; } } return false; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrDef/CSS/FontFamily.php0000644000175000017500000002230113512700112022425 0ustar ezyangezyangmask = '_- '; for ($c = 'a'; $c <= 'z'; $c++) { $this->mask .= $c; } for ($c = 'A'; $c <= 'Z'; $c++) { $this->mask .= $c; } for ($c = '0'; $c <= '9'; $c++) { $this->mask .= $c; } // cast-y, but should be fine // special bytes used by UTF-8 for ($i = 0x80; $i <= 0xFF; $i++) { // We don't bother excluding invalid bytes in this range, // because the our restriction of well-formed UTF-8 will // prevent these from ever occurring. $this->mask .= chr($i); } /* PHP's internal strcspn implementation is O(length of string * length of mask), making it inefficient for large masks. 
However, it's still faster than preg_match 8) for (p = s1;;) { spanp = s2; do { if (*spanp == c || p == s1_end) { return p - s1; } } while (spanp++ < (s2_end - 1)); c = *++p; } */ // possible optimization: invert the mask. } /** * @param string $string * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool|string */ public function validate($string, $config, $context) { static $generic_names = array( 'serif' => true, 'sans-serif' => true, 'monospace' => true, 'fantasy' => true, 'cursive' => true ); $allowed_fonts = $config->get('CSS.AllowedFonts'); // assume that no font names contain commas in them $fonts = explode(',', $string); $final = ''; foreach ($fonts as $font) { $font = trim($font); if ($font === '') { continue; } // match a generic name if (isset($generic_names[$font])) { if ($allowed_fonts === null || isset($allowed_fonts[$font])) { $final .= $font . ', '; } continue; } // match a quoted name if ($font[0] === '"' || $font[0] === "'") { $length = strlen($font); if ($length <= 2) { continue; } $quote = $font[0]; if ($font[$length - 1] !== $quote) { continue; } $font = substr($font, 1, $length - 2); } $font = $this->expandCSSEscape($font); // $font is a pure representation of the font name if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) { continue; } if (ctype_alnum($font) && $font !== '') { // very simple font, allow it in unharmed $final .= $font . ', '; continue; } // bugger out on whitespace. form feed (0C) really // shouldn't show up regardless $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font); // Here, there are various classes of characters which need // to be treated differently: // - Alphanumeric characters are essentially safe. We // handled these above. // - Spaces require quoting, though most parsers will do // the right thing if there aren't any characters that // can be misinterpreted // - Dashes rarely occur, but they fairly unproblematic // for parsing/rendering purposes. 
// The above characters cover the majority of Western font // names. // - Arbitrary Unicode characters not in ASCII. Because // most parsers give little thought to Unicode, treatment // of these codepoints is basically uniform, even for // punctuation-like codepoints. These characters can // show up in non-Western pages and are supported by most // major browsers, for example: "MS 明朝" is a // legitimate font-name // . See // the CSS3 spec for more examples: // // You can see live samples of these on the Internet: // // However, most of these fonts have ASCII equivalents: // for example, 'MS Mincho', and it's considered // professional to use ASCII font names instead of // Unicode font names. Thanks Takeshi Terada for // providing this information. // The following characters, to my knowledge, have not been // used to name font names. // - Single quote. While theoretically you might find a // font name that has a single quote in its name (serving // as an apostrophe, e.g. Dave's Scribble), I haven't // been able to find any actual examples of this. // Internet Explorer's cssText translation (which I // believe is invoked by innerHTML) normalizes any // quoting to single quotes, and fails to escape single // quotes. (Note that this is not IE's behavior for all // CSS properties, just some sort of special casing for // font-family). So a single quote *cannot* be used // safely in the font-family context if there will be an // innerHTML/cssText translation. Note that Firefox 3.x // does this too. // - Double quote. In IE, these get normalized to // single-quotes, no matter what the encoding. (Fun // fact, in IE8, the 'content' CSS property gained // support, where they special cased to preserve encoded // double quotes, but still translate unadorned double // quotes into single quotes.) So, because their // fixpoint behavior is identical to single quotes, they // cannot be allowed either. Firefox 3.x displays // single-quote style behavior. 
// - Backslashes are reduced by one (so \\ -> \) every // iteration, so they cannot be used safely. This shows // up in IE7, IE8 and FF3 // - Semicolons, commas and backticks are handled properly. // - The rest of the ASCII punctuation is handled properly. // We haven't checked what browsers do to unadorned // versions, but this is not important as long as the // browser doesn't /remove/ surrounding quotes (as IE does // for HTML). // // With these results in hand, we conclude that there are // various levels of safety: // - Paranoid: alphanumeric, spaces and dashes(?) // - International: Paranoid + non-ASCII Unicode // - Edgy: Everything except quotes, backslashes // - NoJS: Standards compliance, e.g. sod IE. Note that // with some judicious character escaping (since certain // types of escaping doesn't work) this is theoretically // OK as long as innerHTML/cssText is not called. // We believe that international is a reasonable default // (that we will implement now), and once we do more // extensive research, we may feel comfortable with dropping // it down to edgy. // Edgy: alphanumeric, spaces, dashes, underscores and Unicode. Use of // str(c)spn assumes that the string was already well formed // Unicode (which of course it is). if (strspn($font, $this->mask) !== strlen($font)) { continue; } // Historical: // In the absence of innerHTML/cssText, these ugly // transforms don't pose a security risk (as \\ and \" // might--these escapes are not supported by most browsers). // We could try to be clever and use single-quote wrapping // when there is a double quote present, but I have choosen // not to implement that. 
/**
 * Intercepts and removes !important if necessary, validates the remainder
 * with the wrapped definition, and re-appends ' !important' only when it
 * was present, is allowed, and the remainder validated successfully.
 *
 * @param string $string
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($string, $config, $context)
{
    // test for ! and important tokens
    $string = trim($string);
    $is_important = false;
    // :TODO: optimization: test directly for !important and ! important
    if (strlen($string) >= 9 && substr($string, -9) === 'important') {
        $temp = rtrim(substr($string, 0, -9));
        // use a temp, because we might want to restore important
        if (strlen($temp) >= 1 && substr($temp, -1) === '!') {
            $string = rtrim(substr($temp, 0, -1));
            $is_important = true;
        }
    }
    $string = $this->def->validate($string, $config, $context);
    if ($string === false) {
        // Do not concatenate onto a failed validation: false . ' !important'
        // would produce the bogus non-false value ' !important'.
        return false;
    }
    if ($this->allow && $is_important) {
        $string .= ' !important';
    }
    return $string;
}
/**
 * Validates the list-style shorthand by matching each space-separated bit
 * against the list-style-type, list-style-position and list-style-image
 * validators, then re-emitting the caught values in type, image, position
 * order.
 *
 * @param string $string
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($string, $config, $context)
{
    // regular pre-processing
    $string = $this->parseCDATA($string);
    if ($string === '') {
        return false;
    }

    // assumes URI doesn't have spaces in it
    $bits = explode(' ', strtolower($string)); // bits to process

    $caught = array();
    $caught['type'] = false;
    $caught['position'] = false;
    $caught['image'] = false;

    $i = 0; // number of catches
    $none = false;

    foreach ($bits as $bit) {
        if ($i >= 3) {
            // All three components are caught; stop scanning. This used to
            // be a bare "return;", which yielded null and threw away the
            // already validated components.
            break;
        }
        if ($bit === '') {
            continue;
        }
        foreach ($caught as $key => $status) {
            if ($status !== false) {
                continue;
            }
            $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
            if ($r === false) {
                continue;
            }
            if ($r === 'none') {
                // 'none' is only recorded once, and never for the image slot
                if ($none) {
                    continue;
                } else {
                    $none = true;
                }
                if ($key == 'image') {
                    continue;
                }
            }
            $caught[$key] = $r;
            $i++;
            break;
        }
    }

    if (!$i) {
        return false;
    }

    $ret = array();

    // construct type
    if ($caught['type']) {
        $ret[] = $caught['type'];
    }

    // construct image
    if ($caught['image']) {
        $ret[] = $caught['image'];
    }

    // construct position
    if ($caught['position']) {
        $ret[] = $caught['position'];
    }

    if (empty($ret)) {
        return false;
    }
    return implode(' ', $ret);
}
/**
 * Validates a CSS number and returns it in normalized form: leading zeros
 * of the integer part and trailing zeros of the fraction are removed, a
 * leading '+' is dropped, and a leading '-' is kept unless negatives are
 * disallowed.
 *
 * @param string $number
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return string|bool
 * @warning Some contexts do not pass $config, $context. These
 *          variables should not be used without checking HTMLPurifier_Length
 */
public function validate($number, $config, $context)
{
    $number = $this->parseCDATA($number);

    if ($number === '') {
        return false;
    }
    if ($number === '0') {
        return '0';
    }

    // peel off an optional sign character
    $sign = '';
    $lead = $number[0];
    if ($lead === '-') {
        if ($this->non_negative) {
            return false;
        }
        $sign = '-';
    }
    if ($lead === '-' || $lead === '+') {
        $number = substr($number, 1);
    }

    // plain integer
    if (ctype_digit($number)) {
        $number = ltrim($number, '0');
        if ($number) {
            return $sign . $number;
        }
        return '0';
    }

    // Period is the only non-numeric character allowed
    if (strpos($number, '.') === false) {
        return false;
    }

    list($whole, $fraction) = explode('.', $number, 2);

    if ($whole === '' && $fraction === '') {
        return false;
    }
    if ($whole !== '' && !ctype_digit($whole)) {
        return false;
    }

    $whole = ltrim($whole, '0');
    $fraction = rtrim($fraction, '0');

    if ($fraction === '') {
        // nothing significant after the period: collapse to an integer
        if ($whole) {
            return $sign . $whole;
        }
        return '0';
    }
    if (!ctype_digit($fraction)) {
        return false;
    }
    return $sign . $whole . '.' . $fraction;
}
'none') { return $string; } $parts = explode(' ', $string); $final = ''; foreach ($parts as $part) { if (isset($allowed_values[$part])) { $final .= $part . ' '; } } $final = rtrim($final); if ($final === '') { return false; } return $final; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrDef/CSS/URI.php0000644000175000017500000000501013512700112021012 0ustar ezyangezyangparseCDATA($uri_string); if (strpos($uri_string, 'url(') !== 0) { return false; } $uri_string = substr($uri_string, 4); if (strlen($uri_string) == 0) { return false; } $new_length = strlen($uri_string) - 1; if ($uri_string[$new_length] != ')') { return false; } $uri = trim(substr($uri_string, 0, $new_length)); if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) { $quote = $uri[0]; $new_length = strlen($uri) - 1; if ($uri[$new_length] !== $quote) { return false; } $uri = substr($uri, 1, $new_length - 1); } $uri = $this->expandCSSEscape($uri); $result = parent::validate($uri, $config, $context); if ($result === false) { return false; } // extra sanity check; should have been done by URI $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result); // suspicious characters are ()'; we're going to percent encode // them for safety. $result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result); // there's an extra bug where ampersands lose their escaping on // an innerHTML cycle, so a very unlucky query parameter could // then change the meaning of the URL. Unfortunately, there's // not much we can do about that... 
/**
 * Returns the stored attribute name regardless of the supplied value.
 *
 * @param string $string
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($string, $config, $context)
{
    $canonical = $this->name;
    return $canonical;
}

/**
 * Builds a new boolean attribute definition for the given name.
 *
 * @param string $string Name of attribute
 * @return HTMLPurifier_AttrDef_HTML_Bool
 */
public function make($string)
{
    $definition = new HTMLPurifier_AttrDef_HTML_Bool($string);
    return $definition;
}
/**
 * Validates an ID: applies the configured prefix, rejects IDs already
 * recorded in the IDAccumulator (unless used as a selector), checks the
 * character set, applies the blacklist regexp, and records the ID.
 *
 * @param string $id
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($id, $config, $context)
{
    if (!$this->selector && !$config->get('Attr.EnableID')) {
        return false;
    }

    $id = trim($id); // trim it first

    if ($id === '') {
        return false;
    }

    $prefix = $config->get('Attr.IDPrefix');
    if ($prefix !== '') {
        $prefix .= $config->get('Attr.IDPrefixLocal');
        // prevent re-appending the prefix
        if (strpos($id, $prefix) !== 0) {
            $id = $prefix . $id;
        }
    } elseif ($config->get('Attr.IDPrefixLocal') !== '') {
        trigger_error(
            '%Attr.IDPrefixLocal cannot be used unless ' .
            '%Attr.IDPrefix is set',
            E_USER_WARNING
        );
    }

    if (!$this->selector) {
        $id_accumulator =& $context->get('IDAccumulator');
        if (isset($id_accumulator->ids[$id])) {
            return false;
        }
    }

    if ($config->get('Attr.ID.HTML5') === true) {
        // HTML5 only forbids whitespace inside an ID. Carriage return is
        // ASCII whitespace too, so it is rejected alongside tab, line feed,
        // form feed and space (vertical tab is kept for compatibility).
        if (preg_match('/[\t\n\x0b\x0c\r ]/', $id)) {
            return false;
        }
    } else {
        // we purposely avoid using regex, hopefully this is faster
        if (ctype_alpha($id)) {
            // OK
        } else {
            // $id is non-empty here, so indexing the first byte is safe
            if (!ctype_alpha($id[0])) {
                return false;
            }
            // primitive style of regexps, I suppose
            $trim = trim(
                $id,
                'A..Za..z0..9:-._'
            );
            if ($trim !== '') {
                return false;
            }
        }
    }

    $regexp = $config->get('Attr.IDBlacklistRegexp');
    if ($regexp && preg_match($regexp, $id)) {
        return false;
    }

    if (!$this->selector) {
        $id_accumulator->add($id);
    }

    // if no change was made to the ID, return the result
    // else, return the new id if stripping whitespace made it
    //     valid, or return false.
    return $id;
}
/**
 * Splits a space separated list of tokens into its constituent parts.
 * Only tokens that match the identifier pattern and are delimited by
 * whitespace or the string boundaries are returned.
 *
 * @param string $string
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return array
 */
protected function split($string, $config, $context)
{
    // we don't support U+00A1 and up codepoints or escaping
    $before = '/(?:(?<=\s)|\A)';                      // preceded by space or string start
    $token = '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)';   // the captured token itself
    $after = '(?:(?=\s)|\z)/';                        // followed by space or string end

    $found = array();
    preg_match_all($before . $token . $after, $string, $found);

    // group 1 holds the bare tokens
    return $found[1];
}
/**
 * Validates a pixel count: strips an optional trailing 'px', requires a
 * numeric remainder, and clamps the integer value to 0..max.
 *
 * @param string $string
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return bool|string
 */
public function validate($string, $config, $context)
{
    $string = trim($string);
    if ($string === '0') {
        return $string;
    }
    if ($string === '') {
        return false;
    }

    // drop a trailing unit
    $length = strlen($string);
    $suffix = substr($string, $length - 2);
    if ($suffix == 'px') {
        $string = substr($string, 0, $length - 2);
    }

    if (!is_numeric($string)) {
        return false;
    }

    $pixels = (int)$string;
    if ($pixels < 0) {
        return '0';
    }

    // upper-bound value, extremely high values can
    // crash operating systems
    $has_cap = $this->max !== null;
    if ($has_cap && $pixels > $this->max) {
        return (string)$this->max;
    }
    return (string)$pixels;
}

/**
 * Creates a definition of the same class; an empty string means no
 * maximum, otherwise the string is cast to an integer maximum.
 *
 * @param string $string
 * @return HTMLPurifier_AttrDef
 */
public function make($string)
{
    $max = ($string === '') ? null : (int)$string;
    $class = get_class($this);
    return new $class($max);
}
undefined // or when the registered name is empty (zero length). if ($string === '') { return ''; } if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') { //IPv6 $ip = substr($string, 1, $length - 2); $valid = $this->ipv6->validate($ip, $config, $context); if ($valid === false) { return false; } return '[' . $valid . ']'; } // need to do checks on unusual encodings too $ipv4 = $this->ipv4->validate($string, $config, $context); if ($ipv4 !== false) { return $ipv4; } // A regular domain name. // This doesn't match I18N domain names, but we don't have proper IRI support, // so force users to insert Punycode. // There is not a good sense in which underscores should be // allowed, since it's technically not! (And if you go as // far to allow everything as specified by the DNS spec... // well, that's literally everything, modulo some space limits // for the components and the overall name (which, by the way, // we are NOT checking!). So we (arbitrarily) decide this: // let's allow underscores wherever we would have allowed // hyphens, if they are enabled. This is a pretty good match // for browser behavior, for example, a large number of browsers // cannot handle foo_.example.com, but foo_bar.example.com is // fairly well supported. $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : ''; // Based off of RFC 1738, but amended so that // as per RFC 3696, the top label need only not be all numeric. // The productions describing this are: $a = '[a-z]'; // alpha $an = '[a-z0-9]'; // alphanum $and = "[a-z0-9-$underscore]"; // alphanum | "-" // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum $domainlabel = "$an(?:$and*$an)?"; // AMENDED as per RFC 3696 // toplabel = alphanum | alphanum *( alphanum | "-" ) alphanum // side condition: not all numeric $toplabel = "$an(?:$and*$an)?"; // hostname = *( domainlabel "." ) toplabel [ "." 
] if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/i", $string, $matches)) { if (!ctype_digit($matches[1])) { return $string; } } // PHP 5.3 and later support this functionality natively if (function_exists('idn_to_ascii')) { if (defined('IDNA_NONTRANSITIONAL_TO_ASCII') && defined('INTL_IDNA_VARIANT_UTS46')) { $string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46); } else { $string = idn_to_ascii($string); } // If we have Net_IDNA2 support, we can support IRIs by // punycoding them. (This is the most portable thing to do, // since otherwise we have to assume browsers support } elseif ($config->get('Core.EnableIDNA')) { $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true)); // we need to encode each period separately $parts = explode('.', $string); try { $new_parts = array(); foreach ($parts as $part) { $encodable = false; for ($i = 0, $c = strlen($part); $i < $c; $i++) { if (ord($part[$i]) > 0x7a) { $encodable = true; break; } } if (!$encodable) { $new_parts[] = $part; } else { $new_parts[] = $idna->encode($part); } } $string = implode('.', $new_parts); } catch (Exception $e) { // XXX error reporting } } // Try again if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { return $string; } return false; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrDef/URI/IPv4.php0000644000175000017500000000174613512700112021160 0ustar ezyangezyangip4) { $this->_loadRegex(); } if (preg_match('#^' . $this->ip4 . '$#s', $aIP)) { return $aIP; } return false; } /** * Lazy load function to prevent regex from being stuffed in * cache. 
*/ protected function _loadRegex() { $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255 $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})"; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrDef/URI/IPv6.php0000644000175000017500000000465513512700112021164 0ustar ezyangezyangip4) { $this->_loadRegex(); } $original = $aIP; $hex = '[0-9a-fA-F]'; $blk = '(?:' . $hex . '{1,4})'; $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 // prefix check if (strpos($aIP, '/') !== false) { if (preg_match('#' . $pre . '$#s', $aIP, $find)) { $aIP = substr($aIP, 0, 0 - strlen($find[0])); unset($find); } else { return false; } } // IPv4-compatiblity check if (preg_match('#(?<=:' . ')' . $this->ip4 . '$#s', $aIP, $find)) { $aIP = substr($aIP, 0, 0 - strlen($find[0])); $ip = explode('.', $find[0]); $ip = array_map('dechex', $ip); $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; unset($find, $ip); } // compression check $aIP = explode('::', $aIP); $c = count($aIP); if ($c > 2) { return false; } elseif ($c == 2) { list($first, $second) = $aIP; $first = explode(':', $first); $second = explode(':', $second); if (count($first) + count($second) > 8) { return false; } while (count($first) < 8) { array_push($first, '0'); } array_splice($first, 8 - count($second), 8, $second); $aIP = $first; unset($first, $second); } else { $aIP = explode(':', $aIP[0]); } $c = count($aIP); if ($c != 8) { return false; } // All the pieces should be 16-bit hex strings. Are they? 
foreach ($aIP as $piece) { if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) { return false; } } return $original; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php0000644000175000017500000000147013512700112023606 0ustar ezyangezyang" // that needs more percent encoding to be done if ($string == '') { return false; } $string = trim($string); $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string); return $result ? $string : false; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/Background.php0000644000175000017500000000127013512700112023263 0ustar ezyangezyangconfiscateAttr($attr, 'background'); // some validation should happen here $this->prependCSS($attr, "background-image:url($background);"); return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/BdoDir.php0000644000175000017500000000117713512700112022355 0ustar ezyangezyangget('Attr.DefaultTextDir'); return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/BgColor.php0000644000175000017500000000124013512700112022530 0ustar ezyangezyangconfiscateAttr($attr, 'bgcolor'); // some validation should happen here $this->prependCSS($attr, "background-color:$bgcolor;"); return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/BoolToCSS.php0000644000175000017500000000207713512700112022761 0ustar ezyangezyangattr = $attr; $this->css = $css; } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr[$this->attr])) { return $attr; } unset($attr[$this->attr]); $this->prependCSS($attr, $this->css); return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/Border.php0000644000175000017500000000124413512700112022422 0ustar ezyangezyangconfiscateAttr($attr, 'border'); // some 
validation should happen here $this->prependCSS($attr, "border:{$border_width}px solid;"); return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/EnumToCSS.php0000644000175000017500000000327513512700112022773 0ustar ezyangezyangattr = $attr; $this->enumToCSS = $enum_to_css; $this->caseSensitive = (bool)$case_sensitive; } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr[$this->attr])) { return $attr; } $value = trim($attr[$this->attr]); unset($attr[$this->attr]); if (!$this->caseSensitive) { $value = strtolower($value); } if (!isset($this->enumToCSS[$value])) { return $attr; } $this->prependCSS($attr, $this->enumToCSS[$value]); return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/ImgRequired.php0000644000175000017500000000246313512700112023426 0ustar ezyangezyangget('Core.RemoveInvalidImg')) { return $attr; } $attr['src'] = $config->get('Attr.DefaultInvalidImage'); $src = false; } if (!isset($attr['alt'])) { if ($src) { $alt = $config->get('Attr.DefaultImageAlt'); if ($alt === null) { $attr['alt'] = basename($attr['src']); } else { $attr['alt'] = $alt; } } else { $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt'); } } return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/ImgSpace.php0000644000175000017500000000256613512700112022705 0ustar ezyangezyang array('left', 'right'), 'vspace' => array('top', 'bottom') ); /** * @param string $attr */ public function __construct($attr) { $this->attr = $attr; if (!isset($this->css[$attr])) { trigger_error(htmlspecialchars($attr) . 
' is not valid space attribute'); } } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr[$this->attr])) { return $attr; } $width = $this->confiscateAttr($attr, $this->attr); // some validation could happen here if (!isset($this->css[$this->attr])) { return $attr; } $style = ''; foreach ($this->css[$this->attr] as $suffix) { $property = "margin-$suffix"; $style .= "$property:{$width}px;"; } $this->prependCSS($attr, $style); return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/Input.php0000644000175000017500000000310013512700112022275 0ustar ezyangezyangpixels = new HTMLPurifier_AttrDef_HTML_Pixels(); } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr['type'])) { $t = 'text'; } else { $t = strtolower($attr['type']); } if (isset($attr['checked']) && $t !== 'radio' && $t !== 'checkbox') { unset($attr['checked']); } if (isset($attr['maxlength']) && $t !== 'text' && $t !== 'password') { unset($attr['maxlength']); } if (isset($attr['size']) && $t !== 'text' && $t !== 'password') { $result = $this->pixels->validate($attr['size'], $config, $context); if ($result === false) { unset($attr['size']); } else { $attr['size'] = $result; } } if (isset($attr['src']) && $t !== 'image') { unset($attr['src']); } if (!isset($attr['value']) && ($t === 'radio' || $t === 'checkbox')) { $attr['value'] = ''; } return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/Lang.php0000644000175000017500000000153013512700112022064 0ustar ezyangezyangname = $name; $this->cssName = $css_name ? 
$css_name : $name; } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr[$this->name])) { return $attr; } $length = $this->confiscateAttr($attr, $this->name); if (ctype_digit($length)) { $length .= 'px'; } $this->prependCSS($attr, $this->cssName . ":$length;"); return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/Name.php0000644000175000017500000000145013512700112022064 0ustar ezyangezyangget('HTML.Attr.Name.UseCDATA')) { return $attr; } if (!isset($attr['name'])) { return $attr; } $id = $this->confiscateAttr($attr, 'name'); if (isset($attr['id'])) { return $attr; } $attr['id'] = $id; return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/NameSync.php0000644000175000017500000000202213512700112022715 0ustar ezyangezyangidDef = new HTMLPurifier_AttrDef_HTML_ID(); } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr['name'])) { return $attr; } $name = $attr['name']; if (isset($attr['id']) && $attr['id'] === $name) { return $attr; } $result = $this->idDef->validate($name, $config, $context); if ($result === false) { unset($attr['name']); } else { $attr['name'] = $result; } return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/Nofollow.php0000644000175000017500000000243513512700112023007 0ustar ezyangezyangparser = new HTMLPurifier_URIParser(); } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr['href'])) { return $attr; } // XXX Kind of inefficient $url = $this->parser->parse($attr['href']); $scheme = $url->getSchemeObj($config, $context); if 
($scheme->browsable && !$url->isLocal($config, $context)) { if (isset($attr['rel'])) { $rels = explode(' ', $attr['rel']); if (!in_array('nofollow', $rels)) { $rels[] = 'nofollow'; } $attr['rel'] = implode(' ', $rels); } else { $attr['rel'] = 'nofollow'; } } return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/SafeEmbed.php0000644000175000017500000000107213512700112023017 0ustar ezyangezyanguri = new HTMLPurifier_AttrDef_URI(true); // embedded $this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent')); } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { // If we add support for other objects, we'll need to alter the // transforms. switch ($attr['name']) { // application/x-shockwave-flash // Keep this synchronized with Injector/SafeObject.php case 'allowScriptAccess': $attr['value'] = 'never'; break; case 'allowNetworking': $attr['value'] = 'internal'; break; case 'allowFullScreen': if ($config->get('HTML.FlashAllowFullScreen')) { $attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false'; } else { $attr['value'] = 'false'; } break; case 'wmode': $attr['value'] = $this->wmode->validate($attr['value'], $config, $context); break; case 'movie': case 'src': $attr['name'] = "movie"; $attr['value'] = $this->uri->validate($attr['value'], $config, $context); break; case 'flashvars': // we're going to allow arbitrary inputs to the SWF, on // the reasoning that it could only hack the SWF, not us. 
break; // add other cases to support other param name/value pairs default: $attr['name'] = $attr['value'] = null; } return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/ScriptRequired.php0000644000175000017500000000100413512700112024144 0ustar ezyangezyang */ class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform { /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr['type'])) { $attr['type'] = 'text/javascript'; } return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/TargetBlank.php0000644000175000017500000000210413512700112023377 0ustar ezyangezyangparser = new HTMLPurifier_URIParser(); } /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { if (!isset($attr['href'])) { return $attr; } // XXX Kind of inefficient $url = $this->parser->parse($attr['href']); $scheme = $url->getSchemeObj($config, $context); if ($scheme->browsable && !$url->isBenign($config, $context)) { $attr['target'] = '_blank'; } return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/AttrTransform/TargetNoopener.php0000644000175000017500000000177613512700112024153 0ustar ezyangezyang */ class HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform { /** * @param array $attr * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function transform($attr, $config, $context) { // Calculated from Firefox if (!isset($attr['cols'])) { $attr['cols'] = '22'; } if (!isset($attr['rows'])) { $attr['rows'] = '3'; } return $attr; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ChildDef/Chameleon.php0000644000175000017500000000355213512700112021740 0ustar ezyangezyanginline = 
new HTMLPurifier_ChildDef_Optional($inline); $this->block = new HTMLPurifier_ChildDef_Optional($block); $this->elements = $this->block->elements; } /** * @param HTMLPurifier_Node[] $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool */ public function validateChildren($children, $config, $context) { if ($context->get('IsInline') === false) { return $this->block->validateChildren( $children, $config, $context ); } else { return $this->inline->validateChildren( $children, $config, $context ); } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ChildDef/Custom.php0000644000175000017500000000525513512700112021321 0ustar ezyangezyangdtd_regex = $dtd_regex; $this->_compileRegex(); } /** * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) */ protected function _compileRegex() { $raw = str_replace(' ', '', $this->dtd_regex); if ($raw{0} != '(') { $raw = "($raw)"; } $el = '[#a-zA-Z0-9_.-]+'; $reg = $raw; // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M // DOING! Seriously: if there's problems, please report them. // collect all elements into the $elements array preg_match_all("/$el/", $reg, $matches); foreach ($matches[0] as $match) { $this->elements[$match] = true; } // setup all elements as parentheticals with leading commas $reg = preg_replace("/$el/", '(,\\0)', $reg); // remove commas when they were not solicited $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg); // remove all non-paranthetical commas: they are handled by first regex $reg = preg_replace("/,\(/", '(', $reg); $this->_pcre_regex = $reg; } /** * @param HTMLPurifier_Node[] $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool */ public function validateChildren($children, $config, $context) { $list_of_children = ''; $nesting = 0; // depth into the nest foreach ($children as $node) { if (!empty($node->is_whitespace)) { continue; } $list_of_children .= $node->name . 
','; } // add leading comma to deal with stray comma declarations $list_of_children = ',' . rtrim($list_of_children, ','); $okay = preg_match( '/^,?' . $this->_pcre_regex . '$/', $list_of_children ); return (bool)$okay; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ChildDef/Empty.php0000644000175000017500000000154213512700112021140 0ustar ezyangezyang true, 'ul' => true, 'ol' => true); /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { // Flag for subclasses $this->whitespace = false; // if there are no tokens, delete parent node if (empty($children)) { return false; } // if li is not allowed, delete parent node if (!isset($config->getHTMLDefinition()->info['li'])) { trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING); return false; } // the new set of children $result = array(); // a little sanity check to make sure it's not ALL whitespace $all_whitespace = true; $current_li = null; foreach ($children as $node) { if (!empty($node->is_whitespace)) { $result[] = $node; continue; } $all_whitespace = false; // phew, we're not talking about whitespace if ($node->name === 'li') { // good $current_li = $node; $result[] = $node; } else { // we want to tuck this into the previous li // Invariant: we expect the node to be ol/ul // ToDo: Make this more robust in the case of not ol/ul // by distinguishing between existing li and li created // to handle non-list elements; non-list elements should // not be appended to an existing li; only li created // for non-list. This distinction is not currently made. if ($current_li === null) { $current_li = new HTMLPurifier_Node_Element('li'); $result[] = $current_li; } $current_li->children[] = $node; $current_li->empty = false; // XXX fascinating! 
Check for this error elsewhere ToDo } } if (empty($result)) { return false; } if ($all_whitespace) { return false; } return $result; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ChildDef/Optional.php0000644000175000017500000000227113512700112021627 0ustar ezyangezyangwhitespace) { return $children; } else { return array(); } } return $result; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ChildDef/Required.php0000644000175000017500000000642613512700112021630 0ustar ezyangezyang $x) { $elements[$i] = true; if (empty($i)) { unset($elements[$i]); } // remove blank } } $this->elements = $elements; } /** * @type bool */ public $allow_empty = false; /** * @type string */ public $type = 'required'; /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { // Flag for subclasses $this->whitespace = false; // if there are no tokens, delete parent node if (empty($children)) { return false; } // the new set of children $result = array(); // whether or not parsed character data is allowed // this controls whether or not we silently drop a tag // or generate escaped HTML from it $pcdata_allowed = isset($this->elements['#PCDATA']); // a little sanity check to make sure it's not ALL whitespace $all_whitespace = true; $stack = array_reverse($children); while (!empty($stack)) { $node = array_pop($stack); if (!empty($node->is_whitespace)) { $result[] = $node; continue; } $all_whitespace = false; // phew, we're not talking about whitespace if (!isset($this->elements[$node->name])) { // special case text // XXX One of these ought to be redundant or something if ($pcdata_allowed && $node instanceof HTMLPurifier_Node_Text) { $result[] = $node; continue; } // spill the child contents in // ToDo: Make configurable if ($node instanceof HTMLPurifier_Node_Element) { for ($i = count($node->children) - 1; $i >= 0; $i--) { $stack[] = 
$node->children[$i]; } continue; } continue; } $result[] = $node; } if (empty($result)) { return false; } if ($all_whitespace) { $this->whitespace = true; return false; } return $result; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ChildDef/StrictBlockquote.php0000644000175000017500000000554213512700112023347 0ustar ezyangezyanginit($config); return $this->fake_elements; } /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { $this->init($config); // trick the parent class into thinking it allows more $this->elements = $this->fake_elements; $result = parent::validateChildren($children, $config, $context); $this->elements = $this->real_elements; if ($result === false) { return array(); } if ($result === true) { $result = $children; } $def = $config->getHTMLDefinition(); $block_wrap_name = $def->info_block_wrapper; $block_wrap = false; $ret = array(); foreach ($result as $node) { if ($block_wrap === false) { if (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) || ($node instanceof HTMLPurifier_Node_Element && !isset($this->elements[$node->name]))) { $block_wrap = new HTMLPurifier_Node_Element($def->info_block_wrapper); $ret[] = $block_wrap; } } else { if ($node instanceof HTMLPurifier_Node_Element && isset($this->elements[$node->name])) { $block_wrap = false; } } if ($block_wrap) { $block_wrap->children[] = $node; } else { $ret[] = $node; } } return $ret; } /** * @param HTMLPurifier_Config $config */ private function init($config) { if (!$this->init) { $def = $config->getHTMLDefinition(); // allow all inline elements $this->real_elements = $this->elements; $this->fake_elements = $def->info_content_sets['Flow']; $this->fake_elements['#PCDATA'] = true; $this->init = true; } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ChildDef/Table.php0000644000175000017500000001573013512700112021075 0ustar 
ezyangezyang true, 'tbody' => true, 'thead' => true, 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true ); public function __construct() { } /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { if (empty($children)) { return false; } // only one of these elements is allowed in a table $caption = false; $thead = false; $tfoot = false; // whitespace $initial_ws = array(); $after_caption_ws = array(); $after_thead_ws = array(); $after_tfoot_ws = array(); // as many of these as you want $cols = array(); $content = array(); $tbody_mode = false; // if true, then we need to wrap any stray // s with a . $ws_accum =& $initial_ws; foreach ($children as $node) { if ($node instanceof HTMLPurifier_Node_Comment) { $ws_accum[] = $node; continue; } switch ($node->name) { case 'tbody': $tbody_mode = true; // fall through case 'tr': $content[] = $node; $ws_accum =& $content; break; case 'caption': // there can only be one caption! if ($caption !== false) break; $caption = $node; $ws_accum =& $after_caption_ws; break; case 'thead': $tbody_mode = true; // XXX This breaks rendering properties with // Firefox, which never floats a to // the top. Ever. (Our scheme will float the // first to the top.) So maybe // s that are not first should be // turned into ? Very tricky, indeed. if ($thead === false) { $thead = $node; $ws_accum =& $after_thead_ws; } else { // Oops, there's a second one! What // should we do? Current behavior is to // transmutate the first and last entries into // tbody tags, and then put into content. // Maybe a better idea is to *attach // it* to the existing thead or tfoot? // We don't do this, because Firefox // doesn't float an extra tfoot to the // bottom like it does for the first one. 
$node->name = 'tbody'; $content[] = $node; $ws_accum =& $content; } break; case 'tfoot': // see above for some aveats $tbody_mode = true; if ($tfoot === false) { $tfoot = $node; $ws_accum =& $after_tfoot_ws; } else { $node->name = 'tbody'; $content[] = $node; $ws_accum =& $content; } break; case 'colgroup': case 'col': $cols[] = $node; $ws_accum =& $cols; break; case '#PCDATA': // How is whitespace handled? We treat is as sticky to // the *end* of the previous element. So all of the // nonsense we have worked on is to keep things // together. if (!empty($node->is_whitespace)) { $ws_accum[] = $node; } break; } } if (empty($content)) { return false; } $ret = $initial_ws; if ($caption !== false) { $ret[] = $caption; $ret = array_merge($ret, $after_caption_ws); } if ($cols !== false) { $ret = array_merge($ret, $cols); } if ($thead !== false) { $ret[] = $thead; $ret = array_merge($ret, $after_thead_ws); } if ($tfoot !== false) { $ret[] = $tfoot; $ret = array_merge($ret, $after_tfoot_ws); } if ($tbody_mode) { // we have to shuffle tr into tbody $current_tr_tbody = null; foreach($content as $node) { switch ($node->name) { case 'tbody': $current_tr_tbody = null; $ret[] = $node; break; case 'tr': if ($current_tr_tbody === null) { $current_tr_tbody = new HTMLPurifier_Node_Element('tbody'); $ret[] = $current_tr_tbody; } $current_tr_tbody->children[] = $node; break; case '#PCDATA': //assert($node->is_whitespace); if ($current_tr_tbody === null) { $ret[] = $node; } else { $current_tr_tbody->children[] = $node; } break; } } } else { $ret = array_merge($ret, $content); } return $ret; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/Exception.php0000644000175000017500000000024213512700112022660 0ustar ezyangezyang array(directive info) * @type HTMLPurifier_ConfigSchema_Interchange_Directive[] */ public $directives = array(); /** * Adds a directive array to $directives * @param HTMLPurifier_ConfigSchema_Interchange_Directive $directive * @throws 
HTMLPurifier_ConfigSchema_Exception */ public function addDirective($directive) { if (isset($this->directives[$i = $directive->id->toString()])) { throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$i'"); } $this->directives[$i] = $directive; } /** * Convenience function to perform standard validation. Throws exception * on failed validation. */ public function validate() { $validator = new HTMLPurifier_ConfigSchema_Validator(); return $validator->validate($this); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/InterchangeBuilder.php0000644000175000017500000001571313512700112024471 0ustar ezyangezyangvarParser = $varParser ? $varParser : new HTMLPurifier_VarParser_Native(); } /** * @param string $dir * @return HTMLPurifier_ConfigSchema_Interchange */ public static function buildFromDirectory($dir = null) { $builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder(); $interchange = new HTMLPurifier_ConfigSchema_Interchange(); return $builder->buildDir($interchange, $dir); } /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @param string $dir * @return HTMLPurifier_ConfigSchema_Interchange */ public function buildDir($interchange, $dir = null) { if (!$dir) { $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema'; } if (file_exists($dir . '/info.ini')) { $info = parse_ini_file($dir . '/info.ini'); $interchange->name = $info['name']; } $files = array(); $dh = opendir($dir); while (false !== ($file = readdir($dh))) { if (!$file || $file[0] == '.' || strrchr($file, '.') !== '.txt') { continue; } $files[] = $file; } closedir($dh); sort($files); foreach ($files as $file) { $this->buildFile($interchange, $dir . '/' . 
$file); } return $interchange; } /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @param string $file */ public function buildFile($interchange, $file) { $parser = new HTMLPurifier_StringHashParser(); $this->build( $interchange, new HTMLPurifier_StringHash($parser->parseFile($file)) ); } /** * Builds an interchange object based on a hash. * @param HTMLPurifier_ConfigSchema_Interchange $interchange HTMLPurifier_ConfigSchema_Interchange object to build * @param HTMLPurifier_StringHash $hash source data * @throws HTMLPurifier_ConfigSchema_Exception */ public function build($interchange, $hash) { if (!$hash instanceof HTMLPurifier_StringHash) { $hash = new HTMLPurifier_StringHash($hash); } if (!isset($hash['ID'])) { throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID'); } if (strpos($hash['ID'], '.') === false) { if (count($hash) == 2 && isset($hash['DESCRIPTION'])) { $hash->offsetGet('DESCRIPTION'); // prevent complaining } else { throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace'); } } else { $this->buildDirective($interchange, $hash); } $this->_findUnused($hash); } /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @param HTMLPurifier_StringHash $hash * @throws HTMLPurifier_ConfigSchema_Exception */ public function buildDirective($interchange, $hash) { $directive = new HTMLPurifier_ConfigSchema_Interchange_Directive(); // These are required elements: $directive->id = $this->id($hash->offsetGet('ID')); $id = $directive->id->toString(); // convenience if (isset($hash['TYPE'])) { $type = explode('/', $hash->offsetGet('TYPE')); if (isset($type[1])) { $directive->typeAllowsNull = true; } $directive->type = $type[0]; } else { throw new HTMLPurifier_ConfigSchema_Exception("TYPE in directive hash '$id' not defined"); } if (isset($hash['DEFAULT'])) { try { $directive->default = $this->varParser->parse( $hash->offsetGet('DEFAULT'), $directive->type, $directive->typeAllowsNull ); } catch 
(HTMLPurifier_VarParserException $e) { throw new HTMLPurifier_ConfigSchema_Exception($e->getMessage() . " in DEFAULT in directive hash '$id'"); } } if (isset($hash['DESCRIPTION'])) { $directive->description = $hash->offsetGet('DESCRIPTION'); } if (isset($hash['ALLOWED'])) { $directive->allowed = $this->lookup($this->evalArray($hash->offsetGet('ALLOWED'))); } if (isset($hash['VALUE-ALIASES'])) { $directive->valueAliases = $this->evalArray($hash->offsetGet('VALUE-ALIASES')); } if (isset($hash['ALIASES'])) { $raw_aliases = trim($hash->offsetGet('ALIASES')); $aliases = preg_split('/\s*,\s*/', $raw_aliases); foreach ($aliases as $alias) { $directive->aliases[] = $this->id($alias); } } if (isset($hash['VERSION'])) { $directive->version = $hash->offsetGet('VERSION'); } if (isset($hash['DEPRECATED-USE'])) { $directive->deprecatedUse = $this->id($hash->offsetGet('DEPRECATED-USE')); } if (isset($hash['DEPRECATED-VERSION'])) { $directive->deprecatedVersion = $hash->offsetGet('DEPRECATED-VERSION'); } if (isset($hash['EXTERNAL'])) { $directive->external = preg_split('/\s*,\s*/', trim($hash->offsetGet('EXTERNAL'))); } $interchange->addDirective($directive); } /** * Evaluates an array PHP code string without array() wrapper * @param string $contents */ protected function evalArray($contents) { return eval('return array(' . $contents . ');'); } /** * Converts an array list into a lookup array. * @param array $array * @return array */ protected function lookup($array) { $ret = array(); foreach ($array as $val) { $ret[$val] = true; } return $ret; } /** * Convenience function that creates an HTMLPurifier_ConfigSchema_Interchange_Id * object based on a string Id. * @param string $id * @return HTMLPurifier_ConfigSchema_Interchange_Id */ protected function id($id) { return HTMLPurifier_ConfigSchema_Interchange_Id::make($id); } /** * Triggers errors for any unused keys passed in the hash; such keys * may indicate typos, missing values, etc. 
* @param HTMLPurifier_StringHash $hash Hash to check. */ protected function _findUnused($hash) { $accessed = $hash->getAccessed(); foreach ($hash as $k => $v) { if (!isset($accessed[$k])) { trigger_error("String hash key '$k' not used by builder", E_USER_NOTICE); } } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema.ser0000644000175000017500000005707413512700112022203 0ustar ezyangezyangO:25:"HTMLPurifier_ConfigSchema":3:{s:8:"defaults";a:126:{s:19:"Attr.AllowedClasses";N;s:24:"Attr.AllowedFrameTargets";a:0:{}s:15:"Attr.AllowedRel";a:0:{}s:15:"Attr.AllowedRev";a:0:{}s:18:"Attr.ClassUseCDATA";N;s:20:"Attr.DefaultImageAlt";N;s:24:"Attr.DefaultInvalidImage";s:0:"";s:27:"Attr.DefaultInvalidImageAlt";s:13:"Invalid image";s:19:"Attr.DefaultTextDir";s:3:"ltr";s:13:"Attr.EnableID";b:0;s:21:"Attr.ForbiddenClasses";a:0:{}s:13:"Attr.ID.HTML5";N;s:16:"Attr.IDBlacklist";a:0:{}s:22:"Attr.IDBlacklistRegexp";N;s:13:"Attr.IDPrefix";s:0:"";s:18:"Attr.IDPrefixLocal";s:0:"";s:24:"AutoFormat.AutoParagraph";b:0;s:17:"AutoFormat.Custom";a:0:{}s:25:"AutoFormat.DisplayLinkURI";b:0;s:18:"AutoFormat.Linkify";b:0;s:33:"AutoFormat.PurifierLinkify.DocURL";s:3:"#%s";s:26:"AutoFormat.PurifierLinkify";b:0;s:32:"AutoFormat.RemoveEmpty.Predicate";a:4:{s:8:"colgroup";a:0:{}s:2:"th";a:0:{}s:2:"td";a:0:{}s:6:"iframe";a:1:{i:0;s:3:"src";}}s:44:"AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions";a:2:{s:2:"td";b:1;s:2:"th";b:1;}s:33:"AutoFormat.RemoveEmpty.RemoveNbsp";b:0;s:22:"AutoFormat.RemoveEmpty";b:0;s:39:"AutoFormat.RemoveSpansWithoutAttributes";b:0;s:19:"CSS.AllowDuplicates";b:0;s:18:"CSS.AllowImportant";b:0;s:15:"CSS.AllowTricky";b:0;s:16:"CSS.AllowedFonts";N;s:21:"CSS.AllowedProperties";N;s:17:"CSS.DefinitionRev";i:1;s:23:"CSS.ForbiddenProperties";a:0:{}s:16:"CSS.MaxImgLength";s:6:"1200px";s:15:"CSS.Proprietary";b:0;s:11:"CSS.Trusted";b:0;s:20:"Cache.DefinitionImpl";s:10:"Serializer";s:20:"Cache.SerializerPath";N;s:27:"Cache.SerializerPermissions";i:493;s:22:"Core.Aggressiv
elyFixLt";b:1;s:29:"Core.AggressivelyRemoveScript";b:1;s:28:"Core.AllowHostnameUnderscore";b:0;s:23:"Core.AllowParseManyTags";b:0;s:18:"Core.CollectErrors";b:0;s:18:"Core.ColorKeywords";a:148:{s:9:"aliceblue";s:7:"#F0F8FF";s:12:"antiquewhite";s:7:"#FAEBD7";s:4:"aqua";s:7:"#00FFFF";s:10:"aquamarine";s:7:"#7FFFD4";s:5:"azure";s:7:"#F0FFFF";s:5:"beige";s:7:"#F5F5DC";s:6:"bisque";s:7:"#FFE4C4";s:5:"black";s:7:"#000000";s:14:"blanchedalmond";s:7:"#FFEBCD";s:4:"blue";s:7:"#0000FF";s:10:"blueviolet";s:7:"#8A2BE2";s:5:"brown";s:7:"#A52A2A";s:9:"burlywood";s:7:"#DEB887";s:9:"cadetblue";s:7:"#5F9EA0";s:10:"chartreuse";s:7:"#7FFF00";s:9:"chocolate";s:7:"#D2691E";s:5:"coral";s:7:"#FF7F50";s:14:"cornflowerblue";s:7:"#6495ED";s:8:"cornsilk";s:7:"#FFF8DC";s:7:"crimson";s:7:"#DC143C";s:4:"cyan";s:7:"#00FFFF";s:8:"darkblue";s:7:"#00008B";s:8:"darkcyan";s:7:"#008B8B";s:13:"darkgoldenrod";s:7:"#B8860B";s:8:"darkgray";s:7:"#A9A9A9";s:8:"darkgrey";s:7:"#A9A9A9";s:9:"darkgreen";s:7:"#006400";s:9:"darkkhaki";s:7:"#BDB76B";s:11:"darkmagenta";s:7:"#8B008B";s:14:"darkolivegreen";s:7:"#556B2F";s:10:"darkorange";s:7:"#FF8C00";s:10:"darkorchid";s:7:"#9932CC";s:7:"darkred";s:7:"#8B0000";s:10:"darksalmon";s:7:"#E9967A";s:12:"darkseagreen";s:7:"#8FBC8F";s:13:"darkslateblue";s:7:"#483D8B";s:13:"darkslategray";s:7:"#2F4F4F";s:13:"darkslategrey";s:7:"#2F4F4F";s:13:"darkturquoise";s:7:"#00CED1";s:10:"darkviolet";s:7:"#9400D3";s:8:"deeppink";s:7:"#FF1493";s:11:"deepskyblue";s:7:"#00BFFF";s:7:"dimgray";s:7:"#696969";s:7:"dimgrey";s:7:"#696969";s:10:"dodgerblue";s:7:"#1E90FF";s:9:"firebrick";s:7:"#B22222";s:11:"floralwhite";s:7:"#FFFAF0";s:11:"forestgreen";s:7:"#228B22";s:7:"fuchsia";s:7:"#FF00FF";s:9:"gainsboro";s:7:"#DCDCDC";s:10:"ghostwhite";s:7:"#F8F8FF";s:4:"gold";s:7:"#FFD700";s:9:"goldenrod";s:7:"#DAA520";s:4:"gray";s:7:"#808080";s:4:"grey";s:7:"#808080";s:5:"green";s:7:"#008000";s:11:"greenyellow";s:7:"#ADFF2F";s:8:"honeydew";s:7:"#F0FFF0";s:7:"hotpink";s:7:"#FF69B4";s:9:"indianred";s:7:"#CD5C5C"
;s:6:"indigo";s:7:"#4B0082";s:5:"ivory";s:7:"#FFFFF0";s:5:"khaki";s:7:"#F0E68C";s:8:"lavender";s:7:"#E6E6FA";s:13:"lavenderblush";s:7:"#FFF0F5";s:9:"lawngreen";s:7:"#7CFC00";s:12:"lemonchiffon";s:7:"#FFFACD";s:9:"lightblue";s:7:"#ADD8E6";s:10:"lightcoral";s:7:"#F08080";s:9:"lightcyan";s:7:"#E0FFFF";s:20:"lightgoldenrodyellow";s:7:"#FAFAD2";s:9:"lightgray";s:7:"#D3D3D3";s:9:"lightgrey";s:7:"#D3D3D3";s:10:"lightgreen";s:7:"#90EE90";s:9:"lightpink";s:7:"#FFB6C1";s:11:"lightsalmon";s:7:"#FFA07A";s:13:"lightseagreen";s:7:"#20B2AA";s:12:"lightskyblue";s:7:"#87CEFA";s:14:"lightslategray";s:7:"#778899";s:14:"lightslategrey";s:7:"#778899";s:14:"lightsteelblue";s:7:"#B0C4DE";s:11:"lightyellow";s:7:"#FFFFE0";s:4:"lime";s:7:"#00FF00";s:9:"limegreen";s:7:"#32CD32";s:5:"linen";s:7:"#FAF0E6";s:7:"magenta";s:7:"#FF00FF";s:6:"maroon";s:7:"#800000";s:16:"mediumaquamarine";s:7:"#66CDAA";s:10:"mediumblue";s:7:"#0000CD";s:12:"mediumorchid";s:7:"#BA55D3";s:12:"mediumpurple";s:7:"#9370DB";s:14:"mediumseagreen";s:7:"#3CB371";s:15:"mediumslateblue";s:7:"#7B68EE";s:17:"mediumspringgreen";s:7:"#00FA9A";s:15:"mediumturquoise";s:7:"#48D1CC";s:15:"mediumvioletred";s:7:"#C71585";s:12:"midnightblue";s:7:"#191970";s:9:"mintcream";s:7:"#F5FFFA";s:9:"mistyrose";s:7:"#FFE4E1";s:8:"moccasin";s:7:"#FFE4B5";s:11:"navajowhite";s:7:"#FFDEAD";s:4:"navy";s:7:"#000080";s:7:"oldlace";s:7:"#FDF5E6";s:5:"olive";s:7:"#808000";s:9:"olivedrab";s:7:"#6B8E23";s:6:"orange";s:7:"#FFA500";s:9:"orangered";s:7:"#FF4500";s:6:"orchid";s:7:"#DA70D6";s:13:"palegoldenrod";s:7:"#EEE8AA";s:9:"palegreen";s:7:"#98FB98";s:13:"paleturquoise";s:7:"#AFEEEE";s:13:"palevioletred";s:7:"#DB7093";s:10:"papayawhip";s:7:"#FFEFD5";s:9:"peachpuff";s:7:"#FFDAB9";s:4:"peru";s:7:"#CD853F";s:4:"pink";s:7:"#FFC0CB";s:4:"plum";s:7:"#DDA0DD";s:10:"powderblue";s:7:"#B0E0E6";s:6:"purple";s:7:"#800080";s:13:"rebeccapurple";s:7:"#663399";s:3:"red";s:7:"#FF0000";s:9:"rosybrown";s:7:"#BC8F8F";s:9:"royalblue";s:7:"#4169E1";s:11:"saddlebrown";s:7:"#8B4513";s
:6:"salmon";s:7:"#FA8072";s:10:"sandybrown";s:7:"#F4A460";s:8:"seagreen";s:7:"#2E8B57";s:8:"seashell";s:7:"#FFF5EE";s:6:"sienna";s:7:"#A0522D";s:6:"silver";s:7:"#C0C0C0";s:7:"skyblue";s:7:"#87CEEB";s:9:"slateblue";s:7:"#6A5ACD";s:9:"slategray";s:7:"#708090";s:9:"slategrey";s:7:"#708090";s:4:"snow";s:7:"#FFFAFA";s:11:"springgreen";s:7:"#00FF7F";s:9:"steelblue";s:7:"#4682B4";s:3:"tan";s:7:"#D2B48C";s:4:"teal";s:7:"#008080";s:7:"thistle";s:7:"#D8BFD8";s:6:"tomato";s:7:"#FF6347";s:9:"turquoise";s:7:"#40E0D0";s:6:"violet";s:7:"#EE82EE";s:5:"wheat";s:7:"#F5DEB3";s:5:"white";s:7:"#FFFFFF";s:10:"whitesmoke";s:7:"#F5F5F5";s:6:"yellow";s:7:"#FFFF00";s:11:"yellowgreen";s:7:"#9ACD32";}s:30:"Core.ConvertDocumentToFragment";b:1;s:36:"Core.DirectLexLineNumberSyncInterval";i:0;s:20:"Core.DisableExcludes";b:0;s:15:"Core.EnableIDNA";b:0;s:13:"Core.Encoding";s:5:"utf-8";s:26:"Core.EscapeInvalidChildren";b:0;s:22:"Core.EscapeInvalidTags";b:0;s:29:"Core.EscapeNonASCIICharacters";b:0;s:19:"Core.HiddenElements";a:2:{s:6:"script";b:1;s:5:"style";b:1;}s:13:"Core.Language";s:2:"en";s:24:"Core.LegacyEntityDecoder";b:0;s:14:"Core.LexerImpl";N;s:24:"Core.MaintainLineNumbers";N;s:22:"Core.NormalizeNewlines";b:1;s:21:"Core.RemoveInvalidImg";b:1;s:33:"Core.RemoveProcessingInstructions";b:0;s:25:"Core.RemoveScriptContents";N;s:13:"Filter.Custom";a:0:{}s:34:"Filter.ExtractStyleBlocks.Escaping";b:1;s:31:"Filter.ExtractStyleBlocks.Scope";N;s:34:"Filter.ExtractStyleBlocks.TidyImpl";N;s:25:"Filter.ExtractStyleBlocks";b:0;s:14:"Filter.YouTube";b:0;s:12:"HTML.Allowed";N;s:22:"HTML.AllowedAttributes";N;s:20:"HTML.AllowedComments";a:0:{}s:26:"HTML.AllowedCommentsRegexp";N;s:20:"HTML.AllowedElements";N;s:19:"HTML.AllowedModules";N;s:23:"HTML.Attr.Name.UseCDATA";b:0;s:17:"HTML.BlockWrapper";s:1:"p";s:16:"HTML.CoreModules";a:7:{s:9:"Structure";b:1;s:4:"Text";b:1;s:9:"Hypertext";b:1;s:4:"List";b:1;s:22:"NonXMLCommonAttributes";b:1;s:19:"XMLCommonAttributes";b:1;s:16:"CommonAttributes";b:1;}s:18:"HTML.CustomDoct
ype";N;s:17:"HTML.DefinitionID";N;s:18:"HTML.DefinitionRev";i:1;s:12:"HTML.Doctype";N;s:25:"HTML.FlashAllowFullScreen";b:0;s:24:"HTML.ForbiddenAttributes";a:0:{}s:22:"HTML.ForbiddenElements";a:0:{}s:17:"HTML.MaxImgLength";i:1200;s:13:"HTML.Nofollow";b:0;s:11:"HTML.Parent";s:3:"div";s:16:"HTML.Proprietary";b:0;s:14:"HTML.SafeEmbed";b:0;s:15:"HTML.SafeIframe";b:0;s:15:"HTML.SafeObject";b:0;s:18:"HTML.SafeScripting";a:0:{}s:11:"HTML.Strict";b:0;s:16:"HTML.TargetBlank";b:0;s:19:"HTML.TargetNoopener";b:1;s:21:"HTML.TargetNoreferrer";b:1;s:12:"HTML.TidyAdd";a:0:{}s:14:"HTML.TidyLevel";s:6:"medium";s:15:"HTML.TidyRemove";a:0:{}s:12:"HTML.Trusted";b:0;s:10:"HTML.XHTML";b:1;s:28:"Output.CommentScriptContents";b:1;s:19:"Output.FixInnerHTML";b:1;s:18:"Output.FlashCompat";b:0;s:14:"Output.Newline";N;s:15:"Output.SortAttr";b:0;s:17:"Output.TidyFormat";b:0;s:17:"Test.ForceNoIconv";b:0;s:18:"URI.AllowedSchemes";a:7:{s:4:"http";b:1;s:5:"https";b:1;s:6:"mailto";b:1;s:3:"ftp";b:1;s:4:"nntp";b:1;s:4:"news";b:1;s:3:"tel";b:1;}s:8:"URI.Base";N;s:17:"URI.DefaultScheme";s:4:"http";s:16:"URI.DefinitionID";N;s:17:"URI.DefinitionRev";i:1;s:11:"URI.Disable";b:0;s:19:"URI.DisableExternal";b:0;s:28:"URI.DisableExternalResources";b:0;s:20:"URI.DisableResources";b:0;s:8:"URI.Host";N;s:17:"URI.HostBlacklist";a:0:{}s:16:"URI.MakeAbsolute";b:0;s:9:"URI.Munge";N;s:18:"URI.MungeResources";b:0;s:18:"URI.MungeSecretKey";N;s:26:"URI.OverrideAllowedSchemes";b:1;s:20:"URI.SafeIframeRegexp";N;}s:12:"defaultPlist";O:25:"HTMLPurifier_PropertyList":3:{s:7:"*data";a:126:{s:19:"Attr.AllowedClasses";N;s:24:"Attr.AllowedFrameTargets";a:0:{}s:15:"Attr.AllowedRel";a:0:{}s:15:"Attr.AllowedRev";a:0:{}s:18:"Attr.ClassUseCDATA";N;s:20:"Attr.DefaultImageAlt";N;s:24:"Attr.DefaultInvalidImage";s:0:"";s:27:"Attr.DefaultInvalidImageAlt";s:13:"Invalid 
image";s:19:"Attr.DefaultTextDir";s:3:"ltr";s:13:"Attr.EnableID";b:0;s:21:"Attr.ForbiddenClasses";a:0:{}s:13:"Attr.ID.HTML5";N;s:16:"Attr.IDBlacklist";a:0:{}s:22:"Attr.IDBlacklistRegexp";N;s:13:"Attr.IDPrefix";s:0:"";s:18:"Attr.IDPrefixLocal";s:0:"";s:24:"AutoFormat.AutoParagraph";b:0;s:17:"AutoFormat.Custom";a:0:{}s:25:"AutoFormat.DisplayLinkURI";b:0;s:18:"AutoFormat.Linkify";b:0;s:33:"AutoFormat.PurifierLinkify.DocURL";s:3:"#%s";s:26:"AutoFormat.PurifierLinkify";b:0;s:32:"AutoFormat.RemoveEmpty.Predicate";a:4:{s:8:"colgroup";a:0:{}s:2:"th";a:0:{}s:2:"td";a:0:{}s:6:"iframe";a:1:{i:0;s:3:"src";}}s:44:"AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions";a:2:{s:2:"td";b:1;s:2:"th";b:1;}s:33:"AutoFormat.RemoveEmpty.RemoveNbsp";b:0;s:22:"AutoFormat.RemoveEmpty";b:0;s:39:"AutoFormat.RemoveSpansWithoutAttributes";b:0;s:19:"CSS.AllowDuplicates";b:0;s:18:"CSS.AllowImportant";b:0;s:15:"CSS.AllowTricky";b:0;s:16:"CSS.AllowedFonts";N;s:21:"CSS.AllowedProperties";N;s:17:"CSS.DefinitionRev";i:1;s:23:"CSS.ForbiddenProperties";a:0:{}s:16:"CSS.MaxImgLength";s:6:"1200px";s:15:"CSS.Proprietary";b:0;s:11:"CSS.Trusted";b:0;s:20:"Cache.DefinitionImpl";s:10:"Serializer";s:20:"Cache.SerializerPath";N;s:27:"Cache.SerializerPermissions";i:493;s:22:"Core.AggressivelyFixLt";b:1;s:29:"Core.AggressivelyRemoveScript";b:1;s:28:"Core.AllowHostnameUnderscore";b:0;s:23:"Core.AllowParseManyTags";b:0;s:18:"Core.CollectErrors";b:0;s:18:"Core.ColorKeywords";a:148:{s:9:"aliceblue";s:7:"#F0F8FF";s:12:"antiquewhite";s:7:"#FAEBD7";s:4:"aqua";s:7:"#00FFFF";s:10:"aquamarine";s:7:"#7FFFD4";s:5:"azure";s:7:"#F0FFFF";s:5:"beige";s:7:"#F5F5DC";s:6:"bisque";s:7:"#FFE4C4";s:5:"black";s:7:"#000000";s:14:"blanchedalmond";s:7:"#FFEBCD";s:4:"blue";s:7:"#0000FF";s:10:"blueviolet";s:7:"#8A2BE2";s:5:"brown";s:7:"#A52A2A";s:9:"burlywood";s:7:"#DEB887";s:9:"cadetblue";s:7:"#5F9EA0";s:10:"chartreuse";s:7:"#7FFF00";s:9:"chocolate";s:7:"#D2691E";s:5:"coral";s:7:"#FF7F50";s:14:"cornflowerblue";s:7:"#6495ED";s:8:"cornsilk";s:7:"#FFF8D
C";s:7:"crimson";s:7:"#DC143C";s:4:"cyan";s:7:"#00FFFF";s:8:"darkblue";s:7:"#00008B";s:8:"darkcyan";s:7:"#008B8B";s:13:"darkgoldenrod";s:7:"#B8860B";s:8:"darkgray";s:7:"#A9A9A9";s:8:"darkgrey";s:7:"#A9A9A9";s:9:"darkgreen";s:7:"#006400";s:9:"darkkhaki";s:7:"#BDB76B";s:11:"darkmagenta";s:7:"#8B008B";s:14:"darkolivegreen";s:7:"#556B2F";s:10:"darkorange";s:7:"#FF8C00";s:10:"darkorchid";s:7:"#9932CC";s:7:"darkred";s:7:"#8B0000";s:10:"darksalmon";s:7:"#E9967A";s:12:"darkseagreen";s:7:"#8FBC8F";s:13:"darkslateblue";s:7:"#483D8B";s:13:"darkslategray";s:7:"#2F4F4F";s:13:"darkslategrey";s:7:"#2F4F4F";s:13:"darkturquoise";s:7:"#00CED1";s:10:"darkviolet";s:7:"#9400D3";s:8:"deeppink";s:7:"#FF1493";s:11:"deepskyblue";s:7:"#00BFFF";s:7:"dimgray";s:7:"#696969";s:7:"dimgrey";s:7:"#696969";s:10:"dodgerblue";s:7:"#1E90FF";s:9:"firebrick";s:7:"#B22222";s:11:"floralwhite";s:7:"#FFFAF0";s:11:"forestgreen";s:7:"#228B22";s:7:"fuchsia";s:7:"#FF00FF";s:9:"gainsboro";s:7:"#DCDCDC";s:10:"ghostwhite";s:7:"#F8F8FF";s:4:"gold";s:7:"#FFD700";s:9:"goldenrod";s:7:"#DAA520";s:4:"gray";s:7:"#808080";s:4:"grey";s:7:"#808080";s:5:"green";s:7:"#008000";s:11:"greenyellow";s:7:"#ADFF2F";s:8:"honeydew";s:7:"#F0FFF0";s:7:"hotpink";s:7:"#FF69B4";s:9:"indianred";s:7:"#CD5C5C";s:6:"indigo";s:7:"#4B0082";s:5:"ivory";s:7:"#FFFFF0";s:5:"khaki";s:7:"#F0E68C";s:8:"lavender";s:7:"#E6E6FA";s:13:"lavenderblush";s:7:"#FFF0F5";s:9:"lawngreen";s:7:"#7CFC00";s:12:"lemonchiffon";s:7:"#FFFACD";s:9:"lightblue";s:7:"#ADD8E6";s:10:"lightcoral";s:7:"#F08080";s:9:"lightcyan";s:7:"#E0FFFF";s:20:"lightgoldenrodyellow";s:7:"#FAFAD2";s:9:"lightgray";s:7:"#D3D3D3";s:9:"lightgrey";s:7:"#D3D3D3";s:10:"lightgreen";s:7:"#90EE90";s:9:"lightpink";s:7:"#FFB6C1";s:11:"lightsalmon";s:7:"#FFA07A";s:13:"lightseagreen";s:7:"#20B2AA";s:12:"lightskyblue";s:7:"#87CEFA";s:14:"lightslategray";s:7:"#778899";s:14:"lightslategrey";s:7:"#778899";s:14:"lightsteelblue";s:7:"#B0C4DE";s:11:"lightyellow";s:7:"#FFFFE0";s:4:"lime";s:7:"#00FF00";s:9:"limegreen";
s:7:"#32CD32";s:5:"linen";s:7:"#FAF0E6";s:7:"magenta";s:7:"#FF00FF";s:6:"maroon";s:7:"#800000";s:16:"mediumaquamarine";s:7:"#66CDAA";s:10:"mediumblue";s:7:"#0000CD";s:12:"mediumorchid";s:7:"#BA55D3";s:12:"mediumpurple";s:7:"#9370DB";s:14:"mediumseagreen";s:7:"#3CB371";s:15:"mediumslateblue";s:7:"#7B68EE";s:17:"mediumspringgreen";s:7:"#00FA9A";s:15:"mediumturquoise";s:7:"#48D1CC";s:15:"mediumvioletred";s:7:"#C71585";s:12:"midnightblue";s:7:"#191970";s:9:"mintcream";s:7:"#F5FFFA";s:9:"mistyrose";s:7:"#FFE4E1";s:8:"moccasin";s:7:"#FFE4B5";s:11:"navajowhite";s:7:"#FFDEAD";s:4:"navy";s:7:"#000080";s:7:"oldlace";s:7:"#FDF5E6";s:5:"olive";s:7:"#808000";s:9:"olivedrab";s:7:"#6B8E23";s:6:"orange";s:7:"#FFA500";s:9:"orangered";s:7:"#FF4500";s:6:"orchid";s:7:"#DA70D6";s:13:"palegoldenrod";s:7:"#EEE8AA";s:9:"palegreen";s:7:"#98FB98";s:13:"paleturquoise";s:7:"#AFEEEE";s:13:"palevioletred";s:7:"#DB7093";s:10:"papayawhip";s:7:"#FFEFD5";s:9:"peachpuff";s:7:"#FFDAB9";s:4:"peru";s:7:"#CD853F";s:4:"pink";s:7:"#FFC0CB";s:4:"plum";s:7:"#DDA0DD";s:10:"powderblue";s:7:"#B0E0E6";s:6:"purple";s:7:"#800080";s:13:"rebeccapurple";s:7:"#663399";s:3:"red";s:7:"#FF0000";s:9:"rosybrown";s:7:"#BC8F8F";s:9:"royalblue";s:7:"#4169E1";s:11:"saddlebrown";s:7:"#8B4513";s:6:"salmon";s:7:"#FA8072";s:10:"sandybrown";s:7:"#F4A460";s:8:"seagreen";s:7:"#2E8B57";s:8:"seashell";s:7:"#FFF5EE";s:6:"sienna";s:7:"#A0522D";s:6:"silver";s:7:"#C0C0C0";s:7:"skyblue";s:7:"#87CEEB";s:9:"slateblue";s:7:"#6A5ACD";s:9:"slategray";s:7:"#708090";s:9:"slategrey";s:7:"#708090";s:4:"snow";s:7:"#FFFAFA";s:11:"springgreen";s:7:"#00FF7F";s:9:"steelblue";s:7:"#4682B4";s:3:"tan";s:7:"#D2B48C";s:4:"teal";s:7:"#008080";s:7:"thistle";s:7:"#D8BFD8";s:6:"tomato";s:7:"#FF6347";s:9:"turquoise";s:7:"#40E0D0";s:6:"violet";s:7:"#EE82EE";s:5:"wheat";s:7:"#F5DEB3";s:5:"white";s:7:"#FFFFFF";s:10:"whitesmoke";s:7:"#F5F5F5";s:6:"yellow";s:7:"#FFFF00";s:11:"yellowgreen";s:7:"#9ACD32";}s:30:"Core.ConvertDocumentToFragment";b:1;s:36:"Core.DirectLexLine
NumberSyncInterval";i:0;s:20:"Core.DisableExcludes";b:0;s:15:"Core.EnableIDNA";b:0;s:13:"Core.Encoding";s:5:"utf-8";s:26:"Core.EscapeInvalidChildren";b:0;s:22:"Core.EscapeInvalidTags";b:0;s:29:"Core.EscapeNonASCIICharacters";b:0;s:19:"Core.HiddenElements";a:2:{s:6:"script";b:1;s:5:"style";b:1;}s:13:"Core.Language";s:2:"en";s:24:"Core.LegacyEntityDecoder";b:0;s:14:"Core.LexerImpl";N;s:24:"Core.MaintainLineNumbers";N;s:22:"Core.NormalizeNewlines";b:1;s:21:"Core.RemoveInvalidImg";b:1;s:33:"Core.RemoveProcessingInstructions";b:0;s:25:"Core.RemoveScriptContents";N;s:13:"Filter.Custom";a:0:{}s:34:"Filter.ExtractStyleBlocks.Escaping";b:1;s:31:"Filter.ExtractStyleBlocks.Scope";N;s:34:"Filter.ExtractStyleBlocks.TidyImpl";N;s:25:"Filter.ExtractStyleBlocks";b:0;s:14:"Filter.YouTube";b:0;s:12:"HTML.Allowed";N;s:22:"HTML.AllowedAttributes";N;s:20:"HTML.AllowedComments";a:0:{}s:26:"HTML.AllowedCommentsRegexp";N;s:20:"HTML.AllowedElements";N;s:19:"HTML.AllowedModules";N;s:23:"HTML.Attr.Name.UseCDATA";b:0;s:17:"HTML.BlockWrapper";s:1:"p";s:16:"HTML.CoreModules";a:7:{s:9:"Structure";b:1;s:4:"Text";b:1;s:9:"Hypertext";b:1;s:4:"List";b:1;s:22:"NonXMLCommonAttributes";b:1;s:19:"XMLCommonAttributes";b:1;s:16:"CommonAttributes";b:1;}s:18:"HTML.CustomDoctype";N;s:17:"HTML.DefinitionID";N;s:18:"HTML.DefinitionRev";i:1;s:12:"HTML.Doctype";N;s:25:"HTML.FlashAllowFullScreen";b:0;s:24:"HTML.ForbiddenAttributes";a:0:{}s:22:"HTML.ForbiddenElements";a:0:{}s:17:"HTML.MaxImgLength";i:1200;s:13:"HTML.Nofollow";b:0;s:11:"HTML.Parent";s:3:"div";s:16:"HTML.Proprietary";b:0;s:14:"HTML.SafeEmbed";b:0;s:15:"HTML.SafeIframe";b:0;s:15:"HTML.SafeObject";b:0;s:18:"HTML.SafeScripting";a:0:{}s:11:"HTML.Strict";b:0;s:16:"HTML.TargetBlank";b:0;s:19:"HTML.TargetNoopener";b:1;s:21:"HTML.TargetNoreferrer";b:1;s:12:"HTML.TidyAdd";a:0:{}s:14:"HTML.TidyLevel";s:6:"medium";s:15:"HTML.TidyRemove";a:0:{}s:12:"HTML.Trusted";b:0;s:10:"HTML.XHTML";b:1;s:28:"Output.CommentScriptContents";b:1;s:19:"Output.FixInnerHTML";b:1;s:1
8:"Output.FlashCompat";b:0;s:14:"Output.Newline";N;s:15:"Output.SortAttr";b:0;s:17:"Output.TidyFormat";b:0;s:17:"Test.ForceNoIconv";b:0;s:18:"URI.AllowedSchemes";a:7:{s:4:"http";b:1;s:5:"https";b:1;s:6:"mailto";b:1;s:3:"ftp";b:1;s:4:"nntp";b:1;s:4:"news";b:1;s:3:"tel";b:1;}s:8:"URI.Base";N;s:17:"URI.DefaultScheme";s:4:"http";s:16:"URI.DefinitionID";N;s:17:"URI.DefinitionRev";i:1;s:11:"URI.Disable";b:0;s:19:"URI.DisableExternal";b:0;s:28:"URI.DisableExternalResources";b:0;s:20:"URI.DisableResources";b:0;s:8:"URI.Host";N;s:17:"URI.HostBlacklist";a:0:{}s:16:"URI.MakeAbsolute";b:0;s:9:"URI.Munge";N;s:18:"URI.MungeResources";b:0;s:18:"URI.MungeSecretKey";N;s:26:"URI.OverrideAllowedSchemes";b:1;s:20:"URI.SafeIframeRegexp";N;}s:9:"*parent";N;s:8:"*cache";N;}s:4:"info";a:139:{s:19:"Attr.AllowedClasses";i:-8;s:24:"Attr.AllowedFrameTargets";i:8;s:15:"Attr.AllowedRel";i:8;s:15:"Attr.AllowedRev";i:8;s:18:"Attr.ClassUseCDATA";i:-7;s:20:"Attr.DefaultImageAlt";i:-1;s:24:"Attr.DefaultInvalidImage";i:1;s:27:"Attr.DefaultInvalidImageAlt";i:1;s:19:"Attr.DefaultTextDir";O:8:"stdClass":2:{s:4:"type";i:1;s:7:"allowed";a:2:{s:3:"ltr";b:1;s:3:"rtl";b:1;}}s:13:"Attr.EnableID";i:7;s:17:"HTML.EnableAttrID";O:8:"stdClass":2:{s:3:"key";s:13:"Attr.EnableID";s:7:"isAlias";b:1;}s:21:"Attr.ForbiddenClasses";i:8;s:13:"Attr.ID.HTML5";i:-7;s:16:"Attr.IDBlacklist";i:9;s:22:"Attr.IDBlacklistRegexp";i:-1;s:13:"Attr.IDPrefix";i:1;s:18:"Attr.IDPrefixLocal";i:1;s:24:"AutoFormat.AutoParagraph";i:7;s:17:"AutoFormat.Custom";i:9;s:25:"AutoFormat.DisplayLinkURI";i:7;s:18:"AutoFormat.Linkify";i:7;s:33:"AutoFormat.PurifierLinkify.DocURL";i:1;s:37:"AutoFormatParam.PurifierLinkifyDocURL";O:8:"stdClass":2:{s:3:"key";s:33:"AutoFormat.PurifierLinkify.DocURL";s:7:"isAlias";b:1;}s:26:"AutoFormat.PurifierLinkify";i:7;s:32:"AutoFormat.RemoveEmpty.Predicate";i:10;s:44:"AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions";i:8;s:33:"AutoFormat.RemoveEmpty.RemoveNbsp";i:7;s:22:"AutoFormat.RemoveEmpty";i:7;s:39:"AutoFormat.RemoveSpans
WithoutAttributes";i:7;s:19:"CSS.AllowDuplicates";i:7;s:18:"CSS.AllowImportant";i:7;s:15:"CSS.AllowTricky";i:7;s:16:"CSS.AllowedFonts";i:-8;s:21:"CSS.AllowedProperties";i:-8;s:17:"CSS.DefinitionRev";i:5;s:23:"CSS.ForbiddenProperties";i:8;s:16:"CSS.MaxImgLength";i:-1;s:15:"CSS.Proprietary";i:7;s:11:"CSS.Trusted";i:7;s:20:"Cache.DefinitionImpl";i:-1;s:20:"Core.DefinitionCache";O:8:"stdClass":2:{s:3:"key";s:20:"Cache.DefinitionImpl";s:7:"isAlias";b:1;}s:20:"Cache.SerializerPath";i:-1;s:27:"Cache.SerializerPermissions";i:-5;s:22:"Core.AggressivelyFixLt";i:7;s:29:"Core.AggressivelyRemoveScript";i:7;s:28:"Core.AllowHostnameUnderscore";i:7;s:23:"Core.AllowParseManyTags";i:7;s:18:"Core.CollectErrors";i:7;s:18:"Core.ColorKeywords";i:10;s:30:"Core.ConvertDocumentToFragment";i:7;s:24:"Core.AcceptFullDocuments";O:8:"stdClass":2:{s:3:"key";s:30:"Core.ConvertDocumentToFragment";s:7:"isAlias";b:1;}s:36:"Core.DirectLexLineNumberSyncInterval";i:5;s:20:"Core.DisableExcludes";i:7;s:15:"Core.EnableIDNA";i:7;s:13:"Core.Encoding";i:2;s:26:"Core.EscapeInvalidChildren";i:7;s:22:"Core.EscapeInvalidTags";i:7;s:29:"Core.EscapeNonASCIICharacters";i:7;s:19:"Core.HiddenElements";i:8;s:13:"Core.Language";i:1;s:24:"Core.LegacyEntityDecoder";i:7;s:14:"Core.LexerImpl";i:-11;s:24:"Core.MaintainLineNumbers";i:-7;s:22:"Core.NormalizeNewlines";i:7;s:21:"Core.RemoveInvalidImg";i:7;s:33:"Core.RemoveProcessingInstructions";i:7;s:25:"Core.RemoveScriptContents";i:-7;s:13:"Filter.Custom";i:9;s:34:"Filter.ExtractStyleBlocks.Escaping";i:7;s:33:"Filter.ExtractStyleBlocksEscaping";O:8:"stdClass":2:{s:3:"key";s:34:"Filter.ExtractStyleBlocks.Escaping";s:7:"isAlias";b:1;}s:38:"FilterParam.ExtractStyleBlocksEscaping";O:8:"stdClass":2:{s:3:"key";s:34:"Filter.ExtractStyleBlocks.Escaping";s:7:"isAlias";b:1;}s:31:"Filter.ExtractStyleBlocks.Scope";i:-1;s:30:"Filter.ExtractStyleBlocksScope";O:8:"stdClass":2:{s:3:"key";s:31:"Filter.ExtractStyleBlocks.Scope";s:7:"isAlias";b:1;}s:35:"FilterParam.ExtractStyleBlocksScope";O:8:"
stdClass":2:{s:3:"key";s:31:"Filter.ExtractStyleBlocks.Scope";s:7:"isAlias";b:1;}s:34:"Filter.ExtractStyleBlocks.TidyImpl";i:-11;s:38:"FilterParam.ExtractStyleBlocksTidyImpl";O:8:"stdClass":2:{s:3:"key";s:34:"Filter.ExtractStyleBlocks.TidyImpl";s:7:"isAlias";b:1;}s:25:"Filter.ExtractStyleBlocks";i:7;s:14:"Filter.YouTube";i:7;s:12:"HTML.Allowed";i:-4;s:22:"HTML.AllowedAttributes";i:-8;s:20:"HTML.AllowedComments";i:8;s:26:"HTML.AllowedCommentsRegexp";i:-1;s:20:"HTML.AllowedElements";i:-8;s:19:"HTML.AllowedModules";i:-8;s:23:"HTML.Attr.Name.UseCDATA";i:7;s:17:"HTML.BlockWrapper";i:1;s:16:"HTML.CoreModules";i:8;s:18:"HTML.CustomDoctype";i:-1;s:17:"HTML.DefinitionID";i:-1;s:18:"HTML.DefinitionRev";i:5;s:12:"HTML.Doctype";O:8:"stdClass":3:{s:4:"type";i:1;s:10:"allow_null";b:1;s:7:"allowed";a:5:{s:22:"HTML 4.01 Transitional";b:1;s:16:"HTML 4.01 Strict";b:1;s:22:"XHTML 1.0 Transitional";b:1;s:16:"XHTML 1.0 Strict";b:1;s:9:"XHTML 1.1";b:1;}}s:25:"HTML.FlashAllowFullScreen";i:7;s:24:"HTML.ForbiddenAttributes";i:8;s:22:"HTML.ForbiddenElements";i:8;s:17:"HTML.MaxImgLength";i:-5;s:13:"HTML.Nofollow";i:7;s:11:"HTML.Parent";i:1;s:16:"HTML.Proprietary";i:7;s:14:"HTML.SafeEmbed";i:7;s:15:"HTML.SafeIframe";i:7;s:15:"HTML.SafeObject";i:7;s:18:"HTML.SafeScripting";i:8;s:11:"HTML.Strict";i:7;s:16:"HTML.TargetBlank";i:7;s:19:"HTML.TargetNoopener";i:7;s:21:"HTML.TargetNoreferrer";i:7;s:12:"HTML.TidyAdd";i:8;s:14:"HTML.TidyLevel";O:8:"stdClass":2:{s:4:"type";i:1;s:7:"allowed";a:4:{s:4:"none";b:1;s:5:"light";b:1;s:6:"medium";b:1;s:5:"heavy";b:1;}}s:15:"HTML.TidyRemove";i:8;s:12:"HTML.Trusted";i:7;s:10:"HTML.XHTML";i:7;s:10:"Core.XHTML";O:8:"stdClass":2:{s:3:"key";s:10:"HTML.XHTML";s:7:"isAlias";b:1;}s:28:"Output.CommentScriptContents";i:7;s:26:"Core.CommentScriptContents";O:8:"stdClass":2:{s:3:"key";s:28:"Output.CommentScriptContents";s:7:"isAlias";b:1;}s:19:"Output.FixInnerHTML";i:7;s:18:"Output.FlashCompat";i:7;s:14:"Output.Newline";i:-1;s:15:"Output.SortAttr";i:7;s:17:"Output.TidyFormat"
;i:7;s:15:"Core.TidyFormat";O:8:"stdClass":2:{s:3:"key";s:17:"Output.TidyFormat";s:7:"isAlias";b:1;}s:17:"Test.ForceNoIconv";i:7;s:18:"URI.AllowedSchemes";i:8;s:8:"URI.Base";i:-1;s:17:"URI.DefaultScheme";i:-1;s:16:"URI.DefinitionID";i:-1;s:17:"URI.DefinitionRev";i:5;s:11:"URI.Disable";i:7;s:15:"Attr.DisableURI";O:8:"stdClass":2:{s:3:"key";s:11:"URI.Disable";s:7:"isAlias";b:1;}s:19:"URI.DisableExternal";i:7;s:28:"URI.DisableExternalResources";i:7;s:20:"URI.DisableResources";i:7;s:8:"URI.Host";i:-1;s:17:"URI.HostBlacklist";i:9;s:16:"URI.MakeAbsolute";i:7;s:9:"URI.Munge";i:-1;s:18:"URI.MungeResources";i:7;s:18:"URI.MungeSecretKey";i:-1;s:26:"URI.OverrideAllowedSchemes";i:7;s:20:"URI.SafeIframeRegexp";i:-1;}}HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/Validator.php0000644000175000017500000002025713512700112022657 0ustar ezyangezyangparser = new HTMLPurifier_VarParser(); } /** * Validates a fully-formed interchange object. * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @return bool */ public function validate($interchange) { $this->interchange = $interchange; $this->aliases = array(); // PHP is a bit lax with integer <=> string conversions in // arrays, so we don't use the identical !== comparison foreach ($interchange->directives as $i => $directive) { $id = $directive->id->toString(); if ($i != $id) { $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'"); } $this->validateDirective($directive); } return true; } /** * Validates a HTMLPurifier_ConfigSchema_Interchange_Id object. * @param HTMLPurifier_ConfigSchema_Interchange_Id $id */ public function validateId($id) { $id_string = $id->toString(); $this->context[] = "id '$id_string'"; if (!$id instanceof HTMLPurifier_ConfigSchema_Interchange_Id) { // handled by InterchangeBuilder $this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id'); } // keys are now unconstrained (we might want to narrow down to A-Za-z0-9.) 
// we probably should check that it has at least one namespace $this->with($id, 'key') ->assertNotEmpty() ->assertIsString(); // implicit assertIsString handled by InterchangeBuilder array_pop($this->context); } /** * Validates a HTMLPurifier_ConfigSchema_Interchange_Directive object. * @param HTMLPurifier_ConfigSchema_Interchange_Directive $d */ public function validateDirective($d) { $id = $d->id->toString(); $this->context[] = "directive '$id'"; $this->validateId($d->id); $this->with($d, 'description') ->assertNotEmpty(); // BEGIN - handled by InterchangeBuilder $this->with($d, 'type') ->assertNotEmpty(); $this->with($d, 'typeAllowsNull') ->assertIsBool(); try { // This also tests validity of $d->type $this->parser->parse($d->default, $d->type, $d->typeAllowsNull); } catch (HTMLPurifier_VarParserException $e) { $this->error('default', 'had error: ' . $e->getMessage()); } // END - handled by InterchangeBuilder if (!is_null($d->allowed) || !empty($d->valueAliases)) { // allowed and valueAliases require that we be dealing with // strings, so check for that early. $d_int = HTMLPurifier_VarParser::$types[$d->type]; if (!isset(HTMLPurifier_VarParser::$stringTypes[$d_int])) { $this->error('type', 'must be a string type when used with allowed or value aliases'); } } $this->validateDirectiveAllowed($d); $this->validateDirectiveValueAliases($d); $this->validateDirectiveAliases($d); array_pop($this->context); } /** * Extra validation if $allowed member variable of * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. 
* @param HTMLPurifier_ConfigSchema_Interchange_Directive $d */ public function validateDirectiveAllowed($d) { if (is_null($d->allowed)) { return; } $this->with($d, 'allowed') ->assertNotEmpty() ->assertIsLookup(); // handled by InterchangeBuilder if (is_string($d->default) && !isset($d->allowed[$d->default])) { $this->error('default', 'must be an allowed value'); } $this->context[] = 'allowed'; foreach ($d->allowed as $val => $x) { if (!is_string($val)) { $this->error("value $val", 'must be a string'); } } array_pop($this->context); } /** * Extra validation if $valueAliases member variable of * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. * @param HTMLPurifier_ConfigSchema_Interchange_Directive $d */ public function validateDirectiveValueAliases($d) { if (is_null($d->valueAliases)) { return; } $this->with($d, 'valueAliases') ->assertIsArray(); // handled by InterchangeBuilder $this->context[] = 'valueAliases'; foreach ($d->valueAliases as $alias => $real) { if (!is_string($alias)) { $this->error("alias $alias", 'must be a string'); } if (!is_string($real)) { $this->error("alias target $real from alias '$alias'", 'must be a string'); } if ($alias === $real) { $this->error("alias '$alias'", "must not be an alias to itself"); } } if (!is_null($d->allowed)) { foreach ($d->valueAliases as $alias => $real) { if (isset($d->allowed[$alias])) { $this->error("alias '$alias'", 'must not be an allowed value'); } elseif (!isset($d->allowed[$real])) { $this->error("alias '$alias'", 'must be an alias to an allowed value'); } } } array_pop($this->context); } /** * Extra validation if $aliases member variable of * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. 
* @param HTMLPurifier_ConfigSchema_Interchange_Directive $d */ public function validateDirectiveAliases($d) { $this->with($d, 'aliases') ->assertIsArray(); // handled by InterchangeBuilder $this->context[] = 'aliases'; foreach ($d->aliases as $alias) { $this->validateId($alias); $s = $alias->toString(); if (isset($this->interchange->directives[$s])) { $this->error("alias '$s'", 'collides with another directive'); } if (isset($this->aliases[$s])) { $other_directive = $this->aliases[$s]; $this->error("alias '$s'", "collides with alias for directive '$other_directive'"); } $this->aliases[$s] = $d->id->toString(); } array_pop($this->context); } // protected helper functions /** * Convenience function for generating HTMLPurifier_ConfigSchema_ValidatorAtom * for validating simple member variables of objects. * @param $obj * @param $member * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ protected function with($obj, $member) { return new HTMLPurifier_ConfigSchema_ValidatorAtom($this->getFormattedContext(), $obj, $member); } /** * Emits an error, providing helpful context. * @throws HTMLPurifier_ConfigSchema_Exception */ protected function error($target, $msg) { if ($target !== false) { $prefix = ucfirst($target) . ' in ' . $this->getFormattedContext(); } else { $prefix = ucfirst($this->getFormattedContext()); } throw new HTMLPurifier_ConfigSchema_Exception(trim($prefix . ' ' . $msg)); } /** * Returns a formatted context string. 
* @return string */ protected function getFormattedContext() { return implode(' in ', array_reverse($this->context)); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/ValidatorAtom.php0000644000175000017500000000544413512700112023501 0ustar ezyangezyangcontext = $context; $this->obj = $obj; $this->member = $member; $this->contents =& $obj->$member; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertIsString() { if (!is_string($this->contents)) { $this->error('must be a string'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertIsBool() { if (!is_bool($this->contents)) { $this->error('must be a boolean'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertIsArray() { if (!is_array($this->contents)) { $this->error('must be an array'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertNotNull() { if ($this->contents === null) { $this->error('must not be null'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertAlnum() { $this->assertIsString(); if (!ctype_alnum($this->contents)) { $this->error('must be alphanumeric'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertNotEmpty() { if (empty($this->contents)) { $this->error('must not be empty'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertIsLookup() { $this->assertIsArray(); foreach ($this->contents as $v) { if ($v !== true) { $this->error('must be a lookup array'); } } return $this; } /** * @param string $msg * @throws HTMLPurifier_ConfigSchema_Exception */ protected function error($msg) { throw new HTMLPurifier_ConfigSchema_Exception(ucfirst($this->member) . ' in ' . $this->context . ' ' . 
$msg); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php0000644000175000017500000000237513512700112024647 0ustar ezyangezyangdirectives as $d) { $schema->add( $d->id->key, $d->default, $d->type, $d->typeAllowsNull ); if ($d->allowed !== null) { $schema->addAllowedValues( $d->id->key, $d->allowed ); } foreach ($d->aliases as $alias) { $schema->addAlias( $alias->key, $d->id->key ); } if ($d->valueAliases !== null) { $schema->addValueAliases( $d->id->key, $d->valueAliases ); } } $schema->postProcess(); return $schema; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/Builder/Xml.php0000644000175000017500000001042413512700112023053 0ustar ezyangezyangstartElement('div'); $purifier = HTMLPurifier::getInstance(); $html = $purifier->purify($html); $this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml'); $this->writeRaw($html); $this->endElement(); // div } /** * @param mixed $var * @return string */ protected function export($var) { if ($var === array()) { return 'array()'; } return var_export($var, true); } /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange */ public function build($interchange) { // global access, only use as last resort $this->interchange = $interchange; $this->setIndent(true); $this->startDocument('1.0', 'UTF-8'); $this->startElement('configdoc'); $this->writeElement('title', $interchange->name); foreach ($interchange->directives as $directive) { $this->buildDirective($directive); } if ($this->namespace) { $this->endElement(); } // namespace $this->endElement(); // configdoc $this->flush(); } /** * @param HTMLPurifier_ConfigSchema_Interchange_Directive $directive */ public function buildDirective($directive) { // Kludge, although I suppose having a notion of a "root namespace" // certainly makes things look nicer when documentation is built. // Depends on things being sorted. 
if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) { if ($this->namespace) { $this->endElement(); } // namespace $this->namespace = $directive->id->getRootNamespace(); $this->startElement('namespace'); $this->writeAttribute('id', $this->namespace); $this->writeElement('name', $this->namespace); } $this->startElement('directive'); $this->writeAttribute('id', $directive->id->toString()); $this->writeElement('name', $directive->id->getDirective()); $this->startElement('aliases'); foreach ($directive->aliases as $alias) { $this->writeElement('alias', $alias->toString()); } $this->endElement(); // aliases $this->startElement('constraints'); if ($directive->version) { $this->writeElement('version', $directive->version); } $this->startElement('type'); if ($directive->typeAllowsNull) { $this->writeAttribute('allow-null', 'yes'); } $this->text($directive->type); $this->endElement(); // type if ($directive->allowed) { $this->startElement('allowed'); foreach ($directive->allowed as $value => $x) { $this->writeElement('value', $value); } $this->endElement(); // allowed } $this->writeElement('default', $this->export($directive->default)); $this->writeAttribute('xml:space', 'preserve'); if ($directive->external) { $this->startElement('external'); foreach ($directive->external as $project) { $this->writeElement('project', $project); } $this->endElement(); } $this->endElement(); // constraints if ($directive->deprecatedVersion) { $this->startElement('deprecated'); $this->writeElement('version', $directive->deprecatedVersion); $this->writeElement('use', $directive->deprecatedUse->toString()); $this->endElement(); // deprecated } $this->startElement('description'); $this->writeHTMLDiv($directive->description); $this->endElement(); // description $this->endElement(); // directive } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/Interchange/Directive.php0000644000175000017500000000366113512700112025077 0ustar ezyangezyang true). 
* Null if all values are allowed. * @type array */ public $allowed; /** * List of aliases for the directive. * e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir'))). * @type HTMLPurifier_ConfigSchema_Interchange_Id[] */ public $aliases = array(); /** * Hash of value aliases, e.g. array('alt' => 'real'). Null if value * aliasing is disabled (necessary for non-scalar types). * @type array */ public $valueAliases; /** * Version of HTML Purifier the directive was introduced, e.g. '1.3.1'. * Null if the directive has always existed. * @type string */ public $version; /** * ID of directive that supercedes this old directive. * Null if not deprecated. * @type HTMLPurifier_ConfigSchema_Interchange_Id */ public $deprecatedUse; /** * Version of HTML Purifier this directive was deprecated. Null if not * deprecated. * @type string */ public $deprecatedVersion; /** * List of external projects this directive depends on, e.g. array('CSSTidy'). * @type array */ public $external = array(); } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/Interchange/Id.php0000644000175000017500000000206113512700112023506 0ustar ezyangezyangkey = $key; } /** * @return string * @warning This is NOT magic, to ensure that people don't abuse SPL and * cause problems for PHP 5.0 support. 
*/ public function toString() { return $this->key; } /** * @return string */ public function getRootNamespace() { return substr($this->key, 0, strpos($this->key, ".")); } /** * @return string */ public function getDirective() { return substr($this->key, strpos($this->key, ".") + 1); } /** * @param string $id * @return HTMLPurifier_ConfigSchema_Interchange_Id */ public static function make($id) { return new HTMLPurifier_ConfigSchema_Interchange_Id($id); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt0000644000175000017500000000033613512700112026034 0ustar ezyangezyangAttr.AllowedClasses TYPE: lookup/null VERSION: 4.0.0 DEFAULT: null --DESCRIPTION-- List of allowed class values in the class attribute. By default, this is null, which means all classes are allowed. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt0000644000175000017500000000110213512700112027013 0ustar ezyangezyangAttr.AllowedFrameTargets TYPE: lookup DEFAULT: array() --DESCRIPTION-- Lookup table of all allowed link frame targets. Some commonly used link targets include _blank, _self, _parent and _top. Values should be lowercase, as validation will be done in a case-sensitive manner despite W3C's recommendation. XHTML 1.0 Strict does not permit the target attribute so this directive will have no effect in that doctype. XHTML 1.1 does not enable the Target module by default, you will have to manually enable it (see the module documentation for more details.) --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt0000644000175000017500000000044013512700112025155 0ustar ezyangezyangAttr.AllowedRel TYPE: lookup VERSION: 1.6.0 DEFAULT: array() --DESCRIPTION-- List of allowed forward document relationships in the rel attribute. Common values may be nofollow or print. By default, this is empty, meaning that no document relationships are allowed. 
--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt0000644000175000017500000000037713512700112025200 0ustar ezyangezyangAttr.AllowedRev TYPE: lookup VERSION: 1.6.0 DEFAULT: array() --DESCRIPTION-- List of allowed reverse document relationships in the rev attribute. This attribute is a bit of an edge-case; if you don't know what it is for, stay away. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt0000644000175000017500000000163613512700112025412 0ustar ezyangezyangAttr.ClassUseCDATA TYPE: bool/null DEFAULT: null VERSION: 4.0.0 --DESCRIPTION-- If null, class will auto-detect the doctype and, if matching XHTML 1.1 or XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise, it will use a relaxed CDATA definition. If true, the relaxed CDATA definition is forced; if false, the NMTOKENS definition is forced. To get behavior of HTML Purifier prior to 4.0.0, set this directive to false. Some rational behind the auto-detection: in previous versions of HTML Purifier, it was assumed that the form of class was NMTOKENS, as specified by the XHTML Modularization (representing XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however specify class as CDATA. HTML 5 effectively defines it as CDATA, but with the additional constraint that each name should be unique (this is not explicitly outlined in previous specifications). --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt0000644000175000017500000000073713512700112026124 0ustar ezyangezyangAttr.DefaultImageAlt TYPE: string/null DEFAULT: null VERSION: 3.2.0 --DESCRIPTION-- This is the content of the alt tag of an image if the user had not previously specified an alt attribute. 
This applies to all images without a valid alt attribute, as opposed to %Attr.DefaultInvalidImageAlt, which only applies to invalid images, and overrides in the case of an invalid image. Default behavior with null is to use the basename of the src tag for the alt. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt0000644000175000017500000000050013512700112026756 0ustar ezyangezyangAttr.DefaultInvalidImage TYPE: string DEFAULT: '' --DESCRIPTION-- This is the default image an img tag will be pointed to if it does not have a valid src attribute. In future versions, we may allow the image tag to be removed completely, but due to design issues, this is not possible right now. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt0000644000175000017500000000045513512700112027430 0ustar ezyangezyangAttr.DefaultInvalidImageAlt TYPE: string DEFAULT: 'Invalid image' --DESCRIPTION-- This is the content of the alt tag of an invalid image if the user had not previously specified an alt attribute. It has no effect when the image is valid but there was no alt attribute present. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt0000644000175000017500000000044613512700112026021 0ustar ezyangezyangAttr.DefaultTextDir TYPE: string DEFAULT: 'ltr' --DESCRIPTION-- Defines the default text direction (ltr or rtl) of the document being parsed. This generally is the same as the value of the dir attribute in HTML, or ltr if that is not specified. --ALLOWED-- 'ltr', 'rtl' --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt0000644000175000017500000000120513512700112024526 0ustar ezyangezyangAttr.EnableID TYPE: bool DEFAULT: false VERSION: 1.2.0 --DESCRIPTION-- Allows the ID attribute in HTML. 
This is disabled by default due to the fact that without proper configuration user input can easily break the validation of a webpage by specifying an ID that is already on the surrounding HTML. If you don't mind throwing caution to the wind, enable this directive, but I strongly recommend you also consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all user supplied IDs (%Attr.IDPrefix). When set to true HTML Purifier reverts to the behavior of pre-1.2.0 versions. --ALIASES-- HTML.EnableAttrID --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt0000644000175000017500000000040613512700112026337 0ustar ezyangezyangAttr.ForbiddenClasses TYPE: lookup VERSION: 4.0.0 DEFAULT: array() --DESCRIPTION-- List of forbidden class values in the class attribute. By default, this is empty, which means that no classes are forbidden. See also %Attr.AllowedClasses. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt0000644000175000017500000000061513512700112024253 0ustar ezyangezyangAttr.ID.HTML5 TYPE: bool/null DEFAULT: null VERSION: 4.8.0 --DESCRIPTION-- In HTML5, restrictions on the format of the id attribute have been significantly relaxed, such that any string is valid so long as it contains no spaces and is at least one character. In lieu of a general HTML5 compatibility flag, set this configuration directive to true to use the relaxed rules. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt0000644000175000017500000000017313512700112025253 0ustar ezyangezyangAttr.IDBlacklist TYPE: list DEFAULT: array() DESCRIPTION: Array of IDs not allowed in the document. 
--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt0000644000175000017500000000047513512700112026433 0ustar ezyangezyangAttr.IDBlacklistRegexp TYPE: string/null VERSION: 1.6.0 DEFAULT: NULL --DESCRIPTION-- PCRE regular expression to be matched against all IDs. If the expression is matches, the ID is rejected. Use this with care: may cause significant degradation. ID matching is done after all other validation. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt0000644000175000017500000000073313512700112024602 0ustar ezyangezyangAttr.IDPrefix TYPE: string VERSION: 1.2.0 DEFAULT: '' --DESCRIPTION-- String to prefix to IDs. If you have no idea what IDs your pages may use, you may opt to simply add a prefix to all user-submitted ID attributes so that they are still usable, but will not conflict with core page IDs. Example: setting the directive to 'user_' will result in a user submitted 'foo' to become 'user_foo' Be sure to set %HTML.EnableAttrID to true before using this. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt0000644000175000017500000000122413512700112025551 0ustar ezyangezyangAttr.IDPrefixLocal TYPE: string VERSION: 1.2.0 DEFAULT: '' --DESCRIPTION-- Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If you need to allow multiple sets of user content on web page, you may need to have a seperate prefix that changes with each iteration. This way, seperately submitted user content displayed on the same page doesn't clobber each other. Ideal values are unique identifiers for the content it represents (i.e. the id of the row in the database). Be sure to add a seperator (like an underscore) at the end. Warning: this directive will not work unless %Attr.IDPrefix is set to a non-empty value! 
--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt0000644000175000017500000000216213512700112027033 0ustar ezyangezyangAutoFormat.AutoParagraph TYPE: bool VERSION: 2.0.1 DEFAULT: false --DESCRIPTION--

This directive turns on auto-paragraphing, where double newlines are converted into paragraphs whenever possible. Auto-paragraphing:

  • Always applies to inline elements or text in the root node,
  • Applies to inline elements or text with double newlines in nodes that allow paragraph tags,
  • Applies to double newlines in paragraph tags

p tags must be allowed for this directive to take effect. We do not use br tags for paragraphing, as that is semantically incorrect.

To prevent auto-paragraphing as a content-producer, refrain from using double-newlines except to specify a new paragraph or in contexts where it has special meaning (whitespace usually has no meaning except in tags like pre, so this should not be difficult.) To prevent the paragraphing of inline text adjacent to block elements, wrap them in div tags (the behavior is slightly different outside of the root node.)

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt0000644000175000017500000000045513512700112025552 0ustar ezyangezyangAutoFormat.Custom TYPE: list VERSION: 2.0.1 DEFAULT: array() --DESCRIPTION--

This directive can be used to add custom auto-format injectors. Specify an array of injector names (class name minus the prefix) or concrete implementations. Injector class must exist.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt0000644000175000017500000000047513512700112027105 0ustar ezyangezyangAutoFormat.DisplayLinkURI TYPE: bool VERSION: 3.2.0 DEFAULT: false --DESCRIPTION--

This directive turns on the in-text display of URIs in <a> tags, and disables those links. For example, a link with the text "example" that points to http://example.com becomes the plain text "example (http://example.com)".

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt0000644000175000017500000000041313512700112025677 0ustar ezyangezyangAutoFormat.Linkify TYPE: bool VERSION: 2.0.1 DEFAULT: false --DESCRIPTION--

This directive turns on linkification, auto-linking http, ftp and https URLs. a tags with the href attribute must be allowed.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt0000644000175000017500000000050613512700112030477 0ustar ezyangezyangAutoFormat.PurifierLinkify.DocURL TYPE: string VERSION: 2.0.1 DEFAULT: '#%s' ALIASES: AutoFormatParam.PurifierLinkifyDocURL --DESCRIPTION--

Location of configuration documentation to link to. %s is replaced with the configuration directive's namespace and directive name, without the percent sign.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt0000644000175000017500000000046413512700112027413 0ustar ezyangezyangAutoFormat.PurifierLinkify TYPE: bool VERSION: 2.0.1 DEFAULT: false --DESCRIPTION--

Internal auto-formatter that converts configuration directives in syntax %Namespace.Directive to links. a tags with the href attribute must be allowed.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.Predicate.txt0000644000175000017500000000117013512700112030446 0ustar ezyangezyangAutoFormat.RemoveEmpty.Predicate TYPE: hash VERSION: 4.7.0 DEFAULT: array('colgroup' => array(), 'th' => array(), 'td' => array(), 'iframe' => array('src')) --DESCRIPTION--

Given that an element has no contents, it will be removed by default, unless this predicate dictates otherwise. The predicate is an associative map from tag name to the list of attributes that must be present for the element to be considered preserved: thus, the default always preserves colgroup, th and td, and also iframe if it has a src.

--# vim: et sw=4 sts=4 ././@LongLink000 145 0003736 LHTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txtHTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.tx0000644000175000017500000000055413512700112032567 0ustar ezyangezyangAutoFormat.RemoveEmpty.RemoveNbsp.Exceptions TYPE: lookup VERSION: 4.0.0 DEFAULT: array('td' => true, 'th' => true) --DESCRIPTION--

When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp are enabled, this directive defines which HTML elements should not be removed if they have only a non-breaking space in them.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt0000644000175000017500000000070113512700112030625 0ustar ezyangezyangAutoFormat.RemoveEmpty.RemoveNbsp TYPE: bool VERSION: 4.0.0 DEFAULT: false --DESCRIPTION--

When enabled, HTML Purifier will treat any elements that contain only non-breaking spaces as well as regular whitespace as empty, and remove them when %AutoFormat.RemoveEmpty is enabled.

See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements that don't have this behavior applied to them.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt0000644000175000017500000000335613512700112026557 0ustar ezyangezyangAutoFormat.RemoveEmpty TYPE: bool VERSION: 3.2.0 DEFAULT: false --DESCRIPTION--

When enabled, HTML Purifier will attempt to remove empty elements that contribute no semantic information to the document. The following types of nodes will be removed:

  • Tags with no attributes and no content, and that are not empty elements (remove <a></a> but not <br />), and
  • Tags with no content, except for:
    • The colgroup element, or
    • Elements with the id or name attribute, when those attributes are permitted on those elements.

Please be very careful when using this functionality; while it may not seem that empty elements contain useful information, they can alter the layout of a document given appropriate styling. This directive is most useful when you are processing machine-generated HTML; please avoid using it on regular user HTML.

Elements that contain only whitespace will be treated as empty. Non-breaking spaces, however, do not count as whitespace. See %AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.

This algorithm is not perfect; you may still notice some empty tags, particularly if a node had elements, but those elements were later removed because they were not permitted in that context, or tags that, after being auto-closed by another tag, were empty. This is for safety reasons to prevent clever code from breaking validation. The general rule of thumb: if a tag looked empty on the way in, it will get removed; if HTML Purifier made it empty, it will stay.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt0000644000175000017500000000044713512700112032176 0ustar ezyangezyangAutoFormat.RemoveSpansWithoutAttributes TYPE: bool VERSION: 4.0.1 DEFAULT: false --DESCRIPTION--

This directive causes span tags without any attributes to be removed. It will also remove spans that had all attributes removed during processing.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt0000644000175000017500000000056613512700112026137 0ustar ezyangezyangCache.DefinitionImpl TYPE: string/null VERSION: 2.0.0 DEFAULT: 'Serializer' --DESCRIPTION-- This directive defines which method to use when caching definitions, the complex data-type that makes HTML Purifier tick. Set to null to disable caching (not recommended, as you will see a definite performance degradation). --ALIASES-- Core.DefinitionCache --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt0000644000175000017500000000051213512700112026142 0ustar ezyangezyangCache.SerializerPath TYPE: string/null VERSION: 2.0.0 DEFAULT: NULL --DESCRIPTION--

Absolute path with no trailing slash to store serialized definitions in. Default is within the HTML Purifier library inside DefinitionCache/Serializer. This path must be writable by the webserver.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt0000644000175000017500000000062713512700112027570 0ustar ezyangezyangCache.SerializerPermissions TYPE: int/null VERSION: 4.3.0 DEFAULT: 0755 --DESCRIPTION--

Directory permissions of the files and directories created inside the DefinitionCache/Serializer or other custom serializer path.

In HTML Purifier 4.8.0, this also supports NULL, which means that no chmod'ing or directory creation shall occur.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt0000644000175000017500000000122613512700112026517 0ustar ezyangezyangCore.AggressivelyFixLt TYPE: bool VERSION: 2.1.0 DEFAULT: true --DESCRIPTION--

This directive enables aggressive pre-filter fixes HTML Purifier can perform in order to ensure that open angled-brackets do not get killed during parsing stage. Enabling this will result in two preg_replace_callback calls and at least two preg_replace calls for every HTML document parsed; if your users make very well-formed HTML, you can set this directive false. This has no effect when DirectLex is used.

Notice: This directive's default turned from false to true in HTML Purifier 3.2.0.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt0000644000175000017500000000107513512700112030115 0ustar ezyangezyangCore.AggressivelyRemoveScript TYPE: bool VERSION: 4.9.0 DEFAULT: true --DESCRIPTION--

This directive enables aggressive pre-filter removal of script tags. This is not necessary for security, but it can help work around a bug in libxml where embedded HTML elements inside script sections cause the parser to choke. To revert to pre-4.9.0 behavior, set this to false. This directive has no effect if %Core.Trusted is true, %Core.RemoveScriptContents is false, or %Core.HiddenElements does not contain script.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.AllowHostnameUnderscore.txt0000644000175000017500000000107713512700112027717 0ustar ezyangezyangCore.AllowHostnameUnderscore TYPE: bool VERSION: 4.6.0 DEFAULT: false --DESCRIPTION--

By RFC 1123, underscores are not permitted in host names. (This is in contrast to the specification for DNS, RFC 2181, which allows underscores.) However, most browsers do the right thing when faced with an underscore in the host name, and so some poorly written websites are written with the expectation this should work. Setting this parameter to true relaxes our allowed character check so that underscores are permitted.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.AllowParseManyTags.txt0000644000175000017500000000050113512700112026614 0ustar ezyangezyangCore.AllowParseManyTags TYPE: bool DEFAULT: false VERSION: 4.10.1 --DESCRIPTION--

This directive allows parsing of many nested tags. If you set it to true, it relaxes any hardcoded limit in the parser. However, in that case it may expose you to a DoS attack. Be careful when enabling it.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt0000644000175000017500000000066113512700112025670 0ustar ezyangezyangCore.CollectErrors TYPE: bool VERSION: 2.0.0 DEFAULT: false --DESCRIPTION-- Whether or not to collect errors found while filtering the document. This is a useful way to give feedback to your users. Warning: Currently this feature is very patchy and experimental, with lots of possible error messages not yet implemented. It will not cause any problems, but it may not help your users either. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt0000644000175000017500000001051213512700112025710 0ustar ezyangezyangCore.ColorKeywords TYPE: hash VERSION: 2.0.0 --DEFAULT-- array ( 'aliceblue' => '#F0F8FF', 'antiquewhite' => '#FAEBD7', 'aqua' => '#00FFFF', 'aquamarine' => '#7FFFD4', 'azure' => '#F0FFFF', 'beige' => '#F5F5DC', 'bisque' => '#FFE4C4', 'black' => '#000000', 'blanchedalmond' => '#FFEBCD', 'blue' => '#0000FF', 'blueviolet' => '#8A2BE2', 'brown' => '#A52A2A', 'burlywood' => '#DEB887', 'cadetblue' => '#5F9EA0', 'chartreuse' => '#7FFF00', 'chocolate' => '#D2691E', 'coral' => '#FF7F50', 'cornflowerblue' => '#6495ED', 'cornsilk' => '#FFF8DC', 'crimson' => '#DC143C', 'cyan' => '#00FFFF', 'darkblue' => '#00008B', 'darkcyan' => '#008B8B', 'darkgoldenrod' => '#B8860B', 'darkgray' => '#A9A9A9', 'darkgrey' => '#A9A9A9', 'darkgreen' => '#006400', 'darkkhaki' => '#BDB76B', 'darkmagenta' => '#8B008B', 'darkolivegreen' => '#556B2F', 'darkorange' => '#FF8C00', 'darkorchid' => '#9932CC', 'darkred' => '#8B0000', 'darksalmon' => '#E9967A', 'darkseagreen' => '#8FBC8F', 'darkslateblue' => '#483D8B', 'darkslategray' => '#2F4F4F', 'darkslategrey' => '#2F4F4F', 'darkturquoise' => '#00CED1', 'darkviolet' => '#9400D3', 'deeppink' => '#FF1493', 'deepskyblue' => '#00BFFF', 'dimgray' => '#696969', 'dimgrey' => '#696969', 'dodgerblue' => '#1E90FF', 'firebrick' => '#B22222', 
'floralwhite' => '#FFFAF0', 'forestgreen' => '#228B22', 'fuchsia' => '#FF00FF', 'gainsboro' => '#DCDCDC', 'ghostwhite' => '#F8F8FF', 'gold' => '#FFD700', 'goldenrod' => '#DAA520', 'gray' => '#808080', 'grey' => '#808080', 'green' => '#008000', 'greenyellow' => '#ADFF2F', 'honeydew' => '#F0FFF0', 'hotpink' => '#FF69B4', 'indianred' => '#CD5C5C', 'indigo' => '#4B0082', 'ivory' => '#FFFFF0', 'khaki' => '#F0E68C', 'lavender' => '#E6E6FA', 'lavenderblush' => '#FFF0F5', 'lawngreen' => '#7CFC00', 'lemonchiffon' => '#FFFACD', 'lightblue' => '#ADD8E6', 'lightcoral' => '#F08080', 'lightcyan' => '#E0FFFF', 'lightgoldenrodyellow' => '#FAFAD2', 'lightgray' => '#D3D3D3', 'lightgrey' => '#D3D3D3', 'lightgreen' => '#90EE90', 'lightpink' => '#FFB6C1', 'lightsalmon' => '#FFA07A', 'lightseagreen' => '#20B2AA', 'lightskyblue' => '#87CEFA', 'lightslategray' => '#778899', 'lightslategrey' => '#778899', 'lightsteelblue' => '#B0C4DE', 'lightyellow' => '#FFFFE0', 'lime' => '#00FF00', 'limegreen' => '#32CD32', 'linen' => '#FAF0E6', 'magenta' => '#FF00FF', 'maroon' => '#800000', 'mediumaquamarine' => '#66CDAA', 'mediumblue' => '#0000CD', 'mediumorchid' => '#BA55D3', 'mediumpurple' => '#9370DB', 'mediumseagreen' => '#3CB371', 'mediumslateblue' => '#7B68EE', 'mediumspringgreen' => '#00FA9A', 'mediumturquoise' => '#48D1CC', 'mediumvioletred' => '#C71585', 'midnightblue' => '#191970', 'mintcream' => '#F5FFFA', 'mistyrose' => '#FFE4E1', 'moccasin' => '#FFE4B5', 'navajowhite' => '#FFDEAD', 'navy' => '#000080', 'oldlace' => '#FDF5E6', 'olive' => '#808000', 'olivedrab' => '#6B8E23', 'orange' => '#FFA500', 'orangered' => '#FF4500', 'orchid' => '#DA70D6', 'palegoldenrod' => '#EEE8AA', 'palegreen' => '#98FB98', 'paleturquoise' => '#AFEEEE', 'palevioletred' => '#DB7093', 'papayawhip' => '#FFEFD5', 'peachpuff' => '#FFDAB9', 'peru' => '#CD853F', 'pink' => '#FFC0CB', 'plum' => '#DDA0DD', 'powderblue' => '#B0E0E6', 'purple' => '#800080', 'rebeccapurple' => '#663399', 'red' => '#FF0000', 'rosybrown' => 
'#BC8F8F', 'royalblue' => '#4169E1', 'saddlebrown' => '#8B4513', 'salmon' => '#FA8072', 'sandybrown' => '#F4A460', 'seagreen' => '#2E8B57', 'seashell' => '#FFF5EE', 'sienna' => '#A0522D', 'silver' => '#C0C0C0', 'skyblue' => '#87CEEB', 'slateblue' => '#6A5ACD', 'slategray' => '#708090', 'slategrey' => '#708090', 'snow' => '#FFFAFA', 'springgreen' => '#00FF7F', 'steelblue' => '#4682B4', 'tan' => '#D2B48C', 'teal' => '#008080', 'thistle' => '#D8BFD8', 'tomato' => '#FF6347', 'turquoise' => '#40E0D0', 'violet' => '#EE82EE', 'wheat' => '#F5DEB3', 'white' => '#FFFFFF', 'whitesmoke' => '#F5F5F5', 'yellow' => '#FFFF00', 'yellowgreen' => '#9ACD32' ) --DESCRIPTION-- Lookup array of color names to six digit hexadecimal number corresponding to color, with preceding hash mark. Used when parsing colors. The lookup is done in a case-insensitive manner. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt0000644000175000017500000000066113512700112030214 0ustar ezyangezyangCore.ConvertDocumentToFragment TYPE: bool DEFAULT: true --DESCRIPTION-- This parameter determines whether or not the filter should convert input that is a full document with html and body tags to a fragment of just the contents of a body tag. This parameter is simply something HTML Purifier can do during an edge-case: for most inputs, this processing is not necessary. --ALIASES-- Core.AcceptFullDocuments --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt0000644000175000017500000000117013512700112031310 0ustar ezyangezyangCore.DirectLexLineNumberSyncInterval TYPE: int VERSION: 2.0.0 DEFAULT: 0 --DESCRIPTION--

Specifies the number of tokens the DirectLex line number tracking implementations should process before attempting to resynchronize the current line count by manually counting all previous new-lines. When at 0, this functionality is disabled. Lower values will decrease performance, and this is only strictly necessary if the counting algorithm is buggy (in which case you should report it as a bug). This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is not being used.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.DisableExcludes.txt0000644000175000017500000000071213512700112026143 0ustar ezyangezyangCore.DisableExcludes TYPE: bool DEFAULT: false VERSION: 4.5.0 --DESCRIPTION--

This directive disables SGML-style exclusions, e.g. the exclusion of <object> in any descendant of a <pre> tag. Disabling excludes will allow some invalid documents to pass through HTML Purifier, but HTML Purifier will also be less likely to accidentally remove large documents during processing.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt0000644000175000017500000000045713512700112024733 0ustar ezyangezyangCore.EnableIDNA TYPE: bool DEFAULT: false VERSION: 4.4.0 --DESCRIPTION-- Allows international domain names in URLs. This configuration option requires the PEAR Net_IDNA2 module to be installed. It operates by punycoding any internationalized host names for maximum portability. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt0000644000175000017500000000137713512700112024641 0ustar ezyangezyangCore.Encoding TYPE: istring DEFAULT: 'utf-8' --DESCRIPTION-- If for some reason you are unable to convert all webpages to UTF-8, you can use this directive as a stop-gap compatibility change to let HTML Purifier deal with non UTF-8 input. This technique has notable deficiencies: absolutely no characters outside of the selected character encoding will be preserved, not even the ones that have been ampersand escaped (this is due to a UTF-8 specific feature that automatically resolves all entities), making it pretty useless for anything except the most I18N-blind applications, although %Core.EscapeNonASCIICharacters offers fixes this trouble with another tradeoff. This directive only accepts ISO-8859-1 if iconv is not enabled. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt0000644000175000017500000000076713512700112027275 0ustar ezyangezyangCore.EscapeInvalidChildren TYPE: bool DEFAULT: false --DESCRIPTION--

Warning: this configuration option no longer does anything as of 4.6.0.

When true, a child that is not allowed in the context of the parent element will be transformed into text as if it were ASCII. When false, that element and all internal tags will be dropped, though text will be preserved. There is no option for dropping the element but preserving child nodes.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt0000644000175000017500000000031213512700112026425 0ustar ezyangezyangCore.EscapeInvalidTags TYPE: bool DEFAULT: false --DESCRIPTION-- When true, invalid tags will be written back to the document as plain text. Otherwise, they are silently dropped. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt0000644000175000017500000000106713512700112027533 0ustar ezyangezyangCore.EscapeNonASCIICharacters TYPE: bool VERSION: 1.4.0 DEFAULT: false --DESCRIPTION-- This directive overcomes a deficiency in %Core.Encoding by blindly converting all non-ASCII characters into decimal numeric entities before converting it to its native encoding. This means that even characters that can be expressed in the non-UTF-8 encoding will be entity-ized, which can be a real downer for encodings like Big5. It also assumes that the ASCII repetoire is available, although this is the case for almost all encodings. Anyway, use UTF-8! --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt0000644000175000017500000000107713512700112026000 0ustar ezyangezyangCore.HiddenElements TYPE: lookup --DEFAULT-- array ( 'script' => true, 'style' => true, ) --DESCRIPTION--

This directive is a lookup array of elements which should have their contents removed when they are not allowed by the HTML definition. For example, the contents of a script tag are not normally shown in a document, so if script tags are to be removed, their contents should be removed too. This is opposed to a b tag, which defines some presentational changes but does not hide its contents.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.Language.txt0000644000175000017500000000044413512700112024630 0ustar ezyangezyangCore.Language TYPE: string VERSION: 2.0.0 DEFAULT: 'en' --DESCRIPTION-- ISO 639 language code for localizable things in HTML Purifier to use, which is mainly error reporting. There is currently only an English (en) translation, so this directive is currently useless. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.LegacyEntityDecoder.txt0000644000175000017500000000255713512700112027003 0ustar ezyangezyangCore.LegacyEntityDecoder TYPE: bool VERSION: 4.9.0 DEFAULT: false --DESCRIPTION--

Prior to HTML Purifier 4.9.0, entities were decoded by performing a global search replace for all entities whose decoded versions did not have special meanings under HTML, and replaced them with their decoded versions. We would match all entities, even if they did not have a trailing semicolon, but only if there weren't any trailing alphanumeric characters.

OriginalTextAttribute
&yen;¥¥
&yen¥¥
&yena&yena&yena
&yen=¥=¥=

In HTML Purifier 4.9.0, we changed the behavior of entity parsing to match entities that had missing trailing semicolons in fewer cases, to more closely match HTML5 parsing behavior:

OriginalTextAttribute
&yen;¥¥
&yen¥¥
&yena¥a&yena
&yen=¥=&yen=

This flag reverts back to pre-HTML Purifier 4.9.0 behavior.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt0000644000175000017500000000202413512700112025002 0ustar ezyangezyangCore.LexerImpl TYPE: mixed/null VERSION: 2.0.0 DEFAULT: NULL --DESCRIPTION--

This parameter determines what lexer implementation can be used. The valid values are:

null
Recommended, the lexer implementation will be auto-detected based on your PHP-version and configuration.
string lexer identifier
This is a slim way of manually overriding the implementation. Currently recognized values are: DOMLex (the default PHP5 implementation) and DirectLex (the default PHP4 implementation). Only use this if you know what you are doing: usually, the auto-detection will manage things for cases you aren't even aware of.
object lexer instance
Super-advanced: you can specify your own, custom, implementation that implements the interface defined by HTMLPurifier_Lexer. I may remove this option simply because I don't expect anyone to use it.
--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt0000644000175000017500000000105413512700112027007 0ustar ezyangezyangCore.MaintainLineNumbers TYPE: bool/null VERSION: 2.0.0 DEFAULT: NULL --DESCRIPTION--

If true, HTML Purifier will add line number information to all tokens. This is useful when error reporting is turned on, but can result in significant performance degradation and should not be used when unnecessary. This directive must be used with the DirectLex lexer, as the DOMLex lexer does not (yet) support this functionality. If the value is null, an appropriate value will be selected based on other configuration.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.NormalizeNewlines.txt0000644000175000017500000000042713512700112026553 0ustar ezyangezyangCore.NormalizeNewlines TYPE: bool VERSION: 4.2.0 DEFAULT: true --DESCRIPTION--

Whether or not to normalize newlines to the operating system default. When false, HTML Purifier will attempt to preserve mixed newline files.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt0000644000175000017500000000051613512700112026306 0ustar ezyangezyangCore.RemoveInvalidImg TYPE: bool DEFAULT: true VERSION: 1.3.0 --DESCRIPTION--

This directive enables pre-emptive URI checking in img tags, as the attribute validation strategy is not authorized to remove elements from the document. Revert to pre-1.3.0 behavior by setting to false.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt0000644000175000017500000000061613512700112031025 0ustar ezyangezyangCore.RemoveProcessingInstructions TYPE: bool VERSION: 4.2.0 DEFAULT: false --DESCRIPTION-- Instead of escaping processing instructions in the form <? ... ?>, remove it out-right. This may be useful if the HTML you are validating contains XML processing instruction gunk, however, it can also be user-unfriendly for people attempting to post PHP snippets. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt0000644000175000017500000000043013512700112027240 0ustar ezyangezyangCore.RemoveScriptContents TYPE: bool/null DEFAULT: NULL VERSION: 2.0.0 DEPRECATED-VERSION: 2.1.0 DEPRECATED-USE: Core.HiddenElements --DESCRIPTION--

This directive enables HTML Purifier to remove not only script tags but all of their contents.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt0000644000175000017500000000042313512700112025736 0ustar ezyangezyangCSS.AllowDuplicates TYPE: bool DEFAULT: false VERSION: 4.8.0 --DESCRIPTION--

By default, HTML Purifier removes duplicate CSS properties, like color:red; color:blue. If this is set to true, duplicate properties are allowed.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.AllowedFonts.txt0000644000175000017500000000053713512700112025251 0ustar ezyangezyangCSS.AllowedFonts TYPE: lookup/null VERSION: 4.3.0 DEFAULT: NULL --DESCRIPTION--

Allows you to manually specify a set of allowed fonts. If NULL, all fonts are allowed. This directive affects generic names (serif, sans-serif, monospace, cursive, fantasy) as well as specific font families.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt0000644000175000017500000000114013512700112026303 0ustar ezyangezyangCSS.AllowedProperties TYPE: lookup/null VERSION: 3.1.0 DEFAULT: NULL --DESCRIPTION--

If HTML Purifier's style attributes set is unsatisfactory for your needs, you can overload it with your own list of properties to allow. Note that this method is subtractive: it does its job by taking away from HTML Purifier's usual feature set, so you cannot add a property that HTML Purifier never supported in the first place.

Warning: If another directive conflicts with the properties here, that directive will win and override.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt0000644000175000017500000000035413512700112025621 0ustar ezyangezyangCSS.AllowImportant TYPE: bool DEFAULT: false VERSION: 3.1.0 --DESCRIPTION-- This parameter determines whether or not !important cascade modifiers should be allowed in user CSS. If false, !important will stripped. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt0000644000175000017500000000071513512700112025112 0ustar ezyangezyangCSS.AllowTricky TYPE: bool DEFAULT: false VERSION: 3.1.0 --DESCRIPTION-- This parameter determines whether or not to allow "tricky" CSS properties and values. Tricky CSS properties/values can drastically modify page layout or be used for deceptive practices but do not directly constitute a security risk. For example, display:none; is considered a tricky property that will only be allowed if this directive is set to true. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt0000644000175000017500000000030413512700112025405 0ustar ezyangezyangCSS.DefinitionRev TYPE: int VERSION: 2.0.0 DEFAULT: 1 --DESCRIPTION--

Revision identifier for your custom definition. See %HTML.DefinitionRev for details.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.ForbiddenProperties.txt0000644000175000017500000000071713512700112026621 0ustar ezyangezyangCSS.ForbiddenProperties TYPE: lookup VERSION: 4.2.0 DEFAULT: array() --DESCRIPTION--

This is the logical inverse of %CSS.AllowedProperties, and it will override that directive or any other directive. If possible, %CSS.AllowedProperties is recommended over this directive, because it can sometimes be difficult to tell whether or not you've forbidden all of the CSS properties you truly would like to disallow.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt0000644000175000017500000000115713512700112025173 0ustar ezyangezyangCSS.MaxImgLength TYPE: string/null DEFAULT: '1200px' VERSION: 3.1.1 --DESCRIPTION--

This parameter sets the maximum allowed length on img tags, effectively the width and height properties. Only absolute units of measurement (in, pt, pc, mm, cm) and pixels (px) are allowed. This is in place to prevent imagecrash attacks, disable with null at your own risk. This directive is similar to %HTML.MaxImgLength, and both should be concurrently edited, although there are subtle differences in the input format (the CSS max is a number with a unit).

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt0000644000175000017500000000024413512700112025163 0ustar ezyangezyangCSS.Proprietary TYPE: bool VERSION: 3.0.0 DEFAULT: false --DESCRIPTION--

Whether or not to allow safe, proprietary CSS values.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt0000644000175000017500000000037313512700112024300 0ustar ezyangezyangCSS.Trusted TYPE: bool VERSION: 4.2.1 DEFAULT: false --DESCRIPTION-- Indicates whether or not the user's CSS input is trusted or not. If the input is trusted, a more expansive set of allowed properties. See also %HTML.Trusted. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt0000644000175000017500000000046313512700112024715 0ustar ezyangezyangFilter.Custom TYPE: list VERSION: 3.1.0 DEFAULT: array() --DESCRIPTION--

This directive can be used to add custom filters; it is nearly the equivalent of the now deprecated HTMLPurifier->addFilter() method. Specify an array of concrete implementations.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt0000644000175000017500000000074513512700112030767 0ustar ezyangezyangFilter.ExtractStyleBlocks.Escaping TYPE: bool VERSION: 3.0.0 DEFAULT: true ALIASES: Filter.ExtractStyleBlocksEscaping, FilterParam.ExtractStyleBlocksEscaping --DESCRIPTION--

Whether or not to escape the dangerous characters <, > and & as \3C, \3E and \26, respectively. This can be safely set to false if the contents of StyleBlocks will be placed in an external stylesheet, where there is no risk of it being interpreted as HTML.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt0000644000175000017500000000226013512700112030301 0ustar ezyangezyangFilter.ExtractStyleBlocks.Scope TYPE: string/null VERSION: 3.0.0 DEFAULT: NULL ALIASES: Filter.ExtractStyleBlocksScope, FilterParam.ExtractStyleBlocksScope --DESCRIPTION--

If you would like users to be able to define external stylesheets, but only allow them to specify CSS declarations for a specific node and prevent them from fiddling with other elements, use this directive. It accepts any valid CSS selector, and will prepend this to any CSS declaration extracted from the document. For example, if this directive is set to #user-content and a user uses the selector a:hover, the final selector will be #user-content a:hover.

The comma shorthand may be used; consider the above example, with #user-content, #user-content2, the final selector will be #user-content a:hover, #user-content2 a:hover.

Warning: It is possible for users to bypass this measure using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML Purifier, and I am working to get it fixed. Until then, HTML Purifier performs a basic check to prevent this.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt0000644000175000017500000000110013512700112030753 0ustar ezyangezyangFilter.ExtractStyleBlocks.TidyImpl TYPE: mixed/null VERSION: 3.1.0 DEFAULT: NULL ALIASES: FilterParam.ExtractStyleBlocksTidyImpl --DESCRIPTION--

If left NULL, HTML Purifier will attempt to instantiate a csstidy class to use for internal cleaning. This will usually be good enough.

However, for trusted user input, you can set this to false to disable cleaning. In addition, you can supply your own concrete implementation of Tidy's interface to use, although I don't know why you'd want to do that.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt0000644000175000017500000000530613512700112027235 0ustar ezyangezyangFilter.ExtractStyleBlocks TYPE: bool VERSION: 3.1.0 DEFAULT: false EXTERNAL: CSSTidy --DESCRIPTION--

This directive turns on the style block extraction filter, which removes style blocks from input HTML, cleans them up with CSSTidy, and places them in the StyleBlocks context variable, for further use by you, usually to be placed in an external stylesheet, or a style block in the head of your document.

Sample usage:

';
?>



  Filter.ExtractStyleBlocks
body {color:#F00;} Some text';

    $config = HTMLPurifier_Config::createDefault();
    $config->set('Filter', 'ExtractStyleBlocks', true);
    $purifier = new HTMLPurifier($config);

    $html = $purifier->purify($dirty);

    // This implementation writes the stylesheets to the styles/ directory.
    // You can also echo the styles inside the document, but it's a bit
    // more difficult to make sure they get interpreted properly by
    // browsers; try the usual CSS armoring techniques.
    $styles = $purifier->context->get('StyleBlocks');
    $dir = 'styles/';
    if (!is_dir($dir)) mkdir($dir);
    $hash = sha1($_GET['html']);
    foreach ($styles as $i => $style) {
        file_put_contents($name = $dir . $hash . "_$i", $style);
        echo '';
    }
?>


  
]]>

Warning: It is possible for a user to mount an imagecrash attack using this CSS. Counter-measures are difficult; it is not simply enough to limit the range of CSS lengths (using relative lengths with many nesting levels allows for large values to be attained without actually specifying them in the stylesheet), and the flexible nature of selectors makes it difficult to selectively disable lengths on image tags (HTML Purifier, however, does disable CSS width and height in inline styling). There are probably two effective counter measures: an explicit width and height set to auto in all images in your document (unlikely) or the disabling of width and height (somewhat reasonable). Whether or not these measures should be used is left to the reader.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt0000644000175000017500000000075713512700112025045 0ustar ezyangezyangFilter.YouTube TYPE: bool VERSION: 3.1.0 DEFAULT: false --DESCRIPTION--

Warning: Deprecated in favor of %HTML.SafeObject and %Output.FlashCompat (turn both on to allow YouTube videos and other Flash content).

This directive enables YouTube video embedding in HTML Purifier. Check this document on embedding videos for more information on what this filter does.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt0000644000175000017500000000174413512700112024354 0ustar ezyangezyangHTML.Allowed TYPE: itext/null VERSION: 2.0.0 DEFAULT: NULL --DESCRIPTION--

This is a preferred convenience directive that combines %HTML.AllowedElements and %HTML.AllowedAttributes. Specify elements and attributes that are allowed using: element1[attr1|attr2],element2.... For example, if you would like to only allow paragraphs and links, specify a[href],p. You can specify attributes that apply to all elements using an asterisk, e.g. *[lang]. You can also use newlines instead of commas to separate elements.

Warning: All of the constraints on the component directives are still enforced. The syntax is a subset of TinyMCE's valid_elements whitelist: directly copy-pasting it here will probably result in broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes are set, this directive has no effect.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt0000644000175000017500000000113713512700112026417 0ustar ezyangezyangHTML.AllowedAttributes TYPE: lookup/null VERSION: 1.3.0 DEFAULT: NULL --DESCRIPTION--

If HTML Purifier's attribute set is unsatisfactory, overload it! The syntax is "tag.attr" or "*.attr" for the global attributes (style, id, class, dir, lang, xml:lang).

Warning: If another directive conflicts with the elements here, that directive will win and override. For example, %HTML.EnableAttrID will take precedence over *.id in this directive. You must set that directive to true before you can use IDs at all.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt0000644000175000017500000000055713512700112026063 0ustar ezyangezyangHTML.AllowedComments TYPE: lookup VERSION: 4.4.0 DEFAULT: array() --DESCRIPTION-- A whitelist which indicates what explicit comment bodies should be allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp (these directives are union'ed together, so a comment is considered valid if any directive deems it valid.) --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt0000644000175000017500000000130713512700112027230 0ustar ezyangezyangHTML.AllowedCommentsRegexp TYPE: string/null VERSION: 4.4.0 DEFAULT: NULL --DESCRIPTION-- A regexp, which if it matches the body of a comment, indicates that it should be allowed. Trailing and leading spaces are removed prior to running this regular expression. Warning: Make sure you specify correct anchor metacharacters ^regex$, otherwise you may accept comments that you did not mean to! In particular, the regex /foo|bar/ is probably not sufficiently strict, since it also allows foobar. See also %HTML.AllowedComments (these directives are union'ed together, so a comment is considered valid if any directive deems it valid.) --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt0000644000175000017500000000156213512700112026047 0ustar ezyangezyangHTML.AllowedElements TYPE: lookup/null VERSION: 1.3.0 DEFAULT: NULL --DESCRIPTION--

If HTML Purifier's tag set is unsatisfactory for your needs, you can overload it with your own list of tags to allow. If you change this, you probably also want to change %HTML.AllowedAttributes; see also %HTML.Allowed which lets you set allowed elements and attributes at the same time.

If you attempt to allow an element that HTML Purifier does not know about, HTML Purifier will raise an error. You will need to manually tell HTML Purifier about this element by using the advanced customization features.

Warning: If another directive conflicts with the elements here, that directive will win and override.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt0000644000175000017500000000132613512700112025701 0ustar ezyangezyangHTML.AllowedModules TYPE: lookup/null VERSION: 2.0.0 DEFAULT: NULL --DESCRIPTION--

A doctype comes with a set of usual modules to use. Without having to muck about with the doctypes, you can quickly activate or disable these modules by specifying which modules you wish to allow with this directive. This is most useful for unit testing specific modules, although end users may find it useful for their own ends.

If you specify a module that does not exist, the manager will silently fail to use it, so be careful! User-defined modules are not affected by this directive. Modules defined in %HTML.CoreModules are not affected by this directive.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt0000644000175000017500000000072713512700112026046 0ustar ezyangezyangHTML.Attr.Name.UseCDATA TYPE: bool DEFAULT: false VERSION: 4.0.0 --DESCRIPTION-- The W3C specification DTD defines the name attribute to be CDATA, not ID, due to limitations of DTD. In certain documents, this relaxed behavior is desired, whether it is to specify duplicate names, or to specify names that would be illegal IDs (for example, names that begin with a digit.) Set this configuration directive to true to use the relaxed parsing rules. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt0000644000175000017500000000107313512700112025353 0ustar ezyangezyangHTML.BlockWrapper TYPE: string VERSION: 1.3.0 DEFAULT: 'p' --DESCRIPTION--

String name of element to wrap inline elements that are inside a block context. This only occurs in the children of blockquote in strict mode.

Example: by default value, <blockquote>Foo</blockquote> would become <blockquote><p>Foo</p></blockquote>. The <p> tags can be replaced with whatever you desire, as long as it is a block level element.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt0000644000175000017500000000115613512700112025203 0ustar ezyangezyangHTML.CoreModules TYPE: lookup VERSION: 2.0.0 --DEFAULT-- array ( 'Structure' => true, 'Text' => true, 'Hypertext' => true, 'List' => true, 'NonXMLCommonAttributes' => true, 'XMLCommonAttributes' => true, 'CommonAttributes' => true, ) --DESCRIPTION--

Certain modularized doctypes (XHTML, namely) have certain modules that must be included for the doctype to be a conforming document type: put those modules here. By default, XHTML's core modules are used. You can set this to a blank array to disable core module protection, but this is not recommended.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt0000644000175000017500000000035113512700112025560 0ustar ezyangezyangHTML.CustomDoctype TYPE: string/null VERSION: 2.0.1 DEFAULT: NULL --DESCRIPTION-- A custom doctype for power-users who defined their own document type. This directive only applies when %HTML.Doctype is blank. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt0000644000175000017500000000227213512700112025267 0ustar ezyangezyangHTML.DefinitionID TYPE: string/null DEFAULT: NULL VERSION: 2.0.0 --DESCRIPTION--

Unique identifier for a custom-built HTML definition. If you edit the raw version of the HTMLDefinition, introducing changes that the configuration object does not reflect, you must specify this variable. If you change your custom edits, you should change this directive, or clear your cache. Example:

$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'DefinitionID', '1');
$def = $config->getHTMLDefinition();
$def->addAttribute('a', 'tabindex', 'Number');

In the above example, the configuration is still at the defaults, but using the advanced API, an extra attribute has been added. The configuration object normally has no way of knowing that this change has taken place, so it needs an extra directive: %HTML.DefinitionID. If someone else attempts to use the default configuration, these two pieces of code will not clobber each other in the cache, since one has an extra directive attached to it.

You must specify a value to this directive to use the advanced API features.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt0000644000175000017500000000102513512700112025522 0ustar ezyangezyangHTML.DefinitionRev TYPE: int VERSION: 2.0.0 DEFAULT: 1 --DESCRIPTION--

Revision identifier for your custom definition specified in %HTML.DefinitionID. This serves the same purpose: uniquely identifying your custom definition, but this one does so in a chronological context: revision 3 is more up-to-date than revision 2. Thus, when this gets incremented, the cache handling is smart enough to clean up any older revisions of your definition as well as flush the cache.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt0000644000175000017500000000073113512700112024367 0ustar ezyangezyangHTML.Doctype TYPE: string/null DEFAULT: NULL --DESCRIPTION-- Doctype to use during filtering. Technically speaking this is not actually a doctype (as it does not identify a corresponding DTD), but we are using this name for sake of simplicity. When non-blank, this will override any older directives like %HTML.XHTML or %HTML.Strict. --ALLOWED-- 'HTML 4.01 Transitional', 'HTML 4.01 Strict', 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict', 'XHTML 1.1' --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.FlashAllowFullScreen.txt0000644000175000017500000000043713512700112027002 0ustar ezyangezyangHTML.FlashAllowFullScreen TYPE: bool VERSION: 4.2.0 DEFAULT: false --DESCRIPTION--

Whether or not to permit embedded Flash content from %HTML.SafeObject to expand to the full screen. Corresponds to the allowFullScreen parameter.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt0000644000175000017500000000152413512700112026724 0ustar ezyangezyangHTML.ForbiddenAttributes TYPE: lookup VERSION: 3.1.0 DEFAULT: array() --DESCRIPTION--

While this directive is similar to %HTML.AllowedAttributes, for forwards-compatibility with XML, this attribute has a different syntax. Instead of tag.attr, use tag@attr. To disallow href attributes in a tags, set this directive to a@href. You can also disallow an attribute globally with attr or *@attr (either syntax is fine; the latter is provided for consistency with %HTML.AllowedAttributes).

Warning: This directive complements %HTML.ForbiddenElements; accordingly, check out that directive for a discussion of why you should think twice before using this directive.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt0000644000175000017500000000136213512700112026352 0ustar ezyangezyangHTML.ForbiddenElements TYPE: lookup VERSION: 3.1.0 DEFAULT: array() --DESCRIPTION--

This was, perhaps, the most requested feature ever in HTML Purifier. Please don't abuse it! This is the logical inverse of %HTML.AllowedElements, and it will override that directive, or any other directive.

If possible, %HTML.Allowed is recommended over this directive, because it can sometimes be difficult to tell whether or not you've forbidden all of the behavior you would like to disallow. If you forbid img with the expectation of preventing images on your site, you'll be in for a nasty surprise when people start using the background-image CSS property.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt0000644000175000017500000000075213512700112025307 0ustar ezyangezyangHTML.MaxImgLength TYPE: int/null DEFAULT: 1200 VERSION: 3.1.1 --DESCRIPTION--

This directive controls the maximum number of pixels in the width and height attributes in img tags. This is in place to prevent imagecrash attacks; disable with null at your own risk. This directive is similar to %CSS.MaxImgLength, and both should be concurrently edited, although there are subtle differences in the input format (the HTML max is an integer).

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt0000644000175000017500000000024313512700112024555 0ustar ezyangezyangHTML.Nofollow TYPE: bool VERSION: 4.3.0 DEFAULT: FALSE --DESCRIPTION-- If enabled, nofollow rel attributes are added to all outgoing links. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt0000644000175000017500000000047213512700112024213 0ustar ezyangezyangHTML.Parent TYPE: string VERSION: 1.3.0 DEFAULT: 'div' --DESCRIPTION--

String name of element that HTML fragment passed to library will be inserted in. An interesting variation would be using span as the parent element, meaning that only inline tags would be allowed.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt0000644000175000017500000000051613512700112025301 0ustar ezyangezyangHTML.Proprietary TYPE: bool VERSION: 3.1.0 DEFAULT: false --DESCRIPTION--

Whether or not to allow proprietary elements and attributes in your documents, as per HTMLPurifier_HTMLModule_Proprietary. Warning: This can cause your documents to stop validating!

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt0000644000175000017500000000074313512700112024576 0ustar ezyangezyangHTML.SafeEmbed TYPE: bool VERSION: 3.1.1 DEFAULT: false --DESCRIPTION--

Whether or not to permit embed tags in documents, with a number of extra security features added to prevent script execution. This is similar to what websites like MySpace do to embed tags. Embed is a proprietary element and will cause your website to stop validating; you should see if you can use %Output.FlashCompat with %HTML.SafeObject instead first.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt0000644000175000017500000000061613512700112024764 0ustar ezyangezyangHTML.SafeIframe TYPE: bool VERSION: 4.4.0 DEFAULT: false --DESCRIPTION--

Whether or not to permit iframe tags in untrusted documents. This directive must be accompanied by a whitelist of permitted iframes, such as %URI.SafeIframeRegexp, otherwise it will fatally error. This directive has no effect on strict doctypes, as iframes are not valid.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt0000644000175000017500000000067113512700112024770 0ustar ezyangezyangHTML.SafeObject TYPE: bool VERSION: 3.1.1 DEFAULT: false --DESCRIPTION--

Whether or not to permit object tags in documents, with a number of extra security features added to prevent script execution. This is similar to what websites like MySpace do to object tags. You should also enable %Output.FlashCompat in order to generate Internet Explorer compatibility code for your object tags.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.SafeScripting.txt0000644000175000017500000000042113512700112025515 0ustar ezyangezyangHTML.SafeScripting TYPE: lookup VERSION: 4.5.0 DEFAULT: array() --DESCRIPTION--

Whether or not to permit script tags to external scripts in documents. Inline scripting is not allowed, and the script must match an explicit whitelist.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt0000644000175000017500000000033513512700112024230 0ustar ezyangezyangHTML.Strict TYPE: bool VERSION: 1.3.0 DEFAULT: false DEPRECATED-VERSION: 1.7.0 DEPRECATED-USE: HTML.Doctype --DESCRIPTION-- Determines whether or not to use Transitional (loose) or Strict rulesets. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt0000644000175000017500000000037513512700112025162 0ustar ezyangezyangHTML.TargetBlank TYPE: bool VERSION: 4.4.0 DEFAULT: FALSE --DESCRIPTION-- If enabled, target=blank attributes are added to all outgoing links. (This includes links from an HTTPS version of a page to an HTTP version.) --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoopener.txt0000644000175000017500000000046113512700112025714 0ustar ezyangezyang--# vim: et sw=4 sts=4 HTML.TargetNoopener TYPE: bool VERSION: 4.8.0 DEFAULT: TRUE --DESCRIPTION-- If enabled, noopener rel attributes are added to links which have a target attribute associated with them. This prevents malicious destinations from overwriting the original window. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt0000644000175000017500000000043613512700112026242 0ustar ezyangezyangHTML.TargetNoreferrer TYPE: bool VERSION: 4.8.0 DEFAULT: TRUE --DESCRIPTION-- If enabled, noreferrer rel attributes are added to links which have a target attribute associated with them. This prevents malicious destinations from overwriting the original window. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt0000644000175000017500000000024313512700112024300 0ustar ezyangezyangHTML.TidyAdd TYPE: lookup VERSION: 2.0.0 DEFAULT: array() --DESCRIPTION-- Fixes to add to the default set of Tidy fixes as per your level. 
--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt0000644000175000017500000000115413512700112024661 0ustar ezyangezyangHTML.TidyLevel TYPE: string VERSION: 2.0.0 DEFAULT: 'medium' --DESCRIPTION--

General level of cleanliness the Tidy module should enforce. There are four allowed values:

none
No extra tidying should be done
light
Only fix elements that would be discarded otherwise due to lack of support in doctype
medium
Enforce best practices
heavy
Transform all deprecated elements and attributes to standards compliant equivalents
--ALLOWED-- 'none', 'light', 'medium', 'heavy' --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt0000644000175000017500000000025313512700112025046 0ustar ezyangezyangHTML.TidyRemove TYPE: lookup VERSION: 2.0.0 DEFAULT: array() --DESCRIPTION-- Fixes to remove from the default set of Tidy fixes as per your level. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt0000644000175000017500000000041213512700112024406 0ustar ezyangezyangHTML.Trusted TYPE: bool VERSION: 2.0.0 DEFAULT: false --DESCRIPTION-- Indicates whether or not the user input is trusted or not. If the input is trusted, a more expansive set of allowed tags and attributes will be used. See also %CSS.Trusted. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt0000644000175000017500000000035313512700112023654 0ustar ezyangezyangHTML.XHTML TYPE: bool DEFAULT: true VERSION: 1.1.0 DEPRECATED-VERSION: 1.7.0 DEPRECATED-USE: HTML.Doctype --DESCRIPTION-- Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor. --ALIASES-- Core.XHTML --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/info.ini0000644000175000017500000000005513512700112023107 0ustar ezyangezyangname = "HTML Purifier" ; vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt0000644000175000017500000000042313512700112030017 0ustar ezyangezyangOutput.CommentScriptContents TYPE: bool VERSION: 2.0.0 DEFAULT: true --DESCRIPTION-- Determines whether or not HTML Purifier should attempt to fix up the contents of script tags for legacy browsers with comments. --ALIASES-- Core.CommentScriptContents --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt0000644000175000017500000000103713512700112025723 0ustar ezyangezyangOutput.FixInnerHTML TYPE: bool VERSION: 4.3.0 DEFAULT: true --DESCRIPTION--

If true, HTML Purifier will protect against Internet Explorer's mishandling of the innerHTML attribute by appending a space to any attribute that does not contain angled brackets, spaces or quotes, but contains a backtick. This slightly changes the semantics of any given attribute, so if this is unacceptable and you do not use innerHTML on any of your pages, you can turn this directive off.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt0000644000175000017500000000041513512700112025714 0ustar ezyangezyangOutput.FlashCompat TYPE: bool VERSION: 4.1.0 DEFAULT: false --DESCRIPTION--

If true, HTML Purifier will generate Internet Explorer compatibility code for all object code. This is highly recommended if you enable %HTML.SafeObject.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt0000644000175000017500000000054613512700112025121 0ustar ezyangezyangOutput.Newline TYPE: string/null VERSION: 2.0.1 DEFAULT: NULL --DESCRIPTION--

Newline string to format final output with. If left null, HTML Purifier will auto-detect the default newline type of the system and use that; you can manually override it here. Remember, \r\n is Windows, \r is Mac, and \n is Unix.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt0000644000175000017500000000077513512700112025306 0ustar ezyangezyangOutput.SortAttr TYPE: bool VERSION: 3.2.0 DEFAULT: false --DESCRIPTION--

If true, HTML Purifier will sort attributes by name before writing them back to the document, converting a tag like: <el b="" a="" c="" /> to <el a="" b="" c="" />. This is a workaround for a bug in FCKeditor which causes it to swap attributes order, adding noise to text diffs. If you're not seeing this bug, chances are, you don't need this directive.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt0000644000175000017500000000147313512700112025602 0ustar ezyangezyangOutput.TidyFormat TYPE: bool VERSION: 1.1.1 DEFAULT: false --DESCRIPTION--

Determines whether or not to run Tidy on the final output for pretty formatting reasons, such as indentation and wrap.

This can greatly improve readability for editors who are hand-editing the HTML, but is by no means necessary as HTML Purifier has already fixed all major errors the HTML may have had. Tidy is a non-default extension, and this directive will silently fail if Tidy is not available.

If you are looking to make the overall look of your page's source better, I recommend running Tidy on the entire page rather than just user-content (after all, the indentation relative to the containing blocks will be incorrect).

--ALIASES-- Core.TidyFormat --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt0000644000175000017500000000030513512700112025462 0ustar ezyangezyangTest.ForceNoIconv TYPE: bool DEFAULT: false --DESCRIPTION-- When set to true, HTMLPurifier_Encoder will act as if iconv does not exist and use only pure PHP implementations. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt0000644000175000017500000000074513512700112025557 0ustar ezyangezyangURI.AllowedSchemes TYPE: lookup --DEFAULT-- array ( 'http' => true, 'https' => true, 'mailto' => true, 'ftp' => true, 'nntp' => true, 'news' => true, 'tel' => true, ) --DESCRIPTION-- Whitelist that defines the schemes that a URI is allowed to have. This prevents XSS attacks from using pseudo-schemes like javascript or mocha. There is also support for the data and file URI schemes, but they are not enabled by default. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.Base.txt0000644000175000017500000000121613512700112023524 0ustar ezyangezyangURI.Base TYPE: string/null VERSION: 2.1.0 DEFAULT: NULL --DESCRIPTION--

The base URI is the URI of the document this purified HTML will be inserted into. This information is important if HTML Purifier needs to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute is on. You may use a non-absolute URI for this value, but behavior may vary (%URI.MakeAbsolute deals nicely with both absolute and relative paths, but forwards-compatibility is not guaranteed). Warning: If set, the scheme on this URI overrides the one specified by %URI.DefaultScheme.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt0000644000175000017500000000062413512700112025365 0ustar ezyangezyangURI.DefaultScheme TYPE: string/null DEFAULT: 'http' --DESCRIPTION--

Defines through what scheme the output will be served, in order to select the proper object validator when no scheme information is present.

Starting with HTML Purifier 4.9.0, the default scheme can be null, in which case we reject all URIs which do not have explicit schemes.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt0000644000175000017500000000036213512700112025160 0ustar ezyangezyangURI.DefinitionID TYPE: string/null VERSION: 2.1.0 DEFAULT: NULL --DESCRIPTION--

Unique identifier for a custom-built URI definition. If you want to add custom URIFilters, you must specify this value.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt0000644000175000017500000000030413512700112025414 0ustar ezyangezyangURI.DefinitionRev TYPE: int VERSION: 2.1.0 DEFAULT: 1 --DESCRIPTION--

Revision identifier for your custom definition. See %HTML.DefinitionRev for details.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt0000644000175000017500000000042013512700112024211 0ustar ezyangezyangURI.Disable TYPE: bool VERSION: 1.3.0 DEFAULT: false --DESCRIPTION--

Disables all URIs in all forms. Not sure why you'd want to do that (after all, the Internet's founded on the notion of a hyperlink).

--ALIASES-- Attr.DisableURI --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt0000644000175000017500000000067313512700112025726 0ustar ezyangezyangURI.DisableExternal TYPE: bool VERSION: 1.2.0 DEFAULT: false --DESCRIPTION-- Disables links to external websites. This is a highly effective anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no links or images outside of your domain will be allowed. Non-linkified URIs will still be preserved. If you want to be able to link to subdomains or use absolute URIs, specify %URI.Host for your website. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt0000644000175000017500000000104513512700112027613 0ustar ezyangezyangURI.DisableExternalResources TYPE: bool VERSION: 1.3.0 DEFAULT: false --DESCRIPTION-- Disables the embedding of external resources, preventing users from embedding things like images from other hosts. This prevents access tracking (good for email viewers), bandwidth leeching, cross-site request forging, goatse.cx posting, and other nasties, but also results in a loss of end-user functionality (they can't directly post a pic they posted from Flickr anymore). Use it if you don't have a robust user-content moderation team. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt0000644000175000017500000000065313512700112026114 0ustar ezyangezyangURI.DisableResources TYPE: bool VERSION: 4.2.0 DEFAULT: false --DESCRIPTION--

Disables embedding resources, essentially meaning no pictures. You can still link to them though. See %URI.DisableExternalResources for why this might be a good idea.

Note: While this directive has been available since 1.3.0, it didn't actually start doing anything until 4.2.0.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.Host.txt0000644000175000017500000000146213512700112023572 0ustar ezyangezyangURI.Host TYPE: string/null VERSION: 1.2.0 DEFAULT: NULL --DESCRIPTION--

Defines the domain name of the server, so we can determine whether or not an absolute URI is from your website. Not strictly necessary, as users should be using relative URIs to reference resources on your website. It will, however, let you use absolute URIs to link to subdomains of the domain you post here: i.e. example.com will allow sub.example.com. However, higher up domains will still be excluded: if you set %URI.Host to sub.example.com, example.com will be blocked. Note: This directive overrides %URI.Base because a given page may be on a sub-domain, but you wish HTML Purifier to be more relaxed and allow some of the parent domains too.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt0000644000175000017500000000046613512700112025426 0ustar ezyangezyangURI.HostBlacklist TYPE: list VERSION: 1.3.0 DEFAULT: array() --DESCRIPTION-- List of strings that are forbidden in the host of any URI. Use it to kill domain names of spam, etc. Note that it will catch anything in the domain, so moo.com will catch moo.com.example.com. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt0000644000175000017500000000060613512700112025230 0ustar ezyangezyangURI.MakeAbsolute TYPE: bool VERSION: 2.1.0 DEFAULT: false --DESCRIPTION--

Converts all URIs into absolute forms. This is useful when the HTML being filtered assumes a specific base path, but will actually be viewed in a different context (and setting an alternate base URI is not possible). %URI.Base must be set for this directive to work.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt0000644000175000017500000000530713512700112023732 0ustar ezyangezyangURI.Munge TYPE: string/null VERSION: 1.3.0 DEFAULT: NULL --DESCRIPTION--

Munges all browsable (usually http, https and ftp) absolute URIs into another URI, usually a URI redirection service. This directive accepts a URI, formatted with a %s where the url-encoded original URI should be inserted (sample: http://www.google.com/url?q=%s).

Uses for this directive:

  • Prevent PageRank leaks, while being fairly transparent to users (you may also want to add some client side JavaScript to override the text in the statusbar). Notice: Many security experts believe that this form of protection does not deter spam-bots.
  • Redirect users to a splash page telling them they are leaving your website. While this is poor usability practice, it is often mandated in corporate environments.

Prior to HTML Purifier 3.1.1, this directive also enabled the munging of browsable external resources, which could break things if your redirection script was a splash page or used meta tags. To revert to previous behavior, please use %URI.MungeResources.

You may want to also use %URI.MungeSecretKey along with this directive in order to enforce what URIs your redirector script allows. Open redirector scripts can be a security risk and negatively affect the reputation of your domain name.

Starting with HTML Purifier 3.1.1, there are also these substitutions:

Key Description Example <a href="">
%r 1 - The URI embeds a resource
(blank) - The URI is merely a link
%n The name of the tag this URI came from a
%m The name of the attribute this URI came from href
%p The name of the CSS property this URI came from, or blank if irrelevant

Admittedly, these letters are somewhat arbitrary; the only stipulation was that they couldn't be a through f. r is for resource (I would have preferred e, but you take what you can get), n is for name, m was picked because it came after n (and I couldn't use a), p is for property.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt0000644000175000017500000000114313512700112025617 0ustar ezyangezyangURI.MungeResources TYPE: bool VERSION: 3.1.1 DEFAULT: false --DESCRIPTION--

If true, any URI munging directives like %URI.Munge will also apply to embedded resources, such as <img src="">. Be careful enabling this directive if you have a redirector script that does not use the Location HTTP header; all of your images and other embedded resources will break.

Warning: It is strongly advised you use this in conjunction with %URI.MungeSecretKey to mitigate the security risk of an open redirector.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt0000644000175000017500000000172213512700112025546 0ustar ezyangezyangURI.MungeSecretKey TYPE: string/null VERSION: 3.1.1 DEFAULT: NULL --DESCRIPTION--

This directive enables secure checksum generation along with %URI.Munge. It should be set to a secure key that is not shared with anyone else. The checksum can be placed in the URI using %t. Use of this checksum affords an additional level of protection by allowing a redirector to check if a URI has passed through HTML Purifier with this line:

$checksum === hash_hmac("sha256", $url, $secret_key)

If the output is TRUE, the redirector script should accept the URI.

Please note that it would still be possible for an attacker to procure secure hashes en masse by abusing your website's Preview feature or the like, but this service affords an additional level of protection that should be combined with website blacklisting.

Remember this has no effect if %URI.Munge is not on.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt0000644000175000017500000000050613512700112027252 0ustar ezyangezyangURI.OverrideAllowedSchemes TYPE: bool DEFAULT: true --DESCRIPTION-- If this is set to true (which it is by default), you can override %URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme to the registry. If false, you will also have to update that directive in order to add more schemes. --# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt0000644000175000017500000000160113512700112026025 0ustar ezyangezyangURI.SafeIframeRegexp TYPE: string/null VERSION: 4.4.0 DEFAULT: NULL --DESCRIPTION--

A PCRE regular expression that will be matched against an iframe URI. This is a relatively inflexible scheme, but works well enough for the most common use-case of iframes: embedded video. This directive only has an effect if %HTML.SafeIframe is enabled. Here are some example values:

  • %^http://www.youtube.com/embed/% - Allow YouTube videos
  • %^http://player.vimeo.com/video/% - Allow Vimeo videos
  • %^http://(www.youtube.com/embed/|player.vimeo.com/video/)% - Allow both

Note that this directive does not give you enough granularity to, say, disable all autoplay videos. Pipe up on the HTML Purifier forums if this is a capability you want.

--# vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/DefinitionCache/Decorator.php0000644000175000017500000000451313512700112023337 0ustar ezyangezyangcopy(); // reference is necessary for mocks in PHP 4 $decorator->cache =& $cache; $decorator->type = $cache->type; return $decorator; } /** * Cross-compatible clone substitute * @return HTMLPurifier_DefinitionCache_Decorator */ public function copy() { return new HTMLPurifier_DefinitionCache_Decorator(); } /** * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config * @return mixed */ public function add($def, $config) { return $this->cache->add($def, $config); } /** * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config * @return mixed */ public function set($def, $config) { return $this->cache->set($def, $config); } /** * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config * @return mixed */ public function replace($def, $config) { return $this->cache->replace($def, $config); } /** * @param HTMLPurifier_Config $config * @return mixed */ public function get($config) { return $this->cache->get($config); } /** * @param HTMLPurifier_Config $config * @return mixed */ public function remove($config) { return $this->cache->remove($config); } /** * @param HTMLPurifier_Config $config * @return mixed */ public function flush($config) { return $this->cache->flush($config); } /** * @param HTMLPurifier_Config $config * @return mixed */ public function cleanup($config) { return $this->cache->cleanup($config); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/DefinitionCache/Null.php0000644000175000017500000000251013512700112022322 0ustar ezyangezyangcheckDefType($def)) { return; } $file = $this->generateFilePath($config); if (file_exists($file)) { return false; } if (!$this->_prepareDir($config)) { return false; } return $this->_write($file, serialize($def), $config); } /** * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config * @return 
int|bool Result of _write(), false if the directory could not be prepared; null if checkDefType() rejects $def
     */
    public function set($def, $config)
    {
        if (!$this->checkDefType($def)) {
            return;
        }
        $path = $this->generateFilePath($config);
        if ($this->_prepareDir($config)) {
            return $this->_write($path, serialize($def), $config);
        }
        return false;
    }

    /**
     * Overwrites the serial file for this configuration, but only when
     * such a file is already present.
     * @param HTMLPurifier_Definition $def
     * @param HTMLPurifier_Config $config
     * @return int|bool Result of _write(); false if the file is missing or
     *         the directory could not be prepared; null if checkDefType()
     *         rejects $def
     */
    public function replace($def, $config)
    {
        if (!$this->checkDefType($def)) {
            return;
        }
        $path = $this->generateFilePath($config);
        // order matters: existence is tested before the directory is touched
        if (!file_exists($path) || !$this->_prepareDir($config)) {
            return false;
        }
        return $this->_write($path, serialize($def), $config);
    }

    /**
     * Reads back and unserializes the serial file for this configuration.
     * @param HTMLPurifier_Config $config
     * @return mixed Unserialized file contents, or false when the file
     *         does not exist
     */
    public function get($config)
    {
        $path = $this->generateFilePath($config);
        return file_exists($path)
            ? unserialize(file_get_contents($path))
            : false;
    }

    /**
     * Deletes the serial file for this configuration.
     * @param HTMLPurifier_Config $config
     * @return bool Result of unlink(), or false when the file does not exist
     */
    public function remove($config)
    {
        $path = $this->generateFilePath($config);
        if (file_exists($path)) {
            return unlink($path);
        }
        return false;
    }

    /**
     * Deletes every entry of this cache's directory whose name is
     * non-empty and does not begin with a dot.
     * @param HTMLPurifier_Config $config
     * @return bool False if the directory could not be prepared or opened,
     *         true otherwise
     */
    public function flush($config)
    {
        if (!$this->_prepareDir($config)) {
            return false;
        }
        $dir = $this->generateDirectoryPath($config);
        $handle = opendir($dir);
        if (false === $handle) {
            return false;
        }
        while (false !== ($entry = readdir($handle))) {
            // Some PHP versions reportedly hand back an empty string from
            // readdir (see #49), hence the empty() guard; dot-entries are
            // left alone as well.
            if (empty($entry) || $entry[0] === '.') {
                continue;
            }
            unlink($dir . '/' . $entry);
        }
        closedir($handle);
        return true;
    }

    /**
     * Deletes the entries of this cache's directory whose key (the file
     * name minus its last four characters) is reported old by isOld().
     * @param HTMLPurifier_Config $config
     * @return bool False if the directory could not be prepared or opened,
     *         true otherwise
     */
    public function cleanup($config)
    {
        if (!$this->_prepareDir($config)) {
            return false;
        }
        $dir = $this->generateDirectoryPath($config);
        $handle = opendir($dir);
        if (false === $handle) {
            return false;
        }
        while (false !== ($entry = readdir($handle))) {
            // same guards as flush(): empty names (see #49) and dot-entries
            if (empty($entry) || $entry[0] === '.') {
                continue;
            }
            // drop the four trailing characters (the '.ser' suffix written
            // by generateFilePath) to recover the cache key
            $key = substr($entry, 0, strlen($entry) - 4);
            if (!$this->isOld($key, $config)) {
                continue;
            }
            unlink($dir . '/' . $entry);
        }
        closedir($handle);
        return true;
    }

    /**
     * Builds the path of the serial file for the given configuration:
     * cache directory, slash, generated key, '.ser'.
     * @param HTMLPurifier_Config $config
     * @return string
     * @todo Make protected
     */
    public function generateFilePath($config)
    {
        $key = $this->generateKey($config);
        $dir = $this->generateDirectoryPath($config);
        return $dir . '/' . $key . '.ser';
    }

    /**
     * Builds the path of the directory holding this cache's serial files:
     * base directory, slash, definition type.
     * @param HTMLPurifier_Config $config
     * @return string
     * @note No trailing slash
     * @todo Make protected
     */
    public function generateDirectoryPath($config)
    {
        return $this->generateBaseDirectoryPath($config) . '/' . $this->type;
    }

    /**
     * Returns the base directory under which all definition types keep
     * their serials: %Cache.SerializerPath if set, otherwise the bundled
     * DefinitionCache/Serializer directory.
     * @param HTMLPurifier_Config $config
     * @return mixed|string
     * @todo Make protected
     */
    public function generateBaseDirectoryPath($config)
    {
        $configured = $config->get('Cache.SerializerPath');
        if (is_null($configured)) {
            return HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer';
        }
        return $configured;
    }

    /**
     * Convenience wrapper function for file_put_contents
     * @param string $file File name to write to
     * @param string $data Data to write into file
     * @param HTMLPurifier_Config $config
     * @return int|bool Number of bytes written if success, or false if failure.
*/ private function _write($file, $data, $config) { $result = file_put_contents($file, $data); if ($result !== false) { // set permissions of the new file (no execute) $chmod = $config->get('Cache.SerializerPermissions'); if ($chmod !== null) { chmod($file, $chmod & 0666); } } return $result; } /** * Prepares the directory that this type stores the serials in * @param HTMLPurifier_Config $config * @return bool True if successful */ private function _prepareDir($config) { $directory = $this->generateDirectoryPath($config); $chmod = $config->get('Cache.SerializerPermissions'); if ($chmod === null) { if (!@mkdir($directory) && !is_dir($directory)) { trigger_error( 'Could not create directory ' . $directory . '', E_USER_WARNING ); return false; } return true; } if (!is_dir($directory)) { $base = $this->generateBaseDirectoryPath($config); if (!is_dir($base)) { trigger_error( 'Base directory ' . $base . ' does not exist, please create or change using %Cache.SerializerPath', E_USER_WARNING ); return false; } elseif (!$this->_testPermissions($base, $chmod)) { return false; } if (!@mkdir($directory, $chmod) && !is_dir($directory)) { trigger_error( 'Could not create directory ' . $directory . '', E_USER_WARNING ); return false; } if (!$this->_testPermissions($directory, $chmod)) { return false; } } elseif (!$this->_testPermissions($directory, $chmod)) { return false; } return true; } /** * Tests permissions on a directory and throws out friendly * error messages and attempts to chmod it itself if possible * @param string $dir Directory path * @param int $chmod Permissions * @return bool True if directory is writable */ private function _testPermissions($dir, $chmod) { // early abort, if it is writable, everything is hunky-dory if (is_writable($dir)) { return true; } if (!is_dir($dir)) { // generally, you'll want to handle this beforehand // so a more specific error message can be given trigger_error( 'Directory ' . $dir . 
' does not exist', E_USER_WARNING ); return false; } if (function_exists('posix_getuid') && $chmod !== null) { // POSIX system, we can give more specific advice if (fileowner($dir) === posix_getuid()) { // we can chmod it ourselves $chmod = $chmod | 0700; if (chmod($dir, $chmod)) { return true; } } elseif (filegroup($dir) === posix_getgid()) { $chmod = $chmod | 0070; } else { // PHP's probably running as nobody, so we'll // need to give global permissions $chmod = $chmod | 0777; } trigger_error( 'Directory ' . $dir . ' not writable, ' . 'please chmod to ' . decoct($chmod), E_USER_WARNING ); } else { // generic error message trigger_error( 'Directory ' . $dir . ' not writable, ' . 'please alter file permissions', E_USER_WARNING ); } return false; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php0000644000175000017500000000324313512700112024725 0ustar ezyangezyangdefinitions[$this->generateKey($config)] = $def; } return $status; } /** * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config * @return mixed */ public function set($def, $config) { $status = parent::set($def, $config); if ($status) { $this->definitions[$this->generateKey($config)] = $def; } return $status; } /** * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config * @return mixed */ public function replace($def, $config) { $status = parent::replace($def, $config); if ($status) { $this->definitions[$this->generateKey($config)] = $def; } return $status; } /** * @param HTMLPurifier_Config $config * @return mixed */ public function get($config) { $key = $this->generateKey($config); if (isset($this->definitions[$key])) { return $this->definitions[$key]; } $this->definitions[$key] = parent::get($config); return $this->definitions[$key]; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/DefinitionCache/Decorator/Template.php.in0000644000175000017500000000321113512700112025511 0ustar 
ezyangezyang";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"‌";s:3:"zwj";s:3:"‍";s:3:"lrm";s:3:"‎";s:3:"rlm";s:3:"‏";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"nbsp";s:2:" ";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"­";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:4:"sup2";s:2:"²";s:4:"sup3";s:2:"³";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"sup1";s:2:"¹";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"frac14";s:2:"¼";s:6:"frac12";s:2:"½";s:6:"frac34";s:2:"¾";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2
:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc";s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc";s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";}HTMLPurifier-4.11.0/HTMLPurifier/Filter/ExtractStyleBlocks.php0000644000175000017500000003244413512700112023423 0ustar ezyangezyang blocks from input HTML, cleans them up * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks') * so they can be used elsewhere in the document. * * @note * See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for * sample usage. * * @note * This filter can also be used on stylesheets not included in the * document--something purists would probably prefer. 
Just directly * call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS() */ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter { /** * @type string */ public $name = 'ExtractStyleBlocks'; /** * @type array */ private $_styleMatches = array(); /** * @type csstidy */ private $_tidy; /** * @type HTMLPurifier_AttrDef_HTML_ID */ private $_id_attrdef; /** * @type HTMLPurifier_AttrDef_CSS_Ident */ private $_class_attrdef; /** * @type HTMLPurifier_AttrDef_Enum */ private $_enum_attrdef; public function __construct() { $this->_tidy = new csstidy(); $this->_tidy->set_cfg('lowercase_s', false); $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true); $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident(); $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum( array( 'first-child', 'link', 'visited', 'active', 'hover', 'focus' ) ); } /** * Save the contents of CSS blocks to style matches * @param array $matches preg_replace style $matches array */ protected function styleCallback($matches) { $this->_styleMatches[] = $matches[1]; } /** * Removes inline // we must not grab foo in a font-family prop). if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { $css = str_replace( array('<', '>', '&'), array('\3C ', '\3E ', '\26 '), $css ); } return $css; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/Filter/YouTube.php0000644000175000017500000000345213512700112021223 0ustar ezyangezyang]+>.+?' . 
'(?:http:)?//www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?#s'; $pre_replace = '\1'; return preg_replace($pre_regex, $pre_replace, $html); } /** * @param string $html * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return string */ public function postFilter($html, $config, $context) { $post_regex = '#((?:v|cp)/[A-Za-z0-9\-_=]+)#'; return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); } /** * @param $url * @return string */ protected function armorUrl($url) { return str_replace('--', '--', $url); } /** * @param array $matches * @return string */ protected function postFilterCallback($matches) { $url = $this->armorUrl($matches[1]); return '' . '' . '' . ''; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Bdo.php0000644000175000017500000000200413512700112021010 0ustar ezyangezyang array('dir' => false) ); /** * @param HTMLPurifier_Config $config */ public function setup($config) { $bdo = $this->addElement( 'bdo', 'Inline', 'Inline', array('Core', 'Lang'), array( 'dir' => 'Enum#ltr,rtl', // required // The Abstract Module specification has the attribute // inclusions wrong for bdo: bdo allows Lang ) ); $bdo->attr_transform_post[] = new HTMLPurifier_AttrTransform_BdoDir(); $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl'; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/CommonAttributes.php0000644000175000017500000000123613512700112023611 0ustar ezyangezyang array( 0 => array('Style'), // 'xml:space' => false, 'class' => 'Class', 'id' => 'ID', 'title' => 'CDATA', ), 'Lang' => array(), 'I18N' => array( 0 => array('Lang'), // proprietary, for xml:lang/lang ), 'Common' => array( 0 => array('Core', 'I18N') ) ); } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Edit.php0000644000175000017500000000263213512700112021200 0ustar ezyangezyang 'URI', // 'datetime' => 'Datetime', // not implemented ); $this->addElement('del', 'Inline', $contents, 
'Common', $attr); $this->addElement('ins', 'Inline', $contents, 'Common', $attr); } // HTML 4.01 specifies that ins/del must not contain block // elements when used in an inline context, chameleon is // a complicated workaround to acheive this effect // Inline context ! Block context (exclamation mark is // separator, see getChildDef for parsing) /** * @type bool */ public $defines_child_def = true; /** * @param HTMLPurifier_ElementDef $def * @return HTMLPurifier_ChildDef_Chameleon */ public function getChildDef($def) { if ($def->content_model_type != 'chameleon') { return false; } $value = explode('!', $def->content_model); return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Forms.php0000644000175000017500000001316713512700112021406 0ustar ezyangezyang 'Form', 'Inline' => 'Formctrl', ); /** * @param HTMLPurifier_Config $config */ public function setup($config) { $form = $this->addElement( 'form', 'Form', 'Required: Heading | List | Block | fieldset', 'Common', array( 'accept' => 'ContentTypes', 'accept-charset' => 'Charsets', 'action*' => 'URI', 'method' => 'Enum#get,post', // really ContentType, but these two are the only ones used today 'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data', ) ); $form->excludes = array('form' => true); $input = $this->addElement( 'input', 'Formctrl', 'Empty', 'Common', array( 'accept' => 'ContentTypes', 'accesskey' => 'Character', 'alt' => 'Text', 'checked' => 'Bool#checked', 'disabled' => 'Bool#disabled', 'maxlength' => 'Number', 'name' => 'CDATA', 'readonly' => 'Bool#readonly', 'size' => 'Number', 'src' => 'URI#embedded', 'tabindex' => 'Number', 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image', 'value' => 'CDATA', ) ); $input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input(); $this->addElement( 'select', 'Formctrl', 'Required: optgroup | option', 'Common', array( 'disabled' => 
'Bool#disabled', 'multiple' => 'Bool#multiple', 'name' => 'CDATA', 'size' => 'Number', 'tabindex' => 'Number', ) ); $this->addElement( 'option', false, 'Optional: #PCDATA', 'Common', array( 'disabled' => 'Bool#disabled', 'label' => 'Text', 'selected' => 'Bool#selected', 'value' => 'CDATA', ) ); // It's illegal for there to be more than one selected, but not // be multiple. Also, no selected means undefined behavior. This might // be difficult to implement; perhaps an injector, or a context variable. $textarea = $this->addElement( 'textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', array( 'accesskey' => 'Character', 'cols*' => 'Number', 'disabled' => 'Bool#disabled', 'name' => 'CDATA', 'readonly' => 'Bool#readonly', 'rows*' => 'Number', 'tabindex' => 'Number', ) ); $textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea(); $button = $this->addElement( 'button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', array( 'accesskey' => 'Character', 'disabled' => 'Bool#disabled', 'name' => 'CDATA', 'tabindex' => 'Number', 'type' => 'Enum#button,submit,reset', 'value' => 'CDATA', ) ); // For exclusions, ideally we'd specify content sets, not literal elements $button->excludes = $this->makeLookup( 'form', 'fieldset', // Form 'input', 'select', 'textarea', 'label', 'button', // Formctrl 'a', // as per HTML 4.01 spec, this is omitted by modularization 'isindex', 'iframe' // legacy items ); // Extra exclusion: img usemap="" is not permitted within this element. // We'll omit this for now, since we don't have any good way of // indicating it yet. 
// This is HIGHLY user-unfriendly; we need a custom child-def for this $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common'); $label = $this->addElement( 'label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', array( 'accesskey' => 'Character', // 'for' => 'IDREF', // IDREF not implemented, cannot allow ) ); $label->excludes = array('label' => true); $this->addElement( 'legend', false, 'Optional: #PCDATA | Inline', 'Common', array( 'accesskey' => 'Character', ) ); $this->addElement( 'optgroup', false, 'Required: option', 'Common', array( 'disabled' => 'Bool#disabled', 'label*' => 'Text', ) ); // Don't forget an injector for . This one's a little complex // because it maps to multiple elements. } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Hypertext.php0000644000175000017500000000174413512700112022312 0ustar ezyangezyangaddElement( 'a', 'Inline', 'Inline', 'Common', array( // 'accesskey' => 'Character', // 'charset' => 'Charset', 'href' => 'URI', // 'hreflang' => 'LanguageCode', 'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'), 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'), // 'tabindex' => 'Number', // 'type' => 'ContentType', ) ); $a->formatting = true; $a->excludes = array('a' => true); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Iframe.php0000644000175000017500000000223113512700112021511 0ustar ezyangezyangget('HTML.SafeIframe')) { $this->safe = true; } $this->addElement( 'iframe', 'Inline', 'Flow', 'Common', array( 'src' => 'URI#embedded', 'width' => 'Length', 'height' => 'Length', 'name' => 'ID', 'scrolling' => 'Enum#yes,no,auto', 'frameborder' => 'Enum#0,1', 'longdesc' => 'URI', 'marginheight' => 'Pixels', 'marginwidth' => 'Pixels', ) ); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Image.php0000644000175000017500000000255413512700112021340 0ustar ezyangezyangget('HTML.MaxImgLength'); $img = $this->addElement( 'img', 'Inline', 
'Empty', 'Common', array( 'alt*' => 'Text', // According to the spec, it's Length, but percents can // be abused, so we allow only Pixels. 'height' => 'Pixels#' . $max, 'width' => 'Pixels#' . $max, 'longdesc' => 'URI', 'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded ) ); if ($max === null || $config->get('HTML.Trusted')) { $img->attr['height'] = $img->attr['width'] = 'Length'; } // kind of strange, but splitting things up would be inefficient $img->attr_transform_pre[] = $img->attr_transform_post[] = new HTMLPurifier_AttrTransform_ImgRequired(); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Legacy.php0000644000175000017500000001333713512700112021523 0ustar ezyangezyangaddElement( 'basefont', 'Inline', 'Empty', null, array( 'color' => 'Color', 'face' => 'Text', // extremely broad, we should 'size' => 'Text', // tighten it 'id' => 'ID' ) ); $this->addElement('center', 'Block', 'Flow', 'Common'); $this->addElement( 'dir', 'Block', 'Required: li', 'Common', array( 'compact' => 'Bool#compact' ) ); $this->addElement( 'font', 'Inline', 'Inline', array('Core', 'I18N'), array( 'color' => 'Color', 'face' => 'Text', // extremely broad, we should 'size' => 'Text', // tighten it ) ); $this->addElement( 'menu', 'Block', 'Required: li', 'Common', array( 'compact' => 'Bool#compact' ) ); $s = $this->addElement('s', 'Inline', 'Inline', 'Common'); $s->formatting = true; $strike = $this->addElement('strike', 'Inline', 'Inline', 'Common'); $strike->formatting = true; $u = $this->addElement('u', 'Inline', 'Inline', 'Common'); $u->formatting = true; // setup modifications to old elements $align = 'Enum#left,right,center,justify'; $address = $this->addBlankElement('address'); $address->content_model = 'Inline | #PCDATA | p'; $address->content_model_type = 'optional'; $address->child = false; $blockquote = $this->addBlankElement('blockquote'); $blockquote->content_model = 'Flow | #PCDATA'; $blockquote->content_model_type = 'optional'; $blockquote->child = 
false; $br = $this->addBlankElement('br'); $br->attr['clear'] = 'Enum#left,all,right,none'; $caption = $this->addBlankElement('caption'); $caption->attr['align'] = 'Enum#top,bottom,left,right'; $div = $this->addBlankElement('div'); $div->attr['align'] = $align; $dl = $this->addBlankElement('dl'); $dl->attr['compact'] = 'Bool#compact'; for ($i = 1; $i <= 6; $i++) { $h = $this->addBlankElement("h$i"); $h->attr['align'] = $align; } $hr = $this->addBlankElement('hr'); $hr->attr['align'] = $align; $hr->attr['noshade'] = 'Bool#noshade'; $hr->attr['size'] = 'Pixels'; $hr->attr['width'] = 'Length'; $img = $this->addBlankElement('img'); $img->attr['align'] = 'IAlign'; $img->attr['border'] = 'Pixels'; $img->attr['hspace'] = 'Pixels'; $img->attr['vspace'] = 'Pixels'; // figure out this integer business $li = $this->addBlankElement('li'); $li->attr['value'] = new HTMLPurifier_AttrDef_Integer(); $li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle'; $ol = $this->addBlankElement('ol'); $ol->attr['compact'] = 'Bool#compact'; $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer(); $ol->attr['type'] = 'Enum#s:1,i,I,a,A'; $p = $this->addBlankElement('p'); $p->attr['align'] = $align; $pre = $this->addBlankElement('pre'); $pre->attr['width'] = 'Number'; // script omitted $table = $this->addBlankElement('table'); $table->attr['align'] = 'Enum#left,center,right'; $table->attr['bgcolor'] = 'Color'; $tr = $this->addBlankElement('tr'); $tr->attr['bgcolor'] = 'Color'; $th = $this->addBlankElement('th'); $th->attr['bgcolor'] = 'Color'; $th->attr['height'] = 'Length'; $th->attr['nowrap'] = 'Bool#nowrap'; $th->attr['width'] = 'Length'; $td = $this->addBlankElement('td'); $td->attr['bgcolor'] = 'Color'; $td->attr['height'] = 'Length'; $td->attr['nowrap'] = 'Bool#nowrap'; $td->attr['width'] = 'Length'; $ul = $this->addBlankElement('ul'); $ul->attr['compact'] = 'Bool#compact'; $ul->attr['type'] = 'Enum#square,disc,circle'; // "safe" modifications to "unsafe" elements // WARNING: If you want 
to add support for an unsafe, legacy // attribute, make a new TrustedLegacy module with the trusted // bit set appropriately $form = $this->addBlankElement('form'); $form->content_model = 'Flow | #PCDATA'; $form->content_model_type = 'optional'; $form->attr['target'] = 'FrameTarget'; $input = $this->addBlankElement('input'); $input->attr['align'] = 'IAlign'; $legend = $this->addBlankElement('legend'); $legend->attr['align'] = 'LAlign'; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/List.php0000644000175000017500000000356513512700112021234 0ustar ezyangezyang 'List'); /** * @param HTMLPurifier_Config $config */ public function setup($config) { $ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common'); $ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common'); // XXX The wrap attribute is handled by MakeWellFormed. This is all // quite unsatisfactory, because we generated this // *specifically* for lists, and now a big chunk of the handling // is done properly by the List ChildDef. So actually, we just // want enough information to make autoclosing work properly, // and then hand off the tricky stuff to the ChildDef. 
$ol->wrap = 'li'; $ul->wrap = 'li'; $this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); $this->addElement('li', false, 'Flow', 'Common'); $this->addElement('dd', false, 'Flow', 'Common'); $this->addElement('dt', false, 'Inline', 'Common'); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Name.php0000644000175000017500000000123513512700112021171 0ustar ezyangezyangaddBlankElement($name); $element->attr['name'] = 'CDATA'; if (!$config->get('HTML.Attr.Name.UseCDATA')) { $element->attr_transform_post[] = new HTMLPurifier_AttrTransform_NameSync(); } } } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Nofollow.php0000644000175000017500000000077313512700112022116 0ustar ezyangezyangaddBlankElement('a'); $a->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow(); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php0000644000175000017500000000054213512700112024644 0ustar ezyangezyang array( 'lang' => 'LanguageCode', ) ); } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Object.php0000644000175000017500000000276313512700112021526 0ustar ezyangezyang to cater to legacy browsers: this * module does not allow this sort of behavior */ class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule { /** * @type string */ public $name = 'Object'; /** * @type bool */ public $safe = false; /** * @param HTMLPurifier_Config $config */ public function setup($config) { $this->addElement( 'object', 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', array( 'archive' => 'URI', 'classid' => 'URI', 'codebase' => 'URI', 'codetype' => 'Text', 'data' => 'URI', 'declare' => 'Bool#declare', 'height' => 'Length', 'name' => 'CDATA', 'standby' => 'Text', 'tabindex' => 'Number', 'type' => 'ContentType', 'width' => 'Length' ) ); $this->addElement( 'param', false, 'Empty', null, array( 'id' => 'ID', 'name*' => 'Text', 'type' => 'Text', 'value' => 'Text', 'valuetype' 
=> 'Enum#data,ref,object' ) ); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Presentation.php0000644000175000017500000000260713512700112022770 0ustar ezyangezyangaddElement('hr', 'Block', 'Empty', 'Common'); $this->addElement('sub', 'Inline', 'Inline', 'Common'); $this->addElement('sup', 'Inline', 'Inline', 'Common'); $b = $this->addElement('b', 'Inline', 'Inline', 'Common'); $b->formatting = true; $big = $this->addElement('big', 'Inline', 'Inline', 'Common'); $big->formatting = true; $i = $this->addElement('i', 'Inline', 'Inline', 'Common'); $i->formatting = true; $small = $this->addElement('small', 'Inline', 'Inline', 'Common'); $small->formatting = true; $tt = $this->addElement('tt', 'Inline', 'Inline', 'Common'); $tt->formatting = true; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Proprietary.php0000644000175000017500000000174313512700112022635 0ustar ezyangezyangaddElement( 'marquee', 'Inline', 'Flow', 'Common', array( 'direction' => 'Enum#left,right,up,down', 'behavior' => 'Enum#alternate', 'width' => 'Length', 'height' => 'Length', 'scrolldelay' => 'Number', 'scrollamount' => 'Number', 'loop' => 'Number', 'bgcolor' => 'Color', 'hspace' => 'Pixels', 'vspace' => 'Pixels', ) ); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/Ruby.php0000644000175000017500000000203613512700112021232 0ustar ezyangezyangaddElement( 'ruby', 'Inline', 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))', 'Common' ); $this->addElement('rbc', false, 'Required: rb', 'Common'); $this->addElement('rtc', false, 'Required: rt', 'Common'); $rb = $this->addElement('rb', false, 'Inline', 'Common'); $rb->excludes = array('ruby' => true); $rt = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number')); $rt->excludes = array('ruby' => true); $this->addElement('rp', false, 'Optional: #PCDATA', 'Common'); } } // vim: et sw=4 sts=4 
HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/SafeEmbed.php0000644000175000017500000000211113512700112022116 0ustar ezyangezyangget('HTML.MaxImgLength'); $embed = $this->addElement( 'embed', 'Inline', 'Empty', 'Common', array( 'src*' => 'URI#embedded', 'type' => 'Enum#application/x-shockwave-flash', 'width' => 'Pixels#' . $max, 'height' => 'Pixels#' . $max, 'allowscriptaccess' => 'Enum#never', 'allownetworking' => 'Enum#internal', 'flashvars' => 'Text', 'wmode' => 'Enum#window,transparent,opaque', 'name' => 'ID', ) ); $embed->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed(); } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/SafeObject.php0000644000175000017500000000363313512700112022322 0ustar ezyangezyangget('HTML.MaxImgLength'); $object = $this->addElement( 'object', 'Inline', 'Optional: param | Flow | #PCDATA', 'Common', array( // While technically not required by the spec, we're forcing // it to this value. 'type' => 'Enum#application/x-shockwave-flash', 'width' => 'Pixels#' . $max, 'height' => 'Pixels#' . 
$max, 'data' => 'URI#embedded', 'codebase' => new HTMLPurifier_AttrDef_Enum( array( 'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0' ) ), ) ); $object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject(); $param = $this->addElement( 'param', false, 'Empty', false, array( 'id' => 'ID', 'name*' => 'Text', 'value' => 'Text' ) ); $param->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam(); $this->info_injector[] = 'SafeObject'; } } // vim: et sw=4 sts=4 HTMLPurifier-4.11.0/HTMLPurifier/HTMLModule/SafeScripting.php0000644000175000017500000000235713512700112023060 0ustar ezyangezyangget('HTML.SafeScripting'); $script = $this->addElement( 'script', 'Inline', 'Optional:', // Not `Empty` to not allow to autoclose the )#si', array($this, 'scriptCallback'), $html ); } $html = $this->normalize($html, $config, $context); $cursor = 0; // our location in the text $inside_tag = false; // whether or not we're parsing the inside of a tag $array = array(); // result array // This is also treated to mean maintain *column* numbers too $maintain_line_numbers = $config->get('Core.MaintainLineNumbers'); if ($maintain_line_numbers === null) { // automatically determine line numbering by checking // if error collection is on $maintain_line_numbers = $config->get('Core.CollectErrors'); } if ($maintain_line_numbers) { $current_line = 1; $current_col = 0; $length = strlen($html); } else { $current_line = false; $current_col = false; $length = false; } $context->register('CurrentLine', $current_line); $context->register('CurrentCol', $current_col); $nl = "\n"; // how often to manually recalculate. This will ALWAYS be right, // but it's pretty wasteful. 
Set to 0 to turn off $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval'); $e = false; if ($config->get('Core.CollectErrors')) { $e =& $context->get('ErrorCollector'); } // for testing synchronization $loops = 0; while (++$loops) { // $cursor is either at the start of a token, or inside of // a tag (i.e. there was a < immediately before it), as indicated // by $inside_tag if ($maintain_line_numbers) { // $rcursor, however, is always at the start of a token. $rcursor = $cursor - (int)$inside_tag; // Column number is cheap, so we calculate it every round. // We're interested at the *end* of the newline string, so // we need to add strlen($nl) == 1 to $nl_pos before subtracting it // from our "rcursor" position. $nl_pos = strrpos($html, $nl, $rcursor - $length); $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1); // recalculate lines if ($synchronize_interval && // synchronization is on $cursor > 0 && // cursor is further than zero $loops % $synchronize_interval === 0) { // time to synchronize! 
$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); } } $position_next_lt = strpos($html, '<', $cursor); $position_next_gt = strpos($html, '>', $cursor); // triggers on "asdf" but not "asdf " // special case to set up context if ($position_next_lt === $cursor) { $inside_tag = true; $cursor++; } if (!$inside_tag && $position_next_lt !== false) { // We are not inside tag and there still is another tag to parse $token = new HTMLPurifier_Token_Text( $this->parseText( substr( $html, $cursor, $position_next_lt - $cursor ), $config ) ); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); } $array[] = $token; $cursor = $position_next_lt + 1; $inside_tag = true; continue; } elseif (!$inside_tag) { // We are not inside tag but there are no more tags // If we're already at the end, break if ($cursor === strlen($html)) { break; } // Create Text of rest of string $token = new HTMLPurifier_Token_Text( $this->parseText( substr( $html, $cursor ), $config ) ); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); } $array[] = $token; break; } elseif ($inside_tag && $position_next_gt !== false) { // We are in tag and it is well formed // Grab the internals of the tag $strlen_segment = $position_next_gt - $cursor; if ($strlen_segment < 1) { // there's nothing to process! $token = new HTMLPurifier_Token_Text('<'); $cursor++; continue; } $segment = substr($html, $cursor, $strlen_segment); if ($segment === false) { // somehow, we attempted to access beyond the end of // the string, defense-in-depth, reported by Nate Abele break; } // Check if it's a comment if (substr($segment, 0, 3) === '!--') { // re-determine segment length, looking for --> $position_comment_end = strpos($html, '-->', $cursor); if ($position_comment_end === false) { // uh oh, we have a comment that extends to // infinity. 
Can't be helped: set comment // end position to end of string if ($e) { $e->send(E_WARNING, 'Lexer: Unclosed comment'); } $position_comment_end = strlen($html); $end = true; } else { $end = false; } $strlen_segment = $position_comment_end - $cursor; $segment = substr($html, $cursor, $strlen_segment); $token = new HTMLPurifier_Token_Comment( substr( $segment, 3, $strlen_segment - 3 ) ); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); } $array[] = $token; $cursor = $end ? $position_comment_end : $position_comment_end + 3; $inside_tag = false; continue; } // Check if it's an end tag $is_end_tag = (strpos($segment, '/') === 0); if ($is_end_tag) { $type = substr($segment, 1); $token = new HTMLPurifier_Token_End($type); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $inside_tag = false; $cursor = $position_next_gt + 1; continue; } // Check leading character is alnum, if not, we may // have accidently grabbed an emoticon. Translate into // text and go our merry way if (!ctype_alpha($segment[0])) { // XML: $segment[0] !== '_' && $segment[0] !== ':' if ($e) { $e->send(E_NOTICE, 'Lexer: Unescaped lt'); } $token = new HTMLPurifier_Token_Text('<'); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $inside_tag = false; continue; } // Check if it is explicitly self closing, if so, remove // trailing slash. Remember, we could have a tag like
, so // any later token processing scripts must convert improperly // classified EmptyTags from StartTags. $is_self_closing = (strrpos($segment, '/') === $strlen_segment - 1); if ($is_self_closing) { $strlen_segment--; $segment = substr($segment, 0, $strlen_segment); } // Check if there are any attributes $position_first_space = strcspn($segment, $this->_whitespace); if ($position_first_space >= $strlen_segment) { if ($is_self_closing) { $token = new HTMLPurifier_Token_Empty($segment); } else { $token = new HTMLPurifier_Token_Start($segment); } if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $inside_tag = false; $cursor = $position_next_gt + 1; continue; } // Grab out all the data $type = substr($segment, 0, $position_first_space); $attribute_string = trim( substr( $segment, $position_first_space ) ); if ($attribute_string) { $attr = $this->parseAttributeString( $attribute_string, $config, $context ); } else { $attr = array(); } if ($is_self_closing) { $token = new HTMLPurifier_Token_Empty($type, $attr); } else { $token = new HTMLPurifier_Token_Start($type, $attr); } if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $cursor = $position_next_gt + 1; $inside_tag = false; continue; } else { // inside tag, but there's no ending > sign if ($e) { $e->send(E_WARNING, 'Lexer: Missing gt'); } $token = new HTMLPurifier_Token_Text( '<' . $this->parseText( substr($html, $cursor), $config ) ); if ($maintain_line_numbers) { $token->rawPosition($current_line, $current_col); } // no cursor scroll? Hmm... 
/**
 * PHP 5.0.x compatible substr_count that implements offset and length.
 *
 * On PHP older than 5.1 the requested window is cut out with substr()
 * first and then counted; otherwise the offset and length are handed
 * straight to substr_count().
 *
 * @param string $haystack String to search in.
 * @param string $needle Substring to count.
 * @param int $offset Start of the window inside $haystack.
 * @param int $length Length of the window.
 * @return int Number of occurrences of $needle inside the window.
 */
protected function substrCount($haystack, $needle, $offset, $length)
{
    static $oldVersion;
    if ($oldVersion === null) {
        // the version check is done once and cached for later calls
        $oldVersion = version_compare(PHP_VERSION, '5.1', '<');
    }
    if ($oldVersion) {
        return substr_count(substr($haystack, $offset, $length), $needle);
    }
    return substr_count($haystack, $needle, $offset, $length);
}

/**
 * Takes the inside of an HTML tag and makes an assoc array of attributes.
 *
 * Strings without any whitespace and with at most one equal sign are
 * handled by a fast path; everything else is scanned left to right.
 * A key without a value (boolean attribute) maps to itself. Values are
 * passed through parseAttr() before being stored.
 *
 * @param string $string Inside of tag excluding name.
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return array Assoc array of attributes.
 * @throws Exception If the scanning loop fails to advance (safety net).
 */
public function parseAttributeString($string, $config, $context)
{
    $string = (string)$string; // quick typecast

    if ($string === '') {
        return array(); // no attributes
    }

    $e = false;
    if ($config->get('Core.CollectErrors')) {
        $e =& $context->get('ErrorCollector');
    }

    // let's see if we can abort as quickly as possible
    // one equal sign, no whitespace => one attribute
    $num_equal = substr_count($string, '=');
    // Test against the same whitespace set the main loop uses, so that
    // tab/newline separated attributes (or a leading blank at offset 0)
    // are never mistaken for a single attribute.
    $has_space = strcspn($string, $this->_whitespace) < strlen($string);

    if (!$has_space && $num_equal === 0) {
        // bool attribute
        return array($string => $string);
    }

    if (!$has_space && $num_equal === 1) {
        // only one attribute
        list($key, $quoted_value) = explode('=', $string);
        $quoted_value = trim($quoted_value);
        // Strict comparisons: "0" is a legitimate key and a legitimate
        // value (e.g. border=0) and must not be treated as missing.
        if ($key === '') {
            if ($e) {
                $e->send(E_ERROR, 'Lexer: Missing attribute key');
            }
            return array();
        }
        if ($quoted_value === '') {
            return array($key => '');
        }
        $first_char = $quoted_value[0];
        $last_char = $quoted_value[strlen($quoted_value) - 1];

        $same_quote = ($first_char == $last_char);
        $open_quote = ($first_char == '"' || $first_char == "'");

        if ($same_quote && $open_quote) {
            // well behaved
            $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
        } elseif ($open_quote) {
            // opening quote without a matching closing quote
            if ($e) {
                $e->send(E_ERROR, 'Lexer: Missing end quote');
            }
            $value = substr($quoted_value, 1);
        } else {
            // unquoted value
            $value = $quoted_value;
        }
        if ($value === false) {
            // older PHP versions return false for an empty slice
            $value = '';
        }
        return array($key => $this->parseAttr($value, $config));
    }

    // setup loop environment
    $array = array(); // return assoc array of attributes
    $cursor = 0; // current position in string (moves forward)
    $size = strlen($string); // size of the string (stays the same)

    // if we have unquoted attributes, the parser expects a terminating
    // space, so let's guarantee that there's always a terminating space.
    $string .= ' ';

    $old_cursor = -1;
    while ($cursor < $size) {
        if ($old_cursor >= $cursor) {
            throw new Exception("Infinite loop detected");
        }
        $old_cursor = $cursor;

        // skip whitespace in front of the key
        $cursor += strspn($string, $this->_whitespace, $cursor);
        if ($cursor >= $size) {
            // nothing but trailing whitespace was left
            break;
        }

        // scroll past all characters that are the key (not whitespace or =)
        $key_begin = $cursor;
        $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
        $key = (string)substr($string, $key_begin, $cursor - $key_begin);

        if ($key === '') {
            if ($e) {
                $e->send(E_ERROR, 'Lexer: Missing attribute key');
            }
            // jump to the next whitespace to prevent an infinite loop
            $cursor += 1 + strcspn($string, $this->_whitespace, $cursor + 1);
            continue; // empty key
        }

        // scroll past all whitespace
        $cursor += strspn($string, $this->_whitespace, $cursor);

        if ($cursor >= $size) {
            // key was the last thing in the string: bool attribute
            $array[$key] = $key;
            break;
        }

        // if the next character is an equal sign, we've got a regular
        // pair, otherwise, it's a bool attribute
        if ($string[$cursor] !== '=') {
            $array[$key] = $key;
            continue;
        }

        // key="value"
        $cursor++;
        $cursor += strspn($string, $this->_whitespace, $cursor);

        if ($cursor >= $size) {
            // equal sign was the last thing in the string: empty value
            $array[$key] = '';
            break;
        }

        // we might be in front of a quote right now
        $char = $string[$cursor];

        if ($char === '"' || $char === "'") {
            // it's quoted, end bound is $char
            $value_begin = $cursor + 1;
            $value_end = strpos($string, $char, $value_begin);
            if ($value_end === false) {
                // we reached a premature end: take the rest of the input
                $value_end = $size;
            }
        } else {
            // it's not quoted, end bound is whitespace
            $value_begin = $cursor;
            $value_end = $cursor + strcspn($string, $this->_whitespace, $cursor);
        }

        $value = substr($string, $value_begin, $value_end - $value_begin);
        if ($value === false) {
            $value = '';
        }
        $array[$key] = $this->parseAttr($value, $config);

        // step over the closing quote / terminating whitespace
        $cursor = $value_end + 1;
    }
    return $array;
}
$this->normalize($html, $config, $context); // attempt to armor stray angled brackets that cannot possibly // form tags and thus are probably being used as emoticons if ($config->get('Core.AggressivelyFixLt')) { $char = '[^a-z!\/]'; $comment = "/|\z)/is"; $html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html); do { $old = $html; $html = preg_replace("/<($char)/i", '<\\1', $html); } while ($html !== $old); $html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments } // preprocess html, essential for UTF-8 $html = $this->wrapHTML($html, $config, $context); $doc = new DOMDocument(); $doc->encoding = 'UTF-8'; // theoretically, the above has this covered $options = 0; if ($config->get('Core.AllowParseManyTags') && defined('LIBXML_PARSEHUGE')) { $options |= LIBXML_PARSEHUGE; } set_error_handler(array($this, 'muteErrorHandler')); $doc->loadHTML($html, $options); restore_error_handler(); $body = $doc->getElementsByTagName('html')->item(0)-> // getElementsByTagName('body')->item(0); // $div = $body->getElementsByTagName('div')->item(0); //
$tokens = array(); $this->tokenizeDOM($div, $tokens, $config); // If the div has a sibling, that means we tripped across // a premature
/**
 * Walks a DOM subtree without recursion and appends the matching tokens
 * to the accumulator.
 *
 * One FIFO queue is kept per nesting depth. A node whose start token
 * needs a matching end token is remembered for its depth and closed as
 * soon as the queue holding its children has been drained.
 *
 * @param DOMNode $node DOMNode to be tokenized.
 * @param HTMLPurifier_Token[] $tokens Array-list of already tokenized
 *        tokens; new tokens are appended to it by reference.
 * @param HTMLPurifier_Config $config
 */
protected function tokenizeDOM($node, &$tokens, $config)
{
    $depth = 0;
    $queues = array($depth => new HTMLPurifier_Queue(array($node)));
    $pendingClose = array();
    do {
        while (!$queues[$depth]->isEmpty()) {
            $current = $queues[$depth]->shift(); // FIFO
            // the node handed in (depth 0) is passed with $collect = false
            if ($this->createStartNode($current, $tokens, $depth > 0, $config)) {
                $pendingClose[$depth][] = $current;
            }
            $children = $current->childNodes;
            if (!$children || !$children->length) {
                continue;
            }
            // descend: the inner loop now drains the children first
            $depth++;
            $queues[$depth] = new HTMLPurifier_Queue();
            foreach ($children as $child) {
                $queues[$depth]->push($child);
            }
        }
        // this depth is exhausted; go back up and close what is pending
        $depth--;
        if ($depth && isset($pendingClose[$depth])) {
            while ($closing = array_pop($pendingClose[$depth])) {
                $this->createEndNode($closing, $tokens);
            }
        }
    } while ($depth > 0);
}

/**
 * Portably retrieve the tag name of a node; deals with older versions
 * of libxml like 2.7.6
 *
 * The properties tagName, nodeName and localName are tried in that
 * order and the first one that is set wins.
 *
 * @param DOMNode $node
 * @return string|null Name of the node, or null if none of the
 *         properties is set.
 */
protected function getTagName($node)
{
    foreach (array('tagName', 'nodeName', 'localName') as $property) {
        if (isset($node->$property)) {
            return $node->$property;
        }
    }
    return null;
}
$node->nodeValue; } else if (isset($node->textContent)) { return $node->textContent; } return null; } /** * @param DOMNode $node DOMNode to be tokenized. * @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens. * @param bool $collect Says whether or start and close are collected, set to * false at first recursion because it's the implicit DIV * tag you're dealing with. * @return bool if the token needs an endtoken * @todo data and tagName properties don't seem to exist in DOMNode? */ protected function createStartNode($node, &$tokens, $collect, $config) { // intercept non element nodes. WE MUST catch all of them, // but we're not getting the character reference nodes because // those should have been preprocessed if ($node->nodeType === XML_TEXT_NODE) { $data = $this->getData($node); // Handle variable data property if ($data !== null) { $tokens[] = $this->factory->createText($data); } return false; } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) { // undo libxml's special treatment of