diff options
Diffstat (limited to 'framework/vendors/htmlpurifier/standalone/HTMLPurifier')
140 files changed, 7740 insertions, 0 deletions
diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php new file mode 100644 index 0000000..c05668a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php @@ -0,0 +1,44 @@ +<?php + +/** + * Converts HTMLPurifier_ConfigSchema_Interchange to our runtime + * representation used to perform checks on user configuration. + */ +class HTMLPurifier_ConfigSchema_Builder_ConfigSchema +{ + + public function build($interchange) { + $schema = new HTMLPurifier_ConfigSchema(); + foreach ($interchange->directives as $d) { + $schema->add( + $d->id->key, + $d->default, + $d->type, + $d->typeAllowsNull + ); + if ($d->allowed !== null) { + $schema->addAllowedValues( + $d->id->key, + $d->allowed + ); + } + foreach ($d->aliases as $alias) { + $schema->addAlias( + $alias->key, + $d->id->key + ); + } + if ($d->valueAliases !== null) { + $schema->addValueAliases( + $d->id->key, + $d->valueAliases + ); + } + } + $schema->postProcess(); + return $schema; + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Builder/Xml.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Builder/Xml.php new file mode 100644 index 0000000..244561a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Builder/Xml.php @@ -0,0 +1,106 @@ +<?php + +/** + * Converts HTMLPurifier_ConfigSchema_Interchange to an XML format, + * which can be further processed to generate documentation. + */ +class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter +{ + + protected $interchange; + private $namespace; + + protected function writeHTMLDiv($html) { + $this->startElement('div'); + + $purifier = HTMLPurifier::getInstance(); + $html = $purifier->purify($html); + $this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml'); + $this->writeRaw($html); + + $this->endElement(); // div + } + + protected function export($var) { + if ($var === array()) return 'array()'; + return var_export($var, true); + } + + public function build($interchange) { + // global access, only use as last resort + $this->interchange = $interchange; + + $this->setIndent(true); + $this->startDocument('1.0', 'UTF-8'); + $this->startElement('configdoc'); + $this->writeElement('title', $interchange->name); + + foreach ($interchange->directives as $directive) { + $this->buildDirective($directive); + } + + if ($this->namespace) $this->endElement(); // namespace + + $this->endElement(); // configdoc + $this->flush(); + } + + public function buildDirective($directive) { + + // Kludge, although I suppose having a notion of a "root namespace" + // certainly makes things look nicer when documentation is built. + // Depends on things being sorted. + if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) { + if ($this->namespace) $this->endElement(); // namespace + $this->namespace = $directive->id->getRootNamespace(); + $this->startElement('namespace'); + $this->writeAttribute('id', $this->namespace); + $this->writeElement('name', $this->namespace); + } + + $this->startElement('directive'); + $this->writeAttribute('id', $directive->id->toString()); + + $this->writeElement('name', $directive->id->getDirective()); + + $this->startElement('aliases'); + foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString()); + $this->endElement(); // aliases + + $this->startElement('constraints'); + if ($directive->version) $this->writeElement('version', $directive->version); + $this->startElement('type'); + if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes'); + $this->text($directive->type); + $this->endElement(); // type + if ($directive->allowed) { + $this->startElement('allowed'); + foreach ($directive->allowed as $value => $x) $this->writeElement('value', $value); + $this->endElement(); // allowed + } + $this->writeElement('default', $this->export($directive->default)); + $this->writeAttribute('xml:space', 'preserve'); + if ($directive->external) { + $this->startElement('external'); + foreach ($directive->external as $project) $this->writeElement('project', $project); + $this->endElement(); + } + $this->endElement(); // constraints + + if ($directive->deprecatedVersion) { + $this->startElement('deprecated'); + $this->writeElement('version', $directive->deprecatedVersion); + $this->writeElement('use', $directive->deprecatedUse->toString()); + $this->endElement(); // deprecated + } + + $this->startElement('description'); + $this->writeHTMLDiv($directive->description); + $this->endElement(); // description + + $this->endElement(); // directive + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Exception.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Exception.php new file mode 100644 index 0000000..2671516 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Exception.php @@ -0,0 +1,11 @@ +<?php + +/** + * Exceptions related to configuration schema + */ +class HTMLPurifier_ConfigSchema_Exception extends HTMLPurifier_Exception +{ + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange.php new file mode 100644 index 0000000..91a5aa7 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange.php @@ -0,0 +1,42 @@ +<?php + +/** + * Generic schema interchange format that can be converted to a runtime + * representation (HTMLPurifier_ConfigSchema) or HTML documentation. Members + * are completely validated. + */ +class HTMLPurifier_ConfigSchema_Interchange +{ + + /** + * Name of the application this schema is describing. + */ + public $name; + + /** + * Array of Directive ID => array(directive info) + */ + public $directives = array(); + + /** + * Adds a directive array to $directives + */ + public function addDirective($directive) { + if (isset($this->directives[$i = $directive->id->toString()])) { + throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$i'"); + } + $this->directives[$i] = $directive; + } + + /** + * Convenience function to perform standard validation. Throws exception + * on failed validation. + */ + public function validate() { + $validator = new HTMLPurifier_ConfigSchema_Validator(); + return $validator->validate($this); + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange/Directive.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange/Directive.php new file mode 100644 index 0000000..ac8be0d --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange/Directive.php @@ -0,0 +1,77 @@ +<?php + +/** + * Interchange component class describing configuration directives. + */ +class HTMLPurifier_ConfigSchema_Interchange_Directive +{ + + /** + * ID of directive, instance of HTMLPurifier_ConfigSchema_Interchange_Id. + */ + public $id; + + /** + * String type, e.g. 'integer' or 'istring'. + */ + public $type; + + /** + * Default value, e.g. 3 or 'DefaultVal'. + */ + public $default; + + /** + * HTML description. + */ + public $description; + + /** + * Boolean whether or not null is allowed as a value. + */ + public $typeAllowsNull = false; + + /** + * Lookup table of allowed scalar values, e.g. array('allowed' => true). + * Null if all values are allowed. + */ + public $allowed; + + /** + * List of aliases for the directive, + * e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir'))). + */ + public $aliases = array(); + + /** + * Hash of value aliases, e.g. array('alt' => 'real'). Null if value + * aliasing is disabled (necessary for non-scalar types). + */ + public $valueAliases; + + /** + * Version of HTML Purifier the directive was introduced, e.g. '1.3.1'. + * Null if the directive has always existed. + */ + public $version; + + /** + * ID of directive that supercedes this old directive, is an instance + * of HTMLPurifier_ConfigSchema_Interchange_Id. Null if not deprecated. + */ + public $deprecatedUse; + + /** + * Version of HTML Purifier this directive was deprecated. Null if not + * deprecated. + */ + public $deprecatedVersion; + + /** + * List of external projects this directive depends on, e.g. array('CSSTidy'). + */ + public $external = array(); + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange/Id.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange/Id.php new file mode 100644 index 0000000..b9b3c6f --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Interchange/Id.php @@ -0,0 +1,37 @@ +<?php + +/** + * Represents a directive ID in the interchange format. + */ +class HTMLPurifier_ConfigSchema_Interchange_Id +{ + + public $key; + + public function __construct($key) { + $this->key = $key; + } + + /** + * @warning This is NOT magic, to ensure that people don't abuse SPL and + * cause problems for PHP 5.0 support. + */ + public function toString() { + return $this->key; + } + + public function getRootNamespace() { + return substr($this->key, 0, strpos($this->key, ".")); + } + + public function getDirective() { + return substr($this->key, strpos($this->key, ".") + 1); + } + + public static function make($id) { + return new HTMLPurifier_ConfigSchema_Interchange_Id($id); + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/InterchangeBuilder.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/InterchangeBuilder.php new file mode 100644 index 0000000..785b72c --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/InterchangeBuilder.php @@ -0,0 +1,180 @@ +<?php + +class HTMLPurifier_ConfigSchema_InterchangeBuilder +{ + + /** + * Used for processing DEFAULT, nothing else. + */ + protected $varParser; + + public function __construct($varParser = null) { + $this->varParser = $varParser ? $varParser : new HTMLPurifier_VarParser_Native(); + } + + public static function buildFromDirectory($dir = null) { + $builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder(); + $interchange = new HTMLPurifier_ConfigSchema_Interchange(); + return $builder->buildDir($interchange, $dir); + } + + public function buildDir($interchange, $dir = null) { + if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema'; + if (file_exists($dir . '/info.ini')) { + $info = parse_ini_file($dir . '/info.ini'); + $interchange->name = $info['name']; + } + + $files = array(); + $dh = opendir($dir); + while (false !== ($file = readdir($dh))) { + if (!$file || $file[0] == '.' || strrchr($file, '.') !== '.txt') { + continue; + } + $files[] = $file; + } + closedir($dh); + + sort($files); + foreach ($files as $file) { + $this->buildFile($interchange, $dir . '/' . $file); + } + + return $interchange; + } + + public function buildFile($interchange, $file) { + $parser = new HTMLPurifier_StringHashParser(); + $this->build( + $interchange, + new HTMLPurifier_StringHash( $parser->parseFile($file) ) + ); + } + + /** + * Builds an interchange object based on a hash. + * @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build + * @param $hash HTMLPurifier_ConfigSchema_StringHash source data + */ + public function build($interchange, $hash) { + if (!$hash instanceof HTMLPurifier_StringHash) { + $hash = new HTMLPurifier_StringHash($hash); + } + if (!isset($hash['ID'])) { + throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID'); + } + if (strpos($hash['ID'], '.') === false) { + if (count($hash) == 2 && isset($hash['DESCRIPTION'])) { + $hash->offsetGet('DESCRIPTION'); // prevent complaining + } else { + throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace'); + } + } else { + $this->buildDirective($interchange, $hash); + } + $this->_findUnused($hash); + } + + public function buildDirective($interchange, $hash) { + $directive = new HTMLPurifier_ConfigSchema_Interchange_Directive(); + + // These are required elements: + $directive->id = $this->id($hash->offsetGet('ID')); + $id = $directive->id->toString(); // convenience + + if (isset($hash['TYPE'])) { + $type = explode('/', $hash->offsetGet('TYPE')); + if (isset($type[1])) $directive->typeAllowsNull = true; + $directive->type = $type[0]; + } else { + throw new HTMLPurifier_ConfigSchema_Exception("TYPE in directive hash '$id' not defined"); + } + + if (isset($hash['DEFAULT'])) { + try { + $directive->default = $this->varParser->parse($hash->offsetGet('DEFAULT'), $directive->type, $directive->typeAllowsNull); + } catch (HTMLPurifier_VarParserException $e) { + throw new HTMLPurifier_ConfigSchema_Exception($e->getMessage() . " in DEFAULT in directive hash '$id'"); + } + } + + if (isset($hash['DESCRIPTION'])) { + $directive->description = $hash->offsetGet('DESCRIPTION'); + } + + if (isset($hash['ALLOWED'])) { + $directive->allowed = $this->lookup($this->evalArray($hash->offsetGet('ALLOWED'))); + } + + if (isset($hash['VALUE-ALIASES'])) { + $directive->valueAliases = $this->evalArray($hash->offsetGet('VALUE-ALIASES')); + } + + if (isset($hash['ALIASES'])) { + $raw_aliases = trim($hash->offsetGet('ALIASES')); + $aliases = preg_split('/\s*,\s*/', $raw_aliases); + foreach ($aliases as $alias) { + $directive->aliases[] = $this->id($alias); + } + } + + if (isset($hash['VERSION'])) { + $directive->version = $hash->offsetGet('VERSION'); + } + + if (isset($hash['DEPRECATED-USE'])) { + $directive->deprecatedUse = $this->id($hash->offsetGet('DEPRECATED-USE')); + } + + if (isset($hash['DEPRECATED-VERSION'])) { + $directive->deprecatedVersion = $hash->offsetGet('DEPRECATED-VERSION'); + } + + if (isset($hash['EXTERNAL'])) { + $directive->external = preg_split('/\s*,\s*/', trim($hash->offsetGet('EXTERNAL'))); + } + + $interchange->addDirective($directive); + } + + /** + * Evaluates an array PHP code string without array() wrapper + */ + protected function evalArray($contents) { + return eval('return array('. $contents .');'); + } + + /** + * Converts an array list into a lookup array. + */ + protected function lookup($array) { + $ret = array(); + foreach ($array as $val) $ret[$val] = true; + return $ret; + } + + /** + * Convenience function that creates an HTMLPurifier_ConfigSchema_Interchange_Id + * object based on a string Id. + */ + protected function id($id) { + return HTMLPurifier_ConfigSchema_Interchange_Id::make($id); + } + + /** + * Triggers errors for any unused keys passed in the hash; such keys + * may indicate typos, missing values, etc. + * @param $hash Instance of ConfigSchema_StringHash to check. + */ + protected function _findUnused($hash) { + $accessed = $hash->getAccessed(); + foreach ($hash as $k => $v) { + if (!isset($accessed[$k])) { + trigger_error("String hash key '$k' not used by builder", E_USER_NOTICE); + } + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Validator.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Validator.php new file mode 100644 index 0000000..f374f6a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/Validator.php @@ -0,0 +1,206 @@ +<?php + +/** + * Performs validations on HTMLPurifier_ConfigSchema_Interchange + * + * @note If you see '// handled by InterchangeBuilder', that means a + * design decision in that class would prevent this validation from + * ever being necessary. We have them anyway, however, for + * redundancy. + */ +class HTMLPurifier_ConfigSchema_Validator +{ + + /** + * Easy to access global objects. + */ + protected $interchange, $aliases; + + /** + * Context-stack to provide easy to read error messages. + */ + protected $context = array(); + + /** + * HTMLPurifier_VarParser to test default's type. + */ + protected $parser; + + public function __construct() { + $this->parser = new HTMLPurifier_VarParser(); + } + + /** + * Validates a fully-formed interchange object. Throws an + * HTMLPurifier_ConfigSchema_Exception if there's a problem. + */ + public function validate($interchange) { + $this->interchange = $interchange; + $this->aliases = array(); + // PHP is a bit lax with integer <=> string conversions in + // arrays, so we don't use the identical !== comparison + foreach ($interchange->directives as $i => $directive) { + $id = $directive->id->toString(); + if ($i != $id) $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'"); + $this->validateDirective($directive); + } + return true; + } + + /** + * Validates a HTMLPurifier_ConfigSchema_Interchange_Id object. + */ + public function validateId($id) { + $id_string = $id->toString(); + $this->context[] = "id '$id_string'"; + if (!$id instanceof HTMLPurifier_ConfigSchema_Interchange_Id) { + // handled by InterchangeBuilder + $this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id'); + } + // keys are now unconstrained (we might want to narrow down to A-Za-z0-9.) + // we probably should check that it has at least one namespace + $this->with($id, 'key') + ->assertNotEmpty() + ->assertIsString(); // implicit assertIsString handled by InterchangeBuilder + array_pop($this->context); + } + + /** + * Validates a HTMLPurifier_ConfigSchema_Interchange_Directive object. + */ + public function validateDirective($d) { + $id = $d->id->toString(); + $this->context[] = "directive '$id'"; + $this->validateId($d->id); + + $this->with($d, 'description') + ->assertNotEmpty(); + + // BEGIN - handled by InterchangeBuilder + $this->with($d, 'type') + ->assertNotEmpty(); + $this->with($d, 'typeAllowsNull') + ->assertIsBool(); + try { + // This also tests validity of $d->type + $this->parser->parse($d->default, $d->type, $d->typeAllowsNull); + } catch (HTMLPurifier_VarParserException $e) { + $this->error('default', 'had error: ' . $e->getMessage()); + } + // END - handled by InterchangeBuilder + + if (!is_null($d->allowed) || !empty($d->valueAliases)) { + // allowed and valueAliases require that we be dealing with + // strings, so check for that early. + $d_int = HTMLPurifier_VarParser::$types[$d->type]; + if (!isset(HTMLPurifier_VarParser::$stringTypes[$d_int])) { + $this->error('type', 'must be a string type when used with allowed or value aliases'); + } + } + + $this->validateDirectiveAllowed($d); + $this->validateDirectiveValueAliases($d); + $this->validateDirectiveAliases($d); + + array_pop($this->context); + } + + /** + * Extra validation if $allowed member variable of + * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. + */ + public function validateDirectiveAllowed($d) { + if (is_null($d->allowed)) return; + $this->with($d, 'allowed') + ->assertNotEmpty() + ->assertIsLookup(); // handled by InterchangeBuilder + if (is_string($d->default) && !isset($d->allowed[$d->default])) { + $this->error('default', 'must be an allowed value'); + } + $this->context[] = 'allowed'; + foreach ($d->allowed as $val => $x) { + if (!is_string($val)) $this->error("value $val", 'must be a string'); + } + array_pop($this->context); + } + + /** + * Extra validation if $valueAliases member variable of + * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. + */ + public function validateDirectiveValueAliases($d) { + if (is_null($d->valueAliases)) return; + $this->with($d, 'valueAliases') + ->assertIsArray(); // handled by InterchangeBuilder + $this->context[] = 'valueAliases'; + foreach ($d->valueAliases as $alias => $real) { + if (!is_string($alias)) $this->error("alias $alias", 'must be a string'); + if (!is_string($real)) $this->error("alias target $real from alias '$alias'", 'must be a string'); + if ($alias === $real) { + $this->error("alias '$alias'", "must not be an alias to itself"); + } + } + if (!is_null($d->allowed)) { + foreach ($d->valueAliases as $alias => $real) { + if (isset($d->allowed[$alias])) { + $this->error("alias '$alias'", 'must not be an allowed value'); + } elseif (!isset($d->allowed[$real])) { + $this->error("alias '$alias'", 'must be an alias to an allowed value'); + } + } + } + array_pop($this->context); + } + + /** + * Extra validation if $aliases member variable of + * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. + */ + public function validateDirectiveAliases($d) { + $this->with($d, 'aliases') + ->assertIsArray(); // handled by InterchangeBuilder + $this->context[] = 'aliases'; + foreach ($d->aliases as $alias) { + $this->validateId($alias); + $s = $alias->toString(); + if (isset($this->interchange->directives[$s])) { + $this->error("alias '$s'", 'collides with another directive'); + } + if (isset($this->aliases[$s])) { + $other_directive = $this->aliases[$s]; + $this->error("alias '$s'", "collides with alias for directive '$other_directive'"); + } + $this->aliases[$s] = $d->id->toString(); + } + array_pop($this->context); + } + + // protected helper functions + + /** + * Convenience function for generating HTMLPurifier_ConfigSchema_ValidatorAtom + * for validating simple member variables of objects. + */ + protected function with($obj, $member) { + return new HTMLPurifier_ConfigSchema_ValidatorAtom($this->getFormattedContext(), $obj, $member); + } + + /** + * Emits an error, providing helpful context. + */ + protected function error($target, $msg) { + if ($target !== false) $prefix = ucfirst($target) . ' in ' . $this->getFormattedContext(); + else $prefix = ucfirst($this->getFormattedContext()); + throw new HTMLPurifier_ConfigSchema_Exception(trim($prefix . ' ' . $msg)); + } + + /** + * Returns a formatted context string. + */ + protected function getFormattedContext() { + return implode(' in ', array_reverse($this->context)); + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/ValidatorAtom.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/ValidatorAtom.php new file mode 100644 index 0000000..b95aea1 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/ValidatorAtom.php @@ -0,0 +1,66 @@ +<?php + +/** + * Fluent interface for validating the contents of member variables. + * This should be immutable. See HTMLPurifier_ConfigSchema_Validator for + * use-cases. We name this an 'atom' because it's ONLY for validations that + * are independent and usually scalar. + */ +class HTMLPurifier_ConfigSchema_ValidatorAtom +{ + + protected $context, $obj, $member, $contents; + + public function __construct($context, $obj, $member) { + $this->context = $context; + $this->obj = $obj; + $this->member = $member; + $this->contents =& $obj->$member; + } + + public function assertIsString() { + if (!is_string($this->contents)) $this->error('must be a string'); + return $this; + } + + public function assertIsBool() { + if (!is_bool($this->contents)) $this->error('must be a boolean'); + return $this; + } + + public function assertIsArray() { + if (!is_array($this->contents)) $this->error('must be an array'); + return $this; + } + + public function assertNotNull() { + if ($this->contents === null) $this->error('must not be null'); + return $this; + } + + public function assertAlnum() { + $this->assertIsString(); + if (!ctype_alnum($this->contents)) $this->error('must be alphanumeric'); + return $this; + } + + public function assertNotEmpty() { + if (empty($this->contents)) $this->error('must not be empty'); + return $this; + } + + public function assertIsLookup() { + $this->assertIsArray(); + foreach ($this->contents as $v) { + if ($v !== true) $this->error('must be a lookup array'); + } + return $this; + } + + protected function error($msg) { + throw new HTMLPurifier_ConfigSchema_Exception(ucfirst($this->member) . ' in ' . $this->context . ' ' . $msg); + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema.ser b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema.ser Binary files differnew file mode 100644 index 0000000..b106bcf --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema.ser diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt new file mode 100644 index 0000000..0517fed --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt @@ -0,0 +1,8 @@ +Attr.AllowedClasses +TYPE: lookup/null +VERSION: 4.0.0 +DEFAULT: null +--DESCRIPTION-- +List of allowed class values in the class attribute. By default, this is null, +which means all classes are allowed. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt new file mode 100644 index 0000000..249edd6 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt @@ -0,0 +1,12 @@ +Attr.AllowedFrameTargets +TYPE: lookup +DEFAULT: array() +--DESCRIPTION-- +Lookup table of all allowed link frame targets. Some commonly used link +targets include _blank, _self, _parent and _top. Values should be +lowercase, as validation will be done in a case-sensitive manner despite +W3C's recommendation. XHTML 1.0 Strict does not permit the target attribute +so this directive will have no effect in that doctype. XHTML 1.1 does not +enable the Target module by default, you will have to manually enable it +(see the module documentation for more details.) +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt new file mode 100644 index 0000000..9a8fa6a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt @@ -0,0 +1,9 @@ +Attr.AllowedRel +TYPE: lookup +VERSION: 1.6.0 +DEFAULT: array() +--DESCRIPTION-- +List of allowed forward document relationships in the rel attribute. Common +values may be nofollow or print. By default, this is empty, meaning that no +document relationships are allowed. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt new file mode 100644 index 0000000..b017883 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt @@ -0,0 +1,9 @@ +Attr.AllowedRev +TYPE: lookup +VERSION: 1.6.0 +DEFAULT: array() +--DESCRIPTION-- +List of allowed reverse document relationships in the rev attribute. This +attribute is a bit of an edge-case; if you don't know what it is for, stay +away. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt new file mode 100644 index 0000000..e774b82 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt @@ -0,0 +1,19 @@ +Attr.ClassUseCDATA +TYPE: bool/null +DEFAULT: null +VERSION: 4.0.0 +--DESCRIPTION-- +If null, class will auto-detect the doctype and, if matching XHTML 1.1 or +XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise, +it will use a relaxed CDATA definition. If true, the relaxed CDATA definition +is forced; if false, the NMTOKENS definition is forced. To get behavior +of HTML Purifier prior to 4.0.0, set this directive to false. + +Some rational behind the auto-detection: +in previous versions of HTML Purifier, it was assumed that the form of +class was NMTOKENS, as specified by the XHTML Modularization (representing +XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however +specify class as CDATA. HTML 5 effectively defines it as CDATA, but +with the additional constraint that each name should be unique (this is not +explicitly outlined in previous specifications). +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt new file mode 100644 index 0000000..533165e --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt @@ -0,0 +1,11 @@ +Attr.DefaultImageAlt +TYPE: string/null +DEFAULT: null +VERSION: 3.2.0 +--DESCRIPTION-- +This is the content of the alt tag of an image if the user had not +previously specified an alt attribute. This applies to all images without +a valid alt attribute, as opposed to %Attr.DefaultInvalidImageAlt, which +only applies to invalid images, and overrides in the case of an invalid image. +Default behavior with null is to use the basename of the src tag for the alt. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt new file mode 100644 index 0000000..9eb7e38 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt @@ -0,0 +1,9 @@ +Attr.DefaultInvalidImage +TYPE: string +DEFAULT: '' +--DESCRIPTION-- +This is the default image an img tag will be pointed to if it does not have +a valid src attribute. In future versions, we may allow the image tag to +be removed completely, but due to design issues, this is not possible right +now. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt new file mode 100644 index 0000000..2f17bf4 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt @@ -0,0 +1,8 @@ +Attr.DefaultInvalidImageAlt +TYPE: string +DEFAULT: 'Invalid image' +--DESCRIPTION-- +This is the content of the alt tag of an invalid image if the user had not +previously specified an alt attribute. It has no effect when the image is +valid but there was no alt attribute present. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt new file mode 100644 index 0000000..52654b5 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt @@ -0,0 +1,10 @@ +Attr.DefaultTextDir +TYPE: string +DEFAULT: 'ltr' +--DESCRIPTION-- +Defines the default text direction (ltr or rtl) of the document being +parsed. This generally is the same as the value of the dir attribute in +HTML, or ltr if that is not specified. +--ALLOWED-- +'ltr', 'rtl' +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt new file mode 100644 index 0000000..6440d21 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt @@ -0,0 +1,16 @@ +Attr.EnableID +TYPE: bool +DEFAULT: false +VERSION: 1.2.0 +--DESCRIPTION-- +Allows the ID attribute in HTML. This is disabled by default due to the +fact that without proper configuration user input can easily break the +validation of a webpage by specifying an ID that is already on the +surrounding HTML. If you don't mind throwing caution to the wind, enable +this directive, but I strongly recommend you also consider blacklisting IDs +you use (%Attr.IDBlacklist) or prefixing all user supplied IDs +(%Attr.IDPrefix). When set to true HTML Purifier reverts to the behavior of +pre-1.2.0 versions. +--ALIASES-- +HTML.EnableAttrID +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt new file mode 100644 index 0000000..f31d226 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt @@ -0,0 +1,8 @@ +Attr.ForbiddenClasses +TYPE: lookup +VERSION: 4.0.0 +DEFAULT: array() +--DESCRIPTION-- +List of forbidden class values in the class attribute. By default, this is +empty, which means that no classes are forbidden. See also %Attr.AllowedClasses. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt new file mode 100644 index 0000000..5f2b5e3 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt @@ -0,0 +1,5 @@ +Attr.IDBlacklist +TYPE: list +DEFAULT: array() +DESCRIPTION: Array of IDs not allowed in the document. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt new file mode 100644 index 0000000..6f58245 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt @@ -0,0 +1,9 @@ +Attr.IDBlacklistRegexp +TYPE: string/null +VERSION: 1.6.0 +DEFAULT: NULL +--DESCRIPTION-- +PCRE regular expression to be matched against all IDs. If the expression is +matches, the ID is rejected. Use this with care: may cause significant +degradation. ID matching is done after all other validation. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt new file mode 100644 index 0000000..cc49d43 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt @@ -0,0 +1,12 @@ +Attr.IDPrefix +TYPE: string +VERSION: 1.2.0 +DEFAULT: '' +--DESCRIPTION-- +String to prefix to IDs. If you have no idea what IDs your pages may use, +you may opt to simply add a prefix to all user-submitted ID attributes so +that they are still usable, but will not conflict with core page IDs. +Example: setting the directive to 'user_' will result in a user submitted +'foo' to become 'user_foo' Be sure to set %HTML.EnableAttrID to true +before using this. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt new file mode 100644 index 0000000..2c5924a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt @@ -0,0 +1,14 @@ +Attr.IDPrefixLocal +TYPE: string +VERSION: 1.2.0 +DEFAULT: '' +--DESCRIPTION-- +Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If you +need to allow multiple sets of user content on web page, you may need to +have a seperate prefix that changes with each iteration. This way, +seperately submitted user content displayed on the same page doesn't +clobber each other. Ideal values are unique identifiers for the content it +represents (i.e. the id of the row in the database). Be sure to add a +seperator (like an underscore) at the end. Warning: this directive will +not work unless %Attr.IDPrefix is set to a non-empty value! +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt new file mode 100644 index 0000000..d5caa1b --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt @@ -0,0 +1,31 @@ +AutoFormat.AutoParagraph +TYPE: bool +VERSION: 2.0.1 +DEFAULT: false +--DESCRIPTION-- + +<p> + This directive turns on auto-paragraphing, where double newlines are + converted in to paragraphs whenever possible. Auto-paragraphing: +</p> +<ul> + <li>Always applies to inline elements or text in the root node,</li> + <li>Applies to inline elements or text with double newlines in nodes + that allow paragraph tags,</li> + <li>Applies to double newlines in paragraph tags</li> +</ul> +<p> + <code>p</code> tags must be allowed for this directive to take effect. + We do not use <code>br</code> tags for paragraphing, as that is + semantically incorrect. +</p> +<p> + To prevent auto-paragraphing as a content-producer, refrain from using + double-newlines except to specify a new paragraph or in contexts where + it has special meaning (whitespace usually has no meaning except in + tags like <code>pre</code>, so this should not be difficult.) To prevent + the paragraphing of inline text adjacent to block elements, wrap them + in <code>div</code> tags (the behavior is slightly different outside of + the root node.) +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt new file mode 100644 index 0000000..2a47648 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt @@ -0,0 +1,12 @@ +AutoFormat.Custom +TYPE: list +VERSION: 2.0.1 +DEFAULT: array() +--DESCRIPTION-- + +<p> + This directive can be used to add custom auto-format injectors. + Specify an array of injector names (class name minus the prefix) + or concrete implementations. Injector class must exist. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt new file mode 100644 index 0000000..663064a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt @@ -0,0 +1,11 @@ +AutoFormat.DisplayLinkURI +TYPE: bool +VERSION: 3.2.0 +DEFAULT: false +--DESCRIPTION-- +<p> + This directive turns on the in-text display of URIs in <a> tags, and disables + those links. For example, <a href="http://example.com">example</a> becomes + example (<a>http://example.com</a>). +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt new file mode 100644 index 0000000..3a48ba9 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt @@ -0,0 +1,12 @@ +AutoFormat.Linkify +TYPE: bool +VERSION: 2.0.1 +DEFAULT: false +--DESCRIPTION-- + +<p> + This directive turns on linkification, auto-linking http, ftp and + https URLs. <code>a</code> tags with the <code>href</code> attribute + must be allowed. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt new file mode 100644 index 0000000..db58b13 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt @@ -0,0 +1,12 @@ +AutoFormat.PurifierLinkify.DocURL +TYPE: string +VERSION: 2.0.1 +DEFAULT: '#%s' +ALIASES: AutoFormatParam.PurifierLinkifyDocURL +--DESCRIPTION-- +<p> + Location of configuration documentation to link to, let %s substitute + into the configuration's namespace and directive names sans the percent + sign. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt new file mode 100644 index 0000000..7996488 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt @@ -0,0 +1,12 @@ +AutoFormat.PurifierLinkify +TYPE: bool +VERSION: 2.0.1 +DEFAULT: false +--DESCRIPTION-- + +<p> + Internal auto-formatter that converts configuration directives in + syntax <a>%Namespace.Directive</a> to links. <code>a</code> tags + with the <code>href</code> attribute must be allowed. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt new file mode 100644 index 0000000..35c393b --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt @@ -0,0 +1,11 @@ +AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions +TYPE: lookup +VERSION: 4.0.0 +DEFAULT: array('td' => true, 'th' => true) +--DESCRIPTION-- +<p> + When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp + are enabled, this directive defines what HTML elements should not be + removede if they have only a non-breaking space in them. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt new file mode 100644 index 0000000..ca17eb1 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt @@ -0,0 +1,15 @@ +AutoFormat.RemoveEmpty.RemoveNbsp +TYPE: bool +VERSION: 4.0.0 +DEFAULT: false +--DESCRIPTION-- +<p> + When enabled, HTML Purifier will treat any elements that contain only + non-breaking spaces as well as regular whitespace as empty, and remove + them when %AutoForamt.RemoveEmpty is enabled. +</p> +<p> + See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements + that don't have this behavior applied to them. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt new file mode 100644 index 0000000..34657ba --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt @@ -0,0 +1,46 @@ +AutoFormat.RemoveEmpty +TYPE: bool +VERSION: 3.2.0 +DEFAULT: false +--DESCRIPTION-- +<p> + When enabled, HTML Purifier will attempt to remove empty elements that + contribute no semantic information to the document. The following types + of nodes will be removed: +</p> +<ul><li> + Tags with no attributes and no content, and that are not empty + elements (remove <code><a></a></code> but not + <code><br /></code>), and + </li> + <li> + Tags with no content, except for:<ul> + <li>The <code>colgroup</code> element, or</li> + <li> + Elements with the <code>id</code> or <code>name</code> attribute, + when those attributes are permitted on those elements. + </li> + </ul></li> +</ul> +<p> + Please be very careful when using this functionality; while it may not + seem that empty elements contain useful information, they can alter the + layout of a document given appropriate styling. This directive is most + useful when you are processing machine-generated HTML, please avoid using + it on regular user HTML. +</p> +<p> + Elements that contain only whitespace will be treated as empty. Non-breaking + spaces, however, do not count as whitespace. See + %AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior. +</p> +<p> + This algorithm is not perfect; you may still notice some empty tags, + particularly if a node had elements, but those elements were later removed + because they were not permitted in that context, or tags that, after + being auto-closed by another tag, where empty. This is for safety reasons + to prevent clever code from breaking validation. The general rule of thumb: + if a tag looked empty on the way in, it will get removed; if HTML Purifier + made it empty, it will stay. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt new file mode 100644 index 0000000..dde990a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt @@ -0,0 +1,11 @@ +AutoFormat.RemoveSpansWithoutAttributes +TYPE: bool +VERSION: 4.0.1 +DEFAULT: false +--DESCRIPTION-- +<p> + This directive causes <code>span</code> tags without any attributes + to be removed. It will also remove spans that had all attributes + removed during processing. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt new file mode 100644 index 0000000..b324608 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt @@ -0,0 +1,8 @@ +CSS.AllowImportant +TYPE: bool +DEFAULT: false +VERSION: 3.1.0 +--DESCRIPTION-- +This parameter determines whether or not !important cascade modifiers should +be allowed in user CSS. If false, !important will stripped. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt new file mode 100644 index 0000000..748be0e --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt @@ -0,0 +1,11 @@ +CSS.AllowTricky +TYPE: bool +DEFAULT: false +VERSION: 3.1.0 +--DESCRIPTION-- +This parameter determines whether or not to allow "tricky" CSS properties and +values. Tricky CSS properties/values can drastically modify page layout or +be used for deceptive practices but do not directly constitute a security risk. +For example, <code>display:none;</code> is considered a tricky property that +will only be allowed if this directive is set to true. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowedFonts.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowedFonts.txt new file mode 100644 index 0000000..3fd4654 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowedFonts.txt @@ -0,0 +1,12 @@ +CSS.AllowedFonts +TYPE: lookup/null +VERSION: 4.3.0 +DEFAULT: NULL +--DESCRIPTION-- +<p> + Allows you to manually specify a set of allowed fonts. If + <code>NULL</code>, all fonts are allowed. This directive + affects generic names (serif, sans-serif, monospace, cursive, + fantasy) as well as specific font families. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt new file mode 100644 index 0000000..460112e --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt @@ -0,0 +1,18 @@ +CSS.AllowedProperties +TYPE: lookup/null +VERSION: 3.1.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + If HTML Purifier's style attributes set is unsatisfactory for your needs, + you can overload it with your own list of tags to allow. Note that this + method is subtractive: it does its job by taking away from HTML Purifier + usual feature set, so you cannot add an attribute that HTML Purifier never + supported in the first place. +</p> +<p> + <strong>Warning:</strong> If another directive conflicts with the + elements here, <em>that</em> directive will win and override. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt new file mode 100644 index 0000000..5cb7dda --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt @@ -0,0 +1,11 @@ +CSS.DefinitionRev +TYPE: int +VERSION: 2.0.0 +DEFAULT: 1 +--DESCRIPTION-- + +<p> + Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.ForbiddenProperties.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.ForbiddenProperties.txt new file mode 100644 index 0000000..f1f5c5f --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.ForbiddenProperties.txt @@ -0,0 +1,13 @@ +CSS.ForbiddenProperties +TYPE: lookup +VERSION: 4.2.0 +DEFAULT: array() +--DESCRIPTION-- +<p> + This is the logical inverse of %CSS.AllowedProperties, and it will + override that directive or any other directive. If possible, + %CSS.AllowedProperties is recommended over this directive, + because it can sometimes be difficult to tell whether or not you've + forbidden all of the CSS properties you truly would like to disallow. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt new file mode 100644 index 0000000..7a32914 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt @@ -0,0 +1,16 @@ +CSS.MaxImgLength +TYPE: string/null +DEFAULT: '1200px' +VERSION: 3.1.1 +--DESCRIPTION-- +<p> + This parameter sets the maximum allowed length on <code>img</code> tags, + effectively the <code>width</code> and <code>height</code> properties. + Only absolute units of measurement (in, pt, pc, mm, cm) and pixels (px) are allowed. This is + in place to prevent imagecrash attacks, disable with null at your own risk. + This directive is similar to %HTML.MaxImgLength, and both should be + concurrently edited, although there are + subtle differences in the input format (the CSS max is a number with + a unit). +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt new file mode 100644 index 0000000..148eedb --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt @@ -0,0 +1,10 @@ +CSS.Proprietary +TYPE: bool +VERSION: 3.0.0 +DEFAULT: false +--DESCRIPTION-- + +<p> + Whether or not to allow safe, proprietary CSS values. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt new file mode 100644 index 0000000..e733a61 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt @@ -0,0 +1,9 @@ +CSS.Trusted +TYPE: bool +VERSION: 4.2.1 +DEFAULT: false +--DESCRIPTION-- +Indicates whether or not the user's CSS input is trusted or not. If the +input is trusted, a more expansive set of allowed properties. See +also %HTML.Trusted. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt new file mode 100644 index 0000000..c486724 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt @@ -0,0 +1,14 @@ +Cache.DefinitionImpl +TYPE: string/null +VERSION: 2.0.0 +DEFAULT: 'Serializer' +--DESCRIPTION-- + +This directive defines which method to use when caching definitions, +the complex data-type that makes HTML Purifier tick. Set to null +to disable caching (not recommended, as you will see a definite +performance degradation). + +--ALIASES-- +Core.DefinitionCache +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt new file mode 100644 index 0000000..5403650 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt @@ -0,0 +1,13 @@ +Cache.SerializerPath +TYPE: string/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + Absolute path with no trailing slash to store serialized definitions in. + Default is within the + HTML Purifier library inside DefinitionCache/Serializer. This + path must be writable by the webserver. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt new file mode 100644 index 0000000..b2b83d9 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPermissions.txt @@ -0,0 +1,11 @@ +Cache.SerializerPermissions +TYPE: int +VERSION: 4.3.0 +DEFAULT: 0755 +--DESCRIPTION-- + +<p> + Directory permissions of the files and directories created inside + the DefinitionCache/Serializer or other custom serializer path. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt new file mode 100644 index 0000000..568cbf3 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt @@ -0,0 +1,18 @@ +Core.AggressivelyFixLt +TYPE: bool +VERSION: 2.1.0 +DEFAULT: true +--DESCRIPTION-- +<p> + This directive enables aggressive pre-filter fixes HTML Purifier can + perform in order to ensure that open angled-brackets do not get killed + during parsing stage. Enabling this will result in two preg_replace_callback + calls and at least two preg_replace calls for every HTML document parsed; + if your users make very well-formed HTML, you can set this directive false. + This has no effect when DirectLex is used. +</p> +<p> + <strong>Notice:</strong> This directive's default turned from false to true + in HTML Purifier 3.2.0. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt new file mode 100644 index 0000000..d731791 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt @@ -0,0 +1,12 @@ +Core.CollectErrors +TYPE: bool +VERSION: 2.0.0 +DEFAULT: false +--DESCRIPTION-- + +Whether or not to collect errors found while filtering the document. This +is a useful way to give feedback to your users. <strong>Warning:</strong> +Currently this feature is very patchy and experimental, with lots of +possible error messages not yet implemented. It will not cause any +problems, but it may not help your users either. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt new file mode 100644 index 0000000..c572c14 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt @@ -0,0 +1,29 @@ +Core.ColorKeywords +TYPE: hash +VERSION: 2.0.0 +--DEFAULT-- +array ( + 'maroon' => '#800000', + 'red' => '#FF0000', + 'orange' => '#FFA500', + 'yellow' => '#FFFF00', + 'olive' => '#808000', + 'purple' => '#800080', + 'fuchsia' => '#FF00FF', + 'white' => '#FFFFFF', + 'lime' => '#00FF00', + 'green' => '#008000', + 'navy' => '#000080', + 'blue' => '#0000FF', + 'aqua' => '#00FFFF', + 'teal' => '#008080', + 'black' => '#000000', + 'silver' => '#C0C0C0', + 'gray' => '#808080', +) +--DESCRIPTION-- + +Lookup array of color names to six digit hexadecimal number corresponding +to color, with preceding hash mark. Used when parsing colors. The lookup +is done in a case-insensitive manner. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt new file mode 100644 index 0000000..64b114f --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt @@ -0,0 +1,14 @@ +Core.ConvertDocumentToFragment +TYPE: bool +DEFAULT: true +--DESCRIPTION-- + +This parameter determines whether or not the filter should convert +input that is a full document with html and body tags to a fragment +of just the contents of a body tag. This parameter is simply something +HTML Purifier can do during an edge-case: for most inputs, this +processing is not necessary. + +--ALIASES-- +Core.AcceptFullDocuments +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt new file mode 100644 index 0000000..36f16e0 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt @@ -0,0 +1,17 @@ +Core.DirectLexLineNumberSyncInterval +TYPE: int +VERSION: 2.0.0 +DEFAULT: 0 +--DESCRIPTION-- + +<p> + Specifies the number of tokens the DirectLex line number tracking + implementations should process before attempting to resyncronize the + current line count by manually counting all previous new-lines. When + at 0, this functionality is disabled. Lower values will decrease + performance, and this is only strictly necessary if the counting + algorithm is buggy (in which case you should report it as a bug). + This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is + not being used. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt new file mode 100644 index 0000000..ce243c3 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EnableIDNA.txt @@ -0,0 +1,9 @@ +Core.EnableIDNA +TYPE: bool +DEFAULT: false +VERSION: 4.4.0 +--DESCRIPTION-- +Allows international domain names in URLs. This configuration option +requires the PEAR Net_IDNA2 module to be installed. It operates by +punycoding any internationalized host names for maximum portability. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt new file mode 100644 index 0000000..8bfb47c --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt @@ -0,0 +1,15 @@ +Core.Encoding +TYPE: istring +DEFAULT: 'utf-8' +--DESCRIPTION-- +If for some reason you are unable to convert all webpages to UTF-8, you can +use this directive as a stop-gap compatibility change to let HTML Purifier +deal with non UTF-8 input. This technique has notable deficiencies: +absolutely no characters outside of the selected character encoding will be +preserved, not even the ones that have been ampersand escaped (this is due +to a UTF-8 specific <em>feature</em> that automatically resolves all +entities), making it pretty useless for anything except the most I18N-blind +applications, although %Core.EscapeNonASCIICharacters offers fixes this +trouble with another tradeoff. This directive only accepts ISO-8859-1 if +iconv is not enabled. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt new file mode 100644 index 0000000..4d5b505 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt @@ -0,0 +1,10 @@ +Core.EscapeInvalidChildren +TYPE: bool +DEFAULT: false +--DESCRIPTION-- +When true, a child is found that is not allowed in the context of the +parent element will be transformed into text as if it were ASCII. When +false, that element and all internal tags will be dropped, though text will +be preserved. There is no option for dropping the element but preserving +child nodes. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt new file mode 100644 index 0000000..a7a5b24 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt @@ -0,0 +1,7 @@ +Core.EscapeInvalidTags +TYPE: bool +DEFAULT: false +--DESCRIPTION-- +When true, invalid tags will be written back to the document as plain text. +Otherwise, they are silently dropped. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt new file mode 100644 index 0000000..abb4999 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt @@ -0,0 +1,13 @@ +Core.EscapeNonASCIICharacters +TYPE: bool +VERSION: 1.4.0 +DEFAULT: false +--DESCRIPTION-- +This directive overcomes a deficiency in %Core.Encoding by blindly +converting all non-ASCII characters into decimal numeric entities before +converting it to its native encoding. This means that even characters that +can be expressed in the non-UTF-8 encoding will be entity-ized, which can +be a real downer for encodings like Big5. It also assumes that the ASCII +repetoire is available, although this is the case for almost all encodings. +Anyway, use UTF-8! +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt new file mode 100644 index 0000000..915391e --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt @@ -0,0 +1,19 @@ +Core.HiddenElements +TYPE: lookup +--DEFAULT-- +array ( + 'script' => true, + 'style' => true, +) +--DESCRIPTION-- + +<p> + This directive is a lookup array of elements which should have their + contents removed when they are not allowed by the HTML definition. + For example, the contents of a <code>script</code> tag are not + normally shown in a document, so if script tags are to be removed, + their contents should be removed to. This is opposed to a <code>b</code> + tag, which defines some presentational changes but does not hide its + contents. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.Language.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.Language.txt new file mode 100644 index 0000000..233fca1 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.Language.txt @@ -0,0 +1,10 @@ +Core.Language +TYPE: string +VERSION: 2.0.0 +DEFAULT: 'en' +--DESCRIPTION-- + +ISO 639 language code for localizable things in HTML Purifier to use, +which is mainly error reporting. There is currently only an English (en) +translation, so this directive is currently useless. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt new file mode 100644 index 0000000..8983e2c --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt @@ -0,0 +1,34 @@ +Core.LexerImpl +TYPE: mixed/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + This parameter determines what lexer implementation can be used. The + valid values are: +</p> +<dl> + <dt><em>null</em></dt> + <dd> + Recommended, the lexer implementation will be auto-detected based on + your PHP-version and configuration. + </dd> + <dt><em>string</em> lexer identifier</dt> + <dd> + This is a slim way of manually overridding the implementation. + Currently recognized values are: DOMLex (the default PHP5 +implementation) + and DirectLex (the default PHP4 implementation). Only use this if + you know what you are doing: usually, the auto-detection will + manage things for cases you aren't even aware of. + </dd> + <dt><em>object</em> lexer instance</dt> + <dd> + Super-advanced: you can specify your own, custom, implementation that + implements the interface defined by <code>HTMLPurifier_Lexer</code>. + I may remove this option simply because I don't expect anyone + to use it. + </dd> +</dl> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt new file mode 100644 index 0000000..eb841a7 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt @@ -0,0 +1,16 @@ +Core.MaintainLineNumbers +TYPE: bool/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + If true, HTML Purifier will add line number information to all tokens. + This is useful when error reporting is turned on, but can result in + significant performance degradation and should not be used when + unnecessary. This directive must be used with the DirectLex lexer, + as the DOMLex lexer does not (yet) support this functionality. + If the value is null, an appropriate value will be selected based + on other configuration. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.NormalizeNewlines.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.NormalizeNewlines.txt new file mode 100644 index 0000000..d77f536 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.NormalizeNewlines.txt @@ -0,0 +1,11 @@ +Core.NormalizeNewlines +TYPE: bool +VERSION: 4.2.0 +DEFAULT: true +--DESCRIPTION-- +<p> + Whether or not to normalize newlines to the operating + system default. When <code>false</code>, HTML Purifier + will attempt to preserve mixed newline files. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt new file mode 100644 index 0000000..4070c2a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt @@ -0,0 +1,12 @@ +Core.RemoveInvalidImg +TYPE: bool +DEFAULT: true +VERSION: 1.3.0 +--DESCRIPTION-- + +<p> + This directive enables pre-emptive URI checking in <code>img</code> + tags, as the attribute validation strategy is not authorized to + remove elements from the document. Revert to pre-1.3.0 behavior by setting to false. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt new file mode 100644 index 0000000..3397d9f --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt @@ -0,0 +1,11 @@ +Core.RemoveProcessingInstructions +TYPE: bool +VERSION: 4.2.0 +DEFAULT: false +--DESCRIPTION-- +Instead of escaping processing instructions in the form <code><? ... +?></code>, remove it out-right. This may be useful if the HTML +you are validating contains XML processing instruction gunk, however, +it can also be user-unfriendly for people attempting to post PHP +snippets. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt new file mode 100644 index 0000000..a4cd966 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt @@ -0,0 +1,12 @@ +Core.RemoveScriptContents +TYPE: bool/null +DEFAULT: NULL +VERSION: 2.0.0 +DEPRECATED-VERSION: 2.1.0 +DEPRECATED-USE: Core.HiddenElements +--DESCRIPTION-- +<p> + This directive enables HTML Purifier to remove not only script tags + but all of their contents. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt new file mode 100644 index 0000000..3db50ef --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt @@ -0,0 +1,11 @@ +Filter.Custom +TYPE: list +VERSION: 3.1.0 +DEFAULT: array() +--DESCRIPTION-- +<p> + This directive can be used to add custom filters; it is nearly the + equivalent of the now deprecated <code>HTMLPurifier->addFilter()</code> + method. Specify an array of concrete implementations. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt new file mode 100644 index 0000000..16829bc --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt @@ -0,0 +1,14 @@ +Filter.ExtractStyleBlocks.Escaping +TYPE: bool +VERSION: 3.0.0 +DEFAULT: true +ALIASES: Filter.ExtractStyleBlocksEscaping, FilterParam.ExtractStyleBlocksEscaping +--DESCRIPTION-- + +<p> + Whether or not to escape the dangerous characters <, > and & + as \3C, \3E and \26, respectively. This is can be safely set to false + if the contents of StyleBlocks will be placed in an external stylesheet, + where there is no risk of it being interpreted as HTML. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt new file mode 100644 index 0000000..7f95f54 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt @@ -0,0 +1,29 @@ +Filter.ExtractStyleBlocks.Scope +TYPE: string/null +VERSION: 3.0.0 +DEFAULT: NULL +ALIASES: Filter.ExtractStyleBlocksScope, FilterParam.ExtractStyleBlocksScope +--DESCRIPTION-- + +<p> + If you would like users to be able to define external stylesheets, but + only allow them to specify CSS declarations for a specific node and + prevent them from fiddling with other elements, use this directive. + It accepts any valid CSS selector, and will prepend this to any + CSS declaration extracted from the document. For example, if this + directive is set to <code>#user-content</code> and a user uses the + selector <code>a:hover</code>, the final selector will be + <code>#user-content a:hover</code>. +</p> +<p> + The comma shorthand may be used; consider the above example, with + <code>#user-content, #user-content2</code>, the final selector will + be <code>#user-content a:hover, #user-content2 a:hover</code>. +</p> +<p> + <strong>Warning:</strong> It is possible for users to bypass this measure + using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML + Purifier, and I am working to get it fixed. Until then, HTML Purifier + performs a basic check to prevent this. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt new file mode 100644 index 0000000..6c231b2 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt @@ -0,0 +1,16 @@ +Filter.ExtractStyleBlocks.TidyImpl +TYPE: mixed/null +VERSION: 3.1.0 +DEFAULT: NULL +ALIASES: FilterParam.ExtractStyleBlocksTidyImpl +--DESCRIPTION-- +<p> + If left NULL, HTML Purifier will attempt to instantiate a <code>csstidy</code> + class to use for internal cleaning. This will usually be good enough. +</p> +<p> + However, for trusted user input, you can set this to <code>false</code> to + disable cleaning. In addition, you can supply your own concrete implementation + of Tidy's interface to use, although I don't know why you'd want to do that. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt new file mode 100644 index 0000000..078d087 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt @@ -0,0 +1,74 @@ +Filter.ExtractStyleBlocks +TYPE: bool +VERSION: 3.1.0 +DEFAULT: false +EXTERNAL: CSSTidy +--DESCRIPTION-- +<p> + This directive turns on the style block extraction filter, which removes + <code>style</code> blocks from input HTML, cleans them up with CSSTidy, + and places them in the <code>StyleBlocks</code> context variable, for further + use by you, usually to be placed in an external stylesheet, or a + <code>style</code> block in the <code>head</code> of your document. +</p> +<p> + Sample usage: +</p> +<pre><![CDATA[ +<?php + header('Content-type: text/html; charset=utf-8'); + echo '<?xml version="1.0" encoding="UTF-8"?>'; +?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> +<head> + <title>Filter.ExtractStyleBlocks</title> +<?php + require_once '/path/to/library/HTMLPurifier.auto.php'; + require_once '/path/to/csstidy.class.php'; + + $dirty = '<style>body {color:#F00;}</style> Some text'; + + $config = HTMLPurifier_Config::createDefault(); + $config->set('Filter', 'ExtractStyleBlocks', true); + $purifier = new HTMLPurifier($config); + + $html = $purifier->purify($dirty); + + // This implementation writes the stylesheets to the styles/ directory. + // You can also echo the styles inside the document, but it's a bit + // more difficult to make sure they get interpreted properly by + // browsers; try the usual CSS armoring techniques. + $styles = $purifier->context->get('StyleBlocks'); + $dir = 'styles/'; + if (!is_dir($dir)) mkdir($dir); + $hash = sha1($_GET['html']); + foreach ($styles as $i => $style) { + file_put_contents($name = $dir . $hash . "_$i"); + echo '<link rel="stylesheet" type="text/css" href="'.$name.'" />'; + } +?> +</head> +<body> + <div> + <?php echo $html; ?> + </div> +</b]]><![CDATA[ody> +</html> +]]></pre> +<p> + <strong>Warning:</strong> It is possible for a user to mount an + imagecrash attack using this CSS. Counter-measures are difficult; + it is not simply enough to limit the range of CSS lengths (using + relative lengths with many nesting levels allows for large values + to be attained without actually specifying them in the stylesheet), + and the flexible nature of selectors makes it difficult to selectively + disable lengths on image tags (HTML Purifier, however, does disable + CSS width and height in inline styling). There are probably two effective + counter measures: an explicit width and height set to auto in all + images in your document (unlikely) or the disabling of width and + height (somewhat reasonable). Whether or not these measures should be + used is left to the reader. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt new file mode 100644 index 0000000..321eaa2 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt @@ -0,0 +1,16 @@ +Filter.YouTube +TYPE: bool +VERSION: 3.1.0 +DEFAULT: false +--DESCRIPTION-- +<p> + <strong>Warning:</strong> Deprecated in favor of %HTML.SafeObject and + %Output.FlashCompat (turn both on to allow YouTube videos and other + Flash content). +</p> +<p> + This directive enables YouTube video embedding in HTML Purifier. Check + <a href="http://htmlpurifier.org/docs/enduser-youtube.html">this document + on embedding videos</a> for more information on what this filter does. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt new file mode 100644 index 0000000..0b2c106 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt @@ -0,0 +1,25 @@ +HTML.Allowed +TYPE: itext/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + This is a preferred convenience directive that combines + %HTML.AllowedElements and %HTML.AllowedAttributes. + Specify elements and attributes that are allowed using: + <code>element1[attr1|attr2],element2...</code>. For example, + if you would like to only allow paragraphs and links, specify + <code>a[href],p</code>. You can specify attributes that apply + to all elements using an asterisk, e.g. <code>*[lang]</code>. + You can also use newlines instead of commas to separate elements. +</p> +<p> + <strong>Warning</strong>: + All of the constraints on the component directives are still enforced. + The syntax is a <em>subset</em> of TinyMCE's <code>valid_elements</code> + whitelist: directly copy-pasting it here will probably result in + broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes + are set, this directive has no effect. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt new file mode 100644 index 0000000..fcf093f --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt @@ -0,0 +1,19 @@ +HTML.AllowedAttributes +TYPE: lookup/null +VERSION: 1.3.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + If HTML Purifier's attribute set is unsatisfactory, overload it! + The syntax is "tag.attr" or "*.attr" for the global attributes + (style, id, class, dir, lang, xml:lang). +</p> +<p> + <strong>Warning:</strong> If another directive conflicts with the + elements here, <em>that</em> directive will win and override. For + example, %HTML.EnableAttrID will take precedence over *.id in this + directive. You must set that directive to true before you can use + IDs at all. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt new file mode 100644 index 0000000..140e214 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedComments.txt @@ -0,0 +1,10 @@ +HTML.AllowedComments +TYPE: lookup +VERSION: 4.4.0 +DEFAULT: array() +--DESCRIPTION-- +A whitelist which indicates what explicit comment bodies should be +allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp +(these directives are union'ed together, so a comment is considered +valid if any directive deems it valid.) +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt new file mode 100644 index 0000000..f22e977 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedCommentsRegexp.txt @@ -0,0 +1,15 @@ +HTML.AllowedCommentsRegexp +TYPE: string/null +VERSION: 4.4.0 +DEFAULT: NULL +--DESCRIPTION-- +A regexp, which if it matches the body of a comment, indicates that +it should be allowed. Trailing and leading spaces are removed prior +to running this regular expression. +<strong>Warning:</strong> Make sure you specify +correct anchor metacharacters <code>^regex$</code>, otherwise you may accept +comments that you did not mean to! In particular, the regex <code>/foo|bar/</code> +is probably not sufficiently strict, since it also allows <code>foobar</code>. +See also %HTML.AllowedComments (these directives are union'ed together, +so a comment is considered valid if any directive deems it valid.) +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt new file mode 100644 index 0000000..1d3fa79 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt @@ -0,0 +1,23 @@ +HTML.AllowedElements +TYPE: lookup/null +VERSION: 1.3.0 +DEFAULT: NULL +--DESCRIPTION-- +<p> + If HTML Purifier's tag set is unsatisfactory for your needs, you can + overload it with your own list of tags to allow. If you change + this, you probably also want to change %HTML.AllowedAttributes; see + also %HTML.Allowed which lets you set allowed elements and + attributes at the same time. +</p> +<p> + If you attempt to allow an element that HTML Purifier does not know + about, HTML Purifier will raise an error. You will need to manually + tell HTML Purifier about this element by using the + <a href="http://htmlpurifier.org/docs/enduser-customize.html">advanced customization features.</a> +</p> +<p> + <strong>Warning:</strong> If another directive conflicts with the + elements here, <em>that</em> directive will win and override. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt new file mode 100644 index 0000000..5a59a55 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt @@ -0,0 +1,20 @@ +HTML.AllowedModules +TYPE: lookup/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + A doctype comes with a set of usual modules to use. Without having + to mucking about with the doctypes, you can quickly activate or + disable these modules by specifying which modules you wish to allow + with this directive. This is most useful for unit testing specific + modules, although end users may find it useful for their own ends. +</p> +<p> + If you specify a module that does not exist, the manager will silently + fail to use it, so be careful! User-defined modules are not affected + by this directive. Modules defined in %HTML.CoreModules are not + affected by this directive. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt new file mode 100644 index 0000000..151fb7b --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt @@ -0,0 +1,11 @@ +HTML.Attr.Name.UseCDATA +TYPE: bool +DEFAULT: false +VERSION: 4.0.0 +--DESCRIPTION-- +The W3C specification DTD defines the name attribute to be CDATA, not ID, due +to limitations of DTD. In certain documents, this relaxed behavior is desired, +whether it is to specify duplicate names, or to specify names that would be +illegal IDs (for example, names that begin with a digit.) Set this configuration +directive to true to use the relaxed parsing rules. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt new file mode 100644 index 0000000..45ae469 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt @@ -0,0 +1,18 @@ +HTML.BlockWrapper +TYPE: string +VERSION: 1.3.0 +DEFAULT: 'p' +--DESCRIPTION-- + +<p> + String name of element to wrap inline elements that are inside a block + context. This only occurs in the children of blockquote in strict mode. +</p> +<p> + Example: by default value, + <code><blockquote>Foo</blockquote></code> would become + <code><blockquote><p>Foo</p></blockquote></code>. + The <code><p></code> tags can be replaced with whatever you desire, + as long as it is a block level element. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt new file mode 100644 index 0000000..5246188 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt @@ -0,0 +1,23 @@ +HTML.CoreModules +TYPE: lookup +VERSION: 2.0.0 +--DEFAULT-- +array ( + 'Structure' => true, + 'Text' => true, + 'Hypertext' => true, + 'List' => true, + 'NonXMLCommonAttributes' => true, + 'XMLCommonAttributes' => true, + 'CommonAttributes' => true, +) +--DESCRIPTION-- + +<p> + Certain modularized doctypes (XHTML, namely), have certain modules + that must be included for the doctype to be an conforming document + type: put those modules here. By default, XHTML's core modules + are used. You can set this to a blank array to disable core module + protection, but this is not recommended. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt new file mode 100644 index 0000000..a64e3d7 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt @@ -0,0 +1,9 @@ +HTML.CustomDoctype +TYPE: string/null +VERSION: 2.0.1 +DEFAULT: NULL +--DESCRIPTION-- + +A custom doctype for power-users who defined there own document +type. This directive only applies when %HTML.Doctype is blank. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt new file mode 100644 index 0000000..103db75 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt @@ -0,0 +1,33 @@ +HTML.DefinitionID +TYPE: string/null +DEFAULT: NULL +VERSION: 2.0.0 +--DESCRIPTION-- + +<p> + Unique identifier for a custom-built HTML definition. If you edit + the raw version of the HTMLDefinition, introducing changes that the + configuration object does not reflect, you must specify this variable. + If you change your custom edits, you should change this directive, or + clear your cache. Example: +</p> +<pre> +$config = HTMLPurifier_Config::createDefault(); +$config->set('HTML', 'DefinitionID', '1'); +$def = $config->getHTMLDefinition(); +$def->addAttribute('a', 'tabindex', 'Number'); +</pre> +<p> + In the above example, the configuration is still at the defaults, but + using the advanced API, an extra attribute has been added. The + configuration object normally has no way of knowing that this change + has taken place, so it needs an extra directive: %HTML.DefinitionID. + If someone else attempts to use the default configuration, these two + pieces of code will not clobber each other in the cache, since one has + an extra directive attached to it. +</p> +<p> + You <em>must</em> specify a value to this directive to use the + advanced API features. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt new file mode 100644 index 0000000..229ae02 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt @@ -0,0 +1,16 @@ +HTML.DefinitionRev +TYPE: int +VERSION: 2.0.0 +DEFAULT: 1 +--DESCRIPTION-- + +<p> + Revision identifier for your custom definition specified in + %HTML.DefinitionID. This serves the same purpose: uniquely identifying + your custom definition, but this one does so in a chronological + context: revision 3 is more up-to-date then revision 2. Thus, when + this gets incremented, the cache handling is smart enough to clean + up any older revisions of your definition as well as flush the + cache. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt new file mode 100644 index 0000000..9dab497 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt @@ -0,0 +1,11 @@ +HTML.Doctype +TYPE: string/null +DEFAULT: NULL +--DESCRIPTION-- +Doctype to use during filtering. Technically speaking this is not actually +a doctype (as it does not identify a corresponding DTD), but we are using +this name for sake of simplicity. When non-blank, this will override any +older directives like %HTML.XHTML or %HTML.Strict. +--ALLOWED-- +'HTML 4.01 Transitional', 'HTML 4.01 Strict', 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict', 'XHTML 1.1' +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.FlashAllowFullScreen.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.FlashAllowFullScreen.txt new file mode 100644 index 0000000..7878dc0 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.FlashAllowFullScreen.txt @@ -0,0 +1,11 @@ +HTML.FlashAllowFullScreen +TYPE: bool +VERSION: 4.2.0 +DEFAULT: false +--DESCRIPTION-- +<p> + Whether or not to permit embedded Flash content from + %HTML.SafeObject to expand to the full screen. Corresponds to + the <code>allowFullScreen</code> parameter. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt new file mode 100644 index 0000000..57358f9 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt @@ -0,0 +1,21 @@ +HTML.ForbiddenAttributes +TYPE: lookup +VERSION: 3.1.0 +DEFAULT: array() +--DESCRIPTION-- +<p> + While this directive is similar to %HTML.AllowedAttributes, for + forwards-compatibility with XML, this attribute has a different syntax. Instead of + <code>tag.attr</code>, use <code>tag@attr</code>. To disallow <code>href</code> + attributes in <code>a</code> tags, set this directive to + <code>a@href</code>. You can also disallow an attribute globally with + <code>attr</code> or <code>*@attr</code> (either syntax is fine; the latter + is provided for consistency with %HTML.AllowedAttributes). +</p> +<p> + <strong>Warning:</strong> This directive complements %HTML.ForbiddenElements, + accordingly, check + out that directive for a discussion of why you + should think twice before using this directive. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt new file mode 100644 index 0000000..93a53e1 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt @@ -0,0 +1,20 @@ +HTML.ForbiddenElements +TYPE: lookup +VERSION: 3.1.0 +DEFAULT: array() +--DESCRIPTION-- +<p> + This was, perhaps, the most requested feature ever in HTML + Purifier. Please don't abuse it! This is the logical inverse of + %HTML.AllowedElements, and it will override that directive, or any + other directive. +</p> +<p> + If possible, %HTML.Allowed is recommended over this directive, because it + can sometimes be difficult to tell whether or not you've forbidden all of + the behavior you would like to disallow. If you forbid <code>img</code> + with the expectation of preventing images on your site, you'll be in for + a nasty surprise when people start using the <code>background-image</code> + CSS property. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt new file mode 100644 index 0000000..e424c38 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt @@ -0,0 +1,14 @@ +HTML.MaxImgLength +TYPE: int/null +DEFAULT: 1200 +VERSION: 3.1.1 +--DESCRIPTION-- +<p> + This directive controls the maximum number of pixels in the width and + height attributes in <code>img</code> tags. This is + in place to prevent imagecrash attacks, disable with null at your own risk. + This directive is similar to %CSS.MaxImgLength, and both should be + concurrently edited, although there are + subtle differences in the input format (the HTML max is an integer). +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt new file mode 100644 index 0000000..700b309 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Nofollow.txt @@ -0,0 +1,7 @@ +HTML.Nofollow +TYPE: bool +VERSION: 4.3.0 +DEFAULT: FALSE +--DESCRIPTION-- +If enabled, nofollow rel attributes are added to all outgoing links. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt new file mode 100644 index 0000000..62e8e16 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt @@ -0,0 +1,12 @@ +HTML.Parent +TYPE: string +VERSION: 1.3.0 +DEFAULT: 'div' +--DESCRIPTION-- + +<p> + String name of element that HTML fragment passed to library will be + inserted in. An interesting variation would be using span as the + parent element, meaning that only inline tags would be allowed. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt new file mode 100644 index 0000000..dfb7204 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt @@ -0,0 +1,12 @@ +HTML.Proprietary +TYPE: bool +VERSION: 3.1.0 +DEFAULT: false +--DESCRIPTION-- +<p> + Whether or not to allow proprietary elements and attributes in your + documents, as per <code>HTMLPurifier_HTMLModule_Proprietary</code>. + <strong>Warning:</strong> This can cause your documents to stop + validating! +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt new file mode 100644 index 0000000..cdda09a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt @@ -0,0 +1,13 @@ +HTML.SafeEmbed +TYPE: bool +VERSION: 3.1.1 +DEFAULT: false +--DESCRIPTION-- +<p> + Whether or not to permit embed tags in documents, with a number of extra + security features added to prevent script execution. This is similar to + what websites like MySpace do to embed tags. Embed is a proprietary + element and will cause your website to stop validating; you should + see if you can use %Output.FlashCompat with %HTML.SafeObject instead + first.</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt new file mode 100644 index 0000000..5eb6ec2 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt @@ -0,0 +1,13 @@ +HTML.SafeIframe +TYPE: bool +VERSION: 4.4.0 +DEFAULT: false +--DESCRIPTION-- +<p> + Whether or not to permit iframe tags in untrusted documents. This + directive must be accompanied by a whitelist of permitted iframes, + such as %URI.SafeIframeRegexp, otherwise it will fatally error. + This directive has no effect on strict doctypes, as iframes are not + valid. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt new file mode 100644 index 0000000..ceb342e --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt @@ -0,0 +1,13 @@ +HTML.SafeObject +TYPE: bool +VERSION: 3.1.1 +DEFAULT: false +--DESCRIPTION-- +<p> + Whether or not to permit object tags in documents, with a number of extra + security features added to prevent script execution. This is similar to + what websites like MySpace do to object tags. You should also enable + %Output.FlashCompat in order to generate Internet Explorer + compatibility code for your object tags. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt new file mode 100644 index 0000000..a8b1de5 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt @@ -0,0 +1,9 @@ +HTML.Strict +TYPE: bool +VERSION: 1.3.0 +DEFAULT: false +DEPRECATED-VERSION: 1.7.0 +DEPRECATED-USE: HTML.Doctype +--DESCRIPTION-- +Determines whether or not to use Transitional (loose) or Strict rulesets. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt new file mode 100644 index 0000000..587a167 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TargetBlank.txt @@ -0,0 +1,8 @@ +HTML.TargetBlank +TYPE: bool +VERSION: 4.4.0 +DEFAULT: FALSE +--DESCRIPTION-- +If enabled, <code>target=blank</code> attributes are added to all outgoing links. +(This includes links from an HTTPS version of a page to an HTTP version.) +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt new file mode 100644 index 0000000..b4c271b --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt @@ -0,0 +1,8 @@ +HTML.TidyAdd +TYPE: lookup +VERSION: 2.0.0 +DEFAULT: array() +--DESCRIPTION-- + +Fixes to add to the default set of Tidy fixes as per your level. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt new file mode 100644 index 0000000..4186ccd --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt @@ -0,0 +1,24 @@ +HTML.TidyLevel +TYPE: string +VERSION: 2.0.0 +DEFAULT: 'medium' +--DESCRIPTION-- + +<p>General level of cleanliness the Tidy module should enforce. +There are four allowed values:</p> +<dl> + <dt>none</dt> + <dd>No extra tidying should be done</dd> + <dt>light</dt> + <dd>Only fix elements that would be discarded otherwise due to + lack of support in doctype</dd> + <dt>medium</dt> + <dd>Enforce best practices</dd> + <dt>heavy</dt> + <dd>Transform all deprecated elements and attributes to standards + compliant equivalents</dd> +</dl> + +--ALLOWED-- +'none', 'light', 'medium', 'heavy' +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt new file mode 100644 index 0000000..996762b --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt @@ -0,0 +1,8 @@ +HTML.TidyRemove +TYPE: lookup +VERSION: 2.0.0 +DEFAULT: array() +--DESCRIPTION-- + +Fixes to remove from the default set of Tidy fixes as per your level. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt new file mode 100644 index 0000000..1db9237 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt @@ -0,0 +1,9 @@ +HTML.Trusted +TYPE: bool +VERSION: 2.0.0 +DEFAULT: false +--DESCRIPTION-- +Indicates whether or not the user input is trusted or not. If the input is +trusted, a more expansive set of allowed tags and attributes will be used. +See also %CSS.Trusted. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt new file mode 100644 index 0000000..2a47e38 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt @@ -0,0 +1,11 @@ +HTML.XHTML +TYPE: bool +DEFAULT: true +VERSION: 1.1.0 +DEPRECATED-VERSION: 1.7.0 +DEPRECATED-USE: HTML.Doctype +--DESCRIPTION-- +Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor. +--ALIASES-- +Core.XHTML +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt new file mode 100644 index 0000000..08921fd --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt @@ -0,0 +1,10 @@ +Output.CommentScriptContents +TYPE: bool +VERSION: 2.0.0 +DEFAULT: true +--DESCRIPTION-- +Determines whether or not HTML Purifier should attempt to fix up the +contents of script tags for legacy browsers with comments. +--ALIASES-- +Core.CommentScriptContents +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt new file mode 100644 index 0000000..d6f0d9f --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.FixInnerHTML.txt @@ -0,0 +1,15 @@ +Output.FixInnerHTML +TYPE: bool +VERSION: 4.3.0 +DEFAULT: true +--DESCRIPTION-- +<p> + If true, HTML Purifier will protect against Internet Explorer's + mishandling of the <code>innerHTML</code> attribute by appending + a space to any attribute that does not contain angled brackets, spaces + or quotes, but contains a backtick. This slightly changes the + semantics of any given attribute, so if this is unacceptable and + you do not use <code>innerHTML</code> on any of your pages, you can + turn this directive off. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt new file mode 100644 index 0000000..93398e8 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.FlashCompat.txt @@ -0,0 +1,11 @@ +Output.FlashCompat +TYPE: bool +VERSION: 4.1.0 +DEFAULT: false +--DESCRIPTION-- +<p> + If true, HTML Purifier will generate Internet Explorer compatibility + code for all object code. This is highly recommended if you enable + %HTML.SafeObject. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt new file mode 100644 index 0000000..79f8ad8 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt @@ -0,0 +1,13 @@ +Output.Newline +TYPE: string/null +VERSION: 2.0.1 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + Newline string to format final output with. If left null, HTML Purifier + will auto-detect the default newline type of the system and use that; + you can manually override it here. Remember, \r\n is Windows, \r + is Mac, and \n is Unix. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt new file mode 100644 index 0000000..232b023 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt @@ -0,0 +1,14 @@ +Output.SortAttr +TYPE: bool +VERSION: 3.2.0 +DEFAULT: false +--DESCRIPTION-- +<p> + If true, HTML Purifier will sort attributes by name before writing them back + to the document, converting a tag like: <code><el b="" a="" c="" /></code> + to <code><el a="" b="" c="" /></code>. This is a workaround for + a bug in FCKeditor which causes it to swap attributes order, adding noise + to text diffs. If you're not seeing this bug, chances are, you don't need + this directive. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt new file mode 100644 index 0000000..06bab00 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt @@ -0,0 +1,25 @@ +Output.TidyFormat +TYPE: bool +VERSION: 1.1.1 +DEFAULT: false +--DESCRIPTION-- +<p> + Determines whether or not to run Tidy on the final output for pretty + formatting reasons, such as indentation and wrap. +</p> +<p> + This can greatly improve readability for editors who are hand-editing + the HTML, but is by no means necessary as HTML Purifier has already + fixed all major errors the HTML may have had. Tidy is a non-default + extension, and this directive will silently fail if Tidy is not + available. +</p> +<p> + If you are looking to make the overall look of your page's source + better, I recommend running Tidy on the entire page rather than just + user-content (after all, the indentation relative to the containing + blocks will be incorrect). +</p> +--ALIASES-- +Core.TidyFormat +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt new file mode 100644 index 0000000..071bc02 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt @@ -0,0 +1,7 @@ +Test.ForceNoIconv +TYPE: bool +DEFAULT: false +--DESCRIPTION-- +When set to true, HTMLPurifier_Encoder will act as if iconv does not exist +and use only pure PHP implementations. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt new file mode 100644 index 0000000..666635a --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt @@ -0,0 +1,17 @@ +URI.AllowedSchemes +TYPE: lookup +--DEFAULT-- +array ( + 'http' => true, + 'https' => true, + 'mailto' => true, + 'ftp' => true, + 'nntp' => true, + 'news' => true, +) +--DESCRIPTION-- +Whitelist that defines the schemes that a URI is allowed to have. This +prevents XSS attacks from using pseudo-schemes like javascript or mocha. +There is also support for the <code>data</code> and <code>file</code> +URI schemes, but they are not enabled by default. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Base.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Base.txt new file mode 100644 index 0000000..876f068 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Base.txt @@ -0,0 +1,17 @@ +URI.Base +TYPE: string/null +VERSION: 2.1.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + The base URI is the URI of the document this purified HTML will be + inserted into. This information is important if HTML Purifier needs + to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute + is on. You may use a non-absolute URI for this value, but behavior + may vary (%URI.MakeAbsolute deals nicely with both absolute and + relative paths, but forwards-compatibility is not guaranteed). + <strong>Warning:</strong> If set, the scheme on this URI + overrides the one specified by %URI.DefaultScheme. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt new file mode 100644 index 0000000..728e378 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt @@ -0,0 +1,10 @@ +URI.DefaultScheme +TYPE: string +DEFAULT: 'http' +--DESCRIPTION-- + +<p> + Defines through what scheme the output will be served, in order to + select the proper object validator when no scheme information is present. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt new file mode 100644 index 0000000..f05312b --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt @@ -0,0 +1,11 @@ +URI.DefinitionID +TYPE: string/null +VERSION: 2.1.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + Unique identifier for a custom-built URI definition. If you want + to add custom URIFilters, you must specify this value. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt new file mode 100644 index 0000000..80cfea9 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt @@ -0,0 +1,11 @@ +URI.DefinitionRev +TYPE: int +VERSION: 2.1.0 +DEFAULT: 1 +--DESCRIPTION-- + +<p> + Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt new file mode 100644 index 0000000..71ce025 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt @@ -0,0 +1,14 @@ +URI.Disable +TYPE: bool +VERSION: 1.3.0 +DEFAULT: false +--DESCRIPTION-- + +<p> + Disables all URIs in all forms. Not sure why you'd want to do that + (after all, the Internet's founded on the notion of a hyperlink). +</p> + +--ALIASES-- +Attr.DisableURI +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt new file mode 100644 index 0000000..13c122c --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt @@ -0,0 +1,11 @@ +URI.DisableExternal +TYPE: bool +VERSION: 1.2.0 +DEFAULT: false +--DESCRIPTION-- +Disables links to external websites. This is a highly effective anti-spam +and anti-pagerank-leech measure, but comes at a hefty price: nolinks or +images outside of your domain will be allowed. Non-linkified URIs will +still be preserved. If you want to be able to link to subdomains or use +absolute URIs, specify %URI.Host for your website. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt new file mode 100644 index 0000000..abcc1ef --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt @@ -0,0 +1,13 @@ +URI.DisableExternalResources +TYPE: bool +VERSION: 1.3.0 +DEFAULT: false +--DESCRIPTION-- +Disables the embedding of external resources, preventing users from +embedding things like images from other hosts. This prevents access +tracking (good for email viewers), bandwidth leeching, cross-site request +forging, goatse.cx posting, and other nasties, but also results in a loss +of end-user functionality (they can't directly post a pic they posted from +Flickr anymore). Use it if you don't have a robust user-content moderation +team. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt new file mode 100644 index 0000000..f891de4 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt @@ -0,0 +1,15 @@ +URI.DisableResources +TYPE: bool +VERSION: 4.2.0 +DEFAULT: false +--DESCRIPTION-- +<p> + Disables embedding resources, essentially meaning no pictures. You can + still link to them though. See %URI.DisableExternalResources for why + this might be a good idea. +</p> +<p> + <em>Note:</em> While this directive has been available since 1.3.0, + it didn't actually start doing anything until 4.2.0. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Host.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Host.txt new file mode 100644 index 0000000..ee83b12 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Host.txt @@ -0,0 +1,19 @@ +URI.Host +TYPE: string/null +VERSION: 1.2.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + Defines the domain name of the server, so we can determine whether or + an absolute URI is from your website or not. Not strictly necessary, + as users should be using relative URIs to reference resources on your + website. It will, however, let you use absolute URIs to link to + subdomains of the domain you post here: i.e. example.com will allow + sub.example.com. However, higher up domains will still be excluded: + if you set %URI.Host to sub.example.com, example.com will be blocked. + <strong>Note:</strong> This directive overrides %URI.Base because + a given page may be on a sub-domain, but you wish HTML Purifier to be + more relaxed and allow some of the parent domains too. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt new file mode 100644 index 0000000..0b6df76 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt @@ -0,0 +1,9 @@ +URI.HostBlacklist +TYPE: list +VERSION: 1.3.0 +DEFAULT: array() +--DESCRIPTION-- +List of strings that are forbidden in the host of any URI. Use it to kill +domain names of spam, etc. Note that it will catch anything in the domain, +so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt new file mode 100644 index 0000000..4214900 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt @@ -0,0 +1,13 @@ +URI.MakeAbsolute +TYPE: bool +VERSION: 2.1.0 +DEFAULT: false +--DESCRIPTION-- + +<p> + Converts all URIs into absolute forms. This is useful when the HTML + being filtered assumes a specific base path, but will actually be + viewed in a different context (and setting an alternate base URI is + not possible). %URI.Base must be set for this directive to work. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt new file mode 100644 index 0000000..58c81dc --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt @@ -0,0 +1,83 @@ +URI.Munge +TYPE: string/null +VERSION: 1.3.0 +DEFAULT: NULL +--DESCRIPTION-- + +<p> + Munges all browsable (usually http, https and ftp) + absolute URIs into another URI, usually a URI redirection service. + This directive accepts a URI, formatted with a <code>%s</code> where + the url-encoded original URI should be inserted (sample: + <code>http://www.google.com/url?q=%s</code>). +</p> +<p> + Uses for this directive: +</p> +<ul> + <li> + Prevent PageRank leaks, while being fairly transparent + to users (you may also want to add some client side JavaScript to + override the text in the statusbar). <strong>Notice</strong>: + Many security experts believe that this form of protection does not deter spam-bots. + </li> + <li> + Redirect users to a splash page telling them they are leaving your + website. While this is poor usability practice, it is often mandated + in corporate environments. + </li> +</ul> +<p> + Prior to HTML Purifier 3.1.1, this directive also enabled the munging + of browsable external resources, which could break things if your redirection + script was a splash page or used <code>meta</code> tags. To revert to + previous behavior, please use %URI.MungeResources. +</p> +<p> + You may want to also use %URI.MungeSecretKey along with this directive + in order to enforce what URIs your redirector script allows. Open + redirector scripts can be a security risk and negatively affect the + reputation of your domain name. +</p> +<p> + Starting with HTML Purifier 3.1.1, there is also these substitutions: +</p> +<table> + <thead> + <tr> + <th>Key</th> + <th>Description</th> + <th>Example <code><a href=""></code></th> + </tr> + </thead> + <tbody> + <tr> + <td>%r</td> + <td>1 - The URI embeds a resource<br />(blank) - The URI is merely a link</td> + <td></td> + </tr> + <tr> + <td>%n</td> + <td>The name of the tag this URI came from</td> + <td>a</td> + </tr> + <tr> + <td>%m</td> + <td>The name of the attribute this URI came from</td> + <td>href</td> + </tr> + <tr> + <td>%p</td> + <td>The name of the CSS property this URI came from, or blank if irrelevant</td> + <td></td> + </tr> + </tbody> +</table> +<p> + Admittedly, these letters are somewhat arbitrary; the only stipulation + was that they couldn't be a through f. r is for resource (I would have preferred + e, but you take what you can get), n is for name, m + was picked because it came after n (and I couldn't use a), p is for + property. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt new file mode 100644 index 0000000..6fce0fd --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt @@ -0,0 +1,17 @@ +URI.MungeResources +TYPE: bool +VERSION: 3.1.1 +DEFAULT: false +--DESCRIPTION-- +<p> + If true, any URI munging directives like %URI.Munge + will also apply to embedded resources, such as <code><img src=""></code>. + Be careful enabling this directive if you have a redirector script + that does not use the <code>Location</code> HTTP header; all of your images + and other embedded resources will break. +</p> +<p> + <strong>Warning:</strong> It is strongly advised you use this in conjunction + %URI.MungeSecretKey to mitigate the security risk of an open redirector. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt new file mode 100644 index 0000000..0d00f62 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt @@ -0,0 +1,30 @@ +URI.MungeSecretKey +TYPE: string/null +VERSION: 3.1.1 +DEFAULT: NULL +--DESCRIPTION-- +<p> + This directive enables secure checksum generation along with %URI.Munge. + It should be set to a secure key that is not shared with anyone else. + The checksum can be placed in the URI using %t. Use of this checksum + affords an additional level of protection by allowing a redirector + to check if a URI has passed through HTML Purifier with this line: +</p> + +<pre>$checksum === sha1($secret_key . ':' . $url)</pre> + +<p> + If the output is TRUE, the redirector script should accept the URI. +</p> + +<p> + Please note that it would still be possible for an attacker to procure + secure hashes en-mass by abusing your website's Preview feature or the + like, but this service affords an additional level of protection + that should be combined with website blacklisting. +</p> + +<p> + Remember this has no effect if %URI.Munge is not on. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt new file mode 100644 index 0000000..23331a4 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt @@ -0,0 +1,9 @@ +URI.OverrideAllowedSchemes +TYPE: bool +DEFAULT: true +--DESCRIPTION-- +If this is set to true (which it is by default), you can override +%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme to the +registry. If false, you will also have to update that directive in order +to add more schemes. +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt new file mode 100644 index 0000000..7908483 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.SafeIframeRegexp.txt @@ -0,0 +1,22 @@ +URI.SafeIframeRegexp +TYPE: string/null +VERSION: 4.4.0 +DEFAULT: NULL +--DESCRIPTION-- +<p> + A PCRE regular expression that will be matched against an iframe URI. This is + a relatively inflexible scheme, but works well enough for the most common + use-case of iframes: embedded video. This directive only has an effect if + %HTML.SafeIframe is enabled. Here are some example values: +</p> +<ul> + <li><code>%^http://www.youtube.com/embed/%</code> - Allow YouTube videos</li> + <li><code>%^http://player.vimeo.com/video/%</code> - Allow Vimeo videos</li> + <li><code>%^http://(www.youtube.com/embed/|player.vimeo.com/video/)%</code> - Allow both</li> +</ul> +<p> + Note that this directive does not give you enough granularity to, say, disable + all <code>autoplay</code> videos. Pipe up on the HTML Purifier forums if this + is a capability you want. +</p> +--# vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/info.ini b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/info.ini new file mode 100644 index 0000000..5de4505 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/info.ini @@ -0,0 +1,3 @@ +name = "HTML Purifier" + +; vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/EntityLookup/entities.ser b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/EntityLookup/entities.ser new file mode 100644 index 0000000..e8b0812 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/EntityLookup/entities.ser @@ -0,0 +1 @@ +a:253:{s:4:"fnof";s:2:"ƒ";s:5:"Alpha";s:2:"Α";s:4:"Beta";s:2:"Β";s:5:"Gamma";s:2:"Γ";s:5:"Delta";s:2:"Δ";s:7:"Epsilon";s:2:"Ε";s:4:"Zeta";s:2:"Ζ";s:3:"Eta";s:2:"Η";s:5:"Theta";s:2:"Θ";s:4:"Iota";s:2:"Ι";s:5:"Kappa";s:2:"Κ";s:6:"Lambda";s:2:"Λ";s:2:"Mu";s:2:"Μ";s:2:"Nu";s:2:"Ν";s:2:"Xi";s:2:"Ξ";s:7:"Omicron";s:2:"Ο";s:2:"Pi";s:2:"Π";s:3:"Rho";s:2:"Ρ";s:5:"Sigma";s:2:"Σ";s:3:"Tau";s:2:"Τ";s:7:"Upsilon";s:2:"Υ";s:3:"Phi";s:2:"Φ";s:3:"Chi";s:2:"Χ";s:3:"Psi";s:2:"Ψ";s:5:"Omega";s:2:"Ω";s:5:"alpha";s:2:"α";s:4:"beta";s:2:"β";s:5:"gamma";s:2:"γ";s:5:"delta";s:2:"δ";s:7:"epsilon";s:2:"ε";s:4:"zeta";s:2:"ζ";s:3:"eta";s:2:"η";s:5:"theta";s:2:"θ";s:4:"iota";s:2:"ι";s:5:"kappa";s:2:"κ";s:6:"lambda";s:2:"λ";s:2:"mu";s:2:"μ";s:2:"nu";s:2:"ν";s:2:"xi";s:2:"ξ";s:7:"omicron";s:2:"ο";s:2:"pi";s:2:"π";s:3:"rho";s:2:"ρ";s:6:"sigmaf";s:2:"ς";s:5:"sigma";s:2:"σ";s:3:"tau";s:2:"τ";s:7:"upsilon";s:2:"υ";s:3:"phi";s:2:"φ";s:3:"chi";s:2:"χ";s:3:"psi";s:2:"ψ";s:5:"omega";s:2:"ω";s:8:"thetasym";s:2:"ϑ";s:5:"upsih";s:2:"ϒ";s:3:"piv";s:2:"ϖ";s:4:"bull";s:3:"•";s:6:"hellip";s:3:"…";s:5:"prime";s:3:"′";s:5:"Prime";s:3:"″";s:5:"oline";s:3:"‾";s:5:"frasl";s:3:"⁄";s:6:"weierp";s:3:"℘";s:5:"image";s:3:"ℑ";s:4:"real";s:3:"ℜ";s:5:"trade";s:3:"™";s:7:"alefsym";s:3:"ℵ";s:4:"larr";s:3:"←";s:4:"uarr";s:3:"↑";s:4:"rarr";s:3:"→";s:4:"darr";s:3:"↓";s:4:"harr";s:3:"↔";s:5:"crarr";s:3:"↵";s:4:"lArr";s:3:"⇐";s:4:"uArr";s:3:"⇑";s:4:"rArr";s:3:"⇒";s:4:"dArr";s:3:"⇓";s:4:"hArr";s:3:"⇔";s:6:"forall";s:3:"∀";s:4:"part";s:3:"∂";s:5:"exist";s:3:"∃";s:5:"empty";s:3:"∅";s:5:"nabla";s:3:"∇";s:4:"isin";s:3:"∈";s:5:"notin";s:3:"∉";s:2:"ni";s:3:"∋";s:4:"prod";s:3:"∏";s:3:"sum";s:3:"∑";s:5:"minus";s:3:"−";s:6:"lowast";s:3:"∗";s:5:"radic";s:3:"√";s:4:"prop";s:3:"∝";s:5:"infin";s:3:"∞";s:3:"ang";s:3:"∠";s:3:"and";s:3:"∧";s:2:"or";s:3:"∨";s:3:"cap";s:3:"∩";s:3:"cup";s:3:"∪";s:3:"int";s:3:"∫";s:6:"there4";s:3:"∴";s:3:"sim";s:3:"∼";s:4:"cong";s:3:"≅";s:5:"asymp";s:3:"≈";s:2:"ne";s:3:"≠";s:5:"equiv";s:3:"≡";s:2:"le";s:3:"≤";s:2:"ge";s:3:"≥";s:3:"sub";s:3:"⊂";s:3:"sup";s:3:"⊃";s:4:"nsub";s:3:"⊄";s:4:"sube";s:3:"⊆";s:4:"supe";s:3:"⊇";s:5:"oplus";s:3:"⊕";s:6:"otimes";s:3:"⊗";s:4:"perp";s:3:"⊥";s:4:"sdot";s:3:"⋅";s:5:"lceil";s:3:"⌈";s:5:"rceil";s:3:"⌉";s:6:"lfloor";s:3:"⌊";s:6:"rfloor";s:3:"⌋";s:4:"lang";s:3:"〈";s:4:"rang";s:3:"〉";s:3:"loz";s:3:"◊";s:6:"spades";s:3:"♠";s:5:"clubs";s:3:"♣";s:6:"hearts";s:3:"♥";s:5:"diams";s:3:"♦";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"";s:3:"zwj";s:3:"";s:3:"lrm";s:3:"";s:3:"rlm";s:3:"";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"nbsp";s:2:" ";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:4:"sup2";s:2:"²";s:4:"sup3";s:2:"³";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"sup1";s:2:"¹";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"frac14";s:2:"¼";s:6:"frac12";s:2:"½";s:6:"frac34";s:2:"¾";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc";s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc";s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";}
\ No newline at end of file diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Filter/ExtractStyleBlocks.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Filter/ExtractStyleBlocks.php new file mode 100644 index 0000000..320aa4f --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Filter/ExtractStyleBlocks.php @@ -0,0 +1,288 @@ +<?php + +// why is this a top level function? Because PHP 5.2.0 doesn't seem to +// understand how to interpret this filter if it's a static method. +// It's all really silly, but if we go this route it might be reasonable +// to coalesce all of these methods into one. +function htmlpurifier_filter_extractstyleblocks_muteerrorhandler() {} + +/** + * This filter extracts <style> blocks from input HTML, cleans them up + * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks') + * so they can be used elsewhere in the document. + * + * @note + * See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for + * sample usage. + * + * @note + * This filter can also be used on stylesheets not included in the + * document--something purists would probably prefer. Just directly + * call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS() + */ +class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter +{ + + public $name = 'ExtractStyleBlocks'; + private $_styleMatches = array(); + private $_tidy; + + private $_id_attrdef; + private $_class_attrdef; + private $_enum_attrdef; + + public function __construct() { + $this->_tidy = new csstidy(); + $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true); + $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident(); + $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus')); + } + + /** + * Save the contents of CSS blocks to style matches + * @param $matches preg_replace style $matches array + */ + protected function styleCallback($matches) { + $this->_styleMatches[] = $matches[1]; + } + + /** + * Removes inline <style> tags from HTML, saves them for later use + * @todo Extend to indicate non-text/css style blocks + */ + public function preFilter($html, $config, $context) { + $tidy = $config->get('Filter.ExtractStyleBlocks.TidyImpl'); + if ($tidy !== null) $this->_tidy = $tidy; + $html = preg_replace_callback('#<style(?:\s.*)?>(.+)</style>#isU', array($this, 'styleCallback'), $html); + $style_blocks = $this->_styleMatches; + $this->_styleMatches = array(); // reset + $context->register('StyleBlocks', $style_blocks); // $context must not be reused + if ($this->_tidy) { + foreach ($style_blocks as &$style) { + $style = $this->cleanCSS($style, $config, $context); + } + } + return $html; + } + + /** + * Takes CSS (the stuff found in <style>) and cleans it. + * @warning Requires CSSTidy <http://csstidy.sourceforge.net/> + * @param $css CSS styling to clean + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return Cleaned CSS + */ + public function cleanCSS($css, $config, $context) { + // prepare scope + $scope = $config->get('Filter.ExtractStyleBlocks.Scope'); + if ($scope !== null) { + $scopes = array_map('trim', explode(',', $scope)); + } else { + $scopes = array(); + } + // remove comments from CSS + $css = trim($css); + if (strncmp('<!--', $css, 4) === 0) { + $css = substr($css, 4); + } + if (strlen($css) > 3 && substr($css, -3) == '-->') { + $css = substr($css, 0, -3); + } + $css = trim($css); + set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler'); + $this->_tidy->parse($css); + restore_error_handler(); + $css_definition = $config->getDefinition('CSS'); + $html_definition = $config->getDefinition('HTML'); + $new_css = array(); + foreach ($this->_tidy->css as $k => $decls) { + // $decls are all CSS declarations inside an @ selector + $new_decls = array(); + foreach ($decls as $selector => $style) { + $selector = trim($selector); + if ($selector === '') continue; // should not happen + // Parse the selector + // Here is the relevant part of the CSS grammar: + // + // ruleset + // : selector [ ',' S* selector ]* '{' ... + // selector + // : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]? + // combinator + // : '+' S* + // : '>' S* + // simple_selector + // : element_name [ HASH | class | attrib | pseudo ]* + // | [ HASH | class | attrib | pseudo ]+ + // element_name + // : IDENT | '*' + // ; + // class + // : '.' IDENT + // ; + // attrib + // : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* + // [ IDENT | STRING ] S* ]? ']' + // ; + // pseudo + // : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ] + // ; + // + // For reference, here are the relevant tokens: + // + // HASH #{name} + // IDENT {ident} + // INCLUDES == + // DASHMATCH |= + // STRING {string} + // FUNCTION {ident}\( + // + // And the lexical scanner tokens + // + // name {nmchar}+ + // nmchar [_a-z0-9-]|{nonascii}|{escape} + // nonascii [\240-\377] + // escape {unicode}|\\[^\r\n\f0-9a-f] + // unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])? + // ident -?{nmstart}{nmchar*} + // nmstart [_a-z]|{nonascii}|{escape} + // string {string1}|{string2} + // string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" + // string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\' + // + // We'll implement a subset (in order to reduce attack + // surface); in particular: + // + // - No Unicode support + // - No escapes support + // - No string support (by proxy no attrib support) + // - element_name is matched against allowed + // elements (some people might find this + // annoying...) + // - Pseudo-elements one of :first-child, :link, + // :visited, :active, :hover, :focus + + // handle ruleset + $selectors = array_map('trim', explode(',', $selector)); + $new_selectors = array(); + foreach ($selectors as $sel) { + // split on +, > and spaces + $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE); + // even indices are chunks, odd indices are + // delimiters + $nsel = null; + $delim = null; // guaranteed to be non-null after + // two loop iterations + for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) { + $x = $basic_selectors[$i]; + if ($i % 2) { + // delimiter + if ($x === ' ') { + $delim = ' '; + } else { + $delim = ' ' . $x . ' '; + } + } else { + // simple selector + $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE); + $sdelim = null; + $nx = null; + for ($j = 0, $cc = count($components); $j < $cc; $j ++) { + $y = $components[$j]; + if ($j === 0) { + if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) { + $nx = $y; + } else { + // $nx stays null; this matters + // if we don't manage to find + // any valid selector content, + // in which case we ignore the + // outer $delim + } + } elseif ($j % 2) { + // set delimiter + $sdelim = $y; + } else { + $attrdef = null; + if ($sdelim === '#') { + $attrdef = $this->_id_attrdef; + } elseif ($sdelim === '.') { + $attrdef = $this->_class_attrdef; + } elseif ($sdelim === ':') { + $attrdef = $this->_enum_attrdef; + } else { + throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split'); + } + $r = $attrdef->validate($y, $config, $context); + if ($r !== false) { + if ($r !== true) { + $y = $r; + } + if ($nx === null) { + $nx = ''; + } + $nx .= $sdelim . $y; + } + } + } + if ($nx !== null) { + if ($nsel === null) { + $nsel = $nx; + } else { + $nsel .= $delim . $nx; + } + } else { + // delimiters to the left of invalid + // basic selector ignored + } + } + } + if ($nsel !== null) { + if (!empty($scopes)) { + foreach ($scopes as $s) { + $new_selectors[] = "$s $nsel"; + } + } else { + $new_selectors[] = $nsel; + } + } + } + if (empty($new_selectors)) continue; + $selector = implode(', ', $new_selectors); + foreach ($style as $name => $value) { + if (!isset($css_definition->info[$name])) { + unset($style[$name]); + continue; + } + $def = $css_definition->info[$name]; + $ret = $def->validate($value, $config, $context); + if ($ret === false) unset($style[$name]); + else $style[$name] = $ret; + } + $new_decls[$selector] = $style; + } + $new_css[$k] = $new_decls; + } + // remove stuff that shouldn't be used, could be reenabled + // after security risks are analyzed + $this->_tidy->css = $new_css; + $this->_tidy->import = array(); + $this->_tidy->charset = null; + $this->_tidy->namespace = null; + $css = $this->_tidy->print->plain(); + // we are going to escape any special characters <>& to ensure + // that no funny business occurs (i.e. </style> in a font-family prop). + if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { + $css = str_replace( + array('<', '>', '&'), + array('\3C ', '\3E ', '\26 '), + $css + ); + } + return $css; + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Filter/YouTube.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Filter/YouTube.php new file mode 100644 index 0000000..23df221 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Filter/YouTube.php @@ -0,0 +1,39 @@ +<?php + +class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter +{ + + public $name = 'YouTube'; + + public function preFilter($html, $config, $context) { + $pre_regex = '#<object[^>]+>.+?'. + 'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s'; + $pre_replace = '<span class="youtube-embed">\1</span>'; + return preg_replace($pre_regex, $pre_replace, $html); + } + + public function postFilter($html, $config, $context) { + $post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#'; + return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); + } + + protected function armorUrl($url) { + return str_replace('--', '--', $url); + } + + protected function postFilterCallback($matches) { + $url = $this->armorUrl($matches[1]); + return '<object width="425" height="350" type="application/x-shockwave-flash" '. + 'data="http://www.youtube.com/'.$url.'">'. + '<param name="movie" value="http://www.youtube.com/'.$url.'"></param>'. + '<!--[if IE]>'. + '<embed src="http://www.youtube.com/'.$url.'"'. + 'type="application/x-shockwave-flash"'. + 'wmode="transparent" width="425" height="350" />'. + '<![endif]-->'. + '</object>'; + + } +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/classes/en-x-test.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/classes/en-x-test.php new file mode 100644 index 0000000..d52fcb7 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/classes/en-x-test.php @@ -0,0 +1,12 @@ +<?php + +// private class for unit testing + +class HTMLPurifier_Language_en_x_test extends HTMLPurifier_Language +{ + + + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en-x-test.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en-x-test.php new file mode 100644 index 0000000..1c046f3 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en-x-test.php @@ -0,0 +1,11 @@ +<?php + +// private language message file for unit testing purposes + +$fallback = 'en'; + +$messages = array( + 'HTMLPurifier' => 'HTML Purifier X' +); + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en-x-testmini.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en-x-testmini.php new file mode 100644 index 0000000..806c83f --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en-x-testmini.php @@ -0,0 +1,12 @@ +<?php + +// private language message file for unit testing purposes +// this language file has no class associated with it + +$fallback = 'en'; + +$messages = array( + 'HTMLPurifier' => 'HTML Purifier XNone' +); + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en.php new file mode 100644 index 0000000..8d7b573 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Language/messages/en.php @@ -0,0 +1,63 @@ +<?php + +$fallback = false; + +$messages = array( + +'HTMLPurifier' => 'HTML Purifier', + +// for unit testing purposes +'LanguageFactoryTest: Pizza' => 'Pizza', +'LanguageTest: List' => '$1', +'LanguageTest: Hash' => '$1.Keys; $1.Values', + +'Item separator' => ', ', +'Item separator last' => ' and ', // non-Harvard style + +'ErrorCollector: No errors' => 'No errors detected. However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.', +'ErrorCollector: At line' => ' at line $line', +'ErrorCollector: Incidental errors' => 'Incidental errors', + +'Lexer: Unclosed comment' => 'Unclosed comment', +'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <', +'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', +'Lexer: Missing attribute key' => 'Attribute declaration has no key', +'Lexer: Missing end quote' => 'Attribute declaration has no end quote', +'Lexer: Extracted body' => 'Removed document metadata tags', + +'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', +'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', +'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text', +'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed', +'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed', +'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed', +'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end', +'Strategy_RemoveForeignElements: Trailing hyphen in comment removed' => 'Trailing hyphen(s) in comment removed', +'Strategy_RemoveForeignElements: Hyphens in comment collapsed' => 'Double hyphens in comments are not allowed, and were collapsed into single hyphens', + +'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', +'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text', +'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', +'Strategy_MakeWellFormed: Tag carryover' => '$1.Compact started on line $1.Line auto-continued into $CurrentToken.Compact', +'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', +'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text', +'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', +'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document', + +'Strategy_FixNesting: Node removed' => '$CurrentToken.Compact node removed', +'Strategy_FixNesting: Node excluded' => '$CurrentToken.Compact node removed due to descendant exclusion by ancestor element', +'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model', +'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed', + +'AttrValidator: Attributes transformed' => 'Attributes on $CurrentToken.Compact transformed from $1.Keys to $2.Keys', +'AttrValidator: Attribute removed' => '$CurrentAttr.Name attribute on $CurrentToken.Compact removed', + +); + +$errorNames = array( + E_ERROR => 'Error', + E_WARNING => 'Warning', + E_NOTICE => 'Notice' +); + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Lexer/PH5P.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Lexer/PH5P.php new file mode 100644 index 0000000..faf00b8 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Lexer/PH5P.php @@ -0,0 +1,3904 @@ +<?php + +/** + * Experimental HTML5-based parser using Jeroen van der Meer's PH5P library. + * Occupies space in the HTML5 pseudo-namespace, which may cause conflicts. + * + * @note + * Recent changes to PHP's DOM extension have resulted in some fatal + * error conditions with the original version of PH5P. Pending changes, + * this lexer will punt to DirectLex if DOM throughs an exception. + */ + +class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex { + + public function tokenizeHTML($html, $config, $context) { + $new_html = $this->normalize($html, $config, $context); + $new_html = $this->wrapHTML($new_html, $config, $context); + try { + $parser = new HTML5($new_html); + $doc = $parser->save(); + } catch (DOMException $e) { + // Uh oh, it failed. Punt to DirectLex. + $lexer = new HTMLPurifier_Lexer_DirectLex(); + $context->register('PH5PError', $e); // save the error, so we can detect it + return $lexer->tokenizeHTML($html, $config, $context); // use original HTML + } + $tokens = array(); + $this->tokenizeDOM( + $doc->getElementsByTagName('html')->item(0)-> // <html> + getElementsByTagName('body')->item(0)-> // <body> + getElementsByTagName('div')->item(0) // <div> + , $tokens); + return $tokens; + } + +} + +/* + +Copyright 2007 Jeroen van der Meer <http://jero.net/> + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +*/ + +class HTML5 { + private $data; + private $char; + private $EOF; + private $state; + private $tree; + private $token; + private $content_model; + private $escape = false; + private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', + 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', + 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', + 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', + 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', + 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', + 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', + 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', + 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', + 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', + 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', + 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', + 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', + 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', + 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', + 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', + 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', + 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', + 'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', + 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', + 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', + 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', + 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', + 'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', + 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', + 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', + 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', + 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', + 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', + 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', + 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', + 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', + 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', + 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', + 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', + 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', + 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', + 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', + 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', + 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', + 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', + 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); + + const PCDATA = 0; + const RCDATA = 1; + const CDATA = 2; + const PLAINTEXT = 3; + + const DOCTYPE = 0; + const STARTTAG = 1; + const ENDTAG = 2; + const COMMENT = 3; + const CHARACTR = 4; + const EOF = 5; + + public function __construct($data) { + + $this->data = $data; + $this->char = -1; + $this->EOF = strlen($data); + $this->tree = new HTML5TreeConstructer; + $this->content_model = self::PCDATA; + + $this->state = 'data'; + + while($this->state !== null) { + $this->{$this->state.'State'}(); + } + } + + public function save() { + return $this->tree->save(); + } + + private function char() { + return ($this->char < $this->EOF) + ? $this->data[$this->char] + : false; + } + + private function character($s, $l = 0) { + if($s + $l < $this->EOF) { + if($l === 0) { + return $this->data[$s]; + } else { + return substr($this->data, $s, $l); + } + } + } + + private function characters($char_class, $start) { + return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); + } + + private function dataState() { + // Consume the next input character + $this->char++; + $char = $this->char(); + + if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { + /* U+0026 AMPERSAND (&) + When the content model flag is set to one of the PCDATA or RCDATA + states: switch to the entity data state. Otherwise: treat it as per + the "anything else" entry below. */ + $this->state = 'entityData'; + + } elseif($char === '-') { + /* If the content model flag is set to either the RCDATA state or + the CDATA state, and the escape flag is false, and there are at + least three characters before this one in the input stream, and the + last four characters in the input stream, including this one, are + U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, + and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */ + if(($this->content_model === self::RCDATA || $this->content_model === + self::CDATA) && $this->escape === false && + $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--') { + $this->escape = true; + } + + /* In any case, emit the input character as a character token. Stay + in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + /* U+003C LESS-THAN SIGN (<) */ + } elseif($char === '<' && ($this->content_model === self::PCDATA || + (($this->content_model === self::RCDATA || + $this->content_model === self::CDATA) && $this->escape === false))) { + /* When the content model flag is set to the PCDATA state: switch + to the tag open state. + + When the content model flag is set to either the RCDATA state or + the CDATA state and the escape flag is false: switch to the tag + open state. + + Otherwise: treat it as per the "anything else" entry below. */ + $this->state = 'tagOpen'; + + /* U+003E GREATER-THAN SIGN (>) */ + } elseif($char === '>') { + /* If the content model flag is set to either the RCDATA state or + the CDATA state, and the escape flag is true, and the last three + characters in the input stream including this one are U+002D + HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"), + set the escape flag to false. */ + if(($this->content_model === self::RCDATA || + $this->content_model === self::CDATA) && $this->escape === true && + $this->character($this->char, 3) === '-->') { + $this->escape = false; + } + + /* In any case, emit the input character as a character token. + Stay in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + } elseif($this->char === $this->EOF) { + /* EOF + Emit an end-of-file token. */ + $this->EOF(); + + } elseif($this->content_model === self::PLAINTEXT) { + /* When the content model flag is set to the PLAINTEXT state + THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of + the text and emit it as a character token. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => substr($this->data, $this->char) + )); + + $this->EOF(); + + } else { + /* Anything else + THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that + otherwise would also be treated as a character token and emit it + as a single character token. Stay in the data state. */ + $len = strcspn($this->data, '<&', $this->char); + $char = substr($this->data, $this->char, $len); + $this->char += $len - 1; + + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + $this->state = 'data'; + } + } + + private function entityDataState() { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, emit a U+0026 AMPERSAND character token. + // Otherwise, emit the character token that was returned. + $char = (!$entity) ? '&' : $entity; + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + // Finally, switch to the data state. + $this->state = 'data'; + } + + private function tagOpenState() { + switch($this->content_model) { + case self::RCDATA: + case self::CDATA: + /* If the next input character is a U+002F SOLIDUS (/) character, + consume it and switch to the close tag open state. If the next + input character is not a U+002F SOLIDUS (/) character, emit a + U+003C LESS-THAN SIGN character token and switch to the data + state to process the next input character. */ + if($this->character($this->char + 1) === '/') { + $this->char++; + $this->state = 'closeTagOpen'; + + } else { + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<' + )); + + $this->state = 'data'; + } + break; + + case self::PCDATA: + // If the content model flag is set to the PCDATA state + // Consume the next input character: + $this->char++; + $char = $this->char(); + + if($char === '!') { + /* U+0021 EXCLAMATION MARK (!) + Switch to the markup declaration open state. */ + $this->state = 'markupDeclarationOpen'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Switch to the close tag open state. */ + $this->state = 'closeTagOpen'; + + } elseif(preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new start tag token, set its tag name to the lowercase + version of the input character (add 0x0020 to the character's code + point), then switch to the tag name state. (Don't emit the token + yet; further details will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::STARTTAG, + 'attr' => array() + ); + + $this->state = 'tagName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Emit a U+003C LESS-THAN SIGN character token and a + U+003E GREATER-THAN SIGN character token. Switch to the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<>' + )); + + $this->state = 'data'; + + } elseif($char === '?') { + /* U+003F QUESTION MARK (?) + Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + + } else { + /* Anything else + Parse error. Emit a U+003C LESS-THAN SIGN character token and + reconsume the current input character in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<' + )); + + $this->char--; + $this->state = 'data'; + } + break; + } + } + + private function closeTagOpenState() { + $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); + $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; + + if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && + (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', + $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { + /* If the content model flag is set to the RCDATA or CDATA states then + examine the next few characters. If they do not match the tag name of + the last start tag token emitted (case insensitively), or if they do but + they are not immediately followed by one of the following characters: + * U+0009 CHARACTER TABULATION + * U+000A LINE FEED (LF) + * U+000B LINE TABULATION + * U+000C FORM FEED (FF) + * U+0020 SPACE + * U+003E GREATER-THAN SIGN (>) + * U+002F SOLIDUS (/) + * EOF + ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character + token, a U+002F SOLIDUS character token, and switch to the data state + to process the next input character. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '</' + )); + + $this->state = 'data'; + + } else { + /* Otherwise, if the content model flag is set to the PCDATA state, + or if the next few characters do match that tag name, consume the + next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new end tag token, set its tag name to the lowercase version + of the input character (add 0x0020 to the character's code point), then + switch to the tag name state. (Don't emit the token yet; further details + will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::ENDTAG + ); + + $this->state = 'tagName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Switch to the data state. */ + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F + SOLIDUS character token. Reconsume the EOF character in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '</' + )); + + $this->char--; + $this->state = 'data'; + + } else { + /* Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + } + } + } + + private function tagNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } else { + /* Anything else + Append the current input character to the current tag token's tag name. + Stay in the tag name state. */ + $this->token['name'] .= strtolower($char); + $this->state = 'tagName'; + } + } + + private function beforeAttributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Stay in the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function attributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's name. + Stay in the attribute name state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['name'] .= strtolower($char); + + $this->state = 'attributeName'; + } + } + + private function afterAttributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the + before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function beforeAttributeValueState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the attribute value (double-quoted) state. */ + $this->state = 'attributeValueDoubleQuoted'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the attribute value (unquoted) state and reconsume + this input character. */ + $this->char--; + $this->state = 'attributeValueUnquoted'; + + } elseif($char === '\'') { + /* U+0027 APOSTROPHE (') + Switch to the attribute value (single-quoted) state. */ + $this->state = 'attributeValueSingleQuoted'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Switch to the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function attributeValueDoubleQuotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('double'); + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (double-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueDoubleQuoted'; + } + } + + private function attributeValueSingleQuotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if($char === '\'') { + /* U+0022 QUOTATION MARK (') + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState('single'); + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (single-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueSingleQuoted'; + } + } + + private function attributeValueUnquotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState(); + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function entityInAttributeValueState() { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, append a U+0026 AMPERSAND character to the + // current attribute's value. Otherwise, emit the character token that + // was returned. + $char = (!$entity) + ? '&' + : $entity; + + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + } + + private function bogusCommentState() { + /* Consume every character up to the first U+003E GREATER-THAN SIGN + character (>) or the end of the file (EOF), whichever comes first. Emit + a comment token whose data is the concatenation of all the characters + starting from and including the character that caused the state machine + to switch into the bogus comment state, up to and including the last + consumed character before the U+003E character, if any, or up to the + end of the file otherwise. (If the comment was started by the end of + the file (EOF), the token is empty.) */ + $data = $this->characters('^>', $this->char); + $this->emitToken(array( + 'data' => $data, + 'type' => self::COMMENT + )); + + $this->char += strlen($data); + + /* Switch to the data state. */ + $this->state = 'data'; + + /* If the end of the file was reached, reconsume the EOF character. */ + if($this->char === $this->EOF) { + $this->char = $this->EOF - 1; + } + } + + private function markupDeclarationOpenState() { + /* If the next two characters are both U+002D HYPHEN-MINUS (-) + characters, consume those two characters, create a comment token whose + data is the empty string, and switch to the comment state. */ + if($this->character($this->char + 1, 2) === '--') { + $this->char += 2; + $this->state = 'comment'; + $this->token = array( + 'data' => null, + 'type' => self::COMMENT + ); + + /* Otherwise if the next seven chacacters are a case-insensitive match + for the word "DOCTYPE", then consume those characters and switch to the + DOCTYPE state. */ + } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { + $this->char += 7; + $this->state = 'doctype'; + + /* Otherwise, is is a parse error. Switch to the bogus comment state. + The next character that is consumed, if any, is the first character + that will be in the comment. */ + } else { + $this->char++; + $this->state = 'bogusComment'; + } + } + + private function commentState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if($char === '-') { + /* Switch to the comment dash state */ + $this->state = 'commentDash'; + + /* EOF */ + } elseif($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append the input character to the comment token's data. Stay in + the comment state. */ + $this->token['data'] .= $char; + } + } + + private function commentDashState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if($char === '-') { + /* Switch to the comment end state */ + $this->state = 'commentEnd'; + + /* EOF */ + } elseif($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append a U+002D HYPHEN-MINUS (-) character and the input + character to the comment token's data. Switch to the comment state. */ + $this->token['data'] .= '-'.$char; + $this->state = 'comment'; + } + } + + private function commentEndState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '-') { + $this->token['data'] .= '-'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['data'] .= '--'.$char; + $this->state = 'comment'; + } + } + + private function doctypeState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'beforeDoctypeName'; + + } else { + $this->char--; + $this->state = 'beforeDoctypeName'; + } + } + + private function beforeDoctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the before DOCTYPE name state. + + } elseif(preg_match('/^[a-z]$/', $char)) { + $this->token = array( + 'name' => strtoupper($char), + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + + } elseif($char === '>') { + $this->emitToken(array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + )); + + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken(array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + )); + + $this->char--; + $this->state = 'data'; + + } else { + $this->token = array( + 'name' => $char, + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + } + } + + private function doctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'AfterDoctypeName'; + + } elseif($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif(preg_match('/^[a-z]$/', $char)) { + $this->token['name'] .= strtoupper($char); + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['name'] .= $char; + } + + $this->token['error'] = ($this->token['name'] === 'HTML') + ? false + : true; + } + + private function afterDoctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the DOCTYPE name state. + + } elseif($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['error'] = true; + $this->state = 'bogusDoctype'; + } + } + + private function bogusDoctypeState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + // Stay in the bogus DOCTYPE state. + } + } + + private function entity() { + $start = $this->char; + + // This section defines how to consume an entity. This definition is + // used when parsing entities in text and in attributes. + + // The behaviour depends on the identity of the next character (the + // one immediately after the U+0026 AMPERSAND character): + + switch($this->character($this->char + 1)) { + // U+0023 NUMBER SIGN (#) + case '#': + + // The behaviour further depends on the character after the + // U+0023 NUMBER SIGN: + switch($this->character($this->char + 1)) { + // U+0078 LATIN SMALL LETTER X + // U+0058 LATIN CAPITAL LETTER X + case 'x': + case 'X': + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 + // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER + // A, through to U+0046 LATIN CAPITAL LETTER F (in other + // words, 0-9, A-F, a-f). + $char = 1; + $char_class = '0-9A-Fa-f'; + break; + + // Anything else + default: + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE (i.e. just 0-9). + $char = 0; + $char_class = '0-9'; + break; + } + + // Consume as many characters as match the range of characters + // given above. + $this->char++; + $e_name = $this->characters($char_class, $this->char + $char + 1); + $entity = $this->character($start, $this->char); + $cond = strlen($e_name) > 0; + + // The rest of the parsing happens bellow. + break; + + // Anything else + default: + // Consume the maximum number of characters possible, with the + // consumed characters case-sensitively matching one of the + // identifiers in the first column of the entities table. + $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); + $len = strlen($e_name); + + for($c = 1; $c <= $len; $c++) { + $id = substr($e_name, 0, $c); + $this->char++; + + if(in_array($id, $this->entities)) { + if ($e_name[$c-1] !== ';') { + if ($c < $len && $e_name[$c] == ';') { + $this->char++; // consume extra semicolon + } + } + $entity = $id; + break; + } + } + + $cond = isset($entity); + // The rest of the parsing happens bellow. + break; + } + + if(!$cond) { + // If no match can be made, then this is a parse error. No + // characters are consumed, and nothing is returned. + $this->char = $start; + return false; + } + + // Return a character token for the character corresponding to the + // entity name (as given by the second column of the entities table). + return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8'); + } + + private function emitToken($token) { + $emit = $this->tree->emitToken($token); + + if(is_int($emit)) { + $this->content_model = $emit; + + } elseif($token['type'] === self::ENDTAG) { + $this->content_model = self::PCDATA; + } + } + + private function EOF() { + $this->state = null; + $this->tree->emitToken(array( + 'type' => self::EOF + )); + } +} + +class HTML5TreeConstructer { + public $stack = array(); + + private $phase; + private $mode; + private $dom; + private $foster_parent = null; + private $a_formatting = array(); + + private $head_pointer = null; + private $form_pointer = null; + + private $scoping = array('button','caption','html','marquee','object','table','td','th'); + private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); + private $special = array('address','area','base','basefont','bgsound', + 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', + 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', + 'h6','head','hr','iframe','image','img','input','isindex','li','link', + 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', + 'option','p','param','plaintext','pre','script','select','spacer','style', + 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); + + // The different phases. + const INIT_PHASE = 0; + const ROOT_PHASE = 1; + const MAIN_PHASE = 2; + const END_PHASE = 3; + + // The different insertion modes for the main phase. + const BEFOR_HEAD = 0; + const IN_HEAD = 1; + const AFTER_HEAD = 2; + const IN_BODY = 3; + const IN_TABLE = 4; + const IN_CAPTION = 5; + const IN_CGROUP = 6; + const IN_TBODY = 7; + const IN_ROW = 8; + const IN_CELL = 9; + const IN_SELECT = 10; + const AFTER_BODY = 11; + const IN_FRAME = 12; + const AFTR_FRAME = 13; + + // The different types of elements. + const SPECIAL = 0; + const SCOPING = 1; + const FORMATTING = 2; + const PHRASING = 3; + + const MARKER = 0; + + public function __construct() { + $this->phase = self::INIT_PHASE; + $this->mode = self::BEFOR_HEAD; + $this->dom = new DOMDocument; + + $this->dom->encoding = 'UTF-8'; + $this->dom->preserveWhiteSpace = true; + $this->dom->substituteEntities = true; + $this->dom->strictErrorChecking = false; + } + + // Process tag tokens + public function emitToken($token) { + switch($this->phase) { + case self::INIT_PHASE: return $this->initPhase($token); break; + case self::ROOT_PHASE: return $this->rootElementPhase($token); break; + case self::MAIN_PHASE: return $this->mainPhase($token); break; + case self::END_PHASE : return $this->trailingEndPhase($token); break; + } + } + + private function initPhase($token) { + /* Initially, the tree construction stage must handle each token + emitted from the tokenisation stage as follows: */ + + /* A DOCTYPE token that is marked as being in error + A comment token + A start tag token + An end tag token + A character token that is not one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE + An end-of-file token */ + if((isset($token['error']) && $token['error']) || + $token['type'] === HTML5::COMMENT || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF || + ($token['type'] === HTML5::CHARACTR && isset($token['data']) && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) { + /* This specification does not define how to handle this case. In + particular, user agents may ignore the entirety of this specification + altogether for such documents, and instead invoke special parse modes + with a greater emphasis on backwards compatibility. */ + + $this->phase = self::ROOT_PHASE; + return $this->rootElementPhase($token); + + /* A DOCTYPE token marked as being correct */ + } elseif(isset($token['error']) && !$token['error']) { + /* Append a DocumentType node to the Document node, with the name + attribute set to the name given in the DOCTYPE token (which will be + "HTML"), and the other attributes specific to DocumentType objects + set to null, empty lists, or the empty string as appropriate. */ + $doctype = new DOMDocumentType(null, null, 'HTML'); + + /* Then, switch to the root element phase of the tree construction + stage. */ + $this->phase = self::ROOT_PHASE; + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', + $token['data'])) { + /* Append that character to the Document node. */ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + } + } + + private function rootElementPhase($token) { + /* After the initial phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append that character to the Document node. */ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED + (FF), or U+0020 SPACE + A start tag token + An end tag token + An end-of-file token */ + } elseif(($token['type'] === HTML5::CHARACTR && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF) { + /* Create an HTMLElement node with the tag name html, in the HTML + namespace. Append it to the Document object. Switch to the main + phase and reprocess the current token. */ + $html = $this->dom->createElement('html'); + $this->dom->appendChild($html); + $this->stack[] = $html; + + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + } + } + + private function mainPhase($token) { + /* Tokens in the main phase must be handled as follows: */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A start tag token with the tag name "html" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { + /* If this start tag token was not the first start tag token, then + it is a parse error. */ + + /* For each attribute on the token, check to see if the attribute + is already present on the top element of the stack of open elements. + If it is not, add the attribute and its corresponding value to that + element. */ + foreach($token['attr'] as $attr) { + if(!$this->stack[0]->hasAttribute($attr['name'])) { + $this->stack[0]->setAttribute($attr['name'], $attr['value']); + } + } + + /* An end-of-file token */ + } elseif($token['type'] === HTML5::EOF) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Anything else. */ + } else { + /* Depends on the insertion mode: */ + switch($this->mode) { + case self::BEFOR_HEAD: return $this->beforeHead($token); break; + case self::IN_HEAD: return $this->inHead($token); break; + case self::AFTER_HEAD: return $this->afterHead($token); break; + case self::IN_BODY: return $this->inBody($token); break; + case self::IN_TABLE: return $this->inTable($token); break; + case self::IN_CAPTION: return $this->inCaption($token); break; + case self::IN_CGROUP: return $this->inColumnGroup($token); break; + case self::IN_TBODY: return $this->inTableBody($token); break; + case self::IN_ROW: return $this->inRow($token); break; + case self::IN_CELL: return $this->inCell($token); break; + case self::IN_SELECT: return $this->inSelect($token); break; + case self::AFTER_BODY: return $this->afterBody($token); break; + case self::IN_FRAME: return $this->inFrameset($token); break; + case self::AFTR_FRAME: return $this->afterFrameset($token); break; + case self::END_PHASE: return $this->trailingEndPhase($token); break; + } + } + } + + private function beforeHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token with the tag name "head" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { + /* Create an element for the token, append the new element to the + current node and push it onto the stack of open elements. */ + $element = $this->insertElement($token); + + /* Set the head element pointer to this new element node. */ + $this->head_pointer = $element; + + /* Change the insertion mode to "in head". */ + $this->mode = self::IN_HEAD; + + /* A start tag token whose tag name is one of: "base", "link", "meta", + "script", "style", "title". Or an end tag with the tag name "html". + Or a character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. Or any other start tag token */ + } elseif($token['type'] === HTML5::STARTTAG || + ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || + ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', + $token['data']))) { + /* Act as if a start tag token with the tag name "head" and no + attributes had been seen, then reprocess the current token. */ + $this->beforeHead(array( + 'name' => 'head', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inHead($token); + + /* Any other end tag */ + } elseif($token['type'] === HTML5::ENDTAG) { + /* Parse error. Ignore the token. */ + } + } + + private function inHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. + + THIS DIFFERS FROM THE SPEC: If the current node is either a title, style + or script element, append the character to the current node regardless + of its content. */ + if(($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( + $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName, + array('title', 'style', 'script')))) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('title', 'style', 'script'))) { + array_pop($this->stack); + return HTML5::PCDATA; + + /* A start tag with the tag name "title" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + } else { + $element = $this->insertElement($token); + } + + /* Switch the tokeniser's content model flag to the RCDATA state. */ + return HTML5::RCDATA; + + /* A start tag with the tag name "style" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + } else { + $this->insertElement($token); + } + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + + /* A start tag with the tag name "script" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { + /* Create an element for the token. */ + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + + /* A start tag with the tag name "base", "link", or "meta" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('base', 'link', 'meta'))) { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + array_pop($this->stack); + + } else { + $this->insertElement($token); + } + + /* An end tag with the tag name "head" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { + /* If the current node is a head element, pop the current node off + the stack of open elements. */ + if($this->head_pointer->isSameNode(end($this->stack))) { + array_pop($this->stack); + + /* Otherwise, this is a parse error. */ + } else { + // k + } + + /* Change the insertion mode to "after head". */ + $this->mode = self::AFTER_HEAD; + + /* A start tag with the tag name "head" or an end tag except "html". */ + } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || + ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* If the current node is a head element, act as if an end tag + token with the tag name "head" had been seen. */ + if($this->head_pointer->isSameNode(end($this->stack))) { + $this->inHead(array( + 'name' => 'head', + 'type' => HTML5::ENDTAG + )); + + /* Otherwise, change the insertion mode to "after head". */ + } else { + $this->mode = self::AFTER_HEAD; + } + + /* Then, reprocess the current token. */ + return $this->afterHead($token); + } + } + + private function afterHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token with the tag name "body" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { + /* Insert a body element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in body". */ + $this->mode = self::IN_BODY; + + /* A start tag token with the tag name "frameset" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { + /* Insert a frameset element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in frameset". */ + $this->mode = self::IN_FRAME; + + /* A start tag token whose tag name is one of: "base", "link", "meta", + "script", "style", "title" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('base', 'link', 'meta', 'script', 'style', 'title'))) { + /* Parse error. Switch the insertion mode back to "in head" and + reprocess the token. */ + $this->mode = self::IN_HEAD; + return $this->inHead($token); + + /* Anything else */ + } else { + /* Act as if a start tag token with the tag name "body" and no + attributes had been seen, and then reprocess the current token. */ + $this->afterHead(array( + 'name' => 'body', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inBody($token); + } + } + + private function inBody($token) { + /* Handle the token as follows: */ + + switch($token['type']) { + /* A character token */ + case HTML5::CHARACTR: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Append the token's character to the current node. */ + $this->insertText($token['data']); + break; + + /* A comment token */ + case HTML5::COMMENT: + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + break; + + case HTML5::STARTTAG: + switch($token['name']) { + /* A start tag token whose tag name is one of: "script", + "style" */ + case 'script': case 'style': + /* Process the token as if the insertion mode had been "in + head". */ + return $this->inHead($token); + break; + + /* A start tag token whose tag name is one of: "base", "link", + "meta", "title" */ + case 'base': case 'link': case 'meta': case 'title': + /* Parse error. Process the token as if the insertion mode + had been "in head". */ + return $this->inHead($token); + break; + + /* A start tag token with the tag name "body" */ + case 'body': + /* Parse error. If the second element on the stack of open + elements is not a body element, or, if the stack of open + elements has only one node on it, then ignore the token. + (innerHTML case) */ + if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { + // Ignore + + /* Otherwise, for each attribute on the token, check to see + if the attribute is already present on the body element (the + second element) on the stack of open elements. If it is not, + add the attribute and its corresponding value to that + element. */ + } else { + foreach($token['attr'] as $attr) { + if(!$this->stack[1]->hasAttribute($attr['name'])) { + $this->stack[1]->setAttribute($attr['name'], $attr['value']); + } + } + } + break; + + /* A start tag whose tag name is one of: "address", + "blockquote", "center", "dir", "div", "dl", "fieldset", + "listing", "menu", "ol", "p", "ul" */ + case 'address': case 'blockquote': case 'center': case 'dir': + case 'div': case 'dl': case 'fieldset': case 'listing': + case 'menu': case 'ol': case 'p': case 'ul': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "form" */ + case 'form': + /* If the form element pointer is not null, ignore the + token with a parse error. */ + if($this->form_pointer !== null) { + // Ignore. + + /* Otherwise: */ + } else { + /* If the stack of open elements has a p element in + scope, then act as if an end tag with the tag name p + had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token, and set the + form element pointer to point to the element created. */ + $element = $this->insertElement($token); + $this->form_pointer = $element; + } + break; + + /* A start tag whose tag name is "li", "dd" or "dt" */ + case 'li': case 'dd': case 'dt': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + $stack_length = count($this->stack) - 1; + + for($n = $stack_length; 0 <= $n; $n--) { + /* 1. Initialise node to be the current node (the + bottommost node of the stack). */ + $stop = false; + $node = $this->stack[$n]; + $cat = $this->getElementCategory($node->tagName); + + /* 2. If node is an li, dd or dt element, then pop all + the nodes from the current node up to node, including + node, then stop this algorithm. */ + if($token['name'] === $node->tagName || ($token['name'] !== 'li' + && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { + for($x = $stack_length; $x >= $n ; $x--) { + array_pop($this->stack); + } + + break; + } + + /* 3. If node is not in the formatting category, and is + not in the phrasing category, and is not an address or + div element, then stop this algorithm. */ + if($cat !== self::FORMATTING && $cat !== self::PHRASING && + $node->tagName !== 'address' && $node->tagName !== 'div') { + break; + } + } + + /* Finally, insert an HTML element with the same tag + name as the token's. */ + $this->insertElement($token); + break; + + /* A start tag token whose tag name is "plaintext" */ + case 'plaintext': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + return HTML5::PLAINTEXT; + break; + + /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + this is a parse error; pop elements from the stack until an + element with one of those tag names has been popped from the + stack. */ + while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { + array_pop($this->stack); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "a" */ + case 'a': + /* If the list of active formatting elements contains + an element whose tag name is "a" between the end of the + list and the last marker on the list (or the start of + the list if there is no marker on the list), then this + is a parse error; act as if an end tag with the tag name + "a" had been seen, then remove that element from the list + of active formatting elements and the stack of open + elements if the end tag didn't already remove it (it + might not have if the element is not in table scope). */ + $leng = count($this->a_formatting); + + for($n = $leng - 1; $n >= 0; $n--) { + if($this->a_formatting[$n] === self::MARKER) { + break; + + } elseif($this->a_formatting[$n]->nodeName === 'a') { + $this->emitToken(array( + 'name' => 'a', + 'type' => HTML5::ENDTAG + )); + break; + } + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. */ + $this->a_formatting[] = $el; + break; + + /* A start tag whose tag name is one of: "b", "big", "em", "font", + "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'b': case 'big': case 'em': case 'font': case 'i': + case 'nobr': case 's': case 'small': case 'strike': + case 'strong': case 'tt': case 'u': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. */ + $this->a_formatting[] = $el; + break; + + /* A start tag token whose tag name is "button" */ + case 'button': + /* If the stack of open elements has a button element in scope, + then this is a parse error; act as if an end tag with the tag + name "button" had been seen, then reprocess the token. (We don't + do that. Unnecessary.) */ + if($this->elementInScope('button')) { + $this->inBody(array( + 'name' => 'button', + 'type' => HTML5::ENDTAG + )); + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is one of: "marquee", "object" */ + case 'marquee': case 'object': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is "xmp" */ + case 'xmp': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Switch the content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "table" */ + case 'table': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in table". */ + $this->mode = self::IN_TABLE; + break; + + /* A start tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ + case 'area': case 'basefont': case 'bgsound': case 'br': + case 'embed': case 'img': case 'param': case 'spacer': + case 'wbr': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "hr" */ + case 'hr': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "image" */ + case 'image': + /* Parse error. Change the token's tag name to "img" and + reprocess it. (Don't ask.) */ + $token['name'] = 'img'; + return $this->inBody($token); + break; + + /* A start tag whose tag name is "input" */ + case 'input': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an input element for the token. */ + $element = $this->insertElement($token, false); + + /* If the form element pointer is not null, then associate the + input element with the form element pointed to by the form + element pointer. */ + $this->form_pointer !== null + ? $this->form_pointer->appendChild($element) + : end($this->stack)->appendChild($element); + + /* Pop that input element off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "isindex" */ + case 'isindex': + /* Parse error. */ + // w/e + + /* If the form element pointer is not null, + then ignore the token. */ + if($this->form_pointer === null) { + /* Act as if a start tag token with the tag name "form" had + been seen. */ + $this->inBody(array( + 'name' => 'body', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody(array( + 'name' => 'hr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "p" had + been seen. */ + $this->inBody(array( + 'name' => 'p', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "label" + had been seen. */ + $this->inBody(array( + 'name' => 'label', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a stream of character tokens had been seen. */ + $this->insertText('This is a searchable index. '. + 'Insert your search keywords here: '); + + /* Act as if a start tag token with the tag name "input" + had been seen, with all the attributes from the "isindex" + token, except with the "name" attribute set to the value + "isindex" (ignoring any explicit "name" attribute). */ + $attr = $token['attr']; + $attr[] = array('name' => 'name', 'value' => 'isindex'); + + $this->inBody(array( + 'name' => 'input', + 'type' => HTML5::STARTTAG, + 'attr' => $attr + )); + + /* Act as if a stream of character tokens had been seen + (see below for what they should say). */ + $this->insertText('This is a searchable index. '. + 'Insert your search keywords here: '); + + /* Act as if an end tag token with the tag name "label" + had been seen. */ + $this->inBody(array( + 'name' => 'label', + 'type' => HTML5::ENDTAG + )); + + /* Act as if an end tag token with the tag name "p" had + been seen. */ + $this->inBody(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody(array( + 'name' => 'hr', + 'type' => HTML5::ENDTAG + )); + + /* Act as if an end tag token with the tag name "form" had + been seen. */ + $this->inBody(array( + 'name' => 'form', + 'type' => HTML5::ENDTAG + )); + } + break; + + /* A start tag whose tag name is "textarea" */ + case 'textarea': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the + RCDATA state. */ + return HTML5::RCDATA; + break; + + /* A start tag whose tag name is one of: "iframe", "noembed", + "noframes" */ + case 'iframe': case 'noembed': case 'noframes': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "select" */ + case 'select': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in select". */ + $this->mode = self::IN_SELECT; + break; + + /* A start or end tag whose tag name is one of: "caption", "col", + "colgroup", "frame", "frameset", "head", "option", "optgroup", + "tbody", "td", "tfoot", "th", "thead", "tr". */ + case 'caption': case 'col': case 'colgroup': case 'frame': + case 'frameset': case 'head': case 'option': case 'optgroup': + case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': + case 'tr': + // Parse error. Ignore the token. + break; + + /* A start or end tag whose tag name is one of: "event-source", + "section", "nav", "article", "aside", "header", "footer", + "datagrid", "command" */ + case 'event-source': case 'section': case 'nav': case 'article': + case 'aside': case 'header': case 'footer': case 'datagrid': + case 'command': + // Work in progress! + break; + + /* A start tag token not covered by the previous entries */ + default: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + $this->insertElement($token, true, true); + break; + } + break; + + case HTML5::ENDTAG: + switch($token['name']) { + /* An end tag with the tag name "body" */ + case 'body': + /* If the second element in the stack of open elements is + not a body element, this is a parse error. Ignore the token. + (innerHTML case) */ + if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { + // Ignore. + + /* If the current node is not the body element, then this + is a parse error. */ + } elseif(end($this->stack)->nodeName !== 'body') { + // Parse error. + } + + /* Change the insertion mode to "after body". */ + $this->mode = self::AFTER_BODY; + break; + + /* An end tag with the tag name "html" */ + case 'html': + /* Act as if an end tag with tag name "body" had been seen, + then, if that token wasn't ignored, reprocess the current + token. */ + $this->inBody(array( + 'name' => 'body', + 'type' => HTML5::ENDTAG + )); + + return $this->afterBody($token); + break; + + /* An end tag whose tag name is one of: "address", "blockquote", + "center", "dir", "div", "dl", "fieldset", "listing", "menu", + "ol", "pre", "ul" */ + case 'address': case 'blockquote': case 'center': case 'dir': + case 'div': case 'dl': case 'fieldset': case 'listing': + case 'menu': case 'ol': case 'pre': case 'ul': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with + the same tag name as that of the token, then this + is a parse error. */ + // w/e + + /* If the stack of open elements has an element in + scope with the same tag name as that of the token, + then pop elements from this stack until an element + with that tag name has been popped from the stack. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is "form" */ + case 'form': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + } + + if(end($this->stack)->nodeName !== $token['name']) { + /* Now, if the current node is not an element with the + same tag name as that of the token, then this is a parse + error. */ + // w/e + + } else { + /* Otherwise, if the current node is an element with + the same tag name as that of the token pop that element + from the stack. */ + array_pop($this->stack); + } + + /* In any case, set the form element pointer to null. */ + $this->form_pointer = null; + break; + + /* An end tag whose tag name is "p" */ + case 'p': + /* If the stack of open elements has a p element in scope, + then generate implied end tags, except for p elements. */ + if($this->elementInScope('p')) { + $this->generateImpliedEndTags(array('p')); + + /* If the current node is not a p element, then this is + a parse error. */ + // k + + /* If the stack of open elements has a p element in + scope, then pop elements from this stack until the stack + no longer has a p element in scope. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->elementInScope('p')) { + array_pop($this->stack); + + } else { + break; + } + } + } + break; + + /* An end tag whose tag name is "dd", "dt", or "li" */ + case 'dd': case 'dt': case 'li': + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + generate implied end tags, except for elements with the + same tag name as the token. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(array($token['name'])); + + /* If the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + pop elements from this stack until an element with that + tag name has been popped from the stack. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': + $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + generate implied end tags. */ + if($this->elementInScope($elements)) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as that of the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has in scope an element + whose tag name is one of "h1", "h2", "h3", "h4", "h5", or + "h6", then pop elements from the stack until an element + with one of those tag names has been popped from the stack. */ + while($this->elementInScope($elements)) { + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "a", "b", "big", "em", + "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'a': case 'b': case 'big': case 'em': case 'font': + case 'i': case 'nobr': case 's': case 'small': case 'strike': + case 'strong': case 'tt': case 'u': + /* 1. Let the formatting element be the last element in + the list of active formatting elements that: + * is between the end of the list and the last scope + marker in the list, if any, or the start of the list + otherwise, and + * has the same tag name as the token. + */ + while(true) { + for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { + if($this->a_formatting[$a] === self::MARKER) { + break; + + } elseif($this->a_formatting[$a]->tagName === $token['name']) { + $formatting_element = $this->a_formatting[$a]; + $in_stack = in_array($formatting_element, $this->stack, true); + $fe_af_pos = $a; + break; + } + } + + /* If there is no such node, or, if that node is + also in the stack of open elements but the element + is not in scope, then this is a parse error. Abort + these steps. The token is ignored. */ + if(!isset($formatting_element) || ($in_stack && + !$this->elementInScope($token['name']))) { + break; + + /* Otherwise, if there is such a node, but that node + is not in the stack of open elements, then this is a + parse error; remove the element from the list, and + abort these steps. */ + } elseif(isset($formatting_element) && !$in_stack) { + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 2. Let the furthest block be the topmost node in the + stack of open elements that is lower in the stack + than the formatting element, and is not an element in + the phrasing or formatting categories. There might + not be one. */ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $length = count($this->stack); + + for($s = $fe_s_pos + 1; $s < $length; $s++) { + $category = $this->getElementCategory($this->stack[$s]->nodeName); + + if($category !== self::PHRASING && $category !== self::FORMATTING) { + $furthest_block = $this->stack[$s]; + } + } + + /* 3. If there is no furthest block, then the UA must + skip the subsequent steps and instead just pop all + the nodes from the bottom of the stack of open + elements, from the current node up to the formatting + element, and remove the formatting element from the + list of active formatting elements. */ + if(!isset($furthest_block)) { + for($n = $length - 1; $n >= $fe_s_pos; $n--) { + array_pop($this->stack); + } + + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 4. Let the common ancestor be the element + immediately above the formatting element in the stack + of open elements. */ + $common_ancestor = $this->stack[$fe_s_pos - 1]; + + /* 5. If the furthest block has a parent node, then + remove the furthest block from its parent node. */ + if($furthest_block->parentNode !== null) { + $furthest_block->parentNode->removeChild($furthest_block); + } + + /* 6. Let a bookmark note the position of the + formatting element in the list of active formatting + elements relative to the elements on either side + of it in the list. */ + $bookmark = $fe_af_pos; + + /* 7. Let node and last node be the furthest block. + Follow these steps: */ + $node = $furthest_block; + $last_node = $furthest_block; + + while(true) { + for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { + /* 7.1 Let node be the element immediately + prior to node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 7.2 If node is not in the list of active + formatting elements, then remove node from + the stack of open elements and then go back + to step 1. */ + if(!in_array($node, $this->a_formatting, true)) { + unset($this->stack[$n]); + $this->stack = array_merge($this->stack); + + } else { + break; + } + } + + /* 7.3 Otherwise, if node is the formatting + element, then go to the next step in the overall + algorithm. */ + if($node === $formatting_element) { + break; + + /* 7.4 Otherwise, if last node is the furthest + block, then move the aforementioned bookmark to + be immediately after the node in the list of + active formatting elements. */ + } elseif($last_node === $furthest_block) { + $bookmark = array_search($node, $this->a_formatting, true) + 1; + } + + /* 7.5 If node has any children, perform a + shallow clone of node, replace the entry for + node in the list of active formatting elements + with an entry for the clone, replace the entry + for node in the stack of open elements with an + entry for the clone, and let node be the clone. */ + if($node->hasChildNodes()) { + $clone = $node->cloneNode(); + $s_pos = array_search($node, $this->stack, true); + $a_pos = array_search($node, $this->a_formatting, true); + + $this->stack[$s_pos] = $clone; + $this->a_formatting[$a_pos] = $clone; + $node = $clone; + } + + /* 7.6 Insert last node into node, first removing + it from its previous parent node if any. */ + if($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $node->appendChild($last_node); + + /* 7.7 Let last node be node. */ + $last_node = $node; + } + + /* 8. Insert whatever last node ended up being in + the previous step into the common ancestor node, + first removing it from its previous parent node if + any. */ + if($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $common_ancestor->appendChild($last_node); + + /* 9. Perform a shallow clone of the formatting + element. */ + $clone = $formatting_element->cloneNode(); + + /* 10. Take all of the child nodes of the furthest + block and append them to the clone created in the + last step. */ + while($furthest_block->hasChildNodes()) { + $child = $furthest_block->firstChild; + $furthest_block->removeChild($child); + $clone->appendChild($child); + } + + /* 11. Append that clone to the furthest block. */ + $furthest_block->appendChild($clone); + + /* 12. Remove the formatting element from the list + of active formatting elements, and insert the clone + into the list of active formatting elements at the + position of the aforementioned bookmark. */ + $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + + $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); + $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); + $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); + + /* 13. Remove the formatting element from the stack + of open elements, and insert the clone into the stack + of open elements immediately after (i.e. in a more + deeply nested position than) the position of the + furthest block in that stack. */ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $fb_s_pos = array_search($furthest_block, $this->stack, true); + unset($this->stack[$fe_s_pos]); + + $s_part1 = array_slice($this->stack, 0, $fb_s_pos); + $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); + $this->stack = array_merge($s_part1, array($clone), $s_part2); + + /* 14. Jump back to step 1 in this series of steps. */ + unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); + } + break; + + /* An end tag token whose tag name is one of: "button", + "marquee", "object" */ + case 'button': case 'marquee': case 'object': + /* If the stack of open elements has an element in scope whose + tag name matches the tag name of the token, then generate implied + tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // k + + /* Now, if the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then pop + elements from the stack until that element has been popped from + the stack, and clear the list of active formatting elements up + to the last marker. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + + $marker = end(array_keys($this->a_formatting, self::MARKER, true)); + + for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { + array_pop($this->a_formatting); + } + } + break; + + /* Or an end tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "hr", "iframe", "image", "img", + "input", "isindex", "noembed", "noframes", "param", "select", + "spacer", "table", "textarea", "wbr" */ + case 'area': case 'basefont': case 'bgsound': case 'br': + case 'embed': case 'hr': case 'iframe': case 'image': + case 'img': case 'input': case 'isindex': case 'noembed': + case 'noframes': case 'param': case 'select': case 'spacer': + case 'table': case 'textarea': case 'wbr': + // Parse error. Ignore the token. + break; + + /* An end tag token not covered by the previous entries */ + default: + for($n = count($this->stack) - 1; $n >= 0; $n--) { + /* Initialise node to be the current node (the bottommost + node of the stack). */ + $node = end($this->stack); + + /* If node has the same tag name as the end tag token, + then: */ + if($token['name'] === $node->nodeName) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* If the tag name of the end tag token does not + match the tag name of the current node, this is a + parse error. */ + // k + + /* Pop all the nodes from the current node up to + node, including node, then stop this algorithm. */ + for($x = count($this->stack) - $n; $x >= $n; $x--) { + array_pop($this->stack); + } + + } else { + $category = $this->getElementCategory($node); + + if($category !== self::SPECIAL && $category !== self::SCOPING) { + /* Otherwise, if node is in neither the formatting + category nor the phrasing category, then this is a + parse error. Stop this algorithm. The end tag token + is ignored. */ + return false; + } + } + } + break; + } + break; + } + } + + private function inTable($token) { + $clear = array('html', 'table'); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $text = $this->dom->createTextNode($token['data']); + end($this->stack)->appendChild($text); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + end($this->stack)->appendChild($comment); + + /* A start tag whose tag name is "caption" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'caption') { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + + /* Insert an HTML element for the token, then switch the + insertion mode to "in caption". */ + $this->insertElement($token); + $this->mode = self::IN_CAPTION; + + /* A start tag whose tag name is "colgroup" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'colgroup') { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the + insertion mode to "in column group". */ + $this->insertElement($token); + $this->mode = self::IN_CGROUP; + + /* A start tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'col') { + $this->inTable(array( + 'name' => 'colgroup', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + $this->inColumnGroup($token); + + /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('tbody', 'tfoot', 'thead'))) { + /* Clear the stack back to a table context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the insertion + mode to "in table body". */ + $this->insertElement($token); + $this->mode = self::IN_TBODY; + + /* A start tag whose tag name is one of: "td", "th", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && + in_array($token['name'], array('td', 'th', 'tr'))) { + /* Act as if a start tag token with the tag name "tbody" had been + seen, then reprocess the current token. */ + $this->inTable(array( + 'name' => 'tbody', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inTableBody($token); + + /* A start tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'table') { + /* Parse error. Act as if an end tag token with the tag name "table" + had been seen, then, if that token wasn't ignored, reprocess the + current token. */ + $this->inTable(array( + 'name' => 'table', + 'type' => HTML5::ENDTAG + )); + + return $this->mainPhase($token); + + /* An end tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'table') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + return false; + + /* Otherwise: */ + } else { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Now, if the current node is not a table element, then this + is a parse error. */ + // w/e + + /* Pop elements from this stack until a table element has been + popped from the stack. */ + while(true) { + $current = end($this->stack)->nodeName; + array_pop($this->stack); + + if($current === 'table') { + break; + } + } + + /* Reset the insertion mode appropriately. */ + $this->resetInsertionMode(); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', + 'tfoot', 'th', 'thead', 'tr'))) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* Parse error. Process the token as if the insertion mode was "in + body", with the following exception: */ + + /* If the current node is a table, tbody, tfoot, thead, or tr + element, then, whenever a node would be inserted into the current + node, it must instead be inserted into the foster parent element. */ + if(in_array(end($this->stack)->nodeName, + array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* The foster parent element is the parent element of the last + table element in the stack of open elements, if there is a + table element and it has such a parent element. If there is no + table element in the stack of open elements (innerHTML case), + then the foster parent element is the first element in the + stack of open elements (the html element). Otherwise, if there + is a table element in the stack of open elements, but the last + table element in the stack of open elements has no parent, or + its parent node is not an element, then the foster parent + element is the element before the last table element in the + stack of open elements. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === 'table') { + $table = $this->stack[$n]; + break; + } + } + + if(isset($table) && $table->parentNode !== null) { + $this->foster_parent = $table->parentNode; + + } elseif(!isset($table)) { + $this->foster_parent = $this->stack[0]; + + } elseif(isset($table) && ($table->parentNode === null || + $table->parentNode->nodeType !== XML_ELEMENT_NODE)) { + $this->foster_parent = $this->stack[$n - 1]; + } + } + + $this->inBody($token); + } + } + + private function inCaption($token) { + /* An end tag whose tag name is "caption" */ + if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore + + /* Otherwise: */ + } else { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Now, if the current node is not a caption element, then this + is a parse error. */ + // w/e + + /* Pop elements from this stack until a caption element has + been popped from the stack. */ + while(true) { + $node = end($this->stack)->nodeName; + array_pop($this->stack); + + if($node === 'caption') { + break; + } + } + + /* Clear the list of active formatting elements up to the last + marker. */ + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); + + /* Switch the insertion mode to "in table". */ + $this->mode = self::IN_TABLE; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag + name is "table" */ + } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) || ($token['type'] === HTML5::ENDTAG && + $token['name'] === 'table')) { + /* Parse error. Act as if an end tag with the tag name "caption" + had been seen, then, if that token wasn't ignored, reprocess the + current token. */ + $this->inCaption(array( + 'name' => 'caption', + 'type' => HTML5::ENDTAG + )); + + return $this->inTable($token); + + /* An end tag whose tag name is one of: "body", "col", "colgroup", + "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th', + 'thead', 'tr'))) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in body". */ + $this->inBody($token); + } + } + + private function inColumnGroup($token) { + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $text = $this->dom->createTextNode($token['data']); + end($this->stack)->appendChild($text); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + end($this->stack)->appendChild($comment); + + /* A start tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') { + /* Insert a col element for the token. Immediately pop the current + node off the stack of open elements. */ + $this->insertElement($token); + array_pop($this->stack); + + /* An end tag whose tag name is "colgroup" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'colgroup') { + /* If the current node is the root html element, then this is a + parse error, ignore the token. (innerHTML case) */ + if(end($this->stack)->nodeName === 'html') { + // Ignore + + /* Otherwise, pop the current node (which will be a colgroup + element) from the stack of open elements. Switch the insertion + mode to "in table". */ + } else { + array_pop($this->stack); + $this->mode = self::IN_TABLE; + } + + /* An end tag whose tag name is "col" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Act as if an end tag with the tag name "colgroup" had been seen, + and then, if that token wasn't ignored, reprocess the current token. */ + $this->inColumnGroup(array( + 'name' => 'colgroup', + 'type' => HTML5::ENDTAG + )); + + return $this->inTable($token); + } + } + + private function inTableBody($token) { + $clear = array('tbody', 'tfoot', 'thead', 'html'); + + /* A start tag whose tag name is "tr" */ + if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Insert a tr element for the token, then switch the insertion + mode to "in row". */ + $this->insertElement($token); + $this->mode = self::IN_ROW; + + /* A start tag whose tag name is one of: "th", "td" */ + } elseif($token['type'] === HTML5::STARTTAG && + ($token['name'] === 'th' || $token['name'] === 'td')) { + /* Parse error. Act as if a start tag with the tag name "tr" had + been seen, then reprocess the current token. */ + $this->inTableBody(array( + 'name' => 'tr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inRow($token); + + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore + + /* Otherwise: */ + } else { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Pop the current node from the stack of open elements. Switch + the insertion mode to "in table". */ + array_pop($this->stack); + $this->mode = self::IN_TABLE; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ + } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead'))) || + ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')) { + /* If the stack of open elements does not have a tbody, thead, or + tfoot element in table scope, this is a parse error. Ignore the + token. (innerHTML case) */ + if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Clear the stack back to a table body context. */ + $this->clearStackToTableContext($clear); + + /* Act as if an end tag with the same tag name as the current + node ("tbody", "tfoot", or "thead") had been seen, then + reprocess the current token. */ + $this->inTableBody(array( + 'name' => end($this->stack)->nodeName, + 'type' => HTML5::ENDTAG + )); + + return $this->mainPhase($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in table". */ + $this->inTable($token); + } + } + + private function inRow($token) { + $clear = array('tr', 'html'); + + /* A start tag whose tag name is one of: "th", "td" */ + if($token['type'] === HTML5::STARTTAG && + ($token['name'] === 'th' || $token['name'] === 'td')) { + /* Clear the stack back to a table row context. */ + $this->clearStackToTableContext($clear); + + /* Insert an HTML element for the token, then switch the insertion + mode to "in cell". */ + $this->insertElement($token); + $this->mode = self::IN_CELL; + + /* Insert a marker at the end of the list of active formatting + elements. */ + $this->a_formatting[] = self::MARKER; + + /* An end tag whose tag name is "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Clear the stack back to a table row context. */ + $this->clearStackToTableContext($clear); + + /* Pop the current node (which will be a tr element) from the + stack of open elements. Switch the insertion mode to "in table + body". */ + array_pop($this->stack); + $this->mode = self::IN_TBODY; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* Act as if an end tag with the tag name "tr" had been seen, then, + if that token wasn't ignored, reprocess the current token. */ + $this->inRow(array( + 'name' => 'tr', + 'type' => HTML5::ENDTAG + )); + + return $this->inCell($token); + + /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Otherwise, act as if an end tag with the tag name "tr" had + been seen, then reprocess the current token. */ + $this->inRow(array( + 'name' => 'tr', + 'type' => HTML5::ENDTAG + )); + + return $this->inCell($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html", "td", "th" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { + /* Parse error. Ignore the token. */ + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in table". */ + $this->inTable($token); + } + } + + private function inCell($token) { + /* An end tag whose tag name is one of: "td", "th" */ + if($token['type'] === HTML5::ENDTAG && + ($token['name'] === 'td' || $token['name'] === 'th')) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as that of the token, then this is a + parse error and the token must be ignored. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise: */ + } else { + /* Generate implied end tags, except for elements with the same + tag name as the token. */ + $this->generateImpliedEndTags(array($token['name'])); + + /* Now, if the current node is not an element with the same tag + name as the token, then this is a parse error. */ + // k + + /* Pop elements from this stack until an element with the same + tag name as the token has been popped from the stack. */ + while(true) { + $node = end($this->stack)->nodeName; + array_pop($this->stack); + + if($node === $token['name']) { + break; + } + } + + /* Clear the list of active formatting elements up to the last + marker. */ + $this->clearTheActiveFormattingElementsUpToTheLastMarker(); + + /* Switch the insertion mode to "in row". (The current node + will be a tr element at this point.) */ + $this->mode = self::IN_ROW; + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) { + /* If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (innerHTML case) */ + if(!$this->elementInScope(array('td', 'th'), true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* A start tag whose tag name is one of: "caption", "col", "colgroup", + "tbody", "td", "tfoot", "th", "thead", "tr" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', + 'thead', 'tr'))) { + /* If the stack of open elements does not have a td or th element + in table scope, then this is a parse error; ignore the token. + (innerHTML case) */ + if(!$this->elementInScope(array('td', 'th'), true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* An end tag whose tag name is one of: "body", "caption", "col", + "colgroup", "html" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('body', 'caption', 'col', 'colgroup', 'html'))) { + /* Parse error. Ignore the token. */ + + /* An end tag whose tag name is one of: "table", "tbody", "tfoot", + "thead", "tr" */ + } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'], + array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { + /* If the stack of open elements does not have an element in table + scope with the same tag name as that of the token (which can only + happen for "tbody", "tfoot" and "thead", or, in the innerHTML case), + then this is a parse error and the token must be ignored. */ + if(!$this->elementInScope($token['name'], true)) { + // Ignore. + + /* Otherwise, close the cell (see below) and reprocess the current + token. */ + } else { + $this->closeCell(); + return $this->inRow($token); + } + + /* Anything else */ + } else { + /* Process the token as if the insertion mode was "in body". */ + $this->inBody($token); + } + } + + private function inSelect($token) { + /* Handle the token as follows: */ + + /* A character token */ + if($token['type'] === HTML5::CHARACTR) { + /* Append the token's character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token whose tag name is "option" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'option') { + /* If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. */ + if(end($this->stack)->nodeName === 'option') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* A start tag token whose tag name is "optgroup" */ + } elseif($token['type'] === HTML5::STARTTAG && + $token['name'] === 'optgroup') { + /* If the current node is an option element, act as if an end tag + with the tag name "option" had been seen. */ + if(end($this->stack)->nodeName === 'option') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* If the current node is an optgroup element, act as if an end tag + with the tag name "optgroup" had been seen. */ + if(end($this->stack)->nodeName === 'optgroup') { + $this->inSelect(array( + 'name' => 'optgroup', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* An end tag token whose tag name is "optgroup" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'optgroup') { + /* First, if the current node is an option element, and the node + immediately before it in the stack of open elements is an optgroup + element, then act as if an end tag with the tag name "option" had + been seen. */ + $elements_in_stack = count($this->stack); + + if($this->stack[$elements_in_stack - 1]->nodeName === 'option' && + $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') { + $this->inSelect(array( + 'name' => 'option', + 'type' => HTML5::ENDTAG + )); + } + + /* If the current node is an optgroup element, then pop that node + from the stack of open elements. Otherwise, this is a parse error, + ignore the token. */ + if($this->stack[$elements_in_stack - 1] === 'optgroup') { + array_pop($this->stack); + } + + /* An end tag token whose tag name is "option" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'option') { + /* If the current node is an option element, then pop that node + from the stack of open elements. Otherwise, this is a parse error, + ignore the token. */ + if(end($this->stack)->nodeName === 'option') { + array_pop($this->stack); + } + + /* An end tag whose tag name is "select" */ + } elseif($token['type'] === HTML5::ENDTAG && + $token['name'] === 'select') { + /* If the stack of open elements does not have an element in table + scope with the same tag name as the token, this is a parse error. + Ignore the token. (innerHTML case) */ + if(!$this->elementInScope($token['name'], true)) { + // w/e + + /* Otherwise: */ + } else { + /* Pop elements from the stack of open elements until a select + element has been popped from the stack. */ + while(true) { + $current = end($this->stack)->nodeName; + array_pop($this->stack); + + if($current === 'select') { + break; + } + } + + /* Reset the insertion mode appropriately. */ + $this->resetInsertionMode(); + } + + /* A start tag whose tag name is "select" */ + } elseif($token['name'] === 'select' && + $token['type'] === HTML5::STARTTAG) { + /* Parse error. Act as if the token had been an end tag with the + tag name "select" instead. */ + $this->inSelect(array( + 'name' => 'select', + 'type' => HTML5::ENDTAG + )); + + /* An end tag whose tag name is one of: "caption", "table", "tbody", + "tfoot", "thead", "tr", "td", "th" */ + } elseif(in_array($token['name'], array('caption', 'table', 'tbody', + 'tfoot', 'thead', 'tr', 'td', 'th')) && $token['type'] === HTML5::ENDTAG) { + /* Parse error. */ + // w/e + + /* If the stack of open elements has an element in table scope with + the same tag name as that of the token, then act as if an end tag + with the tag name "select" had been seen, and reprocess the token. + Otherwise, ignore the token. */ + if($this->elementInScope($token['name'], true)) { + $this->inSelect(array( + 'name' => 'select', + 'type' => HTML5::ENDTAG + )); + + $this->mainPhase($token); + } + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function afterBody($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Process the token as it would be processed if the insertion mode + was "in body". */ + $this->inBody($token); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the first element in the stack of open + elements (the html element), with the data attribute set to the + data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->stack[0]->appendChild($comment); + + /* An end tag with the tag name "html" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') { + /* If the parser was originally created in order to handle the + setting of an element's innerHTML attribute, this is a parse error; + ignore the token. (The element will be an html element in this + case.) (innerHTML case) */ + + /* Otherwise, switch to the trailing end phase. */ + $this->phase = self::END_PHASE; + + /* Anything else */ + } else { + /* Parse error. Set the insertion mode to "in body" and reprocess + the token. */ + $this->mode = self::IN_BODY; + return $this->inBody($token); + } + } + + private function inFrameset($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag with the tag name "frameset" */ + } elseif($token['name'] === 'frameset' && + $token['type'] === HTML5::STARTTAG) { + $this->insertElement($token); + + /* An end tag with the tag name "frameset" */ + } elseif($token['name'] === 'frameset' && + $token['type'] === HTML5::ENDTAG) { + /* If the current node is the root html element, then this is a + parse error; ignore the token. (innerHTML case) */ + if(end($this->stack)->nodeName === 'html') { + // Ignore + + } else { + /* Otherwise, pop the current node from the stack of open + elements. */ + array_pop($this->stack); + + /* If the parser was not originally created in order to handle + the setting of an element's innerHTML attribute (innerHTML case), + and the current node is no longer a frameset element, then change + the insertion mode to "after frameset". */ + $this->mode = self::AFTR_FRAME; + } + + /* A start tag with the tag name "frame" */ + } elseif($token['name'] === 'frame' && + $token['type'] === HTML5::STARTTAG) { + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + + /* A start tag with the tag name "noframes" */ + } elseif($token['name'] === 'noframes' && + $token['type'] === HTML5::STARTTAG) { + /* Process the token as if the insertion mode had been "in body". */ + $this->inBody($token); + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function afterFrameset($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* An end tag with the tag name "html" */ + } elseif($token['name'] === 'html' && + $token['type'] === HTML5::ENDTAG) { + /* Switch to the trailing end phase. */ + $this->phase = self::END_PHASE; + + /* A start tag with the tag name "noframes" */ + } elseif($token['name'] === 'noframes' && + $token['type'] === HTML5::STARTTAG) { + /* Process the token as if the insertion mode had been "in body". */ + $this->inBody($token); + + /* Anything else */ + } else { + /* Parse error. Ignore the token. */ + } + } + + private function trailingEndPhase($token) { + /* After the main phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Process the token as it would be processed in the main phase. */ + $this->mainPhase($token); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. Or a start tag token. Or an end tag token. */ + } elseif(($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) { + /* Parse error. Switch back to the main phase and reprocess the + token. */ + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + + /* An end-of-file token */ + } elseif($token['type'] === HTML5::EOF) { + /* OMG DONE!! */ + } + } + + private function insertElement($token, $append = true, $check = false) { + // Proprietary workaround for libxml2's limitations with tag names + if ($check) { + // Slightly modified HTML5 tag-name modification, + // removing anything that's not an ASCII letter, digit, or hyphen + $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']); + // Remove leading hyphens and numbers + $token['name'] = ltrim($token['name'], '-0..9'); + // In theory, this should ever be needed, but just in case + if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice + } + + $el = $this->dom->createElement($token['name']); + + foreach($token['attr'] as $attr) { + if(!$el->hasAttribute($attr['name'])) { + $el->setAttribute($attr['name'], $attr['value']); + } + } + + $this->appendToRealParent($el); + $this->stack[] = $el; + + return $el; + } + + private function insertText($data) { + $text = $this->dom->createTextNode($data); + $this->appendToRealParent($text); + } + + private function insertComment($data) { + $comment = $this->dom->createComment($data); + $this->appendToRealParent($comment); + } + + private function appendToRealParent($node) { + if($this->foster_parent === null) { + end($this->stack)->appendChild($node); + + } elseif($this->foster_parent !== null) { + /* If the foster parent element is the parent element of the + last table element in the stack of open elements, then the new + node must be inserted immediately before the last table element + in the stack of open elements in the foster parent element; + otherwise, the new node must be appended to the foster parent + element. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === 'table' && + $this->stack[$n]->parentNode !== null) { + $table = $this->stack[$n]; + break; + } + } + + if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) + $this->foster_parent->insertBefore($node, $table); + else + $this->foster_parent->appendChild($node); + + $this->foster_parent = null; + } + } + + private function elementInScope($el, $table = false) { + if(is_array($el)) { + foreach($el as $element) { + if($this->elementInScope($element, $table)) { + return true; + } + } + + return false; + } + + $leng = count($this->stack); + + for($n = 0; $n < $leng; $n++) { + /* 1. Initialise node to be the current node (the bottommost node of + the stack). */ + $node = $this->stack[$leng - 1 - $n]; + + if($node->tagName === $el) { + /* 2. If node is the target node, terminate in a match state. */ + return true; + + } elseif($node->tagName === 'table') { + /* 3. Otherwise, if node is a table element, terminate in a failure + state. */ + return false; + + } elseif($table === true && in_array($node->tagName, array('caption', 'td', + 'th', 'button', 'marquee', 'object'))) { + /* 4. Otherwise, if the algorithm is the "has an element in scope" + variant (rather than the "has an element in table scope" variant), + and node is one of the following, terminate in a failure state. */ + return false; + + } elseif($node === $node->ownerDocument->documentElement) { + /* 5. Otherwise, if node is an html element (root element), terminate + in a failure state. (This can only happen if the node is the topmost + node of the stack of open elements, and prevents the next step from + being invoked if there are no more elements in the stack.) */ + return false; + } + + /* Otherwise, set node to the previous entry in the stack of open + elements and return to step 2. (This will never fail, since the loop + will always terminate in the previous step if the top of the stack + is reached.) */ + } + } + + private function reconstructActiveFormattingElements() { + /* 1. If there are no entries in the list of active formatting elements, + then there is nothing to reconstruct; stop this algorithm. */ + $formatting_elements = count($this->a_formatting); + + if($formatting_elements === 0) { + return false; + } + + /* 3. Let entry be the last (most recently added) element in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. If the last (most recently added) entry in the list of active + formatting elements is a marker, or if it is an element that is in the + stack of open elements, then there is nothing to reconstruct; stop this + algorithm. */ + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { + return false; + } + + for($a = $formatting_elements - 1; $a >= 0; true) { + /* 4. If there are no entries before entry in the list of active + formatting elements, then jump to step 8. */ + if($a === 0) { + $step_seven = false; + break; + } + + /* 5. Let entry be the entry one earlier than entry in the list of + active formatting elements. */ + $a--; + $entry = $this->a_formatting[$a]; + + /* 6. If entry is neither a marker nor an element that is also in + thetack of open elements, go to step 4. */ + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { + break; + } + } + + while(true) { + /* 7. Let entry be the element one later than entry in the list of + active formatting elements. */ + if(isset($step_seven) && $step_seven === true) { + $a++; + $entry = $this->a_formatting[$a]; + } + + /* 8. Perform a shallow clone of the element entry to obtain clone. */ + $clone = $entry->cloneNode(); + + /* 9. Append clone to the current node and push it onto the stack + of open elements so that it is the new current node. */ + end($this->stack)->appendChild($clone); + $this->stack[] = $clone; + + /* 10. Replace the entry for entry in the list with an entry for + clone. */ + $this->a_formatting[$a] = $clone; + + /* 11. If the entry for clone in the list of active formatting + elements is not the last entry in the list, return to step 7. */ + if(end($this->a_formatting) !== $clone) { + $step_seven = true; + } else { + break; + } + } + } + + private function clearTheActiveFormattingElementsUpToTheLastMarker() { + /* When the steps below require the UA to clear the list of active + formatting elements up to the last marker, the UA must perform the + following steps: */ + + while(true) { + /* 1. Let entry be the last (most recently added) entry in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. Remove entry from the list of active formatting elements. */ + array_pop($this->a_formatting); + + /* 3. If entry was a marker, then stop the algorithm at this point. + The list has been cleared up to the last marker. */ + if($entry === self::MARKER) { + break; + } + } + } + + private function generateImpliedEndTags($exclude = array()) { + /* When the steps below require the UA to generate implied end tags, + then, if the current node is a dd element, a dt element, an li element, + a p element, a td element, a th element, or a tr element, the UA must + act as if an end tag with the respective tag name had been seen and + then generate implied end tags again. */ + $node = end($this->stack); + $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); + + while(in_array(end($this->stack)->nodeName, $elements)) { + array_pop($this->stack); + } + } + + private function getElementCategory($node) { + $name = $node->tagName; + if(in_array($name, $this->special)) + return self::SPECIAL; + + elseif(in_array($name, $this->scoping)) + return self::SCOPING; + + elseif(in_array($name, $this->formatting)) + return self::FORMATTING; + + else + return self::PHRASING; + } + + private function clearStackToTableContext($elements) { + /* When the steps above require the UA to clear the stack back to a + table context, it means that the UA must, while the current node is not + a table element or an html element, pop elements from the stack of open + elements. If this causes any elements to be popped from the stack, then + this is a parse error. */ + while(true) { + $node = end($this->stack)->nodeName; + + if(in_array($node, $elements)) { + break; + } else { + array_pop($this->stack); + } + } + } + + private function resetInsertionMode() { + /* 1. Let last be false. */ + $last = false; + $leng = count($this->stack); + + for($n = $leng - 1; $n >= 0; $n--) { + /* 2. Let node be the last node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 3. If node is the first node in the stack of open elements, then + set last to true. If the element whose innerHTML attribute is being + set is neither a td element nor a th element, then set node to the + element whose innerHTML attribute is being set. (innerHTML case) */ + if($this->stack[0]->isSameNode($node)) { + $last = true; + } + + /* 4. If node is a select element, then switch the insertion mode to + "in select" and abort these steps. (innerHTML case) */ + if($node->nodeName === 'select') { + $this->mode = self::IN_SELECT; + break; + + /* 5. If node is a td or th element, then switch the insertion mode + to "in cell" and abort these steps. */ + } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { + $this->mode = self::IN_CELL; + break; + + /* 6. If node is a tr element, then switch the insertion mode to + "in row" and abort these steps. */ + } elseif($node->nodeName === 'tr') { + $this->mode = self::IN_ROW; + break; + + /* 7. If node is a tbody, thead, or tfoot element, then switch the + insertion mode to "in table body" and abort these steps. */ + } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { + $this->mode = self::IN_TBODY; + break; + + /* 8. If node is a caption element, then switch the insertion mode + to "in caption" and abort these steps. */ + } elseif($node->nodeName === 'caption') { + $this->mode = self::IN_CAPTION; + break; + + /* 9. If node is a colgroup element, then switch the insertion mode + to "in column group" and abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'colgroup') { + $this->mode = self::IN_CGROUP; + break; + + /* 10. If node is a table element, then switch the insertion mode + to "in table" and abort these steps. */ + } elseif($node->nodeName === 'table') { + $this->mode = self::IN_TABLE; + break; + + /* 11. If node is a head element, then switch the insertion mode + to "in body" ("in body"! not "in head"!) and abort these steps. + (innerHTML case) */ + } elseif($node->nodeName === 'head') { + $this->mode = self::IN_BODY; + break; + + /* 12. If node is a body element, then switch the insertion mode to + "in body" and abort these steps. */ + } elseif($node->nodeName === 'body') { + $this->mode = self::IN_BODY; + break; + + /* 13. If node is a frameset element, then switch the insertion + mode to "in frameset" and abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'frameset') { + $this->mode = self::IN_FRAME; + break; + + /* 14. If node is an html element, then: if the head element + pointer is null, switch the insertion mode to "before head", + otherwise, switch the insertion mode to "after head". In either + case, abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'html') { + $this->mode = ($this->head_pointer === null) + ? self::BEFOR_HEAD + : self::AFTER_HEAD; + + break; + + /* 15. If last is true, then set the insertion mode to "in body" + and abort these steps. (innerHTML case) */ + } elseif($last) { + $this->mode = self::IN_BODY; + break; + } + } + } + + private function closeCell() { + /* If the stack of open elements has a td or th element in table scope, + then act as if an end tag token with that tag name had been seen. */ + foreach(array('td', 'th') as $cell) { + if($this->elementInScope($cell, true)) { + $this->inCell(array( + 'name' => $cell, + 'type' => HTML5::ENDTAG + )); + + break; + } + } + } + + public function save() { + return $this->dom; + } +} +?> diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer.php new file mode 100644 index 0000000..e7eb82e --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer.php @@ -0,0 +1,176 @@ +<?php + +// OUT OF DATE, NEEDS UPDATING! +// USE XMLWRITER! + +class HTMLPurifier_Printer +{ + + /** + * Instance of HTMLPurifier_Generator for HTML generation convenience funcs + */ + protected $generator; + + /** + * Instance of HTMLPurifier_Config, for easy access + */ + protected $config; + + /** + * Initialize $generator. + */ + public function __construct() { + } + + /** + * Give generator necessary configuration if possible + */ + public function prepareGenerator($config) { + $all = $config->getAll(); + $context = new HTMLPurifier_Context(); + $this->generator = new HTMLPurifier_Generator($config, $context); + } + + /** + * Main function that renders object or aspect of that object + * @note Parameters vary depending on printer + */ + // function render() {} + + /** + * Returns a start tag + * @param $tag Tag name + * @param $attr Attribute array + */ + protected function start($tag, $attr = array()) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Start($tag, $attr ? $attr : array()) + ); + } + + /** + * Returns an end teg + * @param $tag Tag name + */ + protected function end($tag) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_End($tag) + ); + } + + /** + * Prints a complete element with content inside + * @param $tag Tag name + * @param $contents Element contents + * @param $attr Tag attributes + * @param $escape Bool whether or not to escape contents + */ + protected function element($tag, $contents, $attr = array(), $escape = true) { + return $this->start($tag, $attr) . + ($escape ? $this->escape($contents) : $contents) . + $this->end($tag); + } + + protected function elementEmpty($tag, $attr = array()) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Empty($tag, $attr) + ); + } + + protected function text($text) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Text($text) + ); + } + + /** + * Prints a simple key/value row in a table. + * @param $name Key + * @param $value Value + */ + protected function row($name, $value) { + if (is_bool($value)) $value = $value ? 'On' : 'Off'; + return + $this->start('tr') . "\n" . + $this->element('th', $name) . "\n" . + $this->element('td', $value) . "\n" . + $this->end('tr') + ; + } + + /** + * Escapes a string for HTML output. + * @param $string String to escape + */ + protected function escape($string) { + $string = HTMLPurifier_Encoder::cleanUTF8($string); + $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); + return $string; + } + + /** + * Takes a list of strings and turns them into a single list + * @param $array List of strings + * @param $polite Bool whether or not to add an end before the last + */ + protected function listify($array, $polite = false) { + if (empty($array)) return 'None'; + $ret = ''; + $i = count($array); + foreach ($array as $value) { + $i--; + $ret .= $value; + if ($i > 0 && !($polite && $i == 1)) $ret .= ', '; + if ($polite && $i == 1) $ret .= 'and '; + } + return $ret; + } + + /** + * Retrieves the class of an object without prefixes, as well as metadata + * @param $obj Object to determine class of + * @param $prefix Further prefix to remove + */ + protected function getClass($obj, $sec_prefix = '') { + static $five = null; + if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); + $prefix = 'HTMLPurifier_' . $sec_prefix; + if (!$five) $prefix = strtolower($prefix); + $class = str_replace($prefix, '', get_class($obj)); + $lclass = strtolower($class); + $class .= '('; + switch ($lclass) { + case 'enum': + $values = array(); + foreach ($obj->valid_values as $value => $bool) { + $values[] = $value; + } + $class .= implode(', ', $values); + break; + case 'css_composite': + $values = array(); + foreach ($obj->defs as $def) { + $values[] = $this->getClass($def, $sec_prefix); + } + $class .= implode(', ', $values); + break; + case 'css_multiple': + $class .= $this->getClass($obj->single, $sec_prefix) . ', '; + $class .= $obj->max; + break; + case 'css_denyelementdecorator': + $class .= $this->getClass($obj->def, $sec_prefix) . ', '; + $class .= $obj->element; + break; + case 'css_importantdecorator': + $class .= $this->getClass($obj->def, $sec_prefix); + if ($obj->allow) $class .= ', !important'; + break; + } + $class .= ')'; + return $class; + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/CSSDefinition.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/CSSDefinition.php new file mode 100644 index 0000000..81f9865 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/CSSDefinition.php @@ -0,0 +1,38 @@ +<?php + +class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer +{ + + protected $def; + + public function render($config) { + $this->def = $config->getCSSDefinition(); + $ret = ''; + + $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer')); + $ret .= $this->start('table'); + + $ret .= $this->element('caption', 'Properties ($info)'); + + $ret .= $this->start('thead'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Property', array('class' => 'heavy')); + $ret .= $this->element('th', 'Definition', array('class' => 'heavy', 'style' => 'width:auto;')); + $ret .= $this->end('tr'); + $ret .= $this->end('thead'); + + ksort($this->def->info); + foreach ($this->def->info as $property => $obj) { + $name = $this->getClass($obj, 'AttrDef_'); + $ret .= $this->row($property, $name); + } + + $ret .= $this->end('table'); + $ret .= $this->end('div'); + + return $ret; + } + +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.css b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.css new file mode 100644 index 0000000..3ff1a88 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.css @@ -0,0 +1,10 @@ + +.hp-config {} + +.hp-config tbody th {text-align:right; padding-right:0.5em;} +.hp-config thead, .hp-config .namespace {background:#3C578C; color:#FFF;} +.hp-config .namespace th {text-align:center;} +.hp-config .verbose {display:none;} +.hp-config .controls {text-align:center;} + +/* vim: et sw=4 sts=4 */ diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.js b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.js new file mode 100644 index 0000000..cba00c9 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.js @@ -0,0 +1,5 @@ +function toggleWriteability(id_of_patient, checked) { + document.getElementById(id_of_patient).disabled = checked; +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.php new file mode 100644 index 0000000..02aa656 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.php @@ -0,0 +1,368 @@ +<?php + +/** + * @todo Rewrite to use Interchange objects + */ +class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer +{ + + /** + * Printers for specific fields + */ + protected $fields = array(); + + /** + * Documentation URL, can have fragment tagged on end + */ + protected $docURL; + + /** + * Name of form element to stuff config in + */ + protected $name; + + /** + * Whether or not to compress directive names, clipping them off + * after a certain amount of letters. False to disable or integer letters + * before clipping. + */ + protected $compress = false; + + /** + * @param $name Form element name for directives to be stuffed into + * @param $doc_url String documentation URL, will have fragment tagged on + * @param $compress Integer max length before compressing a directive name, set to false to turn off + */ + public function __construct( + $name, $doc_url = null, $compress = false + ) { + parent::__construct(); + $this->docURL = $doc_url; + $this->name = $name; + $this->compress = $compress; + // initialize sub-printers + $this->fields[0] = new HTMLPurifier_Printer_ConfigForm_default(); + $this->fields[HTMLPurifier_VarParser::BOOL] = new HTMLPurifier_Printer_ConfigForm_bool(); + } + + /** + * Sets default column and row size for textareas in sub-printers + * @param $cols Integer columns of textarea, null to use default + * @param $rows Integer rows of textarea, null to use default + */ + public function setTextareaDimensions($cols = null, $rows = null) { + if ($cols) $this->fields['default']->cols = $cols; + if ($rows) $this->fields['default']->rows = $rows; + } + + /** + * Retrieves styling, in case it is not accessible by webserver + */ + public static function getCSS() { + return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css'); + } + + /** + * Retrieves JavaScript, in case it is not accessible by webserver + */ + public static function getJavaScript() { + return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js'); + } + + /** + * Returns HTML output for a configuration form + * @param $config Configuration object of current form state, or an array + * where [0] has an HTML namespace and [1] is being rendered. + * @param $allowed Optional namespace(s) and directives to restrict form to. + */ + public function render($config, $allowed = true, $render_controls = true) { + if (is_array($config) && isset($config[0])) { + $gen_config = $config[0]; + $config = $config[1]; + } else { + $gen_config = $config; + } + + $this->config = $config; + $this->genConfig = $gen_config; + $this->prepareGenerator($gen_config); + + $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $config->def); + $all = array(); + foreach ($allowed as $key) { + list($ns, $directive) = $key; + $all[$ns][$directive] = $config->get($ns .'.'. $directive); + } + + $ret = ''; + $ret .= $this->start('table', array('class' => 'hp-config')); + $ret .= $this->start('thead'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Directive', array('class' => 'hp-directive')); + $ret .= $this->element('th', 'Value', array('class' => 'hp-value')); + $ret .= $this->end('tr'); + $ret .= $this->end('thead'); + foreach ($all as $ns => $directives) { + $ret .= $this->renderNamespace($ns, $directives); + } + if ($render_controls) { + $ret .= $this->start('tbody'); + $ret .= $this->start('tr'); + $ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls')); + $ret .= $this->elementEmpty('input', array('type' => 'submit', 'value' => 'Submit')); + $ret .= '[<a href="?">Reset</a>]'; + $ret .= $this->end('td'); + $ret .= $this->end('tr'); + $ret .= $this->end('tbody'); + } + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders a single namespace + * @param $ns String namespace name + * @param $directive Associative array of directives to values + */ + protected function renderNamespace($ns, $directives) { + $ret = ''; + $ret .= $this->start('tbody', array('class' => 'namespace')); + $ret .= $this->start('tr'); + $ret .= $this->element('th', $ns, array('colspan' => 2)); + $ret .= $this->end('tr'); + $ret .= $this->end('tbody'); + $ret .= $this->start('tbody'); + foreach ($directives as $directive => $value) { + $ret .= $this->start('tr'); + $ret .= $this->start('th'); + if ($this->docURL) { + $url = str_replace('%s', urlencode("$ns.$directive"), $this->docURL); + $ret .= $this->start('a', array('href' => $url)); + } + $attr = array('for' => "{$this->name}:$ns.$directive"); + + // crop directive name if it's too long + if (!$this->compress || (strlen($directive) < $this->compress)) { + $directive_disp = $directive; + } else { + $directive_disp = substr($directive, 0, $this->compress - 2) . '...'; + $attr['title'] = $directive; + } + + $ret .= $this->element( + 'label', + $directive_disp, + // component printers must create an element with this id + $attr + ); + if ($this->docURL) $ret .= $this->end('a'); + $ret .= $this->end('th'); + + $ret .= $this->start('td'); + $def = $this->config->def->info["$ns.$directive"]; + if (is_int($def)) { + $allow_null = $def < 0; + $type = abs($def); + } else { + $type = $def->type; + $allow_null = isset($def->allow_null); + } + if (!isset($this->fields[$type])) $type = 0; // default + $type_obj = $this->fields[$type]; + if ($allow_null) { + $type_obj = new HTMLPurifier_Printer_ConfigForm_NullDecorator($type_obj); + } + $ret .= $type_obj->render($ns, $directive, $value, $this->name, array($this->genConfig, $this->config)); + $ret .= $this->end('td'); + $ret .= $this->end('tr'); + } + $ret .= $this->end('tbody'); + return $ret; + } + +} + +/** + * Printer decorator for directives that accept null + */ +class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer { + /** + * Printer being decorated + */ + protected $obj; + /** + * @param $obj Printer to decorate + */ + public function __construct($obj) { + parent::__construct(); + $this->obj = $obj; + } + public function render($ns, $directive, $value, $name, $config) { + if (is_array($config) && isset($config[0])) { + $gen_config = $config[0]; + $config = $config[1]; + } else { + $gen_config = $config; + } + $this->prepareGenerator($gen_config); + + $ret = ''; + $ret .= $this->start('label', array('for' => "$name:Null_$ns.$directive")); + $ret .= $this->element('span', "$ns.$directive:", array('class' => 'verbose')); + $ret .= $this->text(' Null/Disabled'); + $ret .= $this->end('label'); + $attr = array( + 'type' => 'checkbox', + 'value' => '1', + 'class' => 'null-toggle', + 'name' => "$name"."[Null_$ns.$directive]", + 'id' => "$name:Null_$ns.$directive", + 'onclick' => "toggleWriteability('$name:$ns.$directive',checked)" // INLINE JAVASCRIPT!!!! + ); + if ($this->obj instanceof HTMLPurifier_Printer_ConfigForm_bool) { + // modify inline javascript slightly + $attr['onclick'] = "toggleWriteability('$name:Yes_$ns.$directive',checked);toggleWriteability('$name:No_$ns.$directive',checked)"; + } + if ($value === null) $attr['checked'] = 'checked'; + $ret .= $this->elementEmpty('input', $attr); + $ret .= $this->text(' or '); + $ret .= $this->elementEmpty('br'); + $ret .= $this->obj->render($ns, $directive, $value, $name, array($gen_config, $config)); + return $ret; + } +} + +/** + * Swiss-army knife configuration form field printer + */ +class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer { + public $cols = 18; + public $rows = 5; + public function render($ns, $directive, $value, $name, $config) { + if (is_array($config) && isset($config[0])) { + $gen_config = $config[0]; + $config = $config[1]; + } else { + $gen_config = $config; + } + $this->prepareGenerator($gen_config); + // this should probably be split up a little + $ret = ''; + $def = $config->def->info["$ns.$directive"]; + if (is_int($def)) { + $type = abs($def); + } else { + $type = $def->type; + } + if (is_array($value)) { + switch ($type) { + case HTMLPurifier_VarParser::LOOKUP: + $array = $value; + $value = array(); + foreach ($array as $val => $b) { + $value[] = $val; + } + case HTMLPurifier_VarParser::ALIST: + $value = implode(PHP_EOL, $value); + break; + case HTMLPurifier_VarParser::HASH: + $nvalue = ''; + foreach ($value as $i => $v) { + $nvalue .= "$i:$v" . PHP_EOL; + } + $value = $nvalue; + break; + default: + $value = ''; + } + } + if ($type === HTMLPurifier_VarParser::MIXED) { + return 'Not supported'; + $value = serialize($value); + } + $attr = array( + 'name' => "$name"."[$ns.$directive]", + 'id' => "$name:$ns.$directive" + ); + if ($value === null) $attr['disabled'] = 'disabled'; + if (isset($def->allowed)) { + $ret .= $this->start('select', $attr); + foreach ($def->allowed as $val => $b) { + $attr = array(); + if ($value == $val) $attr['selected'] = 'selected'; + $ret .= $this->element('option', $val, $attr); + } + $ret .= $this->end('select'); + } elseif ( + $type === HTMLPurifier_VarParser::TEXT || + $type === HTMLPurifier_VarParser::ITEXT || + $type === HTMLPurifier_VarParser::ALIST || + $type === HTMLPurifier_VarParser::HASH || + $type === HTMLPurifier_VarParser::LOOKUP + ) { + $attr['cols'] = $this->cols; + $attr['rows'] = $this->rows; + $ret .= $this->start('textarea', $attr); + $ret .= $this->text($value); + $ret .= $this->end('textarea'); + } else { + $attr['value'] = $value; + $attr['type'] = 'text'; + $ret .= $this->elementEmpty('input', $attr); + } + return $ret; + } +} + +/** + * Bool form field printer + */ +class HTMLPurifier_Printer_ConfigForm_bool extends HTMLPurifier_Printer { + public function render($ns, $directive, $value, $name, $config) { + if (is_array($config) && isset($config[0])) { + $gen_config = $config[0]; + $config = $config[1]; + } else { + $gen_config = $config; + } + $this->prepareGenerator($gen_config); + $ret = ''; + $ret .= $this->start('div', array('id' => "$name:$ns.$directive")); + + $ret .= $this->start('label', array('for' => "$name:Yes_$ns.$directive")); + $ret .= $this->element('span', "$ns.$directive:", array('class' => 'verbose')); + $ret .= $this->text(' Yes'); + $ret .= $this->end('label'); + + $attr = array( + 'type' => 'radio', + 'name' => "$name"."[$ns.$directive]", + 'id' => "$name:Yes_$ns.$directive", + 'value' => '1' + ); + if ($value === true) $attr['checked'] = 'checked'; + if ($value === null) $attr['disabled'] = 'disabled'; + $ret .= $this->elementEmpty('input', $attr); + + $ret .= $this->start('label', array('for' => "$name:No_$ns.$directive")); + $ret .= $this->element('span', "$ns.$directive:", array('class' => 'verbose')); + $ret .= $this->text(' No'); + $ret .= $this->end('label'); + + $attr = array( + 'type' => 'radio', + 'name' => "$name"."[$ns.$directive]", + 'id' => "$name:No_$ns.$directive", + 'value' => '0' + ); + if ($value === false) $attr['checked'] = 'checked'; + if ($value === null) $attr['disabled'] = 'disabled'; + $ret .= $this->elementEmpty('input', $attr); + + $ret .= $this->end('div'); + + return $ret; + } +} + +// vim: et sw=4 sts=4 diff --git a/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/HTMLDefinition.php b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/HTMLDefinition.php new file mode 100644 index 0000000..8a8f126 --- /dev/null +++ b/framework/vendors/htmlpurifier/standalone/HTMLPurifier/Printer/HTMLDefinition.php @@ -0,0 +1,272 @@ +<?php + +class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer +{ + + /** + * Instance of HTMLPurifier_HTMLDefinition, for easy access + */ + protected $def; + + public function render($config) { + $ret = ''; + $this->config =& $config; + + $this->def = $config->getHTMLDefinition(); + + $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer')); + + $ret .= $this->renderDoctype(); + $ret .= $this->renderEnvironment(); + $ret .= $this->renderContentSets(); + $ret .= $this->renderInfo(); + + $ret .= $this->end('div'); + + return $ret; + } + + /** + * Renders the Doctype table + */ + protected function renderDoctype() { + $doctype = $this->def->doctype; + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Doctype'); + $ret .= $this->row('Name', $doctype->name); + $ret .= $this->row('XML', $doctype->xml ? 'Yes' : 'No'); + $ret .= $this->row('Default Modules', implode($doctype->modules, ', ')); + $ret .= $this->row('Default Tidy Modules', implode($doctype->tidyModules, ', ')); + $ret .= $this->end('table'); + return $ret; + } + + + /** + * Renders environment table, which is miscellaneous info + */ + protected function renderEnvironment() { + $def = $this->def; + + $ret = ''; + + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Environment'); + + $ret .= $this->row('Parent of fragment', $def->info_parent); + $ret .= $this->renderChildren($def->info_parent_def->child); + $ret .= $this->row('Block wrap name', $def->info_block_wrapper); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Global attributes'); + $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Tag transforms'); + $list = array(); + foreach ($def->info_tag_transform as $old => $new) { + $new = $this->getClass($new, 'TagTransform_'); + $list[] = "<$old> with $new"; + } + $ret .= $this->element('td', $this->listify($list)); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre)); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post)); + $ret .= $this->end('tr'); + + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders the Content Sets table + */ + protected function renderContentSets() { + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Content Sets'); + foreach ($this->def->info_content_sets as $name => $lookup) { + $ret .= $this->heavyHeader($name); + $ret .= $this->start('tr'); + $ret .= $this->element('td', $this->listifyTagLookup($lookup)); + $ret .= $this->end('tr'); + } + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders the Elements ($info) table + */ + protected function renderInfo() { + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Elements ($info)'); + ksort($this->def->info); + $ret .= $this->heavyHeader('Allowed tags', 2); + $ret .= $this->start('tr'); + $ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2)); + $ret .= $this->end('tr'); + foreach ($this->def->info as $name => $def) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2)); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Inline content'); + $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No'); + $ret .= $this->end('tr'); + if (!empty($def->excludes)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Excludes'); + $ret .= $this->element('td', $this->listifyTagLookup($def->excludes)); + $ret .= $this->end('tr'); + } + if (!empty($def->attr_transform_pre)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre)); + $ret .= $this->end('tr'); + } + if (!empty($def->attr_transform_post)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post)); + $ret .= $this->end('tr'); + } + if (!empty($def->auto_close)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Auto closed by'); + $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close)); + $ret .= $this->end('tr'); + } + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Allowed attributes'); + $ret .= $this->element('td',$this->listifyAttr($def->attr), array(), 0); + $ret .= $this->end('tr'); + + if (!empty($def->required_attr)) { + $ret .= $this->row('Required attributes', $this->listify($def->required_attr)); + } + + $ret .= $this->renderChildren($def->child); + } + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders a row describing the allowed children of an element + * @param $def HTMLPurifier_ChildDef of pertinent element + */ + protected function renderChildren($def) { + $context = new HTMLPurifier_Context(); + $ret = ''; + $ret .= $this->start('tr'); + $elements = array(); + $attr = array(); + if (isset($def->elements)) { + if ($def->type == 'strictblockquote') { + $def->validateChildren(array(), $this->config, $context); + } + $elements = $def->elements; + } + if ($def->type == 'chameleon') { + $attr['rowspan'] = 2; + } elseif ($def->type == 'empty') { + $elements = array(); + } elseif ($def->type == 'table') { + $elements = array_flip(array('col', 'caption', 'colgroup', 'thead', + 'tfoot', 'tbody', 'tr')); + } + $ret .= $this->element('th', 'Allowed children', $attr); + + if ($def->type == 'chameleon') { + + $ret .= $this->element('td', + '<em>Block</em>: ' . + $this->escape($this->listifyTagLookup($def->block->elements)),0,0); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('td', + '<em>Inline</em>: ' . + $this->escape($this->listifyTagLookup($def->inline->elements)),0,0); + + } elseif ($def->type == 'custom') { + + $ret .= $this->element('td', '<em>'.ucfirst($def->type).'</em>: ' . + $def->dtd_regex); + + } else { + $ret .= $this->element('td', + '<em>'.ucfirst($def->type).'</em>: ' . + $this->escape($this->listifyTagLookup($elements)),0,0); + } + $ret .= $this->end('tr'); + return $ret; + } + + /** + * Listifies a tag lookup table. + * @param $array Tag lookup array in form of array('tagname' => true) + */ + protected function listifyTagLookup($array) { + ksort($array); + $list = array(); + foreach ($array as $name => $discard) { + if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue; + $list[] = $name; + } + return $this->listify($list); + } + + /** + * Listifies a list of objects by retrieving class names and internal state + * @param $array List of objects + * @todo Also add information about internal state + */ + protected function listifyObjectList($array) { + ksort($array); + $list = array(); + foreach ($array as $discard => $obj) { + $list[] = $this->getClass($obj, 'AttrTransform_'); + } + return $this->listify($list); + } + + /** + * Listifies a hash of attributes to AttrDef classes + * @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef) + */ + protected function listifyAttr($array) { + ksort($array); + $list = array(); + foreach ($array as $name => $obj) { + if ($obj === false) continue; + $list[] = "$name = <i>" . $this->getClass($obj, 'AttrDef_') . '</i>'; + } + return $this->listify($list); + } + + /** + * Creates a heavy header row + */ + protected function heavyHeader($text, $num = 1) { + $ret = ''; + $ret .= $this->start('tr'); + $ret .= $this->element('th', $text, array('colspan' => $num, 'class' => 'heavy')); + $ret .= $this->end('tr'); + return $ret; + } + +} + +// vim: et sw=4 sts=4 |
