MDL-9151 HTML Purifier cleaning support - enable switch is in experimental section

MDL-9435 Reviewed url cleaning in redirect()
2025-08-04 16:36:37 +02:00 · 2007-04-18 21:52:03 +00:00 · 2007-04-18 21:52:03 +00:00 · e0ac8448c7
commit e0ac8448c7
parent c85607f0be
129 changed files with 10389 additions and 20 deletions
--- a/lib/htmlpurifier/CREDITS
+++ b/lib/htmlpurifier/CREDITS
@ -0,0 +1,7 @@
+
+CREDITS
+
+Almost everything written by Edward Z. Yang (Ambush Commander).  Lots of thanks
+to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
+more details), Feyd especially (namely IPv6 and optimization).  Thanks to RSnake
+for letting me package his fantastic XSS cheatsheet for a smoketest.
--- a/lib/htmlpurifier/HTMLPurifier.auto.php
+++ b/lib/htmlpurifier/HTMLPurifier.auto.php
@ -0,0 +1,10 @@
+<?php
+
+/**
+ * This is a stub include that automatically configures the include path.
+ *
+ * It prepends the directory containing this file to the PHP include path
+ * and then loads HTMLPurifier.php through that path.
+ */
+
+// prepend (rather than append) so this directory is searched first
+set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
+require_once 'HTMLPurifier.php';
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier.func.php
+++ b/lib/htmlpurifier/HTMLPurifier.func.php
@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Function wrapper for HTML Purifier for quick use.
+ * @note This function only includes the library when it is called. While
+ *       this is efficient for instances when you only use HTML Purifier
+ *       on a few of your pages, it murders bytecode caching. You still
+ *       need to add HTML Purifier to your path.
+ * @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
+ */
+
+function HTMLPurifier($html, $config = null) {
+    // a single purifier object is kept between calls of this function
+    static $purifier = false;
+    if ($purifier === false) {
+        // the library is loaded lazily, on the first call only
+        require_once 'HTMLPurifier.php';
+        $purifier = new HTMLPurifier();
+    }
+    // $config is applied to this call only; it is handed straight to purify()
+    $clean_html = $purifier->purify($html, $config);
+    return $clean_html;
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier.php
+++ b/lib/htmlpurifier/HTMLPurifier.php
@ -0,0 +1,170 @@
+<?php
+
+/*!
+ * @mainpage
+ * 
+ * HTML Purifier is an HTML filter that will take an arbitrary snippet of
+ * HTML and rigorously test, validate and filter it into a version that
+ * is safe for output onto webpages. It achieves this by:
+ * 
+ *  -# Lexing (parsing into tokens) the document,
+ *  -# Executing various strategies on the tokens:
+ *      -# Removing all elements not in the whitelist,
+ *      -# Making the tokens well-formed,
+ *      -# Fixing the nesting of the nodes, and
+ *      -# Validating attributes of the nodes; and
+ *  -# Generating HTML from the purified tokens.
+ * 
+ * However, most users will only need to interface with the HTMLPurifier
+ * class, so this massive amount of infrastructure is usually concealed.
+ * If you plan on working with the internals, be sure to include
+ * HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
+ */
+
+/*
+    HTML Purifier 1.6.0 - Standards Compliant HTML Filtering
+    Copyright (C) 2006 Edward Z. Yang
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+// almost every class has an undocumented dependency on these, so make sure
+// they get included
+require_once 'HTMLPurifier/ConfigSchema.php';
+require_once 'HTMLPurifier/Config.php';
+require_once 'HTMLPurifier/Context.php';
+
+require_once 'HTMLPurifier/Lexer.php';
+require_once 'HTMLPurifier/Generator.php';
+require_once 'HTMLPurifier/Strategy/Core.php';
+require_once 'HTMLPurifier/Encoder.php';
+
+/**
+ * Main library execution class.
+ * 
+ * Facade that performs calls to the HTMLPurifier_Lexer,
+ * HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
+ * purify HTML.
+ * 
+ * @todo We need an easier way to inject strategies, it'll probably end
+ *       up getting done through config though.
+ */
+class HTMLPurifier
+{
+
+    /**
+     * Version string of the bundled library.
+     */
+    var $version = '1.6.0';
+
+    /**
+     * Default HTMLPurifier_Config, used when purify() is not given one.
+     */
+    var $config;
+
+    /**
+     * Filter objects registered through addFilter(), in registration order.
+     * Starts out as an empty array so that purify() can count and iterate
+     * it even when no filter has ever been added.
+     */
+    var $filters = array();
+
+    /**
+     * Subsystems chained by purify(): lexer, then strategy, then generator.
+     */
+    var $lexer, $strategy, $generator;
+
+    /**
+     * Final HTMLPurifier_Context of last run purification. Might be an array
+     * (keyed like the input) after a call to purifyArray().
+     * @public
+     */
+    var $context;
+
+    /**
+     * Initializes the purifier.
+     * @param $config Optional HTMLPurifier_Config object for all instances of
+     *                the purifier, if omitted, a default configuration is
+     *                supplied (which can be overridden on a per-use basis).
+     *                The parameter can also be any type that
+     *                HTMLPurifier_Config::create() supports.
+     */
+    function HTMLPurifier($config = null) {
+
+        $this->config = HTMLPurifier_Config::create($config);
+
+        $this->lexer        = HTMLPurifier_Lexer::create();
+        $this->strategy     = new HTMLPurifier_Strategy_Core();
+        $this->generator    = new HTMLPurifier_Generator();
+
+    }
+
+    /**
+     * Adds a filter to process the output. Filters are applied first come,
+     * first served: preFilter() runs in registration order and postFilter()
+     * in the reverse order.
+     * @param $filter HTMLPurifier_Filter object
+     */
+    function addFilter($filter) {
+        $this->filters[] = $filter;
+    }
+
+    /**
+     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
+     *
+     * @param $html String of HTML to purify
+     * @param $config HTMLPurifier_Config object for this operation, if omitted,
+     *                defaults to the config object specified during this
+     *                object's construction. The parameter can also be any type
+     *                that HTMLPurifier_Config::create() supports.
+     * @return Purified HTML
+     */
+    function purify($html, $config = null) {
+
+        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;
+
+        $context = new HTMLPurifier_Context();
+        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
+
+        // $size is reused by the post-filter loop below
+        for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
+            $html = $this->filters[$i]->preFilter($html, $config, $context);
+        }
+
+        // purified HTML
+        $html =
+            $this->generator->generateFromTokens(
+                // list of tokens
+                $this->strategy->execute(
+                    // list of un-purified tokens
+                    $this->lexer->tokenizeHTML(
+                        // un-purified HTML
+                        $html, $config, $context
+                    ),
+                    $config, $context
+                ),
+                $config, $context
+            );
+
+        // post-filters run in the reverse order of the pre-filters
+        for ($i = $size - 1; $i >= 0; $i--) {
+            $html = $this->filters[$i]->postFilter($html, $config, $context);
+        }
+
+        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
+        // expose the context of this run to the caller
+        $this->context =& $context;
+        return $html;
+    }
+
+    /**
+     * Filters an array of HTML snippets
+     * @param $array_of_html Array of HTML strings to purify; keys are kept.
+     * @param $config Optional HTMLPurifier_Config object for this operation.
+     *                See HTMLPurifier::purify() for more details.
+     * @return Array of purified HTML
+     */
+    function purifyArray($array_of_html, $config = null) {
+        $context_array = array();
+        foreach ($array_of_html as $key => $html) {
+            $array_of_html[$key] = $this->purify($html, $config);
+            // purify() stored the context of the run it just finished
+            $context_array[$key] = $this->context;
+        }
+        $this->context = $context_array;
+        return $array_of_html;
+    }
+
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrCollections.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrCollections.php
@ -0,0 +1,100 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTypes.php';
+require_once 'HTMLPurifier/AttrDef/Lang.php';
+
+/**
+ * Defines common attribute collections that modules reference
+ */
+
+class HTMLPurifier_AttrCollections
+{
+
+    /**
+     * Associative array of attribute collections, indexed by name
+     * @note Technically, the composition of these is more complicated,
+     *       but we bypass it using our own excludes property
+     */
+    var $info = array();
+
+    /**
+     * Performs all expansions on internal data for use by other inclusions
+     * It also collects all attribute collection extensions from
+     * modules
+     * @param $attr_types HTMLPurifier_AttrTypes instance
+     * @param $modules Hash array of HTMLPurifier_HTMLModule members
+     */
+    function HTMLPurifier_AttrCollections($attr_types, $modules) {
+        $info =& $this->info;
+        // load extensions from the modules
+        foreach ($modules as $module) {
+            foreach ($module->attr_collections as $coll_i => $coll) {
+                foreach ($coll as $attr_i => $attr) {
+                    if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
+                        // merge in includes
+                        $info[$coll_i][$attr_i] = array_merge(
+                            $info[$coll_i][$attr_i], $attr);
+                        continue;
+                    }
+                    $info[$coll_i][$attr_i] = $attr;
+                }
+            }
+        }
+        // perform internal expansions and inclusions
+        foreach ($info as $name => $attr) {
+            // merge attribute collections that include others
+            $this->performInclusions($info[$name]);
+            // replace string identifiers with actual attribute objects
+            $this->expandIdentifiers($info[$name], $attr_types);
+        }
+    }
+
+    /**
+     * Takes a reference to an attribute associative array and performs
+     * all inclusions specified by the zero index.
+     *
+     * Inclusions of inclusions are followed too. Names of collections that
+     * are not defined are skipped, and every collection is merged at most
+     * once, so circular inclusions terminate.
+     *
+     * @param &$attr Reference to attribute array
+     */
+    function performInclusions(&$attr) {
+        if (!isset($attr[0])) return;
+        $merge = $attr[0];
+        $seen  = array(); // names already merged; guards against cycles
+        // loop through all the inclusions; $merge may grow while we iterate
+        for ($i = 0; isset($merge[$i]); $i++) {
+            $name = $merge[$i];
+            if (isset($seen[$name])) continue;
+            $seen[$name] = true;
+            // an inclusion may name a collection that was never defined
+            if (!isset($this->info[$name])) continue;
+            // foreach attribute of the inclusion, copy it over
+            foreach ($this->info[$name] as $key => $value) {
+                if (isset($attr[$key])) continue; // also catches more inclusions
+                $attr[$key] = $value;
+            }
+            if (isset($this->info[$name][0])) {
+                // recursion: queue up the inclusions of the inclusion
+                $merge = array_merge($merge, $this->info[$name][0]);
+            }
+        }
+        unset($attr[0]);
+    }
+
+    /**
+     * Expands all string identifiers in an attribute array by replacing
+     * them with the appropriate values inside HTMLPurifier_AttrTypes
+     *
+     * Entries whose value is false are removed, non-string entries are left
+     * untouched, and unknown identifiers raise an error and are removed.
+     *
+     * @param &$attr Reference to attribute array
+     * @param $attr_types HTMLPurifier_AttrTypes instance
+     */
+    function expandIdentifiers(&$attr, $attr_types) {
+        foreach ($attr as $def_i => $def) {
+            // skip inclusions
+            if ($def_i === 0) continue;
+            // must be tested before the is_string() check below, which
+            // would otherwise skip a boolean false and leave it in place
+            if ($def === false) {
+                unset($attr[$def_i]);
+                continue;
+            }
+            if (!is_string($def)) continue;
+            if (isset($attr_types->info[$def])) {
+                $attr[$def_i] = $attr_types->info[$def];
+            } else {
+                trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
+                unset($attr[$def_i]);
+            }
+        }
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef.php
@ -0,0 +1,67 @@
+<?php
+
+/**
+ * Base class for all validating attribute definitions.
+ * 
+ * This family of classes forms the core for not only HTML attribute validation,
+ * but also any sort of string that needs to be validated or cleaned (which
+ * means CSS properties and composite definitions are defined here too).  
+ * Besides defining (through code) what precisely makes the string valid,
+ * subclasses are also responsible for cleaning the code if possible.
+ */
+
+class HTMLPurifier_AttrDef
+{
+
+    /**
+     * Whether the HTML attribute is written in minimized form. Only the
+     * (vapourware) boolean attribute definition would make use of this.
+     */
+    var $minimized = false;
+
+    /**
+     * Validates and cleans the passed string according to a definition.
+     * This base implementation is abstract: it only raises an error.
+     *
+     * @public
+     * @param $string String to be validated and cleaned.
+     * @param $config Mandatory HTMLPurifier_Config object.
+     * @param $context Mandatory HTMLPurifier_AttrContext object.
+     */
+    function validate($string, $config, &$context) {
+        trigger_error('Cannot call abstract function', E_USER_ERROR);
+    }
+
+    /**
+     * Convenience method that parses a string as if it were CDATA.
+     *
+     * The string is processed in the manner specified at
+     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
+     * whitespace is removed, line feeds are dropped, and carriage returns
+     * and tabs become spaces.  Most useful for HTML attributes specified
+     * as CDATA, but it can also be applied to most CSS values.
+     *
+     * @note Not entirely standards compliant, because trim() strips more
+     *       kinds of whitespace than the spec lists. In practice this is
+     *       rarely a problem, as those extra characters usually have
+     *       already been removed by HTMLPurifier_Encoder.
+     *
+     * @warning This processing is inconsistent with XML's whitespace handling
+     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
+     *          4.7.  Compliant processing requires all line breaks normalized
+     *          to "\n", so the fix is not as simple as fixing it in this
+     *          function.  Trim and whitespace collapsing are supposed to only
+     *          occur in NMTOKENs.  However, note that we are NOT necessarily
+     *          parsing XML, thus, this behavior may still be correct.
+     *
+     * @public
+     * @param $string String to normalize.
+     * @return Normalized string.
+     */
+    function parseCDATA($string) {
+        $without_feeds = str_replace("\n", '', trim($string));
+        return str_replace(array("\r", "\t"), ' ', $without_feeds);
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php
@ -0,0 +1,69 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/CSSDefinition.php';
+
+/**
+ * Validates the HTML attribute style, otherwise known as CSS.
+ * @note We don't implement the whole CSS specification, so it might be
+ *       difficult to reuse this component in the context of validating
+ *       actual stylesheet declarations.
+ * @note If we were really serious about validating the CSS, we would
+ *       tokenize the styles and then parse the tokens. Obviously, we
+ *       are not doing that. Doing that could seriously harm performance,
+ *       but would make these components a lot more viable for a CSS
+ *       filtering solution.
+ */
+class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Validates the contents of a style attribute, declaration by
+     * declaration.
+     *
+     * @param $css String of CSS declarations.
+     * @param $config HTMLPurifier_Config object; supplies the CSS definition.
+     * @param $context Context object, handed on to the property validators.
+     * @return String of the surviving "property:value;" pairs, or false if
+     *         none survived.
+     */
+    function validate($css, $config, &$context) {
+
+        $css        = $this->parseCDATA($css);
+        $definition = $config->getCSSDefinition();
+
+        // Splitting on semicolons breaks the spec, but semicolons rarely
+        // appear in escaped form, so this is flaky yet fast.
+        // THEY MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
+        // for details
+        $accepted = array();
+        foreach (explode(';', $css) as $declaration) {
+            // needs a colon that is not the very first character; this
+            // also rejects empty pieces
+            $colon = strpos($declaration, ':');
+            if ($colon === false || $colon === 0) continue;
+
+            list($property, $value) = explode(':', $declaration, 2);
+            $property = trim($property);
+            $value    = trim($value);
+            if (!isset($definition->info[$property])) continue;
+
+            if (strtolower($value) === 'inherit') {
+                // inherit works for everything (but only on the base property)
+                $result = 'inherit';
+            } else {
+                $result = $definition->info[$property]->validate(
+                    $value, $config, $context );
+            }
+            if ($result === false) continue;
+
+            // keyed by property, so a later duplicate replaces an earlier one
+            $accepted[$property] = $result;
+        }
+
+        // the output is assembled only now, after duplicates are resolved
+        $output = '';
+        foreach ($accepted as $property => $result) {
+            $output .= $property . ':' . $result . ';';
+        }
+
+        if ($output === '') return false;
+        return $output;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php
@ -0,0 +1,87 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/CSSDefinition.php';
+
+/**
+ * Validates shorthand CSS property background.
+ * @warning Does not support url tokens that have internal spaces.
+ */
+class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of component validators.
+     * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
+     */
+    var $info;
+
+    /**
+     * Copies the validators of the five longhand background properties
+     * out of the CSS definition.
+     * @param $config HTMLPurifier_Config object
+     */
+    function HTMLPurifier_AttrDef_CSS_Background($config) {
+        $def = $config->getCSSDefinition();
+        $parts = array('color', 'image', 'repeat', 'attachment', 'position');
+        foreach ($parts as $part) {
+            $this->info['background-' . $part] = $def->info['background-' . $part];
+        }
+    }
+
+    /**
+     * Validates a background shorthand value.
+     * @param $string Value of the background property.
+     * @param $config HTMLPurifier_Config object
+     * @param $context Context object, handed on to the component validators.
+     * @return Space separated list of accepted components, false if none.
+     */
+    function validate($string, $config, &$context) {
+
+        // regular pre-processing
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+
+        // assumes URI doesn't have spaces in it
+        $bits = explode(' ', strtolower($string)); // bits to process
+
+        // component => accepted value; the order matters, position is
+        // tried last
+        $caught = array(
+            'color'      => false,
+            'image'      => false,
+            'repeat'     => false,
+            'attachment' => false,
+            'position'   => false
+        );
+
+        $matched = false; // did any bit land somewhere?
+
+        foreach ($bits as $bit) {
+            if ($bit === '') continue;
+            foreach (array_keys($caught) as $key) {
+                if ($key == 'position') {
+                    // whatever nobody else claimed is collected here and
+                    // validated in one go further down
+                    if ($caught['position'] === false) $caught['position'] = '';
+                    $caught['position'] .= $bit . ' ';
+                    $matched = true;
+                    break;
+                }
+                // each of the other components is filled only once
+                if ($caught[$key] !== false) continue;
+                $r = $this->info['background-' . $key]->validate($bit, $config, $context);
+                if ($r === false) continue;
+                $caught[$key] = $r;
+                $matched = true;
+                break;
+            }
+        }
+
+        if (!$matched) return false;
+        if ($caught['position'] !== false) {
+            $caught['position'] = $this->info['background-position']->
+                validate($caught['position'], $config, $context);
+        }
+
+        $ret = array();
+        foreach ($caught as $value) {
+            if ($value !== false) $ret[] = $value;
+        }
+
+        if (count($ret) === 0) return false;
+        return implode(' ', $ret);
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
@ -0,0 +1,130 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
+
+/* W3C says:
+    [ // adjective and number must be in correct order, even if
+      // you could switch them without introducing ambiguity.
+      // some browsers support that syntax
+        [
+            <percentage> | <length> | left | center | right
+        ]
+        [ 
+            <percentage> | <length> | top | center | bottom
+        ]?
+    ] |
+    [ // this signifies that the vertical and horizontal adjectives
+      // can be arbitrarily ordered, however, there can only be two,
+      // one of each, or none at all
+        [
+            left | center | right
+        ] ||
+        [
+            top | center | bottom
+        ]
+    ]
+    top, left = 0%
+    center, (none) = 50%
+    bottom, right = 100%
+*/
+
+/* QuirksMode says:
+    keyword + length/percentage must be ordered correctly, as per W3C
+    
+    Internet Explorer and Opera, however, support arbitrary ordering. We
+    should fix it up.
+    
+    Minor issue though, not strictly necessary.
+*/
+
+// control freaks may appreciate the ability to convert these to
+// percentages or something, but it's not necessary
+
+/**
+ * Validates the value of background-position.
+ */
+class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Validator for length components.
+     */
+    var $length;
+
+    /**
+     * Validator for percentage components.
+     */
+    var $percentage;
+
+    function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
+        $this->length     = new HTMLPurifier_AttrDef_CSS_Length();
+        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
+    }
+
+    /**
+     * Validates a background-position value and writes it back in
+     * horizontal-then-vertical order.
+     *
+     * "center" is ambiguous: it is horizontal when it is the first
+     * recognized component and vertical otherwise. The two cases are
+     * tracked separately so that "center 10px" is not turned into
+     * "10px center", which would swap the axes.
+     *
+     * @param $string Value of the background-position property.
+     * @param $config HTMLPurifier_Config object
+     * @param $context Context object, handed on to the component validators.
+     * @return Normalized position string, or false if nothing was valid.
+     */
+    function validate($string, $config, &$context) {
+        $string = $this->parseCDATA($string);
+        $bits = explode(' ', $string);
+
+        $keywords = array();
+        $keywords['h']  = false; // left, right
+        $keywords['v']  = false; // top, bottom
+        $keywords['ch'] = false; // center as first component (horizontal)
+        $keywords['cv'] = false; // center as later component (vertical)
+        $measures = array();
+
+        $i = 0; // number of components recognized so far
+
+        $lookup = array(
+            'top' => 'v',
+            'bottom' => 'v',
+            'left' => 'h',
+            'right' => 'h',
+            'center' => 'c'
+        );
+
+        foreach ($bits as $bit) {
+            if ($bit === '') continue;
+
+            // test for keyword
+            $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
+            if (isset($lookup[$lbit])) {
+                $status = $lookup[$lbit];
+                if ($status == 'c') {
+                    // the axis of center depends on where it was written
+                    $status = ($i == 0) ? 'ch' : 'cv';
+                }
+                $keywords[$status] = $lbit;
+                $i++;
+            }
+
+            // test for length
+            $r = $this->length->validate($bit, $config, $context);
+            if ($r !== false) {
+                $measures[] = $r;
+                $i++;
+            }
+
+            // test for percentage
+            $r = $this->percentage->validate($bit, $config, $context);
+            if ($r !== false) {
+                $measures[] = $r;
+                $i++;
+            }
+
+        }
+
+        if (!$i) return false; // no valid values were caught
+
+
+        $ret = array();
+
+        // horizontal component
+        if ($keywords['h']) {
+            $ret[] = $keywords['h'];
+        } elseif ($keywords['ch']) {
+            $ret[] = $keywords['ch'];
+            $keywords['cv'] = false; // prevent re-use: center = center center
+        } elseif (count($measures)) {
+            $ret[] = array_shift($measures);
+        }
+
+        // vertical component
+        if ($keywords['v']) {
+            $ret[] = $keywords['v'];
+        } elseif ($keywords['cv']) {
+            $ret[] = $keywords['cv'];
+        } elseif (count($measures)) {
+            $ret[] = array_shift($measures);
+        }
+
+        if (empty($ret)) return false;
+        return implode(' ', $ret);
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php
@ -0,0 +1,45 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates the border property as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of properties this property is shorthand for.
+     */
+    var $info = array();
+
+    /**
+     * Copies the width, style and color validators out of the CSS
+     * definition.
+     * @param $config HTMLPurifier_Config object
+     */
+    function HTMLPurifier_AttrDef_CSS_Border($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['border-width'] = $def->info['border-width'];
+        $this->info['border-style'] = $def->info['border-style'];
+        $this->info['border-top-color'] = $def->info['border-top-color'];
+    }
+
+    /**
+     * Validates a border shorthand value.
+     *
+     * Each space separated piece is offered to the component validators
+     * that have not matched yet; pieces nobody accepts are dropped.
+     *
+     * @param $string Value of the border property.
+     * @param $config HTMLPurifier_Config object
+     * @param $context Context object, handed on to the component validators.
+     * @return Space separated list of accepted components, or false when no
+     *         piece was accepted.
+     */
+    function validate($string, $config, &$context) {
+        $string = $this->parseCDATA($string);
+        // we specifically will not support rgb() syntax with spaces
+        $bits = explode(' ', $string);
+        $done = array(); // segments we've finished
+        $ret = ''; // return value
+        foreach ($bits as $bit) {
+            // consecutive spaces leave empty pieces behind
+            if ($bit === '') continue;
+            foreach ($this->info as $propname => $validator) {
+                if (isset($done[$propname])) continue;
+                $r = $validator->validate($bit, $config, $context);
+                if ($r !== false) {
+                    $ret .= $r . ' ';
+                    $done[$propname] = true;
+                    break;
+                }
+            }
+        }
+        // signal failure with false, as the other validators do; an empty
+        // string would be written out by the CSS validator as "border:;"
+        if ($ret === '') return false;
+        return rtrim($ret);
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php
@ -0,0 +1,97 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates Color as defined by CSS.
+ */
+class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Color keyword lookup table.
+     * @todo Extend it to include all usually allowed colors.
+     */
+    var $colors = array(
+        'maroon'    => '#800000',
+        'red'       => '#F00',
+        'orange'    => '#FFA500',
+        'yellow'    => '#FF0',
+        'olive'     => '#808000',
+        'purple'    => '#800080',
+        'fuchsia'   => '#F0F',
+        'white'     => '#FFF',
+        'lime'      => '#0F0',
+        'green'     => '#008000',
+        'navy'      => '#000080',
+        'blue'      => '#00F',
+        'aqua'      => '#0FF',
+        'teal'      => '#008080',
+        'black'     => '#000',
+        'silver'    => '#C0C0C0',
+        'gray'      => '#808080'
+    );
+
+    /**
+     * Validates a CSS color.
+     *
+     * Accepts the keywords in $colors (returned as their hex value),
+     * three or six digit hexadecimal notation (returned unchanged) and
+     * rgb() literals, whose components are clamped and which are written
+     * back as "rgb(r,g,b)".
+     *
+     * @param $color String color value.
+     * @param $config HTMLPurifier_Config object (unused here).
+     * @param $context Context object (unused here).
+     * @return Cleaned color string, or false if the value is not a color.
+     */
+    function validate($color, $config, &$context) {
+
+        $color = trim($color);
+        if (!$color) return false;
+
+        $lower = strtolower($color);
+        if (isset($this->colors[$lower])) return $this->colors[$lower];
+
+        if ($color[0] === '#') {
+            // hexadecimal handling
+            $hex = substr($color, 1);
+            $length = strlen($hex);
+            if ($length !== 3 && $length !== 6) return false;
+            if (!ctype_xdigit($hex)) return false;
+        } else {
+            // rgb literal handling
+            // strpos() gives 0 for a match at the start and false for no
+            // match at all, so only a strict comparison with 0 tells the
+            // two apart. The lowercased copy keeps "RGB(...)" acceptable.
+            if (strpos($lower, 'rgb(') !== 0) return false;
+            $length = strlen($color);
+            // the first closing parenthesis must also be the last character
+            if (strpos($color, ')') !== $length - 1) return false;
+            $triad = substr($color, 4, $length - 4 - 1);
+            $parts = explode(',', $triad);
+            if (count($parts) !== 3) return false;
+            $type = false; // to ensure that they're all the same type
+            $new_parts = array();
+            foreach ($parts as $part) {
+                $part = trim($part);
+                if ($part === '') return false;
+                $length = strlen($part);
+                if ($part[$length - 1] === '%') {
+                    // handle percents
+                    if (!$type) {
+                        $type = 'percentage';
+                    } elseif ($type !== 'percentage') {
+                        return false;
+                    }
+                    $num = (float) substr($part, 0, $length - 1);
+                    if ($num < 0) $num = 0;
+                    if ($num > 100) $num = 100;
+                    $new_parts[] = "$num%";
+                } else {
+                    // handle integers
+                    if (!$type) {
+                        $type = 'integer';
+                    } elseif ($type !== 'integer') {
+                        return false;
+                    }
+                    $num = (int) $part;
+                    if ($num < 0) $num = 0;
+                    if ($num > 255) $num = 255;
+                    $new_parts[] = (string) $num;
+                }
+            }
+            $new_triad = implode(',', $new_parts);
+            $color = "rgb($new_triad)";
+        }
+
+        return $color;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php
@ -0,0 +1,38 @@
+<?php
+
+/**
+ * Allows multiple validators to attempt to validate attribute.
+ * 
+ * Composite is just what it sounds like: a composite of many validators.
+ * This means that multiple HTMLPurifier_AttrDef objects will have a whack
+ * at the string.  If one of them passes, that's what is returned.  This is
+ * especially useful for CSS values, which often are a choice between
+ * an enumerated set of predefined values or a flexible data type.
+ */
+class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * List of HTMLPurifier_AttrDef objects that may process strings
+     * @protected
+     */
+    var $defs;
+
+    /**
+     * @param $defs List of HTMLPurifier_AttrDef objects
+     */
+    function HTMLPurifier_AttrDef_CSS_Composite($defs) {
+        $this->defs = $defs;
+    }
+
+    /**
+     * Offers the string to every validator in list order and returns the
+     * first result that is not false.
+     * @param $string String to validate.
+     * @param $config HTMLPurifier_Config object, handed on to each validator.
+     * @param $context Context object, handed on to each validator.
+     * @return Result of the first validator that accepts, false if none does.
+     */
+    function validate($string, $config, &$context) {
+        foreach (array_keys($this->defs) as $key) {
+            // go through the stored list so the stored object itself is used
+            $outcome = $this->defs[$key]->validate($string, $config, $context);
+            if ($outcome === false) continue;
+            return $outcome;
+        }
+        return false;
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php
@ -0,0 +1,154 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates shorthand CSS property font.
+ * 
+ * The value is either a single system-font keyword (see $system_fonts) or
+ * a space-separated shorthand that is consumed in three stages: optional
+ * font-style / font-variant / font-weight tokens, then a mandatory
+ * font-size with an optional "/line-height", then a mandatory font-family
+ * made of everything that remains.
+ */
+class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
+{
+    
+    /**
+     * Local copy of component validators.
+     * 
+     * @note If we moved specific CSS property definitions to their own
+     *       classes instead of having them be assembled at run time by
+     *       CSSDefinition, this wouldn't be necessary.  We'd instantiate
+     *       our own copies.
+     */
+    var $info = array();
+    
+    /**
+     * System font keywords.
+     */
+    var $system_fonts = array(
+        'caption' => true,
+        'icon' => true,
+        'menu' => true,
+        'message-box' => true,
+        'small-caption' => true,
+        'status-bar' => true
+    );
+    
+    /**
+     * Copies the six component validators out of the CSS definition.
+     * @param $config Configuration object; only getCSSDefinition() is used
+     */
+    function HTMLPurifier_AttrDef_CSS_Font($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['font-style']   = $def->info['font-style'];
+        $this->info['font-variant'] = $def->info['font-variant'];
+        $this->info['font-weight']  = $def->info['font-weight'];
+        $this->info['font-size']    = $def->info['font-size'];
+        $this->info['line-height']  = $def->info['line-height'];
+        $this->info['font-family']  = $def->info['font-family'];
+    }
+    
+    /**
+     * @param $string  Raw value of the font property
+     * @param $config  Configuration object passed on to component validators
+     * @param $context Context object passed on to component validators
+     * @return Rebuilt shorthand string, a lowercased system-font keyword,
+     *         or false when the value is rejected.
+     */
+    function validate($string, $config, &$context) {
+        
+        // regular pre-processing
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+        
+        // check if it's one of the keywords
+        $lowercase_string = strtolower($string);
+        if (isset($this->system_fonts[$lowercase_string])) {
+            return $lowercase_string;
+        }
+        
+        $bits = explode(' ', $string); // bits to process
+        $stage = 0; // this indicates what we're looking for
+        $caught = array(); // which stage 0 properties have we caught?
+        $stage_1 = array('font-style', 'font-variant', 'font-weight');
+        $final = ''; // output
+        
+        for ($i = 0, $size = count($bits); $i < $size; $i++) {
+            // consecutive spaces leave empty fragments behind; ignore them
+            if ($bits[$i] === '') continue;
+            switch ($stage) {
+                
+                // attempting to catch font-style, font-variant or font-weight
+                case 0:
+                    foreach ($stage_1 as $validator_name) {
+                        if (isset($caught[$validator_name])) continue;
+                        $r = $this->info[$validator_name]->validate(
+                                                $bits[$i], $config, $context);
+                        if ($r !== false) {
+                            $final .= $r . ' ';
+                            $caught[$validator_name] = true;
+                            // leaves only the foreach, not the switch
+                            break;
+                        }
+                    }
+                    // all three caught, continue on
+                    if (count($caught) >= 3) $stage = 1;
+                    // token was consumed: move on to the next bit.  Otherwise
+                    // fall through on purpose and retry this same bit as a
+                    // font-size.
+                    if ($r !== false) break;
+                
+                // attempting to catch font-size and perhaps line-height
+                case 1:
+                    $found_slash = false;
+                    if (strpos($bits[$i], '/') !== false) {
+                        list($font_size, $line_height) =
+                                                    explode('/', $bits[$i]);
+                        if ($line_height === '') {
+                            // ooh, there's a space after the slash!
+                            $line_height = false;
+                            $found_slash = true;
+                        }
+                    } else {
+                        $font_size = $bits[$i];
+                        $line_height = false;
+                    }
+                    $r = $this->info['font-size']->validate(
+                                              $font_size, $config, $context);
+                    if ($r !== false) {
+                        $final .= $r;
+                        // attempt to catch line-height
+                        if ($line_height === false) {
+                            // we need to scroll forward
+                            for ($j = $i + 1; $j < $size; $j++) {
+                                if ($bits[$j] === '') continue;
+                                if ($bits[$j] === '/') {
+                                    if ($found_slash) {
+                                        // a second slash is malformed
+                                        return false;
+                                    } else {
+                                        $found_slash = true;
+                                        continue;
+                                    }
+                                }
+                                // first non-empty, non-slash token after the size
+                                $line_height = $bits[$j];
+                                break;
+                            }
+                        } else {
+                            // slash already found
+                            $found_slash = true;
+                            $j = $i;
+                        }
+                        if ($found_slash) {
+                            // skip past the tokens used for the line-height
+                            $i = $j;
+                            $r = $this->info['line-height']->validate(
+                                              $line_height, $config, $context);
+                            // an invalid line-height is dropped, not fatal
+                            if ($r !== false) {
+                                $final .= '/' . $r;
+                            }
+                        }
+                        $final .= ' ';
+                        $stage = 2;
+                        break;
+                    }
+                    // no valid font-size: the whole shorthand is rejected
+                    return false;
+                
+                // attempting to catch font-family
+                case 2:
+                    // everything from this bit to the end is the family list
+                    $font_family =
+                        implode(' ', array_slice($bits, $i, $size - $i));
+                    $r = $this->info['font-family']->validate(
+                                              $font_family, $config, $context);
+                    if ($r !== false) {
+                        $final .= $r . ' ';
+                        // processing completed successfully
+                        return rtrim($final);
+                    }
+                    return false;
+            }
+        }
+        // ran out of bits before a font-family was reached
+        return false;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
@ -0,0 +1,66 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+// whitelisting allowed fonts would be nice
+
+/**
+ * Validates a comma-separated font family list according to the CSS spec.
+ */
+class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Lookup table of generic font family keywords.
+     * @protected
+     */
+    var $generic_names = array(
+        'serif' => true,
+        'sans-serif' => true,
+        'monospace' => true,
+        'fantasy' => true,
+        'cursive' => true
+    );
+
+    /**
+     * @param $string  Raw font-family value
+     * @param $config  Configuration object (unused here)
+     * @param $context Context object (unused here)
+     * @return Cleaned, comma-separated family list, or false when no entry
+     *         survives.
+     */
+    function validate($string, $config, &$context) {
+        // font names are assumed never to contain commas themselves
+        $candidates = explode(',', $this->parseCDATA($string));
+        $final = '';
+        foreach ($candidates as $candidate) {
+            $candidate = trim($candidate);
+            if ($candidate === '') {
+                continue;
+            }
+            // generic keywords pass through untouched
+            if (isset($this->generic_names[$candidate])) {
+                $final .= $candidate . ', ';
+                continue;
+            }
+            // quoted name: needs a matching closing quote and real content
+            $opener = $candidate[0];
+            if ($opener === '"' || $opener === "'") {
+                $size = strlen($candidate);
+                if ($size <= 2 || $candidate[$size - 1] !== $opener) {
+                    continue;
+                }
+                $candidate = substr($candidate, 1, $size - 2);
+            }
+            if (ctype_alnum($candidate)) {
+                // plain alphanumeric name, emitted bare
+                $final .= $candidate . ', ';
+            } elseif (ctype_alnum(str_replace(array(' ', '.', '!'), '', $candidate))) {
+                // name containing spaces, dots or bangs, emitted quoted
+                $final .= "'$candidate', ";
+            }
+        }
+        $final = rtrim($final, ', ');
+        return ($final === '') ? false : $final;
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php
@ -0,0 +1,56 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
+
+/**
+ * Represents a Length as defined by CSS: a number followed by a unit.
+ */
+class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Lookup table of accepted units.
+     * @warning validate() slices off exactly two trailing characters, so
+     *          every unit listed here must be two characters long.
+     */
+    var $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
+         'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true);
+
+    /**
+     * Instance of HTMLPurifier_AttrDef_CSS_Number that checks the numeric part
+     */
+    var $number_def;
+
+    /**
+     * @param $non_negative Bool indicating whether negative values are
+     *                      forbidden.
+     */
+    function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
+        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
+    }
+
+    /**
+     * @param $length  Raw length value
+     * @param $config  Configuration object passed on to the number validator
+     * @param $context Context object passed on to the number validator
+     * @return Normalized length (number plus lowercase unit), '0', or false
+     */
+    function validate($length, $config, &$context) {
+
+        $length = $this->parseCDATA($length);
+        if ($length === '') return false;
+        // a bare zero is the only length allowed without a unit
+        if ($length === '0') return '0';
+
+        $size = strlen($length);
+        if ($size === 1) return false; // too short to hold a unit
+
+        // split into numeric part and two-character unit
+        $split = $size - 2;
+        $unit = strtolower(substr($length, $split));
+        if (!isset($this->units[$unit])) return false;
+
+        $magnitude = $this->number_def->validate(
+            substr($length, 0, $split), $config, $context
+        );
+        if ($magnitude === false) return false;
+
+        return $magnitude . $unit;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php
@ -0,0 +1,80 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates shorthand CSS property list-style.
+ * @warning Does not support url tokens that have internal spaces.
+ */
+class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Local copy of component validators, keyed by full property name.
+     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
+     */
+    var $info;
+
+    /**
+     * Copies the three component validators out of the CSS definition.
+     * @param $config Configuration object; only getCSSDefinition() is used
+     */
+    function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
+        $def = $config->getCSSDefinition();
+        $this->info['list-style-type']     = $def->info['list-style-type'];
+        $this->info['list-style-position'] = $def->info['list-style-position'];
+        $this->info['list-style-image'] = $def->info['list-style-image'];
+    }
+
+    /**
+     * @param $string  Raw list-style value
+     * @param $config  Configuration object passed on to component validators
+     * @param $context Context object passed on to component validators
+     * @return Rebuilt shorthand in the order type, image, position, or
+     *         false when nothing usable was found.
+     */
+    function validate($string, $config, &$context) {
+
+        // regular pre-processing
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+
+        // assumes URI doesn't have spaces in it
+        $bits = explode(' ', strtolower($string)); // bits to process
+
+        $caught = array();
+        $caught['type']     = false;
+        $caught['position'] = false;
+        $caught['image']    = false;
+
+        $i = 0; // number of catches
+        $none = false;
+
+        foreach ($bits as $bit) {
+            // All three components are filled: stop scanning, but still
+            // fall through to the assembly below.  A bare return here
+            // would hand NULL back to the caller instead of the value.
+            if ($i >= 3) break;
+            if ($bit === '') continue;
+            foreach ($caught as $key => $status) {
+                if ($status !== false) continue;
+                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
+                if ($r === false) continue;
+                if ($r === 'none') {
+                    // only the first 'none' is honoured, and never as an image
+                    if ($none) continue;
+                    else $none = true;
+                    if ($key == 'image') continue;
+                }
+                $caught[$key] = $r;
+                $i++;
+                break;
+            }
+        }
+
+        if (!$i) return false;
+
+        $ret = array();
+
+        // construct type
+        if ($caught['type']) $ret[] = $caught['type'];
+
+        // construct image
+        if ($caught['image']) $ret[] = $caught['image'];
+
+        // construct position
+        if ($caught['position']) $ret[] = $caught['position'];
+
+        if (empty($ret)) return false;
+        return implode(' ', $ret);
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php
@ -0,0 +1,58 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Framework class for strings that involve multiple values.
+ *
+ * Certain CSS properties such as border-width and margin allow multiple
+ * lengths to be specified.  This class can take a vanilla border-width
+ * definition and multiply it, usually into a max of four.
+ *
+ * @note Even though the CSS specification isn't clear about it, inherit
+ *       can only be used alone: it will never manifest as part of a multi
+ *       shorthand declaration.  Thus, this class does not allow inherit.
+ */
+class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Instance of component definition to defer validation to.
+     */
+    var $single;
+
+    /**
+     * Max number of values allowed.
+     */
+    var $max;
+
+    /**
+     * @param $single HTMLPurifier_AttrDef to multiply
+     * @param $max Max number of values allowed (usually four)
+     */
+    function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
+        $this->single = $single;
+        $this->max = $max;
+    }
+
+    /**
+     * @param $string  Raw space-separated value list
+     * @param $config  Configuration object passed on to the single validator
+     * @param $context Context object passed on to the single validator
+     * @return Space-joined list of the accepted values (at most $max of
+     *         them), or false when none were accepted.
+     */
+    function validate($string, $config, &$context) {
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+        $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
+        $length = count($parts);
+        $final = '';
+        for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) {
+            // Runs of spaces leave empty fragments behind.  ctype_space('')
+            // is false, so the empty string needs its own check to be
+            // skipped rather than handed to the validator.
+            if ($parts[$i] === '' || ctype_space($parts[$i])) continue;
+            $result = $this->single->validate($parts[$i], $config, $context);
+            if ($result !== false) {
+                $final .= $result . ' ';
+                $num++;
+            }
+        }
+        if ($final === '') return false;
+        return rtrim($final);
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php
@ -0,0 +1,61 @@
+<?php
+
+/**
+ * Validates a number as defined by the CSS spec.
+ *
+ * Accepts an optional sign followed by either digits only, or zero or more
+ * digits, a period and further digits ("12", "-3.5", ".5", "+0.25").
+ * The result is normalized: a leading plus sign, leading integer zeros and
+ * trailing fractional zeros are removed.
+ */
+class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Bool indicating whether negative values are forbidden.
+     */
+    var $non_negative = false;
+
+    /**
+     * @param $non_negative Bool indicating whether negatives are forbidden
+     */
+    function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
+        $this->non_negative = $non_negative;
+    }
+
+    /**
+     * @param $number  Raw numeric value
+     * @param $config  Configuration object (unused here)
+     * @param $context Context object (unused here)
+     * @return Normalized number as a string, or false when invalid
+     */
+    function validate($number, $config, &$context) {
+
+        $number = $this->parseCDATA($number);
+
+        if ($number === '') return false;
+
+        $sign = '';
+        switch ($number[0]) {
+            case '-':
+                if ($this->non_negative) return false;
+                $sign = '-';
+                // intentional fall-through: strip the sign character
+            case '+':
+                $number = (string) substr($number, 1);
+        }
+
+        if (ctype_digit($number)) {
+            $number = ltrim($number, '0');
+            return $number ? $sign . $number : '0';
+        }
+
+        // A period is the only non-digit allowed from here on.  The test
+        // must be against false: strpos() returns 0 for ".5", which is a
+        // valid CSS number.
+        if (strpos($number, '.') === false) return false;
+
+        list($left, $right) = explode('.', $number, 2);
+
+        // a lone period carries no digits at all
+        if ($left === '' && $right === '') return false;
+        // the integer part may be empty (".5"), otherwise digits only
+        if ($left !== '' && !ctype_digit($left)) return false;
+
+        $left  = ltrim($left, '0');
+        $right = rtrim($right, '0');
+
+        if ($right === '') {
+            return $left ? $sign . $left : '0';
+        } elseif (!ctype_digit($right)) {
+            return false;
+        }
+
+        return $sign . $left . '.' . $right;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php
@ -0,0 +1,43 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
+
+/**
+ * Validates a Percentage as defined by the CSS spec: a number followed
+ * by a percent sign.
+ */
+class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Instance of HTMLPurifier_AttrDef_CSS_Number that checks the numeric part
+     */
+    var $number_def;
+
+    /**
+     * @param $non_negative Bool indicating whether to forbid negative values
+     */
+    function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
+        $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
+    }
+
+    /**
+     * @param $string  Raw percentage value
+     * @param $config  Configuration object passed on to the number validator
+     * @param $context Context object passed on to the number validator
+     * @return Normalized number with a trailing '%', or false when invalid
+     */
+    function validate($string, $config, &$context) {
+
+        $string = $this->parseCDATA($string);
+        if ($string === '') return false;
+
+        // need at least one character in front of the percent sign
+        $last = strlen($string) - 1;
+        if ($last === 0) return false;
+        if ($string[$last] !== '%') return false;
+
+        $magnitude = $this->number_def->validate(
+            substr($string, 0, $last), $config, $context
+        );
+
+        return ($magnitude === false) ? false : "$magnitude%";
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php
@ -0,0 +1,41 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates the value for the CSS property text-decoration.
+ * @note This class could be generalized into a version that acts sort of
+ *       like Enum except you can compound the allowed values.
+ */
+class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Lookup table of keywords that may be combined.
+     * @protected
+     */
+    var $allowed_values = array(
+        'line-through' => true,
+        'overline' => true,
+        'underline' => true
+    );
+
+    /**
+     * @param $string  Raw text-decoration value
+     * @param $config  Configuration object (unused here)
+     * @param $context Context object (unused here)
+     * @return Space-joined list of the recognized keywords, lowercased and
+     *         in input order, or false when none is recognized.
+     */
+    function validate($string, $config, &$context) {
+
+        $normalized = strtolower($this->parseCDATA($string));
+        $kept = '';
+        foreach (explode(' ', $normalized) as $keyword) {
+            if (!isset($this->allowed_values[$keyword])) {
+                continue; // unknown tokens are silently dropped
+            }
+            $kept = ($kept === '') ? $keyword : $kept . ' ' . $keyword;
+        }
+        return ($kept === '') ? false : $kept;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php
@ -0,0 +1,58 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef/URI.php';
+
+/**
+ * Validates a URI in CSS syntax, which uses url('http://example.com')
+ * @note While theoretically speaking a URI in a CSS document could
+ *       be non-embedded, as of CSS2 there is no such usage so we're
+ *       generalizing it. This may need to be changed in the future.
+ * @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
+ *          the separator, you cannot put a literal semicolon in
+ *          in the URI. Try percent encoding it, in that case.
+ */
+class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
+{
+
+    function HTMLPurifier_AttrDef_CSS_URI() {
+        $this->HTMLPurifier_AttrDef_URI(true); // always embedded
+    }
+
+    /**
+     * Unwraps the url(...) notation, lets the parent class validate the
+     * URI itself, then re-wraps and re-escapes the result.
+     *
+     * @param $uri_string Raw CSS value, expected to look like url(...)
+     * @param $config     Configuration object passed on to the parent
+     * @param $context    Context object passed on to the parent
+     * @return Value of the form url(...), or false when invalid
+     */
+    function validate($uri_string, $config, &$context) {
+
+        $uri_string = $this->parseCDATA($uri_string);
+        if (strpos($uri_string, 'url(') !== 0) return false;
+        $uri_string = (string) substr($uri_string, 4);
+        $new_length = strlen($uri_string) - 1;
+        // nothing after "url(" means there is no closing parenthesis to
+        // look at; bail out instead of reading offset -1 of an empty string
+        if ($new_length < 0) return false;
+        if ($uri_string[$new_length] !== ')') return false;
+        $uri = trim(substr($uri_string, 0, $new_length));
+
+        if (isset($uri[0]) && ($uri[0] === "'" || $uri[0] === '"')) {
+            $quote = $uri[0];
+            $new_length = strlen($uri) - 1;
+            // a lone quote character opens a string that is never closed
+            if ($new_length < 1) return false;
+            if ($uri[$new_length] !== $quote) return false;
+            $uri = (string) substr($uri, 1, $new_length - 1);
+        }
+
+        $raw     = array(  '(',   ')',   ',',   ' ',   '"',   "'");
+        $escaped = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
+        // undo CSS backslash escapes before handing over to the parent
+        $uri = str_replace($escaped, $raw, $uri);
+
+        $result = parent::validate($uri, $config, $context);
+
+        if ($result === false) return false;
+
+        // escape necessary characters according to CSS spec
+        // except for the comma, none of these should appear in the
+        // URI at all
+        $result = str_replace($raw, $escaped, $result);
+
+        return "url($result)";
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php
@ -0,0 +1,46 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+// Enum = Enumerated
+/**
+ * Validates a keyword against a fixed list of valid values.
+ */
+class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * Lookup table whose keys are the valid values.
+     */
+    var $valid_values   = array();
+
+    /**
+     * Bool indicating whether or not enumeration is case sensitive.
+     * @note In general this is always case insensitive.
+     */
+    var $case_sensitive = false; // values according to W3C spec
+
+    /**
+     * @param $valid_values List of valid values
+     * @param $case_sensitive Bool indicating whether or not case sensitive
+     */
+    function HTMLPurifier_AttrDef_Enum(
+        $valid_values = array(), $case_sensitive = false
+    ) {
+        // flip the list so membership becomes a key lookup
+        $this->valid_values = array_flip($valid_values);
+        $this->case_sensitive = $case_sensitive;
+    }
+
+    /**
+     * @param $string  Raw keyword
+     * @param $config  Configuration object (unused here)
+     * @param $context Context object (unused here)
+     * @return Trimmed keyword (lowercased unless case sensitive) when it is
+     *         a valid value, false otherwise.
+     */
+    function validate($string, $config, &$context) {
+        $keyword = trim($string);
+        if (!$this->case_sensitive && !ctype_lower($keyword)) {
+            $keyword = strtolower($keyword);
+        }
+        if (isset($this->valid_values[$keyword])) {
+            return $keyword;
+        }
+        return false;
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php
@ -0,0 +1,121 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/IDAccumulator.php';
+
+// Configuration directives read by the ID attribute validator defined later
+// in this file.  Each define() call is passed a namespace, a directive name,
+// a default value, a type string and a human-readable description
+// (NOTE(review): parameter meaning inferred from the arguments; confirm
+// against HTMLPurifier_ConfigSchema).
+
+// Attr.EnableID: master switch for the id attribute, off by default.
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'EnableID', false, 'bool',
+    'Allows the ID attribute in HTML.  This is disabled by default '.
+    'due to the fact that without proper configuration user input can '.
+    'easily break the validation of a webpage by specifying an ID that is '.
+    'already on the surrounding HTML.  If you don\'t mind throwing caution to '.
+    'the wind, enable this directive, but I strongly recommend you also '.
+    'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
+    'user supplied IDs (%Attr.IDPrefix).  This directive has been available '.
+    'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
+    'versions.'
+);
+// HTML.EnableAttrID is registered as an alias of Attr.EnableID.
+HTMLPurifier_ConfigSchema::defineAlias(
+    'HTML', 'EnableAttrID', 'Attr', 'EnableID'
+);
+
+// Attr.IDPrefix: string put in front of every user-supplied ID.
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'IDPrefix', '', 'string',
+    'String to prefix to IDs.  If you have no idea what IDs your pages '.
+    'may use, you may opt to simply add a prefix to all user-submitted ID '.
+    'attributes so that they are still usable, but will not conflict with '.
+    'core page IDs. Example: setting the directive to \'user_\' will result in '.
+    'a user submitted \'foo\' to become \'user_foo\'  Be sure to set '.
+    '%HTML.EnableAttrID to true before using '.
+    'this.  This directive was available since 1.2.0.'
+);
+
+// Attr.IDPrefixLocal: extra prefix appended after Attr.IDPrefix; the
+// validator emits a warning if this is set while Attr.IDPrefix is empty.
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'IDPrefixLocal', '', 'string',
+    'Temporary prefix for IDs used in conjunction with %Attr.IDPrefix.  If '.
+    'you need to allow multiple sets of '.
+    'user content on web page, you may need to have a seperate prefix that '.
+    'changes with each iteration.  This way, seperately submitted user content '.
+    'displayed on the same page doesn\'t clobber each other. Ideal values '.
+    'are unique identifiers for the content it represents (i.e. the id of '.
+    'the row in the database). Be sure to add a seperator (like an underscore) '.
+    'at the end.  Warning: this directive will not work unless %Attr.IDPrefix '.
+    'is set to a non-empty value! This directive was available since 1.2.0.'
+);
+
+// Attr.IDBlacklistRegexp: optional PCRE pattern; a matching ID is rejected.
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'IDBlacklistRegexp', null, 'string/null',
+    'PCRE regular expression to be matched against all IDs. If the expression '.
+    'is matches, the ID is rejected. Use this with care: may cause '.
+    'significant degradation. ID matching is done after all other '.
+    'validation. This directive was available since 1.6.0.'
+);
+
+/**
+ * Validates the HTML attribute ID.
+ * @warning Even though this is the id processor, it
+ *          will ignore the directive Attr:IDBlacklist, since it will only
+ *          go according to the ID accumulator. Since the accumulator is
+ *          automatically generated, it will have already absorbed the
+ *          blacklist. If you're hacking around, make sure you use load()!
+ */
+
+class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
+{
+
+    // ref functionality disabled, since we also have to verify
+    // whether or not the ID it refers to exists
+
+    /**
+     * @param $id      Raw id attribute value
+     * @param $config  Configuration object; the Attr.EnableID, Attr.IDPrefix,
+     *                 Attr.IDPrefixLocal and Attr.IDBlacklistRegexp
+     *                 directives are read
+     * @param $context Context object holding the 'IDAccumulator' entry
+     * @return The trimmed and, if configured, prefixed ID when it is
+     *         accepted (it is then added to the accumulator); false
+     *         otherwise.
+     */
+    function validate($id, $config, &$context) {
+
+        if (!$config->get('Attr', 'EnableID')) return false;
+
+        $id = trim($id);
+        if ($id === '') return false;
+
+        $prefix = $config->get('Attr', 'IDPrefix');
+        if ($prefix === '') {
+            // a local prefix without a global one is a configuration mistake
+            if ($config->get('Attr', 'IDPrefixLocal') !== '') {
+                trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
+                    '%Attr.IDPrefix is set', E_USER_WARNING);
+            }
+        } else {
+            $prefix .= $config->get('Attr', 'IDPrefixLocal');
+            // leave the ID alone when it already starts with the prefix
+            if (strpos($id, $prefix) !== 0) {
+                $id = $prefix . $id;
+            }
+        }
+
+        // reject IDs that are already registered
+        $id_accumulator =& $context->get('IDAccumulator');
+        if (isset($id_accumulator->ids[$id])) return false;
+
+        // syntax check without regular expressions: a letter first, then
+        // only letters, digits, colons, hyphens, periods and underscores
+        $well_formed = ctype_alpha($id);
+        if (!$well_formed) {
+            if (!ctype_alpha($id[0])) return false;
+            // trim() stops at the first disallowed character from either
+            // end, so an empty remainder means every character is allowed
+            $well_formed = (trim($id, 'A..Za..z0..9:-._') === '');
+        }
+
+        $blacklist = $config->get('Attr', 'IDBlacklistRegexp');
+        if ($blacklist && preg_match($blacklist, $id)) {
+            return false;
+        }
+
+        if (!$well_formed) return false;
+
+        $id_accumulator->add($id);
+        return $id;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php
@ -0,0 +1,44 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
+
+/**
+ * Validates the HTML type length (not to be confused with CSS's length).
+ *
+ * A length is whatever the parent pixel validator accepts, or an integer
+ * percentage.
+ */
+
+class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
+{
+
+    /**
+     * @param $string  Raw attribute value
+     * @param $config  Configuration object passed on to the parent
+     * @param $context Context object passed on to the parent
+     * @return The parent's result when it accepts the value; otherwise an
+     *         integer percentage clamped to the range 0% to 100%, or false.
+     */
+    function validate($string, $config, &$context) {
+
+        $string = trim($string);
+        if ($string === '') return false;
+
+        // give the parent (pixel) validator the first try
+        $pixels = parent::validate($string, $config, $context);
+        if ($pixels !== false) return $pixels;
+
+        $last = strlen($string) - 1;
+        if ($string[$last] !== '%') return false;
+
+        $points = substr($string, 0, $last);
+        if (!is_numeric($points)) return false;
+
+        // clamp into the valid percentage range
+        $points = (int) $points;
+        if ($points < 0) {
+            $points = 0;
+        } elseif ($points > 100) {
+            $points = 100;
+        }
+
+        return ((string) $points) . '%';
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php
@ -0,0 +1,75 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+// Configuration directives read by the rel/rev link type validator defined
+// later in this file.  Both are registered with type 'lookup' and an empty
+// array as default; the validator rejects every value while the list is
+// empty.
+
+// Attr.AllowedRel: link types accepted in the rel attribute.
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'AllowedRel', array(), 'lookup',
+    'List of allowed forward document relationships in the rel attribute. '.
+    'Common values may be nofollow or print. By default, this is empty, '.
+    'meaning that no document relationships are allowed. This directive '.
+    'was available since 1.6.0.'
+);
+
+// Attr.AllowedRev: link types accepted in the rev attribute.
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'AllowedRev', array(), 'lookup',
+    'List of allowed reverse document relationships in the rev attribute. '.
+    'This attribute is a bit of an edge-case; if you don\'t know what it '.
+    'is for, stay away. This directive was available since 1.6.0.'
+);
+
+/**
+ * Validates a rel/rev link attribute against a directive of allowed values
+ * @note Enum cannot be used here because link types allow multiple
+ *       values.
+ * @note Assumes link types are ASCII text
+ */
+class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
+{
+
+    /** Maps an attribute name to the Attr directive holding its whitelist */
+    var $configLookup = array(
+        'rel' => 'AllowedRel',
+        'rev' => 'AllowedRev'
+    );
+
+    /** Name of the Attr directive this instance reads. */
+    var $name;
+
+    /**
+     * @param $name Attribute name, 'rel' or 'rev'; anything else raises
+     *              an E_USER_ERROR and leaves $name unset.
+     */
+    function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
+        if (isset($this->configLookup[$name])) {
+            $this->name = $this->configLookup[$name];
+            return;
+        }
+        trigger_error('Unrecognized attribute name for link '.
+            'relationship.', E_USER_ERROR);
+    }
+
+    /**
+     * @param $string  Raw space-separated list of link types
+     * @param $config  Configuration object; the whitelist directive is read
+     * @param $context Context object (unused here)
+     * @return Space-joined list of the allowed link types, lowercased, with
+     *         duplicates removed and first-seen order kept, or false when
+     *         nothing is allowed.
+     */
+    function validate($string, $config, &$context) {
+
+        $allowed = $config->get('Attr', $this->name);
+        if (empty($allowed)) return false;
+
+        $seen = array(); // lookup to prevent duplicates
+        $kept = array(); // accepted link types in first-seen order
+        foreach (explode(' ', $this->parseCDATA($string)) as $token) {
+            $token = strtolower(trim($token));
+            if (isset($allowed[$token]) && !isset($seen[$token])) {
+                $seen[$token] = true;
+                $kept[] = $token;
+            }
+        }
+
+        if (empty($kept)) return false;
+
+        return implode(' ', $kept);
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php
@ -0,0 +1,44 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
+
+/**
+ * Validates a MultiLength as defined by the HTML spec.
+ *
+ * A multilength is either an integer (pixel count), a percentage, or
+ * a relative number (an optional integer followed by '*').
+ */
+class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
+{
+
+    /**
+     * @param $string  Raw attribute value
+     * @param $config  Configuration object, forwarded to the parent validator
+     * @param $context Context object, forwarded to the parent validator
+     * @return Whatever the parent Length validator accepts; otherwise the
+     *         normalized relative length ('*', '0' or 'N*'), or false.
+     */
+    function validate($string, $config, &$context) {
+
+        $string = trim($string);
+        if ($string === '') return false;
+
+        // give the parent Length validator the first shot
+        $as_length = parent::validate($string, $config, $context);
+        if ($as_length !== false) return $as_length;
+
+        // anything left over has to end in '*' to be a relative length
+        $cut = strlen($string) - 1;
+        if ($string[$cut] !== '*') return false;
+
+        $weight = substr($string, 0, $cut);
+        if ($weight == '') return '*';
+        if (!is_numeric($weight)) return false;
+
+        $weight = (int) $weight;
+        if ($weight < 0) return false;
+
+        // '0*' collapses to '0' and '1*' to the bare '*'
+        if ($weight == 0) return '0';
+        if ($weight == 1) return '*';
+        return $weight . '*';
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php
@ -0,0 +1,51 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/Config.php';
+
+/**
+ * Validates contents based on NMTOKENS attribute type.
+ * @note The only current use for this is the class attribute in HTML
+ * @note Could have some functionality factored out into Nmtoken class
+ * @warning We cannot assume this class will be used only for 'class'
+ *          attributes. Not sure how to hook in magic behavior, then.
+ */
+class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * @param $string  Raw attribute value (whitespace-separated tokens)
+     * @param $config  Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return The tokens that match the pattern below, joined by single
+     *         spaces, or false when the value is empty, '0', or has no
+     *         matching token.
+     */
+    function validate($string, $config, &$context) {
+
+        $string = trim($string);
+
+        // '' and '0' both convert to false and are rejected up front
+        if (!$string) return false;
+
+        // Codepoints U+00A1 and up, and escapes, are deliberately not
+        // supported by this pattern.
+        $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
+                   '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
+                   '(?:(?=\s)|\z)/'; // look ahead for space or string end
+
+        $matches = array();
+        preg_match_all($pattern, $string, $matches);
+
+        $tokens = $matches[1];
+        if (empty($tokens)) return false;
+
+        // tokens never contain whitespace, so a plain join is sufficient
+        return implode(' ', $tokens);
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php
@ -0,0 +1,37 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates an integer representation of pixels according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * @param $string  Raw attribute value, optionally suffixed with "px"
+     * @param $config  Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return Pixel count as a string clamped to the range 0..1200, or false
+     *         when the value is empty or not numeric.
+     */
+    function validate($string, $config, &$context) {
+
+        $string = trim($string);
+        if ($string === '0') return $string;
+        if ($string === '')  return false;
+
+        // drop an optional trailing "px" unit before the numeric test
+        $length = strlen($string);
+        $number = (substr($string, $length - 2) == 'px')
+            ? substr($string, 0, $length - 2)
+            : $string;
+        if (!is_numeric($number)) return false;
+
+        $pixels = (int) $number;
+
+        // negative sizes are clamped to zero rather than rejected
+        if ($pixels < 0) return '0';
+
+        // upper-bound value, extremely high values can
+        // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
+        // WARNING, above link WILL crash you if you're using Windows
+        if ($pixels > 1200) return '1200';
+
+        return (string) $pixels;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php
@ -0,0 +1,75 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates an integer.
+ * @note While this class was modeled off the CSS definition, no currently
+ *       allowed CSS uses this type.  The properties that do are: widows,
+ *       orphans, z-index, counter-increment, counter-reset.  Some of the
+ *       HTML attributes, however, find use for a non-negative version of this.
+ */
+class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
+{
+
+    /** Bool: are negative values accepted? */
+    var $negative = true;
+
+    /** Bool: is zero accepted? */
+    var $zero = true;
+
+    /** Bool: are positive values accepted? */
+    var $positive = true;
+
+    /**
+     * @param $negative Bool indicating whether or not negative values are allowed
+     * @param $zero Bool indicating whether or not zero is allowed
+     * @param $positive Bool indicating whether or not positive values are allowed
+     */
+    function HTMLPurifier_AttrDef_Integer(
+        $negative = true, $zero = true, $positive = true
+    ) {
+        $this->positive = $positive;
+        $this->zero     = $zero;
+        $this->negative = $negative;
+    }
+
+    /**
+     * @param $integer Raw attribute value
+     * @param $config  Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return The integer as a string (a leading '+' removed, '-0' turned
+     *         into '0'), or false when it is not an integer or falls in a
+     *         disallowed range.
+     */
+    function validate($integer, $config, &$context) {
+
+        $integer = $this->parseCDATA($integer);
+        if ($integer === '') return false;
+
+        // A plain typecast is not used: certain fringe inputs must be
+        // rejected instead of being coerced into a number.
+
+        // split off a leading sign, but only a sign this instance permits
+        if ($this->negative && $integer[0] === '-') {
+            $unsigned = substr($integer, 1);
+            if ($unsigned === '0') {
+                $integer = '0'; // rm minus sign for zero
+            }
+        } elseif ($this->positive && $integer[0] === '+') {
+            $unsigned = substr($integer, 1);
+            $integer  = $unsigned; // rm unnecessary plus
+        } else {
+            $unsigned = $integer;
+        }
+
+        // what remains must consist of digits only
+        if (!ctype_digit($unsigned)) return false;
+
+        // range restrictions
+        if ($integer == 0 && !$this->zero)     return false;
+        if ($integer > 0  && !$this->positive) return false;
+        if ($integer < 0  && !$this->negative) return false;
+
+        return $integer;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php
@ -0,0 +1,75 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates the HTML attribute lang, effectively a language code.
+ * @note Built according to RFC 3066, which obsoleted RFC 1766
+ */
+class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * @param $string  Raw attribute value (hyphen-separated subtags)
+     * @param $config  Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return Lower-cased language code made of the longest valid leading
+     *         run of subtags, or false when the primary subtag is invalid.
+     */
+    function validate($string, $config, &$context) {
+
+        $string = trim($string);
+        if (!$string) return false;
+
+        $subtags = explode('-', $string);
+        $num_subtags = count($subtags);
+
+        if ($num_subtags == 0) return false; // sanity check
+
+        // primary subtag: a lone 'x' or 'i', or two to three letters
+        $primary = $subtags[0];
+        $length  = strlen($primary);
+        if ($length < 1 || $length > 3) return false;
+        if ($length == 1) {
+            if ($primary != 'x' && $primary != 'i') return false;
+        } else {
+            if (!ctype_alpha($primary)) return false;
+            $primary = strtolower($primary);
+        }
+
+        // remaining subtags: one to eight alphanumerics each; the first
+        // invalid one ends the code, keeping what was accepted so far
+        $accepted = array($primary);
+        for ($i = 1; $i < $num_subtags; $i++) {
+            $subtag = $subtags[$i];
+            $length = strlen($subtag);
+
+            $invalid = $length == 0 || $length > 8 || !ctype_alnum($subtag);
+            // the second subtag may only be a single character if it is 'x'
+            if ($i == 1 && $length == 1 && $subtag != 'x') $invalid = true;
+            if ($invalid) break;
+
+            $accepted[] = strtolower($subtag);
+        }
+
+        return implode('-', $accepted);
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php
@ -0,0 +1,17 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates arbitrary text according to the HTML spec.
+ */
+class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * @param $string  Raw attribute value
+     * @param $config  Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return The value after CDATA parsing; text is never rejected here.
+     */
+    function validate($string, $config, &$context) {
+        $cleaned = $this->parseCDATA($string);
+        return $cleaned;
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php
@ -0,0 +1,296 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/URIScheme.php';
+require_once 'HTMLPurifier/URISchemeRegistry.php';
+require_once 'HTMLPurifier/AttrDef/URI/Host.php';
+require_once 'HTMLPurifier/PercentEncoder.php';
+
+// Directives of the URI namespace read by HTMLPurifier_AttrDef_URI.
+
+// %URI.DefaultScheme: scheme assumed for URIs that carry none.
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'DefaultScheme', 'http', 'string',
+    'Defines through what scheme the output will be served, in order to '.
+    'select the proper object validator when no scheme information is present.'
+);
+
+// %URI.Host: our own domain, used by the external-link checks.
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'Host', null, 'string/null',
+    'Defines the domain name of the server, so we can determine whether '.
+    'an absolute URI is from your website or not.  Not strictly necessary, '.
+    'as users should be using relative URIs to reference resources on your '.
+    'website.  It will, however, let you use absolute URIs to link to '.
+    'subdomains of the domain you post here: i.e. example.com will allow '.
+    'sub.example.com.  However, higher up domains will still be excluded: '.
+    'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
+    'This directive has been available since 1.2.0.'
+);
+
+// %URI.DisableExternal: reject absolute URIs pointing outside %URI.Host.
+// (The first fragment must end in a space so it does not fuse with the next.)
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'DisableExternal', false, 'bool',
+    'Disables links to external websites.  This is a highly effective '.
+    'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no '.
+    'links or images outside of your domain will be allowed.  Non-linkified '.
+    'URIs will still be preserved.  If you want to be able to link to '.
+    'subdomains or use absolute URIs, specify %URI.Host for your website. '.
+    'This directive has been available since 1.2.0.'
+);
+
+// %URI.DisableExternalResources: as above, but only for embedded resources.
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'DisableExternalResources', false, 'bool',
+    'Disables the embedding of external resources, preventing users from '.
+    'embedding things like images from other hosts. This prevents '.
+    'access tracking (good for email viewers), bandwidth leeching, '.
+    'cross-site request forging, goatse.cx posting, and '.
+    'other nasties, but also results in '.
+    'a loss of end-user functionality (they can\'t directly post a pic '.
+    'they posted from Flickr anymore). Use it if you don\'t have a '.
+    'robust user-content moderation team. This directive has been '.
+    'available since 1.3.0.'
+);
+
+// %URI.DisableResources: no embedded resources at all.
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'DisableResources', false, 'bool',
+    'Disables embedding resources, essentially meaning no pictures. You can '.
+    'still link to them though. See %URI.DisableExternalResources for why '.
+    'this might be a good idea. This directive has been available since 1.3.0.'
+);
+
+// %URI.Munge: template used to wrap browsable URIs in a redirector.
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'Munge', null, 'string/null',
+    'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
+    'redirection service. Pass this directive a URI, with %s inserted where '.
+    'the url-encoded original URI should be inserted (sample: '.
+    '<code>http://www.google.com/url?q=%s</code>). '.
+    'This prevents PageRank leaks, while being as transparent as possible '.
+    'to users (you may also want to add some client side JavaScript to '.
+    'override the text in the statusbar). Warning: many security experts '.
+    'believe that this form of protection does not deter spam-bots. '.
+    'You can also use this directive to redirect users to a splash page '.
+    'telling them they are leaving your website. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// %URI.HostBlacklist: substrings that must not occur in any host.
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'HostBlacklist', array(), 'list',
+    'List of strings that are forbidden in the host of any URI. Use it to '.
+    'kill domain names of spam, etc. Note that it will catch anything in '.
+    'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// %URI.Disable: master switch; %Attr.DisableURI is kept as an alias.
+HTMLPurifier_ConfigSchema::define(
+    'URI', 'Disable', false, 'bool',
+    'Disables all URIs in all forms. Not sure why you\'d want to do that '.
+    '(after all, the Internet\'s founded on the notion of a hyperlink). '.
+    'This directive has been available since 1.3.0.'
+);
+HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
+
+/**
+ * Validates a URI as defined by RFC 3986.
+ * @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
+ *
+ * validate() splits the URI into scheme, authority, path, query and
+ * fragment with a regular expression, checks the pieces and rebuilds the
+ * URI from them.  It returns false when %URI.Disable is set, when the
+ * scheme registry yields no object for the scheme, when this instance
+ * embeds a resource and the scheme is not browsable, when the host matches
+ * %URI.HostBlacklist, or when an external-link restriction applies and the
+ * host does not fall under %URI.Host.  Otherwise the rebuilt URI is
+ * returned, passed through %URI.Munge if that is set, the scheme is
+ * browsable and the URI has an authority.
+ *
+ * @note The splitting pattern is not anchored and every group in it is
+ *       optional, so it matches from the start of the string and stops at
+ *       the first character its classes exclude; whatever follows never
+ *       reaches the rebuilt URI.
+ * @note %URI.DisableResources is not read anywhere in this class;
+ *       presumably it is enforced elsewhere -- TODO confirm.
+ */
+class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
+{
+    
+    var $host; // HTMLPurifier_AttrDef_URI_Host instance, validates the host part
+    var $PercentEncoder; // HTMLPurifier_PercentEncoder instance, normalizes %-escapes
+    var $embeds_resource; // bool, see constructor
+    
+    /**
+     * @param $embeds_resource Does the URI here result in an extra HTTP request?
+     */
+    function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
+        $this->host = new HTMLPurifier_AttrDef_URI_Host();
+        $this->PercentEncoder = new HTMLPurifier_PercentEncoder();
+        $this->embeds_resource = (bool) $embeds_resource;
+    }
+    
+    function validate($uri, $config, &$context) {
+        
+        // We'll write stack-based parsers later, for now, use regexps to
+        // get things working as fast as possible (irony)
+        
+        if ($config->get('URI', 'Disable')) return false;
+        
+        // parse as CDATA
+        $uri = $this->parseCDATA($uri);
+        
+        // fix up percent-encoding
+        $uri = $this->PercentEncoder->normalize($uri);
+        
+        // while it would be nice to use parse_url(), that's specifically
+        // for HTTP and thus won't work for our generic URI parsing
+        
+        // according to the RFC... (but this cuts corners, i.e. non-validating)
+        $r_URI = '!'.
+            '(([^:/?#<>\'"]+):)?'. // 2. Scheme
+            '(//([^/?#<>\'"]*))?'. // 4. Authority
+            '([^?#<>\'"]*)'.       // 5. Path
+            '(\?([^#<>\'"]*))?'.   // 7. Query
+            '(#([^<>\'"]*))?'.     // 8. Fragment
+            '!';
+        
+        $matches = array();
+        $result = preg_match($r_URI, $uri, $matches);
+        
+        if (!$result) return false; // invalid URI
+        
+        // separate out parts; the odd-numbered groups include the delimiter and only signal presence
+        $scheme     = !empty($matches[1]) ? $matches[2] : null;
+        $authority  = !empty($matches[3]) ? $matches[4] : null;
+        $path       = $matches[5]; // always present, can be empty
+        $query      = !empty($matches[6]) ? $matches[7] : null;
+        $fragment   = !empty($matches[8]) ? $matches[9] : null;
+        
+        
+        
+        $registry =& HTMLPurifier_URISchemeRegistry::instance();
+        if ($scheme !== null) {
+            // no need to validate the scheme's fmt since we do that when we
+            // retrieve the specific scheme object from the registry
+            $scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
+            $scheme_obj = $registry->getScheme($scheme, $config, $context);
+            if (!$scheme_obj) return false; // invalid scheme, clean it out
+        } else {
+            $scheme_obj = $registry->getScheme( // no scheme given: fall back to %URI.DefaultScheme
+                $config->get('URI', 'DefaultScheme'), $config, $context
+            );
+        }
+        
+        
+        // the URI we're processing embeds a resource in the page, but its
+        // scheme is not browsable, so the resource cannot be located
+        if ($this->embeds_resource && !$scheme_obj->browsable) {
+            return false;
+        }
+        
+        
+        if ($authority !== null) {
+            
+            // remove URI if it's absolute and we disabled externals or
+            // if it's absolute and embedded and we disabled external resources
+            unset($our_host); // stays unset unless one of the restrictions below applies
+            if (
+                $config->get('URI', 'DisableExternal') ||
+                (
+                    $config->get('URI', 'DisableExternalResources') &&
+                    $this->embeds_resource
+                )
+            ) {
+                $our_host = $config->get('URI', 'Host');
+                if ($our_host === null) return false; // no %URI.Host: every absolute URI is rejected
+            }
+            
+            $HEXDIG = '[A-Fa-f0-9]';
+            $unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
+            $sub_delims = '!$&\'()'; // needs []
+            $pct_encoded = "%$HEXDIG$HEXDIG";
+            $r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
+            $r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
+            $matches = array();
+            preg_match($r_authority, $authority, $matches);
+            // overloads regexp!
+            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
+            $host       = !empty($matches[3]) ? $matches[3] : null;
+            $port       = !empty($matches[4]) ? $matches[5] : null;
+            
+            // validate port
+            if ($port !== null) {
+                $port = (int) $port;
+                if ($port < 1 || $port > 65535) $port = null; // out-of-range port is dropped, not fatal
+            }
+            
+            $host = $this->host->validate($host, $config, $context);
+            if ($host === false) $host = null; // invalid host is dropped, not fatal
+            
+            if ($this->checkBlacklist($host, $config, $context)) return false;
+            
+            // more lenient absolute checking
+            if (isset($our_host)) {
+                $host_parts = array_reverse(explode('.', $host));
+                // could be cached
+                $our_host_parts = array_reverse(explode('.', $our_host));
+                foreach ($our_host_parts as $i => $discard) { // labels compared right to left; extra leading labels on $host are allowed
+                    if (!isset($host_parts[$i])) return false;
+                    if ($host_parts[$i] != $our_host_parts[$i]) return false;
+                }
+            }
+            
+            // userinfo and host are validated within the regexp
+            
+        } else {
+            $port = $host = $userinfo = null;
+        }
+        
+        
+        // query and fragment are quite simple in terms of definition:
+        // *( pchar / "/" / "?" ), so define their validation routines
+        // when we start fixing percent encoding
+        
+        
+        
+        // path gets to be validated against a hodge-podge of rules depending
+        // on the status of authority and scheme, but it's not that important,
+        // esp. since it won't be applicable to everyone
+        
+        
+        
+        // okay, now we defer execution to the subobject for more processing
+        // note that $fragment is omitted
+        list($userinfo, $host, $port, $path, $query) = 
+            $scheme_obj->validateComponents(
+                $userinfo, $host, $port, $path, $query, $config, $context
+            );
+        
+        
+        // reconstruct authority
+        $authority = null;
+        if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
+            $authority = '';
+            if($userinfo !== null) $authority .= $userinfo . '@';
+            $authority .= $host;
+            if($port !== null) $authority .= ':' . $port;
+        }
+        
+        // reconstruct the result
+        $result = '';
+        if ($scheme !== null) $result .= "$scheme:";
+        if ($authority !== null) $result .= "//$authority";
+        $result .= $path;
+        if ($query !== null) $result .= "?$query";
+        if ($fragment !== null) $result .= "#$fragment";
+        
+        // munge if necessary
+        $munge = $config->get('URI', 'Munge');
+        if (!empty($scheme_obj->browsable) && $munge !== null) {
+            if ($authority !== null) { // URIs without an authority are left unmunged
+                $result = str_replace('%s', rawurlencode($result), $munge);
+            }
+        }
+        
+        return $result;
+        
+    }
+    
+    /**
+     * Checks a host against an array blacklist
+     * @note The test is a substring search: the host is rejected as soon as
+     *       any entry of %URI.HostBlacklist occurs anywhere inside it.
+     * @param $host Host to check
+     * @param $config HTMLPurifier_Config instance
+     * @param $context HTMLPurifier_Context instance
+     * @return bool Is spam?
+     */
+    function checkBlacklist($host, &$config, &$context) {
+        $blacklist = $config->get('URI', 'HostBlacklist');
+        if (!empty($blacklist)) {
+            foreach($blacklist as $blacklisted_host_fragment) {
+                if (strpos($host, $blacklisted_host_fragment) !== false) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php
@ -0,0 +1,17 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
+{
+    
+    /**
+     * Unpacks a mailbox into its display-name and address
+     * @note Placeholder only: the body is empty, so a call currently
+     *       returns null.  The intended return shape is not defined yet.
+     * @param $string Mailbox string to unpack
+     */
+    function unpack($string) {
+        // needs to be implemented
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php
@ -0,0 +1,23 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef/URI/Email.php';
+
+/**
+ * Primitive email validation class based on the regexp found at
+ * http://www.regular-expressions.info/email.html
+ */
+class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
+{
+
+    /**
+     * @param $string  Candidate address
+     * @param $config  Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return The trimmed address when it matches the pattern, else false.
+     * @note Named mailboxes such as "Bob <bob@example.com>" are not
+     *       supported; that needs more percent encoding to be done.
+     */
+    function validate($string, $config, &$context) {
+        if ($string == '') return false;
+
+        $address = trim($string);
+        if (preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $address)) {
+            return $address;
+        }
+        return false;
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php
@ -0,0 +1,54 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
+require_once 'HTMLPurifier/AttrDef/URI/IPv6.php';
+
+/**
+ * Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
+ */
+class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
+{
+
+    /** HTMLPurifier_AttrDef_URI_IPv4 sub-validator */
+    var $ipv4;
+
+    /** HTMLPurifier_AttrDef_URI_IPv6 sub-validator */
+    var $ipv6;
+
+    function HTMLPurifier_AttrDef_URI_Host() {
+        $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
+        $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
+    }
+
+    /**
+     * @param $string  Host part of a URI
+     * @param $config  Configuration object, forwarded to the sub-validators
+     * @param $context Context object, forwarded to the sub-validators
+     * @return '' for an empty host; the bracketed address, or false, for a
+     *         [...] literal; otherwise the host itself (domain names are
+     *         not checked yet).
+     */
+    function validate($string, $config, &$context) {
+        $length = strlen($string);
+        if ($string === '') return '';
+
+        // a [...] literal is handed to the IPv6 validator without brackets
+        $bracketed = $length > 1
+            && $string[0] === '['
+            && $string[$length-1] === ']';
+        if ($bracketed) {
+            $inner = $this->ipv6->validate(
+                substr($string, 1, $length - 2), $config, $context
+            );
+            return ($inner === false) ? false : '['. $inner . ']';
+        }
+
+        // need to do checks on unusual encodings too
+        $dotted = $this->ipv4->validate($string, $config, $context);
+
+        // Domain-name validation and filtering would go here.  A check like
+        // preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string) is not used
+        // because it would break I18N domain names.
+        return ($dotted !== false) ? $dotted : $string;
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php
@ -0,0 +1,36 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef.php';
+
+/**
+ * Validates an IPv4 address in dotted-quad notation.
+ * @author Feyd @ forums.devnetwork.net (public domain)
+ */
+class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
+{
+
+    /**
+     * IPv4 regex (unanchored, without delimiters), protected so that IPv6
+     * can reuse it
+     * @protected
+     */
+    var $ip4;
+
+    function HTMLPurifier_AttrDef_URI_IPv4() {
+        $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
+        $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
+    }
+
+    /**
+     * @param $aIP     Candidate address
+     * @param $config  Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return The address unchanged when it is exactly four dot-separated
+     *         octets in the range 0-255, else false.
+     */
+    function validate($aIP, $config, &$context) {
+
+        // The D modifier (PCRE_DOLLAR_ENDONLY) pins '$' to the very end of
+        // the subject.  Without it '$' also matches just before a final
+        // newline, so "1.2.3.4\n" would be accepted and returned with the
+        // newline still attached.
+        if (preg_match('#^' . $this->ip4 . '$#sD', $aIP)) {
+            return $aIP;
+        }
+
+        return false;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php
@ -0,0 +1,99 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
+
+/**
+ * Validates an IPv6 address.
+ * @author Feyd @ forums.devnetwork.net (public domain)
+ * @note This function requires brackets to have been removed from address
+ *       in URI.
+ *
+ * validate() only decides whether the address is acceptable: on success it
+ * returns the string exactly as it was passed in (including any /prefix),
+ * otherwise false.  The steps, in order: strip a trailing /0../128 prefix
+ * length; fold a trailing dotted-quad IPv4 part (preceded by ':') into two
+ * hex groups so it counts as two pieces; allow at most one '::' and expand
+ * it with '0' groups to eight pieces; require exactly eight pieces, each of
+ * which must be four hex digits after left-padding to four characters.
+ *
+ * NOTE(review): an empty piece appears to be padded to '0000' and thus
+ * accepted, which is what lets '::' at either end work but would also let
+ * a stray leading or trailing ':' through -- confirm whether intended.
+ */
+class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
+{
+    
+    function validate($aIP, $config, &$context) {
+        
+        $original = $aIP; // returned untouched on success
+        
+        $hex = '[0-9a-fA-F]';
+        $blk = '(?:' . $hex . '{1,4})'; // not referenced again in this method
+        $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))';   // /0 - /128
+        
+        //      prefix check: a '/' is only tolerated as part of a trailing /0../128
+        if (strpos($aIP, '/') !== false)
+        {
+                if (preg_match('#' . $pre . '$#s', $aIP, $find))
+                {
+                        $aIP = substr($aIP, 0, 0-strlen($find[0]));
+                        unset($find);
+                }
+                else
+                {
+                        return false;
+                }
+        }
+        
+        //      IPv4-compatibility check: fold a trailing dotted quad into two hex groups
+        if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
+        {
+                $aIP = substr($aIP, 0, 0-strlen($find[0]));
+                $ip = explode('.', $find[0]);
+                $ip = array_map('dechex', $ip);
+                $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; // octets are not zero-padded; only piece count and hex-ness matter below
+                unset($find, $ip);
+        }
+        
+        //      compression check: split on '::' and expand to eight pieces
+        $aIP = explode('::', $aIP);
+        $c = count($aIP);
+        if ($c > 2)
+        {
+                return false;
+        }
+        elseif ($c == 2)
+        {
+                list($first, $second) = $aIP;
+                $first = explode(':', $first);
+                $second = explode(':', $second);
+               
+                if (count($first) + count($second) > 8)
+                {
+                        return false;
+                }
+               
+                while(count($first) < 8)
+                {
+                        array_push($first, '0');
+                }
+
+                array_splice($first, 8 - count($second), 8, $second); // overwrite the tail of the padded list with the pieces after '::'
+                $aIP = $first;
+                unset($first,$second);
+        }
+        else
+        {
+                $aIP = explode(':', $aIP[0]);
+        }
+        $c = count($aIP);
+        
+        if ($c != 8)
+        {
+                return false;
+        }
+       
+        //      All the pieces should be 16-bit hex strings. Are they?
+        foreach ($aIP as $piece)
+        {
+                if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) // left-pad to four characters before testing
+                {
+                        return false;
+                }
+        }
+        
+        return $original;
+        
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform.php
@ -0,0 +1,34 @@
+<?php
+
+/**
+ * Base class for transformations that work on an entire attribute array.
+ *
+ * Some corrections involve more than one attribute: a value has to be
+ * removed from one attribute and moved onto another.  Rather than giving
+ * HTMLPurifier_AttrDef a complicated return syntax, the whole attribute
+ * array is handed to a specialized object which does the work.  Those
+ * objects form the HTMLPurifier_AttrTransform family.
+ *
+ * A transformation can be scheduled to run before or after
+ * HTMLPurifier_AttrDef validation.  See HTMLPurifier_HTMLDefinition for
+ * more details.
+ */
+
+class HTMLPurifier_AttrTransform
+{
+
+    /**
+     * Abstract: applies changes to the attributes that depend on several
+     * values at once.  This base implementation only raises an
+     * E_USER_ERROR; subclasses override it.
+     *
+     * @param $attr Assoc array of attributes, usually from
+     *              HTMLPurifier_Token_Tag::$attr
+     * @param $config Mandatory HTMLPurifier_Config object.
+     * @param $context Mandatory HTMLPurifier_Context object
+     * @returns Processed attribute array.
+     */
+    function transform($attr, $config, &$context) {
+        $message = 'Cannot call abstract function';
+        trigger_error($message, E_USER_ERROR);
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php
@ -0,0 +1,31 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTransform.php';
+
+// this MUST be placed in post, as it assumes that any value in dir is valid
+
+// %Attr.DefaultTextDir: the direction written into bdo tags that lack a
+// dir attribute; restricted to 'ltr' and 'rtl'.
+HTMLPurifier_ConfigSchema::define(
+    'Attr',
+    'DefaultTextDir',
+    'ltr',
+    'string',
+    'Defines the default text direction (ltr or rtl) of the document '.
+    'being parsed.  This generally is the same as the value of the dir '.
+    'attribute in HTML, or ltr if that is not specified.'
+);
+HTMLPurifier_ConfigSchema::defineAllowedValues(
+    'Attr',
+    'DefaultTextDir',
+    array('ltr', 'rtl')
+);
+
+/**
+ * Post-transform that ensures that bdo tags have the dir attribute set.
+ */
+class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
+{
+
+    /**
+     * @param $attr    Assoc array of attributes
+     * @param $config  Configuration object, read for %Attr.DefaultTextDir
+     * @param $context Context object (not used here)
+     * @return The attribute array, with dir filled in from the configured
+     *         default when it was missing.
+     */
+    function transform($attr, $config, &$context) {
+        if (!isset($attr['dir'])) {
+            $attr['dir'] = $config->get('Attr', 'DefaultTextDir');
+        }
+        return $attr;
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php
@ -0,0 +1,28 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTransform.php';
+
+/**
+ * Pre-transform that changes deprecated bgcolor attribute to CSS.
+ */
+class HTMLPurifier_AttrTransform_BgColor
+extends HTMLPurifier_AttrTransform {
+
+    /**
+     * @param $attr    Assoc array of attributes
+     * @param $config  Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return The attribute array with bgcolor removed and an equivalent
+     *         background-color declaration placed in front of any existing
+     *         style value; returned unchanged when there is no bgcolor.
+     */
+    function transform($attr, $config, &$context) {
+
+        if (isset($attr['bgcolor'])) {
+
+            $bgcolor = $attr['bgcolor'];
+            unset($attr['bgcolor']);
+            // some validation should happen here
+
+            $existing = isset($attr['style']) ? $attr['style'] : '';
+            $attr['style'] = "background-color:$bgcolor;" . $existing;
+
+        }
+
+        return $attr;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php
@ -0,0 +1,28 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTransform.php';
+
+/**
+ * Pre-transform that rewrites the deprecated border attribute as CSS.
+ */
+class HTMLPurifier_AttrTransform_Border
+extends HTMLPurifier_AttrTransform {
+
+    /**
+     * Moves border into a "border:<value>px solid;" declaration that is
+     * prepended to style.
+     * @note The value is copied verbatim; nothing is validated here.
+     * @param $attr Assoc array of attribute names to values
+     * @param $config Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return Attribute array with border removed
+     */
+    function transform($attr, $config, &$context) {
+
+        if (isset($attr['border'])) {
+            $width = $attr['border'];
+            unset($attr['border']);
+            // some validation should happen here
+
+            $existing_style = isset($attr['style']) ? $attr['style'] : '';
+            $attr['style'] = 'border:' . $width . 'px solid;' . $existing_style;
+        }
+
+        return $attr;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php
@ -0,0 +1,50 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTransform.php';
+
+// must be called POST validation
+
+HTMLPurifier_ConfigSchema::define( // registers directive Attr.DefaultInvalidImage (default '', type 'string')
+    'Attr', 'DefaultInvalidImage', '', 'string',
+    'This is the default image an img tag will be pointed to if it does '.
+    'not have a valid src attribute.  In future versions, we may allow the '.
+    'image tag to be removed completely, but due to design issues, this is '.
+    'not possible right now.'
+);
+
+HTMLPurifier_ConfigSchema::define( // registers directive Attr.DefaultInvalidImageAlt (default 'Invalid image', type 'string')
+    'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
+    'This is the content of the alt tag of an invalid image if the user '.
+    'had not previously specified an alt attribute.  It has no effect when the '.
+    'image is valid but there was no alt attribute present.'
+);
+
+/**
+ * Post-transform that makes sure img has its required attrs (src and alt)
+ */
+class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
+{
+
+    /**
+     * Supplies src and alt when they are missing.
+     *
+     * A missing src is taken from Attr.DefaultInvalidImage.  A missing alt
+     * becomes the basename of src when src was present, otherwise the value
+     * of Attr.DefaultInvalidImageAlt.
+     *
+     * @param $attr Assoc array of attribute names to values
+     * @param $config Configuration object the two directives are read from
+     * @param $context Context object (not used here)
+     * @return Attribute array with src and alt filled in
+     */
+    function transform($attr, $config, &$context) {
+
+        $had_src = isset($attr['src']);
+        if (!$had_src) {
+            $attr['src'] = $config->get('Attr', 'DefaultInvalidImage');
+        }
+
+        if (isset($attr['alt'])) return $attr;
+
+        if ($had_src) {
+            $attr['alt'] = basename($attr['src']);
+        } else {
+            $attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt');
+        }
+
+        return $attr;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php
@ -0,0 +1,30 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTransform.php';
+
+/**
+ * Post-transform that keeps lang and xml:lang in step with each other
+ * @note Theoretically speaking, this could be a pre-transform, but putting
+ *       post is more efficient.
+ */
+class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
+{
+
+    /**
+     * Copies xml:lang over lang when xml:lang is present; otherwise copies
+     * lang to xml:lang when only lang is present.
+     * @param $attr Assoc array of attribute names to values
+     * @param $config Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return Attribute array with both attributes agreeing, if either was set
+     */
+    function transform($attr, $config, &$context) {
+
+        // false is the "attribute absent" marker for both lookups
+        $xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
+        $lang     = isset($attr['lang']) ? $attr['lang'] : false;
+
+        if ($xml_lang !== false) {
+            // xml:lang takes precedence whenever it exists
+            $attr['lang'] = $xml_lang;
+        } elseif ($lang !== false) {
+            $attr['xml:lang'] = $lang;
+        }
+
+        return $attr;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php
@ -0,0 +1,33 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTransform.php';
+
+/**
+ * Transform that turns a width/height style length attribute into CSS
+ */
+class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
+{
+
+    /** Name of the attribute that gets converted */
+    var $name;
+    /** Name of the CSS property that receives the value */
+    var $cssName;
+
+    /**
+     * @param $name Attribute name to convert
+     * @param $css_name CSS property name; falls back to $name when empty
+     */
+    function HTMLPurifier_AttrTransform_Length($name, $css_name = null) {
+        $this->name = $name;
+        if ($css_name) {
+            $this->cssName = $css_name;
+        } else {
+            $this->cssName = $name;
+        }
+    }
+
+    /**
+     * Removes the attribute and prepends "<cssName>:<value>;" to style.
+     * A value made up only of digits gets a px suffix first.
+     * @param $attr Assoc array of attribute names to values
+     * @param $config Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return Attribute array with the length attribute removed
+     */
+    function transform($attr, $config, &$context) {
+        $attr_name = $this->name;
+        if (isset($attr[$attr_name])) {
+            $value = $attr[$attr_name];
+            unset($attr[$attr_name]);
+            if (ctype_digit($value)) $value .= 'px';
+
+            $existing_style = isset($attr['style']) ? $attr['style'] : '';
+            $attr['style'] = $this->cssName . ":$value;" . $existing_style;
+        }
+        return $attr;
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php
@ -0,0 +1,31 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTransform.php';
+
+/**
+ * Pre-transform that converts the deprecated name attribute into an id
+ * when no id is present
+ */
+class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
+{
+
+    /**
+     * Always removes name; its value becomes id unless id is already set.
+     * @param $attr Assoc array of attribute names to values
+     * @param $config Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return Attribute array without name
+     */
+    function transform($attr, $config, &$context) {
+
+        if (isset($attr['name'])) {
+            $legacy_name = $attr['name'];
+            unset($attr['name']);
+
+            // an existing id wins; the name is simply discarded then
+            if (!isset($attr['id'])) {
+                $attr['id'] = $legacy_name;
+            }
+        }
+
+        return $attr;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php
@ -0,0 +1,36 @@
+<?php
+
+require_once 'HTMLPurifier/AttrTransform.php';
+
+/**
+ * Pre-transform that rewrites the deprecated align attribute as text-align.
+ */
+class HTMLPurifier_AttrTransform_TextAlign
+extends HTMLPurifier_AttrTransform {
+
+    /**
+     * Removes align; when its trimmed, lowercased value is left, right,
+     * center or justify, a text-align declaration is prepended to style.
+     * Any other value is dropped without touching style.
+     * @param $attr Assoc array of attribute names to values
+     * @param $config Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return Attribute array without align
+     */
+    function transform($attr, $config, &$context) {
+
+        if (!isset($attr['align'])) return $attr;
+
+        $keyword = strtolower(trim($attr['align']));
+        unset($attr['align']);
+
+        $recognised = array('left', 'right', 'center', 'justify');
+
+        if (in_array($keyword, $recognised, true)) {
+            $existing_style = isset($attr['style']) ? $attr['style'] : '';
+            $attr['style'] = "text-align:$keyword;" . $existing_style;
+        }
+
+        return $attr;
+
+    }
+
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/AttrTypes.php
+++ b/lib/htmlpurifier/HTMLPurifier/AttrTypes.php
@ -0,0 +1,41 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
+require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
+require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
+require_once 'HTMLPurifier/AttrDef/Integer.php';
+require_once 'HTMLPurifier/AttrDef/Text.php';
+require_once 'HTMLPurifier/AttrDef/URI.php';
+
+/**
+ * Lookup table from attribute type names to HTMLPurifier_AttrDef objects
+ */
+class HTMLPurifier_AttrTypes
+{
+    /**
+     * Assoc array of attribute type identifier to the object implementing it
+     * @public
+     */
+    var $info = array();
+
+    /**
+     * Populates the info array with one definition object per type name
+     */
+    function HTMLPurifier_AttrTypes() {
+        $this->info = array(
+            'CDATA'       => new HTMLPurifier_AttrDef_Text(),
+            'ID'          => new HTMLPurifier_AttrDef_HTML_ID(),
+            'Length'      => new HTMLPurifier_AttrDef_HTML_Length(),
+            'MultiLength' => new HTMLPurifier_AttrDef_HTML_MultiLength(),
+            'NMTOKENS'    => new HTMLPurifier_AttrDef_HTML_Nmtokens(),
+            'Pixels'      => new HTMLPurifier_AttrDef_HTML_Pixels(),
+            'Text'        => new HTMLPurifier_AttrDef_Text(),
+            'URI'         => new HTMLPurifier_AttrDef_URI(),
+            // number is really a positive integer (one or more digits)
+            'Number'      => new HTMLPurifier_AttrDef_Integer(false, false, true)
+        );
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php
+++ b/lib/htmlpurifier/HTMLPurifier/CSSDefinition.php
@ -0,0 +1,213 @@
+<?php
+
+require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
+require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
+require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
+require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
+require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
+require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
+require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
+require_once 'HTMLPurifier/AttrDef/Enum.php';
+
+/**
+ * Defines allowed CSS attributes and what their values are.
+ * @see HTMLPurifier_HTMLDefinition
+ */
+class HTMLPurifier_CSSDefinition
+{
+    
+    /**
+     * Assoc array of attribute name to definition object.
+     */
+    var $info = array();
+    
+    /**
+     * Constructs the info array.  The meat of this class.
+     * @param $config Configuration object; passed on to the shorthand
+     *        definitions (list-style, background, font and border)
+     */
+    function setup($config) {
+        
+        $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
+            array('left', 'right', 'center', 'justify'), false);
+        
+        // one shared definition object serves all four sides and is reused
+        // as the element type of the border-style shorthand
+        $border_style =
+        $this->info['border-bottom-style'] = 
+        $this->info['border-right-style'] = 
+        $this->info['border-left-style'] = 
+        $this->info['border-top-style'] =  new HTMLPurifier_AttrDef_Enum(
+            array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
+            'groove', 'ridge', 'inset', 'outset'), false);
+        
+        $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
+        
+        $this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
+            array('none', 'left', 'right', 'both'), false);
+        $this->info['float'] = new HTMLPurifier_AttrDef_Enum(
+            array('none', 'left', 'right'), false);
+        $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
+            array('normal', 'italic', 'oblique'), false);
+        $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
+            array('normal', 'small-caps'), false);
+        
+        $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
+            array(
+                new HTMLPurifier_AttrDef_Enum(array('none')),
+                new HTMLPurifier_AttrDef_CSS_URI()
+            )
+        );
+        
+        $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
+            array('inside', 'outside'), false);
+        $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
+            array('disc', 'circle', 'square', 'decimal', 'lower-roman',
+            'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
+        $this->info['list-style-image'] = $uri_or_none;
+        
+        $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
+        
+        $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
+            array('capitalize', 'uppercase', 'lowercase', 'none'), false);
+        $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
+        
+        $this->info['background-image'] = $uri_or_none;
+        $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
+            array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
+        );
+        $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
+            array('scroll', 'fixed')
+        );
+        $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
+        
+        $border_color = 
+        $this->info['border-top-color'] = 
+        $this->info['border-bottom-color'] = 
+        $this->info['border-left-color'] = 
+        $this->info['border-right-color'] = 
+        $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_Enum(array('transparent')),
+            new HTMLPurifier_AttrDef_CSS_Color()
+        ));
+        
+        $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
+        
+        $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
+        
+        $border_width = 
+        $this->info['border-top-width'] = 
+        $this->info['border-bottom-width'] = 
+        $this->info['border-left-width'] = 
+        $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
+            new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
+        ));
+        
+        $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
+        
+        $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_Enum(array('normal')),
+            new HTMLPurifier_AttrDef_CSS_Length()
+        ));
+        
+        $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_Enum(array('normal')),
+            new HTMLPurifier_AttrDef_CSS_Length()
+        ));
+        
+        $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
+                'small', 'medium', 'large', 'x-large', 'xx-large',
+                'larger', 'smaller')),
+            new HTMLPurifier_AttrDef_CSS_Percentage(),
+            new HTMLPurifier_AttrDef_CSS_Length()
+        ));
+        
+        $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_Enum(array('normal')),
+            new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
+            new HTMLPurifier_AttrDef_CSS_Length(true),
+            new HTMLPurifier_AttrDef_CSS_Percentage(true)
+        ));
+        
+        $margin =
+        $this->info['margin-top'] = 
+        $this->info['margin-bottom'] = 
+        $this->info['margin-left'] = 
+        $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_CSS_Length(),
+            new HTMLPurifier_AttrDef_CSS_Percentage(),
+            new HTMLPurifier_AttrDef_Enum(array('auto'))
+        ));
+        
+        $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
+        
+        // non-negative
+        $padding =
+        $this->info['padding-top'] = 
+        $this->info['padding-bottom'] = 
+        $this->info['padding-left'] = 
+        $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_CSS_Length(true),
+            new HTMLPurifier_AttrDef_CSS_Percentage(true)
+        ));
+        
+        $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
+        
+        $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_CSS_Length(),
+            new HTMLPurifier_AttrDef_CSS_Percentage()
+        ));
+        
+        $this->info['width'] =
+        $this->info['height'] = 
+        new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_CSS_Length(true),
+            new HTMLPurifier_AttrDef_CSS_Percentage(true),
+            new HTMLPurifier_AttrDef_Enum(array('auto'))
+        ));
+        
+        $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
+        
+        $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
+        
+        // this could use specialized code
+        $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
+            array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300',
+            '400', '500', '600', '700', '800', '900'), false);
+        
+        // MUST be called after other font properties, as it references
+        // a CSSDefinition object
+        $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
+        
+        // same here
+        $this->info['border'] =
+        $this->info['border-bottom'] = 
+        $this->info['border-top'] = 
+        $this->info['border-left'] = 
+        $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
+        
+        // the CSS keyword is spelled "separate"; the former misspelling
+        // "seperate" meant the valid value could never match this enum
+        $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
+            'collapse', 'separate'));
+        
+        $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
+            'top', 'bottom'));
+        
+        $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
+            'auto', 'fixed'));
+        
+        $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
+            new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
+                'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
+            new HTMLPurifier_AttrDef_CSS_Length(),
+            new HTMLPurifier_AttrDef_CSS_Percentage()
+        ));
+        
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef.php
@ -0,0 +1,55 @@
+<?php
+
+// HTMLPurifier_ChildDef and inheritance have three types of output:
+// true = leave nodes as is
+// false = delete parent node and all children
+// array(...) = replace children nodes with these
+
+HTMLPurifier_ConfigSchema::define( // registers directive Core.EscapeInvalidChildren (default false, type 'bool')
+    'Core', 'EscapeInvalidChildren', false, 'bool',
+    'When true, a child is found that is not allowed in the context of the '.
+    'parent element will be transformed into text as if it were ASCII. When '.
+    'false, that element and all internal tags will be dropped, though text '.
+    'will be preserved.  There is no option for dropping the element but '.
+    'preserving child nodes.'
+);
+
+/**
+ * Defines allowed child nodes and validates tokens against it.
+ * 
+ * This base class acts as an abstract definition: its validateChildren()
+ * only raises an E_USER_ERROR, so concrete definitions must override it.
+ */
+class HTMLPurifier_ChildDef
+{
+    /**
+     * Type of child definition, usually right-most part of class name lowercase.
+     * Used occasionally in terms of context.
+     * No default is assigned here; subclasses are expected to set it.
+     * @public
+     */
+    var $type;
+    
+    /**
+     * Bool that indicates whether or not an empty array of children is okay
+     * 
+     * This is necessary for redundant checking when changes affecting
+     * a child node may cause a parent node to now be disallowed.
+     * No default is assigned here; subclasses are expected to set it.
+     * 
+     * @public
+     */
+    var $allow_empty;
+    
+    /**
+     * Validates nodes according to definition and returns modification.
+     * 
+     * The return value is one of three things: true, false, or an array.
+     * 
+     * @public
+     * @param $tokens_of_children Array of HTMLPurifier_Token
+     * @param $config HTMLPurifier_Config object
+     * @param $context HTMLPurifier_Context object
+     * @return bool true to leave nodes as is
+     * @return bool false to remove parent node
+     * @return array of replacement child tokens
+     * @note This implementation returns nothing; it only triggers an
+     *       E_USER_ERROR ('Call to abstract function').
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        trigger_error('Call to abstract function', E_USER_ERROR);
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php
@ -0,0 +1,51 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Definition that delegates to one of two definitions depending on context.
+ * 
+ * del and ins accept different children depending on whether they sit in a
+ * block or an inline context.  Chameleon models this by holding one
+ * definition per context and picking between them at validation time.
+ * Although written generically, it exists for those two tags.
+ */
+class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
+{
+    
+    /**
+     * Definition object applied in inline context. Usually stricter.
+     * @public
+     */
+    var $inline;
+    
+    /**
+     * Definition object applied in block context.
+     * @public
+     */
+    var $block;
+    
+    var $type = 'chameleon';
+    
+    /**
+     * Wraps each element list in an HTMLPurifier_ChildDef_Optional.
+     * @param $inline List of elements to allow when inline.
+     * @param $block List of elements to allow when block.
+     */
+    function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
+        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
+        $this->block  = new HTMLPurifier_ChildDef_Optional($block);
+    }
+    
+    /**
+     * Delegates to the block definition only when the context's IsInline
+     * value is exactly false; every other value selects the inline one.
+     * @param $tokens_of_children Array of child tokens
+     * @param $config Configuration object, passed through
+     * @param $context Context object queried for 'IsInline'
+     * @return Whatever the selected definition returns
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        if ($context->get('IsInline') !== false) {
+            return $this->inline->validateChildren(
+                $tokens_of_children, $config, $context);
+        }
+        return $this->block->validateChildren(
+            $tokens_of_children, $config, $context);
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php
@ -0,0 +1,75 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Custom validation class, accepts DTD child definitions
+ * 
+ * @warning Currently this class is an all or nothing proposition, that is,
+ *          it will only give a bool return value.
+ * @note This class is currently not used by any code, although it is unit
+ *       tested.
+ */
+class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
+{
+    var $type = 'custom';
+    var $allow_empty = false;
+    /**
+     * Allowed child pattern as defined by the DTD
+     */
+    var $dtd_regex;
+    /**
+     * PCRE regex derived from $dtd_regex
+     * @private
+     */
+    var $_pcre_regex;
+    /**
+     * @param $dtd_regex Allowed child pattern from the DTD
+     */
+    function HTMLPurifier_ChildDef_Custom($dtd_regex) {
+        $this->dtd_regex = $dtd_regex;
+        $this->_compileRegex();
+    }
+    /**
+     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
+     * 
+     * Spaces are stripped, the pattern is wrapped in parentheses unless it
+     * already starts with one, every comma is made optional and every
+     * element name is wrapped in a group with an optional leading comma.
+     */
+    function _compileRegex() {
+        $raw = str_replace(' ', '', $this->dtd_regex);
+        // substr() rather than the $raw{0} offset syntax: curly-brace
+        // offsets no longer parse in PHP 8, and substr() also copes with an
+        // empty pattern without reading a non-existent offset
+        if (substr($raw, 0, 1) !== '(') {
+            $raw = "($raw)";
+        }
+        $reg = str_replace(',', ',?', $raw);
+        $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
+        $this->_pcre_regex = $reg;
+    }
+    /**
+     * Matches the comma-separated names of the direct children against the
+     * compiled regex.
+     * @param $tokens_of_children Array of child tokens
+     * @param $config Configuration object (not used here)
+     * @param $context Context object (not used here)
+     * @return bool true if the children match the pattern, false otherwise
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        $list_of_children = '';
+        $nesting = 0; // depth into the nest
+        foreach ($tokens_of_children as $token) {
+            if (!empty($token->is_whitespace)) continue;
+            
+            // decided before the depth is adjusted, so a start tag at
+            // depth 0 counts as a direct child
+            $is_child = ($nesting == 0); // direct
+            
+            if ($token->type == 'start') {
+                $nesting++;
+            } elseif ($token->type == 'end') {
+                $nesting--;
+            }
+            
+            if ($is_child) {
+                $list_of_children .= $token->name . ',';
+            }
+        }
+        $list_of_children = rtrim($list_of_children, ',');
+        
+        $okay =
+            preg_match(
+                '/^'.$this->_pcre_regex.'$/',
+                $list_of_children
+            );
+        
+        return (bool) $okay;
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php
@ -0,0 +1,22 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Definition that disallows all elements.
+ * 
+ * validateChildren() always returns an empty array, regardless of the
+ * tokens it is given.
+ * 
+ * @warning validateChildren() in this class is actually never called, because
+ *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
+ *          before child definitions are parsed in earnest by
+ *          HTMLPurifier_Strategy_FixNesting.
+ */
+class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
+{
+    var $allow_empty = true;
+    var $type = 'empty';
+    function HTMLPurifier_ChildDef_Empty() {}
+    function validateChildren($tokens_of_children, $config, &$context) {
+        return array();
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php
@ -0,0 +1,23 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef/Required.php';
+
+/**
+ * Definition that allows a set of elements and also accepts having no
+ * children at all.
+ * @note This reuses HTMLPurifier_ChildDef_Required as a shortcut; the two
+ *       should not really share an inheritance line.  The only behavioural
+ *       difference is that a false result from the parent is turned into an
+ *       empty array, so this class never returns false.
+ */
+class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
+{
+    var $allow_empty = true;
+    var $type = 'optional';
+    /**
+     * @param $tokens_of_children Array of child tokens
+     * @param $config Configuration object, passed to the parent
+     * @param $context Context object, passed to the parent
+     * @return The parent's result, with false replaced by array()
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        $verdict = parent::validateChildren($tokens_of_children, $config, $context);
+        return ($verdict === false) ? array() : $verdict;
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php
@ -0,0 +1,107 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Definition that allows a set of elements, but disallows empty children.
+ */
+class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
+{
+    /**
+     * Lookup table of allowed elements.
+     * @public
+     */
+    var $elements = array();
+    /**
+     * Accepts a '|' separated string, a plain list of names, or an existing
+     * lookup table (name => true).  Strings and lists are converted to the
+     * lookup form; empty names are discarded.
+     * @param $elements List of allowed element names (lowercase).
+     */
+    function HTMLPurifier_ChildDef_Required($elements) {
+        if (is_string($elements)) {
+            $elements = str_replace(' ', '', $elements);
+            $elements = explode('|', $elements);
+        }
+        $keys = array_keys($elements);
+        // keys 0..n-1 mean a plain list was passed rather than a lookup table
+        if ($keys == array_keys($keys)) {
+            $elements = array_flip($elements);
+            foreach ($elements as $i => $x) {
+                $elements[$i] = true;
+                if (empty($i)) unset($elements[$i]);
+            }
+        }
+        $this->elements = $elements;
+        $this->gen = new HTMLPurifier_Generator();
+    }
+    var $allow_empty = false;
+    var $type = 'required';
+    /**
+     * Generator used to turn disallowed tokens back into escaped text.
+     * Declared explicitly so the constructor does not create a dynamic
+     * property (deprecated as of PHP 8.2).
+     */
+    var $gen;
+    /**
+     * Filters the children down to the allowed elements.
+     * 
+     * Disallowed direct children and everything nested in them are dropped;
+     * when #PCDATA is allowed, text is kept and, if
+     * Core.EscapeInvalidChildren is on, dropped tags are replaced by text
+     * tokens generated from them.
+     * 
+     * @param $tokens_of_children Array of child tokens
+     * @param $config Configuration object
+     * @param $context Context object (not used here)
+     * @return bool false when nothing (or only whitespace) remains
+     * @return bool true when the children need no change
+     * @return array of replacement child tokens otherwise
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        // if there are no tokens, delete parent node
+        if (empty($tokens_of_children)) return false;
+        
+        // the new set of children
+        $result = array();
+        
+        // current depth into the nest
+        $nesting = 0;
+        
+        // whether or not we're deleting a node
+        $is_deleting = false;
+        
+        // whether or not parsed character data is allowed
+        // this controls whether or not we silently drop a tag
+        // or generate escaped HTML from it
+        $pcdata_allowed = isset($this->elements['#PCDATA']);
+        
+        // a little sanity check to make sure it's not ALL whitespace
+        $all_whitespace = true;
+        
+        // some configuration
+        $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
+        
+        foreach ($tokens_of_children as $token) {
+            if (!empty($token->is_whitespace)) {
+                $result[] = $token;
+                continue;
+            }
+            $all_whitespace = false; // phew, we're not talking about whitespace
+            
+            // evaluated before the depth changes, so a start tag at depth 0
+            // is itself a direct child
+            $is_child = ($nesting == 0);
+            
+            if ($token->type == 'start') {
+                $nesting++;
+            } elseif ($token->type == 'end') {
+                $nesting--;
+            }
+            
+            if ($is_child) {
+                $is_deleting = false;
+                if (!isset($this->elements[$token->name])) {
+                    $is_deleting = true;
+                    if ($pcdata_allowed && $token->type == 'text') {
+                        $result[] = $token;
+                    } elseif ($pcdata_allowed && $escape_invalid_children) {
+                        $result[] = new HTMLPurifier_Token_Text(
+                            $this->gen->generateFromToken($token, $config)
+                        );
+                    }
+                    continue;
+                }
+            }
+            if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
+                $result[] = $token;
+            } elseif ($pcdata_allowed && $escape_invalid_children) {
+                $result[] =
+                    new HTMLPurifier_Token_Text(
+                        $this->gen->generateFromToken( $token, $config )
+                    );
+            } else {
+                // drop silently
+            }
+        }
+        if (empty($result)) return false;
+        if ($all_whitespace) return false;
+        if ($tokens_of_children == $result) return true;
+        return $result;
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php
@ -0,0 +1,76 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef/Required.php';
+
+/**
+ * Takes the contents of blockquote when in strict and reformats for validation.
+ */
+class   HTMLPurifier_ChildDef_StrictBlockquote
+extends HTMLPurifier_ChildDef_Required
+{
+    /** Element lookup this definition was constructed with */
+    var $real_elements;
+    /** The 'Flow' content set plus #PCDATA, used while calling the parent */
+    var $fake_elements;
+    var $allow_empty = true;
+    var $type = 'strictblockquote';
+    /** Whether the two lookups above have been prepared yet */
+    var $init = false;
+    /**
+     * Validates the children against the wider lookup, then wraps every run
+     * of top-level tokens that is not one of the real elements in the HTML
+     * definition's block wrapper element.
+     * @param $tokens_of_children Array of child tokens
+     * @param $config Configuration object, source of the HTML definition
+     * @param $context Context object, passed to the parent
+     * @return array of child tokens (empty when the parent rejected them)
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        
+        $def = $config->getHTMLDefinition();
+        if (!$this->init) {
+            // prepared lazily, on the first call only
+            $this->real_elements = $this->elements;
+            $this->fake_elements = $def->info_content_sets['Flow'];
+            $this->fake_elements['#PCDATA'] = true;
+            $this->init = true;
+        }
+        
+        // let the parent validate against the wider set, then switch back
+        $this->elements = $this->fake_elements;
+        $result = parent::validateChildren($tokens_of_children, $config, $context);
+        $this->elements = $this->real_elements;
+        
+        if ($result === false) return array();
+        if ($result === true) $result = $tokens_of_children;
+        
+        $wrap_open  = new HTMLPurifier_Token_Start($def->info_block_wrapper);
+        $wrap_close = new HTMLPurifier_Token_End(  $def->info_block_wrapper);
+        $in_wrapper = false;
+        $depth = 0;
+        $ret = array();
+        
+        // assuming that there are no comment tokens
+        foreach ($result as $token) {
+            // wrapping decisions are only taken for top-level tokens
+            if ($depth == 0) {
+                if ($in_wrapper) {
+                    $opens_element =
+                        ($token->type == 'start' || $token->type == 'empty');
+                    if ($opens_element && isset($this->elements[$token->name])) {
+                        // a real element begins, so the wrapper ends
+                        $ret[] = $wrap_close;
+                        $in_wrapper = false;
+                    }
+                } elseif (
+                    $token->type == 'text' ||
+                    !isset($this->elements[$token->name])
+                ) {
+                    $ret[] = $wrap_open;
+                    $in_wrapper = true;
+                }
+            }
+            $ret[] = $token;
+            if ($token->type == 'start') {
+                $depth++;
+            } elseif ($token->type == 'end') {
+                $depth--;
+            }
+        }
+        if ($in_wrapper) $ret[] = $wrap_close;
+        return $ret;
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php
+++ b/lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php
@ -0,0 +1,142 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef.php';
+
+/**
+ * Definition for tables
+ */
+class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
+{
+    var $allow_empty = false;
+    var $type = 'table';
+    function HTMLPurifier_ChildDef_Table() {}
+    
+    /**
+     * Sorts the direct children of a table into a fixed order.
+     *
+     * Top-level children are grouped into caption, col/colgroup, thead,
+     * tfoot and tr/tbody collections and re-emitted in exactly that order.
+     * Only the first caption is kept; a second thead or tfoot is renamed to
+     * tbody and treated as content. Whitespace text preceding an element is
+     * kept with that element; other top-level tokens are not carried over.
+     *
+     * @param $tokens_of_children Array of tokens that are the table's children
+     * @param $config Not read by this implementation
+     * @param $context Not read by this implementation
+     * @return false if there are no children or no tr/tbody content, true if
+     *         the emitted order is identical to the input, otherwise the
+     *         reordered array of tokens
+     */
+    function validateChildren($tokens_of_children, $config, &$context) {
+        if (empty($tokens_of_children)) return false;
+        
+        // this ensures that the loop gets run one last time before closing
+        // up. It's a little bit of a hack, but it works! Just make sure you
+        // get rid of the token later.
+        $tokens_of_children[] = false;
+        
+        // only one of these elements is allowed in a table
+        $caption = false;
+        $thead   = false;
+        $tfoot   = false;
+        
+        // as many of these as you want
+        $cols    = array();
+        $content = array();
+        
+        $nesting = 0; // current depth so we can determine nodes
+        $is_collecting = false; // are we globbing together tokens to package
+                                // into one of the collectors?
+        $collection = array(); // collected nodes
+        $tag_index = 0; // the first node might be whitespace,
+                            // so this tells us where the start tag is
+        
+        foreach ($tokens_of_children as $token) {
+            // depth is sampled BEFORE this token adjusts it, so an end tag
+            // that closes a collected element is still "inside" it
+            $is_child = ($nesting == 0);
+            
+            if ($token === false) {
+                // terminating sequence started
+            } elseif ($token->type == 'start') {
+                $nesting++;
+            } elseif ($token->type == 'end') {
+                $nesting--;
+            }
+            
+            // handle node collection
+            if ($is_collecting) {
+                if ($is_child) {
+                    // okay, let's stash the tokens away
+                    // first token tells us the type of the collection
+                    switch ($collection[$tag_index]->name) {
+                        case 'tr':
+                        case 'tbody':
+                            $content[] = $collection;
+                            break;
+                        case 'caption':
+                            if ($caption !== false) break;
+                            $caption = $collection;
+                            break;
+                        case 'thead':
+                        case 'tfoot':
+                            // access the appropriate variable, $thead or $tfoot
+                            $var = $collection[$tag_index]->name;
+                            if ($$var === false) {
+                                $$var = $collection;
+                            } else {
+                                // transmutate the first and last entries into
+                                // tbody tags, and then put into content
+                                $collection[$tag_index]->name = 'tbody';
+                                $collection[count($collection)-1]->name = 'tbody';
+                                $content[] = $collection;
+                            }
+                            break;
+                         case 'colgroup':
+                            $cols[] = $collection;
+                            break;
+                    }
+                    $collection = array();
+                    $is_collecting = false;
+                    $tag_index = 0;
+                } else {
+                    // add the node to the collection
+                    $collection[] = $token;
+                }
+            }
+            
+            // terminate
+            if ($token === false) break;
+            
+            if ($is_child) {
+                // determine what we're dealing with
+                // NOTE(review): $token->name is read for every top-level
+                // token, including text tokens -- confirm those define it
+                if ($token->name == 'col') {
+                    // the only empty tag in the possie, we can handle it
+                    // immediately
+                    $cols[] = array_merge($collection, array($token));
+                    $collection = array();
+                    $tag_index = 0;
+                    continue;
+                }
+                // the switch is the last statement of the loop body, so
+                // leaving it with break moves on to the next token; a bare
+                // continue here would act as break anyway and triggers a
+                // warning on newer PHP versions
+                switch($token->name) {
+                    case 'caption':
+                    case 'colgroup':
+                    case 'thead':
+                    case 'tfoot':
+                    case 'tbody':
+                    case 'tr':
+                        $is_collecting = true;
+                        $collection[] = $token;
+                        break;
+                    default:
+                        if ($token->type == 'text' && $token->is_whitespace) {
+                            $collection[] = $token;
+                            $tag_index++;
+                        }
+                        break;
+                }
+            }
+        }
+        
+        if (empty($content)) return false;
+        
+        $ret = array();
+        if ($caption !== false) $ret = array_merge($ret, $caption);
+        // $cols is always an array, possibly empty
+        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
+        if ($thead !== false)   $ret = array_merge($ret, $thead);
+        if ($tfoot !== false)   $ret = array_merge($ret, $tfoot);
+        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
+        if (!empty($collection) && $is_collecting == false){
+            // grab the trailing space
+            $ret = array_merge($ret, $collection);
+        }
+        
+        array_pop($tokens_of_children); // remove phantom token
+        
+        return ($ret === $tokens_of_children) ? true : $ret;
+        
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Config.php
+++ b/lib/htmlpurifier/HTMLPurifier/Config.php
@ -0,0 +1,222 @@
+<?php
+
+/**
+ * Configuration object that triggers customizable behavior.
+ *
+ * @warning This class is strongly defined: that means that the class
+ *          will fail if an undefined directive is retrieved or set.
+ * 
+ * @note Many classes that could (although many times don't) use the
+ *       configuration object make it a mandatory parameter.  This is
+ *       because a configuration object should always be forwarded,
+ *       otherwise, you run the risk of missing a parameter and then
+ *       being stumped when a configuration directive doesn't work.
+ */
+class HTMLPurifier_Config
+{
+    
+    /**
+     * Two-level associative array of configuration directives
+     */
+    var $conf;
+    
+    /**
+     * Reference HTMLPurifier_ConfigSchema for value checking
+     */
+    var $def;
+    
+    /**
+     * Cached instance of HTMLPurifier_HTMLDefinition
+     */
+    var $html_definition;
+    
+    /**
+     * Cached instance of HTMLPurifier_CSSDefinition
+     */
+    var $css_definition;
+    
+    /**
+     * Builds a configuration object seeded with the schema's default values.
+     * @param $definition HTMLPurifier_ConfigSchema that defines what directives
+     *                    are allowed.
+     */
+    function HTMLPurifier_Config(&$definition) {
+        // hold on to the schema, get()/set() check directives against it
+        $this->def  = $definition;
+        // every directive starts out at its schema default
+        $this->conf = $definition->defaults;
+    }
+    
+    /**
+     * Convenience constructor that turns a mixed variable into a config object.
+     * @static
+     * @param mixed $config Variable that defines the state of the config
+     *                      object. Can be: a HTMLPurifier_Config() object
+     *                      (returned as is), an array of directives in the
+     *                      format accepted by loadArray(), or a string
+     *                      filename of an ini file. Anything else yields
+     *                      the default configuration.
+     * @return Configured HTMLPurifier_Config object
+     */
+    function create($config) {
+        // an existing configuration object is handed back untouched
+        if (is_a($config, 'HTMLPurifier_Config')) {
+            return $config;
+        }
+        $created = HTMLPurifier_Config::createDefault();
+        if (is_string($config)) {
+            $created->loadIni($config);
+        } elseif (is_array($config)) {
+            $created->loadArray($config);
+        }
+        return $created;
+    }
+    
+    /**
+     * Convenience constructor that creates a configuration object holding
+     * the defaults of the application-wide schema.
+     * @static
+     * @return Default HTMLPurifier_Config object.
+     */
+    function createDefault() {
+        $schema =& HTMLPurifier_ConfigSchema::instance();
+        $default_config = new HTMLPurifier_Config($schema);
+        return $default_config;
+    }
+    
+    /**
+     * Retrieves a value from the configuration.
+     * Raises a warning for an undefined directive and an error for an
+     * aliased one; nothing is returned in either case.
+     * @param $namespace String namespace
+     * @param $key String key
+     * @param $from_alias Not read by this method
+     */
+    function get($namespace, $key, $from_alias = false) {
+        $is_defined = isset($this->def->info[$namespace][$key]);
+        if (!$is_defined) {
+            trigger_error('Cannot retrieve value of undefined directive',
+                E_USER_WARNING);
+            return;
+        }
+        $directive_class = $this->def->info[$namespace][$key]->class;
+        if ($directive_class == 'alias') {
+            trigger_error('Cannot get value from aliased directive, use real name',
+                E_USER_ERROR);
+            return;
+        }
+        return $this->conf[$namespace][$key];
+    }
+    
+    /**
+     * Retrieves an array of directives to values from a given namespace.
+     * Raises a warning and returns nothing if the namespace is undefined.
+     * @param $namespace String namespace
+     */
+    function getBatch($namespace) {
+        if (isset($this->def->info[$namespace])) {
+            return $this->conf[$namespace];
+        }
+        trigger_error('Cannot retrieve undefined namespace',
+            E_USER_WARNING);
+        return;
+    }
+    
+    /**
+     * Sets a value to configuration.
+     *
+     * The value is validated against the directive's type, value aliases
+     * are resolved, and string values are checked against the directive's
+     * allowed values. On any failure an error is triggered and the stored
+     * value is left untouched. Setting an aliased directive sets the
+     * directive it points to instead.
+     *
+     * @param $namespace String namespace
+     * @param $key String key
+     * @param $value Mixed value
+     * @param $from_alias Internal flag, true when this call was reached by
+     *                    following a directive alias
+     */
+    function set($namespace, $key, $value, $from_alias = false) {
+        if (!isset($this->def->info[$namespace][$key])) {
+            trigger_error('Cannot set undefined directive to value',
+                E_USER_WARNING);
+            return;
+        }
+        if ($this->def->info[$namespace][$key]->class == 'alias') {
+            if ($from_alias) {
+                // an alias that points at another alias must not be
+                // followed: a cyclic pair would recurse without end
+                trigger_error('Double-aliases not allowed, please fix '.
+                    'ConfigSchema bug', E_USER_ERROR);
+                return;
+            }
+            $this->set($this->def->info[$namespace][$key]->namespace,
+                       $this->def->info[$namespace][$key]->name,
+                       $value, true);
+            return;
+        }
+        $value = $this->def->validate(
+                    $value,
+                    $this->def->info[$namespace][$key]->type,
+                    $this->def->info[$namespace][$key]->allow_null
+                 );
+        if (is_string($value)) {
+            // resolve value alias if defined
+            if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
+                $value = $this->def->info[$namespace][$key]->aliases[$value];
+            }
+            if ($this->def->info[$namespace][$key]->allowed !== true) {
+                // check to see if the value is allowed
+                if (!isset($this->def->info[$namespace][$key]->allowed[$value])) {
+                    trigger_error('Value not supported', E_USER_WARNING);
+                    return;
+                }
+            }
+        }
+        if ($this->def->isError($value)) {
+            trigger_error('Value is of invalid type', E_USER_WARNING);
+            return;
+        }
+        $this->conf[$namespace][$key] = $value;
+        if ($namespace == 'HTML' || $namespace == 'Attr') {
+            // reset HTML definition if relevant attributes changed
+            $this->html_definition = null;
+        }
+        if ($namespace == 'CSS') {
+            // same for the cached CSS definition
+            $this->css_definition = null;
+        }
+    }
+    
+    /**
+     * Retrieves reference to the HTML definition, creating and caching it
+     * on first use.
+     * @param $raw Return a copy that has not been setup yet. Must be
+     *             called before it's been setup, otherwise won't work.
+     */
+    function &getHTMLDefinition($raw = false) {
+        // a fresh object is needed when none is cached yet, or when a raw
+        // one is requested but the cached one has already been set up
+        $needs_new = empty($this->html_definition) ||
+                     ($raw && $this->html_definition->setup);
+        if ($needs_new) {
+            $this->html_definition = new HTMLPurifier_HTMLDefinition($this);
+            if ($raw) {
+                // no setup!
+                return $this->html_definition;
+            }
+        }
+        if (!$this->html_definition->setup) {
+            $this->html_definition->setup();
+        }
+        return $this->html_definition;
+    }
+    
+    /**
+     * Retrieves reference to the CSS definition, creating and setting it
+     * up on first use.
+     */
+    function &getCSSDefinition() {
+        if ($this->css_definition !== null) {
+            // already built, reuse the cached object
+            return $this->css_definition;
+        }
+        $this->css_definition = new HTMLPurifier_CSSDefinition();
+        $this->css_definition->setup($this);
+        return $this->css_definition;
+    }
+    
+    /**
+     * Loads configuration values from an array. Two layouts are accepted:
+     * the condensed form Namespace.Directive => Value (an underscore may be
+     * used in place of the dot), and the nested form
+     * Namespace => array(Directive => Value).
+     * @param $config_array Configuration associative array
+     */
+    function loadArray($config_array) {
+        foreach ($config_array as $raw_key => $entry) {
+            $dotted = str_replace('_', '.', $raw_key);
+            if (strpos($dotted, '.') === false) {
+                // nested form: the key is a namespace holding directives
+                foreach ($entry as $directive => $directive_value) {
+                    $this->set($dotted, $directive, $directive_value);
+                }
+                continue;
+            }
+            // condensed form
+            list($namespace, $directive) = explode('.', $dotted);
+            $this->set($namespace, $directive, $entry);
+        }
+    }
+    
+    /**
+     * Loads configuration values from an ini file. Sections of the file
+     * are passed on as nested arrays to loadArray(). If the file cannot be
+     * parsed a warning is raised and the configuration is left unchanged.
+     * @param $filename Name of ini file
+     */
+    function loadIni($filename) {
+        $array = parse_ini_file($filename, true);
+        if (!is_array($array)) {
+            // parse_ini_file() yields false on failure, which must not be
+            // handed to loadArray()
+            trigger_error('Cannot load configuration from ini file',
+                E_USER_WARNING);
+            return;
+        }
+        $this->loadArray($array);
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ConfigDef.php
+++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef.php
@ -0,0 +1,10 @@
+<?php
+
+/**
+ * Base class for configuration entity
+ */
+class HTMLPurifier_ConfigDef {
+    /**
+     * Identifies what kind of configuration entity this object is. False
+     * in this base class; subclasses override it with a string.
+     */
+    var $class = false;
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php
+++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php
@ -0,0 +1,74 @@
+<?php
+
+require_once 'HTMLPurifier/ConfigDef.php';
+
+/**
+ * Structure object containing definition of a directive.
+ * @note This structure does not contain default values
+ */
+class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
+{
+    
+    var $class = 'directive';
+    
+    /**
+     * Allowed type of the directive. Values are:
+     *      - string
+     *      - istring (case insensitive string)
+     *      - int
+     *      - float
+     *      - bool
+     *      - lookup (array of value => true)
+     *      - list (regular numbered index array)
+     *      - hash (array of key => value)
+     *      - mixed (anything goes)
+     */
+    var $type = 'mixed';
+    
+    /**
+     * Plaintext descriptions of the configuration entity. Organized by
+     * file and line number, so multiple descriptions are allowed.
+     */
+    var $descriptions = array();
+    
+    /**
+     * Is null allowed? Has no effect for mixed type.
+     * @bool
+     */
+    var $allow_null = false;
+    
+    /**
+     * Lookup table of allowed values of the element, bool true if all allowed.
+     */
+    var $allowed = true;
+    
+    /**
+     * Hash of value aliases, i.e. values that are equivalent.
+     */
+    var $aliases = array();
+    
+    /**
+     * Every argument is optional; an argument left at null keeps the
+     * default value of the corresponding member variable.
+     */
+    function HTMLPurifier_ConfigDef_Directive(
+        $type = null,
+        $descriptions = null,
+        $allow_null = null,
+        $allowed = null,
+        $aliases = null
+    ) {
+        if ($type !== null) {
+            $this->type = $type;
+        }
+        if ($descriptions !== null) {
+            $this->descriptions = $descriptions;
+        }
+        if ($allow_null !== null) {
+            $this->allow_null = $allow_null;
+        }
+        if ($allowed !== null) {
+            $this->allowed = $allowed;
+        }
+        if ($aliases !== null) {
+            $this->aliases = $aliases;
+        }
+    }
+    
+    /**
+     * Adds a description to the array, filed under the file and line it
+     * originates from.
+     */
+    function addDescription($file, $line, $description) {
+        if (!isset($this->descriptions[$file])) {
+            $this->descriptions[$file] = array();
+        }
+        $this->descriptions[$file][$line] = $description;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php
+++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php
@ -0,0 +1,27 @@
+<?php
+
+require_once 'HTMLPurifier/ConfigDef.php';
+
+/**
+ * Structure object describing a directive alias
+ */
+class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
+{
+    var $class = 'alias';
+    
+    /**
+     * Namespace of the directive this alias points to
+     */
+    var $namespace;
+    /**
+     * Name of the directive this alias points to
+     */
+    var $name;
+    
+    /**
+     * @param $namespace Namespace being aliased to
+     * @param $name Directive being aliased to
+     */
+    function HTMLPurifier_ConfigDef_DirectiveAlias($namespace, $name) {
+        $this->name = $name;
+        $this->namespace = $namespace;
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php
+++ b/lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php
@ -0,0 +1,23 @@
+<?php
+
+require_once 'HTMLPurifier/ConfigDef.php';
+
+/**
+ * Structure object describing a namespace
+ */
+class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
+    
+    var $class = 'namespace';
+    
+    /**
+     * String description of what kinds of directives go in this namespace.
+     */
+    var $description;
+    
+    /**
+     * @param $description Description of the namespace, null if omitted
+     */
+    function HTMLPurifier_ConfigDef_Namespace($description = null) {
+        $this->description = $description;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php
+++ b/lib/htmlpurifier/HTMLPurifier/ConfigSchema.php
@ -0,0 +1,386 @@
+<?php
+
+require_once 'HTMLPurifier/Error.php';
+require_once 'HTMLPurifier/ConfigDef.php';
+require_once 'HTMLPurifier/ConfigDef/Namespace.php';
+require_once 'HTMLPurifier/ConfigDef/Directive.php';
+require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
+
+/**
+ * Configuration definition, defines directives and their defaults.
+ * @todo The ability to define things multiple times is confusing and should
+ *       be factored out to its own function named registerDependency() or 
+ *       addNote(), where only the namespace.name and an extra descriptions
+ *       documenting the nature of the dependency are needed.  Since it's
+ *       possible that the dependency is registered before the configuration
+ *       is defined, deferring it to some sort of cache until it actually
+ *       gets defined would be wise, keeping it opaque until it does get
+ *       defined. We could add a finalize() method which would cause it to
+ *       error out if we get a dangling dependency.  It's difficult, however,
+ *       to know whether or not it's a dependency, or a codependency, that is
+ *       neither of them fully depends on it. Where does the configuration go
+ *       then?  This could be partially resolved by allowing blanket definitions
+ *       and then splitting them up into finer-grained versions, however, there
+ *       might be implementation difficulties in ini files regarding order of
+ *       execution.
+ */
+class HTMLPurifier_ConfigSchema {
+    
+    /**
+     * Defaults of the directives and namespaces.
+     * @note This shares the exact same structure as HTMLPurifier_Config::$conf
+     */
+    var $defaults = array();
+    
+    /**
+     * Definition of the directives.
+     */
+    var $info = array();
+    
+    /**
+     * Definition of namespaces.
+     */
+    var $info_namespace = array();
+    
+    /**
+     * Lookup table of allowed types.
+     */
+    var $types = array(
+        'string'    => 'String',
+        'istring'   => 'Case-insensitive string',
+        'int'       => 'Integer',
+        'float'     => 'Float',
+        'bool'      => 'Boolean',
+        'lookup'    => 'Lookup array',
+        'list'      => 'Array list',
+        'hash'      => 'Associative array',
+        'mixed'     => 'Mixed'
+    );
+    
+    /**
+     * Initializes the default namespaces by registering each of them,
+     * together with its description, through defineNamespace().
+     */
+    function initialize() {
+        $default_namespaces = array(
+            'Core' => 'Core features that are always available.',
+            'Attr' => 'Features regarding attribute validation.',
+            'URI'  => 'Features regarding Uniform Resource Identifiers.',
+            'HTML' => 'Configuration regarding allowed HTML.',
+            'CSS'  => 'Configuration regarding allowed CSS.',
+            'Test' => 'Developer testing configuration for our unit tests.'
+        );
+        foreach ($default_namespaces as $name => $description) {
+            $this->defineNamespace($name, $description);
+        }
+    }
+    
+    /**
+     * Retrieves an instance of the application-wide configuration definition.
+     * @static
+     * @param $prototype Optional. An object passed here replaces the stored
+     *                   instance; bool true discards the stored instance and
+     *                   builds a freshly initialized one; null (default)
+     *                   returns the stored instance, building it on first use.
+     */
+    function &instance($prototype = null) {
+        static $instance;
+        // true is a reset request, not a prototype, so it must be told
+        // apart before the prototype is stored
+        if ($prototype !== null && $prototype !== true) {
+            $instance = $prototype;
+        } elseif ($instance === null || $prototype === true) {
+            $instance = new HTMLPurifier_ConfigSchema();
+            $instance->initialize();
+        }
+        return $instance;
+    }
+    
+    /**
+     * Defines a directive for configuration
+     *
+     * A directive may be defined more than once as long as type and default
+     * agree with the first definition; each call adds its description,
+     * filed under the calling file and line.
+     *
+     * @static
+     * @warning Will fail if directive's namespace is undefined
+     * @param $namespace Namespace the directive is in
+     * @param $name Key of directive
+     * @param $default Default value of directive
+     * @param $type Allowed type of the directive. See
+     *      HTMLPurifier_DirectiveDef::$type for allowed values. May be
+     *      suffixed with "/null" to additionally allow null.
+     * @param $description Description of directive for documentation
+     */
+    function define(
+        $namespace, $name, $default, $type, 
+        $description
+    ) {
+        $def =& HTMLPurifier_ConfigSchema::instance();
+        if (!isset($def->info[$namespace])) {
+            trigger_error('Cannot define directive for undefined namespace',
+                E_USER_ERROR);
+            return;
+        }
+        if (!ctype_alnum($name)) {
+            trigger_error('Directive name must be alphanumeric',
+                E_USER_ERROR);
+            return;
+        }
+        if (empty($description)) {
+            trigger_error('Description must be non-empty',
+                E_USER_ERROR);
+            return;
+        }
+        
+        // process modifiers and normalise the default first: the stored
+        // type has its modifier stripped and the stored default is the
+        // validated one, so a redefinition has to be compared in that form
+        $type_values = explode('/', $type, 2);
+        $type = $type_values[0];
+        $modifier = isset($type_values[1]) ? $type_values[1] : false;
+        $allow_null = ($modifier === 'null');
+        
+        if (!isset($def->types[$type])) {
+            trigger_error('Invalid type for configuration directive',
+                E_USER_ERROR);
+            return;
+        }
+        $default = $def->validate($default, $type, $allow_null);
+        if ($def->isError($default)) {
+            trigger_error('Default value does not match directive type',
+                E_USER_ERROR);
+            return;
+        }
+        
+        if (isset($def->info[$namespace][$name])) {
+            // already defined, only a consistent redefinition is accepted
+            if (
+                $def->info[$namespace][$name]->type !== $type ||
+                $def->defaults[$namespace][$name]   !== $default
+            ) {
+                trigger_error('Inconsistent default or type, cannot redefine');
+                return;
+            }
+        } else {
+            $def->info[$namespace][$name] =
+                new HTMLPurifier_ConfigDef_Directive();
+            $def->info[$namespace][$name]->type = $type;
+            $def->info[$namespace][$name]->allow_null = $allow_null;
+            $def->defaults[$namespace][$name]   = $default;
+        }
+        // record where this definition was made
+        $backtrace = debug_backtrace();
+        $file = $def->mungeFilename($backtrace[0]['file']);
+        $line = $backtrace[0]['line'];
+        $def->info[$namespace][$name]->addDescription($file,$line,$description);
+    }
+    
+    /**
+     * Defines a namespace for directives to be put into. The name must be
+     * alphanumeric and not yet taken, the description non-empty.
+     * @static
+     * @param $namespace Namespace's name
+     * @param $description Description of the namespace
+     */
+    function defineNamespace($namespace, $description) {
+        $schema =& HTMLPurifier_ConfigSchema::instance();
+        if (isset($schema->info[$namespace])) {
+            trigger_error('Cannot redefine namespace', E_USER_ERROR);
+            return;
+        }
+        if (!ctype_alnum($namespace)) {
+            trigger_error('Namespace name must be alphanumeric',
+                E_USER_ERROR);
+            return;
+        }
+        if (empty($description)) {
+            trigger_error('Description must be non-empty',
+                E_USER_ERROR);
+            return;
+        }
+        // describe the namespace, then register it with empty directive
+        // and default tables
+        $entry = new HTMLPurifier_ConfigDef_Namespace();
+        $entry->description = $description;
+        $schema->info[$namespace] = array();
+        $schema->info_namespace[$namespace] = $entry;
+        $schema->defaults[$namespace] = array();
+    }
+    
+    /**
+     * Defines directive value aliases.
+     * 
+     * Directive value aliases are convenient for developers because it lets
+     * them set a directive to several values and get the same result.
+     * Processing stops at the first alias that is rejected; aliases before
+     * it stay registered.
+     * @static
+     * @param $namespace Directive's namespace
+     * @param $name Name of Directive
+     * @param $aliases Hash of aliased value => real value it will be
+     *                 converted into
+     */
+    function defineValueAliases($namespace, $name, $aliases) {
+        $def =& HTMLPurifier_ConfigSchema::instance();
+        if (!isset($def->info[$namespace][$name])) {
+            trigger_error('Cannot set value alias for non-existant directive',
+                E_USER_ERROR);
+            return;
+        }
+        foreach ($aliases as $alias => $real) {
+            // the real value only needs to be looked up when the directive
+            // restricts its values; allowed === true means anything goes
+            if ($def->info[$namespace][$name]->allowed !== true &&
+                !isset($def->info[$namespace][$name]->allowed[$real])
+            ) {
+                trigger_error('Cannot define alias to value that is not allowed',
+                    E_USER_ERROR);
+                return;
+            }
+            if (isset($def->info[$namespace][$name]->allowed[$alias])) {
+                trigger_error('Cannot define alias over allowed value',
+                    E_USER_ERROR);
+                return;
+            }
+            $def->info[$namespace][$name]->aliases[$alias] = $real;
+        }
+    }
+    
+    /**
+     * Defines a set of allowed values for a directive. Only directives of
+     * type string or istring can be restricted, and a non-null default has
+     * to be among the allowed values.
+     * @static
+     * @param $namespace Namespace of directive
+     * @param $name Name of directive
+     * @param $allowed_values Arraylist of allowed values
+     */
+    function defineAllowedValues($namespace, $name, $allowed_values) {
+        $schema =& HTMLPurifier_ConfigSchema::instance();
+        if (!isset($schema->info[$namespace][$name])) {
+            trigger_error('Cannot define allowed values for undefined directive',
+                E_USER_ERROR);
+            return;
+        }
+        $directive =& $schema->info[$namespace][$name];
+        $is_string_type = ($directive->type == 'string' ||
+                           $directive->type == 'istring');
+        if (!$is_string_type) {
+            trigger_error('Cannot define allowed values for directive whose type is not string',
+                E_USER_ERROR);
+            return;
+        }
+        if ($directive->allowed === true) {
+            // first restriction: switch from "everything" to a lookup table
+            $directive->allowed = array();
+        }
+        foreach ($allowed_values as $allowed_value) {
+            $directive->allowed[$allowed_value] = true;
+        }
+        $default = $schema->defaults[$namespace][$name];
+        if ($default !== null && !isset($directive->allowed[$default])) {
+            trigger_error('Default value must be in allowed range of variables',
+                E_USER_ERROR);
+            // lift the restriction again
+            $directive->allowed = true;
+            return;
+        }
+    }
+    
+    /**
+     * Defines a directive alias for backwards compatibility. The alias
+     * must be a new alphanumeric name in a defined namespace, and its
+     * target must be a defined directive that is not an alias itself.
+     * @static
+     * @param $namespace
+     * @param $name Directive that will be aliased
+     * @param $new_namespace
+     * @param $new_name Directive that the alias will be to
+     */
+    function defineAlias($namespace, $name, $new_namespace, $new_name) {
+        $schema =& HTMLPurifier_ConfigSchema::instance();
+        // find the first rule that is violated, if any
+        $problem = null;
+        if (!isset($schema->info[$namespace])) {
+            $problem = 'Cannot define directive alias in undefined namespace';
+        } elseif (!ctype_alnum($name)) {
+            $problem = 'Directive name must be alphanumeric';
+        } elseif (isset($schema->info[$namespace][$name])) {
+            $problem = 'Cannot define alias over directive';
+        } elseif (!isset($schema->info[$new_namespace][$new_name])) {
+            $problem = 'Cannot define alias to undefined directive';
+        } elseif ($schema->info[$new_namespace][$new_name]->class == 'alias') {
+            $problem = 'Cannot define alias to alias';
+        }
+        if ($problem !== null) {
+            trigger_error($problem, E_USER_ERROR);
+            return;
+        }
+        $schema->info[$namespace][$name] =
+            new HTMLPurifier_ConfigDef_DirectiveAlias(
+                $new_namespace, $new_name);
+    }
+    
+    /**
+     * Validate a variable according to type, converting it where a
+     * conversion is defined (numeric strings, boolean keywords,
+     * comma-separated strings for the array types).
+     * @param $var Value to validate
+     * @param $type Type name, must be a key of $this->types
+     * @param $allow_null Whether null is accepted as a value
+     * @return The validated, possibly converted value, or an
+     *         HTMLPurifier_Error object if the value does not fit the
+     *         type (test with isError()). Nothing is returned after the
+     *         error raised for an unknown $type.
+     */
+    function validate($var, $type, $allow_null = false) {
+        if (!isset($this->types[$type])) {
+            trigger_error('Invalid type', E_USER_ERROR);
+            return;
+        }
+        if ($allow_null && $var === null) return null;
+        // every case either returns the accepted value or breaks out of
+        // the switch, which leads to the error object at the bottom
+        switch ($type) {
+            case 'mixed':
+                return $var;
+            case 'istring':
+            case 'string':
+                if (!is_string($var)) break;
+                if ($type === 'istring') $var = strtolower($var);
+                return $var;
+            case 'int':
+                if (is_string($var) && ctype_digit($var)) $var = (int) $var;
+                elseif (!is_int($var)) break;
+                return $var;
+            case 'float':
+                // numeric strings and plain integers are converted, an
+                // integer is a perfectly good floating point value
+                if ((is_string($var) && is_numeric($var)) || is_int($var)) {
+                    $var = (float) $var;
+                } elseif (!is_float($var)) break;
+                return $var;
+            case 'bool':
+                if (is_int($var) && ($var === 0 || $var === 1)) {
+                    $var = (bool) $var;
+                } elseif (is_string($var)) {
+                    if ($var == 'on' || $var == 'true' || $var == '1') {
+                        $var = true;
+                    } elseif ($var == 'off' || $var == 'false' || $var == '0') {
+                        $var = false;
+                    } else {
+                        break;
+                    }
+                } elseif (!is_bool($var)) break;
+                return $var;
+            case 'list':
+            case 'hash':
+            case 'lookup':
+                if (is_string($var)) {
+                    // simplistic string to array method that only works
+                    // for simple lists of tag names or alphanumeric characters
+                    $var = explode(',',$var);
+                    // remove spaces
+                    foreach ($var as $i => $j) $var[$i] = trim($j);
+                }
+                if (!is_array($var)) break;
+                $keys = array_keys($var);
+                if ($keys === array_keys($keys)) {
+                    // keys are 0..n-1, i.e. a plain list
+                    if ($type == 'list') return $var;
+                    elseif ($type == 'lookup') {
+                        // turn the listed values into value => true
+                        $new = array();
+                        foreach ($var as $key) {
+                            $new[$key] = true;
+                        }
+                        return $new;
+                    } else break;
+                }
+                if ($type === 'lookup') {
+                    foreach ($var as $key => $value) {
+                        $var[$key] = true;
+                    }
+                }
+                return $var;
+        }
+        $error = new HTMLPurifier_Error();
+        return $error;
+    }
+    
+    /**
+     * Takes an absolute path and munges it into a more manageable relative
+     * path: everything before the last occurrence of "HTMLPurifier" is cut
+     * off and backslashes are turned into forward slashes.
+     */
+    function mungeFilename($filename) {
+        $relative = substr($filename, strrpos($filename, 'HTMLPurifier'));
+        return str_replace('\\', '/', $relative);
+    }
+    
+    /**
+     * Checks if var is an HTMLPurifier_Error object
+     * @return bool true for an HTMLPurifier_Error object, false otherwise
+     */
+    function isError($var) {
+        return is_object($var) && is_a($var, 'HTMLPurifier_Error');
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ContentSets.php
+++ b/lib/htmlpurifier/HTMLPurifier/ContentSets.php
@ -0,0 +1,148 @@
+<?php
+
+// common defs that we'll support by default
+require_once 'HTMLPurifier/ChildDef.php';
+require_once 'HTMLPurifier/ChildDef/Empty.php';
+require_once 'HTMLPurifier/ChildDef/Required.php';
+require_once 'HTMLPurifier/ChildDef/Optional.php';
+
+class HTMLPurifier_ContentSets
+{
+    
+    /**
+     * Content set strings (pipe separated element names), indexed by
+     * content set name.
+     * @public
+     */
+    var $info = array();
+    
+    /**
+     * Content set lookups (element => true), indexed by content set name.
+     * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
+     * @public
+     */
+    var $lookup = array();
+    
+    /**
+     * Names of the defined content sets (keys of info), kept in step
+     * with $values.
+     */
+    var $keys = array();
+    /**
+     * Expanded content set strings (values of info), kept in step
+     * with $keys.
+     */
+    var $values = array();
+    
+    /**
+     * Collects the content sets of the given modules, expands content set
+     * names that appear inside other content sets and fills the keys,
+     * values and lookup member variables.
+     * @param $modules List of HTMLPurifier_HTMLModule (a single module
+     *                 is wrapped into a list)
+     */
+    function HTMLPurifier_ContentSets($modules) {
+        if (!is_array($modules)) {
+            $modules = array($modules);
+        }
+        // gather the sets of every module; a name that shows up more than
+        // once is extended with ' | ', it cannot be overridden
+        foreach ($modules as $module) {
+            foreach ($module->content_sets as $name => $definition) {
+                $this->info[$name] = isset($this->info[$name])
+                    ? $this->info[$name] . ' | ' . $definition
+                    : $definition;
+            }
+        }
+        // expand set names found inside other sets; each set is visited
+        // exactly once, so this cannot recurse forever
+        $this->keys = array_keys($this->info);
+        foreach ($this->keys as $name) {
+            // the replacement list is rebuilt on every pass because
+            // earlier passes have already changed some entries
+            $replacements = array_values($this->info);
+            $this->info[$name] = str_replace(
+                $this->keys, $replacements, $this->info[$name]);
+        }
+        $this->values = array_values($this->info);
+        
+        // build the element => true tables
+        foreach ($this->keys as $name) {
+            $this->lookup[$name] = $this->convertToLookup($this->info[$name]);
+        }
+    }
+    
+    /**
+     * Expands content set names in a definition's content model and
+     * assigns the resulting ChildDef to it. Does nothing when the
+     * definition already has a non-empty child.
+     * @param $def HTMLPurifier_ElementDef reference
+     * @param $module Module that defined the ElementDef
+     */
+    function generateChildDef(&$def, $module) {
+        if (empty($def->child)) {
+            // only string content models can be expanded
+            if (is_string($def->content_model)) {
+                $def->content_model = str_replace(
+                    $this->keys, $this->values, $def->content_model);
+            }
+            $def->child = $this->getChildDef($def, $module);
+        }
+    }
+    
+    /**
+     * Builds the ChildDef described by the content_model and
+     * content_model_type member variables of an HTMLPurifier_ElementDef.
+     * @note Types that are not built in are handed to the module, provided
+     *       it sets defines_child_def.
+     * @note NOTE(review): HTMLPurifier_ChildDef_Custom is not among the
+     *       require_once lines at the top of this file; presumably it is
+     *       loaded elsewhere - confirm.
+     * @param $def HTMLPurifier_ElementDef to have ChildDef extracted
+     * @param $module Module that defined the ElementDef
+     * @return HTMLPurifier_ChildDef corresponding to ElementDef, or false
+     *         when no ChildDef class could be determined
+     */
+    function getChildDef($def, $module) {
+        $model = $def->content_model;
+        if (is_object($model)) {
+            // an object was stored where a string belongs: complain, but
+            // hand it back as-is
+            trigger_error(
+                'Literal object child definitions should be stored in '.
+                'ElementDef->child not ElementDef->content_model',
+                E_USER_NOTICE
+            );
+            return $model;
+        }
+        // loose comparisons on purpose, matching switch semantics
+        $type = $def->content_model_type;
+        if ($type == 'required') {
+            return new HTMLPurifier_ChildDef_Required($model);
+        }
+        if ($type == 'optional') {
+            return new HTMLPurifier_ChildDef_Optional($model);
+        }
+        if ($type == 'empty') {
+            return new HTMLPurifier_ChildDef_Empty();
+        }
+        if ($type == 'custom') {
+            return new HTMLPurifier_ChildDef_Custom($model);
+        }
+        // unknown type: ask the module, but only if it claims to define
+        // child definitions (saves a function call)
+        $child = $module->defines_child_def ? $module->getChildDef($def) : false;
+        if ($child !== false) {
+            return $child;
+        }
+        // nothing could handle it
+        trigger_error(
+            'Could not determine which ChildDef class to instantiate',
+            E_USER_ERROR
+        );
+        return false;
+    }
+    
+    /**
+     * Turns a pipe separated list of element names into a lookup array.
+     * Spaces are removed before the string is split.
+     * @param $string List of elements
+     * @return Lookup array (element => true)
+     */
+    function convertToLookup($string) {
+        $names  = explode('|', str_replace(' ', '', $string));
+        $lookup = array();
+        foreach ($names as $name) {
+            $lookup[$name] = true;
+        }
+        return $lookup;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Context.php
+++ b/lib/htmlpurifier/HTMLPurifier/Context.php
@ -0,0 +1,76 @@
+<?php
+
+/**
+ * Registry that keeps named references to variables describing the
+ * current context.
+ * @note Every presence check uses isset(), so a name bound to a variable
+ *       that currently holds null is treated as not registered.
+ */
+class HTMLPurifier_Context
+{
+    
+    /**
+     * Name => reference map backing the registry.
+     * @private
+     */
+    var $_storage = array();
+    
+    /**
+     * Binds a variable to a name by reference. A name that is already
+     * taken raises an E_USER_ERROR and keeps its existing binding.
+     * @param $name String name
+     * @param $ref Variable to be registered
+     */
+    function register($name, &$ref) {
+        if (!isset($this->_storage[$name])) {
+            $this->_storage[$name] =& $ref;
+            return;
+        }
+        trigger_error('Name collision, cannot re-register',
+                      E_USER_ERROR);
+    }
+    
+    /**
+     * Returns the variable bound to a name, by reference. An unknown name
+     * raises an E_USER_ERROR and yields null.
+     * @param $name String name
+     */
+    function &get($name) {
+        if (isset($this->_storage[$name])) {
+            return $this->_storage[$name];
+        }
+        trigger_error('Attempted to retrieve non-existent variable',
+                      E_USER_ERROR);
+        $missing = null; // returning by reference needs a real variable
+        return $missing;
+    }
+    
+    /**
+     * Removes the binding for a name. An unknown name raises an
+     * E_USER_ERROR.
+     * @param $name String name
+     */
+    function destroy($name) {
+        if (isset($this->_storage[$name])) {
+            unset($this->_storage[$name]);
+            return;
+        }
+        trigger_error('Attempted to destroy non-existent variable',
+                      E_USER_ERROR);
+    }
+    
+    /**
+     * Reports whether a name is currently bound.
+     * @param $name String name
+     */
+    function exists($name) {
+        $bound = isset($this->_storage[$name]);
+        return $bound;
+    }
+    
+    /**
+     * Registers every element of an associative array under its key.
+     * The elements are bound by reference, not copied.
+     * @param $context_array Assoc array of variables to load
+     */
+    function loadArray(&$context_array) {
+        foreach (array_keys($context_array) as $name) {
+            $this->register($name, $context_array[$name]);
+        }
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/ElementDef.php
+++ b/lib/htmlpurifier/HTMLPurifier/ElementDef.php
@ -0,0 +1,122 @@
+<?php
+
+/**
+ * Structure that stores an HTML element definition. Used by
+ * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
+ */
+class HTMLPurifier_ElementDef
+{
+    
+    /**
+     * Does the definition work by itself, or is it created solely
+     * for the purpose of merging into another definition?
+     */
+    var $standalone = true;
+    
+    /**
+     * Associative array of attribute name to HTMLPurifier_AttrDef
+     * @note Before being processed by HTMLPurifier_AttrCollections
+     *       when modules are finalized during
+     *       HTMLPurifier_HTMLDefinition->setup(), this array may also
+     *       contain an array at index 0 that indicates which attribute
+     *       collections to load into the full array. It may also
+     *       contain string identifiers in lieu of HTMLPurifier_AttrDef,
+     *       see HTMLPurifier_AttrTypes on how they are expanded during
+     *       HTMLPurifier_HTMLDefinition->setup() processing.
+     * @public
+     */
+    var $attr = array();
+    
+    /**
+     * Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
+     * @public
+     */
+    var $attr_transform_pre = array();
+    
+    /**
+     * Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
+     * @public
+     */
+    var $attr_transform_post = array();
+    
+    
+    
+    /**
+     * HTMLPurifier_ChildDef of this tag.
+     * @public
+     */
+    var $child;
+    
+    /**
+     * Abstract string representation of internal ChildDef rules. See
+     * HTMLPurifier_ContentSets for how this is parsed and then transformed
+     * into an HTMLPurifier_ChildDef.
+     * @public
+     */
+    var $content_model;
+    
+    /**
+     * Value of $child->type, used to determine which ChildDef to use,
+     * used in combination with $content_model.
+     * @public
+     */
+    var $content_model_type;
+    
+    
+    
+    /**
+     * Lookup table of tags that close this tag. Used during parsing
+     * to make sure we don't attempt to nest unclosed tags.
+     * @public
+     */
+    var $auto_close = array();
+    
+    /**
+     * Does the element have a content model (#PCDATA | Inline)*? This
+     * is important for chameleon ins and del processing in 
+     * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
+     * have to worry about this one.
+     * @public
+     */
+    var $descendants_are_inline;
+    
+    /**
+     * Lookup table of tags excluded from all descendants of this tag.
+     * @public
+     */
+    var $excludes = array();
+    
+    /**
+     * Merges the values of another element definition into this one.
+     * Values from the new element def take precedence if a value is
+     * not mergeable.
+     *
+     * - attr: named entries overwrite entries of the same name; the
+     *   include list stored at integer index 0 is appended to this
+     *   definition's own include list.
+     * - attr_transform_pre, attr_transform_post, auto_close, excludes:
+     *   copied key by key, the incoming value winning.
+     * - child, content_model_type, descendants_are_inline: replaced when
+     *   the incoming value is set.
+     * - content_model: the incoming model is appended with ' | '.
+     *
+     * @param $def HTMLPurifier_ElementDef to merge in; it is not modified
+     */
+    function mergeIn($def) {
+        
+        // later keys takes precedence
+        foreach($def->attr as $k => $v) {
+            // strict check: with a loose == every non-numeric attribute
+            // name compares equal to 0 before PHP 8 and would be mistaken
+            // for the include list
+            if ($k === 0) {
+                // merge in the includes
+                // sorry, no way to override an include
+                foreach ($v as $v2) {
+                    // append to *this* definition, not to the source
+                    $this->attr[0][] = $v2;
+                }
+                continue;
+            }
+            $this->attr[$k] = $v;
+        }
+        foreach($def->attr_transform_pre    as $k => $v) $this->attr_transform_pre[$k]  = $v;
+        foreach($def->attr_transform_post   as $k => $v) $this->attr_transform_post[$k] = $v;
+        foreach($def->auto_close            as $k => $v) $this->auto_close[$k]          = $v;
+        foreach($def->excludes              as $k => $v) $this->excludes[$k]            = $v;
+        
+        if(!is_null($def->child)) $this->child = $def->child;
+        // NOTE(review): when this definition has no content model yet the
+        // result starts with ' | ' - confirm that consumers tolerate the
+        // empty leading alternative
+        if(!empty($def->content_model)) $this->content_model .= ' | ' . $def->content_model;
+        if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
+        if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
+        
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Encoder.php
+++ b/lib/htmlpurifier/HTMLPurifier/Encoder.php
@ -0,0 +1,403 @@
+<?php
+
+require_once 'HTMLPurifier/EntityLookup.php';
+
+// Core.Encoding: case-insensitive string ('istring') naming the character
+// encoding of input and output; defaults to 'utf-8'. It is read by
+// HTMLPurifier_Encoder::convertToUTF8() and convertFromUTF8() below.
+HTMLPurifier_ConfigSchema::define(
+    'Core', 'Encoding', 'utf-8', 'istring', 
+    'If for some reason you are unable to convert all webpages to UTF-8, '. 
+    'you can use this directive as a stop-gap compatibility change to '. 
+    'let HTML Purifier deal with non UTF-8 input.  This technique has '. 
+    'notable deficiencies: absolutely no characters outside of the selected '. 
+    'character encoding will be preserved, not even the ones that have '. 
+    'been ampersand escaped (this is due to a UTF-8 specific <em>feature</em> '.
+    'that automatically resolves all entities), making it pretty useless '.
+    'for anything except the most I18N-blind applications, although '.
+    '%Core.EscapeNonASCIICharacters offers fixes this trouble with '.
+    'another tradeoff. This directive '.
+    'only accepts ISO-8859-1 if iconv is not enabled.'
+);
+
+// Core.EscapeNonASCIICharacters: boolean, off by default. When on,
+// convertFromUTF8() runs the text through convertToASCIIDumbLossless()
+// before converting it to the target encoding.
+HTMLPurifier_ConfigSchema::define(
+    'Core', 'EscapeNonASCIICharacters', false, 'bool',
+    'This directive overcomes a deficiency in %Core.Encoding by blindly '.
+    'converting all non-ASCII characters into decimal numeric entities before '.
+    'converting it to its native encoding. This means that even '.
+    'characters that can be expressed in the non-UTF-8 encoding will '.
+    'be entity-ized, which can be a real downer for encodings like Big5. '.
+    'It also assumes that the ASCII repetoire is available, although '.
+    'this is the case for almost all encodings. Anyway, use UTF-8! This '.
+    'directive has been available since 1.4.0.'
+);
+
+// Without the iconv extension Core.Encoding is limited to the two values
+// listed here, and 'iso8859-1' is accepted as an alias of 'iso-8859-1'.
+if ( !function_exists('iconv') ) {
+    // only encodings with native PHP support
+    HTMLPurifier_ConfigSchema::defineAllowedValues(
+        'Core', 'Encoding', array(
+            'utf-8',
+            'iso-8859-1'
+        )
+    );
+    HTMLPurifier_ConfigSchema::defineValueAliases(
+        'Core', 'Encoding', array(
+            'iso8859-1' => 'iso-8859-1'
+        )
+    );
+}
+
+// Test.ForceNoIconv: boolean, off by default. convertToUTF8() and
+// convertFromUTF8() skip their iconv branch while it is on.
+HTMLPurifier_ConfigSchema::define(
+    'Test', 'ForceNoIconv', false, 'bool', 
+    'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
+    'exist and use only pure PHP implementations.'
+);
+
+/**
+ * A UTF-8 specific character encoder that handles cleaning and transforming.
+ * @note All functions in this class should be static.
+ */
+class HTMLPurifier_Encoder
+{
+    
+    /**
+     * Constructor throws fatal error if you attempt to instantiate class
+     */
+    function HTMLPurifier_Encoder() {
+        trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
+    }
+    
+    /**
+     * Cleans a UTF-8 string for well-formedness and SGML validity
+     * 
+     * It will parse according to UTF-8 and return a valid UTF8 string, with
+     * non-SGML codepoints excluded.
+     * 
+     * @static
+     * @param $str String to clean
+     * @param $force_php When true, skip the iconv shortcut and always use
+     *        the pure PHP implementation
+     * @return Cleaned string
+     * 
+     * @note Just for reference, the non-SGML code points are 0 to 31 and
+     *       127 to 159, inclusive.  However, we allow code points 9, 10
+     *       and 13, which are the tab, line feed and carriage return
+     *       respectively. 128 and above the code points map to multibyte
+     *       UTF-8 representations.
+     * 
+     * @note When iconv() reports failure (returns false) the pure PHP
+     *       implementation is used instead, so a string that iconv
+     *       rejects is cleaned rather than replaced by an empty string.
+     * 
+     * @note The pure PHP implementation additionally drops U+FEFF (BOM);
+     *       the iconv shortcut does not.
+     * 
+     * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
+     *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
+     *       LGPL license.  Notes on what changed are inside, but in general,
+     *       the original code transformed UTF-8 text into an array of integer
+     *       Unicode codepoints. Understandably, transforming that back to
+     *       a string would be somewhat expensive, so the function was modded to
+     *       directly operate on the string.  However, this discourages code
+     *       reuse, and the logic enumerated here would be useful for any
+     *       function that needs to be able to understand UTF-8 characters.
+     *       As of right now, only smart lossless character encoding converters
+     *       would need that, and I'm probably not going to implement them.
+     *       Once again, PHP 6 should solve all our problems.
+     */
+    function cleanUTF8($str, $force_php = false) {
+        
+        // translation table (character => '') used by the iconv shortcut,
+        // built once per request
+        static $non_sgml_chars = array();
+        if (empty($non_sgml_chars)) {
+            for ($i = 0; $i <= 31; $i++) {
+                // non-SGML ASCII chars
+                // save \r, \t and \n
+                if ($i == 9 || $i == 13 || $i == 10) continue;
+                $non_sgml_chars[chr($i)] = '';
+            }
+            for ($i = 127; $i <= 159; $i++) {
+                $non_sgml_chars[HTMLPurifier_Encoder::unichr($i)] = '';
+            }
+        }
+        
+        static $iconv = null;
+        if ($iconv === null) $iconv = function_exists('iconv');
+        
+        if ($iconv && !$force_php) {
+            // do the shortcut way
+            $converted = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
+            if ($converted !== false) {
+                return strtr($converted, $non_sgml_chars);
+            }
+            // iconv gave up on the input: fall through to the pure PHP
+            // cleaner instead of handing false to strtr(), which would
+            // wipe the whole string
+        }
+        
+        $mState = 0; // cached expected number of octets after the current octet
+                     // until the beginning of the next UTF8 character sequence
+        $mUcs4  = 0; // cached Unicode character
+        $mBytes = 1; // cached expected number of octets in the current sequence
+        
+        // original code involved an $out that was an array of Unicode
+        // codepoints.  Instead of having to convert back into UTF-8, we've
+        // decided to directly append valid UTF-8 characters onto a string
+        // $out once they're done.  $char accumulates raw bytes, while $mUcs4
+        // turns into the Unicode code point, so there's some redundancy.
+        
+        $out = '';
+        $char = '';
+        
+        $len = strlen($str);
+        for($i = 0; $i < $len; $i++) {
+            // square brackets: the curly brace offset syntax is gone in PHP 8
+            $in = ord($str[$i]);
+            $char .= $str[$i]; // append byte to char
+            if (0 == $mState) {
+                // When mState is zero we expect either a US-ASCII character 
+                // or a multi-octet sequence.
+                if (0 == (0x80 & ($in))) {
+                    // US-ASCII, pass straight through.
+                    if (($in <= 31 || $in == 127) && 
+                        !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
+                    ) {
+                        // control characters, remove
+                    } else {
+                        $out .= $char;
+                    }
+                    // reset
+                    $char = '';
+                    $mBytes = 1;
+                } elseif (0xC0 == (0xE0 & ($in))) {
+                    // First octet of 2 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
+                    $mState = 1;
+                    $mBytes = 2;
+                } elseif (0xE0 == (0xF0 & ($in))) {
+                    // First octet of 3 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
+                    $mState = 2;
+                    $mBytes = 3;
+                } elseif (0xF0 == (0xF8 & ($in))) {
+                    // First octet of 4 octet sequence
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x07) << 18;
+                    $mState = 3;
+                    $mBytes = 4;
+                } elseif (0xF8 == (0xFC & ($in))) {
+                    // First octet of 5 octet sequence.
+                    // 
+                    // This is illegal because the encoded codepoint must be 
+                    // either:
+                    // (a) not the shortest form or
+                    // (b) outside the Unicode range of 0-0x10FFFF.
+                    // Rather than trying to resynchronize, we will carry on 
+                    // until the end of the sequence and let the later error
+                    // handling code catch it.
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 0x03) << 24;
+                    $mState = 4;
+                    $mBytes = 5;
+                } elseif (0xFC == (0xFE & ($in))) {
+                    // First octet of 6 octet sequence, see comments for 5
+                    // octet sequence.
+                    $mUcs4 = ($in);
+                    $mUcs4 = ($mUcs4 & 1) << 30;
+                    $mState = 5;
+                    $mBytes = 6;
+                } else {
+                    // Current octet is neither in the US-ASCII range nor a 
+                    // legal first octet of a multi-octet sequence.
+                    $mState = 0;
+                    $mUcs4  = 0;
+                    $mBytes = 1;
+                    $char = '';
+                }
+            } else {
+                // When mState is non-zero, we expect a continuation of the
+                // multi-octet sequence
+                if (0x80 == (0xC0 & ($in))) {
+                    // Legal continuation.
+                    $shift = ($mState - 1) * 6;
+                    $tmp = $in;
+                    $tmp = ($tmp & 0x0000003F) << $shift;
+                    $mUcs4 |= $tmp;
+                    
+                    if (0 == --$mState) {
+                        // End of the multi-octet sequence. mUcs4 now contains
+                        // the final Unicode codepoint to be output
+                        
+                        // Check for illegal sequences and codepoints.
+                        
+                        // From Unicode 3.1, non-shortest form is illegal
+                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
+                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
+                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
+                            (4 < $mBytes) ||
+                            // From Unicode 3.2, surrogate characters = illegal
+                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
+                            // Codepoints outside the Unicode range are illegal
+                            ($mUcs4 > 0x10FFFF)
+                        ) {
+                            // illegal sequence: drop it
+                        } elseif (0xFEFF != $mUcs4 && // omit BOM
+                            !($mUcs4 >= 128 && $mUcs4 <= 159) // omit non-SGML
+                        ) {
+                            $out .= $char;
+                        }
+                        // initialize UTF8 cache (reset)
+                        $mState = 0;
+                        $mUcs4  = 0;
+                        $mBytes = 1;
+                        $char = '';
+                    }
+                } else {
+                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
+                    // Incomplete multi-octet sequence.
+                    // used to result in complete fail, but we'll reset
+                    // (the byte that broke the sequence is dropped as well)
+                    $mState = 0;
+                    $mUcs4  = 0;
+                    $mBytes = 1;
+                    $char ='';
+                }
+            }
+        }
+        return $out;
+    }
+    
+    /**
+     * Translates a Unicode codepoint into its corresponding UTF-8 character.
+     * @static
+     * @param $code Integer Unicode codepoint
+     * @return UTF-8 encoded character, or an empty string when the
+     *         codepoint is negative, above 0x10FFFF or a surrogate
+     *         (0xD800 to 0xDFFF)
+     * @note Based on Feyd's function at
+     *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
+     *       which is in public domain.
+     * @note While we're going to do code point parsing anyway, a good
+     *       optimization would be to refuse to translate code points that
+     *       are non-SGML characters.  However, this could lead to duplication.
+     * @note This is very similar to the unichr function in
+     *       maintenance/generate-entity-file.php (although this is superior,
+     *       due to its sanity checks).
+     */
+    
+    // +----------+----------+----------+----------+
+    // | 33222222 | 22221111 | 111111   |          |
+    // | 10987654 | 32109876 | 54321098 | 76543210 | bit
+    // +----------+----------+----------+----------+
+    // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
+    // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
+    // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
+    // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
+    // +----------+----------+----------+----------+
+    // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
+    // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
+    // +----------+----------+----------+----------+ 
+    
+    function unichr($code) {
+        if($code > 1114111 or $code < 0 or
+          ($code >= 55296 and $code <= 57343) ) {
+            // bits are set outside the "valid" range as defined
+            // by UNICODE 4.1.0 
+            return '';
+        }
+        
+        // $x is the last byte, $y, $z and $w the ones before it; a byte
+        // left at 0 is not part of the encoded character
+        $x = $y = $z = $w = 0; 
+        if ($code < 128) {
+            // regular ASCII character
+            $x = $code;
+        } else {
+            // set up bits for UTF-8
+            $x = ($code & 63) | 128;
+            if ($code < 2048) {
+                $y = (($code & 2047) >> 6) | 192;
+            } else {
+                $y = (($code & 4032) >> 6) | 128;
+                if($code < 65536) {
+                    $z = (($code >> 12) & 15) | 224;
+                } else {
+                    $z = (($code >> 12) & 63) | 128;
+                    $w = (($code >> 18) & 7)  | 240;
+                }
+            } 
+        }
+        // set up the actual character
+        $ret = '';
+        if($w) $ret .= chr($w);
+        if($z) $ret .= chr($z);
+        if($y) $ret .= chr($y);
+        $ret .= chr($x); 
+        
+        return $ret;
+    }
+    
+    /**
+     * Converts a string to UTF-8 based on configuration.
+     * @static
+     * @param $str String in the encoding named by %Core.Encoding
+     * @param $config Configuration object; Core.Encoding and
+     *        Test.ForceNoIconv are read from it
+     * @param $context Context object (not used by this method)
+     * @return Converted string; the input is returned untouched when the
+     *         configured encoding is utf-8
+     * @note Raises an E_USER_ERROR when neither iconv nor the built-in
+     *       iso-8859-1 conversion can handle the encoding.
+     */
+    function convertToUTF8($str, $config, &$context) {
+        static $iconv = null;
+        if ($iconv === null) $iconv = function_exists('iconv');
+        $encoding = $config->get('Core', 'Encoding');
+        if ($encoding === 'utf-8') return $str;
+        if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
+            return @iconv($encoding, 'utf-8//IGNORE', $str);
+        } elseif ($encoding === 'iso-8859-1') {
+            return @utf8_encode($str);
+        }
+        trigger_error('Encoding not supported', E_USER_ERROR);
+    }
+    
+    /**
+     * Converts a string from UTF-8 based on configuration.
+     * @static
+     * @param $str UTF-8 string
+     * @param $config Configuration object; Core.Encoding,
+     *        Core.EscapeNonASCIICharacters and Test.ForceNoIconv are read
+     *        from it
+     * @param $context Context object (not used by this method)
+     * @return String in the configured encoding; the input is returned
+     *         untouched when that encoding is utf-8
+     * @note Currently, this is a lossy conversion, with unexpressable
+     *       characters being omitted.
+     * @note Raises an E_USER_ERROR when neither iconv nor the built-in
+     *       iso-8859-1 conversion can handle the encoding.
+     */
+    function convertFromUTF8($str, $config, &$context) {
+        static $iconv = null;
+        if ($iconv === null) $iconv = function_exists('iconv');
+        $encoding = $config->get('Core', 'Encoding');
+        if ($encoding === 'utf-8') return $str;
+        if ($config->get('Core', 'EscapeNonASCIICharacters')) {
+            // entity-ize everything outside ASCII before converting
+            $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
+        }
+        if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
+            return @iconv('utf-8', $encoding . '//IGNORE', $str);
+        } elseif ($encoding === 'iso-8859-1') {
+            return @utf8_decode($str);
+        }
+        trigger_error('Encoding not supported', E_USER_ERROR);
+    }
+    
+    /**
+     * Lossless (character-wise) conversion of HTML to ASCII
+     * @static
+     * @param $str UTF-8 string to be converted to ASCII
+     * @returns ASCII encoded string with non-ASCII character entity-ized
+     * @warning Adapted from MediaWiki, claiming fair use: this is a common
+     *       algorithm. If you disagree with this license fudgery,
+     *       implement it yourself.
+     * @note Uses decimal numeric entities since they are best supported.
+     * @note This is a DUMB function: it has no concept of keeping
+     *       character entities that the projected character encoding
+     *       can allow. We could possibly implement a smart version
+     *       but that would require it to also know which Unicode
+     *       codepoints the charset supported (not an easy task).
+     * @note Sort of with cleanUTF8() but it assumes that $str is
+     *       well-formed UTF-8
+     */
+    function convertToASCIIDumbLossless($str) {
+        $bytesleft = 0; // continuation bytes still expected
+        $result = '';
+        $working = 0;   // codepoint assembled so far
+        $len = strlen($str);
+        for( $i = 0; $i < $len; $i++ ) {
+            $bytevalue = ord( $str[$i] );
+            if( $bytevalue <= 0x7F ) { //0xxx xxxx
+                $result .= chr( $bytevalue );
+                $bytesleft = 0;
+            } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
+                $working = $working << 6;
+                $working += ($bytevalue & 0x3F);
+                $bytesleft--;
+                if( $bytesleft <= 0 ) {
+                    // last byte of the character: emit the entity
+                    $result .= "&#" . $working . ";";
+                }
+            } elseif( $bytevalue <= 0xDF ) { //110x xxxx
+                $working = $bytevalue & 0x1F;
+                $bytesleft = 1;
+            } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
+                $working = $bytevalue & 0x0F;
+                $bytesleft = 2;
+            } else { //1111 0xxx
+                $working = $bytevalue & 0x07;
+                $bytesleft = 3;
+            }
+        }
+        return $result;
+    }
+    
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/EntityLookup.php
+++ b/lib/htmlpurifier/HTMLPurifier/EntityLookup.php
@ -0,0 +1,46 @@
+<?php
+
+/**
+ * Holds the lookup table that maps entity names to the characters they
+ * stand for.
+ */
+class HTMLPurifier_EntityLookup {
+    
+    /**
+     * Assoc array of entity name to character represented.
+     * @public
+     */
+    var $table;
+    
+    /**
+     * Fills the lookup table by unserializing the contents of a file.
+     * @param $file Path of the serialized table; when omitted (or false)
+     *              EntityLookup/entities.ser next to this file is used
+     * @note The serialized contents are versioned, but were generated
+     *       using the maintenance script generate_entity_file.php
+     * @warning This is not in constructor to help enforce the Singleton
+     */
+    function setup($file = false) {
+        $path = $file ? $file : dirname(__FILE__) . '/EntityLookup/entities.ser';
+        $this->table = unserialize(file_get_contents($path));
+    }
+    
+    /**
+     * Returns the shared lookup object, creating and setting it up on
+     * first use.
+     * @static
+     * @param Optional prototype of custom lookup table to overload with;
+     *        when given it replaces the shared object.
+     */
+    function instance($prototype = false) {
+        // no references, since PHP doesn't copy unless modified
+        static $instance = null;
+        if ($prototype) {
+            $instance = $prototype;
+            return $instance;
+        }
+        if (!$instance) {
+            $instance = new HTMLPurifier_EntityLookup();
+            $instance->setup();
+        }
+        return $instance;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/EntityLookup/entities.ser
+++ b/lib/htmlpurifier/HTMLPurifier/EntityLookup/entities.ser
--- a/lib/htmlpurifier/HTMLPurifier/EntityParser.php
+++ b/lib/htmlpurifier/HTMLPurifier/EntityParser.php
@ -0,0 +1,158 @@
+<?php
+
+require_once 'HTMLPurifier/EntityLookup.php';
+require_once 'HTMLPurifier/Encoder.php';
+
+// if want to implement error collecting here, we'll need to use some sort
+// of global data (probably trigger_error) because it's impossible to pass
+// $config or $context to the callback functions.
+
+/**
+ * Handles referencing and dereferencing character entities
+ */
+class HTMLPurifier_EntityParser
+{
+    
+    /**
+     * Reference to entity lookup table; loaded lazily the first time a
+     * named, non-special entity has to be resolved.
+     * @protected
+     */
+    var $_entity_lookup;
+    
+    /**
+     * Callback regex string for parsing entities. The trailing semicolon
+     * is optional in the pattern.
+     * @protected
+     */
+    var $_substituteEntitiesRegex =
+'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z]+));?/';
+//     1. hex             2. dec      3. string
+    
+    
+    /**
+     * Decimal to parsed string conversion table for special entities.
+     * @protected
+     */
+    var $_special_dec2str =
+            array(
+                    34 => '"',
+                    38 => '&',
+                    39 => "'",
+                    60 => '<',
+                    62 => '>'
+            );
+    
+    /**
+     * Stripped entity names to decimal conversion table for special entities.
+     * Every value here is also a key of $_special_dec2str.
+     * @protected
+     */
+    var $_special_ent2dec =
+            array(
+                    'quot' => 34,
+                    'amp'  => 38,
+                    'lt'   => 60,
+                    'gt'   => 62
+            );
+    
+    /**
+     * Substitutes non-special entities with their parsed equivalents. Since
+     * running this whenever you have parsed character is t3h 5uck, we run
+     * it before everything else.
+     * 
+     * @protected
+     * @param $string String to have non-special entities parsed.
+     * @returns Parsed string.
+     */
+    function substituteNonSpecialEntities($string) {
+        // it will try to detect missing semicolons, but don't rely on it
+        return preg_replace_callback(
+            $this->_substituteEntitiesRegex,
+            array($this, 'nonSpecialEntityCallback'),
+            $string
+            );
+    }
+    
+    /**
+     * Callback function for substituteNonSpecialEntities() that does the work.
+     * 
+     * @warning Though this is public in order to let the callback happen,
+     *          calling it directly is not recommended.
+     * @param $matches  PCRE matches array, with 0 the entire match, and
+     *                  either index 1, 2 or 3 set with a hex value, dec value,
+     *                  or string (respectively).
+     * @returns Replacement string; special entities and unknown names are
+     *          returned untouched.
+     */
+    
+    function nonSpecialEntityCallback($matches) {
+        // replaces all but big five
+        $entity = $matches[0];
+        $is_num = (@$matches[0][1] === '#');
+        if ($is_num) {
+            $is_hex = (@$entity[2] === 'x');
+            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
+            
+            // abort for special characters
+            if (isset($this->_special_dec2str[$code]))  return $entity;
+            
+            return HTMLPurifier_Encoder::unichr($code);
+        } else {
+            if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
+            if (!$this->_entity_lookup) {
+                require_once 'HTMLPurifier/EntityLookup.php';
+                $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
+            }
+            if (isset($this->_entity_lookup->table[$matches[3]])) {
+                return $this->_entity_lookup->table[$matches[3]];
+            } else {
+                return $entity;
+            }
+        }
+    }
+    
+    /**
+     * Substitutes only special entities with their parsed equivalents.
+     * 
+     * @notice We try to avoid calling this function because otherwise, it
+     * would have to be called a lot (for every parsed section).
+     * 
+     * @protected
+     * @param $string String to have special entities parsed.
+     * @returns Parsed string.
+     */
+    function substituteSpecialEntities($string) {
+        return preg_replace_callback(
+            $this->_substituteEntitiesRegex,
+            array($this, 'specialEntityCallback'),
+            $string);
+    }
+    
+    /**
+     * Callback function for substituteSpecialEntities() that does the work.
+     * 
+     * This callback has same syntax as nonSpecialEntityCallback().
+     * 
+     * @warning Though this is public in order to let the callback happen,
+     *          calling it directly is not recommended.
+     * @param $matches  PCRE-style matches array, with 0 the entire match, and
+     *                  either index 1, 2 or 3 set with a hex value, dec value,
+     *                  or string (respectively).
+     * @returns Replacement string: the literal character for a special
+     *          entity, the original entity text for anything else.
+     */
+    function specialEntityCallback($matches) {
+        $entity = $matches[0];
+        $is_num = (@$matches[0][1] === '#');
+        if ($is_num) {
+            $is_hex = (@$entity[2] === 'x');
+            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
+            return isset($this->_special_dec2str[$int]) ?
+                $this->_special_dec2str[$int] :
+                $entity;
+        } else {
+            // $_special_ent2dec only yields the code point, so it has to be
+            // mapped through $_special_dec2str to get the actual character
+            // (returning the code itself would turn "&amp;" into "38")
+            return isset($this->_special_ent2dec[$matches[3]]) ?
+                $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
+                $entity;
+        }
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Error.php
+++ b/lib/htmlpurifier/HTMLPurifier/Error.php
@ -0,0 +1,8 @@
+<?php
+
+/**
+ * Marker object that functions return to signify an error in situations
+ * where returning null doesn't cut it. The class declares no properties
+ * or methods of its own.
+ */
+class HTMLPurifier_Error {}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Filter.php
+++ b/lib/htmlpurifier/HTMLPurifier/Filter.php
@ -0,0 +1,39 @@
+<?php
+
+/**
+ * Represents a pre or post processing filter on HTML Purifier's output
+ * 
+ * Sometimes, a little ad-hoc fixing of HTML has to be done before
+ * it gets sent through HTML Purifier: you can use filters to achieve
+ * this effect. For instance, YouTube videos can be preserved using
+ * this manner. You could have used a decorator for this task, but
+ * PHP's support for them is not terribly robust, so we're going
+ * to just loop through the filters.
+ * 
+ * Filters should be exited first in, last out. If there are three filters,
+ * named 1, 2 and 3, the order of execution should go 1->preFilter,
+ * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
+ * 1->postFilter.
+ */
+
+class HTMLPurifier_Filter
+{
+    
+    /**
+     * Name of the filter for identification purposes
+     */
+    var $name;
+    
+    /**
+     * Pre-processor function, handles HTML before HTML Purifier.
+     * The default implementation is a pass-through: it returns the HTML
+     * unchanged so that a subclass overriding only one of the two hooks
+     * does not replace the document with null in the other.
+     * @param $html    HTML string to pre-process
+     * @param $config  Configuration object (not used by the default)
+     * @param $context Context, passed by reference (not used by the default)
+     * @return HTML string to carry on with
+     */
+    function preFilter($html, $config, &$context) {
+        return $html;
+    }
+    
+    /**
+     * Post-processor function, handles HTML after HTML Purifier.
+     * The default implementation is a pass-through, see preFilter().
+     * @param $html    HTML string to post-process
+     * @param $config  Configuration object (not used by the default)
+     * @param $context Context, passed by reference (not used by the default)
+     * @return HTML string to carry on with
+     */
+    function postFilter($html, $config, &$context) {
+        return $html;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php
+++ b/lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php
@ -0,0 +1,34 @@
+<?php
+
+require_once 'HTMLPurifier/Filter.php';
+
+class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
+{
+    
+    var $name = 'YouTube preservation';
+    
+    /**
+     * Replaces each <object>...</object> embed that references a
+     * http://www.youtube.com/v/ID address with a placeholder
+     * <span class="youtube-embed">ID</span>, which postFilter() expands
+     * again later.
+     * @param $html    HTML string to pre-process
+     * @param $config  Not used
+     * @param $context Not used
+     * @return HTML with the embeds replaced by placeholders
+     */
+    function preFilter($html, $config, &$context) {
+        // the dots of the host name are escaped so that they only match
+        // literal dots rather than any character
+        $pre_regex = '#<object[^>]+>.+?'.
+            'http://www\.youtube\.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
+        $pre_replace = '<span class="youtube-embed">\1</span>';
+        return preg_replace($pre_regex, $pre_replace, $html);
+    }
+    
+    /**
+     * Expands each <span class="youtube-embed">ID</span> placeholder into
+     * a fixed-size (425x350) <object> embed for that video ID, with an
+     * <embed> fallback inside an IE conditional comment.
+     * @param $html    HTML string to post-process
+     * @param $config  Not used
+     * @param $context Not used
+     * @return HTML with the placeholders expanded
+     */
+    function postFilter($html, $config, &$context) {
+        $post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
+        // every attribute fragment of the <embed> tag ends in a space:
+        // without it the concatenation glues neighbouring attributes
+        // together (src="..."type="..."wmode="...")
+        $post_replace = '<object width="425" height="350" '.
+            'data="http://www.youtube.com/v/\1">'.
+            '<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
+            '<param name="wmode" value="transparent"></param>'.
+            '<!--[if IE]>'.
+            '<embed src="http://www.youtube.com/v/\1" '.
+            'type="application/x-shockwave-flash" '.
+            'wmode="transparent" width="425" height="350" />'.
+            '<![endif]-->'.
+            '</object>';
+        return preg_replace($post_regex, $post_replace, $html);
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Generator.php
+++ b/lib/htmlpurifier/HTMLPurifier/Generator.php
@ -0,0 +1,158 @@
+<?php
+
+require_once 'HTMLPurifier/Lexer.php';
+
+// Registers %Core.CleanUTF8DuringGeneration: value false, type 'bool'
+// (third argument is presumably the default -- confirm in ConfigSchema)
+HTMLPurifier_ConfigSchema::define(
+    'Core', 'CleanUTF8DuringGeneration', false, 'bool',
+    'When true, HTMLPurifier_Generator will also check all strings it '.
+    'escapes for UTF-8 well-formedness as a defense in depth measure. '.
+    'This could cause a considerable performance impact, and is not '.
+    'strictly necessary due to the fact that the Lexers should have '.
+    'ensured that all the UTF-8 strings were well-formed.  Note that '.
+    'the configuration value is only read at the beginning of '.
+    'generateFromTokens.'
+);
+
+// Registers %Core.XHTML: value true, type 'bool'
+HTMLPurifier_ConfigSchema::define(
+    'Core', 'XHTML', true, 'bool',
+    'Determines whether or not output is XHTML or not.  When disabled, HTML '.
+    'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
+    'such as boolean attribute expansion and trailing slashes in empty tags. '.
+    'This directive was available since 1.1.'
+);
+
+// Registers %Core.TidyFormat: value false, type 'bool'
+// extension constraints could be factored into ConfigSchema
+HTMLPurifier_ConfigSchema::define(
+    'Core', 'TidyFormat', false, 'bool',
+    '<p>Determines whether or not to run Tidy on the final output for pretty '.
+    'formatting reasons, such as indentation and wrap.</p><p>This can greatly '.
+    'improve readability for editors who are hand-editing the HTML, but is '.
+    'by no means necessary as HTML Purifier has already fixed all major '.
+    'errors the HTML may have had. Tidy is a non-default extension, and this directive '.
+    'will silently fail if Tidy is not available.</p><p>If you are looking to make '.
+    'the overall look of your page\'s source better, I recommend running Tidy '.
+    'on the entire page rather than just user-content (after all, the '.
+    'indentation relative to the containing blocks will be incorrect).</p><p>This '.
+    'directive was available since 1.1.1.</p>'
+);
+
+/**
+ * Generates HTML from tokens.
+ */
+class HTMLPurifier_Generator
+{
+    
+    /**
+     * Cached value of %Core.CleanUTF8DuringGeneration
+     * @private
+     */
+    var $_clean_utf8 = false;
+    
+    /**
+     * Cached value of %Core.XHTML
+     * @private
+     */
+    var $_xhtml = true;
+    
+    /**
+     * Serializes an array of tokens into one HTML string, optionally
+     * passing the result through Tidy when %Core.TidyFormat is enabled
+     * and the tidy extension is loaded.
+     * @param $tokens Array of HTMLPurifier_Token
+     * @param $config HTMLPurifier_Config object; a default one is created
+     *                when a falsy value is given
+     * @param $context Context, by reference (not read here)
+     * @return Generated HTML
+     */
+    function generateFromTokens($tokens, $config, &$context) {
+        if (!$config) $config = HTMLPurifier_Config::createDefault();
+        // the two flags are cached once per call, before any token is handled
+        $this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
+        $this->_xhtml = $config->get('Core', 'XHTML');
+        if (!$tokens) return '';
+        
+        $output = '';
+        foreach ($tokens as $current) {
+            $output .= $this->generateFromToken($current);
+        }
+        
+        $run_tidy = $config->get('Core', 'TidyFormat') && extension_loaded('tidy');
+        if (!$run_tidy) return $output;
+        
+        $options = array(
+           'indent'=> true,
+           'output-xhtml' => $this->_xhtml,
+           'show-body-only' => true,
+           'indent-spaces' => 2,
+           'wrap' => 68,
+        );
+        if (version_compare(PHP_VERSION, '5', '>=')) {
+            // object interface
+            $tidy = new Tidy;
+            $tidy->parseString($output, $options, 'utf8');
+            $tidy->cleanRepair();
+            return (string) $tidy;
+        }
+        // procedural interface used before PHP 5
+        tidy_set_encoding('utf8');
+        foreach ($options as $option => $setting) {
+            tidy_setopt($option, $setting);
+        }
+        tidy_parse_string($output);
+        tidy_clean_repair();
+        return tidy_get_output();
+    }
+    
+    /**
+     * Serializes one token. Start, end, empty and text tokens produce
+     * markup; a token without a type, or of any other type, produces
+     * an empty string.
+     * @param $token HTMLPurifier_Token object.
+     * @return Generated HTML
+     */
+    function generateFromToken($token) {
+        if (!isset($token->type)) return '';
+        switch ($token->type) {
+            case 'start':
+                $attr = $this->generateAttributes($token->attr);
+                return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
+            case 'end':
+                return '</' . $token->name . '>';
+            case 'empty':
+                $attr = $this->generateAttributes($token->attr);
+                // self-closing slash only in XHTML mode
+                $closer = $this->_xhtml ? ' /' : '';
+                return '<' . $token->name . ($attr ? ' ' : '') . $attr . $closer . '>';
+            case 'text':
+                return $this->escape($token->data);
+            default:
+                return '';
+        }
+    }
+    
+    /**
+     * Turns an attribute array into name="value" pairs separated by
+     * single spaces, with every value escaped.
+     * @param $assoc_array_of_attributes Attribute array
+     * @return Generate HTML fragment for insertion.
+     */
+    function generateAttributes($assoc_array_of_attributes) {
+        $pairs = array();
+        foreach ($assoc_array_of_attributes as $attr_name => $attr_value) {
+            // outside XHTML mode namespaced attributes are removed
+            // also needed: check for attribute minimization
+            if (!$this->_xhtml && strpos($attr_name, ':') !== false) continue;
+            $pairs[] = $attr_name.'="'.$this->escape($attr_value).'"';
+        }
+        return implode(' ', $pairs);
+    }
+    
+    /**
+     * Escapes raw text data for HTML, cleaning the UTF-8 first when the
+     * cached %Core.CleanUTF8DuringGeneration flag is on.
+     * @param $string String data to escape for HTML.
+     * @return String escaped data.
+     */
+    function escape($string) {
+        if ($this->_clean_utf8) {
+            $string = HTMLPurifier_Lexer::cleanUTF8($string);
+        }
+        return htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php
@ -0,0 +1,281 @@
+<?php
+
+// components
+require_once 'HTMLPurifier/HTMLModuleManager.php';
+
+// this definition and its modules MUST NOT define configuration directives
+// outside of the HTML or Attr namespaces
+
+// will be superseded by more accurate doctype declaration schemes
+// Registers %HTML.Strict: value false, type 'bool'
+// (third argument is presumably the default -- confirm in ConfigSchema)
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'Strict', false, 'bool',
+    'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// Registers %HTML.BlockWrapper: value 'p', type 'string';
+// read by HTMLPurifier_HTMLDefinition::setupConfigStuff()
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'BlockWrapper', 'p', 'string',
+    'String name of element to wrap inline elements that are inside a block '.
+    'context.  This only occurs in the children of blockquote in strict mode. '.
+    'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '.
+    'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '.
+    '<code>&lt;p&gt;</code> tags can be replaced '.
+    'with whatever you desire, as long as it is a block level element. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// Registers %HTML.Parent: value 'div', type 'string';
+// read by HTMLPurifier_HTMLDefinition::setupConfigStuff()
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'Parent', 'div', 'string',
+    'String name of element that HTML fragment passed to library will be '.
+    'inserted in.  An interesting variation would be using span as the '.
+    'parent element, meaning that only inline tags would be allowed. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// Registers %HTML.AllowedElements: value null, type 'lookup/null';
+// read by HTMLPurifier_HTMLDefinition::setupConfigStuff()
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'AllowedElements', null, 'lookup/null',
+    'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
+    'can overload it with your own list of tags to allow.  Note that this '.
+    'method is subtractive: it does its job by taking away from HTML Purifier '.
+    'usual feature set, so you cannot add a tag that HTML Purifier never '.
+    'supported in the first place (like embed, form or head).  If you change this, you '.
+    'probably also want to change %HTML.AllowedAttributes. '.
+    '<strong>Warning:</strong> If another directive conflicts with the '.
+    'elements here, <em>that</em> directive will win and override. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// Registers %HTML.AllowedAttributes: value null, type 'lookup/null';
+// read by HTMLPurifier_HTMLDefinition::setupConfigStuff()
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'AllowedAttributes', null, 'lookup/null',
+    'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
+    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
+    '(style, id, class, dir, lang, xml:lang).'.
+    '<strong>Warning:</strong> If another directive conflicts with the '.
+    'elements here, <em>that</em> directive will win and override. For '.
+    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
+    'directive.  You must set that directive to true before you can use '.
+    'IDs at all. This directive has been available since 1.3.0.'
+);
+
+/**
+ * Definition of the purified HTML that describes allowed children,
+ * attributes, and many other things.
+ * 
+ * Conventions:
+ * 
+ * All member variables that are prefixed with info
+ * (including the main $info array) are used by HTML Purifier internals
+ * and should not be directly edited when customizing the HTMLDefinition.
+ * They can usually be set via configuration directives or custom
+ * modules.
+ * 
+ * On the other hand, member variables without the info prefix are used
+ * internally by the HTMLDefinition and MUST NOT be used by other HTML
+ * Purifier internals. Many of them, however, are public, and may be
+ * edited by userspace code to tweak the behavior of HTMLDefinition.
+ * 
+ * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
+ * rule: in the interest of comprehensiveness, it will sniff everything.
+ */
+class HTMLPurifier_HTMLDefinition
+{
+    
+    /** FULLY-PUBLIC VARIABLES */
+    
+    /**
+     * Associative array of element names to HTMLPurifier_ElementDef
+     * @public
+     */
+    var $info = array();
+    
+    /**
+     * Associative array of global attribute name to attribute definition.
+     * @public
+     */
+    var $info_global_attr = array();
+    
+    /**
+     * String name of parent element HTML will be going into.
+     * @public
+     */
+    var $info_parent = 'div';
+    
+    /**
+     * Definition for parent element, allows parent element to be a
+     * tag that's not allowed inside the HTML fragment.
+     * @public
+     */
+    var $info_parent_def;
+    
+    /**
+     * String name of element used to wrap inline elements in block context
+     * @note This is rarely used except for BLOCKQUOTEs in strict mode
+     * @public
+     */
+    var $info_block_wrapper = 'p';
+    
+    /**
+     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
+     * @public
+     */
+    var $info_tag_transform = array();
+    
+    /**
+     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
+     * @public
+     */
+    var $info_attr_transform_pre = array();
+    
+    /**
+     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
+     * @public
+     */
+    var $info_attr_transform_post = array();
+    
+    /**
+     * Nested lookup array of content set name (Block, Inline) to
+     * element name to whether or not it belongs in that content set.
+     * @public
+     */
+    var $info_content_sets = array();
+    
+    
+    
+    /** PUBLIC BUT INTERNAL VARIABLES */
+    
+    var $setup = false; /**< Has setup() been called yet? */
+    var $config; /**< Temporary instance of HTMLPurifier_Config */
+    
+    var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
+    
+    /**
+     * Performs low-cost, preliminary initialization: keeps a reference
+     * to the configuration and creates the module manager. The actual
+     * processing is deferred to setup().
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function HTMLPurifier_HTMLDefinition(&$config) {
+        $this->config =& $config;
+        $this->manager = new HTMLPurifier_HTMLModuleManager();
+    }
+    
+    /**
+     * Processes internals into form usable by HTMLPurifier internals. 
+     * Modifying the definition after calling this function should not
+     * be done.
+     * Only the first call does any work; once finished, the config and
+     * manager members are unset.
+     */
+    function setup() {
+        
+        // multiple call guard
+        if ($this->setup) {return;} else {$this->setup = true;}
+        
+        $this->processModules();
+        $this->setupConfigStuff();
+        
+        // both were only needed while setting up
+        unset($this->config);
+        unset($this->manager);
+        
+    }
+    
+    /**
+     * Extract out the information from the manager: tag and attribute
+     * transforms of every active module, the element definitions and
+     * the content set lookup.
+     */
+    function processModules() {
+        
+        $this->manager->setup($this->config);
+        
+        // entries are copied key by key, so a module processed later
+        // overwrites an entry stored under the same key by an earlier one
+        foreach ($this->manager->activeModules as $module) {
+            foreach($module->info_tag_transform         as $k => $v) $this->info_tag_transform[$k]      = $v;
+            foreach($module->info_attr_transform_pre    as $k => $v) $this->info_attr_transform_pre[$k] = $v;
+            foreach($module->info_attr_transform_post   as $k => $v) $this->info_attr_transform_post[$k]= $v;
+        }
+        
+        $this->info = $this->manager->getElements($this->config);
+        $this->info_content_sets = $this->manager->contentSets->lookup;
+        
+    }
+    
+    /**
+     * Sets up stuff based on config. We need a better way of doing this.
+     * Reads %HTML.BlockWrapper, %HTML.Parent, %HTML.AllowedElements and
+     * %HTML.AllowedAttributes, and triggers an error or warning for
+     * values that cannot be honoured.
+     */
+    function setupConfigStuff() {
+        
+        // the wrapper is only accepted if it is a member of the Block
+        // content set; otherwise the current value is left untouched
+        $block_wrapper = $this->config->get('HTML', 'BlockWrapper');
+        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
+            $this->info_block_wrapper = $block_wrapper;
+        } else {
+            trigger_error('Cannot use non-block element as block wrapper.',
+                E_USER_ERROR);
+        }
+        
+        $parent = $this->config->get('HTML', 'Parent');
+        $def = $this->manager->getElement($parent, $this->config);
+        if ($def) {
+            $this->info_parent = $parent;
+            $this->info_parent_def = $def;
+        } else {
+            trigger_error('Cannot use unrecognized element as parent.',
+                E_USER_ERROR);
+            // if execution continues after the error, fall back to the
+            // definition of the parent name that is already set
+            $this->info_parent_def = $this->manager->getElement(
+                $this->info_parent, $this->config);
+        }
+        
+        // support template text
+        $support = "(for information on implementing this, see the ".
+                   "support forums) ";
+        
+        // setup allowed elements, SubtractiveWhitelist module
+        $allowed_elements = $this->config->get('HTML', 'AllowedElements');
+        if (is_array($allowed_elements)) {
+            // drop every known element that is not listed, and tick off
+            // every listed name that was found
+            foreach ($this->info as $name => $d) {
+                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
+                unset($allowed_elements[$name]);
+            }
+            // emit errors
+            // (whatever is still listed did not match any known element)
+            foreach ($allowed_elements as $element => $d) {
+                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
+            }
+        }
+        
+        $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
+        // the copy is used to tick off entries that matched something,
+        // while the original stays intact for the isset() checks
+        $allowed_attributes_mutable = $allowed_attributes; // by copy!
+        if (is_array($allowed_attributes)) {
+            // NOTE(review): nothing visible in this class fills
+            // info_global_attr before this loop runs -- confirm
+            foreach ($this->info_global_attr as $attr_key => $info) {
+                if (!isset($allowed_attributes["*.$attr_key"])) {
+                    unset($this->info_global_attr[$attr_key]);
+                } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
+                    unset($allowed_attributes_mutable["*.$attr_key"]);
+                }
+            }
+            // an element attribute is kept when either 'tag.attr' or
+            // '*.attr' is listed
+            foreach ($this->info as $tag => $info) {
+                foreach ($info->attr as $attr => $attr_info) {
+                    if (!isset($allowed_attributes["$tag.$attr"]) &&
+                        !isset($allowed_attributes["*.$attr"])) {
+                        unset($this->info[$tag]->attr[$attr]);
+                    } else {
+                        if (isset($allowed_attributes_mutable["$tag.$attr"])) {
+                            unset($allowed_attributes_mutable["$tag.$attr"]);
+                        } elseif (isset($allowed_attributes_mutable["*.$attr"])) {
+                            unset($allowed_attributes_mutable["*.$attr"]);
+                        }
+                    }
+                }
+            }
+            // emit errors
+            // (entries left in the copy never matched any attribute)
+            foreach ($allowed_attributes_mutable as $elattr => $d) {
+                list($element, $attribute) = explode('.', $elattr);
+                if ($element == '*') {
+                    trigger_error("Global attribute '$attribute' is not ".
+                        "supported in any elements $support",
+                        E_USER_WARNING);
+                } else {
+                    trigger_error("Attribute '$attribute' in element '$element' not supported $support",
+                        E_USER_WARNING);
+                }
+            }
+        }
+        
+    }
+    
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule.php
@ -0,0 +1,125 @@
+<?php
+
+/**
+ * Represents an XHTML 1.1 module, with information on elements, tags
+ * and attributes.
+ * @note Even though this is technically XHTML 1.1, it is also used for
+ *       regular HTML parsing. We are using modulization as a convenient
+ *       way to represent the internals of HTMLDefinition, and our
+ *       implementation is by no means conforming and does not directly
+ *       use the normative DTDs or XML schemas.
+ * @note The public variables in a module should almost directly
+ *       correspond to the variables in HTMLPurifier_HTMLDefinition.
+ *       However, the prefix info carries no special meaning in these
+ *       objects (include it anyway if that's the correspondence though).
+ */
+
+class HTMLPurifier_HTMLModule
+{
+    /**
+     * Short string that uniquely identifies the module
+     */
+    var $name;
+    
+    /**
+     * Integer, assigned dynamically, telling when the module was loaded in.
+     */
+    var $order;
+    
+    /**
+     * Informal list of the elements this module changes. It is not
+     * used in any significant way.
+     * @protected
+     */
+    var $elements = array();
+    
+    /**
+     * Map of element name => element definition. A definition may be
+     * incomplete, in which case it is merged with the full definition
+     * later on.
+     * @public
+     */
+    var $info = array();
+    
+    /**
+     * Map of content set name => additions to that content set, commonly
+     * used to, say, add an A element to the Inline content set.
+     * Corresponds to an internal variable $content_sets and NOT to the
+     * info_content_sets member variable of HTMLDefinition.
+     * @public
+     */
+    var $content_sets = array();
+    
+    /**
+     * Map of attribute collection name => additions to that collection.
+     * More rarely used, for adding attributes to the global collections;
+     * an example is the StyleAttribute module adding the style attribute
+     * to the Core. Corresponds to HTMLDefinition's
+     * attr_collections->info, since the object's data is only info,
+     * with extra behavior associated with it.
+     * @public
+     */
+    var $attr_collections = array();
+    
+    /**
+     * Map of deprecated tag name => HTMLPurifier_TagTransform
+     * @public
+     */
+    var $info_tag_transform = array();
+    
+    /**
+     * HTMLPurifier_AttrTransform objects to run before validation.
+     * @public
+     */
+    var $info_attr_transform_pre = array();
+    
+    /**
+     * HTMLPurifier_AttrTransform objects to run after validation.
+     * @public
+     */
+    var $info_attr_transform_post = array();
+    
+    /**
+     * Whether or not getChildDef() is implemented. Exists for
+     * optimization reasons: it may save a call to a function. Be sure
+     * to set it when you do implement getChildDef(), otherwise the
+     * implementation will have no effect!
+     * @public
+     */
+    var $defines_child_def = false;
+    
+    /**
+     * Retrieves a proper HTMLPurifier_ChildDef subclass based on the
+     * content_model and content_model_type member variables of the
+     * HTMLPurifier_ElementDef class. There is a similar function
+     * in HTMLPurifier_HTMLDefinition. This base version always
+     * answers false.
+     * @param $def HTMLPurifier_ElementDef instance
+     * @return HTMLPurifier_ChildDef subclass
+     * @public
+     */
+    function getChildDef($def) {
+        return false;
+    }
+    
+    /**
+     * Hook that lets a module perform arbitrary operations on
+     * HTMLPurifier_HTMLDefinition before the module gets processed.
+     * Does nothing in this base class.
+     * @param $definition Reference to HTMLDefinition being setup
+     */
+    function preProcess(&$definition) {
+    }
+    
+    /**
+     * Hook that lets a module perform arbitrary operations on
+     * HTMLPurifier_HTMLDefinition after the module gets processed.
+     * Does nothing in this base class.
+     * @param $definition Reference to HTMLDefinition being setup
+     */
+    function postProcess(&$definition) {
+    }
+    
+    /**
+     * Hook that is called when a module gets registered to the
+     * definition. Does nothing in this base class.
+     * @param $definition Reference to HTMLDefinition being setup
+     */
+    function setup(&$definition) {
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php
@ -0,0 +1,43 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
+
+/**
+ * XHTML 1.1 Bi-directional Text Module, defines elements that
+ * declare directionality of content. Text Extension Module.
+ */
+class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Bdo';
+    var $elements = array('bdo');
+    var $info = array();
+    var $content_sets = array('Inline' => 'bdo');
+    var $attr_collections = array(
+        'I18N' => array('dir' => false)
+    );
+    
+    /**
+     * Defines the bdo element and puts the real dir attribute definition
+     * in place of the false placeholder in the I18N collection.
+     */
+    function HTMLPurifier_HTMLModule_Bdo() {
+        $dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
+        
+        $bdo = new HTMLPurifier_ElementDef();
+        // The Abstract Module specification has the attribute
+        // inclusions wrong for bdo: bdo allows
+        // xml:lang too (and we'll toss in lang for good measure,
+        // though it is not allowed for XHTML 1.1, this will
+        // be managed with a global attribute transform)
+        $bdo->attr = array(
+            0 => array('Core', 'Lang'),
+            'dir' => $dir, // required
+        );
+        $bdo->content_model = '#PCDATA | Inline';
+        $bdo->content_model_type = 'optional';
+        // dir is required: this transform provides the fallback
+        // behavior for when it is missing
+        $bdo->attr_transform_post['required-dir'] =
+            new HTMLPurifier_AttrTransform_BdoDir();
+        
+        $this->attr_collections['I18N']['dir'] = $dir;
+        $this->info['bdo'] = $bdo;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/CommonAttributes.php
@ -0,0 +1,31 @@
+<?php
+
+class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
+{
+    var $name = 'CommonAttributes';
+    
+    // attribute collections contributed by this module; index 0 of a
+    // collection lists the other collections it includes
+    var $attr_collections = array(
+        'Core' => array(
+            0 => array('Style'),
+            // 'xml:space' => false,
+            'class' => 'NMTOKENS',
+            'id' => 'ID',
+            'title' => 'CDATA',
+        ),
+        'Lang' => array(
+            // placeholder, the constructor puts the definition object here
+            'xml:lang' => false,
+        ),
+        'I18N' => array(
+            // proprietary, for xml:lang/lang
+            0 => array('Lang'),
+        ),
+        'Common' => array(
+            0 => array('Core', 'I18N')
+        )
+    );
+    
+    /**
+     * Replaces the xml:lang placeholder of the Lang collection with an
+     * HTMLPurifier_AttrDef_Lang instance.
+     */
+    function HTMLPurifier_HTMLModule_CommonAttributes() {
+        $lang = new HTMLPurifier_AttrDef_Lang();
+        $this->attr_collections['Lang']['xml:lang'] = $lang;
+    }
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php
@ -0,0 +1,46 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/ChildDef/Chameleon.php';
+
+/**
+ * XHTML 1.1 Edit Module (a Text Extension Module): provides the del and
+ * ins elements.
+ */
+class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Edit';
+    var $elements = array('del', 'ins');
+    var $info = array();
+    var $content_sets = array('Inline' => 'del | ins');
+    
+    /**
+     * Registers one identical element definition for each of del and ins.
+     */
+    function HTMLPurifier_HTMLModule_Edit() {
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->attr = array(
+                0 => array('Common'),
+                'cite' => 'URI',
+                // 'datetime' => 'Datetime' // Datetime not implemented
+            );
+            // Two content models are packed into one string: the part
+            // before the exclamation mark is used in an inline context,
+            // the part after it in a block context (getChildDef() does
+            // the split). HTML 4.01 specifies that ins/del must not
+            // contain block elements when used inline; the chameleon
+            // type is the workaround that achieves this effect.
+            $def->content_model = '#PCDATA | Inline ! #PCDATA | Flow';
+            $def->content_model_type = 'chameleon';
+            $this->info[$tag] = $def;
+        }
+    }
+    
+    var $defines_child_def = true;
+    
+    /**
+     * Supplies the child definition for 'chameleon' content models.
+     * @param $def Element definition to inspect
+     * @return HTMLPurifier_ChildDef_Chameleon built from the two halves of
+     *         the content model, or false for any other content model type
+     */
+    function getChildDef($def) {
+        if ($def->content_model_type == 'chameleon') {
+            $parts = explode('!', $def->content_model);
+            return new HTMLPurifier_ChildDef_Chameleon($parts[0], $parts[1]);
+        }
+        return false;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php
@ -0,0 +1,37 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
+
+/**
+ * XHTML 1.1 Hypertext Module (a Core Module): provides the a element
+ * used for hypertext links.
+ */
+class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Hypertext';
+    var $elements = array('a');
+    var $info = array();
+    var $content_sets = array('Inline' => 'a');
+    
+    /**
+     * Builds the definition of the a element and registers it.
+     */
+    function HTMLPurifier_HTMLModule_Hypertext() {
+        $anchor = new HTMLPurifier_ElementDef();
+        $anchor->attr = array(
+            0 => array('Common'),
+            // 'accesskey' => 'Character',
+            // 'charset' => 'Charset',
+            'href' => 'URI',
+            //'hreflang' => 'LanguageCode',
+            'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
+            'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
+            //'tabindex' => 'Number',
+            //'type' => 'ContentType',
+        );
+        $anchor->content_model = '#PCDATA | Inline';
+        $anchor->content_model_type = 'optional';
+        // NOTE(review): presumably keeps links from being nested inside
+        // other links; confirm how excludes is consumed.
+        $anchor->excludes = array('a' => true);
+        $this->info['a'] = $anchor;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php
@ -0,0 +1,38 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+require_once 'HTMLPurifier/AttrDef/URI.php';
+require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
+
+/**
+ * XHTML 1.1 Image Module: provides the img element for basic image
+ * embedding.
+ * @note Empty images are removed by specialized code in
+ *       HTMLPurifier_Strategy_RemoveForeignElements
+ */
+class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Image';
+    var $elements = array('img');
+    var $info = array();
+    var $content_sets = array('Inline' => 'img');
+    
+    /**
+     * Builds the definition of the img element and registers it.
+     */
+    function HTMLPurifier_HTMLModule_Image() {
+        $image = new HTMLPurifier_ElementDef();
+        $image->attr = array(
+            0 => array('Common'),
+            'alt' => 'Text',
+            'height' => 'Length',
+            'longdesc' => 'URI',
+            'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
+            'width' => 'Length'
+        );
+        $image->content_model_type = 'empty';
+        // NOTE(review): judging by its name this transform deals with the
+        // attributes img requires; confirm in AttrTransform/ImgRequired.php
+        $image->attr_transform_post[] = new HTMLPurifier_AttrTransform_ImgRequired();
+        $this->info['img'] = $image;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php
@ -0,0 +1,60 @@
+<?php
+
+/**
+ * XHTML 1.1 Legacy module defines elements that were previously 
+ * deprecated.
+ * 
+ * @note Not all legacy elements have been implemented yet, which
+ *       is a bit of a reverse problem as compared to browsers! In
+ *       addition, this legacy module may implement a bit more than
+ *       mandated by XHTML 1.1.
+ * 
+ * This module can be used in combination with TransformToStrict in order
+ * to transform as many deprecated elements as possible, but retain
+ * questionably deprecated elements that do not have good alternatives
+ * as well as transform elements that don't have an implementation.
+ * See docs/ref-strictness.txt for more details.
+ */
+
+class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
+{
+    
+    // incomplete
+    
+    var $name = 'Legacy';
+    var $elements = array('u', 's', 'strike');
+    var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote');
+    
+    /**
+     * Defines the new legacy elements and registers non-standalone
+     * definitions that adjust elements declared by other modules.
+     */
+    function HTMLPurifier_HTMLModule_Legacy() {
+        // New elements. u, s and strike all share one shape; add
+        // conditionals here once elements with other needs are introduced.
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->content_model = 'Inline | #PCDATA';
+            $def->content_model_type = 'optional';
+            $def->attr[0] = array('Common');
+            $this->info[$tag] = $def;
+        }
+        
+        // Modifications to existing elements: these definitions are
+        // flagged as not standalone.
+        foreach ($this->non_standalone_elements as $tag) {
+            $stub = new HTMLPurifier_ElementDef();
+            $stub->standalone = false;
+            $this->info[$tag] = $stub;
+        }
+        
+        // extra attributes for list items and ordered lists
+        $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
+        $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
+        
+        // replacement content models; child is reset to false alongside
+        $legacy_models = array(
+            'address'    => 'Inline | #PCDATA | p',
+            'blockquote' => 'Flow | #PCDATA'
+        );
+        foreach ($legacy_models as $tag => $model) {
+            $this->info[$tag]->content_model = $model;
+            $this->info[$tag]->content_model_type = 'optional';
+            $this->info[$tag]->child = false;
+        }
+        
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php
@ -0,0 +1,46 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 List Module (a Core Module): provides the list-oriented
+ * elements dl, dt, dd, ol, ul and li.
+ */
+class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'List';
+    var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li');
+    var $info = array();
+    // The abstract schema describes the List content set as a fully formed
+    // "one or more" expression, but it invariably occurs inside an optional
+    // declaration, so that subtlety is not reproduced here. This might
+    // surprise a user who defines "List" and expects several lists to be
+    // allowed, but that is not very intuitive anyway, and the actual XML
+    // Schema may disagree. In any case such nested expressions are not
+    // supported without the incredibly inefficient and draconic Custom
+    // ChildDef.
+    var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List');
+    
+    /**
+     * Builds the definitions of all list elements.
+     */
+    function HTMLPurifier_HTMLModule_List() {
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->attr = array(0 => array('Common'));
+            switch ($tag) {
+                case 'li':
+                case 'dd':
+                    $def->content_model = '#PCDATA | Flow';
+                    $def->content_model_type = 'optional';
+                    break;
+                case 'ol':
+                case 'ul':
+                    $def->content_model = 'li';
+                    $def->content_model_type = 'required';
+                    break;
+            }
+            $this->info[$tag] = $def;
+        }
+        
+        // definition lists are configured separately from the loop above
+        $this->info['dl']->content_model = 'dt | dd';
+        $this->info['dl']->content_model_type = 'required';
+        $this->info['dt']->content_model = '#PCDATA | Inline';
+        $this->info['dt']->content_model_type = 'optional';
+        
+        // this could be a LOT more robust
+        $this->info['li']->auto_close = array('li' => true);
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php
@ -0,0 +1,41 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 Presentation Module (a Text Extension Module): provides
+ * simple presentation-related markup.
+ * @note The official XML Schema and DTD specs split this further into
+ *       two modules:
+ *          - Block Presentation (hr)
+ *          - Inline Presentation (b, big, i, small, sub, sup, tt)
+ *       That distinction is not followed here, because content_sets
+ *       already disambiguates the two groups.
+ */
+class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Presentation';
+    var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt');
+    var $info = array();
+    var $content_sets = array(
+        'Block' => 'hr',
+        'Inline' => 'b | big | i | small | sub | sup | tt'
+    );
+    
+    /**
+     * Builds the element definitions: hr is empty, every other element
+     * optionally holds character data and inline content.
+     */
+    function HTMLPurifier_HTMLModule_Presentation() {
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->attr = array(0 => array('Common'));
+            if ($tag != 'hr') {
+                $def->content_model = '#PCDATA | Inline';
+                $def->content_model_type = 'optional';
+            } else {
+                $def->content_model_type = 'empty';
+            }
+            $this->info[$tag] = $def;
+        }
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php
@ -0,0 +1,27 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/AttrDef/CSS.php';
+
+/**
+ * XHTML 1.1 Style Attribute Module, defines the style attribute in its
+ * own 'Style' attribute collection and lists that collection under
+ * 'Core'.
+ */
+class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'StyleAttribute';
+    var $attr_collections = array(
+        // The inclusion routine differs from the Abstract Modules but
+        // is in line with the DTD and XML Schemas.
+        'Style' => array('style' => false), // see constructor
+        'Core' => array(0 => array('Style'))
+    );
+    
+    /**
+     * Replaces the style placeholder with an HTMLPurifier_AttrDef_CSS
+     * instance; a property initializer may only hold constant values,
+     * so the object has to be created here.
+     */
+    function HTMLPurifier_HTMLModule_StyleAttribute() {
+        $this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php
@ -0,0 +1,88 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/ChildDef/Table.php';
+
+/**
+ * XHTML 1.1 Tables Module: defines table, caption, the row groups, rows,
+ * cells and the column elements together with their attributes.
+ */
+class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Tables';
+    var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col',
+        'colgroup', 'tbody', 'thead', 'tfoot');
+    var $info = array();
+    var $content_sets = array('Block' => 'table');
+    
+    /**
+     * Builds the attribute lists first (one pass over all elements) and
+     * then assigns the content models element by element.
+     */
+    function HTMLPurifier_HTMLModule_Tables() {
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            $def->attr = array(0 => array('Common'));
+            switch ($tag) {
+                case 'caption':
+                    // nothing beyond the Common attributes
+                    break;
+                case 'table':
+                    $def->attr['border'] = 'Pixels';
+                    $def->attr['cellpadding'] = 'Length';
+                    $def->attr['cellspacing'] = 'Length';
+                    $def->attr['frame'] = new HTMLPurifier_AttrDef_Enum(array(
+                        'void', 'above', 'below', 'hsides', 'lhs', 'rhs',
+                        'vsides', 'box', 'border'
+                    ), false);
+                    $def->attr['rules'] = new HTMLPurifier_AttrDef_Enum(array(
+                        'none', 'groups', 'rows', 'cols', 'all'
+                    ), false);
+                    $def->attr['summary'] = 'Text';
+                    $def->attr['width'] = 'Length';
+                    break;
+                default:
+                    // element-specific attributes come first ...
+                    if ($tag == 'col' || $tag == 'colgroup') {
+                        $def->attr['span'] = 'Number';
+                        $def->attr['width'] = 'MultiLength';
+                    }
+                    if ($tag == 'td' || $tag == 'th') {
+                        $def->attr['abbr'] = 'Text';
+                        $def->attr['colspan'] = 'Number';
+                        $def->attr['rowspan'] = 'Number';
+                    }
+                    // ... followed by the alignment attributes shared by
+                    // every element except caption and table
+                    $def->attr['align'] = new HTMLPurifier_AttrDef_Enum(array(
+                        'left', 'center', 'right', 'justify', 'char'
+                    ), false);
+                    $def->attr['valign'] = new HTMLPurifier_AttrDef_Enum(array(
+                        'top', 'middle', 'bottom', 'baseline'
+                    ), false);
+                    $def->attr['charoff'] = 'Length';
+                    break;
+            }
+            $this->info[$tag] = $def;
+        }
+        
+        $this->info['caption']->content_model = '#PCDATA | Inline';
+        $this->info['caption']->content_model_type = 'optional';
+        
+        // The table child definition is set directly because it does not
+        // leverage the substitution mechanisms. The true model is:
+        // 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))'
+        $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
+        
+        foreach (array('td', 'th') as $cell) {
+            $this->info[$cell]->content_model = '#PCDATA | Flow';
+            $this->info[$cell]->content_model_type = 'optional';
+        }
+        
+        $this->info['tr']->content_model = 'td | th';
+        $this->info['tr']->content_model_type = 'required';
+        
+        $this->info['col']->content_model_type = 'empty';
+        
+        $this->info['colgroup']->content_model = 'col';
+        $this->info['colgroup']->content_model_type = 'optional';
+        
+        foreach (array('tbody', 'thead', 'tfoot') as $row_group) {
+            $this->info[$row_group]->content_model = 'tr';
+            $this->info[$row_group]->content_model_type = 'required';
+        }
+        
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php
@ -0,0 +1,78 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+
+/**
+ * XHTML 1.1 Text Module (a Core Module): provides the basic text
+ * containers.
+ * @note The normative XML Schema specification abstracts this module
+ *       further into the following modules:
+ *          - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6)
+ *          - Block Structural (div, p)
+ *          - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
+ *          - Inline Structural (br, span)
+ *       We have elected not to follow suit, but this may change.
+ */
+class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'Text';
+    
+    var $elements = array('abbr', 'acronym', 'address', 'blockquote',
+        'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3',
+        'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
+        'var', 'nolink', 'tex', 'algebra'); //moodle modification
+    
+    var $info = array();
+    
+    var $content_sets = array(
+        'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
+        'Block' => 'address | blockquote | div | p | pre | nolink | tex | algebra', //moodle modification
+        'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
+        'Flow' => 'Heading | Block | Inline'
+    );
+    
+    /**
+     * Builds the definitions of all text elements, then attaches the
+     * exclusions of pre and the auto-close list of p.
+     */
+    function HTMLPurifier_HTMLModule_Text() {
+        foreach ($this->elements as $tag) {
+            $def = new HTMLPurifier_ElementDef();
+            
+            // attributes: br takes Core only, everything else Common;
+            // blockquote and q additionally accept a cite URI
+            $def->attr = array(0 => array($tag == 'br' ? 'Core' : 'Common'));
+            if ($tag == 'blockquote' || $tag == 'q') {
+                $def->attr['cite'] = 'URI';
+            }
+            
+            // content models: br is empty, all other elements are optional
+            if ($tag == 'br') {
+                $def->content_model_type = 'empty';
+            } else {
+                switch ($tag) {
+                    case 'blockquote':
+                        $model = 'Heading | Block | List';
+                        break;
+                    case 'div':
+                        $model = '#PCDATA | Flow';
+                        break;
+                    default:
+                        $model = '#PCDATA | Inline';
+                        break;
+                }
+                $def->content_model = $model;
+                $def->content_model_type = 'optional';
+            }
+            
+            $this->info[$tag] = $def;
+        }
+        
+        // SGML allows exclusions that apply to all descendants, which
+        // DTDs and XML Schemas cannot express. W3C simulates exclusion
+        // for children through complicated compositions of content
+        // models; we take the simpler, SGML-style route of flat-out
+        // exclusions. The Abstract Module is blithely unaware of such
+        // distinctions.
+        $this->info['pre']->excludes = array_flip(array(
+            'img', 'big', 'small',
+            'object', 'applet', 'font', 'basefont' // generally not allowed
+        ));
+        $this->info['p']->auto_close = array_flip(array(
+            'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
+            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
+            'table', 'ul', 'nolink', 'tex', 'algebra' //moodle modification
+        ));
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php
@ -0,0 +1,108 @@
+<?php
+
+require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
+
+require_once 'HTMLPurifier/TagTransform/Simple.php';
+require_once 'HTMLPurifier/TagTransform/Center.php';
+require_once 'HTMLPurifier/TagTransform/Font.php';
+
+require_once 'HTMLPurifier/AttrTransform/Lang.php';
+require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
+require_once 'HTMLPurifier/AttrTransform/BgColor.php';
+require_once 'HTMLPurifier/AttrTransform/Border.php';
+require_once 'HTMLPurifier/AttrTransform/Name.php';
+require_once 'HTMLPurifier/AttrTransform/Length.php';
+
+/**
+ * Proprietary module that, where possible, converts deprecated elements
+ * and attributes into Strict HTML (see HTML 4.01 and XHTML 1.0).
+ */
+
+class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'TransformToStrict';
+    
+    // these elements are modified by this module, not defined by it
+    var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p',
+        'blockquote', 'table', 'td', 'th', 'tr', 'img', 'a', 'hr');
+    
+    var $info_tag_transform = array(
+        // placeholders, the constructor supplies the real objects
+        'font'  => false,
+        'menu'  => false,
+        'dir'   => false,
+        'center'=> false
+    );
+    
+    var $attr_collections = array(
+        'Lang' => array(
+            'lang' => false // placeholder
+        )
+    );
+    
+    var $info_attr_transform_post = array(
+        'lang' => false // placeholder
+    );
+    
+    /**
+     * Fills in the placeholders declared above and attaches the
+     * attribute transforms to non-standalone element definitions.
+     */
+    function HTMLPurifier_HTMLModule_TransformToStrict() {
+        
+        // deprecated tag transforms
+        $this->info_tag_transform['font']   = new HTMLPurifier_TagTransform_Font();
+        $this->info_tag_transform['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
+        $this->info_tag_transform['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
+        $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
+        
+        // one non-standalone definition per modified element
+        foreach ($this->elements as $tag) {
+            $stub = new HTMLPurifier_ElementDef();
+            $stub->standalone = false;
+            $this->info[$tag] = $stub;
+        }
+        
+        // deprecated attribute transforms; one transform instance is
+        // handed to every element of a group
+        $text_align = new HTMLPurifier_AttrTransform_TextAlign();
+        foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p') as $tag) {
+            $this->info[$tag]->attr_transform_pre['align'] = $text_align;
+        }
+        
+        // xml:lang <=> lang mirroring is implemented here in
+        // TransformToStrict and overridden in TransformToXHTML11
+        $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
+        $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
+        
+        // this should ONLY be applied to XHTML 1.0 Strict, not to
+        // XHTML 1.0 Transitional. We may need three classes
+        $this->info['blockquote']->content_model_type = 'strictblockquote';
+        $this->info['blockquote']->child = false; // recalculate please!
+        
+        $bg_color = new HTMLPurifier_AttrTransform_BgColor();
+        foreach (array('table', 'tr', 'td', 'th') as $tag) {
+            $this->info[$tag]->attr_transform_pre['bgcolor'] = $bg_color;
+        }
+        
+        $this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border();
+        
+        $name_transform = new HTMLPurifier_AttrTransform_Name();
+        foreach (array('img', 'a') as $tag) {
+            $this->info[$tag]->attr_transform_pre['name'] = $name_transform;
+        }
+        
+        $width_transform = new HTMLPurifier_AttrTransform_Length('width');
+        foreach (array('td', 'th', 'hr') as $tag) {
+            $this->info[$tag]->attr_transform_pre['width'] = $width_transform;
+        }
+        
+        $height_transform = new HTMLPurifier_AttrTransform_Length('height');
+        foreach (array('td', 'th') as $tag) {
+            $this->info[$tag]->attr_transform_pre['height'] = $height_transform;
+        }
+        
+    }
+    
+    var $defines_child_def = true;
+    
+    /**
+     * Supplies the child definition for 'strictblockquote' content models.
+     * @param $def Element definition to inspect
+     * @return HTMLPurifier_ChildDef_StrictBlockquote built from the
+     *         definition's content model, or false for any other type
+     */
+    function getChildDef($def) {
+        if ($def->content_model_type == 'strictblockquote') {
+            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
+        }
+        return false;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToXHTML11.php
@ -0,0 +1,30 @@
+<?php
+
+/**
+ * Proprietary module that transforms XHTML 1.0 deprecated aspects into
+ * XHTML 1.1 compliant ones, when possible. For maximum effectiveness,
+ * HTMLPurifier_HTMLModule_TransformToStrict must also be loaded
+ * (otherwise, elements that were deprecated from Transitional to Strict
+ * will not be transformed).
+ * 
+ * XHTML 1.1 compliant documents are automatically XHTML 1.0 compliant too,
+ * although they may not be as friendly to legacy browsers.
+ */
+
+class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule
+{
+    
+    var $name = 'TransformToXHTML11';
+    // Sets the lang entry of the Lang attribute collection to false.
+    // NOTE(review): presumably this cancels the lang attribute that
+    // TransformToStrict registers; confirm how false entries are merged.
+    var $attr_collections = array(
+        'Lang' => array(
+            'lang' => false // remove it
+        )
+    );
+    
+    // Likewise sets the lang post-transform entry to false.
+    var $info_attr_transform_post = array(
+        'lang' => false // remove it
+    );
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php
+++ b/lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php
@ -0,0 +1,558 @@
+<?php
+
+require_once 'HTMLPurifier/HTMLModule.php';
+require_once 'HTMLPurifier/ElementDef.php';
+
+require_once 'HTMLPurifier/ContentSets.php';
+require_once 'HTMLPurifier/AttrTypes.php';
+require_once 'HTMLPurifier/AttrCollections.php';
+
+require_once 'HTMLPurifier/AttrDef.php';
+require_once 'HTMLPurifier/AttrDef/Enum.php';
+
+// W3C modules
+require_once 'HTMLPurifier/HTMLModule/CommonAttributes.php';
+require_once 'HTMLPurifier/HTMLModule/Text.php';
+require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
+require_once 'HTMLPurifier/HTMLModule/List.php';
+require_once 'HTMLPurifier/HTMLModule/Presentation.php';
+require_once 'HTMLPurifier/HTMLModule/Edit.php';
+require_once 'HTMLPurifier/HTMLModule/Bdo.php';
+require_once 'HTMLPurifier/HTMLModule/Tables.php';
+require_once 'HTMLPurifier/HTMLModule/Image.php';
+require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
+require_once 'HTMLPurifier/HTMLModule/Legacy.php';
+
+// proprietary modules
+require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
+require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php';
+
+// Registers the HTML.Doctype directive. The arguments appear to be
+// (namespace, directive name, default value, type, description) -- confirm
+// against HTMLPurifier_ConfigSchema. A null value means "not set":
+// HTMLPurifier_HTMLModuleManager::getDoctype() then derives the doctype
+// from other settings.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'Doctype', null, 'string/null',
+    'Doctype to use, valid values are HTML 4.01 Transitional, HTML 4.01 '.
+    'Strict, XHTML 1.0 Transitional, XHTML 1.0 Strict, XHTML 1.1. '.
+    'Technically speaking this is not actually a doctype (as it does '.
+    'not identify a corresponding DTD), but we are using this name '.
+    'for sake of simplicity. This will override any older directives '.
+    'like %Core.XHTML or %HTML.Strict.'
+);
+
+class HTMLPurifier_HTMLModuleManager
+{
+    
+    /**
+     * Array of HTMLPurifier_HTMLModule instances, indexed by each module's
+     * $name property. All known modules, regardless of use, are in this array.
+     */
+    var $modules = array();
+    
+    /**
+     * String doctype we will validate against. See $validModules for use.
+     * 
+     * @note
+     * There is a special doctype '*' that acts both as the "default"
+     * doctype if a customized system only defines one doctype and
+     * also a catch-all doctype that gets merged into all the other
+     * module collections. When possible, use a private collection to
+     * share modules between doctypes: this special doctype is to
+     * make life more convenient for users.
+     */
+    var $doctype;
+    var $doctypeAliases = array(); /**< Lookup array of strings to real doctypes */
+    
+    /**
+     * Associative array: $collections[$type][$doctype] = list of modules.
+     * This is used to logically separate types of functionality so that
+     * based on the doctype and other configuration settings they may
+     * be easily switched on and off. Custom setups may not need
+     * to use this abstraction, opting to have only one big collection
+     * with one valid doctype.
+     */
+    var $collections = array();
+    
+    /**
+     * Modules that may be used in a valid doctype of this kind.
+     * Correctional and leniency modules should not be placed in this
+     * array unless the user said so: don't stuff every possible lenient
+     * module for this doctype in here.
+     */
+    var $validModules = array();
+    var $validCollections = array(); /**< Collections to merge into $validModules */
+    
+    /**
+     * Modules that we will allow in input, subset of $validModules. Single
+     * element definitions may result in us consulting validModules.
+     */
+    var $activeModules = array();
+    var $activeCollections = array(); /**< Collections to merge into $activeModules */
+    
+    var $counter = 0; /**< Designates next available integer order for modules. */
+    var $initialized = false; /**< Says whether initialize() was called */
+    
+    /**
+     * Specifies what doctype to siphon new modules from addModule() to,
+     * or false to disable the functionality. Must be used in conjunction
+     * with $autoCollection.
+     */
+    var $autoDoctype = false;
+    /**
+     * Specifies what collection to siphon new modules from addModule() to,
+     * or false to disable the functionality. Must be used in conjunction
+     * with $autoDoctype.
+     */
+    var $autoCollection = false;
+    
+    /** Associative array of element name to defining modules (always array) */
+    var $elementLookup = array();
+    
+    /** List of prefixes we should use for resolving small names */
+    var $prefixes = array('HTMLPurifier_HTMLModule_');
+    
+    var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
+    var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */
+    var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
+    
+    /**
+     * @param $blank If true, don't do any initializing
+     */
+    function HTMLPurifier_HTMLModuleManager($blank = false) {
+        
+        // attrTypes is the one internal object that may be edited directly;
+        // everything else is meant to be configured through modules
+        $this->attrTypes = new HTMLPurifier_AttrTypes();
+        
+        // a blank manager skips registration of the default modules
+        if ($blank) return;
+        $this->initialize();
+        
+    }
+    
+    /**
+     * Registers the default modules, defines the default collections
+     * ('Safe', 'Unsafe', 'Lenient', 'Correctional', 'Extension') and marks
+     * which of them are active or merely valid.
+     * 
+     * Collection format: each collection maps a doctype name to a list of
+     * module names. An array at index 0 of such a list names other doctype
+     * entries of the same collection to include (resolved later by
+     * processCollections()). Entries whose name starts with '_' are private
+     * helpers that processCollections() deletes once they have been included.
+     */
+    function initialize() {
+        $this->initialized = true;
+        
+        // load default modules to the recognized modules list (not active)
+        $modules = array(
+            // define
+            'CommonAttributes',
+            'Text', 'Hypertext', 'List', 'Presentation',
+            'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute',
+            // define-redefine
+            'Legacy',
+            // redefine
+            'TransformToStrict', 'TransformToXHTML11'
+        );
+        foreach ($modules as $module) {
+            $this->addModule($module);
+        }
+        
+        // Safe modules for supported doctypes. These are included
+        // in the valid and active module lists by default
+        $this->collections['Safe'] = array(
+            '_Common' => array( // leading _ indicates private
+                'CommonAttributes', 'Text', 'Hypertext', 'List',
+                'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
+                'StyleAttribute'
+            ),
+            // HTML definitions, defer to XHTML definitions
+            'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
+            'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
+            // XHTML definitions
+            'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
+            'XHTML 1.0 Strict' => array(array('_Common')),
+            'XHTML 1.1' => array(array('_Common')),
+        );
+        
+        // Modules that specify elements that are unsafe from untrusted
+        // third-parties. These should be registered in $validModules but
+        // almost never $activeModules unless you really know what you're
+        // doing.
+        $this->collections['Unsafe'] = array();
+        
+        // Modules to import if lenient mode (attempt to convert everything
+        // to a valid representation) is on. These must not be in $validModules
+        // unless specified so.
+        $this->collections['Lenient'] = array(
+            'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
+            'XHTML 1.0 Strict' => array('TransformToStrict'),
+            'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
+        );
+        
+        // Modules to import if correctional mode (correct everything that
+        // is feasible to strict mode) is on. These must not be in $validModules
+        // unless specified so.
+        $this->collections['Correctional'] = array(
+            'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
+            'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
+        );
+        
+        // User-space modules, custom code or whatever
+        $this->collections['Extension'] = array();
+        
+        // setup active versus valid modules. ORDER IS IMPORTANT!
+        // definition modules
+        $this->makeCollectionActive('Safe');
+        $this->makeCollectionValid('Unsafe');
+        // redefinition modules
+        $this->makeCollectionActive('Lenient');
+        $this->makeCollectionActive('Correctional');
+        
+        // The auto-siphon targets are assigned last, after the addModule()
+        // calls above have run; modules added from here on are appended to
+        // $collections['Extension']['*'] by addModule().
+        $this->autoDoctype    = '*';
+        $this->autoCollection = 'Extension';
+        
+    }
+    
+    /**
+     * Adds a module to the recognized module list. This does not
+     * do anything else: the module must be added to a corresponding
+     * collection to be "activated".
+     * @param $module Mixed: string module name, with or without
+     *                HTMLPurifier_HTMLModule prefix, or instance of
+     *                subclass of HTMLPurifier_HTMLModule.
+     */
+    function addModule($module) {
+        if (is_string($module)) {
+            // resolve the name to a class: first as given, then with each
+            // registered prefix in turn
+            $requested = $module;
+            $class = $requested;
+            if (!class_exists($class)) {
+                foreach ($this->prefixes as $prefix) {
+                    $class = $prefix . $requested;
+                    if (class_exists($class)) break;
+                }
+            }
+            if (!class_exists($class)) {
+                trigger_error($requested . ' module does not exist',
+                    E_USER_ERROR);
+                return;
+            }
+            $module = new $class();
+        }
+        
+        // registration order doubles as the module's sort order
+        $module->order = $this->counter;
+        $this->counter++;
+        $this->modules[$module->name] = $module;
+        
+        // siphon into the auto-collection only when both targets are set
+        if ($this->autoDoctype === false || $this->autoCollection === false) {
+            return;
+        }
+        $this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name;
+    }
+    
+    /**
+     * Makes a collection active, while also making it valid if not
+     * already done so. See $activeModules for the semantics of "active".
+     * @param $collection_name Name of collection to activate
+     */
+    function makeCollectionActive($collection_name) {
+        // an active collection must also be valid, so register it there first
+        $already_valid = in_array($collection_name, $this->validCollections);
+        if (!$already_valid) {
+            $this->makeCollectionValid($collection_name);
+        }
+        array_push($this->activeCollections, $collection_name);
+    }
+    
+    /**
+     * Makes a collection valid. See $validModules for the semantics of "valid"
+     */
+    function makeCollectionValid($collection_name) {
+        // appended unconditionally; no duplicate check is performed here
+        array_push($this->validCollections, $collection_name);
+    }
+    
+    /**
+     * Adds a class prefix that addModule() will use to resolve a
+     * string name to a concrete class
+     */
+    function addPrefix($prefix) {
+        // prefixes are stored as strings and tried in insertion order
+        $prefix = (string) $prefix;
+        $this->prefixes[] = $prefix;
+    }
+    
+    /**
+     * Resolves the doctype, expands all collections into module instances
+     * and builds the lookup tables and helper objects used by getElement().
+     * @param $config Configuration object; only passed on to getDoctype()
+     */
+    function setup($config) {
+        
+        // modules siphoned into the auto-collection only count once that
+        // collection is active
+        if ($this->autoCollection !== false) {
+            $this->makeCollectionActive($this->autoCollection);
+        }
+        
+        // resolve the doctype, following an alias when one is registered
+        $doctype = $this->getDoctype($config);
+        if (isset($this->doctypeAliases[$doctype])) {
+            $doctype = $this->doctypeAliases[$doctype];
+        }
+        $this->doctype = $doctype;
+        
+        // turn every collection from lists of names into
+        // module name => module instance maps
+        foreach (array_keys($this->collections) as $type) {
+            $this->processCollections($this->collections[$type]);
+        }
+        
+        $this->validModules  = $this->assembleModules($this->validCollections);
+        $this->activeModules = $this->assembleModules($this->activeCollections);
+        
+        // element name => names of all valid modules that define it
+        foreach ($this->validModules as $valid_module) {
+            foreach ($valid_module->info as $element => $unused_def) {
+                if (!isset($this->elementLookup[$element])) {
+                    $this->elementLookup[$element] = array();
+                }
+                $this->elementLookup[$element][] = $valid_module->name;
+            }
+        }
+        
+        // Content sets are built from the *valid* modules: content models
+        // that mention non-allowed elements are harmless because
+        // RemoveForeignElements will ensure they never get in anyway, and
+        // there is usually no reason to restrict a content model beyond
+        // what the doctype mandates. Note, however, that this means
+        // redefinitions of content models can't be tossed into validModules
+        // willy-nilly: that stuff is still regulated by configuration.
+        $this->contentSets = new HTMLPurifier_ContentSets($this->validModules);
+        
+        // Attribute collections are built from the *active* modules: only
+        // explicitly allowed modules may affect the global attribute
+        // collections. This means there's a distinction between loading the
+        // Bdo module and the bdo element: Bdo will enable the dir attribute
+        // on all elements, while bdo will only define the bdo element, which
+        // will not have an editable directionality. This might catch people
+        // who are loading only elements by surprise, so we should consider
+        // loading an entire module if all the elements it defines are
+        // requested by the user, especially if it affects the global
+        // attribute collections.
+        $this->attrCollections = new HTMLPurifier_AttrCollections(
+            $this->attrTypes,
+            $this->activeModules
+        );
+        
+    }
+    
+    /**
+     * Takes a list of collections and merges together all the defined
+     * modules for the current doctype from those collections.
+     * @param $collections List of collection suffixes we should grab
+     *                     modules from (like 'Safe' or 'Lenient')
+     */
+    function assembleModules($collections) {
+        $assembled = array();
+        $doctype_hits = 0; // collections that had an entry for the doctype
+        
+        foreach ($collections as $name) {
+            if (!isset($this->collections[$name])) {
+                trigger_error("$name collection is undefined", E_USER_ERROR);
+                continue;
+            }
+            $by_doctype = $this->collections[$name];
+            $skip_catch_all = false;
+            
+            if (isset($by_doctype[$this->doctype])) {
+                $own = $by_doctype[$this->doctype];
+                // a '*' flag inside the doctype's entry opts this collection
+                // out of the catch-all merge below; the flag is not a module
+                if (isset($own['*'])) {
+                    unset($own['*']);
+                    $skip_catch_all = true;
+                }
+                $assembled += $own;
+                $doctype_hits++;
+            }
+            
+            // merge in the catch-all doctype unless it was disabled or is
+            // itself the current doctype
+            if ($skip_catch_all || $this->doctype === '*') continue;
+            if (isset($by_doctype['*'])) {
+                $assembled += $by_doctype['*'];
+            }
+        }
+        
+        if ($doctype_hits < 1) {
+            // possible XSS injection if user-specified doctypes
+            // are allowed
+            trigger_error("Doctype {$this->doctype} does not exist, ".
+                "check for typos (if you desire a doctype that allows ".
+                "no elements, use an empty array collection)", E_USER_ERROR);
+        }
+        return $assembled;
+    }
+    
+    /**
+     * Takes a collection and performs inclusions and substitutions for it.
+     * @param $cols Reference to collections class member variable
+     */
+    function processCollections(&$cols) {
+        
+        // $cols is the set of collections
+        // $col_i is the name (index) of a collection
+        // $col is a collection/list of modules
+        
+        // Phase 1: perform inclusions. An array stored at index 0 of a
+        // collection lists the names of other collections whose modules
+        // should be appended to this one.
+        foreach ($cols as $col_i => $col) {
+            $seen = array();
+            if (!empty($col[0]) && is_array($col[0])) {
+                $seen[$col_i] = true; // recursion reporting
+                $includes = $col[0];
+                unset($cols[$col_i][0]); // remove inclusions value, recursion guard
+            } else {
+                $includes = array();
+            }
+            if (empty($includes)) continue;
+            // $includes may grow while we iterate (nested inclusions are
+            // appended below), hence the isset()-driven loop
+            for ($i = 0; isset($includes[$i]); $i++) {
+                $inc = $includes[$i];
+                if (isset($seen[$inc])) {
+                    trigger_error(
+                        "Circular inclusion detected in $col_i collection",
+                        E_USER_ERROR
+                    );
+                    continue;
+                } else {
+                    $seen[$inc] = true;
+                }
+                if (!isset($cols[$inc])) {
+                    trigger_error(
+                        "Collection $col_i tried to include undefined ".
+                        "collection $inc", E_USER_ERROR);
+                    continue;
+                }
+                foreach ($cols[$inc] as $module) {
+                    if (is_array($module)) { // another inclusion!
+                        // queue the nested inclusion for this same loop
+                        foreach ($module as $inc2) $includes[] = $inc2;
+                        continue;
+                    }
+                    $cols[$col_i][] = $module; // merge in the other modules
+                }
+            }
+        }
+        
+        // Phase 2: replace with real modules, invert module from list to
+        // assoc array of module name to module instance
+        foreach ($cols as $col_i => $col) {
+            $ignore_global = false;
+            $order = array();
+            foreach ($col as $module_i => $module) {
+                // drop the original entry; it is re-added keyed by module
+                // name once it has been resolved
+                unset($cols[$col_i][$module_i]);
+                if (is_array($module)) {
+                    trigger_error("Illegal inclusion array at index".
+                        " $module_i found collection $col_i, inclusion".
+                        " arrays must be at start of collection (index 0)",
+                        E_USER_ERROR);
+                    continue;
+                }
+                // '*' => false is a flag, not a module: it is restored after
+                // sorting and read by assembleModules()
+                if ($module_i === '*' && $module === false) {
+                    $ignore_global = true;
+                    continue;
+                }
+                if (!isset($this->modules[$module])) {
+                    trigger_error(
+                        "Collection $col_i references undefined ".
+                        "module $module",
+                        E_USER_ERROR
+                    );
+                    continue;
+                }
+                $module = $this->modules[$module];
+                $cols[$col_i][$module->name] = $module;
+                $order[$module->name] = $module->order;
+            }
+            // sort the collection by each module's registration order
+            // ($module->order is assigned in addModule())
+            array_multisort(
+                $order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
+            );
+            if ($ignore_global) $cols[$col_i]['*'] = false;
+        }
+        
+        // Phase 3: delete pseudo-collections (names starting with '_'); they
+        // only exist to be included by other collections
+        foreach ($cols as $col_i => $col) {
+            if ($col_i[0] == '_') unset($cols[$col_i]);
+        }
+        
+    }
+    
+    /**
+     * Retrieves the doctype from the configuration object
+     */
+    function getDoctype($config) {
+        // an explicitly configured doctype always wins
+        $explicit = $config->get('HTML', 'Doctype');
+        if ($explicit !== null) {
+            return $explicit;
+        }
+        
+        if (!$this->initialized) {
+            // custom (blank) setup: skip the HTML-oriented backwards
+            // compatibility below and use the auto-doctype if there is one,
+            // otherwise the catch-all doctype
+            if ($this->autoDoctype) {
+                return $this->autoDoctype;
+            }
+            return '*';
+        }
+        
+        // backwards compatibility: derive the doctype from the older
+        // Core.XHTML and HTML.Strict directives
+        $family  = $config->get('Core', 'XHTML')  ? 'XHTML 1.0' : 'HTML 4.01';
+        $variant = $config->get('HTML', 'Strict') ? ' Strict'   : ' Transitional';
+        return $family . $variant;
+    }
+    
+    /**
+     * Retrieves merged element definitions for all active elements.
+     * @note We may want to generate an elements array during setup
+     *       and pass that on, because a specific combination of
+     *       elements may trigger the loading of a module.
+     * @param $config Instance of HTMLPurifier_Config, for determining
+     *                stray elements.
+     */
+    function getElements($config) {
+        
+        // one merged definition per element named by any active module;
+        // an element listed by several modules is simply resolved again
+        $definitions = array();
+        foreach ($this->activeModules as $active_module) {
+            foreach ($active_module->elements as $element_name) {
+                $definitions[$element_name] =
+                    $this->getElement($element_name, $config);
+            }
+        }
+        
+        return $definitions;
+        
+    }
+    
+    /**
+     * Retrieves a single merged element definition
+     * @param $name Name of element
+     * @param $config Instance of HTMLPurifier_Config, may not be necessary.
+     */
+    function getElement($name, $config) {
+        
+        // unknown to every valid module
+        if (!isset($this->elementLookup[$name])) {
+            return false;
+        }
+        
+        $merged = false;
+        $valid = $this->validModules;
+        
+        foreach ($this->elementLookup[$name] as $module_name) {
+            
+            $module = $valid[$module_name];
+            $candidate = $module->info[$name];
+            
+            if ($merged) {
+                // later definitions are layered onto the first standalone one
+                $merged->mergeIn($candidate);
+            } elseif ($candidate->standalone) {
+                // NOTE(review): under PHP 5 object semantics this shares the
+                // module's own definition object rather than copying it, so
+                // later mergeIn() calls would modify the module -- confirm
+                $merged = $candidate;
+            } else {
+                // could "save it for another day":
+                // non-standalone definitions that don't have a standalone
+                // to merge into could be deferred to the end
+                continue;
+            }
+            
+            // attribute value expansions
+            $this->attrCollections->performInclusions($merged->attr);
+            $this->attrCollections->expandIdentifiers($merged->attr, $this->attrTypes);
+            
+            // descendants_are_inline, for ChildDef_Chameleon; ins and del
+            // are deliberately excluded
+            $is_inline_model = is_string($merged->content_model) &&
+                strpos($merged->content_model, 'Inline') !== false;
+            if ($is_inline_model && $name != 'del' && $name != 'ins') {
+                $merged->descendants_are_inline = true;
+            }
+            
+            $this->contentSets->generateChildDef($merged, $module);
+        }
+        
+        return $merged;
+        
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php
+++ b/lib/htmlpurifier/HTMLPurifier/IDAccumulator.php
@ -0,0 +1,42 @@
+<?php
+
+/**
+ * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
+ * @note In Slashdot-speak, dupe means duplicate.
+ * @note This class does not accept $config or $context, thus, it is the
+ *       burden of the caller to register the appropriate errors or
+ *       configuration.
+ */
+class HTMLPurifier_IDAccumulator
+{
+    
+    /**
+     * Set of IDs seen so far, stored as ID => true.
+     * @public
+     */
+    var $ids = array();
+    
+    /**
+     * Records an ID unless it is already known.
+     * @param $id ID to be added.
+     * @return Bool true if the ID was new, false if it is a duplicate
+     */
+    function add($id) {
+        $is_new = !isset($this->ids[$id]);
+        if ($is_new) {
+            $this->ids[$id] = true;
+        }
+        return $is_new;
+    }
+    
+    /**
+     * Marks every ID in a list as seen.
+     * @param $array_of_ids Array of IDs to load
+     * @note Duplicates are neither detected nor reported here
+     */
+    function load($array_of_ids) {
+        foreach ($array_of_ids as $known_id) {
+            $this->ids[$known_id] = true;
+        }
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Language.php
+++ b/lib/htmlpurifier/HTMLPurifier/Language.php
@ -0,0 +1,56 @@
+<?php
+
+require_once 'HTMLPurifier/LanguageFactory.php';
+
+class HTMLPurifier_Language
+{
+    
+    /**
+     * ISO 639 language code of this language; the shortest form is preferred
+     */
+    var $code = 'en';
+    
+    /**
+     * Code of the language to fall back to, or false
+     */
+    var $fallback = false;
+    
+    /**
+     * Localizable messages, keyed by message identifier
+     */
+    var $messages = array();
+    
+    /**
+     * Whether load() has already populated this object
+     * @private
+     */
+    var $_loaded = false;
+    
+    /**
+     * Populates this object from the language factory's cache.
+     * @note Lazy: repeated calls after the first do nothing
+     */
+    function load() {
+        if (!$this->_loaded) {
+            $factory = HTMLPurifier_LanguageFactory::instance();
+            $factory->loadLanguage($this->code);
+            // copy every key the factory manages onto the same-named property
+            foreach ($factory->keys as $property) {
+                $this->$property = $factory->cache[$this->code][$property];
+            }
+            $this->_loaded = true;
+        }
+    }
+    
+    /**
+     * Looks up a localised message; the text is returned as stored.
+     * @param $key string identifier of message
+     * @return string localised message, or '' when the key is unknown
+     */
+    function getMessage($key) {
+        if (!$this->_loaded) $this->load();
+        return isset($this->messages[$key]) ? $this->messages[$key] : '';
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php
+++ b/lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php
@ -0,0 +1,12 @@
+<?php
+
+// private class for unit testing
+
+class HTMLPurifier_Language_en_x_test extends HTMLPurifier_Language
+{
+    
+    // intentionally empty: everything is inherited from HTMLPurifier_Language
+    
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php
+++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en-x-test.php
@ -0,0 +1,11 @@
+<?php
+
+// private language message file for unit testing purposes
+
+// $fallback and $messages are the variables that
+// HTMLPurifier_LanguageFactory::loadLanguage() picks up (via compact() over
+// its $keys list) after including this file.
+
+// language whose messages fill in for keys missing below
+$fallback = 'en';
+
+// message identifier => localised text
+$messages = array(
+    'htmlpurifier' => 'HTML Purifier X'
+);
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php
+++ b/lib/htmlpurifier/HTMLPurifier/Language/messages/en.php
@ -0,0 +1,12 @@
+<?php
+
+// no fallback language is declared for English
+$fallback = false;
+
+// message identifier => localised text
+$messages = array(
+
+'htmlpurifier' => 'HTML Purifier',
+'pizza' => 'Pizza', // for unit testing purposes
+
+);
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php
+++ b/lib/htmlpurifier/HTMLPurifier/LanguageFactory.php
@ -0,0 +1,196 @@
+<?php
+
+require_once 'HTMLPurifier/Language.php';
+require_once 'HTMLPurifier/AttrDef/Lang.php';
+
+/**
+ * Class responsible for generating HTMLPurifier_Language objects, managing
+ * caching and fallbacks.
+ * @note Thanks to MediaWiki for the general logic, although this version
+ *       has been entirely rewritten
+ */
+class HTMLPurifier_LanguageFactory
+{
+    
+    /**
+     * Cache of language code information used to load HTMLPurifier_Language objects
+     * Structure is: $factory->cache[$language_code][$key] = $value
+     * @value array map
+     */
+    var $cache;
+    
+    /**
+     * Valid keys in the HTMLPurifier_Language object. Designates which
+     * variables to slurp out of a message file.
+     * @value array list
+     */
+    var $keys = array('fallback', 'messages');
+    
+    /**
+     * Instance of HTMLPurifier_AttrDef_Lang to validate language codes
+     * @value object HTMLPurifier_AttrDef_Lang
+     */
+    var $validator;
+    
+    /**
+     * Cached copy of dirname(__FILE__), directory of current file without
+     * trailing slash
+     * @value string filename
+     */
+    var $dir;
+    
+    /**
+     * Keys whose contents are a hash map and can be merged
+     * @value array lookup
+     */
+    var $mergeable_keys_map = array('messages' => true);
+    
+    /**
+     * Keys whose contents are a list and can be merged
+     * @value array lookup
+     */
+    var $mergeable_keys_list = array();
+    
+    /**
+     * Retrieve sole instance of the factory.
+     * @static
+     * @param $prototype Optional prototype to overload sole instance with,
+     *                   or bool true to reset to default factory.
+     */
+    function &instance($prototype = null) {
+        static $instance = null;
+        if ($prototype === true || ($prototype === null && $instance === null)) {
+            // Explicit reset (true) or first use: build the default factory.
+            // The reset test must come before the prototype assignment and
+            // must be strict: true is itself non-null, and any object
+            // prototype compares loosely equal to true.
+            $instance = new HTMLPurifier_LanguageFactory();
+            $instance->setup();
+        } elseif ($prototype !== null) {
+            // overload the sole instance with the supplied prototype
+            $instance = $prototype;
+        }
+        return $instance;
+    }
+    
+    /**
+     * Sets up the singleton, much like a constructor
+     * @note Prevents people from getting this outside of the singleton
+     */
+    function setup() {
+        // directory of this file, cached for building language file paths
+        $this->dir = dirname(__FILE__);
+        // validator that create() uses to check language codes
+        $this->validator = new HTMLPurifier_AttrDef_Lang();
+    }
+    
+    /**
+     * Creates a language object, handles class fallbacks
+     * @param $code string language code
+     */
+    function create($code) {
+        
+        $config = $context = false; // hope it doesn't use these!
+        $code = $this->validator->validate($code, $config, $context);
+        if ($code === false) $code = 'en'; // malformed code becomes English
+        
+        $pcode = str_replace('-', '_', $code); // make valid PHP classname
+        static $depth = 0; // recursion protection, shared by nested calls
+        
+        if ($code == 'en') {
+            // English is served by the base class itself
+            $class = 'HTMLPurifier_Language';
+        } else {
+            $class = 'HTMLPurifier_Language_' . $pcode;
+            $file  = $this->dir . '/Language/classes/' . $code . '.php';
+            // PHP5/APC deps bug workaround can go here
+            // you can bypass the conditional include by loading the
+            // file yourself
+            if (file_exists($file) && !class_exists($class)) {
+                include_once $file;
+            }
+        }
+        
+        if (!class_exists($class)) {
+            // No dedicated class: build the fallback language instead. The
+            // fallback comes from this factory's own cache, and the object
+            // is created by recursing into this method.
+            $fallback = $this->getFallbackFor($code);
+            // A missing fallback, or a fallback chain that is suspiciously
+            // deep (most likely circular), resolves to English, which is
+            // always available through the base class. The cap of 10 is
+            // arbitrary.
+            if (!$fallback || $depth >= 10) $fallback = 'en';
+            $depth++;
+            $lang = $this->create($fallback);
+            $depth--;
+        } else {
+            $lang = new $class;
+        }
+        // the object always reports the code that was asked for, even when
+        // a fallback class is standing in
+        $lang->code = $code;
+        
+        return $lang;
+        
+    }
+    
+    /**
+     * Returns the fallback language for language
+     * @note Loads the original language into cache
+     * @param $code string language code
+     */
+    function getFallbackFor($code) {
+        // make sure the language's data is cached before reading from it
+        $this->loadLanguage($code);
+        $language_data = $this->cache[$code];
+        return $language_data['fallback'];
+    }
+    
+    /**
+     * Loads language into the cache, handles message file and fallbacks
+     * @param $code string language code
+     */
+    function loadLanguage($code) {
+        // recursion guard: codes whose fallback chain is being loaded right now
+        static $languages_seen = array();
+        
+        // abort if we've already loaded it
+        if (isset($this->cache[$code])) return;
+        
+        // generate filename
+        $filename = $this->dir . '/Language/messages/' . $code . '.php';
+        
+        // default fallback : may be overwritten by the ensuing include
+        $fallback = ($code != 'en') ? 'en' : false;
+        
+        // load primary localisation
+        if (!file_exists($filename)) {
+            // skip the include: will rely solely on fallback
+            $cache = array();
+        } else {
+            // the message file assigns $fallback and $messages in this scope
+            include $filename;
+            $cache = compact($this->keys);
+        }
+        
+        // load fallback localisation
+        if (!empty($fallback)) {
+            
+            // infinite recursion guard: if this code is already being loaded
+            // further up the call chain, the fallbacks form a cycle
+            if (isset($languages_seen[$code])) {
+                trigger_error('Circular fallback reference in language ' .
+                    $code, E_USER_ERROR);
+                $fallback = 'en';
+            }
+            $languages_seen[$code] = true;
+            
+            // load the fallback recursively
+            $this->loadLanguage($fallback);
+            $fallback_cache = $this->cache[$fallback];
+            
+            // The marker only covers the recursive call. The static outlives
+            // this call and is shared by every factory instance, so leaving
+            // it set would misreport a cycle on a later, unrelated load.
+            unset($languages_seen[$code]);
+            
+            // merge fallback with current language
+            foreach ($this->keys as $key) {
+                // nothing to inherit for this key
+                if (!isset($fallback_cache[$key])) continue;
+                if (!isset($cache[$key])) {
+                    // language does not define the key: inherit it wholesale
+                    $cache[$key] = $fallback_cache[$key];
+                } elseif (isset($this->mergeable_keys_map[$key])) {
+                    // hash maps: the language's own entries win
+                    $cache[$key] = $cache[$key] + $fallback_cache[$key];
+                } elseif (isset($this->mergeable_keys_list[$key])) {
+                    // lists: fallback entries first, then the language's own
+                    $cache[$key] = array_merge($fallback_cache[$key], $cache[$key]);
+                }
+                // any other key keeps the language's own value
+            }
+            
+        }
+        
+        // save to cache for later retrieval
+        $this->cache[$code] = $cache;
+        
+        return;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Lexer.php
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer.php
@ -0,0 +1,237 @@
+<?php
+
+require_once 'HTMLPurifier/Token.php';
+require_once 'HTMLPurifier/Encoder.php';
+require_once 'HTMLPurifier/EntityParser.php';
+
+HTMLPurifier_ConfigSchema::define(
+    'Core', 'AcceptFullDocuments', true, 'bool',
+    'This parameter determines whether or not the filter should accept full '.
+    'HTML documents, not just HTML fragments.  When on, it will '.
+    'drop all sections except the content between body.'
+);
+
+/**
+ * Forgivingly lexes HTML (SGML-style) markup into tokens.
+ * 
+ * A lexer parses a string of SGML-style markup and converts them into
+ * corresponding tokens.  It doesn't check for well-formedness, although its
+ * internal mechanism may make this automatic (such as the case of
+ * HTMLPurifier_Lexer_DOMLex).  There are several implementations to choose
+ * from.
+ * 
+ * A lexer is HTML-oriented: it might work with XML, but it's not
+ * recommended, as we adhere to a subset of the specification for optimization
+ * reasons.
+ * 
+ * This class should not be directly instantiated, but you may use create() to
+ * retrieve a default copy of the lexer.  Being a supertype, this class
+ * does not actually define any implementation, but offers commonly used
+ * convenience functions for subclasses.
+ * 
+ * @note The unit tests will instantiate this class for testing purposes, as
+ *       many of the utility functions require a class to be instantiated.
+ *       Be careful when porting this class to PHP 5.
+ * 
+ * @par
+ * 
+ * @note
+ * We use tokens rather than create a DOM representation because DOM would:
+ * 
+ * @par
+ *  -# Require more processing power to create,
+ *  -# Require recursion to iterate,
+ *  -# Must be compatible with PHP 5's DOM (otherwise duplication),
+ *  -# Has the entire document structure (html and body not needed), and
+ *  -# Has unknown readability improvement.
+ * 
+ * @par
+ * What the last item means is that the functions for manipulating tokens are
+ * already fairly compact, and when well-commented, more abstraction may not
+ * be needed.
+ * 
+ * @see HTMLPurifier_Token
+ */
+class HTMLPurifier_Lexer
+{
+    
+    /**
+     * Entity parser used by parseData() and normalize(); created in the
+     * constructor.
+     * @protected
+     */
+    var $_entity_parser;
+    
+    /**
+     * Constructor: instantiates the entity parser.
+     */
+    function HTMLPurifier_Lexer() {
+        $this->_entity_parser = new HTMLPurifier_EntityParser();
+    }
+    
+    
+    /**
+     * Most common entity to raw value conversion table for special entities.
+     * @protected
+     */
+    var $_special_entity2str =
+            array(
+                    '&quot;' => '"',
+                    '&amp;'  => '&',
+                    '&lt;'   => '<',
+                    '&gt;'   => '>',
+                    '&#39;'  => "'",
+                    '&#039;' => "'",
+                    '&#x27;' => "'"
+            );
+    
+    /**
+     * Parses special entities into the proper characters.
+     * 
+     * This string will translate escaped versions of the special characters
+     * into the correct ones.
+     * 
+     * @warning
+     * You should be able to treat the output of this function as
+     * completely parsed, but that's only because all other entities should
+     * have been handled previously in substituteNonSpecialEntities()
+     * 
+     * @param $string String character data to be parsed.
+     * @returns Parsed character data.
+     */
+    function parseData($string) {
+        
+        // following functions require at least one character
+        if ($string === '') return '';
+        
+        // count ampersands that could start an entity: those followed by a
+        // space, or sitting at the very end of the string, cannot
+        $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
+            ($string[strlen($string)-1] === '&' ? 1 : 0);
+        
+        if (!$num_amp) return $string; // abort if no entities
+        
+        // every &amp; leaves one literal ampersand behind after translation,
+        // so remember how many to expect
+        $num_esc_amp = substr_count($string, '&amp;');
+        $string = strtr($string, $this->_special_entity2str);
+        
+        // code duplication for sake of optimization, see above
+        $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
+            ($string[strlen($string)-1] === '&' ? 1 : 0);
+        
+        // all remaining ampersands are accounted for by decoded &amp;
+        if ($num_amp_2 <= $num_esc_amp) return $string;
+        
+        // hmm... now we have some uncommon entities. Use the callback.
+        $string = $this->_entity_parser->substituteSpecialEntities($string);
+        return $string;
+    }
+    
+    /**
+     * Lexes an HTML string into tokens.
+     * 
+     * This base implementation only raises an error; subclasses override it.
+     * 
+     * @param $string String HTML.
+     * @param $config Configuration object, passed on to normalize() by
+     *                subclasses.
+     * @param $context Context object, passed by reference.
+     * @return HTMLPurifier_Token array representation of HTML.
+     */
+    function tokenizeHTML($string, $config, &$context) {
+        trigger_error('Call to abstract class', E_USER_ERROR);
+    }
+    
+    /**
+     * Retrieves or sets the default Lexer as a Prototype Factory.
+     * 
+     * Depending on what PHP version you are running, the abstract base
+     * Lexer class will determine which concrete Lexer is best for you:
+     * HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
+     * for PHP 5 and beyond.
+     * 
+     * Passing the optional prototype lexer parameter will override the
+     * default with your own implementation.  A copy/reference of the prototype
+     * lexer will now be returned when you request a new lexer.
+     * 
+     * @static
+     * 
+     * @note
+     * Though it is possible to call this factory method from subclasses,
+     * such usage is not recommended.
+     * 
+     * @param $prototype Optional prototype lexer.
+     * @return Concrete lexer.
+     */
+    function create($prototype = null) {
+        // we don't really care if it's a reference or a copy
+        static $lexer = null;
+        if ($prototype) {
+            $lexer = $prototype;
+        }
+        if (empty($lexer)) {
+            if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
+                class_exists('DOMDocument')) { // check for DOM support
+                require_once 'HTMLPurifier/Lexer/DOMLex.php';
+                $lexer = new HTMLPurifier_Lexer_DOMLex();
+            } else {
+                require_once 'HTMLPurifier/Lexer/DirectLex.php';
+                $lexer = new HTMLPurifier_Lexer_DirectLex();
+            }
+        }
+        return $lexer;
+    }
+    
+    /**
+     * Translates CDATA sections into regular sections (through escaping).
+     * 
+     * @static
+     * @protected
+     * @param $string HTML string to process.
+     * @returns HTML with CDATA sections escaped.
+     */
+    function escapeCDATA($string) {
+        // the "s" modifier lets "." match newlines so that CDATA sections
+        // spanning several lines are caught; ".*?" also accepts an empty
+        // section (<![CDATA[]]>), which simply disappears
+        return preg_replace_callback(
+            '/<!\[CDATA\[(.*?)\]\]>/s',
+            array('HTMLPurifier_Lexer', 'CDATACallback'),
+            $string
+        );
+    }
+    
+    /**
+     * Callback function for escapeCDATA() that does the work.
+     * 
+     * @static
+     * @warning Though this is public in order to let the callback happen,
+     *          calling it directly is not recommended.
+     * @params $matches PCRE matches array, with index 0 the entire match
+     *                  and 1 the inside of the CDATA section.
+     * @returns Escaped internals of the CDATA section.
+     */
+    function CDATACallback($matches) {
+        // not exactly sure why the character set is needed, but whatever
+        return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
+    }
+    
+    /**
+     * Takes a piece of HTML and normalizes it by converting entities, fixing
+     * encoding, extracting bits, and other good stuff.
+     * 
+     * @param $html String HTML to normalize.
+     * @param $config Configuration object; Core.AcceptFullDocuments is read.
+     * @param $context Context object, passed by reference (not used here).
+     * @returns Normalized HTML string.
+     */
+    function normalize($html, $config, &$context) {
+        
+        // extract body from document if applicable
+        if ($config->get('Core', 'AcceptFullDocuments')) {
+            $html = $this->extractBody($html);
+        }
+        
+        // escape CDATA
+        $html = $this->escapeCDATA($html);
+        
+        // expand entities that aren't the big five
+        $html = $this->_entity_parser->substituteNonSpecialEntities($html);
+        
+        // clean into wellformed UTF-8 string for an SGML context: this has
+        // to be done after entity expansion because the entities sometimes
+        // represent non-SGML characters (horror, horror!)
+        $html = HTMLPurifier_Encoder::cleanUTF8($html);
+        
+        return $html;
+    }
+    
+    /**
+     * Takes a string of HTML (fragment or document) and returns the content
+     * between the first <body ...> and the first following </body>.  If no
+     * such pair exists, the input is returned unchanged.
+     * 
+     * @param $html String HTML fragment or document.
+     * @returns Contents of the body element, or the original string.
+     */
+    function extractBody($html) {
+        $matches = array();
+        // ".*?" (rather than ".+?") so that an empty body yields an empty
+        // string instead of falling through and returning the whole document
+        $result = preg_match('!<body[^>]*>(.*?)</body>!is', $html, $matches);
+        if ($result) {
+            return $matches[1];
+        } else {
+            return $html;
+        }
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php
@ -0,0 +1,152 @@
+<?php
+
+require_once 'HTMLPurifier/Lexer.php';
+require_once 'HTMLPurifier/TokenFactory.php';
+
+/**
+ * Parser that uses PHP 5's DOM extension (part of the core).
+ * 
+ * In PHP 5, the DOM XML extension was revamped into DOM and added to the core.
+ * It gives us a forgiving HTML parser, which we use to transform the HTML
+ * into a DOM, and then into the tokens.  It is blazingly fast (for large
+ * documents, it performs twenty times faster than
+ * HTMLPurifier_Lexer_DirectLex), and is the default choice for PHP 5.
+ * 
+ * @note Any empty elements will have empty tokens associated with them, even if
+ * this is prohibited by the spec. This cannot be fixed until the spec
+ * comes into play.
+ * 
+ * @note PHP's DOM extension does not actually parse any entities, we use
+ *       our own function to do that.
+ * 
+ * @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
+ *          If this is a huge problem, due to the fact that HTML is hand
+ *          edited and you are unable to get a parser cache that caches
+ *          the output of HTML Purifier while keeping the original HTML lying
+ *          around, you may want to run Tidy on the resulting output or use
+ *          HTMLPurifier_Lexer_DirectLex
+ */
+
+class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
+{
+    
+    /**
+     * Token factory used to build every token emitted by this lexer.
+     */
+    private $factory;
+    
+    /**
+     * Runs the parent lexer setup and creates the token factory.
+     */
+    public function __construct() {
+        // setup the factory
+        parent::HTMLPurifier_Lexer();
+        $this->factory = new HTMLPurifier_TokenFactory();
+    }
+    
+    /**
+     * Lexes an HTML string into tokens by loading it into a DOMDocument
+     * and walking the resulting tree.
+     * 
+     * @param $string  String HTML.
+     * @param $config  Configuration object, forwarded to normalize().
+     * @param $context Context object, forwarded to normalize().
+     * @returns Array of tokens; empty if the wrapper element could not be
+     *          located in the parsed document.
+     */
+    public function tokenizeHTML($string, $config, &$context) {
+        
+        $string = $this->normalize($string, $config, $context);
+        
+        // preprocess string, essential for UTF-8. The fragment is wrapped
+        // in a DIV so that it can be located again after parsing. Note the
+        // whitespace separating the public and system identifiers of the
+        // DOCTYPE: it is required for the declaration to be well-formed.
+        $string =
+            '<!DOCTYPE html '.
+                'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '.
+                '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
+            '<html><head>'.
+            '<meta http-equiv="Content-Type" content="text/html;'.
+                ' charset=utf-8" />'.
+            '</head><body><div>'.$string.'</div></body></html>';
+        
+        $doc = new DOMDocument();
+        $doc->encoding = 'UTF-8'; // technically does nothing, but whatever
+        
+        // DOM will toss errors if the HTML its parsing has really big
+        // problems, so we're going to mute them. This can cause problems
+        // if a custom error handler that doesn't implement error_reporting
+        // is set, as noted by a Drupal plugin of HTML Purifier. Consider
+        // making our own error reporter to temporarily load in
+        @$doc->loadHTML($string);
+        
+        $tokens = array();
+        
+        // descend html > body > div to reach the wrapper; bail out with no
+        // tokens rather than calling a method on null if a level is missing
+        $wrapper = $doc;
+        foreach (array('html', 'body', 'div') as $tag) {
+            $wrapper = $wrapper->getElementsByTagName($tag)->item(0);
+            if (!$wrapper) {
+                return $tokens;
+            }
+        }
+        
+        $this->tokenizeDOM($wrapper, $tokens);
+        return $tokens;
+    }
+    
+    /**
+     * Recursive function that tokenizes a node, putting it into an accumulator.
+     * 
+     * @param $node     DOMNode to be tokenized.
+     * @param $tokens   Array-list of already tokenized tokens.
+     * @param $collect  Says whether or start and close are collected, set to
+     *                  false at first recursion because it's the implicit DIV
+     *                  tag you're dealing with.
+     * @returns Tokens of node appended to previously passed tokens.
+     */
+    protected function tokenizeDOM($node, &$tokens, $collect = false) {
+        // recursive goodness!
+        
+        // intercept non element nodes. WE MUST catch all of them,
+        // but we're not getting the character reference nodes because
+        // those should have been preprocessed
+        if ($node->nodeType === XML_TEXT_NODE ||
+                  $node->nodeType === XML_CDATA_SECTION_NODE) {
+            $tokens[] = $this->factory->createText($node->data);
+            return;
+        } elseif ($node->nodeType === XML_COMMENT_NODE) {
+            $tokens[] = $this->factory->createComment($node->data);
+            return;
+        } elseif (
+            // not-well tested: there may be other nodes we have to grab
+            $node->nodeType !== XML_ELEMENT_NODE
+        ) {
+            return;
+        }
+        
+        $attr = $node->hasAttributes() ?
+            $this->transformAttrToAssoc($node->attributes) :
+            array();
+        
+        // We still have to make sure that the element actually IS empty
+        if (!$node->childNodes->length) {
+            if ($collect) {
+                $tokens[] = $this->factory->createEmpty($node->tagName, $attr);
+            }
+        } else {
+            if ($collect) { // don't wrap on first iteration
+                $tokens[] = $this->factory->createStart($node->tagName, $attr);
+            }
+            // use a distinct loop variable so that $node keeps referring to
+            // the element whose end tag is emitted below
+            foreach ($node->childNodes as $child) {
+                // remember, it's an accumulator. Otherwise, we'd have
+                // to use array_merge
+                $this->tokenizeDOM($child, $tokens, true);
+            }
+            if ($collect) {
+                $tokens[] = $this->factory->createEnd($node->tagName);
+            }
+        }
+        
+    }
+    
+    /**
+     * Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
+     * 
+     * @param $node_map DOMNamedNodeMap of DOMAttr objects.
+     * @returns Associative array of attributes (name => value).
+     */
+    protected function transformAttrToAssoc($node_map) {
+        // NamedNodeMap is not documented very well, so we're using
+        // undocumented features, namely, the fact that it implements
+        // Iterator and has a ->length attribute
+        if ($node_map->length === 0) return array();
+        $array = array();
+        foreach ($node_map as $attr) {
+            $array[$attr->name] = $attr->value;
+        }
+        return $array;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php
@ -0,0 +1,309 @@
+<?php
+
+require_once 'HTMLPurifier/Lexer.php';
+
+/**
+ * Our in-house implementation of a parser.
+ * 
+ * A pure PHP parser, DirectLex has absolutely no dependencies, making
+ * it a reasonably good default for PHP4.  Written with efficiency in mind,
+ * it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
+ * pales in comparison to HTMLPurifier_Lexer_DOMLex.  It will support UTF-8
+ * completely eventually.
+ * 
+ * @todo Reread XML spec and document differences.
+ * 
+ * @todo Determine correct behavior in transforming comment data. (preserve dashes?)
+ */
+class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
+{
+    
+    /**
+     * Whitespace characters for str(c)spn.
+     * @protected
+     */
+    var $_whitespace = "\x20\x09\x0D\x0A";
+    
+    /**
+     * Lexes an HTML string into tokens with a hand-written scanner.
+     * 
+     * The scanner alternates between "outside a tag" (text up to the next
+     * '<') and "inside a tag" (everything up to the next '>').
+     * 
+     * @param $html    String HTML.
+     * @param $config  Configuration object, forwarded to normalize() and
+     *                 parseAttributeString().
+     * @param $context Context object, forwarded likewise.
+     * @returns Array of tokens; an empty array if the loop guard trips.
+     */
+    function tokenizeHTML($html, $config, &$context) {
+        
+        $html = $this->normalize($html, $config, $context);
+        
+        $cursor = 0; // our location in the text
+        $inside_tag = false; // whether or not we're parsing the inside of a tag
+        $array = array(); // result array
+        
+        // infinite loop protection
+        // has to be pretty big, since html docs can be big
+        // we're allow two hundred thousand tags... more than enough?
+        $loops = 0;
+        
+        while(true) {
+            
+            // infinite loop protection
+            if (++$loops > 200000) return array();
+            
+            $position_next_lt = strpos($html, '<', $cursor);
+            $position_next_gt = strpos($html, '>', $cursor);
+            
+            // triggers on "<b>asdf</b>" but not "asdf <b></b>"
+            if ($position_next_lt === $cursor) {
+                $inside_tag = true;
+                $cursor++;
+            }
+            
+            if (!$inside_tag && $position_next_lt !== false) {
+                // We are not inside tag and there still is another tag to parse
+                $array[] = new
+                    HTMLPurifier_Token_Text(
+                        $this->parseData(
+                            substr(
+                                $html, $cursor, $position_next_lt - $cursor
+                            )
+                        )
+                    );
+                $cursor  = $position_next_lt + 1;
+                $inside_tag = true;
+                continue;
+            } elseif (!$inside_tag) {
+                // We are not inside tag but there are no more tags
+                // If we're already at the end, break
+                if ($cursor === strlen($html)) break;
+                // Create Text of rest of string
+                $array[] = new
+                    HTMLPurifier_Token_Text(
+                        $this->parseData(
+                            substr(
+                                $html, $cursor
+                            )
+                        )
+                    );
+                break;
+            } elseif ($inside_tag && $position_next_gt !== false) {
+                // We are in tag and it is well formed
+                // Grab the internals of the tag
+                $strlen_segment = $position_next_gt - $cursor;
+                $segment = substr($html, $cursor, $strlen_segment);
+                
+                // Check if it's a comment
+                if (
+                    substr($segment, 0, 3) == '!--' &&
+                    substr($segment, $strlen_segment-2, 2) == '--'
+                ) {
+                    $array[] = new
+                        HTMLPurifier_Token_Comment(
+                            substr(
+                                $segment, 3, $strlen_segment - 5
+                            )
+                        );
+                    $inside_tag = false;
+                    $cursor = $position_next_gt + 1;
+                    continue;
+                }
+                
+                // Check if it's an end tag
+                $is_end_tag = (strpos($segment,'/') === 0);
+                if ($is_end_tag) {
+                    $type = substr($segment, 1);
+                    $array[] = new HTMLPurifier_Token_End($type);
+                    $inside_tag = false;
+                    $cursor = $position_next_gt + 1;
+                    continue;
+                }
+                
+                // Check if it is explicitly self closing, if so, remove
+                // trailing slash. Remember, we could have a tag like <br>, so
+                // any later token processing scripts must convert improperly
+                // classified EmptyTags from StartTags.
+                // The LAST slash is what matters: looking for the first one
+                // would miss tags such as <a href="/foo" />, where an
+                // earlier slash appears inside an attribute value.
+                $is_self_closing= (strrpos($segment,'/') === $strlen_segment-1);
+                if ($is_self_closing) {
+                    $strlen_segment--;
+                    $segment = substr($segment, 0, $strlen_segment);
+                }
+                
+                // Check if there are any attributes
+                $position_first_space = strcspn($segment, $this->_whitespace);
+                
+                if ($position_first_space >= $strlen_segment) {
+                    // no whitespace at all: the segment is just the tag name
+                    if ($is_self_closing) {
+                        $array[] = new HTMLPurifier_Token_Empty($segment);
+                    } else {
+                        $array[] = new HTMLPurifier_Token_Start($segment);
+                    }
+                    $inside_tag = false;
+                    $cursor = $position_next_gt + 1;
+                    continue;
+                }
+                
+                // Grab out all the data
+                $type = substr($segment, 0, $position_first_space);
+                $attribute_string =
+                    trim(
+                        substr(
+                            $segment, $position_first_space
+                        )
+                    );
+                if ($attribute_string) {
+                    $attr = $this->parseAttributeString(
+                                    $attribute_string
+                                  , $config, $context
+                              );
+                } else {
+                    $attr = array();
+                }
+                
+                if ($is_self_closing) {
+                    $array[] = new HTMLPurifier_Token_Empty($type, $attr);
+                } else {
+                    $array[] = new HTMLPurifier_Token_Start($type, $attr);
+                }
+                $cursor = $position_next_gt + 1;
+                $inside_tag = false;
+                continue;
+            } else {
+                // inside a tag that is never closed: emit the rest of the
+                // input as text, restoring the '<' that was consumed
+                $array[] = new
+                    HTMLPurifier_Token_Text(
+                        '<' .
+                        $this->parseData(
+                            substr($html, $cursor)
+                        )
+                    );
+                break;
+            }
+        }
+        return $array;
+    }
+    
+    /**
+     * Takes the inside of an HTML tag and makes an assoc array of attributes.
+     * 
+     * @param $string  Inside of tag excluding name.
+     * @param $config  Configuration object (not used here).
+     * @param $context Context object (not used here).
+     * @returns Assoc array of attributes; values have their special entities
+     *          decoded by parseData().
+     */
+    function parseAttributeString($string, $config, &$context) {
+        $string = (string) $string; // quick typecast
+        
+        if ($string == '') return array(); // no attributes
+        
+        // let's see if we can abort as quickly as possible
+        // one equal sign, no spaces => one attribute
+        $num_equal = substr_count($string, '=');
+        $has_space = strpos($string, ' ');
+        if ($num_equal === 0 && !$has_space) {
+            // bool attribute
+            return array($string => $string);
+        } elseif ($num_equal === 1 && !$has_space) {
+            // only one attribute
+            list($key, $quoted_value) = explode('=', $string);
+            $quoted_value = trim($quoted_value);
+            if (!$key) return array();
+            if (!$quoted_value) return array($key => '');
+            $first_char = @$quoted_value[0];
+            $last_char  = @$quoted_value[strlen($quoted_value)-1];
+            
+            $same_quote = ($first_char == $last_char);
+            $open_quote = ($first_char == '"' || $first_char == "'");
+            
+            if ( $same_quote && $open_quote) {
+                // well behaved
+                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
+            } else {
+                // not well behaved
+                if ($open_quote) {
+                    $value = substr($quoted_value, 1);
+                } else {
+                    $value = $quoted_value;
+                }
+            }
+            // substr() can return false on older PHP versions when nothing
+            // is left (e.g. the value is a lone quote character)
+            if ($value === false) $value = '';
+            // decode entities exactly like the general loop below does, so
+            // that a lone attribute is not treated differently
+            return array($key => $this->parseData($value));
+        }
+        
+        // setup loop environment
+        $array  = array(); // return assoc array of attributes
+        $cursor = 0; // current position in string (moves forward)
+        $size   = strlen($string); // size of the string (stays the same)
+        
+        // if we have unquoted attributes, the parser expects a terminating
+        // space, so let's guarantee that there's always a terminating space.
+        $string .= ' ';
+        
+        // infinite loop protection
+        $loops = 0;
+        
+        while(true) {
+            
+            // infinite loop protection
+            if (++$loops > 1000) return array();
+            
+            if ($cursor >= $size) {
+                break;
+            }
+            
+            // skip leading whitespace
+            $cursor += strspn($string, $this->_whitespace, $cursor);
+            
+            // grab the key
+            
+            $key_begin = $cursor; //we're currently at the start of the key
+            
+            // scroll past all characters that are the key (not whitespace or =)
+            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
+            
+            $key_end = $cursor; // now at the end of the key
+            
+            $key = substr($string, $key_begin, $key_end - $key_begin);
+            
+            if (!$key) {
+                // empty key. If we are stuck on a stray '=', step over it
+                // and the non-whitespace run that follows; otherwise the
+                // cursor never moves and the loop guard above would end up
+                // discarding every attribute.
+                if (@$string[$cursor] == '=') {
+                    $cursor++;
+                    $cursor += strcspn($string, $this->_whitespace, $cursor);
+                }
+                continue;
+            }
+            
+            // scroll past all whitespace
+            $cursor += strspn($string, $this->_whitespace, $cursor);
+            
+            if ($cursor >= $size) {
+                $array[$key] = $key;
+                break;
+            }
+            
+            // if the next character is an equal sign, we've got a regular
+            // pair, otherwise, it's a bool attribute
+            $first_char = @$string[$cursor];
+            
+            if ($first_char == '=') {
+                // key="value"
+                
+                $cursor++;
+                $cursor += strspn($string, $this->_whitespace, $cursor);
+                
+                // we might be in front of a quote right now
+                
+                $char = @$string[$cursor];
+                
+                if ($char == '"' || $char == "'") {
+                    // it's quoted, end bound is $char
+                    $cursor++;
+                    $value_begin = $cursor;
+                    $cursor = strpos($string, $char, $cursor);
+                    if ($cursor === false) {
+                        // missing end quote: take everything up to the end
+                        // of the original string as the value instead of
+                        // letting the cursor become false
+                        $cursor = $size;
+                    }
+                    $value_end = $cursor;
+                } else {
+                    // it's not quoted, end bound is whitespace
+                    $value_begin = $cursor;
+                    $cursor += strcspn($string, $this->_whitespace, $cursor);
+                    $value_end = $cursor;
+                }
+                
+                $value = substr($string, $value_begin, $value_end - $value_begin);
+                if ($value === false) $value = '';
+                $array[$key] = $this->parseData($value);
+                $cursor++;
+                
+            } else {
+                // boolattr
+                if ($key !== '') {
+                    $array[$key] = $key;
+                }
+                
+            }
+        }
+        return $array;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php
+++ b/lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php
@ -0,0 +1,110 @@
+<?php
+
+require_once 'XML/HTMLSax3.php'; // PEAR
+require_once 'HTMLPurifier/Lexer.php';
+
+/**
+ * Proof-of-concept lexer that uses the PEAR package XML_HTMLSax3 to parse HTML.
+ * 
+ * PEAR, not surprisingly, also has a SAX parser for HTML.  I don't know
+ * very much about implementation, but it's fairly well written.  However, that
+ * abstraction comes at a price: performance. You need to have it installed,
+ * and if the API changes, it might break our adapter. Not sure whether or not
+ * it's UTF-8 aware, but it has some entity parsing trouble (in all areas,
+ * text and attributes).
+ * 
+ * Quite personally, I don't recommend using the PEAR class, and the defaults
+ * don't use it. The unit tests do perform the tests on the SAX parser too, but
+ * whatever it does for poorly formed HTML is up to it.
+ * 
+ * @todo Generalize so that XML_HTMLSax is also supported.
+ * 
+ * @warning Entity-resolution inside attributes is broken.
+ */
+
+class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
+{
+    
+    /**
+     * Internal accumulator array for SAX parsers.
+     * @protected
+     */
+    var $tokens = array();
+    
+    /**
+     * Name of the empty tag that was just emitted and whose matching close
+     * event is still expected, or false when no such event is pending.
+     * @protected
+     */
+    var $_pending_empty = false;
+    
+    /**
+     * Lexes an HTML string into tokens by feeding it to XML_HTMLSax3 and
+     * collecting the tokens produced by the handlers below.
+     * 
+     * @param $string  String HTML.
+     * @param $config  Configuration object, forwarded to normalize().
+     * @param $context Context object, forwarded to normalize().
+     * @returns Array of tokens.
+     */
+    function tokenizeHTML($string, $config, &$context) {
+        
+        // reset per-run state so that the lexer can be reused
+        $this->tokens = array();
+        $this->_pending_empty = false;
+        
+        $string = $this->normalize($string, $config, $context);
+        
+        $parser = new XML_HTMLSax3();
+        $parser->set_object($this);
+        $parser->set_element_handler('openHandler','closeHandler');
+        $parser->set_data_handler('dataHandler');
+        $parser->set_escape_handler('escapeHandler');
+        
+        // doesn't seem to work correctly for attributes
+        $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
+        
+        $parser->parse($string);
+        
+        return $this->tokens;
+        
+    }
+    
+    /**
+     * Open tag event handler, interface is defined by PEAR package.
+     */
+    function openHandler(&$parser, $name, $attrs, $closed) {
+        // entities are not resolved in attrs
+        foreach ($attrs as $key => $attr) {
+            $attrs[$key] = $this->parseData($attr);
+        }
+        if ($closed) {
+            $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
+            // remember which tag the redundant close event will belong to
+            $this->_pending_empty = $name;
+        } else {
+            $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
+            $this->_pending_empty = false;
+        }
+        return true;
+    }
+    
+    /**
+     * Close tag event handler, interface is defined by PEAR package.
+     */
+    function closeHandler(&$parser, $name) {
+        // HTMLSax3 seems to always send empty tags an extra close tag:
+        // ignore it, but only when it directly follows the empty tag AND
+        // carries the same name. Merely checking that the last token is an
+        // empty tag would also swallow the genuine end tags that follow
+        // (e.g. the </p> in <p><br /></p>), and would index a non-existent
+        // element when no token has been produced yet.
+        $pending = $this->_pending_empty;
+        $this->_pending_empty = false;
+        if ($pending !== false && $pending === $name) {
+            return true;
+        }
+        $this->tokens[] = new HTMLPurifier_Token_End($name);
+        return true;
+    }
+    
+    /**
+     * Data event handler, interface is defined by PEAR package.
+     */
+    function dataHandler(&$parser, $data) {
+        $this->_pending_empty = false;
+        $this->tokens[] = new HTMLPurifier_Token_Text($data);
+        return true;
+    }
+    
+    /**
+     * Escaped text handler, interface is defined by PEAR package.
+     */
+    function escapeHandler(&$parser, $data) {
+        $this->_pending_empty = false;
+        if (strpos($data, '--') === 0) {
+            $this->tokens[] = new HTMLPurifier_Token_Comment($data);
+        }
+        // CDATA is handled elsewhere, but if it was handled here:
+        //if (strpos($data, '[CDATA[') === 0) {
+        //    $this->tokens[] = new HTMLPurifier_Token_Text(
+        //        substr($data, 7, strlen($data) - 9) );
+        //}
+        return true;
+    }
+    
+}
+
+?>
--- a/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php
+++ b/lib/htmlpurifier/HTMLPurifier/PercentEncoder.php
@ -0,0 +1,47 @@
+<?php
+
+/**
+ * Class that handles operations involving percent-encoding in URIs.
+ */
+class HTMLPurifier_PercentEncoder
+{
+    
+    /**
+     * Normalizes the percent-encoding of a string.
+     * 
+     * Escapes of unreserved characters (letters, digits and ~ - . _) are
+     * decoded, all other valid escapes get upper-case hex digits, and a
+     * percent sign that is not followed by two hex digits is itself
+     * encoded as %25.
+     * 
+     * @param $string String to normalize
+     * @returns Normalized string
+     */
+    function normalize($string) {
+        if ($string == '') {
+            return '';
+        }
+        
+        $segments = explode('%', $string);
+        
+        // whatever precedes the first percent sign is passed through as-is
+        $normalized = array_shift($segments);
+        
+        foreach ($segments as $segment) {
+            $hex = substr($segment, 0, 2);
+            
+            // too short or not hexadecimal: the percent sign was a literal
+            if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
+                $normalized .= '%25' . $segment;
+                continue;
+            }
+            
+            $tail = substr($segment, 2);
+            $code = hexdec($hex);
+            
+            $is_unreserved =
+                ($code >= 48 && $code <= 57) ||  // digits
+                ($code >= 65 && $code <= 90) ||  // uppercase letters
+                ($code >= 97 && $code <= 122) || // lowercase letters
+                $code == 126 || $code == 45 || $code == 46 || $code == 95; // ~-._
+            
+            if ($is_unreserved) {
+                $normalized .= chr($code) . $tail;
+            } else {
+                $normalized .= '%' . strtoupper($hex) . $tail;
+            }
+        }
+        
+        return $normalized;
+    }
+    
+}
+
+?>
--- a/Show more
+++ b/Show more