mirror of
https://github.com/moodle/moodle.git
synced 2025-08-04 16:36:37 +02:00
MDL-9151 HTML Purifier cleaning support - enable switch is in experimental section
MDL-9435 Reviewved url cleaning in redirect()
This commit is contained in:
parent
c85607f0be
commit
e0ac8448c7
129 changed files with 10389 additions and 20 deletions
7
lib/htmlpurifier/CREDITS
Normal file
7
lib/htmlpurifier/CREDITS
Normal file
|
@ -0,0 +1,7 @@
|
|||
|
||||
CREDITS
|
||||
|
||||
Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
|
||||
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
|
||||
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
|
||||
for letting me package his fantastic XSS cheatsheet for a smoketest.
|
10
lib/htmlpurifier/HTMLPurifier.auto.php
Normal file
10
lib/htmlpurifier/HTMLPurifier.auto.php
Normal file
|
@ -0,0 +1,10 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* This is a stub include that automatically configures the include path.
|
||||
*/
|
||||
|
||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
?>
|
21
lib/htmlpurifier/HTMLPurifier.func.php
Normal file
21
lib/htmlpurifier/HTMLPurifier.func.php
Normal file
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Function wrapper for HTML Purifier for quick use.
|
||||
* @note This function only includes the library when it is called. While
|
||||
* this is efficient for instances when you only use HTML Purifier
|
||||
* on a few of your pages, it murders bytecode caching. You still
|
||||
* need to add HTML Purifier to your path.
|
||||
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
|
||||
*/
|
||||
|
||||
function HTMLPurifier($html, $config = null) {
|
||||
static $purifier = false;
|
||||
if (!$purifier) {
|
||||
require_once 'HTMLPurifier.php';
|
||||
$purifier = new HTMLPurifier();
|
||||
}
|
||||
return $purifier->purify($html, $config);
|
||||
}
|
||||
|
||||
?>
|
170
lib/htmlpurifier/HTMLPurifier.php
Normal file
170
lib/htmlpurifier/HTMLPurifier.php
Normal file
|
@ -0,0 +1,170 @@
|
|||
<?php
|
||||
|
||||
/*!
|
||||
* @mainpage
|
||||
*
|
||||
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
|
||||
* HTML and rigorously test, validate and filter it into a version that
|
||||
* is safe for output onto webpages. It achieves this by:
|
||||
*
|
||||
* -# Lexing (parsing into tokens) the document,
|
||||
* -# Executing various strategies on the tokens:
|
||||
* -# Removing all elements not in the whitelist,
|
||||
* -# Making the tokens well-formed,
|
||||
* -# Fixing the nesting of the nodes, and
|
||||
* -# Validating attributes of the nodes; and
|
||||
* -# Generating HTML from the purified tokens.
|
||||
*
|
||||
* However, most users will only need to interface with the HTMLPurifier
|
||||
* class, so this massive amount of infrastructure is usually concealed.
|
||||
* If you plan on working with the internals, be sure to include
|
||||
* HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 1.6.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
// almost every class has an undocumented dependency to these, so make sure
|
||||
// they get included
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
||||
/**
|
||||
* Main library execution class.
|
||||
*
|
||||
* Facade that performs calls to the HTMLPurifier_Lexer,
|
||||
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
|
||||
* purify HTML.
|
||||
*
|
||||
* @todo We need an easier way to inject strategies, it'll probably end
|
||||
* up getting done through config though.
|
||||
*/
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '1.6.0';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
|
||||
var $lexer, $strategy, $generator;
|
||||
|
||||
/**
|
||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||
* @public
|
||||
*/
|
||||
var $context;
|
||||
|
||||
/**
|
||||
* Initializes the purifier.
|
||||
* @param $config Optional HTMLPurifier_Config object for all instances of
|
||||
* the purifier, if omitted, a default configuration is
|
||||
* supplied (which can be overridden on a per-use basis).
|
||||
* The parameter can also be any type that
|
||||
* HTMLPurifier_Config::create() supports.
|
||||
*/
|
||||
function HTMLPurifier($config = null) {
|
||||
|
||||
$this->config = HTMLPurifier_Config::create($config);
|
||||
|
||||
$this->lexer = HTMLPurifier_Lexer::create();
|
||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||
$this->generator = new HTMLPurifier_Generator();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a filter to process the output. First come first serve
|
||||
* @param $filter HTMLPurifier_Filter object
|
||||
*/
|
||||
function addFilter($filter) {
|
||||
$this->filters[] = $filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
|
||||
*
|
||||
* @param $html String of HTML to purify
|
||||
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
||||
* defaults to the config object specified during this
|
||||
* object's construction. The parameter can also be any type
|
||||
* that HTMLPurifier_Config::create() supports.
|
||||
* @return Purified HTML
|
||||
*/
|
||||
function purify($html, $config = null) {
|
||||
|
||||
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||
|
||||
$context = new HTMLPurifier_Context();
|
||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||
|
||||
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
|
||||
$html = $this->filters[$i]->preFilter($html, $config, $context);
|
||||
}
|
||||
|
||||
// purified HTML
|
||||
$html =
|
||||
$this->generator->generateFromTokens(
|
||||
// list of tokens
|
||||
$this->strategy->execute(
|
||||
// list of un-purified tokens
|
||||
$this->lexer->tokenizeHTML(
|
||||
// un-purified HTML
|
||||
$html, $config, $context
|
||||
),
|
||||
$config, $context
|
||||
),
|
||||
$config, $context
|
||||
);
|
||||
|
||||
for ($i = $size - 1; $i >= 0; $i--) {
|
||||
$html = $this->filters[$i]->postFilter($html, $config, $context);
|
||||
}
|
||||
|
||||
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
|
||||
$this->context =& $context;
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters an array of HTML snippets
|
||||
* @param $config Optional HTMLPurifier_Config object for this operation.
|
||||
* See HTMLPurifier::purify() for more details.
|
||||
* @return Array of purified HTML
|
||||
*/
|
||||
function purifyArray($array_of_html, $config = null) {
|
||||
$context_array = array();
|
||||
foreach ($array_of_html as $key => $html) {
|
||||
$array_of_html[$key] = $this->purify($html, $config);
|
||||
$context_array[$key] = $this->context;
|
||||
}
|
||||
$this->context = $context_array;
|
||||
return $array_of_html;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
?>
|
100
lib/htmlpurifier/HTMLPurifier/AttrCollections.php
Normal file
100
lib/htmlpurifier/HTMLPurifier/AttrCollections.php
Normal file
|
@ -0,0 +1,100 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTypes.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||||
|
||||
/**
|
||||
* Defines common attribute collections that modules reference
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrCollections
|
||||
{
|
||||
|
||||
/**
|
||||
* Associative array of attribute collections, indexed by name
|
||||
* @note Technically, the composition of these is more complicated,
|
||||
* but we bypass it using our own excludes property
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Performs all expansions on internal data for use by other inclusions
|
||||
* It also collects all attribute collection extensions from
|
||||
* modules
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
* @param $modules Hash array of HTMLPurifier_HTMLModule members
|
||||
*/
|
||||
function HTMLPurifier_AttrCollections($attr_types, $modules) {
|
||||
$info =& $this->info;
|
||||
// load extensions from the modules
|
||||
foreach ($modules as $module) {
|
||||
foreach ($module->attr_collections as $coll_i => $coll) {
|
||||
foreach ($coll as $attr_i => $attr) {
|
||||
if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
|
||||
// merge in includes
|
||||
$info[$coll_i][$attr_i] = array_merge(
|
||||
$info[$coll_i][$attr_i], $attr);
|
||||
continue;
|
||||
}
|
||||
$info[$coll_i][$attr_i] = $attr;
|
||||
}
|
||||
}
|
||||
}
|
||||
// perform internal expansions and inclusions
|
||||
foreach ($info as $name => $attr) {
|
||||
// merge attribute collections that include others
|
||||
$this->performInclusions($info[$name]);
|
||||
// replace string identifiers with actual attribute objects
|
||||
$this->expandIdentifiers($info[$name], $attr_types);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a reference to an attribute associative array and performs
|
||||
* all inclusions specified by the zero index.
|
||||
* @param &$attr Reference to attribute array
|
||||
*/
|
||||
function performInclusions(&$attr) {
|
||||
if (!isset($attr[0])) return;
|
||||
$merge = $attr[0];
|
||||
// loop through all the inclusions
|
||||
for ($i = 0; isset($merge[$i]); $i++) {
|
||||
// foreach attribute of the inclusion, copy it over
|
||||
foreach ($this->info[$merge[$i]] as $key => $value) {
|
||||
if (isset($attr[$key])) continue; // also catches more inclusions
|
||||
$attr[$key] = $value;
|
||||
}
|
||||
if (isset($info[$merge[$i]][0])) {
|
||||
// recursion
|
||||
$merge = array_merge($merge, isset($info[$merge[$i]][0]));
|
||||
}
|
||||
}
|
||||
unset($attr[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands all string identifiers in an attribute array by replacing
|
||||
* them with the appropriate values inside HTMLPurifier_AttrTypes
|
||||
* @param &$attr Reference to attribute array
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
*/
|
||||
function expandIdentifiers(&$attr, $attr_types) {
|
||||
foreach ($attr as $def_i => $def) {
|
||||
if ($def_i === 0) continue;
|
||||
if (!is_string($def)) continue;
|
||||
if ($def === false) {
|
||||
unset($attr[$def_i]);
|
||||
continue;
|
||||
}
|
||||
if (isset($attr_types->info[$def])) {
|
||||
$attr[$def_i] = $attr_types->info[$def];
|
||||
} else {
|
||||
trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
|
||||
unset($attr[$def_i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
67
lib/htmlpurifier/HTMLPurifier/AttrDef.php
Normal file
67
lib/htmlpurifier/HTMLPurifier/AttrDef.php
Normal file
|
@ -0,0 +1,67 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Base class for all validating attribute definitions.
|
||||
*
|
||||
* This family of classes forms the core for not only HTML attribute validation,
|
||||
* but also any sort of string that needs to be validated or cleaned (which
|
||||
* means CSS properties and composite definitions are defined here too).
|
||||
* Besides defining (through code) what precisely makes the string valid,
|
||||
* subclasses are also responsible for cleaning the code if possible.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is minimized. Only the
|
||||
* boolean attribute vapourware would use this.
|
||||
*/
|
||||
var $minimized = false;
|
||||
|
||||
/**
|
||||
* Validates and cleans passed string according to a definition.
|
||||
*
|
||||
* @public
|
||||
* @param $string String to be validated and cleaned.
|
||||
* @param $config Mandatory HTMLPurifier_Config object.
|
||||
* @param $context Mandatory HTMLPurifier_AttrContext object.
|
||||
*/
|
||||
function validate($string, $config, &$context) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method that parses a string as if it were CDATA.
|
||||
*
|
||||
* This method process a string in the manner specified at
|
||||
* <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
|
||||
* leading and trailing whitespace, ignoring line feeds, and replacing
|
||||
* carriage returns and tabs with spaces. While most useful for HTML
|
||||
* attributes specified as CDATA, it can also be applied to most CSS
|
||||
* values.
|
||||
*
|
||||
* @note This method is not entirely standards compliant, as trim() removes
|
||||
* more types of whitespace than specified in the spec. In practice,
|
||||
* this is rarely a problem, as those extra characters usually have
|
||||
* already been removed by HTMLPurifier_Encoder.
|
||||
*
|
||||
* @warning This processing is inconsistent with XML's whitespace handling
|
||||
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
||||
* 4.7. Compliant processing requires all line breaks normalized
|
||||
* to "\n", so the fix is not as simple as fixing it in this
|
||||
* function. Trim and whitespace collapsing are supposed to only
|
||||
* occur in NMTOKENs. However, note that we are NOT necessarily
|
||||
* parsing XML, thus, this behavior may still be correct.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
function parseCDATA($string) {
|
||||
$string = trim($string);
|
||||
$string = str_replace("\n", '', $string);
|
||||
$string = str_replace(array("\r", "\t"), ' ', $string);
|
||||
return $string;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
69
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php
Normal file
69
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS.php
Normal file
|
@ -0,0 +1,69 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute style, otherwise known as CSS.
|
||||
* @note We don't implement the whole CSS specification, so it might be
|
||||
* difficult to reuse this component in the context of validating
|
||||
* actual stylesheet declarations.
|
||||
* @note If we were really serious about validating the CSS, we would
|
||||
* tokenize the styles and then parse the tokens. Obviously, we
|
||||
* are not doing that. Doing that could seriously harm performance,
|
||||
* but would make these components a lot more viable for a CSS
|
||||
* filtering solution.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($css, $config, &$context) {
|
||||
|
||||
$css = $this->parseCDATA($css);
|
||||
|
||||
$definition = $config->getCSSDefinition();
|
||||
|
||||
// we're going to break the spec and explode by semicolons.
|
||||
// This is because semicolon rarely appears in escaped form
|
||||
// Doing this is generally flaky but fast
|
||||
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
|
||||
// for details
|
||||
|
||||
$declarations = explode(';', $css);
|
||||
$propvalues = array();
|
||||
|
||||
foreach ($declarations as $declaration) {
|
||||
if (!$declaration) continue;
|
||||
if (!strpos($declaration, ':')) continue;
|
||||
list($property, $value) = explode(':', $declaration, 2);
|
||||
$property = trim($property);
|
||||
$value = trim($value);
|
||||
if (!isset($definition->info[$property])) continue;
|
||||
// inefficient call, since the validator will do this again
|
||||
if (strtolower(trim($value)) !== 'inherit') {
|
||||
// inherit works for everything (but only on the base property)
|
||||
$result = $definition->info[$property]->validate(
|
||||
$value, $config, $context );
|
||||
} else {
|
||||
$result = 'inherit';
|
||||
}
|
||||
if ($result === false) continue;
|
||||
$propvalues[$property] = $result;
|
||||
}
|
||||
|
||||
// procedure does not write the new CSS simultaneously, so it's
|
||||
// slightly inefficient, but it's the only way of getting rid of
|
||||
// duplicates. Perhaps config to optimize it, but not now.
|
||||
|
||||
$new_declarations = '';
|
||||
foreach ($propvalues as $prop => $value) {
|
||||
$new_declarations .= "$prop:$value;";
|
||||
}
|
||||
|
||||
return $new_declarations ? $new_declarations : false;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
87
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php
Normal file
87
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Background.php
Normal file
|
@ -0,0 +1,87 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property background.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Background($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['background-color'] = $def->info['background-color'];
|
||||
$this->info['background-image'] = $def->info['background-image'];
|
||||
$this->info['background-repeat'] = $def->info['background-repeat'];
|
||||
$this->info['background-attachment'] = $def->info['background-attachment'];
|
||||
$this->info['background-position'] = $def->info['background-position'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
// assumes URI doesn't have spaces in it
|
||||
$bits = explode(' ', strtolower($string)); // bits to process
|
||||
|
||||
$caught = array();
|
||||
$caught['color'] = false;
|
||||
$caught['image'] = false;
|
||||
$caught['repeat'] = false;
|
||||
$caught['attachment'] = false;
|
||||
$caught['position'] = false;
|
||||
|
||||
$i = 0; // number of catches
|
||||
$none = false;
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($bit === '') continue;
|
||||
foreach ($caught as $key => $status) {
|
||||
if ($key != 'position') {
|
||||
if ($status !== false) continue;
|
||||
$r = $this->info['background-' . $key]->validate($bit, $config, $context);
|
||||
} else {
|
||||
$r = $bit;
|
||||
}
|
||||
if ($r === false) continue;
|
||||
if ($key == 'position') {
|
||||
if ($caught[$key] === false) $caught[$key] = '';
|
||||
$caught[$key] .= $r . ' ';
|
||||
} else {
|
||||
$caught[$key] = $r;
|
||||
}
|
||||
$i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$i) return false;
|
||||
if ($caught['position'] !== false) {
|
||||
$caught['position'] = $this->info['background-position']->
|
||||
validate($caught['position'], $config, $context);
|
||||
}
|
||||
|
||||
$ret = array();
|
||||
foreach ($caught as $value) {
|
||||
if ($value === false) continue;
|
||||
$ret[] = $value;
|
||||
}
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
130
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
Normal file
130
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
Normal file
|
@ -0,0 +1,130 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
|
||||
/* W3C says:
|
||||
[ // adjective and number must be in correct order, even if
|
||||
// you could switch them without introducing ambiguity.
|
||||
// some browsers support that syntax
|
||||
[
|
||||
<percentage> | <length> | left | center | right
|
||||
]
|
||||
[
|
||||
<percentage> | <length> | top | center | bottom
|
||||
]?
|
||||
] |
|
||||
[ // this signifies that the vertical and horizontal adjectives
|
||||
// can be arbitrarily ordered, however, there can only be two,
|
||||
// one of each, or none at all
|
||||
[
|
||||
left | center | right
|
||||
] ||
|
||||
[
|
||||
top | center | bottom
|
||||
]
|
||||
]
|
||||
top, left = 0%
|
||||
center, (none) = 50%
|
||||
bottom, right = 100%
|
||||
*/
|
||||
|
||||
/* QuirksMode says:
|
||||
keyword + length/percentage must be ordered correctly, as per W3C
|
||||
|
||||
Internet Explorer and Opera, however, support arbitrary ordering. We
|
||||
should fix it up.
|
||||
|
||||
Minor issue though, not strictly necessary.
|
||||
*/
|
||||
|
||||
// control freaks may appreciate the ability to convert these to
|
||||
// percentages or something, but it's not necessary
|
||||
|
||||
/**
|
||||
* Validates the value of background-position.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $length;
|
||||
var $percentage;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
|
||||
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
|
||||
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
$bits = explode(' ', $string);
|
||||
|
||||
$keywords = array();
|
||||
$keywords['h'] = false; // left, right
|
||||
$keywords['v'] = false; // top, bottom
|
||||
$keywords['c'] = false; // center
|
||||
$measures = array();
|
||||
|
||||
$i = 0;
|
||||
|
||||
$lookup = array(
|
||||
'top' => 'v',
|
||||
'bottom' => 'v',
|
||||
'left' => 'h',
|
||||
'right' => 'h',
|
||||
'center' => 'c'
|
||||
);
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($bit === '') continue;
|
||||
|
||||
// test for keyword
|
||||
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
|
||||
if (isset($lookup[$lbit])) {
|
||||
$status = $lookup[$lbit];
|
||||
$keywords[$status] = $lbit;
|
||||
$i++;
|
||||
}
|
||||
|
||||
// test for length
|
||||
$r = $this->length->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
$measures[] = $r;
|
||||
$i++;
|
||||
}
|
||||
|
||||
// test for percentage
|
||||
$r = $this->percentage->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
$measures[] = $r;
|
||||
$i++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!$i) return false; // no valid values were caught
|
||||
|
||||
|
||||
$ret = array();
|
||||
|
||||
// first keyword
|
||||
if ($keywords['h']) $ret[] = $keywords['h'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
elseif ($keywords['c']) {
|
||||
$ret[] = $keywords['c'];
|
||||
$keywords['c'] = false; // prevent re-use: center = center center
|
||||
}
|
||||
|
||||
if ($keywords['v']) $ret[] = $keywords['v'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
elseif ($keywords['c']) $ret[] = $keywords['c'];
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
45
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php
Normal file
45
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Border.php
Normal file
|
@ -0,0 +1,45 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates the border property as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Local copy of properties this property is shorthand for.
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Border($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['border-width'] = $def->info['border-width'];
|
||||
$this->info['border-style'] = $def->info['border-style'];
|
||||
$this->info['border-top-color'] = $def->info['border-top-color'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
// we specifically will not support rgb() syntax with spaces
|
||||
$bits = explode(' ', $string);
|
||||
$done = array(); // segments we've finished
|
||||
$ret = ''; // return value
|
||||
foreach ($bits as $bit) {
|
||||
foreach ($this->info as $propname => $validator) {
|
||||
if (isset($done[$propname])) continue;
|
||||
$r = $validator->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
$ret .= $r . ' ';
|
||||
$done[$propname] = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rtrim($ret);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
97
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php
Normal file
97
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Color.php
Normal file
|
@ -0,0 +1,97 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates Color as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Color keyword lookup table.
|
||||
* @todo Extend it to include all usually allowed colors.
|
||||
*/
|
||||
var $colors = array(
|
||||
'maroon' => '#800000',
|
||||
'red' => '#F00',
|
||||
'orange' => '#FFA500',
|
||||
'yellow' => '#FF0',
|
||||
'olive' => '#808000',
|
||||
'purple' => '#800080',
|
||||
'fuchsia' => '#F0F',
|
||||
'white' => '#FFF',
|
||||
'lime' => '#0F0',
|
||||
'green' => '#008000',
|
||||
'navy' => '#000080',
|
||||
'blue' => '#00F',
|
||||
'aqua' => '#0FF',
|
||||
'teal' => '#008080',
|
||||
'black' => '#000',
|
||||
'silver' => '#C0C0C0',
|
||||
'gray' => '#808080'
|
||||
);
|
||||
|
||||
function validate($color, $config, &$context) {
|
||||
|
||||
$color = trim($color);
|
||||
if (!$color) return false;
|
||||
|
||||
$lower = strtolower($color);
|
||||
if (isset($this->colors[$lower])) return $this->colors[$lower];
|
||||
|
||||
if ($color[0] === '#') {
|
||||
// hexadecimal handling
|
||||
$hex = substr($color, 1);
|
||||
$length = strlen($hex);
|
||||
if ($length !== 3 && $length !== 6) return false;
|
||||
if (!ctype_xdigit($hex)) return false;
|
||||
} else {
|
||||
// rgb literal handling
|
||||
if (strpos($color, 'rgb(')) return false;
|
||||
$length = strlen($color);
|
||||
if (strpos($color, ')') !== $length - 1) return false;
|
||||
$triad = substr($color, 4, $length - 4 - 1);
|
||||
$parts = explode(',', $triad);
|
||||
if (count($parts) !== 3) return false;
|
||||
$type = false; // to ensure that they're all the same type
|
||||
$new_parts = array();
|
||||
foreach ($parts as $part) {
|
||||
$part = trim($part);
|
||||
if ($part === '') return false;
|
||||
$length = strlen($part);
|
||||
if ($part[$length - 1] === '%') {
|
||||
// handle percents
|
||||
if (!$type) {
|
||||
$type = 'percentage';
|
||||
} elseif ($type !== 'percentage') {
|
||||
return false;
|
||||
}
|
||||
$num = (float) substr($part, 0, $length - 1);
|
||||
if ($num < 0) $num = 0;
|
||||
if ($num > 100) $num = 100;
|
||||
$new_parts[] = "$num%";
|
||||
} else {
|
||||
// handle integers
|
||||
if (!$type) {
|
||||
$type = 'integer';
|
||||
} elseif ($type !== 'integer') {
|
||||
return false;
|
||||
}
|
||||
$num = (int) $part;
|
||||
if ($num < 0) $num = 0;
|
||||
if ($num > 255) $num = 255;
|
||||
$new_parts[] = (string) $num;
|
||||
}
|
||||
}
|
||||
$new_triad = implode(',', $new_parts);
|
||||
$color = "rgb($new_triad)";
|
||||
}
|
||||
|
||||
return $color;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
38
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php
Normal file
38
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Composite.php
Normal file
|
@ -0,0 +1,38 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Allows multiple validators to attempt to validate attribute.
|
||||
*
|
||||
* Composite is just what it sounds like: a composite of many validators.
|
||||
* This means that multiple HTMLPurifier_AttrDef objects will have a whack
|
||||
* at the string. If one of them passes, that's what is returned. This is
|
||||
* especially useful for CSS values, which often are a choice between
|
||||
* an enumerated set of predefined values or a flexible data type.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrDef objects that may process strings
|
||||
* @protected
|
||||
*/
|
||||
var $defs;
|
||||
|
||||
/**
|
||||
* @param $defs List of HTMLPurifier_AttrDef objects
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Composite($defs) {
|
||||
$this->defs = $defs;
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
foreach ($this->defs as $i => $def) {
|
||||
$result = $this->defs[$i]->validate($string, $config, $context);
|
||||
if ($result !== false) return $result;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
154
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php
Normal file
154
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Font.php
Normal file
|
@ -0,0 +1,154 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property font.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
*
|
||||
* @note If we moved specific CSS property definitions to their own
|
||||
* classes instead of having them be assembled at run time by
|
||||
* CSSDefinition, this wouldn't be necessary. We'd instantiate
|
||||
* our own copies.
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* System font keywords.
|
||||
*/
|
||||
var $system_fonts = array(
|
||||
'caption' => true,
|
||||
'icon' => true,
|
||||
'menu' => true,
|
||||
'message-box' => true,
|
||||
'small-caption' => true,
|
||||
'status-bar' => true
|
||||
);
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Font($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['font-style'] = $def->info['font-style'];
|
||||
$this->info['font-variant'] = $def->info['font-variant'];
|
||||
$this->info['font-weight'] = $def->info['font-weight'];
|
||||
$this->info['font-size'] = $def->info['font-size'];
|
||||
$this->info['line-height'] = $def->info['line-height'];
|
||||
$this->info['font-family'] = $def->info['font-family'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
// check if it's one of the keywords
|
||||
$lowercase_string = strtolower($string);
|
||||
if (isset($this->system_fonts[$lowercase_string])) {
|
||||
return $lowercase_string;
|
||||
}
|
||||
|
||||
$bits = explode(' ', $string); // bits to process
|
||||
$stage = 0; // this indicates what we're looking for
|
||||
$caught = array(); // which stage 0 properties have we caught?
|
||||
$stage_1 = array('font-style', 'font-variant', 'font-weight');
|
||||
$final = ''; // output
|
||||
|
||||
for ($i = 0, $size = count($bits); $i < $size; $i++) {
|
||||
if ($bits[$i] === '') continue;
|
||||
switch ($stage) {
|
||||
|
||||
// attempting to catch font-style, font-variant or font-weight
|
||||
case 0:
|
||||
foreach ($stage_1 as $validator_name) {
|
||||
if (isset($caught[$validator_name])) continue;
|
||||
$r = $this->info[$validator_name]->validate(
|
||||
$bits[$i], $config, $context);
|
||||
if ($r !== false) {
|
||||
$final .= $r . ' ';
|
||||
$caught[$validator_name] = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// all three caught, continue on
|
||||
if (count($caught) >= 3) $stage = 1;
|
||||
if ($r !== false) break;
|
||||
|
||||
// attempting to catch font-size and perhaps line-height
|
||||
case 1:
|
||||
$found_slash = false;
|
||||
if (strpos($bits[$i], '/') !== false) {
|
||||
list($font_size, $line_height) =
|
||||
explode('/', $bits[$i]);
|
||||
if ($line_height === '') {
|
||||
// ooh, there's a space after the slash!
|
||||
$line_height = false;
|
||||
$found_slash = true;
|
||||
}
|
||||
} else {
|
||||
$font_size = $bits[$i];
|
||||
$line_height = false;
|
||||
}
|
||||
$r = $this->info['font-size']->validate(
|
||||
$font_size, $config, $context);
|
||||
if ($r !== false) {
|
||||
$final .= $r;
|
||||
// attempt to catch line-height
|
||||
if ($line_height === false) {
|
||||
// we need to scroll forward
|
||||
for ($j = $i + 1; $j < $size; $j++) {
|
||||
if ($bits[$j] === '') continue;
|
||||
if ($bits[$j] === '/') {
|
||||
if ($found_slash) {
|
||||
return false;
|
||||
} else {
|
||||
$found_slash = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
$line_height = $bits[$j];
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// slash already found
|
||||
$found_slash = true;
|
||||
$j = $i;
|
||||
}
|
||||
if ($found_slash) {
|
||||
$i = $j;
|
||||
$r = $this->info['line-height']->validate(
|
||||
$line_height, $config, $context);
|
||||
if ($r !== false) {
|
||||
$final .= '/' . $r;
|
||||
}
|
||||
}
|
||||
$final .= ' ';
|
||||
$stage = 2;
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
|
||||
// attempting to catch font-family
|
||||
case 2:
|
||||
$font_family =
|
||||
implode(' ', array_slice($bits, $i, $size - $i));
|
||||
$r = $this->info['font-family']->validate(
|
||||
$font_family, $config, $context);
|
||||
if ($r !== false) {
|
||||
$final .= $r . ' ';
|
||||
// processing completed successfully
|
||||
return rtrim($final);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
66
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
Normal file
66
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php
Normal file
|
@ -0,0 +1,66 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
// whitelisting allowed fonts would be nice
|
||||
|
||||
/**
|
||||
* Validates a font family list according to CSS spec
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Generic font family keywords.
|
||||
* @protected
|
||||
*/
|
||||
var $generic_names = array(
|
||||
'serif' => true,
|
||||
'sans-serif' => true,
|
||||
'monospace' => true,
|
||||
'fantasy' => true,
|
||||
'cursive' => true
|
||||
);
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
// assume that no font names contain commas in them
|
||||
$fonts = explode(',', $string);
|
||||
$final = '';
|
||||
foreach($fonts as $font) {
|
||||
$font = trim($font);
|
||||
if ($font === '') continue;
|
||||
// match a generic name
|
||||
if (isset($this->generic_names[$font])) {
|
||||
$final .= $font . ', ';
|
||||
continue;
|
||||
}
|
||||
// match a quoted name
|
||||
if ($font[0] === '"' || $font[0] === "'") {
|
||||
$length = strlen($font);
|
||||
if ($length <= 2) continue;
|
||||
$quote = $font[0];
|
||||
if ($font[$length - 1] !== $quote) continue;
|
||||
$font = substr($font, 1, $length - 2);
|
||||
}
|
||||
// process font
|
||||
if (ctype_alnum($font)) {
|
||||
// very simple font, allow it in unharmed
|
||||
$final .= $font . ', ';
|
||||
continue;
|
||||
}
|
||||
$nospace = str_replace(array(' ', '.', '!'), '', $font);
|
||||
if (ctype_alnum($nospace)) {
|
||||
// font with spaces in it
|
||||
$final .= "'$font', ";
|
||||
continue;
|
||||
}
|
||||
}
|
||||
$final = rtrim($final, ', ');
|
||||
if ($final === '') return false;
|
||||
return $final;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
56
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php
Normal file
56
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Length.php
Normal file
|
@ -0,0 +1,56 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
/**
|
||||
* Represents a Length as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Valid unit lookup table.
|
||||
* @warning The code assumes all units are two characters long. Be careful
|
||||
* if we have to change this behavior!
|
||||
*/
|
||||
var $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
|
||||
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true);
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_Number to defer number validation to
|
||||
*/
|
||||
var $number_def;
|
||||
|
||||
/**
|
||||
* @param $non_negative Bool indication whether or not negative values are
|
||||
* allowed.
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($length, $config, &$context) {
|
||||
|
||||
$length = $this->parseCDATA($length);
|
||||
if ($length === '') return false;
|
||||
if ($length === '0') return '0';
|
||||
$strlen = strlen($length);
|
||||
if ($strlen === 1) return false; // impossible!
|
||||
|
||||
// we assume all units are two characters
|
||||
$unit = substr($length, $strlen - 2);
|
||||
if (!ctype_lower($unit)) $unit = strtolower($unit);
|
||||
$number = substr($length, 0, $strlen - 2);
|
||||
|
||||
if (!isset($this->units[$unit])) return false;
|
||||
|
||||
$number = $this->number_def->validate($number, $config, $context);
|
||||
if ($number === false) return false;
|
||||
|
||||
return $number . $unit;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
80
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php
Normal file
80
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php
Normal file
|
@ -0,0 +1,80 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property list-style.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['list-style-type'] = $def->info['list-style-type'];
|
||||
$this->info['list-style-position'] = $def->info['list-style-position'];
|
||||
$this->info['list-style-image'] = $def->info['list-style-image'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
// assumes URI doesn't have spaces in it
|
||||
$bits = explode(' ', strtolower($string)); // bits to process
|
||||
|
||||
$caught = array();
|
||||
$caught['type'] = false;
|
||||
$caught['position'] = false;
|
||||
$caught['image'] = false;
|
||||
|
||||
$i = 0; // number of catches
|
||||
$none = false;
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($i >= 3) return; // optimization bit
|
||||
if ($bit === '') continue;
|
||||
foreach ($caught as $key => $status) {
|
||||
if ($status !== false) continue;
|
||||
$r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
|
||||
if ($r === false) continue;
|
||||
if ($r === 'none') {
|
||||
if ($none) continue;
|
||||
else $none = true;
|
||||
if ($key == 'image') continue;
|
||||
}
|
||||
$caught[$key] = $r;
|
||||
$i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$i) return false;
|
||||
|
||||
$ret = array();
|
||||
|
||||
// construct type
|
||||
if ($caught['type']) $ret[] = $caught['type'];
|
||||
|
||||
// construct image
|
||||
if ($caught['image']) $ret[] = $caught['image'];
|
||||
|
||||
// construct position
|
||||
if ($caught['position']) $ret[] = $caught['position'];
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
58
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php
Normal file
58
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Multiple.php
Normal file
|
@ -0,0 +1,58 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Framework class for strings that involve multiple values.
|
||||
*
|
||||
* Certain CSS properties such as border-width and margin allow multiple
|
||||
* lengths to be specified. This class can take a vanilla border-width
|
||||
* definition and multiply it, usually into a max of four.
|
||||
*
|
||||
* @note Even though the CSS specification isn't clear about it, inherit
|
||||
* can only be used alone: it will never manifest as part of a multi
|
||||
* shorthand declaration. Thus, this class does not allow inherit.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of component definition to defer validation to.
|
||||
*/
|
||||
var $single;
|
||||
|
||||
/**
|
||||
* Max number of values allowed.
|
||||
*/
|
||||
var $max;
|
||||
|
||||
/**
|
||||
* @param $single HTMLPurifier_AttrDef to multiply
|
||||
* @param $max Max number of values allowed (usually four)
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
|
||||
$this->single = $single;
|
||||
$this->max = $max;
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
$parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
|
||||
$length = count($parts);
|
||||
$final = '';
|
||||
for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) {
|
||||
if (ctype_space($parts[$i])) continue;
|
||||
$result = $this->single->validate($parts[$i], $config, $context);
|
||||
if ($result !== false) {
|
||||
$final .= $result . ' ';
|
||||
$num++;
|
||||
}
|
||||
}
|
||||
if ($final === '') return false;
|
||||
return rtrim($final);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
61
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php
Normal file
61
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Number.php
Normal file
|
@ -0,0 +1,61 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Validates a number as defined by the CSS spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not only positive values allowed.
|
||||
*/
|
||||
var $non_negative = false;
|
||||
|
||||
/**
|
||||
* @param $non_negative Bool indicating whether negatives are forbidden
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
|
||||
$this->non_negative = $non_negative;
|
||||
}
|
||||
|
||||
function validate($number, $config, &$context) {
|
||||
|
||||
$number = $this->parseCDATA($number);
|
||||
|
||||
if ($number === '') return false;
|
||||
|
||||
$sign = '';
|
||||
switch ($number[0]) {
|
||||
case '-':
|
||||
if ($this->non_negative) return false;
|
||||
$sign = '-';
|
||||
case '+':
|
||||
$number = substr($number, 1);
|
||||
}
|
||||
|
||||
if (ctype_digit($number)) {
|
||||
$number = ltrim($number, '0');
|
||||
return $number ? $sign . $number : '0';
|
||||
}
|
||||
if (!strpos($number, '.')) return false;
|
||||
|
||||
list($left, $right) = explode('.', $number, 2);
|
||||
|
||||
if (!ctype_digit($left)) return false;
|
||||
$left = ltrim($left, '0');
|
||||
|
||||
$right = rtrim($right, '0');
|
||||
|
||||
if ($right === '') {
|
||||
return $left ? $sign . $left : '0';
|
||||
} elseif (!ctype_digit($right)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return $sign . $left . '.' . $right;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
43
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php
Normal file
43
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/Percentage.php
Normal file
|
@ -0,0 +1,43 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
/**
|
||||
* Validates a Percentage as defined by the CSS spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
|
||||
*/
|
||||
var $number_def;
|
||||
|
||||
/**
|
||||
* @param Bool indicating whether to forbid negative values
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$string = $this->parseCDATA($string);
|
||||
|
||||
if ($string === '') return false;
|
||||
$length = strlen($string);
|
||||
if ($length === 1) return false;
|
||||
if ($string[$length - 1] !== '%') return false;
|
||||
|
||||
$number = substr($string, 0, $length - 1);
|
||||
$number = $this->number_def->validate($number, $config, $context);
|
||||
|
||||
if ($number === false) return false;
|
||||
return "$number%";
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
41
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php
Normal file
41
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php
Normal file
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates the value for the CSS property text-decoration
|
||||
* @note This class could be generalized into a version that acts sort of
|
||||
* like Enum except you can compound the allowed values.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Lookup table of allowed values.
|
||||
* @protected
|
||||
*/
|
||||
var $allowed_values = array(
|
||||
'line-through' => true,
|
||||
'overline' => true,
|
||||
'underline' => true
|
||||
);
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$string = strtolower($this->parseCDATA($string));
|
||||
$parts = explode(' ', $string);
|
||||
$final = '';
|
||||
foreach ($parts as $part) {
|
||||
if (isset($this->allowed_values[$part])) {
|
||||
$final .= $part . ' ';
|
||||
}
|
||||
}
|
||||
$final = rtrim($final);
|
||||
if ($final === '') return false;
|
||||
return $final;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
58
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php
Normal file
58
lib/htmlpurifier/HTMLPurifier/AttrDef/CSS/URI.php
Normal file
|
@ -0,0 +1,58 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
/**
|
||||
* Validates a URI in CSS syntax, which uses url('http://example.com')
|
||||
* @note While theoretically speaking a URI in a CSS document could
|
||||
* be non-embedded, as of CSS2 there is no such usage so we're
|
||||
* generalizing it. This may need to be changed in the future.
|
||||
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
|
||||
* the separator, you cannot put a literal semicolon in
|
||||
* in the URI. Try percent encoding it, in that case.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||
{
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_URI() {
|
||||
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
|
||||
}
|
||||
|
||||
function validate($uri_string, $config, &$context) {
|
||||
// parse the URI out of the string and then pass it onto
|
||||
// the parent object
|
||||
|
||||
$uri_string = $this->parseCDATA($uri_string);
|
||||
if (strpos($uri_string, 'url(') !== 0) return false;
|
||||
$uri_string = substr($uri_string, 4);
|
||||
$new_length = strlen($uri_string) - 1;
|
||||
if ($uri_string[$new_length] != ')') return false;
|
||||
$uri = trim(substr($uri_string, 0, $new_length));
|
||||
|
||||
if (isset($uri[0]) && ($uri[0] == "'" || $uri[0] == '"')) {
|
||||
$quote = $uri[0];
|
||||
$new_length = strlen($uri) - 1;
|
||||
if ($uri[$new_length] !== $quote) return false;
|
||||
$uri = substr($uri, 1, $new_length - 1);
|
||||
}
|
||||
|
||||
$keys = array( '(', ')', ',', ' ', '"', "'");
|
||||
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
|
||||
$uri = str_replace($values, $keys, $uri);
|
||||
|
||||
$result = parent::validate($uri, $config, $context);
|
||||
|
||||
if ($result === false) return false;
|
||||
|
||||
// escape necessary characters according to CSS spec
|
||||
// except for the comma, none of these should appear in the
|
||||
// URI at all
|
||||
$result = str_replace($keys, $values, $result);
|
||||
|
||||
return "url($result)";
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
46
lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php
Normal file
46
lib/htmlpurifier/HTMLPurifier/AttrDef/Enum.php
Normal file
|
@ -0,0 +1,46 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
// Enum = Enumerated
|
||||
/**
|
||||
* Validates a keyword against a list of valid values.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Lookup table of valid values.
|
||||
*/
|
||||
var $valid_values = array();
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not enumeration is case sensitive.
|
||||
* @note In general this is always case insensitive.
|
||||
*/
|
||||
var $case_sensitive = false; // values according to W3C spec
|
||||
|
||||
/**
|
||||
* @param $valid_values List of valid values
|
||||
* @param $case_sensitive Bool indicating whether or not case sensitive
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Enum(
|
||||
$valid_values = array(), $case_sensitive = false
|
||||
) {
|
||||
$this->valid_values = array_flip($valid_values);
|
||||
$this->case_sensitive = $case_sensitive;
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
$string = trim($string);
|
||||
if (!$this->case_sensitive) {
|
||||
$string = ctype_lower($string) ? $string : strtolower($string);
|
||||
}
|
||||
$result = isset($this->valid_values[$string]);
|
||||
|
||||
return $result ? $string : false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
121
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php
Normal file
121
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/ID.php
Normal file
|
@ -0,0 +1,121 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'EnableID', false, 'bool',
|
||||
'Allows the ID attribute in HTML. This is disabled by default '.
|
||||
'due to the fact that without proper configuration user input can '.
|
||||
'easily break the validation of a webpage by specifying an ID that is '.
|
||||
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
||||
'the wind, enable this directive, but I strongly recommend you also '.
|
||||
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
||||
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
||||
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
||||
'versions.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAlias(
|
||||
'HTML', 'EnableAttrID', 'Attr', 'EnableID'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDPrefix', '', 'string',
|
||||
'String to prefix to IDs. If you have no idea what IDs your pages '.
|
||||
'may use, you may opt to simply add a prefix to all user-submitted ID '.
|
||||
'attributes so that they are still usable, but will not conflict with '.
|
||||
'core page IDs. Example: setting the directive to \'user_\' will result in '.
|
||||
'a user submitted \'foo\' to become \'user_foo\' Be sure to set '.
|
||||
'%HTML.EnableAttrID to true before using '.
|
||||
'this. This directive was available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDPrefixLocal', '', 'string',
|
||||
'Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If '.
|
||||
'you need to allow multiple sets of '.
|
||||
'user content on web page, you may need to have a seperate prefix that '.
|
||||
'changes with each iteration. This way, seperately submitted user content '.
|
||||
'displayed on the same page doesn\'t clobber each other. Ideal values '.
|
||||
'are unique identifiers for the content it represents (i.e. the id of '.
|
||||
'the row in the database). Be sure to add a seperator (like an underscore) '.
|
||||
'at the end. Warning: this directive will not work unless %Attr.IDPrefix '.
|
||||
'is set to a non-empty value! This directive was available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDBlacklistRegexp', null, 'string/null',
|
||||
'PCRE regular expression to be matched against all IDs. If the expression '.
|
||||
'is matches, the ID is rejected. Use this with care: may cause '.
|
||||
'significant degradation. ID matching is done after all other '.
|
||||
'validation. This directive was available since 1.6.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute ID.
|
||||
* @warning Even though this is the id processor, it
|
||||
* will ignore the directive Attr:IDBlacklist, since it will only
|
||||
* go according to the ID accumulator. Since the accumulator is
|
||||
* automatically generated, it will have already absorbed the
|
||||
* blacklist. If you're hacking around, make sure you use load()!
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
// ref functionality disabled, since we also have to verify
|
||||
// whether or not the ID it refers to exists
|
||||
|
||||
function validate($id, $config, &$context) {
|
||||
|
||||
if (!$config->get('Attr', 'EnableID')) return false;
|
||||
|
||||
$id = trim($id); // trim it first
|
||||
|
||||
if ($id === '') return false;
|
||||
|
||||
$prefix = $config->get('Attr', 'IDPrefix');
|
||||
if ($prefix !== '') {
|
||||
$prefix .= $config->get('Attr', 'IDPrefixLocal');
|
||||
// prevent re-appending the prefix
|
||||
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
|
||||
} elseif ($config->get('Attr', 'IDPrefixLocal') !== '') {
|
||||
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
|
||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||
}
|
||||
|
||||
//if (!$this->ref) {
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
//}
|
||||
|
||||
// we purposely avoid using regex, hopefully this is faster
|
||||
|
||||
if (ctype_alpha($id)) {
|
||||
$result = true;
|
||||
} else {
|
||||
if (!ctype_alpha(@$id[0])) return false;
|
||||
$trim = trim( // primitive style of regexps, I suppose
|
||||
$id,
|
||||
'A..Za..z0..9:-._'
|
||||
);
|
||||
$result = ($trim === '');
|
||||
}
|
||||
|
||||
$regexp = $config->get('Attr', 'IDBlacklistRegexp');
|
||||
if ($regexp && preg_match($regexp, $id)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
||||
|
||||
// if no change was made to the ID, return the result
|
||||
// else, return the new id if stripping whitespace made it
|
||||
// valid, or return false.
|
||||
return $result ? $id : false;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
44
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php
Normal file
44
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Length.php
Normal file
|
@ -0,0 +1,44 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
|
||||
/**
|
||||
* Validates the HTML type length (not to be confused with CSS's length).
|
||||
*
|
||||
* This accepts integer pixels or percentages as lengths for certain
|
||||
* HTML attributes.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$string = trim($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
$parent_result = parent::validate($string, $config, $context);
|
||||
if ($parent_result !== false) return $parent_result;
|
||||
|
||||
$length = strlen($string);
|
||||
$last_char = $string[$length - 1];
|
||||
|
||||
if ($last_char !== '%') return false;
|
||||
|
||||
$points = substr($string, 0, $length - 1);
|
||||
|
||||
if (!is_numeric($points)) return false;
|
||||
|
||||
$points = (int) $points;
|
||||
|
||||
if ($points < 0) return '0%';
|
||||
if ($points > 100) return '100%';
|
||||
|
||||
return ((string) $points) . '%';
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
75
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php
Normal file
75
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php
Normal file
|
@ -0,0 +1,75 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedRel', array(), 'lookup',
|
||||
'List of allowed forward document relationships in the rel attribute. '.
|
||||
'Common values may be nofollow or print. By default, this is empty, '.
|
||||
'meaning that no document relationships are allowed. This directive '.
|
||||
'was available since 1.6.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedRev', array(), 'lookup',
|
||||
'List of allowed reverse document relationships in the rev attribute. '.
|
||||
'This attribute is a bit of an edge-case; if you don\'t know what it '.
|
||||
'is for, stay away. This directive was available since 1.6.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates a rel/rev link attribute against a directive of allowed values
|
||||
* @note We cannot use Enum because link types allow multiple
|
||||
* values.
|
||||
* @note Assumes link types are ASCII text
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/** Lookup array of attribute names to configuration name */
|
||||
var $configLookup = array(
|
||||
'rel' => 'AllowedRel',
|
||||
'rev' => 'AllowedRev'
|
||||
);
|
||||
|
||||
/** Name config attribute to pull. */
|
||||
var $name;
|
||||
|
||||
function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
|
||||
if (!isset($this->configLookup[$name])) {
|
||||
trigger_error('Unrecognized attribute name for link '.
|
||||
'relationship.', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$this->name = $this->configLookup[$name];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$allowed = $config->get('Attr', $this->name);
|
||||
if (empty($allowed)) return false;
|
||||
|
||||
$string = $this->parseCDATA($string);
|
||||
$parts = explode(' ', $string);
|
||||
|
||||
// lookup to prevent duplicates
|
||||
$ret_lookup = array();
|
||||
foreach ($parts as $part) {
|
||||
$part = strtolower(trim($part));
|
||||
if (!isset($allowed[$part])) continue;
|
||||
$ret_lookup[$part] = true;
|
||||
}
|
||||
|
||||
if (empty($ret_lookup)) return false;
|
||||
|
||||
$ret_array = array();
|
||||
foreach ($ret_lookup as $part => $bool) $ret_array[] = $part;
|
||||
$string = implode(' ', $ret_array);
|
||||
|
||||
return $string;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
44
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php
Normal file
44
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php
Normal file
|
@ -0,0 +1,44 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
|
||||
/**
|
||||
* Validates a MultiLength as defined by the HTML spec.
|
||||
*
|
||||
* A multilength is either a integer (pixel count), a percentage, or
|
||||
* a relative number.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$string = trim($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
$parent_result = parent::validate($string, $config, $context);
|
||||
if ($parent_result !== false) return $parent_result;
|
||||
|
||||
$length = strlen($string);
|
||||
$last_char = $string[$length - 1];
|
||||
|
||||
if ($last_char !== '*') return false;
|
||||
|
||||
$int = substr($string, 0, $length - 1);
|
||||
|
||||
if ($int == '') return '*';
|
||||
if (!is_numeric($int)) return false;
|
||||
|
||||
$int = (int) $int;
|
||||
|
||||
if ($int < 0) return false;
|
||||
if ($int == 0) return '0';
|
||||
if ($int == 1) return '*';
|
||||
return ((string) $int) . '*';
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
51
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php
Normal file
51
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php
Normal file
|
@ -0,0 +1,51 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
|
||||
/**
|
||||
* Validates contents based on NMTOKENS attribute type.
|
||||
* @note The only current use for this is the class attribute in HTML
|
||||
* @note Could have some functionality factored out into Nmtoken class
|
||||
* @warning We cannot assume this class will be used only for 'class'
|
||||
* attributes. Not sure how to hook in magic behavior, then.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$string = trim($string);
|
||||
|
||||
// early abort: '' and '0' (strings that convert to false) are invalid
|
||||
if (!$string) return false;
|
||||
|
||||
// OPTIMIZABLE!
|
||||
// do the preg_match, capture all subpatterns for reformulation
|
||||
|
||||
// we don't support U+00A1 and up codepoints or
|
||||
// escaping because I don't know how to do that with regexps
|
||||
// and plus it would complicate optimization efforts (you never
|
||||
// see that anyway).
|
||||
$matches = array();
|
||||
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
|
||||
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
|
||||
'(?:(?=\s)|\z)/'; // look ahead for space or string end
|
||||
preg_match_all($pattern, $string, $matches);
|
||||
|
||||
if (empty($matches[1])) return false;
|
||||
|
||||
// reconstruct string
|
||||
$new_string = '';
|
||||
foreach ($matches[1] as $token) {
|
||||
$new_string .= $token . ' ';
|
||||
}
|
||||
$new_string = rtrim($new_string);
|
||||
|
||||
return $new_string;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
37
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php
Normal file
37
lib/htmlpurifier/HTMLPurifier/AttrDef/HTML/Pixels.php
Normal file
|
@ -0,0 +1,37 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates an integer representation of pixels according to the HTML spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$string = trim($string);
|
||||
if ($string === '0') return $string;
|
||||
if ($string === '') return false;
|
||||
$length = strlen($string);
|
||||
if (substr($string, $length - 2) == 'px') {
|
||||
$string = substr($string, 0, $length - 2);
|
||||
}
|
||||
if (!is_numeric($string)) return false;
|
||||
$int = (int) $string;
|
||||
|
||||
if ($int < 0) return '0';
|
||||
|
||||
// upper-bound value, extremely high values can
|
||||
// crash operating systems, see <http://ha.ckers.org/imagecrash.html>
|
||||
// WARNING, above link WILL crash you if you're using Windows
|
||||
|
||||
if ($int > 1200) return '1200';
|
||||
|
||||
return (string) $int;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
75
lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php
Normal file
75
lib/htmlpurifier/HTMLPurifier/AttrDef/Integer.php
Normal file
|
@ -0,0 +1,75 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates an integer.
|
||||
* @note While this class was modeled off the CSS definition, no currently
|
||||
* allowed CSS uses this type. The properties that do are: widows,
|
||||
* orphans, z-index, counter-increment, counter-reset. Some of the
|
||||
* HTML attributes, however, find use for a non-negative version of this.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not negative values are allowed
|
||||
*/
|
||||
var $negative = true;
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not zero is allowed
|
||||
*/
|
||||
var $zero = true;
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not positive values are allowed
|
||||
*/
|
||||
var $positive = true;
|
||||
|
||||
/**
|
||||
* @param $negative Bool indicating whether or not negative values are allowed
|
||||
* @param $zero Bool indicating whether or not zero is allowed
|
||||
* @param $positive Bool indicating whether or not positive values are allowed
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Integer(
|
||||
$negative = true, $zero = true, $positive = true
|
||||
) {
|
||||
$this->negative = $negative;
|
||||
$this->zero = $zero;
|
||||
$this->positive = $positive;
|
||||
}
|
||||
|
||||
function validate($integer, $config, &$context) {
|
||||
|
||||
$integer = $this->parseCDATA($integer);
|
||||
if ($integer === '') return false;
|
||||
|
||||
// we could possibly simply typecast it to integer, but there are
|
||||
// certain fringe cases that must not return an integer.
|
||||
|
||||
// clip leading sign
|
||||
if ( $this->negative && $integer[0] === '-' ) {
|
||||
$digits = substr($integer, 1);
|
||||
if ($digits === '0') $integer = '0'; // rm minus sign for zero
|
||||
} elseif( $this->positive && $integer[0] === '+' ) {
|
||||
$digits = $integer = substr($integer, 1); // rm unnecessary plus
|
||||
} else {
|
||||
$digits = $integer;
|
||||
}
|
||||
|
||||
// test if it's numeric
|
||||
if (!ctype_digit($digits)) return false;
|
||||
|
||||
// perform scope tests
|
||||
if (!$this->zero && $integer == 0) return false;
|
||||
if (!$this->positive && $integer > 0) return false;
|
||||
if (!$this->negative && $integer < 0) return false;
|
||||
|
||||
return $integer;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
75
lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php
Normal file
75
lib/htmlpurifier/HTMLPurifier/AttrDef/Lang.php
Normal file
|
@ -0,0 +1,75 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute lang, effectively a language code.
|
||||
* @note Built according to RFC 3066, which obsoleted RFC 1766
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$string = trim($string);
|
||||
if (!$string) return false;
|
||||
|
||||
$subtags = explode('-', $string);
|
||||
$num_subtags = count($subtags);
|
||||
|
||||
if ($num_subtags == 0) return false; // sanity check
|
||||
|
||||
// process primary subtag : $subtags[0]
|
||||
$length = strlen($subtags[0]);
|
||||
switch ($length) {
|
||||
case 0:
|
||||
return false;
|
||||
case 1:
|
||||
if (! ($subtags[0] == 'x' || $subtags[0] == 'i') ) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
case 3:
|
||||
if (! ctype_alpha($subtags[0]) ) {
|
||||
return false;
|
||||
} elseif (! ctype_lower($subtags[0]) ) {
|
||||
$subtags[0] = strtolower($subtags[0]);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
$new_string = $subtags[0];
|
||||
if ($num_subtags == 1) return $new_string;
|
||||
|
||||
// process second subtag : $subtags[1]
|
||||
$length = strlen($subtags[1]);
|
||||
if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
||||
|
||||
$new_string .= '-' . $subtags[1];
|
||||
if ($num_subtags == 2) return $new_string;
|
||||
|
||||
// process all other subtags, index 2 and up
|
||||
for ($i = 2; $i < $num_subtags; $i++) {
|
||||
$length = strlen($subtags[$i]);
|
||||
if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) {
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[$i])) {
|
||||
$subtags[$i] = strtolower($subtags[$i]);
|
||||
}
|
||||
$new_string .= '-' . $subtags[$i];
|
||||
}
|
||||
|
||||
return $new_string;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
17
lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php
Normal file
17
lib/htmlpurifier/HTMLPurifier/AttrDef/Text.php
Normal file
|
@ -0,0 +1,17 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates arbitrary text according to the HTML spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
return $this->parseCDATA($string);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
296
lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php
Normal file
296
lib/htmlpurifier/HTMLPurifier/AttrDef/URI.php
Normal file
|
@ -0,0 +1,296 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DefaultScheme', 'http', 'string',
|
||||
'Defines through what scheme the output will be served, in order to '.
|
||||
'select the proper object validator when no scheme information is present.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Host', null, 'string/null',
|
||||
'Defines the domain name of the server, so we can determine whether or '.
|
||||
'an absolute URI is from your website or not. Not strictly necessary, '.
|
||||
'as users should be using relative URIs to reference resources on your '.
|
||||
'website. It will, however, let you use absolute URIs to link to '.
|
||||
'subdomains of the domain you post here: i.e. example.com will allow '.
|
||||
'sub.example.com. However, higher up domains will still be excluded: '.
|
||||
'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternal', false, 'bool',
|
||||
'Disables links to external websites. This is a highly effective '.
|
||||
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
||||
'links or images outside of your domain will be allowed. Non-linkified '.
|
||||
'URIs will still be preserved. If you want to be able to link to '.
|
||||
'subdomains or use absolute URIs, specify %URI.Host for your website. '.
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternalResources', false, 'bool',
|
||||
'Disables the embedding of external resources, preventing users from '.
|
||||
'embedding things like images from other hosts. This prevents '.
|
||||
'access tracking (good for email viewers), bandwidth leeching, '.
|
||||
'cross-site request forging, goatse.cx posting, and '.
|
||||
'other nasties, but also results in '.
|
||||
'a loss of end-user functionality (they can\'t directly post a pic '.
|
||||
'they posted from Flickr anymore). Use it if you don\'t have a '.
|
||||
'robust user-content moderation team. This directive has been '.
|
||||
'available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool',
|
||||
'Disables embedding resources, essentially meaning no pictures. You can '.
|
||||
'still link to them though. See %URI.DisableExternalResources for why '.
|
||||
'this might be a good idea. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Munge', null, 'string/null',
|
||||
'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
|
||||
'redirection service. Pass this directive a URI, with %s inserted where '.
|
||||
'the url-encoded original URI should be inserted (sample: '.
|
||||
'<code>http://www.google.com/url?q=%s</code>). '.
|
||||
'This prevents PageRank leaks, while being as transparent as possible '.
|
||||
'to users (you may also want to add some client side JavaScript to '.
|
||||
'override the text in the statusbar). Warning: many security experts '.
|
||||
'believe that this form of protection does not deter spam-bots. '.
|
||||
'You can also use this directive to redirect users to a splash page '.
|
||||
'telling them they are leaving your website. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'HostBlacklist', array(), 'list',
|
||||
'List of strings that are forbidden in the host of any URI. Use it to '.
|
||||
'kill domain names of spam, etc. Note that it will catch anything in '.
|
||||
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Disable', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $host;
|
||||
var $PercentEncoder;
|
||||
var $embeds_resource;
|
||||
|
||||
/**
|
||||
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
||||
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
$this->embeds_resource = (bool) $embeds_resource;
|
||||
}
|
||||
|
||||
function validate($uri, $config, &$context) {
|
||||
|
||||
// We'll write stack-based parsers later, for now, use regexps to
|
||||
// get things working as fast as possible (irony)
|
||||
|
||||
if ($config->get('URI', 'Disable')) return false;
|
||||
|
||||
// parse as CDATA
|
||||
$uri = $this->parseCDATA($uri);
|
||||
|
||||
// fix up percent-encoding
|
||||
$uri = $this->PercentEncoder->normalize($uri);
|
||||
|
||||
// while it would be nice to use parse_url(), that's specifically
|
||||
// for HTTP and thus won't work for our generic URI parsing
|
||||
|
||||
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
||||
$r_URI = '!'.
|
||||
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
||||
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
||||
'([^?#<>\'"]*)'. // 5. Path
|
||||
'(\?([^#<>\'"]*))?'. // 7. Query
|
||||
'(#([^<>\'"]*))?'. // 8. Fragment
|
||||
'!';
|
||||
|
||||
$matches = array();
|
||||
$result = preg_match($r_URI, $uri, $matches);
|
||||
|
||||
if (!$result) return false; // invalid URI
|
||||
|
||||
// seperate out parts
|
||||
$scheme = !empty($matches[1]) ? $matches[2] : null;
|
||||
$authority = !empty($matches[3]) ? $matches[4] : null;
|
||||
$path = $matches[5]; // always present, can be empty
|
||||
$query = !empty($matches[6]) ? $matches[7] : null;
|
||||
$fragment = !empty($matches[8]) ? $matches[9] : null;
|
||||
|
||||
|
||||
|
||||
$registry =& HTMLPurifier_URISchemeRegistry::instance();
|
||||
if ($scheme !== null) {
|
||||
// no need to validate the scheme's fmt since we do that when we
|
||||
// retrieve the specific scheme object from the registry
|
||||
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
||||
$scheme_obj = $registry->getScheme($scheme, $config, $context);
|
||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
||||
} else {
|
||||
$scheme_obj = $registry->getScheme(
|
||||
$config->get('URI', 'DefaultScheme'), $config, $context
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// the URI we're processing embeds_resource a resource in the page, but the URI
|
||||
// it references cannot be located
|
||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if ($authority !== null) {
|
||||
|
||||
// remove URI if it's absolute and we disabled externals or
|
||||
// if it's absolute and embedded and we disabled external resources
|
||||
unset($our_host);
|
||||
if (
|
||||
$config->get('URI', 'DisableExternal') ||
|
||||
(
|
||||
$config->get('URI', 'DisableExternalResources') &&
|
||||
$this->embeds_resource
|
||||
)
|
||||
) {
|
||||
$our_host = $config->get('URI', 'Host');
|
||||
if ($our_host === null) return false;
|
||||
}
|
||||
|
||||
$HEXDIG = '[A-Fa-f0-9]';
|
||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
||||
$sub_delims = '!$&\'()'; // needs []
|
||||
$pct_encoded = "%$HEXDIG$HEXDIG";
|
||||
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
|
||||
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
||||
$matches = array();
|
||||
preg_match($r_authority, $authority, $matches);
|
||||
// overloads regexp!
|
||||
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
||||
$host = !empty($matches[3]) ? $matches[3] : null;
|
||||
$port = !empty($matches[4]) ? $matches[5] : null;
|
||||
|
||||
// validate port
|
||||
if ($port !== null) {
|
||||
$port = (int) $port;
|
||||
if ($port < 1 || $port > 65535) $port = null;
|
||||
}
|
||||
|
||||
$host = $this->host->validate($host, $config, $context);
|
||||
if ($host === false) $host = null;
|
||||
|
||||
if ($this->checkBlacklist($host, $config, $context)) return false;
|
||||
|
||||
// more lenient absolute checking
|
||||
if (isset($our_host)) {
|
||||
$host_parts = array_reverse(explode('.', $host));
|
||||
// could be cached
|
||||
$our_host_parts = array_reverse(explode('.', $our_host));
|
||||
foreach ($our_host_parts as $i => $discard) {
|
||||
if (!isset($host_parts[$i])) return false;
|
||||
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
||||
}
|
||||
}
|
||||
|
||||
// userinfo and host are validated within the regexp
|
||||
|
||||
} else {
|
||||
$port = $host = $userinfo = null;
|
||||
}
|
||||
|
||||
|
||||
// query and fragment are quite simple in terms of definition:
|
||||
// *( pchar / "/" / "?" ), so define their validation routines
|
||||
// when we start fixing percent encoding
|
||||
|
||||
|
||||
|
||||
// path gets to be validated against a hodge-podge of rules depending
|
||||
// on the status of authority and scheme, but it's not that important,
|
||||
// esp. since it won't be applicable to everyone
|
||||
|
||||
|
||||
|
||||
// okay, now we defer execution to the subobject for more processing
|
||||
// note that $fragment is omitted
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
$scheme_obj->validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, $context
|
||||
);
|
||||
|
||||
|
||||
// reconstruct authority
|
||||
$authority = null;
|
||||
if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
|
||||
$authority = '';
|
||||
if($userinfo !== null) $authority .= $userinfo . '@';
|
||||
$authority .= $host;
|
||||
if($port !== null) $authority .= ':' . $port;
|
||||
}
|
||||
|
||||
// reconstruct the result
|
||||
$result = '';
|
||||
if ($scheme !== null) $result .= "$scheme:";
|
||||
if ($authority !== null) $result .= "//$authority";
|
||||
$result .= $path;
|
||||
if ($query !== null) $result .= "?$query";
|
||||
if ($fragment !== null) $result .= "#$fragment";
|
||||
|
||||
// munge if necessary
|
||||
$munge = $config->get('URI', 'Munge');
|
||||
if (!empty($scheme_obj->browsable) && $munge !== null) {
|
||||
if ($authority !== null) {
|
||||
$result = str_replace('%s', rawurlencode($result), $munge);
|
||||
}
|
||||
}
|
||||
|
||||
return $result;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks a host against an array blacklist
|
||||
* @param $host Host to check
|
||||
* @param $config HTMLPurifier_Config instance
|
||||
* @param $context HTMLPurifier_Context instance
|
||||
* @return bool Is spam?
|
||||
*/
|
||||
function checkBlacklist($host, &$config, &$context) {
|
||||
$blacklist = $config->get('URI', 'HostBlacklist');
|
||||
if (!empty($blacklist)) {
|
||||
foreach($blacklist as $blacklisted_host_fragment) {
|
||||
if (strpos($host, $blacklisted_host_fragment) !== false) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
17
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php
Normal file
17
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Email.php
Normal file
|
@ -0,0 +1,17 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Unpacks a mailbox into its display-name and address
|
||||
*/
|
||||
function unpack($string) {
|
||||
// needs to be implemented
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||
|
||||
/**
|
||||
* Primitive email validation class based on the regexp found at
|
||||
* http://www.regular-expressions.info/email.html
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
// no support for named mailboxes i.e. "Bob <bob@example.com>"
|
||||
// that needs more percent encoding to be done
|
||||
if ($string == '') return false;
|
||||
$string = trim($string);
|
||||
$result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string);
|
||||
return $result ? $string : false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
54
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php
Normal file
54
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php
Normal file
|
@ -0,0 +1,54 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv6.php';
|
||||
|
||||
/**
|
||||
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
|
||||
*/
|
||||
var $ipv4;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
|
||||
*/
|
||||
var $ipv6;
|
||||
|
||||
function HTMLPurifier_AttrDef_URI_Host() {
|
||||
$this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
|
||||
$this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
$length = strlen($string);
|
||||
if ($string === '') return '';
|
||||
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
|
||||
//IPv6
|
||||
$ip = substr($string, 1, $length - 2);
|
||||
$valid = $this->ipv6->validate($ip, $config, $context);
|
||||
if ($valid === false) return false;
|
||||
return '['. $valid . ']';
|
||||
}
|
||||
|
||||
// need to do checks on unusual encodings too
|
||||
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
||||
if ($ipv4 !== false) return $ipv4;
|
||||
|
||||
// validate a domain name here, do filtering, etc etc etc
|
||||
|
||||
// We could use this, but it would break I18N domain names
|
||||
//$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
|
||||
//if (!$match) return false;
|
||||
|
||||
return $string;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
36
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php
Normal file
36
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv4.php
Normal file
|
@ -0,0 +1,36 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates an IPv4 address
|
||||
* @author Feyd @ forums.devnetwork.net (public domain)
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* IPv4 regex, protected so that IPv6 can reuse it
|
||||
* @protected
|
||||
*/
|
||||
var $ip4;
|
||||
|
||||
function HTMLPurifier_AttrDef_URI_IPv4() {
|
||||
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
||||
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
||||
}
|
||||
|
||||
function validate($aIP, $config, &$context) {
|
||||
|
||||
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
|
||||
{
|
||||
return $aIP;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
99
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php
Normal file
99
lib/htmlpurifier/HTMLPurifier/AttrDef/URI/IPv6.php
Normal file
|
@ -0,0 +1,99 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
|
||||
/**
|
||||
* Validates an IPv6 address.
|
||||
* @author Feyd @ forums.devnetwork.net (public domain)
|
||||
* @note This function requires brackets to have been removed from address
|
||||
* in URI.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
||||
{
|
||||
|
||||
function validate($aIP, $config, &$context) {
|
||||
|
||||
$original = $aIP;
|
||||
|
||||
$hex = '[0-9a-fA-F]';
|
||||
$blk = '(?:' . $hex . '{1,4})';
|
||||
$pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128
|
||||
|
||||
// prefix check
|
||||
if (strpos($aIP, '/') !== false)
|
||||
{
|
||||
if (preg_match('#' . $pre . '$#s', $aIP, $find))
|
||||
{
|
||||
$aIP = substr($aIP, 0, 0-strlen($find[0]));
|
||||
unset($find);
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// IPv4-compatiblity check
|
||||
if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
|
||||
{
|
||||
$aIP = substr($aIP, 0, 0-strlen($find[0]));
|
||||
$ip = explode('.', $find[0]);
|
||||
$ip = array_map('dechex', $ip);
|
||||
$aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
|
||||
unset($find, $ip);
|
||||
}
|
||||
|
||||
// compression check
|
||||
$aIP = explode('::', $aIP);
|
||||
$c = count($aIP);
|
||||
if ($c > 2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
elseif ($c == 2)
|
||||
{
|
||||
list($first, $second) = $aIP;
|
||||
$first = explode(':', $first);
|
||||
$second = explode(':', $second);
|
||||
|
||||
if (count($first) + count($second) > 8)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
while(count($first) < 8)
|
||||
{
|
||||
array_push($first, '0');
|
||||
}
|
||||
|
||||
array_splice($first, 8 - count($second), 8, $second);
|
||||
$aIP = $first;
|
||||
unset($first,$second);
|
||||
}
|
||||
else
|
||||
{
|
||||
$aIP = explode(':', $aIP[0]);
|
||||
}
|
||||
$c = count($aIP);
|
||||
|
||||
if ($c != 8)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// All the pieces should be 16-bit hex strings. Are they?
|
||||
foreach ($aIP as $piece)
|
||||
{
|
||||
if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return $original;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
34
lib/htmlpurifier/HTMLPurifier/AttrTransform.php
Normal file
34
lib/htmlpurifier/HTMLPurifier/AttrTransform.php
Normal file
|
@ -0,0 +1,34 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Processes an entire attribute array for corrections needing multiple values.
|
||||
*
|
||||
* Occasionally, a certain attribute will need to be removed and popped onto
|
||||
* another value. Instead of creating a complex return syntax for
|
||||
* HTMLPurifier_AttrDef, we just pass the whole attribute array to a
|
||||
* specialized object and have that do the special work. That is the
|
||||
* family of HTMLPurifier_AttrTransform.
|
||||
*
|
||||
* An attribute transformation can be assigned to run before or after
|
||||
* HTMLPurifier_AttrDef validation. See HTMLPurifier_HTMLDefinition for
|
||||
* more details.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
/**
|
||||
* Abstract: makes changes to the attributes dependent on multiple values.
|
||||
*
|
||||
* @param $attr Assoc array of attributes, usually from
|
||||
* HTMLPurifier_Token_Tag::$attr
|
||||
* @param $config Mandatory HTMLPurifier_Config object.
|
||||
* @param $context Mandatory HTMLPurifier_Context object
|
||||
* @returns Processed attribute array.
|
||||
*/
|
||||
function transform($attr, $config, &$context) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
31
lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php
Normal file
31
lib/htmlpurifier/HTMLPurifier/AttrTransform/BdoDir.php
Normal file
|
@ -0,0 +1,31 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
// this MUST be placed in post, as it assumes that any value in dir is valid
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DefaultTextDir', 'ltr', 'string',
|
||||
'Defines the default text direction (ltr or rtl) of the document '.
|
||||
'being parsed. This generally is the same as the value of the dir '.
|
||||
'attribute in HTML, or ltr if that is not specified.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
|
||||
);
|
||||
|
||||
/**
|
||||
* Post-trasnform that ensures that bdo tags have the dir attribute set.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
if (isset($attr['dir'])) return $attr;
|
||||
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
28
lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php
Normal file
28
lib/htmlpurifier/HTMLPurifier/AttrTransform/BgColor.php
Normal file
|
@ -0,0 +1,28 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated bgcolor attribute to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_BgColor
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr['bgcolor'])) return $attr;
|
||||
|
||||
$bgcolor = $attr['bgcolor'];
|
||||
unset($attr['bgcolor']);
|
||||
// some validation should happen here
|
||||
|
||||
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
|
||||
$attr['style'] = "background-color:$bgcolor;" . $attr['style'];
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
28
lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php
Normal file
28
lib/htmlpurifier/HTMLPurifier/AttrTransform/Border.php
Normal file
|
@ -0,0 +1,28 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated border attribute to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Border
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr['border'])) return $attr;
|
||||
|
||||
$border_width = $attr['border'];
|
||||
unset($attr['border']);
|
||||
// some validation should happen here
|
||||
|
||||
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
|
||||
$attr['style'] = "border:{$border_width}px solid;" . $attr['style'];
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
50
lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php
Normal file
50
lib/htmlpurifier/HTMLPurifier/AttrTransform/ImgRequired.php
Normal file
|
@ -0,0 +1,50 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
// must be called POST validation
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DefaultInvalidImage', '', 'string',
|
||||
'This is the default image an img tag will be pointed to if it does '.
|
||||
'not have a valid src attribute. In future versions, we may allow the '.
|
||||
'image tag to be removed completely, but due to design issues, this is '.
|
||||
'not possible right now.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
|
||||
'This is the content of the alt tag of an invalid image if the user '.
|
||||
'had not previously specified an alt attribute. It has no effect when the '.
|
||||
'image is valid but there was no alt attribute present.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Post-transform that ensures the required attrs of img (alt and src) are set
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
$src = true;
|
||||
if (!isset($attr['src'])) {
|
||||
$attr['src'] = $config->get('Attr', 'DefaultInvalidImage');
|
||||
$src = false;
|
||||
}
|
||||
|
||||
if (!isset($attr['alt'])) {
|
||||
if ($src) {
|
||||
$attr['alt'] = basename($attr['src']);
|
||||
} else {
|
||||
$attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt');
|
||||
}
|
||||
}
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
30
lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php
Normal file
30
lib/htmlpurifier/HTMLPurifier/AttrTransform/Lang.php
Normal file
|
@ -0,0 +1,30 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Post-transform that copies lang's value to xml:lang (and vice-versa)
|
||||
* @note Theoretically speaking, this could be a pre-transform, but putting
|
||||
* post is more efficient.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
$lang = isset($attr['lang']) ? $attr['lang'] : false;
|
||||
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
|
||||
|
||||
if ($lang !== false && $xml_lang === false) {
|
||||
$attr['xml:lang'] = $lang;
|
||||
} elseif ($xml_lang !== false) {
|
||||
$attr['lang'] = $xml_lang;
|
||||
}
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
33
lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php
Normal file
33
lib/htmlpurifier/HTMLPurifier/AttrTransform/Length.php
Normal file
|
@ -0,0 +1,33 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Class for handling width/height length attribute transformations to CSS
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
var $name;
|
||||
var $cssName;
|
||||
|
||||
function HTMLPurifier_AttrTransform_Length($name, $css_name = null) {
|
||||
$this->name = $name;
|
||||
$this->cssName = $css_name ? $css_name : $name;
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
if (!isset($attr[$this->name])) return $attr;
|
||||
$length = $attr[$this->name];
|
||||
unset($attr[$this->name]);
|
||||
if(ctype_digit($length)) $length .= 'px';
|
||||
|
||||
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
|
||||
$attr['style'] = $this->cssName . ":$length;" . $attr['style'];
|
||||
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
31
lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php
Normal file
31
lib/htmlpurifier/HTMLPurifier/AttrTransform/Name.php
Normal file
|
@ -0,0 +1,31 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated name attribute to ID if necessary
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr['name'])) return $attr;
|
||||
|
||||
$name = $attr['name'];
|
||||
unset($attr['name']);
|
||||
|
||||
if (isset($attr['id'])) {
|
||||
// ID already set, discard name
|
||||
return $attr;
|
||||
}
|
||||
|
||||
$attr['id'] = $name;
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
36
lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php
Normal file
36
lib/htmlpurifier/HTMLPurifier/AttrTransform/TextAlign.php
Normal file
|
@ -0,0 +1,36 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated align attribute to text-align.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_TextAlign
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr['align'])) return $attr;
|
||||
|
||||
$align = strtolower(trim($attr['align']));
|
||||
unset($attr['align']);
|
||||
|
||||
$values = array('left' => 1,
|
||||
'right' => 1,
|
||||
'center' => 1,
|
||||
'justify' => 1);
|
||||
|
||||
if (!isset($values[$align])) {
|
||||
return $attr;
|
||||
}
|
||||
|
||||
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
|
||||
$attr['style'] = "text-align:$align;" . $attr['style'];
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
41
lib/htmlpurifier/HTMLPurifier/AttrTypes.php
Normal file
41
lib/htmlpurifier/HTMLPurifier/AttrTypes.php
Normal file
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Text.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
/**
|
||||
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
|
||||
*/
|
||||
class HTMLPurifier_AttrTypes
|
||||
{
|
||||
/**
|
||||
* Lookup array of attribute string identifiers to concrete implementations
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Constructs the info array
|
||||
*/
|
||||
function HTMLPurifier_AttrTypes() {
|
||||
$this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
|
||||
$this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
|
||||
$this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
|
||||
$this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
|
||||
$this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
|
||||
$this->info['Text'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
|
||||
|
||||
// number is really a positive integer (one or more digits)
|
||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
213
lib/htmlpurifier/HTMLPurifier/CSSDefinition.php
Normal file
213
lib/htmlpurifier/HTMLPurifier/CSSDefinition.php
Normal file
|
@ -0,0 +1,213 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||
|
||||
/**
|
||||
* Defines allowed CSS attributes and what their values are.
|
||||
* @see HTMLPurifier_HTMLDefinition
|
||||
*/
|
||||
class HTMLPurifier_CSSDefinition
|
||||
{
|
||||
|
||||
/**
|
||||
* Assoc array of attribute name to definition object.
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Constructs the info array. The meat of this class.
|
||||
*/
|
||||
function setup($config) {
|
||||
|
||||
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('left', 'right', 'center', 'justify'), false);
|
||||
|
||||
$border_style =
|
||||
$this->info['border-bottom-style'] =
|
||||
$this->info['border-right-style'] =
|
||||
$this->info['border-left-style'] =
|
||||
$this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
|
||||
'groove', 'ridge', 'inset', 'outset'), false);
|
||||
|
||||
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
|
||||
|
||||
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('none', 'left', 'right', 'both'), false);
|
||||
$this->info['float'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('none', 'left', 'right'), false);
|
||||
$this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('normal', 'italic', 'oblique'), false);
|
||||
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('normal', 'small-caps'), false);
|
||||
|
||||
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
|
||||
array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('none')),
|
||||
new HTMLPurifier_AttrDef_CSS_URI()
|
||||
)
|
||||
);
|
||||
|
||||
$this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('inside', 'outside'), false);
|
||||
$this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('disc', 'circle', 'square', 'decimal', 'lower-roman',
|
||||
'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
|
||||
$this->info['list-style-image'] = $uri_or_none;
|
||||
|
||||
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
|
||||
|
||||
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('capitalize', 'uppercase', 'lowercase', 'none'), false);
|
||||
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
|
||||
$this->info['background-image'] = $uri_or_none;
|
||||
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
|
||||
);
|
||||
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('scroll', 'fixed')
|
||||
);
|
||||
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
|
||||
|
||||
$border_color =
|
||||
$this->info['border-top-color'] =
|
||||
$this->info['border-bottom-color'] =
|
||||
$this->info['border-left-color'] =
|
||||
$this->info['border-right-color'] =
|
||||
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('transparent')),
|
||||
new HTMLPurifier_AttrDef_CSS_Color()
|
||||
));
|
||||
|
||||
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
|
||||
|
||||
$this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
|
||||
|
||||
$border_width =
|
||||
$this->info['border-top-width'] =
|
||||
$this->info['border-bottom-width'] =
|
||||
$this->info['border-left-width'] =
|
||||
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
|
||||
));
|
||||
|
||||
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
|
||||
|
||||
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
|
||||
'small', 'medium', 'large', 'x-large', 'xx-large',
|
||||
'larger', 'smaller')),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||
));
|
||||
|
||||
$margin =
|
||||
$this->info['margin-top'] =
|
||||
$this->info['margin-bottom'] =
|
||||
$this->info['margin-left'] =
|
||||
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||
));
|
||||
|
||||
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
|
||||
|
||||
// non-negative
|
||||
$padding =
|
||||
$this->info['padding-top'] =
|
||||
$this->info['padding-bottom'] =
|
||||
$this->info['padding-left'] =
|
||||
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||
));
|
||||
|
||||
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
|
||||
|
||||
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||
));
|
||||
|
||||
$this->info['width'] =
|
||||
$this->info['height'] =
|
||||
new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||
));
|
||||
|
||||
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
|
||||
|
||||
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
|
||||
|
||||
// this could use specialized code
|
||||
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300',
|
||||
'400', '500', '600', '700', '800', '900'), false);
|
||||
|
||||
// MUST be called after other font properties, as it references
|
||||
// a CSSDefinition object
|
||||
$this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
|
||||
|
||||
// same here
|
||||
$this->info['border'] =
|
||||
$this->info['border-bottom'] =
|
||||
$this->info['border-top'] =
|
||||
$this->info['border-left'] =
|
||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
||||
|
||||
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'collapse', 'seperate'));
|
||||
|
||||
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'top', 'bottom'));
|
||||
|
||||
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'auto', 'fixed'));
|
||||
|
||||
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
|
||||
'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||
));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
55
lib/htmlpurifier/HTMLPurifier/ChildDef.php
Normal file
55
lib/htmlpurifier/HTMLPurifier/ChildDef.php
Normal file
|
@ -0,0 +1,55 @@
|
|||
<?php
|
||||
|
||||
// HTMLPurifier_ChildDef and inheritance have three types of output:
|
||||
// true = leave nodes as is
|
||||
// false = delete parent node and all children
|
||||
// array(...) = replace children nodes with these
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'EscapeInvalidChildren', false, 'bool',
|
||||
'When true, a child is found that is not allowed in the context of the '.
|
||||
'parent element will be transformed into text as if it were ASCII. When '.
|
||||
'false, that element and all internal tags will be dropped, though text '.
|
||||
'will be preserved. There is no option for dropping the element but '.
|
||||
'preserving child nodes.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Defines allowed child nodes and validates tokens against it.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef
|
||||
{
|
||||
/**
|
||||
* Type of child definition, usually right-most part of class name lowercase.
|
||||
* Used occasionally in terms of context.
|
||||
* @public
|
||||
*/
|
||||
var $type;
|
||||
|
||||
/**
|
||||
* Bool that indicates whether or not an empty array of children is okay
|
||||
*
|
||||
* This is necessary for redundant checking when changes affecting
|
||||
* a child node may cause a parent node to now be disallowed.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
var $allow_empty;
|
||||
|
||||
/**
|
||||
* Validates nodes according to definition and returns modification.
|
||||
*
|
||||
* @public
|
||||
* @param $tokens_of_children Array of HTMLPurifier_Token
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @param $context HTMLPurifier_Context object
|
||||
* @return bool true to leave nodes as is
|
||||
* @return bool false to remove parent node
|
||||
* @return array of replacement child tokens
|
||||
*/
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
trigger_error('Call to abstract function', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
51
lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php
Normal file
51
lib/htmlpurifier/HTMLPurifier/ChildDef/Chameleon.php
Normal file
|
@ -0,0 +1,51 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Definition that uses different definitions depending on context.
|
||||
*
|
||||
* The del and ins tags are notable because they allow different types of
|
||||
* elements depending on whether or not they're in a block or inline context.
|
||||
* Chameleon allows this behavior to happen by using two different
|
||||
* definitions depending on context. While this somewhat generalized,
|
||||
* it is specifically intended for those two tags.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when inline. Usually stricter.
|
||||
* @public
|
||||
*/
|
||||
var $inline;
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when block.
|
||||
* @public
|
||||
*/
|
||||
var $block;
|
||||
|
||||
var $type = 'chameleon';
|
||||
|
||||
/**
|
||||
* @param $inline List of elements to allow when inline.
|
||||
* @param $block List of elements to allow when block.
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
||||
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||
}
|
||||
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
if ($context->get('IsInline') === false) {
|
||||
return $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
} else {
|
||||
return $this->inline->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
75
lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php
Normal file
75
lib/htmlpurifier/HTMLPurifier/ChildDef/Custom.php
Normal file
|
@ -0,0 +1,75 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Custom validation class, accepts DTD child definitions
|
||||
*
|
||||
* @warning Currently this class is an all or nothing proposition, that is,
|
||||
* it will only give a bool return value.
|
||||
* @note This class is currently not used by any code, although it is unit
|
||||
* tested.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $type = 'custom';
|
||||
var $allow_empty = false;
|
||||
/**
|
||||
* Allowed child pattern as defined by the DTD
|
||||
*/
|
||||
var $dtd_regex;
|
||||
/**
|
||||
* PCRE regex derived from $dtd_regex
|
||||
* @private
|
||||
*/
|
||||
var $_pcre_regex;
|
||||
/**
|
||||
* @param $dtd_regex Allowed child pattern from the DTD
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Custom($dtd_regex) {
|
||||
$this->dtd_regex = $dtd_regex;
|
||||
$this->_compileRegex();
|
||||
}
|
||||
/**
|
||||
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
|
||||
*/
|
||||
function _compileRegex() {
|
||||
$raw = str_replace(' ', '', $this->dtd_regex);
|
||||
if ($raw{0} != '(') {
|
||||
$raw = "($raw)";
|
||||
}
|
||||
$reg = str_replace(',', ',?', $raw);
|
||||
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
|
||||
$this->_pcre_regex = $reg;
|
||||
}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$list_of_children = '';
|
||||
$nesting = 0; // depth into the nest
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) continue;
|
||||
|
||||
$is_child = ($nesting == 0); // direct
|
||||
|
||||
if ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
$list_of_children .= $token->name . ',';
|
||||
}
|
||||
}
|
||||
$list_of_children = rtrim($list_of_children, ',');
|
||||
|
||||
$okay =
|
||||
preg_match(
|
||||
'/^'.$this->_pcre_regex.'$/',
|
||||
$list_of_children
|
||||
);
|
||||
|
||||
return (bool) $okay;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
22
lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php
Normal file
22
lib/htmlpurifier/HTMLPurifier/ChildDef/Empty.php
Normal file
|
@ -0,0 +1,22 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Definition that disallows all elements.
|
||||
* @warning validateChildren() in this class is actually never called, because
|
||||
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
|
||||
* before child definitions are parsed in earnest by
|
||||
* HTMLPurifier_Strategy_FixNesting.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'empty';
|
||||
function HTMLPurifier_ChildDef_Empty() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
return array();
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
23
lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php
Normal file
23
lib/htmlpurifier/HTMLPurifier/ChildDef/Optional.php
Normal file
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
|
||||
* Definition that allows a set of elements, and allows no children.
|
||||
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
|
||||
* really, one shouldn't inherit from the other. Only altered behavior
|
||||
* is to overload a returned false with an array. Thus, it will never
|
||||
* return false.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'optional';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
if ($result === false) return array();
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
107
lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php
Normal file
107
lib/htmlpurifier/HTMLPurifier/ChildDef/Required.php
Normal file
|
@ -0,0 +1,107 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Definition that allows a set of elements, but disallows empty children.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
{
|
||||
/**
|
||||
* Lookup table of allowed elements.
|
||||
* @public
|
||||
*/
|
||||
var $elements = array();
|
||||
/**
|
||||
* @param $elements List of allowed element names (lowercase).
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Required($elements) {
|
||||
if (is_string($elements)) {
|
||||
$elements = str_replace(' ', '', $elements);
|
||||
$elements = explode('|', $elements);
|
||||
}
|
||||
$keys = array_keys($elements);
|
||||
if ($keys == array_keys($keys)) {
|
||||
$elements = array_flip($elements);
|
||||
foreach ($elements as $i => $x) {
|
||||
$elements[$i] = true;
|
||||
if (empty($i)) unset($elements[$i]);
|
||||
}
|
||||
}
|
||||
$this->elements = $elements;
|
||||
$this->gen = new HTMLPurifier_Generator();
|
||||
}
|
||||
var $allow_empty = false;
|
||||
var $type = 'required';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// the new set of children
|
||||
$result = array();
|
||||
|
||||
// current depth into the nest
|
||||
$nesting = 0;
|
||||
|
||||
// whether or not we're deleting a node
|
||||
$is_deleting = false;
|
||||
|
||||
// whether or not parsed character data is allowed
|
||||
// this controls whether or not we silently drop a tag
|
||||
// or generate escaped HTML from it
|
||||
$pcdata_allowed = isset($this->elements['#PCDATA']);
|
||||
|
||||
// a little sanity check to make sure it's not ALL whitespace
|
||||
$all_whitespace = true;
|
||||
|
||||
// some configuration
|
||||
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) {
|
||||
$result[] = $token;
|
||||
continue;
|
||||
}
|
||||
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
$is_deleting = false;
|
||||
if (!isset($this->elements[$token->name])) {
|
||||
$is_deleting = true;
|
||||
if ($pcdata_allowed && $token->type == 'text') {
|
||||
$result[] = $token;
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
|
||||
$result[] = $token;
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] =
|
||||
new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken( $token, $config )
|
||||
);
|
||||
} else {
|
||||
// drop silently
|
||||
}
|
||||
}
|
||||
if (empty($result)) return false;
|
||||
if ($all_whitespace) return false;
|
||||
if ($tokens_of_children == $result) return true;
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
76
lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php
Normal file
76
lib/htmlpurifier/HTMLPurifier/ChildDef/StrictBlockquote.php
Normal file
|
@ -0,0 +1,76 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
|
||||
* Takes the contents of blockquote when in strict and reformats for validation.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_StrictBlockquote
|
||||
extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $real_elements;
|
||||
var $fake_elements;
|
||||
var $allow_empty = true;
|
||||
var $type = 'strictblockquote';
|
||||
var $init = false;
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
if (!$this->init) {
|
||||
// allow all inline elements
|
||||
$this->real_elements = $this->elements;
|
||||
$this->fake_elements = $def->info_content_sets['Flow'];
|
||||
$this->fake_elements['#PCDATA'] = true;
|
||||
$this->init = true;
|
||||
}
|
||||
|
||||
// trick the parent class into thinking it allows more
|
||||
$this->elements = $this->fake_elements;
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
$this->elements = $this->real_elements;
|
||||
|
||||
if ($result === false) return array();
|
||||
if ($result === true) $result = $tokens_of_children;
|
||||
|
||||
$block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
|
||||
$block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
|
||||
$is_inline = false;
|
||||
$depth = 0;
|
||||
$ret = array();
|
||||
|
||||
// assuming that there are no comment tokens
|
||||
foreach ($result as $i => $token) {
|
||||
$token = $result[$i];
|
||||
// ifs are nested for readability
|
||||
if (!$is_inline) {
|
||||
if (!$depth) {
|
||||
if (
|
||||
$token->type == 'text' ||
|
||||
!isset($this->elements[$token->name])
|
||||
) {
|
||||
$is_inline = true;
|
||||
$ret[] = $block_wrap_start;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!$depth) {
|
||||
// starting tokens have been inline text / empty
|
||||
if ($token->type == 'start' || $token->type == 'empty') {
|
||||
if (isset($this->elements[$token->name])) {
|
||||
// ended
|
||||
$ret[] = $block_wrap_end;
|
||||
$is_inline = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
$ret[] = $token;
|
||||
if ($token->type == 'start') $depth++;
|
||||
if ($token->type == 'end') $depth--;
|
||||
}
|
||||
if ($is_inline) $ret[] = $block_wrap_end;
|
||||
return $ret;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
142
lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php
Normal file
142
lib/htmlpurifier/HTMLPurifier/ChildDef/Table.php
Normal file
|
@ -0,0 +1,142 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Definition for tables
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = false;
|
||||
var $type = 'table';
|
||||
function HTMLPurifier_ChildDef_Table() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// this ensures that the loop gets run one last time before closing
|
||||
// up. It's a little bit of a hack, but it works! Just make sure you
|
||||
// get rid of the token later.
|
||||
$tokens_of_children[] = false;
|
||||
|
||||
// only one of these elements is allowed in a table
|
||||
$caption = false;
|
||||
$thead = false;
|
||||
$tfoot = false;
|
||||
|
||||
// as many of these as you want
|
||||
$cols = array();
|
||||
$content = array();
|
||||
|
||||
$nesting = 0; // current depth so we can determine nodes
|
||||
$is_collecting = false; // are we globbing together tokens to package
|
||||
// into one of the collectors?
|
||||
$collection = array(); // collected nodes
|
||||
$tag_index = 0; // the first node might be whitespace,
|
||||
// so this tells us where the start tag is
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token === false) {
|
||||
// terminating sequence started
|
||||
} elseif ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
// handle node collection
|
||||
if ($is_collecting) {
|
||||
if ($is_child) {
|
||||
// okay, let's stash the tokens away
|
||||
// first token tells us the type of the collection
|
||||
switch ($collection[$tag_index]->name) {
|
||||
case 'tr':
|
||||
case 'tbody':
|
||||
$content[] = $collection;
|
||||
break;
|
||||
case 'caption':
|
||||
if ($caption !== false) break;
|
||||
$caption = $collection;
|
||||
break;
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
// access the appropriate variable, $thead or $tfoot
|
||||
$var = $collection[$tag_index]->name;
|
||||
if ($$var === false) {
|
||||
$$var = $collection;
|
||||
} else {
|
||||
// transmutate the first and less entries into
|
||||
// tbody tags, and then put into content
|
||||
$collection[$tag_index]->name = 'tbody';
|
||||
$collection[count($collection)-1]->name = 'tbody';
|
||||
$content[] = $collection;
|
||||
}
|
||||
break;
|
||||
case 'colgroup':
|
||||
$cols[] = $collection;
|
||||
break;
|
||||
}
|
||||
$collection = array();
|
||||
$is_collecting = false;
|
||||
$tag_index = 0;
|
||||
} else {
|
||||
// add the node to the collection
|
||||
$collection[] = $token;
|
||||
}
|
||||
}
|
||||
|
||||
// terminate
|
||||
if ($token === false) break;
|
||||
|
||||
if ($is_child) {
|
||||
// determine what we're dealing with
|
||||
if ($token->name == 'col') {
|
||||
// the only empty tag in the possie, we can handle it
|
||||
// immediately
|
||||
$cols[] = array_merge($collection, array($token));
|
||||
$collection = array();
|
||||
$tag_index = 0;
|
||||
continue;
|
||||
}
|
||||
switch($token->name) {
|
||||
case 'caption':
|
||||
case 'colgroup':
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
case 'tbody':
|
||||
case 'tr':
|
||||
$is_collecting = true;
|
||||
$collection[] = $token;
|
||||
continue;
|
||||
default:
|
||||
if ($token->type == 'text' && $token->is_whitespace) {
|
||||
$collection[] = $token;
|
||||
$tag_index++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($content)) return false;
|
||||
|
||||
$ret = array();
|
||||
if ($caption !== false) $ret = array_merge($ret, $caption);
|
||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if (!empty($collection) && $is_collecting == false){
|
||||
// grab the trailing space
|
||||
$ret = array_merge($ret, $collection);
|
||||
}
|
||||
|
||||
array_pop($tokens_of_children); // remove phantom token
|
||||
|
||||
return ($ret === $tokens_of_children) ? true : $ret;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
222
lib/htmlpurifier/HTMLPurifier/Config.php
Normal file
222
lib/htmlpurifier/HTMLPurifier/Config.php
Normal file
|
@ -0,0 +1,222 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Configuration object that triggers customizable behavior.
|
||||
*
|
||||
* @warning This class is strongly defined: that means that the class
|
||||
* will fail if an undefined directive is retrieved or set.
|
||||
*
|
||||
* @note Many classes that could (although many times don't) use the
|
||||
* configuration object make it a mandatory parameter. This is
|
||||
* because a configuration object should always be forwarded,
|
||||
* otherwise, you run the risk of missing a parameter and then
|
||||
* being stumped when a configuration directive doesn't work.
|
||||
*/
|
||||
class HTMLPurifier_Config
|
||||
{
|
||||
|
||||
/**
|
||||
* Two-level associative array of configuration directives
|
||||
*/
|
||||
var $conf;
|
||||
|
||||
/**
|
||||
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||
*/
|
||||
var $def;
|
||||
|
||||
/**
|
||||
* Cached instance of HTMLPurifier_HTMLDefinition
|
||||
*/
|
||||
var $html_definition;
|
||||
|
||||
/**
|
||||
* Cached instance of HTMLPurifier_CSSDefinition
|
||||
*/
|
||||
var $css_definition;
|
||||
|
||||
/**
|
||||
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||
* are allowed.
|
||||
*/
|
||||
function HTMLPurifier_Config(&$definition) {
|
||||
$this->conf = $definition->defaults; // set up, copy in defaults
|
||||
$this->def = $definition; // keep a copy around for checking
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that creates a config object based on a mixed var
|
||||
* @static
|
||||
* @param mixed $config Variable that defines the state of the config
|
||||
* object. Can be: a HTMLPurifier_Config() object,
|
||||
* an array of directives based on loadArray(),
|
||||
* or a string filename of an ini file.
|
||||
* @return Configured HTMLPurifier_Config object
|
||||
*/
|
||||
function create($config) {
|
||||
if (is_a($config, 'HTMLPurifier_Config')) return $config;
|
||||
$ret = HTMLPurifier_Config::createDefault();
|
||||
if (is_string($config)) $ret->loadIni($config);
|
||||
elseif (is_array($config)) $ret->loadArray($config);
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that creates a default configuration object.
|
||||
* @static
|
||||
* @return Default HTMLPurifier_Config object.
|
||||
*/
|
||||
function createDefault() {
|
||||
$definition =& HTMLPurifier_ConfigSchema::instance();
|
||||
$config = new HTMLPurifier_Config($definition);
|
||||
return $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retreives a value from the configuration.
|
||||
* @param $namespace String namespace
|
||||
* @param $key String key
|
||||
*/
|
||||
function get($namespace, $key, $from_alias = false) {
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
trigger_error('Cannot retrieve value of undefined directive',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
if ($this->def->info[$namespace][$key]->class == 'alias') {
|
||||
trigger_error('Cannot get value from aliased directive, use real name',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
return $this->conf[$namespace][$key];
|
||||
}
|
||||
|
||||
/**
|
||||
* Retreives an array of directives to values from a given namespace
|
||||
* @param $namespace String namespace
|
||||
*/
|
||||
function getBatch($namespace) {
|
||||
if (!isset($this->def->info[$namespace])) {
|
||||
trigger_error('Cannot retrieve undefined namespace',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
return $this->conf[$namespace];
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a value to configuration.
|
||||
* @param $namespace String namespace
|
||||
* @param $key String key
|
||||
* @param $value Mixed value
|
||||
*/
|
||||
function set($namespace, $key, $value, $from_alias = false) {
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
trigger_error('Cannot set undefined directive to value',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
if ($this->def->info[$namespace][$key]->class == 'alias') {
|
||||
if ($from_alias) {
|
||||
trigger_error('Double-aliases not allowed, please fix '.
|
||||
'ConfigSchema bug');
|
||||
}
|
||||
$this->set($this->def->info[$namespace][$key]->namespace,
|
||||
$this->def->info[$namespace][$key]->name,
|
||||
$value, true);
|
||||
return;
|
||||
}
|
||||
$value = $this->def->validate(
|
||||
$value,
|
||||
$this->def->info[$namespace][$key]->type,
|
||||
$this->def->info[$namespace][$key]->allow_null
|
||||
);
|
||||
if (is_string($value)) {
|
||||
// resolve value alias if defined
|
||||
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
|
||||
$value = $this->def->info[$namespace][$key]->aliases[$value];
|
||||
}
|
||||
if ($this->def->info[$namespace][$key]->allowed !== true) {
|
||||
// check to see if the value is allowed
|
||||
if (!isset($this->def->info[$namespace][$key]->allowed[$value])) {
|
||||
trigger_error('Value not supported', E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($this->def->isError($value)) {
|
||||
trigger_error('Value is of invalid type', E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
$this->conf[$namespace][$key] = $value;
|
||||
if ($namespace == 'HTML' || $namespace == 'Attr') {
|
||||
// reset HTML definition if relevant attributes changed
|
||||
$this->html_definition = null;
|
||||
}
|
||||
if ($namespace == 'CSS') {
|
||||
$this->css_definition = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves reference to the HTML definition.
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
*/
|
||||
function &getHTMLDefinition($raw = false) {
|
||||
if (
|
||||
empty($this->html_definition) || // hasn't ever been setup
|
||||
($raw && $this->html_definition->setup) // requesting new one
|
||||
) {
|
||||
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
|
||||
if ($raw) return $this->html_definition; // no setup!
|
||||
}
|
||||
if (!$this->html_definition->setup) $this->html_definition->setup();
|
||||
return $this->html_definition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves reference to the CSS definition
|
||||
*/
|
||||
function &getCSSDefinition() {
|
||||
if ($this->css_definition === null) {
|
||||
$this->css_definition = new HTMLPurifier_CSSDefinition();
|
||||
$this->css_definition->setup($this);
|
||||
}
|
||||
return $this->css_definition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads configuration values from an array with the following structure:
|
||||
* Namespace.Directive => Value
|
||||
* @param $config_array Configuration associative array
|
||||
*/
|
||||
function loadArray($config_array) {
|
||||
foreach ($config_array as $key => $value) {
|
||||
$key = str_replace('_', '.', $key);
|
||||
if (strpos($key, '.') !== false) {
|
||||
// condensed form
|
||||
list($namespace, $directive) = explode('.', $key);
|
||||
$this->set($namespace, $directive, $value);
|
||||
} else {
|
||||
$namespace = $key;
|
||||
$namespace_values = $value;
|
||||
foreach ($namespace_values as $directive => $value) {
|
||||
$this->set($namespace, $directive, $value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads configuration values from an ini file
|
||||
* @param $filename Name of ini file
|
||||
*/
|
||||
function loadIni($filename) {
|
||||
$array = parse_ini_file($filename, true);
|
||||
$this->loadArray($array);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
10
lib/htmlpurifier/HTMLPurifier/ConfigDef.php
Normal file
10
lib/htmlpurifier/HTMLPurifier/ConfigDef.php
Normal file
|
@ -0,0 +1,10 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Base class for configuration entity
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef {
|
||||
var $class = false;
|
||||
}
|
||||
|
||||
?>
|
74
lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php
Normal file
74
lib/htmlpurifier/HTMLPurifier/ConfigDef/Directive.php
Normal file
|
@ -0,0 +1,74 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object containing definition of a directive.
|
||||
* @note This structure does not contain default values
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
||||
{
|
||||
|
||||
var $class = 'directive';
|
||||
|
||||
function HTMLPurifier_ConfigDef_Directive(
|
||||
$type = null,
|
||||
$descriptions = null,
|
||||
$allow_null = null,
|
||||
$allowed = null,
|
||||
$aliases = null
|
||||
) {
|
||||
if ( $type !== null) $this->type = $type;
|
||||
if ($descriptions !== null) $this->descriptions = $descriptions;
|
||||
if ( $allow_null !== null) $this->allow_null = $allow_null;
|
||||
if ( $allowed !== null) $this->allowed = $allowed;
|
||||
if ( $aliases !== null) $this->aliases = $aliases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allowed type of the directive. Values are:
|
||||
* - string
|
||||
* - istring (case insensitive string)
|
||||
* - int
|
||||
* - float
|
||||
* - bool
|
||||
* - lookup (array of value => true)
|
||||
* - list (regular numbered index array)
|
||||
* - hash (array of key => value)
|
||||
* - mixed (anything goes)
|
||||
*/
|
||||
var $type = 'mixed';
|
||||
|
||||
/**
|
||||
* Plaintext descriptions of the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
*/
|
||||
var $descriptions = array();
|
||||
|
||||
/**
|
||||
* Is null allowed? Has no effect for mixed type.
|
||||
* @bool
|
||||
*/
|
||||
var $allow_null = false;
|
||||
|
||||
/**
|
||||
* Lookup table of allowed values of the element, bool true if all allowed.
|
||||
*/
|
||||
var $allowed = true;
|
||||
|
||||
/**
|
||||
* Hash of value aliases, i.e. values that are equivalent.
|
||||
*/
|
||||
var $aliases = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
function addDescription($file, $line, $description) {
|
||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||
$this->descriptions[$file][$line] = $description;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
27
lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php
Normal file
27
lib/htmlpurifier/HTMLPurifier/ConfigDef/DirectiveAlias.php
Normal file
|
@ -0,0 +1,27 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object describing a directive alias
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
|
||||
{
|
||||
var $class = 'alias';
|
||||
|
||||
/**
|
||||
* Namespace being aliased to
|
||||
*/
|
||||
var $namespace;
|
||||
/**
|
||||
* Directive being aliased to
|
||||
*/
|
||||
var $name;
|
||||
|
||||
function HTMLPurifier_ConfigDef_DirectiveAlias($namespace, $name) {
|
||||
$this->namespace = $namespace;
|
||||
$this->name = $name;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
23
lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php
Normal file
23
lib/htmlpurifier/HTMLPurifier/ConfigDef/Namespace.php
Normal file
|
@ -0,0 +1,23 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object describing of a namespace
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
|
||||
|
||||
function HTMLPurifier_ConfigDef_Namespace($description = null) {
|
||||
$this->description = $description;
|
||||
}
|
||||
|
||||
var $class = 'namespace';
|
||||
|
||||
/**
|
||||
* String description of what kinds of directives go in this namespace.
|
||||
*/
|
||||
var $description;
|
||||
|
||||
}
|
||||
|
||||
?>
|
386
lib/htmlpurifier/HTMLPurifier/ConfigSchema.php
Normal file
386
lib/htmlpurifier/HTMLPurifier/ConfigSchema.php
Normal file
|
@ -0,0 +1,386 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Error.php';
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/Namespace.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/Directive.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
|
||||
|
||||
/**
|
||||
* Configuration definition, defines directives and their defaults.
|
||||
* @todo The ability to define things multiple times is confusing and should
|
||||
* be factored out to its own function named registerDependency() or
|
||||
* addNote(), where only the namespace.name and an extra descriptions
|
||||
* documenting the nature of the dependency are needed. Since it's
|
||||
* possible that the dependency is registered before the configuration
|
||||
* is defined, deferring it to some sort of cache until it actually
|
||||
* gets defined would be wise, keeping it opaque until it does get
|
||||
* defined. We could add a finalize() method which would cause it to
|
||||
* error out if we get a dangling dependency. It's difficult, however,
|
||||
* to know whether or not it's a dependency, or a codependency, that is
|
||||
* neither of them fully depends on it. Where does the configuration go
|
||||
* then? This could be partially resolved by allowing blanket definitions
|
||||
* and then splitting them up into finer-grained versions, however, there
|
||||
* might be implementation difficulties in ini files regarding order of
|
||||
* execution.
|
||||
*/
|
||||
class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defaults of the directives and namespaces.
|
||||
* @note This shares the exact same structure as HTMLPurifier_Config::$conf
|
||||
*/
|
||||
var $defaults = array();
|
||||
|
||||
/**
|
||||
* Definition of the directives.
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Definition of namespaces.
|
||||
*/
|
||||
var $info_namespace = array();
|
||||
|
||||
/**
|
||||
* Lookup table of allowed types.
|
||||
*/
|
||||
var $types = array(
|
||||
'string' => 'String',
|
||||
'istring' => 'Case-insensitive string',
|
||||
'int' => 'Integer',
|
||||
'float' => 'Float',
|
||||
'bool' => 'Boolean',
|
||||
'lookup' => 'Lookup array',
|
||||
'list' => 'Array list',
|
||||
'hash' => 'Associative array',
|
||||
'mixed' => 'Mixed'
|
||||
);
|
||||
|
||||
/**
|
||||
* Initializes the default namespaces.
|
||||
*/
|
||||
function initialize() {
|
||||
$this->defineNamespace('Core', 'Core features that are always available.');
|
||||
$this->defineNamespace('Attr', 'Features regarding attribute validation.');
|
||||
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
||||
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
||||
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
||||
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves an instance of the application-wide configuration definition.
|
||||
* @static
|
||||
*/
|
||||
function &instance($prototype = null) {
|
||||
static $instance;
|
||||
if ($prototype !== null) {
|
||||
$instance = $prototype;
|
||||
} elseif ($instance === null || $prototype === true) {
|
||||
$instance = new HTMLPurifier_ConfigSchema();
|
||||
$instance->initialize();
|
||||
}
|
||||
return $instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines a directive for configuration
|
||||
* @static
|
||||
* @warning Will fail of directive's namespace is defined
|
||||
* @param $namespace Namespace the directive is in
|
||||
* @param $name Key of directive
|
||||
* @param $default Default value of directive
|
||||
* @param $type Allowed type of the directive. See
|
||||
* HTMLPurifier_DirectiveDef::$type for allowed values
|
||||
* @param $description Description of directive for documentation
|
||||
*/
|
||||
function define(
|
||||
$namespace, $name, $default, $type,
|
||||
$description
|
||||
) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive for undefined namespace',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($name)) {
|
||||
trigger_error('Directive name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (empty($description)) {
|
||||
trigger_error('Description must be non-empty',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name])) {
|
||||
if (
|
||||
$def->info[$namespace][$name]->type !== $type ||
|
||||
$def->defaults[$namespace][$name] !== $default
|
||||
) {
|
||||
trigger_error('Inconsistent default or type, cannot redefine');
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// process modifiers
|
||||
$type_values = explode('/', $type, 2);
|
||||
$type = $type_values[0];
|
||||
$modifier = isset($type_values[1]) ? $type_values[1] : false;
|
||||
$allow_null = ($modifier === 'null');
|
||||
|
||||
if (!isset($def->types[$type])) {
|
||||
trigger_error('Invalid type for configuration directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$default = $def->validate($default, $type, $allow_null);
|
||||
if ($def->isError($default)) {
|
||||
trigger_error('Default value does not match directive type',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigDef_Directive();
|
||||
$def->info[$namespace][$name]->type = $type;
|
||||
$def->info[$namespace][$name]->allow_null = $allow_null;
|
||||
$def->defaults[$namespace][$name] = $default;
|
||||
}
|
||||
$backtrace = debug_backtrace();
|
||||
$file = $def->mungeFilename($backtrace[0]['file']);
|
||||
$line = $backtrace[0]['line'];
|
||||
$def->info[$namespace][$name]->addDescription($file,$line,$description);
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines a namespace for directives to be put into.
|
||||
* @static
|
||||
* @param $namespace Namespace's name
|
||||
* @param $description Description of the namespace
|
||||
*/
|
||||
function defineNamespace($namespace, $description) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($namespace)) {
|
||||
trigger_error('Namespace name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (empty($description)) {
|
||||
trigger_error('Description must be non-empty',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace] = array();
|
||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
|
||||
$def->info_namespace[$namespace]->description = $description;
|
||||
$def->defaults[$namespace] = array();
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines a directive value alias.
|
||||
*
|
||||
* Directive value aliases are convenient for developers because it lets
|
||||
* them set a directive to several values and get the same result.
|
||||
* @static
|
||||
* @param $namespace Directive's namespace
|
||||
* @param $name Name of Directive
|
||||
* @param $alias Name of aliased value
|
||||
* @param $real Value aliased value will be converted into
|
||||
*/
|
||||
function defineValueAliases($namespace, $name, $aliases) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot set value alias for non-existant directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
foreach ($aliases as $alias => $real) {
|
||||
if (!$def->info[$namespace][$name] !== true &&
|
||||
!isset($def->info[$namespace][$name]->allowed[$real])
|
||||
) {
|
||||
trigger_error('Cannot define alias to value that is not allowed',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name]->allowed[$alias])) {
|
||||
trigger_error('Cannot define alias over allowed value',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace][$name]->aliases[$alias] = $real;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines a set of allowed values for a directive.
|
||||
* @static
|
||||
* @param $namespace Namespace of directive
|
||||
* @param $name Name of directive
|
||||
* @param $allowed_values Arraylist of allowed values
|
||||
*/
|
||||
function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot define allowed values for undefined directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$directive =& $def->info[$namespace][$name];
|
||||
$type = $directive->type;
|
||||
if ($type != 'string' && $type != 'istring') {
|
||||
trigger_error('Cannot define allowed values for directive whose type is not string',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($directive->allowed === true) {
|
||||
$directive->allowed = array();
|
||||
}
|
||||
foreach ($allowed_values as $value) {
|
||||
$directive->allowed[$value] = true;
|
||||
}
|
||||
if ($def->defaults[$namespace][$name] !== null &&
|
||||
!isset($directive->allowed[$def->defaults[$namespace][$name]])) {
|
||||
trigger_error('Default value must be in allowed range of variables',
|
||||
E_USER_ERROR);
|
||||
$directive->allowed = true; // undo undo!
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines a directive alias for backwards compatibility
|
||||
* @static
|
||||
* @param $namespace
|
||||
* @param $name Directive that will be aliased
|
||||
* @param $new_namespace
|
||||
* @param $new_name Directive that the alias will be to
|
||||
*/
|
||||
function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive alias in undefined namespace',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($name)) {
|
||||
trigger_error('Directive name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot define alias over directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!isset($def->info[$new_namespace][$new_name])) {
|
||||
trigger_error('Cannot define alias to undefined directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($def->info[$new_namespace][$new_name]->class == 'alias') {
|
||||
trigger_error('Cannot define alias to alias',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigDef_DirectiveAlias(
|
||||
$new_namespace, $new_name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate a variable according to type. Return null if invalid.
|
||||
*/
|
||||
function validate($var, $type, $allow_null = false) {
|
||||
if (!isset($this->types[$type])) {
|
||||
trigger_error('Invalid type', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($allow_null && $var === null) return null;
|
||||
switch ($type) {
|
||||
case 'mixed':
|
||||
return $var;
|
||||
case 'istring':
|
||||
case 'string':
|
||||
if (!is_string($var)) break;
|
||||
if ($type === 'istring') $var = strtolower($var);
|
||||
return $var;
|
||||
case 'int':
|
||||
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
|
||||
elseif (!is_int($var)) break;
|
||||
return $var;
|
||||
case 'float':
|
||||
if (is_string($var) && is_numeric($var)) $var = (float) $var;
|
||||
elseif (!is_float($var)) break;
|
||||
return $var;
|
||||
case 'bool':
|
||||
if (is_int($var) && ($var === 0 || $var === 1)) {
|
||||
$var = (bool) $var;
|
||||
} elseif (is_string($var)) {
|
||||
if ($var == 'on' || $var == 'true' || $var == '1') {
|
||||
$var = true;
|
||||
} elseif ($var == 'off' || $var == 'false' || $var == '0') {
|
||||
$var = false;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} elseif (!is_bool($var)) break;
|
||||
return $var;
|
||||
case 'list':
|
||||
case 'hash':
|
||||
case 'lookup':
|
||||
if (is_string($var)) {
|
||||
// simplistic string to array method that only works
|
||||
// for simple lists of tag names or alphanumeric characters
|
||||
$var = explode(',',$var);
|
||||
// remove spaces
|
||||
foreach ($var as $i => $j) $var[$i] = trim($j);
|
||||
}
|
||||
if (!is_array($var)) break;
|
||||
$keys = array_keys($var);
|
||||
if ($keys === array_keys($keys)) {
|
||||
if ($type == 'list') return $var;
|
||||
elseif ($type == 'lookup') {
|
||||
$new = array();
|
||||
foreach ($var as $key) {
|
||||
$new[$key] = true;
|
||||
}
|
||||
return $new;
|
||||
} else break;
|
||||
}
|
||||
if ($type === 'lookup') {
|
||||
foreach ($var as $key => $value) {
|
||||
$var[$key] = true;
|
||||
}
|
||||
}
|
||||
return $var;
|
||||
}
|
||||
$error = new HTMLPurifier_Error();
|
||||
return $error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes an absolute path and munges it into a more manageable relative path
|
||||
*/
|
||||
function mungeFilename($filename) {
|
||||
$offset = strrpos($filename, 'HTMLPurifier');
|
||||
$filename = substr($filename, $offset);
|
||||
$filename = str_replace('\\', '/', $filename);
|
||||
return $filename;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if var is an HTMLPurifier_Error object
|
||||
*/
|
||||
function isError($var) {
|
||||
if (!is_object($var)) return false;
|
||||
if (!is_a($var, 'HTMLPurifier_Error')) return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
148
lib/htmlpurifier/HTMLPurifier/ContentSets.php
Normal file
148
lib/htmlpurifier/HTMLPurifier/ContentSets.php
Normal file
|
@ -0,0 +1,148 @@
|
|||
<?php
|
||||
|
||||
// common defs that we'll support by default
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||
|
||||
class HTMLPurifier_ContentSets
|
||||
{
|
||||
|
||||
/**
|
||||
* List of content set strings (pipe seperators) indexed by name.
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* List of content set lookups (element => true) indexed by name.
|
||||
* @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
|
||||
* @public
|
||||
*/
|
||||
var $lookup = array();
|
||||
|
||||
/**
|
||||
* Synchronized list of defined content sets (keys of info)
|
||||
*/
|
||||
var $keys = array();
|
||||
/**
|
||||
* Synchronized list of defined content values (values of info)
|
||||
*/
|
||||
var $values = array();
|
||||
|
||||
/**
|
||||
* Merges in module's content sets, expands identifiers in the content
|
||||
* sets and populates the keys, values and lookup member variables.
|
||||
* @param $modules List of HTMLPurifier_HTMLModule
|
||||
*/
|
||||
function HTMLPurifier_ContentSets($modules) {
|
||||
if (!is_array($modules)) $modules = array($modules);
|
||||
// populate content_sets based on module hints
|
||||
// sorry, no way of overloading
|
||||
foreach ($modules as $module_i => $module) {
|
||||
foreach ($module->content_sets as $key => $value) {
|
||||
if (isset($this->info[$key])) {
|
||||
// add it into the existing content set
|
||||
$this->info[$key] = $this->info[$key] . ' | ' . $value;
|
||||
} else {
|
||||
$this->info[$key] = $value;
|
||||
}
|
||||
}
|
||||
}
|
||||
// perform content_set expansions
|
||||
$this->keys = array_keys($this->info);
|
||||
foreach ($this->info as $i => $set) {
|
||||
// only performed once, so infinite recursion is not
|
||||
// a problem
|
||||
$this->info[$i] =
|
||||
str_replace(
|
||||
$this->keys,
|
||||
// must be recalculated each time due to
|
||||
// changing substitutions
|
||||
array_values($this->info),
|
||||
$set);
|
||||
}
|
||||
$this->values = array_values($this->info);
|
||||
|
||||
// generate lookup tables
|
||||
foreach ($this->info as $name => $set) {
|
||||
$this->lookup[$name] = $this->convertToLookup($set);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Accepts a definition; generates and assigns a ChildDef for it
|
||||
* @param $def HTMLPurifier_ElementDef reference
|
||||
* @param $module Module that defined the ElementDef
|
||||
*/
|
||||
function generateChildDef(&$def, $module) {
|
||||
if (!empty($def->child)) return; // already done!
|
||||
$content_model = $def->content_model;
|
||||
if (is_string($content_model)) {
|
||||
$def->content_model = str_replace(
|
||||
$this->keys, $this->values, $content_model);
|
||||
}
|
||||
$def->child = $this->getChildDef($def, $module);
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates a ChildDef based on content_model and content_model_type
|
||||
* member variables in HTMLPurifier_ElementDef
|
||||
* @note This will also defer to modules for custom HTMLPurifier_ChildDef
|
||||
* subclasses that need content set expansion
|
||||
* @param $def HTMLPurifier_ElementDef to have ChildDef extracted
|
||||
* @return HTMLPurifier_ChildDef corresponding to ElementDef
|
||||
*/
|
||||
function getChildDef($def, $module) {
|
||||
$value = $def->content_model;
|
||||
if (is_object($value)) {
|
||||
trigger_error(
|
||||
'Literal object child definitions should be stored in '.
|
||||
'ElementDef->child not ElementDef->content_model',
|
||||
E_USER_NOTICE
|
||||
);
|
||||
return $value;
|
||||
}
|
||||
switch ($def->content_model_type) {
|
||||
case 'required':
|
||||
return new HTMLPurifier_ChildDef_Required($value);
|
||||
case 'optional':
|
||||
return new HTMLPurifier_ChildDef_Optional($value);
|
||||
case 'empty':
|
||||
return new HTMLPurifier_ChildDef_Empty();
|
||||
case 'custom':
|
||||
return new HTMLPurifier_ChildDef_Custom($value);
|
||||
}
|
||||
// defer to its module
|
||||
$return = false;
|
||||
if ($module->defines_child_def) { // save a func call
|
||||
$return = $module->getChildDef($def);
|
||||
}
|
||||
if ($return !== false) return $return;
|
||||
// error-out
|
||||
trigger_error(
|
||||
'Could not determine which ChildDef class to instantiate',
|
||||
E_USER_ERROR
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string list of elements separated by pipes into
|
||||
* a lookup array.
|
||||
* @param $string List of elements
|
||||
* @return Lookup array of elements
|
||||
*/
|
||||
function convertToLookup($string) {
|
||||
$array = explode('|', str_replace(' ', '', $string));
|
||||
$ret = array();
|
||||
foreach ($array as $i => $k) {
|
||||
$ret[$k] = true;
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
76
lib/htmlpurifier/HTMLPurifier/Context.php
Normal file
76
lib/htmlpurifier/HTMLPurifier/Context.php
Normal file
|
@ -0,0 +1,76 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Registry object that contains information about the current context.
|
||||
*/
|
||||
class HTMLPurifier_Context
|
||||
{
|
||||
|
||||
/**
|
||||
* Private array that stores the references.
|
||||
* @private
|
||||
*/
|
||||
var $_storage = array();
|
||||
|
||||
/**
|
||||
* Registers a variable into the context.
|
||||
* @param $name String name
|
||||
* @param $ref Variable to be registered
|
||||
*/
|
||||
function register($name, &$ref) {
|
||||
if (isset($this->_storage[$name])) {
|
||||
trigger_error('Name collision, cannot re-register',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$this->_storage[$name] =& $ref;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a variable reference from the context.
|
||||
* @param $name String name
|
||||
*/
|
||||
function &get($name) {
|
||||
if (!isset($this->_storage[$name])) {
|
||||
trigger_error('Attempted to retrieve non-existent variable',
|
||||
E_USER_ERROR);
|
||||
$var = null; // so we can return by reference
|
||||
return $var;
|
||||
}
|
||||
return $this->_storage[$name];
|
||||
}
|
||||
|
||||
/**
|
||||
* Destorys a variable in the context.
|
||||
* @param $name String name
|
||||
*/
|
||||
function destroy($name) {
|
||||
if (!isset($this->_storage[$name])) {
|
||||
trigger_error('Attempted to destroy non-existent variable',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
unset($this->_storage[$name]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether or not the variable exists.
|
||||
* @param $name String name
|
||||
*/
|
||||
function exists($name) {
|
||||
return isset($this->_storage[$name]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a series of variables from an associative array
|
||||
* @param $context_array Assoc array of variables to load
|
||||
*/
|
||||
function loadArray(&$context_array) {
|
||||
foreach ($context_array as $key => $discard) {
|
||||
$this->register($key, $context_array[$key]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
122
lib/htmlpurifier/HTMLPurifier/ElementDef.php
Normal file
122
lib/htmlpurifier/HTMLPurifier/ElementDef.php
Normal file
|
@ -0,0 +1,122 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Structure that stores an HTML element definition. Used by
|
||||
* HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
|
||||
*/
|
||||
class HTMLPurifier_ElementDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Does the definition work by itself, or is it created solely
|
||||
* for the purpose of merging into another definition?
|
||||
*/
|
||||
var $standalone = true;
|
||||
|
||||
/**
|
||||
* Associative array of attribute name to HTMLPurifier_AttrDef
|
||||
* @note Before being processed by HTMLPurifier_AttrCollections
|
||||
* when modules are finalized during
|
||||
* HTMLPurifier_HTMLDefinition->setup(), this array may also
|
||||
* contain an array at index 0 that indicates which attribute
|
||||
* collections to load into the full array. It may also
|
||||
* contain string indentifiers in lieu of HTMLPurifier_AttrDef,
|
||||
* see HTMLPurifier_AttrTypes on how they are expanded during
|
||||
* HTMLPurifier_HTMLDefinition->setup() processing.
|
||||
* @public
|
||||
*/
|
||||
var $attr = array();
|
||||
|
||||
/**
|
||||
* Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
|
||||
* @public
|
||||
*/
|
||||
var $attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
|
||||
* @public
|
||||
*/
|
||||
var $attr_transform_post = array();
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* HTMLPurifier_ChildDef of this tag.
|
||||
* @public
|
||||
*/
|
||||
var $child;
|
||||
|
||||
/**
|
||||
* Abstract string representation of internal ChildDef rules. See
|
||||
* HTMLPurifier_ContentSets for how this is parsed and then transformed
|
||||
* into an HTMLPurifier_ChildDef.
|
||||
* @public
|
||||
*/
|
||||
var $content_model;
|
||||
|
||||
/**
|
||||
* Value of $child->type, used to determine which ChildDef to use,
|
||||
* used in combination with $content_model.
|
||||
* @public
|
||||
*/
|
||||
var $content_model_type;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Lookup table of tags that close this tag. Used during parsing
|
||||
* to make sure we don't attempt to nest unclosed tags.
|
||||
* @public
|
||||
*/
|
||||
var $auto_close = array();
|
||||
|
||||
/**
|
||||
* Does the element have a content model (#PCDATA | Inline)*? This
|
||||
* is important for chameleon ins and del processing in
|
||||
* HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
|
||||
* have to worry about this one.
|
||||
* @public
|
||||
*/
|
||||
var $descendants_are_inline;
|
||||
|
||||
/**
|
||||
* Lookup table of tags excluded from all descendants of this tag.
|
||||
* @public
|
||||
*/
|
||||
var $excludes = array();
|
||||
|
||||
/**
|
||||
* Merges the values of another element definition into this one.
|
||||
* Values from the new element def take precedence if a value is
|
||||
* not mergeable.
|
||||
*/
|
||||
function mergeIn($def) {
|
||||
|
||||
// later keys takes precedence
|
||||
foreach($def->attr as $k => $v) {
|
||||
if ($k == 0) {
|
||||
// merge in the includes
|
||||
// sorry, no way to override an include
|
||||
foreach ($v as $v2) {
|
||||
$def->attr[0][] = $v2;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
$this->attr[$k] = $v;
|
||||
}
|
||||
foreach($def->attr_transform_pre as $k => $v) $this->attr_transform_pre[$k] = $v;
|
||||
foreach($def->attr_transform_post as $k => $v) $this->attr_transform_post[$k] = $v;
|
||||
foreach($def->auto_close as $k => $v) $this->auto_close[$k] = $v;
|
||||
foreach($def->excludes as $k => $v) $this->excludes[$k] = $v;
|
||||
|
||||
if(!is_null($def->child)) $this->child = $def->child;
|
||||
if(!empty($def->content_model)) $this->content_model .= ' | ' . $def->content_model;
|
||||
if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
|
||||
if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
403
lib/htmlpurifier/HTMLPurifier/Encoder.php
Normal file
403
lib/htmlpurifier/HTMLPurifier/Encoder.php
Normal file
|
@ -0,0 +1,403 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/EntityLookup.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Encoding', 'utf-8', 'istring',
|
||||
'If for some reason you are unable to convert all webpages to UTF-8, '.
|
||||
'you can use this directive as a stop-gap compatibility change to '.
|
||||
'let HTML Purifier deal with non UTF-8 input. This technique has '.
|
||||
'notable deficiencies: absolutely no characters outside of the selected '.
|
||||
'character encoding will be preserved, not even the ones that have '.
|
||||
'been ampersand escaped (this is due to a UTF-8 specific <em>feature</em> '.
|
||||
'that automatically resolves all entities), making it pretty useless '.
|
||||
'for anything except the most I18N-blind applications, although '.
|
||||
'%Core.EscapeNonASCIICharacters offers fixes this trouble with '.
|
||||
'another tradeoff. This directive '.
|
||||
'only accepts ISO-8859-1 if iconv is not enabled.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'EscapeNonASCIICharacters', false, 'bool',
|
||||
'This directive overcomes a deficiency in %Core.Encoding by blindly '.
|
||||
'converting all non-ASCII characters into decimal numeric entities before '.
|
||||
'converting it to its native encoding. This means that even '.
|
||||
'characters that can be expressed in the non-UTF-8 encoding will '.
|
||||
'be entity-ized, which can be a real downer for encodings like Big5. '.
|
||||
'It also assumes that the ASCII repetoire is available, although '.
|
||||
'this is the case for almost all encodings. Anyway, use UTF-8! This '.
|
||||
'directive has been available since 1.4.0.'
|
||||
);
|
||||
|
||||
if ( !function_exists('iconv') ) {
|
||||
// only encodings with native PHP support
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Core', 'Encoding', array(
|
||||
'utf-8',
|
||||
'iso-8859-1'
|
||||
)
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||
'Core', 'Encoding', array(
|
||||
'iso8859-1' => 'iso-8859-1'
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Test', 'ForceNoIconv', false, 'bool',
|
||||
'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
|
||||
'exist and use only pure PHP implementations.'
|
||||
);
|
||||
|
||||
/**
|
||||
* A UTF-8 specific character encoder that handles cleaning and transforming.
|
||||
* @note All functions in this class should be static.
|
||||
*/
|
||||
class HTMLPurifier_Encoder
|
||||
{
|
||||
|
||||
/**
|
||||
* Constructor throws fatal error if you attempt to instantiate class
|
||||
*/
|
||||
function HTMLPurifier_Encoder() {
|
||||
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans a UTF-8 string for well-formedness and SGML validity
|
||||
*
|
||||
* It will parse according to UTF-8 and return a valid UTF8 string, with
|
||||
* non-SGML codepoints excluded.
|
||||
*
|
||||
* @static
|
||||
* @note Just for reference, the non-SGML code points are 0 to 31 and
|
||||
* 127 to 159, inclusive. However, we allow code points 9, 10
|
||||
* and 13, which are the tab, line feed and carriage return
|
||||
* respectively. 128 and above the code points map to multibyte
|
||||
* UTF-8 representations.
|
||||
*
|
||||
* @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
|
||||
* hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
|
||||
* LGPL license. Notes on what changed are inside, but in general,
|
||||
* the original code transformed UTF-8 text into an array of integer
|
||||
* Unicode codepoints. Understandably, transforming that back to
|
||||
* a string would be somewhat expensive, so the function was modded to
|
||||
* directly operate on the string. However, this discourages code
|
||||
* reuse, and the logic enumerated here would be useful for any
|
||||
* function that needs to be able to understand UTF-8 characters.
|
||||
* As of right now, only smart lossless character encoding converters
|
||||
* would need that, and I'm probably not going to implement them.
|
||||
* Once again, PHP 6 should solve all our problems.
|
||||
*/
|
||||
function cleanUTF8($str, $force_php = false) {
|
||||
|
||||
static $non_sgml_chars = array();
|
||||
if (empty($non_sgml_chars)) {
|
||||
for ($i = 0; $i <= 31; $i++) {
|
||||
// non-SGML ASCII chars
|
||||
// save \r, \t and \n
|
||||
if ($i == 9 || $i == 13 || $i == 10) continue;
|
||||
$non_sgml_chars[chr($i)] = '';
|
||||
}
|
||||
for ($i = 127; $i <= 159; $i++) {
|
||||
$non_sgml_chars[HTMLPurifier_Encoder::unichr($i)] = '';
|
||||
}
|
||||
}
|
||||
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
|
||||
if ($iconv && !$force_php) {
|
||||
// do the shortcut way
|
||||
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
||||
return strtr($str, $non_sgml_chars);
|
||||
}
|
||||
|
||||
$mState = 0; // cached expected number of octets after the current octet
|
||||
// until the beginning of the next UTF8 character sequence
|
||||
$mUcs4 = 0; // cached Unicode character
|
||||
$mBytes = 1; // cached expected number of octets in the current sequence
|
||||
|
||||
// original code involved an $out that was an array of Unicode
|
||||
// codepoints. Instead of having to convert back into UTF-8, we've
|
||||
// decided to directly append valid UTF-8 characters onto a string
|
||||
// $out once they're done. $char accumulates raw bytes, while $mUcs4
|
||||
// turns into the Unicode code point, so there's some redundancy.
|
||||
|
||||
$out = '';
|
||||
$char = '';
|
||||
|
||||
$len = strlen($str);
|
||||
for($i = 0; $i < $len; $i++) {
|
||||
$in = ord($str{$i});
|
||||
$char .= $str[$i]; // append byte to char
|
||||
if (0 == $mState) {
|
||||
// When mState is zero we expect either a US-ASCII character
|
||||
// or a multi-octet sequence.
|
||||
if (0 == (0x80 & ($in))) {
|
||||
// US-ASCII, pass straight through.
|
||||
if (($in <= 31 || $in == 127) &&
|
||||
!($in == 9 || $in == 13 || $in == 10) // save \r\t\n
|
||||
) {
|
||||
// control characters, remove
|
||||
} else {
|
||||
$out .= $char;
|
||||
}
|
||||
// reset
|
||||
$char = '';
|
||||
$mBytes = 1;
|
||||
} elseif (0xC0 == (0xE0 & ($in))) {
|
||||
// First octet of 2 octet sequence
|
||||
$mUcs4 = ($in);
|
||||
$mUcs4 = ($mUcs4 & 0x1F) << 6;
|
||||
$mState = 1;
|
||||
$mBytes = 2;
|
||||
} elseif (0xE0 == (0xF0 & ($in))) {
|
||||
// First octet of 3 octet sequence
|
||||
$mUcs4 = ($in);
|
||||
$mUcs4 = ($mUcs4 & 0x0F) << 12;
|
||||
$mState = 2;
|
||||
$mBytes = 3;
|
||||
} elseif (0xF0 == (0xF8 & ($in))) {
|
||||
// First octet of 4 octet sequence
|
||||
$mUcs4 = ($in);
|
||||
$mUcs4 = ($mUcs4 & 0x07) << 18;
|
||||
$mState = 3;
|
||||
$mBytes = 4;
|
||||
} elseif (0xF8 == (0xFC & ($in))) {
|
||||
// First octet of 5 octet sequence.
|
||||
//
|
||||
// This is illegal because the encoded codepoint must be
|
||||
// either:
|
||||
// (a) not the shortest form or
|
||||
// (b) outside the Unicode range of 0-0x10FFFF.
|
||||
// Rather than trying to resynchronize, we will carry on
|
||||
// until the end of the sequence and let the later error
|
||||
// handling code catch it.
|
||||
$mUcs4 = ($in);
|
||||
$mUcs4 = ($mUcs4 & 0x03) << 24;
|
||||
$mState = 4;
|
||||
$mBytes = 5;
|
||||
} elseif (0xFC == (0xFE & ($in))) {
|
||||
// First octet of 6 octet sequence, see comments for 5
|
||||
// octet sequence.
|
||||
$mUcs4 = ($in);
|
||||
$mUcs4 = ($mUcs4 & 1) << 30;
|
||||
$mState = 5;
|
||||
$mBytes = 6;
|
||||
} else {
|
||||
// Current octet is neither in the US-ASCII range nor a
|
||||
// legal first octet of a multi-octet sequence.
|
||||
$mState = 0;
|
||||
$mUcs4 = 0;
|
||||
$mBytes = 1;
|
||||
$char = '';
|
||||
}
|
||||
} else {
|
||||
// When mState is non-zero, we expect a continuation of the
|
||||
// multi-octet sequence
|
||||
if (0x80 == (0xC0 & ($in))) {
|
||||
// Legal continuation.
|
||||
$shift = ($mState - 1) * 6;
|
||||
$tmp = $in;
|
||||
$tmp = ($tmp & 0x0000003F) << $shift;
|
||||
$mUcs4 |= $tmp;
|
||||
|
||||
if (0 == --$mState) {
|
||||
// End of the multi-octet sequence. mUcs4 now contains
|
||||
// the final Unicode codepoint to be output
|
||||
|
||||
// Check for illegal sequences and codepoints.
|
||||
|
||||
// From Unicode 3.1, non-shortest form is illegal
|
||||
if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
|
||||
((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
|
||||
((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
|
||||
(4 < $mBytes) ||
|
||||
// From Unicode 3.2, surrogate characters = illegal
|
||||
(($mUcs4 & 0xFFFFF800) == 0xD800) ||
|
||||
// Codepoints outside the Unicode range are illegal
|
||||
($mUcs4 > 0x10FFFF)
|
||||
) {
|
||||
|
||||
} elseif (0xFEFF != $mUcs4 && // omit BOM
|
||||
!($mUcs4 >= 128 && $mUcs4 <= 159) // omit non-SGML
|
||||
) {
|
||||
$out .= $char;
|
||||
}
|
||||
// initialize UTF8 cache (reset)
|
||||
$mState = 0;
|
||||
$mUcs4 = 0;
|
||||
$mBytes = 1;
|
||||
$char = '';
|
||||
}
|
||||
} else {
|
||||
// ((0xC0 & (*in) != 0x80) && (mState != 0))
|
||||
// Incomplete multi-octet sequence.
|
||||
// used to result in complete fail, but we'll reset
|
||||
$mState = 0;
|
||||
$mUcs4 = 0;
|
||||
$mBytes = 1;
|
||||
$char ='';
|
||||
}
|
||||
}
|
||||
}
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Translates a Unicode codepoint into its corresponding UTF-8 character.
|
||||
* @static
|
||||
* @note Based on Feyd's function at
|
||||
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
|
||||
* which is in public domain.
|
||||
* @note While we're going to do code point parsing anyway, a good
|
||||
* optimization would be to refuse to translate code points that
|
||||
* are non-SGML characters. However, this could lead to duplication.
|
||||
* @note This is very similar to the unichr function in
|
||||
* maintenance/generate-entity-file.php (although this is superior,
|
||||
* due to its sanity checks).
|
||||
*/
|
||||
|
||||
// +----------+----------+----------+----------+
|
||||
// | 33222222 | 22221111 | 111111 | |
|
||||
// | 10987654 | 32109876 | 54321098 | 76543210 | bit
|
||||
// +----------+----------+----------+----------+
|
||||
// | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
|
||||
// | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
|
||||
// | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
|
||||
// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
|
||||
// +----------+----------+----------+----------+
|
||||
// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
|
||||
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
|
||||
// +----------+----------+----------+----------+
|
||||
|
||||
function unichr($code) {
|
||||
if($code > 1114111 or $code < 0 or
|
||||
($code >= 55296 and $code <= 57343) ) {
|
||||
// bits are set outside the "valid" range as defined
|
||||
// by UNICODE 4.1.0
|
||||
return '';
|
||||
}
|
||||
|
||||
$x = $y = $z = $w = 0;
|
||||
if ($code < 128) {
|
||||
// regular ASCII character
|
||||
$x = $code;
|
||||
} else {
|
||||
// set up bits for UTF-8
|
||||
$x = ($code & 63) | 128;
|
||||
if ($code < 2048) {
|
||||
$y = (($code & 2047) >> 6) | 192;
|
||||
} else {
|
||||
$y = (($code & 4032) >> 6) | 128;
|
||||
if($code < 65536) {
|
||||
$z = (($code >> 12) & 15) | 224;
|
||||
} else {
|
||||
$z = (($code >> 12) & 63) | 128;
|
||||
$w = (($code >> 18) & 7) | 240;
|
||||
}
|
||||
}
|
||||
}
|
||||
// set up the actual character
|
||||
$ret = '';
|
||||
if($w) $ret .= chr($w);
|
||||
if($z) $ret .= chr($z);
|
||||
if($y) $ret .= chr($y);
|
||||
$ret .= chr($x);
|
||||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string to UTF-8 based on configuration.
|
||||
* @static
|
||||
*/
|
||||
function convertToUTF8($str, $config, &$context) {
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
if ($encoding === 'utf-8') return $str;
|
||||
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
|
||||
return @iconv($encoding, 'utf-8//IGNORE', $str);
|
||||
} elseif ($encoding === 'iso-8859-1') {
|
||||
return @utf8_encode($str);
|
||||
}
|
||||
trigger_error('Encoding not supported', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string from UTF-8 based on configuration.
|
||||
* @static
|
||||
* @note Currently, this is a lossy conversion, with unexpressable
|
||||
* characters being omitted.
|
||||
*/
|
||||
function convertFromUTF8($str, $config, &$context) {
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
if ($encoding === 'utf-8') return $str;
|
||||
if ($config->get('Core', 'EscapeNonASCIICharacters')) {
|
||||
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
|
||||
}
|
||||
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
|
||||
return @iconv('utf-8', $encoding . '//IGNORE', $str);
|
||||
} elseif ($encoding === 'iso-8859-1') {
|
||||
return @utf8_decode($str);
|
||||
}
|
||||
trigger_error('Encoding not supported', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Lossless (character-wise) conversion of HTML to ASCII
|
||||
* @static
|
||||
* @param $str UTF-8 string to be converted to ASCII
|
||||
* @returns ASCII encoded string with non-ASCII character entity-ized
|
||||
* @warning Adapted from MediaWiki, claiming fair use: this is a common
|
||||
* algorithm. If you disagree with this license fudgery,
|
||||
* implement it yourself.
|
||||
* @note Uses decimal numeric entities since they are best supported.
|
||||
* @note This is a DUMB function: it has no concept of keeping
|
||||
* character entities that the projected character encoding
|
||||
* can allow. We could possibly implement a smart version
|
||||
* but that would require it to also know which Unicode
|
||||
* codepoints the charset supported (not an easy task).
|
||||
* @note Sort of with cleanUTF8() but it assumes that $str is
|
||||
* well-formed UTF-8
|
||||
*/
|
||||
function convertToASCIIDumbLossless($str) {
|
||||
$bytesleft = 0;
|
||||
$result = '';
|
||||
$working = 0;
|
||||
$len = strlen($str);
|
||||
for( $i = 0; $i < $len; $i++ ) {
|
||||
$bytevalue = ord( $str[$i] );
|
||||
if( $bytevalue <= 0x7F ) { //0xxx xxxx
|
||||
$result .= chr( $bytevalue );
|
||||
$bytesleft = 0;
|
||||
} elseif( $bytevalue <= 0xBF ) { //10xx xxxx
|
||||
$working = $working << 6;
|
||||
$working += ($bytevalue & 0x3F);
|
||||
$bytesleft--;
|
||||
if( $bytesleft <= 0 ) {
|
||||
$result .= "&#" . $working . ";";
|
||||
}
|
||||
} elseif( $bytevalue <= 0xDF ) { //110x xxxx
|
||||
$working = $bytevalue & 0x1F;
|
||||
$bytesleft = 1;
|
||||
} elseif( $bytevalue <= 0xEF ) { //1110 xxxx
|
||||
$working = $bytevalue & 0x0F;
|
||||
$bytesleft = 2;
|
||||
} else { //1111 0xxx
|
||||
$working = $bytevalue & 0x07;
|
||||
$bytesleft = 3;
|
||||
}
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
?>
|
46
lib/htmlpurifier/HTMLPurifier/EntityLookup.php
Normal file
46
lib/htmlpurifier/HTMLPurifier/EntityLookup.php
Normal file
|
@ -0,0 +1,46 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Object that provides entity lookup table from entity name to character
|
||||
*/
|
||||
class HTMLPurifier_EntityLookup {
|
||||
|
||||
/**
|
||||
* Assoc array of entity name to character represented.
|
||||
* @public
|
||||
*/
|
||||
var $table;
|
||||
|
||||
/**
|
||||
* Sets up the entity lookup table from the serialized file contents.
|
||||
* @note The serialized contents are versioned, but were generated
|
||||
* using the maintenance script generate_entity_file.php
|
||||
* @warning This is not in constructor to help enforce the Singleton
|
||||
*/
|
||||
function setup($file = false) {
|
||||
if (!$file) {
|
||||
$file = dirname(__FILE__) . '/EntityLookup/entities.ser';
|
||||
}
|
||||
$this->table = unserialize(file_get_contents($file));
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves sole instance of the object.
|
||||
* @static
|
||||
* @param Optional prototype of custom lookup table to overload with.
|
||||
*/
|
||||
function instance($prototype = false) {
|
||||
// no references, since PHP doesn't copy unless modified
|
||||
static $instance = null;
|
||||
if ($prototype) {
|
||||
$instance = $prototype;
|
||||
} elseif (!$instance) {
|
||||
$instance = new HTMLPurifier_EntityLookup();
|
||||
$instance->setup();
|
||||
}
|
||||
return $instance;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
1
lib/htmlpurifier/HTMLPurifier/EntityLookup/entities.ser
Normal file
1
lib/htmlpurifier/HTMLPurifier/EntityLookup/entities.ser
Normal file
File diff suppressed because one or more lines are too long
158
lib/htmlpurifier/HTMLPurifier/EntityParser.php
Normal file
158
lib/htmlpurifier/HTMLPurifier/EntityParser.php
Normal file
|
@ -0,0 +1,158 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/EntityLookup.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
||||
// if want to implement error collecting here, we'll need to use some sort
|
||||
// of global data (probably trigger_error) because it's impossible to pass
|
||||
// $config or $context to the callback functions.
|
||||
|
||||
/**
|
||||
* Handles referencing and derefencing character entities
|
||||
*/
|
||||
class HTMLPurifier_EntityParser
|
||||
{
|
||||
|
||||
/**
|
||||
* Reference to entity lookup table.
|
||||
* @protected
|
||||
*/
|
||||
var $_entity_lookup;
|
||||
|
||||
/**
|
||||
* Callback regex string for parsing entities.
|
||||
* @protected
|
||||
*/
|
||||
var $_substituteEntitiesRegex =
|
||||
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z]+));?/';
|
||||
// 1. hex 2. dec 3. string
|
||||
|
||||
|
||||
/**
|
||||
* Decimal to parsed string conversion table for special entities.
|
||||
* @protected
|
||||
*/
|
||||
var $_special_dec2str =
|
||||
array(
|
||||
34 => '"',
|
||||
38 => '&',
|
||||
39 => "'",
|
||||
60 => '<',
|
||||
62 => '>'
|
||||
);
|
||||
|
||||
/**
|
||||
* Stripped entity names to decimal conversion table for special entities.
|
||||
* @protected
|
||||
*/
|
||||
var $_special_ent2dec =
|
||||
array(
|
||||
'quot' => 34,
|
||||
'amp' => 38,
|
||||
'lt' => 60,
|
||||
'gt' => 62
|
||||
);
|
||||
|
||||
/**
|
||||
* Substitutes non-special entities with their parsed equivalents. Since
|
||||
* running this whenever you have parsed character is t3h 5uck, we run
|
||||
* it before everything else.
|
||||
*
|
||||
* @protected
|
||||
* @param $string String to have non-special entities parsed.
|
||||
* @returns Parsed string.
|
||||
*/
|
||||
function substituteNonSpecialEntities($string) {
|
||||
// it will try to detect missing semicolons, but don't rely on it
|
||||
return preg_replace_callback(
|
||||
$this->_substituteEntitiesRegex,
|
||||
array($this, 'nonSpecialEntityCallback'),
|
||||
$string
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function for substituteNonSpecialEntities() that does the work.
|
||||
*
|
||||
* @warning Though this is public in order to let the callback happen,
|
||||
* calling it directly is not recommended.
|
||||
* @param $matches PCRE matches array, with 0 the entire match, and
|
||||
* either index 1, 2 or 3 set with a hex value, dec value,
|
||||
* or string (respectively).
|
||||
* @returns Replacement string.
|
||||
*/
|
||||
|
||||
function nonSpecialEntityCallback($matches) {
|
||||
// replaces all but big five
|
||||
$entity = $matches[0];
|
||||
$is_num = (@$matches[0][1] === '#');
|
||||
if ($is_num) {
|
||||
$is_hex = (@$entity[2] === 'x');
|
||||
$code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
|
||||
|
||||
// abort for special characters
|
||||
if (isset($this->_special_dec2str[$code])) return $entity;
|
||||
|
||||
return HTMLPurifier_Encoder::unichr($code);
|
||||
} else {
|
||||
if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
|
||||
if (!$this->_entity_lookup) {
|
||||
require_once 'HTMLPurifier/EntityLookup.php';
|
||||
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
|
||||
}
|
||||
if (isset($this->_entity_lookup->table[$matches[3]])) {
|
||||
return $this->_entity_lookup->table[$matches[3]];
|
||||
} else {
|
||||
return $entity;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Substitutes only special entities with their parsed equivalents.
|
||||
*
|
||||
* @notice We try to avoid calling this function because otherwise, it
|
||||
* would have to be called a lot (for every parsed section).
|
||||
*
|
||||
* @protected
|
||||
* @param $string String to have non-special entities parsed.
|
||||
* @returns Parsed string.
|
||||
*/
|
||||
function substituteSpecialEntities($string) {
|
||||
return preg_replace_callback(
|
||||
$this->_substituteEntitiesRegex,
|
||||
array($this, 'specialEntityCallback'),
|
||||
$string);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function for substituteSpecialEntities() that does the work.
|
||||
*
|
||||
* This callback has same syntax as nonSpecialEntityCallback().
|
||||
*
|
||||
* @warning Though this is public in order to let the callback happen,
|
||||
* calling it directly is not recommended.
|
||||
* @param $matches PCRE-style matches array, with 0 the entire match, and
|
||||
* either index 1, 2 or 3 set with a hex value, dec value,
|
||||
* or string (respectively).
|
||||
* @returns Replacement string.
|
||||
*/
|
||||
function specialEntityCallback($matches) {
|
||||
$entity = $matches[0];
|
||||
$is_num = (@$matches[0][1] === '#');
|
||||
if ($is_num) {
|
||||
$is_hex = (@$entity[2] === 'x');
|
||||
$int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
|
||||
return isset($this->_special_dec2str[$int]) ?
|
||||
$this->_special_dec2str[$int] :
|
||||
$entity;
|
||||
} else {
|
||||
return isset($this->_special_ent2dec[$matches[3]]) ?
|
||||
$this->_special_ent2dec[$matches[3]] :
|
||||
$entity;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
8
lib/htmlpurifier/HTMLPurifier/Error.php
Normal file
8
lib/htmlpurifier/HTMLPurifier/Error.php
Normal file
|
@ -0,0 +1,8 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Return object from functions that signifies error when null doesn't cut it
|
||||
*/
|
||||
class HTMLPurifier_Error {}
|
||||
|
||||
?>
|
39
lib/htmlpurifier/HTMLPurifier/Filter.php
Normal file
39
lib/htmlpurifier/HTMLPurifier/Filter.php
Normal file
|
@ -0,0 +1,39 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Represents a pre or post processing filter on HTML Purifier's output
|
||||
*
|
||||
* Sometimes, a little ad-hoc fixing of HTML has to be done before
|
||||
* it gets sent through HTML Purifier: you can use filters to acheive
|
||||
* this effect. For instance, YouTube videos can be preserved using
|
||||
* this manner. You could have used a decorator for this task, but
|
||||
* PHP's support for them is not terribly robust, so we're going
|
||||
* to just loop through the filters.
|
||||
*
|
||||
* Filters should be exited first in, last out. If there are three filters,
|
||||
* named 1, 2 and 3, the order of execution should go 1->preFilter,
|
||||
* 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
|
||||
* 1->postFilter.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_Filter
|
||||
{
|
||||
|
||||
/**
|
||||
* Name of the filter for identification purposes
|
||||
*/
|
||||
var $name;
|
||||
|
||||
/**
|
||||
* Pre-processor function, handles HTML before HTML Purifier
|
||||
*/
|
||||
function preFilter($html, $config, &$context) {}
|
||||
|
||||
/**
|
||||
* Post-processor function, handles HTML after HTML Purifier
|
||||
*/
|
||||
function postFilter($html, $config, &$context) {}
|
||||
|
||||
}
|
||||
|
||||
?>
|
34
lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php
Normal file
34
lib/htmlpurifier/HTMLPurifier/Filter/YouTube.php
Normal file
|
@ -0,0 +1,34 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Filter.php';
|
||||
|
||||
class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
|
||||
{
|
||||
|
||||
var $name = 'YouTube preservation';
|
||||
|
||||
function preFilter($html, $config, &$context) {
|
||||
$pre_regex = '#<object[^>]+>.+?'.
|
||||
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
|
||||
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||
return preg_replace($pre_regex, $pre_replace, $html);
|
||||
}
|
||||
|
||||
function postFilter($html, $config, &$context) {
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
|
||||
$post_replace = '<object width="425" height="350" '.
|
||||
'data="http://www.youtube.com/v/\1">'.
|
||||
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||
'<param name="wmode" value="transparent"></param>'.
|
||||
'<!--[if IE]>'.
|
||||
'<embed src="http://www.youtube.com/v/\1"'.
|
||||
'type="application/x-shockwave-flash"'.
|
||||
'wmode="transparent" width="425" height="350" />'.
|
||||
'<![endif]-->'.
|
||||
'</object>';
|
||||
return preg_replace($post_regex, $post_replace, $html);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
158
lib/htmlpurifier/HTMLPurifier/Generator.php
Normal file
158
lib/htmlpurifier/HTMLPurifier/Generator.php
Normal file
|
@ -0,0 +1,158 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'CleanUTF8DuringGeneration', false, 'bool',
|
||||
'When true, HTMLPurifier_Generator will also check all strings it '.
|
||||
'escapes for UTF-8 well-formedness as a defense in depth measure. '.
|
||||
'This could cause a considerable performance impact, and is not '.
|
||||
'strictly necessary due to the fact that the Lexers should have '.
|
||||
'ensured that all the UTF-8 strings were well-formed. Note that '.
|
||||
'the configuration value is only read at the beginning of '.
|
||||
'generateFromTokens.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'XHTML', true, 'bool',
|
||||
'Determines whether or not output is XHTML or not. When disabled, HTML '.
|
||||
'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
|
||||
'such as boolean attribute expansion and trailing slashes in empty tags. '.
|
||||
'This directive was available since 1.1.'
|
||||
);
|
||||
|
||||
// extension constraints could be factored into ConfigSchema
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'TidyFormat', false, 'bool',
|
||||
'<p>Determines whether or not to run Tidy on the final output for pretty '.
|
||||
'formatting reasons, such as indentation and wrap.</p><p>This can greatly '.
|
||||
'improve readability for editors who are hand-editing the HTML, but is '.
|
||||
'by no means necessary as HTML Purifier has already fixed all major '.
|
||||
'errors the HTML may have had. Tidy is a non-default extension, and this directive '.
|
||||
'will silently fail if Tidy is not available.</p><p>If you are looking to make '.
|
||||
'the overall look of your page\'s source better, I recommend running Tidy '.
|
||||
'on the entire page rather than just user-content (after all, the '.
|
||||
'indentation relative to the containing blocks will be incorrect).</p><p>This '.
|
||||
'directive was available since 1.1.1.</p>'
|
||||
);
|
||||
|
||||
/**
|
||||
* Generates HTML from tokens.
|
||||
*/
|
||||
class HTMLPurifier_Generator
|
||||
{
|
||||
|
||||
/**
|
||||
* Bool cache of %Core.CleanUTF8DuringGeneration
|
||||
* @private
|
||||
*/
|
||||
var $_clean_utf8 = false;
|
||||
|
||||
/**
|
||||
* Bool cache of %Core.XHTML
|
||||
* @private
|
||||
*/
|
||||
var $_xhtml = true;
|
||||
|
||||
/**
|
||||
* Generates HTML from an array of tokens.
|
||||
* @param $tokens Array of HTMLPurifier_Token
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @return Generated HTML
|
||||
*/
|
||||
function generateFromTokens($tokens, $config, &$context) {
|
||||
$html = '';
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
||||
$this->_xhtml = $config->get('Core', 'XHTML');
|
||||
if (!$tokens) return '';
|
||||
foreach ($tokens as $token) {
|
||||
$html .= $this->generateFromToken($token);
|
||||
}
|
||||
if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
|
||||
|
||||
$tidy_options = array(
|
||||
'indent'=> true,
|
||||
'output-xhtml' => $this->_xhtml,
|
||||
'show-body-only' => true,
|
||||
'indent-spaces' => 2,
|
||||
'wrap' => 68,
|
||||
);
|
||||
if (version_compare(PHP_VERSION, '5', '<')) {
|
||||
tidy_set_encoding('utf8');
|
||||
foreach ($tidy_options as $key => $value) {
|
||||
tidy_setopt($key, $value);
|
||||
}
|
||||
tidy_parse_string($html);
|
||||
tidy_clean_repair();
|
||||
$html = tidy_get_output();
|
||||
} else {
|
||||
$tidy = new Tidy;
|
||||
$tidy->parseString($html, $tidy_options, 'utf8');
|
||||
$tidy->cleanRepair();
|
||||
$html = (string) $tidy;
|
||||
}
|
||||
}
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates HTML from a single token.
|
||||
* @param $token HTMLPurifier_Token object.
|
||||
* @return Generated HTML
|
||||
*/
|
||||
function generateFromToken($token) {
|
||||
if (!isset($token->type)) return '';
|
||||
if ($token->type == 'start') {
|
||||
$attr = $this->generateAttributes($token->attr);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
||||
|
||||
} elseif ($token->type == 'end') {
|
||||
return '</' . $token->name . '>';
|
||||
|
||||
} elseif ($token->type == 'empty') {
|
||||
$attr = $this->generateAttributes($token->attr);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||
( $this->_xhtml ? ' /': '' )
|
||||
. '>';
|
||||
|
||||
} elseif ($token->type == 'text') {
|
||||
return $this->escape($token->data);
|
||||
|
||||
} else {
|
||||
return '';
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates attribute declarations from attribute array.
|
||||
* @param $assoc_array_of_attributes Attribute array
|
||||
* @return Generate HTML fragment for insertion.
|
||||
*/
|
||||
function generateAttributes($assoc_array_of_attributes) {
|
||||
$html = '';
|
||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||
if (!$this->_xhtml) {
|
||||
// remove namespaced attributes
|
||||
if (strpos($key, ':') !== false) continue;
|
||||
// also needed: check for attribute minimization
|
||||
}
|
||||
$html .= $key.'="'.$this->escape($value).'" ';
|
||||
}
|
||||
return rtrim($html);
|
||||
}
|
||||
|
||||
/**
|
||||
* Escapes raw text data.
|
||||
* @param $string String data to escape for HTML.
|
||||
* @return String escaped data.
|
||||
*/
|
||||
function escape($string) {
|
||||
if ($this->_clean_utf8) $string = HTMLPurifier_Lexer::cleanUTF8($string);
|
||||
return htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
281
lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php
Normal file
281
lib/htmlpurifier/HTMLPurifier/HTMLDefinition.php
Normal file
|
@ -0,0 +1,281 @@
|
|||
<?php
|
||||
|
||||
// components
|
||||
require_once 'HTMLPurifier/HTMLModuleManager.php';
|
||||
|
||||
// this definition and its modules MUST NOT define configuration directives
|
||||
// outside of the HTML or Attr namespaces
|
||||
|
||||
// will be superceded by more accurate doctype declaration schemes
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Strict', false, 'bool',
|
||||
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'BlockWrapper', 'p', 'string',
|
||||
'String name of element to wrap inline elements that are inside a block '.
|
||||
'context. This only occurs in the children of blockquote in strict mode. '.
|
||||
'Example: by default value, <code><blockquote>Foo</blockquote></code> '.
|
||||
'would become <code><blockquote><p>Foo</p></blockquote></code>. The '.
|
||||
'<code><p></code> tags can be replaced '.
|
||||
'with whatever you desire, as long as it is a block level element. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Parent', 'div', 'string',
|
||||
'String name of element that HTML fragment passed to library will be '.
|
||||
'inserted in. An interesting variation would be using span as the '.
|
||||
'parent element, meaning that only inline tags would be allowed. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'AllowedElements', null, 'lookup/null',
|
||||
'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
|
||||
'can overload it with your own list of tags to allow. Note that this '.
|
||||
'method is subtractive: it does its job by taking away from HTML Purifier '.
|
||||
'usual feature set, so you cannot add a tag that HTML Purifier never '.
|
||||
'supported in the first place (like embed, form or head). If you change this, you '.
|
||||
'probably also want to change %HTML.AllowedAttributes. '.
|
||||
'<strong>Warning:</strong> If another directive conflicts with the '.
|
||||
'elements here, <em>that</em> directive will win and override. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'AllowedAttributes', null, 'lookup/null',
|
||||
'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
|
||||
'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
|
||||
'(style, id, class, dir, lang, xml:lang).'.
|
||||
'<strong>Warning:</strong> If another directive conflicts with the '.
|
||||
'elements here, <em>that</em> directive will win and override. For '.
|
||||
'example, %HTML.EnableAttrID will take precedence over *.id in this '.
|
||||
'directive. You must set that directive to true before you can use '.
|
||||
'IDs at all. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Definition of the purified HTML that describes allowed children,
|
||||
* attributes, and many other things.
|
||||
*
|
||||
* Conventions:
|
||||
*
|
||||
* All member variables that are prefixed with info
|
||||
* (including the main $info array) are used by HTML Purifier internals
|
||||
* and should not be directly edited when customizing the HTMLDefinition.
|
||||
* They can usually be set via configuration directives or custom
|
||||
* modules.
|
||||
*
|
||||
* On the other hand, member variables without the info prefix are used
|
||||
* internally by the HTMLDefinition and MUST NOT be used by other HTML
|
||||
* Purifier internals. Many of them, however, are public, and may be
|
||||
* edited by userspace code to tweak the behavior of HTMLDefinition.
|
||||
*
|
||||
* HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
|
||||
* rule: in the interest of comprehensiveness, it will sniff everything.
|
||||
*/
|
||||
class HTMLPurifier_HTMLDefinition
|
||||
{
|
||||
|
||||
/** FULLY-PUBLIC VARIABLES */
|
||||
|
||||
/**
|
||||
* Associative array of element names to HTMLPurifier_ElementDef
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Associative array of global attribute name to attribute definition.
|
||||
* @public
|
||||
*/
|
||||
var $info_global_attr = array();
|
||||
|
||||
/**
|
||||
* String name of parent element HTML will be going into.
|
||||
* @public
|
||||
*/
|
||||
var $info_parent = 'div';
|
||||
|
||||
/**
|
||||
* Definition for parent element, allows parent element to be a
|
||||
* tag that's not allowed inside the HTML fragment.
|
||||
* @public
|
||||
*/
|
||||
var $info_parent_def;
|
||||
|
||||
/**
|
||||
* String name of element used to wrap inline elements in block context
|
||||
* @note This is rarely used except for BLOCKQUOTEs in strict mode
|
||||
* @public
|
||||
*/
|
||||
var $info_block_wrapper = 'p';
|
||||
|
||||
/**
|
||||
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
|
||||
* @public
|
||||
*/
|
||||
var $info_tag_transform = array();
|
||||
|
||||
/**
|
||||
* Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_post = array();
|
||||
|
||||
/**
|
||||
* Nested lookup array of content set name (Block, Inline) to
|
||||
* element name to whether or not it belongs in that content set.
|
||||
* @public
|
||||
*/
|
||||
var $info_content_sets = array();
|
||||
|
||||
|
||||
|
||||
/** PUBLIC BUT INTERNAL VARIABLES */
|
||||
|
||||
var $setup = false; /**< Has setup() been called yet? */
|
||||
var $config; /**< Temporary instance of HTMLPurifier_Config */
|
||||
|
||||
var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
|
||||
|
||||
/**
|
||||
* Performs low-cost, preliminary initialization.
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
*/
|
||||
function HTMLPurifier_HTMLDefinition(&$config) {
|
||||
$this->config =& $config;
|
||||
$this->manager = new HTMLPurifier_HTMLModuleManager();
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes internals into form usable by HTMLPurifier internals.
|
||||
* Modifying the definition after calling this function should not
|
||||
* be done.
|
||||
*/
|
||||
function setup() {
|
||||
|
||||
// multiple call guard
|
||||
if ($this->setup) {return;} else {$this->setup = true;}
|
||||
|
||||
$this->processModules();
|
||||
$this->setupConfigStuff();
|
||||
|
||||
unset($this->config);
|
||||
unset($this->manager);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract out the information from the manager
|
||||
*/
|
||||
function processModules() {
|
||||
|
||||
$this->manager->setup($this->config);
|
||||
|
||||
foreach ($this->manager->activeModules as $module) {
|
||||
foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v;
|
||||
foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v;
|
||||
foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v;
|
||||
}
|
||||
|
||||
$this->info = $this->manager->getElements($this->config);
|
||||
$this->info_content_sets = $this->manager->contentSets->lookup;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets up stuff based on config. We need a better way of doing this.
|
||||
*/
|
||||
function setupConfigStuff() {
|
||||
|
||||
$block_wrapper = $this->config->get('HTML', 'BlockWrapper');
|
||||
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
|
||||
$this->info_block_wrapper = $block_wrapper;
|
||||
} else {
|
||||
trigger_error('Cannot use non-block element as block wrapper.',
|
||||
E_USER_ERROR);
|
||||
}
|
||||
|
||||
$parent = $this->config->get('HTML', 'Parent');
|
||||
$def = $this->manager->getElement($parent, $this->config);
|
||||
if ($def) {
|
||||
$this->info_parent = $parent;
|
||||
$this->info_parent_def = $def;
|
||||
} else {
|
||||
trigger_error('Cannot use unrecognized element as parent.',
|
||||
E_USER_ERROR);
|
||||
$this->info_parent_def = $this->manager->getElement(
|
||||
$this->info_parent, $this->config);
|
||||
}
|
||||
|
||||
// support template text
|
||||
$support = "(for information on implementing this, see the ".
|
||||
"support forums) ";
|
||||
|
||||
// setup allowed elements, SubtractiveWhitelist module
|
||||
$allowed_elements = $this->config->get('HTML', 'AllowedElements');
|
||||
if (is_array($allowed_elements)) {
|
||||
foreach ($this->info as $name => $d) {
|
||||
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
||||
unset($allowed_elements[$name]);
|
||||
}
|
||||
// emit errors
|
||||
foreach ($allowed_elements as $element => $d) {
|
||||
trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
|
||||
$allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
|
||||
$allowed_attributes_mutable = $allowed_attributes; // by copy!
|
||||
if (is_array($allowed_attributes)) {
|
||||
foreach ($this->info_global_attr as $attr_key => $info) {
|
||||
if (!isset($allowed_attributes["*.$attr_key"])) {
|
||||
unset($this->info_global_attr[$attr_key]);
|
||||
} elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
|
||||
unset($allowed_attributes_mutable["*.$attr_key"]);
|
||||
}
|
||||
}
|
||||
foreach ($this->info as $tag => $info) {
|
||||
foreach ($info->attr as $attr => $attr_info) {
|
||||
if (!isset($allowed_attributes["$tag.$attr"]) &&
|
||||
!isset($allowed_attributes["*.$attr"])) {
|
||||
unset($this->info[$tag]->attr[$attr]);
|
||||
} else {
|
||||
if (isset($allowed_attributes_mutable["$tag.$attr"])) {
|
||||
unset($allowed_attributes_mutable["$tag.$attr"]);
|
||||
} elseif (isset($allowed_attributes_mutable["*.$attr"])) {
|
||||
unset($allowed_attributes_mutable["*.$attr"]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// emit errors
|
||||
foreach ($allowed_attributes_mutable as $elattr => $d) {
|
||||
list($element, $attribute) = explode('.', $elattr);
|
||||
if ($element == '*') {
|
||||
trigger_error("Global attribute '$attribute' is not ".
|
||||
"supported in any elements $support",
|
||||
E_USER_WARNING);
|
||||
} else {
|
||||
trigger_error("Attribute '$attribute' in element '$element' not supported $support",
|
||||
E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
?>
|
125
lib/htmlpurifier/HTMLPurifier/HTMLModule.php
Normal file
125
lib/htmlpurifier/HTMLPurifier/HTMLModule.php
Normal file
|
@ -0,0 +1,125 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Represents an XHTML 1.1 module, with information on elements, tags
|
||||
* and attributes.
|
||||
* @note Even though this is technically XHTML 1.1, it is also used for
|
||||
* regular HTML parsing. We are using modulization as a convenient
|
||||
* way to represent the internals of HTMLDefinition, and our
|
||||
* implementation is by no means conforming and does not directly
|
||||
* use the normative DTDs or XML schemas.
|
||||
* @note The public variables in a module should almost directly
|
||||
* correspond to the variables in HTMLPurifier_HTMLDefinition.
|
||||
* However, the prefix info carries no special meaning in these
|
||||
* objects (include it anyway if that's the correspondence though).
|
||||
*/
|
||||
|
||||
class HTMLPurifier_HTMLModule
|
||||
{
|
||||
/**
|
||||
* Short unique string identifier of the module
|
||||
*/
|
||||
var $name;
|
||||
|
||||
/**
|
||||
* Dynamically set integer that specifies when the module was loaded in.
|
||||
*/
|
||||
var $order;
|
||||
|
||||
/**
|
||||
* Informally, a list of elements this module changes. Not used in
|
||||
* any significant way.
|
||||
* @protected
|
||||
*/
|
||||
var $elements = array();
|
||||
|
||||
/**
|
||||
* Associative array of element names to element definitions.
|
||||
* Some definitions may be incomplete, to be merged in later
|
||||
* with the full definition.
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Associative array of content set names to content set additions.
|
||||
* This is commonly used to, say, add an A element to the Inline
|
||||
* content set. This corresponds to an internal variable $content_sets
|
||||
* and NOT info_content_sets member variable of HTMLDefinition.
|
||||
* @public
|
||||
*/
|
||||
var $content_sets = array();
|
||||
|
||||
/**
|
||||
* Associative array of attribute collection names to attribute
|
||||
* collection additions. More rarely used for adding attributes to
|
||||
* the global collections. Example is the StyleAttribute module adding
|
||||
* the style attribute to the Core. Corresponds to HTMLDefinition's
|
||||
* attr_collections->info, since the object's data is only info,
|
||||
* with extra behavior associated with it.
|
||||
* @public
|
||||
*/
|
||||
var $attr_collections = array();
|
||||
|
||||
/**
|
||||
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
|
||||
* @public
|
||||
*/
|
||||
var $info_tag_transform = array();
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrTransform to be performed before validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrTransform to be performed after validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_post = array();
|
||||
|
||||
/**
|
||||
* Boolean flag that indicates whether or not getChildDef is implemented.
|
||||
* For optimization reasons: may save a call to a function. Be sure
|
||||
* to set it if you do implement getChildDef(), otherwise it will have
|
||||
* no effect!
|
||||
* @public
|
||||
*/
|
||||
var $defines_child_def = false;
|
||||
|
||||
/**
|
||||
* Retrieves a proper HTMLPurifier_ChildDef subclass based on
|
||||
* content_model and content_model_type member variables of
|
||||
* the HTMLPurifier_ElementDef class. There is a similar function
|
||||
* in HTMLPurifier_HTMLDefinition.
|
||||
* @param $def HTMLPurifier_ElementDef instance
|
||||
* @return HTMLPurifier_ChildDef subclass
|
||||
* @public
|
||||
*/
|
||||
function getChildDef($def) {return false;}
|
||||
|
||||
/**
|
||||
* Hook method that lets module perform arbitrary operations on
|
||||
* HTMLPurifier_HTMLDefinition before the module gets processed.
|
||||
* @param $definition Reference to HTMLDefinition being setup
|
||||
*/
|
||||
function preProcess(&$definition) {}
|
||||
|
||||
/**
|
||||
* Hook method that lets module perform arbitrary operations
|
||||
* on HTMLPurifier_HTMLDefinition after the module gets processed.
|
||||
* @param $definition Reference to HTMLDefinition being setup
|
||||
*/
|
||||
function postProcess(&$definition) {}
|
||||
|
||||
/**
|
||||
* Hook method that is called when a module gets registered to
|
||||
* the definition.
|
||||
* @param $definition Reference to HTMLDefinition being setup
|
||||
*/
|
||||
function setup(&$definition) {}
|
||||
|
||||
}
|
||||
|
||||
?>
|
43
lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php
Normal file
43
lib/htmlpurifier/HTMLPurifier/HTMLModule/Bdo.php
Normal file
|
@ -0,0 +1,43 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Bi-directional Text Module, defines elements that
|
||||
* declare directionality of content. Text Extension Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Bdo';
|
||||
var $elements = array('bdo');
|
||||
var $info = array();
|
||||
var $content_sets = array('Inline' => 'bdo');
|
||||
var $attr_collections = array(
|
||||
'I18N' => array('dir' => false)
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_Bdo() {
|
||||
$dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
|
||||
$this->attr_collections['I18N']['dir'] = $dir;
|
||||
$this->info['bdo'] = new HTMLPurifier_ElementDef();
|
||||
$this->info['bdo']->attr = array(
|
||||
0 => array('Core', 'Lang'),
|
||||
'dir' => $dir, // required
|
||||
// The Abstract Module specification has the attribute
|
||||
// inclusions wrong for bdo: bdo allows
|
||||
// xml:lang too (and we'll toss in lang for good measure,
|
||||
// though it is not allowed for XHTML 1.1, this will
|
||||
// be managed with a global attribute transform)
|
||||
);
|
||||
$this->info['bdo']->content_model = '#PCDATA | Inline';
|
||||
$this->info['bdo']->content_model_type = 'optional';
|
||||
// provides fallback behavior if dir's missing (dir is required)
|
||||
$this->info['bdo']->attr_transform_post['required-dir'] =
|
||||
new HTMLPurifier_AttrTransform_BdoDir();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
|
@ -0,0 +1,31 @@
|
|||
<?php
|
||||
|
||||
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
var $name = 'CommonAttributes';
|
||||
|
||||
var $attr_collections = array(
|
||||
'Core' => array(
|
||||
0 => array('Style'),
|
||||
// 'xml:space' => false,
|
||||
'class' => 'NMTOKENS',
|
||||
'id' => 'ID',
|
||||
'title' => 'CDATA',
|
||||
),
|
||||
'Lang' => array(
|
||||
'xml:lang' => false, // see constructor
|
||||
),
|
||||
'I18N' => array(
|
||||
0 => array('Lang'), // proprietary, for xml:lang/lang
|
||||
),
|
||||
'Common' => array(
|
||||
0 => array('Core', 'I18N')
|
||||
)
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_CommonAttributes() {
|
||||
$this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang();
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
46
lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php
Normal file
46
lib/htmlpurifier/HTMLPurifier/HTMLModule/Edit.php
Normal file
|
@ -0,0 +1,46 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
|
||||
* Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Edit';
|
||||
var $elements = array('del', 'ins');
|
||||
var $info = array();
|
||||
var $content_sets = array('Inline' => 'del | ins');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Edit() {
|
||||
foreach ($this->elements as $element) {
|
||||
$this->info[$element] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$element]->attr = array(
|
||||
0 => array('Common'),
|
||||
'cite' => 'URI',
|
||||
// 'datetime' => 'Datetime' // Datetime not implemented
|
||||
);
|
||||
// Inline context ! Block context (exclamation mark is
|
||||
// separator, see getChildDef for parsing)
|
||||
$this->info[$element]->content_model =
|
||||
'#PCDATA | Inline ! #PCDATA | Flow';
|
||||
// HTML 4.01 specifies that ins/del must not contain block
|
||||
// elements when used in an inline context, chameleon is
|
||||
// a complicated workaround to acheive this effect
|
||||
$this->info[$element]->content_model_type = 'chameleon';
|
||||
}
|
||||
}
|
||||
|
||||
var $defines_child_def = true;
|
||||
function getChildDef($def) {
|
||||
if ($def->content_model_type != 'chameleon') return false;
|
||||
$value = explode('!', $def->content_model);
|
||||
return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
37
lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php
Normal file
37
lib/htmlpurifier/HTMLPurifier/HTMLModule/Hypertext.php
Normal file
|
@ -0,0 +1,37 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Hypertext Module, defines hypertext links. Core Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Hypertext';
|
||||
var $elements = array('a');
|
||||
var $info = array();
|
||||
var $content_sets = array('Inline' => 'a');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Hypertext() {
|
||||
$this->info['a'] = new HTMLPurifier_ElementDef();
|
||||
$this->info['a']->attr = array(
|
||||
0 => array('Common'),
|
||||
// 'accesskey' => 'Character',
|
||||
// 'charset' => 'Charset',
|
||||
'href' => 'URI',
|
||||
//'hreflang' => 'LanguageCode',
|
||||
'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
|
||||
'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
|
||||
//'tabindex' => 'Number',
|
||||
//'type' => 'ContentType',
|
||||
);
|
||||
$this->info['a']->content_model = '#PCDATA | Inline';
|
||||
$this->info['a']->content_model_type = 'optional';
|
||||
$this->info['a']->excludes = array('a' => true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
38
lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php
Normal file
38
lib/htmlpurifier/HTMLPurifier/HTMLModule/Image.php
Normal file
|
@ -0,0 +1,38 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Image Module provides basic image embedding.
|
||||
* @note There is specialized code for removing empty images in
|
||||
* HTMLPurifier_Strategy_RemoveForeignElements
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Image';
|
||||
var $elements = array('img');
|
||||
var $info = array();
|
||||
var $content_sets = array('Inline' => 'img');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Image() {
|
||||
$this->info['img'] = new HTMLPurifier_ElementDef();
|
||||
$this->info['img']->attr = array(
|
||||
0 => array('Common'),
|
||||
'alt' => 'Text',
|
||||
'height' => 'Length',
|
||||
'longdesc' => 'URI',
|
||||
'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
|
||||
'width' => 'Length'
|
||||
);
|
||||
$this->info['img']->content_model_type = 'empty';
|
||||
$this->info['img']->attr_transform_post[] =
|
||||
new HTMLPurifier_AttrTransform_ImgRequired();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
60
lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php
Normal file
60
lib/htmlpurifier/HTMLPurifier/HTMLModule/Legacy.php
Normal file
|
@ -0,0 +1,60 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Legacy module defines elements that were previously
|
||||
* deprecated.
|
||||
*
|
||||
* @note Not all legacy elements have been implemented yet, which
|
||||
* is a bit of a reverse problem as compared to browsers! In
|
||||
* addition, this legacy module may implement a bit more than
|
||||
* mandated by XHTML 1.1.
|
||||
*
|
||||
* This module can be used in combination with TransformToStrict in order
|
||||
* to transform as many deprecated elements as possible, but retain
|
||||
* questionably deprecated elements that do not have good alternatives
|
||||
* as well as transform elements that don't have an implementation.
|
||||
* See docs/ref-strictness.txt for more details.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
// incomplete
|
||||
|
||||
var $name = 'Legacy';
|
||||
var $elements = array('u', 's', 'strike');
|
||||
var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Legacy() {
|
||||
// setup new elements
|
||||
foreach ($this->elements as $name) {
|
||||
$this->info[$name] = new HTMLPurifier_ElementDef();
|
||||
// for u, s, strike, as more elements get added, add
|
||||
// conditionals as necessary
|
||||
$this->info[$name]->content_model = 'Inline | #PCDATA';
|
||||
$this->info[$name]->content_model_type = 'optional';
|
||||
$this->info[$name]->attr[0] = array('Common');
|
||||
}
|
||||
|
||||
// setup modifications to old elements
|
||||
foreach ($this->non_standalone_elements as $name) {
|
||||
$this->info[$name] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$name]->standalone = false;
|
||||
}
|
||||
|
||||
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
|
||||
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
|
||||
|
||||
$this->info['address']->content_model = 'Inline | #PCDATA | p';
|
||||
$this->info['address']->content_model_type = 'optional';
|
||||
$this->info['address']->child = false;
|
||||
|
||||
$this->info['blockquote']->content_model = 'Flow | #PCDATA';
|
||||
$this->info['blockquote']->content_model_type = 'optional';
|
||||
$this->info['blockquote']->child = false;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
46
lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php
Normal file
46
lib/htmlpurifier/HTMLPurifier/HTMLModule/List.php
Normal file
|
@ -0,0 +1,46 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 List Module, defines list-oriented elements. Core Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'List';
|
||||
var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li');
|
||||
var $info = array();
|
||||
// According to the abstract schema, the List content set is a fully formed
|
||||
// one or more expr, but it invariably occurs in an optional declaration
|
||||
// so we're not going to do that subtlety. It might cause trouble
|
||||
// if a user defines "List" and expects that multiple lists are
|
||||
// allowed to be specified, but then again, that's not very intuitive.
|
||||
// Furthermore, the actual XML Schema may disagree. Regardless,
|
||||
// we don't have support for such nested expressions without using
|
||||
// the incredibly inefficient and draconic Custom ChildDef.
|
||||
var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List');
|
||||
|
||||
function HTMLPurifier_HTMLModule_List() {
|
||||
foreach ($this->elements as $element) {
|
||||
$this->info[$element] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$element]->attr = array(0 => array('Common'));
|
||||
if ($element == 'li' || $element == 'dd') {
|
||||
$this->info[$element]->content_model = '#PCDATA | Flow';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
} elseif ($element == 'ol' || $element == 'ul') {
|
||||
$this->info[$element]->content_model = 'li';
|
||||
$this->info[$element]->content_model_type = 'required';
|
||||
}
|
||||
}
|
||||
$this->info['dt']->content_model = '#PCDATA | Inline';
|
||||
$this->info['dt']->content_model_type = 'optional';
|
||||
$this->info['dl']->content_model = 'dt | dd';
|
||||
$this->info['dl']->content_model_type = 'required';
|
||||
// this could be a LOT more robust
|
||||
$this->info['li']->auto_close = array('li' => true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
41
lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php
Normal file
41
lib/htmlpurifier/HTMLPurifier/HTMLModule/Presentation.php
Normal file
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Presentation Module, defines simple presentation-related
|
||||
* markup. Text Extension Module.
|
||||
* @note The official XML Schema and DTD specs further divide this into
|
||||
* two modules:
|
||||
* - Block Presentation (hr)
|
||||
* - Inline Presentation (b, big, i, small, sub, sup, tt)
|
||||
* We have chosen not to heed this distinction, as content_sets
|
||||
* provides satisfactory disambiguation.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Presentation';
|
||||
var $elements = array('b', 'big', 'hr', 'i', 'small', 'sub', 'sup', 'tt');
|
||||
var $info = array();
|
||||
var $content_sets = array(
|
||||
'Block' => 'hr',
|
||||
'Inline' => 'b | big | i | small | sub | sup | tt'
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_Presentation() {
|
||||
foreach ($this->elements as $element) {
|
||||
$this->info[$element] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$element]->attr = array(0 => array('Common'));
|
||||
if ($element == 'hr') {
|
||||
$this->info[$element]->content_model_type = 'empty';
|
||||
} else {
|
||||
$this->info[$element]->content_model = '#PCDATA | Inline';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
27
lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php
Normal file
27
lib/htmlpurifier/HTMLPurifier/HTMLModule/StyleAttribute.php
Normal file
|
@ -0,0 +1,27 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
|
||||
* Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'StyleAttribute';
|
||||
var $attr_collections = array(
|
||||
// The inclusion routine differs from the Abstract Modules but
|
||||
// is in line with the DTD and XML Schemas.
|
||||
'Style' => array('style' => false), // see constructor
|
||||
'Core' => array(0 => array('Style'))
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_StyleAttribute() {
|
||||
$this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
88
lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php
Normal file
88
lib/htmlpurifier/HTMLPurifier/HTMLModule/Tables.php
Normal file
|
@ -0,0 +1,88 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Table.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Tables Module, fully defines accessible table elements.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Tables';
|
||||
var $elements = array('caption', 'table', 'td', 'th', 'tr', 'col',
|
||||
'colgroup', 'tbody', 'thead', 'tfoot');
|
||||
var $info = array();
|
||||
var $content_sets = array('Block' => 'table');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Tables() {
|
||||
foreach ($this->elements as $e) {
|
||||
$this->info[$e] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$e]->attr = array(0 => array('Common'));
|
||||
$attr =& $this->info[$e]->attr;
|
||||
if ($e == 'caption') continue;
|
||||
if ($e == 'table'){
|
||||
$attr['border'] = 'Pixels';
|
||||
$attr['cellpadding'] = 'Length';
|
||||
$attr['cellspacing'] = 'Length';
|
||||
$attr['frame'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'void', 'above', 'below', 'hsides', 'lhs', 'rhs',
|
||||
'vsides', 'box', 'border'
|
||||
), false);
|
||||
$attr['rules'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'none', 'groups', 'rows', 'cols', 'all'
|
||||
), false);
|
||||
$attr['summary'] = 'Text';
|
||||
$attr['width'] = 'Length';
|
||||
continue;
|
||||
}
|
||||
if ($e == 'col' || $e == 'colgroup') {
|
||||
$attr['span'] = 'Number';
|
||||
$attr['width'] = 'MultiLength';
|
||||
}
|
||||
if ($e == 'td' || $e == 'th') {
|
||||
$attr['abbr'] = 'Text';
|
||||
$attr['colspan'] = 'Number';
|
||||
$attr['rowspan'] = 'Number';
|
||||
}
|
||||
$attr['align'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'left', 'center', 'right', 'justify', 'char'
|
||||
), false);
|
||||
$attr['valign'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'top', 'middle', 'bottom', 'baseline'
|
||||
), false);
|
||||
$attr['charoff'] = 'Length';
|
||||
}
|
||||
$this->info['caption']->content_model = '#PCDATA | Inline';
|
||||
$this->info['caption']->content_model_type = 'optional';
|
||||
|
||||
// Is done directly because it doesn't leverage substitution
|
||||
// mechanisms. True model is:
|
||||
// 'caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))'
|
||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
||||
|
||||
$this->info['td']->content_model =
|
||||
$this->info['th']->content_model = '#PCDATA | Flow';
|
||||
$this->info['td']->content_model_type =
|
||||
$this->info['th']->content_model_type = 'optional';
|
||||
|
||||
$this->info['tr']->content_model = 'td | th';
|
||||
$this->info['tr']->content_model_type = 'required';
|
||||
|
||||
$this->info['col']->content_model_type = 'empty';
|
||||
|
||||
$this->info['colgroup']->content_model = 'col';
|
||||
$this->info['colgroup']->content_model_type = 'optional';
|
||||
|
||||
$this->info['tbody']->content_model =
|
||||
$this->info['thead']->content_model =
|
||||
$this->info['tfoot']->content_model = 'tr';
|
||||
$this->info['tbody']->content_model_type =
|
||||
$this->info['thead']->content_model_type =
|
||||
$this->info['tfoot']->content_model_type = 'required';
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
78
lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php
Normal file
78
lib/htmlpurifier/HTMLPurifier/HTMLModule/Text.php
Normal file
|
@ -0,0 +1,78 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Text Module, defines basic text containers. Core Module.
|
||||
* @note In the normative XML Schema specification, this module
|
||||
* is further abstracted into the following modules:
|
||||
* - Block Phrasal (address, blockquote, pre, h1, h2, h3, h4, h5, h6)
|
||||
* - Block Structural (div, p)
|
||||
* - Inline Phrasal (abbr, acronym, cite, code, dfn, em, kbd, q, samp, strong, var)
|
||||
* - Inline Structural (br, span)
|
||||
* We have elected not to follow suite, but this may change.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Text';
|
||||
|
||||
var $elements = array('abbr', 'acronym', 'address', 'blockquote',
|
||||
'br', 'cite', 'code', 'dfn', 'div', 'em', 'h1', 'h2', 'h3',
|
||||
'h4', 'h5', 'h6', 'kbd', 'p', 'pre', 'q', 'samp', 'span', 'strong',
|
||||
'var', 'nolink', 'tex', 'algebra'); //moodle modification
|
||||
|
||||
var $info = array();
|
||||
|
||||
var $content_sets = array(
|
||||
'Heading' => 'h1 | h2 | h3 | h4 | h5 | h6',
|
||||
'Block' => 'address | blockquote | div | p | pre | nolink | tex | algebra', //moodle modification
|
||||
'Inline' => 'abbr | acronym | br | cite | code | dfn | em | kbd | q | samp | span | strong | var',
|
||||
'Flow' => 'Heading | Block | Inline'
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_Text() {
|
||||
foreach ($this->elements as $element) {
|
||||
$this->info[$element] = new HTMLPurifier_ElementDef();
|
||||
// attributes
|
||||
if ($element == 'br') {
|
||||
$this->info[$element]->attr = array(0 => array('Core'));
|
||||
} elseif ($element == 'blockquote' || $element == 'q') {
|
||||
$this->info[$element]->attr = array(0 => array('Common'), 'cite' => 'URI');
|
||||
} else {
|
||||
$this->info[$element]->attr = array(0 => array('Common'));
|
||||
}
|
||||
// content models
|
||||
if ($element == 'br') {
|
||||
$this->info[$element]->content_model_type = 'empty';
|
||||
} elseif ($element == 'blockquote') {
|
||||
$this->info[$element]->content_model = 'Heading | Block | List';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
} elseif ($element == 'div') {
|
||||
$this->info[$element]->content_model = '#PCDATA | Flow';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
} else {
|
||||
$this->info[$element]->content_model = '#PCDATA | Inline';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
}
|
||||
}
|
||||
// SGML permits exclusions for all descendants, but this is
|
||||
// not possible with DTDs or XML Schemas. W3C has elected to
|
||||
// use complicated compositions of content_models to simulate
|
||||
// exclusion for children, but we go the simpler, SGML-style
|
||||
// route of flat-out exclusions. Note that the Abstract Module
|
||||
// is blithely unaware of such distinctions.
|
||||
$this->info['pre']->excludes = array_flip(array(
|
||||
'img', 'big', 'small',
|
||||
'object', 'applet', 'font', 'basefont' // generally not allowed
|
||||
));
|
||||
$this->info['p']->auto_close = array_flip(array(
|
||||
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
|
||||
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
|
||||
'table', 'ul', 'nolink', 'tex', 'algebra' //moodle modification
|
||||
));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
108
lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php
Normal file
108
lib/htmlpurifier/HTMLPurifier/HTMLModule/TransformToStrict.php
Normal file
|
@ -0,0 +1,108 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
|
||||
require_once 'HTMLPurifier/TagTransform/Simple.php';
|
||||
require_once 'HTMLPurifier/TagTransform/Center.php';
|
||||
require_once 'HTMLPurifier/TagTransform/Font.php';
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform/Lang.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/BgColor.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/Border.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/Name.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/Length.php';
|
||||
|
||||
/**
|
||||
* Proprietary module that transforms deprecated elements into Strict
|
||||
* HTML (see HTML 4.01 and XHTML 1.0) when possible.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'TransformToStrict';
|
||||
|
||||
// we're actually modifying these elements, not defining them
|
||||
var $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p',
|
||||
'blockquote', 'table', 'td', 'th', 'tr', 'img', 'a', 'hr');
|
||||
|
||||
var $info_tag_transform = array(
|
||||
// placeholders, see constructor for definitions
|
||||
'font' => false,
|
||||
'menu' => false,
|
||||
'dir' => false,
|
||||
'center'=> false
|
||||
);
|
||||
|
||||
var $attr_collections = array(
|
||||
'Lang' => array(
|
||||
'lang' => false // placeholder
|
||||
)
|
||||
);
|
||||
|
||||
var $info_attr_transform_post = array(
|
||||
'lang' => false // placeholder
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_TransformToStrict() {
|
||||
|
||||
// deprecated tag transforms
|
||||
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
|
||||
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
|
||||
$this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
|
||||
$this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
|
||||
|
||||
foreach ($this->elements as $name) {
|
||||
$this->info[$name] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$name]->standalone = false;
|
||||
}
|
||||
|
||||
// deprecated attribute transforms
|
||||
$this->info['h1']->attr_transform_pre['align'] =
|
||||
$this->info['h2']->attr_transform_pre['align'] =
|
||||
$this->info['h3']->attr_transform_pre['align'] =
|
||||
$this->info['h4']->attr_transform_pre['align'] =
|
||||
$this->info['h5']->attr_transform_pre['align'] =
|
||||
$this->info['h6']->attr_transform_pre['align'] =
|
||||
$this->info['p'] ->attr_transform_pre['align'] =
|
||||
new HTMLPurifier_AttrTransform_TextAlign();
|
||||
|
||||
// xml:lang <=> lang mirroring, implement in TransformToStrict,
|
||||
// this is overridden in TransformToXHTML11
|
||||
$this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
|
||||
$this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
|
||||
|
||||
// this should not be applied to XHTML 1.0 Transitional, ONLY
|
||||
// XHTML 1.0 Strict. We may need three classes
|
||||
$this->info['blockquote']->content_model_type = 'strictblockquote';
|
||||
$this->info['blockquote']->child = false; // recalculate please!
|
||||
|
||||
$this->info['table']->attr_transform_pre['bgcolor'] =
|
||||
$this->info['tr']->attr_transform_pre['bgcolor'] =
|
||||
$this->info['td']->attr_transform_pre['bgcolor'] =
|
||||
$this->info['th']->attr_transform_pre['bgcolor'] = new HTMLPurifier_AttrTransform_BgColor();
|
||||
|
||||
$this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border();
|
||||
|
||||
$this->info['img']->attr_transform_pre['name'] =
|
||||
$this->info['a']->attr_transform_pre['name'] = new HTMLPurifier_AttrTransform_Name();
|
||||
|
||||
$this->info['td']->attr_transform_pre['width'] =
|
||||
$this->info['th']->attr_transform_pre['width'] =
|
||||
$this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width');
|
||||
|
||||
$this->info['td']->attr_transform_pre['height'] =
|
||||
$this->info['th']->attr_transform_pre['height'] = new HTMLPurifier_AttrTransform_Length('height');
|
||||
|
||||
}
|
||||
|
||||
var $defines_child_def = true;
|
||||
function getChildDef($def) {
|
||||
if ($def->content_model_type != 'strictblockquote') return false;
|
||||
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
|
@ -0,0 +1,30 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Proprietary module that transforms XHTML 1.0 deprecated aspects into
|
||||
* XHTML 1.1 compliant ones, when possible. For maximum effectiveness,
|
||||
* HTMLPurifier_HTMLModule_TransformToStrict must also be loaded
|
||||
* (otherwise, elements that were deprecated from Transitional to Strict
|
||||
* will not be transformed).
|
||||
*
|
||||
* XHTML 1.1 compliant document are automatically XHTML 1.0 compliant too,
|
||||
* although they may not be as friendly to legacy browsers.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'TransformToXHTML11';
|
||||
var $attr_collections = array(
|
||||
'Lang' => array(
|
||||
'lang' => false // remove it
|
||||
)
|
||||
);
|
||||
|
||||
var $info_attr_transform_post = array(
|
||||
'lang' => false // remove it
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
?>
|
558
lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php
Normal file
558
lib/htmlpurifier/HTMLPurifier/HTMLModuleManager.php
Normal file
|
@ -0,0 +1,558 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/ElementDef.php';
|
||||
|
||||
require_once 'HTMLPurifier/ContentSets.php';
|
||||
require_once 'HTMLPurifier/AttrTypes.php';
|
||||
require_once 'HTMLPurifier/AttrCollections.php';
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||
|
||||
// W3C modules
|
||||
require_once 'HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Text.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/List.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Presentation.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Edit.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Bdo.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Tables.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Image.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Legacy.php';
|
||||
|
||||
// proprietary modules
|
||||
require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Doctype', null, 'string/null',
|
||||
'Doctype to use, valid values are HTML 4.01 Transitional, HTML 4.01 '.
|
||||
'Strict, XHTML 1.0 Transitional, XHTML 1.0 Strict, XHTML 1.1. '.
|
||||
'Technically speaking this is not actually a doctype (as it does '.
|
||||
'not identify a corresponding DTD), but we are using this name '.
|
||||
'for sake of simplicity. This will override any older directives '.
|
||||
'like %Core.XHTML or %HTML.Strict.'
|
||||
);
|
||||
|
||||
class HTMLPurifier_HTMLModuleManager
|
||||
{
|
||||
|
||||
/**
|
||||
* Array of HTMLPurifier_Module instances, indexed by module's class name.
|
||||
* All known modules, regardless of use, are in this array.
|
||||
*/
|
||||
var $modules = array();
|
||||
|
||||
/**
|
||||
* String doctype we will validate against. See $validModules for use.
|
||||
*
|
||||
* @note
|
||||
* There is a special doctype '*' that acts both as the "default"
|
||||
* doctype if a customized system only defines one doctype and
|
||||
* also a catch-all doctype that gets merged into all the other
|
||||
* module collections. When possible, use a private collection to
|
||||
* share modules between doctypes: this special doctype is to
|
||||
* make life more convenient for users.
|
||||
*/
|
||||
var $doctype;
|
||||
var $doctypeAliases = array(); /**< Lookup array of strings to real doctypes */
|
||||
|
||||
/**
|
||||
* Associative array: $collections[$type][$doctype] = list of modules.
|
||||
* This is used to logically separate types of functionality so that
|
||||
* based on the doctype and other configuration settings they may
|
||||
* be easily switched and on and off. Custom setups may not need
|
||||
* to use this abstraction, opting to have only one big collection
|
||||
* with one valid doctype.
|
||||
*/
|
||||
var $collections = array();
|
||||
|
||||
/**
|
||||
* Modules that may be used in a valid doctype of this kind.
|
||||
* Correctional and leniency modules should not be placed in this
|
||||
* array unless the user said so: don't stuff every possible lenient
|
||||
* module for this doctype in here.
|
||||
*/
|
||||
var $validModules = array();
|
||||
var $validCollections = array(); /**< Collections to merge into $validModules */
|
||||
|
||||
/**
|
||||
* Modules that we will allow in input, subset of $validModules. Single
|
||||
* element definitions may result in us consulting validModules.
|
||||
*/
|
||||
var $activeModules = array();
|
||||
var $activeCollections = array(); /**< Collections to merge into $activeModules */
|
||||
|
||||
var $counter = 0; /**< Designates next available integer order for modules. */
|
||||
var $initialized = false; /**< Says whether initialize() was called */
|
||||
|
||||
/**
|
||||
* Specifies what doctype to siphon new modules from addModule() to,
|
||||
* or false to disable the functionality. Must be used in conjunction
|
||||
* with $autoCollection.
|
||||
*/
|
||||
var $autoDoctype = false;
|
||||
/**
|
||||
* Specifies what collection to siphon new modules from addModule() to,
|
||||
* or false to disable the functionality. Must be used in conjunction
|
||||
* with $autoCollection.
|
||||
*/
|
||||
var $autoCollection = false;
|
||||
|
||||
/** Associative array of element name to defining modules (always array) */
|
||||
var $elementLookup = array();
|
||||
|
||||
/** List of prefixes we should use for resolving small names */
|
||||
var $prefixes = array('HTMLPurifier_HTMLModule_');
|
||||
|
||||
var $contentSets; /**< Instance of HTMLPurifier_ContentSets */
|
||||
var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */
|
||||
var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
|
||||
|
||||
/**
|
||||
* @param $blank If true, don't do any initializing
|
||||
*/
|
||||
function HTMLPurifier_HTMLModuleManager($blank = false) {
|
||||
|
||||
// the only editable internal object. The rest need to
|
||||
// be manipulated through modules
|
||||
$this->attrTypes = new HTMLPurifier_AttrTypes();
|
||||
|
||||
if (!$blank) $this->initialize();
|
||||
|
||||
}
|
||||
|
||||
function initialize() {
|
||||
$this->initialized = true;
|
||||
|
||||
// load default modules to the recognized modules list (not active)
|
||||
$modules = array(
|
||||
// define
|
||||
'CommonAttributes',
|
||||
'Text', 'Hypertext', 'List', 'Presentation',
|
||||
'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute',
|
||||
// define-redefine
|
||||
'Legacy',
|
||||
// redefine
|
||||
'TransformToStrict', 'TransformToXHTML11'
|
||||
);
|
||||
foreach ($modules as $module) {
|
||||
$this->addModule($module);
|
||||
}
|
||||
|
||||
// Safe modules for supported doctypes. These are included
|
||||
// in the valid and active module lists by default
|
||||
$this->collections['Safe'] = array(
|
||||
'_Common' => array( // leading _ indicates private
|
||||
'CommonAttributes', 'Text', 'Hypertext', 'List',
|
||||
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
||||
'StyleAttribute'
|
||||
),
|
||||
// HTML definitions, defer to XHTML definitions
|
||||
'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
|
||||
'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
|
||||
// XHTML definitions
|
||||
'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ),
|
||||
'XHTML 1.0 Strict' => array(array('_Common')),
|
||||
'XHTML 1.1' => array(array('_Common')),
|
||||
);
|
||||
|
||||
// Modules that specify elements that are unsafe from untrusted
|
||||
// third-parties. These should be registered in $validModules but
|
||||
// almost never $activeModules unless you really know what you're
|
||||
// doing.
|
||||
$this->collections['Unsafe'] = array();
|
||||
|
||||
// Modules to import if lenient mode (attempt to convert everything
|
||||
// to a valid representation) is on. These must not be in $validModules
|
||||
// unless specified so.
|
||||
$this->collections['Lenient'] = array(
|
||||
'HTML 4.01 Strict' => array(array('XHTML 1.0 Strict')),
|
||||
'XHTML 1.0 Strict' => array('TransformToStrict'),
|
||||
'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11')
|
||||
);
|
||||
|
||||
// Modules to import if correctional mode (correct everything that
|
||||
// is feasible to strict mode) is on. These must not be in $validModules
|
||||
// unless specified so.
|
||||
$this->collections['Correctional'] = array(
|
||||
'HTML 4.01 Transitional' => array(array('XHTML 1.0 Transitional')),
|
||||
'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one
|
||||
);
|
||||
|
||||
// User-space modules, custom code or whatever
|
||||
$this->collections['Extension'] = array();
|
||||
|
||||
// setup active versus valid modules. ORDER IS IMPORTANT!
|
||||
// definition modules
|
||||
$this->makeCollectionActive('Safe');
|
||||
$this->makeCollectionValid('Unsafe');
|
||||
// redefinition modules
|
||||
$this->makeCollectionActive('Lenient');
|
||||
$this->makeCollectionActive('Correctional');
|
||||
|
||||
$this->autoDoctype = '*';
|
||||
$this->autoCollection = 'Extension';
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a module to the recognized module list. This does not
|
||||
* do anything else: the module must be added to a corresponding
|
||||
* collection to be "activated".
|
||||
* @param $module Mixed: string module name, with or without
|
||||
* HTMLPurifier_HTMLModule prefix, or instance of
|
||||
* subclass of HTMLPurifier_HTMLModule.
|
||||
*/
|
||||
function addModule($module) {
|
||||
if (is_string($module)) {
|
||||
$original_module = $module;
|
||||
if (!class_exists($module)) {
|
||||
foreach ($this->prefixes as $prefix) {
|
||||
$module = $prefix . $original_module;
|
||||
if (class_exists($module)) break;
|
||||
}
|
||||
}
|
||||
if (!class_exists($module)) {
|
||||
trigger_error($original_module . ' module does not exist',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$module = new $module();
|
||||
}
|
||||
$module->order = $this->counter++; // assign then increment
|
||||
$this->modules[$module->name] = $module;
|
||||
if ($this->autoDoctype !== false && $this->autoCollection !== false) {
|
||||
$this->collections[$this->autoCollection][$this->autoDoctype][] = $module->name;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a collection active, while also making it valid if not
|
||||
* already done so. See $activeModules for the semantics of "active".
|
||||
* @param $collection_name Name of collection to activate
|
||||
*/
|
||||
function makeCollectionActive($collection_name) {
|
||||
if (!in_array($collection_name, $this->validCollections)) {
|
||||
$this->makeCollectionValid($collection_name);
|
||||
}
|
||||
$this->activeCollections[] = $collection_name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a collection valid. See $validModules for the semantics of "valid"
|
||||
*/
|
||||
function makeCollectionValid($collection_name) {
|
||||
$this->validCollections[] = $collection_name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a class prefix that addModule() will use to resolve a
|
||||
* string name to a concrete class
|
||||
*/
|
||||
function addPrefix($prefix) {
|
||||
$this->prefixes[] = (string) $prefix;
|
||||
}
|
||||
|
||||
function setup($config) {
|
||||
|
||||
// load up the autocollection
|
||||
if ($this->autoCollection !== false) {
|
||||
$this->makeCollectionActive($this->autoCollection);
|
||||
}
|
||||
|
||||
// retrieve the doctype
|
||||
$this->doctype = $this->getDoctype($config);
|
||||
if (isset($this->doctypeAliases[$this->doctype])) {
|
||||
$this->doctype = $this->doctypeAliases[$this->doctype];
|
||||
}
|
||||
|
||||
// process module collections to module name => module instance form
|
||||
foreach ($this->collections as $col_i => $x) {
|
||||
$this->processCollections($this->collections[$col_i]);
|
||||
}
|
||||
|
||||
$this->validModules = $this->assembleModules($this->validCollections);
|
||||
$this->activeModules = $this->assembleModules($this->activeCollections);
|
||||
|
||||
// setup lookup table based on all valid modules
|
||||
foreach ($this->validModules as $module) {
|
||||
foreach ($module->info as $name => $def) {
|
||||
if (!isset($this->elementLookup[$name])) {
|
||||
$this->elementLookup[$name] = array();
|
||||
}
|
||||
$this->elementLookup[$name][] = $module->name;
|
||||
}
|
||||
}
|
||||
|
||||
// note the different choice
|
||||
$this->contentSets = new HTMLPurifier_ContentSets(
|
||||
// content models that contain non-allowed elements are
|
||||
// harmless because RemoveForeignElements will ensure
|
||||
// they never get in anyway, and there is usually no
|
||||
// reason why you should want to restrict a content
|
||||
// model beyond what is mandated by the doctype.
|
||||
// Note, however, that this means redefinitions of
|
||||
// content models can't be tossed in validModels willy-nilly:
|
||||
// that stuff still is regulated by configuration.
|
||||
$this->validModules
|
||||
);
|
||||
$this->attrCollections = new HTMLPurifier_AttrCollections(
|
||||
$this->attrTypes,
|
||||
// only explicitly allowed modules are allowed to affect
|
||||
// the global attribute collections. This mean's there's
|
||||
// a distinction between loading the Bdo module, and the
|
||||
// bdo element: Bdo will enable the dir attribute on all
|
||||
// elements, while bdo will only define the bdo element,
|
||||
// which will not have an editable directionality. This might
|
||||
// catch people who are loading only elements by surprise, so
|
||||
// we should consider loading an entire module if all the
|
||||
// elements it defines are requested by the user, especially
|
||||
// if it affects the global attribute collections.
|
||||
$this->activeModules
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a list of collections and merges together all the defined
|
||||
* modules for the current doctype from those collections.
|
||||
* @param $collections List of collection suffixes we should grab
|
||||
* modules from (like 'Safe' or 'Lenient')
|
||||
*/
|
||||
function assembleModules($collections) {
|
||||
$modules = array();
|
||||
$numOfCollectionsUsed = 0;
|
||||
foreach ($collections as $name) {
|
||||
$disable_global = false;
|
||||
if (!isset($this->collections[$name])) {
|
||||
trigger_error("$name collection is undefined", E_USER_ERROR);
|
||||
continue;
|
||||
}
|
||||
$cols = $this->collections[$name];
|
||||
if (isset($cols[$this->doctype])) {
|
||||
if (isset($cols[$this->doctype]['*'])) {
|
||||
unset($cols[$this->doctype]['*']);
|
||||
$disable_global = true;
|
||||
}
|
||||
$modules += $cols[$this->doctype];
|
||||
$numOfCollectionsUsed++;
|
||||
}
|
||||
// accept catch-all doctype
|
||||
if (
|
||||
$this->doctype !== '*' &&
|
||||
isset($cols['*']) &&
|
||||
!$disable_global
|
||||
) {
|
||||
$modules += $cols['*'];
|
||||
}
|
||||
}
|
||||
|
||||
if ($numOfCollectionsUsed < 1) {
|
||||
// possible XSS injection if user-specified doctypes
|
||||
// are allowed
|
||||
trigger_error("Doctype {$this->doctype} does not exist, ".
|
||||
"check for typos (if you desire a doctype that allows ".
|
||||
"no elements, use an empty array collection)", E_USER_ERROR);
|
||||
}
|
||||
return $modules;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a collection and performs inclusions and substitutions for it.
|
||||
* @param $cols Reference to collections class member variable
|
||||
*/
|
||||
function processCollections(&$cols) {
|
||||
|
||||
// $cols is the set of collections
|
||||
// $col_i is the name (index) of a collection
|
||||
// $col is a collection/list of modules
|
||||
|
||||
// perform inclusions
|
||||
foreach ($cols as $col_i => $col) {
|
||||
$seen = array();
|
||||
if (!empty($col[0]) && is_array($col[0])) {
|
||||
$seen[$col_i] = true; // recursion reporting
|
||||
$includes = $col[0];
|
||||
unset($cols[$col_i][0]); // remove inclusions value, recursion guard
|
||||
} else {
|
||||
$includes = array();
|
||||
}
|
||||
if (empty($includes)) continue;
|
||||
for ($i = 0; isset($includes[$i]); $i++) {
|
||||
$inc = $includes[$i];
|
||||
if (isset($seen[$inc])) {
|
||||
trigger_error(
|
||||
"Circular inclusion detected in $col_i collection",
|
||||
E_USER_ERROR
|
||||
);
|
||||
continue;
|
||||
} else {
|
||||
$seen[$inc] = true;
|
||||
}
|
||||
if (!isset($cols[$inc])) {
|
||||
trigger_error(
|
||||
"Collection $col_i tried to include undefined ".
|
||||
"collection $inc", E_USER_ERROR);
|
||||
continue;
|
||||
}
|
||||
foreach ($cols[$inc] as $module) {
|
||||
if (is_array($module)) { // another inclusion!
|
||||
foreach ($module as $inc2) $includes[] = $inc2;
|
||||
continue;
|
||||
}
|
||||
$cols[$col_i][] = $module; // merge in the other modules
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// replace with real modules, invert module from list to
|
||||
// assoc array of module name to module instance
|
||||
foreach ($cols as $col_i => $col) {
|
||||
$ignore_global = false;
|
||||
$order = array();
|
||||
foreach ($col as $module_i => $module) {
|
||||
unset($cols[$col_i][$module_i]);
|
||||
if (is_array($module)) {
|
||||
trigger_error("Illegal inclusion array at index".
|
||||
" $module_i found collection $col_i, inclusion".
|
||||
" arrays must be at start of collection (index 0)",
|
||||
E_USER_ERROR);
|
||||
continue;
|
||||
}
|
||||
if ($module_i === '*' && $module === false) {
|
||||
$ignore_global = true;
|
||||
continue;
|
||||
}
|
||||
if (!isset($this->modules[$module])) {
|
||||
trigger_error(
|
||||
"Collection $col_i references undefined ".
|
||||
"module $module",
|
||||
E_USER_ERROR
|
||||
);
|
||||
continue;
|
||||
}
|
||||
$module = $this->modules[$module];
|
||||
$cols[$col_i][$module->name] = $module;
|
||||
$order[$module->name] = $module->order;
|
||||
}
|
||||
array_multisort(
|
||||
$order, SORT_ASC, SORT_NUMERIC, $cols[$col_i]
|
||||
);
|
||||
if ($ignore_global) $cols[$col_i]['*'] = false;
|
||||
}
|
||||
|
||||
// delete pseudo-collections
|
||||
foreach ($cols as $col_i => $col) {
|
||||
if ($col_i[0] == '_') unset($cols[$col_i]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the doctype from the configuration object
|
||||
*/
|
||||
function getDoctype($config) {
|
||||
$doctype = $config->get('HTML', 'Doctype');
|
||||
if ($doctype !== null) {
|
||||
return $doctype;
|
||||
}
|
||||
if (!$this->initialized) {
|
||||
// don't do HTML-oriented backwards compatibility stuff
|
||||
// use either the auto-doctype, or the catch-all doctype
|
||||
return $this->autoDoctype ? $this->autoDoctype : '*';
|
||||
}
|
||||
// this is backwards-compatibility stuff
|
||||
if ($config->get('Core', 'XHTML')) {
|
||||
$doctype = 'XHTML 1.0';
|
||||
} else {
|
||||
$doctype = 'HTML 4.01';
|
||||
}
|
||||
if ($config->get('HTML', 'Strict')) {
|
||||
$doctype .= ' Strict';
|
||||
} else {
|
||||
$doctype .= ' Transitional';
|
||||
}
|
||||
return $doctype;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves merged element definitions for all active elements.
|
||||
* @note We may want to generate an elements array during setup
|
||||
* and pass that on, because a specific combination of
|
||||
* elements may trigger the loading of a module.
|
||||
* @param $config Instance of HTMLPurifier_Config, for determining
|
||||
* stray elements.
|
||||
*/
|
||||
function getElements($config) {
|
||||
|
||||
$elements = array();
|
||||
foreach ($this->activeModules as $module) {
|
||||
foreach ($module->elements as $name) {
|
||||
$elements[$name] = $this->getElement($name, $config);
|
||||
}
|
||||
}
|
||||
|
||||
// standalone elements now loaded
|
||||
|
||||
return $elements;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a single merged element definition
|
||||
* @param $name Name of element
|
||||
* @param $config Instance of HTMLPurifier_Config, may not be necessary.
|
||||
*/
|
||||
function getElement($name, $config) {
|
||||
|
||||
$def = false;
|
||||
|
||||
$modules = $this->validModules;
|
||||
|
||||
if (!isset($this->elementLookup[$name])) {
|
||||
return false;
|
||||
}
|
||||
|
||||
foreach($this->elementLookup[$name] as $module_name) {
|
||||
|
||||
$module = $modules[$module_name];
|
||||
$new_def = $module->info[$name];
|
||||
|
||||
if (!$def && $new_def->standalone) {
|
||||
$def = $new_def;
|
||||
} elseif ($def) {
|
||||
$def->mergeIn($new_def);
|
||||
} else {
|
||||
// could "save it for another day":
|
||||
// non-standalone definitions that don't have a standalone
|
||||
// to merge into could be deferred to the end
|
||||
continue;
|
||||
}
|
||||
|
||||
// attribute value expansions
|
||||
$this->attrCollections->performInclusions($def->attr);
|
||||
$this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
|
||||
|
||||
// descendants_are_inline, for ChildDef_Chameleon
|
||||
if (is_string($def->content_model) &&
|
||||
strpos($def->content_model, 'Inline') !== false) {
|
||||
if ($name != 'del' && $name != 'ins') {
|
||||
// this is for you, ins/del
|
||||
$def->descendants_are_inline = true;
|
||||
}
|
||||
}
|
||||
|
||||
$this->contentSets->generateChildDef($def, $module);
|
||||
}
|
||||
|
||||
return $def;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
42
lib/htmlpurifier/HTMLPurifier/IDAccumulator.php
Normal file
42
lib/htmlpurifier/HTMLPurifier/IDAccumulator.php
Normal file
|
@ -0,0 +1,42 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
|
||||
* @note In Slashdot-speak, dupe means duplicate.
|
||||
* @note This class does not accept $config or $context, thus, it is the
|
||||
* burden of the callee to register the appropriate errors or
|
||||
* configuration.
|
||||
*/
|
||||
class HTMLPurifier_IDAccumulator
|
||||
{
|
||||
|
||||
/**
|
||||
* Lookup table of IDs we've accumulated.
|
||||
* @public
|
||||
*/
|
||||
var $ids = array();
|
||||
|
||||
/**
|
||||
* Add an ID to the lookup table.
|
||||
* @param $id ID to be added.
|
||||
* @return Bool status, true if success, false if there's a dupe
|
||||
*/
|
||||
function add($id) {
|
||||
if (isset($this->ids[$id])) return false;
|
||||
return $this->ids[$id] = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a list of IDs into the lookup table
|
||||
* @param $array_of_ids Array of IDs to load
|
||||
* @note This function doesn't care about duplicates
|
||||
*/
|
||||
function load($array_of_ids) {
|
||||
foreach ($array_of_ids as $id) {
|
||||
$this->ids[$id] = true;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
56
lib/htmlpurifier/HTMLPurifier/Language.php
Normal file
56
lib/htmlpurifier/HTMLPurifier/Language.php
Normal file
|
@ -0,0 +1,56 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/LanguageFactory.php';
|
||||
|
||||
class HTMLPurifier_Language
|
||||
{
|
||||
|
||||
/**
|
||||
* ISO 639 language code of language. Prefers shortest possible version
|
||||
*/
|
||||
var $code = 'en';
|
||||
|
||||
/**
|
||||
* Fallback language code
|
||||
*/
|
||||
var $fallback = false;
|
||||
|
||||
/**
|
||||
* Array of localizable messages
|
||||
*/
|
||||
var $messages = array();
|
||||
|
||||
/**
|
||||
* Has the language object been loaded yet?
|
||||
* @private
|
||||
*/
|
||||
var $_loaded = false;
|
||||
|
||||
/**
|
||||
* Loads language object with necessary info from factory cache
|
||||
* @note This is a lazy loader
|
||||
*/
|
||||
function load() {
|
||||
if ($this->_loaded) return;
|
||||
$factory = HTMLPurifier_LanguageFactory::instance();
|
||||
$factory->loadLanguage($this->code);
|
||||
foreach ($factory->keys as $key) {
|
||||
$this->$key = $factory->cache[$this->code][$key];
|
||||
}
|
||||
$this->_loaded = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a localised message. Does not perform any operations.
|
||||
* @param $key string identifier of message
|
||||
* @return string localised message
|
||||
*/
|
||||
function getMessage($key) {
|
||||
if (!$this->_loaded) $this->load();
|
||||
if (!isset($this->messages[$key])) return '';
|
||||
return $this->messages[$key];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
12
lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php
Normal file
12
lib/htmlpurifier/HTMLPurifier/Language/classes/en-x-test.php
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
// private class for unit testing
|
||||
|
||||
class HTMLPurifier_Language_en_x_test extends HTMLPurifier_Language
|
||||
{
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
?>
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
|
||||
// private language message file for unit testing purposes
|
||||
|
||||
$fallback = 'en';
|
||||
|
||||
$messages = array(
|
||||
'htmlpurifier' => 'HTML Purifier X'
|
||||
);
|
||||
|
||||
?>
|
12
lib/htmlpurifier/HTMLPurifier/Language/messages/en.php
Normal file
12
lib/htmlpurifier/HTMLPurifier/Language/messages/en.php
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
$fallback = false;
|
||||
|
||||
$messages = array(
|
||||
|
||||
'htmlpurifier' => 'HTML Purifier',
|
||||
'pizza' => 'Pizza', // for unit testing purposes
|
||||
|
||||
);
|
||||
|
||||
?>
|
196
lib/htmlpurifier/HTMLPurifier/LanguageFactory.php
Normal file
196
lib/htmlpurifier/HTMLPurifier/LanguageFactory.php
Normal file
|
@ -0,0 +1,196 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Language.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||||
|
||||
/**
|
||||
* Class responsible for generating HTMLPurifier_Language objects, managing
|
||||
* caching and fallbacks.
|
||||
* @note Thanks to MediaWiki for the general logic, although this version
|
||||
* has been entirely rewritten
|
||||
*/
|
||||
class HTMLPurifier_LanguageFactory
|
||||
{
|
||||
|
||||
/**
|
||||
* Cache of language code information used to load HTMLPurifier_Language objects
|
||||
* Structure is: $factory->cache[$language_code][$key] = $value
|
||||
* @value array map
|
||||
*/
|
||||
var $cache;
|
||||
|
||||
/**
|
||||
* Valid keys in the HTMLPurifier_Language object. Designates which
|
||||
* variables to slurp out of a message file.
|
||||
* @value array list
|
||||
*/
|
||||
var $keys = array('fallback', 'messages');
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_Lang to validate language codes
|
||||
* @value object HTMLPurifier_AttrDef_Lang
|
||||
*/
|
||||
var $validator;
|
||||
|
||||
/**
|
||||
* Cached copy of dirname(__FILE__), directory of current file without
|
||||
* trailing slash
|
||||
* @value string filename
|
||||
*/
|
||||
var $dir;
|
||||
|
||||
/**
|
||||
* Keys whose contents are a hash map and can be merged
|
||||
* @value array lookup
|
||||
*/
|
||||
var $mergeable_keys_map = array('messages' => true);
|
||||
|
||||
/**
|
||||
* Keys whose contents are a list and can be merged
|
||||
* @value array lookup
|
||||
*/
|
||||
var $mergeable_keys_list = array();
|
||||
|
||||
/**
|
||||
* Retrieve sole instance of the factory.
|
||||
* @static
|
||||
* @param $prototype Optional prototype to overload sole instance with,
|
||||
* or bool true to reset to default factory.
|
||||
*/
|
||||
function &instance($prototype = null) {
|
||||
static $instance = null;
|
||||
if ($prototype !== null) {
|
||||
$instance = $prototype;
|
||||
} elseif ($instance === null || $prototype == true) {
|
||||
$instance = new HTMLPurifier_LanguageFactory();
|
||||
$instance->setup();
|
||||
}
|
||||
return $instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets up the singleton, much like a constructor
|
||||
* @note Prevents people from getting this outside of the singleton
|
||||
*/
|
||||
function setup() {
|
||||
$this->validator = new HTMLPurifier_AttrDef_Lang();
|
||||
$this->dir = dirname(__FILE__);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a language object, handles class fallbacks
|
||||
* @param $code string language code
|
||||
*/
|
||||
function create($code) {
|
||||
|
||||
$config = $context = false; // hope it doesn't use these!
|
||||
$code = $this->validator->validate($code, $config, $context);
|
||||
if ($code === false) $code = 'en'; // malformed code becomes English
|
||||
|
||||
$pcode = str_replace('-', '_', $code); // make valid PHP classname
|
||||
static $depth = 0; // recursion protection
|
||||
|
||||
if ($code == 'en') {
|
||||
$class = 'HTMLPurifier_Language';
|
||||
$file = $this->dir . '/Language.php';
|
||||
} else {
|
||||
$class = 'HTMLPurifier_Language_' . $pcode;
|
||||
$file = $this->dir . '/Language/classes/' . $code . '.php';
|
||||
// PHP5/APC deps bug workaround can go here
|
||||
// you can bypass the conditional include by loading the
|
||||
// file yourself
|
||||
if (file_exists($file) && !class_exists($class)) {
|
||||
include_once $file;
|
||||
}
|
||||
}
|
||||
|
||||
if (!class_exists($class)) {
|
||||
// go fallback
|
||||
$fallback = HTMLPurifier_Language::getFallbackFor($code);
|
||||
$depth++;
|
||||
$lang = Language::factory( $fallback );
|
||||
$depth--;
|
||||
} else {
|
||||
$lang = new $class;
|
||||
}
|
||||
$lang->code = $code;
|
||||
|
||||
return $lang;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the fallback language for language
|
||||
* @note Loads the original language into cache
|
||||
* @param $code string language code
|
||||
*/
|
||||
function getFallbackFor($code) {
|
||||
$this->loadLanguage($code);
|
||||
return $this->cache[$code]['fallback'];
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads language into the cache, handles message file and fallbacks
|
||||
* @param $code string language code
|
||||
*/
|
||||
function loadLanguage($code) {
|
||||
static $languages_seen = array(); // recursion guard
|
||||
|
||||
// abort if we've already loaded it
|
||||
if (isset($this->cache[$code])) return;
|
||||
|
||||
// generate filename
|
||||
$filename = $this->dir . '/Language/messages/' . $code . '.php';
|
||||
|
||||
// default fallback : may be overwritten by the ensuing include
|
||||
$fallback = ($code != 'en') ? 'en' : false;
|
||||
|
||||
// load primary localisation
|
||||
if (!file_exists($filename)) {
|
||||
// skip the include: will rely solely on fallback
|
||||
$filename = $this->dir . '/Language/messages/en.php';
|
||||
$cache = array();
|
||||
} else {
|
||||
include $filename;
|
||||
$cache = compact($this->keys);
|
||||
}
|
||||
|
||||
// load fallback localisation
|
||||
if (!empty($fallback)) {
|
||||
|
||||
// infinite recursion guard
|
||||
if (isset($languages_seen[$code])) {
|
||||
trigger_error('Circular fallback reference in language ' .
|
||||
$code, E_USER_ERROR);
|
||||
$fallback = 'en';
|
||||
}
|
||||
$language_seen[$code] = true;
|
||||
|
||||
// load the fallback recursively
|
||||
$this->loadLanguage($fallback);
|
||||
$fallback_cache = $this->cache[$fallback];
|
||||
|
||||
// merge fallback with current language
|
||||
foreach ( $this->keys as $key ) {
|
||||
if (isset($cache[$key]) && isset($fallback_cache[$key])) {
|
||||
if (isset($this->mergeable_keys_map[$key])) {
|
||||
$cache[$key] = $cache[$key] + $fallback_cache[$key];
|
||||
} elseif (isset($this->mergeable_keys_list[$key])) {
|
||||
$cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] );
|
||||
}
|
||||
} else {
|
||||
$cache[$key] = $fallback_cache[$key];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// save to cache for later retrieval
|
||||
$this->cache[$code] = $cache;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
237
lib/htmlpurifier/HTMLPurifier/Lexer.php
Normal file
237
lib/htmlpurifier/HTMLPurifier/Lexer.php
Normal file
|
@ -0,0 +1,237 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
require_once 'HTMLPurifier/EntityParser.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'AcceptFullDocuments', true, 'bool',
|
||||
'This parameter determines whether or not the filter should accept full '.
|
||||
'HTML documents, not just HTML fragments. When on, it will '.
|
||||
'drop all sections except the content between body.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||
*
|
||||
* A lexer parses a string of SGML-style markup and converts them into
|
||||
* corresponding tokens. It doesn't check for well-formedness, although its
|
||||
* internal mechanism may make this automatic (such as the case of
|
||||
* HTMLPurifier_Lexer_DOMLex). There are several implementations to choose
|
||||
* from.
|
||||
*
|
||||
* A lexer is HTML-oriented: it might work with XML, but it's not
|
||||
* recommended, as we adhere to a subset of the specification for optimization
|
||||
* reasons.
|
||||
*
|
||||
* This class should not be directly instantiated, but you may use create() to
|
||||
* retrieve a default copy of the lexer. Being a supertype, this class
|
||||
* does not actually define any implementation, but offers commonly used
|
||||
* convenience functions for subclasses.
|
||||
*
|
||||
* @note The unit tests will instantiate this class for testing purposes, as
|
||||
* many of the utility functions require a class to be instantiated.
|
||||
* Be careful when porting this class to PHP 5.
|
||||
*
|
||||
* @par
|
||||
*
|
||||
* @note
|
||||
* We use tokens rather than create a DOM representation because DOM would:
|
||||
*
|
||||
* @par
|
||||
* -# Require more processing power to create,
|
||||
* -# Require recursion to iterate,
|
||||
* -# Must be compatible with PHP 5's DOM (otherwise duplication),
|
||||
* -# Has the entire document structure (html and body not needed), and
|
||||
* -# Has unknown readability improvement.
|
||||
*
|
||||
* @par
|
||||
* What the last item means is that the functions for manipulating tokens are
|
||||
* already fairly compact, and when well-commented, more abstraction may not
|
||||
* be needed.
|
||||
*
|
||||
* @see HTMLPurifier_Token
|
||||
*/
|
||||
class HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
function HTMLPurifier_Lexer() {
|
||||
$this->_entity_parser = new HTMLPurifier_EntityParser();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Most common entity to raw value conversion table for special entities.
|
||||
* @protected
|
||||
*/
|
||||
var $_special_entity2str =
|
||||
array(
|
||||
'"' => '"',
|
||||
'&' => '&',
|
||||
'<' => '<',
|
||||
'>' => '>',
|
||||
''' => "'",
|
||||
''' => "'",
|
||||
''' => "'"
|
||||
);
|
||||
|
||||
/**
|
||||
* Parses special entities into the proper characters.
|
||||
*
|
||||
* This string will translate escaped versions of the special characters
|
||||
* into the correct ones.
|
||||
*
|
||||
* @warning
|
||||
* You should be able to treat the output of this function as
|
||||
* completely parsed, but that's only because all other entities should
|
||||
* have been handled previously in substituteNonSpecialEntities()
|
||||
*
|
||||
* @param $string String character data to be parsed.
|
||||
* @returns Parsed character data.
|
||||
*/
|
||||
function parseData($string) {
|
||||
|
||||
// following functions require at least one character
|
||||
if ($string === '') return '';
|
||||
|
||||
// subtracts amps that cannot possibly be escaped
|
||||
$num_amp = substr_count($string, '&') - substr_count($string, '& ') -
|
||||
($string[strlen($string)-1] === '&' ? 1 : 0);
|
||||
|
||||
if (!$num_amp) return $string; // abort if no entities
|
||||
$num_esc_amp = substr_count($string, '&');
|
||||
$string = strtr($string, $this->_special_entity2str);
|
||||
|
||||
// code duplication for sake of optimization, see above
|
||||
$num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
|
||||
($string[strlen($string)-1] === '&' ? 1 : 0);
|
||||
|
||||
if ($num_amp_2 <= $num_esc_amp) return $string;
|
||||
|
||||
// hmm... now we have some uncommon entities. Use the callback.
|
||||
$string = $this->_entity_parser->substituteSpecialEntities($string);
|
||||
return $string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Lexes an HTML string into tokens.
|
||||
*
|
||||
* @param $string String HTML.
|
||||
* @return HTMLPurifier_Token array representation of HTML.
|
||||
*/
|
||||
function tokenizeHTML($string, $config, &$context) {
|
||||
trigger_error('Call to abstract class', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves or sets the default Lexer as a Prototype Factory.
|
||||
*
|
||||
* Depending on what PHP version you are running, the abstract base
|
||||
* Lexer class will determine which concrete Lexer is best for you:
|
||||
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
|
||||
* for PHP 5 and beyond.
|
||||
*
|
||||
* Passing the optional prototype lexer parameter will override the
|
||||
* default with your own implementation. A copy/reference of the prototype
|
||||
* lexer will now be returned when you request a new lexer.
|
||||
*
|
||||
* @static
|
||||
*
|
||||
* @note
|
||||
* Though it is possible to call this factory method from subclasses,
|
||||
* such usage is not recommended.
|
||||
*
|
||||
* @param $prototype Optional prototype lexer.
|
||||
* @return Concrete lexer.
|
||||
*/
|
||||
function create($prototype = null) {
|
||||
// we don't really care if it's a reference or a copy
|
||||
static $lexer = null;
|
||||
if ($prototype) {
|
||||
$lexer = $prototype;
|
||||
}
|
||||
if (empty($lexer)) {
|
||||
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
|
||||
class_exists('DOMDocument')) { // check for DOM support
|
||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||
$lexer = new HTMLPurifier_Lexer_DOMLex();
|
||||
} else {
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
}
|
||||
}
|
||||
return $lexer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Translates CDATA sections into regular sections (through escaping).
|
||||
*
|
||||
* @static
|
||||
* @protected
|
||||
* @param $string HTML string to process.
|
||||
* @returns HTML with CDATA sections escaped.
|
||||
*/
|
||||
function escapeCDATA($string) {
|
||||
return preg_replace_callback(
|
||||
'/<!\[CDATA\[(.+?)\]\]>/',
|
||||
array('HTMLPurifier_Lexer', 'CDATACallback'),
|
||||
$string
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function for escapeCDATA() that does the work.
|
||||
*
|
||||
* @static
|
||||
* @warning Though this is public in order to let the callback happen,
|
||||
* calling it directly is not recommended.
|
||||
* @params $matches PCRE matches array, with index 0 the entire match
|
||||
* and 1 the inside of the CDATA section.
|
||||
* @returns Escaped internals of the CDATA section.
|
||||
*/
|
||||
function CDATACallback($matches) {
|
||||
// not exactly sure why the character set is needed, but whatever
|
||||
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a piece of HTML and normalizes it by converting entities, fixing
|
||||
* encoding, extracting bits, and other good stuff.
|
||||
*/
|
||||
function normalize($html, $config, &$context) {
|
||||
|
||||
// extract body from document if applicable
|
||||
if ($config->get('Core', 'AcceptFullDocuments')) {
|
||||
$html = $this->extractBody($html);
|
||||
}
|
||||
|
||||
// escape CDATA
|
||||
$html = $this->escapeCDATA($html);
|
||||
|
||||
// expand entities that aren't the big five
|
||||
$html = $this->_entity_parser->substituteNonSpecialEntities($html);
|
||||
|
||||
// clean into wellformed UTF-8 string for an SGML context: this has
|
||||
// to be done after entity expansion because the entities sometimes
|
||||
// represent non-SGML characters (horror, horror!)
|
||||
$html = HTMLPurifier_Encoder::cleanUTF8($html);
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a string of HTML (fragment or document) and returns the content
|
||||
*/
|
||||
function extractBody($html) {
|
||||
$matches = array();
|
||||
$result = preg_match('!<body[^>]*>(.+?)</body>!is', $html, $matches);
|
||||
if ($result) {
|
||||
return $matches[1];
|
||||
} else {
|
||||
return $html;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
152
lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php
Normal file
152
lib/htmlpurifier/HTMLPurifier/Lexer/DOMLex.php
Normal file
|
@ -0,0 +1,152 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/TokenFactory.php';
|
||||
|
||||
/**
|
||||
* Parser that uses PHP 5's DOM extension (part of the core).
|
||||
*
|
||||
* In PHP 5, the DOM XML extension was revamped into DOM and added to the core.
|
||||
* It gives us a forgiving HTML parser, which we use to transform the HTML
|
||||
* into a DOM, and then into the tokens. It is blazingly fast (for large
|
||||
* documents, it performs twenty times faster than
|
||||
* HTMLPurifier_Lexer_DirectLex,and is the default choice for PHP 5.
|
||||
*
|
||||
* @note Any empty elements will have empty tokens associated with them, even if
|
||||
* this is prohibited by the spec. This is cannot be fixed until the spec
|
||||
* comes into play.
|
||||
*
|
||||
* @note PHP's DOM extension does not actually parse any entities, we use
|
||||
* our own function to do that.
|
||||
*
|
||||
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
|
||||
* If this is a huge problem, due to the fact that HTML is hand
|
||||
* edited and you are unable to get a parser cache that caches the
|
||||
* the output of HTML Purifier while keeping the original HTML lying
|
||||
* around, you may want to run Tidy on the resulting output or use
|
||||
* HTMLPurifier_DirectLex
|
||||
*/
|
||||
|
||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
private $factory;
|
||||
|
||||
public function __construct() {
|
||||
// setup the factory
|
||||
parent::HTMLPurifier_Lexer();
|
||||
$this->factory = new HTMLPurifier_TokenFactory();
|
||||
}
|
||||
|
||||
public function tokenizeHTML($string, $config, &$context) {
|
||||
|
||||
$string = $this->normalize($string, $config, $context);
|
||||
|
||||
// preprocess string, essential for UTF-8
|
||||
$string =
|
||||
'<!DOCTYPE html '.
|
||||
'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
|
||||
'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
|
||||
'<html><head>'.
|
||||
'<meta http-equiv="Content-Type" content="text/html;'.
|
||||
' charset=utf-8" />'.
|
||||
'</head><body><div>'.$string.'</div></body></html>';
|
||||
|
||||
$doc = new DOMDocument();
|
||||
$doc->encoding = 'UTF-8'; // technically does nothing, but whatever
|
||||
|
||||
// DOM will toss errors if the HTML its parsing has really big
|
||||
// problems, so we're going to mute them. This can cause problems
|
||||
// if a custom error handler that doesn't implement error_reporting
|
||||
// is set, as noted by a Drupal plugin of HTML Purifier. Consider
|
||||
// making our own error reporter to temporarily load in
|
||||
@$doc->loadHTML($string);
|
||||
|
||||
$tokens = array();
|
||||
$this->tokenizeDOM(
|
||||
$doc->getElementsByTagName('html')->item(0)-> // html
|
||||
getElementsByTagName('body')->item(0)-> // body
|
||||
getElementsByTagName('div')->item(0) // div
|
||||
, $tokens);
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursive function that tokenizes a node, putting it into an accumulator.
|
||||
*
|
||||
* @param $node DOMNode to be tokenized.
|
||||
* @param $tokens Array-list of already tokenized tokens.
|
||||
* @param $collect Says whether or start and close are collected, set to
|
||||
* false at first recursion because it's the implicit DIV
|
||||
* tag you're dealing with.
|
||||
* @returns Tokens of node appended to previously passed tokens.
|
||||
*/
|
||||
protected function tokenizeDOM($node, &$tokens, $collect = false) {
|
||||
// recursive goodness!
|
||||
|
||||
// intercept non element nodes. WE MUST catch all of them,
|
||||
// but we're not getting the character reference nodes because
|
||||
// those should have been preprocessed
|
||||
if ($node->nodeType === XML_TEXT_NODE ||
|
||||
$node->nodeType === XML_CDATA_SECTION_NODE) {
|
||||
$tokens[] = $this->factory->createText($node->data);
|
||||
return;
|
||||
} elseif ($node->nodeType === XML_COMMENT_NODE) {
|
||||
$tokens[] = $this->factory->createComment($node->data);
|
||||
return;
|
||||
} elseif (
|
||||
// not-well tested: there may be other nodes we have to grab
|
||||
$node->nodeType !== XML_ELEMENT_NODE
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
$attr = $node->hasAttributes() ?
|
||||
$this->transformAttrToAssoc($node->attributes) :
|
||||
array();
|
||||
|
||||
// We still have to make sure that the element actually IS empty
|
||||
if (!$node->childNodes->length) {
|
||||
if ($collect) {
|
||||
$tokens[] = $this->factory->createEmpty($node->tagName, $attr);
|
||||
}
|
||||
} else {
|
||||
if ($collect) { // don't wrap on first iteration
|
||||
$tokens[] = $this->factory->createStart(
|
||||
$tag_name = $node->tagName, // somehow, it get's dropped
|
||||
$attr
|
||||
);
|
||||
}
|
||||
foreach ($node->childNodes as $node) {
|
||||
// remember, it's an accumulator. Otherwise, we'd have
|
||||
// to use array_merge
|
||||
$this->tokenizeDOM($node, $tokens, true);
|
||||
}
|
||||
if ($collect) {
|
||||
$tokens[] = $this->factory->createEnd($tag_name);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
|
||||
*
|
||||
* @param $attribute_list DOMNamedNodeMap of DOMAttr objects.
|
||||
* @returns Associative array of attributes.
|
||||
*/
|
||||
protected function transformAttrToAssoc($node_map) {
|
||||
// NamedNodeMap is documented very well, so we're using undocumented
|
||||
// features, namely, the fact that it implements Iterator and
|
||||
// has a ->length attribute
|
||||
if ($node_map->length === 0) return array();
|
||||
$array = array();
|
||||
foreach ($node_map as $attr) {
|
||||
$array[$attr->name] = $attr->value;
|
||||
}
|
||||
return $array;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
309
lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php
Normal file
309
lib/htmlpurifier/HTMLPurifier/Lexer/DirectLex.php
Normal file
|
@ -0,0 +1,309 @@
|
|||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
|
||||
/**
|
||||
* Our in-house implementation of a parser.
|
||||
*
|
||||
* A pure PHP parser, DirectLex has absolutely no dependencies, making
|
||||
* it a reasonably good default for PHP4. Written with efficiency in mind,
|
||||
* it can be four times faster than HTMLPurifier_Lexer_PEARSax3, although it
|
||||
* pales in comparison to HTMLPurifier_Lexer_DOMLex. It will support UTF-8
|
||||
* completely eventually.
|
||||
*
|
||||
* @todo Reread XML spec and document differences.
|
||||
*
|
||||
* @todo Determine correct behavior in transforming comment data. (preserve dashes?)
|
||||
*/
|
||||
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
/**
|
||||
* Whitespace characters for str(c)spn.
|
||||
* @protected
|
||||
*/
|
||||
var $_whitespace = "\x20\x09\x0D\x0A";
|
||||
|
||||
function tokenizeHTML($html, $config, &$context) {
|
||||
|
||||
$html = $this->normalize($html, $config, $context);
|
||||
|
||||
$cursor = 0; // our location in the text
|
||||
$inside_tag = false; // whether or not we're parsing the inside of a tag
|
||||
$array = array(); // result array
|
||||
|
||||
// infinite loop protection
|
||||
// has to be pretty big, since html docs can be big
|
||||
// we're allow two hundred thousand tags... more than enough?
|
||||
$loops = 0;
|
||||
|
||||
while(true) {
|
||||
|
||||
// infinite loop protection
|
||||
if (++$loops > 200000) return array();
|
||||
|
||||
$position_next_lt = strpos($html, '<', $cursor);
|
||||
$position_next_gt = strpos($html, '>', $cursor);
|
||||
|
||||
// triggers on "<b>asdf</b>" but not "asdf <b></b>"
|
||||
if ($position_next_lt === $cursor) {
|
||||
$inside_tag = true;
|
||||
$cursor++;
|
||||
}
|
||||
|
||||
if (!$inside_tag && $position_next_lt !== false) {
|
||||
// We are not inside tag and there still is another tag to parse
|
||||
$array[] = new
|
||||
HTMLPurifier_Token_Text(
|
||||
$this->parseData(
|
||||
substr(
|
||||
$html, $cursor, $position_next_lt - $cursor
|
||||
)
|
||||
)
|
||||
);
|
||||
$cursor = $position_next_lt + 1;
|
||||
$inside_tag = true;
|
||||
continue;
|
||||
} elseif (!$inside_tag) {
|
||||
// We are not inside tag but there are no more tags
|
||||
// If we're already at the end, break
|
||||
if ($cursor === strlen($html)) break;
|
||||
// Create Text of rest of string
|
||||
$array[] = new
|
||||
HTMLPurifier_Token_Text(
|
||||
$this->parseData(
|
||||
substr(
|
||||
$html, $cursor
|
||||
)
|
||||
)
|
||||
);
|
||||
break;
|
||||
} elseif ($inside_tag && $position_next_gt !== false) {
|
||||
// We are in tag and it is well formed
|
||||
// Grab the internals of the tag
|
||||
$strlen_segment = $position_next_gt - $cursor;
|
||||
$segment = substr($html, $cursor, $strlen_segment);
|
||||
|
||||
// Check if it's a comment
|
||||
if (
|
||||
substr($segment, 0, 3) == '!--' &&
|
||||
substr($segment, $strlen_segment-2, 2) == '--'
|
||||
) {
|
||||
$array[] = new
|
||||
HTMLPurifier_Token_Comment(
|
||||
substr(
|
||||
$segment, 3, $strlen_segment - 5
|
||||
)
|
||||
);
|
||||
$inside_tag = false;
|
||||
$cursor = $position_next_gt + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if it's an end tag
|
||||
$is_end_tag = (strpos($segment,'/') === 0);
|
||||
if ($is_end_tag) {
|
||||
$type = substr($segment, 1);
|
||||
$array[] = new HTMLPurifier_Token_End($type);
|
||||
$inside_tag = false;
|
||||
$cursor = $position_next_gt + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if it is explicitly self closing, if so, remove
|
||||
// trailing slash. Remember, we could have a tag like <br>, so
|
||||
// any later token processing scripts must convert improperly
|
||||
// classified EmptyTags from StartTags.
|
||||
$is_self_closing= (strpos($segment,'/') === $strlen_segment-1);
|
||||
if ($is_self_closing) {
|
||||
$strlen_segment--;
|
||||
$segment = substr($segment, 0, $strlen_segment);
|
||||
}
|
||||
|
||||
// Check if there are any attributes
|
||||
$position_first_space = strcspn($segment, $this->_whitespace);
|
||||
|
||||
if ($position_first_space >= $strlen_segment) {
|
||||
if ($is_self_closing) {
|
||||
$array[] = new HTMLPurifier_Token_Empty($segment);
|
||||
} else {
|
||||
$array[] = new HTMLPurifier_Token_Start($segment);
|
||||
}
|
||||
$inside_tag = false;
|
||||
$cursor = $position_next_gt + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Grab out all the data
|
||||
$type = substr($segment, 0, $position_first_space);
|
||||
$attribute_string =
|
||||
trim(
|
||||
substr(
|
||||
$segment, $position_first_space
|
||||
)
|
||||
);
|
||||
if ($attribute_string) {
|
||||
$attr = $this->parseAttributeString(
|
||||
$attribute_string
|
||||
, $config, $context
|
||||
);
|
||||
} else {
|
||||
$attr = array();
|
||||
}
|
||||
|
||||
if ($is_self_closing) {
|
||||
$array[] = new HTMLPurifier_Token_Empty($type, $attr);
|
||||
} else {
|
||||
$array[] = new HTMLPurifier_Token_Start($type, $attr);
|
||||
}
|
||||
$cursor = $position_next_gt + 1;
|
||||
$inside_tag = false;
|
||||
continue;
|
||||
} else {
|
||||
$array[] = new
|
||||
HTMLPurifier_Token_Text(
|
||||
'<' .
|
||||
$this->parseData(
|
||||
substr($html, $cursor)
|
||||
)
|
||||
);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return $array;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes the inside of an HTML tag and makes an assoc array of attributes.
|
||||
*
|
||||
* @param $string Inside of tag excluding name.
|
||||
* @returns Assoc array of attributes.
|
||||
*/
|
||||
function parseAttributeString($string, $config, &$context) {
|
||||
$string = (string) $string; // quick typecast
|
||||
|
||||
if ($string == '') return array(); // no attributes
|
||||
|
||||
// let's see if we can abort as quickly as possible
|
||||
// one equal sign, no spaces => one attribute
|
||||
$num_equal = substr_count($string, '=');
|
||||
$has_space = strpos($string, ' ');
|
||||
if ($num_equal === 0 && !$has_space) {
|
||||
// bool attribute
|
||||
return array($string => $string);
|
||||
} elseif ($num_equal === 1 && !$has_space) {
|
||||
// only one attribute
|
||||
list($key, $quoted_value) = explode('=', $string);
|
||||
$quoted_value = trim($quoted_value);
|
||||
if (!$key) return array();
|
||||
if (!$quoted_value) return array($key => '');
|
||||
$first_char = @$quoted_value[0];
|
||||
$last_char = @$quoted_value[strlen($quoted_value)-1];
|
||||
|
||||
$same_quote = ($first_char == $last_char);
|
||||
$open_quote = ($first_char == '"' || $first_char == "'");
|
||||
|
||||
if ( $same_quote && $open_quote) {
|
||||
// well behaved
|
||||
$value = substr($quoted_value, 1, strlen($quoted_value) - 2);
|
||||
} else {
|
||||
// not well behaved
|
||||
if ($open_quote) {
|
||||
$value = substr($quoted_value, 1);
|
||||
} else {
|
||||
$value = $quoted_value;
|
||||
}
|
||||
}
|
||||
return array($key => $value);
|
||||
}
|
||||
|
||||
// setup loop environment
|
||||
$array = array(); // return assoc array of attributes
|
||||
$cursor = 0; // current position in string (moves forward)
|
||||
$size = strlen($string); // size of the string (stays the same)
|
||||
|
||||
// if we have unquoted attributes, the parser expects a terminating
|
||||
// space, so let's guarantee that there's always a terminating space.
|
||||
$string .= ' ';
|
||||
|
||||
// infinite loop protection
|
||||
$loops = 0;
|
||||
|
||||
while(true) {
|
||||
|
||||
// infinite loop protection
|
||||
if (++$loops > 1000) return array();
|
||||
|
||||
if ($cursor >= $size) {
|
||||
break;
|
||||
}
|
||||
|
||||
$cursor += ($value = strspn($string, $this->_whitespace, $cursor));
|
||||
|
||||
// grab the key
|
||||
|
||||
$key_begin = $cursor; //we're currently at the start of the key
|
||||
|
||||
// scroll past all characters that are the key (not whitespace or =)
|
||||
$cursor += strcspn($string, $this->_whitespace . '=', $cursor);
|
||||
|
||||
$key_end = $cursor; // now at the end of the key
|
||||
|
||||
$key = substr($string, $key_begin, $key_end - $key_begin);
|
||||
|
||||
if (!$key) continue; // empty key
|
||||
|
||||
// scroll past all whitespace
|
||||
$cursor += strspn($string, $this->_whitespace, $cursor);
|
||||
|
||||
if ($cursor >= $size) {
|
||||
$array[$key] = $key;
|
||||
break;
|
||||
}
|
||||
|
||||
// if the next character is an equal sign, we've got a regular
|
||||
// pair, otherwise, it's a bool attribute
|
||||
$first_char = @$string[$cursor];
|
||||
|
||||
if ($first_char == '=') {
|
||||
// key="value"
|
||||
|
||||
$cursor++;
|
||||
$cursor += strspn($string, $this->_whitespace, $cursor);
|
||||
|
||||
// we might be in front of a quote right now
|
||||
|
||||
$char = @$string[$cursor];
|
||||
|
||||
if ($char == '"' || $char == "'") {
|
||||
// it's quoted, end bound is $char
|
||||
$cursor++;
|
||||
$value_begin = $cursor;
|
||||
$cursor = strpos($string, $char, $cursor);
|
||||
$value_end = $cursor;
|
||||
} else {
|
||||
// it's not quoted, end bound is whitespace
|
||||
$value_begin = $cursor;
|
||||
$cursor += strcspn($string, $this->_whitespace, $cursor);
|
||||
$value_end = $cursor;
|
||||
}
|
||||
|
||||
$value = substr($string, $value_begin, $value_end - $value_begin);
|
||||
$array[$key] = $this->parseData($value);
|
||||
$cursor++;
|
||||
|
||||
} else {
|
||||
// boolattr
|
||||
if ($key !== '') {
|
||||
$array[$key] = $key;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
return $array;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
110
lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php
Normal file
110
lib/htmlpurifier/HTMLPurifier/Lexer/PEARSax3.php
Normal file
|
@ -0,0 +1,110 @@
|
|||
<?php
|
||||
|
||||
require_once 'XML/HTMLSax3.php'; // PEAR
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
|
||||
/**
|
||||
* Proof-of-concept lexer that uses the PEAR package XML_HTMLSax3 to parse HTML.
|
||||
*
|
||||
* PEAR, not suprisingly, also has a SAX parser for HTML. I don't know
|
||||
* very much about implementation, but it's fairly well written. However, that
|
||||
* abstraction comes at a price: performance. You need to have it installed,
|
||||
* and if the API changes, it might break our adapter. Not sure whether or not
|
||||
* it's UTF-8 aware, but it has some entity parsing trouble (in all areas,
|
||||
* text and attributes).
|
||||
*
|
||||
* Quite personally, I don't recommend using the PEAR class, and the defaults
|
||||
* don't use it. The unit tests do perform the tests on the SAX parser too, but
|
||||
* whatever it does for poorly formed HTML is up to it.
|
||||
*
|
||||
* @todo Generalize so that XML_HTMLSax is also supported.
|
||||
*
|
||||
* @warning Entity-resolution inside attributes is broken.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
/**
|
||||
* Internal accumulator array for SAX parsers.
|
||||
* @protected
|
||||
*/
|
||||
var $tokens = array();
|
||||
|
||||
function tokenizeHTML($string, $config, &$context) {
|
||||
|
||||
$this->tokens = array();
|
||||
|
||||
$string = $this->normalize($string, $config, $context);
|
||||
|
||||
$parser = new XML_HTMLSax3();
|
||||
$parser->set_object($this);
|
||||
$parser->set_element_handler('openHandler','closeHandler');
|
||||
$parser->set_data_handler('dataHandler');
|
||||
$parser->set_escape_handler('escapeHandler');
|
||||
|
||||
// doesn't seem to work correctly for attributes
|
||||
$parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
|
||||
|
||||
$parser->parse($string);
|
||||
|
||||
return $this->tokens;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Open tag event handler, interface is defined by PEAR package.
|
||||
*/
|
||||
function openHandler(&$parser, $name, $attrs, $closed) {
|
||||
// entities are not resolved in attrs
|
||||
foreach ($attrs as $key => $attr) {
|
||||
$attrs[$key] = $this->parseData($attr);
|
||||
}
|
||||
if ($closed) {
|
||||
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
|
||||
} else {
|
||||
$this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close tag event handler, interface is defined by PEAR package.
|
||||
*/
|
||||
function closeHandler(&$parser, $name) {
|
||||
// HTMLSax3 seems to always send empty tags an extra close tag
|
||||
// check and ignore if you see it:
|
||||
// [TESTME] to make sure it doesn't overreach
|
||||
if ($this->tokens[count($this->tokens)-1]->type == 'empty') {
|
||||
return true;
|
||||
}
|
||||
$this->tokens[] = new HTMLPurifier_Token_End($name);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Data event handler, interface is defined by PEAR package.
|
||||
*/
|
||||
function dataHandler(&$parser, $data) {
|
||||
$this->tokens[] = new HTMLPurifier_Token_Text($data);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Escaped text handler, interface is defined by PEAR package.
|
||||
*/
|
||||
function escapeHandler(&$parser, $data) {
|
||||
if (strpos($data, '--') === 0) {
|
||||
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
|
||||
}
|
||||
// CDATA is handled elsewhere, but if it was handled here:
|
||||
//if (strpos($data, '[CDATA[') === 0) {
|
||||
// $this->tokens[] = new HTMLPurifier_Token_Text(
|
||||
// substr($data, 7, strlen($data) - 9) );
|
||||
//}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
47
lib/htmlpurifier/HTMLPurifier/PercentEncoder.php
Normal file
47
lib/htmlpurifier/HTMLPurifier/PercentEncoder.php
Normal file
|
@ -0,0 +1,47 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Class that handles operations involving percent-encoding in URIs.
|
||||
*/
|
||||
class HTMLPurifier_PercentEncoder
|
||||
{
|
||||
|
||||
/**
|
||||
* Fix up percent-encoding by decoding unreserved characters and normalizing
|
||||
* @param $string String to normalize
|
||||
*/
|
||||
function normalize($string) {
|
||||
if ($string == '') return '';
|
||||
$parts = explode('%', $string);
|
||||
$ret = array_shift($parts);
|
||||
foreach ($parts as $part) {
|
||||
$length = strlen($part);
|
||||
if ($length < 2) {
|
||||
$ret .= '%25' . $part;
|
||||
continue;
|
||||
}
|
||||
$encoding = substr($part, 0, 2);
|
||||
$text = substr($part, 2);
|
||||
if (!ctype_xdigit($encoding)) {
|
||||
$ret .= '%25' . $part;
|
||||
continue;
|
||||
}
|
||||
$int = hexdec($encoding);
|
||||
if (
|
||||
($int >= 48 && $int <= 57) || // digits
|
||||
($int >= 65 && $int <= 90) || // uppercase letters
|
||||
($int >= 97 && $int <= 122) || // lowercase letters
|
||||
$int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._
|
||||
) {
|
||||
$ret .= chr($int) . $text;
|
||||
continue;
|
||||
}
|
||||
$encoding = strtoupper($encoding);
|
||||
$ret .= '%' . $encoding . $text;
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue