Merge branch 'MDL-53393-master' of git://github.com/cameron1729/moodle

This commit is contained in:
Eloy Lafuente (stronk7) 2016-03-22 03:16:51 +01:00
commit 55e90f2270
8 changed files with 76 additions and 51 deletions

View file

@ -23,6 +23,8 @@ class Html2Text
{
const ENCODING = 'UTF-8';
protected $htmlFuncFlags;
/**
* Contains the HTML content to convert.
*
@ -47,27 +49,25 @@ class Html2Text
protected $search = array(
"/\r/", // Non-legal carriage return
"/[\n\t]+/", // Newlines and tabs
'/<head[^>]*>.*?<\/head>/i', // <head>
'/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with
'/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
'/<p[^>]*>/i', // <P>
'/<br[^>]*>/i', // <br>
'/<i[^>]*>(.*?)<\/i>/i', // <i>
'/<em[^>]*>(.*?)<\/em>/i', // <em>
'/(<ul[^>]*>|<\/ul>)/i', // <ul> and </ul>
'/(<ol[^>]*>|<\/ol>)/i', // <ol> and </ol>
'/(<dl[^>]*>|<\/dl>)/i', // <dl> and </dl>
'/<li[^>]*>(.*?)<\/li>/i', // <li> and </li>
'/<dd[^>]*>(.*?)<\/dd>/i', // <dd> and </dd>
'/<dt[^>]*>(.*?)<\/dt>/i', // <dt> and </dt>
'/<li[^>]*>/i', // <li>
'/<hr[^>]*>/i', // <hr>
'/<div[^>]*>/i', // <div>
'/(<table[^>]*>|<\/table>)/i', // <table> and </table>
'/(<tr[^>]*>|<\/tr>)/i', // <tr> and </tr>
'/<td[^>]*>(.*?)<\/td>/i', // <td> and </td>
'/<head\b[^>]*>.*?<\/head>/i', // <head>
'/<script\b[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with
'/<style\b[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
'/<i\b[^>]*>(.*?)<\/i>/i', // <i>
'/<em\b[^>]*>(.*?)<\/em>/i', // <em>
'/(<ul\b[^>]*>|<\/ul>)/i', // <ul> and </ul>
'/(<ol\b[^>]*>|<\/ol>)/i', // <ol> and </ol>
'/(<dl\b[^>]*>|<\/dl>)/i', // <dl> and </dl>
'/<li\b[^>]*>(.*?)<\/li>/i', // <li> and </li>
'/<dd\b[^>]*>(.*?)<\/dd>/i', // <dd> and </dd>
'/<dt\b[^>]*>(.*?)<\/dt>/i', // <dt> and </dt>
'/<li\b[^>]*>/i', // <li>
'/<hr\b[^>]*>/i', // <hr>
'/<div\b[^>]*>/i', // <div>
'/(<table\b[^>]*>|<\/table>)/i', // <table> and </table>
'/(<tr\b[^>]*>|<\/tr>)/i', // <tr> and </tr>
'/<td\b[^>]*>(.*?)<\/td>/i', // <td> and </td>
'/<span class="_html2text_ignore">.+?<\/span>/i', // <span class="_html2text_ignore">...</span>
'/<(img)[^>]*alt=\"([^>"]+)\"[^>]*>/i', // <img> with alt tag
'/<(img)\b[^>]*alt=\"([^>"]+)\"[^>]*>/i', // <img> with alt tag
);
/**
@ -82,8 +82,6 @@ class Html2Text
'', // <head>
'', // <script>s -- which strip_tags supposedly has problems with
'', // <style>s -- which strip_tags supposedly has problems with
"\n\n", // <P>
"\n", // <br>
'_\\1_', // <i>
'_\\1_', // <em>
"\n\n", // <ul> and </ul>
@ -137,6 +135,8 @@ class Html2Text
*/
protected $callbackSearch = array(
'/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6
'/[ ]*<(p)( [^>]*)?>(.*?)<\/p>[ ]*/si', // <p> with surrounding whitespace.
'/<(br)[^>]*>[ ]*/i', // <br> with leading whitespace after the newline.
'/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b>
'/<(strong)( [^>]*)?>(.*?)<\/strong>/i', // <strong>
'/<(th)( [^>]*)?>(.*?)<\/th>/i', // <th> and </th>
@ -212,6 +212,7 @@ class Html2Text
// 'inline' (show links inline)
// 'nextline' (show links on the next line)
// 'table' (if a table of link URLs should be listed after the text.
// 'bbcode' (show links as bbcode)
'width' => 70, // Maximum width of the formatted text, in columns.
// Set this value to 0 (or less) to ignore word wrapping
@ -237,6 +238,9 @@ class Html2Text
$this->html = $html;
$this->options = array_merge($this->options, $options);
$this->htmlFuncFlags = (PHP_VERSION_ID < 50400)
? ENT_COMPAT
: ENT_COMPAT | ENT_HTML5;
}
/**
@ -319,6 +323,16 @@ class Html2Text
}
protected function convert()
{
$origEncoding = mb_internal_encoding();
mb_internal_encoding(self::ENCODING);
$this->doConvert();
mb_internal_encoding($origEncoding);
}
protected function doConvert()
{
$this->linkList = array();
@ -346,7 +360,7 @@ class Html2Text
$text = preg_replace_callback($this->callbackSearch, array($this, 'pregCallback'), $text);
$text = strip_tags($text);
$text = preg_replace($this->entSearch, $this->entReplace, $text);
$text = html_entity_decode($text, ENT_QUOTES, self::ENCODING);
$text = html_entity_decode($text, $this->htmlFuncFlags, self::ENCODING);
// Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
$text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
@ -396,7 +410,7 @@ class Html2Text
$url = $link;
} else {
$url = $this->baseurl;
if (substr($link, 0, 1) != '/') {
if (mb_substr($link, 0, 1) != '/') {
$url .= '/';
}
$url .= $link;
@ -411,6 +425,8 @@ class Html2Text
return $display . ' [' . ($index + 1) . ']';
} elseif ($linkMethod == 'nextline') {
return $display . "\n[" . $url . ']';
} elseif ($linkMethod == 'bbcode') {
return sprintf('[url=%s]%s[/url]', $url, $display);
} else { // link_method defaults to inline
return $display . ' [' . $url . ']';
}
@ -420,7 +436,8 @@ class Html2Text
{
// get the content of PRE element
while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
$this->preContent = $matches[1];
// Replace br tags with newlines to prevent the search-and-replace callback from killing whitespace
$this->preContent = preg_replace('/(<br\b[^>]*>)/i', "\n", $matches[1]);
// Run our defined tags search-and-replace with callback
$this->preContent = preg_replace_callback(
@ -456,11 +473,13 @@ class Html2Text
protected function convertBlockquotes(&$text)
{
if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
$originalText = $text;
$start = 0;
$taglen = 0;
$level = 0;
$diff = 0;
foreach ($matches[0] as $m) {
$m[1] = mb_strlen(substr($originalText, 0, $m[1]));
if ($m[0][0] == '<' && $m[0][1] == '/') {
$level--;
if ($level < 0) {
@ -471,7 +490,7 @@ class Html2Text
$end = $m[1];
$len = $end - $taglen - $start;
// Get blockquote content
$body = substr($text, $start + $taglen - $diff, $len);
$body = mb_substr($text, $start + $taglen - $diff, $len);
// Set text width
$pWidth = $this->options['width'];
@ -481,20 +500,21 @@ class Html2Text
$this->converter($body);
// Add citation markers and create PRE block
$body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
$body = '<pre>' . htmlspecialchars($body) . '</pre>';
$body = '<pre>' . htmlspecialchars($body, $this->htmlFuncFlags, self::ENCODING) . '</pre>';
// Re-set text width
$this->options['width'] = $pWidth;
// Replace content
$text = substr($text, 0, $start - $diff)
. $body . substr($text, $end + strlen($m[0]) - $diff);
$text = mb_substr($text, 0, $start - $diff)
. $body
. mb_substr($text, $end + mb_strlen($m[0]) - $diff);
$diff = $len + $taglen + strlen($m[0]) - strlen($body);
$diff += $len + $taglen + mb_strlen($m[0]) - mb_strlen($body);
unset($body);
}
} else {
if ($level == 0) {
$start = $m[1];
$taglen = strlen($m[0]);
$taglen = mb_strlen($m[0]);
}
$level++;
}
@ -510,7 +530,18 @@ class Html2Text
*/
protected function pregCallback($matches)
{
switch (strtolower($matches[1])) {
switch (mb_strtolower($matches[1])) {
case 'p':
// Replace newlines with spaces.
$para = str_replace("\n", " ", $matches[3]);
// Trim trailing and leading whitespace within the tag.
$para = trim($para);
// Add trailing newlines for this para.
return "\n" . $para . "\n";
case 'br':
return "\n";
case 'b':
case 'strong':
return $this->toupper($matches[3]);
@ -553,7 +584,7 @@ class Html2Text
protected function toupper($str)
{
// string can contain HTML tags
$chunks = preg_split('/(<[^>]*>)/', $str, null, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
$chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
// convert toupper only the text between HTML tags
foreach ($chunks as $i => $chunk) {
@ -573,15 +604,9 @@ class Html2Text
*/
protected function strtoupper($str)
{
$str = html_entity_decode($str, ENT_COMPAT, self::ENCODING);
if (function_exists('mb_strtoupper')) {
$str = mb_strtoupper($str, self::ENCODING);
} else {
$str = strtoupper($str);
}
$str = htmlspecialchars($str, ENT_COMPAT, self::ENCODING);
$str = html_entity_decode($str, $this->htmlFuncFlags, self::ENCODING);
$str = mb_strtoupper($str);
$str = htmlspecialchars($str, $this->htmlFuncFlags, self::ENCODING);
return $str;
}

View file

@ -133,8 +133,8 @@ have been fixed <strong><a href="http://third.url/view.php">last week</a></stron
* Basic text formatting.
*/
public function test_simple() {
$this->assertSame("_Hello_ WORLD!", html_to_text('<p><i>Hello</i> <b>world</b>!</p>'));
$this->assertSame("All the WORLDS a stage.\n\n-- William Shakespeare", html_to_text('<p>All the <strong>worlds</strong> a stage.</p><p>-- William Shakespeare</p>'));
$this->assertSame("_Hello_ WORLD!\n", html_to_text('<p><i>Hello</i> <b>world</b>!</p>'));
$this->assertSame("All the WORLDS a stage.\n\n-- William Shakespeare\n", html_to_text('<p>All the <strong>worlds</strong> a stage.</p><p>-- William Shakespeare</p>'));
$this->assertSame("HELLO WORLD!\n\n", html_to_text('<h1>Hello world!</h1>'));
$this->assertSame("Hello\nworld!", html_to_text('Hello<br />world!'));
}

View file

@ -88,9 +88,9 @@ class core_weblib_testcase extends advanced_testcase {
}
public function test_format_text_email() {
$this->assertSame("This is a TEST",
$this->assertSame("This is a TEST\n",
format_text_email('<p>This is a <strong>test</strong></p>', FORMAT_HTML));
$this->assertSame("This is a TEST",
$this->assertSame("This is a TEST\n",
format_text_email('<p class="frogs">This is a <strong class=\'fishes\'>test</strong></p>', FORMAT_HTML));
$this->assertSame('& so is this',
format_text_email('&amp; so is this', FORMAT_HTML));

View file

@ -200,7 +200,7 @@
<location>html2text.php</location>
<name>HTML2Text</name>
<license>GPL</license>
<version>3.0.0</version>
<version>4.0.1</version>
<licenseversion>2.0+</licenseversion>
</library>
<library>

View file

@ -152,7 +152,7 @@ class mod_quiz_locallib_testcase extends advanced_testcase {
$summary = quiz_question_tostring($question);
$this->assertEquals('<span class="questionname">The question name</span> ' .
'<span class="questiontext">What sort of INEQUALITY is x &lt; y[?]</span>', $summary);
'<span class="questiontext">What sort of INEQUALITY is x &lt; y[?]' . "\n" . '</span>', $summary);
}
/**

View file

@ -218,7 +218,7 @@ class qtype_essay_attempt_upgrader_test extends question_attempt_upgrader_test_b
'minfraction' => 0,
'maxfraction' => 1,
'flagged' => 0,
'questionsummary' => "* Give two examples of facilities within XML schemas that cannot be found in Document Type Definitions (DTDs).\n_(2 marks)_",
'questionsummary' => "Give two examples of facilities within XML schemas that cannot be found in Document Type Definitions (DTDs).\n_(2 marks)_",
'rightanswer' => '',
'responsesummary' => "Variable can be typed \n\nxml Schemas fully support Namespaces",
'timemodified' => 1273068477,