Merge branch 'MDL-53393-master' of git://github.com/cameron1729/moodle

This commit is contained in:
Eloy Lafuente (stronk7) 2016-03-22 03:16:51 +01:00
commit 55e90f2270
8 changed files with 76 additions and 51 deletions

View file

@ -23,6 +23,8 @@ class Html2Text
{ {
const ENCODING = 'UTF-8'; const ENCODING = 'UTF-8';
protected $htmlFuncFlags;
/** /**
* Contains the HTML content to convert. * Contains the HTML content to convert.
* *
@ -47,27 +49,25 @@ class Html2Text
protected $search = array( protected $search = array(
"/\r/", // Non-legal carriage return "/\r/", // Non-legal carriage return
"/[\n\t]+/", // Newlines and tabs "/[\n\t]+/", // Newlines and tabs
'/<head[^>]*>.*?<\/head>/i', // <head> '/<head\b[^>]*>.*?<\/head>/i', // <head>
'/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with '/<script\b[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with
'/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with '/<style\b[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
'/<p[^>]*>/i', // <P> '/<i\b[^>]*>(.*?)<\/i>/i', // <i>
'/<br[^>]*>/i', // <br> '/<em\b[^>]*>(.*?)<\/em>/i', // <em>
'/<i[^>]*>(.*?)<\/i>/i', // <i> '/(<ul\b[^>]*>|<\/ul>)/i', // <ul> and </ul>
'/<em[^>]*>(.*?)<\/em>/i', // <em> '/(<ol\b[^>]*>|<\/ol>)/i', // <ol> and </ol>
'/(<ul[^>]*>|<\/ul>)/i', // <ul> and </ul> '/(<dl\b[^>]*>|<\/dl>)/i', // <dl> and </dl>
'/(<ol[^>]*>|<\/ol>)/i', // <ol> and </ol> '/<li\b[^>]*>(.*?)<\/li>/i', // <li> and </li>
'/(<dl[^>]*>|<\/dl>)/i', // <dl> and </dl> '/<dd\b[^>]*>(.*?)<\/dd>/i', // <dd> and </dd>
'/<li[^>]*>(.*?)<\/li>/i', // <li> and </li> '/<dt\b[^>]*>(.*?)<\/dt>/i', // <dt> and </dt>
'/<dd[^>]*>(.*?)<\/dd>/i', // <dd> and </dd> '/<li\b[^>]*>/i', // <li>
'/<dt[^>]*>(.*?)<\/dt>/i', // <dt> and </dt> '/<hr\b[^>]*>/i', // <hr>
'/<li[^>]*>/i', // <li> '/<div\b[^>]*>/i', // <div>
'/<hr[^>]*>/i', // <hr> '/(<table\b[^>]*>|<\/table>)/i', // <table> and </table>
'/<div[^>]*>/i', // <div> '/(<tr\b[^>]*>|<\/tr>)/i', // <tr> and </tr>
'/(<table[^>]*>|<\/table>)/i', // <table> and </table> '/<td\b[^>]*>(.*?)<\/td>/i', // <td> and </td>
'/(<tr[^>]*>|<\/tr>)/i', // <tr> and </tr>
'/<td[^>]*>(.*?)<\/td>/i', // <td> and </td>
'/<span class="_html2text_ignore">.+?<\/span>/i', // <span class="_html2text_ignore">...</span> '/<span class="_html2text_ignore">.+?<\/span>/i', // <span class="_html2text_ignore">...</span>
'/<(img)[^>]*alt=\"([^>"]+)\"[^>]*>/i', // <img> with alt tag '/<(img)\b[^>]*alt=\"([^>"]+)\"[^>]*>/i', // <img> with alt tag
); );
/** /**
@ -82,8 +82,6 @@ class Html2Text
'', // <head> '', // <head>
'', // <script>s -- which strip_tags supposedly has problems with '', // <script>s -- which strip_tags supposedly has problems with
'', // <style>s -- which strip_tags supposedly has problems with '', // <style>s -- which strip_tags supposedly has problems with
"\n\n", // <P>
"\n", // <br>
'_\\1_', // <i> '_\\1_', // <i>
'_\\1_', // <em> '_\\1_', // <em>
"\n\n", // <ul> and </ul> "\n\n", // <ul> and </ul>
@ -137,6 +135,8 @@ class Html2Text
*/ */
protected $callbackSearch = array( protected $callbackSearch = array(
'/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6 '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6
'/[ ]*<(p)( [^>]*)?>(.*?)<\/p>[ ]*/si', // <p> with surrounding whitespace.
'/<(br)[^>]*>[ ]*/i', // <br> with leading whitespace after the newline.
'/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b> '/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b>
'/<(strong)( [^>]*)?>(.*?)<\/strong>/i', // <strong> '/<(strong)( [^>]*)?>(.*?)<\/strong>/i', // <strong>
'/<(th)( [^>]*)?>(.*?)<\/th>/i', // <th> and </th> '/<(th)( [^>]*)?>(.*?)<\/th>/i', // <th> and </th>
@ -212,6 +212,7 @@ class Html2Text
// 'inline' (show links inline) // 'inline' (show links inline)
// 'nextline' (show links on the next line) // 'nextline' (show links on the next line)
// 'table' (if a table of link URLs should be listed after the text. // 'table' (if a table of link URLs should be listed after the text.
// 'bbcode' (show links as bbcode)
'width' => 70, // Maximum width of the formatted text, in columns. 'width' => 70, // Maximum width of the formatted text, in columns.
// Set this value to 0 (or less) to ignore word wrapping // Set this value to 0 (or less) to ignore word wrapping
@ -237,6 +238,9 @@ class Html2Text
$this->html = $html; $this->html = $html;
$this->options = array_merge($this->options, $options); $this->options = array_merge($this->options, $options);
$this->htmlFuncFlags = (PHP_VERSION_ID < 50400)
? ENT_COMPAT
: ENT_COMPAT | ENT_HTML5;
} }
/** /**
@ -319,6 +323,16 @@ class Html2Text
} }
protected function convert() protected function convert()
{
$origEncoding = mb_internal_encoding();
mb_internal_encoding(self::ENCODING);
$this->doConvert();
mb_internal_encoding($origEncoding);
}
protected function doConvert()
{ {
$this->linkList = array(); $this->linkList = array();
@ -346,7 +360,7 @@ class Html2Text
$text = preg_replace_callback($this->callbackSearch, array($this, 'pregCallback'), $text); $text = preg_replace_callback($this->callbackSearch, array($this, 'pregCallback'), $text);
$text = strip_tags($text); $text = strip_tags($text);
$text = preg_replace($this->entSearch, $this->entReplace, $text); $text = preg_replace($this->entSearch, $this->entReplace, $text);
$text = html_entity_decode($text, ENT_QUOTES, self::ENCODING); $text = html_entity_decode($text, $this->htmlFuncFlags, self::ENCODING);
// Remove unknown/unhandled entities (this cannot be done in search-and-replace block) // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
$text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text); $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
@ -396,7 +410,7 @@ class Html2Text
$url = $link; $url = $link;
} else { } else {
$url = $this->baseurl; $url = $this->baseurl;
if (substr($link, 0, 1) != '/') { if (mb_substr($link, 0, 1) != '/') {
$url .= '/'; $url .= '/';
} }
$url .= $link; $url .= $link;
@ -411,6 +425,8 @@ class Html2Text
return $display . ' [' . ($index + 1) . ']'; return $display . ' [' . ($index + 1) . ']';
} elseif ($linkMethod == 'nextline') { } elseif ($linkMethod == 'nextline') {
return $display . "\n[" . $url . ']'; return $display . "\n[" . $url . ']';
} elseif ($linkMethod == 'bbcode') {
return sprintf('[url=%s]%s[/url]', $url, $display);
} else { // link_method defaults to inline } else { // link_method defaults to inline
return $display . ' [' . $url . ']'; return $display . ' [' . $url . ']';
} }
@ -420,7 +436,8 @@ class Html2Text
{ {
// get the content of PRE element // get the content of PRE element
while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) { while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
$this->preContent = $matches[1]; // Replace br tags with newlines to prevent the search-and-replace callback from killing whitespace
$this->preContent = preg_replace('/(<br\b[^>]*>)/i', "\n", $matches[1]);
// Run our defined tags search-and-replace with callback // Run our defined tags search-and-replace with callback
$this->preContent = preg_replace_callback( $this->preContent = preg_replace_callback(
@ -456,11 +473,13 @@ class Html2Text
protected function convertBlockquotes(&$text) protected function convertBlockquotes(&$text)
{ {
if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) { if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
$originalText = $text;
$start = 0; $start = 0;
$taglen = 0; $taglen = 0;
$level = 0; $level = 0;
$diff = 0; $diff = 0;
foreach ($matches[0] as $m) { foreach ($matches[0] as $m) {
$m[1] = mb_strlen(substr($originalText, 0, $m[1]));
if ($m[0][0] == '<' && $m[0][1] == '/') { if ($m[0][0] == '<' && $m[0][1] == '/') {
$level--; $level--;
if ($level < 0) { if ($level < 0) {
@ -471,7 +490,7 @@ class Html2Text
$end = $m[1]; $end = $m[1];
$len = $end - $taglen - $start; $len = $end - $taglen - $start;
// Get blockquote content // Get blockquote content
$body = substr($text, $start + $taglen - $diff, $len); $body = mb_substr($text, $start + $taglen - $diff, $len);
// Set text width // Set text width
$pWidth = $this->options['width']; $pWidth = $this->options['width'];
@ -481,20 +500,21 @@ class Html2Text
$this->converter($body); $this->converter($body);
// Add citation markers and create PRE block // Add citation markers and create PRE block
$body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body)); $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
$body = '<pre>' . htmlspecialchars($body) . '</pre>'; $body = '<pre>' . htmlspecialchars($body, $this->htmlFuncFlags, self::ENCODING) . '</pre>';
// Re-set text width // Re-set text width
$this->options['width'] = $pWidth; $this->options['width'] = $pWidth;
// Replace content // Replace content
$text = substr($text, 0, $start - $diff) $text = mb_substr($text, 0, $start - $diff)
. $body . substr($text, $end + strlen($m[0]) - $diff); . $body
. mb_substr($text, $end + mb_strlen($m[0]) - $diff);
$diff = $len + $taglen + strlen($m[0]) - strlen($body); $diff += $len + $taglen + mb_strlen($m[0]) - mb_strlen($body);
unset($body); unset($body);
} }
} else { } else {
if ($level == 0) { if ($level == 0) {
$start = $m[1]; $start = $m[1];
$taglen = strlen($m[0]); $taglen = mb_strlen($m[0]);
} }
$level++; $level++;
} }
@ -510,7 +530,18 @@ class Html2Text
*/ */
protected function pregCallback($matches) protected function pregCallback($matches)
{ {
switch (strtolower($matches[1])) { switch (mb_strtolower($matches[1])) {
case 'p':
// Replace newlines with spaces.
$para = str_replace("\n", " ", $matches[3]);
// Trim trailing and leading whitespace within the tag.
$para = trim($para);
// Add trailing newlines for this para.
return "\n" . $para . "\n";
case 'br':
return "\n";
case 'b': case 'b':
case 'strong': case 'strong':
return $this->toupper($matches[3]); return $this->toupper($matches[3]);
@ -553,7 +584,7 @@ class Html2Text
protected function toupper($str) protected function toupper($str)
{ {
// string can contain HTML tags // string can contain HTML tags
$chunks = preg_split('/(<[^>]*>)/', $str, null, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); $chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
// convert toupper only the text between HTML tags // convert toupper only the text between HTML tags
foreach ($chunks as $i => $chunk) { foreach ($chunks as $i => $chunk) {
@ -573,15 +604,9 @@ class Html2Text
*/ */
protected function strtoupper($str) protected function strtoupper($str)
{ {
$str = html_entity_decode($str, ENT_COMPAT, self::ENCODING); $str = html_entity_decode($str, $this->htmlFuncFlags, self::ENCODING);
$str = mb_strtoupper($str);
if (function_exists('mb_strtoupper')) { $str = htmlspecialchars($str, $this->htmlFuncFlags, self::ENCODING);
$str = mb_strtoupper($str, self::ENCODING);
} else {
$str = strtoupper($str);
}
$str = htmlspecialchars($str, ENT_COMPAT, self::ENCODING);
return $str; return $str;
} }

View file

@ -11,7 +11,7 @@ positive."
----EXPECTEDHTML---- ----EXPECTEDHTML----
An ion meets his atom friend on the street and says he's lost an An ion meets his atom friend on the street and says he's lost an
electron. "Are you sure?" asks the atom. The ion replies, "I'm positive." electron. "Are you sure?" asks the atom. The ion replies, "I'm positive."
----FULLSOURCE---- ----FULLSOURCE----
Message-ID: <1430198383.10608.0.camel@jean> Message-ID: <1430198383.10608.0.camel@jean>

View file

@ -18,7 +18,7 @@ Sending mail via clent and it seems to go all good...
Havent tried this before and it is awesome.... Havent tried this before and it is awesome....
Cheers Cheers
Rajesh Rajesh
----FULLSOURCE---- ----FULLSOURCE----
Delivered-To: moodlehqtest+aaaaaaaaaaiaaaaaaaaabqaaaaaaaaazd63zvl6kcy04ioh+@example.com Delivered-To: moodlehqtest+aaaaaaaaaaiaaaaaaaaabqaaaaaaaaazd63zvl6kcy04ioh+@example.com

View file

@ -133,8 +133,8 @@ have been fixed <strong><a href="http://third.url/view.php">last week</a></stron
* Basic text formatting. * Basic text formatting.
*/ */
public function test_simple() { public function test_simple() {
$this->assertSame("_Hello_ WORLD!", html_to_text('<p><i>Hello</i> <b>world</b>!</p>')); $this->assertSame("_Hello_ WORLD!\n", html_to_text('<p><i>Hello</i> <b>world</b>!</p>'));
$this->assertSame("All the WORLDS a stage.\n\n-- William Shakespeare", html_to_text('<p>All the <strong>worlds</strong> a stage.</p><p>-- William Shakespeare</p>')); $this->assertSame("All the WORLDS a stage.\n\n-- William Shakespeare\n", html_to_text('<p>All the <strong>worlds</strong> a stage.</p><p>-- William Shakespeare</p>'));
$this->assertSame("HELLO WORLD!\n\n", html_to_text('<h1>Hello world!</h1>')); $this->assertSame("HELLO WORLD!\n\n", html_to_text('<h1>Hello world!</h1>'));
$this->assertSame("Hello\nworld!", html_to_text('Hello<br />world!')); $this->assertSame("Hello\nworld!", html_to_text('Hello<br />world!'));
} }

View file

@ -88,9 +88,9 @@ class core_weblib_testcase extends advanced_testcase {
} }
public function test_format_text_email() { public function test_format_text_email() {
$this->assertSame("This is a TEST", $this->assertSame("This is a TEST\n",
format_text_email('<p>This is a <strong>test</strong></p>', FORMAT_HTML)); format_text_email('<p>This is a <strong>test</strong></p>', FORMAT_HTML));
$this->assertSame("This is a TEST", $this->assertSame("This is a TEST\n",
format_text_email('<p class="frogs">This is a <strong class=\'fishes\'>test</strong></p>', FORMAT_HTML)); format_text_email('<p class="frogs">This is a <strong class=\'fishes\'>test</strong></p>', FORMAT_HTML));
$this->assertSame('& so is this', $this->assertSame('& so is this',
format_text_email('&amp; so is this', FORMAT_HTML)); format_text_email('&amp; so is this', FORMAT_HTML));

View file

@ -200,7 +200,7 @@
<location>html2text.php</location> <location>html2text.php</location>
<name>HTML2Text</name> <name>HTML2Text</name>
<license>GPL</license> <license>GPL</license>
<version>3.0.0</version> <version>4.0.1</version>
<licenseversion>2.0+</licenseversion> <licenseversion>2.0+</licenseversion>
</library> </library>
<library> <library>

View file

@ -152,7 +152,7 @@ class mod_quiz_locallib_testcase extends advanced_testcase {
$summary = quiz_question_tostring($question); $summary = quiz_question_tostring($question);
$this->assertEquals('<span class="questionname">The question name</span> ' . $this->assertEquals('<span class="questionname">The question name</span> ' .
'<span class="questiontext">What sort of INEQUALITY is x &lt; y[?]</span>', $summary); '<span class="questiontext">What sort of INEQUALITY is x &lt; y[?]' . "\n" . '</span>', $summary);
} }
/** /**

View file

@ -218,7 +218,7 @@ class qtype_essay_attempt_upgrader_test extends question_attempt_upgrader_test_b
'minfraction' => 0, 'minfraction' => 0,
'maxfraction' => 1, 'maxfraction' => 1,
'flagged' => 0, 'flagged' => 0,
'questionsummary' => "* Give two examples of facilities within XML schemas that cannot be found in Document Type Definitions (DTDs).\n_(2 marks)_", 'questionsummary' => "Give two examples of facilities within XML schemas that cannot be found in Document Type Definitions (DTDs).\n_(2 marks)_",
'rightanswer' => '', 'rightanswer' => '',
'responsesummary' => "Variable can be typed \n\nxml Schemas fully support Namespaces", 'responsesummary' => "Variable can be typed \n\nxml Schemas fully support Namespaces",
'timemodified' => 1273068477, 'timemodified' => 1273068477,