mirror of
https://github.com/moodle/moodle.git
synced 2025-08-08 10:26:40 +02:00
MDL-52954 core: Change from pandoc to unoconv - it gives better results
Most importantly, it retains formatting better and supports different charsets far better than pandoc.
This commit is contained in:
parent
128d8736d3
commit
1356d85151
10 changed files with 75 additions and 49 deletions
|
@ -166,7 +166,7 @@ function behat_clean_init_config() {
|
|||
'umaskpermissions', 'dbtype', 'dblibrary', 'dbhost', 'dbname', 'dbuser', 'dbpass', 'prefix',
|
||||
'dboptions', 'proxyhost', 'proxyport', 'proxytype', 'proxyuser', 'proxypassword',
|
||||
'proxybypass', 'theme', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot', 'skiplangupgrade',
|
||||
'altcacheconfigpath', 'pathtopandoc'
|
||||
'altcacheconfigpath', 'pathtounoconv'
|
||||
));
|
||||
|
||||
// Add extra allowed settings.
|
||||
|
|
|
@ -187,21 +187,29 @@ class file_storage {
|
|||
* @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension.
|
||||
* @return bool - True if the format is supported for input.
|
||||
*/
|
||||
protected function is_input_format_supported_by_pandoc($format) {
|
||||
protected function is_format_supported_by_unoconv($format) {
|
||||
global $CFG;
|
||||
|
||||
if (!isset($this->unoconvformats)) {
|
||||
// Ask unoconv for its list of supported document formats.
|
||||
$cmd = escapeshellcmd(trim($CFG->pathtounoconv)) . ' --show';
|
||||
$pipes = array();
|
||||
$pipesspec = array(2 => array('pipe', 'w'));
|
||||
$proc = proc_open($cmd, $pipesspec, $pipes);
|
||||
$programoutput = stream_get_contents($pipes[2]);
|
||||
fclose($pipes[2]);
|
||||
proc_close($proc);
|
||||
$matches = array();
|
||||
preg_match_all('/\[\.(.*)\]/', $programoutput, $matches);
|
||||
|
||||
$this->unoconvformats = $matches[1];
|
||||
$this->unoconvformats = array_unique($this->unoconvformats);
|
||||
}
|
||||
|
||||
$sanitized = trim(strtolower($format));
|
||||
return in_array($sanitized, array('md', 'html', 'tex', 'docx', 'odt', 'epub', 'png', 'jpg', 'gif'));
|
||||
return in_array($sanitized, $this->unoconvformats);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify the format is supported.
|
||||
*
|
||||
* @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension.
|
||||
* @return bool - True if the format is supported for output.
|
||||
*/
|
||||
protected function is_output_format_supported_by_pandoc($format) {
|
||||
$sanitized = trim(strtolower($format));
|
||||
return in_array($sanitized, array('md', 'pdf', 'html', 'tex', 'docx', 'odt', 'odf', 'epub'));
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a file format conversion on the specified document.
|
||||
|
@ -213,17 +221,17 @@ class file_storage {
|
|||
protected function create_converted_document(stored_file $file, $format) {
|
||||
global $CFG;
|
||||
|
||||
if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) {
|
||||
if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) {
|
||||
// No conversions are possible, sorry.
|
||||
return false;
|
||||
}
|
||||
|
||||
$fileextension = strtolower(pathinfo($file->get_filename(), PATHINFO_EXTENSION));
|
||||
if (!self::is_input_format_supported_by_pandoc($fileextension)) {
|
||||
if (!self::is_format_supported_by_unoconv($fileextension)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!self::is_output_format_supported_by_pandoc($format)) {
|
||||
if (!self::is_format_supported_by_unoconv($format)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -236,21 +244,14 @@ class file_storage {
|
|||
$filename = $tmp . '/' . $localfilename;
|
||||
$file->copy_content_to($filename);
|
||||
|
||||
if (in_array($fileextension, array('gif', 'jpg', 'png'))) {
|
||||
// We wrap images in a tiny html file - pandoc will generate documents from them.
|
||||
$htmlwrapperfile = $tmp . '/wrapper.html';
|
||||
|
||||
file_put_contents($htmlwrapperfile, "<html><body><img src=\"$localfilename\"></body></html>");
|
||||
|
||||
$filename = $htmlwrapperfile;
|
||||
}
|
||||
|
||||
$newtmpfile = pathinfo($filename, PATHINFO_FILENAME) . '.' . $format;
|
||||
|
||||
// Safety.
|
||||
$newtmpfile = $tmp . '/' . clean_param($newtmpfile, PARAM_FILE);
|
||||
|
||||
$cmd = escapeshellcmd(trim($CFG->pathtopandoc)) . ' ' .
|
||||
$cmd = escapeshellcmd(trim($CFG->pathtounoconv)) . ' ' .
|
||||
escapeshellarg('-f') . ' ' .
|
||||
escapeshellarg($format) . ' ' .
|
||||
escapeshellarg('-o') . ' ' .
|
||||
escapeshellarg($newtmpfile) . ' ' .
|
||||
escapeshellarg($filename);
|
||||
|
@ -259,6 +260,7 @@ class file_storage {
|
|||
$output = null;
|
||||
$currentdir = getcwd();
|
||||
chdir($tmp);
|
||||
$result = exec('env 1>&2', $output);
|
||||
$result = exec($cmd, $output);
|
||||
chdir($currentdir);
|
||||
if (!file_exists($newtmpfile)) {
|
||||
|
|
|
@ -186,7 +186,7 @@ $allowed = array('wwwroot', 'dataroot', 'dirroot', 'admin', 'directorypermission
|
|||
'dbtype', 'dblibrary', 'dbhost', 'dbname', 'dbuser', 'dbpass', 'prefix', 'dboptions',
|
||||
'proxyhost', 'proxyport', 'proxytype', 'proxyuser', 'proxypassword', 'proxybypass', // keep proxy settings from config.php
|
||||
'altcacheconfigpath', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot',
|
||||
'pathtopandoc'
|
||||
'pathtounoconv'
|
||||
);
|
||||
$productioncfg = (array)$CFG;
|
||||
$CFG = new stdClass();
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* Test pandoc functionality.
|
||||
* Test unoconv functionality.
|
||||
*
|
||||
* @package core
|
||||
* @category phpunit
|
||||
|
@ -27,14 +27,14 @@ defined('MOODLE_INTERNAL') || die();
|
|||
|
||||
|
||||
/**
|
||||
* A set of tests for some of the pandoc functionality within Moodle.
|
||||
* A set of tests for some of the unoconv functionality within Moodle.
|
||||
*
|
||||
* @package core
|
||||
* @category phpunit
|
||||
* @copyright 2016 Damyon Wiese
|
||||
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
||||
*/
|
||||
class core_pandoc_testcase extends advanced_testcase {
|
||||
class core_unoconv_testcase extends advanced_testcase {
|
||||
|
||||
private $testfile1 = null;
|
||||
private $testfile2 = null;
|
||||
|
@ -51,7 +51,7 @@ class core_pandoc_testcase extends advanced_testcase {
|
|||
'filepath' => '/',
|
||||
'filename' => 'test.html'
|
||||
);
|
||||
$teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'pandoc-source.html');
|
||||
$teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'unoconv-source.html');
|
||||
$this->testfile1 = $fs->create_file_from_string($filerecord, $teststring);
|
||||
|
||||
$filerecord = array(
|
||||
|
@ -62,7 +62,7 @@ class core_pandoc_testcase extends advanced_testcase {
|
|||
'filepath' => '/',
|
||||
'filename' => 'test.docx'
|
||||
);
|
||||
$teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'pandoc-source.docx');
|
||||
$teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'unoconv-source.docx');
|
||||
$this->testfile2 = $fs->create_file_from_string($filerecord, $teststring);
|
||||
|
||||
$this->resetAfterTest();
|
||||
|
@ -71,16 +71,18 @@ class core_pandoc_testcase extends advanced_testcase {
|
|||
public function test_generate_pdf() {
|
||||
global $CFG;
|
||||
|
||||
if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) {
|
||||
if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) {
|
||||
// No conversions are possible, sorry.
|
||||
return $this->markTestSkipped();
|
||||
}
|
||||
$fs = get_file_storage();
|
||||
|
||||
$result = $fs->get_converted_document($this->testfile1, 'pdf');
|
||||
$this->assertNotFalse($result);
|
||||
$this->assertSame($result->get_mimetype(), 'application/pdf');
|
||||
$this->assertGreaterThan(0, $result->get_filesize());
|
||||
$result = $fs->get_converted_document($this->testfile2, 'pdf');
|
||||
$this->assertNotFalse($result);
|
||||
$this->assertSame($result->get_mimetype(), 'application/pdf');
|
||||
$this->assertGreaterThan(0, $result->get_filesize());
|
||||
}
|
||||
|
@ -88,16 +90,18 @@ class core_pandoc_testcase extends advanced_testcase {
|
|||
public function test_generate_markdown() {
|
||||
global $CFG;
|
||||
|
||||
if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) {
|
||||
if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) {
|
||||
// No conversions are possible, sorry.
|
||||
return $this->markTestSkipped();
|
||||
}
|
||||
$fs = get_file_storage();
|
||||
|
||||
$result = $fs->get_converted_document($this->testfile1, 'md');
|
||||
$result = $fs->get_converted_document($this->testfile1, 'txt');
|
||||
$this->assertNotFalse($result);
|
||||
$this->assertSame($result->get_mimetype(), 'text/plain');
|
||||
$this->assertGreaterThan(0, $result->get_filesize());
|
||||
$result = $fs->get_converted_document($this->testfile2, 'md');
|
||||
$result = $fs->get_converted_document($this->testfile2, 'txt');
|
||||
$this->assertNotFalse($result);
|
||||
$this->assertSame($result->get_mimetype(), 'text/plain');
|
||||
$this->assertGreaterThan(0, $result->get_filesize());
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue