From 1356d8515163e7d8ad4b1063179f12f9951d70c4 Mon Sep 17 00:00:00 2001 From: Damyon Wiese Date: Fri, 26 Feb 2016 11:52:40 +0800 Subject: [PATCH] MDL-52954 core: Change from pandoc to unoconv - it gives better results Most importantly it retains formatting better, and supports different charsets far better than pandoc. --- admin/settings/server.php | 2 +- config-dist.php | 10 ++-- lang/en/admin.php | 4 +- lib/behat/lib.php | 2 +- lib/filestorage/file_storage.php | 52 +++++++++--------- lib/phpunit/bootstrap.php | 2 +- ...pandoc-source.docx => unoconv-source.docx} | Bin ...pandoc-source.html => unoconv-source.html} | 0 .../{pandoc_test.php => unoconv_test.php} | 22 +++++--- .../editpdf/classes/document_services.php | 30 ++++++++-- 10 files changed, 75 insertions(+), 49 deletions(-) rename lib/tests/fixtures/{pandoc-source.docx => unoconv-source.docx} (100%) rename lib/tests/fixtures/{pandoc-source.html => unoconv-source.html} (100%) rename lib/tests/{pandoc_test.php => unoconv_test.php} (85%) diff --git a/admin/settings/server.php b/admin/settings/server.php index 07b7c7f895d..548031bb771 100644 --- a/admin/settings/server.php +++ b/admin/settings/server.php @@ -12,7 +12,7 @@ $temp->add(new admin_setting_configexecutable('pathtodu', new lang_string('patht $temp->add(new admin_setting_configexecutable('aspellpath', new lang_string('aspellpath', 'admin'), new lang_string('edhelpaspellpath'), '')); $temp->add(new admin_setting_configexecutable('pathtodot', new lang_string('pathtodot', 'admin'), new lang_string('pathtodot_help', 'admin'), '')); $temp->add(new admin_setting_configexecutable('pathtogs', new lang_string('pathtogs', 'admin'), new lang_string('pathtogs_help', 'admin'), '/usr/bin/gs')); -$temp->add(new admin_setting_configexecutable('pathtopandoc', new lang_string('pathtopandoc', 'admin'), new lang_string('pathtopandoc_help', 'admin'), '/usr/bin/pandoc')); +$temp->add(new admin_setting_configexecutable('pathtounoconv', new lang_string('pathtounoconv', 'admin'), new lang_string('pathtounoconv_help', 'admin'), '/usr/bin/unoconv')); $ADMIN->add('server', $temp); diff --git a/config-dist.php b/config-dist.php index 894e1df24c5..ae794095569 100644 --- a/config-dist.php +++ b/config-dist.php @@ -838,11 +838,11 @@ $CFG->admin = 'admin'; // (Development->Profiling) built into Moodle. // $CFG->pathtodot = ''; // -// Path to pandoc. -// Probably something like /usr/bin/pandoc. Used to convert between document formats. -// It is recommended to install the latest stable release of pandoc. -// Download packages for all platforms are available from http://pandoc.org/ -// $CFG->pathtopandoc = ''; +// Path to unoconv. +// Probably something like /usr/bin/unoconv. Used as a fallback to convert between document formats. +// Unoconv is used convert between file formats supported by LibreOffice. +// Use a recent version of unoconv ( >= 0.7 ), older versions have trouble running from a webserver. +// $CFG->pathtounoconv = ''; //========================================================================= // ALL DONE! To continue installation, visit your main page with a browser diff --git a/lang/en/admin.php b/lang/en/admin.php index 5b64a22be08..9455d539064 100644 --- a/lang/en/admin.php +++ b/lang/en/admin.php @@ -779,8 +779,6 @@ $string['passwordpolicy'] = 'Password policy'; $string['passwordresettime'] = 'Maximum time to validate password reset request'; $string['passwordreuselimit'] = 'Password rotation limit'; $string['passwordreuselimit_desc'] = 'Number of times a user must change their password before they are allowed to reuse a password. Hashes of previously used passwords are stored in local database table. This feature might not be compatible with some external authentication plugins.'; -$string['pathtopandoc'] = 'Path to pandoc document converter'; -$string['pathtopandoc_help'] = 'Path to pandoc document converter. This is an executable that is capable of converting between document formats. This is optional, but if specified, Moodle will be able to perform automated conversion of documents from a wide range of file formats. This is used by the Assignment module "Annotate PDF" feature.'; $string['pathtodot'] = 'Path to dot'; $string['pathtodot_help'] = 'Path to dot. Probably something like /usr/bin/dot. To be able to generate graphics from DOT files, you must have installed the dot executable and point to it here. Note that, for now, this only used by the profiling features (Development->Profiling) built into Moodle.'; $string['pathtodu'] = 'Path to du'; @@ -792,6 +790,8 @@ $string['pathtopgdumpinvalid'] = 'Invalid path to pg_dump - either wrong path or $string['pathtopsql'] = 'Path to psql'; $string['pathtopsqldesc'] = 'This is only necessary to enter if you have more than one psql on your system (for example if you have more than one version of postgresql installed)'; $string['pathtopsqlinvalid'] = 'Invalid path to psql - either wrong path or not executable'; +$string['pathtounoconv'] = 'Path to unoconv document converter'; +$string['pathtounoconv_help'] = 'Path to unoconv document converter. This is an executable that is capable of converting between document formats supported by LibreOffice. This is optional, but if specified, Moodle will use it to automatically convert between document formats. This is used to support a wider range of input files for the assignment annotate PDF feature.'; $string['pcreunicodewarning'] = 'It is strongly recommended to use PCRE PHP extension that is compatible with Unicode characters.'; $string['perfdebug'] = 'Performance info'; $string['performance'] = 'Performance'; diff --git a/lib/behat/lib.php b/lib/behat/lib.php index 91b46f3fdee..201831fcda5 100644 --- a/lib/behat/lib.php +++ b/lib/behat/lib.php @@ -166,7 +166,7 @@ function behat_clean_init_config() { 'umaskpermissions', 'dbtype', 'dblibrary', 'dbhost', 'dbname', 'dbuser', 'dbpass', 'prefix', 'dboptions', 'proxyhost', 'proxyport', 'proxytype', 'proxyuser', 'proxypassword', 'proxybypass', 'theme', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot', 'skiplangupgrade', - 'altcacheconfigpath', 'pathtopandoc' + 'altcacheconfigpath', 'pathtounoconv' )); // Add extra allowed settings. diff --git a/lib/filestorage/file_storage.php b/lib/filestorage/file_storage.php index 2503b4a1fbc..6034798e99a 100644 --- a/lib/filestorage/file_storage.php +++ b/lib/filestorage/file_storage.php @@ -187,21 +187,29 @@ class file_storage { * @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension. * @return bool - True if the format is supported for input. */ - protected function is_input_format_supported_by_pandoc($format) { + protected function is_format_supported_by_unoconv($format) { + global $CFG; + + if (!isset($this->unoconvformats)) { + // Ask unoconv for it's list of supported document formats. + $cmd = escapeshellcmd(trim($CFG->pathtounoconv)) . ' --show'; + $pipes = array(); + $pipesspec = array(2 => array('pipe', 'w')); + $proc = proc_open($cmd, $pipesspec, $pipes); + $programoutput = stream_get_contents($pipes[2]); + fclose($pipes[2]); + proc_close($proc); + $matches = array(); + preg_match_all('/\[\.(.*)\]/', $programoutput, $matches); + + $this->unoconvformats = $matches[1]; + $this->unoconvformats = array_unique($this->unoconvformats); + } + $sanitized = trim(strtolower($format)); - return in_array($sanitized, array('md', 'html', 'tex', 'docx', 'odt', 'epub', 'png', 'jpg', 'gif')); + return in_array($sanitized, $this->unoconvformats); } - /** - * Verify the format is supported. - * - * @param string $format The desired format - e.g. 'pdf'. Formats are specified by file extension. - * @return bool - True if the format is supported for output. - */ - protected function is_output_format_supported_by_pandoc($format) { - $sanitized = trim(strtolower($format)); - return in_array($sanitized, array('md', 'pdf', 'html', 'tex', 'docx', 'odt', 'odf', 'epub')); - } /** * Perform a file format conversion on the specified document. @@ -213,17 +221,17 @@ class file_storage { protected function create_converted_document(stored_file $file, $format) { global $CFG; - if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) { + if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) { // No conversions are possible, sorry. return false; } $fileextension = strtolower(pathinfo($file->get_filename(), PATHINFO_EXTENSION)); - if (!self::is_input_format_supported_by_pandoc($fileextension)) { + if (!self::is_format_supported_by_unoconv($fileextension)) { return false; } - if (!self::is_output_format_supported_by_pandoc($format)) { + if (!self::is_format_supported_by_unoconv($format)) { return false; } @@ -236,21 +244,14 @@ class file_storage { $filename = $tmp . '/' . $localfilename; $file->copy_content_to($filename); - if (in_array($fileextension, array('gif', 'jpg', 'png'))) { - // We wrap images in a tiny html file - pandoc will generate documents from them. - $htmlwrapperfile = $tmp . '/wrapper.html'; - - file_put_contents($htmlwrapperfile, ""); - - $filename = $htmlwrapperfile; - } - $newtmpfile = pathinfo($filename, PATHINFO_FILENAME) . '.' . $format; // Safety. $newtmpfile = $tmp . '/' . clean_param($newtmpfile, PARAM_FILE); - $cmd = escapeshellcmd(trim($CFG->pathtopandoc)) . ' ' . + $cmd = escapeshellcmd(trim($CFG->pathtounoconv)) . ' ' . + escapeshellarg('-f') . ' ' . + escapeshellarg($format) . ' ' . escapeshellarg('-o') . ' ' . escapeshellarg($newtmpfile) . ' ' . escapeshellarg($filename); @@ -259,6 +260,7 @@ class file_storage { $output = null; $currentdir = getcwd(); chdir($tmp); + $result = exec('env 1>&2', $output); $result = exec($cmd, $output); chdir($currentdir); if (!file_exists($newtmpfile)) { diff --git a/lib/phpunit/bootstrap.php b/lib/phpunit/bootstrap.php index 39237783a42..aff8506868e 100644 --- a/lib/phpunit/bootstrap.php +++ b/lib/phpunit/bootstrap.php @@ -186,7 +186,7 @@ $allowed = array('wwwroot', 'dataroot', 'dirroot', 'admin', 'directorypermission 'dbtype', 'dblibrary', 'dbhost', 'dbname', 'dbuser', 'dbpass', 'prefix', 'dboptions', 'proxyhost', 'proxyport', 'proxytype', 'proxyuser', 'proxypassword', 'proxybypass', // keep proxy settings from config.php 'altcacheconfigpath', 'pathtogs', 'pathtodu', 'aspellpath', 'pathtodot', - 'pathtopandoc' + 'pathtounoconv' ); $productioncfg = (array)$CFG; $CFG = new stdClass(); diff --git a/lib/tests/fixtures/pandoc-source.docx b/lib/tests/fixtures/unoconv-source.docx similarity index 100% rename from lib/tests/fixtures/pandoc-source.docx rename to lib/tests/fixtures/unoconv-source.docx diff --git a/lib/tests/fixtures/pandoc-source.html b/lib/tests/fixtures/unoconv-source.html similarity index 100% rename from lib/tests/fixtures/pandoc-source.html rename to lib/tests/fixtures/unoconv-source.html diff --git a/lib/tests/pandoc_test.php b/lib/tests/unoconv_test.php similarity index 85% rename from lib/tests/pandoc_test.php rename to lib/tests/unoconv_test.php index 68fcdcc1202..6c266f7bdff 100644 --- a/lib/tests/pandoc_test.php +++ b/lib/tests/unoconv_test.php @@ -15,7 +15,7 @@ // along with Moodle. If not, see . /** - * Test pandoc functionality. + * Test unoconv functionality. * * @package core * @category phpunit @@ -27,14 +27,14 @@ defined('MOODLE_INTERNAL') || die(); /** - * A set of tests for some of the pandoc functionality within Moodle. + * A set of tests for some of the unoconv functionality within Moodle. * * @package core * @category phpunit * @copyright 2016 Damyon Wiese * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later */ -class core_pandoc_testcase extends advanced_testcase { +class core_unoconv_testcase extends advanced_testcase { private $testfile1 = null; private $testfile2 = null; @@ -51,7 +51,7 @@ class core_pandoc_testcase extends advanced_testcase { 'filepath' => '/', 'filename' => 'test.html' ); - $teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'pandoc-source.html'); + $teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'unoconv-source.html'); $this->testfile1 = $fs->create_file_from_string($filerecord, $teststring); $filerecord = array( @@ -62,7 +62,7 @@ class core_pandoc_testcase extends advanced_testcase { 'filepath' => '/', 'filename' => 'test.docx' ); - $teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'pandoc-source.docx'); + $teststring = file_get_contents($this->fixturepath . DIRECTORY_SEPARATOR . 'unoconv-source.docx'); $this->testfile2 = $fs->create_file_from_string($filerecord, $teststring); $this->resetAfterTest(); @@ -71,16 +71,18 @@ class core_pandoc_testcase extends advanced_testcase { public function test_generate_pdf() { global $CFG; - if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) { + if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) { // No conversions are possible, sorry. return $this->markTestSkipped(); } $fs = get_file_storage(); $result = $fs->get_converted_document($this->testfile1, 'pdf'); + $this->assertNotFalse($result); $this->assertSame($result->get_mimetype(), 'application/pdf'); $this->assertGreaterThan(0, $result->get_filesize()); $result = $fs->get_converted_document($this->testfile2, 'pdf'); + $this->assertNotFalse($result); $this->assertSame($result->get_mimetype(), 'application/pdf'); $this->assertGreaterThan(0, $result->get_filesize()); } @@ -88,16 +90,18 @@ class core_pandoc_testcase extends advanced_testcase { public function test_generate_markdown() { global $CFG; - if (empty($CFG->pathtopandoc) || !is_executable(trim($CFG->pathtopandoc))) { + if (empty($CFG->pathtounoconv) || !is_executable(trim($CFG->pathtounoconv))) { // No conversions are possible, sorry. return $this->markTestSkipped(); } $fs = get_file_storage(); - $result = $fs->get_converted_document($this->testfile1, 'md'); + $result = $fs->get_converted_document($this->testfile1, 'txt'); + $this->assertNotFalse($result); $this->assertSame($result->get_mimetype(), 'text/plain'); $this->assertGreaterThan(0, $result->get_filesize()); - $result = $fs->get_converted_document($this->testfile2, 'md'); + $result = $fs->get_converted_document($this->testfile2, 'txt'); + $this->assertNotFalse($result); $this->assertSame($result->get_mimetype(), 'text/plain'); $this->assertGreaterThan(0, $result->get_filesize()); } diff --git a/mod/assign/feedback/editpdf/classes/document_services.php b/mod/assign/feedback/editpdf/classes/document_services.php index 83684e3fad5..4ada0a79829 100644 --- a/mod/assign/feedback/editpdf/classes/document_services.php +++ b/mod/assign/feedback/editpdf/classes/document_services.php @@ -53,6 +53,30 @@ class document_services { /** Filename for combined pdf */ const COMBINED_PDF_FILENAME = 'combined.pdf'; + /** Base64 encoded blank pdf. This is the most reliable/fastest way to generate a blank pdf. */ + const BLANK_PDF_BASE64 = <<Output(self::COMBINED_PDF_FILENAME, 'S'); - $file = $fs->create_file_from_string($record, $content); - $blankpdf->Close(); // No real need to close this pdf, because it has been outputted, but for clarity. + $file = $fs->create_file_from_string($record, base64_decode(self::BLANK_PDF_BASE64)); } else { // This was a combined pdf. $file = $fs->create_file_from_pathname($record, $tmpfile);