MDL-70446 search_solr: File indexing can fail due to multipart upload

Due to Solr bug SOLR-15039, uploading files for indexing can fail if
it uses multipart upload. This changes it to use direct binary upload.

Unfortunately, the direct binary upload in PHP curl only accepts the
request body as a string, so we have to load the whole file into memory.
I added extra code to restrict the size of indexed files to
(memory limit - 100MB). Because cron runs under MEMORY_EXTRA, that is
usually 284MB unless configured differently.
This commit is contained in:
sam marshall 2020-12-10 12:02:41 +00:00
parent f0eb6a5729
commit cca005b48a

View file

@ -1095,8 +1095,10 @@ class engine extends \core_search\engine {
 // A giant block of code that is really just error checking around the curl request.
 try {
-// Now actually do the request.
-$result = $curl->post($url->out(false), array('myfile' => $storedfile));
+// We have to post the file directly in binary data (not using multipart) to avoid
+// Solr bug SOLR-15039 which can cause incorrect data when you use multipart upload.
+// Note this loads the whole file into memory; see limit in file_is_indexable().
+$result = $curl->post($url->out(false), $storedfile->get_content());
 $code = $curl->get_errno();
 $info = $curl->get_info();
@ -1160,6 +1162,18 @@ class engine extends \core_search\engine {
 return false;
 }
+// Because we now load files into memory to index them in Solr, we also have to ensure that
+// we don't try to index anything bigger than the memory limit (less 100MB for safety).
+// Memory limit in cron is MEMORY_EXTRA which is usually 256 or 384MB but can be increased
+// in config, so this will allow files over 100MB to be indexed.
+$limit = ini_get('memory_limit');
+if ($limit && $limit != -1) {
+$limitbytes = get_real_size($limit);
+if ($file->get_filesize() > $limitbytes) {
+return false;
+}
+}
 $mime = $file->get_mimetype();
 if ($mime == 'application/vnd.moodle.backup') {