moodle/search/engine/solr/classes/engine.php

<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Solr engine.
 *
 * @package    search_solr
 * @copyright  2015 Daniel Neis Araujo
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace search_solr;

defined('MOODLE_INTERNAL') || die();

/**
 * Solr engine.
 *
 * @package    search_solr
 * @copyright  2015 Daniel Neis Araujo
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class engine extends \core_search\engine {

    /**
     * @var string The date format used by solr.
     */
    const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';

    /**
     * @var int Commit documents interval (number of milliseconds).
     */
    const AUTOCOMMIT_WITHIN = 15000;

    /**
     * @var int Highlighting fragsize.
     */
    const FRAG_SIZE = 500;

    /**
     * @var \SolrClient
     */
    protected $client = null;

    /**
     * @var \curl Direct curl object.
     */
    protected $curl = null;

    /**
     * @var array Fields that can be highlighted.
     */
    protected $highlightfields = array('content', 'description1', 'description2');

    /**
     * Prepares a Solr query, applies filters and executes it returning its results.
     *
     * @throws \core_search\engine_exception If the search server is not ready.
     * @param  \stdClass  $filters Containing query and filters.
     * @param  array|bool $usercontexts Contexts where the user has access, indexed by area id.
     *                                  True if the user can access all contexts.
     * @return \core_search\document[] Results; an empty array if there are no results or the query failed.
     */
    public function execute_query($filters, $usercontexts) {
        global $USER;

        // Let's keep these changes internal.
        $data = clone $filters;

        // If there is any problem we trigger the exception as soon as possible.
        $this->client = $this->get_search_client();

        $serverstatus = $this->is_server_ready();
        if ($serverstatus !== true) {
            throw new \core_search\engine_exception('engineserverstatus', 'search');
        }

        $query = new \SolrQuery();
        $maxrows = \core_search\manager::MAX_RESULTS;
        if ($this->file_indexing_enabled()) {
            // When using file indexing and grouping, we are going to collapse results, so we want extra results.
            $maxrows *= 2;
        }
        $this->set_query($query, $data->q, $maxrows);
        $this->add_fields($query);

        // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
        // we are really interested in caching contexts filters instead.
        if (!empty($data->title)) {
            $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
        }
        if (!empty($data->areaid)) {
            // Even if it is only supposed to contain PARAM_ALPHANUMEXT, better to prevent.
            $query->addFilterQuery('{!field cache=false f=areaid}' . $data->areaid);
        }

        if (!empty($data->timestart) || !empty($data->timeend)) {
            // An open end of the range is expressed with the '*' wildcard.
            if (empty($data->timestart)) {
                $data->timestart = '*';
            } else {
                $data->timestart = \search_solr\document::format_time_for_engine($data->timestart);
            }
            if (empty($data->timeend)) {
                $data->timeend = '*';
            } else {
                $data->timeend = \search_solr\document::format_time_for_engine($data->timeend);
            }

            // No cache.
            $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
        }

        // Restrict to users who are supposed to be able to see a particular result.
        $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

        // And finally restrict it to the context where the user can access, we want this one cached.
        // If the user can access all contexts $usercontexts value is just true, we don't need to filter
        // in that case.
        if ($usercontexts && is_array($usercontexts)) {
            $allcontexts = array();
            if (!empty($data->areaid)) {
                // The requested area may not be present in the list of areas the user can access.
                if (!empty($usercontexts[$data->areaid])) {
                    $allcontexts = $usercontexts[$data->areaid];
                }
            } else {
                // Join all area contexts into a single array.
                foreach ($usercontexts as $areacontexts) {
                    foreach ($areacontexts as $contextid) {
                        // Ensure they are unique.
                        $allcontexts[$contextid] = $contextid;
                    }
                }
            }

            if (empty($allcontexts)) {
                // There are no valid contexts for this user, so there is nothing they could see. We also
                // must not send an empty 'contextid:()' group, as that is not a parseable filter query.
                return array();
            }
            $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
        }

        try {
            if ($this->file_indexing_enabled()) {
                // Now group records by solr_filegroupingid. Limit to 3 results per group.
                $query->setGroup(true);
                $query->setGroupLimit(3);
                $query->addGroupField('solr_filegroupingid');
                return $this->grouped_files_query_response($this->client->query($query));
            } else {
                return $this->query_response($this->client->query($query));
            }
        } catch (\SolrClientException $ex) {
            debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
            $this->queryerror = $ex->getMessage();
            return array();
        } catch (\SolrServerException $ex) {
            debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
            $this->queryerror = $ex->getMessage();
            return array();
        }

    }

    /**
     * Configures a query object: highlighting options, the text to search for and the maximum rows to return.
     *
     * @param \SolrQuery $query The query object to configure.
     * @param string     $q The query text (callers in this class pass a plain string).
     * @param null|int   $maxresults The number of results to limit. manager::MAX_RESULTS if not numeric.
     */
    protected function set_query($query, $q, $maxresults = null) {
        $rowlimit = is_numeric($maxresults) ? $maxresults : \core_search\manager::MAX_RESULTS;

        // Highlighting: enabled for every highlightable field, matches wrapped in '__' markers.
        $query->setHighlight(true);
        foreach ($this->highlightfields as $highlightfield) {
            $query->addHighlightField($highlightfield);
        }
        $query->setHighlightFragsize(static::FRAG_SIZE);
        $query->setHighlightSimplePre('__');
        $query->setHighlightSimplePost('__');

        // What to search for.
        $query->setQuery($q);

        // Cap the number of returned rows.
        $query->setRows($rowlimit);
    }

    /**
     * Requests every field of the document class default definition to be returned in the results.
     *
     * @param \SolrQuery $query The query the fields are added to.
     */
    public function add_fields($query) {
        $documentclass = $this->get_document_classname();
        $definition = $documentclass::get_default_fields_definition();

        // Only the field names (the definition keys) are of interest here.
        foreach ($definition as $fieldname => $unused) {
            $query->addField($fieldname);
        }
    }

    /**
     * Merges the highlighting section of a response into the matching documents.
     *
     * The highlighting section is keyed by document id, which is how each document finds its highlighted values.
     *
     * @param object $response containing results.
     */
    public function add_highlight_content($response) {
        if (isset($response->highlighting)) {
            $highlights = $response->highlighting;

            foreach ($response->response->docs as $doc) {
                $docid = $doc->id;
                $this->merge_highlight_field_values($doc, $highlights->$docid);
            }
        }
    }

    /**
     * Replaces the highlightable fields of a document with their highlighted version, when there is one.
     *
     * @throws \core_search\engine_exception If one of the highlightable fields of the document is multivalued.
     * @param object $doc containing the results.
     * @param object $highlighteddoc containing the highlighted results values.
     */
    public function merge_highlight_field_values($doc, $highlighteddoc) {

        foreach ($this->highlightfields as $fieldname) {
            if (empty($doc->$fieldname)) {
                // The document has no value for this field, nothing to replace.
                continue;
            }

            // Multivalued solr fields can not be supported, there is no way to make them work with highlighting.
            if (is_array($doc->{$fieldname})) {
                throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
            }

            if (empty($highlighteddoc->$fieldname)) {
                // No highlighted value returned, the original value stays.
                continue;
            }

            // Use the first highlighted value.
            $doc->$fieldname = reset($highlighteddoc->$fieldname);
        }
    }

    /**
     * Filters the response on Moodle side.
     *
     * Documents owned by another user, belonging to an unknown search area or not accessible are removed.
     * Documents whose item is reported as deleted are also removed from the index. Accessible documents are
     * converted through to_document(). When the response reports no found documents the docs are returned untouched.
     *
     * @param object $queryresponse containing the response return from solr server.
     * @return array $results containing final results to be displayed.
     */
    public function query_response($queryresponse) {
        global $USER;

        $response = $queryresponse->getResponse();

        $docs = $response->response->docs;
        if (!$docs) {
            return array();
        }

        if (empty($response->response->numFound)) {
            return $docs;
        }

        $this->add_highlight_content($response);

        $currentuserid = $USER->id;
        $noowner = \core_search\manager::NO_OWNER_ID;
        $maxresults = \core_search\manager::MAX_RESULTS;
        $granted = 0;

        // Go through the results checking whether each of them is available to the current user.
        foreach ($docs as $key => $solrdoc) {
            $ownerid = $solrdoc['owneruserid'];
            if ($ownerid != $noowner && $ownerid != $currentuserid) {
                // The document has an owner, no other user should be able to access it.
                unset($docs[$key]);
                continue;
            }

            $searcharea = $this->get_search_area($solrdoc->areaid);
            if (!$searcharea) {
                unset($docs[$key]);
                continue;
            }

            $docdata = $this->standarize_solr_obj($solrdoc);

            $access = $searcharea->check_access($docdata['itemid']);
            if ($access == \core_search\manager::ACCESS_DELETED) {
                // Gone from Moodle, so it goes from the index as well.
                $this->delete_by_id($docdata['id']);
                unset($docs[$key]);
            } else if ($access == \core_search\manager::ACCESS_DENIED) {
                unset($docs[$key]);
            } else if ($access == \core_search\manager::ACCESS_GRANTED) {
                $granted++;

                // Replace the raw data by the document.
                $docs[$key] = $this->to_document($searcharea, $docdata);
            }

            // This should never happen.
            if ($granted >= $maxresults) {
                $docs = array_slice($docs, 0, $maxresults, true);
                break;
            }
        }

        return $docs;
    }

    /**
     * Processes grouped file results into documents, with attached matching files.
     *
     * Each group is checked for access using its first document. Groups that include their main document
     * are converted straight away; groups that only returned files are completed afterwards through
     * get_missing_docs(). The order of the groups in the response is kept.
     *
     * @param \SolrQueryResponse $queryresponse The response returned from solr server
     * @return array Final results to be displayed.
     */
    protected function grouped_files_query_response($queryresponse) {
        $response = $queryresponse->getResponse();

        // Without the grouping, or without matches in the grouping, there is nothing to return.
        if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
            return array();
        }

        $granted = 0;
        $groupidsinorder = array();
        $founddocs = array();
        $pendingdocs = array();

        $highlights = $response->highlighting;

        // Each group represents a "master document".
        foreach ($response->grouped->solr_filegroupingid->groups as $group) {
            $groupid = $group->groupValue;
            $members = $group->doclist->docs;
            $first = reset($members);

            $searcharea = $this->get_search_area($first->areaid);
            if (!$searcharea) {
                // Well, this is a problem.
                continue;
            }

            // Check for access.
            $access = $searcharea->check_access($first->itemid);
            if ($access == \core_search\manager::ACCESS_DELETED) {
                // If deleted from Moodle, delete from index and move on to the next group.
                $this->delete_by_id($first->id);
                continue;
            }
            if ($access == \core_search\manager::ACCESS_DENIED) {
                // Not for the current user, just skip it.
                continue;
            }
            $granted++;

            // Tell the main document apart from the files returned along with it.
            $maindoc = false;
            $fileids = array();
            foreach ($members as $member) {
                if ($member->id == $groupid) {
                    $maindoc = $member;
                } else if (isset($member->solr_fileid)) {
                    $fileids[] = $member->solr_fileid;
                }
            }

            // Remember the position of this group for the final merge.
            $groupidsinorder[] = $groupid;

            if ($maindoc) {
                if (isset($highlights->$groupid)) {
                    // Merge the highlighting for this doc.
                    $this->merge_highlight_field_values($maindoc, $highlights->$groupid);
                }
                $doc = $this->to_document($searcharea, $this->standarize_solr_obj($maindoc));
                // Attach the matching files to the doc.
                foreach ($fileids as $fileid) {
                    $doc->add_stored_file($fileid);
                }
                $founddocs[$groupid] = $doc;
            } else {
                // The main doc was not returned, keep what we know so it can be built later.
                $pendingdocs[$groupid] = $fileids;
            }

            if ($granted >= \core_search\manager::MAX_RESULTS) {
                // We have hit the max results, we will just ignore the rest.
                break;
            }
        }

        $pendingdocs = $this->get_missing_docs($pendingdocs);

        // Merge both sets of documents, following the order of the results.
        $out = array();
        foreach ($groupidsinorder as $docid) {
            if (isset($founddocs[$docid])) {
                $out[] = $founddocs[$docid];
            } else if (isset($pendingdocs[$docid])) {
                $out[] = $pendingdocs[$docid];
            }
        }

        return $out;
    }

    /**
     * Retrieve any missing main documents and attach provided files.
     *
     * The missingdocs array should be an array, indexed by document id, of main documents we need to retrieve. The value
     * associated to the key should be an array of stored_files or stored file ids to attach to the result document.
     *
     * Return array also indexed by document id. If the query fails an empty array is returned and the
     * error is reported through developer debugging.
     *
     * @param array $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
     * @return document[]
     */
    protected function get_missing_docs($missingdocs) {
        if (empty($missingdocs)) {
            return array();
        }

        $docids = array_keys($missingdocs);

        // Build a custom query that will get all the missing documents.
        $query = new \SolrQuery();
        $this->set_query($query, '*', count($docids));
        $this->add_fields($query);
        $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $docids) . ')');

        try {
            $results = $this->query_response($this->get_search_client()->query($query));
        } catch (\SolrClientException $ex) {
            // The main results are still valid, so we only report the problem to developers.
            debugging('Error retrieving missing documents: ' . $ex->getMessage(), DEBUG_DEVELOPER);
            return array();
        } catch (\SolrServerException $ex) {
            debugging('Error retrieving missing documents: ' . $ex->getMessage(), DEBUG_DEVELOPER);
            return array();
        }

        $out = array();
        foreach ($results as $result) {
            $resultid = $result->get('id');
            if (!isset($missingdocs[$resultid])) {
                // We got a result we didn't expect. Skip it.
                continue;
            }
            // Attach the files.
            foreach ($missingdocs[$resultid] as $filedoc) {
                $result->add_stored_file($filedoc);
            }
            $out[$resultid] = $result;
        }

        return $out;
    }

    /**
     * Converts a \SolrObject instance into a plain php array indexed by property name.
     *
     * @param \SolrObject $obj
     * @return array The returned document as an array.
     */
    public function standarize_solr_obj(\SolrObject $obj) {
        $docdata = array();

        foreach ($obj->getPropertyNames() as $rawname) {
            // Names are trimmed before use, see http://php.net/manual/en/solrobject.getpropertynames.php#98018.
            $propertyname = trim($rawname);
            $docdata[$propertyname] = $obj->offsetGet($propertyname);
        }

        return $docdata;
    }

    /**
     * Sends a document to the search engine and, if requested, processes its attached files.
     *
     * This does not commit to the search engine.
     *
     * @param document $document
     * @param bool     $fileindexing True if file indexing is to be used
     * @return bool False if the document could not be added, true otherwise.
     */
    public function add_document($document, $fileindexing = false) {
        if ($this->add_solr_document($document->export_for_engine())) {
            if ($fileindexing) {
                // Brings the files in the index in line with the files attached to the document.
                $this->process_document_files($document);
            }
            return true;
        }

        return false;
    }

    /**
     * Adds a text document to the search engine.
     *
     * Client and server errors are reported through developer debugging, they are not thrown.
     *
     * @param array $doc Field values indexed by field name.
     * @return bool True if the document was accepted, false if there was an error.
     */
    protected function add_solr_document($doc) {
        $solrdoc = new \SolrInputDocument();
        foreach ($doc as $fieldname => $fieldvalue) {
            $solrdoc->addField($fieldname, $fieldvalue);
        }

        $added = false;
        try {
            $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
            $added = true;
        } catch (\SolrClientException $e) {
            debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
        } catch (\SolrServerException $e) {
            // Only the first line of the message is used, what follows is a full java stacktrace.
            $msg = strtok($e->getMessage(), "\n");
            debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER);
        }

        return $added;
    }

    /**
     * Index files attached to the document, ensuring the index matches the current document files.
     *
     * For documents that aren't known to be new, we check the index for existing files.
     * - New files we will add.
     * - Existing and unchanged files we will skip.
     * - File that are in the index but not on the document will be deleted from the index.
     * - Files that have changed will be re-indexed.
     *
     * @param document $document
     */
    protected function process_document_files($document) {
        if (!$this->file_indexing_enabled()) {
            return;
        }

        // Indexed files are fetched in batches of this size.
        $batchsize = 500;

        // Files currently attached to the document. Whatever is left in here at the end gets (re)indexed.
        $files = $document->get_files();

        // Documents that are not new may already have files in the index.
        if (!$document->get_is_new()) {
            // Done progressively, so lots of files can be handled cleanly.
            list($numfound, $indexedfiles) = $this->get_indexed_files($document, 0, $batchsize);
            $processed = 0;
            $staleids = array();

            do {
                foreach ($indexedfiles as $indexedfile) {
                    $fileid = $indexedfile->solr_fileid;

                    if (!isset($files[$fileid])) {
                        // The file is no longer attached, so it has to go from the index.
                        // Deleted later, since this is progressive, and it could reorder results.
                        $staleids[] = $indexedfile->id;
                        continue;
                    }

                    $attached = $files[$fileid];

                    // Filelib does not guarantee time modified is updated, so the important values are checked too.
                    // The last check covers files that filtering blocked last time but are indexable with the
                    // current settings.
                    $needsreindex = ($indexedfile->modified < $attached->get_timemodified())
                            || (strcmp($indexedfile->title, $attached->get_filename()) !== 0)
                            || ($indexedfile->solr_filecontenthash != $attached->get_contenthash())
                            || ($indexedfile->solr_fileindexedcontent == document::INDEXED_FILE_FALSE &&
                                    $this->file_is_indexable($attached));

                    if (!$needsreindex) {
                        // Already indexed and unchanged, no need to send it again.
                        unset($files[$fileid]);
                    }
                }
                $processed += $batchsize;

                if ($processed < $numfound) {
                    // There are more indexed files, fetch the next batch.
                    list($numfound, $indexedfiles) = $this->get_indexed_files($document, $processed, $batchsize);
                }

            } while ($processed < $numfound);

            // Remove from the index the files that are no longer attached.
            foreach ($staleids as $staleid) {
                // Deleted directly using the client, as the engine delete_by_id won't work on file docs.
                $this->get_search_client()->deleteById($staleid);
            }
        }

        // Everything still in the list is new or changed.
        foreach ($files as $file) {
            $this->add_stored_file($document, $file);
        }
    }

    /**
     * Get the currently indexed files for a particular document, returns the total count, and a subset of files.
     *
     * Query errors are reported through developer debugging and stored in queryerror; in that case the
     * result is the same as when no files are found.
     *
     * @param document $document
     * @param int      $start The row to start the results on. Zero indexed.
     * @param int      $rows The number of rows to fetch
     * @return array   A two element array, the first is the total number of available results, the second is an array
     *                 of documents for the current request.
     */
    protected function get_indexed_files($document, $start = 0, $rows = 500) {
        // A stripped down query, built by hand for efficiency, matching every file record tied to the document.
        $query = new \SolrQuery();
        $query->setQuery('*');
        $query->setRows($rows);
        $query->setStart($start);
        // Sorted by id so the paging is consistent.
        $query->addSortField('id');

        // The bare minimum of fields.
        $wantedfields = array(
            'id',
            'modified',
            'title',
            'solr_fileid',
            'solr_filecontenthash',
            'solr_fileindexedcontent'
        );
        foreach ($wantedfields as $wantedfield) {
            $query->addField($wantedfield);
        }

        // Only file documents grouped under this document.
        $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
        $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

        try {
            $responsedoc = $this->get_search_client()->query($query)->getResponse();

            if (empty($responsedoc->response->numFound)) {
                return array(0, array());
            }

            return array($responsedoc->response->numFound, $this->convert_file_results($responsedoc));
        } catch (\SolrClientException $ex) {
            debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
            $this->queryerror = $ex->getMessage();
            return array(0, array());
        } catch (\SolrServerException $ex) {
            debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
            $this->queryerror = $ex->getMessage();
            return array(0, array());
        }
    }

    /**
     * A very lightweight handler for getting information about already indexed files from a Solr response.
     *
     * @param \SolrObject $responsedoc A Solr response document
     * @return \stdClass[] An array of objects that contain the basic information for file processing.
     */
    protected function convert_file_results($responsedoc) {
        $docs = $responsedoc->response->docs;
        if (!$docs) {
            return array();
        }

        $out = array();
        foreach ($docs as $doc) {
            // Just the values needed to compare against the attached files.
            $out[] = (object) array(
                'id' => $doc->id,
                'modified' => document::import_time_from_engine($doc->modified),
                'title' => $doc->title,
                'solr_fileid' => $doc->solr_fileid,
                'solr_filecontenthash' => $doc->solr_filecontenthash,
                'solr_fileindexedcontent' => $doc->solr_fileindexedcontent
            );
        }

        return $out;
    }

    /**
     * Adds a file to the search engine.
     *
     * Files that are not considered indexable are stored as a reference only, flagged with
     * document::INDEXED_FILE_FALSE. Indexable files are posted to the '/update/extract' handler; if anything
     * goes wrong with that request the file is stored as a reference flagged with document::INDEXED_FILE_ERROR.
     *
     * Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
     * Tika has much better content type detection than Moodle, and we will have many more doc failures
     * if we try to send mime types.
     *
     * @param document $document The document the file is attached to.
     * @param \stored_file $storedfile The file to index.
     * @return void
     */
    protected function add_stored_file($document, $storedfile) {
        $filedoc = $document->export_file_for_engine($storedfile);

        if (!$this->file_is_indexable($storedfile)) {
            // For files that we don't consider indexable, we will still place a reference in the search engine.
            $filedoc['solr_fileindexedcontent'] = document::INDEXED_FILE_FALSE;
            $this->add_solr_document($filedoc);
            return;
        }

        $curl = $this->get_curl_object();

        $url = $this->get_connection_url('/update/extract');

        // This will prevent solr from automatically making fields for every tika output.
        $url->param('uprefix', 'ignored_');

        // These are common fields that match the standard *_point dynamic field and cause an error.
        $url->param('fmap.media_white_point', 'ignored_mwp');
        $url->param('fmap.media_black_point', 'ignored_mbp');

        // Copy each key to the url with literal.
        // We place in a temp name then copy back to the true field, which prevents errors or Tika overwriting common field names.
        foreach ($filedoc as $key => $value) {
            // This will take any fields from tika that match our schema and discard them, so they don't overwrite ours.
            $url->param('fmap.'.$key, 'ignored_'.$key);
            // Place data in a tmp field.
            $url->param('literal.mdltmp_'.$key, $value);
            // Then move to the final field.
            $url->param('fmap.mdltmp_'.$key, $key);
        }

        // This sets the true filename for Tika.
        $url->param('resource.name', $storedfile->get_filename());

        // A giant block of code that is really just error checking around the curl request.
        // The only successful exit is the return inside it; every other path falls through to the fallback below.
        try {
            // Now actually do the request.
            $result = $curl->post($url->out(false), array('myfile' => $storedfile));

            $code = $curl->get_errno();
            $info = $curl->get_info();

            // Now error handling. It is just informational, since we aren't tracking per file/doc results.
            if ($code != 0) {
                // This means an internal cURL error occurred; the error is in result.
                $message = 'Curl error '.$code.' while indexing file with document id '.$filedoc['id'].': '.$result.'.';
                debugging($message, DEBUG_DEVELOPER);
            } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
                // Unexpected HTTP response code.
                $message = 'Error while indexing file with document id '.$filedoc['id'];
                // Try to get error message out of msg or title if it exists.
                if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
                    $message .= ': '.$matches[1];
                } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
                    $message .= ': '.$matches[1];
                }
                // This is a common error, happening whenever a file fails to index for any reason, so we will make it quieter.
                // It is only traced on CLI scripts outside of unit tests.
                if (CLI_SCRIPT && !PHPUNIT_TEST) {
                    mtrace($message);
                }
            } else {
                // Check for the expected status field.
                if (preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
                    // Now check for the expected status of 0, if not, error.
                    if ((int)$matches[1] !== 0) {
                        $message = 'Unexpected Solr status code '.(int)$matches[1];
                        $message .= ' while indexing file with document id '.$filedoc['id'].'.';
                        debugging($message, DEBUG_DEVELOPER);
                    } else {
                        // The document was successfully indexed.
                        return;
                    }
                } else {
                    // We received an unprocessable response. Only its first line is reported.
                    $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
                    $message .= strtok($result, "\n");
                    debugging($message, DEBUG_DEVELOPER);
                }
            }
        } catch (\Exception $e) {
            // There was an error, but we are not tracking per-file success, so we just continue on.
            debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'".', DEBUG_DEVELOPER);
        }

        // If we get here, the document was not indexed due to an error. So we will index just the base info without the file.
        $filedoc['solr_fileindexedcontent'] = document::INDEXED_FILE_ERROR;
        $this->add_solr_document($filedoc);
    }

    /**
     * Checks to see if a passed file is indexable.
     *
     * @param \stored_file $file The file to check
     * @return bool True if the file can be indexed
     */
    protected function file_is_indexable($file) {
        // When a size cap (in KB) is configured, any file larger than it is rejected.
        if (!empty($this->config->maxindexfilekb)) {
            $maxbytes = $this->config->maxindexfilekb * 1024;
            if ($file->get_filesize() > $maxbytes) {
                return false;
            }
        }

        // Moodle backup files are never indexed, there is nothing usefully indexable in them.
        // Every other mimetype is accepted.
        $isbackup = ($file->get_mimetype() == 'application/vnd.moodle.backup');

        return !$isbackup;
    }

    /**
     * Commits all pending changes.
     *
     * @return void
     */
    protected function commit() {
        // Commit through the engine's Solr client.
        $client = $this->get_search_client();
        $client->commit();
    }

    /**
     * Do any area cleanup needed, and do anything to confirm contents.
     *
     * Return false to prevent the search area completed time and stats from being updated.
     *
     * @param \core_search\area\base $searcharea The search area that was complete
     * @param int $numdocs The number of documents that were added to the index
     * @param bool $fullindex True if a full index is being performed
     * @return bool True means that data is considered indexed
     */
    public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
        // None of the arguments are used by this engine: finishing an area only commits the pending changes.
        $this->commit();

        // The area is always reported as indexed.
        $indexed = true;

        return $indexed;
    }

    /**
     * Return true if file indexing is supported and enabled. False otherwise.
     *
     * @return bool
     */
    public function file_indexing_enabled() {
        // Normalise the stored "fileindexing" setting into a strict boolean.
        $setting = $this->config->fileindexing;

        return $setting ? true : false;
    }

    /**
     * Defragments the index.
     *
     * @return void
     */
    public function optimize() {
        // Positional arguments of SolrClient::optimize(), named here after the pecl-solr signature.
        $maxsegments = 1;
        $softcommit = true;
        $waitsearcher = false;

        $client = $this->get_search_client();
        $client->optimize($maxsegments, $softcommit, $waitsearcher);
    }

    /**
     * Deletes the specified document.
     *
     * @param string $id The document id to delete
     * @return void
     */
    public function delete_by_id($id) {
        $client = $this->get_search_client();

        // Deleting by solr_filegroupingid removes the item together with all of its related files.
        $query = 'solr_filegroupingid:' . $id;
        $client->deleteByQuery($query);

        $this->commit();
    }

    /**
     * Deletes all documents belonging to an area, or every document in the index when no area is given.
     *
     * @param string $areaid
     * @return void
     */
    public function delete($areaid = null) {
        $client = $this->get_search_client();

        // Restrict the deletion to one area when an id is supplied, otherwise match every document.
        $query = $areaid ? 'areaid:' . $areaid : '*:*';
        $client->deleteByQuery($query);

        $this->commit();
    }

    /**
     * Pings the Solr server using search_solr config
     *
     * @return true|string Returns true if all good or an error string.
     */
    public function is_server_ready() {

        // Both the server hostname and the index name are required before anything can be contacted.
        $configured = !empty($this->config->server_hostname) && !empty($this->config->indexname);
        if (!$configured) {
            return 'No solr configuration found';
        }

        // Ask for the client without letting a failure raise an exception, and keep the result on the instance.
        $this->client = $this->get_search_client(false);
        if (!$this->client) {
            return get_string('engineserverstatus', 'search');
        }

        // Ping the server, reporting client-side and server-side failures separately.
        try {
            @$this->client->ping();
        } catch (\SolrClientException $clienterror) {
            return 'Solr client error: ' . $clienterror->getMessage();
        } catch (\SolrServerException $servererror) {
            return 'Solr server error: ' . $servererror->getMessage();
        }

        // Finally make sure the schema setup has already been run.
        try {
            $schema = new schema();
            $schema->validate_setup();
        } catch (\moodle_exception $schemaerror) {
            return $schemaerror->getMessage();
        }

        return true;
    }

    /**
     * Checks if the PHP Solr extension is available.
     *
     * @return bool
     */
    public function is_installed() {
        // The presence of solr_get_version() is used as the marker for the PHP Solr extension.
        $available = function_exists('solr_get_version');

        return $available;
    }

    /**
     * Returns the solr client instance.
     *
     * @throws \core_search\engine_exception
     * @param bool $triggerexception
     * @return \SolrClient
     */
    protected function get_search_client($triggerexception = true) {

        // Reuse a client built by an earlier call. A truthiness check is used because the property starts as null
        // and a caller may have stored a failed (false) result in it; neither is a usable client.
        if (!empty($this->client)) {
            return $this->client;
        }

        $config = $this->config;

        // Connection options for the pecl-solr client. Optional settings fall back to an empty string
        // (or 30 seconds for the timeout) when they have not been configured.
        $options = array(
            'hostname' => $config->server_hostname,
            'path'     => '/solr/' . $config->indexname,
            'login'    => !empty($config->server_username) ? $config->server_username : '',
            'password' => !empty($config->server_password) ? $config->server_password : '',
            'port'     => !empty($config->server_port) ? $config->server_port : '',
            'secure' => !empty($config->secure) ? true : false,
            'ssl_cert' => !empty($config->ssl_cert) ? $config->ssl_cert : '',
            'ssl_key' => !empty($config->ssl_key) ? $config->ssl_key : '',
            'ssl_keypassword' => !empty($config->ssl_keypassword) ? $config->ssl_keypassword : '',
            'ssl_cainfo' => !empty($config->ssl_cainfo) ? $config->ssl_cainfo : '',
            'ssl_capath' => !empty($config->ssl_capath) ? $config->ssl_capath : '',
            'timeout' => !empty($config->server_timeout) ? $config->server_timeout : '30'
        );

        // "new" never evaluates to false: the SolrClient constructor reports unusable options by throwing
        // \SolrIllegalArgumentException. Catch it so that $triggerexception is honoured and callers get the
        // documented \core_search\engine_exception instead of a raw extension exception.
        try {
            $this->client = new \SolrClient($options);
        } catch (\SolrIllegalArgumentException $e) {
            if ($triggerexception) {
                throw new \core_search\engine_exception('engineserverstatus', 'search', '', null, $e->getMessage());
            }
            // The caller asked for a quiet failure: report it with false and do not cache anything.
            return false;
        }

        return $this->client;
    }

    /**
     * Returns a curl object for connecting to solr.
     *
     * @return \curl
     */
    public function get_curl_object() {
        // The curl object is built once and then reused.
        if ($this->curl !== null) {
            return $this->curl;
        }

        $this->curl = new \curl();

        $options = array();
        // Build the SSL options. Based on pecl-solr and general testing.
        if (!empty($this->config->secure)) {
            // Config setting => array(curl option receiving the value, companion type option set to PEM or null).
            $sslsettings = array(
                'ssl_cert' => array('CURLOPT_SSLCERT', 'CURLOPT_SSLCERTTYPE'),
                'ssl_key' => array('CURLOPT_SSLKEY', 'CURLOPT_SSLKEYTYPE'),
                'ssl_keypassword' => array('CURLOPT_KEYPASSWD', null),
                'ssl_cainfo' => array('CURLOPT_CAINFO', null),
                'ssl_capath' => array('CURLOPT_CAPATH', null),
            );

            foreach ($sslsettings as $setting => $curlopts) {
                if (empty($this->config->$setting)) {
                    // Settings that are not configured are left out entirely.
                    continue;
                }

                list($valueoption, $typeoption) = $curlopts;
                $options[$valueoption] = $this->config->$setting;
                if ($typeoption !== null) {
                    $options[$typeoption] = 'PEM';
                }
            }
        }

        $this->curl->setopt($options);

        // HTTP basic authentication is only added when both a username and a password are configured.
        if (!empty($this->config->server_username) && !empty($this->config->server_password)) {
            $authorization = $this->config->server_username . ':' . $this->config->server_password;
            // setHeader() expects one complete header line; passing the name and value as two separate
            // arguments would drop the value and send no usable credentials.
            $this->curl->setHeader('Authorization: Basic ' . base64_encode($authorization));
        }

        return $this->curl;
    }

    /**
     * Return a Moodle url object for the server connection.
     *
     * @param string $path The solr path to append.
     * @return \moodle_url
     */
    public function get_connection_url($path) {
        // The scheme has to match the server setup, or SSL will fail.
        $scheme = empty($this->config->secure) ? 'http' : 'https';

        // Hostname without trailing slashes, followed by the port only when one is configured.
        $host = rtrim($this->config->server_hostname, '/');
        $port = empty($this->config->server_port) ? '' : ':' . $this->config->server_port;

        // The requested path is appended below the index, without duplicating the separating slash.
        $location = '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

        return new \moodle_url($scheme . '://' . $host . $port . $location);
    }
}