This commit is contained in:
Andrew Nicols 2016-04-07 09:40:40 +08:00
commit b2c40917c3
3 changed files with 28 additions and 10 deletions

View file

@ -63,17 +63,25 @@ class document extends \core_search\document {
'solr_fileid' => array( 'solr_fileid' => array(
'type' => 'string', 'type' => 'string',
'stored' => true, 'stored' => true,
'indexed' => false 'indexed' => true
), ),
'solr_filecontenthash' => array( 'solr_filecontenthash' => array(
'type' => 'string', 'type' => 'string',
'stored' => true, 'stored' => true,
'indexed' => false 'indexed' => true
), ),
'solr_fileindexedcontent' => array( // Stores the status of file indexing.
'solr_fileindexstatus' => array(
'type' => 'int', 'type' => 'int',
'stored' => true, 'stored' => true,
'indexed' => true 'indexed' => true
),
// Field to index, but not store, file contents.
'solr_filecontent' => array(
'type' => 'text',
'stored' => false,
'indexed' => true,
'mainquery' => true
) )
); );
@ -186,7 +194,7 @@ class document extends \core_search\document {
$data['type'] = \core_search\manager::TYPE_FILE; $data['type'] = \core_search\manager::TYPE_FILE;
$data['solr_fileid'] = $file->get_id(); $data['solr_fileid'] = $file->get_id();
$data['solr_filecontenthash'] = $file->get_contenthash(); $data['solr_filecontenthash'] = $file->get_contenthash();
$data['solr_fileindexedcontent'] = self::INDEXED_FILE_TRUE; $data['solr_fileindexstatus'] = self::INDEXED_FILE_TRUE;
$data['title'] = $file->get_filename(); $data['title'] = $file->get_filename();
return $data; return $data;

View file

@ -234,7 +234,7 @@ class engine extends \core_search\engine {
$fields = $documentclass::get_default_fields_definition(); $fields = $documentclass::get_default_fields_definition();
$dismax = false; $dismax = false;
if ($query instanceof SolrDisMaxQuery) { if ($query instanceof \SolrDisMaxQuery) {
$dismax = true; $dismax = true;
} }
@ -618,7 +618,7 @@ class engine extends \core_search\engine {
if ($indexedfile->solr_filecontenthash != $files[$fileid]->get_contenthash()) { if ($indexedfile->solr_filecontenthash != $files[$fileid]->get_contenthash()) {
continue; continue;
} }
if ($indexedfile->solr_fileindexedcontent == document::INDEXED_FILE_FALSE && if ($indexedfile->solr_fileindexstatus == document::INDEXED_FILE_FALSE &&
$this->file_is_indexable($files[$fileid])) { $this->file_is_indexable($files[$fileid])) {
// This means that the last time we indexed this file, filtering blocked it. // This means that the last time we indexed this file, filtering blocked it.
// Current settings say it is indexable, so we will allow it to be indexed. // Current settings say it is indexable, so we will allow it to be indexed.
@ -682,7 +682,7 @@ class engine extends \core_search\engine {
$query->addField('title'); $query->addField('title');
$query->addField('solr_fileid'); $query->addField('solr_fileid');
$query->addField('solr_filecontenthash'); $query->addField('solr_filecontenthash');
$query->addField('solr_fileindexedcontent'); $query->addField('solr_fileindexstatus');
$query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')'); $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
$query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE); $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);
@ -729,7 +729,7 @@ class engine extends \core_search\engine {
$result->title = $doc->title; $result->title = $doc->title;
$result->solr_fileid = $doc->solr_fileid; $result->solr_fileid = $doc->solr_fileid;
$result->solr_filecontenthash = $doc->solr_filecontenthash; $result->solr_filecontenthash = $doc->solr_filecontenthash;
$result->solr_fileindexedcontent = $doc->solr_fileindexedcontent; $result->solr_fileindexstatus = $doc->solr_fileindexstatus;
$out[] = $result; $out[] = $result;
} }
@ -752,7 +752,7 @@ class engine extends \core_search\engine {
if (!$this->file_is_indexable($storedfile)) { if (!$this->file_is_indexable($storedfile)) {
// For files that we don't consider indexable, we will still place a reference in the search engine. // For files that we don't consider indexable, we will still place a reference in the search engine.
$filedoc['solr_fileindexedcontent'] = document::INDEXED_FILE_FALSE; $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
$this->add_solr_document($filedoc); $this->add_solr_document($filedoc);
return; return;
} }
@ -764,6 +764,11 @@ class engine extends \core_search\engine {
// This will prevent solr from automatically making fields for every tika output. // This will prevent solr from automatically making fields for every tika output.
$url->param('uprefix', 'ignored_'); $url->param('uprefix', 'ignored_');
// Control how content is captured. This will keep our file content clean of non-important metadata.
$url->param('captureAttr', 'true');
// Move the content to a field for indexing.
$url->param('fmap.content', 'solr_filecontent');
// These are common fields that matches the standard *_point dynamic field and causes an error. // These are common fields that matches the standard *_point dynamic field and causes an error.
$url->param('fmap.media_white_point', 'ignored_mwp'); $url->param('fmap.media_white_point', 'ignored_mwp');
$url->param('fmap.media_black_point', 'ignored_mbp'); $url->param('fmap.media_black_point', 'ignored_mbp');
@ -833,7 +838,7 @@ class engine extends \core_search\engine {
} }
// If we get here, the document was not indexed due to an error. So we will index just the base info without the file. // If we get here, the document was not indexed due to an error. So we will index just the base info without the file.
$filedoc['solr_fileindexedcontent'] = document::INDEXED_FILE_ERROR; $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
$this->add_solr_document($filedoc); $this->add_solr_document($filedoc);
} }

View file

@ -165,6 +165,11 @@ class search_solr_engine_testcase extends advanced_testcase {
$this->assertEquals($USER->id, $results[0]->get('userid')); $this->assertEquals($USER->id, $results[0]->get('userid'));
$this->assertEquals(\context_system::instance()->id, $results[0]->get('contextid')); $this->assertEquals(\context_system::instance()->id, $results[0]->get('contextid'));
// Do a test to make sure we aren't searching non-query fields, like areaid.
$querydata->q = \core_search\manager::generate_areaid('core_mocksearch', 'role_capabilities');
$this->assertCount(0, $this->search->search($querydata));
$querydata->q = 'message';
sleep(1); sleep(1);
$beforeadding = time(); $beforeadding = time();
sleep(1); sleep(1);