General updates, see README.txt.

This commit is contained in:
mchampan 2006-07-25 17:23:10 +00:00
parent d9e1bf2478
commit b585dc5fcb
12 changed files with 149 additions and 279 deletions

View file

@ -1,7 +1,23 @@
2006/07/25
----------
Query logic moved into the SearchQuery class in querylib.php. Should be able
to include this file in any page and run a query against the index (PHP 5
checks must be added to those pages then, though).
Index info can be retrieved using IndexInfo class in indexlib.php.
Abstracted some stuff away, to reduce redundancy and decrease the
likelihood of errors. Improved the stats.php page to include some
diagnostics for administrators.
delete.php skeleton created for removing deleted documents from the
index. cron.php will contain the logic for running delete.php,
update.php and eventually add.php.
2006/07/11
----------
(Warning: It took me 1900 seconds to index the forum, go make coffee
whilst you wait.)
whilst you wait.) [Moodle.org forum data]
Forum search functions changed to use 'get_recordset' instead of
'get_records', for speed reasons. This provides a significant improvement,

View file

@ -430,7 +430,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
$freqPointer = $prevTermInfo->freqPointer;
$proxPointer = $prevTermInfo->proxPointer;
for ($count = $prevPosition*$indexInterval + 1;
$count < $termCount &&
$count <= $termCount &&
( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
($this->_getFieldPosition($termFieldNum) == $searchDicField &&
strcmp($termValue, $term->text) < 0) );

View file

@ -64,7 +64,16 @@ class Zend_Search_Lucene_Search_QueryTokenizer implements Iterator
$currentToken = '';
for ($count = 0; $count < strlen($inputString); $count++) {
if (ctype_alnum( $inputString{$count} )) {
if (ctype_alnum( $inputString{$count} ) ||
$inputString{$count} == '_') {
$currentToken .= $inputString{$count};
} else if ($inputString{$count} == '\\') { // Escaped character
$count++;
if ($count == strlen($inputString)) {
throw new Zend_Search_Lucene_Exception('Non finished escape sequence.');
}
$currentToken .= $inputString{$count};
} else {
// Previous token is finished

View file

@ -3,7 +3,7 @@
* extend.
* */
class SearchDocument extends Zend_Search_Lucene_Document {
abstract class SearchDocument extends Zend_Search_Lucene_Document {
public function __construct(&$doc, &$data, $document_type, $course_id, $group_id) {
$this->addField(Zend_Search_Lucene_Field::Keyword('id', $doc->id));
$this->addField(Zend_Search_Lucene_Field::Text('title', $doc->title));

View file

@ -8,11 +8,6 @@
class ForumSearchDocument extends SearchDocument {
public function __construct(&$post, $forum_id, $course_id, $group_id) {
// generic information
/*$doc->id = $post->id;
$doc->title = $post->subject;
$doc->author = $post->firstname." ".$post->lastname;
$doc->contents = $post->message;*/
$doc->id = $post['id'];
$doc->title = $post['subject'];
$doc->author = $post['firstname']." ".$post['lastname'];
@ -24,7 +19,7 @@
$data->forum = $forum_id;
$data->discussion = $post['discussion'];
parent::__construct($doc, $data, SEARCH_FORUM_TYPE, $course_id, $group_id);
parent::__construct($doc, $data, SEARCH_TYPE_FORUM, $course_id, $group_id);
} //constructor
} //ForumSearchDocument
@ -34,7 +29,7 @@
} //forum_make_link
function forum_iterator() {
//no @ = Undefined index: 82 in /home/michael/public_html/moodle/lib/datalib.php on line 2671
//no @ = Undefined index: 82 in moodle/lib/datalib.php on line 2671
return @get_all_instances_in_courses("forum", get_courses());
} //forum_iterator
@ -72,33 +67,6 @@
return $documents;
} //forum_get_content_for_index
//old slower version
/**
 * Older, slower way of gathering the indexable documents of one forum.
 *
 * Walks every entry returned by forum_get_discussions() and, for each of
 * them, every entry returned by forum_get_child_posts(). A post becomes a
 * ForumSearchDocument only when its message is non-empty and it is not
 * flagged as deleted. Child posts are filed under the group id of the
 * discussion post they belong to.
 *
 * @param object $forum forum record; its id and course fields are read
 * @return array list of ForumSearchDocument objects (empty when the forum
 *               is falsy or has no discussions)
 */
function forum_get_content_for_index_old(&$forum) {
    $documents = array();

    //nothing to index without a forum
    if (!$forum) {
        return $documents;
    } //if

    $discussions = forum_get_discussions($forum->id);

    //nothing to index without discussions either
    if (!$discussions) {
        return $documents;
    } //if

    foreach ($discussions as $discussion) {
        //only object entries are treated as posts
        if (!is_object($discussion)) {
            continue;
        } //if

        //the discussion's own (top-level) post
        if (strlen($discussion->message) > 0 && ($discussion->deleted != 1)) {
            $documents[] = new ForumSearchDocument($discussion, $forum->id, $forum->course, $discussion->groupid);
        } //if

        //the replies, which inherit the discussion's group
        $replies = forum_get_child_posts($discussion->id, $forum->id);

        if (!$replies) {
            continue;
        } //if

        foreach ($replies as $reply) {
            if (strlen($reply->message) > 0 && ($reply->deleted != 1)) {
                $documents[] = new ForumSearchDocument($reply, $forum->id, $forum->course, $discussion->groupid);
            } //if
        } //foreach
    } //foreach

    return $documents;
} //forum_get_content_for_index_old
//reworked faster version from /mod/forum/lib.php
function forum_get_discussions_fast($forum) {
global $CFG, $USER;

View file

@ -32,7 +32,7 @@
$data->wiki = $wiki_id;
// construct the parent class
parent::__construct($doc, $data, SEARCH_WIKI_TYPE, $course_id, $group_id);
parent::__construct($doc, $data, SEARCH_TYPE_WIKI, $course_id, $group_id);
} //constructor
} //WikiSearchDocument

View file

@ -49,15 +49,18 @@
//php5 found, continue including php5-only files
require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
if (get_config("search_indexer_busy") == 1) {
mtrace('<pre>Server Time: '.date('r',time())."\n");
if ($CFG->search_indexer_busy == '1') {
//means indexing was not finished previously
mtrace("Warning: Indexing was not successfully completed last time, restarting.\n");
} //if
//turn on busy flag
set_config("search_indexer_busy", 1);
mtrace('<pre>Server Time: '.date('r',time())."\n");
set_config('search_indexer_busy', '1');
//paths
$index_path = $CFG->dataroot.'/search';
$index_path = SEARCH_INDEX_PATH;
$index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
//setup directory in data root
@ -103,6 +106,7 @@
if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
foreach ($mods as $mod) {
if ($mod->name == 'forum') continue;
$class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
if (file_exists($class_file)) {
@ -174,6 +178,9 @@
mtrace('</pre>');
//finished, turn busy flag off
set_config("search_indexer_busy", 0);
set_config("search_indexer_busy", "0");
//mark the time we last updated
set_config("search_indexer_run_date", time());
?>

View file

@ -20,28 +20,16 @@
exit(0);
} //if
$index_path = "$CFG->dataroot/search";
$index_dir = get_directory_list($index_path, '', false, false);
$index_filecount = count($index_dir);
require_once("$CFG->dirroot/search/indexlib.php");
$indexinfo = new IndexInfo();
//check if the table exists in the db
$tables = $db->MetaTables();
if (in_array($CFG->prefix.'search_documents', $tables)) {
$db_count = count_records('search_documents');
} else {
$db_count = 0;
} //else
//TODO: elaborate on error messages, when db!=0 and index=0 -> corrupt, etc.
if ($index_filecount != 0 or $db_count != 0) {
mtrace("<pre>The data directory ($index_path) contains $index_filecount files, and\n"
."there are $db_count records in the <em>search_documents</em> table.\n"
if ($indexinfo->valid()) {
mtrace("<pre>The data directory ($indexinfo->path) contains $indexinfo->filecount files, and\n"
."there are ".$indexinfo->dbcount." records in the <em>search_documents</em> table.\n"
."\n"
."This indicates that you have already succesfully indexed this site, or at least\n"
."started and cancelled an indexing session. Follow the link if you are sure that\n"
."you want to continue indexing - this will replace any existing index data (no\n"
."Moodle data is affected).\n"
."This indicates that you have already succesfully indexed this site. Follow the link\n"
."if you are sure that you want to continue indexing - this will replace any existing\n"
."index data (no Moodle data is affected).\n"
."\n"
."You are encouraged to use the 'Test indexing' script before continuing onto\n"
."indexing - this will check if the modules are set up correctly. Please correct\n"

View file

@ -1,17 +1,30 @@
<?php
//Move this stuff to lib/searchlib.php?
// Author: Michael Champanis
/* Move this stuff to lib/searchlib.php?
* Author: Michael Champanis
*
* This file must not contain any PHP 5, because it is used to test for PHP 5
* itself, and needs to be able to be executed on PHP 4 installations.
* */
define('SEARCH_INDEX_PATH', "$CFG->dataroot/search");
//document types that can be searched
define('SEARCH_NO_TYPE', 'none');
define('SEARCH_WIKI_TYPE', 'wiki');
define('SEARCH_FORUM_TYPE', 'forum');
define('SEARCH_TYPE_NONE', 'none');
define('SEARCH_TYPE_WIKI', 'wiki');
define('SEARCH_TYPE_FORUM', 'forum');
//returns all the document type constants
function search_get_document_types() {
$r = Array(SEARCH_WIKI_TYPE, SEARCH_NO_TYPE, SEARCH_FORUM_TYPE);
return $r;
function search_get_document_types($prefix='SEARCH_TYPE') {
$ret = array();
foreach (get_defined_constants() as $key=>$value) {
if (substr($key, 0, strlen($prefix)) == $prefix) {
$ret[$key] = $value;
} //if
} //foreach
return $ret;
} //search_get_document_types
//shortens a url so it can fit on the results page

View file

@ -25,9 +25,9 @@
require_once('../config.php');
require_once("$CFG->dirroot/search/lib.php");
//check for php5, but don't die yet (see line 27)
//check for php5, but don't die yet (see line 52)
if ($check = search_check_php5()) {
require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
require_once("$CFG->dirroot/search/querylib.php");
$query_string = optional_param('query_string', '', PARAM_CLEAN);
$page_number = optional_param('page', 1, PARAM_INT);
@ -36,102 +36,9 @@
$page_number = 1;
} //if
$index_path = "$CFG->dataroot/search";
$no_index = false; //optimism!
$results_per_page = 10;
try {
$index = new Zend_Search_Lucene($index_path, false);
} catch(Exception $e) {
//print $e;
$no_index = true;
} //catch
$sq = new SearchQuery($query_string, $page_number, 10, true);
} //if
//Result document class that contains all the display information we need
/**
 * Plain value holder for one search hit, carrying just the fields the
 * results page prints. All properties start out as null.
 */
class ResultDocument {
    //link to the matching document
    public $url;

    //title shown as the link text
    public $title;

    //relevance score of the hit
    public $score;

    //document type of the hit
    public $doctype;

    //author of the matching document
    public $author;
} //ResultDocument
//generates an HTML string of links to result pages
/**
 * Builds the HTML pager ("< Back 1 2 [3] 4 Next >") for a result listing.
 *
 * Every page except the current one is a link to query.php carrying the
 * query string and the target page; the current page is shown as "[n]".
 * "Back" and "Next" are plain text when there is nothing to go back or
 * forward to. Long pagers are collapsed afterwards: the run of links from
 * page 4 up to (current - 5), and the run from (current + 5) up to
 * (last - 3), are each replaced by "...".
 *
 * @param string $query            query string placed verbatim in each link
 * @param int    $hits             total number of results
 * @param int    $page             current page (1-based)
 * @param int    $results_per_page number of results shown per page
 * @return string HTML fragment wrapped in a centred div
 */
function page_numbers($query, $hits, $page=1, $results_per_page=20) {
    //total result pages
    $pages = ceil($hits/$results_per_page);

    //common prefix of every link target
    $href = "query.php?query_string=$query&page=";

    $pieces = array("<div align='center'>");

    //Back is plain text on the first page
    $pieces[] = ($page > 1)
        ? "<a href='".$href.($page-1)."'>< Back</a>&nbsp;"
        : "< Back&nbsp;";

    //one entry per page, the current page is bracketed instead of linked
    $number = 1;
    while ($number <= $pages) {
        $pieces[] = ($page == $number)
            ? "[".$number."]&nbsp;"
            : "<a href='".$href.$number."'>".$number."</a>&nbsp;";
        $number++;
    } //while

    //Next is plain text on the last page
    $pieces[] = ($page < $pages)
        ? "<a href='".$href.($page+1)."'>Next ></a>&nbsp;"
        : "Next >&nbsp;";

    $pieces[] = "</div>";

    $html = implode('', $pieces);

    //shorten really long page lists, to stop the table distorting width-ways
    if (strlen($html) > 70) {
        //each pair is (first page, last page) of a run of links to collapse
        $runs = array(
            array(4, $page - 5),
            array($page + 5, $pages - 3),
        );

        foreach ($runs as $run) {
            $pattern = '/<a\D+\d+\D+>'.$run[0].'<\/a>.*?<a\D+\d+\D+>'.$run[1].'<\/a>/';
            $html = preg_replace($pattern, '...', $html);
        } //foreach
    } //if

    return $html;
} //page_numbers
//calculates whether a user is allowed to see this result
/**
 * Decides whether a user may see a given search result.
 *
 * Currently a stub: no permission check is carried out and every result
 * is reported as visible, whatever the arguments.
 *
 * @param object $user      user the result would be shown to (unused)
 * @param mixed  $course_id course the result belongs to (unused)
 * @param mixed  $group_id  group the result belongs to (unused)
 * @return bool always true
 */
function can_display(&$user, $course_id, $group_id) {
    //no access rules are evaluated, so every result is allowed through
    $allowed = true;

    return $allowed;
} //can_display
//caches the results of the last query, deletes the previous one also
/**
 * Session-backed cache for the result set of the most recent query.
 *
 * Only one result set is kept at a time: whenever the function is called
 * with an id that differs from the one remembered in
 * $_SESSION['search_last_term'], the entry stored for that older id is
 * dropped first.
 *
 * Called with an id and a (truthy) object, the object is stored under the
 * id and the id is remembered as the last term. Called with an id only,
 * the stored entry for that id is returned if there is one.
 *
 * Stale entries are removed with unset() alone; session_unregister() no
 * longer exists in current PHP and unset() on $_SESSION is sufficient.
 *
 * @param mixed $id     cache key (the query string), or false
 * @param mixed $object result set to store, or false to perform a lookup
 * @return mixed true after storing, the cached value on a hit, null when
 *               nothing is stored or found
 */
function cache($id=false, &$object=false) {
    //see if there was a previous query
    $last_term = isset($_SESSION['search_last_term']) ? $_SESSION['search_last_term'] : false;

    //if this query is different from the last, clear out the last one
    //(skipped when there is no last one, so no bogus key is ever unset)
    if ($id != false and $last_term !== false and (string)$last_term !== (string)$id) {
        unset($_SESSION[$last_term]);
    } //if

    //store the new query if id and object are passed in
    if ($object and $id) {
        $_SESSION['search_last_term'] = $id;
        $_SESSION[$id] = $object;
        return true;
    } //if

    //otherwise return the stored results
    if ($id and isset($_SESSION[$id])) {
        return $_SESSION[$id];
    } //if

    //cache miss, or nothing to store
    return null;
} //cache
if (!$site = get_site()) {
redirect("index.php");
} //if
@ -166,17 +73,17 @@
<div align="center">
<?php
echo 'Searching: ';
print 'Searching: ';
if ($no_index) {
print "0";
if ($sq->is_valid_index()) {
print $sq->index_count();
} else {
print $index->count();
print "0";
} //else
print ' documents.';
if ($no_index and isadmin()) {
if (!$sq->is_valid_index() and isadmin()) {
print "<br><br>Admin: There appears to be no index, click <a href='indexersplash.php'>here</a> to create one.";
} //if
?>
@ -185,43 +92,11 @@
<?php
print_simple_box_end();
if (!empty($query_string) and !$no_index) {
if ($sq->is_valid()) {
print_simple_box_start('center', '50%', 'white', 10);
search_stopwatch();
//if the cache is empty
if (!($hits = cache($query_string))) {
$resultdocs = array();
$resultdoc = new ResultDocument;
//generate a new result-set
$hits = $index->find(strtolower($query_string));
foreach ($hits as $hit) {
//check permissions on each result
if (can_display($USER, $hit->course_id, $hit->group_id)) {
$resultdoc->url = $hit->url;
$resultdoc->title = $hit->title;
$resultdoc->score = $hit->score;
$resultdoc->doctype = $hit->doctype;
$resultdoc->author = $hit->author;
//and store it if it passes the test
$resultdocs[] = clone($resultdoc);
} //if
} //foreach
//cache the results so we don't have to compute this on every page-load
cache($query_string, $resultdocs);
//print "Using new results.";
} else {
//There was something in the cache, so we're using that to save time
//print "Using cached results.";
} //else
$hit_count = count($hits);
$hit_count = $sq->count();
print "<br>";
@ -229,27 +104,13 @@
print "<br>";
if ($hit_count > 0) {
if ($hit_count < $results_per_page) {
$page_number = 1;
} else if ($page_number > ceil($hit_count/$results_per_page)) {
$page_number = $hit_count/$results_per_page;
} //if
$start = ($page_number - 1)*$results_per_page;
$end = $start + $results_per_page;
$page_links = page_numbers($query_string, $hit_count, $page_number, $results_per_page);
$page_links = $sq->page_numbers();
$hits = $sq->results();
print "<ol>";
for ($i = $start; $i < $end; $i++) {
if ($i >= $hit_count) {
break;
} //if
$listing = $hits[$i];
print "<li value='".($i+1)."'><a href='".$listing->url."'>$listing->title</a><br>\n"
foreach ($hits as $listing) {
print "<li value='".($listing->number+1)."'><a href='".$listing->url."'>$listing->title</a><br>\n"
."<em>".search_shorten_url($listing->url, 70)."</em><br>\n"
."Type: ".$listing->doctype.", score: ".round($listing->score, 3).", author: ".$listing->author."<br>\n"
."<br></li>\n";
@ -260,9 +121,6 @@
} //if
print_simple_box_end();
} //if
if (!empty($query_string) and !$no_index) {
?>
<div align="center">
@ -270,7 +128,7 @@
</div>
<?php
} //if
} //if (sq is valid)
print_simple_box_end();
print_footer();

View file

@ -1,6 +1,7 @@
<?php
/* Prints some basic statistics about the current index.
* Allows the administrator to create an index if none exists.
*
* Does some diagnostics if you are logged in as an administrator.
* */
require_once('../config.php');
@ -8,31 +9,9 @@
//check for php5, but don't die yet
if ($check = search_check_php5()) {
//filesystem stats
$index_path = "$CFG->dataroot/search";
$index_size = display_size(get_directory_size($index_path));
$index_dir = get_directory_list($index_path, '', false, false);
$index_filecount = count($index_dir);
require_once("$CFG->dirroot/search/indexlib.php");
//indexed documents stats (via db)
$db_exists = false;
$admin_tables = $db->MetaTables();
if (in_array($CFG->prefix.'search_documents', $admin_tables)) {
$db_exists = true;
$types = search_get_document_types();
sort($types);
//total documents
$type_counts['Total'] = count_records('search_documents');
foreach($types as $type) {
$c = count_records('search_documents', 'doctype', $type);
$type_counts[$type] = (int)$c;
} //foreach
} else {
$type_counts['Total'] = 0;
} //else
$indexinfo = new IndexInfo();
} //if
if (!$site = get_site()) {
@ -66,12 +45,35 @@
$admin_table->cellspacing = 0;
$admin_table->width = '500';
$admin_table->data[] = array('<strong>Data directory</strong>', '<em><strong>'.$index_path.'</strong></em>');
$admin_table->data[] = array('Files in index directory', $index_filecount);
$admin_table->data[] = array('Total size', $index_size);
$admin_table->data[] = array('<strong>Data directory</strong>', '<em><strong>'.$indexinfo->path.'</strong></em>');
$admin_table->data[] = array('Files in index directory', $indexinfo->filecount);
$admin_table->data[] = array('Total size', $indexinfo->size);
if ($index_filecount == 0 or !$db_exists) {
$admin_table->data[] = array('Click to create index', "<a href='indexersplash.php'>Indexer</a>");
if ($indexinfo->time > 0) {
$admin_table->data[] = array('Created on', date('r', $indexinfo->time));
} else {
$admin_table->data[] = array('Created on', '-');
} //else
if (!$indexinfo->valid($errors)) {
$admin_table->data[] = array('<strong>Errors</strong>', '&nbsp;');
foreach ($errors as $key=>$value) {
$admin_table->data[] = array($key.' ... ', $value);
} //foreach
$admin_table->data[] = array('<strong>Solutions</strong>', '&nbsp;');
if (isset($errors['dir'])) {
$admin_table->data[] = array('Check dir', 'Ensure the data directory exists and is writable.');
} //if
if (isset($errors['db'])) {
$admin_table->data[] = array('Check DB', 'Check your database for any problems.');
} //if
$admin_table->data[] = array('Run indexer test', '<a href=\'tests/index.php\'>tests/index.php</a>');
$admin_table->data[] = array('Run indexer', '<a href=\'indexersplash.php\'>indexersplash.php</a>');
} //if
} //if
@ -84,8 +86,17 @@
$table->width = '500';
$table->data[] = array('<strong>Database</strong>', '<em><strong>search_documents<strong></em>');
foreach($type_counts as $key => $value) {
$table->data[] = array($key, $value);
//add an extra field if we're admin
if (isadmin()) {
//don't want to confuse users if the two totals don't match (hint: they should)
$table->data[] = array('Documents in index', $indexinfo->indexcount);
} //if
$table->data[] = array('Documents in database', $indexinfo->dbcount);
foreach($indexinfo->types as $key => $value) {
$table->data[] = array("'$key' documents", $value);
} //foreach
if (isadmin()) {

View file

@ -49,8 +49,8 @@
if (file_exists($class_file)) {
include_once($class_file);
if (!defined('SEARCH_'.strtoupper($mod->name).'_TYPE')) {
mtrace("ERROR: Constant 'SEARCH_".strtoupper($mod->name)."_TYPE' is not defined in /search/lib.php");
if (!defined('SEARCH_TYPE_'.strtoupper($mod->name))) {
mtrace("ERROR: Constant 'SEARCH_TYPE_".strtoupper($mod->name)."' is not defined in /search/lib.php");
continue;
} //if