MDL-57791 analytics: Changes after review

- Split model::predict in parts
- JS promises updated according to eslint-plugin-promise
- New API methods replacing direct DB queries
- Reduce insights nav link display cost
- Increase time limit as well as memory for big processes
- Move prediction action event to core
- Dataset write locking and others
- Refine last time range end time
- Removed the dodgy conversion of the time splitting method id to int
- Replace the admin_setting_predictor output_html override with a write_setting override
- New APIs for access control
- Discard invalid samples also during prediction
This commit is contained in:
David Monllao 2017-06-15 10:21:58 +02:00
parent 584ffa4ffc
commit 1611308b58
40 changed files with 513 additions and 289 deletions

View file

@ -30,28 +30,26 @@ require_once(__DIR__ . '/../../lib/adminlib.php');
class admin_setting_predictor extends \admin_setting_configselect {
/**
* Builds HTML to display the control.
* Save a setting
*
* The main purpose of this is to display a warning if the selected predictions processor is not ready.
* @param string $data Unused
* @param string $query
* @return string HTML
* @param string $data
* @return string empty or error string
*/
public function output_html($data, $query='') {
global $CFG, $OUTPUT;
$html = '';
public function write_setting($data) {
if (!$this->load_choices() or empty($this->choices)) {
return '';
}
if (!array_key_exists($data, $this->choices)) {
return ''; // ignore it
}
// Calling it here without checking if it is ready because we check it below and show it as a controlled case.
$selectedprocessor = \core_analytics\manager::get_predictions_processor($data, false);
$isready = $selectedprocessor->is_ready();
if ($isready !== true) {
$html .= $OUTPUT->notification(get_string('errorprocessornotready', 'analytics', $isready));
return get_string('errorprocessornotready', 'analytics', $isready);
}
$html .= parent::output_html($data, $query);
return $html;
return ($this->config_write($this->name, $data) ? '' : get_string('errorsetting', 'admin'));
}
}

View file

@ -130,7 +130,8 @@ abstract class calculable {
/**
* Returns the number of weeks a time range contains.
*
* Useful for calculations that depend on the time range duration.
* Useful for calculations that depend on the time range duration. Note that it returns
* a float, rounding the float may lead to inaccurate results.
*
* @param int $starttime
* @param int $endtime
@ -141,9 +142,14 @@ abstract class calculable {
throw new \coding_exception('End time timestamp should be greater than start time.');
}
$diff = $endtime - $starttime;
// setTimezone() requires a \DateTimeZone instance; \DateTimeZone::UTC is only an integer
// group constant (meant for \DateTimeZone::listIdentifiers()) and is rejected here.
$utc = new \DateTimeZone('UTC');
$starttimedt = new \DateTime();
$starttimedt->setTimestamp($starttime);
$starttimedt->setTimezone($utc);
$endtimedt = new \DateTime();
$endtimedt->setTimestamp($endtime);
$endtimedt->setTimezone($utc);
// No need to be strict about DST here.
$diff = $endtimedt->getTimestamp() - $starttimedt->getTimestamp();
return $diff / WEEKSECS;
}

View file

@ -443,9 +443,6 @@ class course implements \core_analytics\analysable {
return false;
}
// TODO Use course_modules_completion's timemodified + COMPLETION_COMPLETE* to discard
// activities that have already been completed.
// We skip activities that were not yet visible or their 'until' was not in this $starttime - $endtime range.
if ($activity->availability) {
$info = new \core_availability\info_module($activity);
@ -485,7 +482,6 @@ class course implements \core_analytics\analysable {
}
}
// TODO Think about activities in sectionnum 0.
if ($activity->sectionnum == 0) {
return false;
}
@ -533,8 +529,6 @@ class course implements \core_analytics\analysable {
$dateconditions = $info->get_availability_tree()->get_all_children('\availability_date\condition');
foreach ($dateconditions as $condition) {
// Availability API does not allow us to check from / to dates nicely, we need to be naughty.
// TODO Would be nice to expand \availability_date\condition API for this calling a save that
// does not save is weird.
$conditiondata = $condition->save();
if ($conditiondata->d === \availability_date\condition::DIRECTION_FROM &&

View file

@ -86,18 +86,21 @@ class dataset_manager {
/**
* Mark the analysable as being analysed.
*
* @return void
* @return bool Could we get the lock or not.
*/
public function init_process() {
$lockkey = 'modelid:' . $this->modelid . '-analysableid:' . $this->analysableid .
'-timesplitting:' . self::convert_to_int($this->timesplittingid) . '-includetarget:' . (int)$this->includetarget;
'-timesplitting:' . self::clean_time_splitting_id($this->timesplittingid) . '-includetarget:' . (int)$this->includetarget;
// Large timeout as processes may be quite long.
$lockfactory = \core\lock\lock_config::get_lock_factory('core_analytics');
$this->lock = $lockfactory->get_lock($lockkey, WEEKSECS);
// We release the lock if there is an error during the process.
\core_shutdown_manager::register_function(array($this, 'release_lock'), array($this->lock));
// If it is not ready in 10 secs skip this model + analysable + timesplittingmethod combination
// it will attempt it again during next cron run.
if (!$this->lock = $lockfactory->get_lock($lockkey, 10)) {
return false;
}
return true;
}
/**
@ -115,7 +118,7 @@ class dataset_manager {
'filearea' => self::get_filearea($this->includetarget),
'itemid' => $this->modelid,
'contextid' => \context_system::instance()->id,
'filepath' => '/analysable/' . $this->analysableid . '/' . self::convert_to_int($this->timesplittingid) . '/',
'filepath' => '/analysable/' . $this->analysableid . '/' . self::clean_time_splitting_id($this->timesplittingid) . '/',
'filename' => self::get_filename($this->evaluation)
];
@ -127,6 +130,10 @@ class dataset_manager {
// Write all this stuff to a tmp file.
$filepath = make_request_directory() . DIRECTORY_SEPARATOR . $filerecord['filename'];
$fh = fopen($filepath, 'w+');
if (!$fh) {
    // The temp file could not be opened for writing: close the process before bailing out.
    $this->close_process();
    // Report the path that fopen() was actually given ($filepath, built just above).
    throw new \moodle_exception('errorcannotwritedataset', 'analytics', '', $filepath);
}
foreach ($data as $line) {
fputcsv($fh, $line);
}
@ -144,10 +151,6 @@ class dataset_manager {
$this->lock->release();
}
public function release_lock(\core\lock\lock $lock) {
$lock->release();
}
/**
* Returns the previous evaluation file.
*
@ -162,7 +165,7 @@ class dataset_manager {
$fs = get_file_storage();
// Evaluation data is always labelled.
return $fs->get_file(\context_system::instance()->id, 'analytics', self::LABELLED_FILEAREA, $modelid,
'/timesplitting/' . self::convert_to_int($timesplittingid) . '/', self::EVALUATION_FILENAME);
'/timesplitting/' . self::clean_time_splitting_id($timesplittingid) . '/', self::EVALUATION_FILENAME);
}
public static function delete_previous_evaluation_file($modelid, $timesplittingid) {
@ -183,7 +186,7 @@ class dataset_manager {
// Always evaluation.csv and labelled as it is an evaluation file.
$filearea = self::get_filearea(true);
$filename = self::get_filename(true);
$filepath = '/analysable/' . $analysableid . '/' . self::convert_to_int($timesplittingid) . '/';
$filepath = '/analysable/' . $analysableid . '/' . self::clean_time_splitting_id($timesplittingid) . '/';
return $fs->get_file(\context_system::instance()->id, 'analytics', $filearea, $modelid, $filepath, $filename);
}
@ -235,6 +238,9 @@ class dataset_manager {
// Start writing to the merge file.
$wh = fopen($tmpfilepath, 'w');
if (!$wh) {
throw new \moodle_exception('errorcannotwritedataset', 'analytics', '', $tmpfilepath);
}
fputcsv($wh, $varnames);
fputcsv($wh, $values);
@ -262,7 +268,7 @@ class dataset_manager {
'filearea' => self::get_filearea($includetarget),
'itemid' => $modelid,
'contextid' => \context_system::instance()->id,
'filepath' => '/timesplitting/' . self::convert_to_int($timesplittingid) . '/',
'filepath' => '/timesplitting/' . self::clean_time_splitting_id($timesplittingid) . '/',
'filename' => self::get_filename($evaluation)
];
@ -315,17 +321,14 @@ class dataset_manager {
}
/**
* I know it is not very orthodox...
* Remove all possibly problematic chars from the time splitting method id (id = its full class name).
*
* @param string $string
* @return int
* @param string $timesplittingid
* @return string
*/
protected static function convert_to_int($string) {
$sum = 0;
for ($i = 0; $i < strlen($string); $i++) {
$sum += ord($string[$i]);
}
return $sum;
protected static function clean_time_splitting_id($timesplittingid) {
$timesplittingid = str_replace('\\', '-', $timesplittingid);
return clean_param($timesplittingid, PARAM_ALPHANUMEXT);
}
protected static function get_filename($evaluation) {

View file

@ -1,95 +0,0 @@
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Prediction action clicked event.
*
* @property-read array $other {
* Extra information about event.
*
* - string actionname: The action name
* }
*
* @package core_analytics
* @copyright 2017 David Monllao {@link http://www.davidmonllao.com}
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
namespace core_analytics\event;
defined('MOODLE_INTERNAL') || die();
/**
 * Event fired once a user has clicked on one of the actions suggested for a prediction.
 *
 * @package core_analytics
 * @copyright 2017 David Monllao {@link http://www.davidmonllao.com}
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class action_clicked extends \core\event\base {

    /**
     * Fills in the basic event properties.
     */
    protected function init() {
        $properties = array(
            'objecttable' => 'analytics_predictions',
            'crud' => 'r',
            'edulevel' => self::LEVEL_TEACHING,
        );
        foreach ($properties as $property => $value) {
            $this->data[$property] = $value;
        }
    }

    /**
     * Returns the localised event name.
     *
     * @return string
     */
    public static function get_name() {
        $name = get_string('eventactionclicked', 'analytics');
        return $name;
    }

    /**
     * Returns the non-localised event description, including ids, for admin use only.
     *
     * @return string
     */
    public function get_description() {
        $description = "The user with id '$this->userid' has clicked '{$this->other['actionname']}' action for the prediction with id '".$this->objectid."'.";
        return $description;
    }

    /**
     * Returns the URL of the prediction this event refers to.
     *
     * @return \moodle_url
     */
    public function get_url() {
        $params = array('id' => $this->objectid);
        return new \moodle_url('/report/insights/prediction.php', $params);
    }

    /**
     * Custom validations, the object id is mandatory.
     *
     * @throws \coding_exception
     * @return void
     */
    protected function validate_data() {
        parent::validate_data();

        if (isset($this->objectid)) {
            return;
        }
        throw new \coding_exception('The \'objectid\' must be set.');
    }

    /**
     * Returns an array whose 'db' key names the analytics_predictions table.
     *
     * @return array
     */
    public static function get_objectid_mapping() {
        $mapping = array();
        $mapping['db'] = 'analytics_predictions';
        return $mapping;
    }
}

View file

@ -192,11 +192,11 @@ abstract class base {
// Target instances scope is per-analysable (it can't be lower as calculations run once per
// analysable, not time splitting method nor time range).
$target = forward_static_call(array($this->target, 'instance'));
$target = call_user_func(array($this->target, 'instance'));
// We need to check that the analysable is valid for the target even if we don't include targets
// as we still need to discard invalid analysables for the target.
$result = $target->is_valid_analysable($analysable, $includetarget);
$result = $target->is_valid_analysable($analysable, $includetarget, true);
if ($result !== true) {
$a = new \stdClass();
$a->analysableid = $analysable->get_id();
@ -217,6 +217,7 @@ abstract class base {
$previousanalysis = \core_analytics\dataset_manager::get_evaluation_analysable_file($this->modelid,
$analysable->get_id(), $timesplitting->get_id());
// 1 week is a partly random time interval, no need to worry about DST.
$boundary = time() - WEEKSECS;
if ($previousanalysis && $previousanalysis->get_timecreated() > $boundary) {
// Recover the previous analysed file and avoid generating a new one.
@ -344,18 +345,37 @@ abstract class base {
$this->options['evaluation'], !empty($target));
// Flag the model + analysable + timesplitting as being analysed (prevent concurrent executions).
$dataset->init_process();
if (!$dataset->init_process()) {
// If this model + analysable + timesplitting combination is being analysed we skip this process.
$result->status = \core_analytics\model::NO_DATASET;
$result->message = get_string('analysisinprogress', 'analytics');
return $result;
}
// Remove samples the target consider invalid. Note that we use $this->target, $target will be false
// during prediction, but we still need to discard samples the target considers invalid.
$this->target->add_sample_data($samplesdata);
$this->target->filter_out_invalid_samples($sampleids, $analysable, $target);
if (!$sampleids) {
$result->status = \core_analytics\model::NO_DATASET;
$result->message = get_string('novalidsamples', 'analytics');
$dataset->close_process();
return $result;
}
foreach ($this->indicators as $key => $indicator) {
// The analyser attaches the main entities the sample depends on and are provided to the
// indicator to calculate the sample.
$this->indicators[$key]->add_sample_data($samplesdata);
}
// Provide samples to the target instance (different than $this->target) $target is the new instance we get
// for each analysis in progress.
if ($target) {
// Also provided to the target.
$target->add_sample_data($samplesdata);
}
// Here we start the memory intensive process that will last until $data var is
// unset (until the method is finished basically).
$data = $timesplitting->calculate($sampleids, $this->get_samples_origin(), $this->indicators, $ranges, $target);
@ -363,6 +383,7 @@ abstract class base {
if (!$data) {
$result->status = \core_analytics\model::ANALYSE_REJECTED_RANGE_PROCESSOR;
$result->message = get_string('novaliddata', 'analytics');
$dataset->close_process();
return $result;
}

View file

@ -128,6 +128,11 @@ abstract class community_of_inquiry_activity extends linear {
}
protected function any_feedback($action, \cm_info $cm, $contextid, $user) {
// in_array() takes the haystack as a single array; the valid actions are compared strictly.
if (!in_array($action, array('submitted', 'replied', 'viewed'), true)) {
    throw new \coding_exception('Provided action "' . $action . '" is not valid.');
}
if (empty($this->activitylogs[$contextid])) {
return false;
}

View file

@ -44,10 +44,7 @@ class user_track_forums extends binary {
}
protected function calculate_sample($sampleid, $samplesorigin, $starttime = false, $endtime = false) {
    // Only the user's trackforums flag is read; the time range arguments are not used.
    $user = $this->retrieve('user', $sampleid);

    if ($user->trackforums) {
        return self::get_max_value();
    }
    return self::get_min_value();
}
}

View file

@ -45,7 +45,7 @@ abstract class base extends \core_analytics\calculable {
/**
* Returns the analyser class that should be used along with this target.
*
* @return string
* @return string The full class name as a string
*/
abstract public function get_analyser_class();
@ -62,20 +62,21 @@ abstract class base extends \core_analytics\calculable {
abstract public function is_valid_analysable(\core_analytics\analysable $analysable, $fortraining = true);
/**
* is_valid_sample
* Is this sample from the $analysable valid?
*
* @param int $sampleid
* @param \core_analytics\analysable $analysable
* @return void
* @param bool $fortraining
* @return bool
*/
abstract public function is_valid_sample($sampleid, \core_analytics\analysable $analysable);
abstract public function is_valid_sample($sampleid, \core_analytics\analysable $analysable, $fortraining = true);
/**
* Calculates this target for the provided samples.
*
* In case there are no values to return or the provided sample is not applicable just return null.
*
* @param int $sample
* @param int $sampleid
* @param \core_analytics\analysable $analysable
* @param int|false $starttime Limit calculations to start time
* @param int|false $endtime Limit calculations to end time
@ -103,36 +104,52 @@ abstract class base extends \core_analytics\calculable {
return false;
}
public function prediction_actions(\core_analytics\prediction $prediction) {
global $PAGE;
/**
* Suggested actions for a user.
*
* @param \core_analytics\prediction $prediction
* @param bool $includedetailsaction
* @return \core_analytics\prediction_action[]
*/
public function prediction_actions(\core_analytics\prediction $prediction, $includedetailsaction = false) {
$actions = array();
$predictionurl = new \moodle_url('/report/insights/prediction.php',
array('id' => $prediction->get_prediction_data()->id));
if ($predictionurl->compare($PAGE->url)) {
// We don't show the link to prediction.php if we are already in prediction.php
// prediction.php's $PAGE->set_url call is prior to any core_analytics namespace method call.
return array();
if ($includedetailsaction) {
$predictionurl = new \moodle_url('/report/insights/prediction.php',
array('id' => $prediction->get_prediction_data()->id));
$actions['predictiondetails'] = new \core_analytics\prediction_action('predictiondetails', $prediction,
$predictionurl, new \pix_icon('t/preview', get_string('viewprediction', 'analytics')),
get_string('viewprediction', 'analytics'));
}
return array('predictiondetails' => new \core_analytics\prediction_action('predictiondetails', $prediction, $predictionurl,
new \pix_icon('t/preview', get_string('viewprediction', 'analytics')),
get_string('viewprediction', 'analytics'))
);
return $actions;
}
/**
* Callback to execute once a prediction has been returned from the predictions processor.
*
* @param int $modelid
* @param int $sampleid
* @param int $rangeindex
* @param \context $samplecontext
* @param float|int $prediction
* @param float $predictionscore
* @return void
*/
public function prediction_callback($modelid, $sampleid, $samplecontext, $prediction, $predictionscore) {
public function prediction_callback($modelid, $sampleid, $rangeindex, \context $samplecontext, $prediction, $predictionscore) {
return;
}
public function generate_insights($modelid, $samplecontexts) {
/**
* Generates insights notifications
*
* @param int $modelid
* @param \context[] $samplecontexts
* @return void
*/
public function generate_insight_notifications($modelid, $samplecontexts) {
global $CFG;
foreach ($samplecontexts as $context) {
@ -142,12 +159,7 @@ abstract class base extends \core_analytics\calculable {
$insightinfo->contextname = $context->get_context_name();
$subject = get_string('insightmessagesubject', 'analytics', $insightinfo);
if ($context->contextlevel >= CONTEXT_COURSE) {
// Course level notification.
$users = get_enrolled_users($context, 'moodle/analytics:listinsights');
} else {
$users = get_users_by_capability($context, 'moodle/analytics:listinsights');
}
$users = $this->get_insights_users($context);
if (!$coursecontext = $context->get_course_context(false)) {
$coursecontext = \context_course::instance(SITEID);
@ -181,6 +193,33 @@ abstract class base extends \core_analytics\calculable {
}
/**
 * Lists the users that will receive insights notifications for the given context.
 *
 * It can be overwritten if needed, but keep in mind that the moodle/analytics:listinsights
 * capability is required to access the list of insights.
 *
 * @param \context $context
 * @return array
 */
protected function get_insights_users(\context $context) {
    $capability = 'moodle/analytics:listinsights';

    if ($context->contextlevel < CONTEXT_COURSE) {
        // Above course level: users holding the capability in this context.
        return get_users_by_capability($context, $capability);
    }

    // At course level or below only enrolled users, although this is not ideal for
    // teachers assigned at category level.
    return get_enrolled_users($context, $capability);
}
/**
* Returns an instance of the child class.
*
* Useful to reset cached data.
*
* @return static A new instance of the class this method is called on (late static binding).
*/
public static function instance() {
return new static();
}
@ -200,7 +239,8 @@ abstract class base extends \core_analytics\calculable {
/**
* Should the model callback be triggered?
*
* @param mixed $class
* @param mixed $predictedvalue
* @param float $predictionscore
* @return bool
*/
public function triggers_callback($predictedvalue, $predictionscore) {
@ -235,11 +275,11 @@ abstract class base extends \core_analytics\calculable {
*
* @param array $sampleids
* @param \core_analytics\analysable $analysable
* @param integer $starttime startime is not necessary when calculating targets
* @param integer $endtime endtime is not necessary when calculating targets
* @param int $starttime
* @param int $endtime
* @return array The format to follow is [userid] = scalar|null
*/
public function calculate(&$sampleids, \core_analytics\analysable $analysable) {
public function calculate($sampleids, \core_analytics\analysable $analysable, $starttime = false, $endtime = false) {
if (!PHPUNIT_TEST && CLI_SCRIPT) {
echo '.';
@ -248,14 +288,8 @@ abstract class base extends \core_analytics\calculable {
$calculations = [];
foreach ($sampleids as $sampleid => $unusedsampleid) {
if (!$this->is_valid_sample($sampleid, $analysable)) {
// Skip it and remove the sample from the list of calculated samples.
unset($sampleids[$sampleid]);
continue;
}
// No time limits when calculating the target to train models.
$calculatedvalue = $this->calculate_sample($sampleid, $analysable, false, false);
$calculatedvalue = $this->calculate_sample($sampleid, $analysable, $starttime, $endtime);
if (!is_null($calculatedvalue)) {
if ($this->is_linear() && ($calculatedvalue > static::get_max_value() || $calculatedvalue < static::get_min_value())) {
@ -270,4 +304,20 @@ abstract class base extends \core_analytics\calculable {
}
return $calculations;
}
/**
 * Removes from $sampleids every sample that is_valid_sample() rejects.
 *
 * The array is modified in place, nothing is returned.
 *
 * @param int[] $sampleids Samples keyed by sample id, passed by reference
 * @param \core_analytics\analysable $analysable
 * @param bool $fortraining Forwarded as is to is_valid_sample()
 * @return void
 */
public function filter_out_invalid_samples(&$sampleids, \core_analytics\analysable $analysable, $fortraining = true) {
    foreach ($sampleids as $sampleid => $ignored) {
        if ($this->is_valid_sample($sampleid, $analysable, $fortraining)) {
            continue;
        }
        // Not valid: take the sample out of the list of samples to calculate.
        unset($sampleids[$sampleid]);
    }
}
}

View file

@ -302,7 +302,7 @@ abstract class base {
}
protected function get_headers($indicators, $target = false) {
// 3th column will contain the indicator ids.
// 3rd column will contain the indicator ids.
$headers = array();
if (!$target) {

View file

@ -46,6 +46,9 @@ class deciles extends base {
for ($i = 0; $i < 10; $i++) {
$start = $this->analysable->get_start() + ($rangeduration * $i);
$end = $this->analysable->get_start() + ($rangeduration * ($i + 1));
if ($i === 9) {
$end = $this->analysable->get_end();
}
$ranges[] = array(
'start' => $start,
'end' => $end,

View file

@ -45,6 +45,9 @@ class deciles_accum extends base {
$ranges = array();
for ($i = 0; $i < 10; $i++) {
$end = $this->analysable->get_start() + ($rangeduration * ($i + 1));
if ($i === 9) {
$end = $this->analysable->get_end();
}
$ranges[] = array(
'start' => $this->analysable->get_start(),
'end' => $end,

View file

@ -56,8 +56,8 @@ class quarters extends base {
'time' => $this->analysable->get_start() + ($duration * 3)
], [
'start' => $this->analysable->get_start() + ($duration * 3),
'end' => $this->analysable->get_start() + ($duration * 4),
'time' => $this->analysable->get_start() + ($duration * 4)
'end' => $this->analysable->get_end(),
'time' => $this->analysable->get_end()
]
];
}

View file

@ -56,8 +56,8 @@ class quarters_accum extends base {
'time' => $this->analysable->get_start() + ($duration * 3)
], [
'start' => $this->analysable->get_start(),
'end' => $this->analysable->get_start() + ($duration * 4),
'time' => $this->analysable->get_start() + ($duration * 4)
'end' => $this->analysable->get_end(),
'time' => $this->analysable->get_end()
]
];
}

View file

@ -50,6 +50,27 @@ class manager {
*/
protected static $alltimesplittings = null;
/**
 * Ensures that the user is allowed to manage models.
 *
 * The moodle/analytics:managemodels capability is checked at system context.
 *
 * @throws \required_capability_exception
 * @return void
 */
public static function check_can_manage_models() {
    $systemcontext = \context_system::instance();
    require_capability('moodle/analytics:managemodels', $systemcontext);
}
/**
* Checks that the user can list the insights of the provided context.
*
* Delegates to require_capability() using the moodle/analytics:listinsights capability.
*
* @throws \required_capability_exception
* @param \context $context The context whose insights would be listed
* @return void
*/
public static function check_can_list_insights(\context $context) {
require_capability('moodle/analytics:listinsights', $context);
}
/**
* Returns all system models that match the provided filters.
*
@ -61,21 +82,31 @@ class manager {
public static function get_all_models($enabled = false, $trained = false, $predictioncontext = false) {
global $DB;
$filters = array();
if ($enabled) {
$filters['enabled'] = 1;
$params = array();
$sql = "SELECT DISTINCT am.* FROM {analytics_models} am";
if ($predictioncontext) {
$sql .= " JOIN {analytics_predictions} ap ON ap.modelid = am.id AND ap.contextid = :contextid";
$params['contextid'] = $predictioncontext->id;
}
if ($trained) {
$filters['trained'] = 1;
if ($enabled || $trained) {
$conditions = [];
if ($enabled) {
$conditions[] = 'am.enabled = :enabled';
$params['enabled'] = 1;
}
if ($trained) {
$conditions[] = 'am.trained = :trained';
$params['trained'] = 1;
}
$sql .= ' WHERE ' . implode(' AND ', $conditions);
}
$modelobjs = $DB->get_records('analytics_models', $filters);
$modelobjs = $DB->get_records_sql($sql, $params);
$models = array();
foreach ($modelobjs as $modelobj) {
$model = new \core_analytics\model($modelobj);
if (!$predictioncontext || $model->predictions_exist($predictioncontext)) {
$models[$modelobj->id] = $model;
}
$models[$modelobj->id] = new \core_analytics\model($modelobj);
}
return $models;
}
@ -126,6 +157,11 @@ class manager {
return self::$predictionprocessors[$checkisready][$predictionclass];
}
/**
* Return all system predictions processors.
*
* @return \core_analytics\predictor
*/
public static function get_all_prediction_processors() {
$mlbackends = \core_component::get_plugin_list('mlbackend');
@ -221,6 +257,12 @@ class manager {
return self::$allindicators;
}
/**
* Returns the specified target
*
* @param mixed $fullclassname
* @return \core_analytics\local\target\base|false False if it is not valid
*/
public static function get_target($fullclassname) {
if (!self::is_valid($fullclassname, 'core_analytics\local\target\base')) {
return false;
@ -245,6 +287,7 @@ class manager {
* Returns whether a time splitting method is valid or not.
*
* @param string $fullclassname
* @param string $baseclass
* @return bool
*/
public static function is_valid($fullclassname, $baseclass) {
@ -257,7 +300,7 @@ class manager {
}
/**
* get_analytics_logstore
* Returns the logstore used for analytics.
*
* @return \core\log\sql_reader
*/
@ -282,6 +325,56 @@ class manager {
return $logstore;
}
/**
 * Returns the models that have insights in the provided context.
 *
 * @param \context $context
 * @return \core_analytics\model[]
 */
public static function get_models_with_insights(\context $context) {
    self::check_can_list_insights($context);

    $candidates = \core_analytics\manager::get_all_models(true, true, $context);

    // Having predictions is not enough, the model must also generate insights from them.
    $models = array();
    foreach ($candidates as $key => $model) {
        if ($model->uses_insights()) {
            $models[$key] = $model;
        }
    }
    return $models;
}
/**
 * Loads a prediction together with its model and its context.
 *
 * @throws \moodle_exception If there is no prediction with the provided id
 * @param int $predictionid
 * @param bool $requirelogin
 * @return array array($model, $prediction, $context)
 */
public static function get_prediction($predictionid, $requirelogin = false) {
    global $DB;

    $predictionobj = $DB->get_record('analytics_predictions', array('id' => $predictionid));
    if (!$predictionobj) {
        throw new \moodle_exception('errorpredictionnotfound', 'report_insights');
    }

    if (!$requirelogin) {
        $context = \context::instance_by_id($predictionobj->contextid);
    } else {
        list($context, $course, $cm) = get_context_info_array($predictionobj->contextid);
        require_login($course, false, $cm);
    }

    // The capability is checked before any model or sample data is loaded.
    \core_analytics\manager::check_can_list_insights($context);

    $model = new \core_analytics\model($predictionobj->modelid);
    $prediction = new \core_analytics\prediction($predictionobj, $model->prediction_sample_data($predictionobj));

    return array($model, $prediction, $context);
}
/**
* Returns the provided element classes in the site.
*
@ -291,7 +384,7 @@ class manager {
private static function get_analytics_classes($element) {
// Just in case...
$element = clean_param($element, PARAM_ALPHAEXT);
$element = clean_param($element, PARAM_ALPHANUMEXT);
$classes = \core_component::get_component_classes_in_namespace('core_analytics', 'local\\' . $element);
foreach (\core_component::get_plugin_types() as $type => $unusedplugintypepath) {

View file

@ -42,7 +42,6 @@ class model {
const EVALUATE_LOW_SCORE = 4;
const EVALUATE_NOT_ENOUGH_DATA = 8;
const ANALYSE_INPROGRESS = 2;
const ANALYSE_REJECTED_RANGE_PROCESSOR = 4;
const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;
const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;
@ -88,7 +87,7 @@ class model {
global $DB;
if (is_scalar($model)) {
$model = $DB->get_record('analytics_models', array('id' => $model));
$model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
if (!$model) {
throw new \moodle_exception('errorunexistingmodel', 'analytics', '', $model);
}
@ -266,6 +265,8 @@ class model {
public static function create(\core_analytics\local\target\base $target, array $indicators, $timesplittingid = false) {
global $USER, $DB;
\core_analytics\manager::check_can_manage_models();
$indicatorclasses = self::indicator_classes($indicators);
$now = time();
@ -307,6 +308,8 @@ class model {
public function update($enabled, $indicators, $timesplittingid = '') {
global $USER, $DB;
\core_analytics\manager::check_can_manage_models();
$now = time();
$indicatorclasses = self::indicator_classes($indicators);
@ -345,6 +348,9 @@ class model {
*/
public function delete() {
    global $DB;

    // The capability check comes first, before anything is cleared or deleted.
    \core_analytics\manager::check_can_manage_models();

    $this->clear_model();

    $conditions = array('id' => $this->model->id);
    $DB->delete_records('analytics_models', $conditions);
}
@ -359,6 +365,8 @@ class model {
*/
public function evaluate($options = array()) {
\core_analytics\manager::check_can_manage_models();
if ($this->is_static()) {
$this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
$result = new \stdClass();
@ -366,9 +374,6 @@ class model {
return $result;
}
// Increase memory limit.
$this->increase_memory();
$options['evaluation'] = true;
$this->init_analyser($options);
@ -376,6 +381,8 @@ class model {
throw new \moodle_exception('errornoindicators', 'analytics');
}
$this->heavy_duty_mode();
// Before get_labelled_data call so we get an early exception if it is not ready.
$predictor = \core_analytics\manager::get_predictions_processor();
@ -438,6 +445,8 @@ class model {
public function train() {
global $DB;
\core_analytics\manager::check_can_manage_models();
if ($this->is_static()) {
$this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
$result = new \stdClass();
@ -445,9 +454,6 @@ class model {
return $result;
}
// Increase memory limit.
$this->increase_memory();
if (!$this->is_enabled() || empty($this->model->timesplitting)) {
throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
}
@ -456,6 +462,8 @@ class model {
throw new \moodle_exception('errornoindicators', 'analytics');
}
$this->heavy_duty_mode();
// Before get_labelled_data call so we get an early exception if it is not writable.
$outputdir = $this->get_output_dir(array('execution'));
@ -499,8 +507,7 @@ class model {
public function predict() {
global $DB;
// Increase memory limit.
$this->increase_memory();
\core_analytics\manager::check_can_manage_models();
if (!$this->is_enabled() || empty($this->model->timesplitting)) {
throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
@ -510,6 +517,8 @@ class model {
throw new \moodle_exception('errornoindicators', 'analytics');
}
$this->heavy_duty_mode();
// Before get_unlabelled_data call so we get an early exception if it is not writable.
$outputdir = $this->get_output_dir(array('execution'));
@ -548,74 +557,19 @@ class model {
$result->predictions = $this->get_static_predictions($indicatorcalculations);
} else {
// Defer the prediction to the machine learning backend.
// Prediction process runs on the machine learning backend.
$predictorresult = $predictor->predict($this->get_unique_id(), $samplesfile, $outputdir);
$result->status = $predictorresult->status;
$result->info = $predictorresult->info;
$result->predictions = array();
if ($predictorresult->predictions) {
foreach ($predictorresult->predictions as $sampleinfo) {
// We parse each prediction
switch (count($sampleinfo)) {
case 1:
// For whatever reason the predictions processor could not process this sample, we
// skip it and do nothing with it.
debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
$sampleinfo[0], DEBUG_DEVELOPER);
continue;
case 2:
// Prediction processors that do not return a prediction score will have the maximum prediction
// score.
list($uniquesampleid, $prediction) = $sampleinfo;
$predictionscore = 1;
break;
case 3:
list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
break;
default:
break;
}
$predictiondata = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
$result->predictions[$uniquesampleid] = $predictiondata;
}
}
$result->predictions = $this->format_predictor_predictions($predictorresult);
}
// Here we will store all predictions' contexts, this will be used to limit which users will see those predictions.
$samplecontexts = array();
if ($result->predictions) {
foreach ($result->predictions as $uniquesampleid => $prediction) {
if ($this->get_target()->triggers_callback($prediction->prediction, $prediction->predictionscore)) {
// The unique sample id contains both the sampleid and the rangeindex.
list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
// Store the predicted values.
$samplecontext = $this->save_prediction($sampleid, $rangeindex, $prediction->prediction, $prediction->predictionscore,
json_encode($indicatorcalculations[$uniquesampleid]));
// Also store all samples context to later generate insights or whatever action the target wants to perform.
$samplecontexts[$samplecontext->id] = $samplecontext;
$this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $samplecontext,
$prediction->prediction, $prediction->predictionscore);
}
}
$samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
}
if (!empty($samplecontexts)) {
// Notify the target that all predictions have been processed.
$this->get_target()->generate_insights($this->model->id, $samplecontexts);
// Aggressive invalidation, the cost of filling up the cache is not high.
$cache = \cache::make('core', 'modelswithpredictions');
foreach ($samplecontexts as $context) {
$cache->delete($context->id);
}
if (!empty($samplecontexts) && $this->uses_insights()) {
$this->trigger_insights($samplecontexts);
}
$this->flag_file_as_used($samplesfile, 'predicted');
@ -624,7 +578,108 @@ class model {
}
/**
* get_static_predictions
* Formats the predictor results.
*
* @param \stdClass $predictorresult
* @return \stdClass[]
*/
private function format_predictor_predictions($predictorresult) {
    // Converts the raw rows in $predictorresult->predictions into objects with
    // 'prediction' and 'predictionscore' properties, indexed by unique sample id.
    // Rows that cannot be interpreted are skipped.

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        // The predictions processor returned nothing to format.
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // Each row is array(uniquesampleid[, prediction[, predictionscore]]).
        switch (count($sampleinfo)) {
            case 1:
                // For whatever reason the predictions processor could not process this sample, we
                // skip it and do nothing with it.
                debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                    $sampleinfo[0], DEBUG_DEVELOPER);
                // A bare "continue" inside a switch behaves like "break", which would store this
                // sample using the values left over from the previous iteration; "continue 2"
                // targets the enclosing foreach so the sample is really skipped.
                continue 2;
            case 2:
                // Prediction processors that do not return a prediction score will have the maximum
                // prediction score.
                list($uniquesampleid, $prediction) = $sampleinfo;
                $predictionscore = 1;
                break;
            case 3:
                list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                break;
            default:
                // Unexpected row format (empty or more than 3 elements): there is nothing reliable
                // to store, so skip it instead of reusing values from a previous iteration.
                debugging($this->model->id . ' model predictions processor returned a sample with an unexpected ' .
                    'number of elements: ' . count($sampleinfo), DEBUG_DEVELOPER);
                continue 2;
        }

        $predictions[$uniquesampleid] = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
    }
    return $predictions;
}
/**
* Execute the prediction callbacks defined by the target.
*
* @param \stdClass[] $predictions
* @param array $indicatorcalculations
* @return array
*/
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {
    // Contexts of every stored prediction, indexed by context id. They are returned so the
    // caller can later limit which users will see those predictions.
    $contextsbyid = array();

    foreach ($predictions as $uniquesampleid => $predicted) {
        $value = $predicted->prediction;
        $score = $predicted->predictionscore;

        // Only predictions the target is interested in get stored and notified.
        if (!$this->get_target()->triggers_callback($value, $score)) {
            continue;
        }

        // The unique sample id carries both the sample id and the range index.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        // Persist the predicted values along with the indicator calculations behind them.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        $context = $this->save_prediction($sampleid, $rangeindex, $value, $score, $calculations);

        // Keep the sample context so insights (or any other target action) can be generated later.
        $contextsbyid[$context->id] = $context;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $context,
            $value, $score);
    }

    return $contextsbyid;
}
/**
* Generates insights and updates the cache.
*
* @param \context[] $samplecontexts
* @return void
*/
protected function trigger_insights($samplecontexts) {
    // Tell the target that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Bring the 'contextwithinsights' cache up to date for every affected context.
    $insightscache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $samplecontext) {
        $cachedmodelids = $insightscache->get($samplecontext->id);

        if (!$cachedmodelids) {
            // Nothing cached: either this context has no insights or the cache was purged.
            // We cannot tell which, so be conservative and spend 1 db read to fill the cache.
            $modelswithinsights = \core_analytics\manager::get_models_with_insights($samplecontext);
            $insightscache->set($samplecontext->id, array_keys($modelswithinsights));
            continue;
        }

        if (in_array($this->get_id(), $cachedmodelids)) {
            // This model is already listed for the context.
            continue;
        }

        $cachedmodelids[] = $this->get_id();
        $insightscache->set($samplecontext->id, $cachedmodelids);
    }
}
/**
* Get predictions from a static model.
*
* @param array $indicatorcalculations
* @return \stdClass[]
@ -673,8 +728,9 @@ class model {
$this->get_target()->add_sample_data($samplesdata);
$this->get_target()->add_sample_data($data->indicatorsdata);
// Append new elements (we can not get duplicated because sample-analysable relation is N-1).
// Append new elements (we can not get duplicates because sample-analysable relation is N-1).
$range = $this->get_time_splitting()->get_range_by_index($rangeindex);
$this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
$calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);
// Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
@ -683,7 +739,6 @@ class model {
$indicatorcalculations = array_filter($indicatorcalculations, function($indicators, $uniquesampleid) use ($calculations) {
list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
if (!isset($calculations[$sampleid])) {
debugging($uniquesampleid . ' discarded by is_valid_sample');
return false;
}
return true;
@ -695,7 +750,6 @@ class model {
// Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
if (is_null($calculations[$sampleid])) {
debugging($uniquesampleid . ' discarded by is_valid_sample');
unset($indicatorcalculations[$uniquesampleid]);
continue;
}
@ -747,6 +801,8 @@ class model {
public function enable($timesplittingid = false) {
global $DB;
\core_analytics\manager::check_can_manage_models();
$now = time();
if ($timesplittingid && $timesplittingid !== $this->model->timesplitting) {
@ -808,6 +864,8 @@ class model {
public function mark_as_trained() {
global $DB;
\core_analytics\manager::check_can_manage_models();
$this->model->trained = 1;
$DB->update_record('analytics_models', $this->model);
}
@ -873,6 +931,8 @@ class model {
public function get_predictions(\context $context) {
global $DB;
\core_analytics\manager::check_can_list_insights($context);
// Filters out previous predictions keeping only the last time range one.
$sql = "SELECT tip.*
FROM {analytics_predictions} tip
@ -917,7 +977,7 @@ class model {
}
/**
* prediction_sample_data
* Returns the sample data of a prediction.
*
* @param \stdClass $predictionobj
* @return array
@ -934,7 +994,7 @@ class model {
}
/**
* prediction_sample_description
* Returns the description of a sample
*
* @param \core_analytics\prediction $prediction
* @return array 2 elements: list(string, \renderable)
@ -1004,6 +1064,9 @@ class model {
* @return \stdClass
*/
public function export() {
\core_analytics\manager::check_can_manage_models();
$data = clone $this->model;
$data->target = $this->get_target()->get_name();
@ -1027,6 +1090,9 @@ class model {
*/
public function get_logs($limitfrom = 0, $limitnum = 0) {
global $DB;
\core_analytics\manager::check_can_manage_models();
return $DB->get_records('analytics_models_log', array('modelid' => $this->get_id()), 'timecreated DESC', '*',
$limitfrom, $limitnum);
}
@ -1120,14 +1186,21 @@ class model {
$DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
$DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
$cache = \cache::make('core', 'modelswithpredictions');
// We don't expect people to clear models regularly and the cost of filling the cache is
// 1 db read per context.
$cache = \cache::make('core', 'contextwithinsights');
$result = $cache->purge();
}
private function increase_memory() {
/**
* Increases system memory and time limits.
*
* @return void
*/
private function heavy_duty_mode() {
    // A memory_limit of -1 means there is no limit to raise.
    $unlimitedmemory = (ini_get('memory_limit') == -1);
    if (!$unlimitedmemory) {
        raise_memory_limit(MEMORY_HUGE);
    }

    // The time limit is raised regardless of the memory setting.
    \core_php_time_limit::raise();
}
}

View file

@ -36,13 +36,16 @@ class test_target_shortname extends \core_analytics\local\target\binary {
return true;
}
public function is_valid_sample($sampleid, \core_analytics\analysable $analysable) {
public function is_valid_sample($sampleid, \core_analytics\analysable $analysable, $fortraining = true) {
// We skip not-visible courses during training as a way to emulate the training data / prediction data difference.
// In normal circumstances is_valid_sample will return false when they receive a sample that can not be
// processed.
if (!$fortraining) {
return true;
}
$sample = $this->retrieve('course', $sampleid);
if ($sample->visible == 0) {
// We skip not-visible courses as a way to emulate the training data / prediction data difference.
// In normal circumstances is_valid_sample will return false when they receive a sample that can not be
// processed.
return false;
}
return true;

View file

@ -40,6 +40,8 @@ class analytics_model_testcase extends advanced_testcase {
public function setUp() {
$this->setAdminUser();
$target = \core_analytics\manager::get_target('test_target_shortname');
$indicators = array('test_indicator_max', 'test_indicator_min', 'test_indicator_fullname');
foreach ($indicators as $key => $indicator) {