mirror of
https://github.com/moodle/moodle.git
synced 2025-08-05 00:46:50 +02:00

- Split model::predict in parts - JS promises updated according to eslint-plugin-promise - New API methods replacing direct DB queries - Reduce insights nav link display cost - Increase time limit as well as memory for big processes - Move prediction action event to core - Dataset write locking and others - Refine last time range end time - Removed dodgy splitting method id to int - Replace admin_setting_predictor output_html overwrite for write_setting overwrite - New APIs for access control - Discard invalid samples also during prediction
357 lines
12 KiB
PHP
357 lines
12 KiB
PHP
<?php
|
|
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
/**
 * Datasets manager.
 *
 * @package   core_analytics
 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
|
|
|
|
namespace core_analytics;
|
|
|
|
defined('MOODLE_INTERNAL') || die();
|
|
|
|
/**
 * Datasets manager.
 *
 * Writes the datasets generated for a model + analysable + time splitting method combination
 * to the file storage returned by get_file_storage(), merges per-analysable files into a
 * single file per time splitting method and reads stored unlabelled datasets back into arrays.
 *
 * @package   core_analytics
 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class dataset_manager {

    /**
     * File area used for datasets that include the target (labelled data).
     */
    const LABELLED_FILEAREA = 'labelled';

    /**
     * File area used for datasets that do not include the target (unlabelled data).
     */
    const UNLABELLED_FILEAREA = 'unlabelled';

    /**
     * File name used for evaluation datasets.
     */
    const EVALUATION_FILENAME = 'evaluation.csv';

    /**
     * The model id.
     *
     * @var int
     */
    protected $modelid;

    /**
     * Range processor in use.
     *
     * @var string
     */
    protected $timesplittingid;

    /**
     * The analysable id.
     *
     * @var int
     */
    protected $analysableid;

    /**
     * Whether this is a dataset for evaluation or not.
     *
     * @var bool
     */
    protected $evaluation;

    /**
     * Labelled (true) or unlabelled data (false).
     *
     * @var bool
     */
    protected $includetarget;

    /**
     * Whatever the lock factory returned in init_process(); null until init_process() is called
     * and falsy when the lock could not be obtained.
     *
     * Declared explicitly so that it is not created as a dynamic property.
     *
     * @var \core\lock\lock|bool|null
     */
    protected $lock = null;

    /**
     * Simple constructor.
     *
     * @param int $modelid
     * @param int $analysableid
     * @param string $timesplittingid
     * @param bool $evaluation Whether the dataset is an evaluation dataset.
     * @param bool $includetarget Labelled (true) or unlabelled (false) data.
     * @return void
     */
    public function __construct($modelid, $analysableid, $timesplittingid, $evaluation = false, $includetarget = false) {
        $this->modelid = $modelid;
        $this->analysableid = $analysableid;
        $this->timesplittingid = $timesplittingid;
        $this->evaluation = $evaluation;
        $this->includetarget = $includetarget;
    }

    /**
     * Mark the analysable as being analysed.
     *
     * @return bool Could we get the lock or not.
     */
    public function init_process() {
        $lockkey = 'modelid:' . $this->modelid . '-analysableid:' . $this->analysableid .
            '-timesplitting:' . self::clean_time_splitting_id($this->timesplittingid) .
            '-includetarget:' . (int)$this->includetarget;

        $lockfactory = \core\lock\lock_config::get_lock_factory('core_analytics');

        // If it is not ready in 10 secs skip this model + analysable + timesplittingmethod combination
        // it will attempt it again during next cron run.
        $this->lock = $lockfactory->get_lock($lockkey, 10);

        return (bool)$this->lock;
    }

    /**
     * Store the dataset in the internal file system.
     *
     * Any file previously stored for the same model, file area and file path is deleted first.
     *
     * @throws \moodle_exception If the temporary file can not be opened for writing.
     * @param array $data Rows to write, each one is passed to fputcsv().
     * @return \stored_file
     */
    public function store($data) {

        $fs = get_file_storage();
        $filerecord = [
            'component' => 'analytics',
            'filearea' => self::get_filearea($this->includetarget),
            'itemid' => $this->modelid,
            'contextid' => \context_system::instance()->id,
            'filepath' => '/analysable/' . $this->analysableid . '/' .
                self::clean_time_splitting_id($this->timesplittingid) . '/',
            'filename' => self::get_filename($this->evaluation)
        ];

        // Delete previous and old (we already checked that previous copies are not recent) evaluation files for this analysable.
        // NOTE(review): delete_area_files_select() receives this fragment as its item id test, which is presumably
        // why it starts with the operator - verify against file_storage. The item id is cast as it is interpolated.
        $select = ' = ' . (int)$filerecord['itemid'] . ' AND filepath = :filepath';
        $fs->delete_area_files_select($filerecord['contextid'], $filerecord['component'], $filerecord['filearea'],
            $select, array('filepath' => $filerecord['filepath']));

        // Write all this stuff to a tmp file.
        $filepath = make_request_directory() . DIRECTORY_SEPARATOR . $filerecord['filename'];
        $fh = fopen($filepath, 'w+');
        if (!$fh) {
            $this->close_process();
            // Report the path we actually tried to open.
            throw new \moodle_exception('errorcannotwritedataset', 'analytics', '', $filepath);
        }
        foreach ($data as $line) {
            fputcsv($fh, $line);
        }
        fclose($fh);

        return $fs->create_file_from_pathname($filerecord, $filepath);
    }

    /**
     * Mark as analysed.
     *
     * Releases the lock obtained in init_process(); does nothing if no lock is being held.
     *
     * @return void
     */
    public function close_process() {
        // The lock is null before init_process() and falsy when it could not be obtained.
        if ($this->lock) {
            $this->lock->release();
        }
    }

    /**
     * Returns the previous evaluation file.
     *
     * Important to note that this is per modelid + timesplittingid, when dealing with multiple
     * analysables this is the merged file. Do not confuse with self::get_evaluation_analysable_file
     *
     * @param int $modelid
     * @param string $timesplittingid
     * @return \stored_file|false Whatever file_storage::get_file() returns, falsy if there is no such file.
     */
    public static function get_previous_evaluation_file($modelid, $timesplittingid) {
        $fs = get_file_storage();
        // Evaluation data is always labelled.
        return $fs->get_file(\context_system::instance()->id, 'analytics', self::LABELLED_FILEAREA, $modelid,
            '/timesplitting/' . self::clean_time_splitting_id($timesplittingid) . '/', self::EVALUATION_FILENAME);
    }

    /**
     * Deletes the previous (merged) evaluation file of a model + time splitting method, if there is one.
     *
     * @param int $modelid
     * @param string $timesplittingid
     * @return bool True if a file was found and deleted, false if there was nothing to delete.
     */
    public static function delete_previous_evaluation_file($modelid, $timesplittingid) {
        if ($file = self::get_previous_evaluation_file($modelid, $timesplittingid)) {
            $file->delete();
            return true;
        }

        return false;
    }

    /**
     * Returns the evaluation file stored for a single analysable.
     *
     * @param int $modelid
     * @param int $analysableid
     * @param string $timesplittingid
     * @return \stored_file|false Whatever file_storage::get_file() returns.
     */
    public static function get_evaluation_analysable_file($modelid, $analysableid, $timesplittingid) {

        $fs = get_file_storage();

        // Always evaluation.csv and labelled as it is an evaluation file.
        $filearea = self::get_filearea(true);
        $filename = self::get_filename(true);
        $filepath = '/analysable/' . $analysableid . '/' . self::clean_time_splitting_id($timesplittingid) . '/';
        return $fs->get_file(\context_system::instance()->id, 'analytics', $filearea, $modelid, $filepath, $filename);
    }

    /**
     * Merge multiple files into one.
     *
     * Every file is expected to start with three header lines (var names, var values and column names).
     * The merged file gets the var names and column names of the last file, the distinct var values of
     * all the files joined with '|' and, after that, all the remaining lines of every file.
     *
     * Important! It is the caller responsability to ensure that the datasets are compatible.
     *
     * @throws \moodle_exception If the temporary file can not be opened for writing.
     * @param array $files Objects providing get_content_file_handle().
     * @param int $modelid
     * @param string $timesplittingid
     * @param bool $evaluation
     * @param bool $includetarget
     * @return \stored_file
     */
    public static function merge_datasets(array $files, $modelid, $timesplittingid, $evaluation, $includetarget) {

        $tmpfilepath = make_request_directory() . DIRECTORY_SEPARATOR . 'tmpfile.csv';

        // Add headers.
        // We could also do this with a single iteration gathering all files headers and appending them to the beginning of the file
        // once all file contents are merged.
        // Both default to empty lists so they are defined, and valid for fputcsv(), when there are no files.
        $varnames = array();
        $columns = array();
        $analysablesvalues = array();
        foreach ($files as $file) {
            $rh = $file->get_content_file_handle();

            // Copy the var names as they are, all files should have the same var names.
            $varnames = fgetcsv($rh);

            $analysablesvalues[] = fgetcsv($rh);

            // Copy the columns as they are, all files should have the same columns.
            $columns = fgetcsv($rh);

            // Files are reopened below to copy their contents, do not keep this handle around.
            fclose($rh);
        }

        // Merge analysable values skipping the ones that are the same in all analysables.
        $values = array();
        foreach ($analysablesvalues as $analysablevalues) {
            foreach ($analysablevalues as $varkey => $value) {
                // Sha1 to make it unique.
                $values[$varkey][sha1($value)] = $value;
            }
        }
        foreach ($values as $varkey => $varvalues) {
            $values[$varkey] = implode('|', $varvalues);
        }

        // Start writing to the merge file.
        $wh = fopen($tmpfilepath, 'w');
        if (!$wh) {
            throw new \moodle_exception('errorcannotwritedataset', 'analytics', '', $tmpfilepath);
        }

        fputcsv($wh, $varnames);
        fputcsv($wh, $values);
        fputcsv($wh, $columns);

        // Iterate through all files and add them to the tmp one. We don't want file contents in memory.
        foreach ($files as $file) {
            $rh = $file->get_content_file_handle();

            // Skip headers.
            fgets($rh);
            fgets($rh);
            fgets($rh);

            // Copy all the following lines. Compare against false so a final "0" line is not taken as the end of the file.
            while (($line = fgets($rh)) !== false) {
                fwrite($wh, $line);
            }
            fclose($rh);
        }
        fclose($wh);

        $filerecord = [
            'component' => 'analytics',
            'filearea' => self::get_filearea($includetarget),
            'itemid' => $modelid,
            'contextid' => \context_system::instance()->id,
            'filepath' => '/timesplitting/' . self::clean_time_splitting_id($timesplittingid) . '/',
            'filename' => self::get_filename($evaluation)
        ];

        $fs = get_file_storage();

        return $fs->create_file_from_pathname($filerecord, $tmpfilepath);
    }

    /**
     * Reads an unlabelled dataset and returns its calculations indexed by sample id.
     *
     * The first two lines of the file (dataset info) are skipped and the third one is used as the column
     * names. The first column of each following line is used as the key of the returned array and the
     * remaining columns are indexed by column name. Empty strings are returned as null and numeric
     * strings as floats; any other value is returned as it is.
     *
     * @throws \coding_exception If the dataset is not in the unlabelled file area.
     * @param \stored_file $dataset
     * @return array
     */
    public static function get_structured_data(\stored_file $dataset) {

        if ($dataset->get_filearea() !== self::UNLABELLED_FILEAREA) {
            throw new \coding_exception('Sorry, only support for unlabelled data');
        }

        $rh = $dataset->get_content_file_handle();

        // Skip dataset info.
        fgets($rh);
        fgets($rh);

        $calculations = array();

        $headers = fgetcsv($rh);
        // Get rid of the sampleid column name.
        array_shift($headers);

        while (($columns = fgetcsv($rh)) !== false) {

            // fgetcsv returns an array with a single null field for blank lines, there is no sample in them.
            if ($columns === array(null)) {
                continue;
            }

            $uniquesampleid = array_shift($columns);

            // Unfortunately fgetcsv does not respect line's var types.
            $calculations[$uniquesampleid] = array_map(function($value) {

                if ($value === '') {
                    // We really want them as null because converted to float become 0
                    // and we need to treat the values separately.
                    return null;
                } else if (is_numeric($value)) {
                    return floatval($value);
                }
                return $value;
            }, array_combine($headers, $columns));
        }

        fclose($rh);

        return $calculations;
    }

    /**
     * Deletes the analytics component files that have the model id as item id, whatever their file area is.
     *
     * @param int $modelid
     * @return bool Whatever file_storage::delete_area_files() returns.
     */
    public static function clear_model_files($modelid) {
        $fs = get_file_storage();
        // No file area restriction (false).
        return $fs->delete_area_files(\context_system::instance()->id, 'analytics', false, $modelid);
    }

    /**
     * Remove all possibly problematic chars from the time splitting method id (id = its full class name).
     *
     * @param string $timesplittingid
     * @return string
     */
    protected static function clean_time_splitting_id($timesplittingid) {
        // Namespace separators are replaced before cleaning the rest of the id.
        $timesplittingid = str_replace('\\', '-', $timesplittingid);
        return clean_param($timesplittingid, PARAM_ALPHANUMEXT);
    }

    /**
     * Returns the name the dataset file should have.
     *
     * @param bool $evaluation Only a strict true selects the evaluation file name.
     * @return string self::EVALUATION_FILENAME for evaluation datasets, current timestamp + '.csv' otherwise.
     */
    protected static function get_filename($evaluation) {

        if ($evaluation === true) {
            $filename = self::EVALUATION_FILENAME;
        } else {
            // Incremental time, the lock will make sure we don't have concurrency problems.
            $filename = time() . '.csv';
        }

        return $filename;
    }

    /**
     * Returns the file area the dataset should be stored in.
     *
     * @param bool $includetarget Only a strict true selects the labelled file area.
     * @return string self::LABELLED_FILEAREA or self::UNLABELLED_FILEAREA.
     */
    protected static function get_filearea($includetarget) {

        if ($includetarget === true) {
            $filearea = self::LABELLED_FILEAREA;
        } else {
            $filearea = self::UNLABELLED_FILEAREA;
        }

        return $filearea;
    }

}
|