MDL-59265 analytics: Rename machine learning backend method

- Method names renamed to avoid interface changes once
  we support regression and unsupervised learning
- Adding regressor interface even if not implemented
- predictor interface comments expanded
- Differentiate model's required accuracy from predictions quality
- Add missing get_callback_boundary call
- Updated datasets' metadata to allow 3rd parties to code
  regressors themselves
- Add missing option to exception message
- Include target data into the dataset regardless of being a prediction
  dataset or a training dataset
- Explicit in_array and array_search non-strict calls
- Overwrite discrete should_be_displayed implementation with the binary one
- Overwrite no_teacher get_display_value as it would otherwise look
  wrong
- Other minor fixes
This commit is contained in:
David Monllao 2017-08-14 10:59:03 +02:00
parent b8fe16cd7c
commit 5c5cb3ee15
15 changed files with 265 additions and 51 deletions

View file

@ -73,14 +73,14 @@ class processor implements \core_analytics\predictor {
}
/**
* Trains a machine learning algorithm with the provided training set.
* Train this processor classification model using the provided supervised learning dataset.
*
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass
*/
public function train($uniqueid, \stored_file $dataset, $outputdir) {
public function train_classification($uniqueid, \stored_file $dataset, $outputdir) {
// Output directory is already unique to the model.
$modelfilepath = $outputdir . DIRECTORY_SEPARATOR . self::MODEL_FILENAME;
@ -134,14 +134,14 @@ class processor implements \core_analytics\predictor {
}
/**
* Predicts the provided samples
* Classifies the provided dataset samples.
*
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass
*/
public function predict($uniqueid, \stored_file $dataset, $outputdir) {
public function classify($uniqueid, \stored_file $dataset, $outputdir) {
// Output directory is already unique to the model.
$modelfilepath = $outputdir . DIRECTORY_SEPARATOR . self::MODEL_FILENAME;
@ -199,7 +199,7 @@ class processor implements \core_analytics\predictor {
}
/**
* Evaluates the provided dataset.
* Evaluates this processor classification model using the provided supervised learning dataset.
*
* During evaluation we need to shuffle the evaluation dataset samples to detect deviated results,
* if the dataset is massive we can not load everything into memory. We know that 2GB is the
@ -216,7 +216,7 @@ class processor implements \core_analytics\predictor {
* @param string $outputdir
* @return \stdClass
*/
public function evaluate($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
$fh = $dataset->get_content_file_handle();
// The first lines are var names and the second one values.
@ -351,6 +351,47 @@ class processor implements \core_analytics\predictor {
return $resultobj;
}
/**
* Train this processor regression model using the provided supervised learning dataset.
*
* This is a placeholder: regression is not supported by this predictor yet, so the
* method throws unconditionally and none of the arguments are read.
*
* @throws \coding_exception Always, because regression is not implemented here.
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass Never actually returned by this stub, as it always throws.
*/
public function train_regression($uniqueid, \stored_file $dataset, $outputdir) {
throw new \coding_exception('This predictor does not support regression yet.');
}
/**
* Estimates linear values for the provided dataset samples.
*
* This is a placeholder: regression is not supported by this predictor yet, so the
* method throws unconditionally and none of the arguments are read.
*
* @throws \coding_exception Always, because regression is not implemented here.
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return void Nothing is returned; the method always throws.
*/
public function estimate($uniqueid, \stored_file $dataset, $outputdir) {
throw new \coding_exception('This predictor does not support regression yet.');
}
/**
* Evaluates this processor regression model using the provided supervised learning dataset.
*
* This is a placeholder: regression is not supported by this predictor yet, so the
* method throws unconditionally and none of the arguments are read.
*
* @throws \coding_exception Always, because regression is not implemented here.
* @param string $uniqueid
* @param float $maxdeviation
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass Never actually returned by this stub, as it always throws.
*/
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
throw new \coding_exception('This predictor does not support regression yet.');
}
/**
* Returns the Phi correlation coefficient.
*

View file

@ -79,7 +79,7 @@ class processor implements \core_analytics\predictor {
* @param string $outputdir
* @return \stdClass
*/
public function train($uniqueid, \stored_file $dataset, $outputdir) {
public function train_classification($uniqueid, \stored_file $dataset, $outputdir) {
// Obtain the physical route to the file.
$datasetpath = $this->get_file_path($dataset);
@ -113,14 +113,14 @@ class processor implements \core_analytics\predictor {
}
/**
* Returns predictions for the provided dataset samples.
* Classifies the provided dataset samples.
*
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass
*/
public function predict($uniqueid, \stored_file $dataset, $outputdir) {
public function classify($uniqueid, \stored_file $dataset, $outputdir) {
// Obtain the physical route to the file.
$datasetpath = $this->get_file_path($dataset);
@ -154,7 +154,7 @@ class processor implements \core_analytics\predictor {
}
/**
* Evaluates the provided dataset.
* Evaluates this processor classification model using the provided supervised learning dataset.
*
* @param string $uniqueid
* @param float $maxdeviation
@ -163,7 +163,7 @@ class processor implements \core_analytics\predictor {
* @param string $outputdir
* @return \stdClass
*/
public function evaluate($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
// Obtain the physical route to the file.
$datasetpath = $this->get_file_path($dataset);
@ -195,6 +195,47 @@ class processor implements \core_analytics\predictor {
return $resultobj;
}
/**
* Train this processor regression model using the provided supervised learning dataset.
*
* This is a placeholder: regression is not supported by this predictor yet, so the
* method throws unconditionally and none of the arguments are read.
*
* @throws \coding_exception Always, because regression is not implemented here.
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass Never actually returned by this stub, as it always throws.
*/
public function train_regression($uniqueid, \stored_file $dataset, $outputdir) {
throw new \coding_exception('This predictor does not support regression yet.');
}
/**
* Estimates linear values for the provided dataset samples.
*
* This is a placeholder: regression is not supported by this predictor yet, so the
* method throws unconditionally and none of the arguments are read.
*
* @throws \coding_exception Always, because regression is not implemented here.
* @param string $uniqueid
* @param \stored_file $dataset
* @param string $outputdir
* @return void Nothing is returned; the method always throws.
*/
public function estimate($uniqueid, \stored_file $dataset, $outputdir) {
throw new \coding_exception('This predictor does not support regression yet.');
}
/**
* Evaluates this processor regression model using the provided supervised learning dataset.
*
* This is a placeholder: regression is not supported by this predictor yet, so the
* method throws unconditionally and none of the arguments are read.
*
* @throws \coding_exception Always, because regression is not implemented here.
* @param string $uniqueid
* @param float $maxdeviation
* @param int $niterations
* @param \stored_file $dataset
* @param string $outputdir
* @return \stdClass Never actually returned by this stub, as it always throws.
*/
public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset, $outputdir) {
throw new \coding_exception('This predictor does not support regression yet.');
}
/**
* Returns the path to the dataset file.
*