MDL-66498 analytics: get_samples() with no params limit

This commit is contained in:
David Monllaó 2019-09-02 14:08:50 +08:00
parent 4fd74fba6f
commit abc745fbd0
2 changed files with 116 additions and 3 deletions

View file

@ -1030,7 +1030,7 @@ class model {
} }
// Get all samples data. // Get all samples data.
list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids); list($sampleids, $samplesdata) = $this->get_samples($sampleids);
// Calculate the targets. // Calculate the targets.
$predictions = array(); $predictions = array();
@ -1344,7 +1344,7 @@ class model {
return $prediction->sampleid; return $prediction->sampleid;
}, $predictions); }, $predictions);
list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids); list($unused, $samplesdata) = $this->get_samples($sampleids);
$current = 0; $current = 0;
@ -1410,7 +1410,7 @@ class model {
*/ */
public function prediction_sample_data($predictionobj) { public function prediction_sample_data($predictionobj) {
list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($predictionobj->sampleid)); list($unused, $samplesdata) = $this->get_samples(array($predictionobj->sampleid));
if (empty($samplesdata[$predictionobj->sampleid])) { if (empty($samplesdata[$predictionobj->sampleid])) {
throw new \moodle_exception('errorsamplenotavailable', 'analytics'); throw new \moodle_exception('errorsamplenotavailable', 'analytics');
@ -1859,6 +1859,64 @@ class model {
return $predictionrecords; return $predictionrecords;
} }
/**
 * Wrapper around analyser's get_samples to skip DB's max-number-of-params exception.
 *
 * The whole list of sample ids is first sent to the analyser as one single chunk. Each time the
 * analyser call throws a \dml_read_exception the chunk size is halved and the complete list is
 * processed again from scratch, until every chunk succeeds or the chunk size drops below 1.
 *
 * @param array $sampleids The ids of the samples to retrieve, it can not be empty.
 * @return array Two elements: the sample ids and the samples data, each one being the union of the per-chunk results.
 * @throws \coding_exception If no sample ids are provided.
 * @throws \dml_read_exception The last exception caught if the samples can not be read even in chunks of 1 sample.
 */
public function get_samples(array $sampleids): array {

    if (empty($sampleids)) {
        throw new \coding_exception('No sample ids provided');
    }

    // We start with just 1 chunk, if it is too large for the db we split the list of sampleids in 2 and we
    // try again. We repeat this process until the chunk is small enough for the db engine to process.
    $chunksize = count($sampleids);
    $lastexception = null;

    // The loop also stops once $chunksize reaches 0, which covers \dml_read_exceptions that are unrelated
    // to the max number of params (they keep failing even with chunks of 1 sample).
    while ($chunksize >= 1) {

        $allsampleids = [];
        $allsamplesdata = [];
        $failed = false;

        foreach (array_chunk($sampleids, $chunksize) as $chunk) {
            try {
                list($chunksampleids, $chunksamplesdata) = $this->get_analyser()->get_samples($chunk);
            } catch (\dml_read_exception $e) {
                // Remember the failure and discard this attempt, we will retry with smaller chunks.
                $lastexception = $e;
                $failed = true;
                break;
            }

            // We can sum as these two arrays are indexed by sampleid and there are no collisions.
            $allsampleids = $allsampleids + $chunksampleids;
            $allsamplesdata = $allsamplesdata + $chunksamplesdata;
        }

        if (!$failed) {
            // We successfully processed all the samples in all chunks, we are done.
            return [$allsampleids, $allsamplesdata];
        }

        // Integer division so $chunksize stays an int and is always less than its previous value.
        $chunksize = intdiv($chunksize, 2);
    }

    // $sampleids is not empty so the loop above ran at least once, and the only way to leave it without
    // returning is through a caught exception. We rethrow that same exception (instead of wrapping it into
    // a new one) so the original error details and backtrace are preserved.
    throw $lastexception;
}
/** /**
* Purges the insights cache. * Purges the insights cache.
*/ */

View file

@ -505,6 +505,61 @@ class analytics_model_testcase extends advanced_testcase {
$this->assertArrayHasKey('\core\analytics\time_splitting\quarters', $this->model->get_potential_timesplittings()); $this->assertArrayHasKey('\core\analytics\time_splitting\quarters', $this->model->get_potential_timesplittings());
} }
/**
 * Tests model::get_samples()
 *
 * Creates a large number of users and checks that model::get_samples() returns all of them, as well
 * as smaller subsets and an empty result for a sample id that does not exist.
 *
 * @return void
 */
public function test_get_samples() {
    $this->resetAfterTest();

    if (!PHPUNIT_LONGTEST) {
        $this->markTestSkipped('PHPUNIT_LONGTEST is not defined');
    }

    // 10000 should be enough to make oracle and mssql fail, if we want pgsql to fail we need around 70000
    // users, that is a few minutes just to create the users.
    $nusers = 10000;

    $userids = [];
    for ($i = 0; $i < $nusers; $i++) {
        $user = $this->getDataGenerator()->create_user();
        $userids[] = $user->id;
    }

    // Look for the model whose target is upcoming_activities_due.
    $upcomingactivities = null;
    foreach (\core_analytics\manager::get_all_models() as $model) {
        if (get_class($model->get_target()) === 'core_user\\analytics\\target\\upcoming_activities_due') {
            $upcomingactivities = $model;
        }
    }

    // Fail with a clear message, instead of a fatal error on a null object, if the model is not available.
    $this->assertNotNull($upcomingactivities, 'The upcoming_activities_due model could not be found');

    // All the users.
    list($sampleids, $samplesdata) = $upcomingactivities->get_samples($userids);
    $this->assertCount($nusers, $sampleids);
    $this->assertCount($nusers, $samplesdata);

    // Smaller subsets.
    $subset = array_slice($userids, 0, 100);
    list($sampleids, $samplesdata) = $upcomingactivities->get_samples($subset);
    $this->assertCount(100, $sampleids);
    $this->assertCount(100, $samplesdata);

    $subset = array_slice($userids, 0, 2);
    list($sampleids, $samplesdata) = $upcomingactivities->get_samples($subset);
    $this->assertCount(2, $sampleids);
    $this->assertCount(2, $samplesdata);

    $subset = array_slice($userids, 0, 1);
    list($sampleids, $samplesdata) = $upcomingactivities->get_samples($subset);
    $this->assertCount(1, $sampleids);
    $this->assertCount(1, $samplesdata);

    // Unexisting, so nothing returned, but still 2 arrays.
    list($sampleids, $samplesdata) = $upcomingactivities->get_samples([1231231231231231]);
    $this->assertEmpty($sampleids);
    $this->assertEmpty($samplesdata);
}
/** /**
* Generates a model log record. * Generates a model log record.
*/ */