mirror of
https://github.com/moodle/moodle.git
synced 2025-08-04 16:36:37 +02:00
MDL-66498 analytics: get_samples() with no params limit
This commit is contained in:
parent
4fd74fba6f
commit
abc745fbd0
2 changed files with 116 additions and 3 deletions
|
@ -1030,7 +1030,7 @@ class model {
|
|||
}
|
||||
|
||||
// Get all samples data.
|
||||
list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);
|
||||
list($sampleids, $samplesdata) = $this->get_samples($sampleids);
|
||||
|
||||
// Calculate the targets.
|
||||
$predictions = array();
|
||||
|
@ -1344,7 +1344,7 @@ class model {
|
|||
return $prediction->sampleid;
|
||||
}, $predictions);
|
||||
|
||||
list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);
|
||||
list($unused, $samplesdata) = $this->get_samples($sampleids);
|
||||
|
||||
$current = 0;
|
||||
|
||||
|
@ -1410,7 +1410,7 @@ class model {
|
|||
*/
|
||||
public function prediction_sample_data($predictionobj) {
|
||||
|
||||
list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($predictionobj->sampleid));
|
||||
list($unused, $samplesdata) = $this->get_samples(array($predictionobj->sampleid));
|
||||
|
||||
if (empty($samplesdata[$predictionobj->sampleid])) {
|
||||
throw new \moodle_exception('errorsamplenotavailable', 'analytics');
|
||||
|
@ -1859,6 +1859,64 @@ class model {
|
|||
return $predictionrecords;
|
||||
}
|
||||
|
||||
/**
 * Wrapper around analyser's get_samples to skip DB's max-number-of-params exception.
 *
 * All sample ids are first sent to the analyser in a single call. If the analyser throws a
 * \dml_read_exception the chunk size is halved and the whole list is processed again from
 * scratch. This repeats until every chunk is read successfully or the chunk size drops below 1.
 *
 * @param array $sampleids
 * @return array Two elements: the sample ids and the samples data, each merged across all chunks.
 * @throws \coding_exception If $sampleids is empty.
 * @throws \dml_read_exception The last exception caught, when the read fails even with chunks of 1 sample.
 */
public function get_samples(array $sampleids): array {

    if (empty($sampleids)) {
        throw new \coding_exception('No sample ids provided');
    }

    $chunksize = count($sampleids);
    $done = false;
    $lastexception = null;
    $allsampleids = [];
    $allsamplesdata = [];

    // We start with just 1 chunk, if it is too large for the db we split the list of sampleids in 2 and we
    // try again. We repeat this process until the chunk is small enough for the db engine to process. The
    // >= 1 bound stops the loop when the \dml_read_exception is unrelated to the max number of params.
    while (!$done && $chunksize >= 1) {

        $chunks = array_chunk($sampleids, $chunksize);

        // Every attempt starts from scratch so a partially processed attempt leaves nothing behind.
        $allsampleids = [];
        $allsamplesdata = [];
        $failed = false;

        foreach ($chunks as $chunk) {

            try {
                list($chunksampleids, $chunksamplesdata) = $this->get_analyser()->get_samples($chunk);
            } catch (\dml_read_exception $e) {
                // Keep the exception so it can be rethrown untouched if we run out of retries.
                $lastexception = $e;

                // Integer division keeps $chunksize an int and always smaller than its previous value.
                $chunksize = intdiv($chunksize, 2);
                $failed = true;
                break;
            }

            // We can sum as these two arrays are indexed by sampleid and there are no collisions.
            $allsampleids = $allsampleids + $chunksampleids;
            $allsamplesdata = $allsamplesdata + $chunksamplesdata;
        }

        // $chunks is never empty here, so no failure means all the chunks were processed.
        $done = !$failed;
    }

    if (!$done) {
        if ($lastexception !== null) {
            // The \dml_read_exception we have been catching is unrelated to the max number of params.
            // Rethrow the original exception so its message, debug info and stack trace are preserved.
            throw $lastexception;
        }

        throw new \coding_exception('We should never reach this point, there is a bug in ' .
            'core_analytics\\model::get_samples\'s code');
    }

    return [$allsampleids, $allsamplesdata];
}
|
||||
|
||||
/**
|
||||
* Purges the insights cache.
|
||||
*/
|
||||
|
|
|
@ -505,6 +505,61 @@ class analytics_model_testcase extends advanced_testcase {
|
|||
$this->assertArrayHasKey('\core\analytics\time_splitting\quarters', $this->model->get_potential_timesplittings());
|
||||
}
|
||||
|
||||
/**
 * Tests model::get_samples()
 *
 * Creates a large number of users and checks that model::get_samples() returns all of them,
 * as well as smaller subsets and an empty result for a sample id that does not exist.
 *
 * @return void
 */
public function test_get_samples() {
    $this->resetAfterTest();

    if (!PHPUNIT_LONGTEST) {
        $this->markTestSkipped('PHPUNIT_LONGTEST is not defined');
    }

    // 10000 should be enough to make oracle and mssql fail, if we want pgsql to fail we need around 70000
    // users, that is a few minutes just to create the users.
    $nusers = 10000;

    $userids = [];
    for ($i = 0; $i < $nusers; $i++) {
        $user = $this->getDataGenerator()->create_user();
        $userids[] = $user->id;
    }

    $upcomingactivities = null;
    foreach (\core_analytics\manager::get_all_models() as $model) {
        if (get_class($model->get_target()) === 'core_user\\analytics\\target\\upcoming_activities_due') {
            $upcomingactivities = $model;
        }
    }

    // Fail with a clear message instead of a fatal error if the model is not available.
    $this->assertNotNull($upcomingactivities, 'The upcoming_activities_due model was not found');

    // The full list first, then progressively smaller subsets down to a single sample.
    foreach ([$nusers, 100, 2, 1] as $expected) {
        $subset = array_slice($userids, 0, $expected);
        list($sampleids, $samplesdata) = $upcomingactivities->get_samples($subset);
        $this->assertCount($expected, $sampleids);
        $this->assertCount($expected, $samplesdata);
    }

    // Non-existent sample id, so nothing returned, but still 2 arrays.
    list($sampleids, $samplesdata) = $upcomingactivities->get_samples([1231231231231231]);
    $this->assertEmpty($sampleids);
    $this->assertEmpty($samplesdata);
}
|
||||
|
||||
/**
|
||||
* Generates a model log record.
|
||||
*/
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue