You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

229 lines
8.7 KiB

<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Keeps track of the analysis results by storing the results in files.
*
* @package core_analytics
* @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
namespace core_analytics\local\analysis;
defined('MOODLE_INTERNAL') || die();
/**
* Keeps track of the analysis results by storing the results in files.
*
* @package core_analytics
* @copyright 2019 David Monllao {@link http://www.davidmonllao.com}
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class result_file extends result {

    /**
     * Analysis result files grouped by time-splitting method id.
     *
     * Each key is a time-splitting method id and each value is the list of files
     * (analysis results) collected so far for that method.
     *
     * @var array
     */
    private $filesbytimesplitting = [];

    /**
     * Stores the analysis results.
     *
     * Every entry of $results is keyed by time-splitting method id. Entries whose
     * ->result is empty are ignored.
     *
     * @param array $results Results indexed by time-splitting method id.
     * @return bool True if anything was successfully analysed
     */
    public function add_analysable_results(array $results): bool {
        $any = false;

        // Process all provided time splitting methods.
        foreach ($results as $timesplittingid => $result) {
            if (!empty($result->result)) {
                $this->filesbytimesplitting[$timesplittingid][] = $result->result;
                $any = true;
            }
        }

        return $any;
    }

    /**
     * Retrieves cached results during evaluation.
     *
     * A previous analysis is only reused when both the 'evaluation' and the
     * 'reuseprevanalysed' options are set and the stored file was created less than
     * one week ago.
     *
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\analysable $analysable
     * @return mixed A \stored_file in this case, or false if there is no reusable result.
     */
    public function retrieve_cached_result(\core_analytics\local\time_splitting\base $timesplitting,
            \core_analytics\analysable $analysable) {

        // For evaluation purposes we don't need to be that strict about how updated the data is,
        // if this analyser was analysed less than 1 week ago we skip generating a new one. This
        // helps scale the evaluation process as sites with tons of courses may need a lot of time to
        // complete an evaluation.
        // The options live on the instance; a local $options variable does not exist in this scope.
        if (empty($this->options['evaluation']) || empty($this->options['reuseprevanalysed'])) {
            return false;
        }

        // Use the same model id property as the rest of this class.
        $previousanalysis = \core_analytics\dataset_manager::get_evaluation_analysable_file($this->modelid,
            $analysable->get_id(), $timesplitting->get_id());

        // 1 week is a partly random time interval, no need to worry about DST.
        $boundary = time() - WEEKSECS;
        if ($previousanalysis && $previousanalysis->get_timecreated() > $boundary) {
            // Recover the previous analysed file and avoid generating a new one.
            return $previousanalysis;
        }

        return false;
    }

    /**
     * Formats the result.
     *
     * Prepends the model metadata rows to $data and stores everything through a dataset manager,
     * in the labelled file area when the target is included and in the unlabelled one otherwise.
     *
     * @param array $data
     * @param \core_analytics\local\target\base $target
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\analysable $analysable
     * @return mixed A \stored_file in this case, or false if the dataset could not be stored.
     */
    public function format_result(array $data, \core_analytics\local\target\base $target,
            \core_analytics\local\time_splitting\base $timesplitting, \core_analytics\analysable $analysable) {

        if (!empty($this->includetarget)) {
            $filearea = \core_analytics\dataset_manager::LABELLED_FILEAREA;
        } else {
            $filearea = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
        }

        $dataset = new \core_analytics\dataset_manager($this->modelid, $analysable->get_id(),
            $timesplitting->get_id(), $filearea, $this->options['evaluation']);

        // Add extra metadata.
        $this->add_model_metadata($data, $timesplitting, $target);

        // Write all calculated data to a file.
        if (!$result = $dataset->store($data)) {
            return false;
        }

        return $result;
    }

    /**
     * Returns the results of the analysis.
     *
     * The files collected for each time-splitting method are merged into a single dataset.
     * Outside of evaluation mode, pending files from previous runs are merged in as well and
     * deleted afterwards.
     *
     * @return array Merged datasets indexed by time-splitting method id.
     */
    public function get(): array {

        // Stays empty in evaluation mode, so nothing is deleted at the end.
        $pendingfiles = [];

        if ($this->options['evaluation'] === false) {
            // Look for previous training and prediction files we generated and couldn't be used
            // by machine learning backends because they weren't big enough.
            $pendingfiles = \core_analytics\dataset_manager::get_pending_files($this->modelid, $this->includetarget,
                array_keys($this->filesbytimesplitting));
            foreach ($pendingfiles as $timesplittingid => $files) {
                foreach ($files as $file) {
                    $this->filesbytimesplitting[$timesplittingid][] = $file;
                }
            }
        }

        // The file area only depends on whether the target is included, not on the time-splitting method.
        if ($this->includetarget) {
            $filearea = \core_analytics\dataset_manager::LABELLED_FILEAREA;
        } else {
            $filearea = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
        }

        // We join the datasets by time splitting method.
        $timesplittingfiles = [];
        foreach ($this->filesbytimesplitting as $timesplittingid => $files) {

            if ($this->options['evaluation'] === true) {
                // Delete the previous copy. Only when evaluating.
                \core_analytics\dataset_manager::delete_previous_evaluation_file($this->modelid, $timesplittingid);
            }

            // Merge all course files into one.
            $timesplittingfiles[$timesplittingid] = \core_analytics\dataset_manager::merge_datasets($files,
                $this->modelid, $timesplittingid, $filearea, $this->options['evaluation']);
        }

        if (!empty($pendingfiles)) {
            // We must remove them now as they are already part of another dataset.
            foreach ($pendingfiles as $timesplittingid => $files) {
                foreach ($files as $file) {
                    $file->delete();
                }
            }
        }

        return $timesplittingfiles;
    }

    /**
     * Adds target metadata to the dataset.
     *
     * The final dataset document will look like this:
     * ----------------------------------------------------
     * metadata1,metadata2,metadata3,.....
     * value1, value2, value3,.....
     *
     * header1,header2,header3,header4,.....
     * stud1value1,stud1value2,stud1value3,stud1value4,.....
     * stud2value1,stud2value2,stud2value3,stud2value4,.....
     * .....
     * ----------------------------------------------------
     *
     * @param array $data Dataset rows, modified in place: two metadata rows are prepended.
     * @param \core_analytics\local\time_splitting\base $timesplitting
     * @param \core_analytics\local\target\base $target
     * @return void
     */
    private function add_model_metadata(array &$data, \core_analytics\local\time_splitting\base $timesplitting,
            \core_analytics\local\target\base $target) {
        global $CFG;

        // If no target the first column is the sampleid, if target the last column is the target.
        // This will need to be updated when we support unsupervised learning models.
        // Guard against an empty dataset: current() returns false and count(false) throws on PHP 8.
        $firstrow = current($data);
        $nfeatures = (is_array($firstrow) || $firstrow instanceof \Countable) ? count($firstrow) - 1 : 0;

        $metadata = [
            'timesplitting' => $timesplitting->get_id(),
            'nfeatures' => $nfeatures,
            'moodleversion' => $CFG->version,
            'targetcolumn' => $target->get_id()
        ];

        if ($target->is_linear()) {
            $metadata['targettype'] = 'linear';
            $metadata['targetmin'] = $target::get_min_value();
            $metadata['targetmax'] = $target::get_max_value();
        } else {
            $metadata['targettype'] = 'discrete';
            $metadata['targetclasses'] = json_encode($target::get_classes());
        }

        // The first 2 samples will be used to store metadata about the dataset:
        // one row with the metadata names and one row with their values.
        $metadatacolumns = array_keys($metadata);
        $metadatavalues = array_values($metadata);

        // This will also reset samples' dataset keys.
        array_unshift($data, $metadatacolumns, $metadatavalues);
    }
}