moodle/lib/mlbackend/php/classes/processor.php

<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Php predictions processor
 *
 * @package   mlbackend_php
 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

namespace mlbackend_php;

defined('MOODLE_INTERNAL') || die();

use Phpml\Preprocessing\Normalizer;
use Phpml\CrossValidation\RandomSplit;
use Phpml\Dataset\ArrayDataset;
use Phpml\ModelManager;

/**
 * PHP predictions processor.
 *
 * @package   mlbackend_php
 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class processor implements \core_analytics\classifier, \core_analytics\regressor, \core_analytics\packable {

    /**
     * Size of training / prediction batches.
     */
    const BATCH_SIZE = 5000;

    /**
     * Number of train iterations.
     */
    const TRAIN_ITERATIONS = 500;

    /**
     * File name of the serialised model.
     */
    const MODEL_FILENAME = 'model.ser';

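    /**
     * @var bool Set to true by evaluate_classification() when it stops reading samples because
     *           the estimated memory usage of the loaded samples reached the evaluation limit.
     */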
    protected $limitedsize = false;

    /**
     * Checks if the processor is ready to use.
     *
     * The only requirement checked here is that PHP is at least version 7.0.0.
     *
     * @return bool|string True when the requirement is met, otherwise the 'errorphp7required'
     *                     string from the mlbackend_php component.
     */
    public function is_ready() {
        $phpisrecentenough = version_compare(phpversion(), '7.0.0', '>=');
        if ($phpisrecentenough) {
            return true;
        }
        return get_string('errorphp7required', 'mlbackend_php');
    }

    /**
     * Delete the stored models.
     *
     * Removes the model version output directory by passing it to remove_dir().
     *
     * @param string $uniqueid Not referenced by this method.
     * @param string $modelversionoutputdir Directory to remove.
     * @return null
     */
    public function clear_model($uniqueid, $modelversionoutputdir) {
        remove_dir($modelversionoutputdir);
    }

    /**
     * Delete the output directory.
     *
     * Removes the provided directory by passing it to remove_dir().
     *
     * @param string $modeloutputdir Directory to remove.
     * @return null
     */
    public function delete_output_dir($modeloutputdir) {
        remove_dir($modeloutputdir);
    }

    /**
     * Train this processor classification model using the provided supervised learning dataset.
     *
     * A model already stored in $outputdir is restored and trained further; otherwise a new
     * logistic regression classifier is created. Samples are fed to the classifier in batches of
     * self::BATCH_SIZE rows and the resulting model is serialised back to $outputdir.
     *
     * @param string $uniqueid Not referenced by this method.
     * @param \stored_file $dataset CSV file: metadata names, metadata values, headers, then one sample per row.
     * @param string $outputdir Directory where the serialised model is read from and stored to.
     * @return \stdClass Object with 'status' and 'info'; the status is NO_DATASET when fewer than two samples were read.
     */
    public function train_classification($uniqueid, \stored_file $dataset, $outputdir) {

        $modelmanager = new ModelManager();
        $modelfilepath = $this->get_model_filepath($outputdir);

        // Continue training a stored model when there is one, start from scratch otherwise.
        $classifier = file_exists($modelfilepath)
            ? $modelmanager->restoreFromFile($modelfilepath)
            : new \Phpml\Classification\Linear\LogisticRegression(self::TRAIN_ITERATIONS, Normalizer::NORM_L2);

        $fh = $dataset->get_content_file_handle();

        // The first line contains the metadata var names and the second one their values.
        $metadata = $this->extract_metadata($fh);
        $nfeatures = $metadata['nfeatures'];

        // Skip headers.
        fgets($fh);

        $batchsamples = array();
        $batchtargets = array();
        $morethan1sample = false;
        while (($row = fgetcsv($fh)) !== false) {
            // The features come first, the target is the column right after them.
            $batchsamples[] = array_slice(array_map('floatval', $row), 0, $nfeatures);
            $batchtargets[] = intval($row[$nfeatures]);

            $batchcount = count($batchsamples);
            if ($batchcount === self::BATCH_SIZE) {
                // Training in batches to avoid running out of memory.
                $classifier->partialTrain($batchsamples, $batchtargets, array(0, 1));
                $batchsamples = array();
                $batchtargets = array();
            }
            $morethan1sample = $morethan1sample || $batchcount > 1;
        }
        fclose($fh);

        $resultobj = new \stdClass();

        if (!$morethan1sample) {
            // Nothing is trained nor stored in this case.
            $resultobj->status = \core_analytics\model::NO_DATASET;
            $resultobj->info = array();
            return $resultobj;
        }

        // Train the samples that did not fill a complete batch.
        if ($batchsamples) {
            $classifier->partialTrain($batchsamples, $batchtargets, array(0, 1));
        }

        $resultobj->status = \core_analytics\model::OK;
        $resultobj->info = array();

        // Store the trained model.
        $modelmanager->saveToFile($classifier, $modelfilepath);

        return $resultobj;
    }

    /**
     * Classifies the provided dataset samples.
     *
     * Each dataset row is expected to start with the sample id, followed by the features.
     * Predictions are calculated in batches of self::BATCH_SIZE samples.
     *
     * @throws \moodle_exception If there is no stored model in $outputdir (see load_classifier()).
     * @param string $uniqueid Not referenced by this method.
     * @param \stored_file $dataset CSV file: metadata names, metadata values, headers, then one sample per row.
     * @param string $outputdir Directory that contains the serialised model.
     * @return \stdClass Object with 'status', 'info' and, when there are samples, 'predictions':
     *                   a list of array(sampleid, prediction) in dataset order.
     */
    public function classify($uniqueid, \stored_file $dataset, $outputdir) {

        $classifier = $this->load_classifier($outputdir);

        $fh = $dataset->get_content_file_handle();

        // The first line contains the metadata var names and the second one their values.
        $metadata = $this->extract_metadata($fh);

        // Skip headers.
        fgets($fh);

        $sampleids = array();
        $samples = array();
        $predictions = array();
        while (($data = fgetcsv($fh)) !== false) {
            $sampledata = array_map('floatval', $data);
            $sampleids[] = $data[0];
            $samples[] = array_slice($sampledata, 1, $metadata['nfeatures']);

            if (count($samples) === self::BATCH_SIZE) {
                // Predicting in batches to avoid running out of memory.

                // Append predictions incrementally, we want $sampleids keys in sync with $predictions keys.
                foreach ($classifier->predict($samples) as $prediction) {
                    $predictions[] = $prediction;
                }
                $samples = array();
            }
        }
        fclose($fh);

        // Finish the remaining predictions. They must be appended one by one: the array union
        // operator (+) would discard them because their 0-based keys already exist in $predictions
        // once a full batch has been processed.
        if ($samples) {
            foreach ($classifier->predict($samples) as $prediction) {
                $predictions[] = $prediction;
            }
        }

        $resultobj = new \stdClass();
        $resultobj->status = \core_analytics\model::OK;
        $resultobj->info = array();

        foreach ($predictions as $index => $prediction) {
            $resultobj->predictions[$index] = array($sampleids[$index], $prediction);
        }

        return $resultobj;
    }

    /**
     * Evaluates this processor classification model using the provided supervised learning dataset.
     *
     * During evaluation we need to shuffle the evaluation dataset samples to detect deviated results,
     * if the dataset is massive we can not load everything into memory. We know that 2GB is the
     * minimum memory limit we should have (\core_analytics\model::heavy_duty_mode), if we subtract the memory
     * that we already consumed and the memory that Phpml algorithms will need we should still have at
     * least 500MB of memory, which should be enough to evaluate a model. The size calculation is only an
     * approximation, so this is not guaranteed to work on every site, but it should minimise memory limit
     * problems. Site admins can still set $CFG->mlbackend_php_no_evaluation_limits to true to skip this
     * 500MB limit.
     *
     * When $trainedmodeldir is provided the stored model is evaluated once against all the loaded
     * samples; otherwise a new classifier is trained and tested $niterations times using random
     * splits of the loaded samples.
     *
     * @throws \moodle_exception If $trainedmodeldir is provided but contains no stored model.
     * @param string $uniqueid Not referenced by this method.
     * @param float $maxdeviation Maximum accepted standard deviation between the iterations' results.
     * @param int $niterations Number of evaluation iterations (forced to 1 when $trainedmodeldir is set).
     * @param \stored_file $dataset CSV file: metadata names, metadata values, headers, then one sample per row.
     * @param string $outputdir Not referenced by this method.
     * @param  string $trainedmodeldir Directory of an already trained model, or an empty value.
     * @return \stdClass Object with 'status', 'score' and 'info'.
     */
    public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,
            $outputdir, $trainedmodeldir) {
        // Without this declaration $CFG is an undefined local variable and the admin setting is never seen.
        global $CFG;

        // The flag describes the current evaluation only, do not carry it over from a previous one.
        $this->limitedsize = false;

        if ($trainedmodeldir) {
            // We overwrite the number of iterations as the results will always be the same.
            $niterations = 1;
            $classifier = $this->load_classifier($trainedmodeldir);
        }

        // Open the dataset once the classifier is loaded so the handle is not leaked if loading fails.
        $fh = $dataset->get_content_file_handle();

        // The first line contains the metadata var names and the second one their values.
        $metadata = $this->extract_metadata($fh);

        // Skip headers.
        fgets($fh);

        // We allow admins to disable evaluation memory usage limits by modifying config.php.
        $limitsenabled = empty($CFG->mlbackend_php_no_evaluation_limits);
        if ($limitsenabled) {
            $samplessize = 0;
            $limit = get_real_size('500MB');

            // Just an approximation, will depend on PHP version, compile options...
            // Double size + zval struct (6 bytes + 8 bytes + 16 bytes) + array bucket (96 bytes)
            // https://nikic.github.io/2011/12/12/How-big-are-PHP-arrays-really-Hint-BIG.html.
            $floatsize = (PHP_INT_SIZE * 2) + 6 + 8 + 16 + 96;
        }

        $samples = array();
        $targets = array();
        while (($data = fgetcsv($fh)) !== false) {
            $sampledata = array_map('floatval', $data);

            // The features come first, the target is the column right after them.
            $samples[] = array_slice($sampledata, 0, $metadata['nfeatures']);
            $targets[] = intval($data[$metadata['nfeatures']]);

            if ($limitsenabled) {
                // We will have plenty of missing values in the dataset so it should be a conservative approximation.
                $samplessize = $samplessize + (count($sampledata) * $floatsize);

                // Stop fetching more samples.
                if ($samplessize >= $limit) {
                    $this->limitedsize = true;
                    break;
                }
            }
        }
        fclose($fh);

        // We need at least 2 samples belonging to each target.
        $counts = array_count_values($targets);
        $ntargets = count(explode(',', $metadata['targetclasses']));
        $notenoughdata = ($ntargets > count($counts));
        foreach ($counts as $count) {
            if ($count < 2) {
                $notenoughdata = true;
            }
        }
        if ($notenoughdata) {
            $resultobj = new \stdClass();
            $resultobj->status = \core_analytics\model::NOT_ENOUGH_DATA;
            $resultobj->score = 0;
            $resultobj->info = array(get_string('errornotenoughdata', 'mlbackend_php'));
            return $resultobj;
        }

        $phis = array();

        // Evaluate the model multiple times to confirm the results are not significantly random due to a short amount of data.
        for ($i = 0; $i < $niterations; $i++) {

            if (!$trainedmodeldir) {
                $classifier = new \Phpml\Classification\Linear\LogisticRegression(self::TRAIN_ITERATIONS, Normalizer::NORM_L2);

                // Split up the dataset in training and testing.
                $data = new RandomSplit(new ArrayDataset($samples, $targets), 0.2);

                $classifier->train($data->getTrainSamples(), $data->getTrainLabels());
                $predictedlabels = $classifier->predict($data->getTestSamples());
                $phis[] = $this->get_phi($data->getTestLabels(), $predictedlabels);
            } else {
                // The stored model is evaluated against all the loaded samples.
                $predictedlabels = $classifier->predict($samples);
                $phis[] = $this->get_phi($targets, $predictedlabels);
            }
        }

        // Let's fill the results changing the returned status code depending on the phi-related calculated metrics.
        return $this->get_evaluation_result_object($dataset, $phis, $maxdeviation);
    }

    /**
     * Returns the results object built from all the evaluations.
     *
     * The score is the average phi converted to a 0..1 scale. NOT_ENOUGH_DATA is added to the status
     * when the deviation between evaluations exceeds $maxdeviation and LOW_SCORE is added when the
     * score is below \core_analytics\model::MIN_SCORE.
     *
     * @param \stored_file $dataset Evaluated dataset, only used to report its size.
     * @param array $phis Phi coefficient of each evaluation.
     * @param float $maxdeviation Maximum accepted standard deviation.
     * @return \stdClass Object with 'status', 'info' and 'score'.
     */
    protected function get_evaluation_result_object(\stored_file $dataset, $phis, $maxdeviation) {

        if (count($phis) === 1) {
            // A single evaluation: its phi is the final value and there is no deviation to calculate.
            $avgphi = reset($phis);
            $modeldev = 0;
        } else {
            // Average phi of all evaluations as final score.
            $avgphi = \Phpml\Math\Statistic\Mean::arithmetic($phis);

            // Standard deviation should ideally be calculated against the area under the curve.
            $modeldev = \Phpml\Math\Statistic\StandardDeviation::population($phis);
        }

        // Zero is ok, now we add other bits if something is not right.
        $resultobj = new \stdClass();
        $resultobj->status = \core_analytics\model::OK;
        $resultobj->info = array();

        // Convert phi to a standard score (from -1 to 1 to a value between 0 and 1).
        $resultobj->score = ($avgphi + 1) / 2;

        // If each iteration results varied too much we need more data to confirm that this is a valid model.
        if ($modeldev > $maxdeviation) {
            $resultobj->status += \core_analytics\model::NOT_ENOUGH_DATA;
            $deviationinfo = (object) array(
                'deviation' => $modeldev,
                'accepteddeviation' => $maxdeviation,
            );
            $resultobj->info[] = get_string('errornotenoughdatadev', 'mlbackend_php', $deviationinfo);
        }

        if ($resultobj->score < \core_analytics\model::MIN_SCORE) {
            $resultobj->status += \core_analytics\model::LOW_SCORE;
            $scoreinfo = (object) array(
                'score' => $resultobj->score,
                'minscore' => \core_analytics\model::MIN_SCORE,
            );
            $resultobj->info[] = get_string('errorlowscore', 'mlbackend_php', $scoreinfo);
        }

        // Let the user know that not all the dataset was used.
        if ($this->limitedsize === true) {
            $datasetsize = display_size($dataset->get_filesize());
            $resultobj->info[] = get_string('datasetsizelimited', 'mlbackend_php', $datasetsize);
        }

        return $resultobj;
    }

    /**
     * Loads the pre-trained classifier.
     *
     * @throws \moodle_exception If the serialised model file does not exist in $outputdir.
     * @param string $outputdir Directory that contains the serialised model.
     * @return \Phpml\Classification\Linear\LogisticRegression
     */
    protected function load_classifier($outputdir) {
        $serialisedmodel = $this->get_model_filepath($outputdir);

        if (file_exists($serialisedmodel)) {
            return (new ModelManager())->restoreFromFile($serialisedmodel);
        }

        throw new \moodle_exception('errorcantloadmodel', 'mlbackend_php', '', $serialisedmodel);
    }

    /**
     * Train this processor regression model using the provided supervised learning dataset.
     *
     * Regression is not implemented by this backend, so this method always throws.
     *
     * @throws \coding_exception Always.
     * @param string $uniqueid
     * @param \stored_file $dataset
     * @param string $outputdir
     * @return \stdClass
     */
    public function train_regression($uniqueid, \stored_file $dataset, $outputdir) {
        throw new \coding_exception('This predictor does not support regression yet.');
    }

    /**
     * Estimates linear values for the provided dataset samples.
     *
     * Regression is not implemented by this backend, so this method always throws.
     *
     * @throws \coding_exception Always.
     * @param string $uniqueid
     * @param \stored_file $dataset
     * @param mixed $outputdir
     * @return void
     */
    public function estimate($uniqueid, \stored_file $dataset, $outputdir) {
        throw new \coding_exception('This predictor does not support regression yet.');
    }

    /**
     * Evaluates this processor regression model using the provided supervised learning dataset.
     *
     * Regression is not implemented by this backend, so this method always throws.
     *
     * @throws \coding_exception Always.
     * @param string $uniqueid
     * @param float $maxdeviation
     * @param int $niterations
     * @param \stored_file $dataset
     * @param string $outputdir
     * @param  string $trainedmodeldir
     * @return \stdClass
     */
    public function evaluate_regression($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,
            $outputdir, $trainedmodeldir) {
        throw new \coding_exception('This predictor does not support regression yet.');
    }

    /**
     * Exports the machine learning model.
     *
     * Nothing is copied nor modified: the model directory itself is returned once the
     * serialised model file has been found in it.
     *
     * @throws \moodle_exception If $modeldir does not contain the serialised model file.
     * @param  string $uniqueid  The model unique id (not referenced by this method)
     * @param  string $modeldir  The directory that contains the trained model.
     * @return string            The path to the directory that contains the exported model.
     */
    public function export(string $uniqueid, string $modeldir) : string {

        $serialisedmodel = $this->get_model_filepath($modeldir);

        if (file_exists($serialisedmodel)) {
            // We can use the actual $modeldir as the directory is not modified during export, just copied into a zip.
            return $modeldir;
        }

        throw new \moodle_exception('errorexportmodelresult', 'analytics');
    }

    /**
     * Imports the provided machine learning model.
     *
     * The serialised contents are validated before they are restored: the import is rejected unless
     * they unserialise, with only the logistic regression class allowed, to an instance of that class.
     *
     * @param  string $uniqueid The model unique id (not referenced by this method)
     * @param  string $modeldir  The directory that will contain the trained model.
     * @param  string $importdir The directory that contains the files to import.
     * @return bool Success; false when the model file is missing, unreadable or not a valid serialised classifier.
     */
    public function import(string $uniqueid, string $modeldir, string $importdir) : bool {

        $importmodelfilepath = $this->get_model_filepath($importdir);
        $modelfilepath = $this->get_model_filepath($modeldir);

        // Fail explicitly, instead of through PHP warnings, when there is nothing to import.
        if (!is_readable($importmodelfilepath)) {
            return false;
        }

        // Copied from ModelManager::restoreFromFile to validate the serialised contents
        // before restoring them.
        $importconfig = file_get_contents($importmodelfilepath);
        if ($importconfig === false) {
            return false;
        }

        // Clean stuff like function calls.
        $importconfig = preg_replace('/[^a-zA-Z0-9\{\}%\.\*\;\,\:\"\-\0\\\]/', '', $importconfig);

        $object = unserialize($importconfig,
            ['allowed_classes' => ['Phpml\\Classification\\Linear\\LogisticRegression']]);

        // This single check rejects unserialize() failures (false), __PHP_Incomplete_Class instances
        // and any non-object payload (arrays, scalars), which get_class() can not handle in PHP 8.
        if (!($object instanceof \Phpml\Classification\Linear\LogisticRegression)) {
            return false;
        }

        // NOTE(review): the validation above ran on a cleaned copy of the contents, while restoreFromFile()
        // reads the original file again; confirm that it applies equivalent restrictions.
        $modelmanager = new ModelManager();
        $classifier = $modelmanager->restoreFromFile($importmodelfilepath);

        // This would override any previous classifier.
        $modelmanager->saveToFile($classifier, $modelfilepath);

        return true;
    }

    /**
     * Returns the path to the serialised model file in the provided directory.
     *
     * The path is $modeldir and self::MODEL_FILENAME joined by the directory separator.
     *
     * @param  string $modeldir The model directory
     * @return string           The model file
     */
    protected function get_model_filepath(string $modeldir) : string {
        // No model-specific file name is needed, the output directory is already unique to the model.
        $pathparts = array($modeldir, self::MODEL_FILENAME);
        return implode(DIRECTORY_SEPARATOR, $pathparts);
    }

    /**
     * Returns the Phi correlation coefficient.
     *
     * The coefficient is calculated from the 2x2 confusion matrix of labels 0 and 1. Zero is
     * returned when any row or column of the matrix sums zero, as the coefficient's denominator
     * would be zero.
     *
     * @param array $testlabels The real labels.
     * @param array $predictedlabels The labels returned by the classifier.
     * @return float
     */
    protected function get_phi($testlabels, $predictedlabels) {

        // Binary here only as well.
        $cells = \Phpml\Metric\ConfusionMatrix::compute($testlabels, $predictedlabels, array(0, 1));

        $cell00 = $cells[0][0];
        $cell01 = $cells[0][1];
        $cell10 = $cells[1][0];
        $cell11 = $cells[1][1];

        // Sums of the columns and rows of the matrix.
        $column0 = $cell00 + $cell10;
        $row0 = $cell00 + $cell01;
        $row1 = $cell11 + $cell10;
        $column1 = $cell11 + $cell01;

        foreach (array($column0, $row0, $row1, $column1) as $marginal) {
            if ($marginal === 0) {
                return 0;
            }
        }

        $numerator = ($cell00 * $cell11) - ($cell10 * $cell01);
        return $numerator / sqrt($column0 * $row0 * $row1 * $column1);
    }

    /**
     * Extracts metadata from the dataset file.
     *
     * The file pointer should be located at the top of the file. Two CSV lines are consumed:
     * the first one contains the metadata names and the second one their values.
     *
     * @param resource $fh
     * @return array Metadata values indexed by metadata name.
     */
    protected function extract_metadata($fh) {
        $names = fgetcsv($fh);
        $values = fgetcsv($fh);
        return array_combine($names, $values);
    }
}
Migrando Repositório 3 years ago			`<?php`
			`// This file is part of Moodle - http://moodle.org/`
			`//`
			`// Moodle is free software: you can redistribute it and/or modify`
			`// it under the terms of the GNU General Public License as published by`
			`// the Free Software Foundation, either version 3 of the License, or`
			`// (at your option) any later version.`
			`//`
			`// Moodle is distributed in the hope that it will be useful,`
			`// but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`// GNU General Public License for more details.`
			`//`
			`// You should have received a copy of the GNU General Public License`
			`// along with Moodle. If not, see <http://www.gnu.org/licenses/>.`

			`/**`
			`* Php predictions processor`
			`*`
			`* @package mlbackend_php`
			`* @copyright 2016 David Monllao {@link http://www.davidmonllao.com}`
			`* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later`
			`*/`

			`namespace mlbackend_php;`

			`defined('MOODLE_INTERNAL') \|\| die();`

			`use Phpml\Preprocessing\Normalizer;`
			`use Phpml\CrossValidation\RandomSplit;`
			`use Phpml\Dataset\ArrayDataset;`
			`use Phpml\ModelManager;`

			`/**`
			`* PHP predictions processor.`
			`*`
			`* @package mlbackend_php`
			`* @copyright 2016 David Monllao {@link http://www.davidmonllao.com}`
			`* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later`
			`*/`
			`class processor implements \core_analytics\classifier, \core_analytics\regressor, \core_analytics\packable {`

			`/**`
			`* Size of training / prediction batches.`
			`*/`
			`const BATCH_SIZE = 5000;`

			`/**`
			`* Number of train iterations.`
			`*/`
			`const TRAIN_ITERATIONS = 500;`

			`/**`
			`* File name of the serialised model.`
			`*/`
			`const MODEL_FILENAME = 'model.ser';`

			`/**`
			`* @var bool`
			`*/`
			`protected $limitedsize = false;`

			`/**`
			`* Checks if the processor is ready to use.`
			`*`
			`* @return bool`
			`*/`
			`public function is_ready() {`
			`if (version_compare(phpversion(), '7.0.0') < 0) {`
			`return get_string('errorphp7required', 'mlbackend_php');`
			`}`
			`return true;`
			`}`

			`/**`
			`* Delete the stored models.`
			`*`
			`* @param string $uniqueid`
			`* @param string $modelversionoutputdir`
			`* @return null`
			`*/`
			`public function clear_model($uniqueid, $modelversionoutputdir) {`
			`remove_dir($modelversionoutputdir);`
			`}`

			`/**`
			`* Delete the output directory.`
			`*`
			`* @param string $modeloutputdir`
			`* @return null`
			`*/`
			`public function delete_output_dir($modeloutputdir) {`
			`remove_dir($modeloutputdir);`
			`}`

			`/**`
			`* Train this processor classification model using the provided supervised learning dataset.`
			`*`
			`* @param string $uniqueid`
			`* @param \stored_file $dataset`
			`* @param string $outputdir`
			`* @return \stdClass`
			`*/`
			`public function train_classification($uniqueid, \stored_file $dataset, $outputdir) {`

			`$modelfilepath = $this->get_model_filepath($outputdir);`

			`$modelmanager = new ModelManager();`

			`if (file_exists($modelfilepath)) {`
			`$classifier = $modelmanager->restoreFromFile($modelfilepath);`
			`} else {`
			`$classifier = new \Phpml\Classification\Linear\LogisticRegression(self::TRAIN_ITERATIONS, Normalizer::NORM_L2);`
			`}`

			`$fh = $dataset->get_content_file_handle();`

			`// The first lines are var names and the second one values.`
			`$metadata = $this->extract_metadata($fh);`

			`// Skip headers.`
			`fgets($fh);`

			`$samples = array();`
			`$targets = array();`
			`while (($data = fgetcsv($fh)) !== false) {`
			`$sampledata = array_map('floatval', $data);`
			`$samples[] = array_slice($sampledata, 0, $metadata['nfeatures']);`
			`$targets[] = intval($data[$metadata['nfeatures']]);`

			`$nsamples = count($samples);`
			`if ($nsamples === self::BATCH_SIZE) {`
			`// Training it batches to avoid running out of memory.`

			`$classifier->partialTrain($samples, $targets, array(0, 1));`
			`$samples = array();`
			`$targets = array();`
			`}`
			`if (empty($morethan1sample) && $nsamples > 1) {`
			`$morethan1sample = true;`
			`}`
			`}`
			`fclose($fh);`

			`if (empty($morethan1sample)) {`
			`$resultobj = new \stdClass();`
			`$resultobj->status = \core_analytics\model::NO_DATASET;`
			`$resultobj->info = array();`
			`return $resultobj;`
			`}`

			`// Train the remaining samples.`
			`if ($samples) {`
			`$classifier->partialTrain($samples, $targets, array(0, 1));`
			`}`

			`$resultobj = new \stdClass();`
			`$resultobj->status = \core_analytics\model::OK;`
			`$resultobj->info = array();`

			`// Store the trained model.`
			`$modelmanager->saveToFile($classifier, $modelfilepath);`

			`return $resultobj;`
			`}`

			`/**`
			`* Classifies the provided dataset samples.`
			`*`
			`* @param string $uniqueid`
			`* @param \stored_file $dataset`
			`* @param string $outputdir`
			`* @return \stdClass`
			`*/`
			`public function classify($uniqueid, \stored_file $dataset, $outputdir) {`

			`$classifier = $this->load_classifier($outputdir);`

			`$fh = $dataset->get_content_file_handle();`

			`// The first lines are var names and the second one values.`
			`$metadata = $this->extract_metadata($fh);`

			`// Skip headers.`
			`fgets($fh);`

			`$sampleids = array();`
			`$samples = array();`
			`$predictions = array();`
			`while (($data = fgetcsv($fh)) !== false) {`
			`$sampledata = array_map('floatval', $data);`
			`$sampleids[] = $data[0];`
			`$samples[] = array_slice($sampledata, 1, $metadata['nfeatures']);`

			`if (count($samples) === self::BATCH_SIZE) {`
			`// Prediction it batches to avoid running out of memory.`

			`// Append predictions incrementally, we want $sampleids keys in sync with $predictions keys.`
			`$newpredictions = $classifier->predict($samples);`
			`foreach ($newpredictions as $prediction) {`
			`array_push($predictions, $prediction);`
			`}`
			`$samples = array();`
			`}`
			`}`
			`fclose($fh);`

			`// Finish the remaining predictions.`
			`if ($samples) {`
			`$predictions = $predictions + $classifier->predict($samples);`
			`}`

			`$resultobj = new \stdClass();`
			`$resultobj->status = \core_analytics\model::OK;`
			`$resultobj->info = array();`

			`foreach ($predictions as $index => $prediction) {`
			`$resultobj->predictions[$index] = array($sampleids[$index], $prediction);`
			`}`

			`return $resultobj;`
			`}`

			`/**`
			`* Evaluates this processor classification model using the provided supervised learning dataset.`
			`*`
			`* During evaluation we need to shuffle the evaluation dataset samples to detect deviated results,`
			`* if the dataset is massive we can not load everything into memory. We know that 2GB is the`
			`* minimum memory limit we should have (\core_analytics\model::heavy_duty_mode), if we substract the memory`
			`* that we already consumed and the memory that Phpml algorithms will need we should still have at`
			`* least 500MB of memory, which should be enough to evaluate a model. In any case this is a robust`
			`* solution that will work for all sites but it should minimize memory limit problems. Site admins`
			`* can still set $CFG->mlbackend_php_no_evaluation_limits to true to skip this 500MB limit.`
			`*`
			`* @param string $uniqueid`
			`* @param float $maxdeviation`
			`* @param int $niterations`
			`* @param \stored_file $dataset`
			`* @param string $outputdir`
			`* @param string $trainedmodeldir`
			`* @return \stdClass`
			`*/`
			`public function evaluate_classification($uniqueid, $maxdeviation, $niterations, \stored_file $dataset,`
			`$outputdir, $trainedmodeldir) {`
			`$fh = $dataset->get_content_file_handle();`

			`if ($trainedmodeldir) {`
			`// We overwrite the number of iterations as the results will always be the same.`
			`$niterations = 1;`
			`$classifier = $this->load_classifier($trainedmodeldir);`
			`}`

			`// The first lines are var names and the second one values.`
			`$metadata = $this->extract_metadata($fh);`

			`// Skip headers.`
			`fgets($fh);`

			`if (empty($CFG->mlbackend_php_no_evaluation_limits)) {`
			`$samplessize = 0;`
			`$limit = get_real_size('500MB');`

			`// Just an approximation, will depend on PHP version, compile options...`
			`// Double size + zval struct (6 bytes + 8 bytes + 16 bytes) + array bucket (96 bytes)`
			`// https://nikic.github.io/2011/12/12/How-big-are-PHP-arrays-really-Hint-BIG.html.`
			`$floatsize = (PHP_INT_SIZE * 2) + 6 + 8 + 16 + 96;`
			`}`

			`$samples = array();`
			`$targets = array();`
			`while (($data = fgetcsv($fh)) !== false) {`
			`$sampledata = array_map('floatval', $data);`

			`$samples[] = array_slice($sampledata, 0, $metadata['nfeatures']);`
			`$targets[] = intval($data[$metadata['nfeatures']]);`

			`if (empty($CFG->mlbackend_php_no_evaluation_limits)) {`
			`// We allow admins to disable evaluation memory usage limits by modifying config.php.`

			`// We will have plenty of missing values in the dataset so it should be a conservative approximation.`
			`$samplessize = $samplessize + (count($sampledata) * $floatsize);`

			`// Stop fetching more samples.`
			`if ($samplessize >= $limit) {`
			`$this->limitedsize = true;`
			`break;`
			`}`
			`}`
			`}`
			`fclose($fh);`

			`// We need at least 2 samples belonging to each target.`
			`$counts = array_count_values($targets);`
			`$ntargets = count(explode(',', $metadata['targetclasses']));`
			`foreach ($counts as $count) {`
			`if ($count < 2) {`
			`$notenoughdata = true;`
			`}`
			`}`
			`if ($ntargets > count($counts)) {`
			`$notenoughdata = true;`
			`}`
			`if (!empty($notenoughdata)) {`
			`$resultobj = new \stdClass();`
			`$resultobj->status = \core_analytics\model::NOT_ENOUGH_DATA;`
			`$resultobj->score = 0;`
			`$resultobj->info = array(get_string('errornotenoughdata', 'mlbackend_php'));`
			`return $resultobj;`
			`}`

			`$phis = array();`

			`// Evaluate the model multiple times to confirm the results are not significantly random due to a short amount of data.`
			`for ($i = 0; $i < $niterations; $i++) {`

			`if (!$trainedmodeldir) {`
			`$classifier = new \Phpml\Classification\Linear\LogisticRegression(self::TRAIN_ITERATIONS, Normalizer::NORM_L2);`

			`// Split up the dataset in classifier and testing.`
			`$data = new RandomSplit(new ArrayDataset($samples, $targets), 0.2);`

			`$classifier->train($data->getTrainSamples(), $data->getTrainLabels());`
			`$predictedlabels = $classifier->predict($data->getTestSamples());`
			`$phis[] = $this->get_phi($data->getTestLabels(), $predictedlabels);`
			`} else {`
			`$predictedlabels = $classifier->predict($samples);`
			`$phis[] = $this->get_phi($targets, $predictedlabels);`
			`}`
			`}`

			`// Let's fill the results changing the returned status code depending on the phi-related calculated metrics.`
			`return $this->get_evaluation_result_object($dataset, $phis, $maxdeviation);`
			`}`

			/**
			 * Builds the evaluation result object from the phi coefficient of every evaluation run.
			 *
			 * The score is the mean phi shifted from the -1..1 range into 0..1. Extra status flags and
			 * info messages are added when the runs deviate from each other more than $maxdeviation,
			 * when the score is under \core_analytics\model::MIN_SCORE and when the dataset size was limited.
			 *
			 * @param \stored_file $dataset The evaluated dataset; only its file size is read here.
			 * @param array $phis Phi coefficient obtained on each evaluation run.
			 * @param float $maxdeviation Highest accepted standard deviation between runs.
			 * @return \stdClass Object with status, info and score properties.
			 */
			protected function get_evaluation_result_object(\stored_file $dataset, $phis, $maxdeviation) {

			    if (count($phis) === 1) {
			        // A single run: its phi is the final value and there is nothing to deviate from.
			        $meanphi = reset($phis);
			        $deviation = 0;
			    } else {
			        // Average phi of all evaluations as final score.
			        $meanphi = \Phpml\Math\Statistic\Mean::arithmetic($phis);
			        // Standard deviation should ideally be calculated against the area under the curve.
			        $deviation = \Phpml\Math\Statistic\StandardDeviation::population($phis);
			    }

			    // Start from an OK status; problems detected below are added on top of it.
			    $result = new \stdClass();
			    $result->status = \core_analytics\model::OK;
			    $result->info = array();

			    // Phi goes from -1 to 1, the reported score goes from 0 to 1.
			    $result->score = ($meanphi + 1) / 2;

			    // Runs that differ too much mean that more data is required to trust the model.
			    if ($deviation > $maxdeviation) {
			        $result->status += \core_analytics\model::NOT_ENOUGH_DATA;
			        $deviationinfo = (object) array(
			            'deviation' => $deviation,
			            'accepteddeviation' => $maxdeviation
			        );
			        $result->info[] = get_string('errornotenoughdatadev', 'mlbackend_php', $deviationinfo);
			    }

			    if ($result->score < \core_analytics\model::MIN_SCORE) {
			        $result->status += \core_analytics\model::LOW_SCORE;
			        $scoreinfo = (object) array(
			            'score' => $result->score,
			            'minscore' => \core_analytics\model::MIN_SCORE
			        );
			        $result->info[] = get_string('errorlowscore', 'mlbackend_php', $scoreinfo);
			    }

			    if ($this->limitedsize === true) {
			        $filesize = display_size($dataset->get_filesize());
			        $result->info[] = get_string('datasetsizelimited', 'mlbackend_php', $filesize);
			    }

			    return $result;
			}

			/**
			 * Restores the previously trained classifier stored in the provided directory.
			 *
			 * @throws \moodle_exception If the serialised model file does not exist.
			 * @param string $outputdir Directory where the serialised model is expected.
			 * @return \Phpml\Classification\Linear\LogisticRegression
			 */
			protected function load_classifier($outputdir) {
			    $path = $this->get_model_filepath($outputdir);

			    if (file_exists($path)) {
			        return (new ModelManager())->restoreFromFile($path);
			    }

			    // No serialised model in there; report the path we looked at.
			    throw new \moodle_exception('errorcantloadmodel', 'mlbackend_php', '', $path);
			}

			/**
			 * Trains a regression model with the provided supervised learning dataset.
			 *
			 * Regression is not implemented by this backend, so this method always throws.
			 *
			 * @throws \coding_exception Always.
			 * @param string $uniqueid
			 * @param \stored_file $dataset
			 * @param string $outputdir
			 * @return \stdClass
			 */
			public function train_regression($uniqueid, \stored_file $dataset, $outputdir) {
			    $message = 'This predictor does not support regression yet.';
			    throw new \coding_exception($message);
			}

			/**
			 * Estimates linear values for the samples of the provided dataset.
			 *
			 * Regression is not implemented by this backend, so this method always throws.
			 *
			 * @throws \coding_exception Always.
			 * @param string $uniqueid
			 * @param \stored_file $dataset
			 * @param string $outputdir
			 * @return void Never returns normally.
			 */
			public function estimate($uniqueid, \stored_file $dataset, $outputdir) {
			    $message = 'This predictor does not support regression yet.';
			    throw new \coding_exception($message);
			}

			/**
			 * Evaluates a regression model with the provided supervised learning dataset.
			 *
			 * Regression is not implemented by this backend, so this method always throws.
			 *
			 * @throws \coding_exception Always.
			 * @param string $uniqueid
			 * @param float $maxdeviation
			 * @param int $niterations
			 * @param \stored_file $dataset
			 * @param string $outputdir
			 * @param string $trainedmodeldir
			 * @return \stdClass
			 */
			public function evaluate_regression(
			    $uniqueid,
			    $maxdeviation,
			    $niterations,
			    \stored_file $dataset,
			    $outputdir,
			    $trainedmodeldir
			) {
			    $message = 'This predictor does not support regression yet.';
			    throw new \coding_exception($message);
			}

			/**
			 * Exports the machine learning model.
			 *
			 * Nothing is copied or modified here: once the serialised model file is found, the model
			 * directory itself is handed back as the export directory.
			 *
			 * @throws \moodle_exception If the directory does not contain a serialised model file.
			 * @param string $uniqueid The model unique id
			 * @param string $modeldir The directory that contains the trained model.
			 * @return string The path to the directory that contains the exported model.
			 */
			public function export(string $uniqueid, string $modeldir) : string {

			    if (file_exists($this->get_model_filepath($modeldir))) {
			        // The directory is not modified during export, just copied into a zip, so it can be used as is.
			        return $modeldir;
			    }

			    throw new \moodle_exception('errorexportmodelresult', 'analytics');
			}

			/**
			 * Imports the provided machine learning model.
			 *
			 * A sanitised copy of the serialised file is unserialised first, allowing only the
			 * LogisticRegression class. The import is rejected (false is returned) when the file
			 * can not be read or when that copy does not unserialise to a LogisticRegression
			 * instance. Only after that check the model is restored and saved into $modeldir.
			 *
			 * @param string $uniqueid The model unique id
			 * @param string $modeldir The directory that will contain the trained model.
			 * @param string $importdir The directory that contains the files to import.
			 * @return bool Success
			 */
			public function import(string $uniqueid, string $modeldir, string $importdir) : bool {

			    $importmodelfilepath = $this->get_model_filepath($importdir);
			    $modelfilepath = $this->get_model_filepath($modeldir);

			    // Copied from ModelManager::restoreFromFile to validate the serialised contents
			    // before restoring them.
			    $importconfig = file_get_contents($importmodelfilepath);
			    if ($importconfig === false) {
			        // Missing or unreadable file, nothing to import.
			        return false;
			    }

			    // Clean stuff like function calls.
			    $importconfig = preg_replace('/[^a-zA-Z0-9\{\}%\.\*\;\,\:\"\-\0\\\]/', '', $importconfig);
			    if (!is_string($importconfig)) {
			        // The regular expression engine failed, there is nothing reliable to validate.
			        return false;
			    }

			    $object = unserialize($importconfig,
			        ['allowed_classes' => ['Phpml\\Classification\\Linear\\LogisticRegression']]);

			    // A single type check rejects unserialize() failures (false), classes that are not
			    // allowed (they come back as __PHP_Incomplete_Class) and also non-object payloads such
			    // as arrays or numbers, which must never reach get_class() because it only accepts objects.
			    if (!($object instanceof \Phpml\Classification\Linear\LogisticRegression)) {
			        return false;
			    }

			    // NOTE(review): restoreFromFile() reads the import file again instead of reusing the
			    // sanitised copy validated above; confirm that ModelManager restricts the classes it
			    // unserialises on its own.
			    $modelmanager = new ModelManager();
			    $classifier = $modelmanager->restoreFromFile($importmodelfilepath);

			    // This would override any previous classifier.
			    $modelmanager->saveToFile($classifier, $modelfilepath);

			    return true;
			}

			/**
			 * Builds the path of the serialised model file that lives in the provided directory.
			 *
			 * @param string $modeldir The model directory
			 * @return string The model file
			 */
			protected function get_model_filepath(string $modeldir) : string {
			    // The directory is already unique to the model, so a fixed file name is enough.
			    return implode(DIRECTORY_SEPARATOR, [$modeldir, self::MODEL_FILENAME]);
			}

			/**
			 * Calculates the Phi correlation coefficient between expected and predicted labels.
			 *
			 * Only the binary case (labels 0 and 1) is covered. Zero is returned when any row or
			 * column of the 2x2 confusion matrix adds up to zero, as the coefficient would require
			 * a division by zero.
			 *
			 * @param array $testlabels
			 * @param array $predictedlabels
			 * @return float
			 */
			protected function get_phi($testlabels, $predictedlabels) {

			    // Binary here only as well.
			    $cells = \Phpml\Metric\ConfusionMatrix::compute($testlabels, $predictedlabels, array(0, 1));

			    $m00 = $cells[0][0];
			    $m01 = $cells[0][1];
			    $m10 = $cells[1][0];
			    $m11 = $cells[1][1];

			    // Marginal sums of the matrix, they are the factors of the denominator.
			    $marginals = array(
			        $m00 + $m10,
			        $m00 + $m01,
			        $m11 + $m10,
			        $m11 + $m01
			    );

			    if (in_array(0, $marginals, true)) {
			        return 0;
			    }

			    return ($m00 * $m11 - $m10 * $m01) / sqrt($marginals[0] * $marginals[1] * $marginals[2] * $marginals[3]);
			}

			/**
			 * Extracts metadata from the dataset file.
			 *
			 * The file pointer should be located at the top of the file. Two CSV rows are consumed:
			 * the first one provides the metadata names and the second one their values.
			 *
			 * @throws \coding_exception If the two rows can not be read or their lengths differ.
			 * @param resource $fh
			 * @return array Metadata values indexed by metadata name.
			 */
			protected function extract_metadata($fh) {
			    $names = fgetcsv($fh);
			    $values = fgetcsv($fh);

			    // fgetcsv() does not return an array at the end of the file or on read errors, and
			    // array_combine() needs two arrays of the same size; fail with a clear message instead
			    // of a type error or a silently broken metadata value.
			    if (!is_array($names) || !is_array($values) || count($names) !== count($values)) {
			        throw new \coding_exception('The dataset file does not start with two metadata rows of the same length.');
			    }

			    return array_combine($names, $values);
			}
			`}`