You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1420 lines
53 KiB

<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Solr engine.
*
* @package search_solr
* @copyright 2015 Daniel Neis Araujo
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
namespace search_solr;
defined('MOODLE_INTERNAL') || die();
/**
* Solr engine.
*
* @package search_solr
* @copyright 2015 Daniel Neis Araujo
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class engine extends \core_search\engine {
/**
* @var string The date format used by solr.
*/
const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';
/**
* @var int Commit documents interval (number of miliseconds).
*/
const AUTOCOMMIT_WITHIN = 15000;
/**
* The maximum number of results to fetch at a time.
*/
const QUERY_SIZE = 120;
/**
* Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending.
*/
const FRAG_SIZE = 510;
/**
* Marker for the start of a highlight.
*/
const HIGHLIGHT_START = '@@HI_S@@';
/**
* Marker for the end of a highlight.
*/
const HIGHLIGHT_END = '@@HI_E@@';
/** @var float Boost value for matching course in location-ordered searches */
const COURSE_BOOST = 1;
/** @var float Boost value for matching context (in addition to course boost) */
const CONTEXT_BOOST = 0.5;
/**
* @var \SolrClient
*/
protected $client = null;
/**
* @var bool True if we should reuse SolrClients, false if not.
*/
protected $cacheclient = true;
/**
* @var \curl Direct curl object.
*/
protected $curl = null;
/**
* @var array Fields that can be highlighted.
*/
protected $highlightfields = array('title', 'content', 'description1', 'description2');
/**
* @var int Number of total docs reported by Sorl for the last query.
*/
protected $totalenginedocs = 0;
/**
* @var int Number of docs we have processed for the last query.
*/
protected $processeddocs = 0;
/**
* @var int Number of docs that have been skipped while processing the last query.
*/
protected $skippeddocs = 0;
/**
* Solr server major version.
*
* @var int
*/
protected $solrmajorversion = null;
/**
* Initialises the search engine configuration.
*
* @return void
*/
public function __construct() {
    parent::__construct();

    // Client reuse is broken on curl 7.35.0, so when that version is detected
    // we fall back to creating a fresh SolrClient for every request.
    $curlinfo = curl_version();
    $brokencurl = isset($curlinfo['version']) && (stripos($curlinfo['version'], '7.35.') === 0);
    if ($brokencurl) {
        $this->cacheclient = false;
    }
}
/**
* Prepares a Solr query, applies filters and executes it returning its results.
*
* @throws \core_search\engine_exception
* @param \stdClass $filters Containing query and filters.
* @param \stdClass $accessinfo Information about areas user can access.
* @param int $limit The maximum number of results to return.
* @return \core_search\document[] Results or false if no results
*/
public function execute_query($filters, $accessinfo, $limit = 0) {
    // Note: the original declared "global $USER" and assigned the client to an
    // unused local; both were dead code and have been removed.
    if (empty($limit)) {
        $limit = \core_search\manager::MAX_RESULTS;
    }

    // If there is any problem with the connection we trigger the exception as soon as possible.
    $this->get_search_client();

    // Create the query object, applying the user's filters and access restrictions.
    $query = $this->create_user_query($filters, $accessinfo);
    if (!$query) {
        // The query cannot possibly match anything (e.g. the user has no accessible contexts).
        return [];
    }

    // We expect good match rates, so for our first get, we will get a small number of records.
    // This significantly speeds solr response time for first few pages.
    $query->setRows(min($limit * 3, static::QUERY_SIZE));
    $response = $this->get_query_response($query);

    // Get count data out of the response, and reset our counters.
    list($included, $found) = $this->get_response_counts($response);
    $this->totalenginedocs = $found;
    $this->processeddocs = 0;
    $this->skippeddocs = 0;
    if ($included == 0 || $this->totalenginedocs == 0) {
        // No results.
        return array();
    }

    // Get valid documents out of the response (access checks may drop some).
    $results = $this->process_response($response, $limit);

    // We have processed all the docs in the response at this point.
    $this->processeddocs += $included;

    // If we haven't reached the limit, and there are more docs left in Solr, lets keep trying.
    while (count($results) < $limit && ($this->totalenginedocs - $this->processeddocs) > 0) {
        // Offset the start of the query, and since we are making another call, get more per call.
        $query->setStart($this->processeddocs);
        $query->setRows(static::QUERY_SIZE);
        $response = $this->get_query_response($query);
        list($included, $found) = $this->get_response_counts($response);
        if ($included == 0 || $found == 0) {
            // No new results were found. Found being empty would be weird, so we will just return.
            return $results;
        }
        $this->totalenginedocs = $found;

        // Get the new response docs, limiting to remaining we need, then add it to the end of the results array.
        $newdocs = $this->process_response($response, $limit - count($results));
        $results = array_merge($results, $newdocs);

        // Add to our processed docs count.
        $this->processeddocs += $included;
    }

    return $results;
}
/**
* Takes a query and returns the response in SolrObject format.
*
* @param SolrQuery $query Solr query object.
* @return SolrObject|false Response document or false on error.
*/
protected function get_query_response($query) {
try {
// Run the query against Solr and unwrap the raw SolrObject response.
return $this->get_search_client()->query($query)->getResponse();
} catch (\SolrClientException $ex) {
// Connection-level failure talking to the Solr server.
debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
// NOTE(review): $this->queryerror is presumably declared on the parent \core_search\engine class - confirm.
$this->queryerror = $ex->getMessage();
return false;
} catch (\SolrServerException $ex) {
// Solr itself rejected or failed to execute the query.
debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
$this->queryerror = $ex->getMessage();
return false;
}
}
/**
* Returns the total number of documents available for the most recently call to execute_query.
*
* @return int
*/
public function get_query_total_count() {
    // Documents rejected by Moodle-side access checks don't count towards
    // what the most recent execute_query() call can actually return.
    return $this->totalenginedocs - $this->skippeddocs;
}
/**
* Returns count information for a provided response. Will return 0, 0 for invalid or empty responses.
*
* @param SolrDocument $response The response document from Solr.
* @return array A two part array. First how many response docs are in the response.
* Second, how many results are vailable in the engine.
*/
protected function get_response_counts($response) {
    // File-grouped query: count groups returned vs total groups found.
    if (isset($response->grouped->solr_filegroupingid->ngroups)) {
        $grouping = $response->grouped->solr_filegroupingid;
        return array(count($grouping->groups), $grouping->ngroups);
    }

    // Standard (ungrouped) query.
    if (isset($response->response->numFound)) {
        $total = $response->response->numFound;
        $returned = 0;
        if ($total > 0 && is_array($response->response->docs)) {
            $returned = count($response->response->docs);
        }
        return array($returned, $total);
    }

    // Invalid or empty response.
    return array(0, 0);
}
/**
* Prepares a new query object with needed limits, filters, etc.
*
* @param \stdClass $filters Containing query and filters.
* @param \stdClass $accessinfo Information about contexts the user can access
* @return \SolrDisMaxQuery|null Query object or null if they can't get any results
*/
protected function create_user_query($filters, $accessinfo) {
global $USER;
// Let's keep these changes internal.
$data = clone $filters;
$query = new \SolrDisMaxQuery();
// Set up the main query text, highlighting and the fields to return.
$this->set_query($query, $data->q);
$this->add_fields($query);
// Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
// we are really interested in caching contexts filters instead.
if (!empty($data->title)) {
$query->addFilterQuery('{!field cache=false f=title}' . $data->title);
}
if (!empty($data->areaids)) {
// If areaids are specified, we want to get any that match.
$query->addFilterQuery('{!cache=false}areaid:(' . implode(' OR ', $data->areaids) . ')');
}
if (!empty($data->courseids)) {
$query->addFilterQuery('{!cache=false}courseid:(' . implode(' OR ', $data->courseids) . ')');
}
if (!empty($data->groupids)) {
$query->addFilterQuery('{!cache=false}groupid:(' . implode(' OR ', $data->groupids) . ')');
}
if (!empty($data->userids)) {
$query->addFilterQuery('{!cache=false}userid:(' . implode(' OR ', $data->userids) . ')');
}
// Date-range filter: either bound may be open-ended ('*' is Solr's wildcard bound).
if (!empty($data->timestart) or !empty($data->timeend)) {
if (empty($data->timestart)) {
$data->timestart = '*';
} else {
$data->timestart = \search_solr\document::format_time_for_engine($data->timestart);
}
if (empty($data->timeend)) {
$data->timeend = '*';
} else {
$data->timeend = \search_solr\document::format_time_for_engine($data->timeend);
}
// No cache.
$query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
}
// Restrict to users who are supposed to be able to see a particular result.
$query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');
// And finally restrict it to the context where the user can access, we want this one cached.
// If the user can access all contexts $usercontexts value is just true, we don't need to filter
// in that case.
if (!$accessinfo->everything && is_array($accessinfo->usercontexts)) {
// Join all area contexts into a single array and implode.
$allcontexts = array();
foreach ($accessinfo->usercontexts as $areaid => $areacontexts) {
if (!empty($data->areaids) && !in_array($areaid, $data->areaids)) {
// Skip unused areas.
continue;
}
foreach ($areacontexts as $contextid) {
// Ensure they are unique.
$allcontexts[$contextid] = $contextid;
}
}
if (empty($allcontexts)) {
// This means there are no valid contexts for them, so they get no results.
return null;
}
$query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
}
if (!$accessinfo->everything && $accessinfo->separategroupscontexts) {
// Add another restriction to handle group ids. If there are any contexts using separate
// groups, then results in that context will not show unless you belong to the group.
// (Note: Access all groups is taken care of earlier, when computing these arrays.)
// This special exceptions list allows for particularly pig-headed developers to create
// multiple search areas within the same module, where one of them uses separate
// groups and the other uses visible groups. It is a little inefficient, but this should
// be rare.
$exceptions = '';
if ($accessinfo->visiblegroupscontextsareas) {
foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) {
$exceptions .= ' OR (contextid:' . $contextid . ' AND areaid:(' .
implode(' OR ', $areaids) . '))';
}
}
if ($accessinfo->usergroups) {
// Either the document has no groupid, or the groupid is one that the user
// belongs to, or the context is not one of the separate groups contexts.
// Note: '(*:* -field:...)' is the Solr idiom for "documents where field is absent/not matching".
$query->addFilterQuery('(*:* -groupid:[* TO *]) OR ' .
'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ') OR ' .
'(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))' .
$exceptions);
} else {
// Either the document has no groupid, or the context is not a restricted one.
$query->addFilterQuery('(*:* -groupid:[* TO *]) OR ' .
'(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))' .
$exceptions);
}
}
if ($this->file_indexing_enabled()) {
// Now group records by solr_filegroupingid. Limit to 3 results per group.
$query->setGroup(true);
$query->setGroupLimit(3);
$query->setGroupNGroups(true);
$query->addGroupField('solr_filegroupingid');
} else {
// Make sure we only get text files, in case the index has pre-existing files.
$query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT);
}
// If ordering by location, add in boost for the relevant course or context ids.
if (!empty($filters->order) && $filters->order === 'location') {
$coursecontext = $filters->context->get_course_context();
$query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST);
if ($filters->context->contextlevel !== CONTEXT_COURSE) {
// If it's a block or activity, also add a boost for the specific context id.
$query->addBoostQuery('contextid', $filters->context->id, self::CONTEXT_BOOST);
}
}
return $query;
}
/**
* Prepares a new query by setting the query, start offset and rows to return.
*
* @param SolrQuery $query
* @param object $q Containing query and filters.
*/
protected function set_query($query, $q) {
    // The main query text.
    $query->setQuery($q);

    // A reasonable maximum number of rows per request.
    $query->setRows(static::QUERY_SIZE);

    // Configure highlighting: matched terms in the returned fields are wrapped
    // in the marker constants so they can be post-processed later.
    $query->setHighlight(true);
    $query->setHighlightFragsize(static::FRAG_SIZE);
    $query->setHighlightSimplePre(self::HIGHLIGHT_START);
    $query->setHighlightSimplePost(self::HIGHLIGHT_END);
    $query->setHighlightMergeContiguous(true);
    foreach ($this->highlightfields as $highlightfield) {
        $query->addHighlightField($highlightfield);
    }
}
/**
* Sets fields to be returned in the result.
*
* @param SolrDisMaxQuery|SolrQuery $query object.
*/
public function add_fields($query) {
    $documentclass = $this->get_document_classname();
    $isdismax = ($query instanceof \SolrDisMaxQuery);

    foreach ($documentclass::get_default_fields_definition() as $fieldname => $fielddata) {
        // Every defined field should be returned in the results.
        $query->addField($fieldname);
        // DisMax queries additionally need to know which fields the main query runs against.
        if ($isdismax && !empty($fielddata['mainquery'])) {
            $query->addQueryField($fieldname);
        }
    }
}
/**
* Finds the key common to both highlighing and docs array returned from response.
* @param object $response containing results.
*/
public function add_highlight_content($response) {
    if (!isset($response->highlighting)) {
        // Nothing was highlighted for this response.
        return;
    }

    // Match each result doc to its highlighting entry by document id and
    // merge the highlighted values over the originals.
    foreach ($response->response->docs as $doc) {
        $docid = $doc->id;
        $this->merge_highlight_field_values($doc, $response->highlighting->$docid);
    }
}
/**
* Adds the highlighting array values to docs array values.
*
* @throws \core_search\engine_exception
* @param object $doc containing the results.
* @param object $highlighteddoc containing the highlighted results values.
*/
public function merge_highlight_field_values($doc, $highlighteddoc) {
    foreach ($this->highlightfields as $fieldname) {
        if (empty($doc->$fieldname)) {
            continue;
        }
        // Multivalued solr fields cannot be merged back into a single value.
        if (is_array($doc->{$fieldname})) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
        }
        if (!empty($highlighteddoc->$fieldname)) {
            // Replace the stored value with the first highlighted fragment.
            $doc->$fieldname = reset($highlighteddoc->$fieldname);
        }
    }
}
/**
* Filters the response on Moodle side.
*
* @param SolrObject $response Solr object containing the response return from solr server.
* @param int $limit The maximum number of results to return. 0 for all.
* @param bool $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access.
* @return array $results containing final results to be displayed.
*/
protected function process_response($response, $limit = 0, $skipaccesscheck = false) {
global $USER;
if (empty($response)) {
return array();
}
// File-grouped responses take a completely different processing path.
if (isset($response->grouped)) {
return $this->grouped_files_process_response($response, $limit);
}
$userid = $USER->id;
$noownerid = \core_search\manager::NO_OWNER_ID;
$numgranted = 0;
if (!$docs = $response->response->docs) {
return array();
}
$out = array();
if (!empty($response->response->numFound)) {
// Merge highlighted fragments into the docs before converting them.
$this->add_highlight_content($response);
// Iterate through the results checking its availability and whether they are available for the user or not.
foreach ($docs as $key => $docdata) {
if ($docdata['owneruserid'] != $noownerid && $docdata['owneruserid'] != $userid) {
// If owneruserid is set, no other user should be able to access this record.
continue;
}
if (!$searcharea = $this->get_search_area($docdata->areaid)) {
// The search area no longer exists; skip the orphaned record.
continue;
}
// Convert the SolrObject into a plain array for the document class.
$docdata = $this->standarize_solr_obj($docdata);
if ($skipaccesscheck) {
$access = \core_search\manager::ACCESS_GRANTED;
} else {
$access = $searcharea->check_access($docdata['itemid']);
}
switch ($access) {
case \core_search\manager::ACCESS_DELETED:
// The source item is gone from Moodle, so purge it from the index too.
$this->delete_by_id($docdata['id']);
// Remove one from our processed and total counters, since we promptly deleted.
$this->processeddocs--;
$this->totalenginedocs--;
break;
case \core_search\manager::ACCESS_DENIED:
// Track skipped docs so get_query_total_count() stays accurate.
$this->skippeddocs++;
break;
case \core_search\manager::ACCESS_GRANTED:
$numgranted++;
// Add the doc.
$out[] = $this->to_document($searcharea, $docdata);
break;
}
// Stop when we hit our limit.
if (!empty($limit) && count($out) >= $limit) {
break;
}
}
}
return $out;
}
/**
* Processes grouped file results into documents, with attached matching files.
*
* @param SolrObject $response The response returned from solr server
* @param int $limit The maximum number of results to return. 0 for all.
* @return array Final results to be displayed.
*/
protected function grouped_files_process_response($response, $limit = 0) {
    // Note: the original had unreachable "break;" statements after "continue 2;"
    // in the access switch below; that dead code has been removed.

    // If we can't find the grouping, or there are no matches in the grouping, return empty.
    if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
        return array();
    }

    $numgranted = 0;
    $orderedids = array();
    $completedocs = array();
    $incompletedocs = array();

    // NOTE(review): assumes the response carries a highlighting member whenever grouping is on - confirm.
    $highlightingobj = $response->highlighting;

    // Each group represents a "master document".
    $groups = $response->grouped->solr_filegroupingid->groups;
    foreach ($groups as $group) {
        $groupid = $group->groupValue;
        $groupdocs = $group->doclist->docs;
        $firstdoc = reset($groupdocs);

        if (!$searcharea = $this->get_search_area($firstdoc->areaid)) {
            // Well, this is a problem.
            continue;
        }

        // Check for access.
        $access = $searcharea->check_access($firstdoc->itemid);
        switch ($access) {
            case \core_search\manager::ACCESS_DELETED:
                // If deleted from Moodle, delete from index and then continue.
                $this->delete_by_id($firstdoc->id);
                // Remove one from our processed and total counters, since we promptly deleted.
                $this->processeddocs--;
                $this->totalenginedocs--;
                continue 2;
            case \core_search\manager::ACCESS_DENIED:
                // This means we should just skip for the current user.
                $this->skippeddocs++;
                continue 2;
        }
        $numgranted++;

        $maindoc = false;
        $fileids = array();
        // Separate the main document and any files returned.
        foreach ($groupdocs as $groupdoc) {
            if ($groupdoc->id == $groupid) {
                $maindoc = $groupdoc;
            } else if (isset($groupdoc->solr_fileid)) {
                $fileids[] = $groupdoc->solr_fileid;
            }
        }

        // Store the id of this group, in order, for later merging.
        $orderedids[] = $groupid;

        if (!$maindoc) {
            // We don't have the main doc, store what we know for later building.
            $incompletedocs[$groupid] = $fileids;
        } else {
            if (isset($highlightingobj->$groupid)) {
                // Merge the highlighting for this doc.
                $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
            }
            $docdata = $this->standarize_solr_obj($maindoc);
            $doc = $this->to_document($searcharea, $docdata);
            // Now we need to attach the result files to the doc.
            foreach ($fileids as $fileid) {
                $doc->add_stored_file($fileid);
            }
            $completedocs[$groupid] = $doc;
        }

        if (!empty($limit) && $numgranted >= $limit) {
            // We have hit the max results, we will just ignore the rest.
            break;
        }
    }

    // Retrieve any main documents that weren't part of the grouped response.
    $incompletedocs = $this->get_missing_docs($incompletedocs);

    $out = array();
    // Now merge the complete and incomplete documents, in results order.
    foreach ($orderedids as $docid) {
        if (isset($completedocs[$docid])) {
            $out[] = $completedocs[$docid];
        } else if (isset($incompletedocs[$docid])) {
            $out[] = $incompletedocs[$docid];
        }
    }

    return $out;
}
/**
* Retreive any missing main documents and attach provided files.
*
* The missingdocs array should be an array, indexed by document id, of main documents we need to retrieve. The value
* associated to the key should be an array of stored_files or stored file ids to attach to the result document.
*
* Return array also indexed by document id.
*
* @param array() $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
* @return document[]
*/
protected function get_missing_docs($missingdocs) {
    if (empty($missingdocs)) {
        return array();
    }

    $wantedids = array_keys($missingdocs);

    // Fetch all the wanted documents in one custom query.
    $query = new \SolrQuery();
    $this->set_query($query, '*');
    $this->add_fields($query);
    $query->setRows(count($wantedids));
    $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $wantedids) . ')');

    // Access was already checked when these ids were first seen, so don't recheck.
    $retrieved = $this->process_response($this->get_query_response($query), 0, true);

    $out = array();
    foreach ($retrieved as $result) {
        $resultid = $result->get('id');
        if (!isset($missingdocs[$resultid])) {
            // A result we didn't ask for; ignore it.
            continue;
        }
        // Re-attach the files that belong to this document.
        foreach ($missingdocs[$resultid] as $filedoc) {
            $result->add_stored_file($filedoc);
        }
        $out[$resultid] = $result;
    }
    return $out;
}
/**
* Returns a standard php array from a \SolrObject instance.
*
* @param \SolrObject $obj
* @return array The returned document as an array.
*/
public function standarize_solr_obj(\SolrObject $obj) {
    $docdata = array();
    foreach ($obj->getPropertyNames() as $propertyname) {
        // Property names can come back padded with whitespace, see
        // http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $propertyname = trim($propertyname);
        $docdata[$propertyname] = $obj->offsetGet($propertyname);
    }
    return $docdata;
}
/**
* Adds a document to the search engine.
*
* This does not commit to the search engine.
*
* @param document $document
* @param bool $fileindexing True if file indexing is to be used
* @return bool
*/
public function add_document($document, $fileindexing = false) {
    $docdata = $document->export_for_engine();

    if (!$this->add_solr_document($docdata)) {
        // The main document could not be added; don't bother with its files.
        return false;
    }

    if ($fileindexing) {
        // Bring all files attached to this document up to date in the index.
        $this->process_document_files($document);
    }

    return true;
}
/**
* Adds a text document to the search engine.
*
* @param array $doc
* @return bool
*/
protected function add_solr_document($doc) {
    // Note: the original assigned addDocument()'s return to an unused local
    // ($result); that dead assignment has been removed.
    $solrdoc = new \SolrInputDocument();
    foreach ($doc as $field => $value) {
        $solrdoc->addField($field, $value);
    }

    try {
        // Solr auto-commits the document within AUTOCOMMIT_WITHIN milliseconds.
        $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
        return true;
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // We only use the first line of the message, as it's a fully java stacktrace behind it.
        $msg = strtok($e->getMessage(), "\n");
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER);
    }

    return false;
}
/**
* Index files attached to the docuemnt, ensuring the index matches the current document files.
*
* For documents that aren't known to be new, we check the index for existing files.
* - New files we will add.
* - Existing and unchanged files we will skip.
* - File that are in the index but not on the document will be deleted from the index.
* - Files that have changed will be re-indexed.
*
* @param document $document
*/
protected function process_document_files($document) {
if (!$this->file_indexing_enabled()) {
return;
}
// Maximum rows to process at a time.
$rows = 500;
// Get the attached files.
$files = $document->get_files();
// If this isn't a new document, we need to check the exiting indexed files.
if (!$document->get_is_new()) {
// We do this progressively, so we can handle lots of files cleanly.
list($numfound, $indexedfiles) = $this->get_indexed_files($document, 0, $rows);
$count = 0;
$idstodelete = array();
do {
// Go through each indexed file. We want to not index any stored and unchanged ones, delete any missing ones.
foreach ($indexedfiles as $indexedfile) {
$fileid = $indexedfile->solr_fileid;
if (isset($files[$fileid])) {
// Check for changes that would mean we need to re-index the file. If so, just leave in $files.
// Filelib does not guarantee time modified is updated, so we will check important values.
if ($indexedfile->modified != $files[$fileid]->get_timemodified()) {
continue;
}
if (strcmp($indexedfile->title, $files[$fileid]->get_filename()) !== 0) {
continue;
}
if ($indexedfile->solr_filecontenthash != $files[$fileid]->get_contenthash()) {
continue;
}
if ($indexedfile->solr_fileindexstatus == document::INDEXED_FILE_FALSE &&
$this->file_is_indexable($files[$fileid])) {
// This means that the last time we indexed this file, filtering blocked it.
// Current settings say it is indexable, so we will allow it to be indexed.
continue;
}
// If the file is already indexed, we can just remove it from the files array and skip it.
unset($files[$fileid]);
} else {
// This means we have found a file that is no longer attached, so we need to delete from the index.
// We do it later, since this is progressive, and it could reorder results.
$idstodelete[] = $indexedfile->id;
}
}
$count += $rows;
if ($count < $numfound) {
// If we haven't hit the total count yet, fetch the next batch.
list($numfound, $indexedfiles) = $this->get_indexed_files($document, $count, $rows);
}
} while ($count < $numfound);
// Delete files that are no longer attached.
foreach ($idstodelete as $id) {
// We directly delete the item using the client, as the engine delete_by_id won't work on file docs.
$this->get_search_client()->deleteById($id);
}
}
// Now we can actually index all the remaining files.
// ($files now only contains new files and files that changed since last indexing.)
foreach ($files as $file) {
$this->add_stored_file($document, $file);
}
}
/**
* Get the currently indexed files for a particular document, returns the total count, and a subset of files.
*
* @param document $document
* @param int $start The row to start the results on. Zero indexed.
* @param int $rows The number of rows to fetch
* @return array A two element array, the first is the total number of availble results, the second is an array
* of documents for the current request.
*/
protected function get_indexed_files($document, $start = 0, $rows = 500) {
    // Hand-built, stripped-down query for efficiency: match everything,
    // paginate, and keep the sort order consistent between batches.
    $query = new \SolrQuery();
    $query->setQuery('*');
    $query->setRows($rows);
    $query->setStart($start);
    $query->addSortField('id');

    // Only request the bare minimum of fields needed for file comparison.
    $wantedfields = array('id', 'modified', 'title', 'solr_fileid', 'solr_filecontenthash', 'solr_fileindexstatus');
    foreach ($wantedfields as $wantedfield) {
        $query->addField($wantedfield);
    }

    // Restrict to file records grouped with this particular document.
    $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
    $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

    $response = $this->get_query_response($query);
    if (empty($response->response->numFound)) {
        return array(0, array());
    }

    return array($response->response->numFound, $this->convert_file_results($response));
}
/**
* A very lightweight handler for getting information about already indexed files from a Solr response.
*
* @param SolrObject $responsedoc A Solr response document
* @return stdClass[] An array of objects that contain the basic information for file processing.
*/
protected function convert_file_results($responsedoc) {
    $docs = $responsedoc->response->docs;
    if (!$docs) {
        return array();
    }

    $out = array();
    foreach ($docs as $doc) {
        // Copy only the fields needed for file-change comparison.
        $fileinfo = new \stdClass();
        $fileinfo->id = $doc->id;
        $fileinfo->modified = document::import_time_from_engine($doc->modified);
        $fileinfo->title = $doc->title;
        $fileinfo->solr_fileid = $doc->solr_fileid;
        $fileinfo->solr_filecontenthash = $doc->solr_filecontenthash;
        $fileinfo->solr_fileindexstatus = $doc->solr_fileindexstatus;
        $out[] = $fileinfo;
    }
    return $out;
}
/**
* Adds a file to the search engine.
*
* Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
* Tika has much better content type detection than Moodle, and we will have many more doc failures
* if we try to send mime types.
*
* @param document $document
* @param \stored_file $storedfile
* @return void
*/
protected function add_stored_file($document, $storedfile) {
$filedoc = $document->export_file_for_engine($storedfile);
if (!$this->file_is_indexable($storedfile)) {
// For files that we don't consider indexable, we will still place a reference in the search engine.
$filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
$this->add_solr_document($filedoc);
return;
}
// Indexable file: send it to Solr's extract handler (Tika) over a direct curl request.
$curl = $this->get_curl_object();
$url = $this->get_connection_url('/update/extract');
// Return results as XML.
$url->param('wt', 'xml');
// This will prevent solr from automatically making fields for every tika output.
$url->param('uprefix', 'ignored_');
// Control how content is captured. This will keep our file content clean of non-important metadata.
$url->param('captureAttr', 'true');
// Move the content to a field for indexing.
$url->param('fmap.content', 'solr_filecontent');
// These are common fields that matches the standard *_point dynamic field and causes an error.
$url->param('fmap.media_white_point', 'ignored_mwp');
$url->param('fmap.media_black_point', 'ignored_mbp');
// Copy each key to the url with literal.
// We place in a temp name then copy back to the true field, which prevents errors or Tika overwriting common field names.
foreach ($filedoc as $key => $value) {
// This will take any fields from tika that match our schema and discard them, so they don't overwrite ours.
$url->param('fmap.'.$key, 'ignored_'.$key);
// Place data in a tmp field.
$url->param('literal.mdltmp_'.$key, $value);
// Then move to the final field.
$url->param('fmap.mdltmp_'.$key, $key);
}
// This sets the true filename for Tika.
$url->param('resource.name', $storedfile->get_filename());
// A giant block of code that is really just error checking around the curl request.
try {
// Now actually do the request.
$result = $curl->post($url->out(false), array('myfile' => $storedfile));
$code = $curl->get_errno();
$info = $curl->get_info();
// Now error handling. It is just informational, since we aren't tracking per file/doc results.
if ($code != 0) {
// This means an internal cURL error occurred error is in result.
$message = 'Curl error '.$code.' while indexing file with document id '.$filedoc['id'].': '.$result.'.';
debugging($message, DEBUG_DEVELOPER);
} else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
// Unexpected HTTP response code.
$message = 'Error while indexing file with document id '.$filedoc['id'];
// Try to get error message out of msg or title if it exists.
if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
$message .= ': '.$matches[1];
} else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
$message .= ': '.$matches[1];
}
// This is a common error, happening whenever a file fails to index for any reason, so we will make it quieter.
if (CLI_SCRIPT && !PHPUNIT_TEST) {
mtrace($message);
}
} else {
// Check for the expected status field.
if (preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
// Now check for the expected status of 0, if not, error.
if ((int)$matches[1] !== 0) {
$message = 'Unexpected Solr status code '.(int)$matches[1];
$message .= ' while indexing file with document id '.$filedoc['id'].'.';
debugging($message, DEBUG_DEVELOPER);
} else {
// The document was successfully indexed.
return;
}
} else {
// We received an unprocessable response.
$message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
$message .= strtok($result, "\n");
debugging($message, DEBUG_DEVELOPER);
}
}
} catch (\Exception $e) {
// There was an error, but we are not tracking per-file success, so we just continue on.
debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'".', DEBUG_DEVELOPER);
}
// If we get here, the document was not indexed due to an error. So we will index just the base info without the file.
$filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
$this->add_solr_document($filedoc);
}
/**
 * Checks to see if a passed file is indexable.
 *
 * A file is skipped when it exceeds the configured size limit (maxindexfilekb,
 * in kilobytes) or when it is a Moodle backup archive.
 *
 * @param \stored_file $file The file to check
 * @return bool True if the file can be indexed
 */
protected function file_is_indexable($file) {
    // Work out the size cap in bytes; zero/empty config means "no limit".
    $maxbytes = empty($this->config->maxindexfilekb) ? 0 : $this->config->maxindexfilekb * 1024;
    if ($maxbytes && $file->get_filesize() > $maxbytes) {
        // Too large to send for content extraction.
        return false;
    }
    // Moodle backup archives contain nothing usefully searchable.
    if ($file->get_mimetype() == 'application/vnd.moodle.backup') {
        return false;
    }
    return true;
}
/**
 * Commits all pending changes to the Solr index.
 *
 * @return void
 */
protected function commit() {
    $client = $this->get_search_client();
    $client->commit();
}
/**
 * Do any area cleanup needed, and do anything to confirm contents.
 *
 * Return false to prevent the search area completed time and stats from being updated.
 *
 * @param \core_search\base $searcharea The search area that was complete
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index is being performed
 * @return bool True means that data is considered indexed
 */
public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
    // Flush anything still buffered server-side so the area's documents are visible.
    $this->commit();
    return true;
}
/**
 * Return true if file indexing is supported and enabled. False otherwise.
 *
 * @return bool
 */
public function file_indexing_enabled() {
    // Config values are stored as strings; normalise to a real boolean.
    $enabled = $this->config->fileindexing;
    return (bool)$enabled;
}
/**
 * Defragments the index.
 *
 * @return void
 */
public function optimize() {
    $client = $this->get_search_client();
    // Optimize down to a single segment, waiting for the searcher, without a soft commit.
    $client->optimize(1, true, false);
}
/**
 * Deletes the specified document.
 *
 * Deletion is done via solr_filegroupingid so any attached file documents
 * are removed together with the main document.
 *
 * @param string $id The document id to delete
 * @return void
 */
public function delete_by_id($id) {
    $client = $this->get_search_client();
    $client->deleteByQuery('solr_filegroupingid:' . $id);
    $this->commit();
}
/**
 * Delete all area's documents.
 *
 * With no area id given, the whole index is wiped.
 *
 * @param string $areaid
 * @return void
 */
public function delete($areaid = null) {
    // Either restrict to one area or match every document in the index.
    $query = $areaid ? 'areaid:' . $areaid : '*:*';
    $this->get_search_client()->deleteByQuery($query);
    $this->commit();
}
/**
 * Pings the Solr server using search_solr config
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {
    // Configuration/connectivity check first; returns an error string on failure.
    $configured = $this->is_server_configured();
    if ($configured !== true) {
        return $configured;
    }
    // The configuration check already contacted the server. On pages where
    // performance matters we stop here and skip the full schema validation.
    if ($this->should_skip_schema_check()) {
        return true;
    }
    // Bring the schema up to date if it needs (and is able) to be updated.
    $schemalatest = $this->check_latest_schema();
    if ($schemalatest !== true) {
        return $schemalatest;
    }
    // Finally, confirm the schema has actually been set up.
    try {
        (new \search_solr\schema())->validate_setup();
    } catch (\moodle_exception $e) {
        return $e->getMessage();
    }
    return true;
}
/**
 * Is the solr server properly configured?.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_configured() {
    // Both a hostname and an index name are mandatory.
    if (empty($this->config->server_hostname) || empty($this->config->indexname)) {
        return 'No solr configuration found';
    }
    // Ask for a client without triggering an exception; false means unavailable.
    $client = $this->get_search_client(false);
    if (!$client) {
        return get_string('engineserverstatus', 'search');
    }
    try {
        // Solr 4.0 is the minimum supported server version.
        if ($this->get_solr_major_version() < 4) {
            return get_string('minimumsolr4', 'search_solr');
        }
    } catch (\SolrClientException $ex) {
        debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    } catch (\SolrServerException $ex) {
        debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    }
    return true;
}
/**
 * Returns the solr server major version.
 *
 * The value is cached after the first request to avoid repeated server calls.
 *
 * @return int
 */
public function get_solr_major_version() {
    if ($this->solrmajorversion === null) {
        // We should really ping the server first to see if the specified index name is
        // valid, but we want to minimise solr server requests as they are expensive.
        // system() emits a warning if it cannot reach the configured index, hence the @.
        $systemdata = @$this->get_search_client()->system();
        $specversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');
        // 'solr-spec-version' looks like e.g. '4.10.4'; keep the leading major number.
        $this->solrmajorversion = intval(substr($specversion, 0, strpos($specversion, '.')));
    }
    return $this->solrmajorversion;
}
/**
 * Checks if the PHP Solr extension is available.
 *
 * @return bool
 */
public function is_installed() {
    // The PECL solr extension defines solr_get_version() when it is loaded.
    return function_exists('solr_get_version');
}
/**
 * Returns the solr client instance.
 *
 * We don't reuse SolrClient if we are on libcurl 7.35.0, due to a bug in that version of curl.
 *
 * @throws \core_search\engine_exception When the extension is missing or the client cannot be created.
 * @param bool $triggerexception Whether to throw when client creation fails (instead of returning it as-is).
 * @return \SolrClient
 */
protected function get_search_client($triggerexception = true) {
    global $CFG;
    // Type comparison as it is set to false if not available.
    if ($this->client !== null) {
        return $this->client;
    }
    // Build the SolrClient option array from plugin config; empty/unset values fall back
    // to the pecl-solr defaults (empty string) except timeout, which defaults to 30s.
    $options = array(
        'hostname' => $this->config->server_hostname,
        'path' => '/solr/' . $this->config->indexname,
        'login' => !empty($this->config->server_username) ? $this->config->server_username : '',
        'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
        'port' => !empty($this->config->server_port) ? $this->config->server_port : '',
        'secure' => !empty($this->config->secure) ? true : false,
        'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
        'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
        'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
        'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
        'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
        'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30'
    );
    // Respect the site proxy configuration unless the Solr host is in the bypass list.
    if ($CFG->proxyhost && !is_proxybypass('http://' . $this->config->server_hostname . '/')) {
        $options['proxy_host'] = $CFG->proxyhost;
        if (!empty($CFG->proxyport)) {
            $options['proxy_port'] = $CFG->proxyport;
        }
        // Proxy credentials are only passed when both user and password are set.
        if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
            $options['proxy_login'] = $CFG->proxyuser;
            $options['proxy_password'] = $CFG->proxypassword;
        }
    }
    if (!class_exists('\SolrClient')) {
        throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
    }
    $client = new \SolrClient($options);
    // Defensive check kept from the original: treat a false client as a server problem.
    if ($client === false && $triggerexception) {
        throw new \core_search\engine_exception('engineserverstatus', 'search');
    }
    // Only cache when allowed (see the libcurl 7.35.0 note above).
    if ($this->cacheclient) {
        $this->client = $client;
    }
    return $client;
}
/**
 * Returns a curl object for conntecting to solr.
 *
 * The instance is built once and reused; it carries the SSL, timeout and
 * basic-auth settings derived from the plugin configuration.
 *
 * @return \curl
 */
public function get_curl_object() {
    if (!is_null($this->curl)) {
        return $this->curl;
    }
    // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
    $this->curl = new \curl(['ignoresecurity' => true]);
    $options = [];
    // Build the SSL options. Based on pecl-solr and general testing.
    if (!empty($this->config->secure)) {
        // Map config settings onto their curl option names; only non-empty values are applied.
        $sslmap = [
            'ssl_cert' => 'CURLOPT_SSLCERT',
            'ssl_key' => 'CURLOPT_SSLKEY',
            'ssl_keypassword' => 'CURLOPT_KEYPASSWD',
            'ssl_cainfo' => 'CURLOPT_CAINFO',
            'ssl_capath' => 'CURLOPT_CAPATH',
        ];
        foreach ($sslmap as $configkey => $curloption) {
            if (!empty($this->config->$configkey)) {
                $options[$curloption] = $this->config->$configkey;
            }
        }
        // Certificate and key are both PEM-encoded.
        if (isset($options['CURLOPT_SSLCERT'])) {
            $options['CURLOPT_SSLCERTTYPE'] = 'PEM';
        }
        if (isset($options['CURLOPT_SSLKEY'])) {
            $options['CURLOPT_SSLKEYTYPE'] = 'PEM';
        }
    }
    // Set timeout as for Solr client.
    $options['CURLOPT_TIMEOUT'] = !empty($this->config->server_timeout) ? $this->config->server_timeout : '30';
    $this->curl->setopt($options);
    // Attach basic auth credentials when both username and password are configured.
    if (!empty($this->config->server_username) && !empty($this->config->server_password)) {
        $credentials = $this->config->server_username . ':' . $this->config->server_password;
        $this->curl->setHeader('Authorization: Basic ' . base64_encode($credentials));
    }
    return $this->curl;
}
/**
 * Return a Moodle url object for the server connection.
 *
 * @param string $path The solr path to append.
 * @return \moodle_url
 */
public function get_connection_url($path) {
    // Must use the proper protocol, or SSL will fail.
    $scheme = empty($this->config->secure) ? 'http' : 'https';
    $host = rtrim($this->config->server_hostname, '/');
    $port = empty($this->config->server_port) ? '' : ':' . $this->config->server_port;
    $fullpath = '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');
    return new \moodle_url($scheme . '://' . $host . $port . $fullpath);
}
/**
 * Solr includes group support in the execute_query function.
 *
 * @return bool True
 */
public function supports_group_filtering() {
    // Grouping is handled natively by the Solr query support.
    return true;
}
/**
 * Updates the Solr schema from one known version to another.
 *
 * @param int $oldversion Schema version currently on the server
 * @param int $newversion Schema version to update to
 * @return true|string True on success, an error string otherwise
 */
protected function update_schema($oldversion, $newversion) {
    // Construct schema and check we are allowed to modify the server.
    $schema = new schema();
    $cansetup = $schema->can_setup_server();
    if ($cansetup !== true) {
        return $cansetup;
    }
    switch ($newversion) {
        case 2017091700:
            // This version just requires a setup call to add new fields.
            $setup = true;
            break;
        default:
            // If we don't know about the schema version we might not have implemented
            // the change correctly, so return an error.
            return get_string('schemaversionunknown', 'search');
    }
    if ($setup) {
        $schema->setup();
    }
    return true;
}
/**
 * Solr supports sort by location within course contexts or below.
 *
 * @param \context $context Context that the user requested search from
 * @return array Array from order name => display text
 */
public function get_supported_orders(\context $context) {
    $orders = parent::get_supported_orders($context);
    // Location sorting is only offered within a course (or an activity/block inside one).
    if ($context->get_course_context(false)) {
        $orders['location'] = get_string('order_location', 'search',
                $context->get_context_name());
    }
    return $orders;
}
/**
 * Solr supports search by user id.
 *
 * @return bool True
 */
public function supports_users() {
    // User searches are filtered via the indexed user id field.
    return true;
}
}