moodle/filter/urltolink/filter.php


								<?php


								// This file is part of Moodle - http://moodle.org/

								//

								// Moodle is free software: you can redistribute it and/or modify

								// it under the terms of the GNU General Public License as published by

								// the Free Software Foundation, either version 3 of the License, or

								// (at your option) any later version.

								//

								// Moodle is distributed in the hope that it will be useful,

								// but WITHOUT ANY WARRANTY; without even the implied warranty of

								// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								// GNU General Public License for more details.

								//

								// You should have received a copy of the GNU General Public License

								// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.


								/**

								 * Filter converting URLs in the text to HTML links

								 *

								 * @package    filter

								 * @subpackage urltolink

								 * @copyright  2010 David Mudrak <david@moodle.com>

								 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later

								 */


								defined('MOODLE_INTERNAL') || die();


								/**
								 * Text filter that turns plain-text URLs into clickable HTML links.
								 *
								 * The public entry point is {@see filter_urltolink::filter()}; the actual
								 * conversion is done by {@see filter_urltolink::convert_urls_into_links()}.
								 */
								class filter_urltolink extends moodle_text_filter {

								    /**
								     * @var array global configuration for this filter
								     *
								     * This might be eventually moved into parent class if we found it
								     * useful for other filters, too.
								     */
								    protected static $globalconfig;

								    /**
								     * Apply the filter to the text
								     *
								     * The text is only processed when the caller supplies 'originalformat' in
								     * $options and that format is listed in the plugin setting
								     * 'filter_urltolink/formats' (a comma separated list).
								     *
								     * @see filter_manager::apply_filter_chain()
								     * @param string $text to be processed by the text
								     * @param array $options filter options
								     * @return string text after processing
								     */
								    public function filter($text, array $options = array()) {
								        if (!isset($options['originalformat'])) {
								            // if the format is not specified, we are probably called by {@see format_string()}
								            // in that case, it would be dangerous to replace URL with the link because it could
								            // be stripped. therefore, we do nothing
								            return $text;
								        }

								        // The configured formats are strings produced by explode(), so the
								        // comparison is intentionally left non-strict.
								        $enabledformats = explode(',', get_config('filter_urltolink', 'formats'));
								        if (in_array($options['originalformat'], $enabledformats)) {
								            $this->convert_urls_into_links($text);
								        }
								        return $text;
								    }

								    ////////////////////////////////////////////////////////////////////////////
								    // internal implementation starts here
								    ////////////////////////////////////////////////////////////////////////////

								    /**
								     * Given some text this function converts any URLs it finds into HTML links
								     *
								     * The text is split into HTML tags and content chunks. Tags are left alone,
								     * content chunks are processed word by word (split on single spaces) and
								     * every URL found is wrapped in an anchor with class "_blanktarget".
								     * If the 'filter_urltolink/embedimages' setting is on, links created here
								     * that point to jpg/png/gif files are then turned into img tags.
								     *
								     * @param string $text Passed in by reference. The string to be searched for urls.
								     */
								    protected function convert_urls_into_links(&$text) {
								        // Existing anchors and spans marked class="nolink" are protected from
								        // processing (see MDL-21168 for background). A better way to ignore tags
								        // whether or not they are escaped partially or completely would be
								        // desirable. For example:
								        //<a href="blah">
								        //&lt;a href="blah"&gt;
								        //&lt;a href="blah">
								        $filterignoretagsopen  = array('<a\s[^>]+?>', '<span[^>]+?class="nolink"[^>]*?>');
								        $filterignoretagsclose = array('</a>', '</span>');
								        $ignoretags = [];
								        filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);

								        // Check if we support unicode modifiers in regular expressions. Cache it.
								        // TODO: this check should be a environment requirement in Moodle 2.0, as far as unicode
								        // chars are going to arrive to URLs officially really soon (2010?)
								        // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/
								        // Various ideas from: http://alanstorm.com/url_regex_explained
								        // Unicode check, negative assertion and other bits from Moodle.
								        static $unicoderegexp;
								        if (!isset($unicoderegexp)) {
								            // The warning is suppressed on purpose: when the check cannot be
								            // compiled preg_match() returns false and we fall back to ASCII below.
								            $unicoderegexp = @preg_match('/\pL/u', 'a');
								        }

								        // TODO MDL-21296 - use of unicode modifiers may cause a timeout
								        $urlstart = '(?:http(s)?://|(?<!://)(www\.))';
								        $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])';
								        $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})';
								        $port = '(?::\d*)';
								        $pathchar = '(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})';
								        $path = "(?:/$pathchar*)*";
								        $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
								        $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';

								        // Lookbehind assertion applied at the end of the match: the URL must not
								        // end with any of the characters ] ) , . ; so trailing punctuation stays
								        // outside of the generated link.
								        $lookbehindend = "(?<![]),.;])";

								        $regex = "$urlstart((?:$domainsegment\.)+$domainsegment|$numericip)" .
								                "($port?$path$querystring?$fragment?)$lookbehindend";
								        if ($unicoderegexp) {
								            $regex = '#' . $regex . '#ui';
								        } else {
								            // No unicode property support: swap the \pL based classes for plain a-z.
								            // This must be a literal replacement; '\pLl' is not a valid PCRE pattern
								            // (no delimiters), so preg_replace() would fail and wipe the whole regex.
								            // '\pLl' is listed first so that the trailing 'l' is consumed as well.
								            $regex = '#' . str_replace(array('\pLl', '\pL'), 'a-z', $regex) . '#i';
								        }

								        // Locate any HTML tags.
								        $matches = preg_split('/(<[^<|>]*>)/i', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

								        // Replacement template: $1 is the optional "s" of https, $2 the optional "www.",
								        // $3 the host and $4 the port/path/query/fragment part.
								        $replacement = '<a href="http$1://$2$3$4" class="_blanktarget">$0</a>';

								        // Iterate through the tokenized text to handle chunks (html and content).
								        foreach ($matches as $idx => $chunk) {
								            // Nothing to do. We skip completely any html chunk.
								            if (strpos(trim($chunk), '<') === 0) {
								                continue;
								            }

								            // Nothing to do. We skip any content chunk having any of these attributes.
								            if (preg_match('#(background=")|(action=")|(style="background)|(href=")|(src=")|(url [(])#', $chunk)) {
								                continue;
								            }

								            // Arrived here, we want to process every word in this chunk.
								            $words = explode(' ', $chunk);

								            foreach ($words as $idx2 => $word) {
								                // ReDoS protection. Stop processing if a word is too large.
								                if (strlen($word) >= 4096) {
								                    continue;
								                }

								                $linked = preg_replace($regex, $replacement, $word);
								                // preg_replace() returns null on a PCRE error (for example invalid
								                // UTF-8 with the /u modifier). Keep the original word in that case
								                // instead of silently dropping it from the text.
								                if ($linked !== null) {
								                    $words[$idx2] = $linked;
								                }
								            }

								            // Copy the result back to the array.
								            $matches[$idx] = implode(' ', $words);
								        }

								        $text = implode('', $matches);

								        if (!empty($ignoretags)) {
								            $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems.
								            $text = str_replace(array_keys($ignoretags), $ignoretags, $text);
								        }

								        if (get_config('filter_urltolink', 'embedimages')) {
								            // now try to inject the images, this code was originally in the mediapluing filter
								            // this may be useful only if somebody relies on the fact the links in FORMAT_MOODLE get converted
								            // to URLs which in turn change to real images
								            $search = '/<a href="([^"]+\.(jpg|png|gif))" class="_blanktarget">([^>]*)<\/a>/is';
								            $text = preg_replace_callback($search, 'filter_urltolink_img_callback', $text);
								        }
								    }
								}


								/**
								 * Replace a link to an image with the embedded image itself.
								 *
								 * Used as a preg_replace_callback() callback for automatic conversion of image
								 * URLs when FORMAT_MOODLE is used. Only anchors whose visible text equals their
								 * href are converted; any other anchor is returned untouched.
								 *
								 * @param array $link regex match: [0] whole anchor, [1] href, [3] anchor text
								 * @return string the img tag, or the original anchor when href and text differ
								 */
								function filter_urltolink_img_callback($link) {
								    $href = $link[1];
								    $label = $link[3];

								    // A link generated by this filter always shows its own URL as the text.
								    if ($href === $label) {
								        return '<img class="filter_urltolink_image" alt="" src="' . $href . '" />';
								    }

								    // Someone else authored this anchor, leave it exactly as it was.
								    return $link[0];
								}