You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

177 lines
7.3 KiB

<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Filter converting URLs in the text to HTML links
*
* @package filter
* @subpackage urltolink
* @copyright 2010 David Mudrak <david@moodle.com>
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
defined('MOODLE_INTERNAL') || die();
class filter_urltolink extends moodle_text_filter {

    /**
     * @var array global configuration for this filter
     *
     * This might eventually be moved into the parent class if we find it
     * useful for other filters, too.
     */
    protected static $globalconfig;

    /**
     * Apply the filter to the text
     *
     * The text is returned untouched when no 'originalformat' option is given, or
     * when that format is not listed in the 'formats' setting of filter_urltolink.
     *
     * @see filter_manager::apply_filter_chain()
     * @param string $text to be processed by the text
     * @param array $options filter options
     * @return string text after processing
     */
    public function filter($text, array $options = array()) {
        if (!isset($options['originalformat'])) {
            // If the format is not specified, we are probably called by {@see format_string()}.
            // In that case, it would be dangerous to replace URL with the link because it could
            // be stripped. Therefore, we do nothing.
            return $text;
        }

        // NOTE(review): the comparison is deliberately loose. The configured formats are
        // strings produced by explode(), while the format is presumably passed as an
        // integer constant - confirm before switching to a strict in_array().
        $enabledformats = explode(',', get_config('filter_urltolink', 'formats'));
        if (in_array($options['originalformat'], $enabledformats)) {
            $this->convert_urls_into_links($text);
        }

        return $text;
    }

    ////////////////////////////////////////////////////////////////////////////
    // internal implementation starts here
    ////////////////////////////////////////////////////////////////////////////

    /**
     * Given some text this function converts any URLs it finds into HTML links
     *
     * Existing anchors and spans with class "nolink" are set aside first and put back
     * afterwards. The remaining text is split into HTML tags and content chunks; only
     * content chunks are searched, word by word, for URLs. When the 'embedimages'
     * setting is on, links to jpg/png/gif files produced here are then turned into
     * image tags by filter_urltolink_img_callback().
     *
     * If any of the regular expression calls fails, the affected piece of text is left
     * as it was instead of being dropped.
     *
     * @param string $text Passed in by reference. The string to be searched for urls.
     */
    protected function convert_urls_into_links(&$text) {
        // Regions that must never be touched: existing links and "nolink" spans.
        // A better way to ignore tags whether or not they are escaped partially or
        // completely would be desirable (see MDL-21168 for more info). For example:
        // <a href="blah">
        // &lt;a href="blah"&gt;
        // &lt;a href="blah">
        $filterignoretagsopen = array('<a\s[^>]+?>', '<span[^>]+?class="nolink"[^>]*?>');
        $filterignoretagsclose = array('</a>', '</span>');
        $ignoretags = [];
        // Presumably swaps the matched regions in $text for placeholders and records
        // placeholder => original in $ignoretags; they are substituted back below.
        filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);

        // Check if we support unicode modifiers in regular expressions. Cache it.
        // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/
        // Various ideas from: http://alanstorm.com/url_regex_explained
        // Unicode check, negative assertion and other bits from Moodle.
        static $unicoderegexp;
        if (!isset($unicoderegexp)) {
            // Fails silently, returning false, when PCRE lacks unicode support.
            $unicoderegexp = @preg_match('/\pL/u', 'a');
        }

        // TODO MDL-21296 - use of unicode modifiers may cause a timeout.
        // Capture groups: 1 = "s" of https, 2 = "www.", 3 = host, 4 = port, path, query and fragment.
        $urlstart = '(?:http(s)?://|(?<!://)(www\.))';
        $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])';
        $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})';
        $port = '(?::\d*)';
        $pathchar = '(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})';
        $path = "(?:/$pathchar*)*";
        $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
        $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
        // Trailing lookbehind: the match must not end with one of the characters ] ) , . ;
        // so that sentence punctuation following a URL is not swallowed into the link.
        $lookbehindend = "(?<![]),.;])";

        $regex = "$urlstart((?:$domainsegment\.)+$domainsegment|$numericip)" .
                "($port?$path$querystring?$fragment?)$lookbehindend";

        if ($unicoderegexp) {
            $regex = '#' . $regex . '#ui';
        } else {
            // Without unicode support, degrade the letter classes to plain a-z. This is a
            // literal substitution, so str_replace() is used; the longer token goes first
            // so that '\pLl' is not left with a stray 'l' behind it.
            $regex = '#' . str_replace(array('\pLl', '\pL'), 'a-z', $regex) . '#i';
        }

        // Locate any HTML tags.
        $chunks = preg_split('/(<[^<|>]*>)/i', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

        // preg_split() returns false on failure; in that case leave the text unlinked.
        if ($chunks !== false) {
            // Iterate through the tokenized text to handle chunks (html and content).
            foreach ($chunks as $idx => $chunk) {
                // Nothing to do. We skip completely any html chunk.
                if (strpos(trim($chunk), '<') === 0) {
                    continue;
                }

                // Nothing to do. We skip any content chunk that looks like it carries an HTML
                // attribute or CSS url property. Unfortunately legit text like "url (http://...)"
                // will not become a link.
                if (preg_match('#(background=")|(action=")|(style="background)|(href=")|(src=")|(url [(])#', $chunk)) {
                    continue;
                }

                // Arrived here, we want to process every word in this chunk.
                $words = explode(' ', $chunk);
                foreach ($words as $idx2 => $word) {
                    // ReDoS protection. Do not process a word that is too large.
                    if (strlen($word) >= 4096) {
                        continue;
                    }
                    $linked = preg_replace($regex, '<a href="http$1://$2$3$4" class="_blanktarget">$0</a>', $word);
                    // preg_replace() returns null on error; keep the word unchanged then.
                    if ($linked !== null) {
                        $words[$idx2] = $linked;
                    }
                }

                // Copy the result back to the array.
                $chunks[$idx] = implode(' ', $words);
            }

            $text = implode('', $chunks);
        }

        if (!empty($ignoretags)) {
            // Reversed so "progressive" str_replace() will solve some nesting problems.
            $ignoretags = array_reverse($ignoretags);
            $text = str_replace(array_keys($ignoretags), $ignoretags, $text);
        }

        if (get_config('filter_urltolink', 'embedimages')) {
            // Now try to inject the images, this code was originally in the mediaplugin filter.
            // This may be useful only if somebody relies on the fact the links in FORMAT_MOODLE
            // get converted to URLs which in turn change to real images.
            $search = '/<a href="([^"]+\.(jpg|png|gif))" class="_blanktarget">([^>]*)<\/a>/is';
            $embedded = preg_replace_callback($search, 'filter_urltolink_img_callback', $text);
            // preg_replace_callback() returns null on error; keep the linked text then.
            if ($embedded !== null) {
                $text = $embedded;
            }
        }
    }
}
/**
 * Turn a link to an image into an embedded image.
 *
 * Used as a preg_replace_callback() callback for automatic conversion of image
 * URLs when FORMAT_MOODLE is used. Only anchors whose visible text is exactly
 * the same as their href are converted; anything else is returned unchanged.
 *
 * @param array $link regex match: [0] whole anchor, [1] href value, [3] anchor text
 * @return string the image tag, or the original anchor when href and text differ
 */
function filter_urltolink_img_callback($link) {
    $href = $link[1];
    $label = $link[3];

    if ($href === $label) {
        // The url matches the text, so this link was created by this filter.
        return '<img class="filter_urltolink_image" alt="" src="' . $href . '" />';
    }

    // Not a link created by this filter; leave it alone.
    return $link[0];
}