Changeset View
Changeset View
Standalone View
Standalone View
src/infrastructure/markup/markuprule/PhutilRemarkupHyperlinkRule.php
<?php | <?php | ||||
final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule { | final class PhutilRemarkupHyperlinkRule extends PhutilRemarkupRule { | ||||
const KEY_HYPERLINKS = 'hyperlinks'; | const KEY_HYPERLINKS = 'hyperlinks'; | ||||
public function getPriority() { | public function getPriority() { | ||||
return 400.0; | return 400.0; | ||||
} | } | ||||
public function apply($text) { | public function apply($text) { | ||||
static $angle_pattern; | static $angle_pattern; | ||||
static $curly_pattern; | static $curly_pattern; | ||||
static $bare_pattern; | static $bare_pattern; | ||||
if ($angle_pattern === null) { | if ($angle_pattern === null) { | ||||
// See T13608. Limit protocol matches to 32 characters to improve the | // See T13608. A previous version of this code matched bare URIs | ||||
// performance of the "<protocol>://" pattern, which can take a very long | // starting with "\w{3,}", which can take a very long time to match | ||||
// time to match against long inputs if the maximum length of a protocol | // against long inputs. | ||||
// sequence is unrestricted. | // | ||||
// Use a protocol length limit in all patterns for general sanity, | |||||
// and a negative lookbehind in the bare pattern to avoid explosive | |||||
// complexity during expression evaluation. | |||||
$protocol_fragment = '\w{3,32}'; | $protocol_fragment = '\w{3,32}'; | ||||
$uri_fragment = '[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+'; | $uri_fragment = '[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+'; | ||||
$angle_pattern = sprintf( | $angle_pattern = sprintf( | ||||
'(<(%s://%s?)>)', | '(<(%s://%s?)>)', | ||||
$protocol_fragment, | $protocol_fragment, | ||||
$uri_fragment); | $uri_fragment); | ||||
$curly_pattern = sprintf( | $curly_pattern = sprintf( | ||||
'({(%s://%s?)})', | '({(%s://%s?)})', | ||||
$protocol_fragment, | $protocol_fragment, | ||||
$uri_fragment); | $uri_fragment); | ||||
$bare_pattern = sprintf( | $bare_pattern = sprintf( | ||||
'(%s://%s)', | '((?<!\w)%s://%s)', | ||||
$protocol_fragment, | $protocol_fragment, | ||||
$uri_fragment); | $uri_fragment); | ||||
} | } | ||||
// Hyperlinks with explicit "<>" around them get linked exactly, without | // Hyperlinks with explicit "<>" around them get linked exactly, without | ||||
// the "<>". Angle brackets are basically special and mean "this is a URL | // the "<>". Angle brackets are basically special and mean "this is a URL | ||||
// with weird characters". This is assumed to be reasonable because they | // with weird characters". This is assumed to be reasonable because they | ||||
// don't appear in most normal text or most normal URLs. | // don't appear in most normal text or most normal URLs. | ||||
▲ Show 20 Lines • Show All 220 Lines • Show Last 20 Lines |