diff --git a/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php b/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php index 8f53ecc..74932d9 100644 --- a/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php +++ b/src/markup/engine/__tests__/PhutilRemarkupEngineTestCase.php @@ -1,115 +1,121 @@ markupText($root.$file); } } private function markupText($markup_file) { $contents = Filesystem::readFile($markup_file); $file = basename($markup_file); $parts = explode("\n~~~~~~~~~~\n", $contents); $this->assertEqual(3, count($parts), $markup_file); list($input_remarkup, $expected_output, $expected_text) = $parts; $engine = $this->buildNewTestEngine(); switch ($file) { case 'raw-escape.txt': // NOTE: Here, we want to test PhutilRemarkupEscapeRemarkupRule and // PhutilRemarkupBlockStorage, which are triggered by "\1". In the // test, "~" is used as a placeholder for "\1" since it's hard to type // "\1". $input_remarkup = str_replace('~', "\1", $input_remarkup); $expected_output = str_replace('~', "\1", $expected_output); $expected_text = str_replace('~', "\1", $expected_text); break; case 'toc.txt': $engine->setConfig('header.generate-toc', true); break; + case 'link-same-window.txt': + $engine->setConfig('uri.same-window', true); + break; + case 'link-square.txt': + $engine->setConfig('uri.base', 'http://www.example.com/'); + $engine->setConfig('uri.here', 'http://www.example.com/page/'); + break; } $actual_output = (string)$engine->markupText($input_remarkup); switch ($file) { case 'toc.txt': $table_of_contents = PhutilRemarkupHeaderBlockRule::renderTableOfContents($engine); $actual_output = $table_of_contents."\n\n".$actual_output; break; } $this->assertEqual( $expected_output, $actual_output, pht("Failed to markup HTML in file '%s'.", $file)); $engine->setMode(PhutilRemarkupEngine::MODE_TEXT); $actual_output = (string)$engine->markupText($input_remarkup); $this->assertEqual( $expected_text, $actual_output, pht("Failed to markup text in file '%s'.", $file)); } private function buildNewTestEngine() { $engine = new PhutilRemarkupEngine(); - $engine->setConfig('uri.prefix', 'http://www.example.com/'); $engine->setConfig( 'uri.allowed-protocols', array( 'http' => true, 'mailto' => true, 'tel' => true, )); $rules = array(); $rules[] = new PhutilRemarkupEscapeRemarkupRule(); $rules[] = new PhutilRemarkupMonospaceRule(); $rules[] = new PhutilRemarkupDocumentLinkRule(); $rules[] = new PhutilRemarkupHyperlinkRule(); $rules[] = new PhutilRemarkupBoldRule(); $rules[] = new PhutilRemarkupItalicRule(); $rules[] = new PhutilRemarkupDelRule(); $rules[] = new PhutilRemarkupUnderlineRule(); $rules[] = new PhutilRemarkupHighlightRule(); $blocks = array(); $blocks[] = new PhutilRemarkupQuotesBlockRule(); $blocks[] = new PhutilRemarkupReplyBlockRule(); $blocks[] = new PhutilRemarkupHeaderBlockRule(); $blocks[] = new PhutilRemarkupHorizontalRuleBlockRule(); $blocks[] = new PhutilRemarkupCodeBlockRule(); $blocks[] = new PhutilRemarkupLiteralBlockRule(); $blocks[] = new PhutilRemarkupNoteBlockRule(); $blocks[] = new PhutilRemarkupTableBlockRule(); $blocks[] = new PhutilRemarkupSimpleTableBlockRule(); $blocks[] = new PhutilRemarkupDefaultBlockRule(); $blocks[] = new PhutilRemarkupListBlockRule(); $blocks[] = new PhutilRemarkupInterpreterBlockRule(); foreach ($blocks as $block) { if (!($block instanceof PhutilRemarkupCodeBlockRule)) { $block->setMarkupRules($rules); } } $engine->setBlockRules($blocks); return $engine; } } diff --git a/src/markup/engine/__tests__/remarkup/link-same-window.txt b/src/markup/engine/__tests__/remarkup/link-same-window.txt new file mode 100644 index 0000000..937c83f --- /dev/null +++ b/src/markup/engine/__tests__/remarkup/link-same-window.txt @@ -0,0 +1,11 @@ +[[http://www.example.com/]] + +http://www.example.com/ +~~~~~~~~~~ +

http://www.example.com/

+ +

http://www.example.com/

+~~~~~~~~~~ +http://www.example.com/ + +http://www.example.com/ diff --git a/src/markup/engine/__tests__/remarkup/link-square.txt b/src/markup/engine/__tests__/remarkup/link-square.txt index 91d85f3..2b24f3a 100644 --- a/src/markup/engine/__tests__/remarkup/link-square.txt +++ b/src/markup/engine/__tests__/remarkup/link-square.txt @@ -1,29 +1,29 @@ [[http://www.example.com/]] [[http://www.example.com/ | example.com]] [[/]] [[#anchor]] [[#anchor | Anchors ]] ~~~~~~~~~~

http://www.example.com/

example.com

-

/

+

http://www.example.com/

-

#anchor

+

http://www.example.com/page/#anchor

-

Anchors

+

Anchors

~~~~~~~~~~ http://www.example.com/ example.com http://www.example.com/ -http://www.example.com/#anchor +http://www.example.com/page/#anchor -Anchors +Anchors diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupDocumentLinkRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupDocumentLinkRule.php index ed85d72..f632b98 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupDocumentLinkRule.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupDocumentLinkRule.php @@ -1,132 +1,137 @@ getEngine()->isTextMode()) { - $text = $link; - if (strncmp($link, '/', 1) == 0 || strncmp($link, '#', 1) == 0) { - $base = $this->getEngine()->getConfig('uri.prefix'); - if (strncmp($link, '/', 1) == 0) { - $base = rtrim($base, '/'); - } - $text = $base.$text; - } + $engine = $this->getEngine(); + + $is_anchor = false; + if (strncmp($link, '/', 1) == 0) { + $base = $engine->getConfig('uri.base'); + $base = rtrim($base, '/'); + $link = $base.$link; + } else if (strncmp($link, '#', 1) == 0) { + $here = $engine->getConfig('uri.here'); + $link = $here.$link; + + $is_anchor = true; + } + if ($engine->isTextMode()) { // If present, strip off "mailto:" or "tel:". - $text = preg_replace('/^(?:mailto|tel):/', '', $text); + $link = preg_replace('/^(?:mailto|tel):/', '', $link); - if ($link == $name) { - return $text; + if (!strlen($name)) { + return $link; } - return $name.' <'.$text.'>'; - } else if ($this->getEngine()->isHTMLMailMode()) { - if (strncmp($link, '/', 1) == 0 || strncmp($link, '#', 1) == 0) { - $base = $this->getEngine()->getConfig('uri.base'); - $text = $link; - if (strncmp($link, '/', 1) == 0) { - $base = rtrim($base, '/'); - } - $link = $base.$text; - } - } - // By default, we open links in a new window or tab. For anchors on the same - // page, just jump normally. - $target = '_blank'; - if (strncmp($link, '#', 1) == 0) { - $target = null; + return $name.' <'.$link.'>'; } - $name = preg_replace('/^(?:mailto|tel):/', '', $name); + if (!strlen($name)) { + $name = $link; + $name = preg_replace('/^(?:mailto|tel):/', '', $name); + } - if ($this->getEngine()->getState('toc')) { + if ($engine->getState('toc')) { return $name; + } + + $same_window = $engine->getConfig('uri.same-window', false); + if ($same_window) { + $target = null; } else { - return phutil_tag( - 'a', - array( - 'href' => $link, - 'class' => 'remarkup-link', - 'target' => $target, - ), - $name); + $target = '_blank'; } + + // For anchors on the same page, always stay here. + if ($is_anchor) { + $target = null; + } + + return phutil_tag( + 'a', + array( + 'href' => $link, + 'class' => 'remarkup-link', + 'target' => $target, + ), + $name); } public function markupAlternateLink(array $matches) { $uri = trim($matches[2]); // NOTE: We apply some special rules to avoid false positives here. The // major concern is that we do not want to convert `x[0][1](y)` in a // discussion about C source code into a link. To this end, we: // // - Don't match at word boundaries; // - require the URI to contain a "/" character or "@" character; and // - reject URIs which being with a quote character. if ($uri[0] == '"' || $uri[0] == "'" || $uri[0] == '`') { return $matches[0]; } if (strpos($uri, '/') === false && strpos($uri, '@') === false && strncmp($uri, 'tel:', 4)) { return $matches[0]; } return $this->markupDocumentLink( array( $matches[0], $matches[2], $matches[1], )); } public function markupDocumentLink(array $matches) { $uri = trim($matches[1]); - $name = trim(idx($matches, 2, $uri)); + $name = trim(idx($matches, 2)); // If whatever is being linked to begins with "/" or "#", or has "://", // or is "mailto:" or "tel:", treat it as a URI instead of a wiki page. $is_uri = preg_match('@(^/)|(://)|(^#)|(^(?:mailto|tel):)@', $uri); if ($is_uri && strncmp('/', $uri, 1) && strncmp('#', $uri, 1)) { $protocols = $this->getEngine()->getConfig( 'uri.allowed-protocols', array()); $protocol = id(new PhutilURI($uri))->getProtocol(); if (!idx($protocols, $protocol)) { // Don't treat this as a URI if it's not an allowed protocol. $is_uri = false; } } if (!$is_uri) { return $matches[0]; } return $this->getEngine()->storeText($this->renderHyperlink($uri, $name)); } } diff --git a/src/markup/engine/remarkup/markuprule/PhutilRemarkupHyperlinkRule.php b/src/markup/engine/remarkup/markuprule/PhutilRemarkupHyperlinkRule.php index b9c51e5..6147dba 100644 --- a/src/markup/engine/remarkup/markuprule/PhutilRemarkupHyperlinkRule.php +++ b/src/markup/engine/remarkup/markuprule/PhutilRemarkupHyperlinkRule.php @@ -1,99 +1,108 @@ " around them get linked exactly, without // the "<>". Angle brackets are basically special and mean "this is a URL // with weird characters". This is assumed to be reasonable because they // don't appear in normal text or normal URLs. $text = preg_replace_callback( '@<(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+?)>@', array($this, 'markupHyperlink'), $text); // Anything else we match "ungreedily", which means we'll look for // stuff that's probably puncutation or otherwise not part of the URL and // not link it. This lets someone write "QuicK! Go to // http://www.example.com/!". We also apply some paren balancing rules. // NOTE: We're explicitly avoiding capturing stored blocks, so text like // `http://www.example.com/[[x | y]]` doesn't get aggressively captured. $text = preg_replace_callback( '@(\w{3,}://[^\s'.PhutilRemarkupBlockStorage::MAGIC_BYTE.']+)@', array($this, 'markupHyperlinkUngreedy'), $text); return $text; } protected function markupHyperlink(array $matches) { $protocols = $this->getEngine()->getConfig( 'uri.allowed-protocols', array()); $protocol = id(new PhutilURI($matches[1]))->getProtocol(); if (!idx($protocols, $protocol)) { // If this URI doesn't use a whitelisted protocol, don't link it. This // is primarily intended to prevent javascript:// silliness. return $this->getEngine()->storeText($matches[1]); } return $this->storeRenderedHyperlink($matches[1]); } protected function storeRenderedHyperlink($link) { return $this->getEngine()->storeText($this->renderHyperlink($link)); } protected function renderHyperlink($link) { - if ($this->getEngine()->isTextMode()) { + $engine = $this->getEngine(); + + if ($engine->isTextMode()) { return $link; } - if ($this->getEngine()->getState('toc')) { + if ($engine->getState('toc')) { return $link; + } + + $same_window = $engine->getConfig('uri.same-window', false); + if ($same_window) { + $target = null; } else { - return phutil_tag( - 'a', - array( - 'href' => $link, - 'class' => 'remarkup-link', - 'target' => '_blank', - ), - $link); + $target = '_blank'; } + + return phutil_tag( + 'a', + array( + 'href' => $link, + 'class' => 'remarkup-link', + 'target' => $target, + ), + $link); } protected function markupHyperlinkUngreedy($matches) { $match = $matches[1]; $tail = null; $trailing = null; if (preg_match('/[;,.:!?]+$/', $match, $trailing)) { $tail = $trailing[0]; $match = substr($match, 0, -strlen($tail)); } // If there's a closing paren at the end but no balancing open paren in // the URL, don't link the close paren. This is an attempt to gracefully // handle the two common paren cases, Wikipedia links and English language // parentheticals, e.g.: // // http://en.wikipedia.org/wiki/Noun_(disambiguation) // (see also http://www.example.com) // // We could apply a craftier heuristic here which tries to actually balance // the parens, but this is probably sufficient. if (preg_match('/\\)$/', $match) && !preg_match('/\\(/', $match)) { $tail = ')'.$tail; $match = substr($match, 0, -1); } return hsprintf('%s%s', $this->markupHyperlink(array(null, $match)), $tail); } }