diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -2150,6 +2150,7 @@ 'PhabricatorAlmanacApplication' => 'applications/almanac/application/PhabricatorAlmanacApplication.php', 'PhabricatorAmazonAuthProvider' => 'applications/auth/provider/PhabricatorAmazonAuthProvider.php', 'PhabricatorAmazonSNSFuture' => 'applications/metamta/future/PhabricatorAmazonSNSFuture.php', + 'PhabricatorAnchorTestCase' => 'infrastructure/markup/__tests__/PhabricatorAnchorTestCase.php', 'PhabricatorAnchorView' => 'view/layout/PhabricatorAnchorView.php', 'PhabricatorAphlictManagementDebugWorkflow' => 'applications/aphlict/management/PhabricatorAphlictManagementDebugWorkflow.php', 'PhabricatorAphlictManagementNotifyWorkflow' => 'applications/aphlict/management/PhabricatorAphlictManagementNotifyWorkflow.php', @@ -8314,6 +8315,7 @@ 'PhabricatorAlmanacApplication' => 'PhabricatorApplication', 'PhabricatorAmazonAuthProvider' => 'PhabricatorOAuth2AuthProvider', 'PhabricatorAmazonSNSFuture' => 'PhutilAWSFuture', + 'PhabricatorAnchorTestCase' => 'PhabricatorTestCase', 'PhabricatorAnchorView' => 'AphrontView', 'PhabricatorAphlictManagementDebugWorkflow' => 'PhabricatorAphlictManagementWorkflow', 'PhabricatorAphlictManagementNotifyWorkflow' => 'PhabricatorAphlictManagementWorkflow', diff --git a/src/infrastructure/markup/__tests__/PhabricatorAnchorTestCase.php b/src/infrastructure/markup/__tests__/PhabricatorAnchorTestCase.php new file mode 100644 --- /dev/null +++ b/src/infrastructure/markup/__tests__/PhabricatorAnchorTestCase.php @@ -0,0 +1,38 @@ + '', + 'Bells and Whistles' => 'bells-and-whistles', + 'Termination for Nonpayment' => 'termination-for-nonpayment', + $low_ascii => '0123456789-abcdefghijklmnopqrstu', + 'xxxx xxxx xxxx xxxx xxxx on' => 'xxxx-xxxx-xxxx-xxxx-xxxx', + 'xxxx xxxx xxxx xxxx xxxx ox' => 'xxxx-xxxx-xxxx-xxxx-xxxx-ox', + "So, You Want To Build A {$snowman}?" => + "so-you-want-to-build-a-{$snowman}", + str_repeat($snowman, 128) => str_repeat($snowman, 32), + ); + + foreach ($map as $input => $expect) { + $anchor = PhutilRemarkupHeaderBlockRule::getAnchorNameFromHeaderText( + $input); + + $this->assertEqual( + $expect, + $anchor, + pht('Anchor for "%s".', $input)); + } + } + +} diff --git a/src/infrastructure/markup/blockrule/PhutilRemarkupHeaderBlockRule.php b/src/infrastructure/markup/blockrule/PhutilRemarkupHeaderBlockRule.php --- a/src/infrastructure/markup/blockrule/PhutilRemarkupHeaderBlockRule.php +++ b/src/infrastructure/markup/blockrule/PhutilRemarkupHeaderBlockRule.php @@ -73,24 +73,7 @@ } private function generateAnchor($level, $text) { - $anchor = strtolower($text); - $anchor = preg_replace('/[^a-z0-9]/', '-', $anchor); - $anchor = preg_replace('/--+/', '-', $anchor); - $anchor = trim($anchor, '-'); - $anchor = substr($anchor, 0, 24); - $anchor = trim($anchor, '-'); - $base = $anchor; - - $key = self::KEY_HEADER_TOC; $engine = $this->getEngine(); - $anchors = $engine->getTextMetadata($key, array()); - - $suffix = 1; - while (!strlen($anchor) || isset($anchors[$anchor])) { - $anchor = $base.'-'.$suffix; - $anchor = trim($anchor, '-'); - $suffix++; - } // When a document contains a link inside a header, like this: // @@ -100,12 +83,30 @@ // header itself. We push the 'toc' state so all the link rules generate // just names. $engine->pushState('toc'); - $text = $this->applyRules($text); - $text = $engine->restoreText($text); - - $anchors[$anchor] = array($level, $text); + $plain_text = $text; + $plain_text = $this->applyRules($plain_text); + $plain_text = $engine->restoreText($plain_text); $engine->popState('toc'); + $anchor = self::getAnchorNameFromHeaderText($plain_text); + + if (!strlen($anchor)) { + return null; + } + + $base = $anchor; + + $key = self::KEY_HEADER_TOC; + $anchors = $engine->getTextMetadata($key, array()); + + $suffix = 1; + while (isset($anchors[$anchor])) { + $anchor = $base.'-'.$suffix; + $anchor = trim($anchor, '-'); + $suffix++; + } + + $anchors[$anchor] = array($level, $plain_text); $engine->setTextMetadata($key, $anchors); return phutil_tag( @@ -159,4 +160,31 @@ return phutil_implode_html("\n", $toc); } + public static function getAnchorNameFromHeaderText($text) { + $anchor = phutil_utf8_strtolower($text); + + // Replace all latin characters which are not "a-z" or "0-9" with "-". + // Preserve other characters, since non-latin letters and emoji work + // fine in anchors. + $anchor = preg_replace('/[\x00-\x2F\x3A-\x60\x7B-\x7F]+/', '-', $anchor); + $anchor = trim($anchor, '-'); + + // Truncate the fragment to something reasonable. + $anchor = id(new PhutilUTF8StringTruncator()) + ->setMaximumGlyphs(32) + ->setTerminator('') + ->truncateString($anchor); + + // If the fragment is terminated by a word which "The U.S. Government + // Printing Office Style Manual" normally discourages capitalizing in + // titles, discard it. This is an arbitrary heuristic intended to avoid + // awkward hanging words in anchors. + $anchor = preg_replace( + '/-(a|an|the|at|by|for|in|of|on|per|to|up|and|as|but|if|or|nor)\z/', + '', + $anchor); + + return $anchor; + } + } diff --git a/src/infrastructure/markup/markuprule/PhutilRemarkupBoldRule.php b/src/infrastructure/markup/markuprule/PhutilRemarkupBoldRule.php --- a/src/infrastructure/markup/markuprule/PhutilRemarkupBoldRule.php +++ b/src/infrastructure/markup/markuprule/PhutilRemarkupBoldRule.php @@ -18,6 +18,10 @@ } protected function applyCallback(array $matches) { + if ($this->getEngine()->isAnchorMode()) { + return $matches[1]; + } + return hsprintf('%s', $matches[1]); } diff --git a/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php b/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php --- a/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php +++ b/src/infrastructure/markup/remarkup/PhutilRemarkupEngine.php @@ -34,6 +34,10 @@ return $this->mode & self::MODE_TEXT; } + public function isAnchorMode() { + return $this->getState('toc'); + } + public function isHTMLMailMode() { return $this->mode & self::MODE_HTML_MAIL; } diff --git a/src/infrastructure/markup/remarkup/__tests__/remarkup/toc.txt b/src/infrastructure/markup/remarkup/__tests__/remarkup/toc.txt --- a/src/infrastructure/markup/remarkup/__tests__/remarkup/toc.txt +++ b/src/infrastructure/markup/remarkup/__tests__/remarkup/toc.txt @@ -6,14 +6,14 @@ ~~~~~~~~~~ -

link_name

+

link_name

bold