diff --git a/src/utils/PhutilUTF8StringTruncator.php b/src/utils/PhutilUTF8StringTruncator.php --- a/src/utils/PhutilUTF8StringTruncator.php +++ b/src/utils/PhutilUTF8StringTruncator.php @@ -239,6 +239,7 @@ // Search backward in the string, looking for reasonable places to break it. $word_boundary = null; $stop_boundary = null; + $any_nonboundary = false; // If we do a word break with a terminal, we have to look beyond at least // the number of characters in the terminal. If the terminal is longer than @@ -250,7 +251,12 @@ // a non-latin language without word break characters we're just wasting // time. - $search = max(0, $cutoff - 256); + // See PHI654. We also only look for a break near the end of the text, + // relative to the length of the text. If the text is something like + // "O123: MMMMMM..." or "See path/to/long/thing", we want to cut the very + // long word in half, not just render "O123..." or "See...". + + $search = max(0, $cutoff - 256, $cutoff / 2); for ($ii = min($cutoff, $glyph_len - 1); $ii >= $search; $ii--) { $c = $string_gv[$ii]; @@ -260,6 +266,7 @@ $stop_boundary = $ii + 1; break; } else { + $any_nonboundary = true; if ($word_boundary !== null) { break; } @@ -275,7 +282,7 @@ // If we didn't find any boundary characters or we found ONLY boundary // characters, just break at the maximum character length. - if ($word_boundary === null || $word_boundary === 0) { + if ($word_boundary === null || !$any_nonboundary) { $word_boundary = $cutoff; } diff --git a/src/utils/__tests__/PhutilUTF8TestCase.php b/src/utils/__tests__/PhutilUTF8TestCase.php --- a/src/utils/__tests__/PhutilUTF8TestCase.php +++ b/src/utils/__tests__/PhutilUTF8TestCase.php @@ -209,13 +209,11 @@ array("Gr\xCD\xA0mpyCatSmiles", 8, '...', "Gr\xCD\xA0mpy..."), array("X\xCD\xA0\xCD\xA0\xCD\xA0Y", 1, '', "X\xCD\xA0\xCD\xA0\xCD\xA0"), - // This behavior is maybe a little bad, but it seems mostly reasonable, - // at least for latin languages. array( 'Derp, supercalafragalisticexpialadoshus', 30, '...', - 'Derp...', + 'Derp, supercalafragalistice...', ), // If a string has only word-break characters in it, we should just cut @@ -224,6 +222,13 @@ // Terminal is longer than requested input. array('derp', 3, 'quack', 'quack'), + + array( + 'O123: com/oracle/java/path/to/application/source/ThingFactory.java', + 32, + '...', + 'O123: com/oracle/java/path/to...', + ), ); foreach ($inputs as $input) {