Changeset View
Changeset View
Standalone View
Standalone View
src/utils/PhutilUTF8StringTruncator.php
Show First 20 Lines • Show All 233 Lines • ▼ Show 20 Lines | static $stop_characters = array( | ||||
'.' => true, | '.' => true, | ||||
'!' => true, | '!' => true, | ||||
'?' => true, | '?' => true, | ||||
); | ); | ||||
// Search backward in the string, looking for reasonable places to break it. | // Search backward in the string, looking for reasonable places to break it. | ||||
$word_boundary = null; | $word_boundary = null; | ||||
$stop_boundary = null; | $stop_boundary = null; | ||||
$any_nonboundary = false; | |||||
// If we do a word break with a terminator, we have to look beyond at least | // If we do a word break with a terminator, we have to look beyond at least | ||||
// the number of characters in the terminator. If the terminator is longer than | // the number of characters in the terminator. If the terminator is longer than | ||||
// the required length, we'll skip this whole block and return the terminator on its | // the required length, we'll skip this whole block and return the terminator on its | ||||
// own. | // own. | ||||
// Only search backward for a while. At some point we don't get a better | // Only search backward for a while. At some point we don't get a better | ||||
// result by looking through the whole string, and if this is "MMM..." or | // result by looking through the whole string, and if this is "MMM..." or | ||||
// a non-latin language without word break characters we're just wasting | // a non-latin language without word break characters we're just wasting | ||||
// time. | // time. | ||||
$search = max(0, $cutoff - 256); | // See PHI654. We also only look for a break near the end of the text, | ||||
// relative to the length of the text. If the text is something like | |||||
// "O123: MMMMMM..." or "See path/to/long/thing", we want to cut the very | |||||
// long word in half, not just render "O123..." or "See...". | |||||
$search = max(0, $cutoff - 256, $cutoff / 2); | |||||
for ($ii = min($cutoff, $glyph_len - 1); $ii >= $search; $ii--) { | for ($ii = min($cutoff, $glyph_len - 1); $ii >= $search; $ii--) { | ||||
$c = $string_gv[$ii]; | $c = $string_gv[$ii]; | ||||
if (isset($break_characters[$c])) { | if (isset($break_characters[$c])) { | ||||
$word_boundary = $ii; | $word_boundary = $ii; | ||||
} else if (isset($stop_characters[$c])) { | } else if (isset($stop_characters[$c])) { | ||||
$stop_boundary = $ii + 1; | $stop_boundary = $ii + 1; | ||||
break; | break; | ||||
} else { | } else { | ||||
$any_nonboundary = true; | |||||
if ($word_boundary !== null) { | if ($word_boundary !== null) { | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if ($stop_boundary !== null) { | if ($stop_boundary !== null) { | ||||
// We found a character like ".". Cut the string there, without appending | // We found a character like ".". Cut the string there, without appending | ||||
// the terminal. | // the terminal. | ||||
$string_part = array_slice($string_gv, 0, $stop_boundary); | $string_part = array_slice($string_gv, 0, $stop_boundary); | ||||
return implode('', $string_part); | return implode('', $string_part); | ||||
} | } | ||||
// If we didn't find any boundary characters or we found ONLY boundary | // If we didn't find any boundary characters or we found ONLY boundary | ||||
// characters, just break at the maximum character length. | // characters, just break at the maximum character length. | ||||
if ($word_boundary === null || $word_boundary === 0) { | if ($word_boundary === null || !$any_nonboundary) { | ||||
$word_boundary = $cutoff; | $word_boundary = $cutoff; | ||||
} | } | ||||
$string_part = array_slice($string_gv, 0, $word_boundary); | $string_part = array_slice($string_gv, 0, $word_boundary); | ||||
$string_part = implode('', $string_part); | $string_part = implode('', $string_part); | ||||
return $string_part.$this->terminator; | return $string_part.$this->terminator; | ||||
} | } | ||||
} | } |