Changeset View
Changeset View
Standalone View
Standalone View
src/utils/utf8.php
Show First 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | while (preg_match($regex, $string, $matches, 0, $offset)) { | ||||
$offset += strlen($matches[0]); | $offset += strlen($matches[0]); | ||||
} | } | ||||
return implode('', $result); | return implode('', $result); | ||||
} | } | ||||
/**
 * Determine if a string is valid UTF-8, with only basic multilingual plane
 * characters. This is particularly important because MySQL's `utf8` column
 * types silently truncate strings which contain characters outside of this
 * set.
 *
 * A string passes only if every byte belongs to a well-formed one-, two- or
 * three-byte UTF-8 sequence. Overlong three-byte encodings (`E0 80..9F xx`)
 * and encoded UTF-16 surrogates (`ED A0..BF xx`) are rejected, as are NUL
 * bytes, stray continuation bytes, truncated sequences and any four-byte
 * sequence. The empty string is considered valid.
 *
 * @param string String to test for being valid UTF-8 with only characters in
 *               the basic multilingual plane.
 * @return bool True if the string is valid UTF-8 with only BMP characters.
 * @group utf8
 */
function phutil_is_utf8_with_only_bmp_characters($string) {
  $string = (string)$string;

  // Fast path: a string with no NUL bytes and no bytes above 0x7F is plain
  // ASCII and needs no further inspection. This pattern is a single linear
  // scan with no repeated group, so it is safe on arbitrarily long input.
  if (preg_match('/[^\x01-\x7F]/', $string) === 0) {
    return true;
  }

  // NOTE: This is deliberately not a single "^(a|b|c)*$" regular expression.
  // That form iterates once per multibyte character and can exhaust PCRE's
  // backtrack/recursion/JIT stack limits on long input, making preg_match()
  // return false and causing valid strings to be reported as invalid.
  $len = strlen($string);
  $ii = 0;
  while ($ii < $len) {
    $lead = ord($string[$ii]);

    if ($lead >= 0x01 && $lead <= 0x7F) {
      $ii++;
      continue;
    }

    if ($lead >= 0xC2 && $lead <= 0xDF) {
      // Two-byte sequence, U+0080 through U+07FF.
      $trail = 1;
      $min = 0x80;
      $max = 0xBF;
    } else if ($lead >= 0xE0 && $lead <= 0xEF) {
      // Three-byte sequence, U+0800 through U+FFFF. The first continuation
      // byte is restricted after 0xE0 (to exclude overlong encodings of
      // code points below U+0800) and after 0xED (to exclude the surrogate
      // range U+D800 through U+DFFF).
      $trail = 2;
      $min = ($lead === 0xE0) ? 0xA0 : 0x80;
      $max = ($lead === 0xED) ? 0x9F : 0xBF;
    } else {
      // NUL, a stray continuation byte, an overlong two-byte lead (0xC0 or
      // 0xC1), or a lead byte for a character outside the BMP.
      return false;
    }

    // The sequence must not run off the end of the string.
    if ($ii + $trail >= $len) {
      return false;
    }

    $next = ord($string[$ii + 1]);
    if ($next < $min || $next > $max) {
      return false;
    }

    if ($trail === 2) {
      $last = ord($string[$ii + 2]);
      if ($last < 0x80 || $last > 0xBF) {
        return false;
      }
    }

    $ii += $trail + 1;
  }

  return true;
}
/** | |||||
* Determine if a string is valid UTF-8. | * Determine if a string is valid UTF-8. | ||||
* | * | ||||
* @param string Some string which may or may not be valid UTF-8. | * @param string Some string which may or may not be valid UTF-8. | ||||
* @return bool True if the string is valid UTF-8. | * @return bool True if the string is valid UTF-8. | ||||
* @group utf8 | * @group utf8 | ||||
*/ | */ | ||||
function phutil_is_utf8($string) { | function phutil_is_utf8($string) { | ||||
if (function_exists('mb_check_encoding')) { | if (function_exists('mb_check_encoding')) { | ||||
▲ Show 20 Lines • Show All 632 Lines • Show Last 20 Lines |