Differential D18634 Diff 44741 src/utils/utf8.php

Changeset View

Standalone View

src/utils/utf8.php

Show First 20 Lines • Show All 322 Lines • ▼ Show 20 Lines	foreach ($display_glyphs as $display_glyph) {
}		}
}		}

return $len;		return $len;
}		}


/**		/**
		* Test if a string contains Chinese, Japanese, or Korean characters.
		*
		* Most languages use spaces to separate words, but these languages do not.
		*
		* @param string String to examine, in UTF8.
		* @return bool True if the string contains Chinese, Japanese, or Korean
		* characters.
		*/
		function phutil_utf8_is_cjk($string) {
		  // Inclusive (first, last) codepoint ranges of the Unicode blocks that
		  // hold Han ideographs, sorted by first codepoint.
		  //
		  // Maintenance note: ranges already listed here are not expected to
		  // change. New Unicode versions occasionally add further "Extension"
		  // blocks; supporting one means adding a single row to this table.
		  static $ranges = array(
		    // CJK Unified Ideographs Extension A
		    array(0x3400, 0x4DBF),
		    // CJK Unified Ideographs
		    array(0x4E00, 0x9FFF),
		    // CJK Compatibility Ideographs
		    array(0xF900, 0xFAFF),
		    // CJK Unified Ideographs Extension B
		    array(0x20000, 0x2A6DF),
		    // CJK Unified Ideographs Extension C
		    array(0x2A700, 0x2B73F),
		    // CJK Unified Ideographs Extension D
		    array(0x2B740, 0x2B81F),
		    // CJK Unified Ideographs Extension E
		    array(0x2B820, 0x2CEAF),
		    // CJK Unified Ideographs Extension F
		    array(0x2CEB0, 0x2EBEF),
		    // CJK Unified Ideographs Extension I
		    array(0x2EBF0, 0x2EE5F),
		    // CJK Compatibility Ideographs Supplement
		    array(0x2F800, 0x2FA1F),
		    // CJK Unified Ideographs Extension G
		    array(0x30000, 0x3134F),
		    // CJK Unified Ideographs Extension H
		    array(0x31350, 0x323AF),
		  );

		  $codepoints = phutil_utf8v_codepoints($string);

		  foreach ($codepoints as $codepoint) {
		    // Fast path: nothing below the first listed block can match, which
		    // skips the table scan for ASCII and most alphabetic scripts.
		    if ($codepoint < 0x3400) {
		      continue;
		    }

		    foreach ($ranges as $range) {
		      if ($codepoint < $range[0]) {
		        // The table is sorted, so no later range can contain this
		        // codepoint either.
		        break;
		      }

		      if ($codepoint <= $range[1]) {
		        return true;
		      }
		    }
		  }

		  return false;
		}


		/**
* Split a UTF-8 string into an array of characters. Combining characters are		* Split a UTF-8 string into an array of characters. Combining characters are
* also split.		* also split.
*		*
* @param string A valid utf-8 string.		* @param string A valid utf-8 string.
* @param int\|null Stop processing after examining this many bytes.		* @param int\|null Stop processing after examining this many bytes.
* @return list A list of characters in the string.		* @return list A list of characters in the string.
*/		*/
function phutil_utf8v($string, $byte_limit = null) {		function phutil_utf8v($string, $byte_limit = null) {
▲ Show 20 Lines • Show All 503 Lines • Show Last 20 Lines