Changeset View
Changeset View
Standalone View
Standalone View
src/utils/__tests__/PhutilUTF8TestCase.php
| Show First 20 Lines • Show All 514 Lines • ▼ Show 20 Lines | $tests = array( | ||||
| "\xE2\x98\x83" => array(true, true, 'snowman'), | "\xE2\x98\x83" => array(true, true, 'snowman'), | ||||
| // This is the last character in BMP, U+FFFF. | // This is the last character in BMP, U+FFFF. | ||||
| "\xEF\xBF\xBF" => array(true, true, 'U+FFFF'), | "\xEF\xBF\xBF" => array(true, true, 'U+FFFF'), | ||||
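| // This isn't valid: the final byte, 0xC0, is not a continuation byte (0x80-0xBF). | // This isn't valid: the final byte, 0xC0, is not a continuation byte (0x80-0xBF). | ||||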
| "\xEF\xBF\xC0" => array(false, false, 'Invalid, byte range.'), | "\xEF\xBF\xC0" => array(false, false, 'Invalid, byte range.'), | ||||
| // This is an invalid nonminimal (overlong) 4-byte encoding of U+1000. | |||||
| "\xF0\x81\x80\x80" => array(false, false, 'Nonminimal 4-byte characer.'), | |||||
| // This is the first character above BMP, U+10000. | // This is the first character above BMP, U+10000. | ||||
| "\xF0\x90\x80\x80" => array(true, false, 'U+10000'), | "\xF0\x90\x80\x80" => array(true, false, 'U+10000'), | ||||
| "\xF0\x9D\x84\x9E" => array(true, false, 'gclef'), | "\xF0\x9D\x84\x9E" => array(true, false, 'gclef'), | ||||
| "musical \xF0\x9D\x84\x9E g-clef" => array(true, false, 'gclef text'), | "musical \xF0\x9D\x84\x9E g-clef" => array(true, false, 'gclef text'), | ||||
| "\xF0\x9D\x84" => array(false, false, 'Invalid, truncated.'), | "\xF0\x9D\x84" => array(false, false, 'Invalid, truncated.'), | ||||
| "\xE0\x80\x80" => array(false, false, 'Nonminimal 3-byte character.'), | "\xE0\x80\x80" => array(false, false, 'Nonminimal 3-byte character.'), | ||||
| // Partial BMP characters. | // Partial BMP characters. | ||||
| "\xCD" => array(false, false, 'Partial 2-byte character.'), | "\xCD" => array(false, false, 'Partial 2-byte character.'), | ||||
| "\xE0\xA0" => array(false, false, 'Partial BMP 0xE0 character.'), | "\xE0\xA0" => array(false, false, 'Partial BMP 0xE0 character.'), | ||||
| "\xE2\x98" => array(false, false, 'Partial BMP cahracter.'), | "\xE2\x98" => array(false, false, 'Partial BMP cahracter.'), | ||||
| ); | ); | ||||
| foreach ($tests as $input => $test) { | foreach ($tests as $input => $test) { | ||||
| list($expect_utf8, $expect_bmp, $test_name) = $test; | list($expect_utf8, $expect_bmp, $test_name) = $test; | ||||
| // Depending on what's installed on the system, this may use an | |||||
| // extension. | |||||
| $this->assertEqual( | $this->assertEqual( | ||||
| $expect_utf8, | $expect_utf8, | ||||
| phutil_is_utf8($input), | phutil_is_utf8($input), | ||||
| pht('is_utf(%s)', $test_name)); | pht('is_utf(%s)', $test_name)); | ||||
| // Also test this against the pure PHP implementation, explicitly. | |||||
| $this->assertEqual( | |||||
| $expect_utf8, | |||||
| phutil_is_utf8_slowly($input), | |||||
| pht('is_utf_slowly(%s)', $test_name)); | |||||
| $this->assertEqual( | $this->assertEqual( | ||||
| $expect_bmp, | $expect_bmp, | ||||
| phutil_is_utf8_with_only_bmp_characters($input), | phutil_is_utf8_with_only_bmp_characters($input), | ||||
| pht('is_utf_bmp(%s)', $test_name)); | pht('is_utf_bmp(%s)', $test_name)); | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||