Page MenuHomePhabricator

D8313.diff
No OneTemporary

D8313.diff

Index: src/utils/__tests__/PhutilUTF8TestCase.php
===================================================================
--- src/utils/__tests__/PhutilUTF8TestCase.php
+++ src/utils/__tests__/PhutilUTF8TestCase.php
@@ -456,6 +456,8 @@
"musical \xF0\x9D\x84\x9E g-clef" => array(true, false, "gclef text"),
"\xF0\x9D\x84" => array(false, false, "Invalid, truncated."),
+
+ "\xE0\x80\x80" => array(false, false, "Nonminimal 3-byte character."),
);
foreach ($tests as $input => $test) {
Index: src/utils/utf8.php
===================================================================
--- src/utils/utf8.php
+++ src/utils/utf8.php
@@ -77,7 +77,7 @@
continue;
}
return false;
- } else if ($chr >= 0xE0 && $chr <= 0xEF) {
+ } else if ($chr > 0xE0 && $chr <= 0xEF) {
$chr = ord($string[++$ii]);
if ($chr >= 0x80 && $chr <= 0xBF) {
$chr = ord($string[++$ii]);
@@ -86,6 +86,20 @@
}
}
return false;
+ } else if ($chr == 0xE0) {
+ $chr = ord($string[++$ii]);
+
+ // NOTE: This range starts at 0xA0, not 0x80. The values 0x80-0x9F are
+ // "valid", but not minimal representations, and MySQL rejects them. We're
+ // special casing this part of the range.
+
+ if ($chr >= 0xA0 && $chr <= 0xBF) {
+ $chr = ord($string[++$ii]);
+ if ($chr >= 0x80 && $chr <= 0xBF) {
+ continue;
+ }
+ }
+ return false;
}
return false;
@@ -109,6 +123,9 @@
return mb_check_encoding($string, 'UTF-8');
}
+ // NOTE: This regex incorrectly accepts nonminimal sequences like
+ // \xE0\x80\x80. The MB version works correctly.
+
$regex =
"/^(".
"[\x01-\x7F]+".
@@ -116,7 +133,7 @@
"|([\xE0-\xEF][\x80-\xBF][\x80-\xBF])".
"|([\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF]))*\$/";
- return preg_match($regex, $string);
+ return (bool)preg_match($regex, $string);
}

File Metadata

Mime Type
text/plain
Expires
Fri, Nov 1, 12:10 PM (4 d, 4 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6756105
Default Alt Text
D8313.diff (1 KB)

Event Timeline