Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F14010729
D8313.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
1 KB
Referenced Files
None
Subscribers
None
D8313.diff
View Options
Index: src/utils/__tests__/PhutilUTF8TestCase.php
===================================================================
--- src/utils/__tests__/PhutilUTF8TestCase.php
+++ src/utils/__tests__/PhutilUTF8TestCase.php
@@ -456,6 +456,8 @@
"musical \xF0\x9D\x84\x9E g-clef" => array(true, false, "gclef text"),
"\xF0\x9D\x84" => array(false, false, "Invalid, truncated."),
+
+ "\xE0\x80\x80" => array(false, false, "Nonminimal 3-byte character."),
);
foreach ($tests as $input => $test) {
Index: src/utils/utf8.php
===================================================================
--- src/utils/utf8.php
+++ src/utils/utf8.php
@@ -77,7 +77,7 @@
continue;
}
return false;
- } else if ($chr >= 0xE0 && $chr <= 0xEF) {
+ } else if ($chr > 0xE0 && $chr <= 0xEF) {
$chr = ord($string[++$ii]);
if ($chr >= 0x80 && $chr <= 0xBF) {
$chr = ord($string[++$ii]);
@@ -86,6 +86,20 @@
}
}
return false;
+ } else if ($chr == 0xE0) {
+ $chr = ord($string[++$ii]);
+
+ // NOTE: This range starts at 0xA0, not 0x80. The values 0x80-0x9F are
+ // "valid", but not minimal representations, and MySQL rejects them. We're
+ // special casing this part of the range.
+
+ if ($chr >= 0xA0 && $chr <= 0xBF) {
+ $chr = ord($string[++$ii]);
+ if ($chr >= 0x80 && $chr <= 0xBF) {
+ continue;
+ }
+ }
+ return false;
}
return false;
@@ -109,6 +123,9 @@
return mb_check_encoding($string, 'UTF-8');
}
+ // NOTE: This regex incorrectly accepts nonminimal sequences like
+ // \xE0\x80\x80. The mb_check_encoding() version handles them correctly.
+
$regex =
"/^(".
"[\x01-\x7F]+".
@@ -116,7 +133,7 @@
"|([\xE0-\xEF][\x80-\xBF][\x80-\xBF])".
"|([\xF0-\xF4][\x80-\xBF][\x80-\xBF][\x80-\xBF]))*\$/";
- return preg_match($regex, $string);
+ return (bool)preg_match($regex, $string);
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Nov 1, 12:10 PM (4 d, 4 h ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6756105
Default Alt Text
D8313.diff (1 KB)
Attached To
Mode
D8313: Reject nonminimal representations of UTF8 at the beginning of the 3-byte BMP range
Attached
Detach File
Event Timeline
Log In to Comment