Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F13980769
D19724.id47132.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
1 KB
Referenced Files
None
Subscribers
None
D19724.id47132.diff
View Options
Index: src/utils/__tests__/PhutilUTF8TestCase.php
===================================================================
--- src/utils/__tests__/PhutilUTF8TestCase.php
+++ src/utils/__tests__/PhutilUTF8TestCase.php
@@ -61,6 +61,13 @@
);
foreach ($map as $input => $expect) {
+ if ($input !== $expect) {
+ $this->assertEqual(
+ false,
+ phutil_is_utf8_slowly($input),
+ pht('Slowly reject overlong form of: %s', $input));
+ }
+
$actual = phutil_utf8ize($input);
$this->assertEqual(
$expect,
@@ -77,6 +84,13 @@
);
foreach ($map as $input => $expect) {
+ if ($input !== $expect) {
+ $this->assertEqual(
+ false,
+ phutil_is_utf8_slowly($input),
+ pht('Slowly reject surrogate: %s', $input));
+ }
+
$actual = phutil_utf8ize($input);
$this->assertEqual(
$expect,
Index: src/utils/utf8.php
===================================================================
--- src/utils/utf8.php
+++ src/utils/utf8.php
@@ -149,6 +149,34 @@
continue;
}
return false;
+ } else if ($chr == 0xED) {
+ // See T11525. Some sequences in this block (U+D800 through U+DFFF) are
+ // surrogate codepoints reserved for use in UTF-16. We should reject them.
+ $codepoint = ($chr & 0x0F) << 12;
+ ++$ii;
+ if ($ii >= $len) {
+ return false;
+ }
+ $chr = ord($string[$ii]);
+ $codepoint += ($chr & 0x3F) << 6;
+ if ($chr >= 0x80 && $chr <= 0xBF) {
+ ++$ii;
+ if ($ii >= $len) {
+ return false;
+ }
+ $chr = ord($string[$ii]);
+ $codepoint += ($chr & 0x3F);
+
+ if ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) {
+ // Reject these surrogate codepoints.
+ return false;
+ }
+
+ if ($chr >= 0x80 && $chr <= 0xBF) {
+ continue;
+ }
+ }
+ return false;
} else if ($chr > 0xE0 && $chr <= 0xEF) {
++$ii;
if ($ii >= $len) {
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Oct 20, 12:28 PM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6724760
Default Alt Text
D19724.id47132.diff (1 KB)
Attached To
Mode
D19724: [Wilds] Fix phutil_is_utf8_slowly() to reject reserved UTF16 surrogate character ranges
Attached
Detach File
Event Timeline
Log In to Comment