Page Menu | Home | Phabricator

D19724.id47133.diff
No One | Temporary

D19724.id47133.diff

Index: src/utils/__tests__/PhutilUTF8TestCase.php
===================================================================
--- src/utils/__tests__/PhutilUTF8TestCase.php
+++ src/utils/__tests__/PhutilUTF8TestCase.php
@@ -61,6 +61,13 @@
);
foreach ($map as $input => $expect) {
+ if ($input !== $expect) {
+ $this->assertEqual(
+ false,
+ phutil_is_utf8_slowly($input),
+ pht('Slowly reject overlong form of: %s', $input));
+ }
+
$actual = phutil_utf8ize($input);
$this->assertEqual(
$expect,
@@ -77,6 +84,13 @@
);
foreach ($map as $input => $expect) {
+ if ($input !== $expect) {
+ $this->assertEqual(
+ false,
+ phutil_is_utf8_slowly($input),
+ pht('Slowly reject surrogate: %s', $input));
+ }
+
$actual = phutil_utf8ize($input);
$this->assertEqual(
$expect,
Index: src/utils/utf8.php
===================================================================
--- src/utils/utf8.php
+++ src/utils/utf8.php
@@ -149,6 +149,34 @@
continue;
}
return false;
+ } else if ($chr == 0xED) {
+ // See T11525. Some sequences in this block encode surrogate codepoints
+ // (U+D800 through U+DFFF), which are reserved for UTF-16. Reject them.
+ $codepoint = ($chr & 0x0F) << 12;
+ ++$ii;
+ if ($ii >= $len) {
+ return false;
+ }
+ $chr = ord($string[$ii]);
+ $codepoint += ($chr & 0x3F) << 6;
+ if ($chr >= 0x80 && $chr <= 0xBF) {
+ ++$ii;
+ if ($ii >= $len) {
+ return false;
+ }
+ $chr = ord($string[$ii]);
+ $codepoint += ($chr & 0x3F);
+
+ if ($codepoint >= 0xD800 && $codepoint <= 0xDFFF) {
+ // Reject these surrogate codepoints.
+ return false;
+ }
+
+ if ($chr >= 0x80 && $chr <= 0xBF) {
+ continue;
+ }
+ }
+ return false;
} else if ($chr > 0xE0 && $chr <= 0xEF) {
++$ii;
if ($ii >= $len) {

File Metadata

Mime Type
text/plain
Expires
Sat, May 11, 1:37 AM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
6284660
Default Alt Text
D19724.id47133.diff (1 KB)

Event Timeline