Index: resources/sql/autopatches/20140223.bigutf8scratch.sql
===================================================================
--- /dev/null
+++ resources/sql/autopatches/20140223.bigutf8scratch.sql
@@ -0,0 +1,2 @@
+ALTER TABLE {$NAMESPACE}_harbormaster.harbormaster_scratchtable
+  ADD bigData LONGTEXT COLLATE utf8_bin;
Index: src/applications/harbormaster/storage/HarbormasterScratchTable.php
===================================================================
--- src/applications/harbormaster/storage/HarbormasterScratchTable.php
+++ src/applications/harbormaster/storage/HarbormasterScratchTable.php
@@ -9,5 +9,6 @@
 final class HarbormasterScratchTable extends HarbormasterDAO {
 
   protected $data;
+  protected $bigData;
 
 }
Index: src/infrastructure/__tests__/PhabricatorInfrastructureTestCase.php
===================================================================
--- src/infrastructure/__tests__/PhabricatorInfrastructureTestCase.php
+++ src/infrastructure/__tests__/PhabricatorInfrastructureTestCase.php
@@ -3,6 +3,14 @@
 final class PhabricatorInfrastructureTestCase
   extends PhabricatorTestCase {
 
+  protected function getPhabricatorTestCaseConfiguration() {
+    // testMySQLAgreesWithUsAboutBMP() saves and loads a real row, so ask
+    // the harness to build storage fixtures for this test case.
+    return array(
+      self::PHABRICATOR_TESTCONFIG_BUILD_STORAGE_FIXTURES => true,
+    );
+  }
+
   /**
    * This is more of an acceptance test case instead of a unittest. It verifies
    * that all symbols can be loaded correctly. It can catch problems like
@@ -22,6 +30,49 @@
       'In test cases, all applications should default to installed.');
   }
 
+  public function testMySQLAgreesWithUsAboutBMP() {
+    // Build a string with every BMP character in it, write it to MySQL, then
+    // load it again. The loaded string must equal the one we wrote, showing
+    // that strings which pass our BMP checks are also valid in MySQL and
+    // are not silently truncated.
+
+    $bytes = '';
+
+    // One-byte sequences, \x01 through \x7F.
+    foreach (range(0x01, 0x7F) as $lead) {
+      $bytes .= chr($lead);
+    }
+
+    // Two-byte sequences, \xC2\x80 through \xDF\xBF.
+    foreach (range(0xC2, 0xDF) as $lead) {
+      foreach (range(0x80, 0xBF) as $tail) {
+        $bytes .= chr($lead).chr($tail);
+      }
+    }
+
+    // Three-byte sequences, \xE0\xA0\x80 through \xEF\xBF\xBF. After a lead
+    // byte of \xE0 the second byte starts at \xA0; otherwise at \x80.
+    foreach (range(0xE0, 0xEF) as $lead) {
+      $first = ($lead === 0xE0) ? 0xA0 : 0x80;
+      foreach (range($first, 0xBF) as $mid) {
+        foreach (range(0x80, 0xBF) as $tail) {
+          $bytes .= chr($lead).chr($mid).chr($tail);
+        }
+      }
+    }
+
+    $this->assertEqual(194431, strlen($bytes));
+    $this->assertEqual(true, phutil_is_utf8_with_only_bmp_characters($bytes));
+
+    $scratch = id(new HarbormasterScratchTable())
+      ->setData('all.utf8.bmp')
+      ->setBigData($bytes);
+    $saved = $scratch->save();
+
+    $loaded = id(new HarbormasterScratchTable())->load($saved->getID());
+
+    $this->assertEqual($bytes, $loaded->getBigData());
+  }
 }