diff --git a/src/applications/config/check/PhabricatorMySQLSetupCheck.php b/src/applications/config/check/PhabricatorMySQLSetupCheck.php --- a/src/applications/config/check/PhabricatorMySQLSetupCheck.php +++ b/src/applications/config/check/PhabricatorMySQLSetupCheck.php @@ -315,6 +315,26 @@ ->addMySQLConfig('innodb_buffer_pool_size'); } + $ok = PhabricatorStorageManagementAPI::isCharacterSetAvailableOnConnection( + 'utf8mb4', + id(new PhabricatorUser())->establishConnection('w')); + if (!$ok) { + $summary = pht( + 'You are using an old version of MySQL, and should upgrade.'); + + $message = pht( + 'You are using an old version of MySQL which has poor unicode '. + 'support (it does not support the "utf8mb4" collation set). You will '. + 'encounter limitations when working with some unicode data.'. + "\n\n". + 'We strongly recommend you upgrade to MySQL 5.5 or newer.'); + + $this->newIssue('mysql.utf8mb4') + ->setName(pht('Old MySQL Version')) + ->setSummary($summary) + ->setMessage($message); + } + } } diff --git a/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php b/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php --- a/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php +++ b/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php @@ -233,7 +233,12 @@ } $conn = $this->getConn(null); + return self::isCharacterSetAvailableOnConnection($character_set, $conn); + } + public static function isCharacterSetAvailableOnConnection( + $character_set, + AphrontDatabaseConnection $conn) { $result = queryfx_one( $conn, 'SELECT CHARACTER_SET_NAME FROM INFORMATION_SCHEMA.CHARACTER_SETS @@ -254,24 +259,32 @@ $collate_sort = 'utf8mb4_unicode_ci'; $collate_full = 'utf8mb4_unicode_ci'; } else { - // If utf8mb4 is not available, we use binary. This allows us to store - // 4-byte unicode characters. This has some tradeoffs: - // - // Unicode characters won't sort correctly. There's nothing we can do - // about this while still supporting 4-byte characters. + // If utf8mb4 is not available, we use binary for most data. This allows + // us to store 4-byte unicode characters. // // It's possible that strings will be truncated in the middle of a // character on insert. We encourage users to set STRICT_ALL_TABLES // to prevent this. // - // There's no valid collation we can use to get a fulltext index on - // 4-byte unicode characters: we can't add a fulltext key to a binary - // column. + // For "fulltext" and "sort" columns, we don't use binary. + // + // With "fulltext", we can not use binary because MySQL won't let us. + // We use 3-byte utf8 instead and accept being unable to index 4-byte + // characters. + // + // With "sort", if we use binary we lose case insensitivity (for + // example, "ALincoln@logcabin.com" and "alincoln@logcabin.com" would no + // longer be identified as the same email address). This can be very + // confusing and is far worse overall than not supporting 4-byte unicode + // characters, so we use 3-byte utf8 and accept limited 4-byte support as + // a tradeoff to get sensible collation behavior. Many columns where + // collation is important rarely contain 4-byte characters anyway, so we + // are not giving up too much. $charset = 'binary'; $charset_full = 'utf8'; $collate_text = 'binary'; - $collate_sort = 'binary'; + $collate_sort = 'utf8_general_ci'; $collate_full = 'utf8_general_ci'; } diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementWorkflow.php --- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementWorkflow.php +++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementWorkflow.php @@ -57,9 +57,11 @@ if (!$force && !$api->isCharacterSetAvailable('utf8mb4')) { $message = pht( "You have an old version of MySQL (older than 5.5) which does not ". - "support the utf8mb4 character set. If you apply adjustments now ". - "and later update MySQL to 5.5 or newer, you'll need to apply ". - "adjustments again (and they will take a long time).\n\n". + "support the utf8mb4 character set. We strongly recomend upgrading to ". + "5.5 or newer.\n\n". + "If you apply adjustments now and later update MySQL to 5.5 or newer, ". + "you'll need to apply adjustments again (and they will take a long ". + "time).\n\n". "You can exit this workflow, update MySQL now, and then run this ". "workflow again. This is recommended, but may cause a lot of downtime ". "right now.\n\n".