diff --git a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php index a53d9bf089..7ee2e02485 100644 --- a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php +++ b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php @@ -1,121 +1,128 @@ stripTextBody($body); $lines = explode("\n", trim($body)); $first_line = head($lines); $command = null; $command_value = null; $matches = null; if (preg_match('/^!(\w+)\s*(\S+)?/', $first_line, $matches)) { $lines = array_slice($lines, 1); $body = implode("\n", $lines); $body = trim($body); $command = $matches[1]; $command_value = idx($matches, 2); } return array( 'body' => $body, 'command' => $command, 'command_value' => $command_value); } public function stripTextBody($body) { return trim($this->stripSignature($this->stripQuotedText($body))); } private function stripQuotedText($body) { // Look for "On , wrote:". This may be split across multiple // lines. We need to be careful not to remove all of a message like this: // // On which day do you want to meet? // // On , wrote: // > Let's set up a meeting. $start = null; $lines = phutil_split_lines($body); foreach ($lines as $key => $line) { if (preg_match('/^\s*>?\s*On\b/', $line)) { $start = $key; } if ($start !== null) { if (preg_match('/\bwrote:/', $line)) { $lines = array_slice($lines, 0, $start); $body = implode('', $lines); break; } } } // Outlook english $body = preg_replace( '/^\s*(> )?-----Original Message-----.*?/imsU', '', $body); // Outlook danish $body = preg_replace( '/^\s*(> )?-----Oprindelig Meddelelse-----.*?/imsU', '', $body); // See example in T3217. $body = preg_replace( '/^________________________________________\s+From:.*?/imsU', '', $body); return rtrim($body); } private function stripSignature($body) { // Quasi-"standard" delimiter, for lols see: // https://bugzilla.mozilla.org/show_bug.cgi?id=58406 $body = preg_replace( '/^-- +$.*/sm', '', $body); + // Mailbox seems to make an attempt to comply with the "standard" but + // omits the leading newline and uses an em dash? + $body = preg_replace( + "/\s*\xE2\x80\x94 \nSent from Mailbox\s*\z/su", + '', + $body); + // HTC Mail application (mobile) $body = preg_replace( '/^\s*^Sent from my HTC smartphone.*/sm', '', $body); // Apple iPhone $body = preg_replace( '/^\s*^Sent from my iPhone\s*$.*/sm', '', $body); return rtrim($body); } } diff --git a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php index 4a9c2cc845..94473f498c 100644 --- a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php +++ b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php @@ -1,183 +1,189 @@ getEmailBodies(); foreach ($bodies as $body) { $parser = new PhabricatorMetaMTAEmailBodyParser(); $stripped = $parser->stripTextBody($body); $this->assertEqual('OKAY', $stripped); } } public function testEmailBodyCommandParsing() { $bodies = $this->getEmailBodiesWithFullCommands(); foreach ($bodies as $body) { $parser = new PhabricatorMetaMTAEmailBodyParser(); $body_data = $parser->parseBody($body); $this->assertEqual('OKAY', $body_data['body']); $this->assertEqual('whatevs', $body_data['command']); $this->assertEqual('dude', $body_data['command_value']); } $bodies = $this->getEmailBodiesWithPartialCommands(); foreach ($bodies as $body) { $parser = new PhabricatorMetaMTAEmailBodyParser(); $body_data = $parser->parseBody($body); $this->assertEqual('OKAY', $body_data['body']); $this->assertEqual('whatevs', $body_data['command']); $this->assertEqual(null, $body_data['command_value']); } } public function testFalsePositiveForOnWrote() { $body = << Hey bro do you want to go ride horses tomorrow? EOEMAIL; $parser = new PhabricatorMetaMTAEmailBodyParser(); $stripped = $parser->stripTextBody($body); $this->assertEqual('On which horse shall you ride?', $stripped); } private function getEmailBodiesWithFullCommands() { $bodies = $this->getEmailBodies(); $with_commands = array(); foreach ($bodies as $body) { $with_commands[] = "!whatevs dude\n".$body; } return $with_commands; } private function getEmailBodiesWithPartialCommands() { $bodies = $this->getEmailBodies(); $with_commands = array(); foreach ($bodies as $body) { $with_commands[] = "!whatevs\n".$body; } return $with_commands; } private function getEmailBodies() { $trailing_space = ' '; + $emdash = "\xE2\x80\x94"; return array( << ... EOEMAIL , << wrote: > ... EOEMAIL , << wrote: > ... EOEMAIL , << ... EOEMAIL , << ... EOEMAIL , << ... EOEMAIL , << ... EOEMAIL , << To: Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm > ... EOEMAIL , << Subject: Core World Tariffs EOMAIL , << On 17 Oct 2013, at 17:47, "Someone" wrote: > ... EOMAIL , << -----Original Message----- > > ... +EOMAIL +, +<<