diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -137,6 +137,7 @@ 'PhutilCalendarEventNode' => 'parser/calendar/data/PhutilCalendarEventNode.php', 'PhutilCalendarNode' => 'parser/calendar/data/PhutilCalendarNode.php', 'PhutilCalendarRawNode' => 'parser/calendar/data/PhutilCalendarRawNode.php', + 'PhutilCalendarRootNode' => 'parser/calendar/data/PhutilCalendarRootNode.php', 'PhutilCallbackFilterIterator' => 'utils/PhutilCallbackFilterIterator.php', 'PhutilCallbackSignalHandler' => 'future/exec/PhutilCallbackSignalHandler.php', 'PhutilChannel' => 'channel/PhutilChannel.php', @@ -228,6 +229,7 @@ 'PhutilHgsprintfTestCase' => 'xsprintf/__tests__/PhutilHgsprintfTestCase.php', 'PhutilHighIntensityIntervalDaemon' => 'daemon/torture/PhutilHighIntensityIntervalDaemon.php', 'PhutilICSParser' => 'parser/calendar/ics/PhutilICSParser.php', + 'PhutilICSParserException' => 'parser/calendar/ics/PhutilICSParserException.php', 'PhutilICSParserTestCase' => 'parser/calendar/ics/__tests__/PhutilICSParserTestCase.php', 'PhutilINIParserException' => 'parser/exception/PhutilINIParserException.php', 'PhutilIPAddress' => 'ip/PhutilIPAddress.php', @@ -710,7 +712,8 @@ 'PhutilCalendarDocumentNode' => 'PhutilCalendarContainerNode', 'PhutilCalendarEventNode' => 'PhutilCalendarNode', 'PhutilCalendarNode' => 'Phobject', - 'PhutilCalendarRawNode' => 'PhutilCalendarNode', + 'PhutilCalendarRawNode' => 'PhutilCalendarContainerNode', + 'PhutilCalendarRootNode' => 'PhutilCalendarContainerNode', 'PhutilCallbackFilterIterator' => 'FilterIterator', 'PhutilCallbackSignalHandler' => 'PhutilSignalHandler', 'PhutilChannel' => 'Phobject', @@ -808,6 +811,7 @@ 'PhutilHgsprintfTestCase' => 'PhutilTestCase', 'PhutilHighIntensityIntervalDaemon' => 'PhutilTortureTestDaemon', 'PhutilICSParser' => 'Phobject', + 'PhutilICSParserException' => 'Exception', 'PhutilICSParserTestCase' => 'PhutilTestCase', 'PhutilINIParserException' => 'Exception', 'PhutilIPAddress' => 'Phobject', diff --git a/src/parser/calendar/data/PhutilCalendarRawNode.php b/src/parser/calendar/data/PhutilCalendarRawNode.php --- a/src/parser/calendar/data/PhutilCalendarRawNode.php +++ b/src/parser/calendar/data/PhutilCalendarRawNode.php @@ -1,7 +1,7 @@ getChildrenOfType(PhutilCalendarDocumentNode::NODETYPE); + } + +} diff --git a/src/parser/calendar/ics/PhutilICSParser.php b/src/parser/calendar/ics/PhutilICSParser.php --- a/src/parser/calendar/ics/PhutilICSParser.php +++ b/src/parser/calendar/ics/PhutilICSParser.php @@ -5,42 +5,67 @@ private $stack; private $node; private $document; + private $lines; + private $cursor; + + const PARSE_MISSING_END = 'missing-end'; + const PARSE_INITIAL_UNFOLD = 'initial-unfold'; + const PARSE_UNEXPECTED_CHILD = 'unexpected-child'; + const PARSE_EXTRA_END = 'extra-end'; + const PARSE_MISMATCHED_SECTIONS = 'mismatched-sections'; + const PARSE_ROOT_PROPERTY = 'root-property'; + const PARSE_BAD_BASE64 = 'bad-base64'; + const PARSE_BAD_BOOLEAN = 'bad-boolean'; + const PARSE_UNEXPECTED_TEXT = 'unexpected-text'; + const PARSE_MALFORMED_DOUBLE_QUOTE = 'malformed-double-quote'; + const PARSE_MALFORMED_PARAMETER_NAME = 'malformed-parameter'; + const PARSE_MALFORMED_PROPERTY = 'malformed-property'; + const PARSE_MISSING_VALUE = 'missing-value'; + const PARSE_UNESCAPED_BACKSLASH = 'unescaped-backslash'; public function parseICSData($data) { $this->stack = array(); $this->node = null; - $this->document = null; + $this->cursor = null; $lines = $this->unfoldICSLines($data); + $this->lines = $lines; - foreach ($lines as $line) { + $root = $this->newICSNode(''); + $this->stack[] = $root; + $this->node = $root; + + foreach ($lines as $key => $line) { + $this->cursor = $key; $matches = null; if (preg_match('(^BEGIN:(.*)\z)', $line, $matches)) { $this->beginParsingNode($matches[1]); } else if (preg_match('(^END:(.*)\z)', $line, $matches)) { $this->endParsingNode($matches[1]); } else { + if (count($this->stack) < 2) { + $this->raiseParseFailure( + self::PARSE_ROOT_PROPERTY, + pht( + 'Found unexpected property at ICS document root.')); + } $this->parseICSProperty($line); } } - if (!$this->document) { - $this->raiseParseFailure( - pht( - 'Expected ICS document to define a "VCALENDAR" section.')); - } - - if ($this->stack) { + if (count($this->stack) > 1) { $this->raiseParseFailure( + self::PARSE_MISSING_END, pht( 'Expected all "BEGIN:" sections in ICS document to have '. 'corresponding "END:" sections.')); } - $document = $this->document; - $this->document = null; + $this->node = null; + $this->lines = null; + $this->cursor = null; - return $document; + return $root; } private function getNode() { @@ -49,22 +74,25 @@ private function unfoldICSLines($data) { $lines = phutil_split_lines($data, $retain_endings = false); + $this->lines = $lines; // ICS files are wrapped at 75 characters, with overlong lines continued // on the following line with an initial space or tab. Unwrap all of the // lines in the file. $last = null; foreach ($lines as $idx => $line) { + $this->cursor = $idx; if (!preg_match('/^[ \t]/', $line)) { $last = $idx; continue; } if ($last === null) { - throw new Exception( + $this->raiseParseFailure( + self::PARSE_INITIAL_UNFOLD, pht( 'First line of ICS file begins with a space or tab, but this '. - 'marks a continuation line.')); + 'marks a line which should be unfolded.')); } $lines[$last] = $lines[$last].substr($line, 1); @@ -78,31 +106,15 @@ $node = $this->getNode(); $new_node = $this->newICSNode($type); - if ($node) { - if ($node instanceof PhutilCalendarContainerNode) { - $node->appendChild($new_node); - } else { - $this->raiseParseFailure( - pht( - 'Found unexpected node "%s" inside node "%s".', - $new_node->getAttribute('ics.type'), - $node->getAttribute('ics.type'))); - } + if ($node instanceof PhutilCalendarContainerNode) { + $node->appendChild($new_node); } else { - if ($new_node instanceof PhutilCalendarDocumentNode) { - if ($this->document) { - $this->raiseParseFailure( - pht( - 'Found multiple "VCALENDAR" nodes in ICS document, '. - 'expected only one.')); - } else { - $this->document = $new_node; - } - } else { - $this->raiseParseFailure( - pht( - 'Expected ICS document to begin "BEGIN:VCALENDAR".')); - } + $this->raiseParseFailure( + self::PARSE_UNEXPECTED_CHILD, + pht( + 'Found unexpected node "%s" inside node "%s".', + $new_node->getAttribute('ics.type'), + $node->getAttribute('ics.type'))); } $this->stack[] = $new_node; @@ -113,6 +125,9 @@ private function newICSNode($type) { switch ($type) { + case '': + $node = new PhutilCalendarRootNode(); + break; case 'VCALENDAR': $node = new PhutilCalendarDocumentNode(); break; @@ -131,8 +146,9 @@ private function endParsingNode($type) { $node = $this->getNode(); - if (!$node) { + if ($node instanceof PhutilCalendarRootNode) { $this->raiseParseFailure( + self::PARSE_EXTRA_END, pht( 'Found unexpected "END" without a "BEGIN".')); } @@ -140,6 +156,7 @@ $old_type = $node->getAttribute('ics.type'); if ($old_type != $type) { $this->raiseParseFailure( + self::PARSE_MISMATCHED_SECTIONS, pht( 'Found mismatched "BEGIN" ("%s") and "END" ("%s") sections.', $old_type, @@ -147,11 +164,7 @@ } array_pop($this->stack); - if ($this->stack) { - $this->node = last($this->stack); - } else { - $this->node = null; - } + $this->node = last($this->stack); return $this; } @@ -163,12 +176,12 @@ // by either a ";" (to begin a list of parameters) or a ":" (to begin // the actual field body). - $ok = preg_match('(^([^;:]+)([;:])(.*)\z)', $line, $matches); + $ok = preg_match('(^([A-Za-z0-9-]+)([;:])(.*)\z)', $line, $matches); if (!$ok) { $this->raiseParseFailure( + self::PARSE_MALFORMED_PROPERTY, pht( - 'Found malformed line in ICS document: %s', - $line)); + 'Found malformed property in ICS document.')); } $name = $matches[1]; @@ -185,6 +198,7 @@ $ok = preg_match('(^([^=]+)=)', $body, $matches); if (!$ok) { $this->raiseParseFailure( + self::PARSE_MALFORMED_PARAMETER_NAME, pht( 'Found malformed property in ICS document: %s', $body)); @@ -206,24 +220,17 @@ $matches); if (!$ok) { $this->raiseParseFailure( + self::PARSE_MALFORMED_DOUBLE_QUOTE, pht( 'Found malformed double-quoted string in ICS document '. - 'parameter value: %s', - $body)); + 'parameter value.')); } } else { $is_quoted = false; - $ok = preg_match( - '(^([^\x00-\x08\x10-\x19";:,]*))', - $body, - $matches); - if (!$ok) { - $this->raiseParseFailure( - pht( - 'Found malformed unquoted string in ICS document '. - 'parameter value: %s', - $body)); - } + + // It's impossible for this not to match since it can match + // nothing, and it's valid for it to match nothing. + preg_match('(^([^\x00-\x08\x10-\x19";:,]*))', $body, $matches); } // NOTE: RFC5545 says "Property parameter values that are not in @@ -239,6 +246,7 @@ $body = substr($body, strlen($matches[0])); if (!strlen($body)) { $this->raiseParseFailure( + self::PARSE_MISSING_VALUE, pht( 'Expected ":" after parameters in ICS document property.')); } @@ -250,17 +258,27 @@ continue; } + // If we have a semicolon, we're going to read another parameter. + if ($body[0] == ';') { + break; + } + // If we have a colon, this is the last value and also the last // property. Break, then handle the colon below. if ($body[0] == ':') { break; } + $short_body = id(new PhutilUTF8StringTruncator()) + ->setMaximumGlyphs(32) + ->truncateString($body); + // We aren't expecting anything else. $this->raiseParseFailure( + self::PARSE_UNEXPECTED_TEXT, pht( - 'Found unexpected text after reading parameter value: %s', - $body)); + 'Found unexpected text ("%s") after reading parameter value.', + $short_body)); } $parameters[] = array( @@ -268,6 +286,11 @@ 'values' => $param_values, ); + if ($body[0] == ';') { + $body = substr($body, 1); + continue; + } + if ($body[0] == ':') { $body = substr($body, 1); break; @@ -278,6 +301,7 @@ $value = $this->unescapeFieldValue($name, $parameters, $body); $node = $this->getNode(); + $raw = $node->getAttribute('ics.properties', array()); $raw[] = array( 'name' => $name, @@ -409,9 +433,10 @@ switch ($value_type) { case 'BINARY': - $result = base64_decode($data); + $result = base64_decode($data, true); if ($result === false) { $this->raiseParseFailure( + self::PARSE_BAD_BASE64, pht( 'Unable to decode base64 data: %s', $data)); @@ -425,6 +450,7 @@ $result = phutil_utf8_strtolower($data); if (!isset($map[$result])) { $this->raiseParseFailure( + self::PARSE_BAD_BOOLEAN, pht( 'Unexpected BOOLEAN value "%s".', $data)); @@ -517,7 +543,8 @@ } if ($esc) { - $this->raiseParsFailure( + $this->raiseParseFailure( + self::PARSE_UNESCAPED_BACKSLASH, pht( 'ICS document contains TEXT value ending with unescaped '. 'backslash.')); @@ -528,8 +555,21 @@ return $result; } - private function raiseParseFailure($message) { - throw new Exception($message); + private function raiseParseFailure($code, $message) { + if ($this->lines && isset($this->lines[$this->cursor])) { + $message = pht( + "ICS Parse Error near line %s:\n\n>>> %s\n\n%s", + $this->cursor + 1, + $this->lines[$this->cursor], + $message); + } else { + $message = pht( + 'ICS Parse Error: %s', + $message); + } + + throw id(new PhutilICSParserException($message)) + ->setParserFailureCode($code); } } diff --git a/src/parser/calendar/ics/PhutilICSParserException.php b/src/parser/calendar/ics/PhutilICSParserException.php new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/PhutilICSParserException.php @@ -0,0 +1,16 @@ +parserFailureCode = $code; + return $this; + } + + public function getParserFailureCode() { + return $this->parserFailureCode; + } + +} diff --git a/src/parser/calendar/ics/__tests__/PhutilICSParserTestCase.php b/src/parser/calendar/ics/__tests__/PhutilICSParserTestCase.php --- a/src/parser/calendar/ics/__tests__/PhutilICSParserTestCase.php +++ b/src/parser/calendar/ics/__tests__/PhutilICSParserTestCase.php @@ -3,7 +3,11 @@ final class PhutilICSParserTestCase extends PhutilTestCase { public function testICSParser() { - $document = $this->parseICSDocument('simple.ics'); + $root = $this->parseICSDocument('simple.ics'); + + $documents = $root->getDocuments(); + $this->assertEqual(1, count($documents)); + $document = head($documents); $events = $document->getEvents(); $this->assertEqual(1, count($events)); @@ -69,6 +73,68 @@ $event->getAttribute('ics.properties')); } + public function testICSParserErrors() { + $map = array( + 'err-missing-end.ics' => PhutilICSParser::PARSE_MISSING_END, + 'err-bad-base64.ics' => PhutilICSParser::PARSE_BAD_BASE64, + 'err-bad-boolean.ics' => PhutilICSParser::PARSE_BAD_BOOLEAN, + 'err-extra-end.ics' => PhutilICSParser::PARSE_EXTRA_END, + 'err-initial-unfold.ics' => PhutilICSParser::PARSE_INITIAL_UNFOLD, + 'err-malformed-double-quote.ics' => + PhutilICSParser::PARSE_MALFORMED_DOUBLE_QUOTE, + 'err-malformed-parameter.ics' => + PhutilICSParser::PARSE_MALFORMED_PARAMETER_NAME, + 'err-malformed-property.ics' => + PhutilICSParser::PARSE_MALFORMED_PROPERTY, + 'err-missing-value.ics' => PhutilICSParser::PARSE_MISSING_VALUE, + 'err-mixmatched-sections.ics' => + PhutilICSParser::PARSE_MISMATCHED_SECTIONS, + 'err-root-property.ics' => PhutilICSParser::PARSE_ROOT_PROPERTY, + 'err-unescaped-backslash.ics' => + PhutilICSParser::PARSE_UNESCAPED_BACKSLASH, + 'err-unexpected-child.ics' => PhutilICSParser::PARSE_UNEXPECTED_CHILD, + 'err-unexpected-text.ics' => PhutilICSParser::PARSE_UNEXPECTED_TEXT, + + 'simple.ics' => null, + 'good-boolean.ics' => null, + 'multiple-vcalendars.ics' => null, + ); + + foreach ($map as $test_file => $expect) { + $caught = null; + try { + $this->parseICSDocument($test_file); + } catch (PhutilICSParserException $ex) { + $caught = $ex; + } + + if ($expect === null) { + $this->assertTrue( + ($caught === null), + pht( + 'Expected no exception parsing "%s", got: %s', + $test_file, + (string)$ex)); + } else { + if ($caught) { + $code = $ex->getParserFailureCode(); + $explain = pht( + 'Expected one exception parsing "%s", got a different '. + 'one: %s', + $test_file, + (string)$ex); + } else { + $code = null; + $explain = pht( + 'Expected exception parsing "%s", got none.', + $test_file); + } + + $this->assertEqual($expect, $code, $explain); + } + } + } + private function parseICSDocument($name) { $path = dirname(__FILE__).'/data/'.$name; $data = Filesystem::readFile($path); diff --git a/src/parser/calendar/ics/__tests__/data/err-bad-base64.ics b/src/parser/calendar/ics/__tests__/data/err-bad-base64.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-bad-base64.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +DATA;VALUE=BINARY;ENCODING=BASE64: +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-bad-boolean.ics b/src/parser/calendar/ics/__tests__/data/err-bad-boolean.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-bad-boolean.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +DUCK;VALUE=BOOLEAN:QUACK +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-extra-end.ics b/src/parser/calendar/ics/__tests__/data/err-extra-end.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-extra-end.ics @@ -0,0 +1 @@ +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-initial-unfold.ics b/src/parser/calendar/ics/__tests__/data/err-initial-unfold.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-initial-unfold.ics @@ -0,0 +1,2 @@ + BEGIN:VCALENDAR +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-malformed-double-quote.ics b/src/parser/calendar/ics/__tests__/data/err-malformed-double-quote.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-malformed-double-quote.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +A;B="C:D +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-malformed-parameter.ics b/src/parser/calendar/ics/__tests__/data/err-malformed-parameter.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-malformed-parameter.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +A;B:C +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-malformed-property.ics b/src/parser/calendar/ics/__tests__/data/err-malformed-property.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-malformed-property.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +PEANUTBUTTER&JELLY:sandwich +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-missing-end.ics b/src/parser/calendar/ics/__tests__/data/err-missing-end.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-missing-end.ics @@ -0,0 +1,2 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT diff --git a/src/parser/calendar/ics/__tests__/data/err-missing-value.ics b/src/parser/calendar/ics/__tests__/data/err-missing-value.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-missing-value.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +TRIANGLE;color=red +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-mixmatched-sections.ics b/src/parser/calendar/ics/__tests__/data/err-mixmatched-sections.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-mixmatched-sections.ics @@ -0,0 +1,4 @@ +BEGIN:A +BEGIN:B +END:A +END:B diff --git a/src/parser/calendar/ics/__tests__/data/err-root-property.ics b/src/parser/calendar/ics/__tests__/data/err-root-property.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-root-property.ics @@ -0,0 +1 @@ +NAME:value diff --git a/src/parser/calendar/ics/__tests__/data/err-unescaped-backslash.ics b/src/parser/calendar/ics/__tests__/data/err-unescaped-backslash.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-unescaped-backslash.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +STORY:The duck coughed up an unescaped backslash: \ +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-unexpected-child.ics b/src/parser/calendar/ics/__tests__/data/err-unexpected-child.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-unexpected-child.ics @@ -0,0 +1,6 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +BEGIN:TEST +END:TEST +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/err-unexpected-text.ics b/src/parser/calendar/ics/__tests__/data/err-unexpected-text.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/err-unexpected-text.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +SQUARE;color=red" +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/good-boolean.ics b/src/parser/calendar/ics/__tests__/data/good-boolean.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/good-boolean.ics @@ -0,0 +1,5 @@ +BEGIN:VCALENDAR +BEGIN:VEVENT +DUCK;VALUE=BOOLEAN:TRUE +END:VEVENT +END:VCALENDAR diff --git a/src/parser/calendar/ics/__tests__/data/multiple-vcalendars.ics b/src/parser/calendar/ics/__tests__/data/multiple-vcalendars.ics new file mode 100644 --- /dev/null +++ b/src/parser/calendar/ics/__tests__/data/multiple-vcalendars.ics @@ -0,0 +1,4 @@ +BEGIN:VCALENDAR +END:VCALENDAR +BEGIN:VCALENDAR +END:VCALENDAR