Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15415488
D20477.id48849.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Referenced Files
None
Subscribers
None
D20477.id48849.diff
View Options
diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php
--- a/src/__phutil_library_map__.php
+++ b/src/__phutil_library_map__.php
@@ -662,6 +662,7 @@
'DifferentialSubscribersCommitMessageField' => 'applications/differential/field/DifferentialSubscribersCommitMessageField.php',
'DifferentialSummaryCommitMessageField' => 'applications/differential/field/DifferentialSummaryCommitMessageField.php',
'DifferentialSummaryField' => 'applications/differential/customfield/DifferentialSummaryField.php',
+ 'DifferentialTabReplacementTestCase' => 'applications/differential/parser/__tests__/DifferentialTabReplacementTestCase.php',
'DifferentialTagsCommitMessageField' => 'applications/differential/field/DifferentialTagsCommitMessageField.php',
'DifferentialTasksCommitMessageField' => 'applications/differential/field/DifferentialTasksCommitMessageField.php',
'DifferentialTestPlanCommitMessageField' => 'applications/differential/field/DifferentialTestPlanCommitMessageField.php',
@@ -6324,6 +6325,7 @@
'DifferentialSubscribersCommitMessageField' => 'DifferentialCommitMessageField',
'DifferentialSummaryCommitMessageField' => 'DifferentialCommitMessageField',
'DifferentialSummaryField' => 'DifferentialCoreCustomField',
+ 'DifferentialTabReplacementTestCase' => 'PhabricatorTestCase',
'DifferentialTagsCommitMessageField' => 'DifferentialCommitMessageField',
'DifferentialTasksCommitMessageField' => 'DifferentialCommitMessageField',
'DifferentialTestPlanCommitMessageField' => 'DifferentialCommitMessageField',
diff --git a/src/applications/differential/parser/DifferentialChangesetParser.php b/src/applications/differential/parser/DifferentialChangesetParser.php
--- a/src/applications/differential/parser/DifferentialChangesetParser.php
+++ b/src/applications/differential/parser/DifferentialChangesetParser.php
@@ -1456,9 +1456,10 @@
$line = phutil_string_cast($line);
- if (strpos($line, "\t") !== false) {
- $line = $this->replaceTabsWithSpaces($line);
- }
+ // TODO: This should be flexible, eventually.
+ $tab_width = 2;
+
+ $line = self::replaceTabsWithSpaces($line, $tab_width);
$line = str_replace($search, $replace, $line);
if ($is_html) {
@@ -1543,13 +1544,9 @@
return $rules;
}
- private function replaceTabsWithSpaces($line) {
- // TODO: This should be flexible, eventually.
- $tab_width = 2;
-
- static $tags;
- if ($tags === null) {
- $tags = array();
+ public static function replaceTabsWithSpaces($line, $tab_width) {
+ static $tags = array();
+ if (empty($tags[$tab_width])) {
for ($ii = 1; $ii <= $tab_width; $ii++) {
$tag = phutil_tag(
'span',
@@ -1562,42 +1559,120 @@
}
}
- // If the line is particularly long, don't try to vectorize it. Use a
- // faster approximation of the correct tabstop expansion instead. This
- // usually still arrives at the right result.
- if (strlen($line) > 256) {
- return str_replace("\t", $tags[$tab_width], $line);
+ // Expand all prefix tabs until we encounter any non-tab character. This
+ // is cheap and often immediately produces the correct result with no
+ // further work (and, particularly, no need to handle any unicode cases).
+
+ $len = strlen($line);
+
+ $head = 0;
+ for ($head = 0; $head < $len; $head++) {
+ $char = $line[$head];
+ if ($char !== "\t") {
+ break;
+ }
+ }
+
+ if ($head) {
+ if (empty($tags[$tab_width * $head])) {
+ $tags[$tab_width * $head] = str_repeat($tags[$tab_width], $head);
+ }
+ $prefix = $tags[$tab_width * $head];
+ $line = substr($line, $head);
+ } else {
+ $prefix = '';
+ }
+
+ // If we have no remaining tabs elsewhere in the string after taking care
+ // of all the prefix tabs, we're done.
+ if (strpos($line, "\t") === false) {
+ return $prefix.$line;
+ }
+
+ $len = strlen($line);
+
+ // If the line is particularly long, don't try to do anything special with
+ // it. Use a faster approximation of the correct tabstop expansion instead.
+ // This usually still arrives at the right result.
+ if ($len > 256) {
+ return $prefix.str_replace("\t", $tags[$tab_width], $line);
}
- $line = phutil_utf8v_combined($line);
$in_tag = false;
$pos = 0;
- foreach ($line as $key => $char) {
- if ($char === '<') {
- $in_tag = true;
- continue;
- }
- if ($char === '>') {
- $in_tag = false;
- continue;
+ // See PHI1210. If the line only has single-byte characters, we don't need
+ // to vectorize it and can avoid an expensive UTF8 call.
+
+ $fast_path = preg_match('/^[\x01-\x7F]*\z/', $line);
+ if ($fast_path) {
+ $replace = array();
+ for ($ii = 0; $ii < $len; $ii++) {
+ $char = $line[$ii];
+ if ($char === '>') {
+ $in_tag = false;
+ continue;
+ }
+
+ if ($in_tag) {
+ continue;
+ }
+
+ if ($char === '<') {
+ $in_tag = true;
+ continue;
+ }
+
+ if ($char === "\t") {
+ $count = $tab_width - ($pos % $tab_width);
+ $pos += $count;
+ $replace[$ii] = $tags[$count];
+ continue;
+ }
+
+ $pos++;
}
- if ($in_tag) {
- continue;
+ if ($replace) {
+ // Apply replacements starting at the end of the string so they
+ // don't mess up the offsets for following replacements.
+ $replace = array_reverse($replace, true);
+
+ foreach ($replace as $replace_pos => $replacement) {
+ $line = substr_replace($line, $replacement, $replace_pos, 1);
+ }
}
+ } else {
+ $line = phutil_utf8v_combined($line);
+ foreach ($line as $key => $char) {
+ if ($char === '>') {
+ $in_tag = false;
+ continue;
+ }
- if ($char === "\t") {
- $count = $tab_width - ($pos % $tab_width);
- $pos += $count;
- $line[$key] = $tags[$count];
- continue;
+ if ($in_tag) {
+ continue;
+ }
+
+ if ($char === '<') {
+ $in_tag = true;
+ continue;
+ }
+
+ if ($char === "\t") {
+ $count = $tab_width - ($pos % $tab_width);
+ $pos += $count;
+ $line[$key] = $tags[$count];
+ continue;
+ }
+
+ $pos++;
}
- $pos++;
+ $line = implode('', $line);
}
- return implode('', $line);
+ return $prefix.$line;
}
}
diff --git a/src/applications/differential/parser/__tests__/DifferentialTabReplacementTestCase.php b/src/applications/differential/parser/__tests__/DifferentialTabReplacementTestCase.php
new file mode 100644
--- /dev/null
+++ b/src/applications/differential/parser/__tests__/DifferentialTabReplacementTestCase.php
@@ -0,0 +1,56 @@
+<?php
+
+final class DifferentialTabReplacementTestCase
+ extends PhabricatorTestCase {
+
+ public function testTabReplacement() {
+ $tab1 = "<span data-copy-text=\"\t\"> </span>";
+ $tab2 = "<span data-copy-text=\"\t\"> </span>";
+
+ $cat = "\xF0\x9F\x90\xB1";
+
+ $cases = array(
+ '' => '',
+ 'x' => 'x',
+
+ // Tabs inside HTML tags should not be replaced.
+ "<\t>x" => "<\t>x",
+
+ // Normal tabs should be replaced. These are all aligned to the tab
+ // width, so they'll be replaced inline.
+ "\tx" => "{$tab2}x",
+ " \tx" => " {$tab2}x",
+ "\t x" => "{$tab2} x",
+ "aa\tx" => "aa{$tab2}x",
+ "aa \tx" => "aa {$tab2}x",
+ "aa\t x" => "aa{$tab2} x",
+
+ // This tab is not tabstop-aligned, so it is replaced with fewer
+ // spaces to bring us to the next tabstop.
+ " \tx" => " {$tab1}x",
+
+ // Text inside HTML tags should not count when aligning tabs with
+ // tabstops.
+ "<tag> </tag>\tx" => "<tag> </tag>{$tab1}x",
+ "<tag2> </tag>\tx" => "<tag2> </tag>{$tab1}x",
+
+ // The code has to take a slow path when inputs contain unicode, but
+ // should produce the right results and align tabs to tabstops while
+ // respecting UTF8 display character widths, not byte widths.
+ "{$cat}\tx" => "{$cat}{$tab1}x",
+ "{$cat}{$cat}\tx" => "{$cat}{$cat}{$tab2}x",
+ );
+
+ foreach ($cases as $input => $expect) {
+ $actual = DifferentialChangesetParser::replaceTabsWithSpaces(
+ $input,
+ 2);
+
+ $this->assertEqual(
+ $expect,
+ $actual,
+ pht('Tabs to Spaces: %s', $input));
+ }
+ }
+
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Mar 21, 6:13 AM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Encrypted (AES-256-CBC)
Storage Handle
7715081
Default Alt Text
D20477.id48849.diff (8 KB)
Attached To
Mode
D20477: Improve the performance of tab replacement in common cases
Attached
Detach File
Event Timeline
Log In to Comment