Changeset View
Standalone View
src/applications/diffusion/engine/DiffusionCommitHookEngine.php
Show First 20 Lines • Show All 160 Lines • ▼ Show 20 Lines | try { | ||||
$is_initial_import = $this->isInitialImport($all_updates); | $is_initial_import = $this->isInitialImport($all_updates); | ||||
if (!$is_initial_import) { | if (!$is_initial_import) { | ||||
$this->applyHeraldRefRules($ref_updates); | $this->applyHeraldRefRules($ref_updates); | ||||
} | } | ||||
try { | try { | ||||
if (!$is_initial_import) { | if (!$is_initial_import) { | ||||
$this->rejectOversizedFiles($content_updates); | |||||
} | |||||
} catch (DiffusionCommitHookRejectException $ex) { | |||||
// If we're rejecting oversized files, flag everything. | |||||
$this->rejectCode = PhabricatorRepositoryPushLog::REJECT_OVERSIZED; | |||||
throw $ex; | |||||
} | |||||
try { | |||||
if (!$is_initial_import) { | |||||
$this->rejectEnormousChanges($content_updates); | $this->rejectEnormousChanges($content_updates); | ||||
} | } | ||||
} catch (DiffusionCommitHookRejectException $ex) { | } catch (DiffusionCommitHookRejectException $ex) { | ||||
// If we're rejecting enormous changes, flag everything. | // If we're rejecting enormous changes, flag everything. | ||||
$this->rejectCode = PhabricatorRepositoryPushLog::REJECT_ENORMOUS; | $this->rejectCode = PhabricatorRepositoryPushLog::REJECT_ENORMOUS; | ||||
throw $ex; | throw $ex; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,073 Lines • ▼ Show 20 Lines | if (isset($this->changesets[$identifier])) { | ||||
return $cached; | return $cached; | ||||
} | } | ||||
$info = $this->loadChangesetsForCommit($identifier); | $info = $this->loadChangesetsForCommit($identifier); | ||||
list($changesets, $size) = $info; | list($changesets, $size) = $info; | ||||
return $changesets; | return $changesets; | ||||
} | } | ||||
private function rejectOversizedFiles(array $content_updates) { | |||||
$repository = $this->getRepository(); | |||||
// TODO: Allow repositories to be configured for a maximum filesize. | |||||
$limit = 0; | |||||
if (!$limit) { | |||||
return; | |||||
} | |||||
foreach ($content_updates as $update) { | |||||
$identifier = $update->getRefNew(); | |||||
$sizes = $this->loadFileSizesForCommit($identifier); | |||||
foreach ($sizes as $path => $size) { | |||||
if ($size <= $limit) { | |||||
continue; | |||||
} | |||||
$message = pht( | |||||
'OVERSIZED FILE'. | |||||
"\n". | |||||
'This repository ("%s") is configured with a maximum individual '. | |||||
'file size limit, but you are pushing a change ("%s") which causes '. | |||||
'the size of a file ("%s") to exceed the limit. The commit makes '. | |||||
'the file %s bytes long, but the limit for this repository is '. | |||||
'%s bytes.', | |||||
$repository->getDisplayName(), | |||||
$identifier, | |||||
$path, | |||||
new PhutilNumber($size), | |||||
new PhutilNumber($limit)); | |||||
throw new DiffusionCommitHookRejectException($message); | |||||
} | |||||
} | |||||
} | |||||
public function loadFileSizesForCommit($identifier) { | |||||
$repository = $this->getRepository(); | |||||
$vcs = $repository->getVersionControlSystem(); | |||||
$path_sizes = array(); | |||||
switch ($vcs) { | |||||
case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: | |||||
list($paths_raw) = $repository->execxLocalCommand( | |||||
'diff-tree -z -r --no-commit-id %s --', | |||||
$identifier); | |||||
// With "-z" we get "<fields>\0<filename>\0" for each line. Group the | |||||
// delimited text into "<fields>, <filename>" pairs. | |||||
$paths_raw = trim($paths_raw, "\0"); | |||||
$paths_raw = explode("\0", $paths_raw); | |||||
if (count($paths_raw) % 2) { | |||||
throw new Exception( | |||||
pht( | |||||
'Unexpected number of output lines from "git diff-tree" when '. | |||||
'processing commit ("%s"): got %s lines, expected an even '. | |||||
'number.', | |||||
$identifier, | |||||
phutil_count($paths_raw))); | |||||
} | |||||
$paths_raw = array_chunk($paths_raw, 2); | |||||
$paths = array(); | |||||
foreach ($paths_raw as $path_raw) { | |||||
list($fields, $pathname) = $path_raw; | |||||
$fields = explode(' ', $fields); | |||||
// Fields are: | |||||
// | |||||
// :100644 100644 aaaa bbbb M | |||||
// | |||||
// [0] Old file mode. | |||||
// [1] New file mode. | |||||
// [2] Old object hash. | |||||
// [3] New object hash. | |||||
// [4] Change mode. | |||||
$paths[] = array( | |||||
'path' => $pathname, | |||||
'newHash' => $fields[3], | |||||
); | |||||
} | |||||
if ($paths) { | |||||
$check_paths = array(); | |||||
foreach ($paths as $path) { | |||||
if ($path['newHash'] === self::EMPTY_HASH) { | |||||
$path_sizes[$path['path']] = 0; | |||||
continue; | |||||
} | |||||
$check_paths[$path['newHash']][] = $path['path']; | |||||
} | |||||
if ($check_paths) { | |||||
$future = $repository->getLocalCommandFuture( | |||||
'cat-file --batch-check=%s', | |||||
'%(objectsize)') | |||||
amckinley: We definitely want `objectsize` and not `objectsize:disk`? From the docs:
> Note that the… | |||||
->write(implode("\n", array_keys($check_paths))); | |||||
list($sizes) = $future->resolvex(); | |||||
$sizes = trim($sizes); | |||||
$sizes = phutil_split_lines($sizes, false); | |||||
if (count($sizes) !== count($check_paths)) { | |||||
throw new Exception( | |||||
pht( | |||||
'Unexpected number of output lines from "git cat-file" when '. | |||||
'processing commit ("%s"): got %s lines, expected %s.', | |||||
$identifier, | |||||
phutil_count($sizes), | |||||
phutil_count($check_paths))); | |||||
} | |||||
foreach ($check_paths as $object_hash => $path_names) { | |||||
$object_size = (int)array_shift($sizes); | |||||
Not Done Inline ActionsDoes git cat-file promise that output will be ordered identically to input? I guess it would have to pretty pathological to take paths via STDIN and then permute the output ordering, though. amckinley: Does `git cat-file` promise that output will be ordered identically to input? I guess it would… | |||||
Done Inline ActionsThe manpage doesn't explicitly promise that it's ordered as far as I can tell, but it is in practice, and this sort of use case is clearly the primary/intended use case. We've used cat-file --batch-check elsewhere for years, also assuming order, without issues. epriestley: The manpage doesn't explicitly promise that it's ordered as far as I can tell, but it is in… | |||||
foreach ($path_names as $path_name) { | |||||
$path_sizes[$path_name] = $object_size; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
break; | |||||
default: | |||||
throw new Exception( | |||||
pht( | |||||
'File size limits are not supported for this VCS.')); | |||||
Not Done Inline ActionsI'm not wild about an 80-line case statement. Maybe just throw at the top of the function and avoid this level of indentation entirely? I usually like to separate control flow from complicated logic like the guts of this function. amckinley: I'm not wild about an 80-line `case` statement. Maybe just `throw` at the top of the function… | |||||
Done Inline ActionsYeah, I anticipate moving this to a LowLevelQuery sort of class in the next change or two. epriestley: Yeah, I anticipate moving this to a `LowLevelQuery` sort of class in the next change or two. | |||||
} | |||||
return $path_sizes; | |||||
} | |||||
public function loadCommitRefForCommit($identifier) { | public function loadCommitRefForCommit($identifier) { | ||||
$repository = $this->getRepository(); | $repository = $this->getRepository(); | ||||
$vcs = $repository->getVersionControlSystem(); | $vcs = $repository->getVersionControlSystem(); | ||||
switch ($vcs) { | switch ($vcs) { | ||||
case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: | case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT: | ||||
case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: | case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL: | ||||
return id(new DiffusionLowLevelCommitQuery()) | return id(new DiffusionLowLevelCommitQuery()) | ||||
->setRepository($repository) | ->setRepository($repository) | ||||
▲ Show 20 Lines • Show All 111 Lines • Show Last 20 Lines |
We definitely want objectsize and not objectsize:disk? From the docs:
From that info, it seems like using objectsize will allow small changes to existing huge objects to go through (which is arguably a good thing in the interest of backwards compatibility with existing huge objects?), while using objectsize:disk will start throwing exceptions on any change to an existing large object.