Skip to content

Commit

Permalink
Optimize stripping empty/blank lines
Browse files Browse the repository at this point in the history
To calculate the similarity ratio while empty/blank lines are ignored,
those lines have to be stripped from the sequences beforehand.
The stripped lines are restored after the calculation, so the class can
still be used as a SequenceMatcher.
  • Loading branch information
DigiLive authored and DigiLive committed Dec 18, 2020
1 parent 75f5ce0 commit a239f17
Showing 1 changed file with 65 additions and 35 deletions.
100 changes: 65 additions & 35 deletions lib/jblond/Diff/Similarity.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ class Similarity extends SequenceMatcher
* @var array Count of each unique sequence at version 2.
*/
private $uniqueCount2;
/**
* @var array Lines stripped from the sequences by Similarity::stripLines(), stored per version as original index => line.
* @see Similarity::stripLines()
*/
private $stripped = ['old' => [], 'new' => []];


/**
Expand Down Expand Up @@ -65,15 +70,22 @@ public function setSeq2($version2)
*/
public function getSimilarity(int $type = self::CALC_DEFAULT): float
{
if ($this->options['ignoreLines']) {
// Backup original sequences and filter non blank lines.
$this->stripLines();
}

switch ($type) {
case self::CALC_FAST:
return $this->getRatioFast();
$ratio = $this->getRatioFast();
$this->restoreLines();
break;
case self::CALC_FASTEST:
return $this->getRatioFastest();
$ratio = $this->getRatioFastest();
$this->restoreLines();
break;
default:
if ($this->options['ignoreLines']) {
$this->stripLines();
}
$this->setSequences($this->old, $this->new);
$matches = array_reduce(
$this->getMatchingBlocks(),
function ($carry, $item) {
Expand All @@ -82,8 +94,44 @@ function ($carry, $item) {
0
);

return $this->calculateRatio($matches, count($this->old) + count($this->new));
// TODO: Restore original (un-stripped) versions?
$ratio = $this->calculateRatio($matches, count($this->old) + count($this->new));
$this->restoreLines();
$this->setSequences($this->old, $this->new);
}

return $ratio;
}

/**
 * Strip empty or blank lines from the sequences to compare.
 *
 * Empty lines are removed from both versions. When the ignoreLines option equals
 * DIFF_IGNORE_LINE_BLANK, lines that are empty after trimming are removed as well.
 * Every removed line is stored, untrimmed, in Similarity::$stripped under the index it
 * had in its sequence. The remaining lines are re-indexed afterwards.
 */
private function stripLines(): void
{
    // The option does not change while stripping, so evaluate it once.
    $ignoreBlank = $this->options['ignoreLines'] == self::DIFF_IGNORE_LINE_BLANK;

    foreach (['old', 'new'] as $version) {
        // Start from a clean record: entries left over from an earlier run carry indexes
        // which may not belong to the current sequences.
        $this->stripped[$version] = [];

        $remaining = array_filter(
            $this->$version,
            function ($line, $index) use ($version, $ignoreBlank) {
                $sanitizedLine = $ignoreBlank ? trim($line) : $line;

                if ($sanitizedLine == '') {
                    // Store the original line to be able to restore it later.
                    $this->stripped[$version][$index] = $line;

                    return false;
                }

                return true;
            },
            ARRAY_FILTER_USE_BOTH
        );

        // Re-index sequence.
        $this->$version = array_values($remaining);
    }
}

Expand All @@ -97,6 +145,7 @@ function ($carry, $item) {
private function getRatioFast(): float
{
if ($this->uniqueCount2 === null) {
// Build unless cached.
$this->uniqueCount2 = [];
$bLength = count($this->new);
for ($iterator = 0; $iterator < $bLength; ++$iterator) {
Expand Down Expand Up @@ -140,6 +189,15 @@ private function calculateRatio(int $matches, int $length = 0): float
return $returnValue;
}

/**
 * Put the lines recorded in Similarity::$stripped back into the sequences.
 *
 * Each stored line is inserted at the index it is stored under. The record is emptied
 * afterwards, so a later call cannot insert the same lines a second time.
 */
private function restoreLines(): void
{
    foreach (['old', 'new'] as $version) {
        // Insert in ascending index order; every insertion shifts the lines behind it, so
        // a lower index must be restored before a higher one.
        ksort($this->stripped[$version]);

        foreach ($this->stripped[$version] as $index => $line) {
            // Wrap the line in an array so exactly one element is inserted, whatever its value.
            array_splice($this->$version, $index, 0, [$line]);
        }

        // The lines are back in place; forget them to prevent duplicate insertion.
        $this->stripped[$version] = [];
    }
}

/**
* Return an upper bound ratio really quickly for the similarity of the strings.
*
Expand All @@ -155,34 +213,6 @@ private function getRatioFastest(): float
return $this->calculateRatio(min($aLength, $bLength), $aLength + $bLength);
}

/**
 * Remove empty lines from both sequences and hand the result to setSequences().
 *
 * When the ignoreLines option equals DIFF_IGNORE_LINE_BLANK, every line is trimmed first,
 * so whitespace-only lines are removed too and the lines which remain stay trimmed.
 * The filtered sequences are re-indexed before they are passed to setSequences().
 */
private function stripLines(): void
{
    $trimFirst = $this->options['ignoreLines'] == self::DIFF_IGNORE_LINE_BLANK;

    foreach (['old', 'new'] as $version) {
        $lines = $this->$version;

        if ($trimFirst) {
            // Trim every line; keys are preserved.
            $lines = array_map(
                function ($line) {
                    return trim($line);
                },
                $lines
            );
        }

        // Keep only the lines which are not empty, under their original keys.
        $kept = [];
        foreach ($lines as $key => $line) {
            if ($line != '') {
                $kept[$key] = $line;
            }
        }

        $this->$version = $kept;
    }

    $this->setSequences(array_values($this->old), array_values($this->new));
}

/**
* Helper function to calculate the number of matches for Ratio().
*
Expand Down

0 comments on commit a239f17

Please sign in to comment.