Skip to content

Commit

Permalink
Add q/Q commands and fix cm overwriting old matrix instead of multiplying it
Browse files Browse the repository at this point in the history
  • Loading branch information
DominikDostal committed Jun 6, 2024
1 parent fdc74ea commit 9a9acba
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 17 deletions.
66 changes: 52 additions & 14 deletions src/Smalot/PdfParser/Page.php
Original file line number Diff line number Diff line change
Expand Up @@ -525,14 +525,17 @@ public function getDataCommands(?array $extractedDecodedRawData = null): array
case 'BT':
$extractedData[] = $command;
break;

/*
* ET
* End a text object, discarding the text matrix
* cm
* Concatenation Matrix that will transform all following Tm
*/
case 'cm':
$extractedData[] = $command;
break;
/*
* ET
* End a text object, discarding the text matrix
*/
case 'ET':
$extractedData[] = $command;
break;
Expand Down Expand Up @@ -643,6 +646,18 @@ public function getDataCommands(?array $extractedDecodedRawData = null): array
case 'TJ':
$extractedData[] = $command;
break;
/*
* q
* Save current graphics state to stack
*/
case 'q':
/*
* Q
* Load last saved graphics state from stack
*/
case 'Q':
$extractedData[] = $command;
break;
default:
}
}
Expand Down Expand Up @@ -675,7 +690,7 @@ public function getDataTm(?array $dataCommands = null): array
*/
$defaultTm = ['1', '0', '0', '1', '0', '0'];
$concatTm = ['1', '0', '0', '1', '0', '0'];

$graphicsStatesStack = [];
/*
* Set the text leading used by T*, ' and " operators
*/
Expand Down Expand Up @@ -734,13 +749,22 @@ public function getDataTm(?array $dataCommands = null): array
$Ty = 0;
break;

case 'cm':
$newConcatTm = (array) explode(' ', $command['c']);
$TempMatrix = [];
// Multiply with previous concatTm
$TempMatrix[0] = (float) $concatTm[0] * (float) $newConcatTm[0] + (float) $concatTm[1] * (float) $newConcatTm[2];
$TempMatrix[1] = (float) $concatTm[0] * (float) $newConcatTm[1] + (float) $concatTm[1] * (float) $newConcatTm[3];
$TempMatrix[2] = (float) $concatTm[2] * (float) $newConcatTm[0] + (float) $concatTm[3] * (float) $newConcatTm[2];
$TempMatrix[3] = (float) $concatTm[2] * (float) $newConcatTm[1] + (float) $concatTm[3] * (float) $newConcatTm[3];
$TempMatrix[4] = (float) $concatTm[4] * (float) $newConcatTm[0] + (float) $concatTm[5] * (float) $newConcatTm[2] + (float) $newConcatTm[4];
$TempMatrix[5] = (float) $concatTm[4] * (float) $newConcatTm[1] + (float) $concatTm[5] * (float) $newConcatTm[3] + (float) $newConcatTm[5];
$concatTm = $TempMatrix;
break;
/*
* ET
* End a text object
*/
case 'cm':
$concatTm = explode(' ', $command['c']);
break;
case 'ET':
break;

Expand Down Expand Up @@ -792,14 +816,14 @@ public function getDataTm(?array $dataCommands = null): array
* [1 0 0 1 0 0]
*/
case 'Tm':
$Tm = explode(' ', $command['c']);
$Tm = explode(' ', $command['c']);
$TempMatrix = [];
$TempMatrix[0] = $Tm[0] * $concatTm[0] + $Tm[1] * $concatTm[2];
$TempMatrix[1] = $Tm[0] * $concatTm[1] + $Tm[1] * $concatTm[3];
$TempMatrix[2] = $Tm[2] * $concatTm[0] + $Tm[3] * $concatTm[2];
$TempMatrix[3] = $Tm[2] * $concatTm[1] + $Tm[3] * $concatTm[3];
$TempMatrix[4] = $Tm[4] * $concatTm[0] + $Tm[5] * $concatTm[2] + $concatTm[4];
$TempMatrix[5] = $Tm[4] * $concatTm[1] + $Tm[5] * $concatTm[3] + $concatTm[5];
$TempMatrix[0] = round((float) $Tm[0] * (float) $concatTm[0] + (float) $Tm[1] * (float) $concatTm[2], 6);
$TempMatrix[1] = round((float) $Tm[0] * (float) $concatTm[1] + (float) $Tm[1] * (float) $concatTm[3], 6);
$TempMatrix[2] = round((float) $Tm[2] * (float) $concatTm[0] + (float) $Tm[3] * (float) $concatTm[2], 6);
$TempMatrix[3] = round((float) $Tm[2] * (float) $concatTm[1] + (float) $Tm[3] * (float) $concatTm[3], 6);
$TempMatrix[4] = round((float) $Tm[4] * (float) $concatTm[0] + (float) $Tm[5] * (float) $concatTm[2] + (float) $concatTm[4], 2);
$TempMatrix[5] = round((float) $Tm[4] * (float) $concatTm[1] + (float) $Tm[5] * (float) $concatTm[3] + (float) $concatTm[5], 2);
$Tm = $TempMatrix;
$Tx = (float) $Tm[$x];
$Ty = (float) $Tm[$y];
Expand Down Expand Up @@ -895,6 +919,20 @@ public function getDataTm(?array $dataCommands = null): array
}
$extractedData[] = $data;
break;
/*
* q
* Save current graphics state to stack
*/
case 'q':
$graphicsStatesStack[] = $concatTm;
break;
/*
* Q
* Load last saved graphics state from stack
*/
case 'Q':
$concatTm = array_pop($graphicsStatesStack);
break;
default:
}
}
Expand Down
6 changes: 3 additions & 3 deletions tests/PHPUnit/Integration/PageTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,9 @@ public function testGetDataCommands(): void
$pages = $document->getPages();
$page = $pages[0];
$dataCommands = $page->getDataCommands();
$this->assertCount(176, $dataCommands);
$this->assertCount(185, $dataCommands);

$tmItem = $dataCommands[2];
$tmItem = $dataCommands[6];
$this->assertCount(3, $tmItem);
$this->assertArrayHasKey('t', $tmItem);
$this->assertArrayHasKey('o', $tmItem);
Expand All @@ -267,7 +267,7 @@ public function testGetDataCommands(): void
$this->assertStringContainsString('Tm', $tmItem['o']);
$this->assertStringContainsString('0.999429 0 0 1 201.96 720.68', $tmItem['c']);

$tjItem = $dataCommands[3];
$tjItem = $dataCommands[7];
$this->assertCount(3, $tjItem);
$this->assertArrayHasKey('t', $tjItem);
$this->assertArrayHasKey('o', $tjItem);
Expand Down

0 comments on commit 9a9acba

Please sign in to comment.