Skip to content

Commit

Permalink
Implement missing cm command (#720)
Browse files Browse the repository at this point in the history
* Implement cm command

* Add q/Q commands and fix cm overwriting old matrix instead of multiplying it

* Remove rounding, fix unit tests and add new unit test
  • Loading branch information
DominikDostal authored Jul 24, 2024
1 parent db6af27 commit 9609711
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 15 deletions.
Binary file added samples/Document-Word-Landscape-printedaspdf.pdf
Binary file not shown.
57 changes: 55 additions & 2 deletions src/Smalot/PdfParser/Page.php
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,13 @@ public function getDataCommands(?array $extractedDecodedRawData = null): array
case 'BT':
$extractedData[] = $command;
break;

/*
* cm
* Concatenates the given matrix with the current concatenation matrix; the result is applied to every following Tm
*/
case 'cm':
$extractedData[] = $command;
break;
/*
* ET
* End a text object, discarding the text matrix
Expand Down Expand Up @@ -640,6 +646,18 @@ public function getDataCommands(?array $extractedDecodedRawData = null): array
case 'TJ':
$extractedData[] = $command;
break;
/*
* q
* Save current graphics state to stack
*/
case 'q':
/*
* Q
* Load last saved graphics state from stack
*/
case 'Q':
$extractedData[] = $command;
break;
default:
}
}
Expand Down Expand Up @@ -671,7 +689,8 @@ public function getDataTm(?array $dataCommands = null): array
* At the beginning of a text object Tm is the identity matrix
*/
$defaultTm = ['1', '0', '0', '1', '0', '0'];

$concatTm = ['1', '0', '0', '1', '0', '0'];
$graphicsStatesStack = [];
/*
* Set the text leading used by T*, ' and " operators
*/
Expand Down Expand Up @@ -730,6 +749,18 @@ public function getDataTm(?array $dataCommands = null): array
$Ty = 0;
break;

case 'cm':
$newConcatTm = (array) explode(' ', $command['c']);
$TempMatrix = [];
// Multiply with previous concatTm
$TempMatrix[0] = (float) $concatTm[0] * (float) $newConcatTm[0] + (float) $concatTm[1] * (float) $newConcatTm[2];
$TempMatrix[1] = (float) $concatTm[0] * (float) $newConcatTm[1] + (float) $concatTm[1] * (float) $newConcatTm[3];
$TempMatrix[2] = (float) $concatTm[2] * (float) $newConcatTm[0] + (float) $concatTm[3] * (float) $newConcatTm[2];
$TempMatrix[3] = (float) $concatTm[2] * (float) $newConcatTm[1] + (float) $concatTm[3] * (float) $newConcatTm[3];
$TempMatrix[4] = (float) $concatTm[4] * (float) $newConcatTm[0] + (float) $concatTm[5] * (float) $newConcatTm[2] + (float) $newConcatTm[4];
$TempMatrix[5] = (float) $concatTm[4] * (float) $newConcatTm[1] + (float) $concatTm[5] * (float) $newConcatTm[3] + (float) $newConcatTm[5];
$concatTm = $TempMatrix;
break;
/*
* ET
* End a text object
Expand Down Expand Up @@ -786,6 +817,14 @@ public function getDataTm(?array $dataCommands = null): array
*/
case 'Tm':
$Tm = explode(' ', $command['c']);
$TempMatrix = [];
$TempMatrix[0] = (float) $Tm[0] * (float) $concatTm[0] + (float) $Tm[1] * (float) $concatTm[2];
$TempMatrix[1] = (float) $Tm[0] * (float) $concatTm[1] + (float) $Tm[1] * (float) $concatTm[3];
$TempMatrix[2] = (float) $Tm[2] * (float) $concatTm[0] + (float) $Tm[3] * (float) $concatTm[2];
$TempMatrix[3] = (float) $Tm[2] * (float) $concatTm[1] + (float) $Tm[3] * (float) $concatTm[3];
$TempMatrix[4] = (float) $Tm[4] * (float) $concatTm[0] + (float) $Tm[5] * (float) $concatTm[2] + (float) $concatTm[4];
$TempMatrix[5] = (float) $Tm[4] * (float) $concatTm[1] + (float) $Tm[5] * (float) $concatTm[3] + (float) $concatTm[5];
$Tm = $TempMatrix;
$Tx = (float) $Tm[$x];
$Ty = (float) $Tm[$y];
break;
Expand Down Expand Up @@ -880,6 +919,20 @@ public function getDataTm(?array $dataCommands = null): array
}
$extractedData[] = $data;
break;
/*
* q
* Save current graphics state to stack
*/
case 'q':
$graphicsStatesStack[] = $concatTm;
break;
/*
* Q
* Load last saved graphics state from stack
*/
case 'Q':
$concatTm = array_pop($graphicsStatesStack);
break;
default:
}
}
Expand Down
95 changes: 82 additions & 13 deletions tests/PHPUnit/Integration/PageTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,9 @@ public function testGetDataCommands(): void
$pages = $document->getPages();
$page = $pages[0];
$dataCommands = $page->getDataCommands();
$this->assertCount(176, $dataCommands);
$this->assertCount(185, $dataCommands);

$tmItem = $dataCommands[2];
$tmItem = $dataCommands[6];
$this->assertCount(3, $tmItem);
$this->assertArrayHasKey('t', $tmItem);
$this->assertArrayHasKey('o', $tmItem);
Expand All @@ -267,7 +267,7 @@ public function testGetDataCommands(): void
$this->assertStringContainsString('Tm', $tmItem['o']);
$this->assertStringContainsString('0.999429 0 0 1 201.96 720.68', $tmItem['c']);

$tjItem = $dataCommands[3];
$tjItem = $dataCommands[7];
$this->assertCount(3, $tjItem);
$this->assertArrayHasKey('t', $tjItem);
$this->assertArrayHasKey('o', $tjItem);
Expand Down Expand Up @@ -307,7 +307,14 @@ public function testGetDataTm(): void
'201.96',
'720.68',
],
$item[0]
[
round($item[0][0], 6),
round($item[0][1], 6),
round($item[0][2], 6),
round($item[0][3], 6),
round($item[0][4], 2),
round($item[0][5], 2),
]
);
$this->assertStringContainsString('Document title', $item[1]);

Expand All @@ -321,7 +328,14 @@ public function testGetDataTm(): void
'70.8',
'673.64',
],
$item[0]
[
round($item[0][0], 6),
round($item[0][1], 6),
round($item[0][2], 6),
round($item[0][3], 6),
round($item[0][4], 2),
round($item[0][5], 2),
]
);
$this->assertStringContainsString('Calibri : Lorem ipsum dolor sit amet, consectetur a', $item[1]);

Expand All @@ -332,10 +346,17 @@ public function testGetDataTm(): void
'0',
'0',
'1',
'342.840222606',
'342.84',
'81.44',
],
$item[0]
[
round($item[0][0], 6),
round($item[0][1], 6),
round($item[0][2], 6),
round($item[0][3], 6),
round($item[0][4], 2),
round($item[0][5], 2),
]
);
$this->assertStringContainsString('nenatis.', $item[1]);

Expand Down Expand Up @@ -626,7 +647,7 @@ public function testGetTextXY(): void
$document = $parser->parseFile($filename);
$pages = $document->getPages();
$page = $pages[0];
$result = $page->getTextXY(201.96, 720.68);
$result = $page->getTextXY(201.96, 720.68, 0.01, 0.01);
$this->assertCount(1, $result);
$this->assertCount(2, $result[0]);
$this->assertEquals(
Expand All @@ -638,7 +659,14 @@ public function testGetTextXY(): void
'201.96',
'720.68',
],
$result[0][0]
[
round($result[0][0][0], 6),
round($result[0][0][1], 6),
round($result[0][0][2], 6),
round($result[0][0][3], 6),
round($result[0][0][4], 2),
round($result[0][0][5], 2),
]
);
$this->assertStringContainsString('Document title', $result[0][1]);

Expand All @@ -657,7 +685,14 @@ public function testGetTextXY(): void
'201.96',
'720.68',
],
$result[0][0]
[
round($result[0][0][0], 6),
round($result[0][0][1], 6),
round($result[0][0][2], 6),
round($result[0][0][3], 6),
round($result[0][0][4], 2),
round($result[0][0][5], 2),
]
);
$this->assertStringContainsString('Document title', $result[0][1]);

Expand Down Expand Up @@ -827,10 +862,10 @@ public function testIssue454(): void
$this->assertEquals(2, \count($dataTm[0]));
$this->assertIsArray($dataTm[0][0]);
$this->assertEquals(6, \count($dataTm[0][0]));
$this->assertEquals(201.96, $dataTm[0][0][4]);
$this->assertEquals(720.68, $dataTm[0][0][5]);
$this->assertEquals(201.96, round($dataTm[0][0][4], 2));
$this->assertEquals(720.68, round($dataTm[0][0][5], 2));
$this->assertStringContainsString('Document title', $dataTm[0][1]);
$textData = $page->getTextXY(201.96, 720.68);
$textData = $page->getTextXY(201.96, 720.68, 0.01, 0.01);
$this->assertStringContainsString('Document title', $textData[0][1]);
$page = $pages[2];
$dataTm = $page->getDataTm();
Expand Down Expand Up @@ -889,4 +924,38 @@ public function testIssue629WithoutDataTmFontInfo(): void
$this->assertCount(2, $dataTm[0]);
$this->assertFalse(isset($dataTm[0][2]));
}

/**
 * Regression test for the `cm` operator: parses a landscape Word document
 * that was printed as PDF and checks the matrix and text reported by
 * getDataTm() for the third extracted item.
 */
public function testCmCommandInPdfs(): void
{
    $config = new Config();
    $parser = $this->getParserInstance($config);
    $filename = $this->rootDir.'/samples/Document-Word-Landscape-printedaspdf.pdf';
    $document = $parser->parseFile($filename);
    $pages = $document->getPages();
    $page = $pages[0];
    $dataTm = $page->getDataTm();

    // Assert the size first, so that a shorter result fails with a clear
    // assertion message instead of an undefined offset on $dataTm[2].
    $this->assertCount(6, $dataTm);

    $item = $dataTm[2];
    $this->assertCount(2, $item);
    $this->assertCount(6, $item[0]);
    $this->assertEquals('This is just a test', trim($item[1]));

    // The matrix entries are computed as floats, so they are rounded before
    // comparison: 6 decimals for the a-d components, 2 for the translation.
    $this->assertEquals(
        [
            '0.75',
            '0.0',
            '0.0',
            '0.75',
            '59.16',
            '500.4',
        ],
        [
            round($item[0][0], 6),
            round($item[0][1], 6),
            round($item[0][2], 6),
            round($item[0][3], 6),
            round($item[0][4], 2),
            round($item[0][5], 2),
        ]
    );
}
}

0 comments on commit 9609711

Please sign in to comment.