Skip to content

Commit

Permalink
allow for line breaks when splitting xrefs for id and position (smalo…
Browse files Browse the repository at this point in the history
…t#345)

* allow for line breaks when splitting xrefs for id and position

* extend TestCase.php with functionality to "catch" E_NOTICE and E_WARNING

* added test case for this fix

* only reset error handler when the current handler is the handler we had set before

* workaround for failing CI build with PHP 5.6

* added comment and link to the workaround for getting the current error handler

* removed unnecessary ini_set call

* remove error level constant name before error message

* restore error from the error handler itself, to prevent PHPUnit's "THE ERROR HANDLER HAS CHANGED!" message

* reverse the changes made to the TestCase class and the code in the test case depending on it

* simplified test case, now checking if object has been parsed correctly

* code linting
  • Loading branch information
Connum authored and partulaj committed Dec 21, 2020
1 parent 5e8cea5 commit 1624079
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/Smalot/PdfParser/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ protected function parseObject($id, $structure, $document)
$table = [];

foreach ($xrefs as $xref) {
list($id, $position) = explode(' ', trim($xref));
list($id, $position) = preg_split("/\s+/", trim($xref));
$table[$position] = $id;
}

Expand Down
56 changes: 56 additions & 0 deletions tests/Integration/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
namespace Tests\Smalot\PdfParser\Integration;

use Exception;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Parser;
use Smalot\PdfParser\XObject\Image;
use Tests\Smalot\PdfParser\TestCase;
Expand Down Expand Up @@ -77,6 +78,48 @@ public function testParseFile()
}
}

/**
 * Regression test: an xref entry whose id and position are separated by a
 * line break (rather than a single space) must still be parsed correctly.
 *
 * @see https://github.com/smalot/pdfparser/issues/336
 */
public function testIssue19()
{
    // Header part of the structure, carrying the "Type" / "ObjStm" entries.
    $headerPart = [
        '<<',
        [
            [
                '/',
                'Type',
                7735,
            ],
            [
                '/',
                'ObjStm',
                7742,
            ],
        ],
    ];

    // Stream part: the payload "17\n0" is one xref pair whose id and
    // position are split by a newline instead of a space.
    $streamPart = [
        'stream',
        '',
        7804,
        [
            "17\n0",
            [],
        ],
    ];

    $parser = new ParserSub();
    $parser->exposedParseObject('19_0', [$headerPart, $streamPart], new Document());

    // The xref pair above must have been registered under the key "17_0".
    $this->assertArrayHasKey('17_0', $parser->getObjects());
}

/**
* Test that issue related pdf can now be parsed
*
Expand Down Expand Up @@ -127,3 +170,16 @@ public function testIssue334()
$this->assertStringContainsString('This question already has an answer here', $document->getText());
}
}

/**
 * Test-only subclass of Parser that makes protected internals reachable,
 * so a test can call parseObject() directly and inspect what it stored.
 */
class ParserSub extends Parser
{
    /**
     * Returns the parser's $objects property as-is.
     *
     * @return mixed whatever the parser currently holds in $this->objects
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * Public pass-through to the protected Parser::parseObject().
     *
     * All three arguments are forwarded unchanged and in the same order.
     *
     * @param mixed $id        forwarded as the first argument
     * @param mixed $structure forwarded as the second argument
     * @param mixed $document  forwarded as the third argument
     *
     * @return mixed whatever Parser::parseObject() returns
     */
    public function exposedParseObject($id, $structure, $document)
    {
        $result = $this->parseObject($id, $structure, $document);

        return $result;
    }
}

0 comments on commit 1624079

Please sign in to comment.