Skip to content

Commit

Permalink
Merge branch 'master' into feature/php-8.4-support
Browse files Browse the repository at this point in the history
  • Loading branch information
k00ni committed Oct 24, 2024
2 parents fabd628 + 15da82a commit 304cd3d
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 11 deletions.
4 changes: 1 addition & 3 deletions .php-cs-fixer.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@
$config = new Config();
$config
->setFinder($finder)
->setRiskyAllowed(true)
->setRules([
'@Symfony' => true,
'@Symfony:risky' => true,
'@PSR12' => true,
'array_syntax' => ['syntax' => 'short'],
'no_empty_phpdoc' => true,
'no_unused_imports' => true,
Expand Down
Binary file added samples/bugs/Issue727.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion src/Smalot/PdfParser/Document.php
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ public function extractXMPMetadata(string $content): void
if ('rdf:li' == $val['tag']) {
$metadata[] = $val['value'];

// Else assign a value to this property
// Else assign a value to this property
} else {
$metadata[$val['tag']] = $val['value'];
}
Expand Down
2 changes: 1 addition & 1 deletion src/Smalot/PdfParser/PDFObject.php
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ public function getSectionsText(?string $content): array
$inTextBlock = true;
$sections[] = $line;

// If an 'ET' is encountered, unset the $inTextBlock flag
// If an 'ET' is encountered, unset the $inTextBlock flag
} elseif ('ET' == $line) {
$inTextBlock = false;
$sections[] = $line;
Expand Down
18 changes: 12 additions & 6 deletions src/Smalot/PdfParser/RawData/RawDataParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,11 @@ protected function decodeXref(string $pdfData, int $startxref, array $xref = [])
}
}
if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
// get previous xref
$xref = $this->getXrefData($pdfData, (int) $matches[1], $xref);
$offset = (int) $matches[1];
if (0 != $offset) {
// get previous xref
$xref = $this->getXrefData($pdfData, $offset, $xref);
}
}
} else {
throw new \Exception('Unable to find trailer');
Expand Down Expand Up @@ -264,7 +267,8 @@ protected function decodeXrefStream(string $pdfData, int $startxref, array $xref
if (
('/' == $v[0])
&& ('Type' == $v[1])
&& (isset($sarr[$k + 1])
&& (
isset($sarr[$k + 1])
&& '/' == $sarr[$k + 1][0]
&& 'XRef' == $sarr[$k + 1][1]
)
Expand All @@ -290,15 +294,17 @@ protected function decodeXrefStream(string $pdfData, int $startxref, array $xref
if (
'/' == $vdc[0]
&& 'Columns' == $vdc[1]
&& (isset($decpar[$kdc + 1])
&& (
isset($decpar[$kdc + 1])
&& 'numeric' == $decpar[$kdc + 1][0]
)
) {
$columns = (int) $decpar[$kdc + 1][1];
} elseif (
'/' == $vdc[0]
&& 'Predictor' == $vdc[1]
&& (isset($decpar[$kdc + 1])
&& (
isset($decpar[$kdc + 1])
&& 'numeric' == $decpar[$kdc + 1][0]
)
) {
Expand Down Expand Up @@ -404,7 +410,7 @@ protected function decodeXrefStream(string $pdfData, int $startxref, array $xref
}
$prev_row = $ddata[$k];
} // end for each row
// complete decoding
// complete decoding
} else {
// number of bytes in a row
$rowlen = array_sum($wb);
Expand Down
19 changes: 19 additions & 0 deletions tests/PHPUnit/Integration/RawData/RawDataParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,23 @@ public function testGetXrefDataIssue673(): void

self::assertStringContainsString('6 rue des Goutais', $text);
}

/**
 * Regression test for a self-referencing xref.
 *
 * Some PDF creators apparently write `Prev 0` when there is no previous xref.
 *
 * @see https://github.com/smalot/pdfparser/pull/727
 */
public function testDecodeXrefIssue727(): void
{
    $samplePath = $this->rootDir.'/samples/bugs/Issue727.pdf';

    // Before the fix, parsing this document ended up in an infinite loop.
    $extractedText = $this->getParserInstance()
        ->parseFile($samplePath)
        ->getText();

    // The empty needle makes this assertion trivially true; what the test
    // really verifies is that parsing terminates and reaches this line.
    self::assertStringContainsString('', $extractedText);
}
}

0 comments on commit 304cd3d

Please sign in to comment.