diff --git a/src/Smalot/PdfParser/RawData/RawDataParser.php b/src/Smalot/PdfParser/RawData/RawDataParser.php index 1a4583c0..ec8f600b 100644 --- a/src/Smalot/PdfParser/RawData/RawDataParser.php +++ b/src/Smalot/PdfParser/RawData/RawDataParser.php @@ -901,8 +901,15 @@ protected function getXrefData(string $pdfData, int $offset = 0, array $xref = [ // Cross-Reference $xref = $this->decodeXref($pdfData, $startxref, $xref); } else { - // Cross-Reference Stream - $xref = $this->decodeXrefStream($pdfData, $startxref, $xref); + // Check if the $pdfData might have the wrong line-endings + $pdfDataUnix = str_replace("\r\n", "\n", $pdfData); + if ($startxref < \strlen($pdfDataUnix) && strpos($pdfDataUnix, 'xref', $startxref) == $startxref) { + // Return Unix-line-ending flag + $xref = ['Unix' => true]; + } else { + // Cross-Reference Stream + $xref = $this->decodeXrefStream($pdfData, $startxref, $xref); + } } if (empty($xref)) { throw new \Exception('Unable to find xref'); @@ -937,6 +944,12 @@ public function parseData(string $data): array // get xref and trailer data $xref = $this->getXrefData($pdfData); + // If we found Unix line-endings + if (isset($xref['Unix'])) { + $pdfData = str_replace("\r\n", "\n", $pdfData); + $xref = $this->getXrefData($pdfData); + } + // parse all document objects $objects = []; foreach ($xref['xref'] as $obj => $offset) { diff --git a/tests/PHPUnit/Integration/FontTest.php b/tests/PHPUnit/Integration/FontTest.php index b07bbf76..599a4203 100644 --- a/tests/PHPUnit/Integration/FontTest.php +++ b/tests/PHPUnit/Integration/FontTest.php @@ -294,9 +294,6 @@ public function testDecodeUnicode(): void $this->assertEquals('AB', Font::decodeUnicode("\xFE\xFF\x00A\x00B")); } - /** - * @group linux-only - */ public function testDecodeText(): void { $filename = $this->rootDir.'/samples/Document1_pdfcreator_nocompressed.pdf';