diff --git a/samples/bugs/Issue665.pdf b/samples/bugs/Issue665.pdf
new file mode 100644
index 00000000..b72d3e33
Binary files /dev/null and b/samples/bugs/Issue665.pdf differ
diff --git a/src/Smalot/PdfParser/Encoding.php b/src/Smalot/PdfParser/Encoding.php
index 6018eec8..511411b8 100644
--- a/src/Smalot/PdfParser/Encoding.php
+++ b/src/Smalot/PdfParser/Encoding.php
@@ -145,6 +145,13 @@ protected function getEncodingClass(): string
     {
         // Load reference table charset.
         $baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $this->get('BaseEncoding')->getContent());
+
+        // preg_replace() yields null on error, and stripping can leave an
+        // empty string: fall back to 'StandardEncoding' in both cases.
+        if (!\is_string($baseEncoding) || '' === $baseEncoding) {
+            $baseEncoding = 'StandardEncoding';
+        }
+
         $className = '\\Smalot\\PdfParser\\Encoding\\'.$baseEncoding;
 
         if (!class_exists($className)) {
diff --git a/tests/PHPUnit/Integration/EncodingTest.php b/tests/PHPUnit/Integration/EncodingTest.php
index 766bb554..aa01fac3 100644
--- a/tests/PHPUnit/Integration/EncodingTest.php
+++ b/tests/PHPUnit/Integration/EncodingTest.php
@@ -42,9 +42,17 @@
 use Smalot\PdfParser\Encoding\StandardEncoding;
 use Smalot\PdfParser\Exception\EncodingNotFoundException;
 use Smalot\PdfParser\Header;
+use Smalot\PdfParser\Parser;
 
 class EncodingTest extends TestCase
 {
+    protected function setUp(): void
+    {
+        parent::setUp();
+
+        $this->fixture = new Parser();
+    }
+
     public function testGetEncodingClass(): void
     {
         $header = new Header(['BaseEncoding' => new Element('StandardEncoding')]);
@@ -103,4 +111,20 @@ public function testToStringGetEncodingClassMissingClassException(): void
             $encoding->__toString();
         }
     }
+
+    /**
+     * Fall back to 'StandardEncoding' when the BaseEncoding value is empty
+     *
+     * @see https://github.com/smalot/pdfparser/issues/665
+     */
+    public function testEmptyBaseEncodingFallback(): void
+    {
+        $filename = $this->rootDir.'/samples/bugs/Issue665.pdf';
+
+        $document = $this->fixture->parseFile($filename);
+        $objects = $document->getObjects();
+
+        $this->assertCount(25, $objects);
+        $this->assertArrayHasKey('3_0', $objects);
+    }
 }