Skip to content

Commit

Permalink
BaseEncoding fallback (#669)
Browse files Browse the repository at this point in the history
* Use 'StandardEncoding' as default for BaseEncoding

When a document doesn't include a BaseEncoding, 'StandardEncoding' should be assumed as the default instead of an empty string.

* PHP-CS-Fixer edits

* Strict check for preg_replace output
  • Loading branch information
GreyWyvern authored Feb 2, 2024
1 parent 4b70df1 commit 4db3b81
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 0 deletions.
Binary file added samples/bugs/Issue665.pdf
Binary file not shown.
6 changes: 6 additions & 0 deletions src/Smalot/PdfParser/Encoding.php
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ protected function getEncodingClass(): string
{
// Load reference table charset.
$baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $this->get('BaseEncoding')->getContent());

// Check for empty BaseEncoding field value
if (!\is_string($baseEncoding) || 0 == \strlen($baseEncoding)) {
$baseEncoding = 'StandardEncoding';
}

$className = '\\Smalot\\PdfParser\\Encoding\\'.$baseEncoding;

if (!class_exists($className)) {
Expand Down
24 changes: 24 additions & 0 deletions tests/PHPUnit/Integration/EncodingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@
use Smalot\PdfParser\Encoding\StandardEncoding;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Parser;

class EncodingTest extends TestCase
{
/**
 * Builds the Parser that the tests in this class read from $this->fixture.
 */
protected function setUp(): void
{
    // Parent setup runs first, exactly as before, then the parser is installed.
    parent::setUp();

    $parser = new Parser();
    $this->fixture = $parser;
}

public function testGetEncodingClass(): void
{
$header = new Header(['BaseEncoding' => new Element('StandardEncoding')]);
Expand Down Expand Up @@ -103,4 +111,20 @@ public function testToStringGetEncodingClassMissingClassException(): void
$encoding->__toString();
}
}

/**
 * Fall back to 'StandardEncoding' when the document has no BaseEncoding.
 *
 * Parses the sample PDF attached to the linked issue and checks that the
 * resulting object list has the expected size and contains object '3_0'.
 *
 * @see https://github.com/smalot/pdfparser/issues/665
 */
public function testEmptyBaseEncodingFallback(): void
{
    $filename = $this->rootDir.'/samples/bugs/Issue665.pdf';

    $document = $this->fixture->parseFile($filename);
    $objects = $document->getObjects();

    // Dedicated count assertion: on failure PHPUnit reports the actual
    // size of the collection rather than a bare integer mismatch.
    $this->assertCount(25, $objects);
    $this->assertArrayHasKey('3_0', $objects);
}
}

0 comments on commit 4db3b81

Please sign in to comment.