diff --git a/src/Smalot/PdfParser/Font.php b/src/Smalot/PdfParser/Font.php
index cfe85d79..af2d0f9f 100644
--- a/src/Smalot/PdfParser/Font.php
+++ b/src/Smalot/PdfParser/Font.php
@@ -191,7 +191,7 @@ public function loadTranslateTable(): array
             // Support for multiple bfchar sections
             if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $matches)) {
                 foreach ($matches['sections'] as $section) {
-                    $regexp = '/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
+                    $regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
 
                     preg_match_all($regexp, $section, $matches);
 
diff --git a/src/Smalot/PdfParser/Page.php b/src/Smalot/PdfParser/Page.php
index d6ffaf08..0dafbfcc 100644
--- a/src/Smalot/PdfParser/Page.php
+++ b/src/Smalot/PdfParser/Page.php
@@ -54,6 +54,23 @@ class Page extends PDFObject
      */
     protected $dataTm;
 
+    /**
+     * Assigns a font list to this page unless it already has one.
+     *
+     * Pages::getPages() uses this to hand the fonts of a Pages object
+     * down to its Page kids; a non-empty list is never overwritten.
+     *
+     * @param array<\Smalot\PdfParser\Font> $fonts
+     *
+     * @internal
+     */
+    public function setFonts($fonts)
+    {
+        if (empty($this->fonts)) {
+            $this->fonts = $fonts;
+        }
+    }
+
     /**
      * @return Font[]
      */
diff --git a/src/Smalot/PdfParser/Pages.php b/src/Smalot/PdfParser/Pages.php
index 6b878650..f95134b1 100644
--- a/src/Smalot/PdfParser/Pages.php
+++ b/src/Smalot/PdfParser/Pages.php
@@ -40,7 +40,13 @@
 class Pages extends PDFObject
 {
     /**
-     * @todo Objects other than Pages or Page might need to be treated specifically in order to get Page objects out of them,
+     * @var array<\Smalot\PdfParser\Font>|null
+     */
+    protected $fonts;
+
+    /**
+     * @todo Objects other than Pages or Page might need to be treated specifically
+     *       in order to get Page objects out of them.
* * @see https://github.com/smalot/pdfparser/issues/331 */ @@ -57,6 +63,12 @@ public function getPages(bool $deep = false): array return $kidsElement->getContent(); } + // Prepare to apply the Pages' object's fonts to each page + if (false === \is_array($this->fonts)) { + $this->setupFonts(); + } + $fontsAvailable = 0 < \count($this->fonts); + $kids = $kidsElement->getContent(); $pages = []; @@ -64,10 +76,56 @@ public function getPages(bool $deep = false): array if ($kid instanceof self) { $pages = array_merge($pages, $kid->getPages(true)); } elseif ($kid instanceof Page) { + if ($fontsAvailable) { + $kid->setFonts($this->fonts); + } $pages[] = $kid; } } return $pages; } + + /** + * Gathers information about fonts and collects them in a list. + * + * @return void + * + * @internal + */ + protected function setupFonts() + { + $resources = $this->get('Resources'); + + if (method_exists($resources, 'has') && $resources->has('Font')) { + // no fonts available, therefore stop here + if ($resources->get('Font') instanceof Element\ElementMissing) { + return; + } + + if ($resources->get('Font') instanceof Header) { + $fonts = $resources->get('Font')->getElements(); + } else { + $fonts = $resources->get('Font')->getHeader()->getElements(); + } + + $table = []; + + foreach ($fonts as $id => $font) { + if ($font instanceof Font) { + $table[$id] = $font; + + // Store too on cleaned id value (only numeric) + $id = preg_replace('/[^0-9\.\-_]/', '', $id); + if ('' != $id) { + $table[$id] = $font; + } + } + } + + $this->fonts = $table; + } else { + $this->fonts = []; + } + } } diff --git a/tests/PHPUnit/Integration/PagesTest.php b/tests/PHPUnit/Integration/PagesTest.php new file mode 100644 index 00000000..fb069c08 --- /dev/null +++ b/tests/PHPUnit/Integration/PagesTest.php @@ -0,0 +1,106 @@ + + * + * @date 2024-04-19 + * + * @license LGPLv3 + * + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. 
+ * Copyright (C) 2017 - Sébastien MALOT
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.
+ * If not, see .
+ */
+
+namespace PHPUnitTests\Integration;
+
+use PHPUnitTests\TestCase;
+use Smalot\PdfParser\Document;
+use Smalot\PdfParser\Element\ElementArray;
+use Smalot\PdfParser\Font;
+use Smalot\PdfParser\Header;
+use Smalot\PdfParser\Page;
+use Smalot\PdfParser\Pages;
+
+/**
+ * @internal only for test purposes
+ */
+class PagesDummy extends Pages
+{
+    /**
+     * Overwrites the protected $fonts list directly, to bypass the tedious
+     * work to setup instances which lead to a valid $fonts variable.
+     *
+     * @param array<\Smalot\PdfParser\Font> $fonts
+     *
+     * @return void
+     */
+    public function setFonts($fonts)
+    {
+        $this->fonts = $fonts;
+    }
+}
+
+class PagesTest extends TestCase
+{
+    public function testFontsArePassedFromPagesToPage(): void
+    {
+        // Create a mock Document plus real Font and Page objects
+        $document = $this->createMock(Document::class);
+        $font1 = new Font($document);
+        $page = new Page($document);
+
+        // Create a Header object that lists $page as the only kid
+        $header = new Header([
+            'Kids' => new ElementArray([
+                $page,
+            ]),
+        ], $document);
+
+        // Use this header to create a Pages object (PagesDummy subclass)
+        $pages = new PagesDummy($document, $header);
+
+        // Apply $font1 as a Font object to this Pages object;
+        // setFonts is used here as part of PagesDummy, only to access
+        // the protected Pages::fonts variable; it is not a method
+        // available in production
+        $pages->setFonts([$font1]);
+
+        // Fonts are preset, so getPages() skips setupFonts() and hands them on
+        $pages->getPages(true);
+
+        // Since the $page object font list is empty, the very same $font1
+        // instance must be passed from the Pages object to the Page object
+        $this->assertSame([$font1], $page->getFonts());
+
+        // Create a second font, this time as a mock
+        $font2 = $this->createMock(Font::class);
+
+        // Update the fonts in $pages
+        $pages->setFonts([$font1, $font2]);
+
+        // Hand the updated font list on to the kids once more
+        $pages->getPages(true);
+
+        // Now that $page already has a font, updates from $pages
+        // should not overwrite it
+        $this->assertSame([$font1], $page->getFonts());
+    }
+}