Skip to content

Commit

Permalink
Fix for smalot#434. Reworked the Document's object cache dictionary. …
Browse files Browse the repository at this point in the history
…The getObjectsByType() method now uses it correctly. The dictionary should now also support subtype searches. To obtain the default font, only a single font is now requested and returned.
  • Loading branch information
jee7 committed Jun 13, 2021
1 parent b32bb7a commit 874af8e
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 17 deletions.
62 changes: 48 additions & 14 deletions src/Smalot/PdfParser/Document.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,26 @@ protected function buildDictionary()
$this->dictionary = [];

foreach ($this->objects as $id => $object) {
// Cache objects by type and subtype
$type = $object->getHeader()->get('Type')->getContent();

if (!empty($type)) {
$this->dictionary[$type][$id] = $id;
if (null != $type) {
if (!isset($this->dictionary[$type])) {
$this->dictionary[$type] = [
'all' => [],
'subtype' => [],
];
}

$this->dictionary[$type]['all'][$id] = $object;

$subtype = $object->getHeader()->get('Subtype')->getContent();
if (null != $subtype) {
if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
$this->dictionary[$type]['subtype'][$subtype] = [];
}
$this->dictionary[$type]['subtype'][$subtype][$id] = $object;
}
}
}
}
Expand Down Expand Up @@ -169,6 +185,11 @@ public function getObjectById($id)
return null;
}

/**
 * Tells whether at least one object is registered for the given type
 * (and, optionally, subtype).
 *
 * This is a thin convenience wrapper: it performs the same lookup as
 * getObjectsByType() and only reports whether the result is non-empty.
 *
 * @param string      $type    type name to look up
 * @param string|null $subtype optional subtype name used to narrow the lookup
 *
 * @return bool true when getObjectsByType() returns one or more objects
 */
public function hasObjectsByType($type, $subtype = null)
{
    $matches = $this->getObjectsByType($type, $subtype);

    return \count($matches) > 0;
}

/**
* @param string $type
* @param string $subtype
Expand All @@ -177,17 +198,19 @@ public function getObjectById($id)
*/
public function getObjectsByType($type, $subtype = null)
{
    // Nothing has been cached under this type: no object can match.
    if (!isset($this->dictionary[$type])) {
        return [];
    }

    // Loose comparison on purpose: null and the empty string both mean
    // "no subtype filter", so the whole type bucket is returned below.
    if (null != $subtype) {
        // A subtype was requested but no object of this type carries it.
        if (!isset($this->dictionary[$type]['subtype'][$subtype])) {
            return [];
        }

        // Objects of the requested type and subtype, keyed by object id.
        return $this->dictionary[$type]['subtype'][$subtype];
    }

    // Every object of the requested type, keyed by object id.
    return $this->dictionary[$type]['all'];
}

/**
Expand All @@ -198,25 +221,36 @@ public function getFonts()
return $this->getObjectsByType('Font');
}

/**
 * Returns the first font object found in the document.
 *
 * The fonts are those returned by getFonts(); the first element of that
 * array is handed back without building a merged list of all fonts.
 *
 * @return PDFObject|null the first font, or null when the document has no font
 */
public function getFirstFont()
{
    $fonts = $this->getFonts();

    // reset() yields false on an empty array, which would leak a boolean to
    // callers expecting an object; report "no font" explicitly instead.
    if ([] === $fonts) {
        return null;
    }

    return reset($fonts);
}

/**
* @return Page[]
*
* @throws \Exception
*/
public function getPages()
{
if (isset($this->dictionary['Catalog'])) {
if ($this->hasObjectsByType('Catalog')) {
// Search for catalog to list pages.
$id = reset($this->dictionary['Catalog']);
$catalogues = $this->getObjectsByType('Catalog');
$catalogue = reset($catalogues);

/** @var Pages $object */
$object = $this->objects[$id]->get('Pages');
$object = $catalogue->get('Pages');
if (method_exists($object, 'getPages')) {
return $object->getPages(true);
}
}

if (isset($this->dictionary['Pages'])) {
if ($this->hasObjectsByType('Pages')) {
// Search for pages to list kids.
$pages = [];

Expand All @@ -229,7 +263,7 @@ public function getPages()
return $pages;
}

if (isset($this->dictionary['Page'])) {
if ($this->hasObjectsByType('Page')) {
// Search for 'page' (unordered pages).
$pages = $this->getObjectsByType('Page');

Expand Down
2 changes: 1 addition & 1 deletion src/Smalot/PdfParser/PDFObject.php
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ private function getDefaultFont(Page $page = null)
$fonts = $page->getFonts();
}

$fonts = array_merge($fonts, array_values($this->document->getFonts()));
$fonts[] = $this->document->getFirstFont();

if (\count($fonts) > 0) {
return reset($fonts);
Expand Down
4 changes: 2 additions & 2 deletions tests/Integration/DocumentTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ public function testDictionary()

$objects = $document->getDictionary();
$this->assertEquals(1, \count($objects));
$this->assertEquals(1, \count($objects['Page']));
$this->assertEquals(2, $objects['Page'][2]);
$this->assertEquals(1, \count($objects['Page']['all']));
$this->assertEquals($object2, $objects['Page']['all'][2]);
}

public function testGetObjectsByType()
Expand Down

0 comments on commit 874af8e

Please sign in to comment.