Skip to content

Commit

Permalink
hotfix for smalot#359 and smalot#360: fallback for glyphs not in the …
Browse files Browse the repository at this point in the history
…postscript lookup table
  • Loading branch information
Connum committed Oct 26, 2020
1 parent 722061c commit 0873d21
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
Binary file added samples/bugs/Issue359.pdf
Binary file not shown.
8 changes: 7 additions & 1 deletion src/Smalot/PdfParser/Encoding/PostScriptGlyphs.php
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,12 @@ public static function getGlyphs()

/**
 * Resolve a glyph to its code point using the PostScript glyph table.
 *
 * The glyph is looked up in the map returned by getGlyphs(); when present,
 * the stored hexadecimal string is converted to a number with hexdec().
 * A glyph that is not in the table is handed back unchanged, so an unknown
 * glyph no longer triggers an "Undefined offset" notice.
 *
 * @param mixed $glyph key to look up in the glyph table
 *                     (presumably a PostScript glyph name - confirm against callers)
 *
 * @return mixed the numeric code point for a known glyph, otherwise $glyph itself
 */
public static function getCodePoint($glyph)
{
    $glyphsMap = static::getGlyphs();

    // Guard the lookup: indexing the table directly with an unknown glyph
    // is exactly what raised the notice reported in issues 359 and 360.
    if (isset($glyphsMap[$glyph])) {
        return hexdec($glyphsMap[$glyph]);
    }

    // Fallback for glyphs missing from the lookup table.
    return $glyph;
}
}
23 changes: 23 additions & 0 deletions tests/Integration/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,29 @@ public function testIssue334()

$this->assertStringContainsString('This question already has an answer here', $document->getText());
}

/**
 * Regression test for issue 359: the sample PDF must be parseable.
 *
 * Glyphs that are not in the PostScript lookup table used to cause
 * "Notice: Undefined offset" while parsing this document.
 *
 * Known limitation: some characters are still missing from the parsed text
 * or are decoded wrongly, e.g. the "ł" in "przepływu" or the "ó" in "osób".
 * That was already the case before the PR that introduced this issue, so it
 * is not covered here. Once it is fixed, the two commented-out assertions
 * below can be enabled.
 *
 * @see https://github.com/smalot/pdfparser/issues/359
 */
public function testIssue359()
{
    $document = $this->fixture->parseFile($this->rootDir.'/samples/bugs/Issue359.pdf');
    $text = $document->getText();

    $this->assertStringContainsString('dnia 10 maja 2018 roku o ochronie danych osobowych', $text);
    // $this->assertStringContainsString('sprawie ochrony osób fizycznych w związku', $text);
    // $this->assertStringContainsString('sprawie swobodnego przepływu takich danych oraz uchylenia dyrektywy', $text);
}
}

class ParserSub extends Parser
Expand Down

0 comments on commit 0873d21

Please sign in to comment.