diff --git a/samples/bugs/Issue494.pdf b/samples/bugs/Issue494.pdf new file mode 100644 index 00000000..7c4bfbd4 Binary files /dev/null and b/samples/bugs/Issue494.pdf differ diff --git a/src/Smalot/PdfParser/Config.php b/src/Smalot/PdfParser/Config.php index 8bd1624e..e732030d 100644 --- a/src/Smalot/PdfParser/Config.php +++ b/src/Smalot/PdfParser/Config.php @@ -40,6 +40,11 @@ class Config { private $fontSpaceLimit = -50; + /** + * @var string + */ + private $horizontalOffset = ' '; + /** * Represents: (NUL, HT, LF, FF, CR, SP) * @@ -78,6 +83,22 @@ public function setFontSpaceLimit($value) $this->fontSpaceLimit = $value; } + /** + * Returns the string appended to the extracted text for a horizontal offset. + */ + public function getHorizontalOffset(): string + { + return $this->horizontalOffset; + } + + /** + * Sets the string appended to the extracted text for a horizontal offset. + */ + public function setHorizontalOffset(string $value): void + { + $this->horizontalOffset = $value; + } + public function getPdfWhitespaces(): string { return $this->pdfWhitespaces; diff --git a/src/Smalot/PdfParser/PDFObject.php b/src/Smalot/PdfParser/PDFObject.php index 2bce9ce6..fdab1363 100644 --- a/src/Smalot/PdfParser/PDFObject.php +++ b/src/Smalot/PdfParser/PDFObject.php @@ -280,8 +280,7 @@ public function getText(?Page $page = null): string $current_position_td['x'] ) ) { - // horizontal offset - $text .= ' '; + $text .= $this->config->getHorizontalOffset(); } $current_position_td = ['x' => $x, 'y' => $y]; break; diff --git a/tests/Integration/ConfigTest.php b/tests/Integration/ConfigTest.php new file mode 100644 index 00000000..2eda4242 --- /dev/null +++ b/tests/Integration/ConfigTest.php @@ -0,0 +1,55 @@ + + * @date 2020-06-01 + * + * @author Sébastien MALOT + * @date 2017-01-03 + * + * @license LGPLv3 + * @url + * + * PdfParser is a pdf library written in PHP, extraction oriented. 
+ * Copyright (C) 2017 - Sébastien MALOT + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. + * If not, see . + */ + +namespace Tests\Smalot\PdfParser\Integration; + +use Smalot\PdfParser\Config; +use Tests\Smalot\PdfParser\TestCase; + +class ConfigTest extends TestCase +{ + public function testHorizontalOffset() + { + $filename = $this->rootDir.'/samples/bugs/Issue494.pdf'; + + $config = new Config(); + $config->setHorizontalOffset(''); + + $parser = $this->getParserInstance($config); + $document = $parser->parseFile($filename); + $text = $document->getText(); + + $reference = '11 ADET DERGİ İÇİN 3 KALEM HİZMET ALIMI İHALE EDİLECEKTİR '; + $firstLine = explode("\n", $text)[0]; + $this->assertSame($reference, $firstLine); + } +} diff --git a/tests/TestCase.php b/tests/TestCase.php index a4bdca2e..fa5f51f9 100644 --- a/tests/TestCase.php +++ b/tests/TestCase.php @@ -33,6 +33,7 @@ namespace Tests\Smalot\PdfParser; use PHPUnit\Framework\TestCase as PHPTestCase; +use Smalot\PdfParser\Config; use Smalot\PdfParser\Document; use Smalot\PdfParser\Element; use Smalot\PdfParser\Parser; @@ -63,8 +64,8 @@ protected function getElementInstance($value) return new Element($value); } - protected function getParserInstance() + protected function getParserInstance(?Config $config = null) { - return new Parser(); + return new Parser([], $config); } } diff --git a/tests/Unit/ConfigTest.php 
b/tests/Unit/ConfigTest.php index 0b0ef77f..e2419294 100644 --- a/tests/Unit/ConfigTest.php +++ b/tests/Unit/ConfigTest.php @@ -52,6 +52,17 @@ public function testFontSpaceLimitSetterGetter() $this->assertEquals(1, $this->fixture->getFontSpaceLimit()); } + /** + * Tests setter and getter for horizontal offset, using a value distinct from the default. + */ + public function testHorizontalOffsetSetterGetter() + { + $this->assertEquals(' ', $this->fixture->getHorizontalOffset()); + + $this->fixture->setHorizontalOffset("\t"); + $this->assertEquals("\t", $this->fixture->getHorizontalOffset()); + } + /** * Tests setter and getter for retaining of raw image data. */