Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make horizontal offset configurable #505

Merged
merged 4 commits into from
Jan 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added samples/bugs/Issue494.pdf
Binary file not shown.
15 changes: 15 additions & 0 deletions src/Smalot/PdfParser/Config.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ class Config
{
private $fontSpaceLimit = -50;

/**
 * Text returned by getHorizontalOffset(); defaults to a single space.
 *
 * @var string
 */
private $horizontalOffset = ' ';

rubenvanerk marked this conversation as resolved.
Show resolved Hide resolved
/**
* Represents: (NUL, HT, LF, FF, CR, SP)
*
Expand Down Expand Up @@ -78,6 +83,16 @@ public function setFontSpaceLimit($value)
$this->fontSpaceLimit = $value;
}

/**
 * Returns the currently configured horizontal offset string.
 *
 * @return string the value held in the horizontalOffset property
 */
public function getHorizontalOffset(): string
{
    $offset = $this->horizontalOffset;

    return $offset;
}

/**
 * Sets the string that getHorizontalOffset() returns.
 *
 * The parameter is typed as string because getHorizontalOffset() declares a
 * string return type: an unsuitable value is now rejected at the point where
 * it is set instead of failing later, when the getter is called.
 *
 * @param string $value new horizontal offset string (may be empty)
 */
public function setHorizontalOffset(string $value): void
{
    $this->horizontalOffset = $value;
}

public function getPdfWhitespaces(): string
{
return $this->pdfWhitespaces;
Expand Down
3 changes: 1 addition & 2 deletions src/Smalot/PdfParser/PDFObject.php
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,7 @@ public function getText(?Page $page = null): string
$current_position_td['x']
)
) {
// horizontal offset
$text .= ' ';
$text .= $this->config->getHorizontalOffset();
rubenvanerk marked this conversation as resolved.
Show resolved Hide resolved
}
$current_position_td = ['x' => $x, 'y' => $y];
break;
Expand Down
55 changes: 55 additions & 0 deletions tests/Integration/ConfigTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

/**
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <[email protected]>
* @date 2020-06-01
*
* @author Sébastien MALOT <[email protected]>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <[email protected]>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/

namespace Tests\Smalot\PdfParser\Integration;

use Smalot\PdfParser\Config;
use Tests\Smalot\PdfParser\TestCase;

class ConfigTest extends TestCase
{
    /**
     * Parses samples/bugs/Issue494.pdf with the horizontal offset configured as
     * an empty string and compares the first line of the extracted text with a
     * fixed reference string.
     */
    public function testHorizontalOffset()
    {
        $config = new Config();
        $config->setHorizontalOffset('');

        $pdfPath = $this->rootDir.'/samples/bugs/Issue494.pdf';
        $extracted = $this->getParserInstance($config)->parseFile($pdfPath)->getText();

        // Only the first line of the extracted text is compared.
        $lines = explode("\n", $extracted);
        $expected = '11 ADET DERGİ İÇİN 3 KALEM HİZMET ALIMI İHALE EDİLECEKTİR ';

        $this->assertEquals($expected, $lines[0]);
    }
}
5 changes: 3 additions & 2 deletions tests/TestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
namespace Tests\Smalot\PdfParser;

use PHPUnit\Framework\TestCase as PHPTestCase;
use Smalot\PdfParser\Config;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Parser;
Expand Down Expand Up @@ -63,8 +64,8 @@ protected function getElementInstance($value)
return new Element($value);
}

protected function getParserInstance()
protected function getParserInstance(?Config $config = null)
{
return new Parser();
return new Parser([], $config);
}
}
11 changes: 11 additions & 0 deletions tests/Unit/ConfigTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ public function testFontSpaceLimitSetterGetter()
$this->assertEquals(1, $this->fixture->getFontSpaceLimit());
}

/**
 * Tests setter and getter for horizontal offset.
 */
public function testHorizontalOffsetSetterGetter()
{
    // A fresh fixture is expected to report a single space.
    $this->assertEquals(' ', $this->fixture->getHorizontalOffset());

    // Use a value that differs from the default; otherwise a setter that
    // does nothing would still satisfy the assertion below.
    $this->fixture->setHorizontalOffset("\t");
    $this->assertEquals("\t", $this->fixture->getHorizontalOffset());
}

/**
* Tests setter and getter for retaining of raw image data.
*/
Expand Down