-
Notifications
You must be signed in to change notification settings - Fork 541
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* Fix for #434. Reworked the Document's object cache dictionary. The getObjectsByType() method now uses it correctly. The dictionary also should support subtype searches. Only one font is asked for and returned to get the default font. * Added type declarations. * Testing performance test workflow * Testing performance test workflow * Testing performance test workflow * Testing performance test workflow * Testing performance test workflow * Testing performance test workflow * Added performance testing as requested for PR to fix the issue #434 * Style fix * File require fix. * File require fix. Could not get autoload to work. * GitHub performance is lower than in localhost. * Style fix * Performance tests GitHub Action name change. * Autoload test (pretty sure this did not work before). * Yep, autoload does not work. Revert. * Performance tests run name change. * Removed unnecessary PHPDocs and refactored methods to use Type Declarations instead when able. * Style fix. * Performance test also succeeds, when time is exactly the same as required (although this will likely never happen). * More PHPDoc removal in favour of Type Declarations. * Document cache dictionary performance test tweak. * Removed unused parameters. * Another Type Declarations fix. * Another Type Declarations fix. * Autoload test with composer update. * Autoload test with composer update. * Added the thesis document used in the document cache dictionary performance test to the repository. The author gave his approval. * Automatic code style fix. Co-authored-by: vagrant <[email protected]>
- Loading branch information
Showing
12 changed files
with
213 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Runs the PDF parsing performance tests on every pull request and on pushes to master.
name: "Performance Tests"

on:
  pull_request:
  push:
    branches:
      - "master"

jobs:
  performance-tests:
    name: "Tests for the performance testing the PDF parsing"
    runs-on: "ubuntu-20.04"

    strategy:
      # fail-fast is a strategy option; placed under `env` it would only define
      # an unused environment variable.
      fail-fast: true
      matrix:
        # NOTE(review): no step below reads matrix.php, so the runner's default
        # PHP version is what actually executes the tests - confirm this is intended.
        php:
          - "7.4"

    steps:
      - name: "Checkout"
        uses: "actions/checkout@v2"

      # Generates vendor/autoload.php, which the performance test runner requires.
      - name: "Run composer for further autoloading"
        run: "composer update"

      - name: "Run performance tests"
        run: "php tests/Performance/runPerformanceTests.php"
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
<?php | ||
|
||
namespace Tests\Smalot\PdfParser\Performance\Exception; | ||
|
||
/**
 * Signals that a performance test did not finish within its allowed time.
 */
class PerformanceFailException extends \Exception
{
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<?php | ||
|
||
namespace Tests\Smalot\PdfParser\Performance\Test; | ||
|
||
/**
 * Base class for a single performance test.
 *
 * The performance test runner calls init() first (not timed), then measures
 * the wall-clock duration of run() and compares it with getMaxEstimatedTime().
 */
abstract class AbstractPerformanceTest
{
    /**
     * Initializes the test (e.g. fetches the files it needs).
     *
     * Work done here is not part of the measured time.
     */
    abstract public function init(): void;

    /**
     * Executes the test. The duration of this call is what gets measured.
     */
    abstract public function run(): void;

    /**
     * Returns the time, in seconds, over which the test is considered a fail.
     */
    abstract public function getMaxEstimatedTime(): int;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
<?php | ||
|
||
/** | ||
* @file This file is part of the PdfParser library. | ||
* | ||
* @author Konrad Abicht <[email protected]> | ||
* @date 2020-06-01 | ||
* | ||
* @author Sébastien MALOT <[email protected]> | ||
* @date 2017-01-03 | ||
* | ||
* @license LGPLv3 | ||
* @url <https://github.com/smalot/pdfparser> | ||
* | ||
* PdfParser is a pdf library written in PHP, extraction oriented. | ||
* Copyright (C) 2017 - Sébastien MALOT <[email protected]> | ||
* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with this program. | ||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>. | ||
*/ | ||
|
||
namespace Tests\Smalot\PdfParser\Performance\Test; | ||
|
||
use Smalot\PdfParser\Page; | ||
use Smalot\PdfParser\Parser; | ||
|
||
/**
 * This test does a performance test with certain PDF files that extensively use
 * the getFirstFont() method of Document.php. If Document.php correctly uses a dictionary
 * to cache the objects inside the PDF file, then the parsing should be quick.
 * If it does not, the parsing can be extensively slow or even crash.
 */
class DocumentDictionaryCacheTest extends AbstractPerformanceTest
{
    /**
     * Index (as returned by getPages()) of the first page whose text is extracted.
     */
    private const FIRST_PAGE_INDEX = 77;

    /**
     * Index (as returned by getPages()) of the last page whose text is extracted.
     */
    private const LAST_PAGE_INDEX = 78;

    /**
     * @var Parser
     */
    protected $parser;

    /**
     * @var string raw content of the sample PDF file, loaded by init()
     */
    protected $data;

    /**
     * Creates the parser and loads the sample PDF into memory.
     *
     * @throws \RuntimeException if the sample PDF file cannot be read
     */
    public function init(): void
    {
        $this->parser = new Parser();

        // load PDF file content
        $file = __DIR__.'/../../../samples/DocumentWithLotsOfObjects.pdf';
        $content = file_get_contents($file);

        // file_get_contents() returns false on failure; stop here with a clear
        // message instead of handing false to the parser.
        if (false === $content) {
            throw new \RuntimeException(sprintf('Could not read sample file "%s".', $file));
        }

        $this->data = $content;
    }

    /**
     * Parses the sample PDF and extracts the text of the selected pages.
     */
    public function run(): void
    {
        // give PDF content to function and parse it
        $pdf = $this->parser->parseContent($this->data);

        foreach ($pdf->getPages() as $i => $page) {
            /** @var Page $page */
            // Only the pages in the configured index range are exercised.
            if ($i < self::FIRST_PAGE_INDEX || $i > self::LAST_PAGE_INDEX) {
                continue;
            }

            $page->getText(); // Test this method
        }
    }

    /**
     * Returns the time, in seconds, over which this test is considered a fail.
     */
    public function getMaxEstimatedTime(): int
    {
        return 20;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<?php | ||
|
||
require __DIR__.'/../../vendor/autoload.php'; | ||
|
||
// All performance tests to execute, in order.
$tests = [
    new \Tests\Smalot\PdfParser\Performance\Test\DocumentDictionaryCacheTest(),
];

foreach ($tests as $test) {
    /** @var \Tests\Smalot\PdfParser\Performance\Test\AbstractPerformanceTest $test */
    // Preparation is deliberately kept outside of the measured interval.
    $test->init();

    $startTime = microtime(true);
    $test->run();
    $time = microtime(true) - $startTime;

    $maxTime = $test->getMaxEstimatedTime();

    // A test fails only when it takes longer than the allowed time; taking
    // exactly the allowed time still counts as a pass. The uncaught exception
    // ends the script with a non-zero exit status.
    if ($time > $maxTime) {
        throw new \Tests\Smalot\PdfParser\Performance\Exception\PerformanceFailException(sprintf('Performance failed on test "%s". Time taken was %.2f seconds, expected at most %d seconds.', get_class($test), $time, $maxTime));
    }
}