From 3c2d34c521978fef8b7fc420e5382255a2f05370 Mon Sep 17 00:00:00 2001 From: Herre Groen Date: Tue, 30 Oct 2018 17:09:45 +0100 Subject: [PATCH 1/8] Determine includes and excludes based on scores --- src/IterableCodeExtractor.php | 74 +++++++++-------------------- tests/IterableCodeExtractorTest.php | 51 +++++++++++--------- 2 files changed, 51 insertions(+), 74 deletions(-) diff --git a/src/IterableCodeExtractor.php b/src/IterableCodeExtractor.php index 5a306489..20404211 100644 --- a/src/IterableCodeExtractor.php +++ b/src/IterableCodeExtractor.php @@ -92,64 +92,36 @@ public static function fromDirectory( $dir, Translations $translations, array $o /** * Determines whether a file is valid based on the include option. * - * @param SplFileInfo $file File or directory. - * @param array $include List of files and directories to include. - * @return bool + * @param SplFileInfo $file File or directory. + * @param array $matchers List of files and directories to match. + * @return int How strongly the file is matched. + * 0 for not matched. + * 1 for matched by wildcard. + * 2 for matched by direct match on filename. + * 3 for matched by direct match on path. */ - protected static function isIncluded( SplFileInfo $file, array $include = [] ) { - if ( empty( $include ) ) { - return true; + protected static function calculateMatchScore( SplFileInfo $file, array $matchers = [] ) { + if ( empty( $matchers ) ) { + return 0; } - if ( in_array( $file->getBasename(), $include, true ) ) { - return true; + if ( in_array( $file->getBasename(), $matchers, true ) ) { + return 2; } // Check for more complex paths, e.g. /some/sub/folder. $root_relative_path = str_replace( static::$dir, '', $file->getPathname() ); - foreach ( $include as $path_or_file ) { - $pattern = preg_quote( str_replace( '*', '__wildcard__', $path_or_file ) ); - $pattern = '/' . str_replace( '__wildcard__', '(.+)', $pattern ); - - if ( - false !== mb_ereg( $pattern, $root_relative_path . '$' ) && - false !== mb_ereg( $pattern, $root_relative_path . '/' ) - ) { - return true; - } - } - - return false; - } - - /** - * Determines whether a file is valid based on the exclude option. - * - * @param SplFileInfo $file File or directory. - * @param array $exclude List of files and directories to skip. - * @return bool - */ - protected static function isExcluded( SplFileInfo $file, array $exclude = [] ) { - if ( empty( $exclude ) ) { - return false; - } - if ( in_array( $file->getBasename(), $exclude, true ) ) { - return true; - } - - // Check for more complex paths, e.g. /some/sub/folder. - foreach ( $exclude as $path_or_file ) { + foreach ( $matchers as $path_or_file ) { $pattern = preg_quote( str_replace( '*', '__wildcard__', $path_or_file ) ); - $pattern = '/' . str_replace( '__wildcard__', '(.+)', $pattern ) . '$'; + $pattern = '(^|/)' . str_replace( '__wildcard__', '(.+)', $pattern ) . '($|/)'; - $root_relative_path = str_replace( static::$dir, '', $file->getPathname() ); if ( false !== mb_ereg( $pattern, $root_relative_path ) ) { - return true; + return false === strpos( $path_or_file, '*' ) ? 3 : 1; } } - return false; + return 0; } /** @@ -172,16 +144,16 @@ function ( $file, $key, $iterator ) use ( $include, $exclude, $extensions ) { /** @var RecursiveCallbackFilterIterator $iterator */ /** @var SplFileInfo $file */ - if ( static::isExcluded( $file, $exclude ) && ( empty( $include ) || ! static::isIncluded( $file, $include ) ) ) { - return false; + if ( $iterator->hasChildren() ) { + return true; } - if ( ! static::isIncluded( $file, $include ) && ! $iterator->hasChildren() ) { - return false; - } + // If no $include is passed everything gets the weakest possible matching score. + $inclusion_score = empty( $include ) ? 0.1 : static::calculateMatchScore( $file, $include ); + $exclusion_score = static::calculateMatchScore( $file, $exclude ); - if ( $iterator->hasChildren() ) { - return true; + if ( $inclusion_score === 0 || $exclusion_score > $inclusion_score ) { + return false; } return ( $file->isFile() && in_array( $file->getExtension(), $extensions, true ) ); diff --git a/tests/IterableCodeExtractorTest.php b/tests/IterableCodeExtractorTest.php index 255e495c..a6b58377 100644 --- a/tests/IterableCodeExtractorTest.php +++ b/tests/IterableCodeExtractorTest.php @@ -16,10 +16,15 @@ public function setUp() { * PHP5.4 cannot set property with __DIR__ constant. */ self::$base = __DIR__ . '/data/'; + + $property = new \ReflectionProperty( 'WP_CLI\I18n\IterableCodeExtractor', 'dir' ); + $property->setAccessible( true ); + $property->setValue( null, self::$base ); + $property->setAccessible( false ); } public function test_can_include_files() { - $includes = [ 'foo', 'bar', 'baz/inc*.js' ]; + $includes = [ 'foo-plugin', 'bar', 'baz/inc*.js' ]; $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, $includes, [], [ 'php', 'js' ] ); $expected = static::$base . 'foo-plugin/foo-plugin.php'; $this->assertContains( $expected, $result ); @@ -78,31 +83,31 @@ public function test_can_exclude_override_matching_directory() { } public function test_can_not_exclude_partially_directory() { - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['exc'], [ 'js' ] ); + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], ['exc'], [ 'js' ] ); $expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php'; $expected_2 = static::$base . 'foo/bar/excluded/ignored.js'; - //$this->assertContains( $expected_1, $result ); + $this->assertNotContains( $expected_1, $result ); $this->assertContains( $expected_2, $result ); } - public function test_can_exclude_by_wildcard() { - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], [ '*' ], [ 'php', 'js' ] ); - $this->assertEmpty( $result ); - } - - public function test_can_exclude_files() { - $excludes = [ 'hoge' ]; - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], $excludes, [ 'php', 'js' ] ); - $expected = static::$base . 'hoge/should_NOT_be_included.js'; - $this->assertNotContains( $expected, $result ); - } - - public function test_can_override_exclude_by_include() { - // Overrides include option - $includes = [ 'excluded' ]; - $excludes = [ 'excluded/ignored.js' ]; - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, $includes, $excludes, [ 'php', 'js' ] ); - $expected = static::$base . 'foo/bar/excluded/ignored.js'; - $this->assertContains( $expected, $result ); - } +// public function test_can_exclude_by_wildcard() { +// $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], [ '*' ], [ 'php', 'js' ] ); +// $this->assertEmpty( $result ); +// } +// +// public function test_can_exclude_files() { +// $excludes = [ 'hoge' ]; +// $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], $excludes, [ 'php', 'js' ] ); +// $expected = static::$base . 'hoge/should_NOT_be_included.js'; +// $this->assertNotContains( $expected, $result ); +// } +// +// public function test_can_override_exclude_by_include() { +// // Overrides include option +// $includes = [ 'excluded/ignored.js' ]; +// $excludes = [ 'excluded/*.js' ]; +// $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, $includes, $excludes, [ 'php', 'js' ] ); +// $expected = static::$base . 'foo/bar/excluded/ignored.js'; +// $this->assertContains( $expected, $result ); +// } } From e5b43b5a87c66c8d5c33deb1a29da98b61439f68 Mon Sep 17 00:00:00 2001 From: Herre Groen Date: Tue, 30 Oct 2018 17:15:10 +0100 Subject: [PATCH 2/8] Uncomment tests. --- tests/IterableCodeExtractorTest.php | 42 ++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tests/IterableCodeExtractorTest.php b/tests/IterableCodeExtractorTest.php index a6b58377..32222c17 100644 --- a/tests/IterableCodeExtractorTest.php +++ b/tests/IterableCodeExtractorTest.php @@ -83,31 +83,31 @@ public function test_can_exclude_override_matching_directory() { } public function test_can_not_exclude_partially_directory() { - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], ['exc'], [ 'js' ] ); + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['exc'], [ 'js' ] ); $expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php'; $expected_2 = static::$base . 'foo/bar/excluded/ignored.js'; $this->assertNotContains( $expected_1, $result ); $this->assertContains( $expected_2, $result ); } -// public function test_can_exclude_by_wildcard() { -// $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], [ '*' ], [ 'php', 'js' ] ); -// $this->assertEmpty( $result ); -// } -// -// public function test_can_exclude_files() { -// $excludes = [ 'hoge' ]; -// $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], $excludes, [ 'php', 'js' ] ); -// $expected = static::$base . 'hoge/should_NOT_be_included.js'; -// $this->assertNotContains( $expected, $result ); -// } -// -// public function test_can_override_exclude_by_include() { -// // Overrides include option -// $includes = [ 'excluded/ignored.js' ]; -// $excludes = [ 'excluded/*.js' ]; -// $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, $includes, $excludes, [ 'php', 'js' ] ); -// $expected = static::$base . 'foo/bar/excluded/ignored.js'; -// $this->assertContains( $expected, $result ); -// } + public function test_can_exclude_by_wildcard() { + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], [ '*' ], [ 'php', 'js' ] ); + $this->assertEmpty( $result ); + } + + public function test_can_exclude_files() { + $excludes = [ 'hoge' ]; + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [], $excludes, [ 'php', 'js' ] ); + $expected = static::$base . 'hoge/should_NOT_be_included.js'; + $this->assertNotContains( $expected, $result ); + } + + public function test_can_override_exclude_by_include() { + // Overrides include option + $includes = [ 'excluded/ignored.js' ]; + $excludes = [ 'excluded/*.js' ]; + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, $includes, $excludes, [ 'php', 'js' ] ); + $expected = static::$base . 'foo/bar/excluded/ignored.js'; + $this->assertContains( $expected, $result ); + } } From 8d2a03d8633d09d989f20aa9c554c235b722075b Mon Sep 17 00:00:00 2001 From: Herre Groen Date: Tue, 30 Oct 2018 17:32:08 +0100 Subject: [PATCH 3/8] Improve matching scores --- src/IterableCodeExtractor.php | 21 +++++++++++++-------- tests/IterableCodeExtractorTest.php | 4 ++-- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/IterableCodeExtractor.php b/src/IterableCodeExtractor.php index 20404211..0107f845 100644 --- a/src/IterableCodeExtractor.php +++ b/src/IterableCodeExtractor.php @@ -95,10 +95,6 @@ public static function fromDirectory( $dir, Translations $translations, array $o * @param SplFileInfo $file File or directory. * @param array $matchers List of files and directories to match. * @return int How strongly the file is matched. - * 0 for not matched. - * 1 for matched by wildcard. - * 2 for matched by direct match on filename. - * 3 for matched by direct match on path. */ protected static function calculateMatchScore( SplFileInfo $file, array $matchers = [] ) { if ( empty( $matchers ) ) { @@ -106,7 +102,7 @@ protected static function calculateMatchScore( SplFileInfo $file, array $matcher } if ( in_array( $file->getBasename(), $matchers, true ) ) { - return 2; + return 10; } // Check for more complex paths, e.g. /some/sub/folder. @@ -114,10 +110,19 @@ protected static function calculateMatchScore( SplFileInfo $file, array $matcher foreach ( $matchers as $path_or_file ) { $pattern = preg_quote( str_replace( '*', '__wildcard__', $path_or_file ) ); - $pattern = '(^|/)' . str_replace( '__wildcard__', '(.+)', $pattern ) . '($|/)'; + $pattern = '(^|/)' . str_replace( '__wildcard__', '(.+)', $pattern ); - if ( false !== mb_ereg( $pattern, $root_relative_path ) ) { - return false === strpos( $path_or_file, '*' ) ? 3 : 1; + $base_score = count( explode( '/', $path_or_file ) ); + + if ( + false === strpos( $path_or_file, '*' ) && + false !== mb_ereg( $pattern . '$', $root_relative_path ) + ) { + return $base_score * 10; + } + + if ( false !== mb_ereg( $pattern . '(/|$)', $root_relative_path ) ) { + return $base_score; } } diff --git a/tests/IterableCodeExtractorTest.php b/tests/IterableCodeExtractorTest.php index 32222c17..213b806e 100644 --- a/tests/IterableCodeExtractorTest.php +++ b/tests/IterableCodeExtractorTest.php @@ -75,7 +75,7 @@ public function test_can_exclude_override_wildcard() { } public function test_can_exclude_override_matching_directory() { - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['excluded'], [ 'php' ] ); + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['foo/bar/excluded/*'], [ 'php' ] ); $expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php'; $expected_2 = static::$base . 'foo/bar/excluded/excluded.js'; $this->assertContains( $expected_1, $result ); @@ -83,7 +83,7 @@ public function test_can_exclude_override_matching_directory() { } public function test_can_not_exclude_partially_directory() { - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['exc'], [ 'js' ] ); + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], [ 'exc' ], [ 'js' ] ); $expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php'; $expected_2 = static::$base . 'foo/bar/excluded/ignored.js'; $this->assertNotContains( $expected_1, $result ); From 527bbea38d44552bd5fdcbf34de40e00457f4ef9 Mon Sep 17 00:00:00 2001 From: Herre Groen Date: Tue, 30 Oct 2018 17:36:57 +0100 Subject: [PATCH 4/8] Do not count * as a component --- src/IterableCodeExtractor.php | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/IterableCodeExtractor.php b/src/IterableCodeExtractor.php index 0107f845..bb07ea3e 100644 --- a/src/IterableCodeExtractor.php +++ b/src/IterableCodeExtractor.php @@ -112,7 +112,16 @@ protected static function calculateMatchScore( SplFileInfo $file, array $matcher $pattern = preg_quote( str_replace( '*', '__wildcard__', $path_or_file ) ); $pattern = '(^|/)' . str_replace( '__wildcard__', '(.+)', $pattern ); - $base_score = count( explode( '/', $path_or_file ) ); + $base_score = count( + array_filter( + explode( '/', $path_or_file ), + function ( $component) { return $component !== '*'; } + ) + ); + if ( $base_score === 0 ) { + $base_score = 0.2; + } + if ( false === strpos( $path_or_file, '*' ) && From a4ac87d57423925451846ca9ed4e89b012d03d1c Mon Sep 17 00:00:00 2001 From: Herre Groen Date: Tue, 30 Oct 2018 17:40:18 +0100 Subject: [PATCH 5/8] Add explanation to 0.2 base score --- src/IterableCodeExtractor.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IterableCodeExtractor.php b/src/IterableCodeExtractor.php index bb07ea3e..a7f57d99 100644 --- a/src/IterableCodeExtractor.php +++ b/src/IterableCodeExtractor.php @@ -119,6 +119,7 @@ function ( $component) { return $component !== '*'; } ) ); if ( $base_score === 0 ) { + // If the matcher is simply * it gets a score above the implicit score but below 1. $base_score = 0.2; } From 266e94a0df893918c8a9d1838612f6e71767bf8f Mon Sep 17 00:00:00 2001 From: Herre Groen Date: Wed, 31 Oct 2018 12:41:57 +0100 Subject: [PATCH 6/8] Improve directory matching --- src/IterableCodeExtractor.php | 53 +++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/src/IterableCodeExtractor.php b/src/IterableCodeExtractor.php index a7f57d99..bd851d27 100644 --- a/src/IterableCodeExtractor.php +++ b/src/IterableCodeExtractor.php @@ -90,7 +90,7 @@ public static function fromDirectory( $dir, Translations $translations, array $o } /** - * Determines whether a file is valid based on the include option. + * Determines whether a file is valid based on given matchers. * * @param SplFileInfo $file File or directory. * @param array $matchers List of files and directories to match. @@ -118,7 +118,7 @@ protected static function calculateMatchScore( SplFileInfo $file, array $matcher function ( $component) { return $component !== '*'; } ) ); - if ( $base_score === 0 ) { + if ( 0 === $base_score ) { // If the matcher is simply * it gets a score above the implicit score but below 1. $base_score = 0.2; } @@ -139,6 +139,41 @@ function ( $component) { return $component !== '*'; } return 0; } + /** + * Determines whether or not a directory has children that may be matched. + * + * @param SplFileInfo $dir Directory. + * @param array $matchers List of files and directories to match. + * @return bool Whether or not there are any matchers for children of this directory. + */ + protected static function containsMatchingChildren( SplFileInfo $dir, array $matchers = [] ) { + if ( empty( $matchers ) ) { + return false; + } + + $root_relative_path = str_replace( static::$dir, '', $dir->getPathname() ); + + foreach ( $matchers as $path_or_file ) { + if ( + false === strpos( $path_or_file, '*' ) && + 0 === strpos( $path_or_file . '/', $root_relative_path ) + ) { + return true; + } + + $base = current( explode( '*', $path_or_file ) ); + + if ( + 0 === strpos( $base, $root_relative_path ) || + 0 === strpos( $root_relative_path, $base ) + ) { + return true; + } + } + + return false; + } + /** * Recursively gets all PHP files within a directory. * @@ -159,16 +194,18 @@ function ( $file, $key, $iterator ) use ( $include, $exclude, $extensions ) { /** @var RecursiveCallbackFilterIterator $iterator */ /** @var SplFileInfo $file */ - if ( $iterator->hasChildren() ) { - return true; - } - // If no $include is passed everything gets the weakest possible matching score. $inclusion_score = empty( $include ) ? 0.1 : static::calculateMatchScore( $file, $include ); $exclusion_score = static::calculateMatchScore( $file, $exclude ); - if ( $inclusion_score === 0 || $exclusion_score > $inclusion_score ) { - return false; + // Always include directories that aren't excluded. + if ( 0 === $exclusion_score && $iterator->hasChildren() ) { + return true; + } + + if ( 0 === $inclusion_score || $exclusion_score > $inclusion_score ) { + // Always include directories that may have matching children even if they are excluded. + return $iterator->hasChildren() && static::containsMatchingChildren( $file, $include ); } return ( $file->isFile() && in_array( $file->getExtension(), $extensions, true ) ); From 6b50d8edfdc9db6f9205f26ee8432af48d65915f Mon Sep 17 00:00:00 2001 From: Herre Groen Date: Wed, 31 Oct 2018 12:43:11 +0100 Subject: [PATCH 7/8] Improve code style --- tests/IterableCodeExtractorTest.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/IterableCodeExtractorTest.php b/tests/IterableCodeExtractorTest.php index 213b806e..6f5de4e3 100644 --- a/tests/IterableCodeExtractorTest.php +++ b/tests/IterableCodeExtractorTest.php @@ -67,7 +67,7 @@ public function test_can_include_only_php() { } public function test_can_exclude_override_wildcard() { - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['foo/bar/excluded/*'], [ 'php' ] ); + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], [ 'foo/bar/excluded/*' ], [ 'php' ] ); $expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php'; $expected_2 = static::$base . 'foo/bar/excluded/excluded.js'; $this->assertContains( $expected_1, $result ); @@ -75,7 +75,7 @@ public function test_can_exclude_override_wildcard() { } public function test_can_exclude_override_matching_directory() { - $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['foo/bar/excluded/*'], [ 'php' ] ); + $result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], [ 'foo/bar/excluded/*' ], [ 'php' ] ); $expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php'; $expected_2 = static::$base . 'foo/bar/excluded/excluded.js'; $this->assertContains( $expected_1, $result ); From a35b16927134f65ec29c3114760c839406e84533 Mon Sep 17 00:00:00 2001 From: Herre Groen Date: Wed, 31 Oct 2018 13:10:29 +0100 Subject: [PATCH 8/8] Add more comments --- src/IterableCodeExtractor.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/IterableCodeExtractor.php b/src/IterableCodeExtractor.php index bd851d27..f2d4930d 100644 --- a/src/IterableCodeExtractor.php +++ b/src/IterableCodeExtractor.php @@ -112,6 +112,7 @@ protected static function calculateMatchScore( SplFileInfo $file, array $matcher $pattern = preg_quote( str_replace( '*', '__wildcard__', $path_or_file ) ); $pattern = '(^|/)' . str_replace( '__wildcard__', '(.+)', $pattern ); + // Base score is the amount of nested directories, discounting wildcards. $base_score = count( array_filter( explode( '/', $path_or_file ), @@ -123,7 +124,7 @@ function ( $component) { return $component !== '*'; } $base_score = 0.2; } - + // If the matcher contains no wildcards and matches the end of the path. if ( false === strpos( $path_or_file, '*' ) && false !== mb_ereg( $pattern . '$', $root_relative_path ) @@ -131,6 +132,7 @@ function ( $component) { return $component !== '*'; } return $base_score * 10; } + // If the matcher matches the end of the path or a full directory contained. if ( false !== mb_ereg( $pattern . '(/|$)', $root_relative_path ) ) { return $base_score; } @@ -154,6 +156,7 @@ protected static function containsMatchingChildren( SplFileInfo $dir, array $mat $root_relative_path = str_replace( static::$dir, '', $dir->getPathname() ); foreach ( $matchers as $path_or_file ) { + // If the matcher contains no wildcards and the path matches the start of the matcher. if ( false === strpos( $path_or_file, '*' ) && 0 === strpos( $path_or_file . '/', $root_relative_path ) @@ -163,6 +166,8 @@ protected static function containsMatchingChildren( SplFileInfo $dir, array $mat $base = current( explode( '*', $path_or_file ) ); + // If start of the path matches the start of the matcher until the first wildcard. + // Or the start of the matcher until the first wildcard matches the start of the path. if ( 0 === strpos( $base, $root_relative_path ) || 0 === strpos( $root_relative_path, $base )