Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Determine includes and excludes based on scores #104

Merged
merged 8 commits into from
Oct 31, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 66 additions & 37 deletions src/IterableCodeExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,61 +90,88 @@ public static function fromDirectory( $dir, Translations $translations, array $o
}

/**
* Determines whether a file is valid based on the include option.
* Determines whether a file is valid based on given matchers.
*
* @param SplFileInfo $file File or directory.
* @param array $include List of files and directories to include.
* @return bool
* @param SplFileInfo $file File or directory.
* @param array $matchers List of files and directories to match.
* @return int How strongly the file is matched.
*/
protected static function isIncluded( SplFileInfo $file, array $include = [] ) {
if ( empty( $include ) ) {
return true;
protected static function calculateMatchScore( SplFileInfo $file, array $matchers = [] ) {
if ( empty( $matchers ) ) {
return 0;
}

if ( in_array( $file->getBasename(), $include, true ) ) {
return true;
if ( in_array( $file->getBasename(), $matchers, true ) ) {
return 10;
}

// Check for more complex paths, e.g. /some/sub/folder.
$root_relative_path = str_replace( static::$dir, '', $file->getPathname() );
foreach ( $include as $path_or_file ) {

foreach ( $matchers as $path_or_file ) {
$pattern = preg_quote( str_replace( '*', '__wildcard__', $path_or_file ) );
$pattern = '/' . str_replace( '__wildcard__', '(.+)', $pattern );
$pattern = '(^|/)' . str_replace( '__wildcard__', '(.+)', $pattern );

// Base score is the amount of nested directories, discounting wildcards.
$base_score = count(
array_filter(
explode( '/', $path_or_file ),
function ( $component) { return $component !== '*'; }
)
);
if ( 0 === $base_score ) {
// If the matcher is simply * it gets a score above the implicit score but below 1.
$base_score = 0.2;
}

// If the matcher contains no wildcards and matches the end of the path.
if (
false !== mb_ereg( $pattern, $root_relative_path . '$' ) &&
false !== mb_ereg( $pattern, $root_relative_path . '/' )
false === strpos( $path_or_file, '*' ) &&
false !== mb_ereg( $pattern . '$', $root_relative_path )
) {
return true;
return $base_score * 10;
}

// If the matcher matches the end of the path or a full directory contained.
if ( false !== mb_ereg( $pattern . '(/|$)', $root_relative_path ) ) {
return $base_score;
}
}

return false;
return 0;
}

/**
* Determines whether a file is valid based on the exclude option.
* Determines whether or not a directory has children that may be matched.
*
* @param SplFileInfo $file File or directory.
* @param array $exclude List of files and directories to skip.
* @return bool
* @param SplFileInfo $dir Directory.
* @param array $matchers List of files and directories to match.
* @return bool Whether or not there are any matchers for children of this directory.
*/
protected static function isExcluded( SplFileInfo $file, array $exclude = [] ) {
if ( empty( $exclude ) ) {
protected static function containsMatchingChildren( SplFileInfo $dir, array $matchers = [] ) {
if ( empty( $matchers ) ) {
return false;
}

if ( in_array( $file->getBasename(), $exclude, true ) ) {
return true;
}
$root_relative_path = str_replace( static::$dir, '', $dir->getPathname() );

// Check for more complex paths, e.g. /some/sub/folder.
foreach ( $exclude as $path_or_file ) {
$pattern = preg_quote( str_replace( '*', '__wildcard__', $path_or_file ) );
$pattern = '/' . str_replace( '__wildcard__', '(.+)', $pattern ) . '$';
foreach ( $matchers as $path_or_file ) {
// If the matcher contains no wildcards and the path matches the start of the matcher.
if (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to have a short comment here describing what is being checked. Maybe something like this:

Suggested change
if (
// Without a wildcard, we can directly check the folder hierarchy.
if (

false === strpos( $path_or_file, '*' ) &&
0 === strpos( $path_or_file . '/', $root_relative_path )
) {
return true;
}

$root_relative_path = str_replace( static::$dir, '', $file->getPathname() );
if ( false !== mb_ereg( $pattern, $root_relative_path ) ) {
$base = current( explode( '*', $path_or_file ) );

// If start of the path matches the start of the matcher until the first wildcard.
// Or the start of the matcher until the first wildcard matches the start of the path.
if (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a comment as well. I'm not 100% sure what the reason is for checking both ways; I assume it is done to be overly conservative. Either way, this certainly needs an explanation.

Copy link
Contributor Author

@herregroen herregroen Oct 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needed for partial wildcards like this:

Given an include wildcard of `dir/wp-*`, any directory starting with `dir/wp-` should always be checked. In that case we're checking whether the directory path starts with the matcher's base (the part of the matcher before the first wildcard).

For normal wildcards the check is the other way around: with `one/two/three/*.js`, a directory called `one` should be checked, so we test whether the matcher's base starts with the directory path.

0 === strpos( $base, $root_relative_path ) ||
0 === strpos( $root_relative_path, $base )
) {
return true;
}
}
Expand Down Expand Up @@ -172,16 +199,18 @@ function ( $file, $key, $iterator ) use ( $include, $exclude, $extensions ) {
/** @var RecursiveCallbackFilterIterator $iterator */
/** @var SplFileInfo $file */

if ( static::isExcluded( $file, $exclude ) && ( empty( $include ) || ! static::isIncluded( $file, $include ) ) ) {
return false;
}
// If no $include is passed everything gets the weakest possible matching score.
$inclusion_score = empty( $include ) ? 0.1 : static::calculateMatchScore( $file, $include );
$exclusion_score = static::calculateMatchScore( $file, $exclude );

if ( ! static::isIncluded( $file, $include ) && ! $iterator->hasChildren() ) {
return false;
// Always include directories that aren't excluded.
if ( 0 === $exclusion_score && $iterator->hasChildren() ) {
return true;
}

if ( $iterator->hasChildren() ) {
return true;
if ( 0 === $inclusion_score || $exclusion_score > $inclusion_score ) {
// Always include directories that may have matching children even if they are excluded.
return $iterator->hasChildren() && static::containsMatchingChildren( $file, $include );
}

return ( $file->isFile() && in_array( $file->getExtension(), $extensions, true ) );
Expand Down
19 changes: 12 additions & 7 deletions tests/IterableCodeExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,15 @@ public function setUp() {
* PHP5.4 cannot set property with __DIR__ constant.
*/
self::$base = __DIR__ . '/data/';

$property = new \ReflectionProperty( 'WP_CLI\I18n\IterableCodeExtractor', 'dir' );
$property->setAccessible( true );
$property->setValue( null, self::$base );
$property->setAccessible( false );
}

public function test_can_include_files() {
$includes = [ 'foo', 'bar', 'baz/inc*.js' ];
$includes = [ 'foo-plugin', 'bar', 'baz/inc*.js' ];
$result = IterableCodeExtractor::getFilesFromDirectory( self::$base, $includes, [], [ 'php', 'js' ] );
$expected = static::$base . 'foo-plugin/foo-plugin.php';
$this->assertContains( $expected, $result );
Expand Down Expand Up @@ -62,26 +67,26 @@ public function test_can_include_only_php() {
}

public function test_can_exclude_override_wildcard() {
$result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['foo/bar/excluded/*'], [ 'php' ] );
$result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], [ 'foo/bar/excluded/*' ], [ 'php' ] );
$expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php';
$expected_2 = static::$base . 'foo/bar/excluded/excluded.js';
$this->assertContains( $expected_1, $result );
$this->assertNotContains( $expected_2, $result );
}

public function test_can_exclude_override_matching_directory() {
$result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['excluded'], [ 'php' ] );
$result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], [ 'foo/bar/excluded/*' ], [ 'php' ] );
$expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php';
$expected_2 = static::$base . 'foo/bar/excluded/excluded.js';
$this->assertContains( $expected_1, $result );
$this->assertNotContains( $expected_2, $result );
}

public function test_can_not_exclude_partially_directory() {
$result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], ['exc'], [ 'js' ] );
$result = IterableCodeExtractor::getFilesFromDirectory( self::$base, [ 'foo/bar/*' ], [ 'exc' ], [ 'js' ] );
$expected_1 = static::$base . 'foo/bar/foo/bar/foo/bar/deep_directory_also_included.php';
$expected_2 = static::$base . 'foo/bar/excluded/ignored.js';
//$this->assertContains( $expected_1, $result );
$this->assertNotContains( $expected_1, $result );
$this->assertContains( $expected_2, $result );
}

Expand All @@ -99,8 +104,8 @@ public function test_can_exclude_files() {

public function test_can_override_exclude_by_include() {
// Overrides include option
$includes = [ 'excluded' ];
$excludes = [ 'excluded/ignored.js' ];
$includes = [ 'excluded/ignored.js' ];
$excludes = [ 'excluded/*.js' ];
$result = IterableCodeExtractor::getFilesFromDirectory( self::$base, $includes, $excludes, [ 'php', 'js' ] );
$expected = static::$base . 'foo/bar/excluded/ignored.js';
$this->assertContains( $expected, $result );
Expand Down