' );
$tags->next_tag( 'div' );
- $this->assertTrue( $tags->next_within_balanced_tags( 'img', 3 ) );
+ $state = [];
+ $this->assertTrue( $tags->next_within_balanced_tags( $state, 'img', 3 ) );
}
}
From 02e3e5cc2ae026d94b0f62551132c1dc21f2c1ec Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Thu, 15 Dec 2022 14:33:08 -0700
Subject: [PATCH 09/19] Scribble notes
---
.../html/class-wp-html-attribute-sourcer.php | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/lib/experimental/html/class-wp-html-attribute-sourcer.php b/lib/experimental/html/class-wp-html-attribute-sourcer.php
index 697228df04b34a..d6e701e6174a19 100644
--- a/lib/experimental/html/class-wp-html-attribute-sourcer.php
+++ b/lib/experimental/html/class-wp-html-attribute-sourcer.php
@@ -1,5 +1,15 @@
Date: Wed, 21 Dec 2022 16:58:07 -0700
Subject: [PATCH 10/19] Revise HTML navigation to use `balanced_next` with a
depth tracker
Tests are broken because the attribute sourcer needs rework given this
different approach for navigating tags.
---
.../html/class-wp-html-attribute-sourcer.php | 13 +-
.../html/class-wp-html-processor.php | 152 +++++++++++++-----
phpunit/html/wp-html-processor-test.php | 37 +++--
3 files changed, 142 insertions(+), 60 deletions(-)
diff --git a/lib/experimental/html/class-wp-html-attribute-sourcer.php b/lib/experimental/html/class-wp-html-attribute-sourcer.php
index d6e701e6174a19..22bfb94ea05c46 100644
--- a/lib/experimental/html/class-wp-html-attribute-sourcer.php
+++ b/lib/experimental/html/class-wp-html-attribute-sourcer.php
@@ -208,8 +208,8 @@ public static function select( $selectors, $html ) {
'tag' => $tags->get_tag(),
'selector' => $next
] );
- $state = [];
- while ( $tags->next_within_balanced_tags( $state ) ) {
+ $state = WP_HTML_Processor::new_state();
+ while ( $tags->balanced_next( $state ) ) {
continue;
}
@@ -234,8 +234,9 @@ public static function select( $selectors, $html ) {
'tag' => $tags->get_tag(),
'selector' => $next
] );
- $state = [];
- while ( $tags->next_within_balanced_tags( $state, null, 1 ) ) {
+ $state = WP_HTML_Processor::new_state();
+ $state->match_depth = 1;
+ while ( $tags->balanced_next( $state ) ) {
if ( null === self::select_match( $tags, $next ) ) {
continue;
}
@@ -259,8 +260,8 @@ public static function select( $selectors, $html ) {
'tag' => $tags->get_tag(),
'selector' => $next
] );
- $state = [];
- while ( $tags->next_within_balanced_tags( $state ) ) {
+ $state = WP_HTML_Processor::new_state();
+ while ( $tags->balanced_next( $state ) ) {
if ( null === self::select_match( $tags, $next ) ) {
continue;
}
diff --git a/lib/experimental/html/class-wp-html-processor.php b/lib/experimental/html/class-wp-html-processor.php
index a4179455892e6c..7a207f287abc9d 100644
--- a/lib/experimental/html/class-wp-html-processor.php
+++ b/lib/experimental/html/class-wp-html-processor.php
@@ -16,42 +16,115 @@
* not, that `
` itself is a special element.
*/
+
+class WP_HTML_Processor_Scan_State {
+ public $budget = 1000;
+ public $open_tags = array();
+ public $bail_depth = 0;
+ public $match_depth = null;
+
+ public function relative_depth() {
+ return count( $this->open_tags );
+ }
+
+ public function also_scan_siblings() {
+ $this->bail_depth = -1;
+ }
+}
+
+
class WP_HTML_Processor extends WP_HTML_Tag_Processor {
- public function next_within_balanced_tags( &$state, $query = null, $max_depth = 1000 ) {
- if ( empty( $state ) ) {
- $state['budget'] = 1000;
- $state['tag_name'] = $this->get_tag();
- $state['balanced_depth'] = 1;
- $state['depth'] = 1;
-
- if ( self::is_html_void_element( $this->get_tag() ) ) {
- return false;
+ public static function new_state() {
+ return new WP_HTML_Processor_Scan_State();
+ }
+
+ public function balanced_next( WP_HTML_Processor_Scan_State $state, $query = null ) {
+ while ( $this->next_tag( array( 'tag_closers' => 'visit' ) ) && $state->budget-- > 0 ) {
+ $tag_name = $this->get_tag();
+ $is_closer = $this->is_tag_closer();
+ $is_void = self::is_html_void_element( $tag_name );
+ $type = self::classify_tag_type( $is_closer, $is_void );
+
+ /*
+ * Step 1. Update the stack of open tags.
+ *
+ * If and when we add more complete HTML parsing support we will also
+ * need to track the stack of active formats so that we can properly
+ * handle missing tags and overlapping tags.
+ */
+
+ switch ( $type ) {
+ case 'void':
+ /*
+ * Void tags (such as <img>) can't have children and so we
+ * won't push or pop them from the stack of open tags.
+ *
+ * If and when we support self-closing foreign tags we would
+ * need to separately track those, but their behavior matches
+ * this case. The self-closing flag is ignored for HTML5 tags.
+ */
+ break;
+
+ case 'opener':
+ $state->open_tags[] = $tag_name;
+ break;
+
+ case 'closer':
+ $last_tag = array_pop( $state->open_tags );
+
+ /*
+ * Currently we can only support fully-normative and balanced HTML5.
+ * If we encounter anything we don't expect then we will bail. In a
+ * future update we may perform more careful HTML parsing and unlock
+ * navigating through non-normative documents.
+ */
+ if ( $last_tag !== $tag_name ) {
+ return false;
+ }
+ break;
}
- }
- while ( $this->next_tag( array( 'tag_closers' => 'visit' ) ) && $state['budget']-- > 0 ) {
- if (
- $this->get_tag() === $state['tag_name'] &&
- $this->is_tag_closer() &&
- $state['balanced_depth'] === 1
- ) {
+ /*
+ * Void elements don't enter the stack, but they do exist in the
+ * depth hierarchy, so we have to temporarily account for that.
+ *
+ * We could have followed the approach in the HTML5 spec by appending
+ * the void tag to the stack of open tags, and then remember to pop it
+ * when exiting this function, but by tracking it like this we don't
+ * have to remember to do that.
+ */
+ $depth = $type === 'void'
+ ? $state->relative_depth() + 1
+ : $state->relative_depth();
+
+ /*
+ * Step 2. If we've reached the depth at which we want to stop searching,
+ * then bail at the current tag. This is mostly used to stop at the end
+ * of the opening tag's closing tag, but if set negative can continue
+ * scanning sibling elements (-1) or parents (-2) and so on.
+ */
+
+ if ( $state->bail_depth === $depth ) {
return false;
}
- if ( $state['depth'] <= $max_depth ) {
+ /*
+ * Step 3. Determine if we have a matching tag. In addition to the query
+ * we pass along to the underlying tag processor we're going to allow
+ * specifying the relative depth for a match. For example, a CSS child
+ * combinator would specify that a match must have a relative depth of 1,
+ * indicating that it's a direct child of the surrounding element, whereas
+ * the descendant selector could match at any depth and so sets this to `null`.
+ * To prevent matching _above_ a tag we rely on the `bail_depth` to stop
+ * searching once we've exited the tag on which we started, or reach its parent.
+ */
+
+ if ( ! isset( $state->match_depth ) || $state->match_depth === $depth ) {
$this->parse_query( $query );
if ( $this->matches() ) {
return true;
}
}
-
- if ( ! self::is_html_void_element( $this->get_tag() ) ) {
- $state['depth'] += $this->is_tag_closer() ? -1 : 1;
- }
-
- if ( $this->get_tag() === $state['tag_name'] ) {
- $state['balanced_depth'] += $this->is_tag_closer() ? -1 : 1;
- }
}
return false;
@@ -72,26 +145,13 @@ public function get_content_inside_balanced_tags() {
}
$this->set_bookmark( $start_name );
- $tag_name = $this->get_tag();
- $depth = 1;
- if ( self::is_html_void_element( $tag_name ) ) {
- return '';
- }
-
- while ( $this->next_tag( [ 'tag_closers' => 'visit' ] ) ) {
- if ( $this->get_tag() !== $tag_name ) {
- continue;
- }
-
- if ( $this->is_tag_closer() && $depth === 1 ) {
- $this->set_bookmark( $end_name );
- break;
- }
-
- $depth += $this->is_tag_closer() ? -1 : 1;
+ $state = self::new_state();
+ while ( $this->balanced_next( $state ) ) {
+ continue;
}
+ $this->set_bookmark( $end_name );
$content = $this->content_inside_bookmarks( $start_name, $end_name );
$this->seek( $start_name );
@@ -116,6 +176,14 @@ private function content_inside_bookmarks( $start_bookmark, $end_bookmark ) {
* HTML-related Utility Functions
*/
+ public static function classify_tag_type( $is_closer, $is_void ) {
+ if ( $is_void ) {
+ return 'void';
+ }
+
+ return $is_closer ? 'closer' : 'opener';
+ }
+
/**
* @see https://html.spec.whatwg.org/#elements-2
*/
diff --git a/phpunit/html/wp-html-processor-test.php b/phpunit/html/wp-html-processor-test.php
index 027a314cd3325e..b14408f0ee287d 100644
--- a/phpunit/html/wp-html-processor-test.php
+++ b/phpunit/html/wp-html-processor-test.php
@@ -10,7 +10,7 @@
require_once __DIR__ . '/../../lib/experimental/html/class-wp-html-processor.php';
/**
- * @group html
+ * @group html-proc
*
* @coversDefaultClass WP_HTML_Processor
*/
@@ -19,35 +19,48 @@ public function test_find_descendant_tag() {
$tags = new WP_HTML_Processor( '
',
+ array(
+ 'link' => array(
+ 'type' => 'string',
+ 'source' => 'attribute',
+ 'selector' => 'section > div > a',
+ 'attribute' => 'href'
+ ),
+ ),
+ ),
+
+ array(
+ array( 'attributes' => array( 'link' => 'docs.html' ), 'unparsed' => array() ),
+ '',
+ array(
+ 'link' => array(
+ 'type' => 'string',
+ 'source' => 'attribute',
+ 'selector' => 'section > div + a',
+ 'attribute' => 'href'
+ ),
+ ),
+ ),
+ );
+ }
+
+ /**
+ * @dataProvider data_skipping_non_matches
+ */
+ public function test_sources_skipping_non_matches( $expected, $html, $attributes ) {
+ $this->assertSame( $expected, ( new WP_HTML_Attribute_Sourcer( $attributes, $html ) )->source_attributes() );
+ }
+
+ public function data_skipping_non_matches() {
+ return array(
+
+ );
+ }
+
/**
* @dataProvider data_sourced_attributes
*/
From eb1367df9573b70a6ff57d3c795e204943a7bee4 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Fri, 23 Dec 2022 22:50:41 -0600
Subject: [PATCH 15/19] Closer still
---
.../html/class-wp-html-attribute-sourcer.php | 40 +++++++++++++++----
.../html/class-wp-html-processor.php | 15 +++----
.../html/wp-html-attribute-sourcer-test.php | 13 ++++++
3 files changed, 54 insertions(+), 14 deletions(-)
diff --git a/lib/experimental/html/class-wp-html-attribute-sourcer.php b/lib/experimental/html/class-wp-html-attribute-sourcer.php
index 2373726022fdca..006d0e9781c37e 100644
--- a/lib/experimental/html/class-wp-html-attribute-sourcer.php
+++ b/lib/experimental/html/class-wp-html-attribute-sourcer.php
@@ -8,6 +8,10 @@
* - select_adjacent_sibling( $tags_at_start_tag, $selector_sequence )
* - select_general_sibling( $tags_at_start_tag, $selector_sequence )
* - close_n_levels( $tags_at_depth, $n_levels )
+ *
+ * @TODO:
+ * - [ ] Handle multiple joined constraints for classes and attributes
+ * e.g. ".locale-en-US.localized[data-translation-id][data-translate]"
*/
/**
@@ -174,31 +178,53 @@ public static function select( $selectors, $html ) {
return $tags;
}
+ inner_loop:
$prev = $next;
$next = $next['then'];
$inner_state = $tags->new_state();
switch ( $next['combinator'] ) {
+ /*
+ * Adjacent sibling must be the immediately-following
+ * element which shares the same parent.
+ */
case '+':
- $outer_state->match_depth = 1;
- while ( $tags->balanced_next( $outer_state ) ) {
- if ( self::select_match( $tags, $next ) ) {
+ // Close out this tag if it needs to be.
+ while ( $tags->balanced_next( $inner_state ) ) {
+ continue;
+ }
+
+ if ( $tags->balanced_next( $outer_state ) && self::select_match( $tags, $next ) ) {
+ if ( ! isset( $next['then'] ) ) {
return $tags;
}
+ goto inner_loop;
}
+
+ $next = $prev;
break;
+ // Child combinator
case '>':
$inner_state->match_depth = 1;
+ // Intentional fallthrough
+ // Descendant combinator
case ' ':
+ /*
+ * This match has to be a child of the matched tag,
+ * and the matched tag has to be its parent for the
+ * case of the child combinator.
+ */
while ( $tags->balanced_next( $inner_state ) ) {
if ( self::select_match( $tags, $next ) ) {
- return $tags;
+ if ( ! isset( $next['then'] ) ) {
+ return $tags;
+ }
+
+ goto inner_loop;
}
}
- while ( $tags->balanced_next( $inner_state ) ) {
- continue;
- }
+
$next = $prev;
goto loop;
}
diff --git a/lib/experimental/html/class-wp-html-processor.php b/lib/experimental/html/class-wp-html-processor.php
index a36d77750466aa..ced54246cf35e5 100644
--- a/lib/experimental/html/class-wp-html-processor.php
+++ b/lib/experimental/html/class-wp-html-processor.php
@@ -83,6 +83,14 @@ public function balanced_next( WP_HTML_Processor_Scan_State $state, $query = nul
if ( $last_tag !== $tag_name ) {
return false;
}
+
+ /*
+ * Step 2. Bail if we've reached the end of the tag in which we started.
+ */
+ if ( 0 === $state->relative_depth() ) {
+ return false;
+ }
+
break;
}
@@ -99,13 +107,6 @@ public function balanced_next( WP_HTML_Processor_Scan_State $state, $query = nul
? $state->relative_depth() + 1
: $state->relative_depth();
- /*
- * Step 2. Bail if we've reached the end of the tag in which we started.
- */
- if ( 0 === $depth ) {
- return false;
- }
-
/*
* Step 3. Determine if we have a matching tag. In addition to the query
* we pass along to the underlying tag processor we're going to allow
diff --git a/phpunit/html/wp-html-attribute-sourcer-test.php b/phpunit/html/wp-html-attribute-sourcer-test.php
index da8012e460c404..db370385b9a7cc 100644
--- a/phpunit/html/wp-html-attribute-sourcer-test.php
+++ b/phpunit/html/wp-html-attribute-sourcer-test.php
@@ -69,6 +69,19 @@ public function data_single_combinators() {
),
),
),
+
+ array(
+ array( 'attributes' => array( 'link' => null ), 'unparsed' => array() ),
+ '',
+ array(
+ 'link' => array(
+ 'type' => 'string',
+ 'source' => 'attribute',
+ 'selector' => 'div + a',
+ 'attribute' => 'href'
+ ),
+ ),
+ ),
);
}
From 30172abb612d7616ad8f2b8ac534744a61c12fbc Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Fri, 23 Dec 2022 22:56:15 -0600
Subject: [PATCH 16/19] Passing tests!
---
lib/experimental/html/class-wp-html-attribute-sourcer.php | 3 +--
phpunit/html/wp-html-attribute-sourcer-test.php | 2 +-
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/lib/experimental/html/class-wp-html-attribute-sourcer.php b/lib/experimental/html/class-wp-html-attribute-sourcer.php
index 006d0e9781c37e..27a78aecffd77a 100644
--- a/lib/experimental/html/class-wp-html-attribute-sourcer.php
+++ b/lib/experimental/html/class-wp-html-attribute-sourcer.php
@@ -165,12 +165,11 @@ public static function select( $selectors, $html ) {
$tags = new WP_HTML_Processor( $html );
$outer_state = $tags->new_state();
- $selector = $selectors[$selector_index];
$next = $selectors[$selector_index];
loop:
while ( $tags->balanced_next( $outer_state ) ) {
- if ( ! self::select_match( $tags, $selector ) ) {
+ if ( ! self::select_match( $tags, $next ) ) {
continue;
}
diff --git a/phpunit/html/wp-html-attribute-sourcer-test.php b/phpunit/html/wp-html-attribute-sourcer-test.php
index db370385b9a7cc..fda53531baaaaf 100644
--- a/phpunit/html/wp-html-attribute-sourcer-test.php
+++ b/phpunit/html/wp-html-attribute-sourcer-test.php
@@ -171,7 +171,7 @@ public function test_sources_attributes( $expected, $html, $attributes ) {
public function data_sourced_attributes() {
return array(
array(
- array( 'attributes' => array( 'link' => 'docs.html' ), 'unparsed' => array() ),
+ array( 'attributes' => array( 'link' => 'image' ), 'unparsed' => array() ),
<<Just another section
From f415bdba0a34c8fb5444f7ba6a093ff45fe81b5a Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Fri, 23 Dec 2022 23:01:11 -0600
Subject: [PATCH 17/19] Remove first draft select function
---
.../html/class-wp-html-attribute-sourcer.php | 121 ------------------
1 file changed, 121 deletions(-)
diff --git a/lib/experimental/html/class-wp-html-attribute-sourcer.php b/lib/experimental/html/class-wp-html-attribute-sourcer.php
index 27a78aecffd77a..ea178cbf2eafd8 100644
--- a/lib/experimental/html/class-wp-html-attribute-sourcer.php
+++ b/lib/experimental/html/class-wp-html-attribute-sourcer.php
@@ -236,127 +236,6 @@ public static function select( $selectors, $html ) {
return false;
}
- public static function select_draft1( $selectors, $html ) {
- $tags = new WP_HTML_Processor( $html );
- if ( ! $tags->next_tag() ) {
- return null;
- }
-
- $tags->set_bookmark( 'start' );
-
- foreach ( $selectors as $s ) {
- $tags->seek( 'start' );
- $max = 100;
- while ( --$max > 0 ) {
- $next = $s;
-
- // This label is probably where some stack-level data should reside.
- next:
- // Find the next starting point
- while ( null === self::select_match( $tags, $next ) && $tags->next_tag() ) {
- continue;
- }
-
- // We're out of possible starting points
- if ( null === self::select_match( $tags, $next ) ) {
- continue 2;
- }
-
- // No further selectors, then bingo!
- if ( ! isset( $next['then'] ) ) {
- return $tags;
- }
-
- $next = $next['then'];
-
- // Adjacent sibling must be the immediately-following element.
- if ( '+' === $next['combinator'] ) {
- var_dump( [
- 'msg' => "Processing adjacent sibling",
- 'html' => $html,
- 'tag' => $tags->get_tag(),
- 'selector' => $next
- ] );
- $state = $tags->new_state();
- while ( $tags->balanced_next( $state ) ) {
- continue;
- }
-
- $tags->next_tag();
- if ( null === self::select_match( $tags, $next ) ) {
- continue;
- }
-
- if ( isset( $next['then'] ) ) {
- goto next;
- }
-
- // @TODO: Recurse here so we can handle more than one level.
- return $tags;
- }
-
- // Child must be one level into current tag.
- if ( '>' === $next['combinator'] ) {
- var_dump( [
- 'msg' => "Processing child",
- 'html' => $html,
- 'tag' => $tags->get_tag(),
- 'selector' => $next
- ] );
- $state = $tags->new_state();
- $state->match_depth = 1;
- while ( $tags->balanced_next( $state ) ) {
- if ( null === self::select_match( $tags, $next ) ) {
- continue;
- }
-
- if ( isset( $next['then'] ) ) {
- goto next;
- }
-
- // @TODO: Recurse here so we can handle more than one level.
- return $tags;
- }
-
- continue;
- }
-
- // Descendant can be anywhere inside current tag.
- if ( ' ' === $next['combinator'] ) {
- var_dump( [
- 'msg' => "Processing descendant",
- 'html' => $html,
- 'tag' => $tags->get_tag(),
- 'selector' => $next
- ] );
- $state = $tags->new_state();
- while ( $tags->balanced_next( $state ) ) {
- if ( null === self::select_match( $tags, $next ) ) {
- continue;
- }
-
- if ( isset( $next['then'] ) ) {
- goto next;
- }
-
- // @TODO: Recurse here so we can handle more than one level.
- return $tags;
- }
-
- continue;
- }
-
- // General sibling must be anything at current level.
- if ( '~' === $next['combinator'] ) {
- // @TODO: Support this.
- return null;
- }
- }
- }
-
- return null;
- }
-
public static function parse_definition( $definition ) {
if ( empty( $definition['source'] ) ) {
return 'not-sourced';
From 6103139e2119d6995ee26bce5f4ad2047707b6ac Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Fri, 23 Dec 2022 23:52:28 -0600
Subject: [PATCH 18/19] More test cases, a couple new failures
---
.../html/class-wp-html-attribute-sourcer.php | 7 ++-
.../html/class-wp-html-processor.php | 5 ++
.../html/wp-html-attribute-sourcer-test.php | 50 +++++++++++++++++++
3 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/lib/experimental/html/class-wp-html-attribute-sourcer.php b/lib/experimental/html/class-wp-html-attribute-sourcer.php
index ea178cbf2eafd8..c2be48ee429b6d 100644
--- a/lib/experimental/html/class-wp-html-attribute-sourcer.php
+++ b/lib/experimental/html/class-wp-html-attribute-sourcer.php
@@ -12,6 +12,7 @@
* @TODO:
* - [ ] Handle multiple joined constraints for classes and attributes
* e.g. ".locale-en-US.localized[data-translation-id][data-translate]"
+ * - [ ] Handle comma-separated selector sequences; apparently we only grab the first right now
*/
/**
@@ -158,6 +159,10 @@ public static function select_match( $tags, $s ) {
return $tags;
}
+ /**
+ * @TODO: This needs to be able to continue to the next match
+ * Pass in $tags? Pass in a bookmark?
+ */
public static function select( $selectors, $html ) {
$selector_index = 0;
@@ -165,7 +170,7 @@ public static function select( $selectors, $html ) {
$tags = new WP_HTML_Processor( $html );
$outer_state = $tags->new_state();
- $next = $selectors[$selector_index];
+ $next = $selectors[ $selector_index ];
loop:
while ( $tags->balanced_next( $outer_state ) ) {
diff --git a/lib/experimental/html/class-wp-html-processor.php b/lib/experimental/html/class-wp-html-processor.php
index ced54246cf35e5..6fa236aa94a336 100644
--- a/lib/experimental/html/class-wp-html-processor.php
+++ b/lib/experimental/html/class-wp-html-processor.php
@@ -4,6 +4,7 @@
* @TODO: Handle self-closing foreign elements.
* @TODO: Detect non-normative HTML input.
* @TODO: Consider parsing non-normative HTML input, support adoption agency algorithm.
+ * @TODO: Figure out how multiple external states can conflict.
*
* If we support non-normative HTML we can probably handle significantly more
* HTML without introducing unexpected results, but I'm not sure yet if we can
@@ -65,6 +66,10 @@ public function balanced_next( WP_HTML_Processor_Scan_State $state, $query = nul
* need to separately track those, but their behavior matches
* this case. The self-closing flag is ignored for HTML5 tags.
*/
+ if ( 0 === $state->relative_depth() ) {
+ return false;
+ }
+
break;
case 'opener':
diff --git a/phpunit/html/wp-html-attribute-sourcer-test.php b/phpunit/html/wp-html-attribute-sourcer-test.php
index fda53531baaaaf..38a274008a9de3 100644
--- a/phpunit/html/wp-html-attribute-sourcer-test.php
+++ b/phpunit/html/wp-html-attribute-sourcer-test.php
@@ -22,6 +22,56 @@ class WP_UnitTestCase extends PHPUnit\Framework\TestCase {}
* @coversDefaultClass WP_HTML_Attribute_Sourcer
*/
class WP_HTML_Attribute_Sourcer_Test extends WP_UnitTestCase {
+ /**
+ * @dataProvider data_ids_and_their_selectors
+ */
+ public function test_selects_proper_html_from_selector( $wanted_ids, $selector ) {
+ $html = <<
+
+
It's a post!
+
+
+
+
The antics of ants with antlers
+
+
+ Ants
+ with antlers can be funny.
+
+
+
Decorations
+
Cleanup crew
+
Spooky visitors
+
+
+
+HTML;
+
+ list( $selectors ) = WP_HTML_Attribute_Sourcer::parse_selector( $selector );
+ $this->assertIsArray( $selectors );
+
+ $found_ids = array();
+ if ( $tags = WP_HTML_Attribute_Sourcer::select( [ $selectors ], $html ) ) {
+ $found_ids[] = $tags->get_attribute( 'id' );
+ }
+
+ $this->assertEqualsCanonicalizing( $wanted_ids, $found_ids );
+ }
+
+ public function data_ids_and_their_selectors() {
+ return array(
+ array( array( 'li-1' ), 'li' ),
+ array( array(), 'section > p img + em' ),
+ array( array( 'strong-ants' ), 'section > p img + strong' ),
+ array( array( 'funny-stuff' ), 'section > p strong + em' ),
+ array( array( 'page-title' ), 'section h2' ),
+ array( array( 'post-title' ), 'section > h2' ),
+ array( array( 'ant-logo' ), '[href]' ),
+ array( array(), '.non-existent' ),
+ );
+ }
+
/**
* @dataProvider data_single_combinators
*/
From 79c24bdcba3d031ab488ab1836e7b9926dc63f26 Mon Sep 17 00:00:00 2001
From: Dennis Snell
Date: Mon, 9 Jan 2023 13:03:43 -0800
Subject: [PATCH 19/19] Only traverse children of non-closing or non-void
elements.
---
.../html/class-wp-html-attribute-sourcer.php | 13 +++++++++---
.../html/class-wp-html-tag-processor.php | 21 +++++++++++++++++++
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/lib/experimental/html/class-wp-html-attribute-sourcer.php b/lib/experimental/html/class-wp-html-attribute-sourcer.php
index c2be48ee429b6d..c66ffbf4550c88 100644
--- a/lib/experimental/html/class-wp-html-attribute-sourcer.php
+++ b/lib/experimental/html/class-wp-html-attribute-sourcer.php
@@ -193,9 +193,16 @@ public static function select( $selectors, $html ) {
* element which shares the same parent.
*/
case '+':
- // Close out this tag if it needs to be.
- while ( $tags->balanced_next( $inner_state ) ) {
- continue;
+ /*
+ * If we have opened a tag we need to continue scanning past all of its children.
+ * `balanced_next()` will end up on the closing tag, so if we don't have any
+ * children, or no closing tag, we need to skip this because `balanced_next()`
+ * would end up in those cases on the sibling element.
+ */
+ if ( ! WP_HTML_Processor::is_html_void_element( $tags->get_tag() ) ) {
+ while ( $tags->balanced_next( $inner_state ) ) {
+ continue;
+ }
}
if ( $tags->balanced_next( $outer_state ) && self::select_match( $tags, $next ) ) {
diff --git a/lib/experimental/html/class-wp-html-tag-processor.php b/lib/experimental/html/class-wp-html-tag-processor.php
index a0565c3d452864..72c342dbd02a73 100644
--- a/lib/experimental/html/class-wp-html-tag-processor.php
+++ b/lib/experimental/html/class-wp-html-tag-processor.php
@@ -20,6 +20,7 @@
* @TODO: Add slow mode to escape character entities in CSS class names?
* (This requires a custom decoder since `html_entity_decode()`
* doesn't handle attribute character reference decoding rules.
+ * @TODO: Do we make any indexing assumptions based on only scanning tag openers? $tag_name - 1 vs. ?
*
* @package WordPress
* @subpackage HTML
@@ -1439,6 +1440,26 @@ public function get_tag() {
return strtoupper( $tag_name );
}
+ /**
+ * Returns a representation of the currently-open tag, for debug purposes.
+ *
+ * @since 6.3.0
+ * @return string
+ */
+ public function debug_current_token() {
+ if ( null === $this->tag_name_starts_at ) {
+ return '';
+ }
+
+ if ( $this->is_tag_closer() ) {
+ $tag_name = substr( $this->html, $this->tag_name_starts_at, $this->tag_name_length );
+ return "{$tag_name}>";
+ }
+
+ $tag_starts_at = $this->tag_name_starts_at - 1;
+ return substr( $this->html, $tag_starts_at, $this->tag_ends_at - $tag_starts_at + 1 );
+ }
+
/**
* Indicates if the current tag token is a tag closer.
*