diff --git a/class-stringer.php b/class-stringer.php index 49417c1..34ed6e8 100644 --- a/class-stringer.php +++ b/class-stringer.php @@ -14,17 +14,81 @@ class Stringer { + private $notNeededTags = []; function __construct() { if ( ! function_exists( 'str_get_html' ) ) { require_once 'simple_html_dom.php'; } + $this->setUpNotNeeded(); + } + + function get_strings( $block ) { + $html = str_get_html( $block['innerHTML'] ); + // print_r( $html ); + //$html->dump(); + //foreach ( $html->root->children as $node ) { + //print_r( $node ); + $this->recurse( $html->root ); + } + + function setUpNotNeeded() { + $notNeededTags = [ 'ul', 'ol']; + $this->notNeededTags = array_flip( $notNeededTags ); + } + + function isInnertextNeeded( $tag ) { + if ( isset( $this->notNeededTags[$tag ] ) ) { + return false; + } + return true; } - function get_strings( $content ) { - $html = str_get_html( $content ); - print_r( $html ); + function recurse( $node ) { + static $nest = 0; + $nest++; + + echo PHP_EOL; + echo str_repeat( ' ', $nest); + echo $node->tag; + echo ' '; + echo $node->nodetype; + echo ' '; + + if ( $node->nodetype !== HDOM_TYPE_ROOT) { + if ( $this->isInnertextNeeded( $node->tag )) { + $text=$node->innertext(); + $text=trim( $text ); + echo $text; + } + } + echo implode( ' ', $node->getAllAttributes() ); + + if ( count( $node->children ) ) { + foreach ( $node->children as $child ) { + $this->recurse( $child ); + } + } + + + //echo $node->innertext(); + + //print_r( $node->_ ); + + + //$node->dump( $node ); + //echo $node->text(); + //echo $node->__tostring(); + //if( isset( $node->text )) { + //echo "IT:"; + //echo $node->innertext(); + //echo ":TI"; + //echo $node->text(); + + + $nest--; + echo PHP_EOL; } } \ No newline at end of file diff --git a/html2pot.php b/html2pot.php index 07cab8a..db4c68c 100644 --- a/html2pot.php +++ b/html2pot.php @@ -1,36 +1,55 @@ loadHTML( ); -libxml_use_internal_errors( false ); +$html = file_get_contents( $filename); +print_r( $html ); -$image_tags = $dom_doc->getElementsByTagName( 'img' ); +if ( 0 === strlen( $html) ) { + echo "Invalid file: " . $filename; -// For each image Tag, make sure it can be added to the $images array, and add it. -foreach ( $image_tags as $image_tag ) { - $img_src = $image_tag->getAttribute( 'src' ); + gob(); +} - if ( empty( $img_src ) ) { - continue; - } -*/ +require_once 'class-stringer.php'; +/** + * Use Gutenberg to parse the content into individual blocks. + * I've got a block recreation routine in oik-clone. + */ $parser = new WP_Block_Parser(); -$blocks = $parser->parse( $html); -//print_r( $blocks ); +$blocks = $parser->parse( $html ); + +print_r( $blocks ); + $stringer = new Stringer(); $count = 0; foreach ( $blocks as $block) { @@ -41,93 +60,3 @@ //print_r( $block ); } - -class Stringer { - private $dom_doc = null; - function __construct() { - $this->dom_doc = new DOMDocument(); - - } - - function get_strings( $block ) { - //echo $block['innerHTML']; - $this->dom_doc->loadHTML( $block['innerHTML']); - //print_r( $this->dom_doc ); - echo $this->dom_doc->textContent; - echo PHP_EOL; - $this->showDOMNode( $this->dom_doc); - echo PHP_EOL; - /* - - echo PHP_EOL; - if ( $this->dom_doc->hasChildNodes() ) { - echo "this has child nodes" . PHP_EOL; - foreach ( $this->dom_doc->childNodes as $childNode ) { - print_r( $childNode ); - //gob(); - //$this->get_child_node_strings( $childNode); - } - - } - */ - - } - - function showDOMNode(DOMNode $domNode) { - static $nested; - $nested++; - foreach ($domNode->childNodes as $node) { - - //echo PHP_EOL; - if ( $node->haschildNodes() ) { - echo PHP_EOL; - echo str_repeat( ' ', $nested ); - echo 'PN:' . $node->nodeName . ': '; - echo 'PT:' . $node->nodeType; - $this->showDOMNode( $node ); - - } else { - $value = trim( $node->nodeValue); - if ( !empty( $value ) ) { - echo PHP_EOL; - echo str_repeat( ' ', $nested ); - echo 'N:' . $node->nodeName . ': '; - echo 'V:' . $node->nodeValue; - echo 'T:' . $node->nodeType; - echo 'W:' . $node->wholeText; - } - //print_r( $node); - } - } - $nested--; - } -} - -/* - * DOMText Object -( - [wholeText] => - - [data] => - - [length] => 2 - [nodeName] => #text - [nodeValue] => - - [nodeType] => 3 - [parentNode] => (object value omitted) - [childNodes] => - [firstChild] => - [lastChild] => - [previousSibling] => (object value omitted) - [nextSibling] => - [attributes] => - [ownerDocument] => (object value omitted) - [namespaceURI] => - [prefix] => - [localName] => - [baseURI] => - [textContent] => - -) - */