Skip to content

Commit

Permalink
Find without build html tree (#534)
Browse files Browse the repository at this point in the history
* Rename traverse_with to traverse_html_tree

* Skip HTMLTree for simple filters

* Check tuple matching before HTMLNode
  • Loading branch information
ypconstante authored Feb 9, 2024
1 parent 712c08a commit c688e2a
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 11 deletions.
82 changes: 71 additions & 11 deletions lib/floki/finder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,39 @@ defmodule Floki.Finder do
def find(html_tree_as_tuple, selectors)
when (is_list(html_tree_as_tuple) or is_html_node(html_tree_as_tuple)) and
is_list(selectors) do
tree = HTMLTree.build(html_tree_as_tuple)
results = find(tree, selectors)
Enum.map(results, fn html_node -> HTMLTree.to_tuple(tree, html_node) end)
if traverse_html_tuples?(selectors) do
html_tree_as_tuple = List.wrap(html_tree_as_tuple)
stack = Enum.map(selectors, fn s -> {s, html_tree_as_tuple} end)

results = traverse_html_tuples(stack, [])
Enum.reverse(results)
else
tree = HTMLTree.build(html_tree_as_tuple)
results = find(tree, selectors)
Enum.map(results, fn html_node -> HTMLTree.to_tuple(tree, html_node) end)
end
end

def find(%HTMLTree{} = tree, selectors) when is_list(selectors) do
node_ids = Enum.reverse(tree.node_ids)
stack = Enum.map(selectors, fn s -> {s, node_ids} end)

traverse_with(stack, tree, [])
traverse_html_tree(stack, tree, [])
|> Enum.reverse()
|> Enum.uniq()
end

# Decides whether find/2 may walk the raw html tuples directly and skip
# building an intermediate HTMLTree. The shortcut applies only when:
# - exactly one selector is given
# - that selector has no combinator (it is not a composite selector)
# - that selector has no pseudo classes
defp traverse_html_tuples?(selectors) do
  match?([%Selector{combinator: nil, pseudo_classes: []}], selectors)
end

# The stack serves as accumulator when there is another combinator to traverse.
# So the scope of one combinator is the stack (or acc) or the parent one.
defp traverse_with(
defp traverse_html_tree(
[{%Selector{combinator: nil} = selector, [node_id | selector_rest]} | stack],
tree,
acc
Expand All @@ -60,10 +76,10 @@ defmodule Floki.Finder do
acc
end

traverse_with(stack, tree, acc)
traverse_html_tree(stack, tree, acc)
end

defp traverse_with(
defp traverse_html_tree(
[{%Selector{combinator: combinator} = selector, [node_id | selector_rest]} | stack],
tree,
acc
Expand All @@ -79,14 +95,58 @@ defmodule Floki.Finder do
stack
end

traverse_with(stack, tree, acc)
traverse_html_tree(stack, tree, acc)
end

# A stack entry whose candidate list is exhausted is dropped; traversal
# resumes with the entries that remain on the stack.
defp traverse_html_tree([{_done_selector, []} | pending], tree, acc),
  do: traverse_html_tree(pending, tree, acc)

# An empty stack ends the traversal and the accumulator is returned unchanged.
defp traverse_html_tree([], _tree, acc), do: acc

# Head of the candidate list is a 3-tuple node: prepend it to the accumulator
# when it matches the selector, then keep walking. Its children are pushed
# ahead of its remaining siblings, so children are visited first.
defp traverse_html_tuples(
       [
         {%Selector{combinator: nil} = selector, [{_, _, children} = node | siblings]}
         | pending
       ],
       acc
     ) do
  matched = if Selector.match?(node, selector, nil), do: [node | acc], else: acc

  traverse_html_tuples([{selector, children}, {selector, siblings} | pending], matched)
end

# Head of the candidate list did not match an earlier clause (per the clause
# above, it is not a 3-tuple): skip it and continue with the remaining siblings.
defp traverse_html_tuples(
       [{%Selector{combinator: nil} = selector, [_skipped | siblings]} | pending],
       acc
     ) do
  traverse_html_tuples([{selector, siblings} | pending], acc)
end

defp traverse_with([{_selector, []} | rest], tree, acc) do
traverse_with(rest, tree, acc)
defp traverse_html_tuples([{_selector, []} | rest], acc) do
traverse_html_tuples(rest, acc)
end

defp traverse_with([], _, acc) do
defp traverse_html_tuples([], acc) do
acc
end

Expand Down
5 changes: 5 additions & 0 deletions lib/floki/selector.ex
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,10 @@ defmodule Floki.Selector do
false
end

# Raw html tuple whose first element is a binary: re-dispatch on that bare
# type value. Kept ahead of the HTMLNode clause so tuples are tried first.
defp type_maybe_with_namespace({tag, _attrs, _children}) when is_binary(tag),
  do: type_maybe_with_namespace(tag)

# HTMLNode struct with a binary type: re-dispatch on the bare type value.
defp type_maybe_with_namespace(%HTMLNode{type: tag}) when is_binary(tag),
  do: type_maybe_with_namespace(tag)
Expand All @@ -267,6 +271,7 @@ defmodule Floki.Selector do
get_attribute_value(attributes, attribute_name)
end

# Extracts the attributes of a node. Clause order matters: the raw tuple shape
# is tried before the HTMLNode struct shapes.
#
# Raw html tuple: the second element is returned as-is.
# NOTE(review): any 3-tuple matches this clause, so a `{:pi, _, _}` tuple would
# yield its second element rather than [] like the HTMLNode :pi clause below.
# Confirm callers never reach this with such tuples.
defp attributes({_type, attributes, _children}), do: attributes
# HTMLNode of type :pi (presumably a processing instruction) is treated as
# having no attributes.
defp attributes(%HTMLNode{type: :pi}), do: []
# Any other HTMLNode: return its attributes field.
defp attributes(%HTMLNode{attributes: attributes}), do: attributes

Expand Down

0 comments on commit c688e2a

Please sign in to comment.