Skip to content

Commit

Permalink
Merge pull request #1435 from LiteracyFanatic/remove-inner-text-limitations
Browse files Browse the repository at this point in the history

Remove restrictions for getting innerText
  • Loading branch information
dsyme authored Jun 2, 2022
2 parents d5287e4 + 550eed4 commit 46bcf84
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 27 deletions.
21 changes: 6 additions & 15 deletions src/Html/HtmlOperations.fs
Original file line number Diff line number Diff line change
Expand Up @@ -234,32 +234,23 @@ module HtmlNode =
let classesToLookFor = cssClass.Split [|' '|]
classesToLookFor |> Array.forall (fun cssClass -> presentClasses |> Array.exists ((=) cssClass))

let private innerTextExcluding' recurse exclusions n =
let exclusions = "style" :: "script" :: exclusions
let isAriaHidden (n:HtmlNode) =
match tryGetAttribute "aria-hidden" n with
| Some a ->
match bool.TryParse(a.Value()) with
| true, v -> v
| false, _ -> false
| None -> false
let rec innerText' inRoot n =
let exclusions = if inRoot then ["style"; "script"] else exclusions
let private innerTextExcluding' recurse exclusions n =
let rec innerText' n =
match n with
| HtmlElement(name, _, content) when List.forall ((<>) name) exclusions && not (isAriaHidden n) ->
| HtmlElement(name, _, content) when List.forall ((<>) name) exclusions ->
seq { for e in content do
match e with
| HtmlText(text) -> yield text
| HtmlComment(_) -> yield ""
| elem ->
| elem ->
if recurse then
yield innerText' false elem
yield innerText' elem
else
yield "" }
|> String.Concat
| HtmlText(text) -> text
| _ -> ""
innerText' true n
innerText' n

/// Collects the inner text of `n` by delegating to `innerTextExcluding'`
/// with its `recurse` flag set to true, so nested elements are descended into.
/// `exclusions` is passed through unchanged as the element names the helper
/// is asked to leave out.
let innerTextExcluding exclusions n =
    n |> innerTextExcluding' true exclusions
Expand Down
34 changes: 32 additions & 2 deletions src/Html/HtmlRuntime.fs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,36 @@ module HtmlRuntime =
i <- i + 1
i

/// Concatenates the text found under `n`.
///
/// - `recurse`: when true, nested elements are descended into; when false,
///   only the direct text children of `n` contribute and nested elements
///   yield an empty string.
/// - `exclusions`: element names whose content is dropped. These apply only
///   below the root; the root itself is rejected only when it is a "style"
///   or "script" element. "style" and "script" are excluded at every level.
/// - `n`: the node to read. A text node returns its own text; an element that
///   is excluded or carries `aria-hidden` with a value parsing to true, and
///   any other kind of node, returns "".
let private innerTextExcluding' recurse exclusions n =
    let excludedBelowRoot = "style" :: "script" :: exclusions

    // True only when an aria-hidden attribute is present and its value
    // parses as the boolean true; unparsable values count as not hidden.
    let isAriaHidden (node: HtmlNode) =
        match node.TryGetAttribute "aria-hidden" with
        | None -> false
        | Some attr ->
            match bool.TryParse(attr.Value()) with
            | true, hidden -> hidden
            | false, _ -> false

    let rec collect isRoot node =
        // The caller-supplied exclusions are deliberately ignored for the root node.
        let excluded = if isRoot then [ "style"; "script" ] else excludedBelowRoot

        match node with
        | HtmlElement(name, _, children) when not (List.contains name excluded) && not (isAriaHidden node) ->
            children
            |> Seq.map (fun child ->
                match child with
                | HtmlText text -> text
                | HtmlComment _ -> ""
                | nested -> if recurse then collect false nested else "")
            |> String.Concat
        | HtmlText text -> text
        | _ -> ""

    collect true n

/// Module-private shortcut for `innerTextExcluding'` with `recurse` fixed to
/// true; `exclusions` and `n` are forwarded unchanged.
let private innerTextExcluding exclusions n =
    n |> innerTextExcluding' true exclusions

let private parseTable inferenceParameters includeLayoutTables makeUnique index (table:HtmlNode, parents:HtmlNode list) =
let rowSpan cell =
max 1 (defaultArg (TextConversions.AsInteger CultureInfo.InvariantCulture cell?rowspan) 0)
Expand Down Expand Up @@ -183,7 +213,7 @@ module HtmlRuntime =
for colindex, cell in cells.[rowindex] do
let data =
let getContents contents =
contents |> List.map (HtmlNode.innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"]) |> String.Concat |> normalizeWs
contents |> List.map (innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"]) |> String.Concat |> normalizeWs
match cell with
| HtmlElement("td", _, contents) -> Cell (false, getContents contents)
| HtmlElement("th", _, contents) -> Cell (true, getContents contents)
Expand Down Expand Up @@ -244,7 +274,7 @@ module HtmlRuntime =

let rows =
list.Descendants("li", true)
|> Seq.map (HtmlNode.innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"] >> normalizeWs)
|> Seq.map (innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"] >> normalizeWs)
|> Seq.toArray

if rows.Length <= 1 then None else
Expand Down
10 changes: 0 additions & 10 deletions tests/FSharp.Data.Tests/HtmlOperations.fs
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,3 @@ let ``Can get direct inner text``() =
// Builds a comment node and checks that HtmlNode.innerText yields String.Empty for it.
let ``Inner text on a comment should be String.Empty``() =
    let commentNode = HtmlNode.NewComment "Hello World"
    let actual = HtmlNode.innerText commentNode
    actual |> should equal String.Empty

/// Builds a style element holding a single text child and asserts that
/// HtmlNode.innerText returns String.Empty for it.
[<Test>]
let ``Inner text on a style should be String.Empty``() =
    let styleNode = HtmlNode.NewElement("style", [ HtmlNode.NewText "Hello World" ])
    let actual = HtmlNode.innerText styleNode
    actual |> should equal String.Empty

/// Builds a script element holding a single text child and asserts that
/// HtmlNode.innerText returns String.Empty for it.
[<Test>]
let ``Inner text on a script should be String.Empty``() =
    let scriptNode = HtmlNode.NewElement("script", [ HtmlNode.NewText "Hello World" ])
    let actual = HtmlNode.innerText scriptNode
    actual |> should equal String.Empty

0 comments on commit 46bcf84

Please sign in to comment.