Skip to content

Commit

Permalink
Remove restrictions for getting innerText
Browse files (browse the repository at this point in the history)
Allow getting innerText for styles, scripts, and elements that are
aria-hidden
  • Loading branch information
LiteracyFanatic committed Apr 4, 2022
1 parent 601926e commit 550eed4
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 27 deletions.
21 changes: 6 additions & 15 deletions src/Html/HtmlOperations.fs
Original file line number Diff line number Diff line change
Expand Up @@ -234,32 +234,23 @@ module HtmlNode =
let classesToLookFor = cssClass.Split [|' '|]
classesToLookFor |> Array.forall (fun cssClass -> presentClasses |> Array.exists ((=) cssClass))

let private innerTextExcluding' recurse exclusions n =
let exclusions = "style" :: "script" :: exclusions
let isAriaHidden (n:HtmlNode) =
match tryGetAttribute "aria-hidden" n with
| Some a ->
match bool.TryParse(a.Value()) with
| true, v -> v
| false, _ -> false
| None -> false
let rec innerText' inRoot n =
let exclusions = if inRoot then ["style"; "script"] else exclusions
let private innerTextExcluding' recurse exclusions n =
let rec innerText' n =
match n with
| HtmlElement(name, _, content) when List.forall ((<>) name) exclusions && not (isAriaHidden n) ->
| HtmlElement(name, _, content) when List.forall ((<>) name) exclusions ->
seq { for e in content do
match e with
| HtmlText(text) -> yield text
| HtmlComment(_) -> yield ""
| elem ->
| elem ->
if recurse then
yield innerText' false elem
yield innerText' elem
else
yield "" }
|> String.Concat
| HtmlText(text) -> text
| _ -> ""
innerText' true n
innerText' n

/// Returns the inner text of `n`, descending into child elements.
/// `exclusions` is forwarded unchanged to `innerTextExcluding'`, which is
/// invoked with `recurse` fixed to `true`.
let innerTextExcluding exclusions n =
    n |> innerTextExcluding' true exclusions
Expand Down
34 changes: 32 additions & 2 deletions src/Html/HtmlRuntime.fs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,36 @@ module HtmlRuntime =
i <- i + 1
i

/// Concatenates the text found directly inside `n` and, when `recurse` is
/// `true`, inside its descendant elements.
///
/// - `recurse`: when `false`, nested elements contribute the empty string.
/// - `exclusions`: element names whose content is skipped; "style" and
///   "script" are always added to this list.
/// - `n`: the node to read. A text node yields its text; an excluded or
///   aria-hidden element, a comment, or any other node yields "".
let private innerTextExcluding' recurse exclusions n =
    let allExclusions = "style" :: "script" :: exclusions

    // True only when the node has an aria-hidden attribute whose value
    // parses as the boolean `true`; a missing or unparsable value is false.
    let isAriaHidden (node: HtmlNode) =
        match node.TryGetAttribute "aria-hidden" with
        | None -> false
        | Some attr ->
            match bool.TryParse(attr.Value()) with
            | true, hidden -> hidden
            | _ -> false

    let rec collect isRoot node =
        // The root node is only tested against "style"/"script"; the
        // caller-supplied exclusions take effect for nodes below the root.
        let active = if isRoot then ["style"; "script"] else allExclusions
        let isExcluded name = List.exists ((=) name) active
        match node with
        | HtmlText text -> text
        | HtmlElement(name, _, children) when not (isExcluded name || isAriaHidden node) ->
            children
            |> Seq.map (fun child ->
                match child with
                | HtmlText text -> text
                | HtmlComment _ -> ""
                | nested -> if recurse then collect false nested else "")
            |> String.Concat
        | _ -> ""

    collect true n

/// Recursive variant of `innerTextExcluding'`: gathers the text of `n` and
/// of its descendants, passing `exclusions` through unchanged.
let private innerTextExcluding exclusions n =
    n |> innerTextExcluding' true exclusions

let private parseTable inferenceParameters includeLayoutTables makeUnique index (table:HtmlNode, parents:HtmlNode list) =
let rowSpan cell =
max 1 (defaultArg (TextConversions.AsInteger CultureInfo.InvariantCulture cell?rowspan) 0)
Expand Down Expand Up @@ -183,7 +213,7 @@ module HtmlRuntime =
for colindex, cell in cells.[rowindex] do
let data =
let getContents contents =
contents |> List.map (HtmlNode.innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"]) |> String.Concat |> normalizeWs
contents |> List.map (innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"]) |> String.Concat |> normalizeWs
match cell with
| HtmlElement("td", _, contents) -> Cell (false, getContents contents)
| HtmlElement("th", _, contents) -> Cell (true, getContents contents)
Expand Down Expand Up @@ -244,7 +274,7 @@ module HtmlRuntime =

let rows =
list.Descendants("li", true)
|> Seq.map (HtmlNode.innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"] >> normalizeWs)
|> Seq.map (innerTextExcluding ["table"; "ul"; "ol"; "dl"; "sup"; "sub"] >> normalizeWs)
|> Seq.toArray

if rows.Length <= 1 then None else
Expand Down
10 changes: 0 additions & 10 deletions tests/FSharp.Data.Tests/HtmlOperations.fs
Original file line number Diff line number Diff line change
Expand Up @@ -142,13 +142,3 @@ let ``Can get direct inner text``() =
let ``Inner text on a comment should be String.Empty``() =
    // The inner text of a comment node is expected to be the empty string.
    HtmlNode.NewComment "Hello World"
    |> HtmlNode.innerText
    |> should equal String.Empty

[<Test>]
let ``Inner text on a style should be String.Empty``() =
    // Build a <style> element wrapping a single text node and expect no inner text.
    let styleElement = HtmlNode.NewElement("style", [HtmlNode.NewText "Hello World"])
    styleElement
    |> HtmlNode.innerText
    |> should equal String.Empty

[<Test>]
let ``Inner text on a script should be String.Empty``() =
    // Build a <script> element wrapping a single text node and expect no inner text.
    let scriptElement = HtmlNode.NewElement("script", [HtmlNode.NewText "Hello World"])
    scriptElement
    |> HtmlNode.innerText
    |> should equal String.Empty

0 comments on commit 550eed4

Please sign in to comment.