Skip to content

Commit

Permalink
cli: add include & exclude selectors
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesKaufmann committed Nov 17, 2024
1 parent c9b6dbc commit 5af1444
Show file tree
Hide file tree
Showing 20 changed files with 408 additions and 126 deletions.
76 changes: 73 additions & 3 deletions cli/cmd/cmd_convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ import (
"bytes"
"fmt"

"github.com/JohannesKaufmann/dom"
"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/base"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
"github.com/JohannesKaufmann/html-to-markdown/v2/plugin/strikethrough"
"github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)

func overrideValidationError(e *commonmark.ValidateConfigError) error {
Expand All @@ -26,8 +30,66 @@ func overrideValidationError(e *commonmark.ValidateConfigError) error {
e.KeyWithValue = fmt.Sprintf("--%s=%q", e.Key, e.Value)
return e
}
func (cli *CLI) convert(input []byte) ([]error, error) {

// includeNodesFromDoc narrows the document down to the nodes matched by the
// include selector.
//
// When no include selector is configured, doc is returned unchanged.
// Otherwise every outermost matched node is detached from doc and appended,
// in the order returned by cascadia.QueryAll, to a fresh empty root node,
// which is returned. The error result is currently always nil.
func (cli *CLI) includeNodesFromDoc(doc *html.Node) (*html.Node, error) {
	if len(cli.config.includeSelector) == 0 {
		return doc, nil
	}
	matches := cascadia.QueryAll(doc, cli.config.includeSelector)

	// Remember all matches up front so that the ancestor check below does
	// not depend on the order in which the nodes are processed.
	matched := make(map[*html.Node]struct{}, len(matches))
	for _, n := range matches {
		matched[n] = struct{}{}
	}

	// hasMatchedAncestor reports whether any ancestor of n was also selected.
	hasMatchedAncestor := func(n *html.Node) bool {
		for p := n.Parent; p != nil; p = p.Parent {
			if _, ok := matched[p]; ok {
				return true
			}
		}
		return false
	}

	root := &html.Node{}
	for _, n := range matches {
		// A match nested inside another match is already included through
		// its ancestor. Moving it as well would rip it out of that ancestor
		// and re-append it as a sibling, reordering the content.
		if hasMatchedAncestor(n) {
			continue
		}

		// AppendChild requires a detached node, so unlink it first.
		dom.RemoveNode(n)
		root.AppendChild(n)
	}

	return root, nil
}
// excludeNodesFromDoc removes every node matched by the exclude selector
// from the tree rooted at doc, modifying the tree in place.
//
// When no exclude selector is configured, the tree is left untouched.
// The error result is currently always nil.
func (cli *CLI) excludeNodesFromDoc(doc *html.Node) error {
	if len(cli.config.excludeSelector) == 0 {
		return nil
	}

	// Collect all matches against the unmodified tree *before* removing
	// anything. Matching while mutating makes the result depend on the
	// traversal order: with a sibling-sensitive selector (e.g. `:last-child`)
	// each removal can turn the next sibling into a new match.
	// It also avoids iterating over children whose sibling links are being
	// rewritten by the removal.
	matches := cascadia.QueryAll(doc, cli.config.excludeSelector)
	for _, n := range matches {
		// Removing a node whose ancestor was already removed is harmless:
		// it is only unlinked from the detached subtree.
		dom.RemoveNode(n)
	}

	return nil
}
// parseInputWithSelectors parses the raw HTML input and then applies the
// include selector followed by the exclude selector.
//
// It returns the resulting node tree, or the first error encountered while
// parsing or while applying either selector.
func (cli *CLI) parseInputWithSelectors(input []byte) (*html.Node, error) {
	parsed, parseErr := html.Parse(bytes.NewReader(input))
	if parseErr != nil {
		return nil, fmt.Errorf("error while parsing html: %w", parseErr)
	}

	// Including runs first, so excluding operates on the narrowed tree.
	selected, includeErr := cli.includeNodesFromDoc(parsed)
	if includeErr != nil {
		return nil, includeErr
	}

	if excludeErr := cli.excludeNodesFromDoc(selected); excludeErr != nil {
		return nil, excludeErr
	}

	return selected, nil
}

func (cli *CLI) convert(input []byte) ([]error, error) {
conv := converter.NewConverter(
converter.WithPlugins(
base.NewBasePlugin(),
Expand All @@ -36,9 +98,17 @@ func (cli *CLI) convert(input []byte) ([]error, error) {
),
),
)
if cli.config.enablePluginStrikethrough {
// TODO: while this works, this does not add the `Name` to the internal list
strikethrough.NewStrikethroughPlugin().Init(conv)
}

r := bytes.NewReader(input)
markdown, err := conv.ConvertReader(r)
doc, err := cli.parseInputWithSelectors(input)
if err != nil {
return nil, err
}

markdown, err := conv.ConvertNode(doc, converter.WithDomain(cli.config.domain))
if err != nil {
e, ok := err.(*commonmark.ValidateConfigError)
if ok {
Expand Down
11 changes: 9 additions & 2 deletions cli/cmd/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"io"
"os"
"strings"

"github.com/andybalholm/cascadia"
)

var (
Expand All @@ -22,13 +24,18 @@ type Config struct {
// args are the positional (non-flag) command-line arguments.
args []string

// - - - - - General - - - - - //
version bool
domain string

// - - - - - Selectors - - - - - //
includeSelector cascadia.SelectorGroup
excludeSelector cascadia.SelectorGroup

// - - - - - Options - - - - - //
strongDelimiter string

plugins []string
// - - - - - Plugins - - - - - //
enablePluginStrikethrough bool
}

// Release holds the information (from the 3 ldflags) that goreleaser sets.
Expand Down
Loading

0 comments on commit 5af1444

Please sign in to comment.