-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcmd_outline.go
58 lines (51 loc) · 1.67 KB
/
cmd_outline.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
////////////////////////////////////////////////////////////////////////////
// Program: htmlextract
// Purpose: HTML Extraction Tool
// Authors: Antonio Sun (c) 2018, All rights reserved
////////////////////////////////////////////////////////////////////////////
package main
import (
"net/url"
"os"
"regexp"
"github.com/go-easygen/cli"
)
////////////////////////////////////////////////////////////////////////////
// outline
// outlineCLI runs the outline command for the parsed command line in ctx.
//
// It copies the root-level Case and Verbose settings into Opts, appends the
// requested attributes to attrPick, and then walks the input with an
// ExtOutliner, writing to argv.Fileo.
//
// When --output is not set, the output file name is derived from the input
// name:
//   - for an input that starts with "http" (case-insensitive), the name is
//     built from the host and path of the URL, ending in ".html";
//   - a trailing ".htm"/".html" (case-insensitive) is then replaced by ".json";
//   - if there is no such suffix, ".json" is appended instead, so the derived
//     output path can never be the input path itself.
//
// The returned error is whatever Walk returns. Failures while preparing the
// file names are passed to abortOn (defined elsewhere; presumably it
// terminates the program on a non-nil error -- confirm).
func outlineCLI(ctx *cli.Context) error {
	rootArgv = ctx.RootArgv().(*rootT)
	argv := ctx.Argv().(*outlineT)
	// fmt.Printf("[outline]:\n  %+v\n  %+v\n  %v\n", rootArgv, argv, ctx.Args())
	Opts.Case, Opts.Verbose =
		rootArgv.Case, rootArgv.Verbose.Value()
	attrPick = append(attrPick, argv.Attributes...)

	fin := argv.Filei.Name()
	verbose(2, "Input file: '%s'", fin)
	if regexp.MustCompile(`(?i)^http`).MatchString(fin) {
		u, e := url.QueryUnescape(fin)
		abortOn("Input url error", e)
		// in case url contains ?param1=...&param2=...
		up, e := url.Parse(u)
		// A malformed URL yields a nil *url.URL; report it instead of
		// dereferencing the nil pointer below.
		abortOn("Input url error", e)
		verbose(2, "url: %#v", up)
		// in case up.Path is empty
		u = up.Host + up.Path
		// in case of ending '/': turn it into an empty "extension" so the
		// last directory name is picked up by the expression below
		u = regexp.MustCompile(`^(.*)/$`).ReplaceAllString(u, "${1}.")
		// get the name from the last part, less extension
		fin = regexp.MustCompile(`^.*/(.*)\.[^.]*$`).ReplaceAllString(u, "${1}")
		fin += ".html"
		verbose(2, "Input file: '%s'", fin)
	}

	if !ctx.IsSet("--output") {
		// The dot is escaped so that only a real ".htm"/".html" extension is
		// swapped (an unescaped dot would also rewrite e.g. "xhtml").
		outName := regexp.MustCompile(`(?i)\.html?$`).
			ReplaceAllLiteralString(fin, ".json")
		if outName == fin && fin != "" {
			// No HTML extension to swap. os.Create truncates existing files,
			// so creating fin itself would wipe the input; append instead.
			outName += ".json"
		}
		fileo, err := os.Create(outName)
		abortOn("Creating output file", err)
		argv.Fileo.SetWriter(fileo)
	}
	fileo := argv.Fileo
	defer fileo.Close()

	//e := NewExtractor(argv.Filei)
	e := NewExtOutliner(argv.Filei)
	return Walk(e, fileo)
}