This repository has been archived by the owner on Oct 26, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 14
/
extract_comic.go
113 lines (88 loc) · 1.68 KB
/
extract_comic.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package swan
import (
"fmt"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/andybalholm/cascadia"
)
// extractComic is the runner registered in comicProcessor. Its run method
// picks the comic image for an article and uses the image's title/alt text as
// the article's cleaned text.
type extractComic struct{}
var (
	// comicDomains lists the hosts for which comicProbe returns its top
	// score of 100.
	comicDomains = []string{
		"xkcd.com",
	}

	// comicKeywords are substrings searched for in an article's meta
	// keywords; comicProbe adds 10 to the score for each one found.
	comicKeywords = []string{
		"webcomic",
		"comic strip",
	}

	// comicProcessor pairs comicProbe with the extractComic runner.
	comicProcessor = &processor{
		probe: comicProbe,
		runners: []runner{
			extractComic{},
		},
	}
)
// comicProbe scores how likely the article is a web comic.
//
// It returns 100 when the article's host is one of comicDomains. Otherwise it
// returns 10 for every entry of comicKeywords that occurs in the page's meta
// keywords, which is 0 when nothing matches.
//
// Host and keyword matching are case-insensitive and a leading "www." on the
// host is ignored, so "WWW.xkcd.com" and a "Webcomic" keyword are recognised
// the same way as their lowercase forms.
func comicProbe(a *Article) uint {
	// Hostnames are case-insensitive and sites are commonly reachable with or
	// without the "www." label, so normalise before comparing.
	host := strings.TrimPrefix(strings.ToLower(a.baseURL.Host), "www.")
	for _, d := range comicDomains {
		if host == d {
			return 100
		}
	}

	// Lowercase once, outside the loop; the comicKeywords entries are
	// themselves lowercase.
	keywords := strings.ToLower(a.Meta.Keywords)
	score := uint(0)
	for _, kw := range comicKeywords {
		if strings.Contains(keywords, kw) {
			score += 10
		}
	}
	return score
}
// run selects the comic image for the article. The OpenGraph image is tried
// first; findBestImage is consulted only when that does not produce an image.
// The returned error is always nil.
func (e extractComic) run(a *Article) error {
	if !e.checkOpenGraph(a) {
		e.findBestImage(a)
	}
	return nil
}
// setImage adopts the first element of img as the article's image.
//
// It returns false without touching the article when the selection is empty,
// the element has no src attribute, or hitImage returns nil for that src.
// Otherwise it stores the image in a.Img, uses the element's title attribute
// (falling back to alt when title is empty) as a.CleanedText and as the
// argument to addInlineArticleImageHTML, and returns true.
func (e extractComic) setImage(a *Article, img *goquery.Selection) bool {
	if img.Length() == 0 {
		return false
	}

	first := img.First()
	src, hasSrc := first.Attr("src")
	if !hasSrc {
		return false
	}

	hit := hitImage(src)
	if hit == nil {
		return false
	}

	// Prefer the title attribute; fall back to alt when it is missing or empty.
	caption, _ := first.Attr("title")
	if caption == "" {
		caption, _ = first.Attr("alt")
	}

	a.Img, a.CleanedText = hit, caption
	a.addInlineArticleImageHTML(caption)
	return true
}
// checkOpenGraph looks for an <img> on the page whose src attribute equals the
// OpenGraph image URL; if one exists, that's probably the comic.
//
// It returns false when there is no OpenGraph image, when the selector cannot
// be compiled, or when setImage rejects the matched element; otherwise it
// returns true.
func (e extractComic) checkOpenGraph(a *Article) bool {
	ogimg := a.Meta.OpenGraph["image"]
	if ogimg == "" {
		return false
	}

	// The URL is spliced into a double-quoted CSS string, so backslashes and
	// double quotes must be escaped; left raw they would end the string early
	// or be read as escape sequences, yielding an invalid or different
	// selector.
	escaped := strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(ogimg)

	m, err := cascadia.Compile(fmt.Sprintf("img[src=\"%s\"]", escaped))
	if err != nil {
		return false
	}
	return e.setImage(a, a.Doc.FindMatcher(m))
}
// findBestImage falls back to the extractImages runner to choose an image,
// then passes that image's selection to setImage. It reports whether setImage
// accepted it, and returns false when the article has no image after the
// runner has finished.
func (e extractComic) findBestImage(a *Article) bool {
	var images extractImages

	// Point TopNode at the document's root selection for the duration of the
	// image runner, then clear it again before going any further.
	a.TopNode = a.Doc.Selection
	images.run(a)
	a.TopNode = nil

	if a.Img == nil {
		return false
	}
	return e.setImage(a, a.Img.Sel)
}