-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGetLinks.go
61 lines (50 loc) · 1.92 KB
/
GetLinks.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
package gochan
import (
"regexp"
"strings"
)
// Patterns matching the href attribute of a media link on each of the two
// hosts this package looks for. They are compiled once at package scope
// instead of on every call.
var (
	cdnLinkRe = regexp.MustCompile("href=\\\"\\/\\/i\\.4cdn\\.org\\/[a-z]+\\/[0-9]*\\.(jpg|png|jpeg|webm)")
	is2LinkRe = regexp.MustCompile("href=\\\"\\/\\/is2\\.4chan\\.org\\/[a-z]+\\/[0-9]*\\.(jpg|png|jpeg|webm)")
)

// GetImageLinks searches the HTML of a page for media links and returns them
// as absolute https URLs.
//
// Input: strPtr points to the HTML code of the webpage. A nil pointer yields
// a nil result.
//
// Output: a slice containing the links to be downloaded. Links on i.4cdn.org
// come first, followed by links on is2.4chan.org, each group in document
// order. Every distinct URL is returned exactly once, no matter how many
// times it occurs in the page; the result is nil when nothing matches.
func GetImageLinks(strPtr *string) []string {
	if strPtr == nil {
		return nil
	}

	const hrefPrefix = "href=\""

	// De-duplicate by URL. Keeping only every second match would silently
	// drop or repeat links whenever a URL does not occur exactly twice in a
	// row.
	seen := make(map[string]struct{})
	var links []string
	for _, re := range []*regexp.Regexp{cdnLinkRe, is2LinkRe} {
		for _, match := range re.FindAllString(*strPtr, -1) {
			// TrimPrefix removes the literal prefix; TrimLeft would treat
			// its argument as a set of characters to strip.
			link := "https:" + strings.TrimPrefix(match, hrefPrefix)
			if _, dup := seen[link]; dup {
				continue
			}
			seen[link] = struct{}{}
			links = append(links, link)
		}
	}
	return links
}
// Patterns locating the subject span and the thread container in the page.
// They are compiled once at package scope instead of on every call.
var (
	subjectRe  = regexp.MustCompile("\\<span class=\"subject\"\\>[a-zA-Z0-9\\s]*\\</span\\>")
	threadIDRe = regexp.MustCompile("<div class=\"thread\" id=\"t[0-9]+\">")
)

// GetFolderName scans the HTML to give the download folder a meaningful name.
//
// Input: strPtr points to the string containing the HTML code. A nil pointer
// yields an empty name.
//
// Output: the name of the folder where the images are to be saved. It is the
// numeric thread id taken from the first `<div class="thread" id="tNNN">`
// element, followed by "-" and the text of the first matching subject span
// when that text is non-empty after trimming surrounding whitespace.
func GetFolderName(strPtr *string) string {
	if strPtr == nil {
		return ""
	}

	const (
		subjectOpen  = "<span class=\"subject\">"
		subjectClose = "</span>"
		threadOpen   = "<div class=\"thread\" id=\"t"
		threadClose  = "\">"
	)

	// TrimPrefix/TrimSuffix strip the literal tags. TrimLeft/TrimRight treat
	// their argument as a character set and would also eat subject letters
	// such as 's', 'p', 'a', 'n' (turning "Cats" into "Cat").
	subject := subjectRe.FindString(*strPtr)
	subject = strings.TrimSuffix(strings.TrimPrefix(subject, subjectOpen), subjectClose)
	subject = strings.TrimSpace(subject)

	threadID := threadIDRe.FindString(*strPtr)
	threadID = strings.TrimSuffix(strings.TrimPrefix(threadID, threadOpen), threadClose)

	if subject == "" {
		return threadID
	}
	return threadID + "-" + subject
}