-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollector.go
122 lines (105 loc) · 2.2 KB
/
collector.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package main
import (
"bufio"
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"strings"
"sync"
"time"
)
const (
	// urlFileName is the input file read by readFile: one download URL
	// per line, resolved relative to the working directory.
	urlFileName = "urls.txt"
)
// Download pairs a fetched payload with the file name it should be
// saved under; it is the message passed from the download goroutines to
// the single saver goroutine in main.
type Download struct {
	FileName string // target file name, as produced by getFileName
	Content  []byte // raw HTTP response body
}
func getNumberOfWeek() int {
now := time.Now().UTC()
_, week := now.ISOWeek() // _ stands for year
return week
}
// getFileName derives a local file name for urlString of the form
// "<host>__KW<week>__<document>", e.g. "example.com__KW7__report.pdf".
// The document name is the last slash-separated segment of the URL.
// Resolves the old TODO: for directory-style URLs with a trailing slash
// (or no path at all) the segment would be empty, so trailing slashes
// are stripped first and "index" is used as a fallback.
// Aborts the program if urlString is not a parseable URL, matching the
// file-wide log.Fatal error style.
func getFileName(urlString string) string {
	parsed, err := url.Parse(urlString)
	if err != nil {
		log.Fatal(err)
	}
	docName := "index" // fallback when the URL carries no path component
	if parsed.Path != "" && parsed.Path != "/" {
		// Take everything after the last "/", ignoring trailing slashes.
		parts := strings.Split(strings.TrimRight(urlString, "/"), "/")
		docName = parts[len(parts)-1]
	}
	return fmt.Sprintf("%s__KW%d__%s", parsed.Host, getNumberOfWeek(), docName)
}
// makeDirIfNotExists ensures the per-week download directory ("KW<week>")
// exists in the working directory and returns its name.
// Fixes two defects in the original: the permission argument was decimal
// 666 (mode 0o1232 — sticky bit set, execute bits clear, so the directory
// could not be traversed), and the Mkdir error was silently discarded.
// MkdirAll is idempotent, which also removes the Stat/Mkdir check-then-act
// race.
func makeDirIfNotExists() string {
	dirName := fmt.Sprintf("KW%d", getNumberOfWeek())
	if err := os.MkdirAll(dirName, 0755); err != nil {
		log.Fatal(err)
	}
	return dirName
}
// createRequest builds a GET request for url.
// The original discarded the http.NewRequest error, returning a nil
// request on an unparseable URL and deferring the crash to the caller;
// the error is now fatal here, matching the file-wide log.Fatal style.
func createRequest(url string) *http.Request {
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		log.Fatal(err)
	}
	return req
}
// doRequest performs a GET against url and returns the full response body.
// Fixes in this revision: the Client.Do error was discarded (any network
// failure left resp nil and crashed on resp.Body), the body is now closed
// via defer immediately after the error check, the client carries a
// timeout so a stalled server cannot hang the program, and non-2xx
// responses are reported as errors instead of being saved as payloads.
func doRequest(url string) ([]byte, error) {
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(createRequest(url))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}
	return ioutil.ReadAll(resp.Body)
}
func saveFile(fileName string, payload []byte) {
ioutil.WriteFile(fileName, payload, 666)
}
// readFile loads the target URL list from urls.txt, one URL per line,
// in file order. Open or scan failures abort the program.
func readFile() []string {
	f, err := os.Open(urlFileName)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	var lines []string
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		lines = append(lines, sc.Text())
	}
	// Scanner errors (e.g. read failure, oversized line) surface here,
	// not inside the loop.
	if err := sc.Err(); err != nil {
		log.Fatal(err)
	}
	return lines
}
// download fetches url and sends the payload to ch tagged with fileName.
// A transport error is fatal for the whole program (file-wide log.Fatal
// style); note the saver goroutine in main counts one message per URL,
// so this function must either send or terminate the process.
func download(url string, fileName string, ch chan *Download) {
	payload, err := doRequest(url)
	if err != nil {
		log.Fatal(err)
	}
	ch <- &Download{FileName: fileName, Content: payload}
}
// main reads the URL list from urls.txt, downloads every entry
// concurrently, and saves each payload into the current week's "KW<n>"
// directory via a single saver goroutine (which serializes file writes).
// Changes from the original: the saver is started before the producers
// (the unbuffered channel otherwise blocked every producer until the
// consumer happened to start), the explicit wg.Wait() replaces the
// obscure "defer wg.Wait()", and ch is closed after the wait so the
// saver goroutine terminates instead of leaking.
func main() {
	urls := readFile()
	fmt.Println(urls)
	dirName := makeDirIfNotExists()

	ch := make(chan *Download)
	var wg sync.WaitGroup
	wg.Add(len(urls))

	// Single consumer: writes files and marks one unit done per save.
	go func() {
		for d := range ch {
			saveFile(dirName+"/"+d.FileName, d.Content)
			fmt.Println("Saved " + d.FileName)
			wg.Done()
		}
	}()

	for _, u := range urls {
		go download(u, getFileName(u), ch)
	}

	// Block until every payload has been saved, then shut the saver down.
	wg.Wait()
	close(ch)
}