Skip to content

Commit

Permalink
Merge pull request #1 from sorawa/sorawa-patch-1
Browse files — browse the repository at this point in the history
增加处理cookie的函数,修正144的传值问题
  • Loading branch information
sorawa committed Apr 9, 2015
2 parents ca5f02a + d377b2d commit dcacd1f
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions core/spider/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ func (this *Spider) Run() {
this.threadnum = 1
}
this.mc = resource_manage.NewResourceManageChan(this.threadnum)

//init db by sorawa

for {
req := this.pScheduler.Poll()
Expand All @@ -141,7 +143,7 @@ func (this *Spider) Run() {
this.mc.GetOne()

// Asynchronous fetching
go func(*request.Request) {
go func(req *request.Request) {
defer this.mc.FreeOne()
//time.Sleep( time.Duration(rand.Intn(5)) * time.Second)
mlog.StraceInst().Println("start crawl : " + req.GetUrl())
Expand Down Expand Up @@ -265,6 +267,12 @@ func (this *Spider) AddUrl(url string, respType string) *Spider {
return this
}

// AddUrlWithHeaderFile creates a request for url with the given respType and
// header_file by calling request.NewRequestWithHeaderFile, hands that request
// to AddRequest, and returns the receiver.
//
// NOTE(review): header_file is presumably the path of a file holding request
// headers (e.g. cookies) — confirm against request.NewRequestWithHeaderFile.
func (this *Spider) AddUrlWithHeaderFile(url string, respType string, header_file string) *Spider {
	this.AddRequest(request.NewRequestWithHeaderFile(url, respType, header_file))
	return this
}

func (this *Spider) AddUrls(urls []string, respType string) *Spider {
for _, url := range urls {
req := request.NewRequest(url, respType, "", "GET", "", nil, nil, nil, nil)
Expand Down Expand Up @@ -315,15 +323,15 @@ func (this *Spider) pageProcess(req *request.Request) {
if p.IsSucc() { // if fail retry 3 times
break
}

}

if !p.IsSucc() { // if fail do not need process
return
}

this.pPageProcesser.Process(p)
for _, req := range p.GetTargetRequests() {
//fmt.Printf("%v\n",req)
this.AddRequest(req)
}

Expand Down

0 comments on commit dcacd1f

Please sign in to comment.