-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathjandan.py
124 lines (104 loc) · 3.61 KB
/
jandan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python
#encoding: utf-8
#author: zengqiu
import urllib2
import urllib
from BeautifulSoup import BeautifulSoup
import MySQLdb
import re
import urlparse
import os
import socket
# Connection settings for the MySQL server that stores the crawl results.
mysql_host = "localhost"
mysql_port = 3306
mysql_user = "root"
# NOTE(review): the credential is hard-coded in source; consider loading it
# from the environment or a config file instead.
mysql_password = "zengqiu"
# Database and table that record each image's remote URL and local path.
mysql_db_name = "jandan"
mysql_table_name = "jandan"
# Directory the downloaded images are written to.
image_path = "/home/mini/jandan"
def spider_ooxx(url):
    """Fetch one listing page and collect the image attached to each comment.

    Args:
        url: Address of the page to download.

    Returns:
        A list of dicts, one per ``<li>`` element whose ``id`` matches
        ``comment``, each with the keys ``"id"`` (the element id) and
        ``"src"`` (the image address).  Comments for which no image address
        can be found are left out instead of aborting the whole page.

    Raises:
        Any error raised by ``urllib2.urlopen`` or ``response.read`` is
        propagated to the caller.
    """
    user_agent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even when reading fails.
        response.close()
    soup = BeautifulSoup(html)

    results = []
    for content in soup.findAll("li", attrs={"id": re.compile("comment")}):
        # NOTE(review): findNext walks forward through the rest of the
        # document, not only the descendants of this <li>, so a comment
        # without its own image may pick up a later comment's image --
        # confirm whether the lookup should be limited to ``content``.
        paragraph = content.findNext("p")
        img = paragraph.findNext("img") if paragraph is not None else None
        src = img.get("src") if img is not None else None
        if not src:
            # Nothing to download for this comment.
            continue
        results.append({"id": content["id"], "src": src})
    return results
def create_database(database):
    """Create a database named *database* on the configured MySQL server.

    Creation is best-effort: a MySQL error (for example because the database
    already exists) is rolled back and reported on standard output instead of
    being raised.

    Args:
        database: Name of the database to create.  It is interpolated into
            the statement as an identifier, so it must come from a trusted
            source.

    Raises:
        MySQLdb.Error: if the connection to the server cannot be opened.
    """
    conn = MySQLdb.connect(
        host=mysql_host,
        user=mysql_user,
        passwd=mysql_password,
        port=mysql_port,
    )
    try:
        cur = conn.cursor()
        # Identifiers cannot be bound as query parameters, hence the
        # string formatting.
        sql = "create database %s" % (database)
        try:
            cur.execute(sql)
            conn.commit()
        except MySQLdb.Error as err:
            # Only database errors are absorbed; anything else (including
            # KeyboardInterrupt) propagates to the caller.
            conn.rollback()
            print("create database %s failed: %s" % (database, err))
    finally:
        # Always release the connection, whatever happened above.
        conn.close()
def create_table(table):
    """Create the image table named *table* in the configured database.

    The table has an auto-increment ``id`` primary key plus the columns
    ``image_remote`` (unique) and ``image_local``.  Creation is best-effort:
    a MySQL error (for example because the table already exists) is rolled
    back and reported on standard output instead of being raised.

    Args:
        table: Name of the table to create.  It is interpolated into the
            statement as an identifier, so it must come from a trusted
            source.

    Raises:
        MySQLdb.Error: if the connection to the database cannot be opened.
    """
    conn = MySQLdb.connect(
        host=mysql_host,
        user=mysql_user,
        passwd=mysql_password,
        db=mysql_db_name,
        port=mysql_port,
        charset="utf8",
    )
    try:
        cur = conn.cursor()
        # Identifiers cannot be bound as query parameters, hence the
        # string formatting.
        sql = "CREATE TABLE %s (`id` int(11) NOT NULL AUTO_INCREMENT, `image_remote` varchar(300) NULL, `image_local` varchar(500) NULL, CONSTRAINT entry UNIQUE (`image_remote`), PRIMARY KEY (`id`)) ENGINE=MyISAM DEFAULT CHARACTER SET=utf8 COLLATE=utf8_general_ci" % (table)
        try:
            cur.execute(sql)
            conn.commit()
        except MySQLdb.Error as err:
            # Only database errors are absorbed; anything else (including
            # KeyboardInterrupt) propagates to the caller.
            conn.rollback()
            print("create table %s failed: %s" % (table, err))
    finally:
        # Always release the connection, whatever happened above.
        conn.close()
def insert(table, image_remote, image_local):
    """Record one downloaded image in *table*.

    The row is written with ``insert ignore``.  A MySQL error is rolled back
    and reported on standard output instead of being raised.

    Args:
        table: Name of the target table.  It is concatenated into the
            statement as an identifier, so it must come from a trusted
            source.
        image_remote: Address the image was downloaded from.
        image_local: Path of the image on the local disk.

    Raises:
        MySQLdb.Error: if the connection to the database cannot be opened.
    """
    conn = MySQLdb.connect(
        host=mysql_host,
        user=mysql_user,
        passwd=mysql_password,
        db=mysql_db_name,
        port=mysql_port,
        charset="utf8",
    )
    try:
        cur = conn.cursor()
        # The values are bound as parameters; only the table identifier is
        # spliced into the SQL text.
        sql = "insert ignore into " + table + "(image_remote, image_local) values(%s, %s)"
        params = (image_remote, image_local)
        try:
            cur.execute(sql, params)
            conn.commit()
        except MySQLdb.Error as err:
            # Only database errors are absorbed; anything else (including
            # KeyboardInterrupt) propagates to the caller.
            conn.rollback()
            print("insert into %s failed: %s" % (table, err))
    finally:
        # Always release the connection, whatever happened above.
        conn.close()
def download(url, path, filename):
    """Download *url* to ``path/filename`` unless that file already exists.

    The data is first written to ``<filename>.part`` and renamed only after
    the transfer finished, so an interrupted download never leaves a
    truncated file under the final name (which a later run would otherwise
    mistake for a finished download and skip).

    Args:
        url: Address of the resource to fetch.
        path: Existing directory to store the file in.
        filename: Name of the file inside *path*.

    Raises:
        Any error raised by ``urllib.urlretrieve`` or ``os.rename`` is
        propagated after the partial file has been removed.
    """
    # Applies process-wide to sockets created from now on.
    socket.setdefaulttimeout(30)
    filepath = os.path.join(path, filename)
    if os.path.isfile(filepath):
        return

    partial = filepath + ".part"
    try:
        urllib.urlretrieve(url, partial)
        os.rename(partial, filepath)
    finally:
        # After a successful rename the partial file is gone; it only
        # survives to this point when the transfer or the rename failed.
        if os.path.isfile(partial):
            os.remove(partial)
def makedir(path):
    """Create *path*, including missing parents, unless it already exists."""
    if os.path.exists(path):
        # Something is already there; leave it untouched.
        return
    os.makedirs(path)
def run():
    """Crawl the listing pages from page 900 onwards and store every image.

    If the configured database cannot be opened, the database and the image
    table are created first.  Then, page by page, every image reported by
    ``spider_ooxx`` is downloaded into ``image_path`` and recorded through
    ``insert``.  The crawl stops at the first page that yields no results.
    A failure on a single image is reported and skipped.
    """
    page = 900

    # Probe the target database; create it (and its table) when the
    # connection attempt fails.
    try:
        conn = MySQLdb.connect(
            host=mysql_host,
            user=mysql_user,
            passwd=mysql_password,
            db=mysql_db_name,
            port=mysql_port,
            charset="utf8",
        )
        conn.close()
    except MySQLdb.Error:
        create_database(mysql_db_name)
        create_table(mysql_table_name)

    while True:
        url = "http://jandan.net/ooxx/page-%d" % page
        print(url)
        results = spider_ooxx(url)
        if not results:
            # An empty page marks the end of the crawl.
            break

        # Ensure the target directory exists once per page rather than
        # once per image.
        makedir(image_path)
        for result in results:
            image_remote = result["src"]
            # Take the extension from the URL path only, so query strings
            # or extension-less URLs cannot leak into the file name.
            extension = os.path.splitext(urlparse.urlparse(image_remote).path)[1]
            filename = result["id"] + extension
            image_local = os.path.join(image_path, filename)
            try:
                download(image_remote, image_path, filename)
                insert(mysql_table_name, image_remote, image_local)
            except Exception:
                # Skip this image but keep crawling.  KeyboardInterrupt and
                # SystemExit are not caught, so the crawl can be aborted.
                print(filename + " is not exist")
        page += 1
def main():
    """Print the usage hint, then start the crawl."""
    usage_hint = 'Please use it as ./jandan'
    print(usage_hint)
    run()
# Start the crawl only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()