This repository has been archived by the owner on Apr 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gitBTCrawler.py
81 lines (72 loc) · 2.26 KB
/
gitBTCrawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import github3
import re
import bitcoinaddress as btcAdr
import sqlite3
import string
import time
def getAllUsersAddresses(token='yourTokenHere', dbPath='btcGitDB.db',
                         maxTries=100, rateLimitSleep=600):
    """Crawl GitHub users' repository READMEs for Bitcoin addresses.

    Iterates over the users returned by ``gh.all_users()``. Each user that
    is not yet listed in the ``checkedUsers`` table is recorded there, and
    the README of each of that user's repositories is scanned. For every
    README containing at least one token accepted by ``btcAdr.validate``,
    a row ``(username, addresses)`` is inserted into the ``usernames``
    table, where ``addresses`` is the found tokens joined with ``', '``.

    The default argument values reproduce the values that used to be
    hard-coded in this function.

    Args:
        token: GitHub API token handed to ``github3.login``.
        dbPath: Path of the SQLite database file to create/open.
        maxTries: The outer retry loop is re-entered only while the number
            of errors seen so far is at most this value.
        rateLimitSleep: Seconds to sleep after an error whose message
            contains the substring 'limit'.

    Returns:
        None. Progress and errors are reported on standard output.
    """
    gh = github3.login(token=token)
    users = gh.all_users()
    userCount = 0
    conn = sqlite3.connect(dbPath)

    # Compiled once; strips every ASCII punctuation character from a token.
    punctuation = re.compile('[%s]' % re.escape(string.punctuation))
    insertQuery = "INSERT INTO usernames(username, btcAddress) VALUES (?,?)"

    def addressInText(text):
        """Return the valid addresses in *text* joined by ', ', else False."""
        # Tokens of 23 characters or fewer (measured before punctuation is
        # removed) are discarded as too short to be candidates.
        candidates = [punctuation.sub('', word)
                      for word in text.split() if len(word) > 23]
        # The explicit comparison with True is kept on purpose: the return
        # type of btcAdr.validate is not visible from this file.
        keys = [word for word in candidates if btcAdr.validate(word) == True]
        if keys:
            return ', '.join(keys)
        return False

    def reportFailure(tryNumber, error):
        """Print the failure and back off when it mentions a limit."""
        print('on try: ' + str(tryNumber))
        print('exception: ' + str(error))
        if 'limit' in str(error):
            print('sleeping for 10 minutes')
            time.sleep(rateLimitSleep)

    try:
        cursor = conn.cursor()
        cursor.execute("CREATE TABLE IF NOT EXISTS usernames(username TEXT, btcAddress TEXT)")
        cursor.execute("CREATE TABLE IF NOT EXISTS checkedUsers(username TEXT)")

        tries = 0
        while tries <= maxTries:
            try:
                for user in users:
                    userCount += 1
                    username = str(user)
                    cursor.execute("SELECT count(*) FROM checkedUsers WHERE username = ?", (username,))
                    if cursor.fetchone()[0] != 0:
                        # Already recorded as checked: skip this user.
                        continue
                    cursor.execute("INSERT INTO checkedUsers(username) VALUES (?)", (username,))
                    # Fixed: this used to be `conn.commit` without the call
                    # parentheses, so nothing was committed here.
                    conn.commit()
                    userRepos = gh.repositories_by(username)
                    print("on user: " + str(userCount))
                    repoCount = 0
                    for repo in userRepos:
                        repoCount += 1
                        print("repo # : " + str(repoCount))
                        try:
                            readMe = repo.readme()
                            knownAddr = addressInText(readMe.decoded)
                        except Exception as e:
                            tries += 1
                            reportFailure(tries, e)
                            # Fixed: move on to the next repo. Falling
                            # through would reuse the previous repo's
                            # result (or hit an unbound name).
                            continue
                        if knownAddr:
                            cursor.execute(insertQuery, (username, knownAddr))
                            print('\n added to db \n')
                            conn.commit()
            except Exception as e:
                tries += 1
                reportFailure(tries, e)
            else:
                # Fixed: the user loop finished without an error. `tries`
                # only changes in the handlers above, so without this break
                # the while loop would never terminate.
                break
    finally:
        # Release the database handle even if the crawl is interrupted.
        conn.close()
    print('data collection over')
# Start the crawl. The call is not guarded, so it also runs when this module is imported.
getAllUsersAddresses()