-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtwitter_autodelete.py
executable file
·258 lines (240 loc) · 7.62 KB
/
twitter_autodelete.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#!/usr/bin/python
import sqlite3
import json
import tweepy
import sys
import bz2
import base64
import signal
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from dateutil.parser import parse as date_parse
from zipfile import ZipFile
from io import TextIOWrapper
from time import sleep
### CONFIG ###
# number of tweets to be deleted simultaneously
# (used as the ThreadPoolExecutor worker count in delete_tweets)
n_workers = 5
# when do we remove tweets
# (passed as a modifier to SQLite's datetime("now", ?); tweets whose stored
# time is earlier than the resulting date are selected for deletion)
datetime_modifier = "-25 day"
# path to data (contains connection information and tweet database)
path_data = Path('data')
### END CONFIG ###
# SQLite database holding one row per known tweet
path_db = path_data / "db.sqlite"
# JSON file with the API credentials; written by setup(), read by connect_twitter()
path_keys = path_data / "keys.json"
# base64-encoded, bz2-compressed text; setup() decodes and prints it
thounk = '''
QlpoOTFBWSZTWetNC1UAABnQYX2wQABEQAEAEAAwATqlQSnokkNAKYAAEmoSanpMjgQOUWuHe7O3
e9fbgQKZAPAwVC8KFkXEY1llytzbbCNLZjWWjIrLG0sV9gAE/QACVrWihBBErkJEsMquZXpC9w45
xdcn2ExxN30sCFkTSRFBI4HdtE8cvtDl4epk666gYh+YbSKHSGJItssfULQybukTNcO3Ii1jxPzk
E1wR6ePSGMFgWrLqsZxDzlS00O0lMkUkKZBZv+LuSKcKEh1poWqg
'''
# constants for 'removed' column inside database
# S_TO_DELETE matches the column's DEFAULT 0, so freshly inserted tweets start in this state
S_TO_DELETE = 0
# set by delete_tweet() on success or when the API reports the tweet as already gone
S_DELETED = 1
# set by save_tweets() for tweets the user asked to keep
S_TO_KEEP = 2
# set by delete_tweet() when the API answers with error code 63
S_CANNOT_DELETE = 3
# create directories
path_data.mkdir(parents=True, exist_ok=True)
# connect to database
# (conn and cur are module-level and shared by every function below)
conn = sqlite3.connect(path_db)
cur = conn.cursor()
# create the tweet table on first run; id is the tweet id, time is stored as text
cur.execute('''CREATE TABLE IF NOT EXISTS tweet
(id INTEGER PRIMARY KEY, time TEXT, removed INTEGER DEFAULT 0)''')
conn.commit()
def connect_twitter():
    """Create the module-level ``twitter`` client from the saved credentials.

    Loads the JSON document at ``path_keys`` (keys ``ck``, ``cs``, ``at`` and
    ``ats``), builds a tweepy OAuth handler from it and stores the resulting
    ``tweepy.API`` object in the global ``twitter``.
    """
    global path_keys, twitter
    with open(path_keys) as keys_file:
        credentials = json.load(keys_file)
    handler = tweepy.OAuthHandler(credentials["ck"], credentials["cs"])
    handler.set_access_token(credentials["at"], credentials["ats"])
    twitter = tweepy.API(handler)
def add_tweet(tweet_id, tweet_time):
    """Insert one tweet into the ``tweet`` table using the shared cursor.

    The time value is normalised by SQLite's ``datetime()`` function. The row
    is not committed here; callers commit on ``conn`` themselves. Because
    ``id`` is the primary key, inserting a known id raises
    ``sqlite3.IntegrityError``.
    """
    global cur
    row = (tweet_id, tweet_time)
    cur.execute("INSERT INTO tweet (id, time) VALUES (?, datetime(?))", row)
def _load_js_json(archive, member):
    """Return the JSON payload of a ``<name> = <json>`` file inside *archive*.

    Everything up to and including the first ``=`` is discarded; the rest is
    parsed as JSON. A member without ``=`` yields an empty payload, which makes
    ``json.loads`` raise ``json.JSONDecodeError``.
    """
    # Decode explicitly as UTF-8: relying on the locale default breaks on
    # platforms whose default encoding is not UTF-8.
    with TextIOWrapper(archive.open(member), encoding="utf-8") as f:
        text = f.read()
    _prefix, _sep, payload = text.partition("=")
    return json.loads(payload)


def load_archive(path_archive):
    """Import every tweet listed in a Twitter archive zip into the database.

    Reads ``data/js/tweet_index.js`` from the zip at *path_archive* to find the
    per-period tweet files, then inserts each tweet's ``id`` and parsed
    ``created_at`` through ``add_tweet``. Tweets whose id is already stored are
    skipped. The database is committed after each file and the number of newly
    added tweets is printed.
    """
    global conn
    with ZipFile(path_archive) as archive:
        index = _load_js_json(archive, "data/js/tweet_index.js")
        file_names = [entry["file_name"] for entry in index]
        for file_name in file_names:
            # no newline: the count is appended to this line once the file is done
            sys.stdout.write("loading %s... " % file_name)
            sys.stdout.flush()
            count = 0
            for tweet in _load_js_json(archive, file_name):
                try:
                    add_tweet(tweet["id"], date_parse(tweet["created_at"]))
                except sqlite3.IntegrityError:
                    # primary-key clash: this tweet is already in the database
                    continue
                count += 1
            conn.commit()
            print("new tweets: %d" % count)
def delete_tweet(tweet):
    """Delete a single tweet through the API and report the new row state.

    *tweet* is an ``(id, time)`` pair. Returns a dict with keys ``id`` and
    ``removed`` describing the state to store in the database, or ``None``
    when nothing should be recorded (deletion was stopped, or the API failed
    with an error code that is not handled here).
    """
    global twitter, stopped
    tweet_id, tweet_time = tweet
    # once the user has aborted, pending work items become no-ops
    if stopped:
        return None
    try:
        twitter.destroy_status(tweet_id)
    except tweepy.error.TweepError as err:
        if err.api_code == 63:
            print("delete %d [%s]: account suspended, marking as CANNOT_DELETE" % (tweet_id, tweet_time))
            return {"id": tweet_id, "removed": S_CANNOT_DELETE}
        if err.api_code in {34, 144}:
            print("delete %d [%s]: already deleted" % (tweet_id, tweet_time))
            return {"id": tweet_id, "removed": S_DELETED}
        # any other API error: report it and leave the row untouched
        print("delete %d [%s]:" % (tweet_id, tweet_time), err)
        return None
    else:
        print("delete %d [%s]: ok" % (tweet_id, tweet_time))
        return {"id": tweet_id, "removed": S_DELETED}
def delete_tweets():
    """Delete every tweet older than ``datetime_modifier`` still marked S_TO_DELETE.

    Selects the candidate rows, connects to Twitter, runs ``delete_tweet`` on
    each row with ``n_workers`` threads, then writes the returned states back
    to the database. A first ^C while the workers run sets ``stopped`` so the
    remaining work items return immediately; further interrupts are ignored
    until the database has been updated.

    Raises:
        RuntimeError: if the Twitter client cannot be created.
    """
    global conn, cur, stopped, n_workers
    # 'now' is single-quoted: double quotes denote identifiers in SQL and are
    # only accepted as string literals by SQLite as a legacy quirk.
    cur.execute(
        "SELECT id, time FROM tweet WHERE time < datetime('now', ?) AND removed = ?",
        (datetime_modifier, S_TO_DELETE),
    )
    tweets = cur.fetchall()
    print("Tweets to delete: %d" % len(tweets))
    try:
        connect_twitter()
    except Exception as e:
        # Exception (not a bare except) so that ^C is not reported as a
        # connection problem.
        raise RuntimeError("Could not connect to twitter, please check keys.json or run setup.") from e
    stopped = False
    # Stays empty if the interrupt arrives before pool.map() has returned.
    results = []
    try:
        pool = ThreadPoolExecutor(n_workers)
        results = pool.map(delete_tweet, tweets)
        pool.shutdown()
    except KeyboardInterrupt:
        # first ^C: stop deleting tweets
        print("Aborting...")
        stopped = True
    # ignore any new interruption, we need to update the database
    previous_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
    if previous_handler is None:
        # handler was not installed from Python; it cannot be re-installed as-is
        previous_handler = signal.SIG_DFL
    try:
        # iterating the map result waits for the workers that are still running
        deleted = [res for res in results if res is not None]
        count = sum(1 for res in deleted if res["removed"] == S_DELETED)
        print("Deleted tweets: %d" % count)
        print("Updating database...")
        cur.executemany('UPDATE tweet SET removed = :removed WHERE id = :id', deleted)
        conn.commit()
    finally:
        # Put back whatever handler was active before (normally Python's own,
        # which raises KeyboardInterrupt) instead of forcing SIG_DFL.
        signal.signal(signal.SIGINT, previous_handler)
def setup():
    """Interactively obtain API credentials and save them to ``path_keys``.

    Asks for the consumer key and secret, prints the authorization URL, asks
    for the verification code, then stores the four credentials as JSON under
    the keys ``ck``, ``cs``, ``at`` and ``ats``. Exits the process with status
    1 when the key/secret pair or the verification code is rejected.
    """
    global path_keys
    print("=== SETUP ===")
    print()
    ck = input("Twitter consumer key: ")
    cs = input("Twitter consumer secret: ")
    print(bz2.decompress(base64.b64decode(thounk)).decode())
    auth = tweepy.OAuthHandler(ck, cs)
    try:
        print("Please visit %s" % auth.get_authorization_url())
    except Exception:
        # Exception rather than a bare except, so ^C is not reported as a bad key
        print("Invalid key or secret.")
        sys.exit(1)
    verif = input("Verification code: ")
    try:
        at, ats = auth.get_access_token(verif)
    except Exception:
        print("Invalid verification code.")
        sys.exit(1)
    auth.set_access_token(at, ats)
    api = tweepy.API(auth)
    me = api.me()
    print()
    # display name first, then the @handle (screen_name is the handle)
    print("Identified as: %s @%s" % (me.name, me.screen_name))
    keys = {"ck": ck, "cs": cs, "at": at, "ats": ats}
    with open(path_keys, "w") as f:
        json.dump(keys, f)
    print()
    print("Configuration saved !")
def update_tweets():
    """Add tweets newer than the newest stored one to the database.

    Uses the largest stored tweet id as ``since_id`` and walks the user
    timeline page by page until an empty page is returned, inserting every
    tweet through ``add_tweet``. Commits once at the end.

    Raises:
        RuntimeError: if the Twitter client cannot be created, or if the
            database contains no tweet to start from.
    """
    global conn, cur, twitter
    try:
        connect_twitter()
    except Exception as e:
        # Exception (not a bare except) so that ^C is not reported as a
        # connection problem.
        raise RuntimeError("Could not connect to twitter, please check keys.json or run setup.") from e
    cur.execute("SELECT max(id) FROM tweet")
    since_id, = cur.fetchone()
    if since_id is None:
        raise RuntimeError("No tweet in database, please run load-archive.")
    print("Last tweet ID: %d" % since_id)
    print("Reading timelines...")
    page = 1
    while True:
        tl = twitter.user_timeline(since_id=since_id, page=page)
        if not tl:
            break
        print("Page %d, tweets to add: %d" % (page, len(tl)))
        for t in tl:
            try:
                add_tweet(t.id, t.created_at)
            except sqlite3.IntegrityError:
                # Already stored (e.g. a tweet seen again on a later page if
                # the timeline grew between requests); skip it, as
                # load_archive does, instead of aborting before the commit.
                pass
        page += 1
    print("Updating database...")
    conn.commit()
def save_tweets(tweets):
    """Mark the given tweets as S_TO_KEEP unless they are already S_DELETED.

    Each item of *tweets* is a string whose part after the last ``/`` is the
    numeric tweet id (a bare id works as well as a URL ending in the id).
    """
    global conn, cur
    params = []
    for reference in tweets:
        tweet_id = int(reference.rsplit("/", 1)[-1])
        params.append((S_TO_KEEP, S_DELETED, tweet_id))
    cur.executemany("UPDATE tweet SET removed = ? WHERE removed != ? AND id = ?", params)
    conn.commit()
def status():
    """Print a summary of the tweet table.

    Shows the total row count, the count for each value of the ``removed``
    column, and how many S_TO_DELETE tweets are already older than
    ``datetime_modifier``.
    """
    print()
    print("-----------------------------------")
    cur.execute("SELECT count(*) FROM tweet")
    # fetchone() returns a 1-tuple, which %-formatting unpacks into the single %d
    print("Known tweets: %d" % cur.fetchone())
    per_state = (
        ("Tweets that can be deleted: %d", S_TO_DELETE),
        ("Tweets already deleted: %d", S_DELETED),
        ("Tweets we should keep: %d", S_TO_KEEP),
        ("Tweets we failed to delete: %d", S_CANNOT_DELETE),
    )
    for template, state in per_state:
        cur.execute("SELECT count(*) FROM tweet WHERE removed = ?", (state,))
        print(template % cur.fetchone())
    # 'now' is single-quoted: double quotes denote identifiers in SQL and are
    # only accepted as string literals by SQLite as a legacy quirk.
    cur.execute(
        "SELECT count(*) FROM tweet WHERE time < datetime('now', ?) AND removed = ?",
        (datetime_modifier, S_TO_DELETE),
    )
    print("Tweets to be deleted now: %d" % cur.fetchone())
    print("-----------------------------------")
    print()
if __name__ == "__main__":
    # Commands that take no argument, mapped to the function each one runs.
    simple_commands = {
        "delete-tweets": delete_tweets,
        "update-tweets": update_tweets,
        "status": status,
        "setup": setup,
    }
    args = sys.argv
    n_args = len(args)
    command = args[1] if n_args >= 2 else None
    try:
        if n_args == 1:
            # no command: refresh the database, then delete what is due
            status()
            update_tweets()
            status()
            delete_tweets()
            status()
        elif n_args == 2 and command in simple_commands:
            simple_commands[command]()
        elif n_args == 3 and command == "load-archive":
            load_archive(args[2])
        elif n_args >= 3 and command == "save-tweets":
            status()
            update_tweets()
            status()
            save_tweets(args[2:])
            status()
        else:
            print("Usage: %s" % args[0])
            for usage_line in (
                "\t\tsetup",
                "\t\tload-archive <filename>",
                "\t\tdelete-tweets",
                "\t\tupdate-tweets",
                "\t\tsave-tweets <tweet-id>...",
                "\t\tstatus",
                "If no command is specified, update and delete tweets",
            ):
                print(usage_line)
    except KeyboardInterrupt:
        # ^C ends the program quietly
        pass
    except RuntimeError as e:
        # errors raised by the commands themselves are shown without a traceback
        print(e)