forked from ernestognw/tweet-scrapper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ernesto_tweetpytest.py
55 lines (45 loc) · 1.64 KB
/
ernesto_tweetpytest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/python
from __future__ import unicode_literals
import tweepy
import json
import time
import sys
import os
# Get env variables
from dotenv import Dotenv
# Load the ".env" file located next to this script and copy its entries into
# the process environment, so the os.getenv() lookups further down find them.
_script_dir = os.path.dirname(__file__)
dotenv = Dotenv(os.path.join(_script_dir, ".env"))
os.environ.update(dotenv)
# override tweepy.StreamListener
from tweepy import API
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that appends incoming tweets to rotating output files.

    Every output file is named ``bdatweets_<YYYYmmdd-HHMMSS>.json`` (relative
    to the current working directory), starts with two header lines (the
    researcher ID and the search ID, both ``1``) and then holds one JSON
    document per line, one for each received status.  After
    ``tweets_per_file`` statuses the current file is closed and a new one is
    started.
    """

    def __init__(self, api=None, tweets_per_file=100):
        """Set up the listener and open the first output file.

        Args:
            api: tweepy API object; a default ``API()`` is created when omitted.
            tweets_per_file: number of statuses written to one file before a
                new file is started (defaults to the original value of 100).
        """
        super(MyStreamListener, self).__init__(api)
        self.api = api or API()
        self.tweets_per_file = tweets_per_file
        self.counter = 0
        self.output = None
        self._open_output()

    def _open_output(self):
        """Open a new timestamp-named output file and write its header."""
        # define the filename with time as prefix
        filename = 'bdatweets_%s.json' % (time.strftime('%Y%m%d-%H%M%S'))
        # Append mode: two rotations within the same second reuse the same
        # file name, and appending keeps what was already written to it.
        self.output = open(filename, 'a')
        # researcher ID and searchID
        self.output.write('1\n1\n')

    def on_status(self, status):
        """Write one received status to the current file, rotating if needed.

        Args:
            status: status object delivered by tweepy; its ``_json``
                attribute is serialised as a single line.
        """
        self.counter += 1
        print('Reading Twitter Stream...')
        json.dump(status._json, self.output)
        self.output.write('\n')
        if self.counter >= self.tweets_per_file:
            # Current file is full: close it and start a fresh one.
            self.output.close()
            self._open_output()
            self.counter = 0

    def on_error(self, status):
        """Report a stream error and decide whether tweepy should reconnect.

        Args:
            status: status code passed in by tweepy for the failed request.

        Returns:
            ``False`` for code 420 (rate limited), which tells tweepy to
            disconnect instead of retrying; ``True`` for any other code, so
            tweepy keeps reconnecting as before.
        """
        print(status)
        if status == 420:
            # Make sure buffered tweets reach the disk before the stream stops.
            self.output.flush()
            return False
        return True
# Twitter API credentials are read from the process environment.
consumer_key = os.getenv('CONSUMER_KEY')
consumer_secret = os.getenv('CONSUMER_SECRET')
access_token = os.getenv('ACCESS_TOKEN')
access_token_secret = os.getenv('ACCESS_TOKEN_SECRET')

# Fail fast with a readable message when a credential is absent or empty,
# instead of handing None to tweepy and failing later in a less obvious way.
_credentials = {
    'CONSUMER_KEY': consumer_key,
    'CONSUMER_SECRET': consumer_secret,
    'ACCESS_TOKEN': access_token,
    'ACCESS_TOKEN_SECRET': access_token_secret,
}
_missing = sorted(name for name, value in _credentials.items() if not value)
if _missing:
    raise RuntimeError(
        'Missing Twitter credentials in the environment: %s' % ', '.join(_missing))

# Authenticate with OAuth and build the API client.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# Start streaming; statuses matching any of the tracked keywords are passed to
# MyStreamListener. This call blocks while the stream is running.
myStream = tweepy.Stream(auth=api.auth, listener=MyStreamListener(api))
myStream.filter(track=['MIT', 'Stanford', 'Harvard'])