Hi I have looked at many guides and tutorials on how to do this, but I am having trouble with being able to use tweepy to store the JSON data in a text file.
class StreamListener(tweepy.StreamListener):
def on_status(self, status):
print(status)
def on_error(self, status):
print status
if status == 420:
return False
if __name__ == '__main__':
stream_listener = StreamListener()
auth = tweepy.OAuthHandler(consumer_token, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = tweepy.Stream(auth, stream_listener)
I have another python file which is supposed to read data into a list:
import pandas
import json
json_data = 'twitter_data.txt'
data_list = []
#load file
tweets_file = open(json_data, "r")
for line in tweets_file:
try:
tweet = json.loads(line) #this line causes problems
data_list.append(tweet)
except:
continue
print len(data_list)
I thought the data received from twitter comes in JSON format, and the guides I'm following all say it does, but it's actually in another object.
Should I just store everything in a list then json dump that list into the new text file?
It seems like you're on the right track. You can modify the stream listener to write tweets to a file directly.
Edit: this now writes out in JSON format.
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API
#Variables that contains the user credentials to access Twitter API
CONSUMER_KEY = #YOUR CONSUMER KEY
CONSUMER_SECRET = #YOUR CONSUMER SECRET
ACCESS_TOKEN = #YOUR ACCESS TOKEN
ACCESS_TOKEN_SECRET = #YOUR ACCESS TOKEN SECRET
class FileWriteListener(StreamListener):
def __init__(self):
super(StreamListener, self).__init__()
self.save_file = open('tweets.json','w')
self.tweets = []
def on_data(self, tweet):
self.tweets.append(json.loads(tweet))
self.save_file.write(str(tweet))
def on_error(self, status):
print(status)
return True
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = API(auth)
twitter_stream = Stream(auth, MyListener())
# Here you can filter the stream by:
# - keywords (as shown)
# - users
twitter_stream.filter(track=['hello'])
This code will run indefinitely, so you either need to exit the process after some time (Ctrl-C) or modify the code.
Then you can load the data:
import json
json_data = []
with open('tweets.json','r') as f:
json_data.append( json.loads(f.readline()) )
Hope this helps!