from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
from kafka import SimpleProducer, KafkaClient
from six.moves import configparser
from multiprocessing.pool import ThreadPool

# Variables that contain the user credentials to access the Twitter API
config = configparser.ConfigParser()
config.read('config.ini')
access_token = config.get('auth', 'access_token')
access_token_secret = config.get('auth', 'access_token_secret')
consumer_key = config.get('auth', 'consumer_key')
consumer_secret = config.get('auth', 'consumer_secret')

# This is a basic listener that forwards received tweets to Kafka and prints them to stdout.
class StdOutListener(StreamListener):
    def on_data(self, data):
        producer.send_messages("tweetdata", data.encode('utf-8'))
        print (data)
        return True

    def on_error(self, status):
        print ("Status - ", status)

def start_streaming(stream):
    def map_func(topics):
        stream.filter(track=topics)
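The class above sends to a module-level `producer` that the snippet never creates, and `start_streaming` is cut off before `map_func` is used. A minimal sketch of the missing wiring, assuming a Kafka broker on localhost:9092 and the legacy kafka-python and tweepy 3.x APIs already imported above:

# Sketch only: this wiring is assumed from the imports above, not part of the original file.
kafka = KafkaClient("localhost", 9092)          # assumes a local Kafka broker
producer = SimpleProducer(kafka)                # used by StdOutListener.on_data above

auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, StdOutListener())

# The truncated start_streaming() above presumably maps map_func over several topic lists
# with the imported ThreadPool; a single blocking call is enough for a quick test:
stream.filter(track=["python", "kafka"])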
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener
import urllib
#from bs4 import BeautifulSoup as bs
import pandas as pd
import sched, time
import datetime
from creds import *
from keywords import keywords
import json

class listener(StreamListener):
    def on_data(self, data):
        # crude slice of the raw JSON, used only for the retweet check below
        tweet = data.split(',"text":"')[1].split('","source')[0]
        decoded = json.loads(data)
        created_at = decoded['created_at']
        # location isn't strictly necessary and can be retrieved later via the username:
        # location = decoded['user']['location'].encode('ascii', 'ignore')
        user = decoded['user']['screen_name']
        fulltweet = decoded['text'].encode('ascii', 'ignore')
        # keeping fulltweet (rather than location, which is often missing) as the last
        # field makes cleaning the resulting DataFrame easier
        tweetdata = '%s, @%s, %s \n' % (created_at, user, fulltweet)
        for _ in range(1):
            try:
                saveFile = open('round6.csv', 'a')
                if 'RT' not in tweet:
                    print tweetdata
                    saveFile.write(tweetdata)
                saveFile.close()
            except BaseException as e:
                # keep the stream alive if a single tweet cannot be written
                print 'failed on_data:', str(e)
                time.sleep(5)
        return True
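The listener above is only defined, never attached to a stream. A minimal sketch of how it might be started, assuming `creds` exposes four OAuth values under the hypothetical names used below and that `keywords` is a list of track terms:

# Sketch only: the four credential names are hypothetical; the real names are whatever
# `from creds import *` brings in.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
twitterStream = Stream(auth, listener())
twitterStream.filter(track=keywords)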
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener
import urllib
#from bs4 import BeautifulSoup as bs
import pandas as pd
import sched, time
import datetime
from creds import *
from keywords import keywords
import json

class listener(StreamListener):
    def on_data(self, data):
        # crude slice of the raw JSON, used only for the retweet check below
        tweet = data.split(',"text":"')[1].split('","source')[0]
        decoded = json.loads(data)
        created_at = decoded['created_at']
        # location isn't strictly necessary and can be retrieved later via the username:
        # location = decoded['user']['location'].encode('ascii', 'ignore')
        user = decoded['user']['screen_name']
        fulltweet = decoded['text'].encode('ascii', 'ignore')
        # keeping fulltweet (rather than location, which is often missing) as the last
        # field makes cleaning the resulting DataFrame easier
        tweetdata = '%s, @%s, %s \n' % (created_at, user, fulltweet)
        for _ in range(1):
            try:
                saveFile = open('round3.csv', 'a')
                if 'RT' not in tweet:
                    print tweetdata
                    saveFile.write(tweetdata)
                saveFile.close()
            except BaseException as e:
                # keep the stream alive if a single tweet cannot be written
                print 'failed on_data:', str(e)
                time.sleep(5)
        return True
import re
import json
from tweepy.streaming import StreamListener
# (DEBUG and keywords are defined elsewhere in the original module)

emoticon = ":("
regex = re.compile('|'.join(keywords).lower())
linenum_re = re.compile(r'([A-Z][A-Z]\d+)')
retweets_re = re.compile(r'^RT\s')
enc = lambda x: x.encode('latin1', errors='ignore')

def print_debug(er, msg):
    if DEBUG:
        print "Error: ", er
        print "Message: < %s >" % msg

class EmoticonListener(StreamListener):
    def __init__(self):
        self.count = 0

    def on_data(self, data):
        if self.count > 500:
            # stop after 500 tweets
            exit(0)
        tweet = json.loads(data, encoding='utf-8')
        if not tweet.has_key('id'):
            print_debug("No Id, skip the tweet", "")
            return True
        elif not tweet.has_key('user'):
            print_debug("No user, skip the tweet", "")
            return True
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener
import urllib
#from bs4 import BeautifulSoup as bs
import pandas as pd
import sched, time
import datetime
from creds import *
from keywords import keywords
import json

class listener(StreamListener):
    def on_data(self, data):
        # crude slice of the raw JSON, used only for the retweet check below
        tweet = data.split(',"text":"')[1].split('","source')[0]
        decoded = json.loads(data)
        created_at = decoded['created_at']
        # location isn't strictly necessary and can be retrieved later via the username:
        # location = decoded['user']['location'].encode('ascii', 'ignore')
        user = decoded['user']['screen_name']
        fulltweet = decoded['text'].encode('ascii', 'ignore')
        # keeping fulltweet (rather than location, which is often missing) as the last
        # field makes cleaning the resulting DataFrame easier
        tweetdata = '%s, @%s, %s \n' % (created_at, user, fulltweet)
        for _ in range(1):
            try:
                saveFile = open('round1.csv', 'a')
                if 'RT' not in tweet:
                    print tweetdata
                    saveFile.write(tweetdata)
                saveFile.close()
            except BaseException as e:
                # keep the stream alive if a single tweet cannot be written
                print 'failed on_data:', str(e)
                time.sleep(5)
        return True
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.utils import import_simplejson
json = import_simplejson()

# Placeholders: never commit real API credentials; load them from a config file or
# environment variables instead.
consumer_key = "YOUR_CONSUMER_KEY"
consumer_secret = "YOUR_CONSUMER_SECRET"
access_token = "YOUR_ACCESS_TOKEN"
access_token_secret = "YOUR_ACCESS_TOKEN_SECRET"

pos_emo = [":)", ":-)", ": )", ":D", "=)", ";)"]
neg_emo = [":(", ":-(", ": ("]
emo = pos_emo + neg_emo

class StdOutListener(StreamListener):
    def on_data(self, raw_data):
        data = json.loads(raw_data)
        # skip retweets and non-English tweets
        if data['retweet_count'] != 0 or data.has_key('retweeted_status') or data['lang'] != "en":
            return True
        text = data['text'].encode('ascii', 'ignore')
        pos = False
        neg = False
        if any(e in text for e in pos_emo):
            pos = True
        if any(e in text for e in neg_emo):
            neg = True
        emo_idx = None
        if pos and not neg:
            # the source snippet is cut off here; presumably the position of the first
            # positive emoticon is recorded next
            emo_idx = min(text.index(e) for e in pos_emo if e in text)
# Fragment of a loader class: the generator below drains a queue that the stream
# listener fills; its enclosing method definition is not part of this snippet.
# (Assumes `queue`, `jq`, and `tweepy` are imported at module level.)
        i = 0
        try:
            while True:
                self._listener.check_error()
                try:
                    # wait up to one second for the next row
                    yield self._queue.get(True, 1)
                except queue.Empty:
                    continue
                self._queue.task_done()
                i += 1
                if self._count is not None and self._count <= i:
                    break
        finally:
            # always shut the stream and its worker thread down
            self._stream.disconnect()
            self._thread.join()


class _TwitterStreamListener(tweepy.streaming.StreamListener):
    def __init__(self, loader, keys):
        self._loader = loader
        self._keys = keys
        self._error = None
        super(_TwitterStreamListener, self).__init__()

    def check_error(self):
        # re-raise, on the consuming thread, any error recorded by on_error
        if self._error:
            raise self._error

    def on_status(self, status):
        # apply each jq expression to the raw tweet JSON and pass the row to the loader
        row = dict([(key, jq.jq(key).transform(status._json)) for key in self._keys])
        self._loader._on_event(row)

    def on_error(self, status_code):
        self._error = RuntimeError('Twitter Streaming API returned HTTP error {0}'.format(status_code))
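For context, a sketch of how the generator, queue, and listener above might fit together; `SketchLoader`, its constructor arguments, and `_on_event` are assumptions inferred from the fragment, not the original loader:

# Sketch only: names and structure below are assumptions, not the original loader.
import threading
import queue

import tweepy

class SketchLoader(object):
    def __init__(self, auth, keys, track, count=None):
        self._queue = queue.Queue()
        self._count = count
        self._listener = _TwitterStreamListener(self, keys)
        self._stream = tweepy.Stream(auth, self._listener)
        # run the blocking filter() call on a background thread so the generator
        # above can consume rows from the queue
        self._thread = threading.Thread(target=self._stream.filter, kwargs={'track': track})
        self._thread.daemon = True
        self._thread.start()

    def _on_event(self, row):
        # called by _TwitterStreamListener.on_status for every decoded tweet
        self._queue.put(row)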
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener
import os
import sys
import time
import json
import atexit

# Load config
minerva_ebola_config = json.load(open(
    os.path.join(os.path.dirname(__file__), "minerva.json")
))

class EbolaListener(StreamListener):
    """A listener handles tweets that are received from the stream.
    This is a basic listener that just prints received tweets to stdout.
    """
    def __init__(self):
        StreamListener.__init__(self)
        self._buffer = []

    def on_data(self, data):
        json_data = json.loads(data)
        if json_data['geo'] is not None:
            # buffer only geo-tagged tweets
            self._buffer.append(dict({
                "location": json_data['geo'],
                "text": json_data['text'],
                "timestamp_ms": json_data['timestamp_ms'],
                "created_at": json_data['created_at']
            }))
        return True
import logging
from tweepy.streaming import StreamListener, json
from tweepy import OAuthHandler, API
from tweepy import Stream
from dateutil import parser as dtparser
from pyhackers.config import config
import redis
#from kafka.client import KafkaClient
#from kafka.producer import SimpleProducer

class StdOutListener(StreamListener):
    """
    A listener handles tweets that are received from the stream.
    This is a basic listener that just prints received tweets to stdout.
    """
    def __init__(self):
        #kafka = KafkaClient("localhost", 9092)
        #self.producer = SimpleProducer(kafka, "pyhackers-rt")
        super(StdOutListener, self).__init__()

    def on_data(self, data):
        obj = json.loads(data)
        #text = obj.get("text") or ""
        if "limit" in obj:
            # Twitter sends a "limit" notice when the stream is being rate limited
            logging.warning(obj)
        return True
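For reference, the streaming API delivers a `limit` notice such as {"limit": {"track": 1234}} when matching tweets are withheld under rate limiting; a small illustrative handler (not part of the original project):

# Sketch only: illustrates reading the withheld-tweet count from a limit notice.
def handle_limit_notice(obj):
    dropped = obj.get("limit", {}).get("track", 0)
    logging.warning("stream rate limited; %s matching tweets withheld", dropped)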
"""
import re
import json
import traceback
import requests
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener
from ontology_handler import add_entity
from publication_parser import SemanticPublication

keys = json.loads(open('./twitter_credentials.json').read())

class Listener(StreamListener):
    """
    Listener class for the twitter stream
    """
    def on_data(self, data):
        try:
            json_data = json.loads(data)
            tweet_txt = ''
            try:
                if json_data["retweeted_status"]:
                    return True
            except KeyError:
                try:
                    # extended tweets carry the untruncated text
                    tweet_txt = json_data["extended_tweet"]["full_text"]
                except KeyError:
                    tweet_txt = json_data["text"]
        except Exception:
            traceback.print_exc()
        return True
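The fallback chain is cut off above; as a standalone illustration, the usual order for recovering a tweet's full text from a raw streaming payload looks like this (not code from the repository):

def extract_text(json_data):
    # Sketch only: extended tweets carry the untruncated text under extended_tweet.full_text.
    if "extended_tweet" in json_data:
        return json_data["extended_tweet"]["full_text"]
    return json_data.get("text", "")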