How to use the tweepy.streaming.StreamListener class in tweepy

To help you get started, we’ve selected a few tweepy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github narayave / Insight-GDELT-Feed / kafka / test_kafka_producer.py View on Github external
from kafka import SimpleProducer, KafkaClient
from multiprocessing.pool import ThreadPool
from six.moves import configparser
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener


# Twitter API credentials, read from the [auth] section of config.ini.
config = configparser.ConfigParser()
config.read('config.ini')

_get_auth = config.get
access_token = _get_auth('auth', 'access_token')
access_token_secret = _get_auth('auth', 'access_token_secret')
consumer_key = _get_auth('auth', 'consumer_key')
consumer_secret = _get_auth('auth', 'consumer_secret')

class StdOutListener(StreamListener):
    """Forward every raw tweet payload to Kafka and echo it to stdout."""

    def on_data(self, data):
        # Publish the raw JSON to the "tweetdata" topic before echoing it.
        payload = data.encode('utf-8')
        producer.send_messages("tweetdata", payload)
        print(data)
        # Returning True keeps the stream connection open.
        return True

    def on_error(self, status):
        # Report the HTTP status delivered by the streaming API.
        print("Status - ", status)


#This is a basic listener that just prints received tweets to stdout.



def start_streaming(stream):
    """Start filtering *stream* by topic keywords.

    NOTE(review): this snippet appears truncated -- ``map_func`` is defined
    but never invoked here; presumably the original maps it over the
    ThreadPool imported above. Confirm against the full source.
    """

    def map_func(topics):
        # Blocks while tweepy delivers tweets matching the given topics.
        stream.filter(track=topics)
github odubno / instagram-analyzer / Scrap / christian / extend / fightnight / round6.py View on Github external
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener
import urllib
#from bs4 import BeautifulSoup as bs
import pandas as pd
import sched, time
import datetime
from creds import *
from keywords import keywords
import json

class listener(StreamListener):
    """Stream listener that appends non-retweet tweets to round6.csv.

    NOTE(review): Python 2 syntax (``print tweetdata``); the snippet is also
    truncated -- the ``try`` below has no matching ``except`` visible here.
    Confirm against the full source.
    """

    def on_data(self, data):        
        # Crude extraction of the tweet text by slicing the raw JSON string.
        tweet = data.split(',"text":"')[1].split('","source')[0]
        decoded = json.loads(data)
        created_at = decoded['created_at']
        #location = decoded['user']['location'].encode('ascii', 'ignore') #location isn't completely necessary, and can be retrieved later on using username
        user = decoded['user']['screen_name']
        # Drop non-ASCII characters so the CSV row stays plain ASCII.
        fulltweet = decoded['text'].encode('ascii', 'ignore')
        tweetdata = '%s, @%s, %s \n' % (created_at, user, fulltweet) #having fulltweet as opposed to location (which is often not provided) be the last element will make cleaning df easier
        for _ in range(1):
            try:
                saveFile = open('round6.csv', 'a')
                # Skip anything containing 'RT' (crude retweet filter).
                if 'RT' not in tweet:
                    print tweetdata
                    saveFile.write(tweetdata)
                    saveFile.close()
github odubno / instagram-analyzer / Scrap / christian / extend / fightnight / round3.py View on Github external
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener
import urllib
#from bs4 import BeautifulSoup as bs
import pandas as pd
import sched, time
import datetime
from creds import *
from keywords import keywords
import json

class listener(StreamListener):
    """Stream listener that appends non-retweet tweets to round3.csv.

    NOTE(review): Python 2 syntax (``print tweetdata``); the snippet is also
    truncated -- the ``try`` below has no matching ``except`` visible here.
    Confirm against the full source.
    """

    def on_data(self, data):        
        # Crude extraction of the tweet text by slicing the raw JSON string.
        tweet = data.split(',"text":"')[1].split('","source')[0]
        decoded = json.loads(data)
        created_at = decoded['created_at']
        #location = decoded['user']['location'].encode('ascii', 'ignore') #location isn't completely necessary, and can be retrieved later on using username
        user = decoded['user']['screen_name']
        # Drop non-ASCII characters so the CSV row stays plain ASCII.
        fulltweet = decoded['text'].encode('ascii', 'ignore')
        tweetdata = '%s, @%s, %s \n' % (created_at, user, fulltweet) #having fulltweet as opposed to location (which is often not provided) be the last element will make cleaning df easier
        for _ in range(1):
            try:
                saveFile = open('round3.csv', 'a')
                # Skip anything containing 'RT' (crude retweet filter).
                if 'RT' not in tweet:
                    print tweetdata
                    saveFile.write(tweetdata)
                    saveFile.close()
github riccardotommasini / twitter-sentiment-analysis / collector.py View on Github external
# Negative-sentiment emoticon marker. NOTE(review): this snippet starts
# mid-file; ``keywords``, ``re`` and ``DEBUG`` are presumably defined or
# imported above -- confirm against the full source.
emoticon =":("

# Case-insensitive pattern matching any of the configured keywords.
regex = re.compile('|'.join(keywords).lower())
linenum_re = re.compile(r'([A-Z][A-Z]\d+)')
# Matches tweets that begin with "RT " (retweets).
retweets_re = re.compile(r'^RT\s')

# Encode to latin-1, silently dropping characters it cannot represent.
enc = lambda x: x.encode('latin1', errors='ignore')


def print_debug(er,msg):
	# Print an error/message pair only when the module-level DEBUG flag
	# is set. NOTE(review): Python 2 print statements; DEBUG is defined
	# outside this snippet -- confirm against the full source.
	if DEBUG:
		print "Error: ", er
		print "Message: < %s >" % msg


class EmoticonListener(StreamListener):
	"""Collects tweets, skipping malformed ones, until 500 are processed.

	NOTE(review): Python 2 idioms (``dict.has_key``) and the ``encoding``
	argument to ``json.loads`` (removed in Python 3.9); ``on_data`` is cut
	off below -- confirm against the full source.
	"""

	def __init__(self):
		# Number of tweets processed so far.
		self.count = 0

	def on_data(self, data):

		# Hard stop: terminate the whole process after 500 tweets.
		if(self.count > 500):
			exit(0)

		tweet = json.loads(data, encoding='utf-8')

		# Skip payloads that are not complete tweet objects.
		if not tweet.has_key('id'):
			print_debug("No Id, skip the tweet","")
			return True
		elif not tweet.has_key('user'):
			print_debug("No user, skip the tweet", "")
github odubno / instagram-analyzer / Scrap / christian / extend / fightnight / round1.py View on Github external
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener
import urllib
#from bs4 import BeautifulSoup as bs
import pandas as pd
import sched, time
import datetime
from creds import *
from keywords import keywords
import json

class listener(StreamListener):
    """Stream listener that appends non-retweet tweets to round1.csv.

    NOTE(review): Python 2 syntax (``print tweetdata``); the snippet is also
    truncated -- the ``try`` below has no matching ``except`` visible here.
    Confirm against the full source.
    """

    def on_data(self, data):        
        # Crude extraction of the tweet text by slicing the raw JSON string.
        tweet = data.split(',"text":"')[1].split('","source')[0]
        decoded = json.loads(data)
        created_at = decoded['created_at']
        #location = decoded['user']['location'].encode('ascii', 'ignore') #location isn't completely necessary, and can be retrieved later on using username
        user = decoded['user']['screen_name']
        # Drop non-ASCII characters so the CSV row stays plain ASCII.
        fulltweet = decoded['text'].encode('ascii', 'ignore')
        tweetdata = '%s, @%s, %s \n' % (created_at, user, fulltweet) #having fulltweet as opposed to location (which is often not provided) be the last element will make cleaning df easier
        for _ in range(1):
            try:
                saveFile = open('round1.csv', 'a')
                # Skip anything containing 'RT' (crude retweet filter).
                if 'RT' not in tweet:
                    print tweetdata
                    saveFile.write(tweetdata)
                    saveFile.close()
github xiaohan2012 / twitter-sent-dnn / data_collection.py View on Github external
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.utils import import_simplejson
# Bind the module-level ``json`` name to tweepy's bundled simplejson.
json = import_simplejson()

# SECURITY NOTE(review): hard-coded Twitter API credentials committed to a
# public repository -- these should be revoked and loaded from environment
# variables or a config file instead.
consumer_key = "ncMZ2CP7YmScHkLYwmfCYaTZz"
consumer_secret = "ZkFEJXxXEOUlqkhrJ14kzWakrXjqIe11de7ks28DyC79P31t9q"

access_token = "1157786504-XB3DXGrMmhvM1PAb6aeys3LJFYI9Y3LzS6veRHj"
access_token_secret = "8w69uDRm9PPA9iv3fNtkHPKP4FIq5SFtVbcE28wtcY5qx"

# Emoticon lists used to label tweet sentiment as positive or negative.
pos_emo = [":)", ":-)", ": )", ":D", "=)", ";)"]
neg_emo = [":(", ":-(", ": ("]
emo = pos_emo + neg_emo
class StdOutListener(StreamListener):
    """Labels English, non-retweet tweets by the emoticons they contain.

    NOTE(review): the indentation mixes tabs and spaces (Python 2 with a
    tab stop of 8) and ``on_data`` is cut off below -- confirm against the
    full source before editing.
    """

    def on_data(self, raw_data):
        data = json.loads(raw_data)
	if data['retweet_count'] != 0 or data.has_key('retweeted_status') or data['lang'] != "en":
	    return True

	text = data['text'].encode('ascii','ignore')
	pos = False
	neg = False
	if any(e in text for e in pos_emo):
            pos = True
	if any(e in text for e in neg_emo):
            neg = True
	
        emo_idx = None	
	if pos and not neg:
github jubatus / jubakit / jubakit / loader / twitter.py View on Github external
i = 0
      while True:
        self._listener.check_error()
        try:
          yield self._queue.get(True, 1)
        except queue.Empty:
          continue
        self._queue.task_done()
        i += 1
        if self._count is not None and self._count <= i:
          break
    finally:
      self._stream.disconnect()
      self._thread.join()

class _TwitterStreamListener(tweepy.streaming.StreamListener):
  """Feeds incoming statuses to the loader; stream errors are recorded and
  re-raised later via :meth:`check_error`."""

  def __init__(self, loader, keys):
    super(_TwitterStreamListener, self).__init__()
    self._loader = loader
    self._keys = keys
    self._error = None

  def check_error(self):
    # Surface any error captured by the streaming callbacks.
    if self._error:
      raise self._error

  def on_status(self, status):
    # Evaluate each configured jq expression against the raw status JSON.
    row = {key: jq.jq(key).transform(status._json) for key in self._keys}
    self._loader._on_event(row)

  def on_error(self, status_code):
    # Record the failure; the consumer thread re-raises it via check_error().
    self._error = RuntimeError('Twitter Streaming API returned HTTP error {0}'.format(status_code))
github Kitware / minerva / minerva.py View on Github external
from tweepy import OAuthHandler
from tweepy import Stream

import os
import sys
import time
import json
import atexit

# Load the Minerva/Ebola configuration stored alongside this module.
# Fixed: the original opened the file without closing it; a context
# manager releases the handle deterministically.
with open(os.path.join(os.path.dirname(__file__), "minerva.json")) as _config_file:
    minerva_ebola_config = json.load(_config_file)


class EbolaListener(StreamListener):
    """A listener that handles tweets received from the stream.

    Buffers geotagged tweets; tweets without ``geo`` data are ignored.
    NOTE(review): this snippet is truncated -- the dict/append call below
    is not closed. Confirm against the full source.
    """
    def __init__(self):
        StreamListener.__init__(self)
        # Geotagged tweets collected so far.
        self._buffer = []

    def on_data(self, data):
        json_data = json.loads(data)
        # Keep only tweets that carry explicit geo coordinates.
        if json_data['geo'] is not None:
            self._buffer.append(dict({
                "location": json_data['geo'],
                "text": json_data['text'],
                "timestamp_ms": json_data['timestamp_ms'],
                "created_at": json_data['created_at']
github bcambel / pythonhackers / pyhackers / ext / twitter.py View on Github external
import logging
from tweepy.streaming import StreamListener, json
from tweepy import OAuthHandler, API
from tweepy import Stream
from dateutil import parser as dtparser
from pyhackers.config import config
import redis
#from kafka.client import KafkaClient
#from kafka.producer import SimpleProducer


class StdOutListener(StreamListener):
    """
    A listener that handles tweets received from the stream.
    This is a basic listener that just prints received tweets to stdout.

    NOTE(review): this snippet appears truncated -- ``on_data`` ends right
    after the rate-limit check. Confirm against the full source.
    """

    def __init__(self):
        #kafka = KafkaClient("localhost", 9092)
        #self.producer = SimpleProducer(kafka, "pyhackers-rt")
        super(StdOutListener, self).__init__()

    def on_data(self, data):
        obj = json.loads(data)
        #text = obj.get("text") or ""

        # Twitter sends a "limit" notice object when the stream is
        # rate-limited; log it rather than treating it as a tweet.
        if "limit" in obj:
            logging.warn(obj)
github elixir-europe / BioHackathon / data / 05-CSPARQL-bioRxiv / src / publication_fetcher / twitter_stream.py View on Github external
"""

import re
import json
import traceback

import requests
from tweepy import Stream, OAuthHandler
from tweepy.streaming import StreamListener
from ontology_handler import add_entity
from publication_parser import SemanticPublication

# Load the Twitter API credentials. Fixed: the original opened the file
# without closing it; a context manager releases the handle, and
# ``json.load`` reads the stream directly instead of ``loads(read())``.
with open('./twitter_credentials.json') as _creds_file:
    keys = json.load(_creds_file)


class Listener(StreamListener):
    """
    Listener class for the twitter stream
    """

    def on_data(self, data):
        try:
            json_data = json.loads(data)
            tweet_txt = ''

            try:
                if json_data["retweeted_status"]:
                    return True
            except KeyError:
                try:
                    tweet_txt = json_data["extended_tweet"]["full_text"]
                except KeyError: