Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from __future__ import unicode_literals
from datetime import datetime
from bs4 import BeautifulSoup, SoupStrainer
from konlpy.stream import BaseStreamer
from konlpy.data import StringWriter
import requests
import time
import colorama
from colorama import Style, Fore
from konlpy.utils import pprint
class DCInsideStreamer(BaseStreamer):
"""DCInside is one of the biggest community websites in Korea.
DCInsideStreamer helps to stream a specific gallery from past to future.
"""
def __init__(self, markup='lxml', is_async=True):
super(DCInsideStreamer, self).__init__(is_async=is_async)
self.is_async = is_async
parser = self.get_parser()
parser.add_argument(
'--include_comments',
help='include comments',
action='store_true'
)
parser.add_argument(
'--comments_per_page',
counts = []
keywords = []
item_list = soup.find("div", {"class": "realtime_part"}).findAll("div", {"class": "roll_txt"})
item_list = [item.find("div", {"class": "rank_cont"}) for item in item_list]
for item in item_list:
count = item.find("span", {"class": "ir_wa"}).getText()
keyword = item.find("span", {"class": "txt_issue"}).getText()
counts.append(count)
keywords.append(keyword)
return counts, keywords
class DaumStreamer(BaseStreamer):
"""DaumStreamer helps to stream daum trending keywords asynchronously.
.. code-block:: python
>>> from konlpy.stream import daum
>>> streamer = daum.DaumStreamer()
>>> streamer.stream()
김민승
이유애린
훈남정음
소유진
...
"""
def __init__(self, is_async=True):
if (self.limit == self.options.tweet_limits) | (
(time.time() - self.init_time) >= self.options.time_limits):
return False
else:
write_tweets_to_files(tweet)
self.limit += 1
if self.limit == self.options.tweet_limits:
return False
def on_error(self, status_code):
    """Decide whether streaming should stop after an HTTP error status.

    Args:
        status_code (int): HTTP status code reported for the failed request.

    Returns:
        False when ``status_code`` is 420; None for every other status code.
    """
    # NOTE(review): Twitter's streaming API documents 420 as a rate-limit
    # response rather than a generic connection failure, and tweepy listeners
    # disconnect when this hook returns False. The enclosing class is not
    # visible from here -- confirm it is a tweepy listener.
    if status_code == 420:
        return False
    # Any other status falls through with None, exactly as before.
    return None
class TwitterStreamer(BaseStreamer):
"""Start streaming on Twitter with your api keys and tokens.
Args:
dirname (str): directory to save output files.
word_list (list): list of words to be streamed.
is_async (bool): if true, apply threading in tweepy layer.
"""
def __init__(self, dirname=DATA_DIR, word_list=ALPHABET, is_async=True):
super(TwitterStreamer, self).__init__(is_async=is_async)
self.is_async = is_async
parser = self.get_parser()
parser.add_argument(
'--consumer_key',
help='consumer key',
from __future__ import unicode_literals
from datetime import datetime, timedelta
from konlpy.stream import BaseStreamer
from konlpy.data import StringWriter
import requests
import time
import json
import colorama
from colorama import Style, Fore
from konlpy.utils import pprint
class GoogleTrendStreamer(BaseStreamer):
"""Google is one of the biggest websites in the world.
GoogleTrendStreamer helps to stream trends from past to future.
"""
def __init__(self, markup='lxml', is_async=True):
super(GoogleTrendStreamer, self).__init__(is_async=is_async)
self.is_async = is_async
parser = self.get_parser()
parser.add_argument(
'--init_date',
help='initial post_id to start crawling',
default=datetime.today().strftime("%Y%m%d")
)
parser.add_argument(
'--final_date',
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from time import sleep
from konlpy.stream import BaseStreamer, TwitterStreamer
from konlpy.stream.naver import get_current_trend
from konlpy.utils import pprint
class NavtterStreamer(BaseStreamer):
"""Start streaming of twitter about naver's current top trending keywords.
In order to use NavtterStreamer, you have to set-up both twitter and Navtter's options.
.. code-block:: python
from konlpy.stream import NavtterStreamer
app = NavtterStreamer()
app.show_options() # Print available options
app.options.interval = 3600 # Update naver trends every 3600 secs
app.options.verbose = True # Print trends
# Your twitter api keys and tokens.
app.twitter.options.consumer_key = 'consumer_key'
app.twitter.options.consumer_secret = 'consumer_secret'
app.twitter.options.access_token = 'access_token'
url = 'https://www.naver.com/'
html = urlopen(url)
soup = BeautifulSoup(html, 'html.parser')
counts = []
keywords = []
for item in soup.find("div", {"class": "ah_roll_area PM_CL_realtimeKeyword_rolling"}).findAll("li", {"class": "ah_item"}):
count = item.find("span", {"class": "ah_r"}).getText()
keyword = item.find("span", {"class": "ah_k"}).getText()
counts.append(count)
keywords.append(keyword)
return counts, keywords
class NaverStreamer(BaseStreamer):
"""NaverStreamer helps to stream naver trending keywords asynchronously.
.. code-block:: python
>>> from konlpy.stream import naver
>>> streamer = naver.NaverStreamer()
>>> streamer.stream()
cj채용
온주완의 뮤직쇼
유상무
현대차
...
"""
def __init__(self, is_async=True):