def test_overrider_classifier(self):
    b = tb.Blobber(classifier=classifier)
    blob = b("I am so amazing")
    assert_equal(blob.classify(), 'pos')

def setUp(self):
    self.blobber = tb.Blobber()  # The default blobber
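The `classifier` used in this test is defined elsewhere in the test module. A minimal, self-contained sketch of the same idea, assuming textblob's built-in NaiveBayesClassifier and an invented toy training set:

# Sketch only (not from the original test suite): build a Blobber whose
# TextBlobs all share one sentiment classifier, so blob.classify() works.
from textblob import Blobber
from textblob.classifiers import NaiveBayesClassifier

# Toy training data, invented for illustration; the real test uses its own fixture.
train = [
    ("I love this sandwich.", "pos"),
    ("This is an amazing place!", "pos"),
    ("I do not like this restaurant.", "neg"),
    ("This is my worst day ever.", "neg"),
]

classifier = NaiveBayesClassifier(train)
blobber = Blobber(classifier=classifier)

blob = blobber("I am so amazing")
print(blob.classify())  # expected to lean toward 'pos' on this toy data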
        # A "[t]" line starts a new review: save the one collected so far and record its title
        reviewContent.append(review)
        review = []
        reviewTitle.append(line.split("[-][t]")[1].rstrip("\r\n"))
    else:
        if "##" in line:
            # "##" lines hold the sentences of the current review
            x = line.split("##")
            # if len(x[0]) != 0:
            for i in range(1, len(x)):
                review.append(x[i].rstrip("\r\n"))
        else:
            continue

reviewContent.append(review)  # append the last review once the loop is done

# tb = Blobber(pos_tagger=PerceptronTagger())
tb = Blobber(pos_tagger=NLTKTagger())
nounScores = dict()

# Writing to a file
f = open('modified.txt', 'w')
for a in range(len(reviewContent)):
    f.write("[t]")
    # Finding bigrams in the title
    text = reviewTitle[a]
    x = tb(text).tags  # NLTK tagger
    e = 0
    while e
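The example above breaks off at the start of a `while` loop over the tagged title. For reference, here is a minimal, self-contained sketch of the tagging step it relies on, assuming textblob's bundled NLTKTagger (the review title is invented):

# Sketch of the tagging step used above.
from textblob import Blobber
from textblob.taggers import NLTKTagger

tb = Blobber(pos_tagger=NLTKTagger())

title = "great camera and excellent battery life"
tags = tb(title).tags  # list of (word, Penn Treebank tag) pairs
print(tags)
# roughly: [('great', 'JJ'), ('camera', 'NN'), ('and', 'CC'),
#           ('excellent', 'JJ'), ('battery', 'NN'), ('life', 'NN')]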
def findFeatures(reviewContent, filename):
    # nounScores is the dict containing nouns from all reviews and their respective scores from the HAC algorithm
    nounScores = dict()
    # adjDict contains each adjective and the corresponding noun it is assigned to
    adjDict = dict()
    tb = Blobber(pos_tagger=NLTKTagger())
    for a in range(len(reviewContent)):  # Stores the score of the nouns
        for i in range(len(reviewContent[a])):
            text = ' '.join([word for word in reviewContent[a][i].split() if word not in stopwords.words("english")])
            text = ''.join(ch for ch in text if ch not in exclude)
            text = nltk.word_tokenize(text)
            x = nltk.pos_tag(text)
            # Get the noun/adjective words and store them in tagList
            tagList = []
            for e in x:
                if e[1] == "NN" or e[1] == "JJ":
                    tagList.append(e)
            # Add the nouns (which are not yet in the nounScores dict) to the dict
            for e in tagList:
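The function is truncated before the noun-scoring loop. Below is a self-contained sketch of just the preprocessing it performs on each sentence; `exclude` is assumed to be a set of punctuation characters (its definition is not shown above), and the example sentence is invented:

# Sketch of the per-sentence preprocessing in findFeatures.
import string
import nltk
from nltk.corpus import stopwords

exclude = set(string.punctuation)  # assumed definition of `exclude`
sentence = "The battery life of this camera is great."

# Drop stopwords, strip punctuation, tokenize, then POS-tag
text = ' '.join(w for w in sentence.split() if w not in stopwords.words("english"))
text = ''.join(ch for ch in text if ch not in exclude)
tokens = nltk.word_tokenize(text)
tagged = nltk.pos_tag(tokens)

# Keep only singular nouns and adjectives, as the original loop does
tag_list = [(w, t) for w, t in tagged if t in ("NN", "JJ")]
print(tag_list)  # e.g. [('battery', 'NN'), ('life', 'NN'), ('camera', 'NN'), ('great', 'JJ')]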
def main(args):
    f = open(args.filename)
    D = {}
    tag_set = set([])
    tb = Blobber(pos_tagger=PerceptronTagger())
    for i, line in enumerate(f):
        b1 = tb(line)
        for w, t in b1.tags:
            tag_set.add(t)
            if w not in D:
                D[w] = Counter()
            D[w][t] = float(D[w][t] + 1)
    sorted_pos_tags = sorted(list(tag_set))
    rows = []
    for w in D.keys():
        row = [w]
        pos_counts_word = np.array([float(D[w][t]) for t in sorted_pos_tags])
        pos_dist_word = pos_counts_word / float(np.sum(pos_counts_word))
        assert np.isclose(np.sum(pos_dist_word), 1.0)
        row = row + list(pos_dist_word)
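The snippet ends before `row` is appended to `rows`. Assuming each iteration finishes with `rows.append(row)`, a hypothetical follow-up (not part of the original) could dump the word/POS-distribution table to CSV, one column per tag:

# Hypothetical follow-up, not from the original file: `rows` and
# `sorted_pos_tags` are the names built in the loop above, the output path is invented.
import csv

def write_pos_table(rows, sorted_pos_tags, path="pos_distributions.csv"):
    with open(path, "w", newline="") as out:
        writer = csv.writer(out)
        writer.writerow(["word"] + sorted_pos_tags)  # header: word + one column per tag
        writer.writerows(rows)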
import re
from collections import defaultdict
from textblob import Blobber
from textblob_aptagger import PerceptronTagger
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
from nytnlp.keywords import rake
from nytnlp.util import penn_to_wordnet
from geiger.text.clean import clean_doc
from geiger.knowledge import Bigram
blob = Blobber(pos_tagger=PerceptronTagger())
stops = stopwords.words('english')
lem = WordNetLemmatizer()
import config
bigram = Bigram(remote=config.remote)
def keyword_tokenize(doc):
"""
Tokenizes a document so that only keywords and phrases
are returned. Keywords are returned as lemmas.
"""
doc = clean_doc(doc)
blo = blob(doc)
# Only process tokens which are keywords
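The tokenizer is cut off right after that comment, so the geiger implementation is not shown here. The following is only a hedged sketch of the usual pattern such a function follows (POS-tag, drop stopwords, lemmatize by part of speech); `_to_wordnet_pos` is a hypothetical stand-in for nytnlp's penn_to_wordnet, and none of this code comes from geiger:

# Sketch only; underscore-prefixed names avoid clashing with the module globals above.
from nltk.corpus import stopwords, wordnet
from nltk.stem.wordnet import WordNetLemmatizer
from textblob import Blobber
from textblob_aptagger import PerceptronTagger

_blob = Blobber(pos_tagger=PerceptronTagger())
_lem = WordNetLemmatizer()
_stops = set(stopwords.words('english'))

def _to_wordnet_pos(penn_tag):
    """Map a Penn Treebank tag to a WordNet POS constant (hypothetical helper)."""
    if penn_tag.startswith('NN'):
        return wordnet.NOUN
    if penn_tag.startswith('VB'):
        return wordnet.VERB
    if penn_tag.startswith('JJ'):
        return wordnet.ADJ
    if penn_tag.startswith('RB'):
        return wordnet.ADV
    return None

def simple_keyword_tokenize(doc):
    """Keep lemmas of content words (nouns/verbs/adjectives/adverbs) only."""
    tokens = []
    for word, tag in _blob(doc).tags:
        pos = _to_wordnet_pos(tag)
        if pos is None or word.lower() in _stops:
            continue
        tokens.append(_lem.lemmatize(word.lower(), pos))
    return tokens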