num_letters = sum(1 for c in no_code_text if c.isalpha())
num_numbers = sum(1 for c in no_code_text if c.isdigit())
num_alphanum = sum(1 for c in no_code_text if c.isalnum())
num_otherchars = num_chars - num_alphanum
results.append(TextFeature('Number of characters', num_chars, group_by))
results.append(TextFeature('Number of letters', num_letters, group_by))
results.append(TextFeature('Number of numbers', num_numbers, group_by))
results.append(TextFeature('Number of other characters', num_otherchars, group_by))
character_counts = Counter(no_code_text.lower())
for c in sorted(character_counts.items()):
    try:
        results.append(TextFeature('Character count for "{}"'.format(c[0].encode('unicode_escape')), c[1], group_by))
    except AttributeError:
        results.append(TextFeature('Character count for "{}"'.format(c[0]), c[1], group_by))
results.append(TextFeature('Number of syllables', textstat.syllable_count(no_code_text), group_by))
results.append(TextFeature('Lexicon Count (without punctuation)', textstat.lexicon_count(no_code_text, True), group_by))
results.append(TextFeature('Lexicon Count (with punctuation)', textstat.lexicon_count(no_code_text, False), group_by))
results.append(TextFeature('Number of lower case characters', num_lower, group_by))
results.append(TextFeature('Number of upper case characters', num_upper, group_by))
return results
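A minimal usage sketch of the textstat calls that feed the counts above (the sample string is invented); syllable_count and lexicon_count are part of textstat's public API.
import textstat

sample = "Readability metrics count syllables, words, and characters."
print(textstat.syllable_count(sample))        # total syllables in the text
print(textstat.lexicon_count(sample, True))   # word count with punctuation removed
print(textstat.lexicon_count(sample, False))  # word count with punctuation kept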
for filepath in glob.glob(self.cleaned_files):
    # with open(filepath, 'rb') as f:
    with open(filepath, 'r') as f:
        s = f.read()
    s = s.lower()
    words = s.split(' ')
    filetitle = util.filetitle(filepath)
    sentences = tokenize.sent_tokenize(s)
    nchars = len(s)
    nwords = len(words)
    nsentences = len(sentences)
    ncharsword = round(nchars/nwords,1)
    nwordssentence = round(nwords/nsentences,1)
    nuniquewords = len(set(words))
    uniquerate = nuniquewords / nwords
    grade_level = int(round(textstat.coleman_liau_index(s)))
    row = [filetitle, nchars, nwords, nsentences, ncharsword, nwordssentence, nuniquewords, uniquerate, grade_level]
    rows.append(row)
nchars = sum([row[1] for row in rows])
nwords = sum([row[2] for row in rows])
row = ['Totals',nchars, nwords, '','','','','','']
rows.append(row)
df = pd.DataFrame(rows, columns=cols)
df = df.drop('Chars',axis=1) # not enough space...
df = df.drop('Sentences',axis=1)
df = df.drop('Unique Rate',axis=1)
return df
def decide_if_assigned_threshold(self, person, threshold):
    reading_levels = {}
    for my_product in person.all_products:
        text = ""
        if my_product.title:
            text += u" " + my_product.title
        if my_product.get_abstract_using_mendeley():
            text += u" " + my_product.get_abstract_using_mendeley()
        # only do if at least three words between periods,
        # otherwise the textstat library prints too many "Not Enough Words" error messages
        if text:
            sentences = text.split(".")
            if any([len(sentence.split()) > 3 for sentence in sentences]):
                try:
                    grade_level = textstat.flesch_kincaid_grade(text)
                    # print u"grade level is {} for {}; text: {}".format(grade_level, my_product.doi, text)
                    if grade_level > 0:
                        # is sometimes negative, strangely. examples in ethan's profile
                        reading_levels[my_product.doi] = grade_level
                except TypeError:  # if the text is too short it throws this
                    pass
    if reading_levels.values():
        average_reading_level = sum(reading_levels.values()) / float(len(reading_levels))
        self.candidate_badge.value = average_reading_level
        self.assigned = True
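A self-contained sketch of the guard used above (the sample text is invented): flesch_kincaid_grade is only called when at least one sentence has more than three words, since very short input can raise errors or produce odd scores.
import textstat

text = "An abstract with enough words per sentence to score sensibly."
sentences = text.split(".")
if any(len(sentence.split()) > 3 for sentence in sentences):
    try:
        print(textstat.flesch_kincaid_grade(text))  # approximate U.S. grade level
    except TypeError:
        pass  # text too short for the formula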
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.content_chars = aggregators.len(
        self.datasources.content,
        name=self._name + ".content_chars"
    )
    """
    `int` : The number of characters of viewable content (no markup or
    templates)
    """
    self.flesh_kincaid = Feature(
        self._name + ".flesh_kincaid",
        textstat.flesch_reading_ease,
        depends_on=[self.datasources.content],
        returns=float
    )
    """
    `float` : returns the Flesch reading ease score.
    (https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests)
    """
    self.headings = aggregators.len(
        self.datasources.headings,
        name=self._name + ".headings"
    )
    "`int` : The number of headings"
    self.external_links = aggregators.len(
        self.datasources.external_links,
        # the original snippet ends mid-call; closed here following the pattern of the calls above
        name=self._name + ".external_links"
    )
from .textstat import textstat

__version__ = (0, 5, 6)

for attribute in dir(textstat):
    if callable(getattr(textstat, attribute)):
        if not attribute.startswith("_"):
            globals()[attribute] = getattr(textstat, attribute)
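Once the loop above has copied textstat's public callables into the package namespace, the readability formulas can be called as module-level functions; a small sketch with an invented sample string:
import textstat

sample = "The quick brown fox jumps over the lazy dog. It was a bright cold day in April."
print(textstat.flesch_reading_ease(sample))   # higher score means easier text
print(textstat.coleman_liau_index(sample))    # approximate U.S. grade level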
try:
    results.append(TextFeature('The SMOG Index', textstat.smog_index(no_code_text), group_by))
except IndexError:  # Not sure why, but this test throws this error sometimes
    results.append(TextFeature('The SMOG Index', "Undetermined", group_by))
results.append(TextFeature('Automated Readability Index', textstat.automated_readability_index(no_code_text), group_by))
results.append(TextFeature('The Coleman-Liau Index', textstat.coleman_liau_index(no_code_text), group_by))
try:
    results.append(TextFeature('Linsear Write Formula', textstat.linsear_write_formula(no_code_text), group_by))
except IndexError:
    results.append(TextFeature('Linsear Write Formula', "Undetermined", group_by))
try:
    results.append(TextFeature('Dale Chall Readability Score', textstat.dale_chall_readability_score(no_code_text), group_by))
except IndexError:  # Not sure why, but this test throws this error sometimes
    results.append(TextFeature('Dale Chall Readability Score', "Undetermined", group_by))
try:
    results.append(TextFeature('Readability Consensus', textstat.readability_consensus(no_code_text), group_by))
except (TypeError, IndexError):
    results.append(TextFeature('Readability Consensus', "Undetermined; One of the tests above failed.", group_by))
return results
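A stripped-down sketch of the defensive pattern above (the sample text is invented): some formulas such as smog_index can raise IndexError on unusual input, so the score is reported as undetermined instead of crashing.
import textstat

text = "Collected prose to score."
try:
    score = textstat.smog_index(text)
except IndexError:
    score = "Undetermined"
print(score)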
def readability(self):
    """
    Return grade school readability level of the text, using the Coleman-Liau index.
    """
    s = self.text('merged')
    # grade_level = textstat.text_standard(s)
    # grade_level = textstat.smog_index(s)
    # grade_level = textstat.gunning_fog(s)
    grade_level = textstat.coleman_liau_index(s)
    grade_level = round(grade_level, 1)
    return grade_level
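The commented-out alternatives above are also part of textstat's API; a sketch (with an invented sample) comparing the single-formula score used here against the consensus call:
import textstat

s = "This is a short sample paragraph. It has two simple sentences."
print(round(textstat.coleman_liau_index(s), 1))  # single formula, numeric grade level
print(textstat.text_standard(s))                 # consensus of several formulas, returned as a string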
def DaleChall(text):
    return textstat.dale_chall_readability_score(text)
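Hypothetical usage of the DaleChall wrapper defined above (the sample sentence is made up, and textstat is assumed to be imported in the same module); lower Dale-Chall scores indicate easier text.
sample = "Reading level is estimated from sentence length and how many familiar words appear."
print(DaleChall(sample))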