for filepath in glob.glob(self.cleaned_files):
    # with open(filepath, 'rb') as f:
    with open(filepath, 'r') as f:
        s = f.read()
    s = s.lower()
    words = s.split(' ')
    filetitle = util.filetitle(filepath)
    sentences = tokenize.sent_tokenize(s)
    nchars = len(s)
    nwords = len(words)
    nsentences = len(sentences)
    ncharsword = round(nchars / nwords, 1)          # average characters per word
    nwordssentence = round(nwords / nsentences, 1)  # average words per sentence
    nuniquewords = len(set(words))
    uniquerate = nuniquewords / nwords              # fraction of distinct words
    grade_level = int(round(textstat.coleman_liau_index(s)))
    row = [filetitle, nchars, nwords, nsentences, ncharsword, nwordssentence, nuniquewords, uniquerate, grade_level]
    rows.append(row)

# Totals row: sum characters and words across all files
nchars = sum(row[1] for row in rows)
nwords = sum(row[2] for row in rows)
row = ['Totals', nchars, nwords, '', '', '', '', '', '']
rows.append(row)

df = pd.DataFrame(rows, columns=cols)
df = df.drop('Chars', axis=1)  # not enough space...
df = df.drop('Sentences', axis=1)
df = df.drop('Unique Rate', axis=1)
return df
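# Self-contained sketch of the same per-text statistics for a single in-memory
# string, showing the imports the loop above relies on (nltk's sent_tokenize needs
# the 'punkt' tokenizer data downloaded). The sample text and the plain dict are
# illustrative additions, not part of the original project.
import textstat
from nltk import tokenize

sample = "Readability formulas estimate how hard a text is to read. Shorter sentences usually score lower."
words = sample.lower().split(' ')
sentences = tokenize.sent_tokenize(sample)
stats = {
    'chars': len(sample),
    'words': len(words),
    'sentences': len(sentences),
    'chars_per_word': round(len(sample) / len(words), 1),
    'words_per_sentence': round(len(words) / len(sentences), 1),
    'unique_words': len(set(words)),
    'grade_level': int(round(textstat.coleman_liau_index(sample))),
}
print(stats)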
def readability(self):
    """
    Return the U.S. grade-school readability level of the text,
    estimated with the Coleman-Liau index (other tests are kept
    commented out below for reference).
    """
    s = self.text('merged')
    # grade_level = textstat.text_standard(s)
    # grade_level = textstat.smog_index(s)
    # grade_level = textstat.gunning_fog(s)
    grade_level = textstat.coleman_liau_index(s)
    grade_level = round(grade_level, 1)
    return grade_level
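# Quick comparison of the scoring choices referenced above: coleman_liau_index
# returns a numeric grade estimate, while the commented-out textstat.text_standard
# reports a consensus of several tests as a string by default. The sample string
# and printed labels are illustrative, not from the source project.
import textstat

s = "The quick brown fox jumps over the lazy dog. It was not amused by the joke."
print('Coleman-Liau:', round(textstat.coleman_liau_index(s), 1))
print('Consensus   :', textstat.text_standard(s))  # e.g. a string such as "3rd and 4th grade"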
def get_feat_readability_metrics(self):
    # Readability metrics via textstat: https://github.com/shivam5992/textstat
    # Returns (list_of_scores, error_flag).
    try:
        test_data = self.webscrap.get_body()
        out = []
        out.append(textstat.flesch_reading_ease(test_data))
        out.append(textstat.smog_index(test_data))
        out.append(textstat.flesch_kincaid_grade(test_data))
        out.append(textstat.coleman_liau_index(test_data))
        out.append(textstat.automated_readability_index(test_data))
        out.append(textstat.dale_chall_readability_score(test_data))
        out.append(textstat.difficult_words(test_data))
        out.append(textstat.linsear_write_formula(test_data))
        out.append(textstat.gunning_fog(test_data))
        # out.append(textstat.text_standard(test_data))
        return out, False
    except Exception as e:
        config.logger.error(repr(e))
        return MISSING_FEATURE * 9, True  # one placeholder per metric on failure
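# Usage sketch for get_feat_readability_metrics(): the returned list holds the
# nine scores in the append order above, so pairing them with labels makes them
# easier to log. `extractor` is a hypothetical instance of the class the method
# belongs to; the label list is added here for illustration only.
METRIC_NAMES = [
    'flesch_reading_ease', 'smog_index', 'flesch_kincaid_grade',
    'coleman_liau_index', 'automated_readability_index',
    'dale_chall_readability_score', 'difficult_words',
    'linsear_write_formula', 'gunning_fog',
]

values, errored = extractor.get_feat_readability_metrics()
if not errored:
    for name, value in zip(METRIC_NAMES, values):
        print(f'{name}: {value}')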
    :return: list of details
    """
    group_by = 'Reading Level Analysis '
    results = []
    results.append(TextFeature('Flesch Reading Ease', textstat.flesch_reading_ease(no_code_text), group_by))  # higher is better, scale 0 to 100
    results.append(TextFeature('Flesch-Kincaid Grade Level', textstat.flesch_kincaid_grade(no_code_text), group_by))
    try:
        results.append(TextFeature('The Fog Scale (Gunning FOG formula)', textstat.gunning_fog(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('The Fog Scale (Gunning FOG formula)', "Undetermined", group_by))
    try:
        results.append(TextFeature('The SMOG Index', textstat.smog_index(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('The SMOG Index', "Undetermined", group_by))
    results.append(TextFeature('Automated Readability Index', textstat.automated_readability_index(no_code_text), group_by))
    results.append(TextFeature('The Coleman-Liau Index', textstat.coleman_liau_index(no_code_text), group_by))
    try:
        results.append(TextFeature('Linsear Write Formula', textstat.linsear_write_formula(no_code_text), group_by))
    except IndexError:
        results.append(TextFeature('Linsear Write Formula', "Undetermined", group_by))
    try:
        results.append(TextFeature('Dale Chall Readability Score', textstat.dale_chall_readability_score(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('Dale Chall Readability Score', "Undetermined", group_by))
    try:
        results.append(TextFeature('Readability Consensus', textstat.readability_consensus(no_code_text), group_by))
    except (TypeError, IndexError):
        results.append(TextFeature('Readability Consensus', "Undetermined; One of the tests above failed.", group_by))
    return results
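# The block above depends on a project-specific TextFeature class and a
# `no_code_text` string prepared elsewhere. A minimal stand-in (an assumption,
# not the project's actual definition) is enough to exercise the block in
# isolation. Note that readability_consensus comes from older textstat releases;
# newer versions expose a similar consensus score as textstat.text_standard.
from collections import namedtuple

TextFeature = namedtuple('TextFeature', ['name', 'value', 'group_by'])
no_code_text = "Readability tests need a few full sentences of plain prose to produce sensible scores."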