How to use the textstat.textstat.textstat.coleman_liau_index function in textstat

To help you get started, we’ve selected a few textstat examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bburns / LanguageModels / src / wp / data.py View on Github external
for filepath in glob.glob(self.cleaned_files):
            # with open(filepath, 'rb') as f:
            with open(filepath, 'r') as f:
                s = f.read()
                s = s.lower()
                words = s.split(' ')
                filetitle = util.filetitle(filepath)
                sentences = tokenize.sent_tokenize(s)
                nchars = len(s)
                nwords = len(words)
                nsentences = len(sentences)
                ncharsword = round(nchars/nwords,1)
                nwordssentence = round(nwords/nsentences,1)
                nuniquewords = len(set(words))
                uniquerate = nuniquewords / nwords
                grade_level = int(round(textstat.coleman_liau_index(s)))
                row = [filetitle, nchars, nwords, nsentences, ncharsword, nwordssentence, nuniquewords, uniquerate, grade_level]
                rows.append(row)
        nchars = sum([row[1] for row in rows])
        nwords = sum([row[2] for row in rows])
        row = ['Totals',nchars, nwords, '','','','','','']
        rows.append(row)
        df = pd.DataFrame(rows, columns=cols)
        df = df.drop('Chars',axis=1) # not enough space...
        df = df.drop('Sentences',axis=1)
        df = df.drop('Unique Rate',axis=1)
        return df
github bburns / LanguageModels / src / wp / data.py View on Github external
def readability(self):
        """
        Return the grade-school readability level of the merged text.

        The score is the Coleman-Liau index of ``self.text('merged')``,
        rounded to one decimal place.
        """
        merged_text = self.text('merged')
        # Other textstat metrics were previously tried here and left disabled:
        # text_standard, smog_index, gunning_fog.
        return round(textstat.coleman_liau_index(merged_text), 1)
github DeFacto / DeFacto / python / trustworthiness / features_core.py View on Github external
def get_feat_readability_metrics(self):
        """
        Compute textstat readability scores for the scraped page body.

        Returns a ``(features, failed)`` pair. On success ``features`` is a
        list of nine metric values in a fixed order and ``failed`` is
        ``False``. If fetching the body or any metric raises, the error is
        logged and ``(MISSING_FEATURE * 9, True)`` is returned instead.
        """
        # https://github.com/shivam5992/textstat
        try:
            body = self.webscrap.get_body()
            # Order matters: callers receive the scores positionally.
            # textstat.text_standard is intentionally not part of the list.
            metrics = (
                textstat.flesch_reading_ease,
                textstat.smog_index,
                textstat.flesch_kincaid_grade,
                textstat.coleman_liau_index,
                textstat.automated_readability_index,
                textstat.dale_chall_readability_score,
                textstat.difficult_words,
                textstat.linsear_write_formula,
                textstat.gunning_fog,
            )
            scores = [metric(body) for metric in metrics]
        except Exception as e:
            # Any failure yields the placeholder feature vector.
            config.logger.error(repr(e))
            return MISSING_FEATURE * 9, True

        return scores, False
github AWegnerGitHub / SE_Zephyr_VoteRequest_bot / utils / utils.py View on Github external
:return: list of details
    """
    group_by = 'Reading Level Analysis '
    results = []
    results.append(TextFeature('Flesch Reading Ease', textstat.flesch_reading_ease(no_code_text), group_by))        # higher is better, scale 0 to 100
    results.append(TextFeature('Flesch-Kincaid Grade Level', textstat.flesch_kincaid_grade(no_code_text), group_by))
    try:
        results.append(TextFeature('The Fog Scale (Gunning FOG formula)', textstat.gunning_fog(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('The Fog Scale (Gunning FOG formula)', "Undetermined", group_by))
    try:
        results.append(TextFeature('The SMOG Index', textstat.smog_index(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('The SMOG Index', "Undetermined", group_by))
    results.append(TextFeature('Automated Readability Index', textstat.automated_readability_index(no_code_text), group_by))
    results.append(TextFeature('The Coleman-Liau Index', textstat.coleman_liau_index(no_code_text), group_by))
    try:
        results.append(TextFeature('Linsear Write Formula', textstat.linsear_write_formula(no_code_text), group_by))
    except IndexError:
        results.append(TextFeature('Linsear Write Formula', "Undetermined", group_by))
    try:
        results.append(TextFeature('Dale Chall Readability Score', textstat.dale_chall_readability_score(no_code_text), group_by))
    except IndexError:  # Not sure why, but this test throws this error sometimes
        results.append(TextFeature('Dale Chall Readability Score', "Undetermined", group_by))

    try:
        results.append(TextFeature('Readability Consensus', textstat.readability_consensus(no_code_text), group_by))
    except (TypeError, IndexError):
        results.append(TextFeature('Readability Consensus', "Undetermined; One of the tests above failed.", group_by))
    return results