How to use the fuzzywuzzy.fuzz module in fuzzywuzzy

To help you get started, we’ve selected a few fuzzywuzzy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Xunius / MeiTingTrunk / lib / testpdfquery.py View on Github external
# compute input vars for fuzzy
        # height ratio wrt main text height
        hrii=hii/main_height
        # lowest y0
        y0ii=[ljj.y0 for ljj in gii]
        y0ii=np.min(y0ii)/page_h
        # number of words
        nwordsii=len(tii.split(' '))
        # similarity measure against a predefined list of non-title words
        notitlefmii=[fuzz.token_set_ratio(tii,jj) for jj in NON_TITLE_LIST]
        notitlefmii=np.mean(notitlefmii)

        # similarity measure between title obtained from meta data
        if doctitle:
            metatitlefmii=fuzz.ratio(tii, doctitle)
            gr_lines.append((tii,hii,y0ii,hrii,nwordsii,notitlefmii,metatitlefmii))
        else:
            gr_lines.append((tii,hii,y0ii,hrii,nwordsii,notitlefmii))

    #pprint(gr_lines)

    #----------------Do fuzzy logic----------------
    fuzz_scores=FCTitleGuess(gr_lines, doctitle)

    title_idx=np.argmax(fuzz_scores)
    title_guess=gr_lines[title_idx]
    title_y0=title_guess[2]*page_h
    title_x0=groups[title_idx][0].x0

    #----------------Guess author list----------------
    top_lines=line_dict.keys()
github juliema / label_reconciliations / lib / column_types / userWeightedText.py View on Github external
def top_partial_ratio(group,trustedUserWeights): #expecting group
    """Pick the best pairwise partial-ratio match among the entries of a group.

    Each entry is whitespace-normalised, then every pair of entries is scored
    with ``fuzz.partial_ratio``. The longer entry of a pair is kept as the
    candidate value, and the weight found in ``trustedUserWeights`` for the
    user attributed to that value (0 when unknown) is added to the score,
    which is capped at 100.

    Returns the ``FuzzyRatioScore`` with the highest score; ties are broken
    in favour of the longer value. Raises ``IndexError`` when the group
    yields no pairs (fewer than two entries).
    """
    def normalise(raw):
        # collapse runs of whitespace inside each line, keep the line breaks
        return '\n'.join(' '.join(piece.split()) for piece in str(raw).splitlines())

    values = group.apply(normalise)

    # lookup of remaining-index -> text, then inverted so that each entered
    # text points back at the user who entered it
    by_user = values.reset_index(level=0, drop=True, inplace=False).to_dict()
    text_to_user = {text: user for user, text in by_user.items()}

    candidates = []
    for first, second in combinations(values, 2):
        raw_score = fuzz.partial_ratio(first, second)
        longer = first if len(first) >= len(second) else second
        author = text_to_user.get(longer)
        bonus = trustedUserWeights.get(author, 0)
        # bonus points for trusted users, never exceeding the 100 ceiling
        total = min(raw_score + bonus, 100)
        candidates.append(FuzzyRatioScore(total, longer))

    candidates.sort(key=lambda c: (c.score, len(c.value)), reverse=True)
    return candidates[0]
github Run1e / AceBot / utils / docs_search.py View on Github external
def find_page(query):
	"""Return the key of ``docs`` that best corresponds to *query*.

	All keys are run through ``process.extract`` with the
	``fuzz.partial_ratio`` scorer. Walking the results in the order
	returned, the first key wins whose upper-case letters spell the
	upper-cased query, or which starts with the query ignoring case.
	If none qualifies, the first result is returned.
	"""
	ranked = process.extract(query, docs.keys(), scorer=fuzz.partial_ratio, limit=99999)

	for name, _score in ranked:
		# acronym-style hit: query matches the capital letters of the key
		is_acronym = query.upper() == ''.join(ch for ch in name if ch.isupper())
		if is_acronym or name.lower().startswith(query.lower()):
			return name

	return ranked[0][0]
github wagoodman / bridgy / bridgy / inventory / source.py View on Github external
allInstances = self.instances()
        matchedInstances = set()

        for host in targets:
            for instance in allInstances:
                names = [instance.name]
                if instance.aliases != None:
                    names += list(instance.aliases)
                for name in names:
                    if host.lower() == name.lower():
                        matchedInstances.add((100, instance))
                    elif partial and host.lower() in name.lower():
                        matchedInstances.add((99, instance))

                    if fuzzy:
                        score = fuzz.partial_ratio(host.lower(), name.lower())
                        if score > 85 or host.lower() in name.lower():
                            matchedInstances.add((score, instance))

        # it is possible for the same instance to be matched, if so, it should only
        # appear on the return list once (still ordered by the most probable match)
        return list(collections.OrderedDict([(v, None) for k, v in sorted(list(matchedInstances))]).keys())
github TakuyaHiraoka / Dialogue-State-Tracking-using-LSTM / dstc4_traindev / scripts / LSTMWithBOW.py View on Github external
def _translateUtteranceIntoInputVector(self,utter, call):
        #Metainfo+BOW+SLOT/Value matching result
        #--CLASS
        convClassInput=None
        if (utter["transcript"] not in LSTMWithBOWTracker.dictFuzzyMatchingResult):
            convClassInput=[0.0]*self.TOTALSIZEOFCLASSFeature
            for topic in self.tagsets.keys():
                for slot in self.tagsets[topic].keys():
                    convClassInput[self.dictIn["CLASS_"+slot]]=fuzz.partial_ratio(slot, utter["transcript"])
                    for value in self.tagsets[topic][slot]:
                        convClassInput[self.dictIn["CLASS_"+value]]=fuzz.partial_ratio(value, utter["transcript"])
            LSTMWithBOWTracker.dictFuzzyMatchingResult[utter["transcript"]]=copy.deepcopy(convClassInput)
        else:
            convClassInput=LSTMWithBOWTracker.dictFuzzyMatchingResult[utter["transcript"]]
        
        #-input
        convSentenceInput=None
        if not self.isUseSentenceRepresentationInsteadofBOW:
            convSentenceInput=[0.0]*self.TOTALSIZEOFSENTENCEFeature
            convSentenceInput[self.dictIn["SPEAKER_"+utter["speaker"]]]=1.0
            convSentenceInput[self.dictIn["TOPIC_"+utter["segment_info"]["topic"]]]=1.0
            splitedtrans=self.__getRegurelisedBOW(utter["transcript"])
            for word in splitedtrans:
                if ("WORD_"+word) in self.dictIn:#IGNORING OOV
                    convSentenceInput[self.dictIn["WORD_"+word]]=1.0
            convSentenceInput[self.dictIn["BIO_"+utter['segment_info']['target_bio']]]=1.0
        elif self.isUseSentenceRepresentationInsteadofBOW:
github arielbeje / uBot / cogs / faq.py View on Github external
if len(faqList) > 0:
                em = discord.Embed(title="List of FAQ tags",
                                   description=", ".join(faqList).title(),
                                   colour=discord.Colour.gold())
            else:
                em = discord.Embed(title="Error",
                                   description="This server does not have any defined FAQ tags.",
                                   colour=discord.Colour.red())

        elif query in await faqdb(ctx, keys=True):
            em = await embed_faq(ctx, self.bot, query)

        else:
            closeItems = []
            for item in await faqdb(ctx, keys=True):
                itemRatio = fuzz.ratio(query, item)
                if itemRatio >= 75:
                    closeItems.append((itemRatio, item.title()))
            if len(closeItems) > 0:
                if len(closeItems) == 1:
                    em = await embed_faq(ctx, self.bot, closeItems[0][1].lower(),
                                         title=f"Could not find \"{query.title()}\" in FAQ tags. Did you mean \"{closeItems[0][1]}\"?",
                                         color=discord.Colour.orange())
                else:
                    em = discord.Embed(title=f"Could not find \"{query.title()}\" in FAQ tags.",
                                       description=f"Did you mean {', '.join([item[1] for item in closeItems])}?",
                                       colour=discord.Colour.orange())
            else:
                em = discord.Embed(title="Error",
                                   description=f"Could not find \"{query.title()}\" or any similarly named tags in FAQ tags." + "\n" +
                                               f"Would you like to search [the wiki](https://wiki.factorio.com/index.php?search={query.replace(' ', '%20')})?",
                                   colour=discord.Colour.red())
github emory-courses / data-science / src / data_analysis.py View on Github external
def match(instructor):
        """Tell whether *instructor* satisfies the last/first name filters.

        ``instructor[0]`` is compared with ``lastname`` and ``instructor[1]``
        with ``firstname`` using a case-insensitive ``fuzz.ratio``; a filter
        passes when its ratio reaches ``threshold``. An empty filter always
        passes. Both filters must pass.
        """
        if lastname:
            last_ok = fuzz.ratio(lastname.lower(), instructor[0].lower()) >= threshold
        else:
            last_ok = True

        if firstname:
            first_ok = fuzz.ratio(firstname.lower(), instructor[1].lower()) >= threshold
        else:
            first_ok = True

        return last_ok and first_ok
github tizonia / tizonia-openmax-il / clients / gmusic / gmusicproxy / tizgmusicproxy.py View on Github external
situation_title = situation['name'] if situation.get('name') else ''
                        situation_desc = situation['description'] if situation.get('description') else None
                        if situation_desc:
                            if not additional_keywords:
                                print_nfo("[Google Play Music] [{0}] '{1} : {2}'." \
                                          .format(arg,
                                                  situation_title,
                                                  situation_desc))
                            else:
                                print_nfo("[Google Play Music] [{0} - {1}] '{2} : {3}'." \
                                          .format(arg,
                                                  additional_keywords \
                                                  if additional_keywords else '(no keywords)',
                                                  situation_title,
                                                  situation_desc))
                            if fuzz.partial_ratio(additional_keywords, situation_title) > 50:
                                situation_titles.append(situation_title)
                                situation_dict[situation_title] = situation

                if len(situation_titles) > 1:
                    situation_title = process.extractOne(additional_keywords, situation_titles)[0]
                    situation = situation_dict[situation_title]
                elif len(situation_titles) == 1:
                    situation_title = situation_titles[0]
                    situation = situation_dict[situation_title]

            if situation:
                print_wrn("[Google Play Music] Playing '{0}'." \
                          .format(to_ascii(situation_title)))
                self.__enqueue_station_unlimited_v2(situation)

            if not situation:
github anuragmishra1 / alter-nlu / extractors / extract_entity.py View on Github external
def extract_entity(text, dictionary):
    """Collect the distinct entities found in *text*.

    The text is first passed through ``create_spacy_clean``. Entities come
    from two passes:

    1. keywords that ``dictionary.extract_keywords`` finds in the text;
    2. keywords of ``dictionary`` that fuzzily match the whole text
       (``fuzz.token_set_ratio`` score of at least 90), each resolved back
       through ``dictionary.extract_keywords``.

    Args:
        text: raw input text.
        dictionary: keyword container exposing ``extract_keywords`` and
            ``get_all_keywords``; extracted items are indexed as
            ``(value, category)``.

    Returns:
        A list of ``{"value": str, "category": str}`` dicts without
        duplicates, in order of first discovery.
    """
    entity_output = []

    def _add(keyword):
        # Build the entry once and append it only if not already reported.
        entry = {"value": str(keyword[0]), "category": str(keyword[1])}
        if entry not in entity_output:
            entity_output.append(entry)

    text = create_spacy_clean(text)
    for user_entity in dictionary.extract_keywords(text):
        _add(user_entity)

    synonyms = list(dictionary.get_all_keywords().keys())
    for synonym in process.extractBests(text, synonyms, score_cutoff=90, scorer=fuzz.token_set_ratio):
        entities = dictionary.extract_keywords(synonym[0])
        if not entities:
            # A fuzzy hit that does not resolve to any keyword has nothing to
            # report; skip it instead of failing on entities[0].
            continue
        _add(entities[0])

    return entity_output
github robertbasic / pugdebug / pugdebug / models / file_search.py View on Github external
def is_fuzzy(self, current_path, search_string):
        """Tell whether *search_string* loosely matches *current_path*.

        True when ``fuzz.partial_ratio`` of the two strings is above 50.
        """
        similarity = fuzz.partial_ratio(search_string, current_path)
        return similarity > 50