# compute input vars for fuzzy
# height ratio wrt main text height
hrii = hii / main_height
# lowest y0, normalized by page height
y0ii = [ljj.y0 for ljj in gii]
y0ii = np.min(y0ii) / page_h
# number of words
nwordsii = len(tii.split(' '))
# similarity measure against a predefined list of non-title words
notitlefmii = [fuzz.token_set_ratio(tii, jj) for jj in NON_TITLE_LIST]
notitlefmii = np.mean(notitlefmii)
# similarity measure against the title obtained from the document metadata
if doctitle:
    metatitlefmii = fuzz.ratio(tii, doctitle)
    gr_lines.append((tii, hii, y0ii, hrii, nwordsii, notitlefmii, metatitlefmii))
else:
    gr_lines.append((tii, hii, y0ii, hrii, nwordsii, notitlefmii))
#pprint(gr_lines)

#----------------Do fuzzy logic----------------
fuzz_scores = FCTitleGuess(gr_lines, doctitle)
title_idx = np.argmax(fuzz_scores)
title_guess = gr_lines[title_idx]
title_y0 = title_guess[2] * page_h
title_x0 = groups[title_idx][0].x0
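
# A minimal standalone illustration of the two fuzzywuzzy scorers used above
# (the strings are invented for demonstration): token_set_ratio ignores word
# order and duplication, which suits comparing a line against generic
# non-title phrases, while ratio is a plain edit-distance similarity, which
# suits comparing a candidate line against the exact metadata title.
from fuzzywuzzy import fuzz

print(fuzz.token_set_ratio("Table of Contents", "contents table"))          # 100: same token set
print(fuzz.ratio("A Study of Fuzzy Matching", "A Study of Fuzy Matching"))  # ~98: one typo
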
#----------------Guess author list----------------
top_lines = line_dict.keys()
def top_partial_ratio(group, trustedUserWeights):  # expecting a pandas group
    """Return the best partial-ratio match from the fuzzywuzzy module."""
    def convLine(line):
        # collapse runs of whitespace within each line of the value
        line = '\n'.join([' '.join(ln.split()) for ln in str(line).splitlines()])
        return line

    values = group.apply(convLine)
    # generate user lookup dict
    userAttribution = values.reset_index(level=0, drop=True, inplace=False).to_dict()
    # invert it to {text that was entered: user who entered it}
    userAttribution = {i[1]: i[0] for i in userAttribution.items()}
    scores = []
    for combo in combinations(values, 2):
        score = fuzz.partial_ratio(combo[0], combo[1])
        value = combo[0] if len(combo[0]) >= len(combo[1]) else combo[1]
        userName = userAttribution.get(value)  # look up the user who wrote the value
        scoreWeight = trustedUserWeights.get(userName, 0)  # look up that user's weight
        score = score + scoreWeight  # add bonus points
        if score > 100:  # enforce a ceiling
            score = 100
        scores.append(FuzzyRatioScore(score, value))
    scores = sorted(scores,
                    reverse=True,
                    key=lambda s: (s.score, len(s.value)))
    return scores[0]
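
# A rough usage sketch for top_partial_ratio. FuzzyRatioScore, the
# (group, user)-indexed Series, and the weights dict are reconstructions for
# illustration, not part of the original code: the function expects a
# namedtuple-like FuzzyRatioScore and a Series whose index level 1 is the user.
from collections import namedtuple
from itertools import combinations
import pandas as pd
from fuzzywuzzy import fuzz

FuzzyRatioScore = namedtuple('FuzzyRatioScore', ['score', 'value'])  # assumed shape

group = pd.Series(
    ['The quick brown fox', 'The quick brown fox jumps', 'Lorem ipsum'],
    index=pd.MultiIndex.from_tuples([(0, 'alice'), (1, 'bob'), (2, 'carol')]),
)
best = top_partial_ratio(group, trustedUserWeights={'bob': 5})
print(best.score, best.value)  # 100 'The quick brown fox jumps'
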
def find_page(query):
    matches = process.extract(
        query,
        docs.keys(),
        scorer=fuzz.partial_ratio,
        limit=99999
    )
    # prefer a page whose capital letters spell the query (an acronym match)
    # or whose title starts with the query; otherwise fall back to the best
    # fuzzy match
    for match, score in matches:
        if query.upper() == ''.join(filter(str.isupper, match)) or match.lower().startswith(query.lower()):
            return match
    return matches[0][0]
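
# A hedged usage sketch for find_page: docs is assumed to be a
# {page title: content} mapping; the titles below are invented.
from fuzzywuzzy import fuzz, process

docs = {
    'Application Programming Interface': '...',
    'Getting Started': '...',
}
print(find_page('API'))     # acronym match -> 'Application Programming Interface'
print(find_page('gettin'))  # prefix match  -> 'Getting Started'
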
allInstances = self.instances()
matchedInstances = set()
for host in targets:
    for instance in allInstances:
        names = [instance.name]
        if instance.aliases is not None:
            names += list(instance.aliases)
        for name in names:
            if host.lower() == name.lower():
                matchedInstances.add((100, instance))
            elif partial and host.lower() in name.lower():
                matchedInstances.add((99, instance))
            if fuzzy:
                score = fuzz.partial_ratio(host.lower(), name.lower())
                if score > 85 or host.lower() in name.lower():
                    matchedInstances.add((score, instance))
# the same instance may be matched more than once; it should appear in the
# returned list only once, ordered by the most probable (highest-scoring) match.
# Sort on the score alone, since instance objects may not be comparable.
return list(collections.OrderedDict(
    [(instance, None) for score, instance in sorted(matchedInstances, key=lambda m: m[0], reverse=True)]
).keys())
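
# Side note: on Python 3.7+ a plain dict preserves insertion order, so the
# same order-preserving de-duplication can be written with dict.fromkeys.
# A minimal illustration with made-up (score, name) pairs:
items = [(99, 'web-1'), (100, 'web-1'), (85, 'db-1')]
deduped = list(dict.fromkeys(name for score, name in sorted(items, key=lambda m: m[0], reverse=True)))
print(deduped)  # ['web-1', 'db-1']
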
def _translateUtteranceIntoInputVector(self, utter, call):
    # meta info + BOW + slot/value matching result
    #--CLASS
    convClassInput = None
    if utter["transcript"] not in LSTMWithBOWTracker.dictFuzzyMatchingResult:
        convClassInput = [0.0] * self.TOTALSIZEOFCLASSFeature
        for topic in self.tagsets.keys():
            for slot in self.tagsets[topic].keys():
                convClassInput[self.dictIn["CLASS_" + slot]] = fuzz.partial_ratio(slot, utter["transcript"])
                for value in self.tagsets[topic][slot]:
                    convClassInput[self.dictIn["CLASS_" + value]] = fuzz.partial_ratio(value, utter["transcript"])
        # cache the per-transcript result so repeated transcripts are not re-scored
        LSTMWithBOWTracker.dictFuzzyMatchingResult[utter["transcript"]] = copy.deepcopy(convClassInput)
    else:
        convClassInput = LSTMWithBOWTracker.dictFuzzyMatchingResult[utter["transcript"]]
    #--input
    convSentenceInput = None
    if not self.isUseSentenceRepresentationInsteadofBOW:
        convSentenceInput = [0.0] * self.TOTALSIZEOFSENTENCEFeature
        convSentenceInput[self.dictIn["SPEAKER_" + utter["speaker"]]] = 1.0
        convSentenceInput[self.dictIn["TOPIC_" + utter["segment_info"]["topic"]]] = 1.0
        splitedtrans = self.__getRegurelisedBOW(utter["transcript"])
        for word in splitedtrans:
            if ("WORD_" + word) in self.dictIn:  # ignore out-of-vocabulary words
                convSentenceInput[self.dictIn["WORD_" + word]] = 1.0
        convSentenceInput[self.dictIn["BIO_" + utter['segment_info']['target_bio']]] = 1.0
    elif self.isUseSentenceRepresentationInsteadofBOW:
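
# The CLASS branch above memoizes the expensive per-transcript fuzzy scoring
# in a class-level dict. A minimal standalone sketch of the same caching idea
# (the names here are illustrative, not from the original tracker):
import copy
from fuzzywuzzy import fuzz

_fuzzy_cache = {}

def fuzzy_features(transcript, labels):
    if transcript not in _fuzzy_cache:
        _fuzzy_cache[transcript] = [fuzz.partial_ratio(label, transcript) for label in labels]
    return copy.deepcopy(_fuzzy_cache[transcript])  # copy so callers cannot mutate the cache

print(fuzzy_features("i want cheap food", ["price range", "food", "area"]))
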
    if len(faqList) > 0:
        em = discord.Embed(title="List of FAQ tags",
                           description=", ".join(faqList).title(),
                           colour=discord.Colour.gold())
    else:
        em = discord.Embed(title="Error",
                           description="This server does not have any defined FAQ tags.",
                           colour=discord.Colour.red())
elif query in await faqdb(ctx, keys=True):
    em = await embed_faq(ctx, self.bot, query)
else:
    closeItems = []
    for item in await faqdb(ctx, keys=True):
        itemRatio = fuzz.ratio(query, item)
        if itemRatio >= 75:
            closeItems.append((itemRatio, item.title()))
    if len(closeItems) > 0:
        if len(closeItems) == 1:
            em = await embed_faq(ctx, self.bot, closeItems[0][1].lower(),
                                 title=f"Could not find \"{query.title()}\" in FAQ tags. Did you mean \"{closeItems[0][1]}\"?",
                                 color=discord.Colour.orange())
        else:
            em = discord.Embed(title=f"Could not find \"{query.title()}\" in FAQ tags.",
                               description=f"Did you mean {', '.join([item[1] for item in closeItems])}?",
                               colour=discord.Colour.orange())
    else:
        em = discord.Embed(title="Error",
                           description=f"Could not find \"{query.title()}\" or any similarly named tags in FAQ tags." + "\n" +
                                       f"Would you like to search [the wiki](https://wiki.factorio.com/index.php?search={query.replace(' ', '%20')})?",
                           colour=discord.Colour.red())
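
# The "did you mean" logic above keys off a fuzz.ratio cutoff of 75. A quick
# standalone check of what that threshold admits (tags invented here):
from fuzzywuzzy import fuzz

print(fuzz.ratio("biter", "biters"))  # ~91: close enough to be suggested
print(fuzz.ratio("biter", "trains"))  # well below 75: not suggested
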
def match(instructor):
    # an empty lastname/firstname matches anything; otherwise the fuzz.ratio
    # similarity must clear the threshold
    l = not lastname or fuzz.ratio(lastname.lower(), instructor[0].lower()) >= threshold
    f = not firstname or fuzz.ratio(firstname.lower(), instructor[1].lower()) >= threshold
    return l and f
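
# Hedged usage sketch: match() closes over lastname, firstname and threshold,
# and instructor is assumed to be a (last, first) pair; the values below are
# invented. An empty firstname matches anyone.
lastname, firstname, threshold = "Smith", "", 80
print(match(("Smyth", "Jane")))  # True: fuzz.ratio("smith", "smyth") is exactly 80
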
situation_title = situation['name'] if situation.get('name') else ''
situation_desc = situation['description'] if situation.get('description') else None
if situation_desc:
    if not additional_keywords:
        print_nfo("[Google Play Music] [{0}] '{1} : {2}'."
                  .format(arg,
                          situation_title,
                          situation_desc))
    else:
        print_nfo("[Google Play Music] [{0} - {1}] '{2} : {3}'."
                  .format(arg,
                          additional_keywords if additional_keywords else '(no keywords)',
                          situation_title,
                          situation_desc))
        # collect the situations whose titles loosely match the keywords
        if fuzz.partial_ratio(additional_keywords, situation_title) > 50:
            situation_titles.append(situation_title)
            situation_dict[situation_title] = situation
if len(situation_titles) > 1:
    # several candidates: pick the single best fuzzy match
    situation_title = process.extractOne(additional_keywords, situation_titles)[0]
    situation = situation_dict[situation_title]
elif len(situation_titles) == 1:
    situation_title = situation_titles[0]
    situation = situation_dict[situation_title]
if situation:
    print_wrn("[Google Play Music] Playing '{0}'."
              .format(to_ascii(situation_title)))
    self.__enqueue_station_unlimited_v2(situation)
if not situation:
def extract_entity(text, dictionary):
    entity_output = []
    text = create_spacy_clean(text)
    # exact keyword hits from the dictionary
    for user_entity in dictionary.extract_keywords(text):
        output = {"value": str(user_entity[0]), "category": str(user_entity[1])}
        if output not in entity_output:
            entity_output.append(output)
    # fuzzy hits: match the text against every known synonym, then map each
    # matching synonym back to its canonical entity
    synonyms = list(dictionary.get_all_keywords().keys())
    for synonym in process.extractBests(text, synonyms, score_cutoff=90, scorer=fuzz.token_set_ratio):
        entities = dictionary.extract_keywords(synonym[0])
        output = {"value": str(entities[0][0]), "category": str(entities[0][1])}
        if output not in entity_output:
            entity_output.append(output)
    return entity_output
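
# Hedged usage sketch for extract_entity. It assumes the dictionary behaves
# like flashtext's KeywordProcessor with (value, category) tuples as clean
# names, and stands in a trivial normalizer for create_spacy_clean; both are
# assumptions for illustration, not the original project's setup.
from flashtext import KeywordProcessor
from fuzzywuzzy import fuzz, process

def create_spacy_clean(text):  # stand-in normalizer for this sketch
    return ' '.join(text.lower().split())

dictionary = KeywordProcessor()
dictionary.add_keyword('new york', ('New York', 'city'))
dictionary.add_keyword('big apple', ('New York', 'city'))

print(extract_entity("I love the Big Apple", dictionary))
# -> [{'value': 'New York', 'category': 'city'}]
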
def is_fuzzy(self, current_path, search_string):
    """Return True when the search string loosely matches the current path."""
    return fuzz.partial_ratio(search_string, current_path) > 50
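
# Quick illustration of the 50-point partial_ratio threshold used above
# (the paths are invented for the example):
from fuzzywuzzy import fuzz

print(fuzz.partial_ratio("conf", "/etc/nginx/conf.d"))  # 100: exact substring
print(fuzz.partial_ratio("xyz", "/etc/nginx/conf.d"))   # low: would not match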