def test_ahmad(self):
    text = u'əhməd'
    self.assertEqual('ahmad', ascii_text(text))
def test_empty(self):
    self.assertEqual(None, slugify(None))
    self.assertEqual(None, ascii_text(None))
    self.assertEqual(None, latinize_text(None))
    self.assertEqual(None, normalize(None))
    self.assertEqual(None, normalize(''))
    self.assertEqual(None, normalize(' '))
def test_azeri(self):
    text = u'FUAD ALIYEV ƏHMƏD OĞLU'
    self.assertEqual('FUAD ALIYEV AHMAD OGLU', ascii_text(text))
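# A minimal standalone sketch of the behaviour the tests above assert,
# assuming the helpers are imported from the `normality` package.
from normality import ascii_text, latinize_text, normalize, slugify

assert ascii_text(u'əhməd') == 'ahmad'
assert ascii_text(u'FUAD ALIYEV ƏHMƏD OĞLU') == 'FUAD ALIYEV AHMAD OGLU'
# None, empty and whitespace-only inputs come back as None, not ''.
assert slugify(None) is None and latinize_text(None) is None
assert normalize(None) is None and normalize('') is None and normalize(' ') is None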
    # Build the Elasticsearch auto-complete query for the given name prefix.
    q = {
        'match_phrase_prefix': {'name': prefix.strip()}
    }
    if schemas is not None and len(schemas):
        q = add_filter(q, {'terms': {'schema': schemas}})
    # TODO: is this correct? should we allow filter by dataset entities?
    # XXXX broken use $physical
    q = add_filter(q, {'terms': {'collection_id': authz.collections_read}})
    q = {
        'size': size,
        'sort': [{'doc_count': 'desc'}, '_score'],
        'query': q,
        '_source': ['name', 'schema', 'fingerprints', 'doc_count']
    }
    ref = ascii_text(prefix)
    result = es.search(index=es_index, doc_type=TYPE_ENTITY, body=q)
    options = []
    for res in result.get('hits', {}).get('hits', []):
        ent = res.get('_source')
        # Flag results whose ASCII-folded fingerprints contain the folded prefix.
        terms = [ascii_text(t) for t in ent.pop('fingerprints', [])]
        ent['match'] = ref in terms
        ent['score'] = res.get('_score')
        ent['id'] = res.get('_id')
        options.append(ent)
    return {
        'prefix': prefix,
        'results': options
    }
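# A self-contained sketch of the matching step above: both the query prefix and
# the stored fingerprints are folded with ascii_text() before comparison, so a
# query written in Latin script can match a transliterated name. exact_match()
# is a hypothetical helper, not part of the original module.
from normality import ascii_text

def exact_match(prefix, fingerprints):
    ref = ascii_text(prefix)
    return ref in [ascii_text(f) for f in fingerprints]

assert exact_match(u'əhməd', [u'ahmad', u'fuad aliyev'])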
def __init__(self, manager, name, email):
    self.email = ascii_text(stringify(email))
    self.name = stringify(name)
    if not registry.email.validate(self.email):
        self.email = None
    if registry.email.validate(self.name):
        self.email = self.email or ascii_text(self.name)
        self.name = None
    # This should be using formataddr, but I cannot figure out how
    # to use that without encoding the name.
    self.label = None
    if self.name is not None and self.email is not None:
        self.label = '%s <%s>' % (self.name, self.email)
    elif self.name is None and self.email is not None:
        self.label = self.email
    elif self.email is None and self.name is not None:
        self.label = self.name
    self.entity = None
    if self.email is not None:
        key = self.email.lower().strip()
        fragment = safe_fragment(self.label)
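# For context on the formataddr comment above: email.utils.formataddr builds the
# same 'Name <address>' label, but it MIME-encodes a non-ASCII display name,
# which is presumably what the manual string formatting here is meant to avoid.
from email.utils import formataddr

print(formataddr(('Ahmad', 'ahmad@example.com')))
# -> Ahmad <ahmad@example.com>
print(formataddr((u'Əhməd', 'ahmad@example.com')))
# -> an RFC 2047 encoded-word form such as '=?utf-8?...?= <ahmad@example.com>'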
def text_query_string(text, literal=False):
    if text is None or not len(text.strip()):
        return match_all()
    if literal:
        text = '"%s"' % ascii_text(text)
    return {
        'query_string': {
            'query': text,
            'fields': ['text'],
            'default_operator': 'AND',
            'use_dis_max': True
        }
    }
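# Usage sketch for the literal branch above: the text is ASCII-folded and
# wrapped in quotes, so Elasticsearch treats it as an exact phrase on the
# 'text' field.
clause = text_query_string(u'əhməd', literal=True)
assert clause['query_string']['query'] == '"ahmad"'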
def normalize_value(self, value):
    value = collapse_spaces(value)
    return value, ascii_text(value)
    # Add inverted properties. This takes all the properties
    # of a specific type (names, dates, emails etc.)
    invert = prop.type.index_invert
    if invert:
        if invert not in data:
            data[invert] = []
        for norm in prop.type.normalize(values):
            if norm not in data[invert]:
                data[invert].append(norm)

    data['fingerprints'] = list(set(data['fingerprints']))

    # Add latinised names
    names = data.get('names', [])
    for name in list(names):
        names.append(ascii_text(name))
    data['names'] = list(set(names))

    # Get implied schemata (i.e. parents of the actual schema)
    data['schema'] = schema.name
    data['schemata'] = [p.name for p in schema.schemata if not p.hidden]

    # Second name field for non-tokenised sorting.
    if 'name' in data:
        data['name_sort'] = data.get('name')
    return data
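# Isolated sketch of the latinised-names step above: every name is also indexed
# in its ASCII-transliterated form, so a query typed as 'Ahmad' can match a
# record whose stored name is 'Əhməd'. The sample data is illustrative only.
from normality import ascii_text

names = [u'ƏHMƏD OĞLU', u'FUAD ALIYEV']
names = list(set(names + [ascii_text(n) for n in names]))
# names now holds both the original and the transliterated spellings.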