Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def spanish_swadesh_list(stemmed=True):
    """
    Helper function that returns a list of strings with the entries of the
    spanish Swadesh list, optionally reduced to their stems.

    Parameters
    ----------
    stemmed : bool (default=True)
        If True, every entry is passed through the Spanish stemmer before it
        is collected. If the stemmer cannot be loaded, a warning is printed
        and the unstemmed entries are returned instead.

    Returns
    -------
    swadesh_entries : list
        One string per comma-separated entry of the Swadesh file, in file
        order.
    """
    # Only build the stemmer when it is actually needed; keep an explicit
    # None so a failed load cannot surface later as a NameError.
    stemmer = None
    if stemmed:
        try:
            stemmer = SpanishStemmer(True)
        except Exception:
            # Best effort: warn and fall back to the unstemmed entries.
            print("[!] Warning, Spanish stemmer could not be loaded!")

    # load swadesh list
    swadesh_file = os.path.join(
        rcParams['_path'], 'data', 'swadesh', 'swadesh_spa.txt')

    swadesh_entries = []
    # The context manager closes the file even if reading or stemming fails.
    with codecs.open(swadesh_file, "r", "utf-8") as swadesh:
        for line in swadesh:
            for e in line.strip().split(","):
                e = e.strip()
                if stemmer is not None:
                    swadesh_entries.append(stemmer.stem(e))
                else:
                    swadesh_entries.append(e)
    return swadesh_entries
def __call__(self, args):
    """
    Print the entries of lingpy.settings.rcParams, one per line.

    If ``args.params`` is truthy, only the keys contained in it are printed;
    otherwise every entry is printed.
    """
    for name, value in lingpy.settings.rcParams.items():
        # Skip keys the caller did not ask for when a selection was given.
        if args.params and name not in args.params:
            continue
        print('{0:20} : {1}'.format(name, repr(value)))
def evaluate_string(self, string, tokens=False, **keywords):
# NOTE(review): this excerpt ends before score, dist and teststring are used;
# the comments below describe only the visible part.
# Supply stress/diacritics from rcParams and cldf=False through setdefaults
# (presumably only for keys the caller did not pass -- confirm in setdefaults).
setdefaults(keywords, stress=rcParams['stress'],
diacritics=rcParams['diacritics'], cldf=False)
# No tokens given (falsy): derive them from the raw string.
if not tokens:
tokens = ipa2tokens(string)
score = 1
# Value stored under the '#' key of self.dist.
dist = self.dist['#']
# Prosodic string computed from the tokens with the 'art' entry of rcParams.
prostring = prosodic_string(tokens, rcParams['art'], cldf=keywords['cldf'],
diacritics=keywords['diacritics'],
stress=keywords['stress'])
# Pair each prosodic symbol with the output of tokens2class when
# self.classes is truthy, otherwise with the raw token.
if self.classes:
c = tokens2class(tokens, self.model, cldf=keywords['cldf'],
diacritics=keywords['diacritics'],
stress=keywords['stress'])
teststring = list(zip(prostring, c))
else:
teststring = list(zip(prostring, tokens))
def __init__(self, filename, conf=''):
"""
Parse data regularly if the data has not been loaded from a pickled version.
"""
# NOTE(review): this excerpt ends inside the second input branch; handling of
# other input kinds is not visible here.
self.log = log.get_logger()
# try to load the data
internal_import = False
# check whether it's a dictionary from which we load
if isinstance(filename, dict):
input_data = filename
# A dict without a 'filename' key gets the default name from rcParams.
if 'filename' not in input_data:
self.filename = rcParams['filename']
internal_import = True
# make check for correct input, there was a bug with a wrong
# evaluation which is hopefully fixed by now
# The row under key 0 must have as many items as the row under the first
# integer key found; otherwise both rows are logged and ValueError is raised.
# NOTE(review): 0 is itself an int key, so tmp_keys[0] can be 0 and the
# comparison would then check row 0 against itself -- confirm intent.
tmp_keys = [k for k in input_data if isinstance(k, int)]
if len(input_data[0]) != len(input_data[tmp_keys[0]]):
log.warning(input_data[0], input_data[tmp_keys[0]])
raise ValueError("[!] Wrong input format!") # pragma: no cover
# check whether it's another wordlist-object
elif hasattr(filename, '_data') and hasattr(filename, '_meta'):
# Copy every row of _data into a fresh list, then merge in the _meta items.
input_data = dict([(key, [v for v in value]) for key, value in \
filename._data.items()])
input_data.update(filename._meta.items())
# Rebuild the row under key 0 from the header keys, ordered ascending by
# their header values (presumably column positions -- confirm).
input_data[0] = [a for a, b in sorted(
filename.header.items(),
key=lambda x: x[1],
reverse=False)]
def get_classes(alm):
    """
    Render every segment of an alignment as an HTML ``div`` of class
    ``residue`` plus a ``dolgo_*`` class, and return the concatenated markup.

    Gap symbols ("-") receive the class ``dolgo_GAP``; all other segments are
    classified with ``token2class`` using the 'dolgo' entry of rcParams.
    """
    template = '<div class="residue {1}">{0}</div>'
    # Three class names are replaced by more readable ones.
    renamed = {
        'dolgo__': 'dolgo_X',
        'dolgo_1': 'dolgo_TONE',
        'dolgo_0': 'dolgo_ERROR',
    }
    cells = []
    for segment in alm:
        if segment == '-':
            css = 'dolgo_GAP'
        else:
            css = 'dolgo_' + token2class(segment, rcParams['dolgo'])
            css = renamed.get(css, css)
        cells.append(template.format(segment, css))
    return ''.join(cells)
if tree_calc == 'upgma':
algorithm = cluster.upgma
elif tree_calc == 'neighbor':
algorithm = cluster.neighbor
newick = algorithm(matrix,taxa,distances)
tree = cg.LoadTree(treestring=newick)
if not filename:
return tree
else:
out = codecs.open(filename+'.nwk','w','utf-8')
out.write(str(tree))
out.close()
if rcParams['verbose']: print(rcParams['M_file_written'].format(filename,'nwk'))
test = "consensus"
The name of the column containing the test solutions.
Returns
-------
dist : float
The mean edit distance between gold and test reconstructions.
Notes
-----
This function has an alias ("med"). Calling it will produce the same
results.
"""
setdefaults(
keywords,
merge_vowels=rcParams['merge_vowels'],
model=rcParams['model'])
distances = []
for key, idxs in wordlist.get_etymdict(ref=ref).items():
# get only valid numbers for index-search
idx = [idx[0] for idx in idxs if idx != 0][0]
log.debug('{0}, {1}'.format(idx, idxs))
# get proto and consensus from wordlist
proto = wordlist[idx, gold]
consensus = wordlist[idx, test]
log.debug('{0}, {1}'.format(proto, consensus))
# modified : 2013-07-12 13:26
"""
Module provides namespaces and data for Evolaemp applications.
"""
__author__="Johann-Mattis List"
__date__="2013-07-12"
from ...settings import rcParams,rc
# rc() is called with the 'evolaemp' schema before any rcParams value is read
# below, so the names that follow reflect whatever that call configures
# (presumably it switches the active settings schema -- confirm in settings.rc).
rc(schema='evolaemp')
# Module-level aliases for individual rcParams entries, read once at import.
ipa_diacritics = rcParams['diacritics']
ipa_vowels = rcParams['vowels']
ipa_tones = rcParams['tones']
sca = rcParams['sca']
asjp = rcParams['asjp']
dolgo = rcParams['dolgo']
art = rcParams['art']
_color = rcParams['_color']
def _get_pairwise_alignments(
self,
mode='global',
gop=-2,
scale=0.5,
factor=0.3,
restricted_chars='T_',
**keywords):
"""
Function calculates all pairwise alignments from the data.
"""
# NOTE(review): this excerpt ends before mode, gop, scale, factor and
# restricted_chars are used; only the setup phase is visible.
# Take 'transform' from rcParams['align_transform'] unless the caller passed it.
if 'transform' not in keywords:
keywords['transform'] = rcParams['align_transform']
# create array for alignments
# (a self.height x self.height grid of zeros)
self._alignments = [[0 for i in range(self.height)] for i in range(self.height)]
# create the distance matrix
self.matrix = []
# check for the mode, if sonority profiles are not chose, take the
# simple alignment function
if self._sonars:
# Bind the chosen transform so prosodic_weights can be mapped over the
# prosodic strings with a single argument.
make_pro_weights = partial(prosodic_weights, _transform=keywords['transform'])
# get the weights
# NOTE(review): the guard tests for an attribute named 'weights' but the
# assignment sets '_weights' -- confirm which name is intended.
if not hasattr(self, 'weights'):
self._weights = list(map(make_pro_weights, self._prostrings))
Select the fileformat to which the tree shall be written.
filename : str
Determine the name of the file to which the data shall be written.
Defaults to a timestamp.
figsize : tuple (default=(10,10))
Determine the size of the figure.
"""
default = dict(
ax_linewidth=0,
bg='black',
bottom=0.05,
change=lambda x: x ** 1.75,
edge_list=[],
figsize=(10, 10),
filename=rcParams['filename'],
fontweight='bold',
frameon=False,
ha='center',
labels=[],
left=0.05,
linecolor='black',
linewidth=5,
no_labels=False,
node_dict={},
nodecolor='black',
nodesize=10,
right=0.95,
start=0,
textcolor='white',
textsize='10',
top=0.95,