if line.startswith('@'):
key, value = [s.strip() for s in line[1:].split(':', 1)]
if key == 'tree':
meta["tree"] = cg.LoadTree(treestring=value)
elif key == 'json':
for j1, j2 in json.loads(value).items():
meta[j1] = j2
else:
if key not in meta:
meta[key] = value
else:
if isinstance(meta[key], list):
meta[key].append(value)
else:
log.warning(
"Key '{0}' in input file is not unique! Use JSON-format for "
"these datatypes!".format(key))
meta[key] = [meta[key]] + [value]
# line starts with complex stuff
elif line.startswith('<'):
tmp = line[1:line.index('>')]
# check for specific keywords
if ' ' in tmp:
dtype = tmp.split(' ')[0]
keys = {k: v[1:-1]
for k, v in [key.split('=') for key in tmp.split(' ')[1:]]}
else:
dtype = tmp.strip()
keys = {}
tmp = []
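# For orientation, a hypothetical sketch (all values made up) of the header
# lines the branches above handle:
sample_lines = [
    '@author: Example Author',        # plain key/value -> meta['author']
    '@author: Second Author',         # repeated key -> values collected in a list
    '@json: {"threshold": 0.55}',     # JSON payload merged into meta
    '@tree: ((A,B),C);',              # Newick string handed to cg.LoadTree
    '<msa id="1" ref="cogid">',       # complex block: dtype plus key="value" pairs
]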
'align="center" bgcolor="{1}"><font color="{2}">{0}</font>\n'
# start with filling the taxon
out = ''
# go on with the colors
for i, char in enumerate(string):
try:
c = rcParams['_color'][char]
fg = '#000000'
except KeyError:
try:
c = rcParams['_color'][char[0]]
fg = '#000000'
except KeyError:
log.warning("Unknown character '" + char + "', press ANY key to continue. ")
c = '#ffffff'
fg = '#eb3410'
if i in swaps:
out += td_swap.format(char, c, fg)
else:
out += td_residue.format(char, c, fg)
return '<table><tr>' + out + '</tr></table>'
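# Tiny self-contained illustration of the cell-building loop above;
# demo_colors is a made-up stand-in for rcParams['_color']:
demo_colors = {'t': '#85c1e9', 'o': '#f9e79b', 'a': '#f9e79b'}
row = ''.join(
    '<td align="center" bgcolor="{1}"><font color="{2}">{0}</font></td>'.format(
        ch,
        demo_colors.get(ch, '#ffffff'),
        '#000000' if ch in demo_colors else '#eb3410')
    for ch in 't o x t a'.split())
html = '<table><tr>' + row + '</tr></table>'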
self.log = log.get_logger()
# try to load the data
internal_import = False
# check whether it's a dictionary from which we load
if isinstance(filename, dict):
input_data = filename
if 'filename' not in input_data:
self.filename = rcParams['filename']
internal_import = True
# sanity check: the header row (key 0) must have as many columns as the
# data rows
tmp_keys = [k for k in input_data if isinstance(k, int) and k != 0]
if tmp_keys and len(input_data[0]) != len(input_data[tmp_keys[0]]):
    log.warning("{0} vs. {1}".format(input_data[0], input_data[tmp_keys[0]]))
    raise ValueError("[!] Wrong input format!")  # pragma: no cover
# check whether it's another wordlist-object
elif hasattr(filename, '_data') and hasattr(filename, '_meta'):
input_data = {key: list(value) for key, value in filename._data.items()}
input_data.update(filename._meta)
input_data[0] = [
    a for a, b in sorted(filename.header.items(), key=lambda x: x[1])]
internal_import = True
self.filename = rcParams['filename']
# or whether the data is an actual file
elif isinstance(filename, string_types) and os.path.isfile(filename):
input_data = read_qlc(filename)
self.filename = filename
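# The branches above accept three kinds of input; sketched here with lingpy's
# Wordlist class (assumes a file 'data.qlc' exists on disk):
from lingpy import Wordlist

wl_from_dict = Wordlist({
    0: ['doculect', 'concept', 'ipa'],    # row 0 holds the header
    1: ['German', 'hand', 'hant'],        # integer keys hold the data rows
})
wl_from_file = Wordlist('data.qlc')       # path to an existing qlc file
wl_copy = Wordlist(wl_from_file)          # another wordlist-like object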
alignment = alignment or self._alignment
ref = ref or self._ref
if alignment not in self.header:
raise ValueError(
'No alignments found in your data. ' +
'You should carry out an alignment analysis first!')
# dictionary collecting the reduced alignments so they can be added as a new
# entry type afterwards for quick access
D = {}
for k, d in self._meta['msa'][ref].items():
ralms = reduce_alignment(d[alignment])
if len(ralms[0]) != len(d[alignment][0]):
log.warning('Found an alignment that could be reduced.')
d['_' + alignment] = ralms
for idx, alm in zip(d['ID'], d['_' + alignment]):
D[idx] = alm
for k in self:
if k not in D:
D[k] = ['']
self.add_entries('_' + alignment, D, lambda x: x)
# check for existing attributes
if hasattr(self, 'cscorer') and not kw['force']:
    log.warning(
        "An identical scoring function has already been calculated, "
        "force recalculation by setting 'force' to 'True'.")
    return
# check for attribute
if hasattr(self, 'params') and not kw['force']:
if 'cscorer' in self.params:
if self.params['cscorer'] == params:
log.warning(
"An identical scoring function has already been "
"calculated, force recalculation by setting 'force'"
" to 'True'.")
return
else:
log.warning(
"A different scoring function has already been calculated, "
"overwriting previous settings.")
# store parameters
self.params = {'cscorer': params}
self._meta['params'] = self.params
self._stamp += "# Parameters: " + parstring + '\n'
# get the correspondence distribution
self._corrdist = self._get_partial_corrdist(**kw)
# get the random distribution
self._randist = self._get_partial_randist(**kw)
# get the average gop
gop = sum([m[1] for m in kw['modes']]) / len(kw['modes'])
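# Example of the averaging above, with mode tuples of the (mode, gop, scale)
# form used by lexstat-style keywords (values illustrative):
example_modes = [('global', -2, 0.5), ('local', -1, 0.5)]
example_gop = sum(m[1] for m in example_modes) / len(example_modes)  # -> -1.5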
while idx in D:
idx += 1
if not D[0]:
columns = list(s.keys())
D[0] = [c.lower() for c in columns]
D[idx] = [
    datatypes.get(namespace.get(column, ''), lambda x: x)(s.get(column, ''))
    for column in columns]
D[0] = [namespace.get(c, c) for c in columns]
if len(D[0]) != len(set(D[0])):
log.warning('|'.join(columns))
log.warning('|'.join(D[0]))
raise ValueError('namespace clashes, cannot parse data')
# convert to wordlist and return
return cls(D, **kwargs)
else:
# For most LingPy applications, it might be best to see whether we got
# a Wordlist module.
raise ValueError("LingPy has no procedures for CLDF {:} data.".format(
dataset.module))
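# Shape of the dict handed to cls(D, **kwargs) in the successful branch above:
# integer keys, row 0 holding the lower-cased, namespace-mapped column names,
# the remaining rows the values in the same order (a made-up example):
D_example = {
    0: ['doculect', 'concept', 'form'],
    1: ['German', 'hand', 'hant'],
    2: ['English', 'hand', 'hænd'],
}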
# join the scoring parameters into one signature string (the enclosing
# assignment to parstring is assumed from its use further up)
parstring = '_'.join([
    '{vscale:.2f}',
'{runs}',
'{scoring_threshold:.2f}',
'{modestring}',
'{factor:.2f}',
'{restricted_chars}',
'{method}',
'{preprocessing}',
'{preprocessing_threshold}',
'{unexpected:.2f}',
'{unattested:.2f}'
]).format(**params)
# check for existing attributes
if hasattr(self, 'cscorer') and not kw['force']:
log.warning(
"An identical scoring function has already been calculated, "
"force recalculation by setting 'force' to 'True'.")
return
# check for attribute
if hasattr(self, 'params') and not kw['force']:
if 'cscorer' in self.params:
if self.params['cscorer'] == params:
log.warning(
"An identical scoring function has already been "
"calculated, force recalculation by setting 'force'"
" to 'True'.")
return
else:
log.warning(
    "A different scoring function has already been calculated, "
    "overwriting previous settings.")
kw.update(keywords)
function = self._distance_method(
method, scale=scale, factor=factor,
restricted_chars=restricted_chars, mode=mode, gop=gop,
restriction=restriction, external_scorer=kw['external_scorer'])
concepts = [concept] if concept else sorted(self.rows)
for c in concepts:
log.info("Analyzing words for concept <{0}>.".format(c))
indices = self.get_list(row=c, flat=True)
matrix = []
for idxA, idxB in util.combinations2(indices):
try:
d = function(idxA, idxB)
except ZeroDivisionError:
log.warning(
"Encountered Zero-Division for the comparison of "
"{0} ({2}) and {1} ({3})".format(
''.join(self[idxA, self._segments]),
''.join(self[idxB, self._segments]),
idxA, idxB
))
d = 100
matrix += [d]
matrix = misc.squareform(matrix)
if not concept:
yield c, indices, matrix
else:
yield matrix
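# Toy illustration (independent of lingpy) of how the flat list of pairwise
# distances collected above maps onto the square matrix built by
# misc.squareform:
from itertools import combinations

def toy_squareform(flat, n):
    matrix = [[0.0] * n for _ in range(n)]
    for (i, j), d in zip(combinations(range(n), 2), flat):
        matrix[i][j] = matrix[j][i] = d
    return matrix

print(toy_squareform([0.2, 0.5, 0.4], 3))
# -> [[0.0, 0.2, 0.5], [0.2, 0.0, 0.4], [0.5, 0.4, 0.0]]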