Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self.add_entries(
self._segments, self._transcription, kw['tokenize'],
merge_vowels=kw['merge_vowels'],
expand_nasals=kw['expand_nasals'])
# add a debug procedure for tokens
if kw["check"]:
errors = list(_check_tokens(
[(key, self[key, self._segments]) for key in self],
cldf=kw['cldf']))
if errors:
lines = ["ID\tTokens\tError-Type"]
for key, msg, line in errors:
lines.append("{0}\t<{1}>\t{2}".format(
key, msg, ' '.join(line)))
util.write_text_file(kw['errors'], lines)
if kw["apply_checks"] or util.confirm(
"There were errors in the input data - exclude them?"):
self.output(
'tsv',
filename=self.filename + '_cleaned',
subset=True,
rows={"ID": "not in " + str([i[0] for i in errors])})
# load the data in a new LexStat instance
# and copy the __dict__
lexstat = LexStat(self.filename + '_cleaned.tsv', **kw)
lexstat._meta['errors'] = [i[0] for i in errors]
self.__dict__ = copy(lexstat.__dict__)
return
else:
log.info("No obvious errors found in the data.")
if fileformat in ['triple', 'triples', 'triples.tsv']:
return tsv2triple(self, keywords['filename'] + '.' + fileformat)
if fileformat in ['paps.nex', 'paps.csv']:
paps = self.get_paps(
ref=keywords['ref'], entry=keywords['entry'], missing=keywords['missing'])
kw = dict(filename=keywords['filename'] + '.paps')
if fileformat == 'paps.nex':
kw['missing'] = keywords['missing']
return pap2nex(self.cols, paps, **kw)
return pap2csv(self.cols, paps, **kw)
# simple printing of taxa
if fileformat == 'taxa':
assert hasattr(self, 'taxa')
return util.write_text_file(keywords['filename'] + '.taxa', self.cols)
# csv-output
if fileformat in ['csv', 'qlc', 'tsv']:
# get the header line
header = sorted(
[s for s in set(self._alias.values()) if s in self._header],
key=lambda x: self._header[x])
header = [h.upper() for h in header]
self._meta.setdefault('taxa', self.cols)
# get the data, in case a subset is chosen
if not keywords['subset']:
# write stuff to file
return wl2qlc(header, self._data, **keywords)
def _export_score_dict(score_dict, filename='score_dict.csv'):
    """Export a scoring dictionary to a tab-separated text file.

    Parameters
    ----------
    score_dict : dict
        Mapping of segment pairs ``(l1, l2)`` to scores. It is assumed to
        contain an entry for every ordered pair of the segments that occur
        as the first element of its keys.
    filename : str (default="score_dict.csv")
        Path of the file to write. Defaults to the previously hard-coded
        ``score_dict.csv`` for backward compatibility.

    Notes
    -----
    The output is a square matrix with a header row/column of segments.
    Despite the ``.csv`` default name, fields are tab-separated (kept for
    backward compatibility with earlier output).
    """
    # sorted() instead of iterating the raw set: makes the row/column
    # order deterministic across runs (set iteration order is arbitrary).
    letters = sorted(set(key[0] for key in score_dict))
    # header row; '+' marks the top-left corner cell
    rows = [['+'] + letters]
    for l1 in letters:
        rows.append([l1] + [str(score_dict[(l1, l2)]) for l2 in letters])
    util.write_text_file(filename, '\n'.join('\t'.join(row) for row in rows))
A ~lingpy.thirdparty.cogent.tree.PhyloNode object for handling tree
files.
"""
if tree_calc == 'upgma':
algorithm = cluster.upgma
elif tree_calc == 'neighbor':
algorithm = cluster.neighbor
else:
raise ValueError(tree_calc)
tree = cg.LoadTree(treestring=algorithm(matrix, taxa, distances))
if not filename:
return tree
util.write_text_file(filename + '.nwk', text_type(tree))
def _graph_or_file(graph, filename):
if filename:
util.write_text_file(filename + '.gml', nx.generate_gml(graph))
return
return graph
* VALUE (the entry in the TSV file)
"""
tstore = []
for head in wordlist.header:
log.debug('tsv2triple: ' + head)
for key in wordlist:
tstore.append((key, head.upper(), wordlist[key, head]))
if outfile:
out = ''
for a, b, c in tstore:
if isinstance(c, list):
c = ' '.join([text_type(x) for x in c])
if c != '-':
out += '{0}\t{1}\t{2}\n'.format(a, b, c)
util.write_text_file(outfile, out, normalize='NFC')
return tstore
text = _template.format(
matrix=_matrix,
ntax=wordlist.width,
nchar=len(matrix[0]),
gap=gap, missing=missing,
dtype='RESTRICTION' if mode == 'MRBAYES' else 'STANDARD',
commands=block.format(blockname, assumptions),
custom=block.format(custom_name, '\n'.join(custom)) if custom else '',
symbols=symbols, chars=charblock
)
text = text.replace("\t", " " * 4) # normalise tab-stops
for i, (cogid, concept) in enumerate(concepts, 1):
text += '\n[MATRIX:{0}=COGID:{1}=CONCEPT:{2}]'.format(i, cogid, concept)
if filename:
util.write_text_file(filename, text)
return text
recB += [1.0]
preP += [1.0]
recP += [1.0]
bp = sum(preB) / len(preB)
br = sum(recB) / len(recB)
bf = 2 * (bp * br) / (bp + br)
pp = sum(preP) / len(preP)
pr = sum(recP) / len(recP)
pf = 2 * (pp * pr) / (pp + pr)
as_string('\n'.join(lines),
pprint=pprint)
if tofile:
write_text_file(filename + '.diff', lines)
return (bp, br, bf), (pp, pr, pf)