Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# first check for alms of different length
# NOTE(review): fragment - the enclosing function header and the definitions
# of `alignment` and `alm_clone` are outside this view, and the original
# indentation was lost.
alm_lens = [len(alm) for alm in alm_clone]
# if every row holds exactly one item, split that item on single spaces and
# record the resulting row length
if alm_lens.count(1) == len(alm_lens):
for i, alm in enumerate(alm_clone):
alm_clone[i] = alm[0].split(' ')
alm_lens[i] = len(alm_clone[i])
# rows of unequal length: append '-' fillers, then cut each row to max_len
if len(set(alm_lens)) > 1:
max_len = max(alm_lens)
for i, alm in enumerate(alm_clone):
new_alm = alm + ['-' for x in range(max_len)]
alm_clone[i] = new_alm[:max_len]
# then check for alms consisting only of gaps
cols = misc.transpose(alm_clone)
idxs = []
# collect the indices of columns whose only value is '-'
for i, col in enumerate(cols):
if set(col) == set('-'):
idxs += [i]
# delete from the highest index downwards so the remaining indices stay valid
for idx in idxs[::-1]:
for i, alm in enumerate(alm_clone):
del alm_clone[i][idx]
# when the cleaned copy differs from the input, log each row as
# "[!] before->after" at debug level and return the cleaned copy;
# otherwise hand back the input object itself
if alignment != alm_clone:
lgtxt = 'Modified the alignment:\n'
for i in range(len(alignment)):
lgtxt += '[!] ' + ' '.join(alignment[i]) + '->'
lgtxt += ' '.join(alm_clone[i]) + '\n'
log.debug(lgtxt)
return alm_clone
else:
return alignment
profileA, profileB, gop, scale, self.scorer, mode, gap_weight)
# NOTE(review): fragment - the line above closes a call whose start is outside
# this view; `sim`, `almA`, `almB`, `o`, `p`, `return_similarity` and `iterate`
# are defined elsewhere, and the original indentation was lost.
# when only the similarity is requested, return it before any gap tracing
if return_similarity:
return sim
# trace the gaps inserted in both aligned profiles and insert them
# in the original profiles
# a '-' in almA inserts a list of o 'X' items into profileA at that index;
# otherwise a '-' in almB inserts a list of p 'X' items into profileB
for i in range(len(almA)):
if almA[i] == '-':
profileA.insert(i, o * ['X'])
elif almB[i] == '-':
profileB.insert(i, p * ['X'])
# invert the profiles and the weight matrices by turning columns
# into rows and rows into columns
profileA = misc.transpose(profileA)
profileB = misc.transpose(profileB)
# return the aligned profiles and weight matrices
# with `iterate` the two profiles come back as a pair, otherwise as one
# concatenated sequence (profileA + profileB)
if iterate:
return profileA, profileB
return profileA + profileB
# solution, but for testing it hopefully suffices...)
# NOTE(review): fragment - the function header and the definitions of
# `matrix`, `gap_array`, `average`, `classes`, `msa`, `keywords` and
# `rcParams` are outside this view; the original indentation was lost and
# the fragment stops before the body of its last `if`.
# walk gap_array from its last entry to its first while i counts down from
# the last index of matrix; entries scoring at least
# 1 - average + average / 4 get the matrix item at the current i deleted
i = len(matrix) - 1
for score in gap_array[::-1]:
if score >= 1 - average + average / 4:
del matrix[i]
i -= 1
# check for classes
if classes:
# if classes are passed as array, we use this array as is
if isinstance(classes, list):
pass
# if classes is a Model-object
# (the test below actually checks whether `msa` has an `ipa2cls` attribute)
elif hasattr(msa, 'ipa2cls'):
msa.ipa2cls(model=keywords['model'])
classes = misc.transpose(msa.classes)
# if no tree is passed, it is a simple majority-rule principle that outputs
# the consensus string
cons = []
if not classes:
# per matrix item: count its values, multiply the count of the configured gap
# symbol by keywords['gap_scale'], and keep the most common value
for col in matrix:
count = Counter(col)
if rcParams['gap_symbol'] in count:
count[rcParams['gap_symbol']] *= keywords['gap_scale']
cons.append(count.most_common(1)[0][0])
elif classes:
# tmpA counts the class symbols of the current item of `classes`; tmpB counts
# the values of `matrix` found at the same (i, j) positions
for i, col in enumerate(classes):
tmpA, tmpB = Counter(col), defaultdict(int)
for j, c in enumerate(col):
tmpB[matrix[i][j]] += 1
# half the weight of gaps
if rcParams['gap_symbol'] in tmpA:
# NOTE(review): fragment - `sim`, `almA`, `almB`, `profileA`, `profileB`, `o`,
# `p`, `return_similarity` and `iterate` are defined outside this view, and
# the original indentation was lost.
# when only the similarity is requested, return it before any gap tracing
if return_similarity:
return sim
# trace the gaps inserted in both aligned profiles and insert them
# in the original profiles
# a '-' in almA inserts a list of o 'X' items into profileA at that index;
# otherwise a '-' in almB inserts a list of p 'X' items into profileB
for i in range(len(almA)):
if almA[i] == '-':
profileA.insert(i, o * ['X'])
elif almB[i] == '-':
profileB.insert(i, p * ['X'])
# invert the profiles and the weight matrices by turning columns
# into rows and rows into columns
profileA = misc.transpose(profileA)
profileB = misc.transpose(profileB)
# return the aligned profiles and weight matrices
# with `iterate` the two profiles come back as a pair, otherwise as one
# concatenated sequence (profileA + profileB)
if iterate:
return profileA, profileB
return profileA + profileB
def _align_profile(
self,
almsA,
almsB,
mode='global',
gop=-3,
scale=0.5,
factor=0,
gap_weight=0.5,
return_similarity=False,
iterate=False,
restricted_chars="T_"):
# NOTE(review): only the start of this method is visible - the body is cut
# off inside the `consA` expression below and the original indentation was
# lost. In the visible part only `almsA` and `almsB` are read; how the
# remaining parameters are used cannot be told from here.
# transpose both inputs before any further processing
profileA = misc.transpose(almsA)
profileB = misc.transpose(almsB)
# calculate profile length and profile depth for both profiles
# (o and p are the lengths of the first item of each transposed profile)
o = len(profileA[0])
p = len(profileB[0])
# create the weights by which the gap opening penalties will be modified
# every character is looked up through self._get with value='_sonars' and
# error=('X', 0) - presumably a sonority value with 0 as the fallback for
# 'X'; verify against self._get
sonarA = [[self._get(char, value='_sonars', error=('X', 0))
for char in line] for line in profileA]
sonarB = [[self._get(char, value='_sonars', error=('X', 0))
for char in line] for line in profileB]
# get the consensus string for the sonority profiles
# the visible part sums the non-zero entries of each `col`; the divisor and
# the rest of the expression are truncated in this view
try:
consA = [
int(sum([k for k in col if k != 0]) /
def _talign_profile(
self,
almsA,
almsB,
mode='global',
gop=-3,
scale=0.5,
gap_weight=0.5,
return_similarity=False,
iterate=False):
"""
Align profiles for tokens, not sound classes.
"""
# NOTE(review): the method is truncated in this view - the body of the final
# `for` loop is missing and the original indentation was lost.
# transpose both inputs before alignment
profileA = misc.transpose(almsA)
profileB = misc.transpose(almsB)
# calculate profile length and profile depth for both profiles
# (o and p are the lengths of the first item of each transposed profile)
o = len(profileA[0])
p = len(profileB[0])
# carry out the alignment
# talign.align_profile receives gop, scale, self.scorer, mode and gap_weight
# unchanged and its result is unpacked into two aligned sequences and a
# similarity value
almA, almB, sim = talign.align_profile(
profileA, profileB, gop, scale, self.scorer, mode, gap_weight)
# when only the similarity is requested, return it before any gap tracing
if return_similarity:
return sim
# trace the gaps inserted in both aligned profiles and insert them
# in the original profiles
for i in range(len(almA)):
# NOTE(review): fragment - the function header, the enclosing loop (if any)
# and the definitions of `wordlist`, `taxon_to_cognate_set`, `char_map`,
# `missing` and `matrix` are outside this view; the original indentation was
# lost.
# build one list with exactly one entry per taxon in wordlist.taxa
line = []
for taxon in wordlist.taxa:
# taxa absent from the mapping fall back to the single state '-'
states = set(taxon_to_cognate_set.get(taxon, ['-']))
# exclude the case len(taxon_to_cognate_set[taxon]) == 0
# one state: its mapped character; no state: the `missing` value;
# several states: their mapped characters, ordered by sorted state and
# joined inside parentheses
if len(states) == 1:
line.append(char_map[states.pop()])
elif not states:
line.append(missing)
else:
line.append('({0})'.format(
"".join([char_map[x] for x in sorted(states)])))
matrix.append(line)
# the collected lists are transposed before being returned
return misc.transpose(matrix)
# NOTE(review): fragment - `sim`, `almA`, `almB`, `profileA`, `profileB`, `o`,
# `p`, `return_similarity` and `iterate` are defined outside this view, and
# the original indentation was lost.
# when only the similarity is requested, return it before any gap tracing
if return_similarity:
return sim
# trace the gaps inserted in both aligned profiles and insert them
# in the original profiles
# a '-' in almA inserts a list of o 'X' items into profileA at that index;
# otherwise a '-' in almB inserts a list of p 'X' items into profileB
for i in range(len(almA)):
if almA[i] == '-':
profileA.insert(i, o * ['X'])
elif almB[i] == '-':
profileB.insert(i, p * ['X'])
# invert the profiles and the weight matrices by turning columns
# into rows and rows into columns
profileA = misc.transpose(profileA)
profileB = misc.transpose(profileB)
# return the aligned profiles and weight matrices
# with `iterate` the two profiles come back as a pair, otherwise as one
# concatenated sequence (profileA + profileB)
if iterate:
return profileA, profileB
return profileA + profileB