else:
    j += 1
    words += [s]
if len(words) < kw['rands']:
    log.warning(
        "Could not generate enough distinct words for"
        " the random distribution. Will expand automatically.")
while len(words) < kw['rands']:
    words += [random.choice(words)]
seqs[taxon], pros[taxon], weights[taxon] = [], [], []
for w in words:
cls = tokens2class(w.split(' '), self.model,
cldf=self._cldf)
pros[taxon].append(prosodic_string(w.split(' ')))
weights[taxon].append(prosodic_weights(pros[taxon][-1]))
seqs[taxon].append([
'{0}.{1}'.format(c, p) for c, p in zip(
cls,
[self._transform[pr] for pr in pros[taxon][-1]]
)])
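# The helpers above come from lingpy's public API. A minimal sketch of what
# they return, assuming `pip install lingpy`; the sample word and the plain
# prosody symbols (the source pipes them through self._transform first) are
# illustrative only.
from lingpy import ipa2tokens, tokens2class, prosodic_string, prosodic_weights

tokens = ipa2tokens('tʰɔxtər')         # segment an IPA string into tokens
classes = tokens2class(tokens, 'sca')  # map each token to an SCA sound class
prostring = prosodic_string(tokens)    # prosodic environment per position
weights = prosodic_weights(prostring)  # position-specific gap penalties

# combine class and prosody per position, as the loop above does
print(['{0}.{1}'.format(c, p) for c, p in zip(classes, prostring)])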
with util.pb(
desc='RANDOM CORRESPONDENCE CALCULATION',
total=tasks) as progress:
for (i, tA), (j, tB) in util.multicombinations2(
enumerate(self.cols)):
progress.update(1)
log.info(
    "Calculating random alignments"
    " for pair {0}/{1}.".format(tA, tB))
self.log.warning("There are empty segments in the consensus.")
self.log.info(
'',
extra=dict(lines=[' '.join([str(x) for x in cons])
for cons in [consA, consB]]))
except Exception:
    self.log.error(
        "Failed to compute the consensus string.",
        extra=dict(lines=[
            sonarA, sonarB,
            almsA[0], [self._get(n_, 'tokens') for n_ in almsA[0]],
            almsB[0], [self._get(n_, 'tokens') for n_ in almsB[0]]
        ]))
    raise  # re-raise: consA/consB would be undefined past this point
prosA = prosodic_string(consA)
prosB = prosodic_string(consB)
self.log.debug('', extra=dict(lines=[(prosA, consA), (prosB, consB)]))
weightsA, weightsB = prosodic_weights(prosA), prosodic_weights(prosB)
# carry out the alignment
almA, almB, sim = calign.align_profile(
profileA,
profileB,
weightsA,
weightsB,
prosA,
prosB,
gop,
scale,
factor,
self.scorer)  # trailing low-level arguments are truncated in this excerpt
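# calign.align_profile is lingpy's low-level Cython routine. For orientation,
# a rough high-level analogue using lingpy's public Pairwise class (assumption:
# illustrative parameters and sample words, not the source's actual call):
from lingpy import Pairwise

pair = Pairwise('tʰɔxtər', 'dɔːtər')  # two sample IPA strings
pair.align(mode='global', gop=-2, scale=0.5, factor=0.3)
print(pair)  # aligned sequences plus the similarity score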
# go on with the analysis
cons_dict = {}
with util.pb(desc='CONSENSUS', total=len(self.etd[ref])) as progress:
for cog in self.etd[ref]:
progress.update(1)
if cog in self.msa[ref]:
log.debug("Analyzing cognate set number '{0}'...".format(cog))
# temporary solution for sound-class integration
if classes:
_classes = []
if weights:
keywords['weights'] = prosodic_weights(
prosodic_string(self.msa[ref][cog]['_sonority_consensus'])
)
else:
    keywords['weights'] = [1.0] * len(self.msa[ref][cog]['alignment'])
for alm in self.msa[ref][cog]['alignment']:
cls = [c for c in tokens2class(
alm,
keywords['model'],
stress=keywords['stress'],
cldf=keywords['cldf'],
diacritics=keywords['diacritics']
) if c != '0']
cls = class2tokens(cls, alm)
_classes.append(cls)
_classes = misc.transpose(_classes)
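# Sketch of the gap handling above, assuming lingpy's class2tokens and
# illustrative data: converting an aligned row to classes drops the gaps, and
# class2tokens re-inserts them from the aligned token row.
from lingpy import class2tokens

cls = ['T', 'O', 'X', 'T', 'E', 'R']        # classes of the non-gap tokens
alm = ['tʰ', 'ɔ', 'x', '-', 't', 'ə', 'r']  # aligned row with one gap
print(class2tokens(cls, alm))               # ['T', 'O', 'X', '-', 'T', 'E', 'R']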
if sonar and sonars:
self._sonars = [sonars[key] for key in keys]
self._prostrings = [prosodic_string(s) for s in self._sonars]
# create sonars if the argument is true
elif sonar:
self._sonars = [
    [int(t) for t in tokens2class(
        self.tokens[key], rcParams['art'], stress=rcParams['stress'])]
    for key in keys]
if log.get_level() <= logging.DEBUG:
for _i, _sonar in enumerate(self._sonars):
if 0 in _sonar:
self.log.warning(
"Sequence {0} contains unrecognized characters!".format(
self.seqs[self.int2ext[_i][0]]))
self._prostrings = [prosodic_string(s) for s in self._sonars]
# do nothing if no arguments are passed
else:
self._sonars = False
self._prostrings = False
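# The sonority branch above in isolation, assuming lingpy and an illustrative
# word: the 'art' model maps tokens to sonority values, and prosodic_string
# accepts such integer sequences directly.
from lingpy import ipa2tokens, tokens2class, prosodic_string
from lingpy.settings import rcParams

tokens = ipa2tokens('tʰɔxtər')
sonar = [int(t) for t in tokens2class(tokens, rcParams['art'])]
print(sonar, prosodic_string(sonar))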
# create a scoredict for the calculation of alignment analyses
# append the scorer if it is given with the model
def scorer(x, y):
if classes:
return self.model.scorer[x, y]
if scoredict:
return scoredict[x, y]
return 1.0 if x == y else -1.0
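# The fallback chain in scorer() as a standalone sketch (names hypothetical):
def make_scorer(model=None, scoredict=None):
    def scorer(x, y):
        if model is not None:      # a sound-class model's scorer wins
            return model.scorer[x, y]
        if scoredict is not None:  # then an explicit score dictionary
            return scoredict[x, y]
        return 1.0 if x == y else -1.0  # identity scoring as the default
    return scorer

score = make_scorer()
print(score('t', 't'), score('t', 'd'))  # 1.0 -1.0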
self.scoredict = {}
for (i, seqA), (j, seqB) in combinations_with_replacement(
        enumerate(self.seqs), 2):  # iterable assumed; the call is truncated in this excerpt
    ...  # pairwise score entries would be computed here
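# Both pair loops above enumerate unordered pairs with replacement
# (util.multicombinations2 in the earlier fragment, itertools'
# combinations_with_replacement here). A quick illustration with toy data:
from itertools import combinations_with_replacement

for (i, a), (j, b) in combinations_with_replacement(enumerate(['ta', 'ni']), 2):
    print(i, a, j, b)  # 0 ta 0 ta / 0 ta 1 ni / 1 ni 1 ni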
# start filling the dictionary
for i, w in enumerate(words):
# check for tokenized string
if not tokens:
tk = ipa2tokens(w, **keywords)
else:
tk = w[:]
self.tokens += [tk]
# create prosodic string
if prostrings:
p = prostrings[i]
else:
tt = tokens2class(tk, rcParams['art'])
p = prosodic_string(
tk,
rcParams['art'],
cldf=keywords['cldf'],
diacritics=keywords['diacritics'],
stress=keywords['stress'])
# create classes
if classes:
c = tokens2class(tk, class_model, cldf=keywords['cldf'],
diacritics=keywords['diacritics'],
stress=keywords['stress'])
bigrams = list(zip(p, c))
self.classes += [c]
else:
# pair each prosodic symbol with its plain token
bigrams = list(zip(p, tk))
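# The two bigram variants above, end to end, assuming lingpy and a sample
# word: each position pairs a prosodic-context symbol with either its sound
# class or its raw token.
from lingpy import ipa2tokens, tokens2class, prosodic_string

tk = ipa2tokens('dɔːtər')
p = prosodic_string(tk)
print(list(zip(p, tokens2class(tk, 'sca'))))  # class-based bigrams
print(list(zip(p, tk)))                       # token-based bigrams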
# rounded column-wise mean of the non-gap sonority values; the head of this
# statement was truncated in the excerpt and is reconstructed here
sonarB = [int(sum(k for k in col if k >= 0) /
              len([k for k in col if k >= 0]) + 0.5) for col in sonarB]
"apply_checks": False,
"defaults": False,
"no_bscorer": False,
"errors": "errors.log",
"expand_nasals": False,
"segments": "tokens",
"numbers": "numbers",
"classes": "classes",
"transcription": "ipa",
"prostrings": "prostrings",
"weights": "weights",
"sonars": "sonars",
"langid": "langid",
"duplicates": "duplicates",
"tokenize": ipa2tokens,
"get_prostring": prosodic_string,
"row": "concept",
"col": "doculect",
"conf": None,
    "cldf": True,
}
kw.update(keywords)
# make segments, numbers, and classes persistent across class instances
self._segments = kw['segments']
self._numbers = kw['numbers']
self._classes = kw['classes']
self._weights = kw['weights']
self._prostrings = kw['prostrings']
self._sonars = kw['sonars']
self._langid = kw['langid']
self._duplicates = kw['duplicates']
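# The defaults-then-update pattern used above, in isolation (names
# hypothetical): kw.update(keywords) lets any caller-supplied keyword
# override a default.
def configure(**keywords):
    kw = {"segments": "tokens", "numbers": "numbers", "cldf": True}
    kw.update(keywords)
    return kw

print(configure(segments="segs"))
# {'segments': 'segs', 'numbers': 'numbers', 'cldf': True}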