if k > kw['limit']:
break
else:
j += 1
words += [s]
if len(words) < kw['rands']:
log.warning(
"Could not generate enough distinct words for"
" the random distribution. "
"Will expand automatically")
while len(words) < kw['rands']:
words += [words[random.randint(0, len(words)-1)]]
seqs[taxon], pros[taxon], weights[taxon] = [], [], []
for w in words:
cls = tokens2class(w.split(' '), self.model,
cldf=self._cldf)
pros[taxon].append(prosodic_string(w.split(' ')))
weights[taxon].append(prosodic_weights(pros[taxon][-1]))
seqs[taxon].append([
'{0}.{1}'.format(c, p) for c, p in zip(
cls,
[self._transform[pr] for pr in pros[taxon][-1]]
)])
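# A minimal, hedged sketch of the same per-word conversion using LingPy's
# public helpers. The IPA form and the 'sca' model name are illustrative
# choices; the snippet above additionally remaps prosodic symbols through
# self._transform, which is omitted here.
from lingpy import ipa2tokens, tokens2class, prosodic_string, prosodic_weights

word = ipa2tokens('tʰɔxtər')            # segment an IPA string into tokens
classes = tokens2class(word, 'sca')     # map tokens onto SCA sound classes
prostring = prosodic_string(word)       # derive the prosodic structure
weights = prosodic_weights(prostring)   # position-specific alignment weights

# pair every class symbol with its prosodic context, as in the loop above
pairs = ['{0}.{1}'.format(c, p) for c, p in zip(classes, prostring)]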
with util.pb(
desc='RANDOM CORRESPONDENCE CALCULATION',
total=tasks) as progress:
for (i, tA), (j, tB) in util.multicombinations2(
enumerate(self.cols)):
progress.update(1)
# convert a token sequence into a joined sound-class string
# (the name `as_class_string` is only illustrative)
as_class_string = lambda x: ''.join(
    tokens2class(x, kw["model"], cldf=self._cldf,
                 stress=rcParams['stress']))
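# A hedged sketch of the same pattern applied to a whole wordlist: every
# tokenized entry gets a joined sound-class string. The file name
# 'wordlist.tsv' and the column names are assumptions, not taken from the
# excerpt above.
from lingpy import Wordlist, tokens2class

wl = Wordlist('wordlist.tsv')   # hypothetical input file with a 'tokens' column
wl.add_entries('classes', 'tokens',
               lambda x: ''.join(tokens2class(x, 'sca')))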
# create IDs for the languages
# start filling the dictionary
for i, w in enumerate(words):
# check for tokenized string
if not tokens:
tk = ipa2tokens(w, **keywords)
else:
tk = w[:]
self.tokens += [tk]
# create prosodic string
if prostrings:
p = prostrings[i]
else:
tt = tokens2class(tk, rcParams['art'])
p = prosodic_string(
tk,
rcParams['art'],
cldf=keywords['cldf'],
diacritics=keywords['diacritics'],
stress=keywords['stress'])
# create classes
if classes:
c = tokens2class(tk, class_model, cldf=keywords['cldf'],
diacritics=keywords['diacritics'],
stress=keywords['stress'])
bigrams = list(zip(p, c))
self.classes += [c]
else:
# zip the stuff
bigrams = list(zip(p, tk))
classify = lambda x: tokens2class(x, self.model)
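# Minimal sketch of the prosody/class pairing used above, assuming a plain
# token list; 'dolgo' is just one of LingPy's sound-class models.
from lingpy import ipa2tokens, tokens2class, prosodic_string

tk = ipa2tokens('hant')
p = prosodic_string(tk)
c = tokens2class(tk, 'dolgo')
bigrams = list(zip(p, c))   # one (prosodic symbol, sound class) pair per segment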
if cog in self.msa[ref]:
log.debug("Analyzing cognate set number '{0}'...".format(cog))
# temporary solution for sound-class integration
if classes == True:
_classes = []
if weights:
keywords['weights'] = prosodic_weights(
prosodic_string(self.msa[ref][cog]['_sonority_consensus'])
)
else:
keywords['weights'] = [
1.0 for i in range(len(self.msa[ref][cog]['alignment']))]
for alm in self.msa[ref][cog]['alignment']:
cls = [c for c in tokens2class(
alm,
keywords['model'],
stress=keywords['stress'],
cldf=keywords['cldf'],
diacritics=keywords['diacritics']
) if c != '0']
cls = class2tokens(cls, alm)
_classes.append(cls)
_classes = misc.transpose(_classes)
else:
_classes = classes
cons = get_consensus(
    self.msa[ref][cog]['alignment'],
    classes=_classes,
    tree=tree)

# callback mapping both members of a word pair onto sound classes
# (the name `pair_to_classes` is only illustrative)
pair_to_classes = lambda x: (
    tokens2class(x[0], self.model, stress=keywords['stress']),
    tokens2class(x[1], self.model, stress=keywords['stress']))
Returns
-------
cognacy : {0, 1}
    The cognacy assertion, which is either 0 (words are probably cognate)
    or 1 (words are not likely to be cognate).
"""
if text_type(model) == model:
model = rcParams[model]
elif not hasattr(model, 'info'):
raise ValueError("[!] No valid model instance selected.")
if isinstance(seqA, string_types):
seqA = ipa2tokens(seqA)
seqB = ipa2tokens(seqB)
classA = tokens2class(seqA, model)
classB = tokens2class(seqB, model)
# word-initial vowels are collapsed into the laryngeal class 'H'
if classA[0] in model.vowels:
    classA[0] = 'H'
if classB[0] in model.vowels:
    classB[0] = 'H'
# words count as probably cognate (0) if their first two consonant classes
# match, and as probably not cognate (1) otherwise
return int(''.join([k for k in classA if k not in model.vowels])[:2] !=
           ''.join([k for k in classB if k not in model.vowels])[:2])
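# A hedged, self-contained illustration of the comparison above: two words are
# judged probably cognate (0) when their first two consonant classes match.
# The example words are assumptions; the Dolgopolsky model is taken from
# rcParams as in the surrounding code.
from lingpy import ipa2tokens, tokens2class
from lingpy.settings import rcParams

model = rcParams['dolgo']
clsA = tokens2class(ipa2tokens('hand'), model)
clsB = tokens2class(ipa2tokens('hant'), model)
for cls in (clsA, clsB):
    if cls[0] in model.vowels:      # word-initial vowels count as 'H'
        cls[0] = 'H'
consA = ''.join(k for k in clsA if k not in model.vowels)[:2]
consB = ''.join(k for k in clsB if k not in model.vowels)[:2]
print(int(consA != consB))          # 0 = probably cognate, 1 = probably not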
# sonority profiles: each token is mapped onto the 'art' model, whose class
# symbols are digits, and then cast to integers
map(lambda x: [int(t) for t in tokens2class(
        x, rcParams['art'], stress=rcParams['stress'])],
    [self.tokens[key] for key in keys])
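# Hedged sketch of the sonority-profile idea behind the map() call above: the
# 'art' model assigns each token a digit-valued sonority class, which int()
# turns into a numeric profile. The example word is an assumption.
from lingpy import ipa2tokens, tokens2class
from lingpy.settings import rcParams

tokens = ipa2tokens('kʰalt')
sonority = [int(t) for t in tokens2class(tokens, rcParams['art'],
                                         stress=rcParams['stress'])]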