Returns
-------
model :
    A pomegranate HiddenMarkovModel trained on the given dataset.
"""
assert method in ('hmm-tumor', 'hmm-germline', 'hmm')
observations = as_observation_matrix(cnarr.autosomes())
# Estimate standard deviation from the full distribution, robustly
stdev = biweight_midvariance(np.concatenate(observations), initial=0)
if method == 'hmm-germline':
    state_names = ["loss", "neutral", "gain"]
    distributions = [
        pom.NormalDistribution(-1.0, stdev, frozen=True),
        pom.NormalDistribution(0.0, stdev, frozen=True),
        pom.NormalDistribution(0.585, stdev, frozen=True),
    ]
elif method == 'hmm-tumor':
    state_names = ["del", "loss", "neutral", "gain", "amp"]
    distributions = [
        pom.NormalDistribution(-2.0, stdev, frozen=False),
        pom.NormalDistribution(-0.5, stdev, frozen=False),
        pom.NormalDistribution(0.0, stdev, frozen=True),
        pom.NormalDistribution(0.3, stdev, frozen=False),
        pom.NormalDistribution(1.0, stdev, frozen=False),
    ]
else:
    state_names = ["loss", "neutral", "gain"]
    distributions = [
        pom.NormalDistribution(-1.0, stdev, frozen=False),
        pom.NormalDistribution(0.0, stdev, frozen=False),
        pom.NormalDistribution(0.585, stdev, frozen=False),
    ]
n_states = len(distributions)
# Starts -- prefer neutral
binom_coefs = scipy.special.binom(n_states - 1, range(n_states))
start_probabilities = binom_coefs / binom_coefs.sum()
# Prefer to keep the current state in each transition
# All other transitions are equally likely, to start
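# With n_states = 3 the start probabilities above work out to
# [0.25, 0.5, 0.25], and with 5 states to [1, 4, 6, 4, 1] / 16, so the
# neutral state is always favored for the first bin.
#
# The excerpt stops before the model is assembled. A minimal sketch of one
# way to finish it with pomegranate (<1.0): build a diagonal-heavy
# transition matrix, combine it with the distributions and start
# probabilities, and refine the model on the observations. The matrix
# weighting and fit settings below are illustrative, not taken from the
# original source.
import numpy as np
import pomegranate as pom

transition_matrix = (np.identity(n_states) * 100
                     + np.ones((n_states, n_states)) / n_states)
transition_matrix /= transition_matrix.sum(axis=1, keepdims=True)

model = pom.HiddenMarkovModel.from_matrix(
    transition_matrix, distributions, start_probabilities,
    state_names=state_names, name=method)
# Baum-Welch refinement on the per-chromosome observation sequences
model.fit(observations, algorithm='baum-welch', max_iterations=100)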
import numpy as np
from pomegranate import (GeneralMixtureModel, HiddenMarkovModel,
                         NormalDistribution, State)

def load_segmentation_model(modeldata):
    """Rebuild a segmentation HMM from serialized state descriptions."""
    model = HiddenMarkovModel('model')
    states = {}
    # First pass: create one state per entry, with a single Gaussian or a
    # Gaussian-mixture emission, and wire up the start probabilities.
    for s in modeldata:
        if len(s['emission']) == 1:
            emission = NormalDistribution(*s['emission'][0][:2])
        else:
            weights = np.array([w for _, _, w in s['emission']])
            dists = [NormalDistribution(mu, sigma)
                     for mu, sigma, _ in s['emission']]
            emission = GeneralMixtureModel(dists, weights=weights)
        state = State(emission, name=s['name'])
        states[s['name']] = state
        model.add_state(state)
        if 'start_prob' in s:
            model.add_transition(model.start, state, s['start_prob'])
    # Second pass: add transitions between the now-registered states.
    for s in modeldata:
        current = states[s['name']]
        for nextstate, prob in s['transition']:
            model.add_transition(current, states[nextstate], prob)
    model.bake()
    return model
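# Usage sketch. The shape of `modeldata` below is inferred from how the
# loader reads it (one dict per state with (mu, sigma, weight) emission
# components, an optional start probability, and outgoing transitions);
# the state names and numbers are made up for illustration.
example_modeldata = [
    {'name': 'neutral',
     'emission': [(0.0, 0.1, 1.0)],
     'start_prob': 0.8,
     'transition': [('neutral', 0.95), ('gain', 0.05)]},
    {'name': 'gain',
     'emission': [(0.585, 0.1, 0.6), (1.0, 0.2, 0.4)],
     'start_prob': 0.2,
     'transition': [('gain', 0.9), ('neutral', 0.1)]},
]

model = load_segmentation_model(example_modeldata)
logp, path = model.viterbi([0.01, -0.02, 0.6, 0.58, 0.0])
# path is a list of (index, State) pairs; the first entry is the silent
# start state added by pomegranate.
print([state.name for _, state in path])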
# GMM emissions
# 4 hidden states:
# 0--start, 1--downstream, 2--upstream, 3--end
# (`hmm` is a HiddenMarkovModel created earlier in the original source;
# NormalDistribution, GeneralMixtureModel, State come from pomegranate.)
numdists = 3  # Three-component Gaussian mixtures
var = 7.5 / (numdists - 1)  # used as each component's standard deviation
means = [[], [], [], []]
for i in range(numdists):
    means[3].append(i * 7.5 / (numdists - 1) + 2.5)
    means[2].append(i * 7.5 / (numdists - 1))
    means[1].append(-i * 7.5 / (numdists - 1))
    means[0].append(-i * 7.5 / (numdists - 1) - 2.5)
states = []
for i, m in enumerate(means):
    tmp = []
    for j in m:
        tmp.append(NormalDistribution(j, var))
    mixture = GeneralMixtureModel(tmp)
    states.append(State(mixture, name=str(i)))
hmm.add_states(*tuple(states))
# Transition matrix
# A = [[0., 1., 0., 0.],
#      [0., 0.5, 0.5, 0.],
#      [0., 0., 0.5, 0.5],
#      [1., 0., 0., 0.]]
hmm.add_transition(states[0], states[1], 1)
hmm.add_transition(states[1], states[1], 0.5)
hmm.add_transition(states[1], states[2], 0.5)
hmm.add_transition(states[2], states[2], 0.5)
hmm.add_transition(states[2], states[3], 0.5)
hmm.add_transition(states[3], states[0], 1)
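# One way the excerpt above could be finished, assuming `hmm` was created
# earlier as a pomegranate HiddenMarkovModel (the excerpt does not show
# that line): give the chain an entry point, bake the topology, and run a
# Viterbi decode on a toy signal. The start transition and the toy data
# are illustrative only.
import numpy as np

hmm.add_transition(hmm.start, states[0], 1.0)
hmm.bake()

toy_track = np.concatenate([np.full(4, -5.0), np.full(4, 5.0)])
logp, path = hmm.viterbi(toy_track)
print([state.name for _, state in path[1:]])  # skip the silent start state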