        pom.NormalDistribution(-1.0, stdev, frozen=False),
        pom.NormalDistribution(0.0, stdev, frozen=False),
        pom.NormalDistribution(0.585, stdev, frozen=False),
    ]
    n_states = len(distributions)
    # Starts -- prefer neutral
    binom_coefs = scipy.special.binom(n_states - 1, range(n_states))
    start_probabilities = binom_coefs / binom_coefs.sum()
    # Prefer to keep the current state in each transition
    # All other transitions are equally likely, to start
    transition_matrix = (np.identity(n_states) * 100
                         + np.ones((n_states, n_states)) / n_states)
    model = pom.HiddenMarkovModel.from_matrix(transition_matrix, distributions,
                                              start_probabilities, state_names=state_names, name=method)
    model.fit(sequences=observations,
              weights=[len(obs) for obs in observations],
              distribution_inertia=.8,  # Allow updating dists, but slowly
              edge_inertia=0.1,
              # lr_decay=.75,
              pseudocount=5,
              use_pseudocount=True,
              max_iterations=100000,
              n_jobs=processes,
              verbose=False)
    return model
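
# The fragment above and the variants_in_segment() function below drive
# pomegranate the same way: Gaussian emission distributions, a diagonal-heavy
# transition matrix, HiddenMarkovModel.from_matrix(), then Baum-Welch fitting
# with fit(). The sketch below is NOT from the original project; it is a
# minimal, self-contained illustration of that pattern with toy data, assuming
# the classic pomegranate (0.x) API that the snippets' `pom.` prefix implies.
import numpy as np
import pomegranate as pom

# Toy observations: mostly near 0, with a stretch near -1 in the middle.
toy_observations = np.concatenate([
    np.random.normal(0.0, 0.2, 50),
    np.random.normal(-1.0, 0.2, 20),
    np.random.normal(0.0, 0.2, 50),
])

toy_state_names = ["loss", "neut", "gain"]
toy_distributions = [
    pom.NormalDistribution(-1.0, 0.2),
    pom.NormalDistribution(0.0, 0.2),
    pom.NormalDistribution(0.585, 0.2),
]
n = len(toy_distributions)

# Diagonal-heavy transitions; rows are normalized here explicitly for clarity.
toy_transitions = np.identity(n) * 100 + np.ones((n, n)) / n
toy_transitions /= toy_transitions.sum(axis=1, keepdims=True)
toy_starts = np.ones(n) / n

toy_model = pom.HiddenMarkovModel.from_matrix(
    toy_transitions, toy_distributions, toy_starts,
    state_names=toy_state_names, name="toy")
toy_model.fit(sequences=[toy_observations], max_iterations=100, verbose=False)

# Per-observation state indices; note these index into toy_model.states,
# whose order after baking may differ from the order given above.
toy_states = toy_model.predict(toy_observations, algorithm="map")
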
def variants_in_segment(varr, segment, min_variants=50):
    if len(varr) > min_variants:
        observations = varr.mirrored_baf(above_half=True)
        state_names = ["neutral", "alt"]
        distributions = [
            pom.NormalDistribution(0.5, .1, frozen=True),
            pom.NormalDistribution(0.67, .1, frozen=True),
        ]
        n_states = len(distributions)
        # Starts -- prefer neutral
        start_probabilities = [.95, .05]
        # Prefer to keep the current state in each transition
        # All other transitions are equally likely, to start
        transition_matrix = (np.identity(n_states) * 100
                             + np.ones((n_states, n_states)) / n_states)
        model = pom.HiddenMarkovModel.from_matrix(transition_matrix, distributions,
                                                  start_probabilities, state_names=state_names, name="loh")
        model.fit(sequences=[observations],
                  edge_inertia=0.1,
                  lr_decay=.75,
                  pseudocount=5,
                  use_pseudocount=True,
                  max_iterations=100000,
                  # n_jobs=1,  # processes,
                  verbose=False)
        states = np.array(model.predict(observations, algorithm='map'))
        logging.info("Done, now finalizing")
        logging.debug("Model states: %s", model.states)
        logging.debug("Predicted states: %s", states[:100])
        logging.debug(str(collections.Counter(states)))
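
# variants_in_segment() stops (in this excerpt) after computing the MAP state
# path. The helper below is NOT part of the original code; it is an
# illustrative follow-up showing one common way to use such a path: collapse
# consecutive identical states into (state, start, end) runs, e.g. to locate
# stretches assigned to the "alt" state.
import numpy as np

def runs_of_states(states):
    """Collapse a 1-D array of state indices into (state, start, end) runs
    with half-open intervals. Illustrative helper, not from the original."""
    states = np.asarray(states)
    if len(states) == 0:
        return []
    change_points = np.flatnonzero(np.diff(states)) + 1
    bounds = np.concatenate([[0], change_points, [len(states)]])
    return [(int(states[start]), int(start), int(end))
            for start, end in zip(bounds[:-1], bounds[1:])]

# runs_of_states([0, 0, 0, 1, 1, 0]) -> [(0, 0, 3), (1, 3, 5), (0, 5, 6)]
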
    for match_state in repeat_match_states:
        to_end = 0.7 / len(repeat_match_states)
        total = 1 + to_end
        for next_state in range(len(mat[match_state])):
            if mat[match_state][next_state] != 0:
                mat[match_state][next_state] /= total
        mat[match_state][model.end_index] = to_end / total

    starts = np.zeros(len(model.states))
    starts[model.start_index] = 1.0
    ends = np.zeros(len(model.states))
    ends[model.end_index] = 1.0
    state_names = [state.name for state in model.states]
    distributions = [state.distribution for state in model.states]
    name = 'Read Matcher'
    new_model = Model.from_matrix(mat, distributions, starts, ends, name=name, state_names=state_names, merge=None)
    new_model.bake(merge=None)
    return new_model
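
# In the loop above, every existing outgoing probability of a match state is
# divided by total = 1 + to_end, and the freed mass to_end / total is assigned
# to the end column, so each row of the transition matrix remains a probability
# distribution. A standalone check of that arithmetic with toy numbers (not the
# actual matcher matrix):
import numpy as np

toy_row = np.array([0.0, 0.6, 0.4, 0.0])         # outgoing probabilities, sum to 1
toy_end_index = 3                                # pretend this column is the end state
toy_to_end = 0.7 / 5                             # e.g. 5 match states sharing 0.7
toy_total = 1 + toy_to_end

toy_row[toy_row != 0] /= toy_total               # shrink the existing edges
toy_row[toy_end_index] = toy_to_end / toy_total  # add the new edge to the end state
assert np.isclose(toy_row.sum(), 1.0)            # row still sums to 1
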
    for j in range(len(mat[unit_end])):
        if mat[unit_end][j] != 0:
            next_state = j
    mat[unit_end][next_state] = 0.5
    mat[unit_end][end_repeats_ind] = 0.5
    mat[end_repeats_ind][model.end_index] = 1

    starts = np.zeros(states_count + 2)
    starts[model.start_index] = 1.0
    ends = np.zeros(states_count + 2)
    ends[model.end_index] = 1.0
    state_names = [state.name for state in states]
    distributions = [state.distribution for state in states]
    name = 'Repeat Matcher HMM Model'
    new_model = Model.from_matrix(mat, distributions, starts, ends, name=name, state_names=state_names, merge=None)
    new_model.bake(merge=None)
    return new_model
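
# The rebuilt matcher models above are presumably decoded against read
# sequences downstream. As a self-contained illustration of that step, the toy
# two-state model below (made-up names, emissions, and probabilities, not the
# actual read/repeat matcher) is decoded with Viterbi; the same calls apply to
# any baked pomegranate HiddenMarkovModel such as new_model.
import pomegranate as pom

d_unit = pom.DiscreteDistribution({"A": 0.7, "C": 0.1, "G": 0.1, "T": 0.1})
d_flank = pom.DiscreteDistribution({"A": 0.1, "C": 0.1, "G": 0.1, "T": 0.7})
s_unit = pom.State(d_unit, name="unit_match")
s_flank = pom.State(d_flank, name="flank")

toy_matcher = pom.HiddenMarkovModel("toy_matcher")
toy_matcher.add_states(s_unit, s_flank)
toy_matcher.add_transition(toy_matcher.start, s_unit, 0.5)
toy_matcher.add_transition(toy_matcher.start, s_flank, 0.5)
toy_matcher.add_transition(s_unit, s_unit, 0.8)
toy_matcher.add_transition(s_unit, s_flank, 0.1)
toy_matcher.add_transition(s_unit, toy_matcher.end, 0.1)
toy_matcher.add_transition(s_flank, s_flank, 0.8)
toy_matcher.add_transition(s_flank, s_unit, 0.1)
toy_matcher.add_transition(s_flank, toy_matcher.end, 0.1)
toy_matcher.bake()

# Viterbi decoding: log-probability plus the visited states (including the
# model's silent start/end states) by name.
logp, path = toy_matcher.viterbi(list("AAAATTTT"))
visited = [state.name for _, state in path]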