Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self.assertRaises(MsticpyException, lambda: StateMatrix(dict(), UNK_TOKEN))
states = {"haha": {"lol": 1, UNK_TOKEN: 1}, UNK_TOKEN: {"hehe": 1}}
)
# Populate the data2 fixture: each entry is a StateMatrix that uses UNK_TOKEN
# as its unknown-token key.
# Flat mapping token -> value; the three values sum to 1.0.
self.data2["prior_probs"] = StateMatrix(
{START_TOKEN: 0.3, END_TOKEN: 0.3, UNK_TOKEN: 0.4}, UNK_TOKEN
)
# Two-level mapping (outer token -> inner token -> value); each inner row
# sums to 1. Note there is no outer row for END_TOKEN.
self.data2["trans_probs"] = StateMatrix(
{
START_TOKEN: {
END_TOKEN: 0.6666666666666666,
UNK_TOKEN: 0.3333333333333333,
},
UNK_TOKEN: {END_TOKEN: 0.5, UNK_TOKEN: 0.5},
},
UNK_TOKEN,
)
# Only the unknown token is present at the param level.
self.data2["param_probs"] = StateMatrix({UNK_TOKEN: 0.3}, UNK_TOKEN)
# Outer key is a command token, inner key is a param token (only UNK_TOKEN here).
self.data2["param_cond_cmd_probs"] = StateMatrix(
{
START_TOKEN: {UNK_TOKEN: 0.3333333333333333},
END_TOKEN: {UNK_TOKEN: 0.3333333333333333},
UNK_TOKEN: {UNK_TOKEN: 0.25},
},
UNK_TOKEN,
)
# Value-level matrices contain nothing but the unknown token.
self.data2["value_probs"] = StateMatrix({UNK_TOKEN: 1}, UNK_TOKEN)
self.data2["value_cond_param_probs"] = StateMatrix(
{UNK_TOKEN: {UNK_TOKEN: 1}}, UNK_TOKEN
)
# populate data3
cmd = "Set-User"
self.data3["sessions"] = [
[
"Set-User": 0.3333333333333333,
"##END##": 0.19047619047619047,
"##UNK##": 0.2857142857142857,
}
# Wrap the plain `prior_probs` dict (built above this excerpt) in a StateMatrix.
self.prior_probs = StateMatrix(states=prior_probs, unk_token=UNK_TOKEN)
# Two-level mapping: outer token -> inner token -> value.
# Each inner row sums to 1; there is no outer row for "##END##".
trans_probs = {
"##START##": {"Set-User": 0.5, "##END##": 0.25, "##UNK##": 0.25},
"Set-User": {"##END##": 0.5, "Set-User": 0.25, "##UNK##": 0.25},
"##UNK##": {
"Set-User": 0.3333333333333333,
"##END##": 0.3333333333333333,
"##UNK##": 0.3333333333333333,
},
}
self.trans_probs = StateMatrix(states=trans_probs, unk_token=UNK_TOKEN)
end_token=end_token,
unk_token=unk_token,
)
# apply laplace smoothing for params
# Deep copies are passed in, presumably so the smoothing helper cannot mutate
# the caller's count dicts -- confirm against laplace_smooth_param_counts.
param_counts_ls, cmd_param_counts_ls = laplace_smooth_param_counts(
cmds=cmds,
param_counts=copy.deepcopy(param_counts),
cmd_param_counts=copy.deepcopy(cmd_param_counts),
unk_token=unk_token,
)
# Wrap every smoothed count dict in a StateMatrix keyed on the same unk_token.
# seq1_counts_ls / seq2_counts_ls are assigned above this excerpt.
seq1_counts_sm = StateMatrix(states=seq1_counts_ls, unk_token=unk_token)
seq2_counts_sm = StateMatrix(states=seq2_counts_ls, unk_token=unk_token)
param_counts_sm = StateMatrix(states=param_counts_ls, unk_token=unk_token)
cmd_param_counts_sm = StateMatrix(states=cmd_param_counts_ls, unk_token=unk_token)
# Return order: command counts, command-pair counts, param counts,
# per-command param counts.
return seq1_counts_sm, seq2_counts_sm, param_counts_sm, cmd_param_counts_sm
# Nested defaultdict: any missing outer or inner key reads as 0.
trans_probs: DefaultDict[str, DefaultDict[str, float]] = defaultdict(
lambda: defaultdict(lambda: 0)
)
# compute prior probs
# `prior_probs` and `total_cmds` are defined above this excerpt.
for cmd in seq1_counts:
prior_probs[cmd] = seq1_counts[cmd] / total_cmds
# compute trans probs
# Each pair count is divided by the sum of its row, i.e. the total count of
# all transitions out of `prev`.
# NOTE(review): the row sum does not depend on `current`, yet it is recomputed
# on every inner iteration; it could be computed once per `prev`.
for prev, currents in seq2_counts.items():
for current in currents:
trans_probs[prev][current] = seq2_counts[prev][current] / sum(
seq2_counts[prev].values()
)
# Wrap both probability dicts in StateMatrix objects keyed on unk_token.
prior_probs_sm = StateMatrix(states=prior_probs, unk_token=unk_token)
trans_probs_sm = StateMatrix(states=trans_probs, unk_token=unk_token)
return prior_probs_sm, trans_probs_sm
unk_token=unk_token,
)
# apply laplace smoothing for the values
# Deep copies are passed in, presumably so the smoothing helper cannot mutate
# the caller's count dicts -- confirm against laplace_smooth_value_counts.
value_counts_ls, param_value_counts_ls = laplace_smooth_value_counts(
params=params,
value_counts=copy.deepcopy(value_counts),
param_value_counts=copy.deepcopy(param_value_counts),
unk_token=unk_token,
)
# Wrap every smoothed count dict in a StateMatrix keyed on the same unk_token.
# The seq1/seq2/param `_ls` dicts are assigned above this excerpt.
seq1_counts_sm = StateMatrix(states=seq1_counts_ls, unk_token=unk_token)
seq2_counts_sm = StateMatrix(states=seq2_counts_ls, unk_token=unk_token)
param_counts_sm = StateMatrix(states=param_counts_ls, unk_token=unk_token)
cmd_param_counts_sm = StateMatrix(states=cmd_param_counts_ls, unk_token=unk_token)
value_counts_sm = StateMatrix(states=value_counts_ls, unk_token=unk_token)
param_value_counts_sm = StateMatrix(
states=param_value_counts_ls, unk_token=unk_token
)
# Six-tuple, ordered from command-level to param-level to value-level counts.
return (
seq1_counts_sm,
seq2_counts_sm,
param_counts_sm,
cmd_param_counts_sm,
value_counts_sm,
param_value_counts_sm,
)
'##UNK##': {'Set-Mailbox': 1, '##UNK##': 1}}
unk_token: str
dummy token to signify an unseen command (e.g. "##UNK##").
This token should be present in the `states` keys. And if `states`
is a dict of dicts, then the `unk_token` should be present in
the keys of the outer dict and all the inner dicts.
"""
# Initialise the base class with the raw mapping (this runs before validation).
super().__init__(states)
# The unknown token must be a key at this level of the mapping.
if unk_token not in states:
raise MsticpyException("`unk_token` should be a key in `states`")
# Shallow copy into a plain dict so the wrapping below does not rebind keys
# on the caller's top-level mapping.
self.states = dict(states)
self.unk_token = unk_token
# Recursively wrap nested dicts; the constructor call re-applies the
# unk_token membership check to each inner dict (assuming this is
# StateMatrix's own constructor -- the class header is not visible here).
# Only existing keys are rebound, so the dict size is unchanged during iteration.
for key, val in self.states.items():
if isinstance(val, dict):
self.states[key] = StateMatrix(self.states[key], unk_token)
cmd_param_counts=copy.deepcopy(cmd_param_counts),
unk_token=unk_token,
)
# apply laplace smoothing for the values
# Deep copies are passed in, presumably so the smoothing helper cannot mutate
# the caller's count dicts -- confirm against laplace_smooth_value_counts.
value_counts_ls, param_value_counts_ls = laplace_smooth_value_counts(
params=params,
value_counts=copy.deepcopy(value_counts),
param_value_counts=copy.deepcopy(param_value_counts),
unk_token=unk_token,
)
# Wrap every smoothed count dict in a StateMatrix keyed on the same unk_token.
# The seq1/seq2/param `_ls` dicts are assigned above this excerpt.
seq1_counts_sm = StateMatrix(states=seq1_counts_ls, unk_token=unk_token)
seq2_counts_sm = StateMatrix(states=seq2_counts_ls, unk_token=unk_token)
param_counts_sm = StateMatrix(states=param_counts_ls, unk_token=unk_token)
cmd_param_counts_sm = StateMatrix(states=cmd_param_counts_ls, unk_token=unk_token)
value_counts_sm = StateMatrix(states=value_counts_ls, unk_token=unk_token)
param_value_counts_sm = StateMatrix(
states=param_value_counts_ls, unk_token=unk_token
)
# Six-tuple, ordered from command-level to param-level to value-level counts.
return (
seq1_counts_sm,
seq2_counts_sm,
param_counts_sm,
cmd_param_counts_sm,
value_counts_sm,
param_value_counts_sm,
)
# Both containers default missing keys to 0.
param_probs: DefaultDict[str, float] = defaultdict(lambda: 0)
param_cond_cmd_probs: DefaultDict[str, DefaultDict[str, float]] = defaultdict(
lambda: defaultdict(lambda: 0)
)
# Conditional value for (cmd, param): the pair count divided by the count of
# `cmd` taken from seq1_counts.
# NOTE(review): raises ZeroDivisionError if seq1_counts[cmd] is 0; presumably
# the counts are smoothed upstream so this cannot happen -- confirm.
for cmd, params in cmd_param_counts.items():
n_cmd = seq1_counts[cmd]
for param, count in params.items():
param_cond_cmd_probs[cmd][param] = count / n_cmd
# Marginal value for each param: its count divided by the total of all
# command counts.
tot_cmd = sum(seq1_counts.values())
for param, count in param_counts.items():
param_probs[param] = count / tot_cmd
# Wrap both dicts in StateMatrix objects keyed on unk_token.
param_probs_sm = StateMatrix(states=param_probs, unk_token=unk_token)
param_cond_cmd_probs_sm = StateMatrix(
states=param_cond_cmd_probs, unk_token=unk_token
)
return param_probs_sm, param_cond_cmd_probs_sm
-------
tuple of StateMatrix laplace smoothed counts:
individual command counts,
sequence command (length 2) counts
"""
# apply laplace smoothing
# Deep copies are passed in, presumably so the smoothing helper cannot mutate
# the caller's count dicts -- confirm against laplace_smooth_cmd_counts.
seq1_counts_ls, seq2_counts_ls = laplace_smooth_cmd_counts(
seq1_counts=copy.deepcopy(seq1_counts),
seq2_counts=copy.deepcopy(seq2_counts),
start_token=start_token,
end_token=end_token,
unk_token=unk_token,
)
# Wrap the smoothed counts in StateMatrix objects keyed on unk_token.
seq1_counts_st = StateMatrix(states=seq1_counts_ls, unk_token=unk_token)
seq2_counts_st = StateMatrix(states=seq2_counts_ls, unk_token=unk_token)
return seq1_counts_st, seq2_counts_st