Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_seq_dataset_reshape(alphabet_axis, dummy_axis, example_kwargs):
    """Check how ``SeqIntervalDl`` handles ``alphabet_axis`` / ``dummy_axis``.

    Axis combinations matched by the rejection condition below must make the
    constructor raise. For every other combination the first sample's
    ``inputs`` array must have size 1 on the dummy axis, ``alphabet_len`` on
    the alphabet axis and ``seq_len`` on each remaining axis.

    Args:
        alphabet_axis: Axis index requested for the alphabet dimension.
        dummy_axis: Axis index requested for the dummy dimension, or ``None``.
        example_kwargs: Base keyword arguments passed to ``SeqIntervalDl``.
    """
    seq_len, alphabet_len = 3, 4
    # Work on a shallow copy so the mapping handed in by the caller is not
    # mutated (it may be shared with other tests).
    kwargs = dict(example_kwargs)
    kwargs['auto_resize_len'] = seq_len
    kwargs['alphabet_axis'] = alphabet_axis
    kwargs['dummy_axis'] = dummy_axis

    # For the validity checks a missing dummy axis is treated as -2.
    dummy_axis_int = -2 if dummy_axis is None else dummy_axis
    expect_failure = (
        alphabet_axis == dummy_axis_int
        or alphabet_axis == -1
        or dummy_axis_int == -1
        or alphabet_axis >= 3
        or dummy_axis_int >= 3
        or (alphabet_axis >= 2 and dummy_axis is None)
    )
    if expect_failure:
        with pytest.raises(Exception):
            SeqIntervalDl(**kwargs)
        return

    seq_dataset = SeqIntervalDl(**kwargs)
    # test the single sample works
    reshaped = seq_dataset[0]['inputs']
    for axis, size in enumerate(reshaped.shape):
        if axis == dummy_axis:
            assert size == 1
        elif axis == alphabet_axis:
            assert size == alphabet_len
        else:
            assert size == seq_len
def test_min_props():
    """Both dataloader classes must expose the minimal set of attributes."""
    # attribute names that have to be present on every dataloader class
    required_props = (
        "output_schema", "type", "defined_as", "info", "args",
        "dependencies", "postprocessing", "source", "source_dir",
    )
    for loader_cls in (StringSeqIntervalDl, SeqIntervalDl):
        available = dir(loader_cls)
        missing = [name for name in required_props if name not in available]
        assert not missing
kwargs = example_kwargs
kwargs['auto_resize_len'] = seq_len
kwargs['alphabet_axis'] = alphabet_axis
kwargs['dummy_axis'] = dummy_axis
dummy_axis_int = dummy_axis
if dummy_axis is None:
dummy_axis_int = -2
if (alphabet_axis == dummy_axis_int) or (alphabet_axis == -1) or (dummy_axis_int == -1) or \
(alphabet_axis >= 3) or (dummy_axis_int >= 3) or ((alphabet_axis >= 2) and (dummy_axis is None)):
with pytest.raises(Exception):
seq_dataset = SeqIntervalDl(**kwargs)
return None
seq_dataset = SeqIntervalDl(**kwargs)
# test the single sample works
reshaped = seq_dataset[0]['inputs']
for i in range(len(reshaped.shape)):
if i == dummy_axis:
assert reshaped.shape[i] == 1
elif i == alphabet_axis:
assert reshaped.shape[i] == alphabet_len
else:
assert reshaped.shape[i] == seq_len
def test_seq_dataset(intervals_file, fasta_file):
    """First sample of ``SeqIntervalDl`` equals the one-hot encoding of "GT".

    Also checks that the sample's ``inputs`` entry is a NumPy array of
    shape ``(2, 4)``.
    """
    first_sample = SeqIntervalDl(intervals_file, fasta_file)[0]
    encoded = first_sample['inputs']
    assert np.all(encoded == one_hot_dna("GT"))
    assert isinstance(encoded, np.ndarray)
    assert encoded.shape == (2, 4)
def test_var_eff_pred_varseq(tmpdir):
model_name = "DeepSEA/variantEffects"
if INSTALL_REQ:
install_model_requirements(model_name, "kipoi", and_dataloaders=True)
#
model = kipoi.get_model(model_name, source="kipoi")
# The preprocessor
Dataloader = SeqIntervalDl
#
dataloader_arguments = {"intervals_file": "example_files/intervals.bed",
"fasta_file": "example_files/hg38_chr22.fa",
"required_seq_len": 1000, "alphabet_axis": 1, "dummy_axis": 2, "label_dtype": str}
dataloader_arguments = {k: model.source_dir + "/" + v if isinstance(v, str) else v for k, v in
dataloader_arguments.items()}
vcf_path = "tests/data/variants.vcf"
out_vcf_fpath = str(tmpdir.mkdir("variants_generated", ).join("out.vcf"))
#
vcf_path = kipoi_veff.ensure_tabixed_vcf(vcf_path)
model_info = kipoi_veff.ModelInfoExtractor(model, Dataloader)
writer = kipoi_veff.VcfWriter(
model, vcf_path, out_vcf_fpath, standardise_var_id=True)
vcf_to_region = kipoi_veff.SnvCenteredRg(model_info)
res = sp.predict_snvs(model, Dataloader, vcf_path, dataloader_args=dataloader_arguments,
def test_deepsea():
    """Build a ``ModelInfoExtractor`` for the "DeepSEA/variantEffects" model.

    The model is obtained through ``kipoi.get_model`` and passed to
    ``ModelInfoExtractor`` together with ``SeqIntervalDl``.
    """
    model = kipoi.get_model("DeepSEA/variantEffects")
    # NOTE(review): `mie` is not used in the visible lines; the test may
    # continue beyond this excerpt - confirm before relying on it.
    mie = ModelInfoExtractor(model, SeqIntervalDl)