Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#mk_matrix: test orientation
@test "mk_matrix_14" {
result=`rm -Rf simple_mat*; cat simple.gtf |gtftk mk_matrix -u 5 -d 5 -t transcript -w 5 -o simple_mat -c simple.chromInfo -V 1 -zn -y simple.bw ; unzip -u simple_mat.zip &>/dev/null; cat simple_mat| grep G0005T001 simple_mat| cut -f8-12| perl -npe 's/\\t/|/g' | sed 's/0000000005//g' | sed 's/69999999995/7/g'`
[ "$result" = "2.0|3.333333|3.333333|2.0|2.666667" ]
}
#mk_matrix: test NO orientation (-nst)
@test "mk_matrix_15" {
result=`rm -Rf simple_mat*; cat simple.gtf |gtftk mk_matrix -nst -u 5 -d 5 -t transcript -w 5 -o simple_mat -c simple.chromInfo -V 1 -zn -y simple.bw ; unzip -u simple_mat.zip &>/dev/null; cat simple_mat| grep G0005T001 simple_mat| cut -f8-12| perl -npe 's/\\t/|/g' | sed 's/0000000005//g' | sed 's/69999999995/7/g'`
[ "$result" = "2.666667|2.0|3.333333|3.333333|2.0" ]
}
'''
# Build the CmdObject describing the "mk_matrix" command (group "coverage").
# The constructed object is not bound to a name in this statement.
# NOTE(review): presumably constructing the CmdObject is what makes the
# command known to gtftk -- confirm against the CmdObject definition.
CmdObject(name="mk_matrix",
message="Compute a coverage matrix (see profile).",
parser=make_parser(),
# `fun` receives the absolute path of this file, not a callable.
fun=os.path.abspath(__file__),
group="coverage",
updated=__updated__,
desc=__doc__,
notes=__notes__,
# Presumably the Bats-style test script held in `test` -- confirm.
test=test)
test = '''
#control_list
@test "control_list_1" {
result=`gtftk control_list -i pygtftk/data/control_list/control_list_data.txt -r pygtftk/data/control_list/control_list_reference.txt -D ; cat control_list/control_list.txt | cut -f2| perl -npe 's/\\n/,/'`
[ "$result" = "V1,2.02,4.04,6.06," ]
}
#control_list
@test "control_list_2" {
result=` rm -Rf control_list`
[ "$result" = "" ]
}
'''
# Build the CmdObject describing the "control_list" command (group
# "miscellaneous") and keep it in the module-level name `cmd`.
# NOTE(review): what CmdObject does with these arguments is not visible
# here -- confirm against the CmdObject definition.
cmd = CmdObject(name="control_list",
message="Returns a list of gene matched for expression based on reference values.",
parser=make_parser(),
# `fun` is the name `control_list`, defined outside this view.
fun=control_list,
desc=__doc__,
updated=__updated__,
notes=__notes__,
group="miscellaneous",
# The Bats-style test script assigned to `test` just above.
test=test,
# `R_LIB` is defined outside this view; presumably the R packages the
# command depends on -- confirm.
rlib=R_LIB)
#bed_to_gtf: test stdin and line number
@test "bed_to_gtf_1" {
result=`gtftk select_by_key -i simple.gtf -k feature -v transcript| gtftk convert -f bed | gtftk bed_to_gtf | wc -l`
[ "$result" -eq 15 ]
}
#bed_to_gtf: test column number
@test "bed_to_gtf_2" {
result=`gtftk select_by_key -i simple.gtf -k feature -v transcript| gtftk convert -f bed | gtftk bed_to_gtf | awk 'BEGIN{FS="\\t"}{print NF}' | sort | uniq`
[ "$result" -eq 9 ]
}
"""
# Build the CmdObject describing the "bed_to_gtf" command (group
# "conversion") and keep it in the module-level name `CMD`.
# NOTE(review): what CmdObject does with these arguments is not visible
# here -- confirm against the CmdObject definition.
CMD = CmdObject(name="bed_to_gtf",
message="Convert a bed file to a gtf but with lots of empty fields...",
parser=make_parser(),
# `fun` receives the absolute path of this file, not a callable.
fun=os.path.abspath(__file__),
updated=__updated__,
desc=__doc__,
group="conversion",
# Presumably the Bats-style test script held in `test` -- confirm.
test=test)
}
#divergent: the number of exons is as expected.
@test "divergent_4" {
result=`gtftk divergent -i simple.gtf -c simple.chromInfo -u 4 -d 4 | awk '$3=="exon"'| wc -l`
[ "$result" -eq 25 ]
}
#divergent: this region contains 4 divergent tx
@test "divergent_5" {
result=`gtftk divergent -u 18 -d 18 -c simple.chromInfo -i simple.gtf | gtftk select_by_key -k feature -v transcript| grep "dist_to_divergent \\"[0-9]"| gtftk tabulate -H -k transcript_id,dist_to_divergent,divergent| wc -l`
[ "$result" -eq 4 ]
}
"""
# Build the CmdObject describing the "divergent" command (group
# "annotation"). The constructed object is not bound to a name here.
# NOTE(review): presumably constructing the CmdObject is what makes the
# command known to gtftk -- confirm against the CmdObject definition.
CmdObject(name="divergent",
message="Find transcripts with divergent promoters.",
parser=make_parser(),
# `fun` receives the absolute path of this file, not a callable.
fun=os.path.abspath(__file__),
desc=__doc__,
updated=__updated__,
group="annotation",
notes=__notes__,
# Presumably the Bats-style test script held in `test` -- confirm.
test=test)
#col_from_tab
@test "col_from_tab_2" {
result=`gtftk get_example | gtftk tabulate -k all -x |gtftk col_from_tab -H -c start,end,seqid| wc -l`
[ "$result" -eq 70 ]
}
#col_from_tab
@test "col_from_tab_3" {
result=`gtftk get_example | gtftk tabulate -k all -x |gtftk col_from_tab -c start,end,seqid| awk 'BEGIN{FS=OFS="\\t"}{print NF}'| sort | uniq`
[ "$result" -eq 3 ]
}
"""
from pygtftk.cmd_object import CmdObject
# Build the CmdObject describing the "col_from_tab" command (group
# "miscellaneous"). The constructed object is not bound to a name here.
# NOTE(review): presumably constructing the CmdObject is what makes the
# command known to gtftk -- confirm in pygtftk.cmd_object.
CmdObject(name="col_from_tab",
message="Select columns from a tabulated file based on their names.",
parser=make_parser(),
# `fun` receives the absolute path of this file, not a callable.
fun=os.path.abspath(__file__),
updated=__updated__,
desc=__doc__,
group="miscellaneous",
# Presumably the Bats-style test script held in `test` -- confirm.
test=test)
# Check that the md5 -r signature is the same after regenerating...
@test "convert_ensembl_11" {
result=`gtftk get_example | grep -v "gene.*gene_id.*G0010"| gtftk convert_ensembl | md5 -r | sed 's/ .*//'`
[ "$result" = "679aa6be7ee8d8402f4d05e05d2b49d5" ]
}
# Delete all genes and transcripts, regenerate, check md5 -r...
@test "convert_ensembl_12" {
result=`gtftk get_example | awk '$3 != "transcript"' | awk '$3 != "gene"' | gtftk convert_ensembl | md5 -r | sed 's/ .*//'`
[ "$result" = "679aa6be7ee8d8402f4d05e05d2b49d5" ]
}
"""
# Build the CmdObject describing the "convert_ensembl" command (group
# "conversion") and keep it in the module-level name `CMD`.
# NOTE(review): what CmdObject does with these arguments is not visible
# here -- confirm against the CmdObject definition.
CMD = CmdObject(name="convert_ensembl",
message="Convert the GTF file to ensembl format. Essentially add 'transcript'/'gene' features.",
parser=make_parser(),
# `fun` receives the absolute path of this file, not a callable.
fun=os.path.abspath(__file__),
updated=__updated__,
notes=__notes__,
desc=__doc__,
group="conversion",
# Presumably the Bats-style test script held in `test` -- confirm.
test=test)
result=`gtftk get_example -d mini_real | gtftk alt_prom H3K4me3_cond_1.bed H3K4me3_cond_2.bed| awk 'BEGIN{FS=OFS="\t"}$3=="CRMP1"||NR==1'| grep ENST00000513911 | grep ENST00000324989 | cut -f 25`
[ "$result" = "1" ]
}
@test "alt_prom_2" {
result=`gtftk get_example -d mini_real | gtftk alt_prom H3K4me3_cond_1.bed H3K4me3_cond_3.bed| awk 'BEGIN{FS=OFS="\t"}$3=="CRMP1"||NR==1'| grep ENST00000513911 | grep ENST00000324989 | cut -f 24`
[ "$result" = "1" ]
}
@test "alt_prom_3" {
result=`gtftk get_example -d mini_real | gtftk alt_prom H3K4me3_cond_2.bed H3K4me3_cond_3.bed| awk 'BEGIN{FS=OFS="\t"}$3=="CRMP1"||NR==1'| grep ENST00000513911 | grep ENST00000324989 | cut -f 24`
[ "$result" = "1" ]
}
"""
# Build the CmdObject describing the "alt_prom" command (group
# "annotation"). The constructed object is not bound to a name here.
# NOTE(review): presumably constructing the CmdObject is what makes the
# command known to gtftk -- confirm against the CmdObject definition.
CmdObject(name='alt_prom',
message='Search for genes with alternative promoters.',
parser=make_parser(),
# `fun` is the name `alt_prom`, defined outside this view.
fun=alt_prom,
desc=__doc__,
notes=__notes__,
updated=__updated__,
group="annotation",
# Presumably the Bats-style test script held in `test` -- confirm.
test=test)
# should be the same as 'cat expected_sequence_minus_rv.fa | md5 -r'
@test "get_tx_seq_20" {
result=`gtftk get_tx_seq -i ids_minus.gtf -g chr1_hg38_10M.fa -l transcript_id | perl -ne 'print uc $_'> observed_sequence_minus_rv.fa; cat observed_sequence_minus_rv.fa | md5 -r | sed 's/ .*//'`
[ "$result" = "6f40e63555a4bb6f849261b0fe9e928c" ]
}
# Check the sequence of tx on minus strand compared to ensembl (no rev_comp).
# should be the same as 'cat expected_sequence_minus_no_rv.fa | md5 -r'
@test "get_tx_seq_21" {
result=`gtftk get_tx_seq -i ids_minus.gtf -g chr1_hg38_10M.fa -l transcript_id -n | perl -ne 'print uc $_'> observed_sequence_minus_no_rv.fa; cat observed_sequence_minus_no_rv.fa | md5 -r | sed 's/ .*//'`
[ "$result" = "87c15b230b6057be091566ac29ada7a1" ]
}
"""
# Build the CmdObject describing the "get_tx_seq" command (group
# "sequences"). The constructed object is not bound to a name here.
# NOTE(review): no `updated=` argument is passed in this call -- confirm
# that the omission is intentional.
CmdObject(name="get_tx_seq",
message="Get transcript sequences in fasta format.",
parser=make_parser(),
# `fun` receives the absolute path of this file, not a callable.
fun=os.path.abspath(__file__),
group="sequences",
desc=__doc__,
notes=__notes__,
# Presumably the Bats-style test script held in `test` -- confirm.
test=test)
#profile: create dataset
@test "profile_21" {
result=`gtftk profile -D -i mini_real_promoter_pr.zip -g bwig -f chrom -o profile_prom_5 -ph 15 -c "#66C2A5,#FC8D62,#8DA0CB,#6734AF" -pf png -if example_09.png`
[ -s "example_09.png" ]
}
#profile: create dataset
@test "profile_22" {
result=`gtftk profile -th classic -D -i mini_real_promoter_pr.zip -g bwig -f chrom -o profile_prom_5 -ph 15 -c "#66C2A5,#FC8D62,#8DA0CB,#6734AF" -pf png -if example_09b.png`
[ -s "example_09b.png" ]
}
'''
# Build the CmdObject describing the "profile" command (group "coverage")
# and keep it in the module-level name `cmd`.
# NOTE(review): what CmdObject does with these arguments is not visible
# here -- confirm against the CmdObject definition.
cmd = CmdObject(name="profile",
message="Create coverage profile using a bigWig as input.",
parser=make_parser(),
# `fun` is the name `draw_profile`, defined outside this view.
fun=draw_profile,
desc=__doc__,
updated=__updated__,
notes=__notes__,
references=__references__,
group="coverage",
# Presumably the Bats-style test script held in `test` -- confirm.
test=test,
# `R_LIB` is defined outside this view; presumably the R packages the
# command depends on -- confirm.
rlib=R_LIB)
[ "$result" -eq 3 ]
}
# Convert: check zero based (bed6)
@test "convert_4" {
result=`gtftk convert -i simple.gtf -n gene_id,transcript_id,start | cut -f2| head -n 1`
[ "$result" -eq 124 ]
}
# Convert: check zero based (bed3).
# The start column (field 2) of the first bed3 line must equal 124.
# Named convert_5 because "convert_4" is already used by the bed6 test just
# above; two tests with the same name cannot coexist in one Bats file.
@test "convert_5" {
result=`gtftk convert -i simple.gtf -f bed3 | cut -f2| head -n 1`
[ "$result" -eq 124 ]
}
'''
# Build the CmdObject describing the "convert" command (group
# "conversion"). The constructed object is not bound to a name here.
# NOTE(review): presumably constructing the CmdObject is what makes the
# command known to gtftk -- confirm against the CmdObject definition.
CmdObject(name="convert",
message="Convert a GTF to various format including bed.",
parser=make_parser(),
# `fun` receives the absolute path of this file, not a callable.
fun=os.path.abspath(__file__),
updated=__updated__,
desc=__doc__,
group="conversion",
# Presumably the Bats-style test script held in `test` -- confirm.
test=test)