How to use pysam - 10 common examples

To help you get started, we’ve selected a few pysam examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pysam-developers / pysam / save / pysam_test2.6.py View on Github external
def setUp(self):
        """Open ``ex1.bam`` in ``"rb"`` mode and keep the handle on ``self.samfile``."""
        bam_path, mode = "ex1.bam", "rb"
        self.samfile = pysam.Samfile(bam_path, mode)
github ga4gh / ga4gh-server / tests / unit / test_converters.py View on Github external
converter.convert()
            samFile = pysam.AlignmentFile(fileHandle.name, "r")
            try:
                # TODO suppressed because of pysam output:
                # [W::sam_parse1] mapped mate cannot have zero coordinate;
                # treated as unmapped
                # and
                # [W::sam_parse1] mapped mate cannot have zero coordinate;
                # treated as unmapped
                # see discussion in
                # https://github.com/ga4gh/ga4gh-server/pull/789
                with utils.suppressOutput():
                    convertedReads = list(samFile.fetch())
            finally:
                samFile.close()
            samFile = pysam.AlignmentFile(readGroupSet.getDataUrl(), "rb")
            try:
                sourceReads = []
                referenceName = reference.getName().encode()
                readGroupName = readGroup.getLocalId().encode()
                for readAlignment in samFile.fetch(referenceName):
                    tags = dict(readAlignment.tags)
                    if 'RG' in tags and tags['RG'] == readGroupName:
                        sourceReads.append(readAlignment)
            finally:
                samFile.close()
            self.verifySamRecordsEqual(sourceReads, convertedReads)
github pysam-developers / pysam / save / pysam_test2.6.py View on Github external
def setUp(self):
        """Open the ``ex1.fa`` FASTA file and keep the handle on ``self.file``."""
        fasta_path = "ex1.fa"
        self.file = pysam.Fastafile(fasta_path)
github calico / basenji / tests / test_data2.py View on Github external
"""Test that the one hot coded sequences match."""
    for gi in range(2):
      # read sequence coordinates
      seqs_bed_file = '%s/sequences%d.bed' % (self.out_dir, gi)
      seq_coords = read_seq_coords(seqs_bed_file)

      # read one hot coding from TF Records
      train_tfrs_str = '%s/tfrecords/train-%d-0.tfr' % (self.out_dir, gi)
      seqs_1hot, _, genomes = self.read_tfrecords(train_tfrs_str)

      # check genome
      self.assertEqual(len(np.unique(genomes)), 1)
      self.assertEqual(genomes[0], gi)

      # open FASTA
      fasta_open = pysam.Fastafile(self.fasta_files[gi])

      # check random sequences
      seq_indexes = random.sample(range(seqs_1hot.shape[0]), 32)
      for si in seq_indexes:
        sc = seq_coords[si]

        seq_fasta = fasta_open.fetch(sc.chr, sc.start, sc.end).upper()
        seq_1hot_dna = hot1_dna(seqs_1hot[si])
        self.assertEqual(seq_fasta, seq_1hot_dna)
github alimanfoo / pysamstats / test_pysamstats.py View on Github external
def test_binned_pad_wg():
    """Check binned statistics computed over the BAM/FASTA fixtures.

    First compares ``pysamstats.stat_coverage_binned`` against
    ``stat_coverage_binned_refimpl`` on the same fixture files.  Then calls
    every entry of ``binned_functions`` with ``window_size=200`` and
    ``window_offset=100`` and checks the set of chromosomes reported as well
    as the first and last ``pos`` value for the first two chromosomes.
    """
    bam_path = 'fixture/test.bam'
    fasta_path = 'fixture/ref.fa'

    expected = stat_coverage_binned_refimpl(Samfile(bam_path),
                                            Fastafile(fasta_path))
    actual = pysamstats.stat_coverage_binned(Samfile(bam_path),
                                             Fastafile(fasta_path))
    _compare_iterators(expected, actual)

    window_opts = dict(window_size=200, window_offset=100)
    all_chroms = [b'Pf3D7_01_v3', b'Pf3D7_02_v3', b'Pf3D7_03_v3']
    # (chromosome, expected first position, expected last position)
    pos_bounds = (
        (b'Pf3D7_01_v3', 100, 50100),
        (b'Pf3D7_02_v3', 100, 60100),
    )

    for f, needs_ref in binned_functions:
        debug(f.__name__)
        # Functions flagged with needs_ref also receive the reference FASTA.
        inputs = [Samfile(bam_path)]
        if needs_ref:
            inputs.append(Fastafile(fasta_path))
        a = f(*inputs, **window_opts)

        assert sorted(set(a['chrom'])) == all_chroms
        for chrom, first_pos, last_pos in pos_bounds:
            eq_(first_pos, a[a['chrom'] == chrom]['pos'][0])
            eq_(last_pos, a[a['chrom'] == chrom]['pos'][-1])
github ga4gh / ga4gh-server / tests / datadriven / test_references.py View on Github external
def __init__(self, referenceSetId, fastaFile):
        """Initialise the parent class, then open ``fastaFile`` with pysam.

        Both arguments are forwarded unchanged to the parent initialiser; the
        opened ``pysam.FastaFile`` handle is stored on ``self._fastaFile``.
        """
        parent = super(ReferenceSetTest, self)
        parent.__init__(referenceSetId, fastaFile)
        self._fastaFile = pysam.FastaFile(fastaFile)
github tskit-dev / msprime / tskit_tests / test_vcf.py View on Github external
def test_all_records(self):
        """Compare the records read by PyVCF and by pysam for every test VCF.

        For each entry of ``test_data`` the same ``vcf_file`` is opened with
        ``vcf.Reader`` and ``pysam.VariantFile``; both are read into lists
        and handed to ``self.verify_records``.
        """
        for datum in test_data:
            vcf_reader = vcf.Reader(filename=datum.vcf_file)
            bcf_file = pysam.VariantFile(datum.vcf_file)
            try:
                pyvcf_records = list(vcf_reader)
                pysam_records = list(bcf_file)
                self.verify_records(datum, pyvcf_records, pysam_records)
            finally:
                # Close the pysam handle even when reading or verification
                # fails, so a failing datum does not leak an open file.
                bcf_file.close()
github galaxyproject / galaxy / test / unit / datatypes / test_vcf.py View on Github external
def test_vcf_gz_set_meta():
    """Check that ``VcfGz.set_meta`` yields an index pysam loads as tabix.

    After ``set_meta`` runs on the ``1.vcf_bgzip`` dataset, the file is
    opened with ``pysam.VariantFile`` using the ``tabix_index`` metadata file
    as ``index_filename``; the resulting ``index`` attribute must be a
    ``pysam.libcbcf.TabixIndex``.
    """
    vcf_gz = VcfGz()
    with get_input_files('1.vcf_bgzip') as input_files, get_dataset(input_files[0], index_attr='tabix_index') as dataset:
        vcf_gz.set_meta(dataset)
        f = pysam.VariantFile(dataset.file_name, index_filename=dataset.metadata.tabix_index.file_name)
        try:
            # isinstance() already returns a bool; no "is True" needed.
            assert isinstance(f.index, pysam.libcbcf.TabixIndex)
        finally:
            # Close the handle before the dataset context manager exits.
            f.close()
github nspies / svviz2 / test / test_genomesource.py View on Github external
def test_realign_align_params(genome_source):
    """Check that changing the minimum seed length changes the alignment.

    A read is built from ``genome_source.get_seq("chr1", 101, 114, "+")`` and
    aligned twice: once before and once after calling
    ``genome_source.bwa.SetMinSeedLength(13)``.
    """
    segment = pysam.AlignedSegment()
    segment.query_sequence = genome_source.get_seq("chr1", 101, 114, "+")

    # With the default seed length no alignment is expected.
    default_hits = genome_source.align(Alignment(segment))
    assert len(default_hits) == 0

    genome_source.bwa.SetMinSeedLength(13)

    # With the seed length set to 13, exactly one alignment is expected.
    seeded_hits = genome_source.align(Alignment(segment))
    assert len(seeded_hits) == 1
    hit = seeded_hits[0]
    assert hit.cigarstring == "14M"
    assert hit.reference_start == 101
github pysam-developers / pysam / save / pysam_test2.6.py View on Github external
("pysam_ex1.depth", (pysam.depth, "ex1.bam" ) ),
                ),
          "faidx" : 
              ( 
                ("ex1.fa.fai", "faidx ex1.fa"), 
                ("pysam_ex1.fa.fai", (pysam.faidx, "ex1.fa") ),
                ),
          "index":
              (
                ("ex1.bam.bai", "index ex1.bam" ),
                ("pysam_ex1.bam.bai", (pysam.index, "pysam_ex1.bam" ) ),
                ),
          "idxstats" :
              ( 
                ("ex1.idxstats", "idxstats ex1.bam > ex1.idxstats" ),
                ("pysam_ex1.idxstats", (pysam.idxstats, "pysam_ex1.bam" ) ),
                ),
          "fixmate" :
              (
                ("ex1.fixmate", "fixmate ex1.bam ex1.fixmate" ),
                ("pysam_ex1.fixmate", (pysam.fixmate, "pysam_ex1.bam pysam_ex1.fixmate") ),
                ),
          "flagstat" :
              (
                ("ex1.flagstat", "flagstat ex1.bam > ex1.flagstat" ),
                ("pysam_ex1.flagstat", (pysam.flagstat, "pysam_ex1.bam") ),
                ),
          "calmd" :
              (
                ("ex1.calmd", "calmd ex1.bam ex1.fa > ex1.calmd" ),
                ("pysam_ex1.calmd", (pysam.calmd, "pysam_ex1.bam ex1.fa") ),
                ),