How to use the pomoxis.util.Region function in pomoxis

To help you get started, we’ve selected a few pomoxis examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nanoporetech / pomoxis / pomoxis / coverage_from_bam.py View on Github external
parser.add_argument('bam', help='.fasta/fastq file.')
    parser.add_argument('-r', '--regions', nargs='+', help='Only process given regions.')
    parser.add_argument('-p', '--prefix', help='Prefix for output, defaults to basename of bam.')
    parser.add_argument('-s', '--stride', type=int, default=1000, help='Stride in genomic coordinate.')
    parser.add_argument('--summary_only', action='store_true',
                        help='Output only the depth_summary.txt file')

    args = parser.parse_args()

    bam = pysam.AlignmentFile(args.bam)
    ref_lengths = dict(zip(bam.references, bam.lengths))

    if args.regions is not None:
        regions = parse_regions(args.regions, ref_lengths=ref_lengths)
    else:
        regions = [Region(ref_name=r, start=0, end=ref_lengths[r]) for r in bam.references]

    summary = {}
    for region in regions:

        # write final depth
        prefix = args.prefix
        if prefix is None:
            prefix = os.path.splitext(os.path.basename(args.bam))[0]

        region_str = '{}_{}_{}'.format(region.ref_name, region.start, region.end)
        df = coverage_of_region(region, args.bam, args.stride)
        summary[region_str] = df['depth'].describe()

        if not args.summary_only:
            depth_fp = '{}_{}.depth.txt'.format(prefix, region_str)
            df.to_csv(depth_fp, sep='\t', index=False)
github nanoporetech / pomoxis / pomoxis / subsample_bam.py View on Github external
pparser.add_argument('-P', '--proportional', default=False, action='store_true',
        help='Activate proportional sampling, thus keeping depth variations of the pileup.')
    pparser.add_argument('-S', '--seed', default=None, type=int,
        help='Random seed for proportional downsampling of reads.')

    args = parser.parse_args()
    if args.threads == -1:
        args.threads = multiprocessing.cpu_count()

    with pysam.AlignmentFile(args.bam) as bam:
        ref_lengths = dict(zip(bam.references, bam.lengths))

        if args.regions is not None:
            regions = parse_regions(args.regions, ref_lengths=ref_lengths)
        else:
            regions = [Region(ref_name=r, start=0, end=ref_lengths[r]) for r in bam.references]

    if args.proportional:
        worker = functools.partial(subsample_region_proportionally, args=args)
    else:
        worker = functools.partial(subsample_region_uniformly, args=args)

    enough_depth = []
    with ProcessPoolExecutor(max_workers=args.threads) as executor:
        for res in executor.map(worker, regions):
            enough_depth.append(res)

    if args.any_fail and not all(enough_depth):
        raise RuntimeError('Insufficient read coverage for one or more requested regions.')
    if args.all_fail and all([not x for x in enough_depth]):
        raise RuntimeError('Insufficient read coverage for all requested regions.')