Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
custom_print( "[ Reading readdb file ]" )
region_fast5_files = dict()
with open (o_readdb, "r") as file:
for line in file:
l = line.split("\t")
read_id = l.pop(0)
if read_id in region_read_ids:
fast5_file = l.pop(0)
region_fast5_files[str(read_id)] = fast5_file.rstrip()
# --------------------------------------------------------
# PART 5: Make a region BAM and BAI file
# --------------------------------------------------------
new_bam = "reads.bam"
custom_print( "[ Writing to a new BAM file ] \t" + new_bam )
region_reads = pysam.view("-b", o_bam, draft_ga_coords, "-o", new_bam, catch_stdout=False)
new_bam_index = new_bam + ".bai"
custom_print( "[ Writing to a new BAI file ] \t" + new_bam_index )
pysam.index(new_bam, new_bam_index)
# --------------------------------------------------------
# PART 6: With user input ref coords, extract all
# aligned reads within these coordinates
# and make new FASTA file
# --------------------------------------------------------
# detect type of sequences file then handle accordingly
file_type = detect_fa_filetype(o_fa)
new_fa = "reads.fasta"
custom_print( "[ Writing to a new fasta file ]\t" + new_fa )
with open (new_fa, "w") as fout:
if ".gz" in file_type:
if not os.path.exists(bam_ngmlr_file):
if stats_trigger in ['y', 'yes']:
log.debug('[Alignment][ngmlral] - ngmlr -t %s -r %s -q %s -x ont' % (th, ref, fast_Q_file))
ngmlrline = subprocess.Popen(['ngmlr', '-t', str(th), '-r', str(ref), '-q', str(fast_Q_file), '-x ont'],
stdout=subprocess.PIPE)
PairDict = sam_parser(ngmlrline, ngmlr_ext_dir)
else:
sam_nglmr_file = os.path.join(ngmlr_ext_dir, (prefix + '.sam'))
log.debug(
'[Alignment][ngmlral] - ngmlr -t %s -r %s -q %s -o %s -x ont' % (th, ref, fast_Q_file, sam_ngmlr_file))
ngmlrline = subprocess.Popen(
['ngmlr', '-t', str(th), '-r', str(ref), '-q', str(fast_Q_file), '-o', str(sam_ngmlr_file), '-x ont'],
stdout=subprocess.PIPE).wait()
outputfilebam = os.path.join(ngmlr_ext_dir, (prefix + '.tmp.bam'))
log.debug('[Alignment][ngmlral] - samtools view -Sb -@ %s %s -o %s' % (th, sam_nglmr_file, outputfilebam))
pysam.view("-Sb", "-@%s" % str(th), sam_nglmr_file, "-o%s" % outputfilebam, catch_stdout=False)
os.remove(sam_nglmr_file)
pysam.sort(outputfilebam, "-o%s" % bam_ngmlr_file, catch_stdout=False)
log.debug('[Alignment][ngmlral] - samtools index %s -@%s' % (bam_ngmlr_file, str(th)))
pysam.index(bam_ngmlr_file, "-@%s" % str(th), catch_stdout=False)
os.remove(outputfilebam)
else:
log.warning('[Alignment][ngmlral] - file %s already exists!' % bam_ngmlr_file)
try:
shutil.move(ngmlr_ext_dir, os.path.join(work_dir, out_dir))
except shutil.Error:
log.error("Unable to move %s" % ngmlr_ext_dir)
def _get_sort_order(in_bam, config):
for line in pysam.view("-H", in_bam).split("\r\n"):
if line.startswith("@HD"):
for keyval in line.split()[1:]:
key, val = keyval.split(":")
if key == "SO":
return val
aligned_sam_path = 'aligned.sam'
command_pattern = "/minimap2-2.17_x64-linux/minimap2 -ax sr {REFERENCE_PATH} {SRC_FILES} -R '@RG\tID:RSP11055\tPL:ILLUMINA\tPU:NONE\tSM:RSP11055' > {ALIGNED_SAM_PATH}"
command = command_pattern.format(REFERENCE_PATH=reference_path, SRC_FILES=" ".join(files), ALIGNED_SAM_PATH=aligned_sam_path)
start = datetime.now()
logging.info("Command start: {}".format(command))
subprocess.call(command, shell=True)
delta = datetime.now() - start
logging.info("Command finish in {}: {}".format(delta, command))
aligned_sam = pysam.AlignmentFile(aligned_sam_path, "rb")
aligned_bam_path = 'aligned.bam'
aligned_sorted_bam_path = 'aligned.sorted.bam'
bam_content = pysam.view('-bh', aligned_sam_path)
with open(aligned_bam_path, 'wb') as file:
file.write(bam_content)
pysam.sort('-o', aligned_sorted_bam_path, aligned_bam_path)
def get_sam_data(bam_file, regions=None):
args = [bam_file]
if regions:
args.extend(regions)
return pysam.view(*args)
command = command_pattern.format(REFERENCE_PATH=reference_path, SRC_FILES=" ".join(files),
ALIGNED_SAM_PATH=aligned_sam_path)
start = datetime.now()
logging.info("Alignment start: {}".format(command))
subprocess.call(command, shell=True)
delta = datetime.now() - start
logging.info("Alignment finish in {}: {}".format(delta, command))
aligned_bam_path = file_prefix + '.bam'
aligned_sorted_bam_path = file_prefix + '.sorted.bam'
start = datetime.now()
logging.info("Convert to BAM start: {}".format(aligned_sam_path))
bam_content = pysam.view('-bh', aligned_sam_path)
with open(aligned_bam_path, 'wb') as file:
file.write(bam_content)
delta = datetime.now() - start
logging.info("Convert to BAM finish in {}: {}".format(delta, aligned_sam_path))
start = datetime.now()
logging.info("Sort start: {}".format(aligned_bam_path))
pysam.sort('-o', aligned_sorted_bam_path, aligned_bam_path)
delta = datetime.now() - start
logging.info("Sort finish in {}: {}".format(aligned_bam_path, delta))
gcs_uri = gcs_utils.upload_file(client, aligned_sorted_bam_path,
RESULT_SORTED_DIR_NAME + '/' + get_file_name_from_uri(aligned_sorted_bam_path),
"text/plain", 'cannabis-3k-results')
remove_local_dir(files_dir)
ext = ".uniq{0}".format(ext)
if opts.unmapped:
uopts = [x for x in mopts]
uoutfile = samfile.replace(oext, ".unmapped{0}".format(ext))
uopts.extend(["-f4", samfile, "-o{0}".format(uoutfile)])
view_opts.append(uopts)
outfile = samfile.replace(oext, ".mapped{0}".format(ext))
mopts.extend(["-F4", samfile, "-o{0}".format(outfile)])
view_opts.append(mopts)
for vo in view_opts:
logging.debug('samtools view {0}'.format(" ".join(vo)))
jobs = Jobs(pysam.view, [(z for z in x) for x in view_opts])
jobs.run()
def execute(self, inBam, exclude, readList, outBam, picardOptions=None, JVMmemory=None): # pylint: disable=W0221
picardOptions = picardOptions or []
if tools.samtools.SamtoolsTool().isEmpty(inBam):
# Picard FilterSamReads cannot deal with an empty input BAM file
shutil.copyfile(inBam, outBam)
elif os.path.getsize(readList) == 0:
# Picard FilterSamReads cannot deal with an empty READ_LIST_FILE
if exclude:
shutil.copyfile(inBam, outBam)
else:
tmpf = util.file.mkstempfname('.sam')
if inBam.endswith('.sam'):
# output format (sam/bam) is inferred by samtools based on file extension
header = pysam.view('-o', tmpf, '-H', '-S', inBam, catch_stdout=False)
else:
header = pysam.view('-o', tmpf, '-H', inBam, catch_stdout=False)
# pysam.AlignmentFile cannot write an empty file
# samtools cannot convert SAM -> BAM on an empty file
# but Picard SamFormatConverter can deal with empty files
opts = ['INPUT=' + tmpf, 'OUTPUT=' + outBam, 'VERBOSITY=ERROR']
PicardTools.execute(self, 'SamFormatConverter', opts, JVMmemory='50m')
else:
opts = [
'INPUT=' + inBam, 'OUTPUT=' + outBam, 'READ_LIST_FILE=' + readList,
'FILTER=' + (exclude and 'excludeReadList' or 'includeReadList'), 'WRITE_READS_FILES=false'
]
PicardTools.execute(self, self.subtoolName, opts + picardOptions, JVMmemory)
picardOptions = picardOptions or []
if tools.samtools.SamtoolsTool().isEmpty(inBam):
# Picard FilterSamReads cannot deal with an empty input BAM file
shutil.copyfile(inBam, outBam)
elif os.path.getsize(readList) == 0:
# Picard FilterSamReads cannot deal with an empty READ_LIST_FILE
if exclude:
shutil.copyfile(inBam, outBam)
else:
tmpf = util.file.mkstempfname('.sam')
if inBam.endswith('.sam'):
# output format (sam/bam) is inferred by samtools based on file extension
header = pysam.view('-o', tmpf, '-H', '-S', inBam, catch_stdout=False)
else:
header = pysam.view('-o', tmpf, '-H', inBam, catch_stdout=False)
# pysam.AlignmentFile cannot write an empty file
# samtools cannot convert SAM -> BAM on an empty file
# but Picard SamFormatConverter can deal with empty files
opts = ['INPUT=' + tmpf, 'OUTPUT=' + outBam, 'VERBOSITY=ERROR']
PicardTools.execute(self, 'SamFormatConverter', opts, JVMmemory='50m')
else:
opts = [
'INPUT=' + inBam, 'OUTPUT=' + outBam, 'READ_LIST_FILE=' + readList,
'FILTER=' + (exclude and 'excludeReadList' or 'includeReadList'), 'WRITE_READS_FILES=false'
]
PicardTools.execute(self, self.subtoolName, opts + picardOptions, JVMmemory)
picardOptions = picardOptions or []
if tools.samtools.SamtoolsTool().isEmpty(inBam):
# Picard FilterSamReads cannot deal with an empty input BAM file
shutil.copyfile(inBam, outBam)
elif os.path.getsize(readList) == 0:
# Picard FilterSamReads cannot deal with an empty READ_LIST_FILE
if exclude:
shutil.copyfile(inBam, outBam)
else:
tmpf = util.file.mkstempfname('.sam')
if inBam.endswith('.sam'):
# output format (sam/bam) is inferred by samtools based on file extension
header = pysam.view('-o', tmpf, '-H', '-S', inBam, catch_stdout=False)
else:
header = pysam.view('-o', tmpf, '-H', inBam, catch_stdout=False)
# pysam.AlignmentFile cannot write an empty file
# samtools cannot convert SAM -> BAM on an empty file
# but Picard SamFormatConverter can deal with empty files
opts = ['INPUT=' + tmpf, 'OUTPUT=' + outBam, 'VERBOSITY=ERROR']
PicardTools.execute(self, 'SamFormatConverter', opts, JVMmemory='50m')
else:
opts = [
'INPUT=' + inBam, 'OUTPUT=' + outBam, 'READ_LIST_FILE=' + readList,
'FILTER=' + (exclude and 'excludeReadList' or 'includeReadList'), 'WRITE_READS_FILES=false'
]
PicardTools.execute(self, self.subtoolName, opts + picardOptions, JVMmemory)