Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@dxpy.entry_point("main")
def main(quants_a, quants_b):
    """Fetch two quantification files and derive the MAD plot file name.

    Both inputs are wrapped in ``dxpy.DXFile`` handles and downloaded to the
    fixed local names ``quants_a`` and ``quants_b``. The output root name is
    built by ``root_name_from_pair`` from the part of each platform file
    name that precedes its first ``.``.

    Args:
        quants_a: Reference accepted by ``dxpy.DXFile`` for the first file.
        quants_b: Reference accepted by ``dxpy.DXFile`` for the second file.
    """
    # tool_versions.py --applet $script_name --appver $script_ver
    sw_versions = subprocess.check_output(['tool_versions.py', '--dxjson', 'dnanexus-executable.json'])

    dxfile_a = dxpy.DXFile(quants_a)
    dxfile_b = dxpy.DXFile(quants_b)

    # Parenthesised single-argument form: prints the same text under
    # Python 2 and is also valid syntax under Python 3.
    print("* Downloading files...")
    dxpy.download_dxfile(dxfile_a.get_id(), "quants_a")
    dxpy.download_dxfile(dxfile_b.get_id(), "quants_b")

    # Create an appropriate name for output files
    out_root = root_name_from_pair(dxfile_a.name.split('.')[0], dxfile_b.name.split('.')[0])
    mad_plot_file = out_root + '_mad_plot.png'
@dxpy.entry_point('main')
def main(input_bam, paired_end, spp_version):
    """Download the input BAM and derive the intermediate tagAlign name.

    Args:
        input_bam: Reference accepted by ``dxpy.DXFile`` for the BAM file.
        paired_end: Truthy selects the ``'PE2SE'`` infix, falsy ``'SE'``.
        spp_version: Not used in the visible part of this function.
    """
    # The following line(s) initialize your data object inputs on the platform
    # into dxpy.DXDataObject instances that you can start using immediately.
    input_bam_file = dxpy.DXFile(input_bam)
    input_bam_filename = input_bam_file.name

    # str.rstrip('.bam') removes any trailing run of the characters '.', 'b',
    # 'a' and 'm' (e.g. "sample_a.bam" -> "sample_"), so drop the exact
    # suffix instead and leave names without it untouched.
    bam_suffix = '.bam'
    if input_bam_filename.endswith(bam_suffix):
        input_bam_basename = input_bam_filename[:-len(bam_suffix)]
    else:
        input_bam_basename = input_bam_filename

    dxpy.download_dxfile(input_bam_file.get_id(), input_bam_filename)
    intermediate_TA_filename = input_bam_basename + ".tagAlign"
    if paired_end:
        end_infix = 'PE2SE'
    else:
        end_infix = 'SE'
@dxpy.entry_point("postprocess")
def postprocess(indexed_reads, unmapped_reads, reference_tar,
                bwa_version, samtools_version, debug):
    """Resolve the requested samtools and BWA entries before postprocessing.

    Sets the logger level from ``debug``, then looks up ``samtools_version``
    in ``SAMTOOLS_PATH`` and ``bwa_version`` in ``BWA_PATH``.

    Raises:
        AssertionError: If either lookup yields a missing or falsy entry.
    """
    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Explicit raises rather than ``assert`` statements: asserts are removed
    # under ``python -O``, which would let an unsupported version through.
    # AssertionError is kept so existing handlers see the same type.
    samtools = SAMTOOLS_PATH.get(samtools_version)
    if not samtools:
        raise AssertionError("samtools version %s is not supported" % (samtools_version))
    bwa = BWA_PATH.get(bwa_version)
    if not bwa:
        raise AssertionError("BWA version %s is not supported" % (bwa_version))

    logger.info("In postprocess with samtools %s and bwa %s" % (samtools, bwa))
    indexed_reads_filenames = []
    unmapped_reads_filenames = []
@dxpy.entry_point('main')
def main(input_tags, prefix=None):
    """Download the gzipped tags file and log its first line.

    Args:
        input_tags: Reference accepted by ``dxpy.DXFile`` for the tags file.
        prefix: Not used in the visible part of this function.
    """
    input_tags_file = dxpy.DXFile(input_tags)
    input_tags_filename = input_tags_file.name
    dxpy.download_dxfile(input_tags_file.get_id(), input_tags_filename)

    # introspect the file to determine tagAlign (thus SE) or BEDPE (thus PE)
    # strip extension as appropriate
    subprocess.check_output('ls', shell=True)  # output is discarded
    with gzip.open(input_tags_filename) as f:
        firstline = f.readline()

    # Formatting is left to the logger; the emitted message is unchanged.
    logger.info('First line of input_tags:\n%s', firstline)
    se_cols = 6
@dxpy.entry_point('main')
def main(folder_name, key_name, assembly, noupload, force, debug):
#accessions bams contained within the folder named folder_name/bams
#Requires
#. directory structure folder_name/bams/ENCSRxxxabc/ ... /basename[.anything].bam
#. basename contains one or more ENCFF numbers from which the bam is derived
#. bam_filename.flagstat.qc exists
#. raw bam flagstat file exists in folder_name/raw_bams/ENCSRxxxabc/ ... /basename[.anything].flagstat.qc
#if bam file's tags on DNAnexus already contain an ENCFF number, assume it's already accessioned and skip
#create a fully qualified project:filename for submitted_file_name and calculate the file size
#if an ENCFF object exists with the same submitted_file_name, AND it has the same size, skip
#**INFER the experiment accession number from the bam's containing folder
#calculate the md5
@dxpy.entry_point("crop")
def crop(reads1_file, reads2_file, crop_length, debug):
if debug:
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.INFO)
logger.setLevel(logging.INFO)
if crop_length == 'native':
output = dict(zip(
["cropped_reads1", "cropped_reads2"], [reads1_file, reads2_file]))
else:
reads1_filename = dxpy.describe(reads1_file)['name']
reads1_basename = strip_extensions(reads1_filename, STRIP_EXTENSIONS)
dxpy.download_dxfile(reads1_file, reads1_filename)
if reads2_file:
@dxpy.entry_point('main')
def main(reads1, reads2, crop_length, reference_tar,
bwa_aln_params, bwa_version, samtools_version,
keyfile, debug, key=None):
# reads1 and reads2 are expected to be arrays of file identifiers
# identifiers can be DNAnexus files or ENCODE file accession numbers
# For SE, reads2 is empty
# For PE, len(reads1) = len(reads2)
# Multiple PE pairs or SE files are just catted before mapping
# Error on mixed SE/PE - although this can be implemented as just a
# "" entry at that position in reads2 array
# TODO: Add option to down-sample mixed PE/SE to SE
if debug:
logger.setLevel(logging.DEBUG)
else:
@dxpy.entry_point('main')
def main(input_bams):
# Initialize data object inputs on the platform
# into dxpy.DXDataObject instances.
input_bams = [dxpy.DXFile(item) for item in input_bams]
# Download each file input to a new directory in the local file system
# using variable names for the filenames.
# Construct output filenames.
# Dispatch jobs to a pool of workers.
out_paths = []
pool = Pool() # default is pool of cpu_count() workers
for i, bam in enumerate(input_bams):
@dxpy.entry_point('main')
def main(rep1_peaks, rep2_peaks, pooled_peaks, idr_threshold, rank, interactive):
    """Wrap the three peak inputs in DXFile handles and record their names.

    Args:
        rep1_peaks: Reference accepted by ``dxpy.DXFile`` (replicate 1 peaks).
        rep2_peaks: Reference accepted by ``dxpy.DXFile`` (replicate 2 peaks).
        pooled_peaks: Reference accepted by ``dxpy.DXFile`` (pooled peaks).
        idr_threshold: Not used in the visible part of this function.
        rank: Not used in the visible part of this function.
        interactive: Not used in the visible part of this function.
    """
    # Initialize the data object inputs on the platform into
    # dxpy.DXDataObject instances.
    idr_version = 1
    rep1_peaks_file = dxpy.DXFile(rep1_peaks)
    rep2_peaks_file = dxpy.DXFile(rep2_peaks)
    pooled_peaks_file = dxpy.DXFile(pooled_peaks)
    # Platform-side file names, read from each handle's ``name`` attribute.
    rep1_peaks_filename = rep1_peaks_file.name
    rep2_peaks_filename = rep2_peaks_file.name
    pooled_peaks_filename = pooled_peaks_file.name
    # Download the file inputs to the local file system.