Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"\t\tshortLabel %s\n" %(name[:17]) + \
"\t\tparent %sviewpeaks on\n" %(accession) + \
"\t\ttype %s\n" %(tracktype) + \
"\t\tvisibility dense\n" + \
"\t\tview PK\n" + \
"\t\tpriority %d\n\n" %(n)
n_stanzas = 1
if not lowpass:
lowpass = []
if isinstance(lowpass,int):
lowpass = [lowpass]
extra_stanza_count = 0
for (i, cutoff) in enumerate(lowpass,start=1):
fn = dx.get_id()
if not os.path.isfile(fn):
dxpy.download_dxfile(dx.get_id(),fn)
cutoffstr = '-lt%d' %(cutoff)
outfn = fn + cutoffstr
print fn, os.path.getsize(fn), subprocess.check_output('wc -l %s' %(fn), shell=True).split()[0]
bed_fn = fn + '.bed'
common.block_on('bigBedToBed %s %s' %(fn, bed_fn))
common.run_pipe([
'cat %s' %(bed_fn),
r"""awk 'BEGIN{FS="\t";OFS="\t"}{if (($3-$2) < %d) {print $0}}'""" %(cutoff)], outfn)
print outfn, os.path.getsize(outfn), subprocess.check_output('wc -l %s' %(outfn), shell=True).split()[0]
if tracktype =='bigBed 6 +':
as_file = 'narrowPeak.as'
elif tracktype == 'bigBed 12 +':
as_file = 'gappedPeak.as'
else:
print "Cannot match tracktype %s to any .as file" %(tracktype)
bb_fn = common.bed2bb(outfn,'mm10.chrom.sizes',as_file)
if file_desc['class'] != 'file':
print("Skipping non-file data object {name} ({id})".format(**file_desc), file=sys.stderr)
return
if file_desc['state'] != 'closed':
print("Skipping file {name} ({id}) because it is not closed".format(**file_desc), file=sys.stderr)
return
try:
show_progress = args.show_progress
except AttributeError:
show_progress = False
try:
dxpy.download_dxfile(
file_desc['id'],
dest_filename,
show_progress=show_progress,
project=project,
describe_output=file_desc)
return
except:
err_exit()
pbc_qc_fh = dxpy.find_one_data_object(
classname="file",
name='*.pbc.qc',
name_mode="glob",
project=dxpy.PROJECT_CONTEXT_ID,
folder=bam.folder,
return_handler=True
)
except:
logger.warning("PBC QC file not found ... skipping")
continue
pbc_qc_fh = None
experiment_accession = re.match('\S*(ENC\S{8})',bam.folder).group(1)
logger.info("Downloading %s" %(bam.name))
dxpy.download_dxfile(bam.get_id(),bam.name)
md5_output = subprocess.check_output(' '.join([md5_command, bam.name]), shell=True)
calculated_md5 = md5_output.partition(' ')[0].rstrip()
encode_object = FILE_OBJ_TEMPLATE
encode_object.update({'assembly': assembly})
notes = {
'filtered_qc': flagstat_parse(bamqc_fh),
'qc': flagstat_parse(raw_bamqc_fh),
'dup_qc': dup_parse(dup_qc_fh),
'xcor_qc': xcor_parse(xcor_qc_fh),
'pbc_qc': pbc_parse(pbc_qc_fh),
'dx-id': bam_description.get('id'),
'dx-createdBy': bam_description.get('createdBy')
}
encode_object.update({
name='Pool replicates')
pooled_replicates_xcor_subjob = \
xcor_only(
pool_replicates_subjob.get_output_ref("pooled"),
paired_end,
spp_version=None,
name='Pool cross-correlation')
pooled_replicates_xcor_subjob.wait_on_done()
pool_ta_link = pool_replicates_subjob.describe()['output'].get("pooled")
pool_xcor_link = pooled_replicates_xcor_subjob.describe()['output'].get("CC_scores_file")
pool_ta_file = dxpy.get_handler(pool_ta_link)
pool_xcor_file = dxpy.get_handler(pool_xcor_link)
pool_ta_filename = 'poolta_%s' % (pool_ta_file.name)
pool_xcor_filename = 'poolcc_%s' % (pool_xcor_file.name)
dxpy.download_dxfile(pool_ta_file.get_id(), pool_ta_filename)
dxpy.download_dxfile(pool_xcor_file.get_id(), pool_xcor_filename)
logger.info(subprocess.check_output('set -x; ls -l', shell=True))
Nt = common.count_lines(reps_peaks_filename)
logger.info("%d peaks from true replicates (Nt)" % (Nt))
N1 = common.count_lines(r1pr_peaks_filename)
logger.info("%d peaks from rep1 self-pseudoreplicates (N1)" % (N1))
N2 = common.count_lines(r2pr_peaks_filename)
logger.info("%d peaks from rep2 self-pseudoreplicates (N2)" % (N2))
Np = common.count_lines(pooledpr_peaks_filename)
logger.info("%d peaks from pooled pseudoreplicates (Np)" % (Np))
# generate the conservative set, which is always based on the IDR peaks
# from true replicates
conservative_set_filename = \
rep1_ta_filename = 'r1ta_%s' % (rep1_ta_file.name)
rep2_ta_filename = 'r2ta_%s' % (rep2_ta_file.name)
rep1_xcor_filename = 'r1cc_%s' % (rep1_xcor_file.name)
rep2_xcor_filename = 'r2cc_%s' % (rep2_xcor_file.name)
chrom_sizes_filename = chrom_sizes_file.name
as_file_filename = as_file_file.name
dxpy.download_dxfile(reps_peaks_file.get_id(), reps_peaks_filename)
dxpy.download_dxfile(r1pr_peaks_file.get_id(), r1pr_peaks_filename)
dxpy.download_dxfile(r2pr_peaks_file.get_id(), r2pr_peaks_filename)
dxpy.download_dxfile(pooledpr_peaks_file.get_id(), pooledpr_peaks_filename)
dxpy.download_dxfile(rep1_ta_file.get_id(), rep1_ta_filename)
dxpy.download_dxfile(rep2_ta_file.get_id(), rep2_ta_filename)
dxpy.download_dxfile(rep1_xcor_file.get_id(), rep1_xcor_filename)
dxpy.download_dxfile(rep2_xcor_file.get_id(), rep2_xcor_filename)
dxpy.download_dxfile(chrom_sizes_file.get_id(), chrom_sizes_filename)
dxpy.download_dxfile(as_file_file.get_id(), as_file_filename)
reps_peaks_filename = common.uncompress(reps_peaks_filename)
r1pr_peaks_filename = common.uncompress(r1pr_peaks_filename)
r2pr_peaks_filename = common.uncompress(r2pr_peaks_filename)
pooledpr_peaks_filename = common.uncompress(pooledpr_peaks_filename)
pool_applet = dxpy.find_one_data_object(
classname='applet',
name='pool',
project=dxpy.PROJECT_CONTEXT_ID,
zero_ok=False,
more_ok=False,
return_handler=True)
pool_replicates_subjob = \
pool_applet.run(
if args.truncate:
trackDb = open(args.tdbpath,'w')
else:
trackDb = open(args.tdbpath,'a')
else:
if not os.path.exists(os.path.dirname(args.tdbpath)):
os.makedirs(os.path.dirname(args.tdbpath))
trackDb = open(args.tdbpath, 'w')
else:
trackDb = open(args.tdbpath,'a')
for (output_name, output) in outputs.iteritems():
local_path = os.path.join(track_directory, output['dx'].name)
print output_name, output['dx'].get_id(), local_path
if not args.nodownload:
dxpy.download_dxfile(output['dx'].get_id(), local_path)
outputs[output_name].update({'local_path' : local_path})
#print "Joining %s and %s" %(url_base, os.path.basename(local_path))
if args.dxf:
url, headers = output['dx'].get_download_url(duration=sys.maxint, preauthenticated=True)
outputs[output_name].update({'url': url})
else:
outputs[output_name].update({'url': urlparse.urljoin(url_base,os.path.basename(local_path))})
#print outputs[output_name]['url']
experiment = encoded_get(urlparse.urljoin(server,'/experiments/%s' %(experiment_accession)), keypair)
description = '%s %s' %(
experiment['target']['label'],
experiment['replicates'][0]['library']['biosample']['biosample_term_name'])
longLabel = 'E3 TF ChIP - %s - %s' %(experiment_accession, description)
if args.tag:
longLabel += ' - %s' %(args.tag)
def main(inputs, prefix=None):
input_filenames = []
for input_file in inputs:
dxf = dxpy.DXFile(input_file)
input_filenames.append(dxf.name)
dxpy.download_dxfile(dxf.get_id(), dxf.name)
# uses last extension - presumably they are all the same
extension = splitext(splitext(input_filenames[-1])[0])[1]
if prefix:
pooled_filename = prefix + "_pooled%s.gz" % (extension)
else:
pooled_filename = \
'-'.join([splitext(splitext(fn)[0])[0] for fn in input_filenames]) + "_pooled%s.gz" % (extension)
out, err = common.run_pipe([
'gzip -dc %s' % (' '.join(input_filenames)),
'gzip -cn'],
outfile=pooled_filename)
pooled = dxpy.upload_local_file(pooled_filename)
output = {
overlapping_peaks_fn = '%s.replicated.%s' % (basename, peak_type)
overlapping_peaks_bb_fn = overlapping_peaks_fn + '.bb'
rejected_peaks_fn = '%s.rejected.%s' % (basename, peak_type)
rejected_peaks_bb_fn = rejected_peaks_fn + '.bb'
# Intermediate filenames
overlap_tr_fn = 'replicated_tr.%s' % (peak_type)
overlap_pr_fn = 'replicated_pr.%s' % (peak_type)
# Download file inputs to the local file system with local filenames
dxpy.download_dxfile(rep1_peaks_file.get_id(), rep1_peaks_fn)
dxpy.download_dxfile(rep2_peaks_file.get_id(), rep2_peaks_fn)
dxpy.download_dxfile(pooled_peaks_file.get_id(), pooled_peaks_fn)
dxpy.download_dxfile(rep1_ta_file.get_id(), rep1_ta_fn)
dxpy.download_dxfile(rep1_xcor_file.get_id(), rep1_xcor_fn)
dxpy.download_dxfile(chrom_sizes_file.get_id(), chrom_sizes_fn)
dxpy.download_dxfile(as_file_file.get_id(), as_file_fn)
logger.info(subprocess.check_output('set -x; ls -l', shell=True))
# the only difference between the peak_types is how the extra columns are
# handled
if peak_type == "narrowPeak":
awk_command = r"""awk 'BEGIN{FS="\t";OFS="\t"}{s1=$3-$2; s2=$13-$12; if (($21/s1 >= 0.5) || ($21/s2 >= 0.5)) {print $0}}'"""
cut_command = 'cut -f 1-10'
bed_type = 'bed6+4'
elif peak_type == "gappedPeak":
awk_command = r"""awk 'BEGIN{FS="\t";OFS="\t"}{s1=$3-$2; s2=$18-$17; if (($31/s1 >= 0.5) || ($31/s2 >= 0.5)) {print $0}}'"""
cut_command = 'cut -f 1-15'
bed_type = 'bed12+3'
elif peak_type == "broadPeak":
def process(rep1_quant_file, rep2_quant_file):
# Change the following to process whatever input this stage
# receives. You may also want to copy and paste the logic to download
# and upload files here as well if this stage receives file input
# and/or makes file output.
rep1_quants = dxpy.DXFile(rep1_quant_file)
rep2_quants = dxpy.DXFile(rep2_quant_file)
# The following line(s) download your file inputs to the local file system
# using variable names for the filenames.
dxpy.download_dxfile(rep1_quants.get_id(), "rep1_quants")
dxpy.download_dxfile(rep2_quants.get_id(), "rep2_quants")
logger.debug("Run MAD.R")
mad_output = subprocess.check_output(['Rscript', '/usr/bin/MAD.R', 'rep1_quants', 'rep2_quants'])
logger.debug(mad_output)
logger.debug("Upload Plot")
filename = rep1_quants.name + '_' + rep2_quants.name + '_MAplot.png'
subprocess.check_call(['mv', "MAplot.png", filename])
plot_dxfile = dxpy.upload_local_file(filename)
return {"output": mad_output, "plot": plot_dxfile}
# dxpy.DXDataObject instances.
idr_version = 1
rep1_peaks_file = dxpy.DXFile(rep1_peaks)
rep2_peaks_file = dxpy.DXFile(rep2_peaks)
pooled_peaks_file = dxpy.DXFile(pooled_peaks)
rep1_peaks_filename = rep1_peaks_file.name
rep2_peaks_filename = rep2_peaks_file.name
pooled_peaks_filename = pooled_peaks_file.name
# Download the file inputs to the local file system.
dxpy.download_dxfile(rep1_peaks_file.get_id(), rep1_peaks_filename)
dxpy.download_dxfile(rep2_peaks_file.get_id(), rep2_peaks_filename)
dxpy.download_dxfile(pooled_peaks_file.get_id(), pooled_peaks_filename)
rep1_peaks_filename = uncompress(rep1_peaks_filename)
rep2_peaks_filename = uncompress(rep2_peaks_filename)
pooled_peaks_filename = uncompress(pooled_peaks_filename)
print subprocess.check_output('ls -l', shell=True, stderr=subprocess.STDOUT)
#rep1_vs_rep2_prefix = '%s_vs_%s.IDRv%d' %(os.path.basename(rep1_peaks_filename), os.path.basename(rep2_peaks_filename), idr_version)
rep1_vs_rep2_prefix = '%sv%s.IDRv%d' %(os.path.basename(rep1_peaks_filename)[0:11], os.path.basename(rep2_peaks_filename)[0:11], idr_version)
pooled_common_peaks_IDR_filename, IDR_overlap_narrowpeak_filename = run_idr(
rep1_peaks_filename,
rep2_peaks_filename,
pooled_peaks_filename,