How to use the dxpy.download_dxfile function in dxpy

To help you get started, we’ve selected a few dxpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ENCODE-DCC / chip-seq-pipeline / dnanexus / make_trackhub_from_analysis.py View on Github external
"\t\tshortLabel %s\n" %(name[:17]) + \
        "\t\tparent %sviewpeaks on\n" %(accession) + \
        "\t\ttype %s\n" %(tracktype) + \
        "\t\tvisibility dense\n" + \
        "\t\tview PK\n" + \
        "\t\tpriority %d\n\n" %(n)
    n_stanzas = 1
    if not lowpass:
        lowpass = []
    if isinstance(lowpass,int):
        lowpass = [lowpass]
    extra_stanza_count = 0
    for (i, cutoff) in enumerate(lowpass,start=1):
        fn = dx.get_id()
        if not os.path.isfile(fn):
            dxpy.download_dxfile(dx.get_id(),fn)
        cutoffstr = '-lt%d' %(cutoff)
        outfn = fn + cutoffstr
        print fn, os.path.getsize(fn), subprocess.check_output('wc -l %s' %(fn), shell=True).split()[0]
        bed_fn = fn + '.bed'
        common.block_on('bigBedToBed %s %s' %(fn, bed_fn))
        common.run_pipe([
            'cat %s' %(bed_fn),
            r"""awk 'BEGIN{FS="\t";OFS="\t"}{if (($3-$2) < %d) {print $0}}'""" %(cutoff)], outfn)
        print outfn, os.path.getsize(outfn), subprocess.check_output('wc -l %s' %(outfn), shell=True).split()[0]
        if tracktype =='bigBed 6 +':
            as_file = 'narrowPeak.as'
        elif tracktype == 'bigBed 12 +':
            as_file = 'gappedPeak.as'
        else:
            print "Cannot match tracktype %s to any .as file" %(tracktype)
        bb_fn = common.bed2bb(outfn,'mm10.chrom.sizes',as_file)
github dnanexus / dx-toolkit / src / python / dxpy / cli / download.py View on Github external
if file_desc['class'] != 'file':
        print("Skipping non-file data object {name} ({id})".format(**file_desc), file=sys.stderr)
        return

    if file_desc['state'] != 'closed':
        print("Skipping file {name} ({id}) because it is not closed".format(**file_desc), file=sys.stderr)
        return

    try:
        show_progress = args.show_progress
    except AttributeError:
        show_progress = False

    try:
        dxpy.download_dxfile(
                            file_desc['id'],
                            dest_filename,
                            show_progress=show_progress,
                            project=project,
                            describe_output=file_desc)
        return
    except:
        err_exit()
github ENCODE-DCC / chip-seq-pipeline / dnanexus / accessionator / src / accessionator.py View on Github external
pbc_qc_fh = dxpy.find_one_data_object(
				classname="file",
				name='*.pbc.qc',
				name_mode="glob",
				project=dxpy.PROJECT_CONTEXT_ID,
				folder=bam.folder,
				return_handler=True
			)
		except:
			logger.warning("PBC QC file not found ... skipping")
			continue
			pbc_qc_fh = None

		experiment_accession = re.match('\S*(ENC\S{8})',bam.folder).group(1)
		logger.info("Downloading %s" %(bam.name))
		dxpy.download_dxfile(bam.get_id(),bam.name)
		md5_output = subprocess.check_output(' '.join([md5_command, bam.name]), shell=True)
		calculated_md5 = md5_output.partition(' ')[0].rstrip()
		encode_object = FILE_OBJ_TEMPLATE
		encode_object.update({'assembly': assembly})


		notes = {
			'filtered_qc': flagstat_parse(bamqc_fh),
			'qc': flagstat_parse(raw_bamqc_fh),
			'dup_qc': dup_parse(dup_qc_fh),
			'xcor_qc': xcor_parse(xcor_qc_fh),
			'pbc_qc': pbc_parse(pbc_qc_fh),
			'dx-id': bam_description.get('id'),
			'dx-createdBy': bam_description.get('createdBy')
		}
		encode_object.update({
github ENCODE-DCC / chip-seq-pipeline / dnanexus / encode_idr / src / encode_idr.py View on Github external
name='Pool replicates')
    pooled_replicates_xcor_subjob = \
        xcor_only(
            pool_replicates_subjob.get_output_ref("pooled"),
            paired_end,
            spp_version=None,
            name='Pool cross-correlation')
    pooled_replicates_xcor_subjob.wait_on_done()

    pool_ta_link = pool_replicates_subjob.describe()['output'].get("pooled")
    pool_xcor_link = pooled_replicates_xcor_subjob.describe()['output'].get("CC_scores_file")
    pool_ta_file = dxpy.get_handler(pool_ta_link)
    pool_xcor_file = dxpy.get_handler(pool_xcor_link)
    pool_ta_filename = 'poolta_%s' % (pool_ta_file.name)
    pool_xcor_filename = 'poolcc_%s' % (pool_xcor_file.name)
    dxpy.download_dxfile(pool_ta_file.get_id(), pool_ta_filename)
    dxpy.download_dxfile(pool_xcor_file.get_id(), pool_xcor_filename)

    logger.info(subprocess.check_output('set -x; ls -l', shell=True))

    Nt = common.count_lines(reps_peaks_filename)
    logger.info("%d peaks from true replicates (Nt)" % (Nt))
    N1 = common.count_lines(r1pr_peaks_filename)
    logger.info("%d peaks from rep1 self-pseudoreplicates (N1)" % (N1))
    N2 = common.count_lines(r2pr_peaks_filename)
    logger.info("%d peaks from rep2 self-pseudoreplicates (N2)" % (N2))
    Np = common.count_lines(pooledpr_peaks_filename)
    logger.info("%d peaks from pooled pseudoreplicates (Np)" % (Np))

    # generate the conservative set, which is always based on the IDR peaks
    # from true replicates
    conservative_set_filename = \
github ENCODE-DCC / chip-seq-pipeline / dnanexus / encode_idr / src / encode_idr.py View on Github external
rep1_ta_filename = 'r1ta_%s' % (rep1_ta_file.name)
    rep2_ta_filename = 'r2ta_%s' % (rep2_ta_file.name)
    rep1_xcor_filename = 'r1cc_%s' % (rep1_xcor_file.name)
    rep2_xcor_filename = 'r2cc_%s' % (rep2_xcor_file.name)
    chrom_sizes_filename = chrom_sizes_file.name
    as_file_filename = as_file_file.name

    dxpy.download_dxfile(reps_peaks_file.get_id(), reps_peaks_filename)
    dxpy.download_dxfile(r1pr_peaks_file.get_id(), r1pr_peaks_filename)
    dxpy.download_dxfile(r2pr_peaks_file.get_id(), r2pr_peaks_filename)
    dxpy.download_dxfile(pooledpr_peaks_file.get_id(), pooledpr_peaks_filename)
    dxpy.download_dxfile(rep1_ta_file.get_id(), rep1_ta_filename)
    dxpy.download_dxfile(rep2_ta_file.get_id(), rep2_ta_filename)
    dxpy.download_dxfile(rep1_xcor_file.get_id(), rep1_xcor_filename)
    dxpy.download_dxfile(rep2_xcor_file.get_id(), rep2_xcor_filename)
    dxpy.download_dxfile(chrom_sizes_file.get_id(), chrom_sizes_filename)
    dxpy.download_dxfile(as_file_file.get_id(), as_file_filename)

    reps_peaks_filename = common.uncompress(reps_peaks_filename)
    r1pr_peaks_filename = common.uncompress(r1pr_peaks_filename)
    r2pr_peaks_filename = common.uncompress(r2pr_peaks_filename)
    pooledpr_peaks_filename = common.uncompress(pooledpr_peaks_filename)

    pool_applet = dxpy.find_one_data_object(
            classname='applet',
            name='pool',
            project=dxpy.PROJECT_CONTEXT_ID,
            zero_ok=False,
            more_ok=False,
            return_handler=True)
    pool_replicates_subjob = \
        pool_applet.run(
github ENCODE-DCC / chip-seq-pipeline / dnanexus / make_trackhub_from_analysis.py View on Github external
if args.truncate:
                trackDb = open(args.tdbpath,'w')
            else:
                trackDb = open(args.tdbpath,'a')
        else:
            if not os.path.exists(os.path.dirname(args.tdbpath)):
                os.makedirs(os.path.dirname(args.tdbpath))
            trackDb = open(args.tdbpath, 'w')
    else:
        trackDb = open(args.tdbpath,'a')            

    for (output_name, output) in outputs.iteritems():
        local_path = os.path.join(track_directory, output['dx'].name)
        print output_name, output['dx'].get_id(), local_path
        if not args.nodownload:
            dxpy.download_dxfile(output['dx'].get_id(), local_path)
        outputs[output_name].update({'local_path' : local_path})
        #print "Joining %s and %s" %(url_base, os.path.basename(local_path))
        if args.dxf:
            url, headers = output['dx'].get_download_url(duration=sys.maxint, preauthenticated=True)
            outputs[output_name].update({'url': url})
        else:
            outputs[output_name].update({'url': urlparse.urljoin(url_base,os.path.basename(local_path))})
        #print outputs[output_name]['url']

    experiment = encoded_get(urlparse.urljoin(server,'/experiments/%s' %(experiment_accession)), keypair)
    description = '%s %s' %(
        experiment['target']['label'],
        experiment['replicates'][0]['library']['biosample']['biosample_term_name'])
    longLabel = 'E3 TF ChIP - %s - %s' %(experiment_accession, description)
    if args.tag:
        longLabel += ' - %s' %(args.tag)
github ENCODE-DCC / chip-seq-pipeline / dnanexus / pool / src / pool.py View on Github external
def main(inputs, prefix=None):

    input_filenames = []
    for input_file in inputs:
        dxf = dxpy.DXFile(input_file)
        input_filenames.append(dxf.name)
        dxpy.download_dxfile(dxf.get_id(), dxf.name)

    # uses last extension - presumably they are all the same
    extension = splitext(splitext(input_filenames[-1])[0])[1]
    if prefix:
        pooled_filename = prefix + "_pooled%s.gz" % (extension)
    else:
        pooled_filename = \
            '-'.join([splitext(splitext(fn)[0])[0] for fn in input_filenames]) + "_pooled%s.gz" % (extension)
    out, err = common.run_pipe([
        'gzip -dc %s' % (' '.join(input_filenames)),
        'gzip -cn'],
        outfile=pooled_filename)

    pooled = dxpy.upload_local_file(pooled_filename)

    output = {
github ENCODE-DCC / chip-seq-pipeline / dnanexus / overlap_peaks / src / overlap_peaks.py View on Github external
overlapping_peaks_fn    = '%s.replicated.%s' % (basename, peak_type)
    overlapping_peaks_bb_fn = overlapping_peaks_fn + '.bb'
    rejected_peaks_fn       = '%s.rejected.%s' % (basename, peak_type)
    rejected_peaks_bb_fn    = rejected_peaks_fn + '.bb'

    # Intermediate filenames
    overlap_tr_fn = 'replicated_tr.%s' % (peak_type)
    overlap_pr_fn = 'replicated_pr.%s' % (peak_type)

    # Download file inputs to the local file system with local filenames

    dxpy.download_dxfile(rep1_peaks_file.get_id(), rep1_peaks_fn)
    dxpy.download_dxfile(rep2_peaks_file.get_id(), rep2_peaks_fn)
    dxpy.download_dxfile(pooled_peaks_file.get_id(), pooled_peaks_fn)
    dxpy.download_dxfile(rep1_ta_file.get_id(), rep1_ta_fn)
    dxpy.download_dxfile(rep1_xcor_file.get_id(), rep1_xcor_fn)
    dxpy.download_dxfile(chrom_sizes_file.get_id(), chrom_sizes_fn)
    dxpy.download_dxfile(as_file_file.get_id(), as_file_fn)

    logger.info(subprocess.check_output('set -x; ls -l', shell=True))

    # the only difference between the peak_types is how the extra columns are
    # handled
    if peak_type == "narrowPeak":
        awk_command = r"""awk 'BEGIN{FS="\t";OFS="\t"}{s1=$3-$2; s2=$13-$12; if (($21/s1 >= 0.5) || ($21/s2 >= 0.5)) {print $0}}'"""
        cut_command = 'cut -f 1-10'
        bed_type = 'bed6+4'
    elif peak_type == "gappedPeak":
        awk_command = r"""awk 'BEGIN{FS="\t";OFS="\t"}{s1=$3-$2; s2=$18-$17; if (($31/s1 >= 0.5) || ($31/s2 >= 0.5)) {print $0}}'"""
        cut_command = 'cut -f 1-15'
        bed_type = 'bed12+3'
    elif peak_type == "broadPeak":
github ENCODE-DCC / long-rna-seq-pipeline / dnanexus / lrna-qc / src / lrna-qc.py View on Github external
def process(rep1_quant_file, rep2_quant_file):
    # Change the following to process whatever input this stage
    # receives.  You may also want to copy and paste the logic to download
    # and upload files here as well if this stage receives file input
    # and/or makes file output.

    rep1_quants = dxpy.DXFile(rep1_quant_file)
    rep2_quants = dxpy.DXFile(rep2_quant_file)

    # The following line(s) download your file inputs to the local file system
    # using variable names for the filenames.

    dxpy.download_dxfile(rep1_quants.get_id(), "rep1_quants")

    dxpy.download_dxfile(rep2_quants.get_id(), "rep2_quants")
    logger.debug("Run MAD.R")
    mad_output = subprocess.check_output(['Rscript', '/usr/bin/MAD.R', 'rep1_quants', 'rep2_quants'])

    logger.debug(mad_output)
    logger.debug("Upload Plot")
    filename = rep1_quants.name + '_' + rep2_quants.name + '_MAplot.png'
    subprocess.check_call(['mv', "MAplot.png", filename])

    plot_dxfile = dxpy.upload_local_file(filename)

    return {"output": mad_output, "plot": plot_dxfile}
github ENCODE-DCC / chip-seq-pipeline / dnanexus / idr / src / idr.py View on Github external
# dxpy.DXDataObject instances.

    idr_version = 1

    rep1_peaks_file = dxpy.DXFile(rep1_peaks)
    rep2_peaks_file = dxpy.DXFile(rep2_peaks)
    pooled_peaks_file = dxpy.DXFile(pooled_peaks)

    rep1_peaks_filename = rep1_peaks_file.name
    rep2_peaks_filename = rep2_peaks_file.name
    pooled_peaks_filename = pooled_peaks_file.name

    # Download the file inputs to the local file system.

    dxpy.download_dxfile(rep1_peaks_file.get_id(), rep1_peaks_filename)
    dxpy.download_dxfile(rep2_peaks_file.get_id(), rep2_peaks_filename)
    dxpy.download_dxfile(pooled_peaks_file.get_id(), pooled_peaks_filename)    


    rep1_peaks_filename = uncompress(rep1_peaks_filename)
    rep2_peaks_filename = uncompress(rep2_peaks_filename)
    pooled_peaks_filename = uncompress(pooled_peaks_filename)

    print subprocess.check_output('ls -l', shell=True, stderr=subprocess.STDOUT)

    #rep1_vs_rep2_prefix = '%s_vs_%s.IDRv%d' %(os.path.basename(rep1_peaks_filename), os.path.basename(rep2_peaks_filename), idr_version)
    rep1_vs_rep2_prefix = '%sv%s.IDRv%d' %(os.path.basename(rep1_peaks_filename)[0:11], os.path.basename(rep2_peaks_filename)[0:11], idr_version)

    pooled_common_peaks_IDR_filename, IDR_overlap_narrowpeak_filename = run_idr(
        rep1_peaks_filename,
        rep2_peaks_filename,
        pooled_peaks_filename,