How to use the toil.fileStore.FileID.forPath function in toil

To help you get started, we’ve selected a few toil examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / augustus_cgp.py View on Github external
:param args: dictionary of arguments from CAT
    :param toil_options: toil options Namespace object
    :return:
    """
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.hal = FileID.forPath(t.importFile('file://' + args.hal), args.hal)
            input_file_ids.chrom_sizes = FileID.forPath(t.importFile('file://' + args.query_sizes), args.query_sizes)
            input_file_ids.hints_db = FileID.forPath(t.importFile('file://' + args.hints_db), args.hints_db)
            if args.cgp_param is not None:
                input_file_ids.cgp_param = FileID.forPath(t.importFile('file://' + args.cgp_param), args.cgp_param)
            else:
                input_file_ids.cgp_param = None
                input_file_ids.gtf = FileID.forPath(t.importFile('file://' + args.gtf), args.gtf)
            input_file_ids.cgp_cfg = FileID.forPath(t.importFile('file://' + args.cgp_cfg), args.cgp_cfg)
            input_file_ids.fasta = {genome: FileID.forPath(t.importFile('file://' + fasta), fasta)
                                    for genome, fasta in args.fasta_files.iteritems()}
            du = tools.toilInterface.find_total_disk_usage([input_file_ids.hints_db], buffer='4G')
            job = Job.wrapJobFn(setup, args, input_file_ids, memory='8G', disk=du)
            results, stdout_file_ids, param_file_id = t.start(job)
        else:
            results, stdout_file_ids, param_file_id = t.restart()
        tools.fileOps.ensure_file_dir(args.stdout_file)
        with open(args.stdout_file, 'w') as outf, tools.fileOps.TemporaryFilePath() as tmp:
            for (chrom, start, chunksize), stdout_file in stdout_file_ids.iteritems():
                outf.write('## BEGIN CHUNK chrom: {} start: {} chunksize: {}\n'.format(chrom, start, chunksize))
                t.exportFile(stdout_file, 'file://' + tmp)
                for l in open(tmp):
                    outf.write(l)
        for genome, (raw_gtf_file_id, joined_gtf_file_id, joined_gp_file_id) in results.iteritems():
            tools.fileOps.ensure_file_dir(args.augustus_cgp_raw_gtf[genome])
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / tools / toilInterface.py View on Github external
def write_fasta_to_filestore(toil, fasta_local_path):
    """
    Convenience function that loads a fasta and its associated gdx/flat file into the fileStore.
    Assumes that the paths are consistent with the requirements (i.e. $path.gdx and $path.flat)
    :param toil: Toil context manager
    :param fasta_local_path: Path to local fasta to load.
    :return: List of fileStore IDs for fasta, fasta_gdx, fasta_flat
    """
    fasta_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path), fasta_local_path)
    gdx_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path + '.gdx'), fasta_local_path + '.gdx')
    flat_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path + '.flat'), fasta_local_path + '.flat')
    return fasta_file_id, gdx_file_id, flat_file_id
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / hints_db.py View on Github external
def validate_import_bam(t, bam_path, fasta_sequences, genome):
        validate_bam_fasta_pairs(bam_path, fasta_sequences, genome)
        return [FileID.forPath(t.importFile('file://' + bam_path), bam_path),
                FileID.forPath(t.importFile('file://' + bam_path + '.bai'), bam_path + '.bai')]
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / augustus_cgp.py View on Github external
def augustus_cgp(args, toil_options):
    """
    Main entry function for AugustusCGP toil pipeline
    :param args: dictionary of arguments from CAT
    :param toil_options: toil options Namespace object
    :return:
    """
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.hal = FileID.forPath(t.importFile('file://' + args.hal), args.hal)
            input_file_ids.chrom_sizes = FileID.forPath(t.importFile('file://' + args.query_sizes), args.query_sizes)
            input_file_ids.hints_db = FileID.forPath(t.importFile('file://' + args.hints_db), args.hints_db)
            if args.cgp_param is not None:
                input_file_ids.cgp_param = FileID.forPath(t.importFile('file://' + args.cgp_param), args.cgp_param)
            else:
                input_file_ids.cgp_param = None
                input_file_ids.gtf = FileID.forPath(t.importFile('file://' + args.gtf), args.gtf)
            input_file_ids.cgp_cfg = FileID.forPath(t.importFile('file://' + args.cgp_cfg), args.cgp_cfg)
            input_file_ids.fasta = {genome: FileID.forPath(t.importFile('file://' + fasta), fasta)
                                    for genome, fasta in args.fasta_files.iteritems()}
            du = tools.toilInterface.find_total_disk_usage([input_file_ids.hints_db], buffer='4G')
            job = Job.wrapJobFn(setup, args, input_file_ids, memory='8G', disk=du)
            results, stdout_file_ids, param_file_id = t.start(job)
        else:
            results, stdout_file_ids, param_file_id = t.restart()
        tools.fileOps.ensure_file_dir(args.stdout_file)
        with open(args.stdout_file, 'w') as outf, tools.fileOps.TemporaryFilePath() as tmp:
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / tools / toilInterface.py View on Github external
def write_fasta_to_filestore(toil, fasta_local_path):
    """
    Convenience function that loads a fasta and its associated gdx/flat file into the fileStore.
    Assumes that the paths are consistent with the requirements (i.e. $path.gdx and $path.flat)
    :param toil: Toil context manager
    :param fasta_local_path: Path to local fasta to load.
    :return: List of fileStore IDs for fasta, fasta_gdx, fasta_flat
    """
    fasta_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path), fasta_local_path)
    gdx_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path + '.gdx'), fasta_local_path + '.gdx')
    flat_file_id = FileID.forPath(toil.importFile('file:///' + fasta_local_path + '.flat'), fasta_local_path + '.flat')
    return fasta_file_id, gdx_file_id, flat_file_id
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / chaining.py View on Github external
def chaining(args, toil_options):
    """entry point to this program"""
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.hal = FileID.forPath(t.importFile('file://' + args.hal), args.hal)
            input_file_ids.query_sizes = FileID.forPath(t.importFile('file://' + args.query_sizes), args.query_sizes)
            input_file_ids.query_two_bit = FileID.forPath(t.importFile('file://' + args.query_two_bit),
                                                          args.query_two_bit)
            target_two_bit_file_ids = {genome: FileID.forPath(t.importFile('file://' + f), f)
                                       for genome, f in args.target_two_bits.iteritems()}
            input_file_ids.target_two_bits = target_two_bit_file_ids
            job = Job.wrapJobFn(setup, args, input_file_ids)
            chain_file_ids = t.start(job)
        else:
            chain_file_ids = t.restart()
        for chain_file, chain_file_id in chain_file_ids.iteritems():
            tools.fileOps.ensure_file_dir(chain_file)
            t.exportFile(chain_file_id, 'file://' + chain_file)
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / hints_db.py View on Github external
def validate_import_bam(t, bam_path, fasta_sequences, genome):
        validate_bam_fasta_pairs(bam_path, fasta_sequences, genome)
        return [FileID.forPath(t.importFile('file://' + bam_path), bam_path),
                FileID.forPath(t.importFile('file://' + bam_path + '.bai'), bam_path + '.bai')]
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / augustus_cgp.py View on Github external
:param toil_options: toil options Namespace object
    :return:
    """
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.hal = FileID.forPath(t.importFile('file://' + args.hal), args.hal)
            input_file_ids.chrom_sizes = FileID.forPath(t.importFile('file://' + args.query_sizes), args.query_sizes)
            input_file_ids.hints_db = FileID.forPath(t.importFile('file://' + args.hints_db), args.hints_db)
            if args.cgp_param is not None:
                input_file_ids.cgp_param = FileID.forPath(t.importFile('file://' + args.cgp_param), args.cgp_param)
            else:
                input_file_ids.cgp_param = None
                input_file_ids.gtf = FileID.forPath(t.importFile('file://' + args.gtf), args.gtf)
            input_file_ids.cgp_cfg = FileID.forPath(t.importFile('file://' + args.cgp_cfg), args.cgp_cfg)
            input_file_ids.fasta = {genome: FileID.forPath(t.importFile('file://' + fasta), fasta)
                                    for genome, fasta in args.fasta_files.iteritems()}
            du = tools.toilInterface.find_total_disk_usage([input_file_ids.hints_db], buffer='4G')
            job = Job.wrapJobFn(setup, args, input_file_ids, memory='8G', disk=du)
            results, stdout_file_ids, param_file_id = t.start(job)
        else:
            results, stdout_file_ids, param_file_id = t.restart()
        tools.fileOps.ensure_file_dir(args.stdout_file)
        with open(args.stdout_file, 'w') as outf, tools.fileOps.TemporaryFilePath() as tmp:
            for (chrom, start, chunksize), stdout_file in stdout_file_ids.iteritems():
                outf.write('## BEGIN CHUNK chrom: {} start: {} chunksize: {}\n'.format(chrom, start, chunksize))
                t.exportFile(stdout_file, 'file://' + tmp)
                for l in open(tmp):
                    outf.write(l)
        for genome, (raw_gtf_file_id, joined_gtf_file_id, joined_gp_file_id) in results.iteritems():
            tools.fileOps.ensure_file_dir(args.augustus_cgp_raw_gtf[genome])
            t.exportFile(raw_gtf_file_id, 'file://' + args.augustus_cgp_raw_gtf[genome])
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / hints_db.py View on Github external
# load the IsoSeq data, if we have any
            iso_seq_file_ids = []
            if hints_args.genome in hints_args.cfg['ISO_SEQ_BAM']:
                for bam_path in hints_args.cfg['ISO_SEQ_BAM'][hints_args.genome]:
                    validate_bam_fasta_pairs(bam_path, fasta_sequences, hints_args.genome)
                    iso_seq_file_ids.append(validate_import_bam(t, bam_path, fasta_sequences, hints_args.genome))

            if hints_args.annotation is None:
                annotation_file_id = None
            else:
                annotation_file_id = FileID.forPath(t.importFile('file://' + hints_args.annotation),
                                                    hints_args.annotation)
            if hints_args.protein_fasta is None:
                protein_fasta_file_id = genome_fasta_file_id = None
            else:
                protein_fasta_file_id = FileID.forPath(t.importFile('file://' + hints_args.protein_fasta),
                                                       hints_args.protein_fasta)
                genome_fasta_file_id = FileID.forPath(t.importFile('file://' + hints_args.fasta), hints_args.fasta)

            input_file_ids = {'bams': bam_file_ids,
                              'iso_seq_bams': iso_seq_file_ids,
                              'annotation': annotation_file_id,
                              'protein_fasta': protein_fasta_file_id,
                              'genome_fasta': genome_fasta_file_id}
            if len(input_file_ids['bams']) + len(input_file_ids['iso_seq_bams']) > 0:
                logger.info('All BAMs validated for {}. Beginning Toil hints pipeline'.format(hints_args.genome))

            disk_usage = tools.toilInterface.find_total_disk_usage(input_file_ids)
            job = Job.wrapJobFn(setup_hints, input_file_ids, disk=disk_usage)
            combined_hints = t.start(job)
        else:
            logger.info('Restarting Toil hints pipeline for {}.'.format(hints_args.genome))
github ComparativeGenomicsToolkit / Comparative-Annotation-Toolkit / cat / augustus_pb.py View on Github external
def augustus_pb(args, toil_options):
    """
    Main entry function for AugustusPB toil pipeline
    :param args: dictionary of arguments from CAT
    :param toil_options: toil options Namespace object
    :return:
    """
    with Toil(toil_options) as t:
        if not t.options.restart:
            input_file_ids = argparse.Namespace()
            input_file_ids.genome_fasta = tools.toilInterface.write_fasta_to_filestore(t, args.genome_fasta)
            input_file_ids.chrom_sizes = FileID.forPath(t.importFile('file://' + args.chrom_sizes), args.chrom_sizes)
            input_file_ids.pb_cfg = FileID.forPath(t.importFile('file://' + args.pb_cfg), args.pb_cfg)
            input_file_ids.hints_gff = FileID.forPath(t.importFile('file://' + args.hints_gff), args.hints_gff)
            job = Job.wrapJobFn(setup, args, input_file_ids, memory='16G', disk='32G')
            raw_gtf_file_id, gtf_file_id, joined_gp_file_id = t.start(job)
        else:
            raw_gtf_file_id, gtf_file_id, joined_gp_file_id = t.restart()
        tools.fileOps.ensure_file_dir(args.augustus_pb_raw_gtf)
        t.exportFile(raw_gtf_file_id, 'file://' + args.augustus_pb_raw_gtf)
        t.exportFile(gtf_file_id, 'file://' + args.augustus_pb_gtf)
        t.exportFile(joined_gp_file_id, 'file://' + args.augustus_pb_gp)