How to use the gc3pie.gc3libs.application.codeml.CodemlApplication class in gc3pie

To help you get started, we’ve selected a few gc3pie examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github gc3pie / gc3pie / gc3pie / gc3libs / application / codeml.py View on Github external
return filename
            else:
                return os.path.join(dirname, filename)
        result = {}
        ctl = open(ctl_path, 'r')
        for line in ctl.readlines():
            # remove comments (from '*' to end-of line)
            line = line.split('*')[0]
            # remove leading and trailing whitespace
            line = line.strip()
            # ignore empty lines
            if len(line) == 0:
                continue
            key, value = CodemlApplication._assignment_re.split(
                line, maxsplit=1)
            if key not in CodemlApplication._aux_file_keys:
                continue
            elif key in ['seqfile', 'treefile']:
                result[key] = abspath(value)
            elif key == 'outfile':
                result[key] = value
            # shortcut: if we already have all files, there's no need
            # for scanning the file any more.
            if len(result) == len(CodemlApplication._aux_file_keys):
                ctl.close()
                return result
        # if we get to this point, the ``seqfile = ...`` and
        # ``treefile = ...`` lines were not found; signal this to the
        # caller by raising an exception
        ctl.close()
        raise RuntimeError(
            "Could not extract path to seqfile and/or treefile from '%s'" %
github gc3pie / gc3pie / gc3pie / gc3libs / application / codeml.py View on Github external
# set result dir: where the expected output files
        # will be copied as part of 'terminated'
        if 'result_dir' in extra_args:
            self.result_dir = extra_args['result_dir']

        # output file paths are read from the '.ctl' file below
        outputs = []
        # for each '.ctl' file, extract the referenced "seqfile" and
        # "treefile" and add them to the input list
        for ctl in ctls:
            try:
                # try getting the seqfile/treefile path before we
                # append the '.ctl' file to inputs; if they cannot be
                # found, we do not append the '.ctl' either...
                for (key, path) in CodemlApplication.aux_files(ctl).items():
                    if key in ['seqfile', 'treefile'] and path not in inputs:
                        inputs[path] = os.path.basename(path)

                    if key == 'seqfile':
                        # Parse phy files and fill `aln_info` attribute
                        try:
                            fd = open(path)
                            aln_infos = fd.readline().strip().split()
                            self.aln_info = {
                                'n_seq': int(aln_infos[0]),
                                'aln_len': int(aln_infos[1]),
                            }
                            fd.close()
                        except Exception as ex:
                            gc3libs.log.warning(
                                "Unable to parse `n_seq` and `aln_len` values"
github gc3pie / gc3pie / gc3pie / gc3libs / application / codeml.py View on Github external
def abspath(filename):
            """
            Return `filename` unchanged when it is already an absolute
            path; otherwise return it joined onto `dirname` (a name
            taken from the enclosing scope).
            """
            already_absolute = os.path.isabs(filename)
            return (filename if already_absolute
                    else os.path.join(dirname, filename))
        result = {}
        ctl = open(ctl_path, 'r')
        for line in ctl.readlines():
            # remove comments (from '*' to end-of line)
            line = line.split('*')[0]
            # remove leading and trailing whitespace
            line = line.strip()
            # ignore empty lines
            if len(line) == 0:
                continue
            key, value = CodemlApplication._assignment_re.split(
                line, maxsplit=1)
            if key not in CodemlApplication._aux_file_keys:
                continue
            elif key in ['seqfile', 'treefile']:
                result[key] = abspath(value)
            elif key == 'outfile':
                result[key] = value
            # shortcut: if we already have all files, there's no need
            # for scanning the file any more.
            if len(result) == len(CodemlApplication._aux_file_keys):
                ctl.close()
                return result
        # if we get to this point, the ``seqfile = ...`` and
        # ``treefile = ...`` lines were not found; signal this to the
        # caller by raising an exception
        ctl.close()
github gc3pie / gc3pie / gc3pie / gc3libs / application / codeml.py View on Github external
# if output files parsed OK, then override the exit code and
        # mark the job as successful
        if failed == 0:
            self.execution.returncode = (0, 0)

        # set object attributes based on tag lines in the output
        stdout_path = os.path.join(download_dir, self.stdout)
        if os.path.exists(stdout_path):
            stdout_file = open(stdout_path, 'r')
            for line in stdout_file:
                line = line.strip()
                if line.startswith('HOST:'):
                    tag, self.hostname = CodemlApplication._KEY_VALUE_SEP.split(
                        line, maxsplit=1)
                elif line.startswith('CPU:'):
                    tag, self.cpuinfo = CodemlApplication._KEY_VALUE_SEP.split(
                        line, maxsplit=1)
                    break
            stdout_file.close()

        # set exit code and informational message
        rc = 0
        for n in range(len(self.exists)):
            if not self.exists[n]:
                rc |= 1
            rc *= 2
            if not self.valid[n]:
                rc |= 1
            rc *= 2
        self.execution.exitcode = rc

        # all done
github gc3pie / gc3pie / gc3pie / gc3libs / application / codeml.py View on Github external
os.path.join(
                download_dir,
                filename) for filename in fnmatch.filter(
                os.listdir(download_dir),
                '*.mlc')]
        if len(outputs) == 0:
            # no output retrieved, did ``codeml`` run at all?
            self.execution.exitcode = 127
            return

        # if output files were *not* uploaded to a remote server,
        # then check if they are OK and set exit code based on this
        if self.output_base_url is None:
            failed = 0
            for output_path in outputs:
                match = CodemlApplication._H_WHICH_RE.search(output_path)
                if match:
                    n = int(match.group('n'))
                else:
                    gc3libs.log.debug(
                        "Output file '%s' does not match pattern 'H*.mlc' -- ignoring.")
                    continue  # with next output_path
                duration = CodemlApplication.parse_output_file(output_path)
                if duration == 'no file':
                    self.exists[n] = False
                    self.valid[n] = False
                    failed += 1
                elif duration == 'invalid':
                    self.exists[n] = True
                    self.valid[n] = False
                    failed += 1
                else:
github gc3pie / gc3pie / gc3pie / gc3libs / application / codeml.py View on Github external
def parse_output_file(path):
        """
        Scan the file at `path` for a line matching
        ``CodemlApplication._TIME_USED_RE``.

        Return values:

        * the string ``'no file'`` if `path` does not exist;
        * the string ``'invalid'`` if no line in the file matches;
        * otherwise, the integer number of seconds computed from the
          ``hours``, ``minutes`` and ``seconds`` groups of the first
          matching line.
        """
        if not os.path.exists(path):
            return 'no file'
        # Use a context manager so the file handle is released on every
        # exit path (early return on match, end of file, or an exception
        # raised while converting the matched groups).
        with open(path, 'r') as output_file:
            for line in output_file:
                match = CodemlApplication._TIME_USED_RE.match(line)
                if not match:
                    continue
                # the 'hours' group may not have taken part in the match,
                # in which case it counts as zero hours
                hours_text = match.group('hours')
                hours = 0 if hours_text is None else int(hours_text)
                minutes = int(match.group('minutes'))
                seconds = int(match.group('seconds'))
                # only the first matching line is considered
                return (hours * 3600 + minutes * 60 + seconds)
        # reached end of file without finding a matching line
        return 'invalid'
github gc3pie / gc3pie / gc3pie / gc3libs / application / codeml.py View on Github external
self.execution.exitcode = 127
            return

        # if output files were *not* uploaded to a remote server,
        # then check if they are OK and set exit code based on this
        if self.output_base_url is None:
            failed = 0
            for output_path in outputs:
                match = CodemlApplication._H_WHICH_RE.search(output_path)
                if match:
                    n = int(match.group('n'))
                else:
                    gc3libs.log.debug(
                        "Output file '%s' does not match pattern 'H*.mlc' -- ignoring.")
                    continue  # with next output_path
                duration = CodemlApplication.parse_output_file(output_path)
                if duration == 'no file':
                    self.exists[n] = False
                    self.valid[n] = False
                    failed += 1
                elif duration == 'invalid':
                    self.exists[n] = True
                    self.valid[n] = False
                    failed += 1
                else:
                    self.exists[n] = True
                    self.valid[n] = True
                    self.time_used[n] = duration

        # if output files parsed OK, then move them to 'result_dir'
        if self.result_dir:
            import shutil