How to use the oletools.ooxml.BadOOXML exception class in oletools

To help you get started, we’ve selected a few oletools examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github decalage2 / oletools / oletools / ooxml.py View on Github external
elif isstr(args):
                    subfiles = [args, ]
                else:
                    # make a copy in case original args are modified
                    # Not sure whether this really is needed...
                    subfiles = tuple(arg for arg in args)

                for subfile in subfiles:
                    with zipper.open(subfile, 'r') as handle:
                        yield subfile, handle
                if not args:
                    self.did_iter_all = True
            except KeyError as orig_err:
                # Note: do not change text of this message without adjusting
                #       conditions in except handlers
                raise BadOOXML(self.filename,
                               'invalid subfile: ' + str(orig_err))
            except BadZipfile:
                raise BadOOXML(self.filename, 'not in zip format')
            finally:
                if zipper:
                    zipper.close()
github decalage2 / oletools / oletools / ooxml.py View on Github external
return DOCTYPE_NONE

    is_doc = False
    is_xls = False
    is_ppt = False
    try:
        for _, elem, _ in parser.iter_xml(FILE_CONTENT_TYPES):
            logger.debug(u'  ' + debug_str(elem))
            try:
                content_type = elem.attrib['ContentType']
            except KeyError:         # ContentType not an attr
                continue
            is_xls |= content_type.startswith(CONTENT_TYPES_EXCEL)
            is_doc |= content_type.startswith(CONTENT_TYPES_WORD)
            is_ppt |= content_type.startswith(CONTENT_TYPES_PPT)
    except BadOOXML as oo_err:
        if oo_err.more_info.startswith('invalid subfile') and \
                FILE_CONTENT_TYPES in oo_err.more_info:
            # no FILE_CONTENT_TYPES in zip, so probably no ms office xml.
            return DOCTYPE_NONE
        raise

    if is_doc and not is_xls and not is_ppt:
        return DOCTYPE_WORD
    if not is_doc and is_xls and not is_ppt:
        return DOCTYPE_EXCEL
    if not is_doc and not is_xls and is_ppt:
        return DOCTYPE_POWERPOINT
    if not is_doc and not is_xls and not is_ppt:
        return DOCTYPE_NONE
    logger.warning('Encountered contradictory content types')
    return DOCTYPE_MIXED
github decalage2 / oletools / oletools / ooxml.py View on Github external
def iter_files(self, args=None):
        """ Find files in zip or just give single xml file """
        if self.is_single_xml():
            if args:
                raise BadOOXML(self.filename, 'xml has no subfiles')
            with open(self.filename, 'rb') as handle:
                yield None, handle   # the subfile=None is needed in iter_xml
            self.did_iter_all = True
        else:
            zipper = None
            subfiles = None
            try:
                zipper = ZipFile(self.filename)
                if not args:
                    subfiles = zipper.namelist()
                elif isstr(args):
                    subfiles = [args, ]
                else:
                    # make a copy in case original args are modified
                    # Not sure whether this really is needed...
                    subfiles = tuple(arg for arg in args)
github decalage2 / oletools / oletools / msodde.py View on Github external
# have a TAG_W_P
        for curr_elem in subs:
            # check if w:r; parse children to pull out first FLDCHAR/INSTRTEXT
            elem = None
            if curr_elem.tag in TAG_W_R:
                for child in curr_elem:
                    if child.tag in TAG_W_FLDCHAR or \
                            child.tag in TAG_W_INSTRTEXT:
                        elem = child
                        break
                if elem is None:
                    continue   # no fldchar or instrtext in this w:r
            else:
                elem = curr_elem
            if elem is None:
                raise ooxml.BadOOXML(filepath,
                                     'Got "None"-Element from iter_xml')

            # check if FLDCHARTYPE and whether "begin" or "end" tag
            attrib_type = elem.attrib.get(ATTR_W_FLDCHARTYPE[0]) or \
                          elem.attrib.get(ATTR_W_FLDCHARTYPE[1])
            if attrib_type is not None:
                if attrib_type == "begin":
                    level += 1
                if attrib_type == "end":
                    level -= 1
                    if level in (0, -1):  # edge-case; level gets -1
                        all_fields.append(ddetext)
                        ddetext = u''
                        level = 0  # reset edge-case

            # concatenate the text of the field, if present:
github decalage2 / oletools / oletools / ooxml.py View on Github external
"""
        if self._is_single_xml is not None:
            return self._is_single_xml

        if is_zipfile(self.filename):
            self._is_single_xml = False
            return False

        # find prog id in xml prolog
        match = None
        with open(self.filename, 'r') as handle:
            match = re.search(OFFICE_XML_PROGID_REGEX, handle.read(1024))
        if match:
            self._is_single_xml = True
            return True
        raise BadOOXML(self.filename, 'is no zip and has no prog_id')
github decalage2 / oletools / oletools / ooxml.py View on Github external
def __init__(self, filename, more_info=None):
        """Build the exception message and keep the inputs as attributes.

        The message reads ``"<filename> is not an Office XML file"``; when
        `more_info` is truthy it is appended after a ``": "`` separator.

        :param filename: name of the file that was rejected; stored as
                         ``self.filename``
        :param more_info: optional detail text; stored unchanged as
                          ``self.more_info`` (stays ``None`` if not given)
        """
        # Only add the ": <detail>" suffix when there is detail to show.
        suffix = ': ' + more_info if more_info else ''
        message = '{0} is not an Office XML file{1}'.format(filename, suffix)
        super(BadOOXML, self).__init__(message)
        self.filename = filename
        self.more_info = more_info