How to use the extruct.rdflibxml.extras.httpheader.ParseError exception in extruct

To help you get started, we’ve selected a few extruct examples that show popular ways ParseError is used in public projects.

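Before the examples, here is a minimal sketch of what catching ParseError looks like when calling one of the module's parsers. The parse_media_type call and the shape of its return value are taken from the last example on this page; the header value is made up, and the import path is assumed from the title above.

from extruct.rdflibxml.extras.httpheader import ParseError, parse_media_type

header_value = 'text/html; charset=UTF-8'   # made-up example input
try:
    media_type, consumed = parse_media_type(header_value)
    major, minor, params = media_type       # same unpacking as the set() example below
    print(major, minor, params)
except ParseError as exc:
    # Every raise site below constructs ParseError with a message, the string
    # being parsed and the failing position, so str(exc) is informative.
    print('could not parse header value:', exc)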

Example from scrapinghub/extruct (extruct/rdflibxml/extras/httpheader.py):
All \-escaped quoted pairs will have been replaced with the actual
    characters they represent, even within the inner nested comments.

    You should note that only a few HTTP headers, such as User-Agent
    or Via, allow ()-style comments within the header value.

    A comment is defined by RFC 2616 section 2.2 as:
    
       comment = "(" *( ctext | quoted-pair | comment ) ")"
       ctext   = <any TEXT excluding "(" and ")">
    """
    if start >= len(s):
        raise ParseError('Starting position is beyond the end of the string',s,start)
    if s[start] != '(':
        raise ParseError('Comment must begin with opening parenthesis',s,start)

    s2 = ''
    nestlevel = 1
    pos = start + 1
    while pos < len(s) and s[pos] in LWS:
        pos += 1

    while pos < len(s):
        c = s[pos]
        if c == '\\':
            # Note this is not C-style escaping; the character after the \ is
            # taken literally.
            pos += 1
            if pos == len(s):
                raise ParseError("End of string while expecting a character after '\\'",s,pos)
            s2 += s[pos]
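A hedged usage sketch for the comment parser above: the name parse_comment and the (text, chars_consumed) return shape are assumptions based on the docstring and on the module's other parse_* helpers; only the two ParseError checks are taken directly from the snippet.

from extruct.rdflibxml.extras.httpheader import ParseError, parse_comment

for value in ['(Ubuntu; like Gecko)', 'no-opening-paren']:
    try:
        text, consumed = parse_comment(value, 0)   # assumed signature: (s, start)
        print(repr(text), consumed)
    except ParseError as exc:
        # The second value trips the "Comment must begin with opening
        # parenthesis" check shown above.
        print('rejected:', exc)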
Example from scrapinghub/extruct (extruct/rdflibxml/extras/httpheader.py):
pos += k
        while pos < len(s) and s[pos] in LWS:
            pos += 1
        if pos >= len(s) or s[pos] in ',;':
            itemparms, qvalue, acptparms = [], None, []
            if pos < len(s) and s[pos] == ';':
                pos += 1
                while pos < len(s) and s[pos] in LWS:
                    pos += 1
                parmlist, k = parse_parameter_list(s, pos)
                for p, v in parmlist:
                    if p == 'q' and qvalue is None:
                        try:
                            qvalue = float(v)
                        except ValueError:
                            raise ParseError('qvalue must be a floating point number',s,pos)
                        if qvalue < 0 or qvalue > 1:
                            raise ParseError('qvalue must be between 0 and 1, inclusive',s,pos)
                    elif qvalue is None:
                        itemparms.append( (p,v) )
                    else:
                        acptparms.append( (p,v) )
                pos += k
            if item:
                # Add the item to the list
                if qvalue is None:
                    qvalue = 1
                itemlist.append( (item, itemparms, qvalue, acptparms) )
                item = None
            # skip commas
            while pos < len(s) and s[pos] == ',':
                pos += 1
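For illustration, here is the same q-value validation as in the loop above, applied to a single parameter parsed with parse_parameter_list (the helper the snippet calls). The 'q=0.8' input and starting position are made up, and the exact grammar parse_parameter_list accepts is an assumption.

from extruct.rdflibxml.extras.httpheader import ParseError, parse_parameter_list

s, pos = 'q=0.8', 0
parmlist, consumed = parse_parameter_list(s, pos)   # assumed to accept (s, start)
for name, value in parmlist:
    if name == 'q':
        try:
            qvalue = float(value)
        except ValueError:
            raise ParseError('qvalue must be a floating point number', s, pos)
        if qvalue < 0 or qvalue > 1:
            raise ParseError('qvalue must be between 0 and 1, inclusive', s, pos)
        print('quality factor:', qvalue)            # 0.8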
Example from scrapinghub/extruct (extruct/rdflibxml/extras/httpheader.py):
pos += 1
        if pos >= len(s) or s[pos] in ',;':
            itemparms, qvalue, acptparms = [], None, []
            if pos < len(s) and s[pos] == ';':
                pos += 1
                while pos < len(s) and s[pos] in LWS:
                    pos += 1
                parmlist, k = parse_parameter_list(s, pos)
                for p, v in parmlist:
                    if p == 'q' and qvalue is None:
                        try:
                            qvalue = float(v)
                        except ValueError:
                            raise ParseError('qvalue must be a floating point number',s,pos)
                        if qvalue < 0 or qvalue > 1:
                            raise ParseError('qvalue must be between 0 and 1, inclusive',s,pos)
                    elif qvalue is None:
                        itemparms.append( (p,v) )
                    else:
                        acptparms.append( (p,v) )
                pos += k
            if item:
                # Add the item to the list
                if qvalue is None:
                    qvalue = 1
                itemlist.append( (item, itemparms, qvalue, acptparms) )
                item = None
            # skip commas
            while pos < len(s) and s[pos] == ',':
                pos += 1
                while pos < len(s) and s[pos] in LWS:
                    pos += 1
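The loop above accumulates (item, itemparms, qvalue, acptparms) tuples, defaulting qvalue to 1 when no q parameter is present. A small illustrative sketch of consuming such tuples follows; the itemlist values are made up and the items are shown as plain strings.

# Made-up itemlist in the shape built above: (item, itemparms, qvalue, acptparms).
itemlist = [
    ('text/html', [], 1, []),                       # no q parameter, so qvalue defaulted to 1
    ('application/xml', [('level', '1')], 0.9, []),
    ('*/*', [], 0.1, []),
]
# Rank by quality factor, highest first, the way Accept-style headers are ranked.
for item, itemparms, qvalue, acptparms in sorted(
        itemlist, key=lambda entry: entry[2], reverse=True):
    print(qvalue, item, itemparms)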
Example from scrapinghub/extruct (extruct/rdflibxml/extras/httpheader.py):
if not e or e[1] == 0:
            break # end of data?
        else:
            results.append( e[0] )
            pos += e[1]
        while pos < len(s) and s[pos] in LWS:
            pos += 1
        if pos < len(s) and s[pos] != ',':
            break
        while pos < len(s) and s[pos] == ',':
            # skip comma and any "empty" elements
            pos += 1  # skip comma
            while pos < len(s) and s[pos] in LWS:
                pos += 1
    if len(results) < min_count:
        raise ParseError('Comma-separated list does not have enough elements',s,pos)
    elif max_count and len(results) > max_count:
        raise ParseError('Comma-separated list has too many elements',s,pos)
    return (results, pos-start)
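The final checks above turn an under- or over-long list into a ParseError. Below is a standalone sketch of the same bounds checks with made-up results, min_count and max_count values; only the ParseError messages and arguments come from the snippet.

from extruct.rdflibxml.extras.httpheader import ParseError

def check_list_length(results, s, pos, min_count=1, max_count=0):
    # Same bounds checks as in the snippet above, factored out for illustration.
    if len(results) < min_count:
        raise ParseError('Comma-separated list does not have enough elements', s, pos)
    elif max_count and len(results) > max_count:
        raise ParseError('Comma-separated list has too many elements', s, pos)
    return results

try:
    check_list_length([], s='', pos=0, min_count=1)
except ParseError as exc:
    print('caught:', exc)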
Example from scrapinghub/extruct (extruct/rdflibxml/extras/httpheader.py):
being raised.

    If allow_quoted is False, then only tokens will be parsed instead
    of either a token or quoted-string.

    If allow_token is False, then only quoted-strings will be parsed
    instead of either a token or quoted-string.
    """
    if not allow_quoted and not allow_token:
        raise ValueError('Parsing can not continue with options provided')

    if start >= len(s):
        raise ParseError('Starting position is beyond the end of the string',s,start)
    has_quote = (s[start] == '"')
    if has_quote and not allow_quoted:
        raise ParseError('A quoted string was not expected', s, start)
    if not has_quote and not allow_token:
        raise ParseError('Expected a quotation mark', s, start)

    s2 = ''
    pos = start
    if has_quote:
        pos += 1
    while pos < len(s):
        c = s[pos]
        if c == '\\' and has_quote:
            # Note this is NOT C-style escaping; the character after the \ is
            # taken literally.
            pos += 1
            if pos == len(s):
                raise ParseError("End of string while expecting a character after '\\'",s,pos)
            s2 += s[pos]
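A hedged sketch of the token/quoted-string parser whose docstring and body appear above: the name parse_token_or_quoted_string and its keyword arguments are assumptions drawn from that docstring, and the input values are made up.

from extruct.rdflibxml.extras.httpheader import ParseError, parse_token_or_quoted_string

for raw in ['"a \\"quoted\\" value"', 'plain-token']:
    try:
        value, consumed = parse_token_or_quoted_string(raw, 0)
        print(repr(value), consumed)
    except ParseError as exc:
        print('rejected:', exc)

# With allow_token=False, a bare token hits the 'Expected a quotation mark'
# check shown above.
try:
    parse_token_or_quoted_string('plain-token', 0, allow_token=False)
except ParseError as exc:
    print('token not allowed here:', exc)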
Example from scrapinghub/extruct (extruct/rdflibxml/extras/httpheader.py):
def set(self, content_type_string, with_parameters=True):
        """Parses the content type string and sets this object to it's value.

        For a more complete description of the arguments, see the
        documentation for the parse_media_type() function in this module.
        """
        mt, k = parse_media_type( content_type_string, with_parameters=with_parameters )
        if k < len(content_type_string):
            raise ParseError('Not a valid content type',content_type_string, k)
        major, minor, pdict = mt
        self._set_major( major )
        self._set_minor( minor )
        self.parmdict = dict(pdict)
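Finally, a hedged sketch of calling the set() method shown above. The class name content_type and its no-argument constructor are assumptions about this vendored module; only set()'s signature, the parmdict attribute and the 'Not a valid content type' ParseError come from the snippet.

from extruct.rdflibxml.extras.httpheader import ParseError, content_type

ct = content_type()                      # assumed: the class can be constructed empty
ct.set('text/html; charset=UTF-8')
print(ct.parmdict)                       # the parsed parameters, e.g. a charset entry

try:
    ct.set('not a media type at all')
except ParseError as exc:
    # A malformed value fails either inside parse_media_type() or at the
    # 'Not a valid content type' check in set() above.
    print('caught:', exc)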