How to use the dateparser.utils.normalize_unicode function in dateparser

To help you get started, we’ve selected a few examples of dateparser.utils.normalize_unicode, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapinghub / dateparser / tests / test_languages.py View on Github external
def given_string(self, datetime_string):
        """Store ``datetime_string`` on the instance as ``self.datetime_string``.

        When ``self.settings.NORMALIZE`` is truthy the string is first passed
        through ``normalize_unicode``; otherwise it is stored unchanged.
        """
        should_normalize = self.settings.NORMALIZE
        self.datetime_string = (
            normalize_unicode(datetime_string) if should_normalize else datetime_string
        )
github scrapinghub / dateparser / tests / test_freshness_date_parser.py View on Github external
def test_normalized_relative_dates(self, date_string, ago, period):
        # Arrange: the input is normalized here and then handed to a parser
        # that is configured with NORMALIZE enabled.
        normalized_input = normalize_unicode(date_string)
        self.given_parser(settings={'NORMALIZE': True})
        self.given_date_string(normalized_input)

        # Act.
        self.when_date_is_parsed()

        # Assert: parsing succeeded via the freshness parser and produced the
        # expected offset and period.
        self.then_error_was_not_raised()
        self.then_date_was_parsed_by_freshness_parser()
        self.then_date_obj_is_exactly_this_time_ago(ago)
        self.then_period_is(period)
github scrapinghub / dateparser / dateparser / languages / locale.py View on Github external
def _simplify_split_align(self, original, settings):
        # TODO: Switch to new split method.
        original_tokens = self._word_split(original, settings=settings)
        simplified_tokens = self._word_split(self._simplify(normalize_unicode(original), settings=settings),
                                             settings=settings)
        if len(original_tokens) == len(simplified_tokens):
            return original_tokens, simplified_tokens

        elif len(original_tokens) < len(simplified_tokens):
            add_empty = False
            for i, token in enumerate(simplified_tokens):
                if i < len(original_tokens):
                    if token == normalize_unicode(original_tokens[i].lower()):
                        add_empty = False
                    else:
                        if not add_empty:
                            add_empty = True
                            continue
                        else:
                            original_tokens.insert(i, '')
github scrapinghub / dateparser / dateparser / languages / locale.py View on Github external
def _simplify_split_align(self, original, settings):
        # TODO: Switch to new split method.
        original_tokens = self._word_split(original, settings=settings)
        simplified_tokens = self._word_split(self._simplify(normalize_unicode(original), settings=settings),
                                             settings=settings)
        if len(original_tokens) == len(simplified_tokens):
            return original_tokens, simplified_tokens

        elif len(original_tokens) < len(simplified_tokens):
            add_empty = False
            for i, token in enumerate(simplified_tokens):
                if i < len(original_tokens):
                    if token == normalize_unicode(original_tokens[i].lower()):
                        add_empty = False
                    else:
                        if not add_empty:
                            add_empty = True
                            continue
                        else:
                            original_tokens.insert(i, '')
                else:
                    original_tokens.insert(i, '')
        else:
            add_empty = False
            for i, token in enumerate(original_tokens):
                if i < len(simplified_tokens):
                    if normalize_unicode(token.lower()) == simplified_tokens[i]:
                        add_empty = False
                    else:
github scrapinghub / dateparser / dateparser / languages / dictionary.py View on Github external
def _normalize(self):
        """Rebuild ``self._dictionary`` with keys passed through ``normalize_unicode``.

        A key whose normalized form differs from itself *and* already exists as
        a key in the current dictionary is treated as conflicting and set aside.
        After the first pass, a conflicting key overwrites the normalized entry
        only if it is listed under ``'skip'`` or ``'pertain'`` in ``self.info``.
        ``self._relative_strings`` is normalized element by element as well.
        """
        rebuilt = {}
        deferred = []
        for raw_key, entry in self._dictionary.items():
            folded = normalize_unicode(raw_key)
            if raw_key == folded or folded not in self._dictionary:
                rebuilt[folded] = entry
            else:
                # Normalized form collides with another existing key; decide later.
                deferred.append(raw_key)

        for raw_key in deferred:
            folded = normalize_unicode(raw_key)
            privileged = self.info.get('skip', []) + self.info.get('pertain', [])
            if raw_key in privileged:
                rebuilt[folded] = self._dictionary[raw_key]

        self._dictionary = rebuilt
        self._relative_strings = [
            normalize_unicode(text) for text in self._relative_strings
        ]
github scrapinghub / dateparser / dateparser / languages / dictionary.py View on Github external
def _normalize(self):
        """Rebuild ``self._dictionary`` with keys passed through ``normalize_unicode``.

        A key whose normalized form differs from itself *and* already exists as
        a key in the current dictionary is treated as conflicting and set aside.
        After the first pass, a conflicting key overwrites the normalized entry
        only if it is listed under ``'skip'`` or ``'pertain'`` in ``self.info``.
        ``self._relative_strings`` is normalized element by element as well.
        """
        rebuilt = {}
        deferred = []
        for raw_key, entry in self._dictionary.items():
            folded = normalize_unicode(raw_key)
            if raw_key == folded or folded not in self._dictionary:
                rebuilt[folded] = entry
            else:
                # Normalized form collides with another existing key; decide later.
                deferred.append(raw_key)

        for raw_key in deferred:
            folded = normalize_unicode(raw_key)
            privileged = self.info.get('skip', []) + self.info.get('pertain', [])
            if raw_key in privileged:
                rebuilt[folded] = self._dictionary[raw_key]

        self._dictionary = rebuilt
        self._relative_strings = [
            normalize_unicode(text) for text in self._relative_strings
        ]
github scrapinghub / dateparser / dateparser / languages / locale.py View on Github external
def _generate_simplifications(self, normalize=False):
        simplifications = []
        for simplification in self.info.get('simplifications', []):
            c_simplification = {}
            key, value = list(simplification.items())[0]
            if normalize:
                key = normalize_unicode(key)

            if isinstance(value, int):
                c_simplification[key] = str(value)
            else:
                c_simplification[key] = normalize_unicode(value) if normalize else value

            simplifications.append(c_simplification)
        return simplifications
github scrapinghub / dateparser / dateparser / languages / locale.py View on Github external
def _generate_relative_translations(self, normalize=False):
        relative_translations = self.info.get('relative-type-regex', {})
        relative_dictionary = OrderedDict()
        for key, value in relative_translations.items():
            if normalize:
                value = list(map(normalize_unicode, value))
            pattern = '|'.join(sorted(value, key=len, reverse=True))
            pattern = DIGIT_GROUP_PATTERN.sub(r'?P\d+', pattern)
            pattern = re.compile(r'^(?:{})$'.format(pattern), re.UNICODE | re.IGNORECASE)
            relative_dictionary[pattern] = key
        return relative_dictionary
github scrapinghub / dateparser / dateparser / languages / locale.py View on Github external
def _generate_simplifications(self, normalize=False):
        simplifications = []
        for simplification in self.info.get('simplifications', []):
            c_simplification = {}
            key, value = list(simplification.items())[0]
            if normalize:
                key = normalize_unicode(key)

            if isinstance(value, int):
                c_simplification[key] = str(value)
            else:
                c_simplification[key] = normalize_unicode(value) if normalize else value

            simplifications.append(c_simplification)
        return simplifications