Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self._extractor = _PublicSuffixListTLDExtractor(tlds)
return self._extractor
elif self.suffix_list_urls:
raw_suffix_list_data = find_first_response(
self.suffix_list_urls,
self.cache_fetch_timeout
)
tlds = get_tlds_from_raw_suffix_list_data(
raw_suffix_list_data,
self.include_psl_private_domains
)
if not tlds and self.fallback_to_snapshot:
tlds = self._get_snapshot_tld_extractor()
tlds.extend(self.extra_suffixes)
self._extractor = _PublicSuffixListTLDExtractor(tlds)
return self._extractor
elif not tlds:
raise Exception("tlds is empty, but fallback_to_snapshot is set"
" to false. Cannot proceed without tlds.")
self._cache_tlds(tlds)
tlds.extend(self.extra_suffixes)
self._extractor = _PublicSuffixListTLDExtractor(tlds)
return self._extractor
snapshot = sorted(pickle.load(snapshot_file))
new = sorted(tlds)
for line in difflib.unified_diff(snapshot, new, fromfile=".tld_set_snapshot", tofile=self.cache_file):
if sys.version_info < (3,):
sys.stderr.write(line.encode('utf-8') + "\n")
else:
sys.stderr.write(line + "\n")
if self.cache_file:
try:
with open(self.cache_file, 'wb') as f:
pickle.dump(tlds, f)
except IOError as e:
LOG.warn("unable to cache TLDs in file %s: %s", self.cache_file, e)
self._extractor = _PublicSuffixListTLDExtractor(tlds)
return self._extractor
self.include_psl_private_domains
)
if not tlds and self.fallback_to_snapshot:
tlds = self._get_snapshot_tld_extractor()
tlds.extend(self.extra_suffixes)
self._extractor = _PublicSuffixListTLDExtractor(tlds)
return self._extractor
elif not tlds:
raise Exception("tlds is empty, but fallback_to_snapshot is set"
" to false. Cannot proceed without tlds.")
self._cache_tlds(tlds)
tlds.extend(self.extra_suffixes)
self._extractor = _PublicSuffixListTLDExtractor(tlds)
return self._extractor
def _get_tld_extractor(self):
    '''Get or compute this object's TLDExtractor. Looks up the TLDExtractor
    in roughly the following order, based on the settings passed to
    __init__:

    1. Memoized on `self`
    2. Local system cache file
    3. Remote PSL, over HTTP
    4. Bundled PSL snapshot file

    Returns the extractor, which is also stored on `self._extractor` so
    later calls take the memoized path.

    Raises `Exception` when no TLDs could be obtained and
    `self.fallback_to_snapshot` is falsy.
    '''
    if self._extractor:
        return self._extractor

    tlds = self._get_cached_tlds()
    if tlds:
        tlds.extend(self.extra_suffixes)
        self._extractor = _PublicSuffixListTLDExtractor(tlds)
        return self._extractor

    if self.suffix_list_urls:
        raw_suffix_list_data = find_first_response(
            self.suffix_list_urls,
            self.cache_fetch_timeout
        )
        tlds = get_tlds_from_raw_suffix_list_data(
            raw_suffix_list_data,
            self.include_psl_private_domains
        )

    if not tlds and self.fallback_to_snapshot:
        # NOTE(review): despite its name, the helper's result is used as a
        # mutable list of suffixes here -- confirm against its definition.
        # This path returns before `_cache_tlds`, so snapshot data is not
        # written to the cache.
        tlds = self._get_snapshot_tld_extractor()
        tlds.extend(self.extra_suffixes)
        self._extractor = _PublicSuffixListTLDExtractor(tlds)
        return self._extractor
    elif not tlds:
        raise Exception("tlds is empty, but fallback_to_snapshot is set"
                        " to false. Cannot proceed without tlds.")

    # Freshly fetched suffixes: cache them before the caller-supplied extras
    # are appended, so the extras are not part of what gets cached.
    self._cache_tlds(tlds)
    tlds.extend(self.extra_suffixes)
    self._extractor = _PublicSuffixListTLDExtractor(tlds)
    return self._extractor
except IOError as ioe:
file_not_found = ioe.errno == errno.ENOENT
if not file_not_found:
LOG.error("error reading TLD cache file %s: %s", self.cache_file, ioe)
except Exception as ex:
LOG.error("error reading TLD cache file %s: %s", self.cache_file, ex)
tlds = frozenset()
if self.suffix_list_urls:
raw_suffix_list_data = fetch_file(self.suffix_list_urls)
tlds = get_tlds_from_raw_suffix_list_data(raw_suffix_list_data)
if not tlds:
if self.fallback_to_snapshot:
with closing(pkg_resources.resource_stream(__name__, '.tld_set_snapshot')) as snapshot_file:
self._extractor = _PublicSuffixListTLDExtractor(pickle.load(snapshot_file))
return self._extractor
else:
raise Exception("tlds is empty, but fallback_to_snapshot is set"
" to false. Cannot proceed without tlds.")
LOG.info("computed TLDs: [%s, ...]", ', '.join(list(tlds)[:10]))
if LOG.isEnabledFor(logging.DEBUG):
import difflib
with closing(pkg_resources.resource_stream(__name__, '.tld_set_snapshot')) as snapshot_file:
snapshot = sorted(pickle.load(snapshot_file))
new = sorted(tlds)
for line in difflib.unified_diff(snapshot, new, fromfile=".tld_set_snapshot", tofile=self.cache_file):
if sys.version_info < (3,):
sys.stderr.write(line.encode('utf-8') + "\n")
else:
sys.stderr.write(line + "\n")
def _get_tld_extractor(self):
if self._extractor:
return self._extractor
if self.cache_file:
try:
with open(self.cache_file) as f:
self._extractor = _PublicSuffixListTLDExtractor(pickle.load(f))
return self._extractor
except IOError as ioe:
file_not_found = ioe.errno == errno.ENOENT
if not file_not_found:
LOG.error("error reading TLD cache file %s: %s", self.cache_file, ioe)
except Exception as ex:
LOG.error("error reading TLD cache file %s: %s", self.cache_file, ex)
tlds = frozenset()
if self.suffix_list_urls:
raw_suffix_list_data = fetch_file(self.suffix_list_urls)
tlds = get_tlds_from_raw_suffix_list_data(raw_suffix_list_data)
if not tlds:
if self.fallback_to_snapshot:
with closing(pkg_resources.resource_stream(__name__, '.tld_set_snapshot')) as snapshot_file: