Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Compute a normalized hostname from `url`, which may be a string or an
# already-parsed SplitResult. Returns None when the url cannot be parsed or
# has no hostname.
# NOTE(review): indentation was lost in this listing, and the body appears to
# stop before the final `return`; `strip_lang_subdomains` is never read in the
# visible part. Confirm against the complete source.
def get_normalized_hostname(url, normalize_amp=True, strip_lang_subdomains=False,
infer_redirection=True):
# Optionally pass the url through `resolve` before anything else
if infer_redirection:
url = resolve(url)
# An already-split url is used as-is; anything else goes through
# `ensure_protocol` and `urlsplit`
if isinstance(url, SplitResult):
splitted = url
else:
try:
splitted = urlsplit(ensure_protocol(url))
except ValueError:
# Unparsable url: give up
return None
# No hostname to normalize
if not splitted.hostname:
return None
hostname = splitted.hostname.lower()
# The regex used to strip subdomains depends on `normalize_amp`
pattern = IRRELEVANT_SUBDOMAIN_AMP_RE if normalize_amp else IRRELEVANT_SUBDOMAIN_RE
hostname = pattern.sub('', hostname)
# Dropping a leading 'amp-' (4 characters)
if normalize_amp and hostname.startswith('amp-'):
hostname = hostname[4:]
# Handling punycode
hostname = decode_punycode(hostname)
def is_shortened_url(url):
    """
    Function returning whether the hostname of the given url is matched by
    the module-level `TRIE` (presumably a trie of known url shortener
    domains -- confirm against its definition).

    Args:
        url (str): Target URL as a string.

    Returns:
        bool: True if `TRIE.longest` finds a match for the hostname labels
            taken in reverse order, False otherwise. False is also returned
            when the url cannot be parsed or has no hostname.
    """
    try:
        hostname = urlsplit(ensure_protocol(url)).hostname
    except ValueError:
        # Same policy as `get_hostname`: an unparsable url has no hostname
        return False

    # `hostname` is None when the url has no network location; calling
    # `.split` on it would raise AttributeError
    if not hostname:
        return False

    # Labels are fed to the trie from the TLD down to the subdomains
    return bool(TRIE.longest(reversed(hostname.split('.'))))
def lru_stems(url, tld_aware=False):
    """
    Split the given url into its lru stems, i.e. its parts listed in
    hierarchical order.

    Args:
        url (str): URL to process, as a string.
        tld_aware (bool, optional): Forwarded as-is to
            `lru_stems_from_parsed_url`. Defaults to False.

    Returns:
        list: The lru stems, each one prefixed to identify the type of the
            part it represents.
    """
    # A protocol is ensured first so that `urlsplit` parses the url correctly
    parsed = urlsplit(ensure_protocol(url))

    return lru_stems_from_parsed_url(parsed, tld_aware=tld_aware)
def get_hostname(url):
    """
    Function returning the hostname of the given url.

    Args:
        url (str): Target URL as a string.

    Returns:
        str or None: The hostname reported by `urlsplit`, or None when it is
            missing or empty, or when parsing raises ValueError.
    """
    try:
        hostname = urlsplit(ensure_protocol(url)).hostname
    except ValueError:
        return None

    # Collapse any falsy hostname (None or '') to None
    return hostname if hostname else None
# Inferring redirection
url = infer_redirection(url)
# Continuation urls
m = NEXT_V_RE.search(url) or NESTED_NEXT_V_RE.search(url)
if m:
return YoutubeVideo(id=m.group(1))
# Parsing
if isinstance(url, SplitResult):
parsed = url
else:
url = ensure_protocol(url)
parsed = urlsplit(url)
if not is_youtube_url(parsed):
return
_, _, path, query, fragment = parsed
# youtu.be
if parsed.hostname.endswith('youtu.be'):
if path.count('/') > 0:
v = urlpathsplit(path)[0]
if fix_common_mistakes:
v = v[:11]
if not is_youtube_video_id(v):
# NOTE(review): indentation was lost in this listing, and the body appears to
# be cut short: no `return` is visible and `has_protocol` is never used below.
# Confirm against the complete source.
def convert_facebook_url_to_mobile(url):
"""
Function parsing the given facebook url and returning the same but for
the mobile website.
"""
safe_url = ensure_protocol(url)
# True when `ensure_protocol` left the url unchanged
has_protocol = safe_url == url
scheme, netloc, path, query, fragment = urlsplit(safe_url)
# Refusing urls whose netloc does not mention facebook
if 'facebook' not in netloc:
raise Exception('ural.facebook.convert_facebook_url_to_mobile: %s is not a facebook url' % url)
# Whatever MOBILE_REPLACE_RE matches in the netloc becomes 'm.facebook.'
netloc = re.sub(MOBILE_REPLACE_RE, 'm.facebook.', netloc)
# Reassembling the url with the rewritten netloc
result = (
scheme,
netloc,
path,
query,
fragment
)
result = urlunsplit(result)
def lru_stems(url, tld_aware=False):
    """
    Split the given url into its lru stems, i.e. its parts listed in
    hierarchical order.

    Args:
        url (str): URL to process, as a string.
        tld_aware (bool, optional): Forwarded as-is to
            `lru_stems_from_parsed_url`. Defaults to False.

    Returns:
        list: The lru stems, each one prefixed to identify the type of the
            part it represents.
    """
    # A protocol is ensured first so that `urlsplit` parses the url correctly
    parsed = urlsplit(ensure_protocol(url))

    return lru_stems_from_parsed_url(parsed, tld_aware=tld_aware)
if infer_redirection:
url = resolve(url)
if isinstance(url, SplitResult):
has_protocol = bool(splitted.scheme)
splitted = url
else:
has_protocol = PROTOCOL_RE.match(url)
# Ensuring scheme so parsing works correctly
if not has_protocol:
url = 'http://' + url
# Parsing
try:
splitted = urlsplit(url)
except ValueError:
return original_url_arg
scheme, netloc, path, query, fragment = splitted
# Fixing common mistakes
if fix_common_mistakes:
if query:
query = re.sub(MISTAKES_RE, '&', query)
# Handling punycode
netloc = decode_punycode(netloc)
# Dropping :80 & :443
if netloc.endswith(':80'):
netloc = netloc[:-3]