Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_normalized_hostname(url, normalize_amp=True, strip_lang_subdomains=False,
infer_redirection=True):
# Purpose: derive a lowercased hostname from `url`, with the parts matched by
# the IRRELEVANT_SUBDOMAIN(_AMP)_RE patterns removed. Returns None when the
# url cannot be split or carries no hostname.
# NOTE(review): the visible excerpt ends right after the subdomain
# substitution; the final return and any use of `strip_lang_subdomains` are
# not shown here -- confirm against the full source.

# Optionally pass the url through `resolve` before any parsing
if infer_redirection:
url = resolve(url)
# An already-split url is used as is, anything else is parsed here
if isinstance(url, SplitResult):
splitted = url
else:
try:
# `ensure_protocol` is applied before splitting (presumably so that
# scheme-less urls still yield a hostname -- TODO confirm)
splitted = urlsplit(ensure_protocol(url))
except ValueError:
# Url that `urlsplit` refuses to parse: nothing to normalize
return None
# No hostname (None or empty): nothing to normalize
if not splitted.hostname:
return None
hostname = splitted.hostname.lower()
# `normalize_amp` selects which of the two subdomain patterns is applied
pattern = IRRELEVANT_SUBDOMAIN_AMP_RE if normalize_amp else IRRELEVANT_SUBDOMAIN_RE
# Every match of the selected pattern is removed from the hostname
hostname = pattern.sub('', hostname)
# Dropping trailing slash
if strip_trailing_slash and path.endswith('/'):
path = path.rstrip('/')
# Quoting or not
if quoted:
path = quote(path)
query = quote(query, RESERVED_CHARACTERS)
fragment = quote(fragment, SAFE_CHARACTERS)
else:
path = unquote(path)
query = unquote(query)
fragment = unquote(fragment)
# Result
result = SplitResult(
scheme,
netloc.lower(),
path,
query,
fragment
)
if not unsplit:
return result
# TODO: check if works with `unsplit=False`
if strip_protocol or not has_protocol:
result = urlunsplit(result)[2:]
else:
result = urlunsplit(result)
fix_common_mistakes (bool, optional): Whether to fix common mistakes
in Youtube urls as you can find them on the web. Defaults to `True`.
"""
# Inferring redirection
url = infer_redirection(url)
# Continuation urls
m = NEXT_V_RE.search(url) or NESTED_NEXT_V_RE.search(url)
if m:
return YoutubeVideo(id=m.group(1))
# Parsing
if isinstance(url, SplitResult):
parsed = url
else:
url = ensure_protocol(url)
parsed = urlsplit(url)
if not is_youtube_url(parsed):
return
_, _, path, query, fragment = parsed
# youtu.be
if parsed.hostname.endswith('youtu.be'):
if path.count('/') > 0:
v = urlpathsplit(path)[0]
Defaults to True.
infer_redirection (bool, optional): Whether to attempt resolving common
redirects by leveraging well-known GET parameters. Defaults to `False`.
quoted (bool, optional): Normalizing to quoted or unquoted.
Defaults to True.
Returns:
string: The normalized url.
"""
original_url_arg = url
if infer_redirection:
url = resolve(url)
if isinstance(url, SplitResult):
has_protocol = bool(splitted.scheme)
splitted = url
else:
has_protocol = PROTOCOL_RE.match(url)
# Ensuring scheme so parsing works correctly
if not has_protocol:
url = 'http://' + url
# Parsing
try:
splitted = urlsplit(url)
except ValueError:
return original_url_arg
scheme, netloc, path, query, fragment = splitted
def is_facebook_url(url):
    """
    Function returning whether the given url is a valid Facebook url.

    Args:
        url (str or SplitResult): Url to test, either as a raw string or as
            an already parsed `SplitResult`.

    Returns:
        bool: Whether given url is from Facebook.

    """
    if isinstance(url, SplitResult):
        hostname = url.hostname

        # A parsed url without a network location (e.g. a bare path) has a
        # `None` hostname, which `re.search` would reject with a TypeError.
        # Such a url cannot belong to Facebook.
        if hostname is None:
            return False

        return bool(re.search(FACEBOOK_DOMAIN_RE, hostname))

    return bool(re.match(FACEBOOK_URL_RE, url))
def is_youtube_url(url):
    """
    Function returning whether the given url is a valid Youtube url.

    Args:
        url (str or SplitResult): Url to test, either as a raw string or as
            an already parsed `SplitResult`.

    Returns:
        bool: Whether given url is from Youtube.

    """
    if isinstance(url, SplitResult):
        hostname = url.hostname

        # A parsed url without a network location (e.g. a bare path) has a
        # `None` hostname, which `re.search` would reject with a TypeError.
        # Such a url cannot belong to Youtube.
        if hostname is None:
            return False

        return bool(re.search(YOUTUBE_DOMAINS_RE, hostname))

    return bool(re.match(YOUTUBE_URL_RE, url))