Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def convert_facebook_url_to_mobile(url):
"""
Function parsing the given facebook url and returning the same but for
the mobile website.
"""
safe_url = ensure_protocol(url)
has_protocol = safe_url == url
scheme, netloc, path, query, fragment = urlsplit(safe_url)
if 'facebook' not in netloc:
raise Exception('ural.facebook.convert_facebook_url_to_mobile: %s is not a facebook url' % url)
netloc = re.sub(MOBILE_REPLACE_RE, 'm.facebook.', netloc)
result = (
scheme,
netloc,
path,
query,
fragment
def lru_stems(url, tld_aware=False):
    """
    Function splitting the given url into its lru stems, i.e. its parts
    listed in hierarchical order.

    Args:
        url (str): Target URL as a string.
        tld_aware (bool, optional): Forwarded unchanged to
            `lru_stems_from_parsed_url`. Defaults to `False`.

    Returns:
        list: The lru, with a prefix identifying the type of each part.

    """
    # A protocol is ensured first, before the url is handed to urlsplit.
    parsed = urlsplit(ensure_protocol(url))

    return lru_stems_from_parsed_url(parsed, tld_aware=tld_aware)
"""
# Inferring redirection
url = infer_redirection(url)
# Continuation urls
m = NEXT_V_RE.search(url) or NESTED_NEXT_V_RE.search(url)
if m:
return YoutubeVideo(id=m.group(1))
# Parsing
if isinstance(url, SplitResult):
parsed = url
else:
url = ensure_protocol(url)
parsed = urlsplit(url)
if not is_youtube_url(parsed):
return
_, _, path, query, fragment = parsed
# youtu.be
if parsed.hostname.endswith('youtu.be'):
if path.count('/') > 0:
v = urlpathsplit(path)[0]
if fix_common_mistakes:
v = v[:11]
Args:
url (str): Target URL as a string.
sort_query (bool, optional): Whether to sort query items or not.
Defaults to `True`.
strip_authentication (bool, optional): Whether to drop authentication.
Defaults to `True`.
strip_trailing_slash (bool, optional): Whether to drop trailing slash.
Defaults to `False`.
strip_index (bool, optional): Whether to drop trailing index at the end
of the url. Defaults to `True`.
Returns:
list: The normalized lru, with a prefix identifying the type of each part.
"""
full_url = ensure_protocol(url, protocol=default_protocol)
return parsed_url_to_lru(normalize_url(
full_url, parsed=True, **kwargs))
from ural.normalize_url import normalize_url
from ural.ensure_protocol import ensure_protocol
# Print every url listed in the amp-urls data file once normalized and
# guaranteed to carry a protocol.
with open('./scripts/data/amp-urls.txt') as f:
    for line in f:
        # Surrounding whitespace is trimmed, then the first and last
        # characters are dropped (presumably enclosing quotes -- confirm
        # against the data file).
        raw_url = line.strip()[1:-1]
        normalized = normalize_url(raw_url)
        print(ensure_protocol(normalized))
def normalized_lru_stems(url, tld_aware=False, **kwargs):
    """
    Function normalizing the given url before returning its lru stems.

    Args:
        url (str): Target URL as a string.
        tld_aware (bool, optional): Forwarded unchanged to
            `lru_stems_from_parsed_url`. Defaults to `False`.
        **kwargs: Extra keyword arguments forwarded to `normalize_url`.

    Returns:
        The result of `lru_stems_from_parsed_url` applied to the
        normalized url.

    """
    # `unsplit=False` is passed so that the normalized url can be given
    # directly to `lru_stems_from_parsed_url` (presumably in parsed form --
    # confirm against `normalize_url`).
    normalized = normalize_url(ensure_protocol(url), unsplit=False, **kwargs)

    return lru_stems_from_parsed_url(normalized, tld_aware=tld_aware)