Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from hist import Histogram
import matplotlib.pyplot as plt
# Lowercase substring looked for in each result line -> label recorded
# in the histogram for that server.
servers = dict(
    apache="Apache",
    nginx="nginx",
    iis="IIS",
    lighttpd="lighttpd",
)
# Tally server occurrences listed in result.txt.
# A line counts for a server when the server's lowercase key appears anywhere
# in it; the text between the first and second ":" is parsed as the count.
h = Histogram()

with open("result.txt", "r") as f:
    # Stream the file line by line; int() ignores the trailing newline
    # that stays attached to the count field.
    for line in f:
        lowered = line.lower()  # computed once per line, not once per server
        for server in servers:
            if server not in lowered:
                continue

            fields = line.split(":")
            if len(fields) < 2:
                # The line mentions a server but has no ":"-separated count
                # (e.g. a heading), so there is nothing to add.
                continue

            # A non-numeric count still raises ValueError so corrupt data
            # is not silently ignored.
            count = int(fields[1])

            # Record the display label once per counted occurrence.
            for _ in range(count):
                h.add(servers[server])

# From here on `h` is whatever Histogram.get_dict() returns
# (presumably a plain dict, since .keys() is called on it).
h = h.get_dict()
print(h)
keys = list(h.keys())
def has_tld(url, tld):
    """Report whether the domain extracted from ``url`` ends with ``tld``.

    The domain is obtained from ``domain_from_url``; the check itself is a
    plain ``endswith`` on that helper's return value, so ``tld`` is expected
    to include its leading dot (e.g. ``".bg"``).
    """
    domain = domain_from_url(url)
    return domain.endswith(tld)
def get_html(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one, a server that never answers would block the script
            indefinitely. The default matches the timeout used for the HEAD
            requests elsewhere in this file.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (including timeouts).
    """
    return requests.get(url, timeout=timeout).text
# Base address of the register page; relative "link.php" hrefs are appended
# to it to build the URLs that get requested.
REGISTER = "http://register.start.bg"

# Headers sent with the outgoing HEAD requests. The value must be the bare
# user-agent string: the header name belongs only in the key, otherwise the
# server receives "User-Agent: User-Agent: Mozilla/...".
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:12.0) Gecko/20100101 Firefox/21.0",
}
# Values returned by domain_from_url that have already been handled, so each
# one is processed only once by the loop below.
visited = set()
h = Histogram()

# Collect the href of every <a> element on the register page; anchors
# without an href yield None, which the loop below filters out.
# The parser is named explicitly so the parse does not depend on which
# optional parsers happen to be installed, and so bs4 does not warn about
# having to guess one.
soup = BeautifulSoup(get_html(REGISTER), "html.parser")
links = [anchor.get("href") for anchor in soup.find_all("a")]
for link in links:
if link is not None and "link.php" in link:
try:
target_url = REGISTER + "/" + link
r = requests.head(target_url, headers=HEADERS, allow_redirects=True, timeout=10)
target_url = domain_from_url(r.url)
if target_url not in visited:
visited.add(target_url)
if has_tld(target_url, ".bg"):
print(target_url)