# Load the bundled signature database
with open(database.DATABASE_FILE) as f:
    self.db = database.merge_databases(self.db, json.load(f))

# Output text only
self.output_format = Format['text']

# Default user agent
self.USER_AGENT = default_user_agent()

if options is None:
    return
self.scrape_url = options.get('scrape')
if options.get('database_file'):
    try:
        with open(options.get('database_file')) as f:
            self.db = database.merge_databases(self.db, json.load(f))
    except (FileNotFoundException, ValueError) as e:
        print(e)
        exit(-1)

self.urls = options.get('urls') or []
if options.get('urls_file'):
    try:
        with open(options.get('urls_file')) as f:
            self.urls = [line.rstrip() for line in f]
    except FileNotFoundException as e:
        print(e)
        exit(-1)

if options.get('user_agent'):
    self.USER_AGENT = options.get('user_agent')
elif options.get('random_user_agent'):
    self.USER_AGENT = get_random_user_agent()

if options.get('grep'):
    # Greppable output
    self.output_format = Format['grep']
elif options.get('json'):
    # JSON output
    self.output_format = Format['json']

# The parser passes --timeout as a number; fall back to 10 seconds on bad input
try:
    self.timeout = int(options.get('timeout', '10'))
except ValueError:
    self.timeout = 10
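
# A minimal usage sketch (an assumption, not shipped code): the options dict
# consumed above uses the optparse destination names, so WebTech can also be
# driven programmatically. The URL and the extra database path are placeholders.
def _example_programmatic_usage():
    wt = WebTech({
        'urls': ['https://example.com'],
        'database_file': 'extra-signatures.json',  # hypothetical extra signature db
        'random_user_agent': True,
        'json': True,
        'timeout': 5,
    })
    wt.start()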
help="output grepable report", default=False)
parser.add_option(
"--update-db", "--udb", action="store_true",
help="force update of remote db files", default=False)
parser.add_option(
"--timeout", type="float", help="maximum timeout for scrape requests", default=10)
(options, _args) = parser.parse_args(sys.argv)
options = vars(options)
if options.get('urls') is None and options.get('urls_file') is None and not options.get('update_db'):
print("No URL(s) given!")
parser.print_help()
exit()
wt = WebTech(options)
wt.start()
# main() fragment: `parser` (an optparse.OptionParser) and the URL-related
# options are created earlier in the function and omitted from this excerpt.
parser.add_option(
    "--json", "--oj", action="store_true",
    help="output json-encoded report", default=False)
parser.add_option(
    "--grep", "--og", action="store_true",
    help="output grepable report", default=False)
parser.add_option(
    "--update-db", "--udb", action="store_true",
    help="force update of remote db files", default=False)
parser.add_option(
    "--timeout", type="float", help="maximum timeout for scrape requests", default=10)

(options, _args) = parser.parse_args(sys.argv)
options = vars(options)

wt = WebTech(options)
if options.get('scrape'):
    # Bad style of making arguments mutually exclusive;
    # use argparse's mutually exclusive groups instead.
    wt.scraping()
else:
    if options.get('urls') is None and options.get('urls_file') is None and not options.get('update_db'):
        print("No URL(s) given!")
        parser.print_help()
        exit()
    wt.start()
if __name__ == "__main__":
    main()
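
# Example invocations (a sketch, using only the flags visible above; the
# spelling of the URL option is assumed here, and the `webtech` command
# assumes the package's console script is installed):
#
#   webtech -u https://example.com --json --timeout 5
#   webtech --urls-file targets.txt --og
#   webtech --update-db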
def get_random_user_agent():
    """
    Get a random User-Agent string from a file
    """
    ua_file = os.path.join(os.path.realpath(os.path.dirname(__file__)), "ua.txt")
    try:
        with open(ua_file) as f:
            agents = f.readlines()
            return random.choice(agents).strip()
    except FileNotFoundException as e:
        print(e)
        print("Please reinstall webtech correctly or provide a valid User-Agent list")
        exit(-1)
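
# get_random_user_agent() expects ua.txt to sit next to this module and to
# contain one User-Agent string per line, e.g.:
#
#   Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
#   Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0
#
# random.choice() picks one line and strip() drops the trailing newline.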
def check_script(self, tech, script):
    """
    Check if the script src values from the page's HTML contain database matches
    """
    # FIX repair to some database inconsistencies
    # (`unicode` is expected to be aliased to str on Python 3, for Python 2 compatibility)
    if isinstance(script, (str, unicode)):
        script = [script]

    for source in script:
        attr, extra = parse_regex_string(source)
        for src in self.data['script']:
            matches = re.search(attr, src, re.IGNORECASE)
            # Attr is empty for a "generic" tech meta
            if attr == '' or matches is not None:
                matched_tech = Tech(name=tech, version=None)
                # The version extra data is present
                if extra and extra.get('version'):
                    if matches and matches.group(1):
                        matched_tech = matched_tech._replace(version=matches.group(1))
                self.report['tech'].add(matched_tech)
                # this tech is matched, GOTO next
                return
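
# A hedged sketch of the data check_script() consumes, assuming the
# Wappalyzer-style "pattern\;version:\1" convention that parse_regex_string()
# appears to split into a regex and an extra dict (the jQuery entry is illustrative):
#
#   source = "jquery[.-]([\\d.]+)\\.js\\;version:\\1"
#   attr, extra = parse_regex_string(source)
#   # attr  == "jquery[.-]([\\d.]+)\\.js"
#   # extra == {'version': '\\1'}
#
# Matched against self.data['script'] = ["https://cdn.example.com/jquery-3.6.0.js"],
# group(1) yields "3.6.0", which is stored as the Tech version.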
def check_meta(self, tech, meta):
    """
    Check if the meta tags from the page's HTML contain database matches
    """
    for m in meta:
        content = self.data['meta'].get(m)
        # filter not-available meta
        if content is None:
            continue

        attr, extra = parse_regex_string(meta[m])
        matches = re.search(attr, content, re.IGNORECASE)
        # Attr is empty for a "generic" tech meta
        if attr == '' or matches is not None:
            matched_tech = Tech(name=tech, version=None)
            # The version extra data is present
            if extra and 'version' in extra:
                if matches and matches.group(1):
                    matched_tech = matched_tech._replace(version=matches.group(1))
            self.report['tech'].add(matched_tech)
            # this tech is matched, GOTO next
            return
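
# Illustrative example (the entry is hypothetical, following the same
# convention as check_script): a database rule
#
#   "meta": {"generator": "WordPress ([\\d.]+)\\;version:\\1"}
#
# checked against a page containing
#   <meta name="generator" content="WordPress 5.8">
# (stored in self.data['meta']) would report Tech(name='WordPress', version='5.8').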
def check_cookies(self, tech, cookies):
    """
    Check if request cookies match some database cookies
    """
    for cookie in cookies:
        # cookies in db are regexes so we must test them all
        cookie = cookie.replace("*", "")  # FIX for "Fe26.2**" hapi.js cookie in the database
        for biscuit in self.data['cookies'].keys():
            matches = re.search(cookie, biscuit, re.IGNORECASE)
            if matches is not None:
                if cookies[cookie] != '':
                    # Let's check the cookie content
                    content = self.data['cookies'][biscuit]
                    matches = re.search(cookies[cookie], content, re.IGNORECASE)
                    if matches is None:
                        # Cookie name matched but its content didn't, exit
                        return
                matched_tech = Tech(name=tech, version=None)
                self.report['tech'].add(matched_tech)
                # this tech is matched, GOTO next
                return
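
# Illustrative examples (hypothetical entries): an empty content regex such as
#
#   "cookies": {"laravel_session": ""}
#
# matches any response cookie *named* laravel_session regardless of its value,
# while a non-empty one like {"PHPSESSID": "^[a-f0-9]{32}$"} must also match
# the cookie's content before the tech is reported.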