# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if u in self.links:
text = text.replace(u, self.links[u].markdownify(summary=True))
return text
# make markdown
# Build the markdown body for this thread: optional back-link, the
# initiator's name, the initial message rendered as headings, expanded
# link summaries, then each comment. NOTE(review): this is the interior
# of a larger method; back_link, mdl and save_location come from the
# enclosing scope (not visible in this chunk).
output = ""
if back_link:
    output = "%s\n\n" % back_link
output = "%s_%s:_\n" % (output, self.initiator.name)
initial_message = self.message
initial_lines = initial_message.split('\n')
for line in initial_lines:
    output = "%s### %s\n" % (output, line)
# BUG FIX: the original assigned to a misspelled name ("ouput"), which
# silently dropped the horizontal-rule separator from the document.
output = "%s---\n" % output
if self.link and self.link in self.links:
    output = "%s%s\n" % (output, self.links[self.link].markdownify(summary=True))
urls = utils.extract_urls(initial_message)
for u in urls:
    # skip the primary link so its summary isn't rendered twice
    if not u == self.link and u in self.links:
        output = "%s%s\n" % (output, self.links[u].markdownify(summary=True))
output = "%s\n" % output
for c in self.comments:
    output = "%s**%s:**\n%s\n\n" % (output, c.user.name, mdl(c.message))
self.save_markdown(output, save_location=save_location)
#
# Join all participant display names (order follows dict iteration).
# NOTE(review): users_str is not used in the visible lines — presumably
# consumed further on in the enclosing method; confirm before removing.
users_str = ' - '.join([self.users[k].name for k in self.users])
# Only write the title file for the default save location.
if not save_location:
# Title preference: first message text, then the link URL, then a placeholder.
title = initial_message if initial_message else self.link if hasattr(self, 'link') and self.link else 'no title available'
# Collapse newlines into " / " and cap the title at 180 characters.
title = utils.truncate(re.sub('[\r\n]', ' / ', title), 180)
files.save_txt(title, subdir=self.id, filename="title.txt")
def content(url, force=False):
if force or not os.path.exists(files.get_content_path(url)):
try:
response = requests.get(url, timeout=10)
c = "this is an archive. [go to %s](%s)\n\n" % (url,url)
c = "%s---\n\n" % c
if response.ok:
c = "%s### %s\n" % (c, Document(response.content).short_title())
c = "%s%s" % (c, html2text.html2text(Document(response.content).summary()))
else:
c = "%sFor one reason or another, this document couldn't be archived." % c
files.save_content(c, url)
except:
print "Failed to make markdown from: ", url
def save(self, filename, obj):
    """Persist `obj` under this object's id via the shared files store.

    Thin convenience wrapper around files.save_obj.
    """
    files.save_obj(obj, self.id, filename)
def screenshot(url, force=False):
    """Capture (or reuse) a screenshot of `url`; return the image path.

    BUG FIX: the `force` flag was previously ignored — it now forces a
    re-capture even when a cached screenshot exists. The PhantomJS
    driver is also shut down in a finally block so headless browser
    processes no longer leak.
    """
    path = files.screenshot_path(url)
    if force or not os.path.exists(path):
        driver = webdriver.PhantomJS()
        try:
            driver.set_window_size(1024, 768)
            driver.get(url)
            driver.save_screenshot(path)
        finally:
            driver.quit()
        # derive square crop + thumbnail from the fresh capture
        files.square_crop(path)
        files.thumbnail(path)
    return path
    #print "Screenshot saved to: ",filepath
def absurl(path):
    """Resolve a local image path to its thumbnail URL.

    Delegates to files.tnurl unchanged.
    """
    return files.tnurl(path)
# Prefer the extracted page title; fall back to the raw URL when absent.
title_to_use = self.title if hasattr(self,'title') and self.title else self.url
def image(url, force=False):
    """Download and cache the image at `url`; return its local path.

    An existing cached copy is reused unless `force` is True.
    NOTE(review): requests.get has no timeout here — a stalled host will
    hang this call; consider adding one after confirming callers.
    """
    if not force and os.path.exists(files.get_image_path(url)):
        return files.get_image_path(url)
    response = requests.get(url, stream=True)
    try:
        # files.save_image consumes the streamed body and writes it to disk
        path = files.save_image(response, url)
    finally:
        # BUG FIX: the original `del response` only dropped the local
        # reference; close explicitly so the pooled connection is
        # released promptly.
        response.close()
    return path
# Summary rendering: emit a blockquoted thumbnail (when available), the
# titled link, and a truncated text blurb. NOTE(review): output,
# preferred_image, text_only and the *_safe title variants come from the
# enclosing method, which is outside this view.
if preferred_image:
output = "%s> [ ![%s](%s \"%s\") ](%s \"%s\") <br>\n" % (output, title_to_use_bracket_safe, absurl(preferred_image), title_to_use_quote_safe, files.lurl(self.url), title_to_use_quote_safe)
output = "%s> [%s](%s \"%s\") <br>\n" % (output, title_to_use_bracket_safe, files.lurl(self.url), title_to_use_quote_safe)
if self.summary:
# Combine summary and description, then strip bare URLs and newlines.
about = "%s... %s" % (self.summary, self.description)
about = re.sub('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', about)
about = re.sub(r'[\r\n]*', '', about)
# Cap the blurb at 500 characters.
output = "%s> %s<br>\n" % (output, about[:500])
return output
else:
if not text_only:
#[ ![Image](/images/image.jpg "Image title") ](http://google.com "Google")
if preferred_image:
return "[ ![%s](%s \"%s\") ](%s \"%s\")" % (title_to_use_bracket_safe, absurl(preferred_image), title_to_use_quote_safe, files.lurl(self.url), title_to_use_quote_safe)
# fallback to text only
return "[%s](%s \"%s\")" % (title_to_use_bracket_safe, files.lurl(self.url), title_to_use_quote_safe)
# fallback
return ""
def file_load(self, filename):
    """Load a previously saved object for this id from the files store.

    Counterpart to save(); delegates to files.load_obj unchanged.
    """
    return files.load_obj(self.id, filename)
def summarize(self, force=False):
if force or not self.title:
try:
self.title, self.summary, image_url = sponge.summary(self.url)
except:
image_url = None
print "Failed extraction of ", self.url
if image_url and (force or not self.image):
self.image = sponge.image(image_url)
# try and make thumbnail
files.thumbnail(self.image)
def content(url, force=False):
if force or not os.path.exists(files.get_content_path(url)):
try:
response = requests.get(url, timeout=10)
c = "this is an archive. [go to %s](%s)\n\n" % (url,url)
c = "%s---\n\n" % c
if response.ok:
c = "%s### %s\n" % (c, Document(response.content).short_title())
c = "%s%s" % (c, html2text.html2text(Document(response.content).summary()))
else:
c = "%sFor one reason or another, this document couldn't be archived." % c
files.save_content(c, url)
except:
print "Failed to make markdown from: ", url