Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
def extract_text(self):
PDF_file = self.filename
out_folder_name = os.path.basename(self.filename)
if not os.path.exists(self.image_out_path):
os.mkdir(self.image_out_path)
if not os.path.exists(os.path.abspath(os.path.join(self.image_out_path,\
out_folder_name))):
os.mkdir(os.path.abspath(os.path.join(self.image_out_path,\
out_folder_name)))
index=0
maxPages = pdf2image._page_count(PDF_file)
for page in range(0, maxPages, 10):
pages = pdf2image.convert_from_path(PDF_file, dpi=200,
first_page=page,
last_page=min(page + 10 - 1, maxPages))
for tpage in pages:
tpage.save(os.path.abspath(os.path.join(self.image_out_path,
out_folder_name ,
str(index) + ".jpg"
)),'JPEG')
index = index + 1
print("Successfully saved images for each page for {}".format(self.image_out_path))
english_text = list()
for filename in sorted(os.listdir(os.path.join(self.image_out_path,