Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is the body of a function whose `def` line and original
# indentation are not visible in this excerpt; `input_file` and `output_file`
# are presumably its parameters.
# Read the text of the first page (index 0) of the PDF
f = open(input_file, 'rb')
text = PyPDF2.PdfFileReader(f).getPage(0).extractText()
# Reject the PDF when its first-page text mentions "crinacle" (case-insensitive)
if 'crinacle' in text.lower():
raise ValueError('Measured by Crinacle')
# Convert to image with ghostscript
# Using temporary paths with Ghostscript because it seems to be unable to work with non-ascii characters
# The temporary PDF lives next to the input and the temporary PNG next to the output.
tmp_in = os.path.join(os.path.split(input_file)[0], '__tmp.pdf')
tmp_out = os.path.join(os.path.split(output_file)[0], '__tmp.png')
if tmp_in == input_file or tmp_out == output_file:
# Refuse to process a path that is itself one of the temporary files
raise ValueError('tmp file')
# NOTE(review): both `raise` statements above are reached while `f` is still
# open; the only `f.close()` is near the end of this fragment.
shutil.copy(input_file, tmp_in)
# The first argument is passed as a program name, the rest are Ghostscript
# switches: PNG output device, batch mode without pausing, -r600 resolution.
gs = Ghostscript(
b'pdf2png',
b'-dNOPAUSE',
b'-sDEVICE=png16m',
b'-dBATCH',
b'-r600',
b'-dUseCropBox',
f'-sOutputFile={tmp_out}'.encode('utf-8'),
tmp_in.encode('utf-8')
)
# Release the Ghostscript instance before the rendered file is copied
gs.exit()
# Copy the rendered temporary PNG to the requested output path
shutil.copy(tmp_out, output_file)
print('\nSaved image to "{}"\n'.format(output_file))
f.close()
# NOTE(review): the temporary PDF and PNG are not removed in this fragment.
return Image.open(output_file)
def pdf_to_image(input_file, output_file):
    """Render a PDF to a PNG with Ghostscript and open the result.

    The PDF is copied to ``__tmp.pdf`` next to the input, rendered to
    ``__tmp.png`` next to the output, and that PNG is then copied to
    ``output_file``. The temporary names are used because Ghostscript seems
    to be unable to work with non-ascii characters in paths.

    Args:
        input_file: Path to the source PDF.
        output_file: Path the rendered PNG is written to.

    Returns:
        The object returned by ``Image.open(output_file)``, or ``None`` when
        ``input_file`` or ``output_file`` is itself one of the temporary
        paths (such files are skipped).

    Raises:
        OSError: If the input cannot be copied or the rendered PNG is missing
            (raised by ``shutil.copy``).
    """
    input_file = os.path.abspath(input_file)
    output_file = os.path.abspath(output_file)

    # Convert to image with ghostscript
    # Using temporary paths with Ghostscript because it seems to be unable to
    # work with non-ascii characters
    tmp_in = os.path.join(os.path.dirname(input_file), '__tmp.pdf')
    tmp_out = os.path.join(os.path.dirname(output_file), '__tmp.png')
    if tmp_in == input_file or tmp_out == output_file:
        # Skip tmp files in case one was passed as input or output. No file
        # handle is held at this point, so nothing leaks on this path.
        return None

    shutil.copy(input_file, tmp_in)
    gs = Ghostscript(
        b'pdf2png',
        b'-dNOPAUSE',
        b'-sDEVICE=png16m',
        b'-dBATCH',
        b'-r600',
        b'-dUseCropBox',
        f'-sOutputFile={tmp_out}'.encode('utf-8'),
        tmp_in.encode('utf-8')
    )
    # Release the interpreter explicitly instead of relying on garbage
    # collection, so the PNG is complete before it is copied.
    gs.exit()

    # NOTE(review): the temporary PDF and PNG are left on disk, as before.
    shutil.copy(tmp_out, output_file)
    print('\nSaved image to "{}"\n'.format(output_file))
    return Image.open(output_file)
# NOTE(review): fragment of an upload handler; the enclosing `def`, the
# original indentation and the end of the body are not visible here.
stored = True
process_time = None
# Take the uploaded object and write it chunk by chunk into MEDIA_ROOT/uploads/
f = request['file']
filename = f.name
# NOTE(review): `filename` comes from the upload and is joined into the path
# unchanged — confirm it is sanitised upstream.
destination = open('%s/%s' % (settings.MEDIA_ROOT + '/uploads/', filename), 'wb')
for chunk in f.chunks():
destination.write(chunk)
destination.close()
# Path of the file just written (same location as above, built by concatenation)
# NOTE(review): `file` and `hash` below shadow Python builtins.
file = settings.MEDIA_ROOT + '/uploads/' + filename
hash = get_hash_data(file, "md5") #grab the hash so we can see if the file is present
# The preview PNG is named after the hash and stored in MEDIA_ROOT/previews/
image_path = settings.MEDIA_ROOT + '/previews/' + hash + ".png"
# Ghostscript switches: PNG device, -r300 resolution, first page only.
# NOTE(review): the other Ghostscript calls on this page pass a program name
# as the first argument; here the first argument is "-dSAFER" — confirm it is
# not consumed as the program name and silently ignored.
args = ["-dSAFER","-dBATCH","-dNOPAUSE","-sDEVICE=png16m","-r300","-dFirstPage=1","-dLastPage=1","-sOutputFile=" + image_path,file]
try:
ghostscript.Ghostscript(*args)
image_generation = True
# NOTE(review): a bare `except` records any failure (including
# KeyboardInterrupt/SystemExit) as "no preview" without logging the cause.
except:
image_generation = False
is_present = get_sample(hash) #grabs the sample if it is there, if not then it runs
# Only build and store the object when no sample was found for this hash
if is_present == None:
user = None
# Time the build step
t = time()
output = build_obj(file) #build the raw object
process_time = time() - t
# `output` is parsed as JSON before being wrapped in jPdf
data = jPdf(json.loads(output)) #build the class object
store_it = store_sample(output) #try and store the raw data
# A None result from store_sample is treated as a storage failure
if store_it == None:
error_text = hash + '_store_error'
rsesh[error_text] = True #we can let the user know if it stored with this (true an error happened)
stored = False
# NOTE(review): fragment of a function body; the enclosing `def` and the
# original indentation are not visible. `input_file` and `output_dir` are
# presumably its parameters.
# Open the PDF and take its first page (index 0)
f = open(input_file, 'rb')
pdf = PyPDF2.PdfFileReader(f)
page = pdf.getPage(0)
try:
t = page.extractText()
# The name is the text between 'All rights reserved.' and '%THD+noise';
# str.index raises ValueError when either marker is missing.
start_ind = t.index('All rights reserved.') + len('All rights reserved.')
end_ind = t.index('%THD+noise')
name = t[start_ind:end_ind]
print('Read "{name}" in "{fp}"'.format(name=name, fp=input_file))
# NOTE(review): the bare `except` also hides errors other than a missing
# marker, and the early `return` below leaves `f` open.
except:
print('Fail to read "{}"'.format(input_file))
return
# Convert to image with ghostscript
# The output file is named after the extracted text.
# NOTE(review): `name` is used in the path unmodified — confirm it cannot
# contain path separators or surrounding whitespace.
output_file_path = '{}.png'.format(os.path.join(output_dir, name))
# The first argument is passed as a program name, the rest are Ghostscript
# switches: PNG output device, batch mode without pausing, -r600 resolution.
Ghostscript(
b'pdf2png',
b'-dNOPAUSE',
b'-sDEVICE=png16m',
b'-dBATCH',
b'-r600',
b'-dUseCropBox',
'-sOutputFile={}'.format(output_file_path).encode('utf-8'),
input_file.encode('utf-8')
)
print('\nSaved image to "{}"\n'.format(output_file_path))
f.close()
# NOTE(review): fragment of a thumbnail routine; the enclosing definition and
# the creation of `tmp_pdf` / `thumbnail_temporary` are not visible here.
# Copy the whole document content into the temporary PDF and close it
tmp_pdf.write(document.get_file_obj().read())
tmp_pdf.close()
# Ghostscript arguments: render only the first page as a low-resolution PNG.
# Presumably `thumbnail_temporary` is the path of `tmp_pdf` — verify.
args = [
'gs',
'-q', # Quiet
'-dSAFER',
'-sDEVICE=png16m', # Type. PNG used
'-r10', # resolution of the thumbnail
'-dBATCH', # Quit GS after converting
'-dNOPAUSE', # Do not stop on pages
'-dFirstPage=1',
'-dLastPage=1',
'-sOutputFile=%s.png' % thumbnail_temporary, # Destination
'%s' % thumbnail_temporary, # Source
]
ghostscript.Ghostscript(*args)
# Deleting the temp PDF
# (the generated "<thumbnail_temporary>.png" is not removed in this fragment)
os.unlink(thumbnail_temporary)