Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_jp2(resources):
    """Check that a JPEG 2000 (``/JPXDecode``) image is described correctly.

    The image is inspected twice: first with its explicit ``/ColorSpace``
    entry in place, then again after that entry has been deleted, in which
    case the colorspace is expected to be recovered from the compressed data
    stream.
    """
    pdf = Pdf.open(resources / 'pike-jp2.pdf')
    xobj = next(iter(pdf.pages[0].images.values()))
    pim = PdfImage(xobj)

    assert '/JPXDecode' in pim.filters
    assert pim.colorspace == '/DeviceRGB'
    assert not pim.is_inline
    assert not pim.indexed
    assert pim.mode == 'RGB'
    assert pim.bits_per_component == 8

    outstream = BytesIO()
    pim.extract_to(stream=outstream)

    # Drop the wrapper before modifying the underlying object so the second
    # PdfImage is built from scratch.
    del pim
    del xobj.ColorSpace

    # If there is no explicit ColorSpace metadata we should get it from the
    # compressed data stream
    pim = PdfImage(xobj)
    # Without these assertions the fallback path was constructed but never
    # actually verified.
    assert pim.colorspace == '/DeviceRGB'
    assert pim.mode == 'RGB'
def test_ccitt_encodedbytealign(sandwich):
    """Converting an ``EncodedByteAlign`` image to PIL must be refused.

    Expects ``as_pil_image()`` to raise ``UnsupportedImageTypeError`` once
    the image's decode parameters carry ``EncodedByteAlign = True``.
    """
    image_obj, _owner_pdf = sandwich

    # No FOSS sample of an "EncodedByteAlign" image is available, so pretend
    # this image is one by setting the flag by hand.
    image_obj.DecodeParms.EncodedByteAlign = True

    pdf_image = PdfImage(image_obj)
    with pytest.raises(UnsupportedImageTypeError):
        pdf_image.as_pil_image()
def test_direct_extract(resources, filename, bpc, filters, ext, mode, format_):
    """Extract the first image of ``filename`` and validate the result.

    Args:
        resources: Directory containing the test PDFs.
        filename: Name of the PDF whose first image is extracted.
        bpc: Expected bits per component of the image.
        filters: Expected filter list of the image.
        ext: Expected value returned by ``extract_to``.
        mode: Expected mode of the extracted image when reopened.
        format_: Expected format of the extracted image when reopened.
    """
    image_obj, _owner_pdf = first_image_in(resources / filename)
    pdf_image = PdfImage(image_obj)

    # Metadata reported for the image before extraction.
    assert pdf_image.bits_per_component == bpc
    assert pdf_image.filters == filters

    buffer = BytesIO()
    extension = pdf_image.extract_to(stream=buffer)
    assert extension == ext, 'unexpected output file'

    # Rewind so the written bytes can be read back as an image.
    buffer.seek(0)
    extracted = Image.open(buffer)
    assert extracted.mode == mode
    assert extracted.format == format_
def test_icc_extract(resources):
    """The PIL image must carry the same ICC profile bytes as ``pim.icc``."""
    image_obj, _owner_pdf = first_image_in(resources / 'tree-icc.pdf')
    pdf_image = PdfImage(image_obj)

    profile_in_pil = pdf_image.as_pil_image().info['icc_profile']
    profile_in_pdf = pdf_image.icc.tobytes()
    assert profile_in_pil == profile_in_pdf
def test_icc_use(resources):
    """Check the properties reported for a 1-bit image with an ICC profile."""
    image_obj, _owner_pdf = first_image_in(resources / '1biticc.pdf')
    pdf_image = PdfImage(image_obj)

    assert pdf_image.mode == '1'
    assert pdf_image.colorspace == '/ICCBased'
    assert pdf_image.bits_per_component == 1

    # The embedded profile itself should describe a grayscale color space.
    icc_profile = pdf_image.icc.profile
    assert icc_profile.xcolor_space == 'GRAY'
def test_extract_filepath(congress, outdir):
    """Extracting with a file prefix must create the file that is reported.

    The returned path has to exist, and so does ``image.jpg`` inside
    ``outdir``.
    """
    image_obj, _owner_pdf = congress
    pdf_image = PdfImage(image_obj)

    prefix = outdir / 'image'
    # fspath() is used here for the sake of Python 3.5
    written_path = pdf_image.extract_to(fileprefix=fspath(prefix))

    assert Path(written_path).exists()
    assert (outdir / 'image.jpg').exists()
def extract_image_filter(pike, root, log, image, xref):
    """Decide whether ``image`` should be handed on for extraction.

    Args:
        pike: Not used by this function.
        root: Not used by this function.
        log: Logger that receives a debug message for some skip reasons.
        image: The object to examine; must expose ``Subtype`` and ``Length``.
        xref: Identifier of ``image``, used only in log messages.

    Returns:
        A ``(pim, filtdp)`` tuple, where ``pim`` is the ``pikepdf.PdfImage``
        wrapper and ``filtdp`` is its single ``filter_decodeparms`` entry,
        or ``None`` when the image should be left alone.
    """
    if image.Subtype != Name.Image:
        return None
    if image.Length < 100:
        log.debug("Skipping small image, xref %s", xref)
        return None

    pim = pikepdf.PdfImage(image)

    filter_chain = pim.filter_decodeparms
    if len(filter_chain) > 1:
        log.debug("Skipping multiply filtered, xref %s", xref)
        return None
    if not filter_chain:
        # With no filter entry, indexing [0] below would raise IndexError;
        # skip the image instead, like every other unsupported case.
        log.debug("Skipping unfiltered image, xref %s", xref)
        return None
    filtdp = filter_chain[0]

    if pim.bits_per_component > 8:
        return None  # Don't mess with wide gamut images

    if filtdp[0] == Name.JPXDecode:
        return None  # Don't do JPEG2000

    if Name.Decode in image:
        return None  # Don't mess with custom Decode tables

    return pim, filtdp