import math
import os
import shutil
import tempfile

import numpy as np
import pytest

from perception import benchmarking, hashers, testing
from perception.hashers import PDQHash

# SIZES maps a hash dtype to its size in bits.

def test_unletterbox_noblackbars():
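    # With no black bars present, unletterbox should return the full-frame
    # bounds of the image.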
image = hashers.tools.read(testing.DEFAULT_TEST_IMAGES[0])
(x1, x2), (y1, y2) = hashers.tools.unletterbox(image)
assert x1 == 0
assert y1 == 0
assert x2 == image.shape[1]
assert y2 == image.shape[0]
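
# A hypothetical companion check (not part of the original suite): pad the
# image with solid black rows and verify that unletterbox excludes them.
# The padding size and the loose bounds asserted below are illustrative
# assumptions about how much letterboxing the detector should recover.
def test_unletterbox_blackbars_sketch():
    image = hashers.tools.read(testing.DEFAULT_TEST_IMAGES[0])
    # Add 32 rows of pure black above and below the frame.
    padded = np.pad(image, ((32, 32), (0, 0), (0, 0)), mode='constant')
    (x1, x2), (y1, y2) = hashers.tools.unletterbox(padded)
    # The detected content region should exclude the padded black rows and
    # can never be taller than the original image.
    assert y1 > 0 and y2 < padded.shape[0]
    assert y2 - y1 <= image.shape[0]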
assert hasher.compute_distance(hash1_1, hash1_3) == 0
# Ensure the conversion to and from vectors works for both base64 and hex.
assert hasher.vector_to_string(hasher.string_to_vector(hash2_1)) == hash2_1
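    # Round-trip: base64 string -> vector -> hex string -> vector -> base64 string.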
assert hasher.vector_to_string(
hasher.string_to_vector(
hasher.vector_to_string(
hasher.string_to_vector(hash2_1), hash_format='hex'),
hash_format='hex')) == hash2_1
# Ensure parallelization works properly.
test_hasher_parallelization(hasher=hasher, test_filepaths=test_images)
    # Ensure the isometric hash computation works properly.
for image in test_images:
transforms = hashers.tools.get_isometric_transforms(image)
hashes_exp = {
key: hasher.compute(value)
for key, value in transforms.items()
}
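        # compute_isometric derives the hash for every isometric transform
        # directly from the untransformed ('r0') image.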
hashes_act = hasher.compute_isometric(transforms['r0'])
for transform_name in hashes_exp.keys():
assert hasher.compute_distance(
hashes_exp[transform_name],
hashes_act[transform_name]) < transform_threshold
# Verify that hashes are the correct length.
hash_bits = hasher.hash_length * SIZES[hasher.dtype]
    words_base64 = math.ceil(
        hash_bits / 6)  # Base64 encodes 6 bits of data per character.
    words_base64 += 0 if words_base64 % 4 == 0 else 4 - (
        words_base64 % 4)  # Base64 strings are padded to a multiple of four characters.
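    # For example, a 64-bit hash needs ceil(64 / 6) = 11 base64 characters,
    # which pads up to 12.
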
def test_opencv_hasher(hasher: hashers.ImageHasher, image1: str, image2: str):
    # For OpenCV hashers, verify that the distance we compute matches the
    # distance computed by OpenCV itself.
f1 = image1
f2 = image2
opencv_distance = hasher.hasher.compare(
hasher.hasher.compute(hashers.tools.read(f1)),
hasher.hasher.compute(hashers.tools.read(f2)))
if hasher.distance_metric == 'hamming':
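        # OpenCV returns the raw count of differing bits for Hamming-based
        # hashers; normalize by the hash length to match compute_distance.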
opencv_distance /= hasher.hash_length
np.testing.assert_approx_equal(
opencv_distance,
hasher.compute_distance(hasher.compute(f1), hasher.compute(f2)),
significant=4)
@pytest.mark.parametrize(
    'hasher_class,pil_opencv_threshold,transform_threshold,opencv_hasher',
    [(hashers.AverageHash, 0.1, 0.1, False),
(hashers.WaveletHash, 0.1, 0.1, False), (hashers.PHash, 0.1, 0.1, False),
(PDQHash, 0.1, 0.15, False), (hashers.DHash, 0.1, 0.1, False),
(hashers.MarrHildreth, 0.1, 0.1, True),
(hashers.BlockMean, 0.1, 0.1, True),
(hashers.ColorMoment, 10, 0.1, True)])
def test_image_hashing_common(hasher_class, pil_opencv_threshold,
transform_threshold, opencv_hasher):
testing.test_image_hasher_integrity(
hasher=hasher_class(),
pil_opencv_threshold=pil_opencv_threshold,
transform_threshold=transform_threshold,
opencv_hasher=opencv_hasher)
    transforms = {
        'noop':
        benchmarking.video_transforms.get_simple_transform(),  # pass-through
        'clip1s':
        benchmarking.video_transforms.get_simple_transform(clip_s=(1, None)),
'blackpad':
benchmarking.video_transforms.get_black_frame_padding_transform(
duration_s=1),
'slideshow':
benchmarking.video_transforms.get_slideshow_transform(
frame_input_rate=1, frame_output_rate=1),
}
transformed = video_dataset.transform(
storage_dir=tempfile.TemporaryDirectory().name, transforms=transforms)
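    # Each (video, transform) pair should produce exactly one output file.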
assert len(transformed._df) == len(transforms) * len(video_dataset._df)
assert transformed._df['filepath'].isnull().sum() == 0
# We will compute hashes for each of the transformed
# videos and check the results for correctness.
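    # interframe_threshold=-1 keeps the hash for every sampled frame (no
    # deduplication), and frames_per_second=2 samples two frames per second.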
phash_framewise_hasher = hashers.FramewiseHasher(
frame_hasher=hashers.PHash(),
interframe_threshold=-1,
frames_per_second=2)
hashes = transformed.compute_hashes(
hashers={'phashframewise': phash_framewise_hasher})
guid = hashes._df.guid.iloc[0]
df = hashes._df[hashes._df['guid'] == guid]
clip1s = df[(df.transform_name == 'clip1s')]
noop = df[(df.transform_name == 'noop')]
blackpad = df[(df.transform_name == 'blackpad')]
slideshow = df[(df.transform_name == 'slideshow')]
    # Clipping the first second at 2 frames per second should drop exactly
    # two hashes relative to the unmodified ('noop') video.
assert len(clip1s) == len(noop) - 2
def test_deduplicate():
    tempdir = tempfile.TemporaryDirectory()
    files = testing.DEFAULT_TEST_IMAGES  # Sample images bundled with the testing utilities.
    new_file = os.path.join(tempdir.name, 'dup_file.jpg')
    shutil.copy(files[0], new_file)
duplicated_files = files + [new_file]
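    # One exact duplicate was added, so deduplication with a tight threshold
    # should remove exactly one file.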
deduplicated, duplicates = benchmarking.BenchmarkImageDataset.from_tuples(
[(fn, i % 2) for i, fn in enumerate(duplicated_files)]).deduplicate(
hasher=hashers.AverageHash(), threshold=1e-2)
assert len(duplicates) == 1
assert len(deduplicated._df) == len(files)