Last active 1763486415

Revision 17b42dc45780ec7c54ea2fce8018e99e5ea5a611

phash.py Raw
1#!/usr/bin/env -S uv run --script
2# /// script
3# requires-python = ">=3.12"
4# dependencies = [
5# "imagehash"
6# ]
7# ///
8
9from PIL import Image
10import imagehash
11import numpy
12
def net_phash(image, hash_size=8):
    # type: (Image.Image, int) -> imagehash.ImageHash
    """
    Compute a perceptual hash (pHash) of an image.

    Modified phash implementation to match the output of
    https://github.com/coenm/ImageHash

    Implementation follows
    https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html

    The image is converted to greyscale, resized to 64x64 with bicubic
    resampling and run through an orthonormal DCT along both axes. Each of
    the ``hash_size`` x ``hash_size`` lowest-frequency coefficients is then
    compared against the median of that group to produce one hash bit.

    :param image: A PIL image instance.
    :param hash_size: Side length of the square hash matrix; the hash holds
        ``hash_size ** 2`` bits. Defaults to 8 (a 64-bit hash).
    :return: An ImageHash wrapping the boolean ``hash_size`` x ``hash_size``
        matrix.
    :raises ValueError: If ``hash_size`` is smaller than 2 or larger than the
        64-pixel working image.
    """
    # Side length of the working image the DCT is computed on.
    img_size = 64

    # Validate before doing any work. hash_size must be bound before this
    # check runs; reading it ahead of a later local assignment raises
    # UnboundLocalError on every call.
    if hash_size < 2:
        raise ValueError('Hash size must be greater than or equal to 2')
    if hash_size > img_size:
        # The slices below would silently yield fewer coefficients than asked.
        raise ValueError(f'Hash size must be less than or equal to {img_size}')

    import scipy.fftpack

    grey = image.convert('L').resize((img_size, img_size), Image.Resampling.BICUBIC)
    pixels = numpy.asarray(grey, dtype=numpy.float64)

    # Separable 2-D DCT: transform every row first, then only the first
    # hash_size columns of that result, since nothing else is kept.
    rows_dct = scipy.fftpack.dct(pixels, axis=1, norm='ortho')
    cols_dct_full = scipy.fftpack.dct(rows_dct[:, :hash_size], axis=0, norm='ortho')

    # Keep the low-frequency corner. NOTE(review): the transpose presumably
    # aligns the bit order with the reference implementation -- confirm.
    dctlowfreq = cols_dct_full[:hash_size, :].T

    med = numpy.median(dctlowfreq)
    diff = dctlowfreq > med
    return imagehash.ImageHash(diff)
39
def ulong_to_hash(value: int):
    """
    Build an ImageHash from an unsigned 64-bit integer.

    The integer is written out as a 64-character binary string, most
    significant bit first, and those bits fill an 8x8 boolean matrix row by
    row.

    :param value: The 64-bit integer hash value.
                  Must be between 0 and 2**64 - 1.
    :return: An ImageHash object representing the integer.
    :raises ValueError: If ``value`` is outside the unsigned 64-bit range.
    """
    upper_bound = 2**64
    if not (0 <= value < upper_bound):
        raise ValueError("Value must be a 64-bit unsigned integer (between 0 and 2**64 - 1).")

    # Zero-padded binary text, e.g. 1 -> '000...001' (64 characters).
    bit_text = numpy.binary_repr(value, width=64)

    # One boolean per character: True for '1', False for '0'.
    bit_flags = numpy.fromiter(
        (digit == '1' for digit in bit_text),
        dtype=bool,
        count=len(bit_text),
    )

    # Fold the flat 64-bit vector into the 8x8 layout ImageHash wraps.
    return imagehash.ImageHash(bit_flags.reshape(8, 8))
69
# Hash the sample image. The context manager closes the file handle once
# hashing is finished instead of leaving it to the garbage collector.
with Image.open('peppers.png') as source_image:
    image_hash = net_phash(source_image)

# NOTE(review): presumably the 64-bit hash the reference implementation
# produced for the same image -- confirm where this value came from.
reference_hash = ulong_to_hash(15500626565295817037)

# Subtracting two ImageHash objects gives the number of differing bits.
# The names here deliberately avoid shadowing the builtin hash().
print(image_hash - reference_hash)