Revision of phash.py

adien revised this gist on 2025-11-18 17:20:15 UTC (Unix time 1763486415). Go to revision

1 file changed, 3 insertions, 4 deletions

phash.py

			@@ -44,9 +44,8 @@ def to64bit_hash(value: imagehash.ImageHash) -> int:
44	44		return numpy.frombuffer(buffer, dtype='>u8').item()
45	45
46	46		if __name__ == '__main__':
47		-	h = imagehash.phash(Image.open('peppers.png'))
48		-	i = to64bit_hash(h)
	47	+	ph = imagehash.phash(Image.open('peppers.png'))
	48	+	x = to64bit_hash(ph)
49	49
50	50		#round trip test
51		-	print(h == from64bit_hash(i))
52		-	print(i == to64bit_hash(h) == i)
	51	+	print(ph == from64bit_hash(x))

adien revised this gist on 2025-11-18 17:17:39 UTC (Unix time 1763486259). Go to revision

1 file changed, 10 insertions, 3 deletions

phash.py

			@@ -39,7 +39,14 @@ def from64bit_hash(value: int) -> imagehash.ImageHash:
39	39		# 4. Create and return the ImageHash instance.
40	40		return imagehash.ImageHash(matrix)
41	41
	42	+	def to64bit_hash(value: imagehash.ImageHash) -> int:
	43	+	buffer = numpy.packbits(value.hash.flatten(order='F'))
	44	+	return numpy.frombuffer(buffer, dtype='>u8').item()
	45	+
42	46		if __name__ == '__main__':
43		-	result = imagehash.phash(Image.open('peppers.png'))
44		-	result2 = from64bit_hash(15500626565295817037)
45		-	print(result - result2)
	47	+	h = imagehash.phash(Image.open('peppers.png'))
	48	+	i = to64bit_hash(h)
	49	+
	50	+	#round trip test
	51	+	print(h == from64bit_hash(i))
	52	+	print(i == to64bit_hash(h) == i)

adien revised this gist on 2025-11-18 16:55:52 UTC (Unix time 1763484952). Go to revision

1 file changed, 3 insertions, 3 deletions

phash.py

			@@ -6,11 +6,11 @@
6	6		# ]
7	7		# ///
8	8
9		-	from PIL import Image
10	9		import imagehash
11	10		import numpy
	11	+	from PIL import Image
12	12
13		-	def from64bit_hash(value: int):
	13	+	def from64bit_hash(value: int) -> imagehash.ImageHash:
14	14		"""
15	15		Converts a 64-bit unsigned long integer into an ImageHash object.
16	16
			@@ -40,6 +40,6 @@ def from64bit_hash(value: int):
40	40		return imagehash.ImageHash(matrix)
41	41
42	42		if __name__ == '__main__':
43		-	result = imagehash.phash(Image.open('pepper.png'))
	43	+	result = imagehash.phash(Image.open('peppers.png'))
44	44		result2 = from64bit_hash(15500626565295817037)
45	45		print(result - result2)

adien revised this gist on 2025-11-18 16:04:53 UTC (Unix time 1763481893). Go to revision

1 file changed, 1 insertion, 1 deletion

phash.py

			@@ -41,5 +41,5 @@ def from64bit_hash(value: int):
41	41
42	42		if __name__ == '__main__':
43	43		result = imagehash.phash(Image.open('pepper.png'))
44		-	result2 = from64bit_hash(17839823311430827566)
	44	+	result2 = from64bit_hash(15500626565295817037)
45	45		print(result - result2)

adien revised this gist on 2025-11-18 15:57:25 UTC (Unix time 1763481445). Go to revision

1 file changed, 9 insertions, 36 deletions

phash.py

			@@ -10,38 +10,11 @@ from PIL import Image
10	10		import imagehash
11	11		import numpy
12	12
13		-	def net_phash(image):
14		-	# type: (Image.Image) -> imagehash.ImageHash
15		-	"""
16		-	Modified phash implementation to match the output of https://github.com/coenm/ImageHash
17		-
18		-	--
19		-	Perceptual Hash computation.
20		-
21		-	Implementation follows https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
22		-
23		-	@image must be a PIL instance.
24		-	"""
25		-	if hash_size < 2:
26		-	raise ValueError('Hash size must be greater than or equal to 2')
27		-
28		-	import scipy.fftpack
29		-	hash_size = 8
30		-	img_size = 64
31		-	image = image.convert('L').resize((img_size, img_size), Image.Resampling.BICUBIC)
32		-	pixels = numpy.asarray(image, dtype=numpy.float64)
33		-	rows_dct = scipy.fftpack.dct(pixels, axis=1, norm='ortho')
34		-	cols_dct_full = scipy.fftpack.dct(rows_dct[:, :hash_size], axis=0, norm='ortho')
35		-	dctlowfreq = cols_dct_full[:hash_size, :].T
36		-	med = numpy.median(dctlowfreq)
37		-	diff = dctlowfreq > med
38		-	return imagehash.ImageHash(diff)
39		-
40		-	def ulong_to_hash(value: int):
	13	+	def from64bit_hash(value: int):
41	14		"""
42	15		Converts a 64-bit unsigned long integer into an ImageHash object.
43	16
44		-	The integer is transformed into an 8x8 binary numpy array, which is the
	17	+	The integer is transformed into a 8x8 binary numpy array, which is the
45	18		standard representation for many perceptual hashes.
46	19
47	20		:param value: The 64-bit integer hash value.
			@@ -60,13 +33,13 @@ def ulong_to_hash(value: int):
60	33		# '1' becomes True, '0' becomes False.
61	34		binary_array = numpy.array([char == '1' for char in binary_string], dtype=bool)
62	35
63		-	# 3. Reshape the 64-element flat array into an 8x8 matrix. This is a
64		-	# common convention for image hashes (e.g., ahash, phash).
65		-	hash_matrix = binary_array.reshape((8, 8))
	36	+	# 3. Reshape the 64-element flat array into a 8x8 matrix.
	37	+	matrix = binary_array.reshape((8, 8), order='F')
66	38
67	39		# 4. Create and return the ImageHash instance.
68		-	return imagehash.ImageHash(hash_matrix)
	40	+	return imagehash.ImageHash(matrix)
69	41
70		-	hash = net_phash(Image.open('peppers.png'))
71		-	hash2 = ulong_to_hash(15500626565295817037)
72		-	print(hash - hash2)
	42	+	if __name__ == '__main__':
	43	+	result = imagehash.phash(Image.open('pepper.png'))
	44	+	result2 = from64bit_hash(17839823311430827566)
	45	+	print(result - result2)

adien revised this gist on 2025-11-18 11:39:49 UTC (Unix time 1763465989). Go to revision

1 file changed, 72 insertions

phash.py(file created)

		@@ -0,0 +1,72 @@
1	+	#!/usr/bin/env -S uv run --script
2	+	# /// script
3	+	# requires-python = ">=3.12"
4	+	# dependencies = [
5	+	# "imagehash"
6	+	# ]
7	+	# ///
8	+
9	+	from PIL import Image
10	+	import imagehash
11	+	import numpy
12	+
13	+	def net_phash(image):
14	+	# type: (Image.Image) -> imagehash.ImageHash
15	+	"""
16	+	Modified phash implementation to match the output of https://github.com/coenm/ImageHash
17	+
18	+	--
19	+	Perceptual Hash computation.
20	+
21	+	Implementation follows https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
22	+
23	+	@image must be a PIL instance.
24	+	"""
25	+	if hash_size < 2:
26	+	raise ValueError('Hash size must be greater than or equal to 2')
27	+
28	+	import scipy.fftpack
29	+	hash_size = 8
30	+	img_size = 64
31	+	image = image.convert('L').resize((img_size, img_size), Image.Resampling.BICUBIC)
32	+	pixels = numpy.asarray(image, dtype=numpy.float64)
33	+	rows_dct = scipy.fftpack.dct(pixels, axis=1, norm='ortho')
34	+	cols_dct_full = scipy.fftpack.dct(rows_dct[:, :hash_size], axis=0, norm='ortho')
35	+	dctlowfreq = cols_dct_full[:hash_size, :].T
36	+	med = numpy.median(dctlowfreq)
37	+	diff = dctlowfreq > med
38	+	return imagehash.ImageHash(diff)
39	+
40	+	def ulong_to_hash(value: int):
41	+	"""
42	+	Converts a 64-bit unsigned long integer into an ImageHash object.
43	+
44	+	The integer is transformed into an 8x8 binary numpy array, which is the
45	+	standard representation for many perceptual hashes.
46	+
47	+	:param value: The 64-bit integer hash value.
48	+	Must be between 0 and 2**64 - 1.
49	+	:return: An ImageHash object representing the integer.
50	+	"""
51	+	if not (0 <= value < 2**64):
52	+	raise ValueError("Value must be a 64-bit unsigned integer (between 0 and 2**64 - 1).")
53	+
54	+	# 1. Convert the integer to its 64-bit binary string representation.
55	+	# numpy.binary_repr is perfect for this, as it handles padding.
56	+	# Example: 1 -> '000...001'
57	+	binary_string = numpy.binary_repr(value, width=64)
58	+
59	+	# 2. Convert the binary string into a numpy array of booleans.
60	+	# '1' becomes True, '0' becomes False.
61	+	binary_array = numpy.array([char == '1' for char in binary_string], dtype=bool)
62	+
63	+	# 3. Reshape the 64-element flat array into an 8x8 matrix. This is a
64	+	# common convention for image hashes (e.g., ahash, phash).
65	+	hash_matrix = binary_array.reshape((8, 8))
66	+
67	+	# 4. Create and return the ImageHash instance.
68	+	return imagehash.ImageHash(hash_matrix)
69	+
70	+	hash = net_phash(Image.open('peppers.png'))
71	+	hash2 = ulong_to_hash(15500626565295817037)
72	+	print(hash - hash2)

Newer Older