-
Notifications
You must be signed in to change notification settings - Fork 0
/
pifind.py
287 lines (253 loc) · 10.4 KB
/
pifind.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
"""Look for target.png in the digits of π.
This script should be executed in the same directory as an image, target.png,
which should be as small as possible (max around 22x22 pixels) and use
as few distinct colors as possible (max about six).
If the file pi_hex_1b.txt, or pi_hex_1b.zip, as downloaded from
https://archive.org/details/pi_hex_1b is present in the same directory,
it will be used as a source of hexadecimal digits from π. Otherwise, digits
will be fetched from the API provided by Google at https://pi.delivery.
In the case of using the online service, it will continue until stopped
(e.g. Ctrl-C) because there are 50 trillion digits available.
In the case of using pi_hex_1b.txt, it will continue until finishing the
billion digits, which might take a few days.
The best image found so far is saved as found.gif.
"""
import os
import sys
import signal
from itertools import chain
from collections import Counter, deque
from PIL import Image
HEXFILE = 'pi_hex_1b.txt'
ZIPFILE = 'pi_hex_1b.zip'
def PiFileReader(fid, numread=10_000_000):
"""Given a file-like object, containing text hexadecimal digits,
yield lists of numread digits. fid should provide bytes."""
fid.read(2) # skip '3.'. ZipExtFile doesn't support seek until 3.7, so read and discard.
while True:
data = fid.read(numread)
yield data
if len(data) < numread: # hit end of file
break
def PiZipFileReader(filename):
import zipfile
zf = zipfile.ZipFile(filename)
if zf.namelist() != [HEXFILE]:
raise FileNotFoundError(f'zip file does not contain exactly one file, {HEXFILE}, as expected')
fid = zf.open(HEXFILE)
yield from PiFileReader(fid)
def PiDelivery(numread=1000): # 1000 is the max digits supported per request
from urllib import request
import json
base = 'https://api.pi.delivery/v1/pi'
start = 1
while True:
with request.urlopen(f'{base}?start={start}&numberOfDigits={numread}&radix=16') as req:
j = json.load(req)
yield j['content'].encode()
start += numread
def colhex(col):
"""RGB hex code #000000 for a color triple."""
return '#' + ''.join(f'{c:02X}' for c in col)
def colorsummary(C, maxlist=10):
"""Summarize a Counter of RGB colors."""
for i, (col, num) in enumerate(C.most_common(maxlist), 1):
print(f'{i:2}. {colhex(col)}, {num:3}')
def coldist(col1, col2):
"""Simple color distance metric."""
# could be math.dist in Python 3.8+
return sum((a-b)**2 for a,b in zip(col1, col2))
def colpick(colors, col):
"""Given a list of colors, pick the one closest to col."""
return min(colors, key=lambda c: coldist(col, c))
def colavg(wts):
"""Given a dictionary of RGB triples : counts, return an averaged color."""
R = G = B = 0
totct = 0
for (cr, cg, cb), num in wts.items():
R += cr*num
G += cg*num
B += cb*num
totct += num
if not totct:
return (0, 0, 0)
return (round(R/totct), round(G/totct), round(B/totct))
def colorfamilies(C, dist=250):
"""Find sets of colors (from a Counter) which are near each other.
We use a very simple color-nearness metric: the sum of squared differences
between R, G, and B being less than 250.
The returned set of colors is the weighted means of the constitutent
colors in each family.
This does not find the most optimal breakdown, even with such a simple metric,
because it compares each color in the image to the most common color of each family so far,
and does not explore the color space for potential "family centers" closer to more colors.
If you want a better breakdown into few colors, recolor your target image manually!
"""
families = {}
for col, num in C.most_common():
for famcol in families:
if coldist(col, famcol) < dist:
families[famcol][col] = num
break
else: # not close to any family
families[col] = {col: num}
return Counter({colavg(wts) : sum(wts.values()) for wts in families.values()})
def sievecolors(C, numpix):
"""Make a list of the colors in a Counter with non-trivial numbers of pixels.
Ignore colors covering less than 1% of the image.
"""
thresh = max(round(.01*numpix), 2)
return [col for col in C if C[col] > thresh]
def reducecolors(C, numpix):
"""Iterate color family reduction, trying to merge families."""
numcol = len(C)
oldnum = numcol + 1
dist = 250
while 6 < numcol and dist < 1000:
if numcol == oldnum:
dist += 20
oldnum = numcol
C = colorfamilies(C, dist)
numcol = len(C)
return sievecolors(C, numpix)
def recolor(img, colors):
"""Create a version of img with colors limited to those given."""
newimg = Image.new('RGB', img.size)
newimg.putdata([colpick(colors, c) for c in img.getdata()])
return newimg
def printpattern(pat, width, height):
"""Print a list of ints in the shape given."""
lines = [''.join(str(i) for i in pat[width*j:width*(j+1)]) for j in range(height)]
print(*lines, sep='\n')
if os.path.exists(HEXFILE):
pigen = PiFileReader(open(HEXFILE, 'rb'))
elif os.path.exists(ZIPFILE):
pigen = PiZipFileReader(ZIPFILE)
else:
pigen = PiDelivery()
target = Image.open('target.png').convert('RGB')
numpix = target.size[0] * target.size[1] # math.prod in Python 3.8+
C = Counter(target.getdata())
numcol = len(C)
if numcol > 6:
print(f'target.png has {numcol} colors, which is too many.')
colorsummary(C)
print('Should we:',
'1. Limit to the most common {3, 4, 5, or 6} colors?',
'2. Try to identify colors which are close to eachother?',
'3. Quit to edit the image?', sep='\n\t')
choice = input('Choose 1, 2, or 3: ')
if choice == '1':
numcol = int(input('Limit to 3, 4, 5, or 6? '))
if numcol < 3 or numcol > 6:
sys.exit('Invalid choice')
colors = [col for col, num in C.most_common(numcol)]
elif choice == '2':
colors = reducecolors(C, numpix)
numcol = len(colors)
if numcol > 6:
print(f'Sorry, I was only able to reduce it to {numcol} families of nearby colors,'
' which is still too many.')
sys.exit('Please edit target.png manually to have fewer distinct colors.')
else:
sys.exit('Quitting')
print('Saving color-limited file as newtarget.png')
recolor(target, colors).save('newtarget.png')
else:
colors = list(C.keys())
print(f'Selected {numcol} colors for the pattern.')
pattern = [colors.index(colpick(colors, c)) for c in target.getdata()]
colorsummary(Counter(colors[i] for i in pattern))
print('Pattern of colors to try to match:')
printpattern(pattern, *target.size)
def dribble(gen):
for bunch in gen:
for digit in bunch:
yield digit - (48 if digit < 58 else 87)
# converts 0..9, a..f (ASCII 48..57, 97..102) to int
def countmismatch(bytarr):
"""Given a list of 256 lists, with # of each color, determine least mismatches.
This is the number of pixels not matching the 'dominant' (most common) color
for that byte.
Also return the number of "muddled" pixels that mix every color (as a tiebreaker).
"""
mis = 0
muddle = 0
for colcounts in bytarr:
totpix = sum(colcounts) # total pixels mapped by this byte
mis += totpix - max(colcounts) # if we use the byte to map the most frequent color,
# the rest are mismatches.
if min(colcounts): # byte needs to map all colors
muddle += totpix
return mis, muddle
def headline():
"""Print headers for the status line."""
print('Digits checked Best match at Pixel mismatches Bytes in match')
print('-------------- ------------- ---------------- --------------')
def status(pos, minmisrpt, index, bestbytes):
"""Print a status line (overwriting current line)."""
bytepreview = bestbytes[:12].hex()
print(f'\r{pos:14,} {index:13,} {minmisrpt!s:16} {bytepreview}…', end='', flush=True)
def makepalette(bytarr, colors):
"""Create a palette (list of 256 RGB triples) mapping each byte to the weighted
average of its colors.
"""
return [colavg(dict(zip(colors, b))) for b in bytarr]
def saveimg(name, size, palette, byts):
img = Image.new('P', size)
# chain flattens the list of 256 triples to an iterator of 768 values
img.putpalette(chain(*palette))
img.putdata(byts)
img.save(name)
def deferinterrupt(signum, frame):
"""Wait for nice point before quitting, unless re-signaled."""
if deferinterrupt.nomore:
raise KeyboardInterrupt
deferinterrupt.nomore = True
deferinterrupt.nomore = False
def ordinal(n):
"""Ordinal version (1st, 2nd, 3rd, 4th, ...) of n."""
ends = ['th', 'st', 'nd', 'rd', 'th']
if (n % 100) // 10 == 1:
return f'{n:,}th'
return f'{n:,}' + ends[min(n%10, 4)]
cur = deque(maxlen=numpix) # bytes constructed from even-aligned hex digits 3.'24','3f','6a', etc.
other = deque(maxlen=numpix) # bytes constructed from odd-aligned hex digits '32' '43', 'f6', 'a8', etc.
oldhalf = 3
minmisrpt = (numpix, numpix)
signal.signal(signal.SIGINT, deferinterrupt)
signal.signal(signal.SIGTERM, deferinterrupt)
if hasattr(signal, 'SIGBREAK'):
signal.signal(signal.SIGBREAK, deferinterrupt)
if hasattr(signal, 'SIGQUIT'):
signal.signal(signal.SIGQUIT, deferinterrupt)
headline()
message = 'Finished'
for pos, halfbyte in enumerate(dribble(pigen)):
cur, other = other, cur
cur.append(oldhalf << 4 | halfbyte)
oldhalf = halfbyte
if len(cur) < numpix:
continue
bytarr = [[0]*numcol for _ in range(256)]
for b, p in zip(cur, pattern):
bytarr[b][p] += 1
misrpt = countmismatch(bytarr)
if misrpt < minmisrpt:
minmisrpt = misrpt
index = pos - numpix*2 + 1
bestbytes = bytes(cur)
status(pos, minmisrpt, index, bestbytes)
fitpal = makepalette(bytarr, colors)
saveimg('found.gif', target.size, fitpal, bestbytes)
if misrpt[0] == 0:
message = f'Success at {index}'
break
if not pos % 5000:
print(f'\r{pos:14,}', end='', flush=True)
if deferinterrupt.nomore:
message = 'Interrupted'
break
print(f'\n{message}. Best result is saved as found.gif.\n'
f'It contains the {ordinal(index+1)} through {ordinal(index+2*numpix)} hexadecimal digits of π.\n'
f'Equivalently, the {ordinal(4*index+1)} through {ordinal(4*index+8*numpix)} bits.')