#!/usr/bin/python3
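"""Reassemble an ISO image as an IPFS UnixFS DAG ("jigsaw").

The image is scanned in 2048-byte sectors; a sector whose sha256
appears in the pre-computed hash table (built by prehash.py) marks the
start of a known file, which is linked into the DAG by its existing
CID instead of being re-added. Remaining data and padding are stored
as raw blocks.

Usage: ipfsjigsaw.py ISO
"""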
import hashlib
import mmap
import multibase
import os
import re
import subprocess
import sys
import unixfs_pb2

try:
    isofile = sys.argv[1]
except IndexError:
    print("usage: %s ISO" % sys.argv[0])
    sys.exit(1)

isohashfile = isofile + ".hashes"
if not os.path.isfile(isohashfile):
    # Build the hash table with the companion prehash.py script.
    file_path = os.path.realpath(__file__)
    subprocess.run([os.path.dirname(file_path) + "/prehash.py", isofile])

hashfd = open(isohashfile, "r")
hashdict = dict()
while True:
    line = hashfd.readline()
    if not line:
        break
    # Each line: CID size sha256-of-first-sector sha256-of-whole-file
    a = line.strip().split(" ")
    if int(a[1]) <= 0:
        # skip empty files so no empty candidate lists are left behind
        continue
    if a[2] not in hashdict:
        hashdict[a[2]] = list()
    hashdict[a[2]].append([a[0], int(a[1]), a[3]])
hashfd.close()

if len(hashdict) < 70:
    print("hashdict too small - aborting - please check")
    sys.exit(1)
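
# Map the ISO read-only and prepare the UnixFS file node that will
# reference every chunk of the image in order.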
isofd = open(isofile, "rb")
mm = mmap.mmap(isofd.fileno(), 0, prot=mmap.PROT_READ)
offs = 0
unixfsnode = unixfs_pb2.PBNode()
dataparsed = unixfs_pb2.Data()
dataparsed.Type = unixfs_pb2.Data.DataType.File
dataparsed.filesize = 0
aggregateddata = b""
filesn = 0
paddingn = 0
nonfileblockn = 0
paddinghashes = {}
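
# Fetch precomputed CIDs for all-zero padding blocks: each entry in
# this directory is named after its size in bytes.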
with subprocess.Popen(["ipfs", "ls", "--size=false", "--resolve-type=false",
                       "bafybeidobwhbiidzroyewbj6vzgudp7zen6x6cjm5xm4j3sp3zmvwzc5u4"],
                      stdout=subprocess.PIPE) as proc:
    while True:
        line = proc.stdout.readline()
        if not line:
            break
        m = re.search(r"^(\S+) +([0-9]+)$", line.decode())
        cid = m.group(1)
        file = m.group(2)  # the entry name is the padding size in bytes
        paddinghashes[int(file)] = cid

def debug(*args, **kwargs):
    print(*args, file=sys.stderr, **kwargs)

def add_chunk(CID, size):
    # Link an existing block or DAG into the file node and account for
    # its size in the UnixFS metadata.
    link = unixfs_pb2.PBLink()
    link.Hash = multibase.decode(CID)
    link.Tsize = size
    unixfsnode.Links.extend([link])
    dataparsed.filesize += size
    dataparsed.blocksizes.extend([size])

def add_block(data):
    # Store the data as a new block via `ipfs add` and link its CID.
    with subprocess.Popen(["ipfs", "add", "--pin=false", "--cid-version", "1",
                           "--raw-leaves", "--inline", "-Q"],
                          stdin=subprocess.PIPE, stdout=subprocess.PIPE) as proc:
        proc.stdin.write(data)
        proc.stdin.close()
        CID = proc.stdout.readline().decode().strip()
    debug("Adding CID=" + CID)
    add_chunk(CID, len(data))

def flush_aggregate():
    # Flush any buffered non-file data as a single block.
    global aggregateddata
    if not aggregateddata:
        return
    add_block(aggregateddata)
    aggregateddata = b""
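
# Non-file data is buffered and written out in blocks of up to 256 KiB;
# short all-zero chunks (ISO padding) are linked by their precomputed
# padding CIDs instead.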
def add_block_aggregate(data):
    global aggregateddata
    # A short all-zero chunk is padding: reuse its precomputed CID.
    if len(data) < 2048 and not re.search(b"[^\000]", data):
        flush_aggregate()
        CID = paddinghashes[len(data)]
        add_chunk(CID, len(data))
        return
    aggregateddata = aggregateddata + data
    if len(aggregateddata) >= 256*1024:
        flush_aggregate()
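
# Main loop: scan the ISO one 2048-byte sector at a time. A sector
# whose hash is in the table starts a known file; everything else is
# aggregated as raw data.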
while True:
    data = isofd.read(2048)
    if len(data) == 0:
        break
    found = False
    m = hashlib.sha256(data)
    shahash = m.hexdigest()
    if shahash in hashdict:
        f = hashdict[shahash]
        size = f[0][1]
        debug("found file at offs %i with %i candidates, size=%i" % (offs, len(f), size))
        # collect every candidate whose hash fully verifies
        fullmatches = list()
        for candidate in f:
            size = candidate[1]
            if size <= 2048:
                # small files are already verified by the sector hash
                fullmatches.append(candidate)
            else:
                # larger files need the full-file hash verified
                m = hashlib.sha256()
                m.update(mm[offs:offs+size])
                if m.hexdigest() == candidate[2]:
                    fullmatches.append(candidate)
        if len(fullmatches):
            # link the longest verified match
            largestmatch = fullmatches[0]
            for fullmatch in fullmatches[1:]:
                if fullmatch[1] > largestmatch[1]:
                    largestmatch = fullmatch
            size = largestmatch[1]
            debug("largest match", largestmatch)
            flush_aggregate()
            add_chunk(largestmatch[0], size)
            isofd.seek(offs+size, 0)
            # add a padding chunk up to the next sector boundary
            paddingbytes = (2048-size) % 2048
            if paddingbytes > 0:
                data = isofd.read(paddingbytes)
                debug("add padding chunk of %i bytes" % paddingbytes)
                add_block_aggregate(data)
                paddingn += 1
            offs += size+paddingbytes-2048
            found = True
            filesn += 1
    if not found:
        debug("no file found at offset %i" % offs)
        # add non-file data chunk (consecutive chunks are merged)
        add_block_aggregate(data)
        nonfileblockn += 1
    offs += 2048

flush_aggregate()
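
# Serialize the UnixFS metadata into the dag-pb root node; the size
# check below guards against exceeding the 1 MiB block-size limit.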
debug("finalizing...")
unixfsnode.Data = dataparsed.SerializeToString()
nodebytes = unixfsnode.SerializeToString()
debug("Got dag-pb bytes=%i files=%i padding=%i nonfile=%i" % (len(nodebytes), filesn, paddingn, nonfileblockn))
if len(nodebytes) >= 1024*1024:
print("The jigsaw DAG object is too large")
exit(1)

with open(isofile + ".dag-pb", "wb") as f:
    f.write(nodebytes)
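
# Import the serialized node with `ipfs dag put` and pin it; the root
# CID of the jigsaw DAG is printed on stdout.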
putcmd = "ipfs --upgrade-cidv0-in-output dag put --pin --input-codec dag-pb --store-codec dag-pb".split(" ")
putcmd.append(isofile+".dag-pb")
subprocess.run(putcmd)