-
Notifications
You must be signed in to change notification settings - Fork 13
/
buildmap.py
326 lines (272 loc) · 10.9 KB
/
buildmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
"""
Intake a map of IP prefixes -> AS numbers and output instructions that will
allow a decoder to match an IP address to an ASN by following a sequence
of instructions.
The instructions describe a prefix tree that can be navigated using the bits of
an IP address (i.e. 0 for left child, 1 for right child, leaf nodes
corresponding to a given ASN). The types of instructions are denoted by the
*Type() functions defined below. Once an IP address specifies a bit for which
there is no path in the tree (i.e. the part of its address more specific than
any known network prefix), the tree returns a "default" ASN value that has been
set based on the last valid location in the tree.
See `testmap.py:Interpret` for an illustration of how the decoding process
works.
Before the prefix tree is encoded into instructions using bits, it is compacted
(e.g. duplicate subtrees removed) and annotated with which default ASN values
should be set for particular regions of the tree.
"""
import sys
import ipaddress
from collections import namedtuple
def Parse(entries: list):
"""
Read in a file of the format
1.0.0.0/24 AS13335 # ipv4.dump:4856343
1.0.4.0/22 AS56203 # ipv4.dump:2759291
...
Ignoring comments following '#'. Creates an Entry object for each line.
Maps IPv4 networks into IPv6 space.
Args:
entries: modified in place with the new Entrys.
"""
for line in sys.stdin:
line = line.split('#')[0].lstrip(' ').rstrip(' \r\n')
prefix, asn = line.split(' ')
assert(len(asn) > 2 and asn[:2] == "AS")
network = ipaddress.ip_network(prefix)
prefix_len = network.prefixlen
net_addr = int.from_bytes(network.network_address.packed, 'big')
# Map an IPv4 prefix into IPv6 space.
if isinstance(network, ipaddress.IPv4Network):
prefix_len += 96
net_addr += 0xffff00000000
entries.append(Entry(prefix_len, net_addr, int(asn[2:])))
Entry = namedtuple('Entry', (
# The length of the network prefix in bits. E.g. '26' for 255.255.0.0/26.
'prefix_len',
# An int containing the bits of the network address.
'net_addr',
# An int for the autonomous system (AS) number.
'asn',
))
def UpdateTree(gtree, addrlen: int, entries: [Entry]):
"""
Returns a prefix tree such that following a path down through the
tree based on the bits of a network prefix (in order of most significant
bit) leads to an ASN.
Args:
gtree: tree structure to encode the mappings into. Modified in-place.
addrlen: The maximum number of bits in a network address.
This is 128 for IPv6 (16 bytes).
entries: The network prefix -> ASN mappings to encode.
"""
for prefix, val, asn in sorted(entries):
tree = gtree
default = None
# Iterate through each bit in the network prefix, starting with the
# most significant bit.
for i in range(prefix):
bit = (val >> (addrlen - 1 - i)) & 1
# If we have passed the end of the network prefix, all entries
# under subsequent bits will be associated with the same ASN.
needs_inner = i < prefix - 1
if tree[bit] is None:
if needs_inner:
tree[bit] = [default, default]
tree = tree[bit]
continue
else:
tree[bit] = asn
break
if isinstance(tree[bit], list):
assert(needs_inner)
tree = tree[bit]
continue
assert(isinstance(tree[bit], int))
if tree[bit] == asn:
break
if not needs_inner:
tree[bit] = asn
break
default = tree[bit]
tree[bit] = [default, default]
tree = tree[bit]
return gtree
def CompactTree(tree, approx=True) -> (list, set):
"""
Remove redundancy from a tree.
E.g. if all nodes in a subtree point to the same ASN, compact the subtree
into a single int.
Returns:
(the compacted tree, a set of all ASNs in the tree)
Args:
approx: if True, unassigned ranges may get reassigned to arbitrary ASNs.
"""
num = 0
if tree is None:
return (tree, set())
if isinstance(tree, int):
return (tree, set([tree]))
tree[0], leftas = CompactTree(tree[0], approx)
tree[1], rightas = CompactTree(tree[1], approx)
allas = leftas | rightas
if len(allas) == 0:
return (None, allas)
if approx and len(allas) == 1:
return (list(allas)[0], allas)
if isinstance(tree[0], int) and isinstance(tree[1], int) and tree[0] == tree[1]:
return tree[0], set([tree[0]])
return (tree, allas)
def PropTree(tree, approx=True) -> (list, Counter, bool):
"""
Annotate internal nodes in the tree with the most common leafs below it.
The binary serialization later uses this.
This changes the shape of the `tree` datastructure from
`[left_child, right_child]` to `[lc, rc, max_ASN_in_tree]`.
Returns:
(tree, Counter of ASNs in tree, whether or not tree is empty)
"""
if tree is None:
return (tree, Counter(), True)
if isinstance(tree, int):
return (tree, Counter({tree: 1}), False)
tree[0], leftcnt, leftnone = PropTree(tree[0], approx)
tree[1], rightcnt, rightnone = PropTree(tree[1], approx)
allcnt = leftcnt + rightcnt
allnone = leftnone | rightnone
maxasn, maxcount = allcnt.most_common(1)[0]
if maxcount is not None and maxcount >= 2 and (approx or not allnone):
return ([tree[0], tree[1], maxasn], Counter({maxasn: 1}), allnone)
return (tree, allcnt, allnone)
def EncodeBits(val, minval, bit_sizes) -> [int]:
"""
Perform a variable-length encoding of a value to bits, least significant
bit first.
For each `bit_sizes` passed, attempt to encode the value with that number
of bits + 1. Normalize the encoded value by `minval` to potentially save
bits - the value will be corrected during decoding.
Returns:
a list of bits representing the value to encode.
"""
val -= minval
ret = []
for pos in range(len(bit_sizes)):
bit_size = bit_sizes[pos]
# If the value will not fit in `bit_size` bits, absorb the largest
# value for this bitsize and continue to the next smallest size.
if val >= (1 << bit_size):
val -= (1 << bit_size)
ret += [1]
else:
# If we aren't encoding the largest possible value per the largest
# bitsize...
if (pos + 1 < len(bit_sizes)):
ret += [0]
# Use remaining bits to encode the rest of val.
for b in range(bit_size):
ret += [(val >> (bit_size - 1 - b)) & 1]
return ret
# Couldn't fit val into any of the bit_sizes
assert(False)
def MatchType() -> [int]:
"""
The match instruction descends into the tree based on a bit path. If at any
point the match fails to hit a valid path through the tree, it will fail
and return the current default ASN (which changes as we move through the
tree).
"""
return EncodeType(2)
def JumpType() -> [int]:
"""
The jump instruction allows us to quickly seek to one side of the tree
or the other. By encoding the length of the left child, we can skip over
it to the right child if need be.
"""
return EncodeType(1)
def LeafType() -> [int]:
"""The leaf instruction encodes an ASN at the end of a bit path."""
return EncodeType(0)
def SetNewDefaultType() -> [int]:
"""
This instruction establishes a new default ASN to return should we fail
while traversing this path.
"""
return EncodeType(3)
def EncodeType(v) -> [int]:
return EncodeBits(v, 0, [0, 0, 1])
def EncodeASN(v) -> [int]:
# It's reasonable to ask why "15" (indicating 16 bits) is the minimum size
# we might try to pack an ASN into, given there are many ASNs below 2**16.
#
# The reason that we start at 15 here is because we want the first bitsize
# we specify to contain ~50% of the values we are trying to encode - this
# is because each separate bitsize we try will add a digit to our encoded
# values, so we simultaneously want to minimize the number of bitsizes we
# allow while also minimizing the bit length of the encoded data, which
# is a trade-off.
return EncodeBits(v, 1, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24])
def EncodeMatch(v) -> [int]:
return EncodeBits(v, 2, [1, 2, 3, 4, 5, 6, 7, 8])
def EncodeJump(v) -> [int]:
return EncodeBits(v, 17, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])
def EncodeBytes(bits) -> [int]:
"""Encode a sequence of bits as a sequence of bytes."""
val = 0
nbits = 0
bytes = []
for bit in bits:
val += (bit << nbits)
nbits += 1
if (nbits == 8):
bytes += [val]
val = 0
nbits = 0
if nbits:
bytes += [val]
return bytes
def TreeSer(tree, default):
match = 1
assert(tree is not None)
assert(not (isinstance(tree, int) and tree == default))
# If one side of the tree is empty (i.e. represents a path without
# choices), encode a match instruction up to 8 bits.
while isinstance(tree, list) and match <= 0xFF:
if tree[0] is None or tree[0] == default:
match = (match << 1) + 1
tree = tree[1]
elif tree[1] is None or tree[1] == default:
match = (match << 1) + 0
tree = tree[0]
else:
break
if match >= 2:
return MatchType() + EncodeMatch(match) + TreeSer(tree, default)
# Leaf node: return the ASN.
if isinstance(tree, int):
return LeafType() + EncodeASN(tree)
# Return the tree along with a new "default" ASN value should we fail to
# match while along this path.
if len(tree) > 2 and tree[2] != default:
return SetNewDefaultType() + EncodeASN(tree[2]) + TreeSer(tree, tree[2])
left = TreeSer(tree[0], default)
right = TreeSer(tree[1], default)
# Start the program by specifying a possible jump to either child of the
# first node.
return JumpType() + EncodeJump(len(left)) + left + right
def BuildTree(entries, approx=True):
tree = [None, None]
tree = UpdateTree(tree, 128, entries)
return tree
entries: [Entry] = []
print("[INFO] Loading", file=sys.stderr)
Parse(entries)
print("[INFO] Read %i prefixes" % len(entries), file=sys.stderr)
print("[INFO] Constructing trie", file=sys.stderr)
tree = BuildTree(entries)
print("[INFO] Compacting tree", file=sys.stderr)
tree, _ = CompactTree(tree, True)
print("[INFO] Computing inner prefixes", file=sys.stderr)
tree, _, _ = PropTree(tree, True)
ser = TreeSer(tree, None)
print("[INFO] Total bits: %i" % (len(ser)), file=sys.stderr)
sys.stdout.buffer.write(bytes(EncodeBytes(ser)))