-
Notifications
You must be signed in to change notification settings - Fork 0
/
entitylist-parse.py
executable file
·66 lines (50 loc) · 1.8 KB
/
entitylist-parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
# -*- mode: python -*-
#
# Parser for the Mozilla tracking protection entity list:
# https://github.com/mozilla-services/shavar-prod-lists/blob/master/disconnect-entitylist.json
#
# WARNING WARNING WARNING
#
# This parser doesn't have any error checking; do not use it with
# untrusted input! It's only meant for debugging purposes.
import argparse
import json
import os
import sys
# Filled by parse_file(): maps each entry of an entity's 'properties' list
# to the sorted list of that entity's 'resources'.
first_parties = {}
def parse_category(category):
    """Collect the trackers listed under the URL keys of a category.

    ``category`` is indexed as ``category[i][org][url]``: a sequence of
    mappings from an organisation name to a mapping whose ``http://`` or
    ``https://`` keys hold iterables of trackers.  Keys that do not start
    with one of those two prefixes are skipped.

    Returns the set of all trackers found (empty when there are none).
    """
    # Accumulate into a local set: no module-level ``trackers`` exists, so
    # adding to a global name would raise NameError.
    trackers = set()
    for entry in category:
        for org_urls in entry.values():
            for url, url_trackers in org_urls.items():
                # Skip keys that are not http(s) URLs.
                if not url.startswith(('http://', 'https://')):
                    continue
                trackers.update(url_trackers)
    return trackers
def parse_file(jsonfile):
    """Load the entity list stored at *jsonfile* into ``first_parties``.

    The file must contain a JSON object mapping each entity name to an
    object with ``resources`` and ``properties`` arrays.  Every property is
    recorded in the module-level ``first_parties`` dict with the sorted
    resources of its entity as the value; a property that appears again
    later overwrites the earlier entry.

    No validation is performed: an unreadable file, malformed JSON or a
    missing key raises the corresponding exception.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonfile, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for entity in data.values():
        # Sort once per entity rather than once per property.
        resources = sorted(entity['resources'])
        for prop in entity['properties']:
            # Each property gets its own list so entries are never aliased.
            first_parties[prop] = list(resources)
def print_first_parties():
    """Print the number of first parties, then one line per first party.

    Lines are emitted in sorted key order and have the form
    ``<first party>: <resource>,<resource>,...``.
    """
    count = len(first_parties)
    print("nb_first_parties = " + str(count))
    # Keys are unique, so sorting the items orders them by key alone.
    for name, resources in sorted(first_parties.items()):
        line = "%s: %s" % (name, ",".join(resources))
        print(line)
def main():
    """Parse the command line, then load and print the given entity list.

    Returns 0 on success, or 1 when the ``jsonfile`` argument does not name
    an existing file (an error message is written to stderr in that case).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('jsonfile', type=str, help='the file to dump')
    options = cli.parse_args()
    path = options.jsonfile

    # Validate the parameters
    if os.path.isfile(path):
        parse_file(path)
        print_first_parties()
        return 0

    print("Error: '%s' not found" % path, file=sys.stderr)
    return 1
# Run only when executed as a script. sys.exit() is used because the bare
# exit() helper is installed by the site module and may not be available.
if __name__ == "__main__":
    sys.exit(main())