forked from Senzing/mapper-opensanctions
-
Notifications
You must be signed in to change notification settings - Fork 0
/
senzing_resolver.py
67 lines (54 loc) · 2.06 KB
/
senzing_resolver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import json
import click
import logging
from pathlib import Path
from pprint import pprint
from followthemoney.dedupe import Judgement
from nomenklatura.resolver import Resolver, Identifier
from nomenklatura.util import PathLike
# Module-level logger; used below for progress and skipped-cluster messages.
log = logging.getLogger("senzing_resolver")
def read_senzing_export(senzing_export: PathLike):
    """Lazily yield one decoded JSON document per line of an export file.

    The file is treated as JSON Lines: every non-blank line is parsed with
    ``json.loads`` and yielded in file order, so the whole export is never
    held in memory at once.

    Args:
        senzing_export: Path of the export file to read.

    Yields:
        The object decoded from each non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON interchange text is UTF-8; do not depend on the locale default.
    with open(senzing_export, "r", encoding="utf-8") as fh:
        for line in fh:
            # Blank lines (e.g. a stray newline at the end of the export)
            # carry no record and would make json.loads() raise.
            if not line.strip():
                continue
            yield json.loads(line)
@click.command()
@click.argument(
    "senzing_export",
    # dir_okay=False: the export is opened as a regular file, so reject a
    # directory up front with a usage error instead of failing in open().
    type=click.Path(exists=True, file_okay=True, dir_okay=False),
)
@click.argument("resolver_file", type=click.Path())
def make_resolver(senzing_export: PathLike, resolver_file: PathLike):
    """Record Senzing entity clusters as positive judgements in a resolver.

    SENZING_EXPORT is a line-delimited JSON export; each line is expected to
    hold a RESOLVED_ENTITY object with a RECORDS list. RESOLVER_FILE is the
    resolver file that is loaded, updated and saved.
    """
    logging.basicConfig(level=logging.INFO)
    resolver = Resolver.load(Path(resolver_file).resolve())
    for idx, entity in enumerate(read_senzing_export(senzing_export)):
        # Progress message every 10,000 exported entities (not for the first).
        if idx % 10000 == 0 and idx > 0:
            log.info("Converting record groups: %d ...", idx)
        # The RELATED_ENTITIES part of each export line is not used here.
        resolved = entity.pop("RESOLVED_ENTITY", {})
        records = resolved.pop("RECORDS", [])
        # Fewer than two records means there is nothing to merge.
        if len(records) < 2:
            continue
        # Very large clusters are skipped rather than merged.
        if len(records) > 1000:
            log.warning("Mega-node: %d entities (skipping)", len(records))
            continue
        # The first record seeds the merge target; every later record is
        # merged into the current target.
        target = None
        for record in records:
            record_id = Identifier.get(record["RECORD_ID"])
            if target is None:
                target = record_id
                continue
            # Skip pairs the resolver does not accept as a candidate.
            # NOTE(review): presumably this means a judgement already exists
            # for the pair - confirm against nomenklatura's Resolver.
            if not resolver.check_candidate(target, record_id):
                continue
            # decide() returns the identifier used as the target from here on
            # (presumably the canonical ID of the merged cluster - confirm).
            target = resolver.decide(
                target,
                record_id,
                judgement=Judgement.POSITIVE,
                user="senzing",
            )
    print("done, saving")
    resolver.save()
# Run the click command only when executed as a script, not when imported.
if __name__ == "__main__":
    make_resolver()