-
Notifications
You must be signed in to change notification settings - Fork 1
/
category_check.py
129 lines (110 loc) · 4.61 KB
/
category_check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import argparse
import datetime
import webbrowser
import category_tools
import csv_iterate
import data_reader
def is_residence(location, places):
    """Return True if any name in *places* occurs in *location*, ignoring case.

    Matching is a plain case-insensitive substring test, so a short place
    name also matches any longer word that happens to contain it.

    Args:
        location: Free-text location string. ``None`` or an empty string
            never matches.
        places: Iterable of place-name strings to look for.

    Returns:
        True if at least one place name is a substring of the location,
        otherwise False.
    """
    if not location:
        return False
    # Lower-case the location once rather than once per candidate name.
    haystack = location.lower()
    return any(name.lower() in haystack for name in places)
# Thresholds used to decide which resident profiles are treated as editable.
_MIN_EDITABLE_PRIVACY_LEVEL = 60
_EARLIEST_EDITABLE_BIRTH = datetime.date(1500, 1, 1)
# Upper bound on how many profile URLs are listed (and possibly opened).
_MAX_LISTED_PROFILES = 100


def _find_residents(version, target_places):
    """Return the set of user numbers whose life events match *target_places*.

    A user is included when their birth or death location matches, or when
    they take part in a marriage whose location matches (see is_residence).
    """
    residents = set()
    for user in csv_iterate.iterate_users(version=version):
        for place in (user.birth_location(), user.death_location()):
            if place and is_residence(place, target_places):
                residents.add(user.user_num())

    for marriage in csv_iterate.iterate_marriages(version=version):
        # Fetch the location once; it is needed for both checks below.
        location = marriage.marriage_location()
        if location and is_residence(location, target_places):
            residents.update(marriage.user_nums())
    return residents


def _is_editable(db, user_num):
    """Return True if the profile passes the privacy and birth-date filters."""
    if db.get(user_num, "privacy_level") < _MIN_EDITABLE_PRIVACY_LEVEL:
        return False
    birth_date = db.birth_date_of(user_num)
    # Profiles without a known birth date are kept.
    # NOTE(review): the 1500 cut-off presumably reflects an editing
    # restriction on very old profiles -- confirm.
    return not birth_date or birth_date >= _EARLIEST_EDITABLE_BIRTH


def category_check(version, args, *, target_places, category_name=None):
    """Print resident statistics for a region and compare them to a category.

    Residents are people whose birth, death or marriage location contains
    one of *target_places*. The number of residents and of editable
    residents is always printed. When *category_name* is given, the function
    also prints how the residents overlap with the members of that category
    and lists the profile URLs of up to 100 editable residents that are
    missing from it.

    Args:
        version: Data version handed to the data readers.
        args: Parsed command-line namespace; only ``open_links`` is read. If
            it is true, every listed URL is also opened in a web browser.
        target_places: Place names that identify the region.
        category_name: Category to compare against, or None to print only
            the resident counts.
    """
    db = data_reader.Database(version)

    residents = _find_residents(version, target_places)
    print(f"# Residents = {len(residents):_}")

    editable_residents = {
        user_num for user_num in residents if _is_editable(db, user_num)}
    print(f"# Editable residents = {len(editable_residents):_}")

    if not category_name:
        return

    # The category database is only needed for the comparison below, so it
    # is not constructed when no category was requested.
    category_db = category_tools.CategoryDb(version)
    in_category = category_db.list_people_in_category(category_name)
    print(f"# in category = {len(in_category):_}")
    cat_not_resident = in_category - residents
    print(f"# in category, not resident = {len(cat_not_resident):_}")
    residents_not_in_cat = residents - in_category
    print(f"# Residents not in category = {len(residents_not_in_cat):_}")

    # Only list editable residents. Can't fix the private ones.
    editable_residents_not_in_cat = editable_residents - in_category
    print(f"# Editable residents not in category = {len(editable_residents_not_in_cat):_}")
    # Limit to opening 100 URLs!
    for person in sorted(editable_residents_not_in_cat)[:_MAX_LISTED_PROFILES]:
        url = f"https://www.wikitree.com/wiki/{db.num2id(person)}"
        print(" *", url)
        if args.open_links:
            webbrowser.open(url)
# Regions that can be checked, in the order they are reported. Each entry is
# (argparse dest / flag suffix, heading, category name or None, place names).
# A category name of None means only the resident counts are printed.
_REGIONS = (
    (
        "shapinsay",
        "Shapinsay parish, Orkney, Scotland (pop 1,000)",
        "Shapinsay_Parish,_Orkney",
        ["Shapinsay"],
    ),
    (
        "inowroclaw",
        "Inowrocław county, Poland (pop 70,000)",
        # TODO: There was recently a category change. Wait for things to settle a bit.
        # category_name="Inowrocław_County,_Kuyavian-Pomeranian_Voivodeship",
        None,
        [
            # Inowrocław in various Polish and German spellings
            "Inowrocław", "Inowroclaw", "Inowrazlaw", "Hohensalza", "Jungleslau",
            # Gminas in Inowrocław county
            "Kruszwica", "Kruschwitz",
            "Gniewkowo",  # "Argenau", TODO: matches Margenau
            "Janikowo",  # "Amsee", TODO: matches Zallamsee
            "Pakość", "Pakosc", "Pakosch",
            "Złotniki Kujawskie", "Güldenhof", "Guldenhof",
            "Dąbrowa Biskupia", "Luisenfelde",
            # "Rojewo", TODO: matches Dobrojewo # "Roneck", TODO: matches Mamaroneck
            # Towns in Strelno, Posen that are now in Inowrocław county
            "Ludzisko", "Ludzisk",
            # Removed: "Polanowitz", "Polanowice" (there are several: https://pl.wikipedia.org/wiki/Polanowice)
            # Removed "Piaski" because there's Piaski, Warsaw too :/
            # Specific towns in Inowrocław
            "Płonkowo", "Plonkowo", "Tuczno",
        ],
    ),
    (
        "kalmar",
        "Kalmar county, Sweden (pop 240,000)",
        # TODO: We really want all subcategories of this ...
        # category_name="Kalmar_County",
        None,
        ["Kalmar"],
    ),
    (
        "honhardt",
        "Honhardt parish, Württemberg, Germany (pop 5,000)",
        # TODO: category_name="Honhardt,_Württemberg",
        None,
        [
            "Honhardt",
            # Honhardt is now part of the town Frankenhardt
            "Frankenhardt",
            # Location within Honhardt
            "Hirschhof",
        ],
    ),
)


def main(argv=None):
    """Parse the command line and run the category check for each region.

    Every region in ``_REGIONS`` is checked unless its ``--no-<region>``
    flag is given. A blank line is printed after each region's report.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. The
            default of None keeps the normal command-line behaviour.
    """
    parser = argparse.ArgumentParser(
        description="Compare people located in a region with a category.")
    parser.add_argument("--version", help="Data version (defaults to most recent).")
    parser.add_argument(
        "--open-links", action="store_true",
        help="Open each listed profile URL in a web browser.")
    for dest, heading, _category, _places in _REGIONS:
        # Regions are enabled by default; the flag switches one off.
        parser.add_argument(
            f"--no-{dest}", dest=dest, action="store_false",
            help=f"Skip the check for {heading}.")
    args = parser.parse_args(argv)

    for dest, heading, category_name, target_places in _REGIONS:
        if not getattr(args, dest):
            continue
        print(heading)
        category_check(
            args.version, args,
            category_name=category_name,
            target_places=target_places)
        print()
# Run the checks only when this file is executed as a script, so importing
# the module does not trigger a full data scan.
if __name__ == "__main__":
    main()