-
Notifications
You must be signed in to change notification settings - Fork 8
/
enwp_import_wikidata_shortdesc.py
108 lines (92 loc) · 2.96 KB
/
enwp_import_wikidata_shortdesc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Synchronise enwp short description and wikidata en descriptions
# Mike Peel 03-Aug-2020 v1 - start
import re

import pywikibot
from pywikibot import pagegenerators
from pywikibot.data import api
def get_pageinfo(site, itemtitle):
    """Fetch the page properties of a title through the MediaWiki API.

    Sends an ``action=query`` / ``prop=pageprops`` request in JSON format to
    ``site``, with ``itemtitle`` as the ``titles`` parameter, and returns
    whatever ``submit()`` on that request returns.
    """
    query = dict(
        action='query',
        format='json',
        prop='pageprops',
        titles=itemtitle,
    )
    return api.Request(site=site, parameters=query).submit()
# --- Run configuration ------------------------------------------------------
maxnum = 10000  # Stop once this many pages have been edited
nummodified = 0  # Number of pages saved so far
debug = True  # When True, ask for confirmation before every save
trip = True  # When False, pages are skipped until the resume marker is seen
replace_existing = False  # When True, overwrite enwp descriptions that differ

wikidata_site = pywikibot.Site("wikidata", "wikidata")
repo = wikidata_site.data_repository()  # this is a DataSite object
commons = pywikibot.Site('commons', 'commons')
wikipedia = pywikibot.Site('en', 'wikipedia')
templates = ['Short description', 'short description']

# Matches an existing short description template whose value contains no
# nested braces, using the template names listed in ``templates``.
shortdesc_pattern = re.compile(
    r'\{\{\s*(?:' + '|'.join(re.escape(name) for name in templates)
    + r')\s*\|[^{}]*\}\}')


def _confirm(question):
    """Return True if an edit should go ahead.

    In debug mode the user is asked ``question`` and must answer ``y``;
    otherwise every edit is approved automatically.
    """
    if not debug:
        return True
    return input(question) == 'y'


def _set_shortdesc(text, description):
    """Return ``text`` with its short description set to ``description``.

    The first existing short description template is replaced in place; if
    none is found, a new template is prepended to the text.
    """
    new_template = '{{Short description|' + description + '}}'
    # A function replacement keeps backslashes in the description literal.
    new_text, replaced = shortdesc_pattern.subn(
        lambda match: new_template, text, count=1)
    if replaced:
        return new_text
    return new_template + text


for page in pagegenerators.RandomPageGenerator(
        total=100, site=wikipedia, namespaces=[0]):
    enwiki_description = ''
    wikidata_description = ''

    # Resume support: while ``trip`` is False, just print and skip pages
    # until one whose title contains the marker below is reached.
    if not trip:
        if "Varanasi" in page.title():
            trip = True
        else:
            print(page.title())
            continue

    # Load the Wikidata item linked to this page.  Failures are best-effort
    # skips, but ``Exception`` (unlike a bare ``except``) still lets
    # KeyboardInterrupt and SystemExit stop the script.
    try:
        wd_item = pywikibot.ItemPage.fromPage(page)
        item_dict = wd_item.get()
        qid = wd_item.title()
    except Exception:
        print('Huh - no page found')
        continue
    print("\n" + qid)
    print('https://en.wikipedia.org/wiki/' + page.title().replace(' ', '_'))

    # Get the short description from enwp
    pageinfo = get_pageinfo(wikipedia, page)
    for page_data in pageinfo['query']['pages'].values():
        shortdesc = page_data.get('pageprops', {}).get('wikibase-shortdesc')
        if shortdesc is not None:
            enwiki_description = shortdesc
    if enwiki_description:
        # Lower-case the first character only; the rest is left untouched.
        enwiki_description = enwiki_description[0].lower() + enwiki_description[1:]
    # Pages without a local short description deliberately fall through to
    # the import branch below instead of being skipped.

    # Get the description from Wikidata
    wikidata_description = item_dict['descriptions'].get('en', '')

    # Save it to enwiki
    if enwiki_description == '' and wikidata_description != '':
        print(wikidata_description)
        if _confirm('No description, import it?'):
            page.text = '{{Short description|' + wikidata_description + '}}' + page.get()
            page.save('Importing short description from Wikidata', minor=False)
            nummodified += 1
    elif (replace_existing and wikidata_description != ''
          and wikidata_description.lower() != enwiki_description.lower()):
        # The Wikidata description doesn't match enwp, update it using wikidata
        print('enwp: ' + enwiki_description)
        print('wikidata: ' + wikidata_description)
        if _confirm('Change description?'):
            # Work on this page's own wikitext and swap the existing
            # template rather than stacking a second one in front of it.
            page.text = _set_shortdesc(page.get(), wikidata_description)
            page.save('Matching short description from Wikidata', minor=False)
            nummodified += 1

    # ``>=`` so that at most ``maxnum`` pages are edited.
    if nummodified >= maxnum:
        break

print('Done! Edited ' + str(nummodified) + ' entries')
# EOF