-
Notifications
You must be signed in to change notification settings - Fork 8
/
example.py
171 lines (146 loc) · 4.89 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Example pywikibot code description here
# Mike Peel 17-Sep-2018 v1 - start
# Import modules
import pywikibot
from pywikibot import pagegenerators
from pywikibot.data import api
import numpy as np
import requests
# You may need to enforce the use of utf-8 (Python 2 only).
import sys
if sys.version_info[0] < 3:
    # reload() as a builtin and sys.setdefaultencoding() exist only on
    # Python 2; on Python 3 these calls would raise, so they are skipped.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('UTF8')

# Connect to ptwiki
ptwiki = pywikibot.Site('pt', 'wikipedia')
# and then to wikidata
ptwiki_repo = ptwiki.data_repository()
def editarticle(page):
    """Append a test line to a wiki page and save it.

    Args:
        page: Page object providing ``get()``, a writable ``text`` attribute
            and ``save(summary)`` (a ``pywikibot.Page`` in this script).
            ``get()`` is called outside the error handling, so any error it
            raises propagates to the caller.

    Returns:
        1 if the save succeeded, 0 if saving raised an error.
    """
    page.text = page.get() + "\nThis is a test edit"
    try:
        page.save("Saving test edit")
    except Exception:
        # Best-effort demo: report the failure and carry on. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        print("That didn't work!")
        return 0
    return 1
def editarticle2(page):
    """Replace the English test line on a wiki page with Portuguese and save.

    Every occurrence of 'This is a test edit' in the page text is replaced;
    if the phrase is absent the text is saved unchanged.

    Args:
        page: Page object providing ``get()``, a writable ``text`` attribute
            and ``save(summary)`` (a ``pywikibot.Page`` in this script).
            ``get()`` is called outside the error handling, so any error it
            raises propagates to the caller.

    Returns:
        1 if the save succeeded, 0 if saving raised an error.
    """
    original = page.get()
    page.text = original.replace('This is a test edit', 'Isto é uma edição de teste')
    try:
        page.save("Saving test edit")
    except Exception:
        # Best-effort demo: report the failure and carry on. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        print("That didn't work!")
        return 0
    return 1
def printwikidata(wd_item):
    """Print a short summary of a Wikidata item.

    Prints, in order: the item's title (its QID), its English label, its
    ptwiki sitelink, the raw list of P31 claims, and then the title and
    English label of each P31 target. Any piece that cannot be read is
    reported with a short message instead of raising.

    Args:
        wd_item: Item object providing ``title()`` and ``get()``, where
            ``get()`` returns a mapping with 'labels', 'sitelinks' and
            'claims' entries (a ``pywikibot.ItemPage`` in this script).

    Returns:
        Always 0.
    """
    print(wd_item.title())
    item_dict = wd_item.get()

    # Each lookup is best-effort. Exception (not a bare except) is caught so
    # Ctrl-C / SystemExit still propagate while missing data is reported.
    try:
        print('Name: ' + item_dict['labels']['en'])
    except Exception:
        print('No English label!')

    try:
        # NOTE(review): ``.title`` is read as an attribute, not called; this
        # presumably expects a sitelink object with a string ``title`` --
        # confirm against the installed pywikibot version.
        print('ptwiki article: ' + item_dict['sitelinks']['ptwiki'].title)
    except Exception:
        print('No Portuguese article!')

    try:
        print(item_dict['claims']['P31'])
    except Exception:
        print('No P31')

    try:
        # The try wraps the whole loop: the first failing claim ends it.
        for claim in item_dict['claims']['P31']:
            p31_value = claim.getTarget()
            p31_item_dict = p31_value.get()
            print('P31 value: ' + p31_value.title())
            print('P31 label: ' + p31_item_dict['labels']['en'])
    except Exception:
        print("That didn't work!")
    return 0
def editwikidata(wd_item, propertyid, value):
    """Build an item-valued claim and add it to a Wikidata item on request.

    Prints the item's title and the prepared claim, then asks "Save? " on
    the console; the claim is only added when the answer is exactly 'y'.

    Args:
        wd_item: Item to edit; must provide ``title()``, ``get()`` and
            ``addClaim(claim, summary=...)``.
        propertyid: Property ID for the new claim (e.g. 'P31').
        value: Item ID used as the claim's target (e.g. 'Q3938').

    Returns:
        Always 0, whether or not the claim was saved.
    """
    print(wd_item.title())
    # The result is unused, but the call is kept so the item's data is
    # still fetched before the claim is built, as before.
    wd_item.get()

    claim_target = pywikibot.ItemPage(ptwiki_repo, value)
    newclaim = pywikibot.Claim(ptwiki_repo, propertyid)
    newclaim.setTarget(claim_target)
    print(newclaim)

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input  # noqa: F821
    except NameError:
        ask = input
    if ask("Save? ") == 'y':
        wd_item.addClaim(newclaim, summary=u'Adding test claim')
    return 0
def parsesite(url):
    """Fetch a page and print the title and museum of each listed entry.

    The page text is split on the hidden ``<h1 style='display:none'>``
    marker. The first two chunks (the top part of the page) are skipped;
    for each remaining chunk the text before ``</h1>`` is printed as the
    title and the ``itemprop='publisher'`` span as the museum.

    Args:
        url: Address of the page to fetch.

    Returns:
        Always 0, including when the page could not be fetched.

    Raises:
        IndexError: If an entry chunk does not contain the museum marker.
    """
    try:
        # A timeout keeps a stalled server from hanging the script forever.
        websitetext = requests.get(url, timeout=30).text
    except requests.exceptions.RequestException:
        print('Problem fetching page!')
        return 0

    chunks = websitetext.split("<h1 style='display:none'>")
    # Skip the top part (the first two chunks hold no entries).
    for item in chunks[2:]:
        print('Title: ' + item.split('</h1>')[0].strip() + '\n')
        print('Museum: ' + item.split("strong>Museu:</strong><span itemprop='publisher'>")[1].split("</span>")[0].strip() + "\n")
    return 0
# From https://gist.github.com/ettorerizza/7eaebbd731781b6007d9bdd9ddd22713
def search_entities(site, itemtitle, language='en'):
    """Search for Wikidata items by text using the ``wbsearchentities`` action.

    Args:
        site: Site (data repository) the API request is sent to.
        itemtitle: Text to search for.
        language: Language code passed as the request's ``language``
            parameter. Defaults to 'en', the value that was previously
            hard-coded.

    Returns:
        The result of submitting the request; the calling script reads the
        matches from its 'search' entry.
    """
    params = {
        'action': 'wbsearchentities',
        'format': 'json',
        'language': language,
        'type': 'item',
        'search': itemtitle,
    }
    request = api.Request(site=site, parameters=params)
    return request.submit()
# --- Demo run: everything below executes when the script is run ---

# Edit a wiki page. Page must exist already!
page = pywikibot.Page(ptwiki, 'Usuário(a):Mike_Peel/teste')
test = editarticle(page)
print(test)
test = editarticle2(page)

# Print the details of a single Wikidata item
page = pywikibot.ItemPage(ptwiki_repo, 'Q511405')
test = printwikidata(page)

# Print the details of the items returned by a SPARQL query
sparql = "SELECT ?item WHERE { ?item wdt:P31 wd:Q184356 } LIMIT 10"
generator = pagegenerators.WikidataSPARQLPageGenerator(sparql, site=ptwiki_repo)
for page in generator:
    printwikidata(page)

# List the direct subcategories and pages of a category
targetcat = 'Categoria:Telescópios'
cat = pywikibot.Category(ptwiki, targetcat)
subcats = pagegenerators.SubCategoriesPageGenerator(cat, recurse=False)
for subcat in subcats:
    print(subcat.title())
pages = pagegenerators.CategorizedPageGenerator(cat, recurse=False)
for page in pages:
    print(page.title())

# List the pages that embed a template
template = pywikibot.Page(ptwiki, 'Predefinição:Info/Telescópio')
targets = template.embeddedin()
for target in targets:
    print(target.title())

# List ten random pages from namespace 14
targets = pagegenerators.RandomPageGenerator(total=10, site=ptwiki, namespaces='14')
for target in targets:
    print(target.title())

# Search Wikidata by text and print "QID - label" for each match
wikidataEntries = search_entities(ptwiki_repo, "Neuromat")
results = wikidataEntries['search']
for result in results:
    print(result['id'] + " - " + result['label'])

# Do a test edit to Wikidata
testqid = 'Q4115189'  # Wikidata sandbox
testproperty = 'P31'  # instance of
testvalue = 'Q3938'  # Sandbox
wd_item = pywikibot.ItemPage(ptwiki_repo, testqid)
dictionary = wd_item.get()
print(dictionary)
# NOTE(review): normalize_paraminfo is looked up on the api module here;
# confirm that the installed pywikibot actually exposes it at that level.
print(api.normalize_paraminfo(dictionary))
# Optional extras, disabled by default:
# print(editwikidata(wd_item, testproperty, testvalue))
# parsesite('http://www.museusdoestado.rj.gov.br/sisgam/index.php?pagina=1&operador=or&busca=a%20b%20c%20d%20e%20f%20g%20h%20i%20j%20k%20l%20m%20n%20o%20p%20q%20r%20s%20t%20u%20v%20w%20x%20y%20z&museu=todos&qresultados=40')