"""Cross-check Wikidata items carrying P3613 against a Commons category.

The script
1. queries Wikidata (via ``wikidataStuff.wdqsLookup``) for every item with a
   P3613 value and records whether that item also has a P18 value,
2. lists the members of the Commons category
   ``Category:Protected areas of Sweden with known IDs`` grouped by their
   category sort key prefix, and
3. derives two reports from the comparison:
   ``unknown_ids``   - sort key prefixes with no matching P3613 value,
   ``unused_images`` - Commons pages whose matching item has no P18 value.

Uncomment the ``quick_output`` calls at the bottom to print the reports.
"""
from __future__ import print_function

import pywikibot

import wikidataStuff.helpers as helpers
import wikidataStuff.wdqsLookup as query

# Prefix stripped from the 'item' value to obtain the bare entity id.
_ENTITY_PREFIX = 'http://www.wikidata.org/entity/'


# retrieve info from Wikidata
def process_data(raw_data):
    """Process the raw output of query.make_select_wdqs_query.

    :param raw_data: iterable of dict-like entries, each with an 'id' and
        an 'item' key and optionally a 'P18' key.
    :return: dict mapping each 'id' value to
        ``{'image': bool, 'qid': str}`` where ``image`` is True when the
        entry has a truthy 'P18' value and ``qid`` is the 'item' value with
        the entity URL prefix removed.

    When the same id occurs with different qids a notice is printed and the
    last entry seen wins.
    """
    data = {}
    for entry in raw_data:
        idno = entry['id']
        qid = entry['item'][len(_ENTITY_PREFIX):]
        if idno in data and data[idno]['qid'] != qid:
            print('duplicate idno for {}'.format(idno))
        # Later entries deliberately overwrite earlier ones (original
        # behaviour); the message above is the only trace of the clash.
        data[idno] = {'image': bool(entry.get('P18')), 'qid': qid}
    return data


wle_items = process_data(
    query.make_select_wdqs_query(
        '?item wdt:P3613 ?id',
        optional_props=['P18'],
        select_value='id',
        allow_multiple=True,
        raw=True))

# retrieve info from Commons
commons = pywikibot.Site('commons', 'commons')
cat_name = 'Category:Protected areas of Sweden with known IDs'
# Maps sort key prefix -> list of page titles filed under that prefix.
# NOTE(review): the prefix is presumably the same identifier as P3613;
# the comparison below relies on that - confirm against the category.
commons_data = {}
for member in pywikibot.data.api.QueryGenerator(
        site=commons,
        list='categorymembers',
        cmprop='title|sortkeyprefix',
        cmtitle=cat_name):
    commons_data.setdefault(member['sortkeyprefix'], []).append(
        member['title'])


# generate list of bad ids and unused images
def quick_output(output_data):
    """Print each key of *output_data* followed by its values.

    :param output_data: dict mapping a label to an iterable of strings;
        every value is printed on its own tab-indented line below the label.
    """
    for k, v in output_data.items():
        print(k)
        print(u'\t{}'.format(u'\n\t'.join(v)))


unknown_ids = {}
unused_images = {}
for k, v in commons_data.items():
    if k not in wle_items:
        # Sort key prefix does not correspond to any id found on Wikidata.
        unknown_ids[k] = v
    elif not wle_items[k]['image']:
        # Matching item exists but has no P18 value yet.
        key = '{idno} ({qid})'.format(idno=k, qid=wle_items[k]['qid'])
        unused_images[key] = v

# quick_output(unknown_ids)
# quick_output(unused_images)