#!/usr/bin/python
# -*- coding: utf-8 -*-
"""List archive.org REST API URLs needed to archive all external links
from a given wiki to a given domain.

Usage:
    list_archive_links.py <wiki> <domain>

Arguments:
    <wiki>    Host name of the wiki to query (e.g. en.wikipedia.org).
    <domain>  Domain whose external links should be listed.
"""

# NOTE(review): the positional argument names were missing from the usage
# pattern (both lookups read arguments[''], which docopt never produces).
# The names and their order (<wiki> first, <domain> second) are reconstructed
# from the module summary -- confirm against the intended command line.

import bisect
import urllib

from docopt import docopt
from wikitools import wiki, api

# URL schemes queried one after another through the "euprotocol" parameter.
PROTOCOLS = ('http', 'https')

# Template for the archive.org "save" endpoint; %s receives the link to save.
ARCHIVE_URL = 'https://web.archive.org/save/%s'


def collect_external_urls(site, domain, protocols=PROTOCOLS):
    """Return the set of distinct external URLs on *site* matching *domain*.

    Runs one ``list=exturlusage`` query per entry in *protocols* and gathers
    the ``url`` field of every item in every result yielded by ``queryGen()``.

    Args:
        site: ``wikitools.wiki.Wiki`` instance to query.
        domain: value passed as the ``euquery`` search pattern.
        protocols: iterable of values passed as ``euprotocol``.

    Returns:
        A set of URL strings; a link reported for several pages or several
        protocols appears only once.
    """
    base_params = {
        "action": "query",
        "format": "json",
        "list": "exturlusage",
        "euprop": "url",
        "euquery": domain,
        "eulimit": "max",
        "euexpandurl": 1,
    }
    found = set()
    for protocol in protocols:
        # Build a fresh dict per request rather than mutating a shared one.
        params = dict(base_params, euprotocol=protocol)
        for result in api.APIRequest(site, params).queryGen():
            for item in result['query']['exturlusage']:
                found.add(item['url'])
    return found


def main():
    """Parse the command line and print one archive.org save URL per link."""
    arguments = docopt(__doc__)
    site = wiki.Wiki('https://%s/w/api.php' % arguments['<wiki>'])
    # Sorted so that repeated runs produce the same output order.
    for url in sorted(collect_external_urls(site, arguments['<domain>'])):
        # Single-argument call form behaves the same on Python 2 and 3.
        print(ARCHIVE_URL % url)


if __name__ == '__main__':
    main()