#! /usr/bin/python # -*- coding: UTF-8 -*- import re import subprocess import urllib2 import urlparse def main(): indexurl = 'https://tools.wmflabs.org/admin/tools' html = urllib2.urlopen(indexurl).read() for tool in re.findall(r'', html): tool = urlparse.urljoin(indexurl, tool) process = subprocess.Popen(['slimerjs-0.10.3/slimerjs', 'do-tool.js', tool], stdin=None, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) while process.poll() is None: # for line in process.stdout.readlines(): # http://bugs.python.org/issue3907 while True: line = process.stdout.readline() if not line: break line = line.strip() parsed = urlparse.urlparse(line) if not re.search(r'\.(wikimedia|wmflabs|wikipedia)\.org(:(80|443))?$', parsed.netloc): print '%s: %s' % (tool, line) if __name__ == '__main__': main()