"""Export the plain-domain entries of the French Wikipedia spam blacklist as JSON.

The script downloads the raw ``MediaWiki:Spam-blacklist`` page, keeps the
entries that carry a trailing ``# comment`` and that reduce to a plain domain
once the common regex escapes are removed, and prints them as a JSON array of
``{"domain": ..., "notes": ...}`` objects.
"""

import json
import re

BASE_URL = 'https://fr.wikipedia.org'
PAGE_URL = '/w/index.php?title=MediaWiki:Spam-blacklist&action=raw'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# "<pattern><whitespace>#<comment>" within a single line.
_ENTRY_RE = re.compile(r'(.*?)\s+#(.*)')

# An entry still containing one of these after unescaping is treated as a
# real regular expression rather than a plain domain, and is skipped.
# NOTE(review): other regex metacharacters (e.g. ``?``, ``+``, ``|``) are not
# screened here; confirm whether such entries should be excluded as well.
_REGEX_CHARS = '()[]{}*\\'


def parse_spam_blacklist(text):
    """Extract plain-domain entries and their comments from blacklist text.

    Each line is examined on its own, so a comment is only ever attached to
    the pattern written on the same line.

    Args:
        text: Raw content of the blacklist page.

    Returns:
        A list of ``{"domain": str, "notes": str}`` dicts in page order.
        Lines without a pattern, without a comment, or whose pattern still
        contains regex syntax after unescaping are left out.
    """
    entries = []
    for line in text.splitlines():
        match = _ENTRY_RE.match(line)
        if match is None:
            continue

        pattern, notes = match.groups()
        if not pattern or not notes:
            continue
        # A whole-line comment that itself contains " #" is not an entry.
        if pattern.lstrip().startswith('#'):
            continue

        # Undo the escapes commonly used around literal domains.
        candidate = (
            pattern.replace('\\b', '')
            .replace('\\.', '.')
            .replace('\\-', '-')
        )
        if any(char in candidate for char in _REGEX_CHARS):
            continue

        entries.append({
            "domain": candidate.strip(),
            "notes": notes.strip(),
        })
    return entries


def fetch_spam_blacklist_text():
    """Download the raw blacklist page and return its text.

    Raises:
        requests.RequestException: On connection problems, a timeout, or an
            HTTP error status.
    """
    # Imported here so the parsing helper stays importable without the
    # third-party dependency installed.
    import requests

    response = requests.get(BASE_URL + PAGE_URL, timeout=REQUEST_TIMEOUT)
    # Fail loudly rather than parsing an error page as an empty blacklist.
    response.raise_for_status()
    return response.text


def main():
    """Fetch the blacklist and print the extracted entries as JSON."""
    spam_list = parse_spam_blacklist(fetch_spam_blacklist_text())
    print(json.dumps(spam_list, ensure_ascii=False, indent='\t'))


if __name__ == "__main__":
    main()