# Patch summary (review notes; this free text sits before the "Index:" header
# and is not part of any hunk).
#
# Target: spellcheck.py, revision 10724 -> working copy.
# What the hunks below change:
#   * add `import json`;
#   * reword the menu entry for answer "p" to "Always ignore on this page";
#   * remove the `pageskip` list: its initialisation in the top-level `try:`
#     block and the two `pageskip = []` resets (one right after the
#     spellcheck() signature, one just before a `return text`);
#   * introduce a `pageSkipWords` dict instead, keyed by page title, whose
#     values are lists of words; answering "p" appends the word to the list
#     stored under the current `title`, creating the list on first use;
#   * give Word.isCorrect() a `title=''` keyword and make it return True when
#     the word is listed under that title in `pageSkipWords`; spellcheck()
#     passes its own `title` through;
#   * after the "Wordlist successfully loaded." branch, read `pageSkipWords`
#     as JSON from 'spelling-<checklang>-<family name>.txt' in the 'spelling'
#     data directory; an IOError only prints a warning;
#   * after the language wordlist file is closed, write `pageSkipWords` back
#     to that same path as JSON.
#
# NOTE(review): the `except:` around pageSkipWords[title].append(word) is bare,
#   so it catches more than the missing-key case it appears to be meant for;
#   `except KeyError:` or pageSkipWords.setdefault(title, []).append(word)
#   would be narrower.
# NOTE(review): only IOError is handled when loading the JSON file; a malformed
#   file would raise ValueError from json.loads() -- confirm whether the
#   surrounding handler is meant to deal with that.
# NOTE(review): 'spelling-{}-{}.txt'.format(...) uses auto-numbered fields,
#   which need Python 2.7+ -- confirm the minimum supported interpreter.
# NOTE(review): in this copy the hunk bodies are not line-broken (whitespace
#   has been collapsed), so the text cannot be fed to `patch` as is.
#
Index: spellcheck.py =================================================================== --- spellcheck.py (revision 10724) +++ spellcheck.py (working copy) @@ -63,6 +63,7 @@ import wikipedia as pywikibot from pywikibot import i18n import pagegenerators +import json class SpecialTerm(object): @@ -142,7 +143,7 @@ if word[0].isupper(): pywikibot.output(u"c: Add '%s' as correct" % (uncap(word))) pywikibot.output(u"i: Ignore once (default)") - pywikibot.output(u"p: Ignore on this page") + pywikibot.output(u"p: Always ignore on this page") pywikibot.output(u"r: Replace text") pywikibot.output(u"s: Replace text, but do not save as alternative") pywikibot.output(u"g: Guess (give me a list of similar words)") @@ -156,7 +157,10 @@ knownwords[word] = word newwords.append(word) elif answer in "pP": - pageskip.append(word) + try: + pageSkipWords[title].append(word) + except: + pageSkipWords[title] = [word] elif answer in "rRsS": correct = pywikibot.input(u"What should I replace it by?") if answer in "rR": @@ -244,7 +248,6 @@ return result def spellcheck(page, checknames = True, knownonly = False, title=''): - pageskip = [] text = page if correct_html_codes: text = removeHTML(text) @@ -258,7 +261,7 @@ loc += len(match.group(1)) bigword = Word(match.group(2)) smallword = bigword.derive() - if not Word(smallword).isCorrect(checkalternative = knownonly) and \ + if not Word(smallword).isCorrect(checkalternative = knownonly, title=title) and \ (checknames or not smallword[0].isupper()): replacement = askAlternative(smallword, context=text[max(0,loc-40):loc + len(match.group(2))+40], @@ -283,7 +286,6 @@ loc += len(match.group(2)) if correct_html_codes: text = removeHTML(text) - pageskip = [] return text @@ -349,14 +351,14 @@ % (self.derive(), rep, self.word)) return self.word.replace(self.derive(),rep) - def isCorrect(self,checkalternative = False): + def isCorrect(self,checkalternative = False, title = ''): # If checkalternative is True, the word will only be found incorrect if # it 
is on the spelling list as a spelling error. Otherwise it will # be found incorrect if it is not on the list as a correctly spelled # word. if self.word == "": return True - if self.word in pageskip: + if title in pageSkipWords and self.word in pageSkipWords[title]: return True try: if knownwords[self.word] == self.word: @@ -419,12 +421,12 @@ page.put(text, summary) try: - pageskip = [] edit = SpecialTerm("edit") endpage = SpecialTerm("end page") title = [] knownwords = {} newwords = [] + pageSkipWords = {} start = None newpages = False longpages = False @@ -459,6 +461,7 @@ mysite = pywikibot.getSite() if not checklang: checklang = mysite.language() + filename = pywikibot.config.datafilepath('spelling', 'spelling-' + checklang + '.txt') print "Getting wordlist" @@ -488,6 +491,17 @@ print "Warning! There is no wordlist for your language!" else: print "Wordlist successfully loaded." + + pageSkipWordsFilepath = pywikibot.config.datafilepath('spelling', 'spelling-{}-{}.txt'.format(checklang, mysite.family.name)) + print "Loading the page-specific wordlist for your language and site" + try: + with codecs.open(pageSkipWordsFilepath, 'r', encoding = mysite.encoding()) as f: + pageSkipWords = json.loads(f.read()) + except IOError: + print "Warning! There is no page-specific wordlist for the combination of your language and site!" + else: + print "Page-specific wordlist successfully loaded." + # This is a purely interactive bot, we therefore do not want to put-throttle pywikibot.put_throttle.setDelay(1) except: @@ -520,6 +534,8 @@ title = pywikibot.input(u"Which page to check now? (enter to stop)") finally: pywikibot.stopme() + + # Language-level known words. filename = pywikibot.config.datafilepath('spelling', 'spelling-' + checklang + '.txt') if rebuild: @@ -540,3 +556,7 @@ else: f.write("0 %s %s\n"%(word," ".join(knownwords[word]))) f.close() + + # Known words for a specific page and language. 
+ with codecs.open(pageSkipWordsFilepath, 'w', encoding = mysite.encoding()) as f: + f.write(json.dumps(pageSkipWords))