"""Report input sections whose <sub>/<sup> tags are unbalanced.

Reads standard input line by line, counts opening and closing ``<sub>`` and
``<sup>`` tags, and prints the current title whenever a page-end line is
reached with mismatched counts.

Pass ``-d`` as the first command-line argument for a verbose dump: every
input line is echoed, per-line imbalances are shown, and the running
counters are printed next to the title at each page end.
"""
import re
import sys

# NOTE(review): '(.*)' matches every line and '' matches everywhere, so as
# written every line is treated as both a title line and a page end. These
# patterns look as if surrounding markup was stripped from them at some
# point -- confirm the intended patterns before relying on the output.
titleRE = re.compile('(.*)')
subsupRE = re.compile('</?su[pb]>')
pageEndRE = re.compile('')


def report_unbalanced(lines, dump=False, title_re=titleRE,
                      page_end_re=pageEndRE):
    """Yield the text the script prints for *lines*, one item per print.

    Args:
        lines: iterable of input lines (trailing newlines are kept as-is).
        dump: when true, also yield each input line, a per-line count
            summary for lines whose tags do not pair up, and the running
            counters at every page end that saw at least one tag.
        title_re: compiled pattern whose group 1 is the title; a match
            also resets the running counters.
        page_end_re: compiled pattern marking the end of a page.

    Yields:
        Strings without the final newline that printing adds. Items that
        embed an input line still carry that line's own newline.
    """
    title = ""
    sup_open = sup_close = sub_open = sub_close = 0

    for line in lines:
        match = title_re.search(line)
        if match:
            # A new title starts a fresh count; counters are NOT reset at
            # page end, only here.
            title = match.group(1)
            sup_open = sup_close = sub_open = sub_close = 0

        if dump:
            yield line

        tags = subsupRE.findall(line)
        line_sub_open = tags.count('<sub>')
        line_sub_close = tags.count('</sub>')
        line_sup_open = tags.count('<sup>')
        line_sup_close = tags.count('</sup>')

        sub_open += line_sub_open
        sub_close += line_sub_close
        sup_open += line_sup_open
        sup_close += line_sup_close

        if dump and (line_sub_open != line_sub_close
                     or line_sup_open != line_sup_close):
            # NOTE(review): both replace() calls map a character to itself
            # and are therefore no-ops; they may once have escaped the
            # angle brackets -- confirm and restore if so.
            shown = line.replace('<', '<').replace('>', '>')
            yield "%d %d %d %d %s" % (line_sub_open, line_sub_close,
                                      line_sup_open, line_sup_close, shown)

        if not page_end_re.search(line):
            continue
        if not (sup_open or sup_close or sub_open or sub_close):
            continue  # no tags seen since the last title: nothing to say

        if dump:
            yield "%s %d %d %d %d" % (title, sup_open, sup_close,
                                      sub_open, sub_close)
        elif sup_open != sup_close or sub_open != sub_close:
            yield title


def main(argv=None):
    """Run the check on standard input and print the report.

    Args:
        argv: argument vector; defaults to ``sys.argv``. Dump mode is on
            when ``argv[1]`` is exactly ``'-d'``.
    """
    if argv is None:
        argv = sys.argv
    dump = len(argv) > 1 and argv[1] == '-d'
    for text in report_unbalanced(sys.stdin, dump):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(text)


if __name__ == '__main__':
    main()