import csv
import json
import os
import sys

# Raise the csv per-field size limit as high as the platform allows
# (presumably because the wiki TSV carries very large HTML cells -- confirm
# against the input data).  The csv module stores the limit in a C long,
# which is narrower than sys.maxsize on some platforms (e.g. 64-bit Windows);
# there the call raises OverflowError, so halve the value until it fits.
_field_size_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_size_limit)
        break
    except OverflowError:
        _field_size_limit //= 2


def create_tasks_json(lang, statements_filename, wiki_filename,
                      tasks_filename):
    """Join statements with wiki page HTML and write one JSON task per line.

    Both inputs are read as tab-separated files without quoting, and the
    first row of each is discarded as a header.

    Statement rows are read by position: 0 = id, 1 = revision, 2 = tid,
    3 = title, 5 = start, 6 = offset, 7 = statement (columns 4 and 8,
    section and paragraph, are not used).  Wiki rows are read as
    0 = title, 1 = html.

    For every wiki row whose title matches one or more statements, each of
    those statements is written to ``tasks_filename`` as a JSON object on
    its own line, with an added ``html`` key holding the page HTML in which
    ``html[start:offset]`` has been replaced by the statement text.
    Statements whose title never appears in the wiki file are not written.

    Rows in either input that are too short, or whose start/offset are not
    integers, are reported on stdout and skipped.

    Args:
        lang: Value stored under the ``lang`` key of every task.
        statements_filename: Path of the statements TSV file.
        wiki_filename: Path of the wiki TSV file.
        tasks_filename: Path of the output file (overwritten).
    """
    with open(statements_filename, 'r') as statements_file,\
         open(wiki_filename, 'r') as wiki_file,\
         open(tasks_filename, 'w') as tasks_file:
        statement_reader = csv.reader(
            statements_file, delimiter='\t', quoting=csv.QUOTE_NONE)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(statement_reader, None)

        # Maps page title -> list of task dicts for that page.
        statements = {}
        for statement_row in statement_reader:
            try:
                task = {
                    'lang': lang,
                    'id': statement_row[0],
                    'revision': statement_row[1],
                    'tid': statement_row[2],
                    'title': statement_row[3],
                    'start': int(statement_row[5]),
                    'offset': int(statement_row[6]),
                    'statement': statement_row[7]
                }
            except (ValueError, IndexError):
                # Too few columns, or a non-integer start/offset.
                print('skipping a problematic line')
                continue
            statements.setdefault(task['title'], []).append(task)

        wiki_reader = csv.reader(
            wiki_file, delimiter='\t', quoting=csv.QUOTE_NONE)
        next(wiki_reader, None)
        for wiki_row in wiki_reader:
            try:
                title, html = wiki_row[0], wiki_row[1]
            except IndexError:
                # Blank or truncated row: skip it, as the statements loop
                # does, instead of aborting the whole run.
                print('skipping a problematic line')
                continue
            for statement in statements.get(title, ()):
                # Splice the statement text over html[start:offset].
                statement['html'] = (html[:statement['start']] +
                                     statement['statement'] +
                                     html[statement['offset']:])
                tasks_file.write(json.dumps(statement))
                tasks_file.write('\n')


def main(argv):
    """Command-line entry point.

    Expects ``argv`` to hold the program name followed by exactly four
    arguments (lang, statements TSV, wiki TSV, output file) and forwards
    them to ``create_tasks_json``.  With any other argument count, the
    usage line is written to stderr and the process exits with status 1.
    """
    script_name = os.path.basename(__file__)
    usage_text = ("Usage: %s lang statements.tsv wiki.tsv out.json\n"
                  % script_name)

    if len(argv) == 5:
        create_tasks_json(*argv[1:])
        return

    sys.stderr.write(usage_text)
    sys.exit(1)


# Run the converter only when this file is executed as a script (not when it
# is imported), passing the full argument vector including the program name.
if __name__ == '__main__':
    main(sys.argv)
