
import dataset
from sqlalchemy.types import *
from glob import glob
from datetime import datetime

# Connection URL for the application database: user 'jsalsman', database
# 'sctest', over the local Unix-domain socket in /var/opt/gitlab/postgresql.
connect_string = 'postgresql://jsalsman@/sctest?host=/var/opt/gitlab/postgresql'

def _init_table(db, name, clobber, columns, indexed=()):
    """Ensure table *name* exists with the given columns.

    dataset auto-creates each table with an auto-incrementing integer
    primary key 'id'.

    Args:
        db: open dataset connection.
        name: table name.
        clobber: when True, drop any existing table first.
        columns: ordered (column_name, sqlalchemy_type) pairs to create.
        indexed: column names to index after the columns exist.
    """
    t = db[name]
    if clobber:
        t.drop()
    for col, coltype in columns:
        t.create_column(col, coltype)
    for col in indexed:
        t.create_index(col)


def create_database(dbname='sctest', clobber=False):
    """Create (or, with clobber=True, re-create) the application schema.

    NOTE(review): dbname is currently unused -- the module-level
    connect_string hardcodes the 'sctest' database; either wire dbname
    into the connection URL or drop the parameter.

    Args:
        dbname: intended target database name (unused, see note above).
        clobber: when True, drop each table before re-creating it.
    """
    db = dataset.connect(connect_string)

    _init_table(db, 'users', clobber, indexed=['email', 'name'], columns=[
        ('email', String(length=100)),
        ('name', String(length=100)),
        ('registered', DateTime),
        ('telephone', String(length=30)),
        ('passhash', String(length=144)),  # 512 bits as hex plus salt
        ('utterances', ARRAY(Integer)),    # foreign keys: utterances/id
        ('native', String(length=20)),     # native language
        ('learning', String(length=20)),   # goal language
        ('currentlevel', Float(precision=1)),
        ('goallevel', Float(precision=1)),
        ('goalmonths', Float(precision=1)),
        # TODO: billing/payments/methods
        ('categories', ARRAY(String(length=15))),
        ('disabled', Boolean),
        ('students', ARRAY(Integer)),      # foreign keys: users/id
        ('teachers', ARRAY(Integer)),      # foreign keys: users/id
        ('parents', ARRAY(Integer)),       # foreign keys: users/id
        ('schools', ARRAY(Integer)),       # foreign keys: schools/id
        ('about', Text),
    ])

    _init_table(db, 'utterances', clobber, indexed=['promptid', 'userid'],
                columns=[
        ('promptid', Integer),             # foreign key: prompts/id
        ('userid', Integer),               # foreign key: users/id
        ('pcm', LargeBinary),
        ('mp3', LargeBinary),
        ('wav', LargeBinary),
        ('exemplar', Boolean),
        ('adult', Boolean),
        ('male', Boolean),
        ('phonemes', ARRAY(String(length=3))),  # CMUBET, no diphthongs
        ('alignment', ARRAY(Float(precision=3))),
        ('features', ARRAY(Float(precision=3))),  # ten per phoneme +1
        ('worstphonemeposn', Integer),
        ('worstdiphoneposn', Integer),
        ('worstphoneme', String(length=3)),
        ('worstdiphone', String(length=6)),
        ('score', Float(precision=2)),
        ('transcriptions', ARRAY(Integer)),  # fk.s: transcriptions/id
        ('at', DateTime),
    ])

    _init_table(db, 'log', clobber,
                indexed=['userid', 'at', 'event', 'address'], columns=[
        ('userid', Integer),               # foreign key: users/id
        ('at', DateTime),
        ('event', String(length=20)),
        ('address', String(length=300)),   # IPv4 or v6["="domain name]
        ('details', Text),
        ('mediatype', Text),
        ('media', LargeBinary),
    ])

    # speechpart codes:
    #
    # q: quantifier
    # n: noun
    # v: verb
    # x: negative
    # w: adverb
    # m: adjective
    # o: pronoun
    # s: possessive
    # p: preposition
    # c: conjunction
    # a: article
    _init_table(db, 'words', clobber, indexed=['spelling'], columns=[
        ('spelling', String(length=30)),
        ('phonemes', ARRAY(String(length=3))),  # CMUBET, no diphthongs
        ('homographs', ARRAY(Integer)),    # foreign keys: words/id
        ('homophones', ARRAY(Integer)),    # foreign keys: words/id
        ('speechrank', Integer),           # nth most common spoken word
        ('speechpart', String(length=1)),  # see codes above
        # TEMPORARY LITERAL HOMOPHONES:
        ('homops', ARRAY(Text)),
    ])

    _init_table(db, 'prompts', clobber, indexed=['display'], columns=[
        ('display', Text),
        ('language', String(length=20)),
        ('words', ARRAY(Integer)),         # foreign keys: words/id
        ('utterances', ARRAY(Integer)),    # foreign keys: utterances/id
        ('mediatype', Text),
        ('media', LargeBinary),
        ('level', Float(precision=1)),
        ('phonemes', ARRAY(String(length=3))),  # CMUBET, no diphthongs
        ('freeform', Boolean),
    ])

    _init_table(db, 'transcriptions', clobber, indexed=['promptid'], columns=[
        ('utteranceid', Integer),          # foreign key: utterances/id
        ('promptid', Integer),             # foreign key: prompts/id
        ('transcription', Text),
        ('intelligible', Boolean),
        ('userid', Integer),               # foreign key: users/id
        ('transcribed', DateTime),
        ('source', String(length=1)),      # (m)turk (p)eer (s)urvey (a)d
        ('mturkuid', String(length=30)),   # was length=14 until 21 seen
        ('mturkhit', String(length=35)),   # was length=30
    ])

    _init_table(db, 'choices', clobber, columns=[  # no index except 'id'
        ('question', Text),
        ('responses', ARRAY(Integer)),     # foreign keys: responses/id
        ('activity', Text),  # e.g. CMS assignment URL (w/type prefix?)
        ('mediatype', Text),
        ('media', LargeBinary),
        ('proofread', Boolean),
        ('disabled', Boolean),
        ('level', Float(precision=1)),
    ])

    _init_table(db, 'responses', clobber, columns=[
        ('choiceid', Integer),             # foreign key: choices/id
        ('promptid', Integer),             # foreign key: prompts/id
        ('resultid', Integer),             # foreign key: choices/id
        ('terminal', Boolean),
        ('activity', Text),  # e.g. assignment URL (with type prefix?)
        ('action', Text),    # javascript?
    ])

    _init_table(db, 'lessons', clobber, columns=[
        ('name', Text),
        ('choices', ARRAY(Integer)),       # foreign keys: choices/id
        ('level', Float(precision=1)),
    ])

    _init_table(db, 'topics', clobber, indexed=['name', 'level'], columns=[
        ('name', Text),
        ('lessons', ARRAY(Integer)),       # foreign keys: lessons/id
        ('level', Float(precision=1)),
    ])

    _init_table(db, 'schools', clobber, indexed=['name'], columns=[
        ('name', String(length=100)),
        ('students', ARRAY(Integer)),      # foreign keys: users/id
        ('teachers', ARRAY(Integer)),      # foreign keys: users/id
        ('admins', ARRAY(Integer)),        # foreign keys: users/id
        ('telephone', String(length=30)),
        ('about', Text),
    ])


def _word_ids(wt, spellings):
    """Map each spelling to its words-table id, inserting missing words."""
    ids = []
    for wd in spellings:
        w = wt.find_one(spelling=wd)
        if w is None:  # fixed: was '== None'
            wid = wt.insert(dict(spelling=wd))
            print('word:', wd, 'id:', wid)
        else:
            wid = w['id']
        ids.append(wid)
    return ids


def load_database(dbname='sctest', clobber=False):
    """Load words, prompts, utterances, and transcriptions from ../database.

    Reads prompt text files (db/p??????.txt), their per-prompt MP3
    recordings (*-mp3s/p<prompt>s??????.mp3), and the matching MTurk
    transcription text files, inserting rows and cross-linking ids.

    NOTE(review): dbname is currently unused -- the module-level
    connect_string hardcodes the 'sctest' database.

    Args:
        dbname: intended target database name (unused, see note above).
        clobber: when True, delete all rows from the four tables first.
    """
    db = dataset.connect(connect_string)
    wt = db['words']
    pt = db['prompts']
    ut = db['utterances']
    tt = db['transcriptions']
    if clobber:
        print('deleting words, prompt, utterance, and transcription data...')
        wt.delete()
        pt.delete()
        ut.delete()
        tt.delete()
        print('...data deleted')
    pris = {}  # prompt file number -> display text, for the final report
    for pr in sorted(glob('../database/db/p??????.txt')):
        with open(pr, 'r') as f:
            # normalize spacing before sentence punctuation
            ps = f.read().strip().replace(' .', '.').replace(' ,', ','
                    ).replace(' ?', '?').replace(' !', '!')
        pri = pr.replace('../database/db/p', '').replace('.txt', '')
        pris[pri] = ps
        # strip punctuation and split into lowercase word spellings
        ws = ps.replace(',', '').replace('-', ' ').replace('.', ''
                ).replace('!', '').replace('?', '').lower().split()
        wids = _word_ids(wt, ws)
        pid = pt.insert(dict(display=ps, words=wids, language='en',
            freeform=False))  # gets utterances= below
        print('prompt:', ps, ws, 'id:', pid)
        # TODO: phonemes=["__",...] (SANS DIPHTHONGS)
        uids = []
        for us in sorted(glob('../database/*-mp3s/p' + pri + 's??????.mp3')):
            with open(us, 'rb') as f:
                mp3 = f.read()
            uid = ut.insert(dict(promptid=pid, mp3=mp3, exemplar=False,
                adult=False, at=datetime(2017, 5, 1)))
            # gets transcriptions=[id,...] below;
            # TODO: phonemes=["__",...], alignment=..., features=[featex-vta10]
            print(us, 'len:', len(mp3), 'id:', uid)
            uids.append(uid)
            tids = []
            # transcription files live beside the prompt text under db/,
            # not in the mp3 directories
            for ts in sorted(glob(us.replace('.mp3', 'n*.txt'
                ).replace('shorter-mp3s', 'db').replace('longer-mp3s', 'db'))):
                with open(ts, 'r') as f:
                    tf = f.readlines()
                    # line 0: transcription; line 2/3: "label value" pairs
                    # for the MTurk worker and HIT ids
                    tsn = tf[0].strip()
                    mturkuid = tf[2].strip().split()[1]
                    mturkhit = tf[3].strip().split()[1]
                tid = tt.insert(dict(utteranceid=uid, promptid=pid,
                    transcription=tsn, transcribed=datetime(2017, 9, 1),
                    source='m', mturkuid=mturkuid, mturkhit=mturkhit))
                    # TODO: intelligible=boolean
                print('transcription id', tid, 'was:', tsn)
                tids.append(tid)
            ut.update(dict(id=uid, transcriptions=tids), ['id'])
            print(len(tids), 'transcriptions')
        pt.update(dict(id=pid, utterances=uids), ['id'])
        print(len(uids), 'utterances')
    print(len(pris), 'prompts:', pris)


def load_homophones_and_phonemes(spelling_schema=True):
    """Ensure the temporary literal-homophone column exists on 'words'.

    When spelling_schema is true, adds the 'homops' ARRAY(Text) column to
    the words table.  Loading of words, prompts, and utterances (in that
    order) is still to be done.
    """
    db = dataset.connect(connect_string)
    if not spelling_schema:
        return
    words = db['words']
    words.create_column('homops', ARRAY(Text))


def load_exemplars():
    """Stub: loading of exemplar utterances is not implemented yet."""
    return None

# load_homographs()?

def load_iscores():
    """Stub: intelligibility-score loading is not implemented yet."""
    return None

def load_cscores():
    """Stub: comprehensibility-score loading is not implemented yet."""
    return None

# def diphone_remix()

# user input

# login

# learner status

# logout?

# sequencing


# multiple choices


## print('replacing schema...')
## create_database(clobber=True)
## print('...schema replaced')
##
## print('loading database...')
## load_database(clobber=True)
## print('...database loaded')

# Guard the side-effecting call so that importing this module (e.g. to reuse
# create_database elsewhere) does not touch the database; running the file
# as a script behaves exactly as before.
if __name__ == '__main__':
    print('loading homophones and phonemes...')
    load_homophones_and_phonemes()
    print('...homophones and phonemes loaded')

    #print('loading exemplars...')
    #load_exemplars()
    #print('...exemplars loaded')
