| import pandas as pd |
| import numpy as np |
| import csv,pickle,json,bz2 |
| from romtoslp import * |
# Pickled mapping of DCS sentence objects; the script indexes it with
# '<sentence id>.p2'.  Loaded inside a ``with`` block so the file handle is
# closed as soon as unpickling finishes.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('../Simultaneous_DCS_ho.p', 'rb') as _dcs_file:
    loaded_DCS = pickle.load(_dcs_file)

# Directory holding one '<sentence id>.ds.bz2' archive per sentence.
folder = '../NewData/skt_dcs_DS.bz2_4K_bigram_mir_heldout/'
|
|
def open_dsbz2(filename):
    """Unpickle a bz2-compressed archive and return its seven entries.

    The archive must unpickle to a mapping that contains every key named in
    the body; a missing key raises ``KeyError``.

    Returns:
        The 7-tuple ``(nodelist_correct, conflicts_Dict_correct,
        featVMat_correct, nodelist_to_correct_mapping, nodelist,
        conflicts_Dict, featVMat)``.
    """
    with bz2.BZ2File(filename, 'r') as archive:
        payload = pickle.load(archive)

    # Look the entries up in one pass, then hand them back in the order the
    # callers unpack them.
    (
        correct_conflicts,
        correct_mapping,
        correct_nodes,
        correct_features,
        all_features,
        all_conflicts,
        all_nodes,
    ) = (
        payload[key]
        for key in (
            'conflicts_Dict_correct',
            'nodelist_to_correct_mapping',
            'nodelist_correct',
            'featVMat_correct',
            'featVMat',
            'conflicts_Dict',
            'nodelist',
        )
    )

    return (
        correct_nodes,
        correct_conflicts,
        correct_features,
        correct_mapping,
        all_nodes,
        all_conflicts,
        all_features,
    )
|
|
| |
def _group_by_lemma(lemmas, tags):
    """Group ``tags`` by the lemma found at the same position.

    Returns a dict mapping each lemma to the list of ``tags[i]`` for every
    index ``i`` at which that lemma occurs, in input order.  Raises
    ``IndexError`` when ``tags`` is shorter than ``lemmas``.
    """
    grouped = {}
    for position, lemma in enumerate(lemmas):
        grouped.setdefault(lemma, []).append(tags[position])
    return grouped


def _score_sentence(sentid, gt_by_lemma, pd_by_lemma):
    """Build the output rows for one sentence.

    Args:
        sentid: value written to the 'File' column.
        gt_by_lemma: ground-truth lemma -> list of ``(cng, conflict_count)``.
        pd_by_lemma: predicted lemma -> list of predicted CNG values.  The
            lists are consumed in place as predictions get matched.

    Returns:
        A list of rows ``[File, Lemma, CNG, lemmaCorr, lemmaCNGcorr, predCNG,
        Conflicts]``: first the rows settled in round 1 (exact lemma+CNG
        matches and lemmas with no prediction left), then the deferred rows.
    """
    rows = []
    deferred = []

    # Round 1: exact (lemma, CNG) matches take their prediction first, so a
    # later leftover assignment cannot steal it.
    for lemma, entries in gt_by_lemma.items():
        for cng, conflict_count in entries:
            candidates = pd_by_lemma.get(lemma)
            if not candidates:
                # Lemma never predicted, or all its predictions are used up.
                rows.append([sentid, lemma, cng, 0, 0, 'nil', conflict_count])
            elif cng in candidates:
                candidates.remove(cng)
                rows.append([sentid, lemma, cng, 1, 1, cng, conflict_count])
            else:
                deferred.append((lemma, cng, conflict_count))

    # Round 2: lemma was predicted but with another CNG; pair each deferred
    # item with the first prediction still unused for that lemma, if any.
    for lemma, cng, conflict_count in deferred:
        candidates = pd_by_lemma.get(lemma)
        if candidates:
            rows.append(
                [sentid, lemma, cng, 1, 0, candidates.pop(0), conflict_count])
        else:
            rows.append([sentid, lemma, cng, 0, 0, 'nil', conflict_count])

    return rows


# The output file is opened once for the whole run instead of being reopened
# in append mode for every row.  newline='' is what the csv module asks for
# on files handed to csv.reader / csv.writer.
with open('groundtruth2.csv', 'w', newline='') as out_fh:
    writer = csv.writer(out_fh)
    writer.writerow(['File','Lemma','CNG','lemmaCorr','lemmaCNGcorr','predCNG','Conflicts'])
    count = 0
    for ii in range(4):
        with open("BM2_NLoss_proc" + str(ii) + ".csv", 'r', newline='') as in_fh:
            rd = csv.reader(in_fh)
            # A record spans six rows: row 1 is the sentence id followed by
            # the predicted lemmas, row 2 carries the predicted CNGs after
            # its first field, and the remaining four rows are skipped.
            # Iterating with ``for`` ends the loop when the file is
            # exhausted; the previous ``while True`` + ``next(rd)`` let the
            # broad ``except`` swallow StopIteration and spin forever.
            for x in rd:
                print(count)
                count += 1
                try:
                    sentid = x[0]
                    dcsobj = loaded_DCS[str(sentid) + '.p2']

                    # Only the first two entries of the archive are used.
                    nodelist_correct, conflicts_Dict_correct = open_dsbz2(
                        folder + str(sentid) + '.ds.bz2')[:2]

                    # Diagnostic only: report a size mismatch between the
                    # DCS lemmas and the archive's node list, then carry on.
                    dll = sum(len(group) for group in dcsobj.lemmas)
                    if dll != len(nodelist_correct):
                        print('here')
                        print(dcsobj.lemmas)
                        print(nodelist_correct)

                    gtlemmas = [
                        rom_slp(element)
                        for outerlist in dcsobj.lemmas
                        for element in outerlist
                    ]
                    pdlemmas = x[1:]

                    x = next(rd)
                    # Pair every ground-truth CNG with the number of
                    # conflicts recorded for the node at the same flat index.
                    flat_cngs = [
                        element
                        for outerlist in dcsobj.cng
                        for element in outerlist
                    ]
                    gtcngs = [
                        (element, len(conflicts_Dict_correct[position]))
                        for position, element in enumerate(flat_cngs)
                    ]
                    pdcngs = x[1:]

                    # Skip the four rows that close the record.
                    for _ in range(4):
                        x = next(rd)

                    gtldict = _group_by_lemma(gtlemmas, gtcngs)
                    pdldict = _group_by_lemma(pdlemmas, pdcngs)

                    writer.writerows(_score_sentence(sentid, gtldict, pdldict))

                except Exception as e:
                    # Best effort: report the failure and move on.
                    # NOTE(review): when a record fails part-way its
                    # remaining rows are not skipped, so the next iterations
                    # treat them as record starts -- confirm this is intended.
                    print(e)
                    print('been there')
                    continue
| |