From 94aa95b9bbb9c92fcee069a476bf8e9750af9f9b Mon Sep 17 00:00:00 2001
From: skarozis
Date: Thu, 30 Jul 2020 15:39:23 +0300
Subject: [PATCH] Add order parameter calculation, corrections in whole workflow, etc.

---
 CHANGELOG    |  14 ++++
 README.md    |   2 +-
 main.py      | 189 ++++++++++++++++++++++++++++++++++++++++-----------
 tooba_f.py   |   4 +-
 tooba_gmx.py | 136 +++++++++++++++++++++++++++++++++---
 5 files changed, 293 insertions(+), 52 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 2cf8448..26e1613 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.0.9] - 2020-07-30
+### Added
+- calculate rdf profiles for all combinations of molecules
+- create ndx for order parameter calculation
+- calculate order parameter properties
+- debug save function
+
+### Changed
+- text that is converted to a unique hashed id is now independent of group name and filename
+- combine index and gmx_ndx functions
+
+### Removed
+- None
+
 ## [0.0.8] - 2020-07-17
 ### Added
 - memory optimizations
diff --git a/README.md b/README.md
index 82e3304..71696e7 100644
--- a/README.md
+++ b/README.md
@@ -2,4 +2,4 @@
 
 ## Tool for Bilayer in Blocks Analysis (TooBBA)
 
-It is an tool that reads .trr files, perform a domain decomposition (user defined) and analyzes each block in terms of lipids distribution.
+It is a tool that reads .trr files, performs a domain decomposition (user defined) and calculates structural properties for each block (domain). The final output constitutes a domain dataset, aiming to feed Machine Learning (ML) algorithms and identify coherent groups of observations sharing the same characteristics and properties.
diff --git a/main.py b/main.py
index 5890609..83082a4 100644
--- a/main.py
+++ b/main.py
@@ -29,20 +29,33 @@ TPR=SYSTEM_NAME+'/eq_run.tpr'
 # surf: Determine surface from atoms (ex. Head of lipid)
 # vector: Determine vector that fits atoms (ex. Tail of lipid)
 # tilt: Use surf and vector result to calculate angle (if NAME is COMBINE)
-# index: Creates unique code (md5) for every subdomain to use in data saving process
+# index+FOLDER: Saves one ndx for every subdomain and creates a unique code (md5)
+#               for every subdomain to use in the data saving process
+# index_order+FOLDER: Saves one ndx for order parameter calculations for every subdomain
+#               and creates a unique code (md5) for every subdomain to use in the data saving process
 # density: Detrmine density profile of x,y,z and save peaks of directions with the least number
-# gmx_ndx: Saves one ndx for every subdomain
-# [save, [type], save_name]: Save result of previous function, type: pkl, json
+# rdf: Calculate the 2D radial distribution function g(r) and save peaks of directions with the least number
+# order: Calculate the order parameter of an atom chain
+# [save, [TYPE], SAVE_NAME]: Save result of previous function, type: pkl, json
 #
 ###################################################
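For orientation, the new index/index_order steps key every saved result on an md5 id built only from the system name and the subdomain coordinates. A minimal sketch of that convention, mirroring tbgmx.ndx_index further down in this patch (the system name and domain string here are illustrative only):

    import hashlib

    SYSTEM_NAME = 'test'             # illustrative system name
    dom = '(1, 0, 3)'                # subdomain coordinates parsed from the .ndx filename
    text = SYSTEM_NAME + '_' + dom   # independent of group name and filename
    iidd = hashlib.md5(text.encode('utf-8')).hexdigest()
    # every group that analyses the same subdomain therefore maps to the same id
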
-GROUPS={'ALL':['gmx_ndx','index',['save', ['pkl'],'index'],'density',['save', ['pkl'],'dens']],
-        'HD_GROUP':['surf',['save', ['pkl', 'json'],'time_domain_c-normal-cg']],
+#todo: save function is faulty, save last result to existing pkl(s)
+GROUPS={'ALL':['index+ALL_ndx',['save', ['pkl'],'index'],'density',['save', ['pkl'],'dens']],
+        'HD_GROUP':['surf',['save', ['pkl', 'json'],'surf'],'index+HD_ndx',['save', ['pkl'],'index'],'rdf',['save', ['pkl'],'rdf']],
         'TL_GROUP':['vector',['save', ['pkl'],'vec']],
+        'ORDER_NS_SPH':['index_order+ORDER_NS_SPH_ndx',['save', ['pkl'],'index'],'order',['save',['pkl'], 'order']],
+        'ORDER_NS_ACYL':['index_order+ORDER_NS_ACYL_ndx',['save', ['pkl'],'index'],'order',['save',['pkl'], 'order']],
+        'ORDER_FFA':['index_order+ORDER_FFA_ndx',['save', ['pkl'],'index'],'order',['save',['pkl'], 'order']],
+        'ORDER_CHOL':['index_order+ORDER_CHOL_ndx',['save', ['pkl'],'index'],'order',['save',['pkl'], 'order']],
         'COMBINE':[['HD_GROUP','surf'],['TL_GROUP','vector'],['COMB','tilt'],['save', ['pkl'],'tilt']]
         }
-ALL={'NS':['C6', 'Na', 'P4', 'P3', 'C7','C3', 'C4', 'C5', 'C8', 'C9', 'C10'], 'CHOL':['ROH','R1', 'R2', 'R3', 'R4', 'R5'], 'FFA':['AC','C1', 'C2', 'C3', 'C4']}
+ALL={'NS':['C1', 'C2', 'C3', 'C4', 'C5','C6', 'Na', 'P4', 'P3', 'C7','C3', 'C4', 'C5', 'C8', 'C9', 'C10'], 'CHOL':['ROH','R1', 'R2', 'R3', 'R4', 'R5'], 'FFA':['AC','C1', 'C2', 'C3', 'C4']}
 HD_GROUP={'NS':['C6', 'Na', 'P4', 'P3', 'C7'], 'CHOL':['ROH'], 'FFA':['AC']}
 TL_GROUP={'NS':['C3', 'C4', 'C5', 'C8', 'C9', 'C10'], 'CHOL':['R1', 'R2', 'R3', 'R4', 'R5'], 'FFA':['C1', 'C2', 'C3', 'C4']}
+ORDER_NS_SPH={'NS':['C1', 'C2', 'C3', 'C4', 'C5']} #probable problem with the same atomname in NS, FFA
+ORDER_NS_ACYL={'NS':['C8', 'C9', 'C10']}
+ORDER_FFA={'FFA':['C1', 'C2', 'C3', 'C4']}
+ORDER_CHOL={'CHOL':['R2', 'R3', 'R4', 'R5']}
 ###################################################
 ###################################################
 print(' ')
@@ -85,31 +98,44 @@ else:
 #Check save files if exist in order to skip functions
 prev=0
 sv_index={}
+ndx_fl={}
 for i in GROUPS.keys():
+    sv_index[i]={}
+    ndx_fl[i]={}
+    cnt_ndx=-1
     for j in GROUPS[i]:
+        cnt_ndx=cnt_ndx+1
         try:
-            sv_index[j]={}
-            sv_index[j]['status']='not exist'
-            sv_index[j]['name']='None'
+            if j.split('+')[0]=='index' or j.split('+')[0]=='index_order':
+                ndx_fl[i][j.split('+')[0]]=j.split('+')[1]
+                j=j.split('+')[0] #generalize index function
+                GROUPS[i][cnt_ndx]=j
+        except:
+            pass
+
+        try:
+            sv_index[i][j]={}
+            sv_index[i][j]['status']='not exist'
+            sv_index[i][j]['name']='None'
except TypeError: - sv_index[str(j)]={} - sv_index[str(j)]['status']='not exist' - sv_index[str(j)]['name']='None' + sv_index[i][str(j)]={} + sv_index[i][str(j)]['status']='not exist' + sv_index[i][str(j)]['name']='None' if len(j)==3: if j[0]=='save': for k in j[1]: if k=='pkl': if os.path.isfile('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]+'.pkl'): - sv_index[prev]['status']='exist' - sv_index[prev]['name']='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]+'.pkl' + sv_index[i][prev]['status']='exist' + sv_index[i][prev]['name']='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]+'.pkl' else: - sv_index[prev]['status']='not exist' + sv_index[i][prev]['status']='not exist' if k=='json': if os.path.isfile('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]+'.pkl'): - sv_index[prev]['status']='exist' - sv_index[prev]['name']='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]+'.pkl' + sv_index[i][prev]['status']='exist' + sv_index[i][prev]['name']='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]+'.pkl' else: - sv_index[prev]['status']='not exist' + sv_index[i][prev]['status']='not exist' prev=str(j) ################################################### #-------------------------------------------------- @@ -121,6 +147,9 @@ for i in GROUPS.keys(): pass else: #Find atom type index in lists created above + print('') + print('Group: ',i) + print('++++++++++++++++++++++++') group_ndx=tbf.atomid_data(res_num, res_type, atom_type, atom_num, group=locals()[i]) tbf.topickle(fl=group_ndx, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_ndx') del group_ndx @@ -156,7 +185,7 @@ for i in GROUPS.keys(): ################################################### for j in GROUPS[i]: if len(j) > 1: - if j=='surf' and sv_index[j]['status']=='not exist': + if j=='surf' and sv_index[i][j]['status']=='not exist': if j not in locals(): surf={} #Creates dictionary with c, normal per subdomain for each frame @@ -164,34 +193,60 @@ for i in GROUPS.keys(): surf[i]=tbf.coord2norm2cg(coord_vector,img=False) del coord_vector sv_data=surf[i] - elif j=='surf' and sv_index[j]['status']=='exist': + elif j=='surf' and sv_index[i][j]['status']=='exist': if j not in locals(): surf={} - surf[i]=tbf.frompickle(sv_index[j]['name']) + surf[i]=tbf.frompickle(sv_index[i][j]['name']) #-------------------------------------------------- - if j=='vector' and sv_index[j]['status']=='not exist': + if j=='vector' and sv_index[i][j]['status']=='not exist': if j not in locals(): vector={} coord_vector=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_FR'+str(NUM_FR)+'.pkl') vector[i]=tbf.coord2vector(coord_vector) del coord_vector sv_data=vector[i] - elif j=='vector' and sv_index[j]['status']=='exist': + elif j=='vector' and sv_index[i][j]['status']=='exist': if j not in locals(): vector={} - vector[i]=tbf.frompickle(sv_index[j]['name']) + vector[i]=tbf.frompickle(sv_index[i][j]['name']) +#-------------------------------------------------- +#ToDo: make more generic file with ndx files and ndx for order parameter +#As for now the hash value is generic (system+domain coord), but needs to run for every input group + if j=='index' and sv_index[i][j]['status']=='not exist': + box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl') + tbf.togmxndx(box_res, fld='./'+SYSTEM_NAME+'/'+ndx_fl[i][j], sv_name=SYSTEM_NAME+'_'+i) + del box_res + + uniq_id=tbgmx.ndx_index(SYSTEM_NAME,ndx_fl[i][j]) + sv_data=uniq_id + elif j=='index' and sv_index[i][j]['status']=='exist': + box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl') + 
tbf.togmxndx(box_res, fld='./'+SYSTEM_NAME+'/'+ndx_fl[i][j], sv_name=SYSTEM_NAME+'_'+i) + del box_res + + uniq_id=tbf.frompickle(sv_index[i][j]['name']) #-------------------------------------------------- - if j=='index' and sv_index[j]['status']=='not exist': - uniq_id=tbgmx.ndx_index(SYSTEM_NAME) + if j=='index_order' and sv_index[i][j]['status']=='not exist': + box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl') + for mol, atoms in locals()[i].items(): + tbgmx.order_ndx(box_res, fld='./'+SYSTEM_NAME+'/'+ndx_fl[i][j], atoms=atoms, sv_name=SYSTEM_NAME+'_'+i) + del box_res + + uniq_id=tbgmx.ndx_index(SYSTEM_NAME,ndx_fl[i][j]) sv_data=uniq_id - elif j=='index' and sv_index[j]['status']=='exist': - uniq_id=tbf.frompickle(sv_index[j]['name']) + elif j=='index_order' and sv_index[i][j]['status']=='exist': + box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl') + for mol, atoms in locals()[i].items(): + tbgmx.order_ndx(box_res, fld='./'+SYSTEM_NAME+'/'+ndx_fl[i][j], atoms=atoms, sv_name=SYSTEM_NAME+'_'+i) + del box_res + + uniq_id=tbf.frompickle(sv_index[i][j]['name']) #-------------------------------------------------- - if j=='density' and sv_index[j]['status']=='not exist': + if j=='density' and sv_index[i][j]['status']=='not exist': dens_dict={} for iidd in uniq_id.keys(): dens_dict[iidd]={} - fl='./'+uniq_id[iidd]['system']+'/gmx_ndx/'+uniq_id[iidd]['ndx_file'] + fl='./'+uniq_id[iidd]['system']+'/'+uniq_id[iidd]['fld']+'/'+uniq_id[iidd]['ndx_file'] cnt=-1 for mol in locals()[i].keys(): ind=tbf.search_pattern(fl,mol) @@ -199,11 +254,11 @@ for i in GROUPS.keys(): break cnt=cnt+1 for d in ('x','y','z'): - peaks = tbgmx.density_picks(TRR=TRAJ,TPR=TPR,IND=fl,SLC=400,ST=time_index[ST_FR],EN=-1,normal=d,fld='./'+uniq_id[iidd]['system'],arg=cnt,dist_pk=20) + peaks = tbgmx.density_peaks(TRR=TRAJ,TPR=TPR,IND=fl,SLC=400,ST=time_index[ST_FR],EN=-1,normal=d,fld='./'+uniq_id[iidd]['system'],arg=cnt,dist_pk=20) if d=='x': tmp=peaks else: - print(len(tmp),len(peaks)) + #print(len(tmp),len(peaks)) if len(tmp)max(yy): + yy=tmp + tmp=yy + order_nm=mol+'_order' + order_dict[iidd][order_nm]=yy + sv_data=order_dict + mrg_data[j]=[order_dict,[]] + del order_dict + elif j=='order' and sv_index[i][j]['status']=='exist': + order_dict=tbf.frompickle(sv_index[i][j]['name']) + mrg_data[j]=[order_dict,[]] + del order_dict #-------------------------------------------------- # Save module if len(j)==3: @@ -234,6 +338,7 @@ for i in GROUPS.keys(): tbf.topickle(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]) if k=='json': tbf.tojson(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]) + del sv_data ################################################### #COMBINE section else: @@ -248,7 +353,7 @@ for i in GROUPS.keys(): #del vector[j[0]] #Calculate COMBINE property if j[0]=='COMB': - if j[1]=='tilt' and sv_index[str(j)]['status']=='not exist': + if j[1]=='tilt' and sv_index[i][str(j)]['status']=='not exist': tilt=tbf.SurfVector_angle(surf,vector) del surf del vector @@ -278,8 +383,8 @@ for i in GROUPS.keys(): sv_data=tot_avg mrg_data[j[1]]=[tot_avg,['Tilt[degrees]']] del tot_avg - elif j[1]=='tilt' and sv_index[str(j)]['status']=='exist': - tot_avg=tbf.frompickle(sv_index[str(j)]['name']) + elif j[1]=='tilt' and sv_index[i][str(j)]['status']=='exist': + tot_avg=tbf.frompickle(sv_index[i][str(j)]['name']) mrg_data[j[1]]=[tot_avg,['Tilt[degrees]']] del tot_avg #-------------------------------------------------- @@ -295,7 +400,8 @@ for i in 
GROUPS.keys(): if k=='pkl': tbf.topickle(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]) if k=='json': - tbf.tojson(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]) + tbf.tojson(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]) + del sv_data ################################################### #Merge data tbf.topickle(fl=mrg_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_merge') @@ -313,5 +419,6 @@ for tp in mrg_data.keys(): data_df=df.copy() continue tbf.topickle(fl=data_df, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_dataset') +print(data_df.head()) ################################################### ################################################### \ No newline at end of file diff --git a/tooba_f.py b/tooba_f.py index 62dfd51..585077b 100644 --- a/tooba_f.py +++ b/tooba_f.py @@ -721,10 +721,10 @@ def SurfVector_angle(surf,vector): def togmxndx(box_res, fld, sv_name): print(' ') - print('Save to gmx_ndx progress:') + print('Save to ndx progress:') print('==============================') cnt=0 - fl_save=fld+'/gmx_ndx/' + fl_save=fld+'/' if not os.path.exists(fl_save): os.makedirs(fl_save) else: diff --git a/tooba_gmx.py b/tooba_gmx.py index abd4d03..e660868 100644 --- a/tooba_gmx.py +++ b/tooba_gmx.py @@ -4,6 +4,8 @@ import numpy as np from scipy.signal import find_peaks from scipy.signal import savgol_filter import hashlib +from progress.bar import Bar + #cmd="gmx covar -s test/eq_run.tpr -f test/eq_traj.trr -n test/gmx_ndx/test_\(1\,\ 0\,\ 3\).ndx" #os.system(cmd) @@ -17,15 +19,18 @@ import hashlib #os.system(cmd) -def ndx_index(SYSTEM_NAME): +def ndx_index(SYSTEM_NAME, pth): uniq_id={} - path='./'+SYSTEM_NAME+'/gmx_ndx/' + path='./'+SYSTEM_NAME+'/'+pth+'/' for f in os.listdir(path): - text=SYSTEM_NAME+'_'+f + dom=f.split('(')[1].split(')')[0] + dom='('+dom+')' + #dom=f.split('_')[2].split('.')[0] + text=SYSTEM_NAME+'_'+dom iidd=hashlib.md5(text.encode('utf-8')).hexdigest() - dom=f.split('_')[2].split('.')[0] + #dom=f.split('_')[2].split('.')[0] uniq_id[iidd]={} - uniq_id[iidd]={'system':SYSTEM_NAME,'ndx_file':f,'domain':dom} + uniq_id[iidd]={'system':SYSTEM_NAME,'fld':pth,'ndx_file':f,'domain':dom} return uniq_id def read_xvg(XVG): @@ -43,7 +48,7 @@ def read_xvg(XVG): y = np.asarray(y) return x,y -def density_picks(TRR,TPR,IND,SLC,ST,EN,normal,fld,arg,dist_pk=1): +def density_peaks(TRR,TPR,IND,SLC,ST,EN,normal,fld,arg,dist_pk=1): cmd=str(arg)+'\n'+str(arg)+'\n' if EN==-1: @@ -54,7 +59,7 @@ def density_picks(TRR,TPR,IND,SLC,ST,EN,normal,fld,arg,dist_pk=1): '-o', fld+'/tmp.xvg'], stdin=subprocess.PIPE) else: - p = subprocess.Popen(['gmx', 'density', '-symm', '-b', str(ST), '-e', str(EN),'-sl',str(SLC), '-d', normal, + p = subprocess.Popen(['gmx', 'density', '-symm', '-b', str(ST), '-e', str(EN), '-sl',str(SLC), '-d', normal, '-f', TRR, '-s',TPR, '-n',IND, @@ -70,4 +75,119 @@ def density_picks(TRR,TPR,IND,SLC,ST,EN,normal,fld,arg,dist_pk=1): pathname = os.path.abspath(os.path.join(fld, 'tmp.xvg')) os.remove(pathname) - return peaks #res \ No newline at end of file + return peaks + +def rdf_peaks(TRR,TPR,IND,ST,EN,fld,arg1,arg2,dist_pk): + + cmd=str(arg1)+'\n'+str(arg2)+'\n' + if EN==-1: + p = subprocess.Popen(['gmx', 'rdf', '-b', str(ST), '-selrpos', 'mol_com', + '-f', TRR, + '-s',TPR, + '-n',IND, + '-o', fld+'/tmp.xvg'], + stdin=subprocess.PIPE) + else: + p = subprocess.Popen(['gmx', 'rdf', '-b', str(ST), '-e', str(EN), '-selrpos', 'mol_com', + '-f', TRR, + '-s',TPR, + '-n',IND, + '-o', fld+'/tmp.xvg'], + 
stdin=subprocess.PIPE) + p.communicate(cmd.encode('UTF-8')) + p.send_signal('-D') + p.wait() + + x,y=read_xvg(XVG=fld+'/tmp.xvg') + yhat = savgol_filter(y, 15, 4) # window size 15, polynomial order 4 + peaks, _ = find_peaks(yhat, distance=dist_pk) + + pathname = os.path.abspath(os.path.join(fld, 'tmp.xvg')) + os.remove(pathname) + + return peaks + +def order_ndx(box_res,fld,atoms,sv_name): +#def togmxndx(box_res, fld, sv_name): + print(' ') + print('Save to order_ndx progress:') + print('==============================') + cnt=0 + fl_save=fld+'/' + if not os.path.exists(fl_save): + os.makedirs(fl_save) + else: + print('WARNING: .ndx files exists. Nothing to do!') + return + + ndx={} + for step in box_res.keys(): + if cnt==1: + break + for resid in box_res[step].keys(): + for res in box_res[step][resid].keys(): + for subdomain in box_res[step][resid][res].keys(): + ndx[subdomain]={} + + rs=[] + bar = Bar('Create format: ', max=len(box_res[step].keys())) + for resid in box_res[step].keys(): + for res in box_res[step][resid].keys(): + for subdomain in box_res[step][resid][res]: + posl=-1 + for at in atoms: + posl=posl+1 + if (at,subdomain) not in rs: + rs.append((at,subdomain)) + ndx[subdomain][at]=[] + posf=-1 + for atomnum in box_res[step][resid][res][subdomain]: + posf=posf+1 + if posl==posf: + ndx[subdomain][at].append(atomnum) + bar.next() + bar.finish() + cnt=cnt+1 + + bar = Bar('Save file: ', max=len(ndx.keys())) + for subdomain in ndx.keys(): + save=fl_save+sv_name+'_'+str(subdomain)+'.ndx' + with open(save, "a") as myfile: + for res in ndx[subdomain].keys(): + myfile.write("["+res+"]\n") + for atomnum in ndx[subdomain][res]: + myfile.write(atomnum+'\n') + + bar.next() + bar.finish() + +def order(TRR,TPR,IND,ST,EN,normal,fld,dist_pk=1): + + if EN==-1: + p = subprocess.Popen(['gmx', 'order', '-b', str(ST), '-d', normal, + '-f', TRR, + '-s',TPR, + '-n',IND, + '-o', fld+'/tmp1.xvg', + '-od', fld+'/tmp2.xvg'], + stdin=subprocess.PIPE) + else: + p = subprocess.Popen(['gmx', 'order', '-b', str(ST), '-e', str(EN), '-d', normal, + '-f', TRR, + '-s',TPR, + '-n',IND, + '-o', fld+'/tmp1.xvg', + '-od', fld+'/tmp2.xvg'], + stdin=subprocess.PIPE) + p.wait() + + x,y=read_xvg(XVG=fld+'/tmp1.xvg') + #yhat = savgol_filter(y, 15, 4) # window size 15, polynomial order 4 + #peaks, _ = find_peaks(yhat, distance=dist_pk) + #exit() + pathname = os.path.abspath(os.path.join(fld, 'tmp1.xvg')) + os.remove(pathname) + pathname = os.path.abspath(os.path.join(fld, 'tmp2.xvg')) + os.remove(pathname) + + return y \ No newline at end of file -- 2.24.1
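
The new density_peaks/rdf_peaks helpers share one pattern: run the gmx tool into a temporary .xvg, smooth the profile, and keep only the peak positions. A self-contained sketch of that post-processing step, using a synthetic profile in place of the gmx output (the Savitzky-Golay window/order and the distance threshold are the values used in this patch):

    import numpy as np
    from scipy.signal import find_peaks, savgol_filter

    # synthetic stand-in for the (x, y) columns that read_xvg() returns from tmp.xvg
    x = np.linspace(0, 10, 400)
    y = np.exp(-(x - 3.0)**2) + 0.8 * np.exp(-(x - 7.0)**2) + 0.05 * np.random.rand(x.size)

    yhat = savgol_filter(y, 15, 4)            # window size 15, polynomial order 4
    peaks, _ = find_peaks(yhat, distance=20)  # dist_pk=20, as main.py passes for the density profiles
    print(x[peaks])                           # peak positions along the chosen normal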