import os

import pandas as pd

import tooba_f as tbf
import tooba_gmx as tbgmx
###################################################
#NOTICE: the resids of the head group in a subdomain may differ from
#        those of the tails. All atoms of a group are kept in the first
#        subdomain in which they occur, which for tails is the one
#        closest to the head; hence the code is a good approximation.
###################################################
SYSTEM_NAME='20190322_10'
DISCET=[3.5, 3.5, 3.5]           #subdomain edge lengths (x, y, z)
NUM_FR=750                       #number of frames to analyze
TRAJ=SYSTEM_NAME+'/eq_traj.trr'
GRO=SYSTEM_NAME+'/eq_final.gro'
TPR=SYSTEM_NAME+'/eq_run.tpr'
#ITP_DIC={'NS':'CER_SOVOVA.itp','FFA':'FFA_CG.itp','CHOL':'CHOL_CG.itp'}
###################################################
# {NAME:[QUEUE OF PROCESSES]}
#
# NAME: user defined. A dictionary with the same name must follow;
#       its structure has to be: {res_type:[atom_types]}
#
#       If NAME is COMBINE, it uses part or all of the info from the
#       aforementioned groups to execute a process. A combination cannot
#       be used as the first group.
#
# QUEUE OF PROCESSES: surf, vector, tilt, index, density, gmx_ndx,
#                     [save, [type], save_name]
#
# surf:   determine the surface that fits the atoms (e.g. head of lipid)
# vector: determine the vector that fits the atoms (e.g. tail of lipid)
# tilt:   use the surf and vector results to calculate the angle between
#         them (only if NAME is COMBINE)
# index+FOLDER: save one ndx per subdomain and create a unique code (md5)
#         for every subdomain, used in the data-saving process
# index_order+FOLDER: save one ndx per subdomain for order parameter
#         calculations and create a unique code (md5) for every
#         subdomain, used in the data-saving process
# density: determine the density profile along x, y and z and keep the
#         peaks of the direction with the fewest peaks
# rdf:    calculate the 2D radial distribution function g(r) and keep the
#         peaks of the direction with the fewest peaks
# order:  calculate the order parameter of an atom chain
# [save, [TYPE], SAVE_NAME]: save the result of the previous function;
#         TYPE: pkl, json
###################################################
GROUPS={'ALL':['index+ALL_ndx',['save', ['pkl'],'index'],'density',['save', ['pkl'],'dens']],
        'HD_GROUP':['surf',['save', ['pkl', 'json'],'surf'],'index+HD_ndx',['save', ['pkl'],'index'],'rdf',['save', ['pkl'],'rdf']],
        'TL_GROUP':['vector',['save', ['pkl'],'vec']],
        'ORDER_NS_SPH':['index_order+ORDER_NS_SPH_ndx',['save', ['pkl'],'index'],'order',['save',['pkl'], 'order']],
        'ORDER_NS_ACYL':['index_order+ORDER_NS_ACYL_ndx',['save', ['pkl'],'index'],'order',['save',['pkl'], 'order']],
        'ORDER_FFA':['index_order+ORDER_FFA_ndx',['save', ['pkl'],'index'],'order',['save',['pkl'], 'order']],
        'ORDER_CHOL':['index_order+ORDER_CHOL_ndx',['save', ['pkl'],'index'],'order',['save',['pkl'], 'order']],
        'COMBINE':[['HD_GROUP','surf'],['TL_GROUP','vector'],['COMB','tilt'],['save', ['pkl'],'tilt']]
        }
#ALL['NS'] is the union of HD_GROUP['NS'] and TL_GROUP['NS'] plus C1, C2
#(duplicate C3, C4, C5 entries removed)
ALL={'NS':['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'Na', 'P4', 'P3', 'C7', 'C8', 'C9', 'C10'],
     'CHOL':['ROH', 'R1', 'R2', 'R3', 'R4', 'R5'],
     'FFA':['AC', 'C1', 'C2', 'C3', 'C4']}
HD_GROUP={'NS':['C6', 'Na', 'P4', 'P3', 'C7'], 'CHOL':['ROH'], 'FFA':['AC']}
TL_GROUP={'NS':['C3', 'C4', 'C5', 'C8', 'C9', 'C10'], 'CHOL':['R1', 'R2', 'R3', 'R4', 'R5'], 'FFA':['C1', 'C2', 'C3', 'C4']}
ORDER_NS_SPH={'NS':['C1', 'C2', 'C3', 'C4', 'C5']} #probable problem with identical atom names in NS and FFA
ORDER_NS_ACYL={'NS':['C8', 'C9', 'C10']}
ORDER_FFA={'FFA':['C1', 'C2', 'C3', 'C4']}
ORDER_CHOL={'CHOL':['R2', 'R3', 'R4', 'R5']}
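###################################################
#For illustration, a minimal extra entry following the scheme above.
#The group name 'MY_HEADS' and the folder 'MY_ndx' are hypothetical
#placeholders, not part of this system:
#
#  GROUPS['MY_HEADS']=['index+MY_ndx',['save', ['pkl'],'index']]
#  MY_HEADS={'NS':['C6', 'Na'], 'FFA':['AC']}
#
#This would write one ndx file per subdomain into ./<SYSTEM_NAME>/MY_ndx
#and pickle the md5 index of those subdomains.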
###################################################
###################################################
print(' ')
print('================')
print('Starting process')
print('================')
###################################################
###################################################
#Read .gro file
_,data_num,_,res_num,res_type,atom_type,atom_num,_ = tbf.read_gro(GRO)
print(' ')
###################################################
#--------------------------------------------------
#Count frames & estimate the time span of the calculations
cnt_fr,time_index=tbf.count_frames(TRAJ,True)
MAX_FR=list(time_index.keys())[-1]
#print(MAX_FR,time_index[MAX_FR])
ST_FR=MAX_FR-NUM_FR
#print(ST_FR,time_index[ST_FR])
###################################################
#--------------------------------------------------
#Read .itp files
#weights={}
#for MOL in ITP_DIC.keys():
#    weights_tmp = tbf.read_itp(SYSTEM_NAME+'/'+ITP_DIC[MOL])
#    weights[MOL]=weights_tmp
#    print(' ')
#print(weights)
###################################################
#--------------------------------------------------
if os.path.isfile('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_data.pkl'):
    pass
else:
    #Read .trr file
    data_all=tbf.fr_export(trajfile=TRAJ,num_frames=NUM_FR)
    tbf.topickle(fl=data_all, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_data')
    del data_all
###################################################
#--------------------------------------------------
#Check whether save files already exist, in order to skip the
#corresponding functions later on
prev=0
sv_index={}
ndx_fl={}
for i in GROUPS.keys():
    sv_index[i]={}
    ndx_fl[i]={}
    cnt_ndx=-1
    for j in GROUPS[i]:
        cnt_ndx=cnt_ndx+1
        #'index+FOLDER' and 'index_order+FOLDER' entries: remember the
        #folder and strip it, generalizing the index function name
        if isinstance(j, str) and j.split('+')[0] in ('index', 'index_order'):
            ndx_fl[i][j.split('+')[0]]=j.split('+')[1]
            j=j.split('+')[0]
            GROUPS[i][cnt_ndx]=j
        #list entries (the save directives) are unhashable, so key by str()
        key=j if isinstance(j, str) else str(j)
        sv_index[i][key]={'status':'not exist', 'name':'None'}
        if isinstance(j, list) and len(j)==3 and j[0]=='save':
            #existence is checked against the .pkl copy (even when a json
            #is also saved), since results are reloaded with frompickle()
            sv_fl='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2]+'.pkl'
            if os.path.isfile(sv_fl):
                sv_index[i][prev]['status']='exist'
                sv_index[i][prev]['name']=sv_fl
            else:
                sv_index[i][prev]['status']='not exist'
        prev=str(j)
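###################################################
#After the bookkeeping pass above, ndx_fl and sv_index look roughly like
#this (illustrative values, for the GROUPS defined at the top):
#  ndx_fl['HD_GROUP']   -> {'index': 'HD_ndx'}
#  sv_index['HD_GROUP'] -> {'surf': {'status': 'exist',
#                                    'name': './20190322_10/20190322_10_HD_GROUP_surf.pkl'},
#                           ...}
#so each process entry knows whether a previous run already saved its result.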
###################################################
#--------------------------------------------------
mrg_data={}
for i in GROUPS.keys():
    mrg_data[i]={}
    #not COMBINE section
    if i!='COMBINE':
        if os.path.isfile('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_ndx.pkl'):
            pass
        else:
            #Find the atom type indices in the lists created above;
            #locals()[i] looks up the group dictionary with the same name
            print('')
            print('Group: ',i)
            print('++++++++++++++++++++++++')
            group_ndx=tbf.atomid_data(res_num, res_type, atom_type, atom_num, group=locals()[i])
            tbf.topickle(fl=group_ndx, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_ndx')
            del group_ndx
        #--------------------------------------------------
        if os.path.isfile('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl'):
            pass
        else:
            data_all=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_data.pkl')
            group_ndx=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_ndx.pkl')
            #Create subdomain coordinates
            box_p=tbf.domain_decomposition(data=data_all,dx=DISCET[0],dy=DISCET[1],dz=DISCET[2])
            #Assign the desired atoms (from the function above) to subdomains
            ##result1: {step:{res:{atom_type:{atom_num:(subX,subY,subZ)}}}}
            ##result2: {step:{res:{atom_type:{(subX,subY,subZ):[atom_num]}}}}
            #ToDo: keep the initial domain name and the molecules grouped in it fixed for all steps
            _,box_res=tbf.atom2group(data_all,box_p, group_ndx)
            tbf.topickle(fl=box_res, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box')
            del data_all
            del group_ndx
            del box_p
            del box_res
        ###################################################
        if os.path.isfile('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_FR'+str(NUM_FR)+'.pkl'):
            pass
        else:
            #Create a dictionary with the coordinates per subdomain for each frame
            data_all=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_data.pkl')
            box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl')
            _,coord_vector=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num)
            tbf.topickle(fl=coord_vector, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_FR'+str(NUM_FR))
            del data_all
            del box_res
            del coord_vector
        ###################################################
        for j in GROUPS[i]:
            if len(j) > 1:
                if j=='surf' and sv_index[i][j]['status']=='not exist':
                    if j not in locals():    #initialize the surf dict only once
                        surf={}
                    #Create a dictionary with the cg and normal per subdomain for each frame
                    coord_vector=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_FR'+str(NUM_FR)+'.pkl')
                    surf[i]=tbf.coord2norm2cg(coord_vector,img=False)
                    del coord_vector
                    sv_data=surf[i]
                elif j=='surf' and sv_index[i][j]['status']=='exist':
                    if j not in locals():
                        surf={}
                    surf[i]=tbf.frompickle(sv_index[i][j]['name'])
                #--------------------------------------------------
                if j=='vector' and sv_index[i][j]['status']=='not exist':
                    if j not in locals():    #initialize the vector dict only once
                        vector={}
                    coord_vector=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_FR'+str(NUM_FR)+'.pkl')
                    vector[i]=tbf.coord2vector(coord_vector)
                    del coord_vector
                    sv_data=vector[i]
                elif j=='vector' and sv_index[i][j]['status']=='exist':
                    if j not in locals():
                        vector={}
                    vector[i]=tbf.frompickle(sv_index[i][j]['name'])
                #--------------------------------------------------
                #ToDo: make a more generic file with the ndx files and the ndx for the order parameter.
                #As of now the hash value is generic (system+domain coord), but togmxndx needs to run for every input group.
                if j=='index' and sv_index[i][j]['status']=='not exist':
                    box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl')
                    tbf.togmxndx(box_res, fld='./'+SYSTEM_NAME+'/'+ndx_fl[i][j], sv_name=SYSTEM_NAME+'_'+i)
                    del box_res
                    uniq_id=tbgmx.ndx_index(SYSTEM_NAME,ndx_fl[i][j])
                    sv_data=uniq_id
                elif j=='index' and sv_index[i][j]['status']=='exist':
                    box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl')
                    tbf.togmxndx(box_res, fld='./'+SYSTEM_NAME+'/'+ndx_fl[i][j], sv_name=SYSTEM_NAME+'_'+i)
                    del box_res
                    uniq_id=tbf.frompickle(sv_index[i][j]['name'])
                #--------------------------------------------------
                if j=='index_order' and sv_index[i][j]['status']=='not exist':
                    box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl')
                    for mol, atoms in locals()[i].items():
                        tbgmx.order_ndx(box_res, fld='./'+SYSTEM_NAME+'/'+ndx_fl[i][j], atoms=atoms, sv_name=SYSTEM_NAME+'_'+i)
                    del box_res
                    uniq_id=tbgmx.ndx_index(SYSTEM_NAME,ndx_fl[i][j])
                    sv_data=uniq_id
                elif j=='index_order' and sv_index[i][j]['status']=='exist':
                    box_res=tbf.frompickle('./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_box.pkl')
                    for mol, atoms in locals()[i].items():
                        tbgmx.order_ndx(box_res, fld='./'+SYSTEM_NAME+'/'+ndx_fl[i][j], atoms=atoms, sv_name=SYSTEM_NAME+'_'+i)
                    del box_res
                    uniq_id=tbf.frompickle(sv_index[i][j]['name'])
                #--------------------------------------------------
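                #uniq_id, as returned by tbgmx.ndx_index() above, maps one md5
                #hash per subdomain to its ndx files; the fields consumed below
                #are (illustrative):
                #  uniq_id[<md5>] -> {'system': SYSTEM_NAME, 'fld': <ndx folder>,
                #                     'ndx_file': <file name>, 'domain': <subdomain>}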
                if j=='density' and sv_index[i][j]['status']=='not exist':
                    dens_dict={}
                    for iidd in uniq_id.keys():
                        dens_dict[iidd]={}
                        fl='./'+uniq_id[iidd]['system']+'/'+uniq_id[iidd]['fld']+'/'+uniq_id[iidd]['ndx_file']
                        cnt=-1
                        for mol in locals()[i].keys():
                            ind=tbf.search_pattern(fl,mol)
                            if ind=='not exist':
                                break
                            cnt=cnt+1
                            for d in ('x','y','z'):
                                #density profile along direction d for index group number cnt
                                peaks = tbgmx.density_peaks(TRR=TRAJ,TPR=TPR,IND=fl,SLC=400,ST=time_index[ST_FR],EN=-1,normal=d,fld='./'+uniq_id[iidd]['system'],arg=cnt,dist_pk=20)
                                if d=='x':
                                    tmp=peaks
                                else:
                                    #print(len(tmp),len(peaks))
                                    #keep the direction with the fewest peaks
                                    if len(tmp)>len(peaks):
                                        tmp=peaks
                            dens_nm=mol+'_dens_'+uniq_id[iidd]['fld']
                            dens_dict[iidd][dens_nm]=tmp
                    sv_data=dens_dict
                    mrg_data[i][j]=[dens_dict,[]]
                    del dens_dict
                elif j=='density' and sv_index[i][j]['status']=='exist':
                    dens_dict=tbf.frompickle(sv_index[i][j]['name'])
                    mrg_data[i][j]=[dens_dict,[]]
                    del dens_dict
                #--------------------------------------------------
                #ToDo: 'rdf' is listed in the process queue above but has no
                #branch here yet; its save step is skipped at run time since
                #sv_data is deleted after every save
                #--------------------------------------------------
                if j=='order' and sv_index[i][j]['status']=='not exist':
                    order_dict={}
                    for iidd in uniq_id.keys():
                        order_dict[iidd]={}
                        fl='./'+uniq_id[iidd]['system']+'/'+uniq_id[iidd]['fld']+'/'+uniq_id[iidd]['ndx_file']
                        for mol in locals()[i].keys():
                            #ToDo: restore the order parameter call here; it
                            #fills tmp per ndx file and keeps the largest
                            #profile via: if max(tmp)>max(yy): yy=tmp
                            order_nm=mol+'_order_'+uniq_id[iidd]['fld']
                            order_dict[iidd][order_nm]=yy
                    sv_data=order_dict
                    mrg_data[i][j]=[order_dict,[]]
                    del order_dict
                elif j=='order' and sv_index[i][j]['status']=='exist':
                    order_dict=tbf.frompickle(sv_index[i][j]['name'])
                    mrg_data[i][j]=[order_dict,[]]
                    del order_dict
                #--------------------------------------------------
                # Save module
                if isinstance(j, list) and len(j)==3 and j[0]=='save':
                    try:
                        sv_data
                    except NameError:
                        pass
                    else:
                        for k in j[1]:
                            if k=='pkl':
                                tbf.topickle(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2])
                            if k=='json':
                                tbf.tojson(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2])
                        del sv_data
    ###################################################
    #COMBINE section
    else:
        for j in GROUPS[i]:
            #Input to the COMBINE property
            if j[0]!='COMB':
                if j[1]=='surf':
                    surf_inuse=surf[j[0]]
                    #del surf[j[0]]
                if j[1]=='vector':
                    vector_inuse=vector[j[0]]
                    #del vector[j[0]]
            #Calculate the COMBINE property
            if j[0]=='COMB':
                if j[1]=='tilt' and sv_index[i][str(j)]['status']=='not exist':
                    tilt=tbf.SurfVector_angle(surf_inuse,vector_inuse)
                    #ToDo: check "if str(value['domain']).strip() == str(sub).strip():"
                    del surf_inuse
                    del vector_inuse
                    #Loop over timesteps and collect the average tilt of each
                    #step, accumulated per subdomain
                    avg={}
                    for step in tilt.keys():
                        for sub in tilt[step].keys():
                            avgs=tilt[step][sub]['avg/frame']
                            if sub not in avg:
                                avg[sub]=[avgs]
                            else:
                                avg[sub].append(avgs)
                    #del tilt
                    #Calculate the total average
                    tot_avg={}
                    for sub in avg.keys():
                        for key, value in uniq_id.items():
                            if str(value['domain']).strip() == str(sub).strip():
                                hsh=key
                                break
                        try:
                            tot_avg[hsh]=sum(avg[sub])/len(avg[sub])
                        except TypeError:
                            #in case of one frame
                            tot_avg[hsh]=sum([avg[sub]])/len([avg[sub]])
                    sv_data=tot_avg
                    mrg_data[i][j[1]]=[tot_avg,['Tilt[degrees]']]
                    del tot_avg
                elif j[1]=='tilt' and sv_index[i][str(j)]['status']=='exist':
                    tot_avg=tbf.frompickle(sv_index[i][str(j)]['name'])
                    mrg_data[i][j[1]]=[tot_avg,['Tilt[degrees]']]
                    del tot_avg
            #--------------------------------------------------
            # Save module
            if isinstance(j, list) and len(j)==3 and j[0]=='save':
                try:
                    sv_data
                except NameError:
                    pass
                else:
                    for k in j[1]:
                        if k=='pkl':
                            tbf.topickle(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2])
                        if k=='json':
                            tbf.tojson(fl=sv_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_'+i+'_'+j[2])
                    del sv_data
###################################################
#Merge data
tbf.topickle(fl=mrg_data, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_merge')
print(' ')
print('Merging data of:')
print('==============================')
for grp in mrg_data.keys():
    for tp in mrg_data[grp].keys():
        print(grp,tp)
        if len(mrg_data[grp][tp][1])!=0:
            df=pd.DataFrame.from_dict(mrg_data[grp][tp][0], orient='index', columns=mrg_data[grp][tp][1])
        else:
            df=pd.DataFrame.from_dict(mrg_data[grp][tp][0], orient='index')
        try:
            data_df=data_df.join(df)
        except NameError:
            #the first result initializes the dataset
            data_df=df.copy()
tbf.topickle(fl=data_df, sv_name='./'+SYSTEM_NAME+'/'+SYSTEM_NAME+'_dataset')
print(data_df.head())
###################################################
###################################################
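#Files produced per run (all under ./<SYSTEM_NAME>/), as saved above:
#  <SYSTEM_NAME>_data.pkl                 raw frames read from the .trr
#  <SYSTEM_NAME>_<GROUP>_ndx.pkl          atom indices per group
#  <SYSTEM_NAME>_<GROUP>_box.pkl          subdomain assignment per frame
#  <SYSTEM_NAME>_<GROUP>_FR<NUM_FR>.pkl   coordinates per subdomain
#  <SYSTEM_NAME>_<GROUP>_<save_name>.pkl/.json  per-process results
#  <SYSTEM_NAME>_merge.pkl                merged results per group/process
#  <SYSTEM_NAME>_dataset.pkl              joined pandas dataset
###################################################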