From 1272fa88654222c2d8e08d56d3fc7ec95ecb708d Mon Sep 17 00:00:00 2001 From: skarozis Date: Wed, 20 May 2020 18:19:40 +0300 Subject: [PATCH] New functions+debuging --- .gitignore | 4 +- CHANGELOG | 12 ++++ main.py | 72 +++++++++++++++------- tooba_f.py | 177 ++++++++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 222 insertions(+), 43 deletions(-) diff --git a/.gitignore b/.gitignore index 5a81c17..4bd1fc0 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,6 @@ __pycache__/ *.pkl *.trr *.gro -*.code-workspace \ No newline at end of file +*.code-workspace +*.itp +system/ \ No newline at end of file diff --git a/CHANGELOG b/CHANGELOG index c1a3b89..bfed0ca 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.0.5] - 2020-05-20 +### Added +- Add progress bar per function +- Save preprocessing arrays to pickles and read them if exist for speed up whole process +- new function to calculate surface per subdomain (different input) + +### Changed +- debug atomid_data function + +### Removed +- None + ## [0.0.4] - 2020-05-19 ### Added - code for print data in json and pickle format diff --git a/main.py b/main.py index b06d708..7b70e82 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,4 @@ +import os import tooba_f as tbf ################################################### #NOTICE: resids of head in each subdomain may differ in tail case @@ -5,46 +6,75 @@ import tooba_f as tbf # in case of tail is the one closest to the head, hence # the code is a good approximation ################################################### -TRAJ='traj.trr' +TRAJ='system/eq_traj.trr' NUM_FR=1 -GRO='initial.gro' +GRO='system/eq_final.gro' HEAD=True -HD_GROUP={'MMA':['C1', 'C2']} +HD_GROUP={'NS':['C6', 'Na', 'P4', 'P3', 'C7'], 'CHOL':['ROH'], 'FFA':['AC']} TAIL=False -TL_GROUP={'MMA':['C3', 'C4', 'C5']} -DISCET=[1, 1, 1] +TL_GROUP={'NS':['C3', 'C4', 'C5', 'C8', 'C9', 'C10'], 'CHOL':['R1', 'R2', 'R3', 'R4', 'R5'], 'FFA':['C1', 'C2', 'C3', 'C4']} +DISCET=[3, 3, 3] RES_NAME='time_domain_c-normal-cg' ################################################### -#Read .trr file -data_all=tbf.fr_export(trajfile=TRAJ,num_frames=NUM_FR) +################################################### +print(' ') +print('================') +print('Starting process') +print('================') +################################################### +################################################### #Read .gro file _,data_num,_,res_num,res_type,atom_type,atom_num,_ = tbf.read_gro(GRO) -#Create subdomains coordinates -box_p=tbf.domain_decomposition(data=data_all,dx=DISCET[0],dy=DISCET[1],dz=DISCET[2]) - +print(' ') +################################################### +if os.path.isfile('./data.pkl'): + print('WARNING: Preprocessing files exist.') + print(' Erase data.pkl if the system is new.') + print('--------------------------------------------') + data_all=tbf.frompickle('./data.pkl') +else: + #Read .trr file + data_all=tbf.fr_export(trajfile=TRAJ,num_frames=NUM_FR) + tbf.topickle(fl=data_all, sv_name='data') ################################################### if HEAD==True: - #Find atom type index in lists created above - group_ndx=tbf.atomid_data(res_num, atom_type, atom_num, group=HD_GROUP) + if os.path.isfile('./ndx_HEAD.pkl'): + print('WARNING: Preprocessing files exist.') + print(' Erase ndx_HEAD.pkl if the system is new.') + print('--------------------------------------------') + group_ndx=tbf.frompickle('./ndx_HEAD.pkl') + else: + #Find atom type index in lists created above + group_ndx=tbf.atomid_data(res_num, res_type, atom_type, atom_num, group=HD_GROUP) + tbf.topickle(fl=group_ndx, sv_name='ndx_HEAD') +################################################### +if os.path.isfile('./box.pkl'): + print('WARNING: Preprocessing files exist.') + print(' Erase box.pkl if the system is new') + print(' or new grid is applied !') + print('--------------------------------------------') + box_res=tbf.frompickle('./box.pkl') +else: + #Create subdomains coordinates + box_p=tbf.domain_decomposition(data=data_all,dx=DISCET[0],dy=DISCET[1],dz=DISCET[2]) #Assign desired atoms (from above function) to subdomains ##result1: {step:{res:{atom_type:{atom_num:(subX,subYsubZ)}}}} ##result2: {step:{res:{atom_type:{(subX,subYsubZ):[atom_num]}}}} _,box_res=tbf.atom2grid(data_all,box_p, group_ndx) + tbf.topickle(fl=box_res, sv_name='box') +################################################### +################################################### +if HEAD==True: #Creates dictionary with coordinates per subdomain for each frame - coord_norm,_=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num) + coord_norm,coord_vector=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num) #Creates dictionary with c, normal per subdomain for each frame - surf=tbf.coord2norm(coord_norm,img=True) - + surf=tbf.coord2norm2cg(coord_vector,img=True) + print(surf) + exit() tbf.topickle(fl=surf, sv_name=RES_NAME) tbf.tojson(fl=surf, sv_name=RES_NAME) ################################################### if TAIL==True: - #Find atom type index in lists created above - group_ndx=tbf.atomid_data(res_num, atom_type, atom_num, group=TL_GROUP) - #Assign desired atoms (from above function) to subdomains - ##result1: {step:{res:{atom_type:{atom_num:(subX,subYsubZ)}}}} - ##result2: {step:{res:{atom_type:{(subX,subYsubZ):[atom_num]}}}} - _,box_res=tbf.atom2grid(data_all,box_p, group_ndx) #Creates dictionary with coordinates per subdomain for each frame _,coord_vector=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num) vector=tbf.coord2vector(coord_vector) diff --git a/tooba_f.py b/tooba_f.py index 3fbc733..7aa0f04 100644 --- a/tooba_f.py +++ b/tooba_f.py @@ -1,7 +1,9 @@ +import os import numpy as np import scipy.optimize import pickle as pkl import json +from progress.bar import Bar #from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt @@ -53,6 +55,12 @@ def topickle(fl, sv_name): with open(sv_name+'.pkl', 'wb') as handle: pkl.dump(fl, handle, protocol=pkl.HIGHEST_PROTOCOL) +def frompickle(fl): + with open(fl, 'rb') as handle: + b = pkl.load(handle) + return b + + def tojson(fl, sv_name): with open(sv_name+'.json', 'w') as file: file.write(json.dumps(str(fl))) @@ -137,19 +145,28 @@ def fr_export(trajfile='traj.trr', num_frames=1): num_frames = [number of frames to keep counting from the end of file] """ + print(' ') + print('Read gmx files progress:') + print('==============================') cnt_fr=count_frames(trajfile) data_all={} with open(trajfile, 'rb') as inputfile: + bar = Bar('Skipping frames from .trr', max=cnt_fr-num_frames) for i in range(cnt_fr-num_frames): header = read_trr_header(inputfile) #print('Step: {step}, time: {time}'.format(**header)) skip_trr_data(inputfile, header) + bar.next() + bar.finish() + bar = Bar('Read frames from .trr', max=num_frames) for i in range(cnt_fr-num_frames,cnt_fr): header = read_trr_header(inputfile) #print('Step: {step}, time: {time}'.format(**header)) data = read_trr_data(inputfile, header) step='{step}'.format(**header) data_all[step]=data + bar.next() + bar.finish() return data_all def read_gro(gro): @@ -174,7 +191,9 @@ def read_gro(gro): atom_num = [] rest_dt = [] cnt_atoms=0 + num_lines = sum(1 for line in open(gro)) with open(gro, 'r') as F: + bar = Bar('Read .gro', max=num_lines) for line in F: cnt=cnt+1 #print(cnt) @@ -194,8 +213,10 @@ def read_gro(gro): data_num=int(line[:7]) elif cnt>data_num+2: box_size=line[:50] + bar.next() + bar.finish() #print(system,data_num,box_size) - return system,data_num,box_size,res_num,res_type,atom_type,atom_num,rest_dt + return system,data_num,box_size,res_num,res_type,atom_type,atom_num,rest_dt def domain_decomposition(data,dx,dy,dz): """ @@ -210,7 +231,11 @@ def domain_decomposition(data,dx,dy,dz): output: dictionary of x,y,z grid points per step (frame) """ box_p={} + print(' ') + print('Domain decomposition progress:') + print('==============================') for step in data.keys(): + print('Step: '+step) box_p[step]={} xs=int(round(data[step]['box'][0][0]+0.1)) #to round always to bigger number ys=int(round(data[step]['box'][1][1]+0.1)) @@ -221,25 +246,35 @@ def domain_decomposition(data,dx,dy,dz): xx=[] + bar = Bar('X-axis', max=box_x) # for i in range(0,xs+1,dx): for i in range(0,box_x): xx.append(i) + bar.next() box_p[step]['x']=xx + bar.finish() yy=[] + bar = Bar('Y-axis', max=box_y) # for i in range(0,ys+1,dy): for i in range(0,box_y): yy.append(i) + bar.next() box_p[step]['y']=yy + bar.finish() zz=[] + bar = Bar('Z-axis', max=box_z) # for i in range(0,zs+1,dz): for i in range(0,box_z): zz.append(i) + bar.next() box_p[step]['z']=zz + bar.finish() xyz=[] + bar = Bar('XYZ-axis', max=box_x*box_y*box_z) # for ix in range(0,xs+1,dx): for ix in range(0,box_x): # for iy in range(0,ys+1,dy): @@ -247,11 +282,13 @@ def domain_decomposition(data,dx,dy,dz): # for iz in range(0,zs+1,dz): for iz in range(0,box_z): xyz.append([ix,iy,iz]) + bar.next() + bar.finish() box_p[step]['xyz']=xyz return box_p -def atomid_data(res_num, atom_type, atom_num, group={}): +def atomid_data(res_num, res_type, atom_type, atom_num, group={}): """ Finds the index in list that 'def read_gro' returns, and correspond to the atom types in group list @@ -263,18 +300,39 @@ def atomid_data(res_num, atom_type, atom_num, group={}): output: dictionary {resid:{res_type:{atom_type:[atom_num]}}} """ - res_ndx=[] + print(' ') + print('Create index progress:') + print('======================') + #bar = Bar('Create index step', max=len(group.keys())) + res_ndx={} for res,atom in group.items(): + res_ndx[res]=[] for element in atom: - res_ndx=[res_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip()] - + tmp=[res_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip()] + res_ndx[res].append(tmp) + #bar.next() + #bar.finish() + + res_ndx_flat={} + for res in res_ndx.keys(): + res_ndx_flat[res] = [y for x in res_ndx[res] for y in x] + ndx={} - for resid in res_ndx: - ndx[resid.strip()]={} - for res,atom in group.items(): - ndx[resid][res]={} - for element in atom: - ndx[resid][res][element]=[atom_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip() and resid.strip()==res_num[i].strip()] + for rs in res_ndx_flat.keys(): + bar = Bar(rs, max=len(res_ndx_flat[rs])) + for resid in res_ndx_flat[rs]: + ndx[resid.strip()]={} + for res,atom in group.items(): + ndx[resid][res]={} + for element in atom: + tmp=[atom_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip() and resid.strip()==res_num[i].strip() and res.strip()==res_type[i].strip()] + if not tmp: + ndx[resid].pop(res,None) + pass + else: + ndx[resid][res][element]=tmp + bar.next() + bar.finish() return ndx def atom2grid(data, box_p, ndx): @@ -291,10 +349,15 @@ def atom2grid(data, box_p, ndx): 1. box_res = {step:{resid:{res_type:{atom_type:{atom_num:(subX,subYsubZ)}}}}} 2. box_res_rev = {step:{resid:{res:{(subX,subYsubZ):[atom_num]}}}} """ + print(' ') + print('Assing atom to grid progress:') + print('==============================') box_res={} box_res_rev = {} for step in data.keys(): + #print('Step: '+step) + bar = Bar('Step: '+step, max=len(ndx.keys())) box_res[step]={} box_res_rev[step] = {} for resid in ndx.keys(): @@ -359,7 +422,8 @@ def atom2grid(data, box_p, ndx): #Keep first key and results box_res_rev[step][resid][res][kk[0]]=flattened_list #box_res_rev[step][resid][res]=[i for i, e in enumerate(box_res_rev[step][resid][res]) if e.strip() != .strip()] - + bar.next() + bar.finish() return box_res, box_res_rev def sub_coord(box, data, res_num=[]): @@ -373,27 +437,39 @@ def sub_coord(box, data, res_num=[]): output: norm = {step:{subdomain:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}} vector = {step:{subdomain:resid:{[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}} """ + print(' ') + print('Groupby subdomain progress:') + print('==============================') + coord_norm={} coord_vector={} for step in box.keys(): - sb=[] + coord_norm[step]={} coord_vector[step]={} + for resid in box[step].keys(): + for res in box[step][resid].keys(): + for subdomain in box[step][resid][res].keys(): + coord_norm[step][subdomain]=[] + coord_vector[step][subdomain]={} + + rs=[] + bar = Bar('Step: '+step, max=len(box[step].keys())) for resid in box[step].keys(): for res in box[step][resid].keys(): for subdomain in box[step][resid][res].keys(): - tmp=[] - if subdomain not in sb: - sb.append(subdomain) - coord_vector[step][subdomain]={} - coord_norm[step][subdomain]=[] - coord_vector[step][subdomain][resid]=[] + #tmp=[] + if (resid,subdomain) not in rs: + rs.append((resid,subdomain)) + coord_vector[step][subdomain][resid]=[] for atomnum in box[step][resid][res][subdomain]: #tmp_atom=[] - tmp.append(data[step]['x'][int(atomnum)-1]) + #tmp.append(data[step]['x'][int(atomnum)-1]) #tmp_atom.append(list(data[step]['x'][int(atomnum)-1])) #not append to previous - coord_vector[step][subdomain][resid].append(list(data[step]['x'][int(atomnum)-1])) coord_norm[step][subdomain].append(list(data[step]['x'][int(atomnum)-1])) + coord_vector[step][subdomain][resid].append(list(data[step]['x'][int(atomnum)-1])) + bar.next() + bar.finish() return coord_norm, coord_vector def coord2norm(coord,img=True): @@ -407,9 +483,14 @@ def coord2norm(coord,img=True): output: dictionary = {step:{subdomain:{c:int, normal:array[]}}} """ + print(' ') + print('Finding surface progress:') + print('==============================') + surf={} for step in coord.keys(): surf[step]={} + bar = Bar('Step: '+step, max=len(coord[step].keys())) for subdomain in coord[step].keys(): c,normal = fitPlaneLTSQ(np.array(coord[step][subdomain])) cgx = center_gravity(coord[step][subdomain]) @@ -423,7 +504,55 @@ def coord2norm(coord,img=True): plot_surf(np.array(coord[step][subdomain]), normal, c, save) except: print("ERROR: Folder png/ doesn't exist in root") + bar.next() + bar.finish() + return surf +def coord2norm2cg(coord_vector,img=True): + """ + Use the coord_vector from 'def sub_coord' and replaces XYZ coordinates + with c, normal of surface that fits the points and calculates center of gravity + for each resid. Also it can plot the surfaces for each subdomain + + parameters: coord = {step:{subdomain:{resid:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}} + img = True or False + + output: dictionary = {step:{subdomain:{c:int, normal:array[], resid:[cgx, cgy, cgz]}}} + """ + print(' ') + print('Finding surface and gravity point progress:') + print('==========================================') + + surf={} + for step in coord_vector.keys(): + surf[step]={} + bar = Bar('Step: '+step, max=len(coord_vector[step].keys())) + for subdomain in coord_vector[step].keys(): + surf[step][subdomain]={} + #surf_points=[] + res_cg={} + cnt=0 + for resid in coord_vector[step][subdomain].keys(): + cnt=cnt+1 + cgx = center_gravity(coord_vector[step][subdomain][resid]) + cgy = center_gravity(coord_vector[step][subdomain][resid]) + cgz = center_gravity(coord_vector[step][subdomain][resid]) + res_cg[resid]=[cgx,cgy,cgz] + if cnt==1: + surf_points=coord_vector[step][subdomain][resid] + else: + surf_points=surf_points+coord_vector[step][subdomain][resid] + c,normal = fitPlaneLTSQ(np.array(surf_points)) + surf[step][subdomain]={'c':c, 'normal':normal} + surf[step][subdomain].update(res_cg) + save='png/'+str(subdomain) + if img==True: + try: + plot_surf(np.array(surf_points), normal, c, save) + except: + print("ERROR: Folder png/ doesn't exist in root") + bar.next() + bar.finish() return surf def coord2vector(coord_vector): @@ -435,14 +564,20 @@ def coord2vector(coord_vector): output: dictionary = {step:{subdomain:{resid:[x, y, z]}} """ + print(' ') + print('Finding surface progress:') + print('==============================') vector={} for step in coord_vector.keys(): vector[step]={} + bar = Bar('Step: '+step, max=len(coord_vector[step].keys())) for subdomain in coord_vector[step].keys(): vector[step][subdomain]={} for resid in coord_vector[step][subdomain].keys(): vv = points2vector(np.array(coord_vector[step][subdomain][resid])) vector[step][subdomain][resid]=vv + bar.next() + bar.finish() return vector def SurfVector_angle(surf,vector): -- 2.24.1