Commit 1272fa88 authored by Stelios Karozis

New functions+debugging

parent 4d277115
......@@ -8,4 +8,6 @@ __pycache__/
*.pkl
*.trr
*.gro
*.code-workspace
\ No newline at end of file
*.code-workspace
*.itp
system/
\ No newline at end of file
......@@ -6,6 +6,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## [0.0.5] - 2020-05-20
### Added
- Add a progress bar per function
- Save preprocessing arrays to pickles and read them if they exist, to speed up the whole process
- New function to calculate the surface per subdomain (different input)
### Changed
- Debug the atomid_data function
### Removed
- None
## [0.0.4] - 2020-05-19
### Added
- Code for printing data in JSON and pickle format
......
import os
import tooba_f as tbf
###################################################
#NOTICE: resids of head in each subdomain may differ in tail case
......@@ -5,46 +6,75 @@ import tooba_f as tbf
# in case of tail is the one closest to the head, hence
# the code is a good approximation
###################################################
TRAJ='traj.trr'
TRAJ='system/eq_traj.trr'
NUM_FR=1
GRO='initial.gro'
GRO='system/eq_final.gro'
HEAD=True
HD_GROUP={'MMA':['C1', 'C2']}
HD_GROUP={'NS':['C6', 'Na', 'P4', 'P3', 'C7'], 'CHOL':['ROH'], 'FFA':['AC']}
TAIL=False
TL_GROUP={'MMA':['C3', 'C4', 'C5']}
DISCET=[1, 1, 1]
TL_GROUP={'NS':['C3', 'C4', 'C5', 'C8', 'C9', 'C10'], 'CHOL':['R1', 'R2', 'R3', 'R4', 'R5'], 'FFA':['C1', 'C2', 'C3', 'C4']}
DISCET=[3, 3, 3]
RES_NAME='time_domain_c-normal-cg'
###################################################
#Read .trr file
data_all=tbf.fr_export(trajfile=TRAJ,num_frames=NUM_FR)
###################################################
print(' ')
print('================')
print('Starting process')
print('================')
###################################################
###################################################
#Read .gro file
_,data_num,_,res_num,res_type,atom_type,atom_num,_ = tbf.read_gro(GRO)
#Create subdomains coordinates
box_p=tbf.domain_decomposition(data=data_all,dx=DISCET[0],dy=DISCET[1],dz=DISCET[2])
print(' ')
###################################################
if os.path.isfile('./data.pkl'):
print('WARNING: Preprocessing files exist.')
print(' Erase data.pkl if the system is new.')
print('--------------------------------------------')
data_all=tbf.frompickle('./data.pkl')
else:
#Read .trr file
data_all=tbf.fr_export(trajfile=TRAJ,num_frames=NUM_FR)
tbf.topickle(fl=data_all, sv_name='data')
###################################################
if HEAD==True:
#Find atom type index in lists created above
group_ndx=tbf.atomid_data(res_num, atom_type, atom_num, group=HD_GROUP)
if os.path.isfile('./ndx_HEAD.pkl'):
print('WARNING: Preprocessing files exist.')
print(' Erase ndx_HEAD.pkl if the system is new.')
print('--------------------------------------------')
group_ndx=tbf.frompickle('./ndx_HEAD.pkl')
else:
#Find atom type index in lists created above
group_ndx=tbf.atomid_data(res_num, res_type, atom_type, atom_num, group=HD_GROUP)
tbf.topickle(fl=group_ndx, sv_name='ndx_HEAD')
###################################################
if os.path.isfile('./box.pkl'):
print('WARNING: Preprocessing files exist.')
print(' Erase box.pkl if the system is new')
print(' or new grid is applied !')
print('--------------------------------------------')
box_res=tbf.frompickle('./box.pkl')
else:
#Create subdomains coordinates
box_p=tbf.domain_decomposition(data=data_all,dx=DISCET[0],dy=DISCET[1],dz=DISCET[2])
#Assign desired atoms (from above function) to subdomains
##result1: {step:{res:{atom_type:{atom_num:(subX,subY,subZ)}}}}
##result2: {step:{res:{atom_type:{(subX,subY,subZ):[atom_num]}}}}
_,box_res=tbf.atom2grid(data_all,box_p, group_ndx)
tbf.topickle(fl=box_res, sv_name='box')
###################################################
###################################################
if HEAD==True:
#Creates dictionary with coordinates per subdomain for each frame
coord_norm,_=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num)
coord_norm,coord_vector=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num)
#Creates dictionary with c, normal per subdomain for each frame
surf=tbf.coord2norm(coord_norm,img=True)
surf=tbf.coord2norm2cg(coord_vector,img=True)
print(surf)
exit()
tbf.topickle(fl=surf, sv_name=RES_NAME)
tbf.tojson(fl=surf, sv_name=RES_NAME)
###################################################
if TAIL==True:
#Find atom type index in lists created above
group_ndx=tbf.atomid_data(res_num, atom_type, atom_num, group=TL_GROUP)
#Assign desired atoms (from above function) to subdomains
##result1: {step:{res:{atom_type:{atom_num:(subX,subY,subZ)}}}}
##result2: {step:{res:{atom_type:{(subX,subY,subZ):[atom_num]}}}}
_,box_res=tbf.atom2grid(data_all,box_p, group_ndx)
#Creates dictionary with coordinates per subdomain for each frame
_,coord_vector=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num)
vector=tbf.coord2vector(coord_vector)
......
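The pickle-or-recompute pattern above is repeated for data.pkl, ndx_HEAD.pkl and box.pkl; below is a minimal sketch of the same idea as a hypothetical helper (cached_or_compute is illustrative and not part of tooba_f):

import os
import tooba_f as tbf

def cached_or_compute(pkl_path, compute, sv_name):
    # Reuse a previously saved pickle if it exists, otherwise compute and save it.
    if os.path.isfile(pkl_path):
        print('WARNING: Preprocessing file '+pkl_path+' exists. Erase it if the system is new.')
        return tbf.frompickle(pkl_path)
    result = compute()
    tbf.topickle(fl=result, sv_name=sv_name)
    return result

# e.g. data_all = cached_or_compute('./data.pkl',
#                                   lambda: tbf.fr_export(trajfile=TRAJ, num_frames=NUM_FR),
#                                   'data')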
import os
import numpy as np
import scipy.optimize
import pickle as pkl
import json
from progress.bar import Bar
#from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
......@@ -53,6 +55,12 @@ def topickle(fl, sv_name):
with open(sv_name+'.pkl', 'wb') as handle:
pkl.dump(fl, handle, protocol=pkl.HIGHEST_PROTOCOL)
def frompickle(fl):
with open(fl, 'rb') as handle:
b = pkl.load(handle)
return b
def tojson(fl, sv_name):
with open(sv_name+'.json', 'w') as file:
file.write(json.dumps(str(fl)))
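A minimal round-trip sketch for the three helpers above (assuming the module is importable as tooba_f; file names are illustrative):

import tooba_f as tbf

payload = {'0': {'note': 'any picklable object'}}
tbf.topickle(fl=payload, sv_name='payload')    # writes payload.pkl
restored = tbf.frompickle('./payload.pkl')     # loads the same object back
tbf.tojson(fl=payload, sv_name='payload')      # writes payload.json (the object is stringified first)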
......@@ -137,19 +145,28 @@ def fr_export(trajfile='traj.trr', num_frames=1):
num_frames = [number of frames to keep
counting from the end of file]
"""
print(' ')
print('Read gmx files progress:')
print('==============================')
cnt_fr=count_frames(trajfile)
data_all={}
with open(trajfile, 'rb') as inputfile:
bar = Bar('Skipping frames from .trr', max=cnt_fr-num_frames)
for i in range(cnt_fr-num_frames):
header = read_trr_header(inputfile)
#print('Step: {step}, time: {time}'.format(**header))
skip_trr_data(inputfile, header)
bar.next()
bar.finish()
bar = Bar('Read frames from .trr', max=num_frames)
for i in range(cnt_fr-num_frames,cnt_fr):
header = read_trr_header(inputfile)
#print('Step: {step}, time: {time}'.format(**header))
data = read_trr_data(inputfile, header)
step='{step}'.format(**header)
data_all[step]=data
bar.next()
bar.finish()
return data_all
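A usage sketch, assuming the module is imported as tooba_f; each per-step entry exposes at least the 'x' coordinates and 'box' vectors used by the functions below:

import tooba_f as tbf

data_all = tbf.fr_export(trajfile='system/eq_traj.trr', num_frames=1)  # keep only the last frame
for step, frame in data_all.items():
    print(step, frame['box'][0][0], len(frame['x']))  # box edge and number of stored coordinates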
def read_gro(gro):
......@@ -174,7 +191,9 @@ def read_gro(gro):
atom_num = []
rest_dt = []
cnt_atoms=0
num_lines = sum(1 for line in open(gro))
with open(gro, 'r') as F:
bar = Bar('Read .gro', max=num_lines)
for line in F:
cnt=cnt+1
#print(cnt)
......@@ -194,8 +213,10 @@ def read_gro(gro):
data_num=int(line[:7])
elif cnt>data_num+2:
box_size=line[:50]
bar.next()
bar.finish()
#print(system,data_num,box_size)
return system,data_num,box_size,res_num,res_type,atom_type,atom_num,rest_dt
return system,data_num,box_size,res_num,res_type,atom_type,atom_num,rest_dt
def domain_decomposition(data,dx,dy,dz):
"""
......@@ -210,7 +231,11 @@ def domain_decomposition(data,dx,dy,dz):
output: dictionary of x,y,z grid points per step (frame)
"""
box_p={}
print(' ')
print('Domain decomposition progress:')
print('==============================')
for step in data.keys():
print('Step: '+step)
box_p[step]={}
xs=int(round(data[step]['box'][0][0]+0.1)) #to round always to bigger number
ys=int(round(data[step]['box'][1][1]+0.1))
......@@ -221,25 +246,35 @@ def domain_decomposition(data,dx,dy,dz):
xx=[]
bar = Bar('X-axis', max=box_x)
# for i in range(0,xs+1,dx):
for i in range(0,box_x):
xx.append(i)
bar.next()
box_p[step]['x']=xx
bar.finish()
yy=[]
bar = Bar('Y-axis', max=box_y)
# for i in range(0,ys+1,dy):
for i in range(0,box_y):
yy.append(i)
bar.next()
box_p[step]['y']=yy
bar.finish()
zz=[]
bar = Bar('Z-axis', max=box_z)
# for i in range(0,zs+1,dz):
for i in range(0,box_z):
zz.append(i)
bar.next()
box_p[step]['z']=zz
bar.finish()
xyz=[]
bar = Bar('XYZ-axis', max=box_x*box_y*box_z)
# for ix in range(0,xs+1,dx):
for ix in range(0,box_x):
# for iy in range(0,ys+1,dy):
......@@ -247,11 +282,13 @@ def domain_decomposition(data,dx,dy,dz):
# for iz in range(0,zs+1,dz):
for iz in range(0,box_z):
xyz.append([ix,iy,iz])
bar.next()
bar.finish()
box_p[step]['xyz']=xyz
return box_p
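A short sketch of how the returned grid is typically consumed (dx, dy, dz follow DISCET in the script above; data_all comes from the fr_export sketch):

import tooba_f as tbf

box_p = tbf.domain_decomposition(data=data_all, dx=3, dy=3, dz=3)
for step in box_p:
    print(step, len(box_p[step]['x']), len(box_p[step]['y']), len(box_p[step]['z']))
    print('first subdomain index triples:', box_p[step]['xyz'][:3])  # e.g. [[0, 0, 0], [0, 0, 1], [0, 0, 2]]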
def atomid_data(res_num, atom_type, atom_num, group={}):
def atomid_data(res_num, res_type, atom_type, atom_num, group={}):
"""
Finds the indices in the lists that 'def read_gro' returns
which correspond to the atom types in the group dictionary
......@@ -263,18 +300,39 @@ def atomid_data(res_num, atom_type, atom_num, group={}):
output: dictionary {resid:{res_type:{atom_type:[atom_num]}}}
"""
res_ndx=[]
print(' ')
print('Create index progress:')
print('======================')
#bar = Bar('Create index step', max=len(group.keys()))
res_ndx={}
for res,atom in group.items():
res_ndx[res]=[]
for element in atom:
res_ndx=[res_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip()]
tmp=[res_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip()]
res_ndx[res].append(tmp)
#bar.next()
#bar.finish()
res_ndx_flat={}
for res in res_ndx.keys():
res_ndx_flat[res] = [y for x in res_ndx[res] for y in x]
ndx={}
for resid in res_ndx:
ndx[resid.strip()]={}
for res,atom in group.items():
ndx[resid][res]={}
for element in atom:
ndx[resid][res][element]=[atom_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip() and resid.strip()==res_num[i].strip()]
for rs in res_ndx_flat.keys():
bar = Bar(rs, max=len(res_ndx_flat[rs]))
for resid in res_ndx_flat[rs]:
ndx[resid.strip()]={}
for res,atom in group.items():
ndx[resid][res]={}
for element in atom:
tmp=[atom_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip() and resid.strip()==res_num[i].strip() and res.strip()==res_type[i].strip()]
if not tmp:
ndx[resid].pop(res,None)
pass
else:
ndx[resid][res][element]=tmp
bar.next()
bar.finish()
return ndx
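A usage sketch with a head-group dictionary like the one in the script; the nesting of the result follows the docstring:

import tooba_f as tbf

_, data_num, _, res_num, res_type, atom_type, atom_num, _ = tbf.read_gro('system/eq_final.gro')
HD_GROUP = {'NS': ['C6', 'Na', 'P4', 'P3', 'C7'], 'CHOL': ['ROH'], 'FFA': ['AC']}
group_ndx = tbf.atomid_data(res_num, res_type, atom_type, atom_num, group=HD_GROUP)
for resid, residues in group_ndx.items():       # {resid:{res_type:{atom_type:[atom_num]}}}
    for res, atoms in residues.items():
        for element, numbers in atoms.items():
            pass  # numbers is the list of atom numbers for this resid/res_type/atom_type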
def atom2grid(data, box_p, ndx):
......@@ -291,10 +349,15 @@ def atom2grid(data, box_p, ndx):
1. box_res = {step:{resid:{res_type:{atom_type:{atom_num:(subX,subY,subZ)}}}}}
2. box_res_rev = {step:{resid:{res:{(subX,subY,subZ):[atom_num]}}}}
"""
print(' ')
print('Assigning atoms to grid progress:')
print('==============================')
box_res={}
box_res_rev = {}
for step in data.keys():
#print('Step: '+step)
bar = Bar('Step: '+step, max=len(ndx.keys()))
box_res[step]={}
box_res_rev[step] = {}
for resid in ndx.keys():
......@@ -359,7 +422,8 @@ def atom2grid(data, box_p, ndx):
#Keep first key and results
box_res_rev[step][resid][res][kk[0]]=flattened_list
#box_res_rev[step][resid][res]=[i for i, e in enumerate(box_res_rev[step][resid][res]) if e.strip() != .strip()]
bar.next()
bar.finish()
return box_res, box_res_rev
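A sketch of inspecting the two return values (data_all, box_p and group_ndx as in the sketches above); box_res_rev is the layout consumed by sub_coord below:

import tooba_f as tbf

box_res, box_res_rev = tbf.atom2grid(data_all, box_p, group_ndx)
step = next(iter(box_res_rev))
for resid in box_res_rev[step]:
    for res in box_res_rev[step][resid]:
        for subdomain, atom_numbers in box_res_rev[step][resid][res].items():
            print(resid, res, subdomain, atom_numbers)  # (subX,subY,subZ) -> [atom_num]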
def sub_coord(box, data, res_num=[]):
......@@ -373,27 +437,39 @@ def sub_coord(box, data, res_num=[]):
output: norm = {step:{subdomain:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}
vector = {step:{subdomain:{resid:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}}
"""
print(' ')
print('Groupby subdomain progress:')
print('==============================')
coord_norm={}
coord_vector={}
for step in box.keys():
sb=[]
coord_norm[step]={}
coord_vector[step]={}
for resid in box[step].keys():
for res in box[step][resid].keys():
for subdomain in box[step][resid][res].keys():
coord_norm[step][subdomain]=[]
coord_vector[step][subdomain]={}
rs=[]
bar = Bar('Step: '+step, max=len(box[step].keys()))
for resid in box[step].keys():
for res in box[step][resid].keys():
for subdomain in box[step][resid][res].keys():
tmp=[]
if subdomain not in sb:
sb.append(subdomain)
coord_vector[step][subdomain]={}
coord_norm[step][subdomain]=[]
coord_vector[step][subdomain][resid]=[]
#tmp=[]
if (resid,subdomain) not in rs:
rs.append((resid,subdomain))
coord_vector[step][subdomain][resid]=[]
for atomnum in box[step][resid][res][subdomain]:
#tmp_atom=[]
tmp.append(data[step]['x'][int(atomnum)-1])
#tmp.append(data[step]['x'][int(atomnum)-1])
#tmp_atom.append(list(data[step]['x'][int(atomnum)-1])) #not append to previous
coord_vector[step][subdomain][resid].append(list(data[step]['x'][int(atomnum)-1]))
coord_norm[step][subdomain].append(list(data[step]['x'][int(atomnum)-1]))
coord_vector[step][subdomain][resid].append(list(data[step]['x'][int(atomnum)-1]))
bar.next()
bar.finish()
return coord_norm, coord_vector
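A sketch of feeding the two outputs to the surface routines, as the script does for the head groups (img=False skips the png/ plots):

import tooba_f as tbf

coord_norm, coord_vector = tbf.sub_coord(box=box_res_rev, data=data_all, res_num=res_num)
surf_plain = tbf.coord2norm(coord_norm, img=False)    # per-subdomain plane fit only
surf_cg = tbf.coord2norm2cg(coord_vector, img=False)  # plane fit plus per-resid centers of gravity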
def coord2norm(coord,img=True):
......@@ -407,9 +483,14 @@ def coord2norm(coord,img=True):
output: dictionary = {step:{subdomain:{c:int, normal:array[]}}}
"""
print(' ')
print('Finding surface progress:')
print('==============================')
surf={}
for step in coord.keys():
surf[step]={}
bar = Bar('Step: '+step, max=len(coord[step].keys()))
for subdomain in coord[step].keys():
c,normal = fitPlaneLTSQ(np.array(coord[step][subdomain]))
cgx = center_gravity(coord[step][subdomain])
......@@ -423,7 +504,55 @@ def coord2norm(coord,img=True):
plot_surf(np.array(coord[step][subdomain]), normal, c, save)
except:
print("ERROR: Folder png/ doesn't exist in root")
bar.next()
bar.finish()
return surf
def coord2norm2cg(coord_vector,img=True):
"""
Uses the coord_vector from 'def sub_coord' and replaces the XYZ coordinates
with c, normal of the surface that fits the points, and calculates the center of gravity
for each resid. It can also plot the surfaces for each subdomain.
parameters: coord = {step:{subdomain:{resid:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}}
img = True or False
output: dictionary = {step:{subdomain:{c:int, normal:array[], resid:[cgx, cgy, cgz]}}}
"""
print(' ')
print('Finding surface and gravity point progress:')
print('==========================================')
surf={}
for step in coord_vector.keys():
surf[step]={}
bar = Bar('Step: '+step, max=len(coord_vector[step].keys()))
for subdomain in coord_vector[step].keys():
surf[step][subdomain]={}
#surf_points=[]
res_cg={}
cnt=0
for resid in coord_vector[step][subdomain].keys():
cnt=cnt+1
cgx = center_gravity(coord_vector[step][subdomain][resid])
cgy = center_gravity(coord_vector[step][subdomain][resid])
cgz = center_gravity(coord_vector[step][subdomain][resid])
res_cg[resid]=[cgx,cgy,cgz]
if cnt==1:
surf_points=coord_vector[step][subdomain][resid]
else:
surf_points=surf_points+coord_vector[step][subdomain][resid]
c,normal = fitPlaneLTSQ(np.array(surf_points))
surf[step][subdomain]={'c':c, 'normal':normal}
surf[step][subdomain].update(res_cg)
save='png/'+str(subdomain)
if img==True:
try:
plot_surf(np.array(surf_points), normal, c, save)
except:
print("ERROR: Folder png/ doesn't exist in root")
bar.next()
bar.finish()
return surf
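A sketch of reading the result (surf_cg as in the sub_coord sketch above): the 'c' and 'normal' keys hold the fitted plane, and every remaining key is a resid mapped to its [cgx, cgy, cgz]:

for step in surf_cg:
    for subdomain, entry in surf_cg[step].items():
        c, normal = entry['c'], entry['normal']
        for key, value in entry.items():
            if key not in ('c', 'normal'):
                resid, cg = key, value  # value is the [cgx, cgy, cgz] of this resid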
def coord2vector(coord_vector):
......@@ -435,14 +564,20 @@ def coord2vector(coord_vector):
output: dictionary = {step:{subdomain:{resid:[x, y, z]}}}
"""
print(' ')
print('Finding vectors progress:')
print('==============================')
vector={}
for step in coord_vector.keys():
vector[step]={}
bar = Bar('Step: '+step, max=len(coord_vector[step].keys()))
for subdomain in coord_vector[step].keys():
vector[step][subdomain]={}
for resid in coord_vector[step][subdomain].keys():
vv = points2vector(np.array(coord_vector[step][subdomain][resid]))
vector[step][subdomain][resid]=vv
bar.next()
bar.finish()
return vector
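A usage sketch for the tail groups, mirroring the TAIL branch of the script (coord_vector built from the tail-group index):

import tooba_f as tbf

vector = tbf.coord2vector(coord_vector)
for step in vector:
    for subdomain in vector[step]:
        for resid, xyz in vector[step][subdomain].items():
            print(step, subdomain, resid, xyz)  # [x, y, z] of the fitted vector for this resid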
def SurfVector_angle(surf,vector):
......