From 592ec4e6c9ddcdd0fe30773acfb4a4a0a501839d Mon Sep 17 00:00:00 2001
From: skarozis <skarozis@ipta.demokritos.gr>
Date: Thu, 14 May 2020 15:57:32 +0300
Subject: [PATCH] Calculate Surface of HEADS & vector of each TAIL for every
 subdomain

---
 .gitignore |   2 +
 CHANGELOG  |  13 +++
 main.py    |  31 +++++--
 tooba_f.py | 250 ++++++++++++++++++++++++++++++++++++++++++-----------
 4 files changed, 240 insertions(+), 56 deletions(-)

diff --git a/.gitignore b/.gitignore
index 043820d..b43ee8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@ FORTRAN_vscode/
 __pycache__/
 .venv/
 .vscode/
+*.png
+*test*
diff --git a/CHANGELOG b/CHANGELOG
index 5bfc074..1ce064b 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.0.3] - 2020-05-14
+### Added
+- code for assign coordinates to subdomain
+- code for creating vectors for each tail of lipid in subdomain
+- code for creating surface for heads of lipids in subdomain
+
+### Changed
+- change output in def atomid_data() to include resid
+- adjust function that use def atomid_data() output accordigly
+
+### Removed
+- None
+
 ## [0.0.2] - 2020-05-11
 ### Added
 - code for domain decomposition function
diff --git a/main.py b/main.py
index 15284a1..1eea020 100644
--- a/main.py
+++ b/main.py
@@ -10,9 +10,30 @@ data_all=tbf.fr_export(trajfile='traj.trr',num_frames=1)
 _,data_num,_,res_num,res_type,atom_type,atom_num,_ = tbf.read_gro('initial.gro')
 #Create subdomains coordinates
 box_p=tbf.domain_decomposition(data=data_all,dx=2,dy=2,dz=2)
-#Find atom type index in lists created above
-group_ndx=tbf.resid_data(atom_type, group=['C1'])
-#Assign desired atoms (from above function) to subdomains
-_,box_res=tbf.res2grid(data_all,atom_num,box_p, group_ndx)
 
-print(box_res)
\ No newline at end of file
+###################################################
+HEAD=False
+if HEAD==True:
+    #Find atom type index in lists created above
+    group_ndx=tbf.atomid_data(res_num, atom_type, atom_num, group={'MMA':['C1', 'C2']})
+    #Assign desired atoms (from above function) to subdomains
+    ##result1: {step:{res:{atom_type:{atom_num:(subX,subYsubZ)}}}}
+    ##result2: {step:{res:{atom_type:{(subX,subYsubZ):[atom_num]}}}}
+    _,box_res=tbf.atom2grid(data_all,box_p, group_ndx)
+    #Creates dictionary with coordinates per subdomain for each frame
+    coord_norm,_=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num)
+    #Creates dictionary with c, normal per subdomain for each frame
+    surf=tbf.coord2norm(coord_norm,img=True)
+
+###################################################
+TAIL=True
+if TAIL==True:
+    #Find atom type index in lists created above
+    group_ndx=tbf.atomid_data(res_num, atom_type, atom_num, group={'MMA':['C3', 'C4', 'C5']})
+    #Assign desired atoms (from above function) to subdomains
+    ##result1: {step:{res:{atom_type:{atom_num:(subX,subYsubZ)}}}}
+    ##result2: {step:{res:{atom_type:{(subX,subYsubZ):[atom_num]}}}}
+    _,box_res=tbf.atom2grid(data_all,box_p, group_ndx)
+    #Creates dictionary with coordinates per subdomain for each frame
+    _,coord_vector=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num)
+    vector=tbf.coord2vector(coord_vector)
\ No newline at end of file
diff --git a/tooba_f.py b/tooba_f.py
index f9f80dc..053c650 100644
--- a/tooba_f.py
+++ b/tooba_f.py
@@ -11,6 +11,7 @@ from pytrr import (
     )
 
 def fitPlaneLTSQ(XYZ):
+    #Source: https://gist.github.com/RustingSword/e22a11e1d391f2ab1f2c
     (rows, cols) = XYZ.shape
     G = np.ones((rows, 3))
     G[:, 0] = XYZ[:, 0]  #X
@@ -42,7 +43,7 @@ def angle_between3D(v1, v2):
     else:
         return 360 - angle
 
-def plot_surf(data, normal, c):
+def plot_surf(data, normal, c, save_name):
     #Plot surface
     fig = plt.figure()
     ax = fig.gca(projection='3d')
@@ -69,7 +70,29 @@ def plot_surf(data, normal, c):
     ax.set_xlabel('x')
     ax.set_ylabel('y')
     ax.set_zlabel('z')
-    plt.show()
+    plt.savefig(save_name+'.png')
+    #plt.show()
+
+def points2vector(data):
+    """
+    Defines the line whose direction vector is the eigenvector 
+    of the covariance matrix corresponding to the largest eigenvalue, 
+    that passes through the mean of the data. 
+
+    parameters:     data = array[[x y z]]
+
+    output:         vector = [X,Y,Z]
+    """
+    # Calculate the mean of the points, i.e. the 'center' of the cloud
+    datamean = data.mean(axis=0)
+
+    # Do an SVD on the mean-centered data.
+    uu, dd, vv = np.linalg.svd(data - datamean)
+
+    # Now vv[0] contains the first principal component, i.e. the direction
+    # vector of the 'best fit' line in the least squares sense.
+
+    return vv[0]
 
 def count_frames(trajfile='traj.trr'):
     """
@@ -133,6 +156,7 @@ def read_gro(gro):
     atom_type = [] 
     atom_num  = []
     rest_dt = []
+    cnt_atoms=0
     with open(gro, 'r') as F:
         for line in F:
             cnt=cnt+1
@@ -140,8 +164,12 @@ def read_gro(gro):
             if cnt>2 and cnt<=data_num+2:
                 res_num.append(line[:5])  
                 res_type.append(line[5:10])  
-                atom_type.append(line[10:15]) 
-                atom_num.append(line[15:20])
+                atom_type.append(line[10:15])
+                #Compensate for large .gro files
+                #ToDo check if data index is the same in the function that follows
+                #atom_num.append(line[15:20])
+                cnt_atoms=cnt_atoms+1
+                atom_num.append(str(cnt_atoms))
                 rest_dt.append(line[20:])
             elif cnt==1:
                 system=line[:10]
@@ -199,34 +227,45 @@ def domain_decomposition(data,dx,dy,dz):
 
     return box_p
 
-def resid_data(atom_type, group=[]):
+def atomid_data(res_num, atom_type, atom_num, group={}):
     """
     Finds the index in list that 'def read_gro' returns,
     and correspond to the atom types in group list
 
-    parameters:     atom_type=[]
-                    group=[]
+    parameters:     res_num=[]
+                    atom_type=[]
+                    atom_num=[]
+                    group={}
 
-    output:         dictionary
+    output:         dictionary {resid:{res_type:{atom_type:[atom_num]}}}
     """
+    res_ndx=[]
+    for res,atom in group.items():
+        for element in atom:
+            res_ndx=[res_num[i].strip()  for i, e in enumerate(atom_type) if e.strip() == element.strip()]
+        
     ndx={}
-    for element in group:
-        ndx[element]=[i for i, e in enumerate(atom_type) if e.strip() == element.strip()]
-        #print(element,ndx)
+    for resid in res_ndx:
+        ndx[resid.strip()]={}
+        for res,atom in group.items():
+            ndx[resid][res]={}
+            for element in atom:
+                ndx[resid][res][element]=[atom_num[i].strip()  for i, e in enumerate(atom_type) if e.strip() == element.strip() and resid.strip()==res_num[i].strip()]
     return ndx
 
-def res2grid(data, atom_num, box_p, ndx):
+def atom2grid(data, box_p, ndx):
     """
-    Assign residue that corresponds to ndx (see 'def resid_data')
+    Assign atom number that corresponds to ndx (see 'def atomid_data')
     to sudomains created from 'def domain_decomposition'. The output
     is a the box location (non zero based) in xyz-space for each atom number.
 
     parameters:     data = {dictionary input from 'def fr_export'}
-                    atom_num = [list from 'def read_gro']
                     box_p = {dictionary input from 'def domain_decomposition'}
-                    ndx = {dictionary input from 'def resid_data'}
+                    ndx = {dictionary input from 'def atomid_data'}
 
-    output:         dictionaries
+    output:         dictionaries 
+                        1. box_res = {step:{resid:{res_type:{atom_type:{atom_num:(subX,subYsubZ)}}}}}
+                        2. box_res_rev = {step:{resid:{res:{(subX,subYsubZ):[atom_num]}}}}
     """
 
     box_res={}
@@ -234,39 +273,148 @@ def res2grid(data, atom_num, box_p, ndx):
     for step in data.keys():
         box_res[step]={}
         box_res_rev[step] = {}
-        for key in ndx.keys():
-            for i in ndx[key]:
-                #data[step]['x'][atom_num][x(0),y(1),z(2)]
-
-                xx=data[step]['x'][i][0]
-                cnt_x=-1
-                prev=-1
-                for ix in box_p[step]['x']:
-                    cnt_x=cnt_x+1
-                    if xx<ix and xx>prev:
-                        prev=ix
-                        break
-
-                yy=data[step]['x'][i][1]
-                cnt_y=-1
-                prev=-1
-                for iy in box_p[step]['y']:
-                    cnt_y=cnt_y+1
-                    if yy<iy and yy>prev:
-                        prev=iy
-                        break
-
-                zz=data[step]['x'][i][2]
-                cnt_z=-1
-                prev=-1
-                for iz in box_p[step]['z']:
-                    cnt_z=cnt_z+1
-                    if zz<iz and zz>prev:
-                        prev=iz
-                        break
-                box_res[step][atom_num[i]]=(cnt_x,cnt_y,cnt_z)
-        #Make subdomain position the key and group the residues
-        for key, value in sorted(box_res[step].items()):
-            box_res_rev[step].setdefault(value, []).append(key.strip())
+        for resid in ndx.keys():
+            box_res[step][resid]={}
+            box_res_rev[step][resid] = {}
+            for res in ndx[resid].keys():
+                box_res[step][resid][res]={}
+                box_res_rev[step][resid][res]={}
+                for atom in ndx[resid][res].keys():
+                    box_res[step][resid][res][atom]={}
+                    box_res_rev[step][resid][res][atom] = {}
+                    for atomnum in ndx[resid][res][atom]:
+                        #data[step]['x'][atom_num-1][x(0),y(1),z(2)]
+
+                        xx=data[step]['x'][int(atomnum)-1][0]
+                        cnt_x=-1
+                        prev=-1
+                        for ix in box_p[step]['x']:
+                            cnt_x=cnt_x+1
+                            if xx<ix and xx>prev:
+                                prev=ix
+                                break
+
+                        yy=data[step]['x'][int(atomnum)-1][1]
+                        cnt_y=-1
+                        prev=-1
+                        for iy in box_p[step]['y']:
+                            cnt_y=cnt_y+1
+                            if yy<iy and yy>prev:
+                                prev=iy
+                                break
+
+                        zz=data[step]['x'][int(atomnum)-1][2]
+                        cnt_z=-1
+                        prev=-1
+                        for iz in box_p[step]['z']:
+                            cnt_z=cnt_z+1
+                            if zz<iz and zz>prev:
+                                prev=iz
+                                break
+
+                        box_res[step][resid][res][atom][atomnum]=(cnt_x,cnt_y,cnt_z)
+
+                    #Make subdomain position the key and group the residues
+                    for key, value in sorted(box_res[step][resid][res][atom].items()):
+                        box_res_rev[step][resid][res].setdefault(value, []).append(key.strip())
+                    #Remove  atom_type as key of dictionary
+                    box_res_rev[step][resid][res].pop(atom,None)
+                    #If atoms of residue lie in more than one subdomain, put the all in the first one
+                    if len(box_res_rev[step][resid][res]) > 1:
+                        tmp=[]
+                        flattened_list=[]
+                        kk=[]
+                        for sb in box_res_rev[step][resid][res].keys():
+                            kk.append(sb)
+                            tmp.append(box_res_rev[step][resid][res][sb])
+                        #Remove keys
+                        for k in kk: 
+                            box_res_rev[step][resid][res].pop(k,None)
+                        #flatten the lists
+                        flattened_list = [y for x in tmp for y in x]
+                        #Keep first key and results
+                        box_res_rev[step][resid][res][kk[0]]=flattened_list
+                    #box_res_rev[step][resid][res]=[i  for i, e in enumerate(box_res_rev[step][resid][res]) if e.strip() != .strip()]
+
+    return box_res, box_res_rev
+
+def sub_coord(box, data, res_num=[]):
+    """
+    Use the box_res_rev from 'def atom2grid' and data from 'def fr_export'
+    and groups all the XYZ coordinates per subdomain
+
+    parameters:         box = {step:{resid:{res:{(subX,subYsubZ):[atom_num]}}}}
+                        data = {step:{obj:[x,y,z]}}
     
-    return box_res,box_res_rev
\ No newline at end of file
+    output:             norm = {step:{subdomain:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}
+                        vector = {step:{subdomain:resid:{[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}}
+    """
+    coord_norm={}
+    coord_vector={}
+    for step in box.keys():
+        sb=[]
+        coord_norm[step]={}
+        coord_vector[step]={}
+        for resid in box[step].keys():
+            for res in box[step][resid].keys():
+                for subdomain in box[step][resid][res].keys():
+                    tmp=[]
+                    if subdomain not in sb:
+                        sb.append(subdomain)
+                        coord_vector[step][subdomain]={}
+                        coord_norm[step][subdomain]=[]
+                    coord_vector[step][subdomain][resid]=[]
+                    for atomnum in box[step][resid][res][subdomain]:
+                        #tmp_atom=[]
+                        tmp.append(data[step]['x'][int(atomnum)-1])
+                        #tmp_atom.append(list(data[step]['x'][int(atomnum)-1])) #not append to previous
+                        coord_vector[step][subdomain][resid].append(list(data[step]['x'][int(atomnum)-1]))
+                        coord_norm[step][subdomain].append(list(data[step]['x'][int(atomnum)-1]))
+    return coord_norm, coord_vector
+
+def coord2norm(coord,img=True):
+    """
+    Use the coord from 'def sub_coord' and replaces XYZ coordinates
+    with c, normal of surface that fits the points. Also it can plot
+    the surfaces for each subdomain
+
+    parameters:         coord = {step:{subdomain:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}
+                        img = True or False
+    
+    output:             dictionary = {step:{subdomain:{c:int, normal:array[]}}}
+    """
+    surf={}
+    for step in coord.keys():
+        surf[step]={}
+        for subdomain in coord[step].keys():
+            c,normal = fitPlaneLTSQ(np.array(coord[step][subdomain]))
+            surf[step][subdomain]={'c':c, 'normal':normal}
+            #Change save variable if you want to save .png elsewere
+            save='png/'+str(subdomain)
+            if img==True:
+                try:
+                    plot_surf(np.array(coord[step][subdomain]), normal, c, save)
+                except:
+                    print("ERROR: Folder png/ doesn't exist in root")
+
+    return surf
+
+def coord2vector(coord_vector):
+    """
+    Use the coord_vector from 'def sub_coord' and replaces XYZ coordinates
+    with vector of best fit line
+
+    parameters:         coord = {step:{subdomain:array[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}
+                        img = True or False
+    
+    output:             dictionary = {step:{subdomain:[x, y, z]}
+    """
+    vector={}
+    for step in coord_vector.keys():
+        vector[step]={}
+        for subdomain in coord_vector[step].keys():
+            vector[step][subdomain]={}
+            for resid in coord_vector[step][subdomain].keys():
+                vv = points2vector(np.array(coord_vector[step][subdomain][resid]))
+                vector[step][subdomain][resid]=vv
+    return vector
\ No newline at end of file
-- 
2.24.1