tooba_f.py

import numpy as np
import scipy.optimize

#from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

from pytrr import (
    read_trr_header,
    read_trr_data,
    skip_trr_data,
    )

def fitPlaneLTSQ(XYZ):
    #Source: https://gist.github.com/RustingSword/e22a11e1d391f2ab1f2c
    (rows, cols) = XYZ.shape
    G = np.ones((rows, 3))
    G[:, 0] = XYZ[:, 0]  #X
    G[:, 1] = XYZ[:, 1]  #Y
    Z = XYZ[:, 2]
    (a, b, c),resid,rank,s = np.linalg.lstsq(G, Z, rcond=None)
    normal = (a, b, -1)
    nn = np.linalg.norm(normal)
    normal = normal / nn
    return (c, normal)

def angle_between2D(p1, p2):
    #arctan2 is anticlockwise
    ang1 = np.arctan2(*p1[::-1])
    ang2 = np.arctan2(*p2[::-1])
    angle=np.rad2deg((ang1 - ang2) % (2 * np.pi)) #degree
    if angle<180:
        return angle
    else:
        return 360 - angle

def angle_between3D(v1, v2):
# v1 is your firsr vector
# v2 is your second vector
    angle = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))) #rad
    angle=180*angle/np.pi #degrees
    if angle<180:
        return angle
    else:
        return 360 - angle

def plot_surf(data, normal, c, save_name):
    #Plot surface
    fig = plt.figure()
    ax = fig.gca(projection='3d')

    # plot fitted plane
    maxx = np.max(data[:,0])
    maxy = np.max(data[:,1])
    minx = np.min(data[:,0])
    miny = np.min(data[:,1])

    point = np.array([0.0, 0.0, c])
    d = -point.dot(normal)

    # plot original points
    ax.scatter(data[:, 0], data[:, 1], data[:, 2])

    # compute needed points for plane plotting
    xx, yy = np.meshgrid([minx, maxx], [miny, maxy])
    z = (-normal[0]*xx - normal[1]*yy - d)*1. / normal[2]

    # plot plane
    ax.plot_surface(xx, yy, z, alpha=0.2)

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    plt.savefig(save_name+'.png')
    #plt.show()

def points2vector(data):
    """
    Defines the line whose direction vector is the eigenvector 
    of the covariance matrix corresponding to the largest eigenvalue, 
    that passes through the mean of the data. 

    parameters:     data = array[[x y z]]

    output:         vector = [X,Y,Z]
    """
    # Calculate the mean of the points, i.e. the 'center' of the cloud
    datamean = data.mean(axis=0)

    # Do an SVD on the mean-centered data.
    uu, dd, vv = np.linalg.svd(data - datamean)

    # Now vv[0] contains the first principal component, i.e. the direction
    # vector of the 'best fit' line in the least squares sense.

    return vv[0]

def count_frames(trajfile='traj.trr'):
    """
    Count total frames of .trr file

    parameters:     trajfile = [.trr]
    """
    cnt_fr=0
    with open(trajfile, 'rb') as inputfile:
        for i in range(1000):
            try: 
                header = read_trr_header(inputfile)
                #print('Step: {step}, time: {time}'.format(**header))
                skip_trr_data(inputfile, header)
                cnt_fr=cnt_fr+1
            except EOFError:
                pass
    return cnt_fr

def fr_export(trajfile='traj.trr', num_frames=1):
    """
    Export frames from gromacs .trr file. 

    parameters:     trajfile = [.trr]
                    num_frames = [number of frames to keep
                                  counting from the end of file]
    """
    cnt_fr=count_frames(trajfile)
    data_all={}
    with open(trajfile, 'rb') as inputfile:
        for i in range(cnt_fr-num_frames):
            header = read_trr_header(inputfile)
            #print('Step: {step}, time: {time}'.format(**header))
            skip_trr_data(inputfile, header)
        for i in range(cnt_fr-num_frames,cnt_fr):
            header = read_trr_header(inputfile)
            #print('Step: {step}, time: {time}'.format(**header))
            data = read_trr_data(inputfile, header)
            step='{step}'.format(**header)
            data_all[step]=data
        return data_all

def read_gro(gro):
    """
    Read .gro file and exports 
    1. system name
    2. data number
    3. box size
    4. residue number
    5. residue type
    6. atom type
    7. atom number
    8. free format data (x,y,z,v,u,w)

    parameters:     gro = [.gro]
    """
    cnt=0
    data_num=0
    res_num = [] 
    res_type = []  
    atom_type = [] 
    atom_num  = []
    rest_dt = []
    cnt_atoms=0
    with open(gro, 'r') as F:
        for line in F:
            cnt=cnt+1
            #print(cnt)
            if cnt>2 and cnt<=data_num+2:
                res_num.append(line[:5])  
                res_type.append(line[5:10])  
                atom_type.append(line[10:15])
                #Compensate for large .gro files
                #ToDo check if data index is the same in the function that follows
                #atom_num.append(line[15:20])
                cnt_atoms=cnt_atoms+1
                atom_num.append(str(cnt_atoms))
                rest_dt.append(line[20:])
            elif cnt==1:
                system=line[:10]
            elif cnt==2:
                data_num=int(line[:7])
            elif cnt>data_num+2:
                box_size=line[:50]
        #print(system,data_num,box_size)
        return system,data_num,box_size,res_num,res_type,atom_type,atom_num,rest_dt

def domain_decomposition(data,dx,dy,dz):
    """
    Identify subdomain for each frame of 
    of .trr frame

    parameters:     data = {dictionary input from 'def fr_export'}
                    dx = desired size X of subdomain {units of input}
                    dy = desired size Y of subdomain {units of input}
                    dz = desired size Z of subdomain {units of input}
    
    output:         dictionary of x,y,z grid points per step (frame)
    """
    box_p={}
    for step in data.keys():
        box_p[step]={}
        xs=int(round(data[step]['box'][0][0]+0.1)) #to round always to bigger number
        ys=int(round(data[step]['box'][1][1]+0.1))
        zs=int(round(data[step]['box'][2][2]+0.1))
        box_x=int(xs/dx)
        box_y=int(ys/dy)
        box_z=int(zs/dz)

        xx=[]
        for i in range(0,xs+1,box_x):
            xx.append(i)
        box_p[step]['x']=xx

        yy=[]
        for i in range(0,ys+1,box_y):
            yy.append(i)
        box_p[step]['y']=yy

        zz=[]
        for i in range(0,zs+1,box_z):
            zz.append(i)
        box_p[step]['z']=zz


        xyz=[]
        for ix in range(0,xs+1,box_x):
          for iy in range(0,ys+1,box_y):
            for iz in range(0,zs+1,box_z):
                xyz.append([ix,iy,iz])
        box_p[step]['xyz']=xyz

    return box_p

def atomid_data(res_num, atom_type, atom_num, group={}):
    """
    Finds the index in list that 'def read_gro' returns,
    and correspond to the atom types in group list

    parameters:     res_num=[]
                    atom_type=[]
                    atom_num=[]
                    group={}

    output:         dictionary {resid:{res_type:{atom_type:[atom_num]}}}
    """
    res_ndx=[]
    for res,atom in group.items():
        for element in atom:
            res_ndx=[res_num[i].strip()  for i, e in enumerate(atom_type) if e.strip() == element.strip()]
        
    ndx={}
    for resid in res_ndx:
        ndx[resid.strip()]={}
        for res,atom in group.items():
            ndx[resid][res]={}
            for element in atom:
                ndx[resid][res][element]=[atom_num[i].strip()  for i, e in enumerate(atom_type) if e.strip() == element.strip() and resid.strip()==res_num[i].strip()]
    return ndx

def atom2grid(data, box_p, ndx):
    """
    Assign atom number that corresponds to ndx (see 'def atomid_data')
    to sudomains created from 'def domain_decomposition'. The output
    is a the box location (non zero based) in xyz-space for each atom number.

    parameters:     data = {dictionary input from 'def fr_export'}
                    box_p = {dictionary input from 'def domain_decomposition'}
                    ndx = {dictionary input from 'def atomid_data'}

    output:         dictionaries 
                        1. box_res = {step:{resid:{res_type:{atom_type:{atom_num:(subX,subYsubZ)}}}}}
                        2. box_res_rev = {step:{resid:{res:{(subX,subYsubZ):[atom_num]}}}}
    """

    box_res={}
    box_res_rev = {}
    for step in data.keys():
        box_res[step]={}
        box_res_rev[step] = {}
        for resid in ndx.keys():
            box_res[step][resid]={}
            box_res_rev[step][resid] = {}
            for res in ndx[resid].keys():
                box_res[step][resid][res]={}
                box_res_rev[step][resid][res]={}
                for atom in ndx[resid][res].keys():
                    box_res[step][resid][res][atom]={}
                    box_res_rev[step][resid][res][atom] = {}
                    for atomnum in ndx[resid][res][atom]:
                        #data[step]['x'][atom_num-1][x(0),y(1),z(2)]

                        xx=data[step]['x'][int(atomnum)-1][0]
                        cnt_x=-1
                        prev=-1
                        for ix in box_p[step]['x']:
                            cnt_x=cnt_x+1
                            if xx<ix and xx>prev:
                                prev=ix
                                break

                        yy=data[step]['x'][int(atomnum)-1][1]
                        cnt_y=-1
                        prev=-1
                        for iy in box_p[step]['y']:
                            cnt_y=cnt_y+1
                            if yy<iy and yy>prev:
                                prev=iy
                                break

                        zz=data[step]['x'][int(atomnum)-1][2]
                        cnt_z=-1
                        prev=-1
                        for iz in box_p[step]['z']:
                            cnt_z=cnt_z+1
                            if zz<iz and zz>prev:
                                prev=iz
                                break

                        box_res[step][resid][res][atom][atomnum]=(cnt_x,cnt_y,cnt_z)

                    #Make subdomain position the key and group the residues
                    for key, value in sorted(box_res[step][resid][res][atom].items()):
                        box_res_rev[step][resid][res].setdefault(value, []).append(key.strip())
                    #Remove  atom_type as key of dictionary
                    box_res_rev[step][resid][res].pop(atom,None)
                    #If atoms of residue lie in more than one subdomain, put the all in the first one
                    if len(box_res_rev[step][resid][res]) > 1:
                        tmp=[]
                        flattened_list=[]
                        kk=[]
                        for sb in box_res_rev[step][resid][res].keys():
                            kk.append(sb)
                            tmp.append(box_res_rev[step][resid][res][sb])
                        #Remove keys
                        for k in kk: 
                            box_res_rev[step][resid][res].pop(k,None)
                        #flatten the lists
                        flattened_list = [y for x in tmp for y in x]
                        #Keep first key and results
                        box_res_rev[step][resid][res][kk[0]]=flattened_list
                    #box_res_rev[step][resid][res]=[i  for i, e in enumerate(box_res_rev[step][resid][res]) if e.strip() != .strip()]

    return box_res, box_res_rev

def sub_coord(box, data, res_num=[]):
    """
    Use the box_res_rev from 'def atom2grid' and data from 'def fr_export'
    and groups all the XYZ coordinates per subdomain

    parameters:         box = {step:{resid:{res:{(subX,subYsubZ):[atom_num]}}}}
                        data = {step:{obj:[x,y,z]}}
    
    output:             norm = {step:{subdomain:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}
                        vector = {step:{subdomain:resid:{[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}}
    """
    coord_norm={}
    coord_vector={}
    for step in box.keys():
        sb=[]
        coord_norm[step]={}
        coord_vector[step]={}
        for resid in box[step].keys():
            for res in box[step][resid].keys():
                for subdomain in box[step][resid][res].keys():
                    tmp=[]
                    if subdomain not in sb:
                        sb.append(subdomain)
                        coord_vector[step][subdomain]={}
                        coord_norm[step][subdomain]=[]
                    coord_vector[step][subdomain][resid]=[]
                    for atomnum in box[step][resid][res][subdomain]:
                        #tmp_atom=[]
                        tmp.append(data[step]['x'][int(atomnum)-1])
                        #tmp_atom.append(list(data[step]['x'][int(atomnum)-1])) #not append to previous
                        coord_vector[step][subdomain][resid].append(list(data[step]['x'][int(atomnum)-1]))
                        coord_norm[step][subdomain].append(list(data[step]['x'][int(atomnum)-1]))
    return coord_norm, coord_vector

def coord2norm(coord,img=True):
    """
    Use the coord from 'def sub_coord' and replaces XYZ coordinates
    with c, normal of surface that fits the points. Also it can plot
    the surfaces for each subdomain

    parameters:         coord = {step:{subdomain:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}
                        img = True or False
    
    output:             dictionary = {step:{subdomain:{c:int, normal:array[]}}}
    """
    surf={}
    for step in coord.keys():
        surf[step]={}
        for subdomain in coord[step].keys():
            c,normal = fitPlaneLTSQ(np.array(coord[step][subdomain]))
            surf[step][subdomain]={'c':c, 'normal':normal}
            #Change save variable if you want to save .png elsewere
            save='png/'+str(subdomain)
            if img==True:
                try:
                    plot_surf(np.array(coord[step][subdomain]), normal, c, save)
                except:
                    print("ERROR: Folder png/ doesn't exist in root")

    return surf

def coord2vector(coord_vector):
    """
    Use the coord_vector from 'def sub_coord' and replaces XYZ coordinates
    with vector of best fit line

    parameters:         coord = {step:{subdomain:{resid:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}
        
    output:             dictionary = {step:{subdomain:{resid:[x, y, z]}}
    """
    vector={}
    for step in coord_vector.keys():
        vector[step]={}
        for subdomain in coord_vector[step].keys():
            vector[step][subdomain]={}
            for resid in coord_vector[step][subdomain].keys():
                vv = points2vector(np.array(coord_vector[step][subdomain][resid]))
                vector[step][subdomain][resid]=vv
    return vector

def SurfVector_angle(surf,vector):
    """
    Use the surf and vector from 'def coord2norm' and 'def coord2vector'
    and calculate the angle between them for every step, sudomain and resid

    parameters:         surf = {step:{subdomain:{c:int, normal:array[]}}}
                        vector = {step:{subdomain:{resid:[x, y, z]}}
    
    output:             angle = {step:{subdomain:{resid:angle}}
    """
    angle={}
    for step in surf.keys():
        angle[step]={}
        for sudomain in surf[step].keys():
            angle[step][sudomain]={}
            for resid in vector[step][sudomain].keys():
                P1=tuple(surf[step][sudomain]['normal'])
                P2=tuple(vector[step][sudomain][resid])
                #print(tbf.angle_between3D(P1,P2))
                angle[step][sudomain][resid]=angle_between3D(P1,P2)
    return angle