From 592ec4e6c9ddcdd0fe30773acfb4a4a0a501839d Mon Sep 17 00:00:00 2001 From: skarozis Date: Thu, 14 May 2020 15:57:32 +0300 Subject: [PATCH] Calculate Surface of HEADS & vector of each TAIL for every subdomain --- .gitignore | 2 + CHANGELOG | 13 +++ main.py | 31 +++++-- tooba_f.py | 250 ++++++++++++++++++++++++++++++++++++++++++----------- 4 files changed, 240 insertions(+), 56 deletions(-) diff --git a/.gitignore b/.gitignore index 043820d..b43ee8c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ FORTRAN_vscode/ __pycache__/ .venv/ .vscode/ +*.png +*test* diff --git a/CHANGELOG b/CHANGELOG index 5bfc074..1ce064b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.0.3] - 2020-05-14 +### Added +- code for assign coordinates to subdomain +- code for creating vectors for each tail of lipid in subdomain +- code for creating surface for heads of lipids in subdomain + +### Changed +- change output in def atomid_data() to include resid +- adjust function that use def atomid_data() output accordigly + +### Removed +- None + ## [0.0.2] - 2020-05-11 ### Added - code for domain decomposition function diff --git a/main.py b/main.py index 15284a1..1eea020 100644 --- a/main.py +++ b/main.py @@ -10,9 +10,30 @@ data_all=tbf.fr_export(trajfile='traj.trr',num_frames=1) _,data_num,_,res_num,res_type,atom_type,atom_num,_ = tbf.read_gro('initial.gro') #Create subdomains coordinates box_p=tbf.domain_decomposition(data=data_all,dx=2,dy=2,dz=2) -#Find atom type index in lists created above -group_ndx=tbf.resid_data(atom_type, group=['C1']) -#Assign desired atoms (from above function) to subdomains -_,box_res=tbf.res2grid(data_all,atom_num,box_p, group_ndx) -print(box_res) \ No newline at end of file +################################################### +HEAD=False +if HEAD==True: + #Find atom type index in lists created above + group_ndx=tbf.atomid_data(res_num, atom_type, atom_num, group={'MMA':['C1', 'C2']}) + #Assign desired atoms (from above function) to subdomains + ##result1: {step:{res:{atom_type:{atom_num:(subX,subYsubZ)}}}} + ##result2: {step:{res:{atom_type:{(subX,subYsubZ):[atom_num]}}}} + _,box_res=tbf.atom2grid(data_all,box_p, group_ndx) + #Creates dictionary with coordinates per subdomain for each frame + coord_norm,_=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num) + #Creates dictionary with c, normal per subdomain for each frame + surf=tbf.coord2norm(coord_norm,img=True) + +################################################### +TAIL=True +if TAIL==True: + #Find atom type index in lists created above + group_ndx=tbf.atomid_data(res_num, atom_type, atom_num, group={'MMA':['C3', 'C4', 'C5']}) + #Assign desired atoms (from above function) to subdomains + ##result1: {step:{res:{atom_type:{atom_num:(subX,subYsubZ)}}}} + ##result2: {step:{res:{atom_type:{(subX,subYsubZ):[atom_num]}}}} + _,box_res=tbf.atom2grid(data_all,box_p, group_ndx) + #Creates dictionary with coordinates per subdomain for each frame + _,coord_vector=tbf.sub_coord(box=box_res, data=data_all, res_num=res_num) + vector=tbf.coord2vector(coord_vector) \ No newline at end of file diff --git a/tooba_f.py b/tooba_f.py index f9f80dc..053c650 100644 --- a/tooba_f.py +++ b/tooba_f.py @@ -11,6 +11,7 @@ from pytrr import ( ) def fitPlaneLTSQ(XYZ): + #Source: https://gist.github.com/RustingSword/e22a11e1d391f2ab1f2c (rows, cols) = XYZ.shape G = np.ones((rows, 3)) G[:, 0] = XYZ[:, 0] #X @@ -42,7 +43,7 @@ def angle_between3D(v1, v2): else: return 360 - angle -def plot_surf(data, normal, c): +def plot_surf(data, normal, c, save_name): #Plot surface fig = plt.figure() ax = fig.gca(projection='3d') @@ -69,7 +70,29 @@ def plot_surf(data, normal, c): ax.set_xlabel('x') ax.set_ylabel('y') ax.set_zlabel('z') - plt.show() + plt.savefig(save_name+'.png') + #plt.show() + +def points2vector(data): + """ + Defines the line whose direction vector is the eigenvector + of the covariance matrix corresponding to the largest eigenvalue, + that passes through the mean of the data. + + parameters: data = array[[x y z]] + + output: vector = [X,Y,Z] + """ + # Calculate the mean of the points, i.e. the 'center' of the cloud + datamean = data.mean(axis=0) + + # Do an SVD on the mean-centered data. + uu, dd, vv = np.linalg.svd(data - datamean) + + # Now vv[0] contains the first principal component, i.e. the direction + # vector of the 'best fit' line in the least squares sense. + + return vv[0] def count_frames(trajfile='traj.trr'): """ @@ -133,6 +156,7 @@ def read_gro(gro): atom_type = [] atom_num = [] rest_dt = [] + cnt_atoms=0 with open(gro, 'r') as F: for line in F: cnt=cnt+1 @@ -140,8 +164,12 @@ def read_gro(gro): if cnt>2 and cnt<=data_num+2: res_num.append(line[:5]) res_type.append(line[5:10]) - atom_type.append(line[10:15]) - atom_num.append(line[15:20]) + atom_type.append(line[10:15]) + #Compensate for large .gro files + #ToDo check if data index is the same in the function that follows + #atom_num.append(line[15:20]) + cnt_atoms=cnt_atoms+1 + atom_num.append(str(cnt_atoms)) rest_dt.append(line[20:]) elif cnt==1: system=line[:10] @@ -199,34 +227,45 @@ def domain_decomposition(data,dx,dy,dz): return box_p -def resid_data(atom_type, group=[]): +def atomid_data(res_num, atom_type, atom_num, group={}): """ Finds the index in list that 'def read_gro' returns, and correspond to the atom types in group list - parameters: atom_type=[] - group=[] + parameters: res_num=[] + atom_type=[] + atom_num=[] + group={} - output: dictionary + output: dictionary {resid:{res_type:{atom_type:[atom_num]}}} """ + res_ndx=[] + for res,atom in group.items(): + for element in atom: + res_ndx=[res_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip()] + ndx={} - for element in group: - ndx[element]=[i for i, e in enumerate(atom_type) if e.strip() == element.strip()] - #print(element,ndx) + for resid in res_ndx: + ndx[resid.strip()]={} + for res,atom in group.items(): + ndx[resid][res]={} + for element in atom: + ndx[resid][res][element]=[atom_num[i].strip() for i, e in enumerate(atom_type) if e.strip() == element.strip() and resid.strip()==res_num[i].strip()] return ndx -def res2grid(data, atom_num, box_p, ndx): +def atom2grid(data, box_p, ndx): """ - Assign residue that corresponds to ndx (see 'def resid_data') + Assign atom number that corresponds to ndx (see 'def atomid_data') to sudomains created from 'def domain_decomposition'. The output is a the box location (non zero based) in xyz-space for each atom number. parameters: data = {dictionary input from 'def fr_export'} - atom_num = [list from 'def read_gro'] box_p = {dictionary input from 'def domain_decomposition'} - ndx = {dictionary input from 'def resid_data'} + ndx = {dictionary input from 'def atomid_data'} - output: dictionaries + output: dictionaries + 1. box_res = {step:{resid:{res_type:{atom_type:{atom_num:(subX,subYsubZ)}}}}} + 2. box_res_rev = {step:{resid:{res:{(subX,subYsubZ):[atom_num]}}}} """ box_res={} @@ -234,39 +273,148 @@ def res2grid(data, atom_num, box_p, ndx): for step in data.keys(): box_res[step]={} box_res_rev[step] = {} - for key in ndx.keys(): - for i in ndx[key]: - #data[step]['x'][atom_num][x(0),y(1),z(2)] - - xx=data[step]['x'][i][0] - cnt_x=-1 - prev=-1 - for ix in box_p[step]['x']: - cnt_x=cnt_x+1 - if xxprev: - prev=ix - break - - yy=data[step]['x'][i][1] - cnt_y=-1 - prev=-1 - for iy in box_p[step]['y']: - cnt_y=cnt_y+1 - if yyprev: - prev=iy - break - - zz=data[step]['x'][i][2] - cnt_z=-1 - prev=-1 - for iz in box_p[step]['z']: - cnt_z=cnt_z+1 - if zzprev: - prev=iz - break - box_res[step][atom_num[i]]=(cnt_x,cnt_y,cnt_z) - #Make subdomain position the key and group the residues - for key, value in sorted(box_res[step].items()): - box_res_rev[step].setdefault(value, []).append(key.strip()) + for resid in ndx.keys(): + box_res[step][resid]={} + box_res_rev[step][resid] = {} + for res in ndx[resid].keys(): + box_res[step][resid][res]={} + box_res_rev[step][resid][res]={} + for atom in ndx[resid][res].keys(): + box_res[step][resid][res][atom]={} + box_res_rev[step][resid][res][atom] = {} + for atomnum in ndx[resid][res][atom]: + #data[step]['x'][atom_num-1][x(0),y(1),z(2)] + + xx=data[step]['x'][int(atomnum)-1][0] + cnt_x=-1 + prev=-1 + for ix in box_p[step]['x']: + cnt_x=cnt_x+1 + if xxprev: + prev=ix + break + + yy=data[step]['x'][int(atomnum)-1][1] + cnt_y=-1 + prev=-1 + for iy in box_p[step]['y']: + cnt_y=cnt_y+1 + if yyprev: + prev=iy + break + + zz=data[step]['x'][int(atomnum)-1][2] + cnt_z=-1 + prev=-1 + for iz in box_p[step]['z']: + cnt_z=cnt_z+1 + if zzprev: + prev=iz + break + + box_res[step][resid][res][atom][atomnum]=(cnt_x,cnt_y,cnt_z) + + #Make subdomain position the key and group the residues + for key, value in sorted(box_res[step][resid][res][atom].items()): + box_res_rev[step][resid][res].setdefault(value, []).append(key.strip()) + #Remove atom_type as key of dictionary + box_res_rev[step][resid][res].pop(atom,None) + #If atoms of residue lie in more than one subdomain, put the all in the first one + if len(box_res_rev[step][resid][res]) > 1: + tmp=[] + flattened_list=[] + kk=[] + for sb in box_res_rev[step][resid][res].keys(): + kk.append(sb) + tmp.append(box_res_rev[step][resid][res][sb]) + #Remove keys + for k in kk: + box_res_rev[step][resid][res].pop(k,None) + #flatten the lists + flattened_list = [y for x in tmp for y in x] + #Keep first key and results + box_res_rev[step][resid][res][kk[0]]=flattened_list + #box_res_rev[step][resid][res]=[i for i, e in enumerate(box_res_rev[step][resid][res]) if e.strip() != .strip()] + + return box_res, box_res_rev + +def sub_coord(box, data, res_num=[]): + """ + Use the box_res_rev from 'def atom2grid' and data from 'def fr_export' + and groups all the XYZ coordinates per subdomain + + parameters: box = {step:{resid:{res:{(subX,subYsubZ):[atom_num]}}}} + data = {step:{obj:[x,y,z]}} - return box_res,box_res_rev \ No newline at end of file + output: norm = {step:{subdomain:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}} + vector = {step:{subdomain:resid:{[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}}} + """ + coord_norm={} + coord_vector={} + for step in box.keys(): + sb=[] + coord_norm[step]={} + coord_vector[step]={} + for resid in box[step].keys(): + for res in box[step][resid].keys(): + for subdomain in box[step][resid][res].keys(): + tmp=[] + if subdomain not in sb: + sb.append(subdomain) + coord_vector[step][subdomain]={} + coord_norm[step][subdomain]=[] + coord_vector[step][subdomain][resid]=[] + for atomnum in box[step][resid][res][subdomain]: + #tmp_atom=[] + tmp.append(data[step]['x'][int(atomnum)-1]) + #tmp_atom.append(list(data[step]['x'][int(atomnum)-1])) #not append to previous + coord_vector[step][subdomain][resid].append(list(data[step]['x'][int(atomnum)-1])) + coord_norm[step][subdomain].append(list(data[step]['x'][int(atomnum)-1])) + return coord_norm, coord_vector + +def coord2norm(coord,img=True): + """ + Use the coord from 'def sub_coord' and replaces XYZ coordinates + with c, normal of surface that fits the points. Also it can plot + the surfaces for each subdomain + + parameters: coord = {step:{subdomain:[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}} + img = True or False + + output: dictionary = {step:{subdomain:{c:int, normal:array[]}}} + """ + surf={} + for step in coord.keys(): + surf[step]={} + for subdomain in coord[step].keys(): + c,normal = fitPlaneLTSQ(np.array(coord[step][subdomain])) + surf[step][subdomain]={'c':c, 'normal':normal} + #Change save variable if you want to save .png elsewere + save='png/'+str(subdomain) + if img==True: + try: + plot_surf(np.array(coord[step][subdomain]), normal, c, save) + except: + print("ERROR: Folder png/ doesn't exist in root") + + return surf + +def coord2vector(coord_vector): + """ + Use the coord_vector from 'def sub_coord' and replaces XYZ coordinates + with vector of best fit line + + parameters: coord = {step:{subdomain:array[[X1,Y1,Z1] ... [Xn,Yn,Zn]]}} + img = True or False + + output: dictionary = {step:{subdomain:[x, y, z]} + """ + vector={} + for step in coord_vector.keys(): + vector[step]={} + for subdomain in coord_vector[step].keys(): + vector[step][subdomain]={} + for resid in coord_vector[step][subdomain].keys(): + vv = points2vector(np.array(coord_vector[step][subdomain][resid])) + vector[step][subdomain][resid]=vv + return vector \ No newline at end of file -- 2.24.1