#from __future__ import unicode_literals
#
'''
SASMOL: Copyright (C) 2011 Joseph E. Curtis, Ph.D.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
import sys
import string
import locale
import numpy
import io
import logging
from . import config as config
# PDB_IO
#
# 12/5/2009 -- initial coding : jc
# 12/10/2009 -- doc strings : jc
# 01/01/2011 -- added dcdio wrappers : jc
# 08/26/2016 -- forked from file_io : jc
#
#LC 1 2 3 4 5 6 7
#LC4567890123456789012345678901234567890123456789012345678901234567890123456789
# * **
'''
PDB_IO is the main module that contains classes that
read and write protein data bank files from and to the hard disk,
The methods (read_pdb, write_pdb) are used to read and write data
using the PDB file format as described at the following
web-site:
http://deposit.rcsb.org/adit/docs/pdb_atom_format.html
These classes are accessed by the Atom class found in
the sasmol.system module through the file_io File() class.
'''
[docs]class PDB(object):
def _pdb_optional_field_value(self, field_name, atom_index, default_value, fill_missing_optional):
if not fill_missing_optional:
return getattr(self, field_name)[atom_index]
try:
return getattr(self, field_name)[atom_index]
except (AttributeError, IndexError, TypeError):
return default_value
[docs] def print_error(self,name,my_message):
error = []
message = '\nELEMENT NAME NOT IN CHARMM DATABASE AND SINGLE CHARACTER OPTION NOT APPLICABLE\n'
message += '\n'+name+'\n'
message += my_message+'\n'
message += ' stopping now: name = '+name+'\n'
error.append(message)
return error
[docs] def check_error(self,error):
if(len(error)>0):
print(error)
sys.exit()
return
[docs] def element_filter(self):
'''
This function filters the PDB file to sraighten
out various things (add to this as you go along).
Filter element list...
'''
unique_elements = []
error = []
single_atom_names = ['H','F','B','D','C','N','O','S','P','I','K','U','V','W','Y']
for i in range(len(self._name)):
name = self._name[i].upper()
resname = self._resname[i].upper()
if (self._element[i]=='' or self._element[i]==' ' or self._element[i]==' '):
natoms = len(self._name[i])
error,self._element[i] = self.get_element(name,resname)
self.check_error(error)
#
### OPEN Error exception handling stub
#
if(self._element[i] not in unique_elements):
unique_elements.append(self._element[i])
return unique_elements
[docs] def get_element(self,name,resname):
'''
Get elment from charmm27 atom name list plus extras from periodic table
and resolve name conflicts
'''
error = []
element_name = ''
#hydrogen,carbon,nitrogen,oxygen,sulfur,phosphorus,other = self.charmm_names()
charmm_names = self.charmm_names()
hydrogen = charmm_names['hydrogen']
carbon = charmm_names['carbon']
nitrogen = charmm_names['nitrogen']
oxygen = charmm_names['oxygen']
sulfur = charmm_names['sulfur']
phosphorus = charmm_names['phosphorus']
other = charmm_names['other']
conflict_atoms = ['CD','CE','HE','HG','NE','ND','NB','PB','PA']
if(name in conflict_atoms and (name == resname)):
element_name = name
elif(name in hydrogen): element_name = 'H'
elif(name in carbon): element_name = 'C'
elif(name in nitrogen): element_name = 'N'
elif(name in oxygen): element_name = 'O'
elif(name in sulfur): element_name = 'S'
elif(name in phosphorus): element_name = 'P'
elif(name == '1H'): element_name = name
elif(name == '2H'): element_name = "D"
elif(name == "SOD"): element_name = 'NA'
elif(name == "POT"): element_name = 'K'
elif(name == "CAL"): element_name = 'CA'
elif(name == "CLA"): element_name = 'CL'
elif(name == "CES"): element_name = 'CS'
elif(name == "F2'"): element_name = 'F'
elif(name in other): element_name = name
elif(len(name)>0):
if(name[0].isalpha()):
element_name = name[0]
elif(name[0].isdigit() and len(name)>1):
if(name[1].isalpha()):
element_name = name[1]
else:
my_message = '\nFIRST AND SECOND CHARACTER IN ATOM NAME CAN NOT BE A NUMBER\n'
error = self.print_error(name,my_message)
else:
my_message = '\nFIRST CHARACTER IN ATOM NAME CAN NOT BE A NUMBER\n'
error = self.print_error(name,my_message)
else:
my_message = '\nNO CHARACTERS FOUND FOR ATOM NAME\n'
error = self.print_error(name,my_message)
return error,element_name
[docs] def write_pdb(self,filename,frame,flag,**kwargs):
'''
This method writes the PDB file
'''
debug=0
result=1
conect = False
fill_missing_optional = kwargs.get('fill_missing_optional', False)
logger = logging.getLogger(__name__)
if(flag=='w' or flag=='W'):
infile=open(filename,'w')
elif(flag=='a' or flag=='A'):
infile=open(filename,'a')
if 'conect' in kwargs:
conect = kwargs['conect']
if 'model' in kwargs:
this_frame = kwargs['model']
infile.write("MODEL "+str(this_frame)+"\n")
if(debug==1):
infile.write(" 1 2 3 4 5 6 7 \n")
infile.write("01234567890123456789012345678901234567890123456789012345678901234567890123456789\n")
for i in range(len(self._atom)):
this_index = self._index[i]
this_resid = self._resid[i]
if(this_index > 99999):
this_index = '99999'
elif(this_index < -9999):
this_index = '-9999'
else:
this_index = str(this_index)
if(this_resid > 9999):
this_resid = '9999'
elif(this_resid < -999):
this_resid = '-999'
try:
sx = "{0:8.3f}".format(float(self._coor[frame,i,0]))[:8]
sy = "{0:8.3f}".format(float(self._coor[frame,i,1]))[:8]
sz = "{0:8.3f}".format(float(self._coor[frame,i,2]))[:8]
loc = self._pdb_optional_field_value('_loc',i,' ',fill_missing_optional)
rescode = self._pdb_optional_field_value('_rescode',i,' ',fill_missing_optional)
occupancy = self._pdb_optional_field_value('_occupancy',i,'0.00',fill_missing_optional)
beta = self._pdb_optional_field_value('_beta',i,'0.00',fill_missing_optional)
segname = self._pdb_optional_field_value('_segname',i,' ',fill_missing_optional)
element = self._pdb_optional_field_value('_element',i,' ',fill_missing_optional)
charge = self._pdb_optional_field_value('_charge',i,' ',fill_missing_optional)
infile.write("%-6s%5s %-4s%1s%-4s%1s%4s%1s %8s%8s%8s%6s%6s %-4s%2s%2s\n" % (self._atom[i],this_index,self._name[i],loc,self._resname[i],self._chain[i],this_resid,rescode,sx,sy,sz,occupancy,beta,segname,element,charge))
except Exception:
if config.__logging_level__ == 'DEBUG':
logger.debug('Unexpected error writing PDB atom index %s', i, exc_info=True)
if fill_missing_optional:
infile.close()
raise
# TODO: Check with Joseph to see if logic acceptable -
# i.e. is 'final' always accompanied by 'model'?
if ('final' in kwargs) or ('model' not in kwargs):
if conect:
conect_lines = self.create_conect_pdb_lines()
for line in conect_lines:
infile.write(line + '\n')
infile.write("END\n")
else:
infile.write("ENDMDL\n")
# if 'model' in kwargs:
# infile.write("ENDMDL\n")
# else:
# if conect:
# conect_lines = self.create_conect_pdb_lines()
# for line in conect_lines:
# infile.write(line + '\n')
#
# infile.write("END\n")
#
# if 'final' in kwargs:
# infile.write("END\n")
infile.close()
return result
[docs] def get_resnames(self):
'''
This method holds names of residues to use to set moltype. Based on Charmm 27 naming.
'''
protein_resnames=['ALA','ARG','ASN','ASP','CYS','GLN','GLU','GLY','HIS','HSD','HSE','HSP','ILE','LEU','LYS','MET','PHE','PRO','SER','THR','TRP','TYR','VAL']
dna_resnames=['NUSA','NUSG','NUSC','NUSU','DA','DG','DC','DT','ADE','GUA','CYT','THY']
rna_resnames=['RNUS','RNUA','RUUG','RNUC','A', 'C', 'G', 'U','ADE','CYT','GUA','URA']
nucleic_resnames = ['GUA','ADE','CYT','THY','URA','G', 'A', 'C', 'T', 'U','DA','DG','DC','DT']
water_resnames=['TIP3','SPCE','TIP','SPC','TIP4','TP3M']
return protein_resnames,dna_resnames,rna_resnames,nucleic_resnames,water_resnames
[docs] def initialize_children(self):
'''
This initializes the following "children" with their
masks already defined to the "parent" molecule
names() : names_mask()
resnames() : resnames_mask()
resids() : resids_mask()
chains() : chains_mask()
segnames() : segnames_mask()
occupancies() : occupancies_mask()
betas() : betas_mask()
elements() : elements_mask()
The objects on the left contain the unique values and
the objects on the right contain the masks that have
the indices to extract the information for each unique
value from the parent molecule.
'''
number_of_names = self.number_of_names() ; unique_names = self.names()
number_of_resnames = self.number_of_resnames() ; unique_resnames = self.resnames()
number_of_resids = self.number_of_resids() ; unique_resids = self.resids()
number_of_chains = self.number_of_chains() ; unique_chains = self.chains()
number_of_occupancies = self.number_of_occupancies() ; unique_occupancies = self.occupancies()
number_of_betas = self.number_of_betas() ; unique_betas = self.betas()
number_of_elements = self.number_of_elements() ; unique_elements = self.elements()
number_of_segnames = self.number_of_segnames() ; unique_segnames = self.segnames()
natoms = self.natoms()
name = self.name() ; resname = self.resname() ; resid = self.resid() ; chain = self.chain()
occupancy = self.occupancy() ; beta = self.beta() ; element = self.element() ; segname = self.segname()
names = [] ; resnames = [] ; resids = [] ; chains = []
occupancies = [] ; betas = [] ; elements = [] ; segments = []
names_mask = numpy.zeros((number_of_names,natoms),int)
resnames_mask = numpy.zeros((number_of_resnames,natoms),int)
resids_mask = numpy.zeros((number_of_resids,natoms),int)
chains_mask = numpy.zeros((number_of_chains,natoms),int)
occupancies_mask = numpy.zeros((number_of_occupancies,natoms),int)
betas_mask = numpy.zeros((number_of_betas,natoms),int)
elements_mask = numpy.zeros((number_of_elements,natoms),int)
segnames_mask = numpy.zeros((number_of_segnames,natoms),int)
nresid = 0
for i in range(natoms):
names_mask[unique_names.index(name[i])][i] = 1
resnames_mask[unique_resnames.index(resname[i])][i] = 1
resids_mask[unique_resids.index(resid[i])][i] = 1
chains_mask[unique_chains.index(chain[i])][i] = 1
occupancies_mask[unique_occupancies.index(occupancy[i])][i] = 1
betas_mask[unique_betas.index(beta[i])][i] = 1
elements_mask[unique_elements.index(element[i])][i] = 1
segnames_mask[unique_segnames.index(segname[i])][i] = 1
self._names_mask = names_mask
self._resnames_mask = resnames_mask
self._resids_mask = resids_mask
self._chains_mask = chains_mask
self._occupancies_mask = occupancies_mask
self._betas_mask = betas_mask
self._elements_mask = elements_mask
self._segnames_mask = segnames_mask
return
[docs] def check_for_all_zero_columns(self, coor, frame=0):
'''
Make sure there are no two all zero columns in coordinates
This is needed for the alignment module
'''
SMALL = 1.0E-10
x0,y0,z0=False,False,False
if sum(coor[frame][:,0]**2)<SMALL:
x0=True
if sum(coor[frame][:,1]**2)<SMALL:
y0=True
if sum(coor[frame][:,2]**2)<SMALL:
z0=True
if x0:
coor[frame][0][0]=SMALL
if y0:
coor[frame][0][1]=SMALL
if z0:
coor[frame][0][2]=SMALL
return
[docs] def read_pdb(self,filename,**kwargs):
'''
This method reads a PDB file.
'''
debug=0
result=1
protein_resnames,dna_resnames,rna_resnames,nucleic_resnames,water_resnames = self.get_resnames()
### LONG Need to properly import header information
# see: http://deposit.rcsb.org/adit/docs/pdb_atom_format.html
fastread = False
pdbscan = False
printme = True
printme = False
if 'verbose' in kwargs:
printme = kwargs['verbose']
if 'fastread' in kwargs:
fastread = kwargs['fastread']
if(fastread):
printme = False
if 'pdbscan' in kwargs:
pdbscan = kwargs['pdbscan']
with io.open(filename, 'r') as temp_infile:
infile = temp_infile.readlines()
if(printme): print('reading filename: ',filename)
atom=[] ; index=[] ; original_index=[] ; name=[] ; loc=[] ; resname=[] ; chain=[] ; resid=[] ; rescode=[]
x=[] ; y=[] ; z=[]
occupancy=[] ; beta=[] ; segname=[] ; element=[] ; charge=[] ; moltype=[] ; conect = {}
residue_flag = [] ; original_resid=[] ; header = []
# first check to see how many frames are in the file
num_model = 0
num_endmdl = 0
num_end = 0
num_frames = 1
count_index = 0
num_counts_this_model = 0 # number of atoms encompassed by "MODEL" and "ENDMDL" lines
num_counts_this_end = 0 # number of atoms before the first "END" line or between two consecutive "END" lines
num_counts_per_model = [] # array of number of atoms encompassed by "MODEL" and "ENDMDL" lines
num_counts_per_end = [] # array of number of atoms before the first "END" line or between two consecutive "END" lines
modelON = False # Flag to indicate that a "MODEL" frame is being read
for i in range(len(infile)):
lin=infile[i]
#lins=string.split(infile[i])
lins=infile[i].split()
if (len(lins)==0):
if ((i+1)==len(infile)):
lins=['']
if modelON:
raise Exception('There should be an ENDMDL pairing with MODEL')
else:
continue
#record_name = string.strip(lin[0:6])
record_name = lin[0:6].strip()
#
### OPEN need to re-factor the exception statements to a uniform reporting mechanism
#
try:
if(lins[0]=='MODEL'):
if modelON:
raise Exception('Encountered two consecutive MODEL lines')
if (num_counts_this_model != 0):
raise Exception('There should not be atoms after ENDMDL and before MODEL lines')
modelON = True
elif(lins[0]=='ENDMDL'):
if not modelON:
raise Exception('Encountered two consecutive ENDMDL lines')
modelON = False
num_counts_per_model.append(num_counts_this_model)
num_counts_this_model = 0
elif(lins[0]=='END'):
num_counts_per_end.append(num_counts_this_end)
num_counts_this_end = 0
except:
pass
#
if((record_name == 'ATOM' or record_name == 'HETATM')):
count_index += 1
num_counts_this_model += 1
num_counts_this_end += 1
#
#
if ( (len(num_counts_per_end)==0) and (len(num_counts_per_model)!=0) ):
raise Exception('According to Protein Data Bank Contents Guide, END line must appear in each coor entry')
if (len(num_counts_per_model)!=0 and (len(num_counts_per_end)>1 or sum(num_counts_per_model)!=sum(num_counts_per_end))):
if(printme): print(num_counts_per_model,num_counts_per_end)
raise Exception('Only one terminating END line is allowed for pdb entries with multiple MODEL')
#
if (len(num_counts_per_model)>0):
num_frames = len(num_counts_per_model)
num_atoms = num_counts_per_model[0]
if not all(x == num_atoms for x in num_counts_per_model):
raise Exception('number of atoms per frame is not equal')
elif (len(num_counts_per_end)>0):
num_frames = len(num_counts_per_end)
num_atoms = num_counts_per_end[0]
if not all(x == num_atoms for x in num_counts_per_end):
raise Exception('number of atoms per frame is not equal')
elif ( (len(num_counts_per_model)==0) and (len(num_counts_per_end)==0) ):
num_frames = 1
num_atoms = num_counts_this_model
else:
raise Exception('unexpected error!')
if(printme): print('num_atoms = ',num_atoms)
if(printme): print('>>> found ',num_frames,' model(s) or frame(s)')
this_frame = 1
true_index = 0
coor=numpy.zeros((num_frames,num_atoms,3),config.COORD_DTYPE)
unique_names = [] ; unique_resnames = [] ; unique_resids = [] ; unique_chains = []
unique_occupancies = [] ; unique_betas = [] ; unique_segnames = [] ; unique_moltypes = []
for i in range(len(infile)):
lin=infile[i]
#lins=string.split(infile[i])
lins=infile[i].split()
#record_name = string.strip(lin[0:6])
record_name = lin[0:6].strip()
if((record_name == 'ATOM' or record_name == 'HETATM') and this_frame == 1):
true_index += 1
#atom.append(string.strip(lin[0:6])) # 1-6 record name
atom.append(lin[0:6].strip()) # 1-6 record name
original_index.append(lin[6:11]) # 7-11 atom serial number
index.append(str(true_index)) # set index so that > 99,999 atoms can be read and counted
#this_name = string.strip(lin[12:16]) # 13-16 atom name
this_name = lin[12:16].strip()
#name.append(string.strip(lin[12:16])) # 13-16 atom name
name.append(lin[12:16].strip()) # 13-16 atom name
if pdbscan:
loc.append(lin[16])
else:
loc.append(' ')
#this_resname = string.strip(lin[17:21]) # 18-20 residue name
this_resname = lin[17:21].strip() # 18-20 residue name
resname.append(lin[17:21].strip()) # 18-20 residue name
this_chain = lin[21] # 22 chain identifier
chain.append(lin[21]) # 22 chain identifier
this_resid = locale.atoi(lin[22:26]) # 23-26 residue sequence number
original_resid.append(lin[22:26]) # 23-26 residue sequence number
resid.append(lin[22:26]) # 23-26 residue sequence number
rescode.append(lin[26]) # 27 code for insertion of residues
x.append(lin[30:38]) # 31-38 Real(8.3) X: angstroms
y.append(lin[38:46]) # 39-46 Real(8.3) Y: angstroms
z.append(lin[46:54]) # 47-54 Real(8.3) Z: angstroms
residue_flag.append(False)
if not pdbscan:
try:
#occupancy.append(string.strip(lin[54:60])) # 55-60 occupancy
occupancy.append(lin[54:60].strip()) # 55-60 occupancy
#this_occupancy =string.strip(lin[54:60]) # 55-60 occupancy
this_occupancy =lin[54:60].strip() # 55-60 occupancy
if(occupancy[-1] == ''):
occupancy[-1] = " 1.00"
this_occupancy[-1] = " 1.00"
except:
occupancy.append(" 0.00")
this_occupancy = " 0.00"
try:
#beta.append(string.strip(lin[60:66])) # 61-66 temperature factor
beta.append(lin[60:66].strip()) # 61-66 temperature factor
#this_beta = string.strip(lin[60:66]) # 61-66 temperature factor
this_beta = lin[60:66].strip() # 61-66 temperature factor
if(beta[-1] == ''):
beta[-1] = " 0.00"
except:
beta.append(" 0.00")
this_beta = " 0.00"
try:
#segname.append(string.strip(lin[72:76])) # 73-76 segment identifier
segname.append(lin[72:76].strip()) # 73-76 segment identifier
#this_segname = string.strip(lin[72:76]) # 73-76 segment identifier
this_segname = lin[72:76].strip() # 73-76 segment identifier
if(segname[-1] == '' and this_chain !=''):
segname[-1] = this_chain
this_segname = this_chain
except:
this_segname = ""
segname.append("")
try:
#element.append(string.strip(lin[76:78])) # 77-78 element symbol
element.append(lin[76:78].strip()) # 77-78 element symbol
if(element[-1] == ''):
element[-1] = " "
except:
element.append(" ")
try:
#charge.append(string.strip(lin[78:80])) # 79-80 charge on the atom
charge.append(lin[78:80].strip()) # 79-80 charge on the atom
if(charge[-1] == ''):
charge[-1] = " "
except:
charge.append(" ")
else:
#occupancy.append(string.strip(lin[54:60])) # 55-60 occupancy
occupancy.append(lin[54:60].strip()) # 55-60 occupancy
#this_occupancy =string.strip(lin[54:60]) # 55-60 occupancy
this_occupancy =lin[54:60].strip() # 55-60 occupancy
#beta.append(string.strip(lin[60:66])) # 61-66 temperature factor
beta.append(lin[60:66].strip()) # 61-66 temperature factor
#this_beta = string.strip(lin[60:66]) # 61-66 temperature factor
this_beta = lin[60:66].strip() # 61-66 temperature factor
#segname.append(string.strip(lin[72:76])) # 73-76 segment identifier
segname.append(lin[72:76].strip()) # 73-76 segment identifier
#this_segname = string.strip(lin[72:76]) # 73-76 segment identifier
this_segname = lin[72:76].strip() # 73-76 segment identifier
#element.append(string.strip(lin[76:78])) # 77-78 element symbol
element.append(lin[76:78].strip()) # 77-78 element symbol
#charge.append(string.strip(lin[78:80])) # 79-80 charge on the atom
charge.append(lin[78:80].strip()) # 79-80 charge on the atom
if(this_name not in unique_names): unique_names.append(this_name)
if(this_resname not in unique_resnames): unique_resnames.append(this_resname)
if(this_resid not in unique_resids): unique_resids.append(this_resid)
if(this_chain not in unique_chains): unique_chains.append(this_chain)
if(this_segname not in unique_segnames): unique_segnames.append(this_segname)
if(this_occupancy not in unique_occupancies): unique_occupancies.append(this_occupancy)
if(this_beta not in unique_betas): unique_betas.append(this_beta)
#this_resname=(string.strip(lin[17:21]))
this_resname=(lin[17:21].strip())
if this_resname in protein_resnames:
moltype.append('protein')
this_moltype = 'protein'
elif this_resname in rna_resnames:
moltype.append('rna')
this_moltype = 'rna'
elif this_resname in dna_resnames:
moltype.append('dna')
this_moltype = 'dna'
elif this_resname in water_resnames:
moltype.append('water')
this_moltype = 'water'
else:
moltype.append('other')
this_moltype = 'other'
if(this_moltype not in unique_moltypes): unique_moltypes.append(this_moltype)
if(true_index == num_atoms):
if(printme): print('finished reading frame = ',this_frame)
index=numpy.array(index,int)
original_index=numpy.array(original_index,int)
resid=numpy.array(resid,int)
original_resid=numpy.array(original_resid,int)
x=numpy.array(x,config.COORD_DTYPE)
y=numpy.array(y,config.COORD_DTYPE)
z=numpy.array(z,config.COORD_DTYPE)
coor[0,:,0]=x ; coor[0,:,1]=y ; coor[0,:,2]=z
true_index = 0
x=[] ; y=[] ; z=[]
if(this_frame == 1):
self._atom=atom ; self._index=index ; self._original_index = original_index ; self._name=name ; self._loc=loc ; self._resname=resname ; self._residue_flag = residue_flag
self._chain=chain ; self._resid=resid ; self._rescode=rescode ; self._original_resid=original_resid
self._occupancy=occupancy ; self._beta=beta ; self._segname=segname ; self._element=element
self._charge=charge ; self._moltype=moltype
self._number_of_names = len(unique_names) ; self._names = unique_names
self._number_of_resnames = len(unique_resnames) ; self._resnames = unique_resnames
self._number_of_resids = len(unique_resids) ; self._resids = unique_resids
self._number_of_chains = len(unique_chains) ; self._chains = unique_chains
self._number_of_segnames = len(unique_segnames) ; self._segnames = unique_segnames
self._number_of_occupancies = len(unique_occupancies) ; self._occupancies = unique_occupancies
self._number_of_betas = len(unique_betas) ; self._betas = unique_betas
self._number_of_moltypes = len(unique_moltypes) ; self._moltypes = unique_moltypes
if(fastread):
self._natoms=len(index)
break
this_frame += 1
elif((record_name != 'ATOM' or record_name != 'HETATM' and record_name != 'CONECT') and this_frame == 1):
header.append(lin)
elif((record_name == 'ATOM' or record_name == 'HETATM') and this_frame > 1):
true_index += 1
x.append(lin[30:38]) # 31-38 Real(8.3) X: angstroms
y.append(lin[38:46]) # 39-46 Real(8.3) Y: angstroms
z.append(lin[46:54]) # 47-54 Real(8.3) Z: angstroms
if(true_index == num_atoms):
if(printme): print('finished reading frame = ',this_frame)
x=numpy.array(x,config.COORD_DTYPE)
y=numpy.array(y,config.COORD_DTYPE)
z=numpy.array(z,config.COORD_DTYPE)
coor[this_frame-1,:,0]=x ; coor[this_frame-1,:,1]=y ; coor[this_frame-1,:,2]=z
true_index = 0
this_frame += 1
x=[] ; y=[] ; z=[]
elif ((record_name == 'CONECT') and pdbscan):
# Format of CONECT line:
# Record name CONECT followed by list of atom indexes in 6
# character columns. First is the base atom, following atoms
# are those connected to it.
# Input line is filtered to ignore blank columns.
ndxs = [int(lin[i:i+5]) for i in range(6, len(lin), 5) if lin[i:i+5].strip()]
conect[ndxs[0]] = ndxs[1:]
if(((this_frame - 1) != num_frames) and not fastread):
if(printme): print('>>> WARNING: pdb file had ',num_frames,' file_io read ',this_frame-1,' frames')
self._coor=numpy.array(coor)
if 'check_zero_coor' in kwargs:
self.check_for_all_zero_columns(self._coor)
unique_elements = self.element_filter()
self._number_of_elements = len(unique_elements) ; self._elements = unique_elements
self._natoms=len(index)
total_mass = self.calculate_mass()
error = []
if 'saspdbrx_topology' in kwargs:
if kwargs['saspdbrx_topology']:
error = self.check_charmm_atomic_order_reorganize()
return error
self._header = header
self._conect = conect
return
[docs] def create_conect_pdb_lines(self):
"""
Output stored conect information in PDB record format.
Indices are converted from those read in - original_index - to
those used in output.
Format of CONECT line:
Record name CONECT followed by list of atom indexes in 6 character
columns. First is the base atom, following atoms are those
connected to it.
"""
original_conect = self.conect()
original_indexs = self.original_index()
indexs = self.index()
# Convert from original to current indexing
convert_index = dict(list(zip(original_indexs, indexs)))
new_conect = {}
for base, linked in original_conect.items():
new_base = convert_index[base]
new_linked = []
for index in linked:
new_linked.append(convert_index[index])
new_conect[new_base] = new_linked
# Output should be in the order of the atom indexes
ordered_keys = sorted(new_conect.keys())
conect_lines = []
for base in ordered_keys:
ndxs = ''.join([str(i).rjust(5) for i in new_conect[base]])
conect_lines.append('CONECT' + str(base).rjust(5) + ndxs)
return conect_lines