Source code for sasmol.pdb_io




#from __future__ import unicode_literals
#
'''
    SASMOL: Copyright (C) 2011 Joseph E. Curtis, Ph.D. 

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import sys
import string
import locale
import numpy
import io
import logging

from . import config as config

#	PDB_IO
#
#	12/5/2009	--	initial coding			                        :	jc
#	12/10/2009	--	doc strings 			                        :	jc
#	01/01/2011	--	added dcdio wrappers		                    :	jc
#	08/26/2016	--	forked from file_io                             :	jc
#
#LC	 1         2         3         4         5         6         7
#LC4567890123456789012345678901234567890123456789012345678901234567890123456789
#								       *      **
'''
	PDB_IO is the main module that contains classes that 
	read and write protein data bank files from and to the hard disk,

	The methods (read_pdb, write_pdb) are used to read and write data
	using the PDB file format as described at the following
	web-site:

	http://deposit.rcsb.org/adit/docs/pdb_atom_format.html

    These classes are accessed by the Atom class found in
    the sasmol.system module through the file_io File() class.
'''

[docs]class PDB(object):

    def _pdb_optional_field_value(self, field_name, atom_index, default_value, fill_missing_optional):
        if not fill_missing_optional:
            return getattr(self, field_name)[atom_index]

        try:
            return getattr(self, field_name)[atom_index]
        except (AttributeError, IndexError, TypeError):
            return default_value

[docs]    def print_error(self,name,my_message):

        error = []
        message = '\nELEMENT NAME NOT IN CHARMM DATABASE AND SINGLE CHARACTER OPTION NOT APPLICABLE\n'
        message += '\n'+name+'\n'
        message +=  my_message+'\n'
        message += ' stopping now: name = '+name+'\n'
        error.append(message)

        return error

[docs]    def check_error(self,error):

        if(len(error)>0):
            print(error)
            sys.exit()

        return

[docs]    def element_filter(self):
        '''	
        This function filters the PDB file to sraighten
        out various things (add to this as you go along).
        
        Filter element list... 
        '''
        unique_elements = []
        error = []
        single_atom_names = ['H','F','B','D','C','N','O','S','P','I','K','U','V','W','Y']

        for i in range(len(self._name)):
            name = self._name[i].upper()
            resname = self._resname[i].upper()
            if (self._element[i]=='' or self._element[i]==' ' or self._element[i]=='  '):
                natoms = len(self._name[i])
                error,self._element[i] = self.get_element(name,resname)
                self.check_error(error)
#
###	OPEN	Error exception handling stub
#
            if(self._element[i] not in unique_elements):
                unique_elements.append(self._element[i])

        return unique_elements

[docs]    def get_element(self,name,resname):
        '''
        Get elment from charmm27 atom name list plus extras from periodic table
        and resolve name conflicts	
        '''

        error = []
        element_name = ''

        #hydrogen,carbon,nitrogen,oxygen,sulfur,phosphorus,other = self.charmm_names()	
        charmm_names = self.charmm_names()
        hydrogen = charmm_names['hydrogen']
        carbon = charmm_names['carbon']
        nitrogen = charmm_names['nitrogen']
        oxygen = charmm_names['oxygen']
        sulfur = charmm_names['sulfur']
        phosphorus = charmm_names['phosphorus']
        other = charmm_names['other']

        conflict_atoms = ['CD','CE','HE','HG','NE','ND','NB','PB','PA']

        if(name in conflict_atoms and (name == resname)):
            element_name = name
        elif(name in hydrogen): element_name = 'H'
        elif(name in carbon): element_name = 'C'
        elif(name in nitrogen): element_name = 'N'
        elif(name in oxygen): element_name = 'O'
        elif(name in sulfur): element_name = 'S'
        elif(name in phosphorus): element_name = 'P'
        elif(name == '1H'): element_name = name
        elif(name == '2H'): element_name = "D"
        elif(name == "SOD"): element_name = 'NA'
        elif(name == "POT"): element_name = 'K'
        elif(name == "CAL"): element_name = 'CA'
        elif(name == "CLA"): element_name = 'CL'
        elif(name == "CES"): element_name = 'CS'
        elif(name == "F2'"): element_name = 'F'
        elif(name in other): element_name = name
        elif(len(name)>0):
            if(name[0].isalpha()):
                element_name = name[0]
            elif(name[0].isdigit() and len(name)>1):
                if(name[1].isalpha()):
                    element_name = name[1]
                else:
                    my_message = '\nFIRST AND SECOND CHARACTER IN ATOM NAME CAN NOT BE A NUMBER\n'
                    error = self.print_error(name,my_message)
            else:
                my_message = '\nFIRST CHARACTER IN ATOM NAME CAN NOT BE A NUMBER\n'
                error = self.print_error(name,my_message)
        else:
            my_message = '\nNO CHARACTERS FOUND FOR ATOM NAME\n'
            error = self.print_error(name,my_message)

        return error,element_name

[docs]    def write_pdb(self,filename,frame,flag,**kwargs):

        '''
        This method writes the PDB file
        '''

        debug=0
        result=1
        conect = False
        fill_missing_optional = kwargs.get('fill_missing_optional', False)
        logger = logging.getLogger(__name__)

        if(flag=='w' or flag=='W'):
            infile=open(filename,'w')
        elif(flag=='a' or flag=='A'):
            infile=open(filename,'a')

        if 'conect' in kwargs:
            conect = kwargs['conect']

        if 'model' in kwargs:
            this_frame = kwargs['model']
            infile.write("MODEL "+str(this_frame)+"\n")

        if(debug==1):
            infile.write("          1         2         3         4         5         6         7         \n")
            infile.write("01234567890123456789012345678901234567890123456789012345678901234567890123456789\n")

        for i in range(len(self._atom)):

            this_index = self._index[i]

            this_resid = self._resid[i]

            if(this_index > 99999):
                this_index = '99999'
            elif(this_index < -9999):
                this_index = '-9999'
            else:
                this_index = str(this_index)

            if(this_resid > 9999):
                this_resid = '9999'
            elif(this_resid < -999):
                this_resid = '-999'

            try:
                sx = "{0:8.3f}".format(float(self._coor[frame,i,0]))[:8]
                sy = "{0:8.3f}".format(float(self._coor[frame,i,1]))[:8]
                sz = "{0:8.3f}".format(float(self._coor[frame,i,2]))[:8]

                loc = self._pdb_optional_field_value('_loc',i,' ',fill_missing_optional)
                rescode = self._pdb_optional_field_value('_rescode',i,' ',fill_missing_optional)
                occupancy = self._pdb_optional_field_value('_occupancy',i,'0.00',fill_missing_optional)
                beta = self._pdb_optional_field_value('_beta',i,'0.00',fill_missing_optional)
                segname = self._pdb_optional_field_value('_segname',i,' ',fill_missing_optional)
                element = self._pdb_optional_field_value('_element',i,' ',fill_missing_optional)
                charge = self._pdb_optional_field_value('_charge',i,' ',fill_missing_optional)

                infile.write("%-6s%5s %-4s%1s%-4s%1s%4s%1s   %8s%8s%8s%6s%6s      %-4s%2s%2s\n" % (self._atom[i],this_index,self._name[i],loc,self._resname[i],self._chain[i],this_resid,rescode,sx,sy,sz,occupancy,beta,segname,element,charge))
            except Exception:
                if config.__logging_level__ == 'DEBUG':
                    logger.debug('Unexpected error writing PDB atom index %s', i, exc_info=True)
                if fill_missing_optional:
                    infile.close()
                    raise

        # TODO: Check with Joseph to see if logic acceptable -
        # i.e. is 'final' always accompanied by 'model'?
        if ('final' in kwargs) or ('model' not in kwargs):

            if conect:
                conect_lines = self.create_conect_pdb_lines()
                for line in conect_lines:
                    infile.write(line + '\n')

            infile.write("END\n")

        else:

            infile.write("ENDMDL\n")

        # if 'model' in kwargs:
        #     infile.write("ENDMDL\n")
        # else:
        #     if conect:
        #         conect_lines = self.create_conect_pdb_lines()
        #         for line in conect_lines:
        #             infile.write(line + '\n')
        #
        #     infile.write("END\n")
        #
        # if 'final' in kwargs:
        #     infile.write("END\n")

        infile.close()

        return result

[docs]    def get_resnames(self):
        '''
        This method holds names of residues to use to set moltype.  Based on Charmm 27 naming.
        '''

        protein_resnames=['ALA','ARG','ASN','ASP','CYS','GLN','GLU','GLY','HIS','HSD','HSE','HSP','ILE','LEU','LYS','MET','PHE','PRO','SER','THR','TRP','TYR','VAL']

        dna_resnames=['NUSA','NUSG','NUSC','NUSU','DA','DG','DC','DT','ADE','GUA','CYT','THY']

        rna_resnames=['RNUS','RNUA','RUUG','RNUC','A', 'C', 'G', 'U','ADE','CYT','GUA','URA']

        nucleic_resnames = ['GUA','ADE','CYT','THY','URA','G', 'A', 'C', 'T', 'U','DA','DG','DC','DT']

        water_resnames=['TIP3','SPCE','TIP','SPC','TIP4','TP3M']

        return protein_resnames,dna_resnames,rna_resnames,nucleic_resnames,water_resnames

[docs]    def initialize_children(self):

        '''
            This initializes the following "children" with their
            masks already defined to the "parent" molecule

            names() 		: names_mask()
            resnames()		: resnames_mask()
            resids()		: resids_mask()
            chains()		: chains_mask()
            segnames()		: segnames_mask()
            occupancies()	: occupancies_mask()
            betas()		: betas_mask()
            elements()		: elements_mask()
			
            The objects on the left contain the unique values and
            the objects on the right contain the masks that have
            the indices to extract the information for each unique
            value from the parent molecule.

        '''

        number_of_names = self.number_of_names() ; unique_names = self.names()
        number_of_resnames = self.number_of_resnames() ; unique_resnames = self.resnames()
        number_of_resids = self.number_of_resids() ; unique_resids = self.resids()
        number_of_chains = self.number_of_chains() ; unique_chains = self.chains()
        number_of_occupancies = self.number_of_occupancies() ; unique_occupancies = self.occupancies()
        number_of_betas = self.number_of_betas() ; unique_betas = self.betas()
        number_of_elements = self.number_of_elements() ; unique_elements = self.elements()
        number_of_segnames = self.number_of_segnames() ; unique_segnames = self.segnames()
        natoms = self.natoms()
        name = self.name() ; resname = self.resname() ; resid = self.resid() ; chain = self.chain()
        occupancy = self.occupancy() ; beta = self.beta() ; element = self.element() ; segname = self.segname()

        names = [] ; resnames = [] ; resids = [] ; chains = []
        occupancies = [] ; betas = [] ; elements = [] ; segments = []

        names_mask = numpy.zeros((number_of_names,natoms),int)
        resnames_mask = numpy.zeros((number_of_resnames,natoms),int)
        resids_mask = numpy.zeros((number_of_resids,natoms),int)
        chains_mask = numpy.zeros((number_of_chains,natoms),int)
        occupancies_mask = numpy.zeros((number_of_occupancies,natoms),int)
        betas_mask = numpy.zeros((number_of_betas,natoms),int)
        elements_mask = numpy.zeros((number_of_elements,natoms),int)
        segnames_mask = numpy.zeros((number_of_segnames,natoms),int)

        nresid = 0

        for i in range(natoms):

            names_mask[unique_names.index(name[i])][i] = 1
            resnames_mask[unique_resnames.index(resname[i])][i] = 1
            resids_mask[unique_resids.index(resid[i])][i] = 1
            chains_mask[unique_chains.index(chain[i])][i] = 1
            occupancies_mask[unique_occupancies.index(occupancy[i])][i] = 1
            betas_mask[unique_betas.index(beta[i])][i] = 1
            elements_mask[unique_elements.index(element[i])][i] = 1
            segnames_mask[unique_segnames.index(segname[i])][i] = 1

        self._names_mask = names_mask
        self._resnames_mask = resnames_mask
        self._resids_mask = resids_mask
        self._chains_mask = chains_mask
        self._occupancies_mask = occupancies_mask
        self._betas_mask = betas_mask
        self._elements_mask = elements_mask
        self._segnames_mask = segnames_mask

        return


[docs]    def check_for_all_zero_columns(self, coor, frame=0):
        '''
            Make sure there are no two all zero columns in coordinates
            This is needed for the alignment module 
        '''
        SMALL = 1.0E-10
        x0,y0,z0=False,False,False
        if sum(coor[frame][:,0]**2)<SMALL:
            x0=True
        if sum(coor[frame][:,1]**2)<SMALL:
            y0=True
        if sum(coor[frame][:,2]**2)<SMALL:
            z0=True
        if x0:
            coor[frame][0][0]=SMALL
        if y0:
            coor[frame][0][1]=SMALL
        if z0:
            coor[frame][0][2]=SMALL

        return

[docs]    def read_pdb(self,filename,**kwargs):
        '''
        This method reads a PDB file.  
        '''
        debug=0
        result=1

        protein_resnames,dna_resnames,rna_resnames,nucleic_resnames,water_resnames = self.get_resnames()

        ### LONG	Need to properly import header information

        # see: http://deposit.rcsb.org/adit/docs/pdb_atom_format.html

        fastread = False
        pdbscan = False
        printme = True
        printme = False

        if 'verbose' in kwargs:
            printme = kwargs['verbose']

        if 'fastread' in kwargs:
            fastread = kwargs['fastread']
            if(fastread):
                printme = False

        if 'pdbscan' in kwargs:
            pdbscan = kwargs['pdbscan']

        with io.open(filename, 'r') as temp_infile:
            infile = temp_infile.readlines()

        if(printme): print('reading filename: ',filename)

        atom=[] ; index=[] ; original_index=[] ; name=[] ; loc=[] ; resname=[] ; chain=[] ; resid=[] ; rescode=[]
        x=[] ; y=[] ; z=[]
        occupancy=[] ; beta=[] ; segname=[] ; element=[] ; charge=[] ; moltype=[] ; conect = {}
        residue_flag = [] ; original_resid=[] ; header = []

        # first check to see how many frames are in the file

        num_model = 0
        num_endmdl = 0
        num_end = 0

        num_frames = 1

        count_index = 0

        num_counts_this_model = 0 # number of atoms encompassed by "MODEL" and "ENDMDL" lines
        num_counts_this_end = 0 # number of atoms before the first "END" line or between two consecutive "END" lines
        num_counts_per_model = [] # array of number of atoms encompassed by "MODEL" and "ENDMDL" lines
        num_counts_per_end = [] # array of number of atoms before the first "END" line or between two consecutive "END" lines
        modelON = False # Flag to indicate that a "MODEL" frame is being read
        for i in range(len(infile)):
            lin=infile[i]
            #lins=string.split(infile[i])
            lins=infile[i].split()
            if (len(lins)==0):
                if ((i+1)==len(infile)):
                    lins=['']
                    if modelON:
                        raise Exception('There should be an ENDMDL pairing with MODEL')
                    else:
                        continue

            #record_name = string.strip(lin[0:6])
            record_name = lin[0:6].strip()
#
### 	OPEN 	need to re-factor the exception statements to a uniform reporting mechanism
#
            try:
                if(lins[0]=='MODEL'):
                    if modelON:
                        raise Exception('Encountered two consecutive MODEL lines')
                    if (num_counts_this_model != 0):
                        raise Exception('There should not be atoms after ENDMDL and before MODEL lines')
                    modelON = True
                elif(lins[0]=='ENDMDL'):
                    if not modelON:
                        raise Exception('Encountered two consecutive ENDMDL lines')
                    modelON = False
                    num_counts_per_model.append(num_counts_this_model)
                    num_counts_this_model = 0
                elif(lins[0]=='END'):
                    num_counts_per_end.append(num_counts_this_end)
                    num_counts_this_end = 0
            except:
                pass
			#
            if((record_name == 'ATOM' or record_name == 'HETATM')):
                count_index += 1
                num_counts_this_model += 1
                num_counts_this_end += 1
		#
		#

        if ( (len(num_counts_per_end)==0) and (len(num_counts_per_model)!=0) ):
            raise Exception('According to Protein Data Bank Contents Guide, END line must appear in each coor entry')
        if (len(num_counts_per_model)!=0 and (len(num_counts_per_end)>1 or sum(num_counts_per_model)!=sum(num_counts_per_end))):
            if(printme): print(num_counts_per_model,num_counts_per_end)
            raise Exception('Only one terminating END line is allowed for pdb entries with multiple MODEL')
		#
        if (len(num_counts_per_model)>0):
            num_frames = len(num_counts_per_model)
            num_atoms = num_counts_per_model[0]
            if not all(x == num_atoms for x in num_counts_per_model):
                raise Exception('number of atoms per frame is not equal')
        elif (len(num_counts_per_end)>0):
            num_frames = len(num_counts_per_end)
            num_atoms = num_counts_per_end[0]
            if not all(x == num_atoms for x in num_counts_per_end):
                raise Exception('number of atoms per frame is not equal')
        elif ( (len(num_counts_per_model)==0) and (len(num_counts_per_end)==0) ):
            num_frames = 1
            num_atoms = num_counts_this_model
        else:
            raise Exception('unexpected error!')

        if(printme): print('num_atoms = ',num_atoms)

        if(printme): print('>>> found ',num_frames,' model(s) or frame(s)')
        this_frame = 1

        true_index = 0

        coor=numpy.zeros((num_frames,num_atoms,3),config.COORD_DTYPE)

        unique_names = [] ; unique_resnames = [] ; unique_resids = [] ; unique_chains = []
        unique_occupancies = [] ; unique_betas = [] ; unique_segnames = [] ; unique_moltypes = []

        for i in range(len(infile)):
            lin=infile[i]
            #lins=string.split(infile[i])
            lins=infile[i].split()

            #record_name = string.strip(lin[0:6])
            record_name = lin[0:6].strip()

            if((record_name == 'ATOM' or record_name == 'HETATM') and this_frame == 1):
                true_index += 1
                #atom.append(string.strip(lin[0:6]))		#	1-6		record name	
                atom.append(lin[0:6].strip())		#	1-6		record name	
                original_index.append(lin[6:11])				#	7-11		atom serial number
                index.append(str(true_index))   	        #   set index so that > 99,999 atoms can be read and counted
                #this_name = string.strip(lin[12:16])		#	13-16		atom name
                this_name = lin[12:16].strip()
                #name.append(string.strip(lin[12:16]))		#	13-16		atom name
                name.append(lin[12:16].strip())		#	13-16		atom name
                if pdbscan:
                    loc.append(lin[16])
                else:
                    loc.append(' ')
                #this_resname = string.strip(lin[17:21])	#	18-20		residue name
                this_resname = lin[17:21].strip()	#	18-20		residue name
                resname.append(lin[17:21].strip())	#	18-20		residue name
                this_chain = lin[21]				#	22		chain identifier
                chain.append(lin[21])				#	22		chain identifier
                this_resid = locale.atoi(lin[22:26])			#	23-26		residue sequence number
                original_resid.append(lin[22:26])			#	23-26		residue sequence number
                resid.append(lin[22:26])			#	23-26		residue sequence number
                rescode.append(lin[26])				#	27		code for insertion of residues
                x.append(lin[30:38])				#	31-38		Real(8.3) X: angstroms	
                y.append(lin[38:46])				#	39-46		Real(8.3) Y: angstroms	
                z.append(lin[46:54])				#	47-54		Real(8.3) Z: angstroms	

                residue_flag.append(False)

                if not pdbscan:

                    try:
                        #occupancy.append(string.strip(lin[54:60]))      #	55-60		occupancy
                        occupancy.append(lin[54:60].strip())      #	55-60		occupancy
                        #this_occupancy =string.strip(lin[54:60])      #	55-60		occupancy
                        this_occupancy =lin[54:60].strip()      #	55-60		occupancy
                        if(occupancy[-1] == ''):
                            occupancy[-1] = "  1.00"
                            this_occupancy[-1] = "  1.00"
                    except:
                        occupancy.append("  0.00")
                        this_occupancy = "  0.00"
                    try:
                        #beta.append(string.strip(lin[60:66]))		#	61-66		temperature factor
                        beta.append(lin[60:66].strip())		#	61-66		temperature factor
                        #this_beta = string.strip(lin[60:66])		#	61-66		temperature factor
                        this_beta = lin[60:66].strip()		#	61-66		temperature factor
                        if(beta[-1] == ''):
                            beta[-1] = "  0.00"
                    except:
                        beta.append("  0.00")
                        this_beta = "  0.00"
                    try:
                        #segname.append(string.strip(lin[72:76]))	#	73-76		segment identifier
                        segname.append(lin[72:76].strip())	#	73-76		segment identifier
                        #this_segname = string.strip(lin[72:76])	#	73-76		segment identifier
                        this_segname = lin[72:76].strip()	#	73-76		segment identifier
                        if(segname[-1] == '' and this_chain !=''):
                            segname[-1] = this_chain
                            this_segname = this_chain
                    except:
                        this_segname = ""
                        segname.append("")
                    try:
                        #element.append(string.strip(lin[76:78]))	#	77-78		element symbol
                        element.append(lin[76:78].strip())	#	77-78		element symbol
                        if(element[-1] == ''):
                            element[-1] = "  "
                    except:
                        element.append("  ")
                    try:
                        #charge.append(string.strip(lin[78:80]))		#	79-80		charge on the atom
                        charge.append(lin[78:80].strip())		#	79-80		charge on the atom
                        if(charge[-1] == ''):
                            charge[-1] = "  "
                    except:
                        charge.append("  ")

                else:
                    #occupancy.append(string.strip(lin[54:60]))      #	55-60		occupancy
                    occupancy.append(lin[54:60].strip())      #	55-60		occupancy
                    #this_occupancy =string.strip(lin[54:60])      #	55-60		occupancy
                    this_occupancy =lin[54:60].strip()      #	55-60		occupancy
                    #beta.append(string.strip(lin[60:66]))		#	61-66		temperature factor
                    beta.append(lin[60:66].strip())		#	61-66		temperature factor
                    #this_beta = string.strip(lin[60:66])		#	61-66		temperature factor
                    this_beta = lin[60:66].strip()		#	61-66		temperature factor
                    #segname.append(string.strip(lin[72:76]))	#	73-76		segment identifier
                    segname.append(lin[72:76].strip())	#	73-76		segment identifier
                    #this_segname = string.strip(lin[72:76])	#	73-76		segment identifier
                    this_segname = lin[72:76].strip()	#	73-76		segment identifier
                    #element.append(string.strip(lin[76:78]))	#	77-78		element symbol
                    element.append(lin[76:78].strip())	#	77-78		element symbol
                    #charge.append(string.strip(lin[78:80]))		#	79-80		charge on the atom
                    charge.append(lin[78:80].strip())		#	79-80		charge on the atom

                if(this_name not in unique_names): unique_names.append(this_name)
                if(this_resname not in unique_resnames): unique_resnames.append(this_resname)
                if(this_resid not in unique_resids): unique_resids.append(this_resid)
                if(this_chain not in unique_chains): unique_chains.append(this_chain)
                if(this_segname not in unique_segnames): unique_segnames.append(this_segname)
                if(this_occupancy not in unique_occupancies): unique_occupancies.append(this_occupancy)
                if(this_beta not in unique_betas): unique_betas.append(this_beta)

                #this_resname=(string.strip(lin[17:21]))
                this_resname=(lin[17:21].strip())
                if this_resname in protein_resnames:
                    moltype.append('protein')
                    this_moltype = 'protein'
                elif this_resname in rna_resnames:
                    moltype.append('rna')
                    this_moltype = 'rna'
                elif this_resname in dna_resnames:
                    moltype.append('dna')
                    this_moltype = 'dna'
                elif this_resname in water_resnames:
                    moltype.append('water')
                    this_moltype = 'water'
                else:
                    moltype.append('other')
                    this_moltype = 'other'

                if(this_moltype not in unique_moltypes): unique_moltypes.append(this_moltype)

                if(true_index == num_atoms):
                    if(printme): print('finished reading frame = ',this_frame)
                    index=numpy.array(index,int)
                    original_index=numpy.array(original_index,int)
                    resid=numpy.array(resid,int)
                    original_resid=numpy.array(original_resid,int)

                    x=numpy.array(x,config.COORD_DTYPE)
                    y=numpy.array(y,config.COORD_DTYPE)
                    z=numpy.array(z,config.COORD_DTYPE)
                    coor[0,:,0]=x ; coor[0,:,1]=y ; coor[0,:,2]=z
                    true_index = 0
                    x=[] ; y=[] ; z=[]
                    if(this_frame == 1):
                        self._atom=atom ; self._index=index  ; self._original_index = original_index ; self._name=name ; self._loc=loc ; self._resname=resname ; self._residue_flag = residue_flag
                        self._chain=chain ; self._resid=resid ; self._rescode=rescode ; self._original_resid=original_resid
                        self._occupancy=occupancy ; self._beta=beta ; self._segname=segname ; self._element=element
                        self._charge=charge ; self._moltype=moltype

                        self._number_of_names = len(unique_names) ; self._names = unique_names
                        self._number_of_resnames = len(unique_resnames) ; self._resnames = unique_resnames
                        self._number_of_resids = len(unique_resids) ; self._resids = unique_resids
                        self._number_of_chains = len(unique_chains) ; self._chains = unique_chains
                        self._number_of_segnames = len(unique_segnames) ; self._segnames = unique_segnames
                        self._number_of_occupancies = len(unique_occupancies) ; self._occupancies = unique_occupancies
                        self._number_of_betas = len(unique_betas) ; self._betas = unique_betas
                        self._number_of_moltypes = len(unique_moltypes) ; self._moltypes = unique_moltypes
                        if(fastread):
                            self._natoms=len(index)
                            break

                    this_frame += 1
            elif((record_name != 'ATOM' or record_name != 'HETATM' and record_name != 'CONECT') and this_frame == 1):
                header.append(lin)

            elif((record_name == 'ATOM' or record_name == 'HETATM') and this_frame > 1):
                true_index += 1
                x.append(lin[30:38])				#	31-38		Real(8.3) X: angstroms	
                y.append(lin[38:46])				#	39-46		Real(8.3) Y: angstroms	
                z.append(lin[46:54])				#	47-54		Real(8.3) Z: angstroms	

                if(true_index == num_atoms):
                    if(printme): print('finished reading frame = ',this_frame)
                    x=numpy.array(x,config.COORD_DTYPE)
                    y=numpy.array(y,config.COORD_DTYPE)
                    z=numpy.array(z,config.COORD_DTYPE)
                    coor[this_frame-1,:,0]=x ; coor[this_frame-1,:,1]=y ; coor[this_frame-1,:,2]=z
                    true_index = 0
                    this_frame += 1
                    x=[] ; y=[] ; z=[]

            elif ((record_name == 'CONECT') and pdbscan):
                # Format of CONECT line:
                # Record name CONECT followed by list of atom indexes in 6 
                # character columns. First is the base atom, following atoms 
                # are those connected to it. 
                # Input line is filtered to ignore blank columns.
                ndxs = [int(lin[i:i+5]) for i in range(6, len(lin), 5) if lin[i:i+5].strip()]
                conect[ndxs[0]] = ndxs[1:]

        if(((this_frame - 1) != num_frames) and not fastread):
            if(printme): print('>>> WARNING: pdb file had ',num_frames,' file_io read ',this_frame-1,' frames')

        self._coor=numpy.array(coor)

        if 'check_zero_coor' in kwargs:
            self.check_for_all_zero_columns(self._coor)

        unique_elements = self.element_filter()

        self._number_of_elements = len(unique_elements) ; self._elements = unique_elements

        self._natoms=len(index)

        total_mass = self.calculate_mass()
        error = []

        if 'saspdbrx_topology' in kwargs:
            if kwargs['saspdbrx_topology']:
                error = self.check_charmm_atomic_order_reorganize()
                return error

        self._header = header
        self._conect = conect

        return 

[docs]    def create_conect_pdb_lines(self):
        """
            Output stored conect information in PDB record format.
            Indices are converted from those read in - original_index - to 
            those used in output.
            Format of CONECT line:
            Record name CONECT followed by list of atom indexes in 6 character 
            columns. First is the base atom, following atoms are those 
            connected to it.
        """

        original_conect = self.conect()
        original_indexs = self.original_index()
        indexs = self.index()

        # Convert from original to current indexing
        convert_index = dict(list(zip(original_indexs, indexs)))

        new_conect = {}

        for base, linked in original_conect.items():

            new_base = convert_index[base]
            new_linked = []

            for index in linked:
                new_linked.append(convert_index[index])

                new_conect[new_base] = new_linked

        # Output should be in the order of the atom indexes
        ordered_keys = sorted(new_conect.keys())

        conect_lines = []

        for base in ordered_keys:
            ndxs = ''.join([str(i).rjust(5) for i in new_conect[base]])
            conect_lines.append('CONECT' + str(base).rjust(5) + ndxs)

        return conect_lines
Source code for sasmol.pdb_io

sasmol

Navigation

Related Topics