Source code for sasmol.topology

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# from __future__ import unicode_literals

#   SASMOL: Copyright (C) 2011 Joseph E. Curtis, Ph.D.
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# TOPOLOGY
#
# 1/26/2012	--	initial coding				: jc
# 12/26/2015	--	refactored for release      : jc
# 08/18/2016	--	documentation               : jc
#
# 1         2         3         4         5         6         7
# LC4567890123456789012345678901234567890123456789012345678901234567890123456789
# *      **

import numpy
import sasmol.charmm_topology as charmm_topology
import sasmol.properties as properties


[docs]class Topology(charmm_topology.CharmmTopology):

    '''
    This class contains topology information used other modules.

    '''

    def __init__(self):
        pass

[docs]    def renumber(self, **kwargs):
        '''
        Method to renumber index and resid fields

        Parameters
        ----------
        kwargs 
            index :
            resid :

        Returns
        -------
        None
            updated system object 
                index
                resid

        Examples
        -------

        >>> import sasmol.system as system
        >>> molecule = system.Molecule('hiv1_gag.pdb')
        >>> molecule.index()[0]
        1
        >>> molecule.resid()[0]
        1

        default renumber() with no arguments renumbers
        both index and resid starting at 1 

        >>> molecule.renumber()
        >>> molecule.index()[0]
        1
        >>> molecule.resid()[0]
        1

        passing a kwarg with a value will renumber appropriate
        attribute with value passed

        >>> molecule.renumber(index=3)
        >>> molecule.index()[0]
        3
        >>> molecule.resid()[0]
        1

        >>> molecule.renumber(resid=8)
        >>> molecule.index()[0]
        3
        >>> molecule.resid()[0]
        8

        >>> molecule.renumber(index=223, resid=18)
        >>> molecule.index()[0]
        223
        >>> molecule.resid()[0]
        18

        '''

        renumber_index_flag = False
        renumber_index_start = 1

        renumber_resid_flag = False
        renumber_resid_start = 1

        if len(kwargs) == 0:
            renumber_index_flag = True
            renumber_resid_flag = True

        try:
            if kwargs['index'] is not None:
                renumber_index_flag = True
                renumber_index_start = kwargs['index']
        except:
            pass

        try:
            if kwargs['resid'] is not None:
                renumber_resid_flag = True
                renumber_resid_start = kwargs['resid']
        except:
            pass

        if renumber_index_flag:
            start = renumber_index_start
            index = numpy.array(
                [x for x in range(start, start + self._natoms)])
            self._index = index

        if renumber_resid_flag:
            resid = self._resid
            resid_array = []
            count = renumber_resid_start
            for i in range(len(resid)):
                this_resid = resid[i]
                if (i == 0):
                    last_resid = this_resid
                else:
                    if (this_resid != last_resid):
                        count += 1
                resid_array.append(count)
                last_resid = this_resid

            self._resid = numpy.array(resid_array, int)

        return

[docs]    def make_constraint_pdb(self, filename, basis_type, **kwargs):
        ''' 

        Method to rename attribute fields and assign values
        of 1.00 for the given basis type.

        "beta" is the default attribute field to reassign

        Default usage sets all attribute fields to zero
        prior to re-assigning basis type atoms to 1.00

        Parameters
        ----------
        filename    string : name of output PDB file to write 

        basis types: 

            'heavy : all atoms except hydrogen

            'protein' : all atoms in moltype protein

            'nucleic' : all atoms in moltype nucleic

            'backbone'
                -> proteins: N, CA, C, O

            'solute'    : all protein and nucleic


        kwargs 
            optional arguments

                field='beta' : write 1.00 to beta field

                field='occupancy' : write 1.00 to occupancy field

                reset=True : set all values to 0.00 before setting
                                 individual vaules to 1.00

                reset=False: only change desired values to 1.00
                                 ignoring all other values

        Returns
        -------
        None
            updated system object
            writes pdb to disk

        Examples
        --------
        >>> import sasmol.system as system
        >>> molecule = system.Molecule('hiv1_gag.pdb')
        >>> molecule.make_constraint_pdb('constrainted_backbone_hiv1_gag.pdb', 'backbone')

        >>> molecule.make_constraint_pdb('constrainted_backbone_occupancy_hiv1_gag.pdb', 'backbone', field='occupancy')

        constrtain all heavy atoms

        >>> molecule.make_constraint_pdb('constrainted_heavy_hiv1_gag.pdb', 'heavy')

        constrain all protein atoms

        >>> molecule.make_constraint_pdb('constrainted_protein_hiv1_gag.pdb', 'protein')

        constrain all non-solvent atoms

        >>> molecule.make_constraint_pdb('constrainted_solute_hiv1_gag.pdb', 'solute')

        Note
        ----

        Only moltype `protein` and `nucleic` are supported.

        Output PDB file will contain coordinates from frame 0

        '''

        try:
            field = kwargs['field']
        except:
            field = 'beta'

        try:
            reset = kwargs['reset']
        except:
            reset = True

        if reset:
            new_value = ['0.00' for x in range(self._natoms)]
            if field == 'beta':
                self._beta = new_value
            elif field == 'occupancy':
                self._occupancy = new_value

        if basis_type == 'backbone':
            basis_string = "name[i] == 'N' or name[i] == 'CA' or name[i] == 'C' or name[i] == 'O'"

        elif basis_type == 'heavy':
            basis_string = "name[i][0] != 'H'"

        elif basis_type == 'protein':
            basis_string = "moltype[i] == 'protein'"

        elif basis_type == 'nucleic':
            basis_string = "moltype[i] == 'nucleic'"

        elif basis_type == 'solute':
            basis_string = "moltype[i] == 'protein' or moltype[i] == 'nucleic'"

        error, mask = self.get_subset_mask(basis_string)

        if len(error) > 0:
            print('error = ', error)
            return

        if field == 'beta':
            descriptor = self._beta
        elif field == 'occupancy':
            descriptor = self._occupancy

        error = self.set_descriptor_using_mask(mask, descriptor, '1.00')

        if len(error) > 0:
            print('error = ', error)
            return

        if field == 'beta':
            self._beta = descriptor
        elif field == 'occupancy':
            self._occupancy = descriptor

        self.write_pdb(filename, 0, 'w')

        return

[docs]    def make_backbone_pdb_from_fasta(self, filename, moltype, **kwargs):
        """
        Method to write a PDB file of one atom per residue
        based on input FASTA sequence

        http://en.wikipedia.org/wiki/FASTA_format

        Parameters
        ----------
        filename    string : name of output PDB file to write 

        moltype string
                -> 'protein' ; fasta sequence of a protein 
                -> 'nucleic' ; fasta sequence of a nucleic acid 

        kwargs 
            optional future arguments

        Returns
        -------
        self._fasta
            sets the fasta attribute in a system object 

        Examples
        -------

        >>> import sasmol.system as system
        >>> molecule = system.Molecule("hiv1_gag.pdb")
        >>> molecule.create_fasta()
        >>> molecule.fasta()[:5]
        ['G', 'A', 'R', 'A', 'S'] 

        >>> molecule.make_backbone_pdb_from_fasta('sequence.pdb', 'protein')


        Note
        ____

        Only protein and nucleic acid one-letter codes are supported
        Protein PDB files are written with one "CA" atom per residue
        Nucleic acid PDF files are written with on "O5'" atom per residue

        Protein PDB residue names are written as standard residue names; terminal
        patch names such as GLYP and PROP belong in topology/psfgen contexts.

        All coordinate values are set to 0.000 

        """

        if moltype == "protein":
            residue_dictionary = self.one_to_three_letter_protein_residue_dictionary
            sequence_name = "CA"

        elif moltype == "nucleic":
            residue_dictionary = self.one_to_three_letter_nucleic_residue_dictionary
            sequence_name = "O5'"

        sequence = []
        first_flag = True

        for residue in self._fasta:

            if moltype == "protein":
                if residue == 'G' and first_flag:
                    sequence.append('GLYP')
                elif residue == 'P' and first_flag:
                    sequence.append('PROP')
                else:
                    sequence.append(residue_dictionary[residue])

            elif moltype == "nucleic":
                sequence.append(residue_dictionary[residue])

            first_flag = False

        import sasmol.system as system

        molecule = system.Molecule_Maker(len(sequence), name=sequence_name)

        molecule._resname = sequence

        molecule.write_pdb(filename, 0, 'w')

        return

[docs]    def create_fasta(self, **kwargs):
        """
        Method to make a fasta file compatible lists of residue names

        http://en.wikipedia.org/wiki/FASTA_format

        Parameters
        ----------
        kwargs 
            optional future arguments

        Returns
        -------
        self._fasta
            sets the fasta attribute in a system object 

        Examples
        -------

        >>> import sasmol.system as system
        >>> molecule = system.Molecule("hiv1_gag.pdb")
        >>> molecule.create_fasta()
        >>> print(molecule.fasta()[:5]) # doctest: +NORMALIZE_WHITESPACE
        ['G', 'A', 'R', 'A', 'S'] 

        >>> molecule.create_fasta(fasta_format=True)
        >>> print(molecule.fasta()[:5])
        >
        GAR
        >>> molecule.create_fasta(fasta_format=True,width='60')
        >>> print(molecule.fasta()[:-1])
        >
        GARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQIL
        GQLQPSLQTGSEELRSLYNTIAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKAQQAAAD
        TGNNSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATP
        QDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTS
        TLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFY
        KTLRAEQASQEVKNAATETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKAR
        VIAEAMSQVTNSATIMMQKGNFRNQRKTVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGH
        QMKDCTERQAN

        >>> molecule.create_fasta(fasta_format=True,width='60',name='aar')
        >>> print(molecule.fasta()[:90])
        >aar
        GARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQIL
        GQLQPSLQTGSEELRSLYNTIAVL


        Note
        ----
        Other use types below

        molecule.create_fasta(fasta_format=True,exclude_hetatm=True,by_chain=True)
        print(molecule.fasta())

        print('>>> testing by_chain with HETATM (t.py): ')

        molecule.create_fasta(fasta_format=True,by_chain=True)
        print(molecule.fasta())

        print('>>> testing by_segname (t.py): ')

        molecule.create_fasta(fasta_format=True,exclude_hetatm=True,by_segname=True)
        print(molecule.fasta())

        Note that this creates a simple string that is associated with the molecule (self)
        and it will return without assigning a string if a non-standard three letter code
        is supplied.

        """

        fasta_format = False
        by_chain = False
        by_segname = False
        single_chain = False
        single_segname = False
        exclude_hetatm = False

        if 'fasta_format' in kwargs:
            fasta_format = True

        if 'name' in kwargs:
            name = kwargs['name']
            header = '>' + name
        else:
            header = '>'

        if 'width' in kwargs:
            width = kwargs['width']
        else:
            width = '80'

        if 'exclude_hetatm' in kwargs:
            exclude_hetatm = True

        if 'by_chain' in kwargs:
            by_chain = True

        elif 'by_segname' in kwargs:
            by_segname = True

        one_resname = []

        resname = self.resname()
        atom = self.atom()
        chain = self.chain()
        segname = self.segname()

        residue_dictionary = {
            'ALA': 'A',
            'ARG': 'R',
            'ASP': 'D',
            'ASN': 'N',
            'CYS': 'C',
            'GLU': 'E',
            'GLN': 'Q',
            'GLY': 'G',
            'HSD': 'H',
            'HIS': 'H',
            'HSE': 'H',
            'HSP': 'H',
            'ILE': 'I',
            'LEU': 'L',
            'LYS': 'K',
            'MET': 'M',
            'PHE': 'F',
            'PRO': 'P',
            'SER': 'S',
            'THR': 'T',
            'TRP': 'W',
            'TYR': 'Y',
            'VAL': 'V',
            'GUA': 'G',
            'CYT': 'C',
            'ADE': 'A',
            'THY': 'T',
            'URA': 'U'
        }

        for i in range(len(resname)):
            this_resname = resname[i]
            if this_resname in residue_dictionary:
                one_resname.append(residue_dictionary[this_resname])
            elif (atom[i] == 'HETATM'):
                # print 'skipping non-standard resname in HETATM record:
                # ',this_resname
                one_resname.append("X")
            else:
                print('non standard resname: ', this_resname)
                print('unable to make Fasta conversion')
                return

        self.setOne_letter_resname(one_resname)

        resid = self.resid()

        last_resid = None
        last_chain = None
        last_segname = None
        fasta = []
        local_fasta = []
        number_of_chains = 0
        number_of_segnames = 0
        chain_name = []
        segname_name = []
        first = True
        for i in range(len(resid)):
            this_resid = resid[i]

            if by_chain:
                this_chain = chain[i]
                if this_chain != last_chain:
                    number_of_chains += 1
                    chain_name.append(this_chain)
                    if first:
                        first = False
                    else:
                        fasta.append(local_fasta)

                    last_chain = this_chain
                    local_fasta = []

            if by_segname:
                this_segname = segname[i]
                if this_segname != last_segname:
                    number_of_segnames += 1
                    segname_name.append(this_segname)
                    if first:
                        first = False
                    else:
                        fasta.append(local_fasta)

                    last_segname = this_segname
                    local_fasta = []

            this_resname = self.one_letter_resname()[i]
            if this_resid != last_resid:
                local_fasta.append(this_resname)
                last_resid = this_resid

        if by_chain or by_segname:
            fasta.append(local_fasta)

        elif not by_chain and not by_segname:
            fasta = local_fasta
            number_of_chains = 1
            chain_name = chain[0]

        final_fasta = ''

        if by_segname:
            number_of_chains = number_of_segnames

        for i in range(number_of_chains):
            saveme = False
            if fasta_format:
                from textwrap import TextWrapper
                wrapper = TextWrapper(width=int(width))
                if by_chain or by_segname:
                    if exclude_hetatm:
                        while "X" in fasta[i]:
                            fasta[i].remove("X")

                    joined_fasta = ''.join(fasta[i])
                else:
                    if exclude_hetatm:
                        while "X" in fasta:
                            fasta.remove("X")
                    joined_fasta = ''.join(fasta)

                formatted_fasta = "\n".join(wrapper.wrap(joined_fasta))

                if (len(formatted_fasta.strip()) > 0):
                    saveme = True

                if (len(header) > 1):
                    if by_chain:
                        formatted_fasta = header + ' chain:' + \
                            chain_name[i] + '\n' + formatted_fasta
                    elif by_segname:
                        formatted_fasta = header + ' segname:' + \
                            segname_name[i] + '\n' + formatted_fasta
                    else:
                        formatted_fasta = header + '\n' + formatted_fasta

                else:
                    if by_chain:
                        formatted_fasta = header + 'chain:' + \
                            chain_name[i] + '\n' + formatted_fasta
                    elif by_segname:
                        formatted_fasta = header + 'segname:' + \
                            segname_name[i] + '\n' + formatted_fasta
                    else:
                        formatted_fasta = header + '\n' + formatted_fasta

                if saveme:
                    final_fasta += formatted_fasta + '\n'
                else:
                    final_fasta += '\n'

        if fasta_format:
            self.setFasta(final_fasta)
        else:
            self.setFasta(fasta)

        return


if __name__ == "__main__":
    import doctest
    # doctest.testmod(verbose=True)
    doctest.testmod()
Source code for sasmol.topology

sasmol

Navigation

Related Topics