Source code for sasmol.topology

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# from __future__ import unicode_literals

#   SASMOL: Copyright (C) 2011 Joseph E. Curtis, Ph.D.
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# TOPOLOGY
#
# 1/26/2012	--	initial coding				: jc
# 12/26/2015	--	refactored for release      : jc
# 08/18/2016	--	documentation               : jc
#
# 1         2         3         4         5         6         7
# LC4567890123456789012345678901234567890123456789012345678901234567890123456789
# *      **

import numpy
import sasmol.charmm_topology as charmm_topology
import sasmol.properties as properties


[docs]class Topology(charmm_topology.CharmmTopology): ''' This class contains topology information used other modules. ''' def __init__(self): pass
[docs] def renumber(self, **kwargs): ''' Method to renumber index and resid fields Parameters ---------- kwargs index : resid : Returns ------- None updated system object index resid Examples ------- >>> import sasmol.system as system >>> molecule = system.Molecule('hiv1_gag.pdb') >>> molecule.index()[0] 1 >>> molecule.resid()[0] 1 default renumber() with no arguments renumbers both index and resid starting at 1 >>> molecule.renumber() >>> molecule.index()[0] 1 >>> molecule.resid()[0] 1 passing a kwarg with a value will renumber appropriate attribute with value passed >>> molecule.renumber(index=3) >>> molecule.index()[0] 3 >>> molecule.resid()[0] 1 >>> molecule.renumber(resid=8) >>> molecule.index()[0] 3 >>> molecule.resid()[0] 8 >>> molecule.renumber(index=223, resid=18) >>> molecule.index()[0] 223 >>> molecule.resid()[0] 18 ''' renumber_index_flag = False renumber_index_start = 1 renumber_resid_flag = False renumber_resid_start = 1 if len(kwargs) == 0: renumber_index_flag = True renumber_resid_flag = True try: if kwargs['index'] is not None: renumber_index_flag = True renumber_index_start = kwargs['index'] except: pass try: if kwargs['resid'] is not None: renumber_resid_flag = True renumber_resid_start = kwargs['resid'] except: pass if renumber_index_flag: start = renumber_index_start index = numpy.array( [x for x in range(start, start + self._natoms)]) self._index = index if renumber_resid_flag: resid = self._resid resid_array = [] count = renumber_resid_start for i in range(len(resid)): this_resid = resid[i] if (i == 0): last_resid = this_resid else: if (this_resid != last_resid): count += 1 resid_array.append(count) last_resid = this_resid self._resid = numpy.array(resid_array, int) return
[docs] def make_constraint_pdb(self, filename, basis_type, **kwargs): ''' Method to rename attribute fields and assign values of 1.00 for the given basis type. "beta" is the default attribute field to reassign Default usage sets all attribute fields to zero prior to re-assigning basis type atoms to 1.00 Parameters ---------- filename string : name of output PDB file to write basis types: 'heavy : all atoms except hydrogen 'protein' : all atoms in moltype protein 'nucleic' : all atoms in moltype nucleic 'backbone' -> proteins: N, CA, C, O 'solute' : all protein and nucleic kwargs optional arguments field='beta' : write 1.00 to beta field field='occupancy' : write 1.00 to occupancy field reset=True : set all values to 0.00 before setting individual vaules to 1.00 reset=False: only change desired values to 1.00 ignoring all other values Returns ------- None updated system object writes pdb to disk Examples -------- >>> import sasmol.system as system >>> molecule = system.Molecule('hiv1_gag.pdb') >>> molecule.make_constraint_pdb('constrainted_backbone_hiv1_gag.pdb', 'backbone') >>> molecule.make_constraint_pdb('constrainted_backbone_occupancy_hiv1_gag.pdb', 'backbone', field='occupancy') constrtain all heavy atoms >>> molecule.make_constraint_pdb('constrainted_heavy_hiv1_gag.pdb', 'heavy') constrain all protein atoms >>> molecule.make_constraint_pdb('constrainted_protein_hiv1_gag.pdb', 'protein') constrain all non-solvent atoms >>> molecule.make_constraint_pdb('constrainted_solute_hiv1_gag.pdb', 'solute') Note ---- Only moltype `protein` and `nucleic` are supported. Output PDB file will contain coordinates from frame 0 ''' try: field = kwargs['field'] except: field = 'beta' try: reset = kwargs['reset'] except: reset = True if reset: new_value = ['0.00' for x in range(self._natoms)] if field == 'beta': self._beta = new_value elif field == 'occupancy': self._occupancy = new_value if basis_type == 'backbone': basis_string = "name[i] == 'N' or name[i] == 'CA' or name[i] == 'C' or name[i] == 'O'" elif basis_type == 'heavy': basis_string = "name[i][0] != 'H'" elif basis_type == 'protein': basis_string = "moltype[i] == 'protein'" elif basis_type == 'nucleic': basis_string = "moltype[i] == 'nucleic'" elif basis_type == 'solute': basis_string = "moltype[i] == 'protein' or moltype[i] == 'nucleic'" error, mask = self.get_subset_mask(basis_string) if len(error) > 0: print('error = ', error) return if field == 'beta': descriptor = self._beta elif field == 'occupancy': descriptor = self._occupancy error = self.set_descriptor_using_mask(mask, descriptor, '1.00') if len(error) > 0: print('error = ', error) return if field == 'beta': self._beta = descriptor elif field == 'occupancy': self._occupancy = descriptor self.write_pdb(filename, 0, 'w') return
[docs] def make_backbone_pdb_from_fasta(self, filename, moltype, **kwargs): """ Method to write a PDB file of one atom per residue based on input FASTA sequence http://en.wikipedia.org/wiki/FASTA_format Parameters ---------- filename string : name of output PDB file to write moltype string -> 'protein' ; fasta sequence of a protein -> 'nucleic' ; fasta sequence of a nucleic acid kwargs optional future arguments Returns ------- self._fasta sets the fasta attribute in a system object Examples ------- >>> import sasmol.system as system >>> molecule = system.Molecule("hiv1_gag.pdb") >>> molecule.create_fasta() >>> molecule.fasta()[:5] ['G', 'A', 'R', 'A', 'S'] >>> molecule.make_backbone_pdb_from_fasta('sequence.pdb', 'protein') Note ____ Only protein and nucleic acid one-letter codes are supported Protein PDB files are written with one "CA" atom per residue Nucleic acid PDF files are written with on "O5'" atom per residue Protein PDB residue names are written as standard residue names; terminal patch names such as GLYP and PROP belong in topology/psfgen contexts. All coordinate values are set to 0.000 """ if moltype == "protein": residue_dictionary = self.one_to_three_letter_protein_residue_dictionary sequence_name = "CA" elif moltype == "nucleic": residue_dictionary = self.one_to_three_letter_nucleic_residue_dictionary sequence_name = "O5'" sequence = [] first_flag = True for residue in self._fasta: if moltype == "protein": if residue == 'G' and first_flag: sequence.append('GLYP') elif residue == 'P' and first_flag: sequence.append('PROP') else: sequence.append(residue_dictionary[residue]) elif moltype == "nucleic": sequence.append(residue_dictionary[residue]) first_flag = False import sasmol.system as system molecule = system.Molecule_Maker(len(sequence), name=sequence_name) molecule._resname = sequence molecule.write_pdb(filename, 0, 'w') return
[docs] def create_fasta(self, **kwargs): """ Method to make a fasta file compatible lists of residue names http://en.wikipedia.org/wiki/FASTA_format Parameters ---------- kwargs optional future arguments Returns ------- self._fasta sets the fasta attribute in a system object Examples ------- >>> import sasmol.system as system >>> molecule = system.Molecule("hiv1_gag.pdb") >>> molecule.create_fasta() >>> print(molecule.fasta()[:5]) # doctest: +NORMALIZE_WHITESPACE ['G', 'A', 'R', 'A', 'S'] >>> molecule.create_fasta(fasta_format=True) >>> print(molecule.fasta()[:5]) > GAR >>> molecule.create_fasta(fasta_format=True,width='60') >>> print(molecule.fasta()[:-1]) > GARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQIL GQLQPSLQTGSEELRSLYNTIAVLYCVHQRIDVKDTKEALDKIEEEQNKSKKKAQQAAAD TGNNSQVSQNYPIVQNLQGQMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATP QDLNTMLNTVGGHQAAMQMLKETINEEAAEWDRVHPVHAGPIAPGQMREPRGSDIAGTTS TLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPTSILDIRQGPKEPFRDYVDRFY KTLRAEQASQEVKNAATETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKAR VIAEAMSQVTNSATIMMQKGNFRNQRKTVKCFNCGKEGHIAKNCRAPRKKGCWKCGKEGH QMKDCTERQAN >>> molecule.create_fasta(fasta_format=True,width='60',name='aar') >>> print(molecule.fasta()[:90]) >aar GARASVLSGGELDKWEKIRLRPGGKKQYKLKHIVWASRELERFAVNPGLLETSEGCRQIL GQLQPSLQTGSEELRSLYNTIAVL Note ---- Other use types below molecule.create_fasta(fasta_format=True,exclude_hetatm=True,by_chain=True) print(molecule.fasta()) print('>>> testing by_chain with HETATM (t.py): ') molecule.create_fasta(fasta_format=True,by_chain=True) print(molecule.fasta()) print('>>> testing by_segname (t.py): ') molecule.create_fasta(fasta_format=True,exclude_hetatm=True,by_segname=True) print(molecule.fasta()) Note that this creates a simple string that is associated with the molecule (self) and it will return without assigning a string if a non-standard three letter code is supplied. """ fasta_format = False by_chain = False by_segname = False single_chain = False single_segname = False exclude_hetatm = False if 'fasta_format' in kwargs: fasta_format = True if 'name' in kwargs: name = kwargs['name'] header = '>' + name else: header = '>' if 'width' in kwargs: width = kwargs['width'] else: width = '80' if 'exclude_hetatm' in kwargs: exclude_hetatm = True if 'by_chain' in kwargs: by_chain = True elif 'by_segname' in kwargs: by_segname = True one_resname = [] resname = self.resname() atom = self.atom() chain = self.chain() segname = self.segname() residue_dictionary = { 'ALA': 'A', 'ARG': 'R', 'ASP': 'D', 'ASN': 'N', 'CYS': 'C', 'GLU': 'E', 'GLN': 'Q', 'GLY': 'G', 'HSD': 'H', 'HIS': 'H', 'HSE': 'H', 'HSP': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V', 'GUA': 'G', 'CYT': 'C', 'ADE': 'A', 'THY': 'T', 'URA': 'U' } for i in range(len(resname)): this_resname = resname[i] if this_resname in residue_dictionary: one_resname.append(residue_dictionary[this_resname]) elif (atom[i] == 'HETATM'): # print 'skipping non-standard resname in HETATM record: # ',this_resname one_resname.append("X") else: print('non standard resname: ', this_resname) print('unable to make Fasta conversion') return self.setOne_letter_resname(one_resname) resid = self.resid() last_resid = None last_chain = None last_segname = None fasta = [] local_fasta = [] number_of_chains = 0 number_of_segnames = 0 chain_name = [] segname_name = [] first = True for i in range(len(resid)): this_resid = resid[i] if by_chain: this_chain = chain[i] if this_chain != last_chain: number_of_chains += 1 chain_name.append(this_chain) if first: first = False else: fasta.append(local_fasta) last_chain = this_chain local_fasta = [] if by_segname: this_segname = segname[i] if this_segname != last_segname: number_of_segnames += 1 segname_name.append(this_segname) if first: first = False else: fasta.append(local_fasta) last_segname = this_segname local_fasta = [] this_resname = self.one_letter_resname()[i] if this_resid != last_resid: local_fasta.append(this_resname) last_resid = this_resid if by_chain or by_segname: fasta.append(local_fasta) elif not by_chain and not by_segname: fasta = local_fasta number_of_chains = 1 chain_name = chain[0] final_fasta = '' if by_segname: number_of_chains = number_of_segnames for i in range(number_of_chains): saveme = False if fasta_format: from textwrap import TextWrapper wrapper = TextWrapper(width=int(width)) if by_chain or by_segname: if exclude_hetatm: while "X" in fasta[i]: fasta[i].remove("X") joined_fasta = ''.join(fasta[i]) else: if exclude_hetatm: while "X" in fasta: fasta.remove("X") joined_fasta = ''.join(fasta) formatted_fasta = "\n".join(wrapper.wrap(joined_fasta)) if (len(formatted_fasta.strip()) > 0): saveme = True if (len(header) > 1): if by_chain: formatted_fasta = header + ' chain:' + \ chain_name[i] + '\n' + formatted_fasta elif by_segname: formatted_fasta = header + ' segname:' + \ segname_name[i] + '\n' + formatted_fasta else: formatted_fasta = header + '\n' + formatted_fasta else: if by_chain: formatted_fasta = header + 'chain:' + \ chain_name[i] + '\n' + formatted_fasta elif by_segname: formatted_fasta = header + 'segname:' + \ segname_name[i] + '\n' + formatted_fasta else: formatted_fasta = header + '\n' + formatted_fasta if saveme: final_fasta += formatted_fasta + '\n' else: final_fasta += '\n' if fasta_format: self.setFasta(final_fasta) else: self.setFasta(fasta) return
if __name__ == "__main__": import doctest # doctest.testmod(verbose=True) doctest.testmod()