Source code for chisurf.fio.structure.coordinates

"""Read PDB files.

PDB files contain atomic coordinates.

`PDB <http://www.wwpdb.org/documentation/file-format>`_

:Author:
  `Thomas-Otavio Peulen <http://tpeulen.github.io>`_

Requirements
------------


Revisions
---------

Notes
-----
The API is not stable yet and might change between revisions.

References
----------

Examples
--------

"""

from __future__ import annotations

import os
import urllib.request
import numpy as np

import chisurf
import chisurf.common

keys_formats = [
    ('i', 'i4'),
    ('chain', '|U1'),
    ('res_id', 'i4'),
    ('res_name', '|U5'),
    ('atom_id', 'i4'),
    ('atom_name', '|U5'),
    ('element', '|U1'),
    ('xyz', '3f8'),
    ('charge', 'f8'),
    ('radius', 'f8'),
    ('bfactor', 'f8'),
    ('mass', 'f8')
]

keys, formats = list(zip(*keys_formats))


[docs]def fetch_pdb_string( pdb_id: str ) -> str: """Downloads from the RCSB a PDB file with the specified PDB-ID :param pdb_id: The PDB-ID that is downloaded :param get_binary: If get_binary is True a binary string is returned. :return: """ url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdb_id[:4] binary = urllib.request.urlopen(url).read() return binary.decode("utf-8")
[docs]def fetch_pdb( pdb_id: str, **kwargs ): st = fetch_pdb_string(pdb_id) return parse_string_pdb(st, **kwargs)
[docs]def assign_element_to_atom_name( atom_name: str ): """Tries to guess element from atom name if not recognised. :param atom_name: string Examples -------- >>> assign_element_to_atom_name('CA') C """ element = atom_name if atom_name.upper() not in chisurf.common.atom_weights: # Inorganic elements have their name shifted left by one position # (is a convention in PDB, but not part of the standard). # isdigit() check on last two characters to avoid mis-assignment of # hydrogens atoms (GLN HE21 for example) # Hs may have digit in [0] putative_element = atom_name[1] if atom_name[0].isdigit() else \ atom_name[0] if putative_element.capitalize() in chisurf.common.atom_weights.keys(): element = putative_element return element
[docs]def parse_string_pdb( string: str, assign_charge: bool = False, verbose: bool = chisurf.verbose ): """ :param string: :param assign_charge: :param verbose: :return: """ rows = string.splitlines() atoms = np.zeros( len(rows), dtype={ 'names': keys, 'formats': formats } ) ni = 0 for line in rows: if verbose: print(line) if line.startswith('ATOM'): atom_name = line[12:16].strip().upper() atoms['i'][ni] = ni atoms['chain'][ni] = line[21] atoms['res_name'][ni] = line[17:20].strip().upper() atoms['atom_name'][ni] = atom_name atoms['res_id'][ni] = line[22:26] atoms['atom_id'][ni] = line[6:11] atoms['xyz'][ni][0] = line[30:38] atoms['xyz'][ni][1] = line[38:46] atoms['xyz'][ni][2] = line[46:54] atoms['bfactor'][ni] = line[60:65] atoms['element'][ni] = assign_element_to_atom_name(atom_name) try: if assign_charge: if atoms['res_name'][ni] in chisurf.common.CHARGE_DICT: if atoms['atom_name'][ni] == chisurf.common.TITR_ATOM_COARSE[atoms['res_name'][ni]]: atoms['charge'][ni] = chisurf.common.CHARGE_DICT[ atoms['res_name'][ni] ] atoms['mass'][ni] = chisurf.common.atom_weights[atoms['element'][ni]] atoms['radius'][ni] = chisurf.common.VDW_DICT[atoms['element'][ni]] except KeyError: print("Cloud not assign parameters to: %s" % line) ni += 1 atoms = atoms[:ni] if verbose: print("Number of atoms: %s" % (ni + 1)) return atoms
[docs]def read( filename: str, assign_charge: bool = False, verbose: bool = chisurf.verbose, **kwargs ) -> np.ndarray: """ Open pdb_file and read each line into pdb (a list of lines) :param filename: :param assign_charge: :return: numpy structured array containing the PDB info and VdW-radii and charges Examples -------- >>> import chisurf.fio >>> pdb_file = './test/data/atomic_coordinates/pdb_files/hGBP1_closed.pdb' >>> pdb = chisurf.fio.structure.coordinates.read(pdb_file, verbose=True) >>> pdb[:5] array([ (0, ' ', 7, 'MET', 1, 'N', 'N', [72.739, -17.501, 8.879], 0.0, 1.65, 0.0, 14.0067), (1, ' ', 7, 'MET', 2, 'CA', 'C', [73.841, -17.042, 9.747], 0.0, 1.76, 0.0, 12.0107), (2, ' ', 7, 'MET', 3, 'C', 'C', [74.361, -18.178, 10.643], 0.0, 1.76, 0.0, 12.0107), (3, ' ', 7, 'MET', 4, 'O', 'O', [73.642, -18.708, 11.489], 0.0, 1.4, 0.0, 15.9994), (4, ' ', 7, 'MET', 5, 'CB', 'C', [73.384, -15.89, 10.649], 0.0, 1.76, 0.0, 12.0107)], dtype=[('i', '<i4'), ('chain', 'S1'), ('res_id', '<i4'), ('res_name', 'S5'), ('atom_id', '<i4'), ('atom_name', 'S5 '), ('element', 'S1'), ('xyz', '<f8', (3,)), ('charge', '<f8'), ('radius', '<f8'), ('bfactor', '<f8'), ('mass', '<f8') ]) """ if os.path.isfile(filename): with chisurf.fio.zipped.open_maybe_zipped( filename=filename, mode='r' ) as f: string = f.read() if verbose: path, baseName = os.path.split(filename) print("======================================") print("Filename: %s" % filename) print("Path: %s" % path) fn1, ext1 = os.path.splitext(filename) _, ext2 = os.path.splitext(fn1) if '.pdb' in [ext1, ext2]: atoms = parse_string_pdb(string, assign_charge, **kwargs) else: # elif filename.endswith('.pqr'): atoms = parse_string_pqr(string, **kwargs) return atoms else: return np.zeros(0, dtype={'names': keys, 'formats': formats})
[docs]def write_pdb( filename: str, atoms=None, append_model: bool = False, append_coordinates: bool = False, verbose: bool = False ): """ Writes a structured numpy array containing the PDB-info to a PDB-file If append_model and append_coordinates are False the file is overwritten. Otherwise the atomic-coordinates are appended to the existing file. :param filename: target-filename :param atoms: structured numpy array :param append_model: bool If True the atoms are appended as a new models :param append_coordinates: If True the coordinates are appended to the file """ mode = 'a+' if append_model or append_coordinates else 'w' if verbose: print("Writing to file: ", filename) with chisurf.fio.zipped.open_maybe_zipped( filename=filename, mode=mode ) as fp: # http://cupnet.net/pdb_format/ al = [ "%-6s%5d %4s%1s%3s %1s%4d%1s %8.3f%8.3f%8.3f%6.2f%6.2f %2s%2s\n" % ( "ATOM ", at['atom_id'], at['atom_name'], " ", at['res_name'], at['chain'], at['res_id'], " ", at['xyz'][0], at['xyz'][1], at['xyz'][2], 0.0, at['bfactor'], at['element'], " " ) for at in atoms ] if append_model: fp.write('MODEL') fp.write("".join(al)) if append_model: fp.write('ENDMDL')
[docs]def write_points( filename: str, points: np.ndarray, verbose: bool = False, mode: str = 'xyz', density: np.ndarray = None ): """ :param filename: :param points: :param verbose: :param mode: :param density: :return: """ if mode == 'pdb': atoms = np.empty( len(points), dtype={ 'names': keys, 'formats': formats } ) atoms['xyz'] = points if density is not None: atoms['bfactor'] = density write_pdb( filename, atoms, verbose=verbose ) else: write_xyz( filename, points, verbose=verbose )
[docs]def get_atom_index( atoms: np.array, chain_identifier: str, residue_seq_number: int, atom_name: str, residue_name: str, **kwargs ): """ Get the atom index by the the identifier :param atoms: :param chain_identifier: :param residue_seq_number: :param atom_name: :param residue_name: :param kwargs: :return: """ # Determine Labeling position if residue_seq_number is None or atom_name is None: raise ValueError( "Either attachment_atom number or residue and atom_name have to be provided." ) verbose = kwargs.get('verbose', chisurf.verbose) ignore_multiple_selections = kwargs.get('ignore_multiple_selections', True) if verbose: print("Labeling position") print("Chain ID: %s" % chain_identifier) print("Residue seq. number: %s" % residue_seq_number) print("Residue name: %s" % residue_name) print("Atom name: %s" % atom_name) if chain_identifier is None or chain_identifier == '': attachment_atom_index = np.where( (atoms['res_id'] == residue_seq_number) & (atoms['atom_name'] == atom_name) )[0] else: attachment_atom_index = np.where( (atoms['res_id'] == residue_seq_number) & (atoms['atom_name'] == atom_name) & (atoms['chain'] == chain_identifier) )[0] if len(attachment_atom_index) != 1 and not ignore_multiple_selections: print("Labeling position") print("Chain ID: %s" % chain_identifier) print("Residue seq. number: %s" % residue_seq_number) print("Residue name: %s" % residue_name) print("Atom name: %s" % atom_name) raise ValueError("Invalid selection of attachment atom") else: attachment_atom_index = attachment_atom_index[0] if verbose: print("Atom index: %s" % attachment_atom_index) return attachment_atom_index
[docs]def parse_string_pqr( string: str, verbose: bool = chisurf.verbose ): rows = string.splitlines() atoms = np.zeros(len(rows), dtype={'names': keys, 'formats': formats}) ni = 0 for line in rows: if line.startswith('ATOM'): atom_name = line[12:16].strip().upper() atoms['i'][ni] = ni atoms['chain'][ni] = line[21] atoms['atom_name'][ni] = atom_name.upper() atoms['res_name'][ni] = line[17:20].strip().upper() atoms['res_id'][ni] = line[21:27] atoms['atom_id'][ni] = line[6:11] atoms['xyz'][ni][0] = float(line[30:38].strip()) atoms['xyz'][ni][1] = float(line[38:46].strip()) atoms['xyz'][ni][2] = float(line[46:54].strip()) atoms['radius'][ni] = float(line[63:70].strip()) atoms['element'][ni] = assign_element_to_atom_name(atom_name) atoms['charge'][ni] = float(line[55:62].strip()) atoms['element'][ni] = assign_element_to_atom_name(atom_name) try: atoms['mass'][ni] = chisurf.common.atom_weights[atoms['element'][ni]] except KeyError: print("Cloud not assign parameters to: %s" % line) ni += 1 atoms = atoms[:ni] if verbose: print("Number of atoms: %s" % (ni + 1)) return atoms
[docs]def write_xyz( filename: str, points: np.array, verbose: bool = chisurf.verbose ): """ Writes the points as xyz-format file. The xyz-format file can be opened and displayed for instance in PyMol :param filename: string :param points: array :param verbose: bool """ if verbose: print("write_xyz\n") print("Filename: %s\n" % filename) with chisurf.fio.zipped.open_maybe_zipped( filename=filename, mode='w' ) as fp: npoints = len(points) fp.write('%i\n' % npoints) fp.write('Name\n') for p in points: fp.write('D %.3f %.3f %.3f\n' % (p[0], p[1], p[2]))
[docs]def read_xyz( filename: str ) -> np.array: t = np.loadtxt( filename, skiprows=2, usecols=(1, 2, 3), delimiter=" " ) return t
# # Removed for now because mmcif does not exist for Windows # def read_mmcif( # filename: str # ): # data = [] # with chisurf.fio.zipped.open_maybe_zipped( # filename=filename, # mode='r' # ) as fp: # reader = mmcif.fio.PdbxReader.PdbxReader(fp) # reader.read(data) # mmcif_atoms = data[0]['atom_site'] # n_atoms = len(mmcif_atoms) # atoms = np.zeros(n_atoms, dtype={'names': keys, 'formats': formats}) # for i, d in enumerate(mmcif_atoms): # if d[0] == "ATOM": # atoms[i]['xyz'] = float(d[10]), float(d[11]), float(d[12]) # atoms[i]['i'] = int(d[1]) # atoms[i]['element'] = int(d[2]) # else: # continue #