"""Read PDB files.
PDB files contain atomic coordinates.
`PDB <http://www.wwpdb.org/documentation/file-format>`_
:Author:
`Thomas-Otavio Peulen <http://tpeulen.github.io>`_
Requirements
------------
Revisions
---------
Notes
-----
The API is not stable yet and might change between revisions.
References
----------
Examples
--------
"""
from __future__ import annotations
import os
import urllib.request
import numpy as np
import chisurf
import chisurf.common
keys_formats = [
('i', 'i4'),
('chain', '|U1'),
('res_id', 'i4'),
('res_name', '|U5'),
('atom_id', 'i4'),
('atom_name', '|U5'),
('element', '|U1'),
('xyz', '3f8'),
('charge', 'f8'),
('radius', 'f8'),
('bfactor', 'f8'),
('mass', 'f8')
]
keys, formats = list(zip(*keys_formats))
[docs]def fetch_pdb_string(
pdb_id: str
) -> str:
"""Downloads from the RCSB a PDB file with the specified PDB-ID
:param pdb_id: The PDB-ID that is downloaded
:param get_binary: If get_binary is True a binary string is returned.
:return:
"""
url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdb_id[:4]
binary = urllib.request.urlopen(url).read()
return binary.decode("utf-8")
[docs]def fetch_pdb(
pdb_id: str,
**kwargs
):
st = fetch_pdb_string(pdb_id)
return parse_string_pdb(st, **kwargs)
[docs]def assign_element_to_atom_name(
atom_name: str
):
"""Tries to guess element from atom name if not recognised.
:param atom_name: string
Examples
--------
>>> assign_element_to_atom_name('CA')
C
"""
element = atom_name
if atom_name.upper() not in chisurf.common.atom_weights:
# Inorganic elements have their name shifted left by one position
# (is a convention in PDB, but not part of the standard).
# isdigit() check on last two characters to avoid mis-assignment of
# hydrogens atoms (GLN HE21 for example)
# Hs may have digit in [0]
putative_element = atom_name[1] if atom_name[0].isdigit() else \
atom_name[0]
if putative_element.capitalize() in chisurf.common.atom_weights.keys():
element = putative_element
return element
[docs]def parse_string_pdb(
string: str,
assign_charge: bool = False,
verbose: bool = chisurf.verbose
):
"""
:param string:
:param assign_charge:
:param verbose:
:return:
"""
rows = string.splitlines()
atoms = np.zeros(
len(rows),
dtype={
'names': keys,
'formats': formats
}
)
ni = 0
for line in rows:
if verbose:
print(line)
if line.startswith('ATOM'):
atom_name = line[12:16].strip().upper()
atoms['i'][ni] = ni
atoms['chain'][ni] = line[21]
atoms['res_name'][ni] = line[17:20].strip().upper()
atoms['atom_name'][ni] = atom_name
atoms['res_id'][ni] = line[22:26]
atoms['atom_id'][ni] = line[6:11]
atoms['xyz'][ni][0] = line[30:38]
atoms['xyz'][ni][1] = line[38:46]
atoms['xyz'][ni][2] = line[46:54]
atoms['bfactor'][ni] = line[60:65]
atoms['element'][ni] = assign_element_to_atom_name(atom_name)
try:
if assign_charge:
if atoms['res_name'][ni] in chisurf.common.CHARGE_DICT:
if atoms['atom_name'][ni] == chisurf.common.TITR_ATOM_COARSE[atoms['res_name'][ni]]:
atoms['charge'][ni] = chisurf.common.CHARGE_DICT[
atoms['res_name'][ni]
]
atoms['mass'][ni] = chisurf.common.atom_weights[atoms['element'][ni]]
atoms['radius'][ni] = chisurf.common.VDW_DICT[atoms['element'][ni]]
except KeyError:
print("Cloud not assign parameters to: %s" % line)
ni += 1
atoms = atoms[:ni]
if verbose:
print("Number of atoms: %s" % (ni + 1))
return atoms
[docs]def read(
filename: str,
assign_charge: bool = False,
verbose: bool = chisurf.verbose,
**kwargs
) -> np.ndarray:
""" Open pdb_file and read each line into pdb (a list of lines)
:param filename:
:param assign_charge:
:return:
numpy structured array containing the PDB info and VdW-radii and charges
Examples
--------
>>> import chisurf.fio
>>> pdb_file = './test/data/atomic_coordinates/pdb_files/hGBP1_closed.pdb'
>>> pdb = chisurf.fio.structure.coordinates.read(pdb_file, verbose=True)
>>> pdb[:5]
array([ (0, ' ', 7, 'MET', 1, 'N', 'N', [72.739, -17.501, 8.879], 0.0, 1.65, 0.0, 14.0067),
(1, ' ', 7, 'MET', 2, 'CA', 'C', [73.841, -17.042, 9.747], 0.0, 1.76, 0.0, 12.0107),
(2, ' ', 7, 'MET', 3, 'C', 'C', [74.361, -18.178, 10.643], 0.0, 1.76, 0.0, 12.0107),
(3, ' ', 7, 'MET', 4, 'O', 'O', [73.642, -18.708, 11.489], 0.0, 1.4, 0.0, 15.9994),
(4, ' ', 7, 'MET', 5, 'CB', 'C', [73.384, -15.89, 10.649], 0.0, 1.76, 0.0, 12.0107)],
dtype=[('i', '<i4'), ('chain', 'S1'), ('res_id', '<i4'), ('res_name', 'S5'), ('atom_id', '<i4'), ('atom_name', 'S5
'), ('element', 'S1'), ('xyz', '<f8', (3,)), ('charge', '<f8'), ('radius', '<f8'), ('bfactor', '<f8'), ('mass', '<f8')
])
"""
if os.path.isfile(filename):
with chisurf.fio.zipped.open_maybe_zipped(
filename=filename,
mode='r'
) as f:
string = f.read()
if verbose:
path, baseName = os.path.split(filename)
print("======================================")
print("Filename: %s" % filename)
print("Path: %s" % path)
fn1, ext1 = os.path.splitext(filename)
_, ext2 = os.path.splitext(fn1)
if '.pdb' in [ext1, ext2]:
atoms = parse_string_pdb(string, assign_charge, **kwargs)
else: # elif filename.endswith('.pqr'):
atoms = parse_string_pqr(string, **kwargs)
return atoms
else:
return np.zeros(0, dtype={'names': keys, 'formats': formats})
[docs]def write_pdb(
filename: str,
atoms=None,
append_model: bool = False,
append_coordinates: bool = False,
verbose: bool = False
):
""" Writes a structured numpy array containing the PDB-info to a PDB-file
If append_model and append_coordinates are False the file is overwritten. Otherwise the atomic-coordinates
are appended to the existing file.
:param filename: target-filename
:param atoms: structured numpy array
:param append_model: bool
If True the atoms are appended as a new models
:param append_coordinates:
If True the coordinates are appended to the file
"""
mode = 'a+' if append_model or append_coordinates else 'w'
if verbose:
print("Writing to file: ", filename)
with chisurf.fio.zipped.open_maybe_zipped(
filename=filename,
mode=mode
) as fp:
# http://cupnet.net/pdb_format/
al = [
"%-6s%5d %4s%1s%3s %1s%4d%1s %8.3f%8.3f%8.3f%6.2f%6.2f %2s%2s\n" %
(
"ATOM ", at['atom_id'], at['atom_name'], " ", at['res_name'],
at['chain'], at['res_id'], " ",
at['xyz'][0], at['xyz'][1], at['xyz'][2], 0.0, at['bfactor'],
at['element'], " "
)
for at in atoms
]
if append_model:
fp.write('MODEL')
fp.write("".join(al))
if append_model:
fp.write('ENDMDL')
[docs]def write_points(
filename: str,
points: np.ndarray,
verbose: bool = False,
mode: str = 'xyz',
density: np.ndarray = None
):
"""
:param filename:
:param points:
:param verbose:
:param mode:
:param density:
:return:
"""
if mode == 'pdb':
atoms = np.empty(
len(points),
dtype={
'names': keys,
'formats': formats
}
)
atoms['xyz'] = points
if density is not None:
atoms['bfactor'] = density
write_pdb(
filename,
atoms,
verbose=verbose
)
else:
write_xyz(
filename,
points,
verbose=verbose
)
[docs]def get_atom_index(
atoms: np.array,
chain_identifier: str,
residue_seq_number: int,
atom_name: str,
residue_name: str,
**kwargs
):
"""
Get the atom index by the the identifier
:param atoms:
:param chain_identifier:
:param residue_seq_number:
:param atom_name:
:param residue_name:
:param kwargs:
:return:
"""
# Determine Labeling position
if residue_seq_number is None or atom_name is None:
raise ValueError(
"Either attachment_atom number or residue and atom_name have to be provided."
)
verbose = kwargs.get('verbose', chisurf.verbose)
ignore_multiple_selections = kwargs.get('ignore_multiple_selections', True)
if verbose:
print("Labeling position")
print("Chain ID: %s" % chain_identifier)
print("Residue seq. number: %s" % residue_seq_number)
print("Residue name: %s" % residue_name)
print("Atom name: %s" % atom_name)
if chain_identifier is None or chain_identifier == '':
attachment_atom_index = np.where(
(atoms['res_id'] == residue_seq_number) &
(atoms['atom_name'] == atom_name)
)[0]
else:
attachment_atom_index = np.where(
(atoms['res_id'] == residue_seq_number) &
(atoms['atom_name'] == atom_name) &
(atoms['chain'] == chain_identifier)
)[0]
if len(attachment_atom_index) != 1 and not ignore_multiple_selections:
print("Labeling position")
print("Chain ID: %s" % chain_identifier)
print("Residue seq. number: %s" % residue_seq_number)
print("Residue name: %s" % residue_name)
print("Atom name: %s" % atom_name)
raise ValueError("Invalid selection of attachment atom")
else:
attachment_atom_index = attachment_atom_index[0]
if verbose:
print("Atom index: %s" % attachment_atom_index)
return attachment_atom_index
[docs]def parse_string_pqr(
string: str,
verbose: bool = chisurf.verbose
):
rows = string.splitlines()
atoms = np.zeros(len(rows), dtype={'names': keys, 'formats': formats})
ni = 0
for line in rows:
if line.startswith('ATOM'):
atom_name = line[12:16].strip().upper()
atoms['i'][ni] = ni
atoms['chain'][ni] = line[21]
atoms['atom_name'][ni] = atom_name.upper()
atoms['res_name'][ni] = line[17:20].strip().upper()
atoms['res_id'][ni] = line[21:27]
atoms['atom_id'][ni] = line[6:11]
atoms['xyz'][ni][0] = float(line[30:38].strip())
atoms['xyz'][ni][1] = float(line[38:46].strip())
atoms['xyz'][ni][2] = float(line[46:54].strip())
atoms['radius'][ni] = float(line[63:70].strip())
atoms['element'][ni] = assign_element_to_atom_name(atom_name)
atoms['charge'][ni] = float(line[55:62].strip())
atoms['element'][ni] = assign_element_to_atom_name(atom_name)
try:
atoms['mass'][ni] = chisurf.common.atom_weights[atoms['element'][ni]]
except KeyError:
print("Cloud not assign parameters to: %s" % line)
ni += 1
atoms = atoms[:ni]
if verbose:
print("Number of atoms: %s" % (ni + 1))
return atoms
[docs]def write_xyz(
filename: str,
points: np.array,
verbose: bool = chisurf.verbose
):
"""
Writes the points as xyz-format file. The xyz-format file can be opened and displayed for instance
in PyMol
:param filename: string
:param points: array
:param verbose: bool
"""
if verbose:
print("write_xyz\n")
print("Filename: %s\n" % filename)
with chisurf.fio.zipped.open_maybe_zipped(
filename=filename,
mode='w'
) as fp:
npoints = len(points)
fp.write('%i\n' % npoints)
fp.write('Name\n')
for p in points:
fp.write('D %.3f %.3f %.3f\n' % (p[0], p[1], p[2]))
[docs]def read_xyz(
filename: str
) -> np.array:
t = np.loadtxt(
filename,
skiprows=2,
usecols=(1, 2, 3),
delimiter=" "
)
return t
# # Removed for now because mmcif does not exist for Windows
# def read_mmcif(
# filename: str
# ):
# data = []
# with chisurf.fio.zipped.open_maybe_zipped(
# filename=filename,
# mode='r'
# ) as fp:
# reader = mmcif.fio.PdbxReader.PdbxReader(fp)
# reader.read(data)
# mmcif_atoms = data[0]['atom_site']
# n_atoms = len(mmcif_atoms)
# atoms = np.zeros(n_atoms, dtype={'names': keys, 'formats': formats})
# for i, d in enumerate(mmcif_atoms):
# if d[0] == "ATOM":
# atoms[i]['xyz'] = float(d[10]), float(d[11]), float(d[12])
# atoms[i]['i'] = int(d[1])
# atoms[i]['element'] = int(d[2])
# else:
# continue
#