In [1]:
# Retrieve the PDB file for the given PDB ID and return its per-atom
# coordinate data as a pandas DataFrame.
def pdb_to_dataframe(pdb_id, pdir="."):
    """Download a PDB entry and return its atom records as a pandas DataFrame.

    The PDB-format file is fetched with Biopython's ``PDBList`` into ``pdir``
    (the file stays on disk after the call), parsed with ``PDBParser``, and
    every atom of every model/chain/residue becomes one row.

    Parameters
    ----------
    pdb_id : str
        PDB identifier, e.g. ``"7l3u"``.
    pdir : str, optional
        Directory the PDB file is downloaded to. Defaults to the current
        working directory, which is what this function always used before
        the parameter existed.

    Returns
    -------
    pandas.DataFrame
        One row per atom with the columns ``atom_name``, ``residue_name``,
        ``chain_id``, ``residue_number``, ``x``, ``y``, ``z`` and ``b``
        (taken from ``atom.bfactor``). The columns are present even when the
        structure contains no atoms.

    Raises
    ------
    FileNotFoundError
        If no file is available at the path reported by the download step.

    Notes
    -----
    Rows from all models are concatenated and no model column is emitted, so
    a multi-model entry yields repeated atoms. ``residue_number`` is
    ``residue.id[1]`` only; the other elements of the residue id are not
    recorded.
    """
    import os

    import pandas as pd
    from Bio import PDB

    # Fixed schema so that an empty structure still yields the expected columns.
    columns = [
        'atom_name',
        'residue_name',
        'chain_id',
        'residue_number',
        'x',
        'y',
        'z',
        'b',
    ]

    # Download PDB file
    pdbl = PDB.PDBList()
    pdb_file_path = pdbl.retrieve_pdb_file(pdb_id, pdir=pdir, file_format="pdb")

    # Fail here with a clear message instead of deep inside the parser when
    # the download step did not produce a file.
    if not pdb_file_path or not os.path.exists(pdb_file_path):
        raise FileNotFoundError(
            f"Could not retrieve PDB file for ID {pdb_id!r} "
            f"(expected at {pdb_file_path!r})"
        )

    # Read PDB file
    pdb_parser = PDB.PDBParser(QUIET=True)
    structure = pdb_parser.get_structure(pdb_id, pdb_file_path)

    # Extract ATOM data: walk structure -> model -> chain -> residue -> atom
    atom_data = [
        {
            'atom_name': atom.get_name(),
            'residue_name': residue.resname,
            'chain_id': chain.id,
            'residue_number': residue.id[1],
            'x': atom.coord[0],
            'y': atom.coord[1],
            'z': atom.coord[2],
            'b': atom.bfactor,
        }
        for model in structure
        for chain in model
        for residue in chain
        for atom in residue
    ]

    # Create a DataFrame
    return pd.DataFrame(atom_data, columns=columns)
In [2]:
# Example: retrieve the atomic coordinates for PDB ID "7l3u".
# pdb_to_dataframe downloads the PDB file into the current directory and
# returns one row per atom (atom/residue names, chain, residue number,
# x/y/z coordinates and B-factor).
pdb_id = "7l3u"
df = pdb_to_dataframe(pdb_id)
# Print the DataFrame as plain text; pandas abbreviates a long frame to its
# first and last rows and reports the overall shape on the final line.
print(df)
Structure exists: './pdb7l3u.ent' atom_name residue_name chain_id residue_number x y z \ 0 N VAL A 1 -8.469 -8.360 10.710 1 CA VAL A 1 -7.840 -7.437 11.652 2 C VAL A 1 -6.372 -7.809 11.847 3 O VAL A 1 -6.051 -8.980 12.024 4 CB VAL A 1 -8.591 -7.388 12.994 ... ... ... ... ... ... ... ... 1401 O HOH A 441 2.231 6.639 11.973 1402 O HOH A 442 4.713 1.622 14.629 1403 O HOH A 443 -8.433 15.713 -9.062 1404 O HOH A 444 12.564 13.715 1.647 1405 O HOH A 445 4.148 5.888 10.888 b 0 50.87 1 50.51 2 49.62 3 50.06 4 51.92 ... ... 1401 44.39 1402 42.52 1403 38.80 1404 31.93 1405 38.42 [1406 rows x 8 columns]
In [4]:
# Print the x coordinates: selecting the single column 'x' yields a pandas
# Series with one value per atom row of df.
print(df['x'])
0 -8.469 1 -7.840 2 -6.372 3 -6.051 4 -8.591 ... 1401 2.231 1402 4.713 1403 -8.433 1404 12.564 1405 4.148 Name: x, Length: 1406, dtype: float32