01D. Exercises

Mingyang Lu

01/07/2024

How to access PDB data in MATLAB?

You will need to install the Bioinformatics Toolbox, which provides the pdbread function used below.
% Example: Retrieve atomic coordinates for PDB ID "7l3u".
% proc_pdb (defined at the end of this script) downloads the entry from
% files.rcsb.org and returns its atom records as a table.
% The statements have no trailing semicolon, so MATLAB echoes each result.
pdb_id = "7l3u"
% NOTE(review): the repeated line below looks like echoed output captured
% by the export rather than a second statement - confirm before removing.
pdb_id = "7l3u"
df = proc_pdb(pdb_id)
df = 1300×8 table
 atom_name | residue_name | chain_id | residue_number | x | y | z | b
1'N''VAL''A'1-8.4690-8.360010.710050.8700
2'CA''VAL''A'1-7.8400-7.437011.652050.5100
3'C''VAL''A'1-6.3720-7.809011.847049.6200
4'O''VAL''A'1-6.0510-8.980012.024050.0600
5'CB''VAL''A'1-8.5910-7.388012.994051.9200
6'N''LEU''A'2-5.4870-6.812011.811030.4800
7'CA''LEU''A'2-4.0530-7.034011.972029.1900
8'C''LEU''A'2-3.6630-6.855013.432029.1100
9'O''LEU''A'2-4.2920-6.082014.161029.6800
10'CB''LEU''A'2-3.2420-6.051011.117028
11'CG''LEU''A'2-2.9950-6.38009.640027.4100
12'CD1''LEU''A'2-4.2730-6.17408.866028.1400
13'CD2''LEU''A'2-1.9760-5.42009.087026.1300
14'N''SER''A'3-2.6330-7.596013.845028.6500
15'CA''SER''A'3-2.0280-7.453015.169028.7700
16'C''SER''A'3-1.2170-6.156015.226027.9900
17'O''SER''A'3-0.8410-5.584014.201027.0100
18'CB''SER''A'3-1.1200-8.653015.475028.7100
19'OG''SER''A'30.1060-8.542014.795027.7300
20'N''GLU''A'4-0.9790-5.663016.444035.4400
21'CA''GLU''A'4-0.1680-4.459016.568034.9900
22'C''GLU''A'41.2390-4.684015.987033.8800
23'O''GLU''A'41.8320-3.787015.376033.0500
24'CB''GLU''A'4-0.1250-4.005018.027036.1200
25'CG''GLU''A'40.9480-2.976018.291036.0200
26'CD''GLU''A'40.6950-1.643017.619035.8300
27'OE1''GLU''A'4-0.4370-1.357017.131036.2200
28'OE2''GLU''A'41.6630-0.859017.534036.0600
29'N''GLY''A'51.7980-5.874016.216027.7700
30'CA''GLY''A'53.0810-6.210015.634027.2500
31'C''GLY''A'53.0510-6.213014.123025.5400
32'O''GLY''A'53.9960-5.765013.488024.6800
33'N''GLU''A'61.9530-6.683013.531025.9400
34'CA''GLU''A'61.8410-6.595012.072025.3100
35'C''GLU''A'61.8050-5.144011.591024.6000
36'O''GLU''A'62.5140-4.784010.651023.7700
37'CB''GLU''A'60.6390-7.405011.563026.0100
38'CG''GLU''A'60.8740-8.905011.562026.4700
39'CD''GLU''A'6-0.4270-9.688011.490027.5600
40'OE1''GLU''A'6-1.5110-9.164011.824028.2900
41'OE2''GLU''A'6-0.3650-10.869011.102028.2700
42'N''TRP''A'71.0060-4.290012.233021.1100
43'CA''TRP''A'71.0050-2.874011.865020.2600
44'C''TRP''A'72.3920-2.260011.997020.1100
45'O''TRP''A'72.7960-1.441011.173019.1900
46'CB''TRP''A'7-0.0400-2.087012.665021.1000
47'CG''TRP''A'7-1.4910-2.322012.218021.5200
48'CD1''TRP''A'7-2.4740-2.823012.968022.6600
49'CD2''TRP''A'7-2.0480-2.095010.913021.1800
50'NE1''TRP''A'7-3.6440-2.901012.261023.1900
51'CE2''TRP''A'7-3.3950-2.499010.971022.3500
52'CE3''TRP''A'7-1.5440-1.60309.715020.1100
53'CZ2''TRP''A'7-4.2770-2.39809.876022.6000
54'CZ3''TRP''A'7-2.4060-1.51808.630020.2100
55'CH2''TRP''A'7-3.7640-1.90808.725021.5200
56'N''GLN''A'83.1200-2.613013.060023.5500
57'CA''GLN''A'84.4560-2.041013.209023.3400
58'C''GLN''A'85.3500-2.416012.031022.6900
59'O''GLN''A'86.1580-1.604011.591022.3600
60'CB''GLN''A'85.0600-2.506014.519024.1200
61'CG''GLN''A'84.4350-1.788015.753024.9800
62'CD''GLN''A'85.0310-2.262017.011026.1900
63'OE1''GLN''A'86.1550-2.774017.035026.6100
64'NE2''GLN''A'84.2970-2.127018.124027.5700
65'N''LEU''A'95.2410-3.646011.528023.0500
66'CA''LEU''A'96.0430-4.052010.379022.6900
67'C''LEU''A'95.6180-3.31209.119022.1300
68'O''LEU''A'96.4630-2.91708.318021.7300
69'CB''LEU''A'95.9030-5.550010.157023.0400
70'CG''LEU''A'96.6550-6.422011.173023.6900
71'CD1''LEU''A'96.3210-7.882011.034024.5300
72'CD2''LEU''A'98.1410-6.173011.078023.9900
73'N''VAL''A'104.3150-3.12308.933020.8700
74'CA''VAL''A'103.8330-2.33507.804020.4600
75'C''VAL''A'104.4420-0.94207.838019.8300
76'O''VAL''A'104.9070-0.41506.808019.4200
77'CB''VAL''A'102.2930-2.27407.819020.7600
78'CG1''VAL''A'101.7330-1.20606.796020.3300
79'CG2''VAL''A'101.7120-3.63807.544021.4300
80'N''LEU''A'114.3900-0.29009.011022.0400
81'CA''LEU''A'114.81401.10309.110021.8800
82'C''LEU''A'116.32801.23309.060021.6300
83'O''LEU''A'116.84602.28108.640021.3700
84'CB''LEU''A'114.20401.786010.353022.6500
85'CG''LEU''A'112.66701.809010.233023.1700
86'CD1''LEU''A'112.18502.583011.483024.2300
87'CD2''LEU''A'112.12402.37408.852022.8300
88'N''HIS''A'127.04800.19509.520022.0500
89'CA''HIS''A'128.49800.20109.425022.1400
90'C''HIS''A'128.95200.19107.972021.7000
91'O''HIS''A'129.80800.97907.571021.7400
92'CB''HIS''A'129.0240-1.010010.173022.8200
93'CG''HIS''A'1210.4880-1.215010.002023.0700
94'ND1''HIS''A'1211.4420-0.482010.674023.4400
95'CD2''HIS''A'1211.1700-2.04909.184023.1500
96'CE1''HIS''A'1212.6490-0.920010.355023.7800
97'NE2''HIS''A'1212.5100-1.82309.392023.6300
98'N''VAL''A'138.3200-0.62207.129022.5400
99'CA''VAL''A'138.7410-0.57805.735022.5000
100'C''VAL''A'138.22100.66905.048021.9600

Define MATLAB functions

function df = proc_pdb(pdb_id)
%PROC_PDB Download a PDB entry and return its atom records as a table.
%   df = PROC_PDB(pdb_id) downloads <pdb_id>.pdb from files.rcsb.org,
%   parses it with pdbread (Bioinformatics Toolbox) and returns a table
%   with one row per entry of the first model's Atom list and the columns
%   atom_name, residue_name, chain_id, residue_number, x, y, z, b.
%
%   Input:
%     pdb_id - PDB identifier, as a character vector or a string scalar
%              (e.g. '7l3u' or "7l3u").
%
%   Output:
%     df     - table built from the parsed atom records.
%
%   The file is downloaded to a temporary location and is removed when the
%   function exits, whether it returns normally or raises an error.

% Accept both "7l3u" and '7l3u'. Concatenating a string scalar with a char
% vector using [] would create a 1x2 string array instead of a file name,
% so work with a char row vector from here on.
pdb_id = char(pdb_id);
if isempty(pdb_id) || ~isrow(pdb_id)
    error('proc_pdb:invalidId', ...
        'pdb_id must be a non-empty character vector or string scalar.');
end

% Download PDB file into a unique temporary file so that an existing
% <pdb_id>.pdb in the current folder is never overwritten or deleted.
pdb_url = sprintf('https://files.rcsb.org/download/%s.pdb', pdb_id);
target_path = [tempname '.pdb'];

% Clean up: delete the downloaded PDB file on every exit path, including
% errors raised by websave or pdbread.
cleanup = onCleanup(@() remove_if_exists(target_path)); %#ok<NASGU>

pdb_file_path = websave(target_path, pdb_url);

% Read PDB file using pdbread
pdbStruct = pdbread(pdb_file_path);

% Entries with several models store Model as a struct array; use the first
% model so that the dot-indexing below yields a single Atom list.
pdbModel = pdbStruct.Model(1).Atom;

% Extract ATOM data: each cell array holds one value per atom, so struct()
% creates a struct array with one element per atom.
atom_data = struct(...
    'atom_name', {pdbModel.AtomName}, ...
    'residue_name', {pdbModel.resName}, ...
    'chain_id', {pdbModel.chainID}, ...
    'residue_number', {pdbModel.resSeq}, ...
    'x', {pdbModel.X}, ...
    'y', {pdbModel.Y}, ...
    'z', {pdbModel.Z}, ...
    'b', {pdbModel.tempFactor} ...
    );

% Create a table (one row per struct array element)
df = struct2table(atom_data);
end

function remove_if_exists(file_path)
% Delete file_path if it exists; stay silent when it was never created.
if exist(file_path, 'file') == 2
    delete(file_path);
end
end