AlphaFold Predictions
Download and analyze AlphaFold predicted protein structures from the AlphaFold Protein Structure Database.
Download Structures
Single Structure by UniProt ID
import requests
def download_alphafold(uniprot_id, output_dir='.', version=4, file_format='pdb', timeout=30):
    '''Download an AlphaFold model file for a UniProt accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        output_dir: Directory the file is written to; created if missing.
        version: Database release number used in the file name (the
            ``_v4`` part of ``AF-<id>-F1-model_v4.pdb``).
        file_format: 'pdb' or 'cif'.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        Path of the written file, or None when the server does not answer
        with HTTP 200.

    Raises:
        ValueError: If ``file_format`` is not 'pdb' or 'cif'.
    '''
    import os

    if file_format not in ('pdb', 'cif'):
        raise ValueError(f"file_format must be 'pdb' or 'cif', got {file_format!r}")

    filename = f'AF-{uniprot_id}-F1-model_v{version}.{file_format}'
    url = f'https://alphafold.ebi.ac.uk/files/{filename}'

    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, filename)
    # Write the raw bytes so the file is stored exactly as served, with no
    # re-encoding or newline translation.
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path


pdb_file = download_alphafold('P04637')  # Human p53
Check Availability
def check_alphafold_exists(uniprot_id, timeout=30):
    '''Check whether the AlphaFold prediction API knows this accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        True when the prediction endpoint answers with HTTP 200, else False.
    '''
    url = f'https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}'
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    return response.status_code == 200


if check_alphafold_exists('P04637'):
    print('AlphaFold structure available')
Get Metadata
def get_alphafold_info(uniprot_id, timeout=30):
    '''Get AlphaFold prediction metadata for a UniProt accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The first entry of the JSON list returned by the prediction
        endpoint, or None when the response is not HTTP 200 or the list
        is empty.
    '''
    url = f'https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}'
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    entries = response.json()
    # Guard the [0] lookup: an empty list would otherwise raise IndexError.
    return entries[0] if entries else None


info = get_alphafold_info('P04637')
if info is not None:
    print(f"Gene: {info['gene']}")
    print(f"Organism: {info['organismScientificName']}")
    print(f"Model version: {info['latestVersion']}")
else:
    print('No AlphaFold prediction found')
File Types Available
Database version v4 (current as of 2025). The version number refers to the database release, not the AlphaFold model version.
| File | URL Pattern | Description |
|------|-------------|-------------|
| PDB | `AF-{id}-F1-model_v4.pdb` | Structure coordinates |
| mmCIF | `AF-{id}-F1-model_v4.cif` | Structure with metadata |
| PAE JSON | `AF-{id}-F1-predicted_aligned_error_v4.json` | Predicted aligned error |
def download_pae(uniprot_id, output_dir='.', version=4, timeout=30):
    '''Download the PAE (predicted aligned error) JSON for an accession.

    Args:
        uniprot_id: UniProt accession, e.g. 'P04637'.
        output_dir: Directory the file is written to; created if missing.
        version: Database release number used in the remote file name.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        Path of the written ``AF-<id>-F1-pae.json`` file, or None when the
        server does not answer with HTTP 200.
    '''
    import os

    url = (
        'https://alphafold.ebi.ac.uk/files/'
        f'AF-{uniprot_id}-F1-predicted_aligned_error_v{version}.json'
    )
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return None

    os.makedirs(output_dir, exist_ok=True)
    # The local name intentionally omits the version suffix.
    output_path = os.path.join(output_dir, f'AF-{uniprot_id}-F1-pae.json')
    with open(output_path, 'wb') as f:
        f.write(response.content)
    return output_path
Analyze pLDDT Confidence Scores
Extract from PDB B-factors
AlphaFold stores pLDDT scores in the B-factor column.
from Bio.PDB import PDBParser
def extract_plddt(pdb_file):
    '''Read per-residue pLDDT values from an AlphaFold PDB file.

    The score is taken from the B-factor of each standard residue's CA
    atom, or of the residue's first atom when it has no CA.

    Returns:
        Dict mapping residue sequence number to that B-factor value.
    '''
    structure = PDBParser(QUIET=True).get_structure('protein', pdb_file)

    scores_by_resnum = {}
    for res in structure.get_residues():
        hetero_flag, resnum, _icode = res.id
        if hetero_flag != ' ':
            continue  # not a standard residue
        atom = res['CA'] if 'CA' in res else list(res)[0]
        scores_by_resnum[resnum] = atom.get_bfactor()
    return scores_by_resnum


plddt = extract_plddt('AF-P04637-F1-model_v4.pdb')
avg_plddt = sum(plddt.values()) / len(plddt)
print(f'Average pLDDT: {avg_plddt:.1f}')
Confidence Interpretation
| pLDDT | Confidence | Interpretation |
|-------|------------|----------------|
| >90 | Very high | High accuracy, can be used like an experimental structure |
| 70-90 | Confident | Good backbone, may have sidechain errors |
| 50-70 | Low | Caution, may be disordered |
| <50 | Very low | Likely disordered or wrong |
Plot pLDDT per Residue
import matplotlib.pyplot as plt
def plot_plddt(plddt_dict, output='plddt_plot.png'):
    '''Plot pLDDT against residue number and save the figure to ``output``.

    Args:
        plddt_dict: Mapping of residue number to pLDDT score.
        output: Path of the image file to write.
    '''
    positions = sorted(plddt_dict)
    values = [plddt_dict[pos] for pos in positions]

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.fill_between(positions, values, alpha=0.3)
    ax.plot(positions, values)

    # Dashed guide lines at the 70 and 90 confidence thresholds.
    ax.axhline(y=70, color='orange', linestyle='--', label='Confident threshold')
    ax.axhline(y=90, color='green', linestyle='--', label='Very high threshold')

    ax.set_xlabel('Residue')
    ax.set_ylabel('pLDDT')
    ax.set_ylim(0, 100)
    ax.legend()

    fig.savefig(output)
    plt.close(fig)


plot_plddt(plddt)
Analyze PAE (Predicted Aligned Error)
import json import numpy as np import matplotlib.pyplot as plt
def load_pae(pae_file):
    '''Load a PAE matrix from an AlphaFold JSON file.

    Two layouts are accepted: a list whose first element is a dict holding
    'predicted_aligned_error' (the layout of the v4 files), and a bare dict
    holding that key directly (older layout).

    Args:
        pae_file: Path to the PAE JSON file.

    Returns:
        numpy array built from the 'predicted_aligned_error' entry.

    Raises:
        ValueError: If the file contains an empty list.
        KeyError: If 'predicted_aligned_error' is missing.
    '''
    with open(pae_file) as f:
        data = json.load(f)

    # Decide on the container type first. Indexing data[0] on a bare dict
    # would raise KeyError(0) before the dict layout is ever tried.
    if isinstance(data, list):
        if not data:
            raise ValueError(f'PAE file {pae_file!r} contains an empty list')
        record = data[0]
    else:
        record = data

    return np.array(record['predicted_aligned_error'])
def plot_pae(pae_matrix, output='pae_plot.png'):
    '''Render a PAE matrix as a heat map and save it to ``output``.

    Args:
        pae_matrix: 2-D array of predicted aligned errors.
        output: Path of the image file to write.
    '''
    fig, ax = plt.subplots(figsize=(8, 8))
    # Reversed Greens colour map, clipped to the 0-30 range.
    heatmap = ax.imshow(pae_matrix, cmap='Greens_r', vmin=0, vmax=30)
    fig.colorbar(heatmap, ax=ax, label='Expected position error (A)')

    ax.set_xlabel('Scored residue')
    ax.set_ylabel('Aligned residue')
    ax.set_title('Predicted Aligned Error')

    fig.savefig(output)
    plt.close(fig)


pae = load_pae('AF-P04637-F1-pae.json')
plot_pae(pae)
PAE Interpretation
- Low PAE (green): Residues have well-defined relative positions
- High PAE (white): Uncertain relative positions (e.g. flexible linkers, or the relative orientation of separate domains)
- Low-PAE blocks along the diagonal: Distinct structural domains
Batch Download
def batch_download_alphafold(uniprot_ids, output_dir='.'):
    '''Download AlphaFold structures for several UniProt IDs.

    Creates ``output_dir`` if needed, calls ``download_alphafold`` for each
    ID in turn and prints one status line per ID.

    Returns:
        Dict mapping each ID to whatever ``download_alphafold`` returned
        for it (a file path, or None when the download failed).
    '''
    import os

    os.makedirs(output_dir, exist_ok=True)

    paths_by_id = {}
    for accession in uniprot_ids:
        saved_path = download_alphafold(accession, output_dir)
        paths_by_id[accession] = saved_path
        print(f'Downloaded: {accession}' if saved_path else f'Not found: {accession}')
    return paths_by_id


# NOTE(review): 'P53_HUMAN' looks like a UniProt entry name rather than an
# accession - confirm it is here on purpose to show the "Not found" path.
ids = ['P04637', 'P53_HUMAN', 'Q9Y6K9']
files = batch_download_alphafold(ids, 'alphafold_structures')
Compare with Experimental Structure
from Bio.PDB import PDBParser, Superimposer
def compare_structures(alphafold_pdb, experimental_pdb):
    '''Calculate the CA RMSD between an AlphaFold and an experimental structure.

    CA atoms are collected from every residue of the first model of each
    file (all chains, not only the first one). The two atom lists are
    paired purely by position: the longer list is truncated to the length
    of the shorter. No sequence alignment or residue-number matching is
    done, so the result is only meaningful when both files cover the same
    residues in the same order.

    Args:
        alphafold_pdb: Path to the AlphaFold PDB file.
        experimental_pdb: Path to the experimental PDB file.

    Returns:
        RMSD reported by ``Superimposer`` after the fit.

    Raises:
        ValueError: If either structure has no CA atoms in its first model.
    '''
    parser = PDBParser(QUIET=True)
    af_struct = parser.get_structure('af', alphafold_pdb)
    exp_struct = parser.get_structure('exp', experimental_pdb)

    # CA atoms from all residues of the first model
    af_atoms = [r['CA'] for r in af_struct[0].get_residues() if 'CA' in r]
    exp_atoms = [r['CA'] for r in exp_struct[0].get_residues() if 'CA' in r]

    # Superimposer needs two equally long lists; pair by position (simple approach)
    min_len = min(len(af_atoms), len(exp_atoms))
    if min_len == 0:
        # Fail with a clear message instead of an obscure numerical error.
        raise ValueError('No CA atoms found in one or both structures')

    super_imposer = Superimposer()
    # set_atoms(fixed, moving): experimental atoms are the fixed reference.
    super_imposer.set_atoms(exp_atoms[:min_len], af_atoms[:min_len])
    return super_imposer.rms
Related Skills
- structural-biology/structure-io - Load and parse PDB/mmCIF files
- structural-biology/geometric-analysis - RMSD, superimposition
- database-access/uniprot-access - Get UniProt IDs for proteins
- structural-biology/structure-navigation - Navigate structure hierarchy