methylKit Analysis
Read Bismark Coverage Files
library(methylKit)
file_list <- list('sample1.bismark.cov.gz', 'sample2.bismark.cov.gz', 'sample3.bismark.cov.gz', 'sample4.bismark.cov.gz') sample_ids <- c('ctrl_1', 'ctrl_2', 'treat_1', 'treat_2') treatment <- c(0, 0, 1, 1) # 0 = control, 1 = treatment
meth_obj <- methRead( location = as.list(file_list), sample.id = as.list(sample_ids), treatment = treatment, assembly = 'hg38', context = 'CpG', pipeline = 'bismarkCoverage' )
Read Bismark cytosine Report
meth_obj <- methRead( location = as.list(file_list), sample.id = as.list(sample_ids), treatment = treatment, assembly = 'hg38', context = 'CpG', pipeline = 'bismarkCytosineReport' )
Basic Statistics
Coverage statistics
getMethylationStats(meth_obj[[1]], plot = TRUE, both.strands = FALSE)
Coverage per sample
getCoverageStats(meth_obj[[1]], plot = TRUE, both.strands = FALSE)
Filter by Coverage
Remove CpGs with very low or very high coverage
meth_filtered <- filterByCoverage( meth_obj, lo.count = 10, # Minimum 10 reads lo.perc = NULL, hi.count = NULL, hi.perc = 99.9 # Remove top 0.1% (likely PCR artifacts) )
Normalize Coverage
Normalize coverage between samples (recommended)
meth_norm <- normalizeCoverage(meth_filtered, method = 'median')
Merge Samples (Unite)
Find common CpGs across all samples
meth_united <- unite(meth_norm, destrand = TRUE) # Combine strands
Allow some missing data
meth_united <- unite(meth_norm, destrand = TRUE, min.per.group = 2L)
Visualize Samples
Correlation between samples
getCorrelation(meth_united, plot = TRUE)
PCA of samples
PCASamples(meth_united, screeplot = TRUE) PCASamples(meth_united)
Clustering
clusterSamples(meth_united, dist = 'correlation', method = 'ward.D', plot = TRUE)
Differential Methylation (Single CpGs)
Calculate differential methylation
diff_meth <- calculateDiffMeth( meth_united, overdispersion = 'MN', # Use shrinkage test = 'Chisq', mc.cores = 4 )
Get significant differentially methylated CpGs
dmcs <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01)
Hyper vs hypomethylated
dmcs_hyper <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01, type = 'hyper') dmcs_hypo <- getMethylDiff(diff_meth, difference = 25, qvalue = 0.01, type = 'hypo')
Tile-Based Analysis (Regions)
Aggregate CpGs into tiles/windows
tiles <- tileMethylCounts(meth_obj, win.size = 1000, step.size = 1000) tiles_united <- unite(tiles, destrand = TRUE)
Differential methylation on tiles
diff_tiles <- calculateDiffMeth(tiles_united, overdispersion = 'MN', mc.cores = 4) dmrs <- getMethylDiff(diff_tiles, difference = 25, qvalue = 0.01)
Export Results
To data frame
diff_df <- getData(dmcs) write.csv(diff_df, 'dmcs_results.csv', row.names = FALSE)
To BED file
library(genomation) dmcs_gr <- as(dmcs, 'GRanges') export(dmcs_gr, 'dmcs.bed', format = 'BED')
Annotate with Genomic Features
library(genomation)
gene_obj <- readTranscriptFeatures('genes.bed')
annotated <- annotateWithGeneParts(as(dmcs, 'GRanges'), gene_obj)
Or with annotatr
library(annotatr) annotations <- build_annotations(genome = 'hg38', annotations = 'hg38_basicgenes') dmcs_annotated <- annotate_regions(regions = as(dmcs, 'GRanges'), annotations = annotations)
Reorganize for Multi-Group Comparison
For more than 2 groups
meth_obj <- reorganize( meth_united, sample.ids = c('A1', 'A2', 'B1', 'B2', 'C1', 'C2'), treatment = c(0, 0, 1, 1, 2, 2) )
Pool Replicates
Combine biological replicates
meth_pooled <- pool(meth_united, sample.ids = c('control', 'treatment'))
Key Functions
Function Purpose
methRead Read methylation files
filterByCoverage Remove low/high coverage
normalizeCoverage Normalize between samples
unite Find common CpGs
calculateDiffMeth Statistical test
getMethylDiff Filter significant results
tileMethylCounts Region-level analysis
PCASamples PCA visualization
getCorrelation Sample correlation
Key Parameters for calculateDiffMeth
Parameter Default Description
overdispersion none MN (shrinkage) or shrinkMN
test Chisq Chisq, F, fast.fisher
mc.cores 1 Parallel cores
slim TRUE Remove unused columns
Related Skills
-
bismark-alignment - Generate input BAM files
-
methylation-calling - Extract coverage files
-
dmr-detection - Advanced DMR methods
-
pathway-analysis/go-enrichment - Functional annotation