DE Visualization

Create visualizations for differential expression analysis using DESeq2 and edgeR built-in plotting functions.

Scope

This skill covers DE-specific built-in functions:

DESeq2: plotMA(), plotPCA(), plotDispEsts(), plotCounts()
edgeR: plotMD(), plotBCV(), plotMDS()
Sample distance heatmaps and p-value distributions

For custom ggplot2/matplotlib implementations of volcano, MA, and PCA plots, see data-visualization/specialized-omics-plots.

Required Libraries

# Packages attached for the DESeq2-based examples in this guide.
# One call per line: several library() calls on a single line do not parse.
library(DESeq2)
library(ggplot2)
library(pheatmap)
library(RColorBrewer)
library(ggrepel) # For labeled points

Installation

# CRAN packages used for the custom plots and heatmaps.
install.packages(c('ggplot2', 'pheatmap', 'RColorBrewer', 'ggrepel'))

# Optional: Enhanced volcano plots
# EnhancedVolcano is installed through BiocManager, so make sure BiocManager
# itself is available before calling it.
if (!requireNamespace('BiocManager', quietly = TRUE)) {
  install.packages('BiocManager')
}
BiocManager::install('EnhancedVolcano')

MA Plot

DESeq2 MA Plot

# Built-in MA plot
plotMA(res, ylim = c(-5, 5), main = 'MA Plot')

# With custom alpha
plotMA(res, alpha = 0.05, ylim = c(-5, 5))

# Highlight specific genes
# Draw the base plot first, then overlay the genes with padj < 0.01 and
# |log2FoldChange| > 2 as solid red points on the same axes.
plotMA(res, ylim = c(-5, 5))
with(
  subset(res, padj < 0.01 & abs(log2FoldChange) > 2),
  points(baseMean, log2FoldChange, col = 'red', pch = 20)
)

Custom ggplot2 MA Plot

# Build a plain data frame and flag genes with padj < 0.05.
# A missing padj counts as "not significant" rather than NA.
res_df <- as.data.frame(res)
res_df$significant <- res_df$padj < 0.05 & !is.na(res_df$padj)

# Colors are named by level so that TRUE is always red, even when only one
# of the two levels is present in the data.
ggplot(
  res_df,
  aes(x = log10(baseMean), y = log2FoldChange, color = significant)
) +
  geom_point(alpha = 0.5, size = 1) +
  scale_color_manual(values = c(`FALSE` = 'grey60', `TRUE` = 'red')) +
  geom_hline(yintercept = 0, linetype = 'dashed') +
  labs(
    x = 'log10(Mean Expression)',
    y = 'log2 Fold Change',
    title = 'MA Plot'
  ) +
  theme_bw() +
  theme(legend.position = 'bottom')

edgeR MA Plot

# Using plotMD (mean-difference plot)
plotMD(qlf, main = 'MD Plot')
# Add dashed blue horizontal reference lines at y = -1 and y = 1.
abline(h = c(-1, 1), col = 'blue', lty = 2)

Volcano Plot

Basic Volcano Plot

res_df <- as.data.frame(res)
# Significant = padj < 0.05 and |log2FoldChange| > 1. A missing padj or fold
# change would otherwise make the flag NA, so NA is treated as not significant.
res_df$significant <- !is.na(res_df$padj) &
  !is.na(res_df$log2FoldChange) &
  res_df$padj < 0.05 &
  abs(res_df$log2FoldChange) > 1

# Colors are named by level so TRUE is always red. The horizontal guide marks
# raw p = 0.05 on the y axis, while point color is based on padj.
ggplot(
  res_df,
  aes(x = log2FoldChange, y = -log10(pvalue), color = significant)
) +
  geom_point(alpha = 0.5, size = 1) +
  scale_color_manual(values = c(`FALSE` = 'grey60', `TRUE` = 'red')) +
  geom_vline(xintercept = c(-1, 1), linetype = 'dashed', color = 'blue') +
  geom_hline(
    yintercept = -log10(0.05),
    linetype = 'dashed',
    color = 'blue'
  ) +
  labs(
    x = 'log2 Fold Change',
    y = '-log10(p-value)',
    title = 'Volcano Plot'
  ) +
  theme_bw()

Volcano with Gene Labels

res_df <- as.data.frame(res)
res_df$gene <- rownames(res_df)
# Significant = padj < 0.05 and |log2FoldChange| > 1; NA values are treated
# as not significant instead of propagating NA into the color mapping.
res_df$significant <- !is.na(res_df$padj) &
  !is.na(res_df$log2FoldChange) &
  res_df$padj < 0.05 &
  abs(res_df$log2FoldChange) > 1

# Label top genes
# The ten rows with the smallest padj (order() sorts NA last).
top_genes <- head(res_df[order(res_df$padj), ], 10)

ggplot(res_df, aes(x = log2FoldChange, y = -log10(pvalue))) +
  geom_point(aes(color = significant), alpha = 0.5, size = 1) +
  scale_color_manual(values = c(`FALSE` = 'grey60', `TRUE` = 'red')) +
  geom_text_repel(
    data = top_genes,
    aes(label = gene),
    size = 3,
    max.overlaps = 20
  ) +
  geom_vline(xintercept = c(-1, 1), linetype = 'dashed') +
  geom_hline(yintercept = -log10(0.05), linetype = 'dashed') +
  labs(x = 'log2 Fold Change', y = '-log10(p-value)') +
  theme_bw()

EnhancedVolcano

library(EnhancedVolcano)

# Volcano plot straight from the results object: rows are labeled by their
# row names, with cutoffs of 0.05 on the 'pvalue' column and 1 on the
# 'log2FoldChange' column.
EnhancedVolcano(
  res,
  lab = rownames(res),
  x = 'log2FoldChange',
  y = 'pvalue',
  pCutoff = 0.05,
  FCcutoff = 1,
  title = 'Differential Expression',
  subtitle = 'Treatment vs Control'
)

PCA Plot

DESeq2 PCA

# Variance stabilizing transformation first
vsd <- vst(dds, blind = FALSE)

# Basic PCA
plotPCA(vsd, intgroup = 'condition')

# With more options
# Group samples by both condition and batch, and pass ntop = 500.
plotPCA(vsd, intgroup = c('condition', 'batch'), ntop = 500)

Custom PCA with ggplot2

vsd <- vst(dds, blind = FALSE)

# returnData = TRUE makes plotPCA() return the coordinates instead of drawing;
# the 'percentVar' attribute on that result supplies the axis-label percentages.
pca_data <- plotPCA(
  vsd,
  intgroup = c('condition', 'batch'),
  returnData = TRUE
)
percent_var <- round(100 * attr(pca_data, 'percentVar'))

ggplot(pca_data, aes(x = PC1, y = PC2, color = condition, shape = batch)) +
  geom_point(size = 4) +
  xlab(paste0('PC1: ', percent_var[1], '% variance')) +
  ylab(paste0('PC2: ', percent_var[2], '% variance')) +
  ggtitle('PCA Plot') +
  theme_bw() +
  theme(legend.position = 'right')

edgeR PCA (via limma)

# plotMDS() comes from limma; cpm() comes from edgeR, so attach both.
# NOTE(review): limma also exports a plotMA(); if plotMA(res) on DESeq2
# results errors after this point, call DESeq2::plotMA(res) - confirm.
library(limma)
library(edgeR)

# Log-scale counts per million, then an MDS plot with one color per group.
# as.numeric(group) assumes `group` is a factor.
log_cpm <- cpm(y, log = TRUE)
plotMDS(log_cpm, col = as.numeric(group), pch = 16)
legend(
  'topright',
  legend = levels(group),
  col = seq_len(nlevels(group)),
  pch = 16
)

Heatmaps

Top DE Genes Heatmap

library(pheatmap)

# Get top significant genes
sig_genes <- rownames(subset(res, padj < 0.01))
# Row clustering needs at least two genes; fail early with a clear message.
if (length(sig_genes) < 2) {
  stop(
    'Need at least two genes with padj < 0.01 to draw a clustered heatmap',
    call. = FALSE
  )
}

# Get normalized counts
vsd <- vst(dds, blind = FALSE)
# drop = FALSE keeps the result a matrix whatever the number of rows.
mat <- assay(vsd)[sig_genes, , drop = FALSE]

# Scale by row (z-score)
# scale() works on columns, so transpose, scale, and transpose back.
mat_scaled <- t(scale(t(mat)))

# Create annotation
annotation_col <- data.frame(
  condition = colData(dds)$condition,
  row.names = colnames(mat)
)

pheatmap(
  mat_scaled,
  annotation_col = annotation_col,
  show_rownames = FALSE,
  clustering_distance_rows = 'correlation',
  clustering_distance_cols = 'correlation',
  color = colorRampPalette(c('blue', 'white', 'red'))(100),
  main = 'Top DE Genes'
)

Sample Distance Heatmap

vsd <- vst(dds, blind = FALSE)

# Calculate sample distances
# dist() works on rows, so transpose to put samples in rows.
sample_dists <- dist(t(assay(vsd)))
sample_dist_matrix <- as.matrix(sample_dists)

# Annotation
annotation <- data.frame(
  condition = colData(dds)$condition,
  row.names = colnames(dds)
)

# The precomputed dist object is reused for both row and column clustering.
pheatmap(
  sample_dist_matrix,
  annotation_col = annotation,
  annotation_row = annotation,
  clustering_distance_rows = sample_dists,
  clustering_distance_cols = sample_dists,
  color = colorRampPalette(c('white', 'steelblue'))(100),
  main = 'Sample Distance Matrix'
)

Gene Expression Heatmap

# Select genes of interest
# Uses `vsd` and `annotation_col` created in the earlier heatmap example.
genes_of_interest <- c('gene1', 'gene2', 'gene3', 'gene4', 'gene5')

# Report unknown identifiers by name instead of failing with a bare
# "subscript out of bounds" error.
missing_genes <- setdiff(genes_of_interest, rownames(vsd))
if (length(missing_genes) > 0) {
  stop(
    'Genes not found in vsd: ',
    paste(missing_genes, collapse = ', '),
    call. = FALSE
  )
}

# drop = FALSE keeps a matrix even if a single gene is selected.
mat <- assay(vsd)[genes_of_interest, , drop = FALSE]

pheatmap(
  mat,
  scale = 'row',
  annotation_col = annotation_col,
  show_rownames = TRUE,
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  main = 'Genes of Interest'
)

Dispersion Plot

DESeq2

# Draw the DESeq2 dispersion plot for the dataset.
plotDispEsts(
  dds,
  main = 'Dispersion Estimates'
)

edgeR

# Draw the edgeR BCV plot for the DGEList `y`.
plotBCV(
  y,
  main = 'Biological Coefficient of Variation'
)

Counts Plot for Individual Genes

DESeq2

# Plot counts for a specific gene
plotCounts(dds, gene = 'GENE_NAME', intgroup = 'condition')

# With ggplot2
# returnData = TRUE returns the data frame instead of drawing; its `count`
# and `condition` columns are used below.
d <- plotCounts(
  dds,
  gene = 'GENE_NAME',
  intgroup = 'condition',
  returnData = TRUE
)
ggplot(d, aes(x = condition, y = count, color = condition)) +
  geom_point(position = position_jitter(width = 0.1), size = 3) +
  scale_y_log10() +
  ggtitle('GENE_NAME Expression') +
  theme_bw()

edgeR

# Get CPM for a gene
gene_idx <- which(rownames(y) == 'GENE_NAME')
# Exactly one matching row is required; otherwise the data frame below would
# be built from an empty or multi-row matrix.
if (length(gene_idx) != 1) {
  stop(
    'Expected exactly one row named GENE_NAME in y, found ',
    length(gene_idx),
    call. = FALSE
  )
}
cpm_gene <- cpm(y)[gene_idx, ]

# Plot
# One point per sample, grouped and colored by `group`, on a log10 y axis.
df <- data.frame(cpm = cpm_gene, group = group)
ggplot(df, aes(x = group, y = cpm, color = group)) +
  geom_point(position = position_jitter(width = 0.1), size = 3) +
  scale_y_log10() +
  labs(y = 'CPM', title = 'GENE_NAME Expression') +
  theme_bw()

P-value Histogram

# Check p-value distribution: genes under the null should give a roughly
# uniform histogram, with an additional peak near 0 from truly DE genes.
res_df <- as.data.frame(res)

# 50 bins of width 0.02 aligned to start at 0, so the first bin covers
# [0, 0.02] instead of straddling zero.
ggplot(res_df, aes(x = pvalue)) +
  geom_histogram(
    binwidth = 0.02,
    boundary = 0,
    fill = 'steelblue',
    color = 'white'
  ) +
  labs(x = 'P-value', y = 'Frequency', title = 'P-value Distribution') +
  theme_bw()

Saving Plots

# Save as PDF (vector)
pdf('volcano_plot.pdf', width = 8, height = 6)
# ... plot code ...
dev.off()

# Save as PNG (raster)
# Size is given in inches to match the PDF; with res = 150 pixels per inch
# the file is 1200 x 900 px.
png('volcano_plot.png', width = 8, height = 6, units = 'in', res = 150)
# ... plot code ...
dev.off()

# Using ggsave for ggplot objects
# Placeholder: replace the right-hand side with your own plot.
p <- ggplot(...) + ...
ggsave('plot.pdf', p, width = 8, height = 6)
ggsave('plot.png', p, width = 8, height = 6, dpi = 300)

Color Palettes

# For heatmaps
library(RColorBrewer)

# Diverging (for expression: blue-white-red)
# rev() reverses the 'RdBu' palette so it runs from blue to red.
colorRampPalette(rev(brewer.pal(n = 7, name = 'RdBu')))(100)

# Sequential (for distances)
colorRampPalette(brewer.pal(n = 9, name = 'Blues'))(100)

# For categorical groups
brewer.pal(n = 8, name = 'Set1')

Quick Reference: Common Plots

| Plot | Purpose | Function |
|------|---------|----------|
| MA plot | LFC vs mean expression | plotMA(), plotMD() |
| Volcano | LFC vs significance | ggplot2, EnhancedVolcano |
| PCA | Sample clustering | plotPCA(), plotMDS() |
| Heatmap | Gene patterns | pheatmap() |
| Dispersion | Model fit | plotDispEsts(), plotBCV() |
| Counts | Individual genes | plotCounts() |

Related Skills

deseq2-basics - Generate DESeq2 results for visualization
edger-basics - Generate edgeR results for visualization
de-results - Filter genes before visualization
data-visualization/specialized-omics-plots - Custom ggplot2 volcano/MA/PCA functions
data-visualization/heatmaps-clustering - Advanced heatmap customization

bio-de-visualization

Safety Notice

Copy this and send it to your AI assistant to learn

Optional: Enhanced volcano plots

Built-in MA plot

With custom alpha

Highlight specific genes

Using plotMD (mean-difference plot)

Label top genes

Variance stabilizing transformation first

Basic PCA

With more options

Get top significant genes

Get normalized counts

Scale by row (z-score)

Create annotation

Calculate sample distances

Annotation

Select genes of interest

Plot counts for a specific gene

With ggplot2

Get CPM for a gene

Plot

Check p-value distribution (roughly uniform for null genes, with an additional peak near 0 from truly DE genes)

Save as PDF (vector)

... plot code ...

Save as PNG (raster)

... plot code ...

Using ggsave for ggplot objects

For heatmaps

Diverging (for expression: blue-white-red)

Sequential (for distances)

For categorical groups

Source Transparency

Related Skills

bio-microbiome-diversity-analysis

bio-pdb-geometric-analysis

bio-proteomics-dia-analysis