DE Results
Extract, filter, and export differential expression results.
Required Libraries
library(DESeq2) # or library(edgeR) library(dplyr) # For data manipulation
Extracting DESeq2 Results
Basic results
res <- results(dds)
With specific alpha (adjusted p-value threshold)
res <- results(dds, alpha = 0.05)
With log fold change shrinkage
res <- lfcShrink(dds, coef = 'condition_treated_vs_control', type = 'apeglm')
Convert to data frame
res_df <- as.data.frame(res) res_df$gene <- rownames(res_df)
Extracting edgeR Results
Get all results
results <- topTags(qlf, n = Inf)$table
Add gene column
results$gene <- rownames(results)
Filtering Significant Genes
By Adjusted P-value
DESeq2
sig_genes <- subset(res, padj < 0.05)
edgeR
sig_genes <- subset(results, FDR < 0.05)
Using dplyr
sig_genes <- res_df %>% filter(padj < 0.05) %>% arrange(padj)
By Fold Change
Absolute log2 fold change > 1 (2-fold change)
sig_genes <- subset(res, padj < 0.05 & abs(log2FoldChange) > 1)
Up-regulated only
up_genes <- subset(res, padj < 0.05 & log2FoldChange > 1)
Down-regulated only
down_genes <- subset(res, padj < 0.05 & log2FoldChange < -1)
Combined Filters
Stringent filtering
sig_genes <- res_df %>% filter(padj < 0.01, abs(log2FoldChange) > 1, baseMean > 10) %>% arrange(padj)
Ordering Results
By adjusted p-value (most significant first)
res_ordered <- res[order(res$padj), ]
By absolute fold change (largest changes first)
res_ordered <- res[order(abs(res$log2FoldChange), decreasing = TRUE), ]
By base mean expression
res_ordered <- res[order(res$baseMean, decreasing = TRUE), ]
Combined: significant genes ordered by fold change
sig_ordered <- res_df %>% filter(padj < 0.05) %>% arrange(desc(abs(log2FoldChange)))
Summary Statistics
DESeq2 summary
summary(res)
Manual counts
n_tested <- sum(!is.na(res$padj)) n_sig <- sum(res$padj < 0.05, na.rm = TRUE) n_up <- sum(res$padj < 0.05 & res$log2FoldChange > 0, na.rm = TRUE) n_down <- sum(res$padj < 0.05 & res$log2FoldChange < 0, na.rm = TRUE)
cat(sprintf('Tested: %d genes\n', n_tested)) cat(sprintf('Significant (padj < 0.05): %d genes\n', n_sig)) cat(sprintf('Up-regulated: %d genes\n', n_up)) cat(sprintf('Down-regulated: %d genes\n', n_down))
edgeR summary
summary(decideTests(qlf))
Adding Gene Annotations
From Bioconductor Annotation Package
library(org.Hs.eg.db) # Human; use org.Mm.eg.db for mouse
If gene IDs are Ensembl
res_df$symbol <- mapIds(org.Hs.eg.db, keys = rownames(res_df), column = 'SYMBOL', keytype = 'ENSEMBL', multiVals = 'first')
res_df$entrez <- mapIds(org.Hs.eg.db, keys = rownames(res_df), column = 'ENTREZID', keytype = 'ENSEMBL', multiVals = 'first')
res_df$description <- mapIds(org.Hs.eg.db, keys = rownames(res_df), column = 'GENENAME', keytype = 'ENSEMBL', multiVals = 'first')
From BioMart
library(biomaRt)
mart <- useMart('ensembl', dataset = 'hsapiens_gene_ensembl')
annotations <- getBM( attributes = c('ensembl_gene_id', 'external_gene_name', 'description'), filters = 'ensembl_gene_id', values = rownames(res_df), mart = mart )
Merge with results
res_annotated <- merge(res_df, annotations, by.x = 'row.names', by.y = 'ensembl_gene_id', all.x = TRUE)
From Custom File
Load annotation file
gene_info <- read.csv('gene_annotations.csv')
Merge with results
res_annotated <- merge(res_df, gene_info, by = 'gene', all.x = TRUE)
Exporting Results
To CSV
All results
write.csv(res_df, file = 'deseq2_all_results.csv', row.names = FALSE)
Significant only
sig_genes <- res_df %>% filter(padj < 0.05) write.csv(sig_genes, file = 'deseq2_significant.csv', row.names = FALSE)
To Excel
library(openxlsx)
Create workbook with multiple sheets
wb <- createWorkbook()
addWorksheet(wb, 'All Results') writeData(wb, 'All Results', res_df)
addWorksheet(wb, 'Significant') writeData(wb, 'Significant', sig_genes)
addWorksheet(wb, 'Up-regulated') writeData(wb, 'Up-regulated', up_genes)
addWorksheet(wb, 'Down-regulated') writeData(wb, 'Down-regulated', down_genes)
saveWorkbook(wb, 'de_results.xlsx', overwrite = TRUE)
Gene Lists for Pathway Analysis
Just gene IDs for GO/KEGG analysis
sig_gene_list <- rownames(subset(res, padj < 0.05)) write.table(sig_gene_list, file = 'significant_genes.txt', quote = FALSE, row.names = FALSE, col.names = FALSE)
With fold changes for GSEA
gsea_input <- res_df %>% filter(!is.na(log2FoldChange)) %>% select(gene, log2FoldChange) %>% arrange(desc(log2FoldChange)) write.table(gsea_input, file = 'gsea_input.rnk', sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE)
Comparing Results Between Methods
Get significant genes from both methods
deseq2_sig <- rownames(subset(deseq2_res, padj < 0.05)) edger_sig <- rownames(subset(edger_results, FDR < 0.05))
Overlap
common <- intersect(deseq2_sig, edger_sig) deseq2_only <- setdiff(deseq2_sig, edger_sig) edger_only <- setdiff(edger_sig, deseq2_sig)
cat(sprintf('DESeq2 significant: %d\n', length(deseq2_sig))) cat(sprintf('edgeR significant: %d\n', length(edger_sig))) cat(sprintf('Common: %d\n', length(common))) cat(sprintf('DESeq2 only: %d\n', length(deseq2_only))) cat(sprintf('edgeR only: %d\n', length(edger_only)))
Venn diagram
library(VennDiagram) venn.diagram( x = list(DESeq2 = deseq2_sig, edgeR = edger_sig), filename = 'de_overlap.png', fill = c('steelblue', 'coral') )
Multiple Testing Correction
DESeq2 uses Benjamini-Hochberg by default
To use different methods:
Independent Hypothesis Weighting (more powerful)
library(IHW) res_ihw <- results(dds, filterFun = ihw)
Manual p-value adjustment
res_df$padj_bonferroni <- p.adjust(res_df$pvalue, method = 'bonferroni') res_df$padj_bh <- p.adjust(res_df$pvalue, method = 'BH') res_df$padj_fdr <- p.adjust(res_df$pvalue, method = 'fdr')
Handling NA Values
Count NAs
sum(is.na(res$padj))
Remove genes with NA padj
res_complete <- res[!is.na(res$padj), ]
Understand why NAs occur
- baseMean = 0: No counts
- NA only in padj: Outlier or low count filtered by independent filtering
Check outliers
res[which(is.na(res$pvalue) & res$baseMean > 0), ]
Quick Reference: Result Columns
DESeq2
Column Description
baseMean
Mean normalized counts
log2FoldChange
Log2 fold change
lfcSE
Standard error of LFC
stat
Wald statistic
pvalue
Raw p-value
padj
Adjusted p-value (BH)
edgeR
Column Description
logFC
Log2 fold change
logCPM
Average log2 CPM
F
Quasi-likelihood F-statistic
PValue
Raw p-value
FDR
False discovery rate
Related Skills
-
deseq2-basics - Run DESeq2 analysis
-
edger-basics - Run edgeR analysis
-
de-visualization - Visualize results
-
pathway-analysis - Use gene lists for enrichment