DE Results

Extract, filter, and export differential expression results.

Required Libraries

library(DESeq2) # or library(edgeR) library(dplyr) # For data manipulation

Extracting DESeq2 Results

Basic results

res <- results(dds)

With specific alpha (adjusted p-value threshold)

res <- results(dds, alpha = 0.05)

With log fold change shrinkage

res <- lfcShrink(dds, coef = 'condition_treated_vs_control', type = 'apeglm')

Convert to data frame

res_df <- as.data.frame(res) res_df$gene <- rownames(res_df)

Extracting edgeR Results

Get all results

results <- topTags(qlf, n = Inf)$table

Add gene column

results$gene <- rownames(results)

Filtering Significant Genes

By Adjusted P-value

DESeq2

sig_genes <- subset(res, padj < 0.05)

edgeR

sig_genes <- subset(results, FDR < 0.05)

Using dplyr

sig_genes <- res_df %>% filter(padj < 0.05) %>% arrange(padj)

By Fold Change

Absolute log2 fold change > 1 (2-fold change)

sig_genes <- subset(res, padj < 0.05 & abs(log2FoldChange) > 1)

Up-regulated only

up_genes <- subset(res, padj < 0.05 & log2FoldChange > 1)

Down-regulated only

down_genes <- subset(res, padj < 0.05 & log2FoldChange < -1)

Combined Filters

Stringent filtering

sig_genes <- res_df %>% filter(padj < 0.01, abs(log2FoldChange) > 1, baseMean > 10) %>% arrange(padj)

Ordering Results

By adjusted p-value (most significant first)

res_ordered <- res[order(res$padj), ]

By absolute fold change (largest changes first)

res_ordered <- res[order(abs(res$log2FoldChange), decreasing = TRUE), ]

By base mean expression

res_ordered <- res[order(res$baseMean, decreasing = TRUE), ]

Combined: significant genes ordered by fold change

sig_ordered <- res_df %>% filter(padj < 0.05) %>% arrange(desc(abs(log2FoldChange)))

Summary Statistics

DESeq2 summary

summary(res)

Manual counts

n_tested <- sum(!is.na(res$padj)) n_sig <- sum(res$padj < 0.05, na.rm = TRUE) n_up <- sum(res$padj < 0.05 & res$log2FoldChange > 0, na.rm = TRUE) n_down <- sum(res$padj < 0.05 & res$log2FoldChange < 0, na.rm = TRUE)

cat(sprintf('Tested: %d genes\n', n_tested)) cat(sprintf('Significant (padj < 0.05): %d genes\n', n_sig)) cat(sprintf('Up-regulated: %d genes\n', n_up)) cat(sprintf('Down-regulated: %d genes\n', n_down))

edgeR summary

summary(decideTests(qlf))

Adding Gene Annotations

From Bioconductor Annotation Package

library(org.Hs.eg.db) # Human; use org.Mm.eg.db for mouse

If gene IDs are Ensembl

res_df$symbol <- mapIds(org.Hs.eg.db, keys = rownames(res_df), column = 'SYMBOL', keytype = 'ENSEMBL', multiVals = 'first')

res_df$entrez <- mapIds(org.Hs.eg.db, keys = rownames(res_df), column = 'ENTREZID', keytype = 'ENSEMBL', multiVals = 'first')

res_df$description <- mapIds(org.Hs.eg.db, keys = rownames(res_df), column = 'GENENAME', keytype = 'ENSEMBL', multiVals = 'first')

From BioMart

library(biomaRt)

mart <- useMart('ensembl', dataset = 'hsapiens_gene_ensembl')

annotations <- getBM( attributes = c('ensembl_gene_id', 'external_gene_name', 'description'), filters = 'ensembl_gene_id', values = rownames(res_df), mart = mart )

Merge with results

res_annotated <- merge(res_df, annotations, by.x = 'row.names', by.y = 'ensembl_gene_id', all.x = TRUE)

From Custom File

Load annotation file

gene_info <- read.csv('gene_annotations.csv')

Merge with results

res_annotated <- merge(res_df, gene_info, by = 'gene', all.x = TRUE)

Exporting Results

To CSV

All results

write.csv(res_df, file = 'deseq2_all_results.csv', row.names = FALSE)

Significant only

sig_genes <- res_df %>% filter(padj < 0.05) write.csv(sig_genes, file = 'deseq2_significant.csv', row.names = FALSE)

To Excel

library(openxlsx)

Create workbook with multiple sheets

wb <- createWorkbook()

addWorksheet(wb, 'All Results') writeData(wb, 'All Results', res_df)

addWorksheet(wb, 'Significant') writeData(wb, 'Significant', sig_genes)

addWorksheet(wb, 'Up-regulated') writeData(wb, 'Up-regulated', up_genes)

addWorksheet(wb, 'Down-regulated') writeData(wb, 'Down-regulated', down_genes)

saveWorkbook(wb, 'de_results.xlsx', overwrite = TRUE)

Gene Lists for Pathway Analysis

Just gene IDs for GO/KEGG analysis

sig_gene_list <- rownames(subset(res, padj < 0.05)) write.table(sig_gene_list, file = 'significant_genes.txt', quote = FALSE, row.names = FALSE, col.names = FALSE)

With fold changes for GSEA

gsea_input <- res_df %>% filter(!is.na(log2FoldChange)) %>% select(gene, log2FoldChange) %>% arrange(desc(log2FoldChange)) write.table(gsea_input, file = 'gsea_input.rnk', sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE)

Comparing Results Between Methods

Get significant genes from both methods

deseq2_sig <- rownames(subset(deseq2_res, padj < 0.05)) edger_sig <- rownames(subset(edger_results, FDR < 0.05))

Overlap

common <- intersect(deseq2_sig, edger_sig) deseq2_only <- setdiff(deseq2_sig, edger_sig) edger_only <- setdiff(edger_sig, deseq2_sig)

cat(sprintf('DESeq2 significant: %d\n', length(deseq2_sig))) cat(sprintf('edgeR significant: %d\n', length(edger_sig))) cat(sprintf('Common: %d\n', length(common))) cat(sprintf('DESeq2 only: %d\n', length(deseq2_only))) cat(sprintf('edgeR only: %d\n', length(edger_only)))

Venn diagram

library(VennDiagram) venn.diagram( x = list(DESeq2 = deseq2_sig, edgeR = edger_sig), filename = 'de_overlap.png', fill = c('steelblue', 'coral') )

Multiple Testing Correction

DESeq2 uses Benjamini-Hochberg by default

To use different methods:

Independent Hypothesis Weighting (more powerful)

library(IHW) res_ihw <- results(dds, filterFun = ihw)

Manual p-value adjustment

res_df$padj_bonferroni <- p.adjust(res_df$pvalue, method = 'bonferroni') res_df$padj_bh <- p.adjust(res_df$pvalue, method = 'BH') res_df$padj_fdr <- p.adjust(res_df$pvalue, method = 'fdr')

Handling NA Values

Count NAs

sum(is.na(res$padj))

Remove genes with NA padj

res_complete <- res[!is.na(res$padj), ]

Understand why NAs occur

- baseMean = 0: No counts

- NA only in padj: Outlier or low count filtered by independent filtering

Check outliers

res[which(is.na(res$pvalue) & res$baseMean > 0), ]

Quick Reference: Result Columns

DESeq2

Column Description

baseMean

Mean normalized counts

log2FoldChange

Log2 fold change

lfcSE

Standard error of LFC

stat

Wald statistic

pvalue

Raw p-value

padj

Adjusted p-value (BH)

edgeR

Column Description

logFC

Log2 fold change

logCPM

Average log2 CPM

Quasi-likelihood F-statistic

PValue

Raw p-value

FDR

False discovery rate

Related Skills

deseq2-basics - Run DESeq2 analysis
edger-basics - Run edgeR analysis
de-visualization - Visualize results
pathway-analysis - Use gene lists for enrichment

bio-de-results

Safety Notice

Copy this and send it to your AI assistant to learn

Basic results

With specific alpha (adjusted p-value threshold)

With log fold change shrinkage

Convert to data frame

Get all results

Add gene column

DESeq2

edgeR

Using dplyr

Absolute log2 fold change > 1 (2-fold change)

Up-regulated only

Down-regulated only

Stringent filtering

By adjusted p-value (most significant first)

By absolute fold change (largest changes first)

By base mean expression

Combined: significant genes ordered by fold change

DESeq2 summary

Manual counts

edgeR summary

If gene IDs are Ensembl

Merge with results

Load annotation file

Merge with results

All results

Significant only

Create workbook with multiple sheets

Just gene IDs for GO/KEGG analysis

With fold changes for GSEA

Get significant genes from both methods

Overlap

Venn diagram

DESeq2 uses Benjamini-Hochberg by default

To use different methods:

Independent Hypothesis Weighting (more powerful)

Manual p-value adjustment

Count NAs

Remove genes with NA padj

Understand why NAs occur

- baseMean = 0: No counts

- NA only in padj: Outlier or low count filtered by independent filtering

Check outliers

Source Transparency

Related Skills

bioskills

bio-data-visualization-genome-tracks

bio-data-visualization-multipanel-figures

bio-epitranscriptomics-merip-preprocessing