bio-de-results
3
总安装量
3
周安装量
#57027
全站排名
安装命令
npx skills add https://github.com/gptomics/bioskills --skill bio-de-results
Agent 安装分布
trae
2
windsurf
1
opencode
1
codex
1
claude-code
1
antigravity
1
Skill 文档
DE Results
Extract, filter, and export differential expression results.
Required Libraries
library(DESeq2) # or library(edgeR)
library(dplyr) # For data manipulation
Extracting DESeq2 Results
# Basic results
res <- results(dds)
# With specific alpha (adjusted p-value threshold)
res <- results(dds, alpha = 0.05)
# With log fold change shrinkage
res <- lfcShrink(dds, coef = 'condition_treated_vs_control', type = 'apeglm')
# Convert to data frame
res_df <- as.data.frame(res)
res_df$gene <- rownames(res_df)
Extracting edgeR Results
# Get all results
results <- topTags(qlf, n = Inf)$table
# Add gene column
results$gene <- rownames(results)
Filtering Significant Genes
By Adjusted P-value
# DESeq2
sig_genes <- subset(res, padj < 0.05)
# edgeR
sig_genes <- subset(results, FDR < 0.05)
# Using dplyr
sig_genes <- res_df %>%
filter(padj < 0.05) %>%
arrange(padj)
By Fold Change
# Absolute log2 fold change > 1 (2-fold change)
sig_genes <- subset(res, padj < 0.05 & abs(log2FoldChange) > 1)
# Up-regulated only
up_genes <- subset(res, padj < 0.05 & log2FoldChange > 1)
# Down-regulated only
down_genes <- subset(res, padj < 0.05 & log2FoldChange < -1)
Combined Filters
# Stringent filtering
sig_genes <- res_df %>%
filter(padj < 0.01,
abs(log2FoldChange) > 1,
baseMean > 10) %>%
arrange(padj)
Ordering Results
# By adjusted p-value (most significant first)
res_ordered <- res[order(res$padj), ]
# By absolute fold change (largest changes first)
res_ordered <- res[order(abs(res$log2FoldChange), decreasing = TRUE), ]
# By base mean expression
res_ordered <- res[order(res$baseMean, decreasing = TRUE), ]
# Combined: significant genes ordered by fold change
sig_ordered <- res_df %>%
filter(padj < 0.05) %>%
arrange(desc(abs(log2FoldChange)))
Summary Statistics
# DESeq2 summary
summary(res)
# Manual counts
n_tested <- sum(!is.na(res$padj))
n_sig <- sum(res$padj < 0.05, na.rm = TRUE)
n_up <- sum(res$padj < 0.05 & res$log2FoldChange > 0, na.rm = TRUE)
n_down <- sum(res$padj < 0.05 & res$log2FoldChange < 0, na.rm = TRUE)
cat(sprintf('Tested: %d genes\n', n_tested))
cat(sprintf('Significant (padj < 0.05): %d genes\n', n_sig))
cat(sprintf('Up-regulated: %d genes\n', n_up))
cat(sprintf('Down-regulated: %d genes\n', n_down))
# edgeR summary
summary(decideTests(qlf))
Adding Gene Annotations
From Bioconductor Annotation Package
library(org.Hs.eg.db) # Human; use org.Mm.eg.db for mouse
# If gene IDs are Ensembl
res_df$symbol <- mapIds(org.Hs.eg.db,
keys = rownames(res_df),
column = 'SYMBOL',
keytype = 'ENSEMBL',
multiVals = 'first')
res_df$entrez <- mapIds(org.Hs.eg.db,
keys = rownames(res_df),
column = 'ENTREZID',
keytype = 'ENSEMBL',
multiVals = 'first')
res_df$description <- mapIds(org.Hs.eg.db,
keys = rownames(res_df),
column = 'GENENAME',
keytype = 'ENSEMBL',
multiVals = 'first')
From BioMart
library(biomaRt)
mart <- useMart('ensembl', dataset = 'hsapiens_gene_ensembl')
annotations <- getBM(
attributes = c('ensembl_gene_id', 'external_gene_name', 'description'),
filters = 'ensembl_gene_id',
values = rownames(res_df),
mart = mart
)
# Merge with results
res_annotated <- merge(res_df, annotations,
by.x = 'row.names', by.y = 'ensembl_gene_id',
all.x = TRUE)
From Custom File
# Load annotation file
gene_info <- read.csv('gene_annotations.csv')
# Merge with results
res_annotated <- merge(res_df, gene_info, by = 'gene', all.x = TRUE)
Exporting Results
To CSV
# All results
write.csv(res_df, file = 'deseq2_all_results.csv', row.names = FALSE)
# Significant only
sig_genes <- res_df %>% filter(padj < 0.05)
write.csv(sig_genes, file = 'deseq2_significant.csv', row.names = FALSE)
To Excel
library(openxlsx)
# Create workbook with multiple sheets
wb <- createWorkbook()
addWorksheet(wb, 'All Results')
writeData(wb, 'All Results', res_df)
addWorksheet(wb, 'Significant')
writeData(wb, 'Significant', sig_genes)
addWorksheet(wb, 'Up-regulated')
writeData(wb, 'Up-regulated', up_genes)
addWorksheet(wb, 'Down-regulated')
writeData(wb, 'Down-regulated', down_genes)
saveWorkbook(wb, 'de_results.xlsx', overwrite = TRUE)
Gene Lists for Pathway Analysis
# Just gene IDs for GO/KEGG analysis
sig_gene_list <- rownames(subset(res, padj < 0.05))
write.table(sig_gene_list, file = 'significant_genes.txt',
quote = FALSE, row.names = FALSE, col.names = FALSE)
# With fold changes for GSEA
gsea_input <- res_df %>%
filter(!is.na(log2FoldChange)) %>%
select(gene, log2FoldChange) %>%
arrange(desc(log2FoldChange))
write.table(gsea_input, file = 'gsea_input.rnk',
sep = '\t', quote = FALSE, row.names = FALSE, col.names = FALSE)
Comparing Results Between Methods
# Get significant genes from both methods
deseq2_sig <- rownames(subset(deseq2_res, padj < 0.05))
edger_sig <- rownames(subset(edger_results, FDR < 0.05))
# Overlap
common <- intersect(deseq2_sig, edger_sig)
deseq2_only <- setdiff(deseq2_sig, edger_sig)
edger_only <- setdiff(edger_sig, deseq2_sig)
cat(sprintf('DESeq2 significant: %d\n', length(deseq2_sig)))
cat(sprintf('edgeR significant: %d\n', length(edger_sig)))
cat(sprintf('Common: %d\n', length(common)))
cat(sprintf('DESeq2 only: %d\n', length(deseq2_only)))
cat(sprintf('edgeR only: %d\n', length(edger_only)))
# Venn diagram
library(VennDiagram)
venn.diagram(
x = list(DESeq2 = deseq2_sig, edgeR = edger_sig),
filename = 'de_overlap.png',
fill = c('steelblue', 'coral')
)
Multiple Testing Correction
# DESeq2 uses Benjamini-Hochberg by default
# To use different methods:
# Independent Hypothesis Weighting (more powerful)
library(IHW)
res_ihw <- results(dds, filterFun = ihw)
# Manual p-value adjustment
res_df$padj_bonferroni <- p.adjust(res_df$pvalue, method = 'bonferroni')
res_df$padj_bh <- p.adjust(res_df$pvalue, method = 'BH')
res_df$padj_fdr <- p.adjust(res_df$pvalue, method = 'fdr')
Handling NA Values
# Count NAs
sum(is.na(res$padj))
# Remove genes with NA padj
res_complete <- res[!is.na(res$padj), ]
# Understand why NAs occur
# - baseMean = 0: No counts
# - NA only in padj: Outlier or low count filtered by independent filtering
# Check outliers
res[which(is.na(res$pvalue) & res$baseMean > 0), ]
Quick Reference: Result Columns
DESeq2
| Column | Description |
|---|---|
baseMean |
Mean normalized counts |
log2FoldChange |
Log2 fold change |
lfcSE |
Standard error of LFC |
stat |
Wald statistic |
pvalue |
Raw p-value |
padj |
Adjusted p-value (BH) |
edgeR
| Column | Description |
|---|---|
logFC |
Log2 fold change |
logCPM |
Average log2 CPM |
F |
Quasi-likelihood F-statistic |
PValue |
Raw p-value |
FDR |
False discovery rate |
Related Skills
- deseq2-basics – Run DESeq2 analysis
- edger-basics – Run edgeR analysis
- de-visualization – Visualize results
- pathway-analysis – Use gene lists for enrichment