bio-de-visualization
4
总安装量
4
周安装量
#53998
全站排名
安装命令
npx skills add https://github.com/gptomics/bioskills --skill bio-de-visualization
Agent 安装分布
trae
2
windsurf
1
opencode
1
codex
1
github-copilot
1
claude-code
1
Skill 文档
DE Visualization
Create visualizations for differential expression analysis using DESeq2 and edgeR built-in plotting functions.
Scope
This skill covers DE-specific built-in functions:
- DESeq2:
plotMA(),plotPCA(),plotDispEsts(),plotCounts() - edgeR:
plotMD(),plotBCV(),plotMDS() - Sample distance heatmaps and p-value distributions
For custom ggplot2/matplotlib implementations of volcano, MA, and PCA plots, see data-visualization/specialized-omics-plots.
Required Libraries
library(DESeq2)
library(ggplot2)
library(pheatmap)
library(RColorBrewer)
library(ggrepel) # For labeled points
Installation
install.packages(c('ggplot2', 'pheatmap', 'RColorBrewer', 'ggrepel'))
# Optional: Enhanced volcano plots
BiocManager::install('EnhancedVolcano')
MA Plot
DESeq2 MA Plot
# Built-in MA plot
plotMA(res, ylim = c(-5, 5), main = 'MA Plot')
# With custom alpha
plotMA(res, alpha = 0.05, ylim = c(-5, 5))
# Highlight specific genes
plotMA(res, ylim = c(-5, 5))
with(subset(res, padj < 0.01 & abs(log2FoldChange) > 2),
points(baseMean, log2FoldChange, col = 'red', pch = 20))
Custom ggplot2 MA Plot
res_df <- as.data.frame(res)
res_df$significant <- res_df$padj < 0.05 & !is.na(res_df$padj)
ggplot(res_df, aes(x = log10(baseMean), y = log2FoldChange, color = significant)) +
geom_point(alpha = 0.5, size = 1) +
scale_color_manual(values = c('grey60', 'red')) +
geom_hline(yintercept = 0, linetype = 'dashed') +
labs(x = 'log10(Mean Expression)', y = 'log2 Fold Change', title = 'MA Plot') +
theme_bw() +
theme(legend.position = 'bottom')
edgeR MA Plot
# Using plotMD (mean-difference plot)
plotMD(qlf, main = 'MD Plot')
abline(h = c(-1, 1), col = 'blue', lty = 2)
Volcano Plot
Basic Volcano Plot
res_df <- as.data.frame(res)
res_df$significant <- res_df$padj < 0.05 & abs(res_df$log2FoldChange) > 1
ggplot(res_df, aes(x = log2FoldChange, y = -log10(pvalue), color = significant)) +
geom_point(alpha = 0.5, size = 1) +
scale_color_manual(values = c('grey60', 'red')) +
geom_vline(xintercept = c(-1, 1), linetype = 'dashed', color = 'blue') +
geom_hline(yintercept = -log10(0.05), linetype = 'dashed', color = 'blue') +
labs(x = 'log2 Fold Change', y = '-log10(p-value)', title = 'Volcano Plot') +
theme_bw()
Volcano with Gene Labels
res_df <- as.data.frame(res)
res_df$gene <- rownames(res_df)
res_df$significant <- res_df$padj < 0.05 & abs(res_df$log2FoldChange) > 1
# Label top genes
top_genes <- head(res_df[order(res_df$padj), ], 10)
ggplot(res_df, aes(x = log2FoldChange, y = -log10(pvalue))) +
geom_point(aes(color = significant), alpha = 0.5, size = 1) +
scale_color_manual(values = c('grey60', 'red')) +
geom_text_repel(data = top_genes, aes(label = gene),
size = 3, max.overlaps = 20) +
geom_vline(xintercept = c(-1, 1), linetype = 'dashed') +
geom_hline(yintercept = -log10(0.05), linetype = 'dashed') +
labs(x = 'log2 Fold Change', y = '-log10(p-value)') +
theme_bw()
EnhancedVolcano
library(EnhancedVolcano)
EnhancedVolcano(res,
lab = rownames(res),
x = 'log2FoldChange',
y = 'pvalue',
pCutoff = 0.05,
FCcutoff = 1,
title = 'Differential Expression',
subtitle = 'Treatment vs Control')
PCA Plot
DESeq2 PCA
# Variance stabilizing transformation first
vsd <- vst(dds, blind = FALSE)
# Basic PCA
plotPCA(vsd, intgroup = 'condition')
# With more options
plotPCA(vsd, intgroup = c('condition', 'batch'), ntop = 500)
Custom PCA with ggplot2
vsd <- vst(dds, blind = FALSE)
pca_data <- plotPCA(vsd, intgroup = c('condition', 'batch'), returnData = TRUE)
percentVar <- round(100 * attr(pca_data, 'percentVar'))
ggplot(pca_data, aes(x = PC1, y = PC2, color = condition, shape = batch)) +
geom_point(size = 4) +
xlab(paste0('PC1: ', percentVar[1], '% variance')) +
ylab(paste0('PC2: ', percentVar[2], '% variance')) +
ggtitle('PCA Plot') +
theme_bw() +
theme(legend.position = 'right')
edgeR PCA (via limma)
library(limma)
log_cpm <- cpm(y, log = TRUE)
plotMDS(log_cpm, col = as.numeric(group), pch = 16)
legend('topright', legend = levels(group), col = 1:nlevels(group), pch = 16)
Heatmaps
Top DE Genes Heatmap
library(pheatmap)
# Get top significant genes
sig_genes <- rownames(subset(res, padj < 0.01))
# Get normalized counts
vsd <- vst(dds, blind = FALSE)
mat <- assay(vsd)[sig_genes, ]
# Scale by row (z-score)
mat_scaled <- t(scale(t(mat)))
# Create annotation
annotation_col <- data.frame(
condition = colData(dds)$condition,
row.names = colnames(mat)
)
pheatmap(mat_scaled,
annotation_col = annotation_col,
show_rownames = FALSE,
clustering_distance_rows = 'correlation',
clustering_distance_cols = 'correlation',
color = colorRampPalette(c('blue', 'white', 'red'))(100),
main = 'Top DE Genes')
Sample Distance Heatmap
vsd <- vst(dds, blind = FALSE)
# Calculate sample distances
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
# Annotation
annotation <- data.frame(
condition = colData(dds)$condition,
row.names = colnames(dds)
)
pheatmap(sampleDistMatrix,
annotation_col = annotation,
annotation_row = annotation,
clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists,
color = colorRampPalette(c('white', 'steelblue'))(100),
main = 'Sample Distance Matrix')
Gene Expression Heatmap
# Select genes of interest
genes_of_interest <- c('gene1', 'gene2', 'gene3', 'gene4', 'gene5')
mat <- assay(vsd)[genes_of_interest, ]
pheatmap(mat,
scale = 'row',
annotation_col = annotation_col,
show_rownames = TRUE,
cluster_cols = TRUE,
cluster_rows = TRUE,
main = 'Genes of Interest')
Dispersion Plot
DESeq2
plotDispEsts(dds, main = 'Dispersion Estimates')
edgeR
plotBCV(y, main = 'Biological Coefficient of Variation')
Counts Plot for Individual Genes
DESeq2
# Plot counts for a specific gene
plotCounts(dds, gene = 'GENE_NAME', intgroup = 'condition')
# With ggplot2
d <- plotCounts(dds, gene = 'GENE_NAME', intgroup = 'condition', returnData = TRUE)
ggplot(d, aes(x = condition, y = count, color = condition)) +
geom_point(position = position_jitter(width = 0.1), size = 3) +
scale_y_log10() +
ggtitle('GENE_NAME Expression') +
theme_bw()
edgeR
# Get CPM for a gene
gene_idx <- which(rownames(y) == 'GENE_NAME')
cpm_gene <- cpm(y)[gene_idx, ]
# Plot
df <- data.frame(cpm = cpm_gene, group = group)
ggplot(df, aes(x = group, y = cpm, color = group)) +
geom_point(position = position_jitter(width = 0.1), size = 3) +
scale_y_log10() +
labs(y = 'CPM', title = 'GENE_NAME Expression') +
theme_bw()
P-value Histogram
# Check p-value distribution (should be uniform under null with peak near 0)
res_df <- as.data.frame(res)
ggplot(res_df, aes(x = pvalue)) +
geom_histogram(bins = 50, fill = 'steelblue', color = 'white') +
labs(x = 'P-value', y = 'Frequency', title = 'P-value Distribution') +
theme_bw()
Saving Plots
# Save as PDF (vector)
pdf('volcano_plot.pdf', width = 8, height = 6)
# ... plot code ...
dev.off()
# Save as PNG (raster)
png('volcano_plot.png', width = 800, height = 600, res = 150)
# ... plot code ...
dev.off()
# Using ggsave for ggplot objects
p <- ggplot(...) + ...
ggsave('plot.pdf', p, width = 8, height = 6)
ggsave('plot.png', p, width = 8, height = 6, dpi = 300)
Color Palettes
# For heatmaps
library(RColorBrewer)
# Diverging (for expression: blue-white-red)
colorRampPalette(rev(brewer.pal(n = 7, name = 'RdBu')))(100)
# Sequential (for distances)
colorRampPalette(brewer.pal(n = 9, name = 'Blues'))(100)
# For categorical groups
brewer.pal(n = 8, name = 'Set1')
Quick Reference: Common Plots
| Plot | Purpose | Function |
|---|---|---|
| MA plot | LFC vs mean expression | plotMA(), plotMD() |
| Volcano | LFC vs significance | ggplot2, EnhancedVolcano |
| PCA | Sample clustering | plotPCA(), plotMDS() |
| Heatmap | Gene patterns | pheatmap() |
| Dispersion | Model fit | plotDispEsts(), plotBCV() |
| Counts | Individual genes | plotCounts() |
Related Skills
- deseq2-basics – Generate DESeq2 results for visualization
- edger-basics – Generate edgeR results for visualization
- de-results – Filter genes before visualization
- data-visualization/specialized-omics-plots – Custom ggplot2 volcano/MA/PCA functions
- data-visualization/heatmaps-clustering – Advanced heatmap customization