bio-data-visualization-heatmaps-clustering
4
总安装量
4
周安装量
#53797
全站排名
安装命令
npx skills add https://github.com/gptomics/bioskills --skill bio-data-visualization-heatmaps-clustering
Agent 安装分布
trae
2
windsurf
1
opencode
1
github-copilot
1
claude-code
1
Skill 文档
Heatmaps and Clustering
pheatmap (R) – Quick Heatmaps
library(pheatmap)
library(RColorBrewer)
# Clustered heatmap of row-scaled values; rows and columns are both clustered
pheatmap(
  mat,
  scale = "row",
  cluster_rows = TRUE,
  cluster_cols = TRUE
)

# Column annotations, keyed by the column names of `mat`
annotation_col <- data.frame(
  Condition = metadata$condition,
  Batch = metadata$batch,
  row.names = colnames(mat)
)

# Row annotations, keyed by the row names of `mat`
annotation_row <- data.frame(
  Pathway = gene_info$pathway,
  row.names = rownames(mat)
)

# Same heatmap with both annotation tracks and a reversed RdBu palette
# interpolated to 100 colors; row labels are hidden
pheatmap(
  mat,
  scale = "row",
  annotation_col = annotation_col,
  annotation_row = annotation_row,
  color = colorRampPalette(rev(brewer.pal(9, "RdBu")))(100),
  show_rownames = FALSE,
  fontsize = 8
)
pheatmap Customization
# Custom annotation colors: one named vector per annotation track, with
# names matching the levels of that annotation.
ann_colors <- list(
  Condition = c(Control = "#4DBBD5", Treatment = "#E64B35"),
  Batch = c(A = "#00A087", B = "#3C5488", C = "#F39B7F"),
  Pathway = c(Metabolism = "#8491B4", Signaling = "#91D1C2")
)

pheatmap(
  mat,
  scale = "row",
  annotation_col = annotation_col,
  # Pass the row annotation as well so the Pathway colors above are used.
  # NOTE(review): assumes gene_info$pathway only contains the levels listed
  # in ann_colors$Pathway -- confirm against the data.
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  clustering_distance_rows = "correlation",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2",
  # cutree_* splits the clustered dendrograms into groups. gaps_col only
  # applies when columns are NOT clustered, so it is omitted here; use
  # cluster_cols = FALSE together with gaps_col for fixed gap positions.
  cutree_rows = 4,
  cutree_cols = 2,
  border_color = NA,
  main = "Gene Expression Heatmap"
)
ComplexHeatmap (R) – Advanced
library(ComplexHeatmap)
library(circlize)
# Diverging color mapping: -2 -> blue, 0 -> white, 2 -> red
col_fun <- colorRamp2(
  c(-2, 0, 2),
  c("blue", "white", "red")
)

# Basic heatmap: cluster both dimensions, hide row labels, show column labels
Heatmap(
  mat,
  name = "Z-score",
  col = col_fun,
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_row_names = FALSE,
  show_column_names = TRUE
)
ComplexHeatmap with Annotations
# Column annotation: two categorical tracks plus a barplot of age
ha_col <- HeatmapAnnotation(
  Condition = metadata$condition,
  Batch = metadata$batch,
  Age = anno_barplot(metadata$age),
  col = list(
    Condition = c(Control = "#4DBBD5", Treatment = "#E64B35"),
    # Every level present in the data needs a color. Batch C is included so
    # this mapping agrees with the three-batch palette used in the pheatmap
    # and seaborn examples.
    Batch = c(A = "#00A087", B = "#3C5488", C = "#F39B7F")
  )
)

# Row annotation: pathway membership plus a log2 fold-change barplot whose
# bars are filled red for positive values and blue otherwise
ha_row <- rowAnnotation(
  Pathway = gene_info$pathway,
  LogFC = anno_barplot(
    gene_info$log2FC,
    baseline = 0,
    gp = gpar(fill = ifelse(gene_info$log2FC > 0, "red", "blue"))
  ),
  col = list(Pathway = c(Metabolism = "#8491B4", Signaling = "#91D1C2"))
)

# Heatmap with both annotations; rows are split by pathway and columns by
# condition
Heatmap(
  mat,
  name = "Z-score",
  col = col_fun,
  top_annotation = ha_col,
  left_annotation = ha_row,
  row_split = gene_info$pathway,
  column_split = metadata$condition
)
Multiple Heatmaps
# Build two heatmaps and concatenate them horizontally with `+`
ht1 <- Heatmap(mat1, name = "Expression", col = col_fun)
ht2 <- Heatmap(
  mat2,
  name = "Methylation",
  col = colorRamp2(c(0, 0.5, 1), c("blue", "white", "red"))
)
ht_list <- ht1 + ht2

# Draw the combined list with shared row and column titles
draw(
  ht_list,
  row_title = "Genes",
  column_title = "Samples"
)
seaborn (Python)
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
# Basic clustermap.
# z_score=0 standardizes each row (1 would standardize each column), so the
# values are centered on 0 as center=0 and the diverging 'RdBu_r' colormap
# expect. standard_scale would instead rescale to [0, 1], leaving half of the
# diverging palette unused.
g = sns.clustermap(df, cmap='RdBu_r', center=0, figsize=(10, 12),
                   row_cluster=True, col_cluster=True,
                   z_score=0)  # 0 = rows, 1 = columns
# Save through the returned grid so the clustermap's own figure is written,
# regardless of which matplotlib figure is currently active.
g.savefig('heatmap.png', dpi=150, bbox_inches='tight')
seaborn with Annotations
# Lookup tables from annotation level to hex color
condition_colors = {'Control': '#4DBBD5', 'Treatment': '#E64B35'}
batch_colors = {'A': '#00A087', 'B': '#3C5488', 'C': '#F39B7F'}

# One color column per sample-level annotation track.
# NOTE(review): assumes metadata is indexed by the column labels of df and
# gene_info by its row labels -- confirm against the caller's data.
col_colors = pd.DataFrame({
    'Condition': metadata['condition'].map(condition_colors),
    'Batch': metadata['batch'].map(batch_colors),
})

# Single color track for the rows, derived from each gene's pathway
row_colors = gene_info['pathway'].map(
    {'Metabolism': '#8491B4', 'Signaling': '#91D1C2'}
)

# Annotated clustermap with a compact dendrogram and a repositioned colorbar
g = sns.clustermap(
    df,
    cmap='RdBu_r',
    center=0,
    row_colors=row_colors,
    col_colors=col_colors,
    figsize=(12, 14),
    dendrogram_ratio=0.15,
    cbar_pos=(0.02, 0.8, 0.03, 0.15),
)

# Axis labels go on the heatmap axes, not on the figure
g.ax_heatmap.set_xlabel('Samples')
g.ax_heatmap.set_ylabel('Genes')
Clustering Methods
# Distance metrics:
#   'euclidean', 'correlation', 'manhattan', 'maximum', 'canberra', 'binary'
# Linkage methods:
#   'complete', 'single', 'average', 'ward.D', 'ward.D2', 'mcquitty',
#   'median', 'centroid'

# Correlation distance between rows, Euclidean distance between columns,
# Ward (D2) linkage for both dendrograms
pheatmap(
  mat,
  clustering_distance_rows = "correlation",
  clustering_distance_cols = "euclidean",
  clustering_method = "ward.D2"
)
Extract Cluster Assignments
# pheatmap: build the heatmap without drawing it, then cut the row
# dendrogram into the same 4 groups that cutree_rows displays
p <- pheatmap(mat, scale = "row", cutree_rows = 4, silent = TRUE)
row_clusters <- cutree(p$tree_row, k = 4)

# ComplexHeatmap: split rows into 4 slices, draw, then read the slices back
ht <- Heatmap(mat, row_split = 4)
ht <- draw(ht)
# With a row split, row_order() returns a list holding one vector of row
# indices per slice (in plotting order).
row_order <- row_order(ht)
# Flatten that list into one slice number per row of `mat`, in the original
# row order, so it can be used like `row_clusters` above.
slice_rows <- unlist(row_order, use.names = FALSE)
row_slices <- rep(seq_along(row_order), lengths(row_order))[order(slice_rows)]
names(row_slices) <- rownames(mat)
# seaborn: reuse the row linkage computed by clustermap and cut it into
# flat clusters (at most 4, via the 'maxclust' criterion)
from scipy.cluster.hierarchy import fcluster

g = sns.clustermap(df, cmap='RdBu_r')
row_linkage = g.dendrogram_row.linkage
clusters = fcluster(
    row_linkage,
    t=4,
    criterion='maxclust',
)
Save Heatmaps
# pheatmap writes the file itself when `filename` is supplied
pheatmap(
  mat,
  filename = "heatmap.pdf",
  width = 8,
  height = 10
)

# ComplexHeatmap draws to the active device: open a PDF device, draw the
# heatmap, then close the device so the file is written
pdf("heatmap.pdf", width = 8, height = 10)
draw(ht)
dev.off()
Related Skills
- data-visualization/ggplot2-fundamentals – General plotting
- data-visualization/color-palettes – Color selection
- differential-expression/de-visualization – Expression heatmaps