bio-data-visualization-heatmaps-clustering

📁 gptomics/bioskills 📅 Jan 24, 2026
4
总安装量
4
周安装量
#53797
全站排名
安装命令
npx skills add https://github.com/gptomics/bioskills --skill bio-data-visualization-heatmaps-clustering

Agent 安装分布

trae 2
windsurf 1
opencode 1
github-copilot 1
claude-code 1

Skill 文档

Heatmaps and Clustering

pheatmap (R) – Quick Heatmaps

library(pheatmap)
library(RColorBrewer)

# Row-scaled heatmap with hierarchical clustering on both rows and columns
pheatmap(
  mat,
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  scale = "row"
)

# Sample-level (column) annotation table, keyed by the matrix column names
# NOTE(review): assumes metadata rows are in the same order as colnames(mat)
annotation_col <- data.frame(
  Condition = metadata$condition,
  Batch = metadata$batch,
  row.names = colnames(mat)
)

# Gene-level (row) annotation table, keyed by the matrix row names
# NOTE(review): assumes gene_info rows are in the same order as rownames(mat)
annotation_row <- data.frame(
  Pathway = gene_info$pathway,
  row.names = rownames(mat)
)

# 100-step diverging palette; RdBu is reversed so low = blue and high = red
diverging_palette <- colorRampPalette(rev(brewer.pal(9, "RdBu")))(100)

# Annotated heatmap; row (gene) labels are hidden
pheatmap(
  mat,
  scale = "row",
  color = diverging_palette,
  annotation_row = annotation_row,
  annotation_col = annotation_col,
  show_rownames = FALSE,
  fontsize = 8
)

pheatmap Customization

# Custom annotation colors: one named vector per annotation track, with one
# entry per level of that track. List names must match the column names of
# the annotation data frames.
ann_colors <- list(
    Condition = c(Control = '#4DBBD5', Treatment = '#E64B35'),
    Batch = c(A = '#00A087', B = '#3C5488', C = '#F39B7F'),
    Pathway = c(Metabolism = '#8491B4', Signaling = '#91D1C2')
)

# Clustered heatmap with custom distances, linkage and dendrogram cuts.
# - annotation_row is passed so that the Pathway palette above is actually used.
# - Gaps between clusters come from cutree_rows / cutree_cols. gaps_row /
#   gaps_col are only honoured when the corresponding dimension is NOT
#   clustered, so gaps_col is not set here; to place manual gaps instead, use
#   cluster_cols = FALSE together with gaps_col = c(5, 10).
pheatmap(mat, scale = 'row',
         annotation_col = annotation_col,
         annotation_row = annotation_row,
         annotation_colors = ann_colors,
         clustering_distance_rows = 'correlation',
         clustering_distance_cols = 'euclidean',
         clustering_method = 'ward.D2',
         cutree_rows = 4,
         cutree_cols = 2,
         border_color = NA,
         main = 'Gene Expression Heatmap')

ComplexHeatmap (R) – Advanced

library(ComplexHeatmap)
library(circlize)

# Diverging color mapping: -2 -> blue, 0 -> white, 2 -> red
col_fun <- colorRamp2(
  breaks = c(-2, 0, 2),
  colors = c("blue", "white", "red")
)

# Clustered heatmap showing sample (column) names and hiding gene (row) names
# NOTE(review): the legend is titled "Z-score" but no scaling is applied in
# this call -- confirm that mat is already row-standardized
Heatmap(
  mat,
  col = col_fun,
  name = "Z-score",
  cluster_rows = TRUE,
  cluster_columns = TRUE,
  show_column_names = TRUE,
  show_row_names = FALSE
)

ComplexHeatmap with Annotations

# Column annotation: two discrete tracks (Condition, Batch) plus a barplot
# track of sample age. `col` maps each discrete level to a color.
ha_col <- HeatmapAnnotation(
    Condition = metadata$condition,
    Batch = metadata$batch,
    Age = anno_barplot(metadata$age),
    col = list(
        Condition = c(Control = '#4DBBD5', Treatment = '#E64B35'),
        # Every level present in the data needs a color; batch C is included
        # to match the three-batch palette used by the other examples
        Batch = c(A = '#00A087', B = '#3C5488', C = '#F39B7F')
    )
)

# Row annotation: pathway membership plus a log2 fold-change barplot drawn
# from a zero baseline, with bars filled red for positive and blue otherwise
ha_row <- rowAnnotation(
    Pathway = gene_info$pathway,
    LogFC = anno_barplot(gene_info$log2FC, baseline = 0,
                          gp = gpar(fill = ifelse(gene_info$log2FC > 0, 'red', 'blue'))),
    col = list(Pathway = c(Metabolism = '#8491B4', Signaling = '#91D1C2'))
)

# Annotated heatmap, split into row slices by pathway and column slices by
# condition
Heatmap(mat, name = 'Z-score', col = col_fun,
        top_annotation = ha_col,
        left_annotation = ha_row,
        row_split = gene_info$pathway,
        column_split = metadata$condition)

Multiple Heatmaps

# Separate color mapping for the second matrix: 0 -> blue, 0.5 -> white, 1 -> red
meth_col_fun <- colorRamp2(c(0, 0.5, 1), c("blue", "white", "red"))

# One Heatmap object per data matrix
ht1 <- Heatmap(mat1, col = col_fun, name = "Expression")
ht2 <- Heatmap(mat2, col = meth_col_fun, name = "Methylation")

# `+` places the heatmaps side by side; draw() renders the combined list
ht_list <- ht1 + ht2
draw(ht_list, column_title = "Samples", row_title = "Genes")

seaborn (Python)

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Basic clustermap
# z_score=0 converts each row to z-scores (0 = rows, 1 = columns), so the
# diverging colormap centered at 0 is meaningful. standard_scale would instead
# rescale each row to [0, 1], which leaves half of a 0-centered diverging
# colormap unused.
g = sns.clustermap(df, cmap='RdBu_r', center=0, figsize=(10, 12),
                   row_cluster=True, col_cluster=True,
                   z_score=0)
# Save through the returned grid so the clustermap figure is always the one
# written, regardless of which matplotlib figure is currently active
g.savefig('heatmap.png', dpi=150, bbox_inches='tight')

seaborn with Annotations

# Level -> hex color lookups, one dict per annotation track
condition_colors = {'Control': '#4DBBD5', 'Treatment': '#E64B35'}
batch_colors = {'A': '#00A087', 'B': '#3C5488', 'C': '#F39B7F'}
pathway_colors = {'Metabolism': '#8491B4', 'Signaling': '#91D1C2'}

# Sample-level color table: one column of colors per track
# NOTE(review): assumes metadata is indexed by the column labels of df and
# gene_info by its row labels -- confirm against how they are loaded
col_colors = pd.DataFrame({
    'Condition': metadata['condition'].map(condition_colors),
    'Batch': metadata['batch'].map(batch_colors),
})

# Gene-level colors: a single track for pathway membership
row_colors = gene_info['pathway'].map(pathway_colors)

g = sns.clustermap(
    df,
    cmap='RdBu_r',
    center=0,
    figsize=(12, 14),
    dendrogram_ratio=0.15,
    cbar_pos=(0.02, 0.8, 0.03, 0.15),
    row_colors=row_colors,
    col_colors=col_colors,
)

# Axis labels go on the heatmap axes of the returned grid
heatmap_ax = g.ax_heatmap
heatmap_ax.set_xlabel('Samples')
heatmap_ax.set_ylabel('Genes')

Clustering Methods

# Distance metrics for clustering_distance_rows / clustering_distance_cols:
#   "correlation" (Pearson), or any method supported by stats::dist():
#   "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"
#
# Linkage methods for clustering_method (same values as stats::hclust()):
#   "ward.D", "ward.D2", "single", "complete", "average", "mcquitty",
#   "median", "centroid"

# Example: correlation distance on rows, euclidean on columns, Ward linkage
pheatmap(
  mat,
  clustering_method = "ward.D2",
  clustering_distance_cols = "euclidean",
  clustering_distance_rows = "correlation"
)

Extract Cluster Assignments

# pheatmap: silent = TRUE skips drawing; the returned object carries the row
# dendrogram in $tree_row, which cutree() cuts into k = 4 groups
p <- pheatmap(mat, scale = 'row', cutree_rows = 4, silent = TRUE)
row_clusters <- cutree(p$tree_row, k = 4)

# ComplexHeatmap: draw() has to run first so the clustering is computed.
# With row_split = 4, row_order() returns a list with one vector of row
# indices per slice. The result is stored under a different name so that it
# does not shadow the row_order() generic.
ht <- Heatmap(mat, row_split = 4)
ht <- draw(ht)
row_order_list <- row_order(ht)

# Flatten the per-slice index lists into one cluster id per row; names are the
# matching row names of mat (left unset when mat has no row names)
row_clusters_ch <- rep(seq_along(row_order_list), lengths(row_order_list))
names(row_clusters_ch) <- rownames(mat)[unlist(row_order_list)]
# seaborn: reuse the row linkage computed by clustermap and cut it into at
# most 4 flat clusters
from scipy.cluster.hierarchy import fcluster

g = sns.clustermap(df, cmap='RdBu_r')
row_linkage = g.dendrogram_row.linkage
clusters = fcluster(row_linkage, criterion='maxclust', t=4)

Save Heatmaps

# pheatmap writes straight to disk when `filename` is given
# (width and height are in inches)
pheatmap(mat, width = 8, height = 10, filename = "heatmap.pdf")

# ComplexHeatmap draws on the active graphics device: open a PDF device,
# draw, then close the device to finish writing the file.
# NOTE: this is the same path as the pheatmap example above, so running both
# overwrites the first file.
pdf(file = "heatmap.pdf", width = 8, height = 10)
draw(ht)
dev.off()

Related Skills

  • data-visualization/ggplot2-fundamentals – General plotting
  • data-visualization/color-palettes – Color selection
  • differential-expression/de-visualization – Expression heatmaps