Making a heatmap with R

Update 15th May 2018: I recommend using the pheatmap package for creating heatmaps.

Heatmaps are great for visualising large tables of data; they are definitely popular in many transcriptome papers. Here are the basic commands for making your own heatmap:

data <- read.table("test.txt",sep="\t",header=TRUE,row.names=1)
data_matrix <- data.matrix(data)
heatmap(data_matrix)
#For more information see help(heatmap)
#If you want cooler colours than the default
install.packages("RColorBrewer")
library("RColorBrewer")
#display all colour schemes
display.brewer.all()
heatmap(data_matrix,col=brewer.pal(9,"Blues"))
#if you want to preserve the column order
#since the order may be informative
heatmap(data_matrix,Colv=NA,col=brewer.pal(9,"Blues"))

display_brewer_allThe colour schemes from the function display.brewer.all()

Returning the values used for the heatmap

Taken from the email thread: [BioC] heatmap.2() get matrix after hierarchical clustering


## Some input sample matrix
y <- matrix(rnorm(50), 10, 5, dimnames=list(paste("g", 1:10, sep=""), paste("t", 1:5, sep="")))

y
             t1          t2          t3           t4         t5
g1   0.09010160  0.18099063  0.06491397  0.976286072 -0.1365132
g2   1.46478627  0.07811639  0.92728521 -0.006493938 -0.4911211
g3   0.31297844 -1.56484613 -0.85887941  0.353068737 -1.0221706
g4   0.09407695  0.43792430  0.46242189  1.491145523 -0.7022191
g5   0.69112967  0.36843583  1.19093386 -0.032736910  1.8298866
g6  -1.02819430 -0.43686436  0.49728848 -0.120586171 -2.9141145
g7   0.47593269 -1.87135877  1.02256261 -1.036563429  0.8059392
g8  -0.10274882 -0.50991134  1.38989532  0.416684239 -0.4231279
g9  -1.17358163 -0.93794222 -0.83479604 -0.872205800  0.1767881
g10  0.23001732  0.10358520  0.69702767 -1.797501815 -0.4973721

## Run heatmap.2 on this matrix
install.packages("gplots")
library(gplots)
test <- heatmap.2(y)
y[rev(test$rowInd), test$colInd]

            t5          t2           t4          t1          t3
g5   1.8298866  0.36843583 -0.032736910  0.69112967  1.19093386
g7   0.8059392 -1.87135877 -1.036563429  0.47593269  1.02256261
g10 -0.4973721  0.10358520 -1.797501815  0.23001732  0.69702767
g4  -0.7022191  0.43792430  1.491145523  0.09407695  0.46242189
g1  -0.1365132  0.18099063  0.976286072  0.09010160  0.06491397
g8  -0.4231279 -0.50991134  0.416684239 -0.10274882  1.38989532
g2  -0.4911211  0.07811639 -0.006493938  1.46478627  0.92728521
g6  -2.9141145 -0.43686436 -0.120586171 -1.02819430  0.49728848
g3  -1.0221706 -1.56484613  0.353068737  0.31297844 -0.85887941
g9   0.1767881 -0.93794222 -0.872205800 -1.17358163 -0.83479604

## Row clustering (adjust here distance/linkage methods to what you need!)
hr <- hclust(as.dist(1-cor(t(y), method="pearson")), method="complete")

## Column clustering (adjust here distance/linkage methods to what you need!)
hc <- hclust(as.dist(1-cor(y, method="spearman")), method="complete")

## Plot heatmap
heatmap.2(y, Rowv=as.dendrogram(hr), Colv=as.dendrogram(hc), scale="row", density.info="none", trace="none")

## Return matrix with row/column sorting as in heatmap
y[rev(hr$labels[hr$order]), hc$labels[hc$order]]

             t2           t4          t1          t3         t5
g7  -1.87135877 -1.036563429  0.47593269  1.02256261  0.8059392
g5   0.36843583 -0.032736910  0.69112967  1.19093386  1.8298866
g9  -0.93794222 -0.872205800 -1.17358163 -0.83479604  0.1767881
g10  0.10358520 -1.797501815  0.23001732  0.69702767 -0.4973721
g2   0.07811639 -0.006493938  1.46478627  0.92728521 -0.4911211
g4   0.43792430  1.491145523  0.09407695  0.46242189 -0.7022191
g1   0.18099063  0.976286072  0.09010160  0.06491397 -0.1365132
g3  -1.56484613  0.353068737  0.31297844 -0.85887941 -1.0221706
g8  -0.50991134  0.416684239 -0.10274882  1.38989532 -0.4231279
g6  -0.43686436 -0.120586171 -1.02819430  0.49728848 -2.9141145

test <- heatmap.2(y)

heatmap.2(y, Rowv=as.dendrogram(hr), Colv=as.dendrogram(hc), scale=”row”, density.info=”none”, trace=”none”)

An example of creating a heatmap using transcriptome data

I will use the TagSeqExample.tab file from DESeq.

#install DESeq if necessary
source("http://bioconductor.org/biocLite.R")
biocLite("DESeq")
#load DESeq
library("DESeq")
example_file <- system.file ("extra/TagSeqExample.tab", package="DESeq")
data <- read.delim(example_file, header=T, row.names="gene")
head(data)
           T1a T1b  T2  T3  N1  N2
Gene_00001   0   0   2   0   0   1
Gene_00002  20   8  12   5  19  26
Gene_00003   3   0   2   0   0   0
Gene_00004  75  84 241 149 271 257
Gene_00005  10  16   4   0   4  10
Gene_00006 129 126 451 223 243 149
nrow(data)
[1] 18760
#check distribution of row sums
quantile(rowSums(data))
    0%    25%    50%    75%   100% 
     0     31    228    883 234506
#create a workable set
data_subset <- data[rowSums(data)>50000,]
nrow(data_subset)
[1] 49
data_matrix <- data.matrix(data_subset)
heatmap(data_matrix)

Now using heatmap.2():

#install if necessary
install.packages("gplots")
library(gplots)
heatmap.2(data_matrix)

heatmap.2(data_matrix,scale="row")

#using a red and blue colour scheme without traces and scaling each row
library("RColorBrewer")
heatmap.2(data_matrix,col=brewer.pal(11,"RdBu"),scale="row", trace="none")

deseq_heatmap2_rdbu

#picking your own colours
#black and red
colfunc <- colorRampPalette(c("black", "red"))
heatmap.2(data_matrix,col=colfunc(15),scale="row", trace="none")

heatmap_2_black_red

#picking your own colours
#black, white and red
#to see all colours type colours()
colfunc <- colorRampPalette(c("black", "white", "red"))
heatmap.2(data_matrix,col=colfunc(15),scale="row", trace="none")

heatmap_2_black_white_redPerhaps you can pick better colours than me!.

Manually checking some of the values in heatmap.2()

#The first row in the heatmap
data_matrix["Gene_08743",]
  T1a   T1b    T2    T3    N1    N2 
10404  4626 32103 14288 19626 16079 
summary(data_matrix["Gene_08743",])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   4626   11380   15180   16190   18740   32100
test <- heatmap.2(data_matrix,scale="row")
#create function to calculate z-score
z_score <- function(x){
+    (x-mean(x))/sd(x)
+ }
z_score(data_matrix["Gene_08743",])
        T1a         T1b          T2          T3          N1          N2 
-0.61945966 -1.23831240  1.70461190 -0.20346381  0.36826272 -0.01163874
#compare the manually calculating values with the ones used for the heatmap
#they are the same, except the ordering
test$carpet[,"Gene_08743"]
         N2         T1b         T1a          T2          T3          N1 
-0.01163874 -1.23831240 -0.61945966  1.70461190 -0.20346381  0.36826272
Print Friendly, PDF & Email



Creative Commons License
This work is licensed under a Creative Commons
Attribution 4.0 International License
.
23 comments Add yours
  1. Hi Dave,
    I’m trying to generate heatmap. But an error shown

    ‘Error in vector(“double”, length) : vector size specified is too large’

    From googling, it maybe due to big dataset. Oh FYI, my dataset is from DGE which have around ~700 rows

    Have u encounter this thing before??

  2. Hi, thanks for the helpful information. Would you be able to provide the command necessary to show the final heatmap image? Thanks for your assistance.

    1. Hi Katherine,

      Thanks for the comment. If you follow the section “An example of creating a heatmap using transcriptome data”, you should be able to recreate the final heatmap image.

      Cheers,

      Dave

  3. Hi Dave,

    I have question regarding heatmap.2, and I thought maybe you’re able to help me. I have to make several heatmaps, for visualizing expression data in one figure so I would like to have one Z-score color key. However, each heatmap is a bit differently scaled. Do you have suggestions how to standardize this? Does heatmap.2 or any other heatmap-function has this capability?

    Many thanks!

    Regards,
    Inge

    1. Hi Inge,

      the Z-score is the standardised score. So if you use “scale=row”, it scales the rows using the Z-scores.

      Cheers,

      Dave

  4. Hi Dave, I wanted to generate a simple heat map without clustering and with some missing values. Can you please tel me the script for this. Suppose I have 1 row name, 100 rows and 15 columns and some missing values in it. Fist column contains gene name. It will be very helpful for me.

  5. Hi Dave,

    I am trying to make the gene names (row names) less “squished” in heatmap.2, can you help? I have 157 genes and the final map, you can’t read the names.

    Thanks!

    1. Hi Tracy,

      that is a common problem when plotting a lot of features. You can try adjusting the cexRow parameter; for example, heatmap.2(data_matrix, scale=’row’, trace=’none’, cexRow=0.5).

      Cheers,

      Dave

  6. any body know, how to change Z-score values in gplots heatmap2
    When I make heatmap with following data set, it gives Z-score between -3 and 3. I want to make/shrink Z-score between -1 and 1.
    my data set is:
    gene_short_name A B C D E F G H I J K L M N
    PRDM16 -0.999991942 -0.999994397 -0.999994083 -0.99999156 -0.999992855 -0.999991277 -0.999991841 -0.999994516 -0.999994443 -1 -0.999999964 -1 -1 -0.999994628
    RBP7 -0.999977227 -0.999956207 -0.999957412 -0.999914381 -0.99989872 -0.999897662 -0.999955386 -0.999985699 -0.999962159 -0.999997992 -1 -1 -0.999939133 -1
    MIR4632,TNFRSF1B -0.999954863 -0.999890674 -0.999937486 -0.999937096 -0.999909162 -0.99994317 -0.99996912 -0.999954411 -0.999927464 -0.999998751 -0.999999916 -1 -0.99999982 -0.999966946
    PDPN -0.999967012 -0.99996654 -0.999958155 -0.999972382 -0.99993592 -0.999967687 -0.999953373 -0.999945401 -0.999935017 -1 -1 -1 -1 -0.999940805
    PADI1 -1 -0.999999953 -0.999999453 -1 -1 -0.999999952 -1 -1 -0.999999946 -0.999997611 -0.999978801 -0.999997505 -0.999997095 -0.999944804
    PADI3 -1 -1 -1 -1 -1 -0.999999952 -1 -1 -1 -0.99999984 -0.999946494 -0.999999822 -0.999999896 -0.999997434
    FAM43B -0.99999725 -0.999999297 -0.999999383 -0.999998539 -0.999998108 -0.999999437 -0.999999423 -1 -0.999999674 -1 -0.999999753 -1 -0.999864341 -0.999999727
    thank you so much

  7. Hi Dave,
    I created a heatmap with the fold-change expression of 50 genes (raws) and several unrelated conditions like expression in different tissues and developmental stages. I thought it was OK, but when I took a closer look I noticed some unexpected values and genes that had been assigned the “wrong” colour. I have used Pearson’s correlation to calculate the distances and I believe this is the mistake as it is clustering my samples using the mean expression across all my conditions.
    Do you have any suggestions? I feel like I should create a colour map first based on the absolute expression values and then create a hierarchical cluster based on the raws only; however, I don’t know how I could do this.
    I would really appreciate your help 🙂
    Cheers,
    Greta

  8. Dave, that’s a great tutorial but for omics data this leads to large heatmaps that are almost always cropped by the R studio plotting function. Do you have a fix for that? My row names are consistently outside of the plotting area

    1. Hi George, you can try adjusting the margins; here’s a great tutorial http://research.stowers.org/mcm/efg/R/Graphics/Basics/mar-oma/index.htm. You can also get the row indices if you save your heatmap.

      my_mat <- as.matrix(iris[,-5])
      my_heatmap <- gplots::heatmap.2(my_mat)
      head(my_heatmap$rowInd)
      [1]  61  99  58  94 107  63
      iris[head(my_heatmap$rowInd),]
          Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
      61           5.0         2.0          3.5         1.0 versicolor
      99           5.1         2.5          3.0         1.1 versicolor
      58           4.9         2.4          3.3         1.0 versicolor
      94           5.0         2.3          3.3         1.0 versicolor
      107          4.9         2.5          4.5         1.7  virginica
      63           6.0         2.2          4.0         1.0 versicolor
      
  9. Hi Dave,
    Thank you for useful guidance on generating heatmap. Can you please interpret color key, histogram and Z-score generated by heatmap.2 ?
    Regards
    Trieu

  10. Hi Davo, thank you for the amazing heatmap script. Is it possible to change the color of the row names? I have already ordered my row names into 4 sets of genes and would like to assign each set a different color. Thanks!

    1. Hi Joseph,

      I actually don’t use heatmap.2() anymore and have switched to pheatmap. You can colour the 4 sets of genes using the parameter annotation_row in the pheatmap() function.

      Cheers,

      Dave

  11. Hi Dave,

    My code for heatmap
    heatmap.2(input,
    Rowv = NULL,
    Colv = NULL,
    scale = “none”,
    margins=c(18,8),
    trace=”none”,
    symbreaks = F,
    dendrogram = ‘none’,
    density.info = “none”,
    keysize = 1,
    key.par=list(mar=c(3,1,1,2), cex.lab=1.5, cex=1, cex.axis=1.5),
    col=bluered(100), cexRow=1.8, cexCol=2,
    rowsep=c(1:16), colsep=c(1:42), sepwidth=c(0.01,0.02), sepcolor=”white”,
    main = “Heatmap FCR”, colRow = c(“pink”, “blue”))

    has a problem: when I don’t use colRow specifications, the column with the labels for each row is aligned perfectly to the right of the heatmap (where I want it). But when I use this specification, the column with the labels moves to the right, and I want it to stay in the same place as before, otherwise I can’t see the whole names of each row.
    colRow is the means to change color of names of row labels, it should be a vector with length equal to the number of rows, if it is smaller, it will color only the first rows (in my example the first 2 row names).

    Could you help me?

    Many thanks,

    Joana

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.