Thesis cover art

I was recommended to have some sort of cover art on my thesis, so I decided to make my cover using the awesome Gviz package. Below is all the code for how I generated my thesis cover art. Suggestions and comments most welcome!

Firstly I downloaded some CAGE, ChIP-Seq, and DNase I data generated on the H1 human embryonic stem cells from ENCODE. I also downloaded some conservation tracks from the UCSC database.

#CAGE
dcc=http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC
wget $dcc/wgEncodeRikenCage/wgEncodeRikenCageH1hescCellPapAlnRep1.bam
wget $dcc/wgEncodeRikenCage/wgEncodeRikenCageH1hescCellPapAlnRep2.bam
#the index files at the DCC are old
#I want newer version indices
samtools index wgEncodeRikenCageH1hescCellPapAlnRep1.bam
samtools index wgEncodeRikenCageH1hescCellPapAlnRep2.bam

#RNA pol II ChIP-Seq
wget $dcc/wgEncodeHaibTfbs/wgEncodeHaibTfbsH1hescPol2V0416102PkRep1.broadPeak.gz
wget $dcc/wgEncodeHaibTfbs/wgEncodeHaibTfbsH1hescPol2V0416102RawRep1.bigWig
wget $dcc/wgEncodeHaibTfbs/wgEncodeHaibTfbsH1hescPol2V0416102PkRep2.broadPeak.gz
wget $dcc/wgEncodeHaibTfbs/wgEncodeHaibTfbsH1hescPol2V0416102RawRep2.bigWig

#DNase
wget $dcc/wgEncodeOpenChromDnase/wgEncodeOpenChromDnaseH1hescPk.narrowPeak.gz
wget $dcc/wgEncodeUwDnase/wgEncodeUwDnaseH1hescPkRep1.narrowPeak.gz

#conservation
wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/phyloP46way/vertebrate/chr22.phyloP46way.wigFix.gz
wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/phastCons46way/vertebrate/chr22.phastCons46way.wigFix.gz

mv chr22.phyloP46way.wigFix.gz chr22.phyloP46way.wig.gz
mv chr22.phastCons46way.wigFix.gz chr22.phastCons46way.wig.gz

Now comes the R code for generating the plots.

#install some packages
source("http://bioconductor.org/biocLite.R")
biocLite("Gviz")
#transcript annotation package
biocLite("TxDb.Hsapiens.UCSC.hg19.knownGene")
install.packages("devtools")
library("devtools")
install_github('davetang/bedr')

#load libraries
library(Gviz)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(bedr)

#set genome and chromosome
gen <- 'hg19'
chr <- 'chr22'

#genome axis track, as the function suggests
gtrack <- GenomeAxisTrack()
#ideogram track, requires Internet connection
itrack <- IdeogramTrack(genome = gen, chromosome = chr)
#TxDb object
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#gene track
grtrack <- GeneRegionTrack(txdb,
                           genome = gen,
                           chromosome = chr,
                           fill='black',
                           transcriptAnnotation = "symbol",
                           name = "UCSC known genes")

#raw ChIP-Seq data for RNA pol II
raw_1 <-  DataTrack(range = 'wgEncodeHaibTfbsH1hescPol2V0416102RawRep1.bigWig',
                    genome = gen,
                    type = "l",
                    chromosome = chr,
                    name = "ChIP-Seq",
                    col="#34D13F")

#peaks from ChIP-Seq data
peak_1 <- AnnotationTrack(range = bed_to_granges('wgEncodeHaibTfbsH1hescPol2V0416102PkRep1.broadPeak.gz'),
                    genome = gen,
                    name = 'ChIP-Seq',
                    chromosome = chr,
                    fill='#34D13F')

#replicate raw ChIP-Seq data for RNA pol II
raw_2 <- DataTrack(range = 'wgEncodeHaibTfbsH1hescPol2V0416102RawRep2.bigWig',
                   genome = gen,
                   type = "l",
                   chromosome = chr,
                   name = "bigWig",
                   col="#DE2840")

#replicate peaks from ChIP-Seq data
peak_2 <- AnnotationTrack(range = bed_to_granges('wgEncodeHaibTfbsH1hescPol2V0416102PkRep2.broadPeak.gz'),
                          genome = gen,
                          name = 'Peaks',
                          chromosome = chr,
                          fill="#DE2840")

#DNase I hypersensitive tracks
dnase_1 <- AnnotationTrack(range = bed_to_granges('wgEncodeUwDnaseH1hescPkRep1.narrowPeak.gz'),
                          genome = gen,
                          name = 'DNase I',
                          chromosome = chr,
                          fill='#273BA3')
dnase_2 <- AnnotationTrack(range = bed_to_granges('wgEncodeOpenChromDnaseH1hescPk.narrowPeak.gz'),
                          genome = gen,
                          name = 'DNase I',
                          chromosome = chr,
                          fill = '#273BA3')

#conservation tracks
phylop <- DataTrack(range = 'chr22.phyloP46way.wig.gz',
                   genome = gen,
                   chromosome = 'chr22',
                   name = "PhyloP",
                   type="horizon"
                   )
phastcons <- DataTrack(range = 'chr22.phastCons46way.wig.gz',
                    genome = gen,
                    chromosome = 'chr22',
                    name = "phastCons",
                    type="horizon"
                    )

my_bam_file <- c('wgEncodeRikenCageH1hescCellPapAlnRep1.bam',
                 'wgEncodeRikenCageH1hescCellPapAlnRep2.bam')
my_bam_size <- vector()

#this only works for samtools r595 or later
#RStudio couldn't read my $PATH
for (i in 1:length(my_bam_file)){
  my_command <- paste("~/bin/samtools idxstats",
                      my_bam_file[1],
                      "| awk '{s+=$3} END {print s}'")
  n <- system(my_command, intern = T)
  my_bam_size[i] <- n
}

#CAGE tracks as a DataTrack
cage_1 <- DataTrack(range = my_bam_file[1],
                    genome = gen,
                    type = "l",
                    name = "CAGE",
                    window = -1,
                    chromosome = chr,
                    ylim=c(0,60),
                    transformation=function(x){ x <- (x * 1000000)/as.numeric(my_bam_size[1])})
cage_2 <- DataTrack(range = my_bam_file[2],
                    genome = gen,
                    type = "l",
                    name = "CAGE",
                    window = -1,
                    chromosome = chr,
                    ylim=c(0,60),
                    transformation=function(x){ x <- (x * 1000000)/as.numeric(my_bam_size[2])})

#CAGE tracks as AlignmentsTrack
cage_aln_1 <- AlignmentsTrack(range = 'wgEncodeRikenCageH1hescCellPapAlnRep1.bam',
                              name = 'CAGE reads')
cage_aln_2 <- AlignmentsTrack(range = 'wgEncodeRikenCageH1hescCellPapAlnRep2.bam',
                              name = 'CAGE reads')

#chr22:29166375-29200166
from <- 29186375
to <- 29200166

#overlay ChIP-Seq raw tracks
chip_overlay <- OverlayTrack(trackList = list(raw_1, raw_2))

ht1 <- HighlightTrack(trackList=list(gtrack, grtrack, cage_1, cage_2, chip_overlay, peak_1, peak_2, dnase_1, dnase_2, phylop, phastcons),
                      start = c(29196100), width = 650,
                      chromosome = chr)

plotTracks(list(itrack, ht1), from = from, to = to)

#zoom
from <- 29196200
to   <- 29196950

plotTracks(list(itrack, gtrack, grtrack, cage_aln_1, cage_aln_2, chip_overlay, peak_1, peak_2, dnase_1, dnase_2, phylop, phastcons),
           from = from, to = to)

After saving the images as PDFs, I rotated the tracks in Illustrator because everything looks fancier in pseudo-3D.

thesis_coverMission accomplished; my thesis cover art is reproducible.

Print Friendly, PDF & Email



Creative Commons License
This work is licensed under a Creative Commons
Attribution 4.0 International License
.
4 comments Add yours
    1. The red and green show results from two separate experiments. Specifically, they show DNA that were fished out using an RNA polymerase II (Pol II) antibody, i.e. it represents where Pol II was binding. The blue lines show reads from a CAGE experiment; CAGE reads are supposed to represent the 5′ of RNA.

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.