Check whether two genes belong to the same pathway

Updated: 2013 December 16th to include the Reactome database

#install if necessary
source("http://bioconductor.org/biocLite.R")
biocLite("org.Hs.eg.db")

#load library
library("org.Hs.eg.db")
 
twoGenes <- c("LRRK2","PINK1")
#convert them to Entrez Gene ids
twoGenesEG <- unlist(mget(twoGenes, org.Hs.egSYMBOL2EG))
twoGenesEGkegg <- mget(twoGenesEG, org.Hs.egPATH)
commonKegg <- intersect(twoGenesEGkegg[[1]], twoGenesEGkegg[[2]])
print(commonKegg)
#[1] "05012"
#what are these pathways

#install if necessary
source("http://bioconductor.org/biocLite.R")
biocLite("KEGG.db")

library("KEGG.db")
mget(commonKegg, KEGGPATHID2NAME)
#$`05012`
#[1] "Parkinson's disease"

Source: Bioconductor mailing list: thread [BioC] How to quikly know whether two genes are in the same pathway or not?

Using reactome

If you ran the above code, you should get this warning:

KEGG.db contains mappings based on older data because the original
resource was removed from the the public domain before the most
recent update was produced. This package should now be considered
deprecated and future versions of Bioconductor may not have it
available. Users who want more current data are encouraged to look
at the KEGGREST or reactome.db packages

Here I use the Reactome database instead:

#install if necessary
source("http://bioconductor.org/biocLite.R")
biocLite("reactome.db")

#load library
library("reactome.db")

#You can learn what objects this package supports
#with the following command:
ls("package:reactome.db")
# [1] "reactome"              "reactome.db"           "reactome_dbconn"      
# [4] "reactome_dbfile"       "reactome_dbInfo"       "reactome_dbschema"    
# [7] "reactomeEXTID2PATHID"  "reactomeGO2REACTOMEID" "reactomeMAPCOUNTS"    
#[10] "reactomePATHID2EXTID"  "reactomePATHID2NAME"   "reactomePATHNAME2ID"  
#[13] "reactomeREACTOMEID2GO"

#reactomeEXTID2PATHID
#an annotation data object that maps Entrez Gene identifiers
#to Reactome pathway identifiers

#convert to list
xx <- as.list(reactomeEXTID2PATHID)
#check contents of xx
head(xx, 1)
#$`10`
#[1] "211859"  "1430728" "156580"  "156582"

#another way of selecting the Reactome IDs
#find which keytypes can be selected
keytypes(reactome.db)
#[1] "ENTREZID"   "GO"         "PATHNAME"   "PATHID"     "REACTOMEID"

AnnotationDbi::select(reactome.db,
                      keys=10, 
                      keytype="ENTREZID",
                      columns=c("ENTREZID","REACTOMEID")
                     )
#  ENTREZID REACTOMEID
#1       10    1430728
#2       10     156580
#3       10     156582
#4       10     211859

#obtain pathway names
AnnotationDbi::select(reactome.db,
                      keys=10, 
                      keytype="ENTREZID",
                      columns=c("ENTREZID","REACTOMEID","PATHNAME")
                     )
#  ENTREZID REACTOMEID                            PATHNAME
#1       10    1430728            Homo sapiens: Metabolism
#2       10     156580  Homo sapiens: Phase II conjugation
#3       10     156582           Homo sapiens: Acetylation
#4       10     211859 Homo sapiens: Biological oxidations

#two keys
AnnotationDbi::select(reactome.db,
                      keys=c(10, 100), 
                      keytype="ENTREZID",
                      columns=c("ENTREZID","REACTOMEID","PATHNAME")
                     )

#  ENTREZID REACTOMEID                                PATHNAME
#1       10    1430728                Homo sapiens: Metabolism
#2       10     156580      Homo sapiens: Phase II conjugation
#3       10     156582               Homo sapiens: Acetylation
#4       10     211859     Homo sapiens: Biological oxidations
#5      100    1430728                Homo sapiens: Metabolism
#6      100      15869 Homo sapiens: Metabolism of nucleotides
#7      100      73847         Homo sapiens: Purine metabolism
#8      100      74217            Homo sapiens: Purine salvage

#common pathway between Entrez Gene id 10 and 100
first <- xx[grep("^10$", names(xx), perl=T)]
second <- xx[grep("^100$", names(xx), perl=T)]
intersect(first[[1]], second[[1]])
[1] "1430728"

#reactomePATHID2NAME
#An annotation data object that maps
#Reactome pathway identifiers to Reactome pathway names

yy <- as.list(reactomePATHID2NAME)
yy[grep("^1430728$", names(yy), perl=T)]
$`1430728`
[1] "Homo sapiens: Metabolism"

#If you have gene symbols convert them to Entrez Gene ids like so:

#install if necessary
source("http://bioconductor.org/biocLite.R")
biocLite("org.Hs.eg.db")
#load library
library("org.Hs.eg.db")
 
twoGenes <- c("LRRK2","PINK1")
#convert gene symbol to Entrez Gene ids
twoGenesEG <- unlist(mget(twoGenes, org.Hs.egSYMBOL2EG))
twoGenesEG
#   LRRK2    PINK1 
#"120892"  "65018"

This work is licensed under a Creative Commons
Attribution 4.0 International License.

2 comments Add yours

Pingback: Bioconductor annotation packages - Musings from a PhD candidate
ricardo says:

March 1, 2014 at 19:29

Analysis with kegg seems to be more specific than with reactome, in spite of the risc to be deprecated. “Human metabolism” seems to be too generic.

ps: Your posts are very well described and clear. Helped a lot.
Thanx

Using reactome

Leave a Reply Cancel reply