Updated 2016 September 15th: I've made this into an R package, which is available at my GitHub repository.
A short post on utilising the OMIM API via some wrapper functions I wrote in R. A wrapper, as explained in the Wikipedia article, is simply a subroutine that calls another subroutine. If you plan on using the OMIM API, first register for your very own API key. After you submit the form you will get an email with the key, along with this message:
The API key will be activated within TWO hours, trying to use it before then will result in a error. It is for your own use, we ask that you do not share it with others.
So if you haven't already registered, stop reading now, and submit the form.
The help page is very well written and explains how to use the API. Basically, you build a URL and send the request to the OMIM API server. The server parses your request and sends back the results; the default format is XML. Here's an example of a request:
http://api.omim.org/api/entry?mimNumber=100100&apiKey=NOT_A_REAL_KEY&include=text&include=geneMap
The link doesn't work because NOT_A_REAL_KEY is not a real API key. There are other ways to set the API key; including the key in the query is one way. In the above example, we want information on the OMIM ID 100100, including the text field sections and gene map/phenotype map data. What if we want to make a list of queries? This is where my wrapper function (written in R) comes in useful. R has an XML package that makes it easy to parse the results.
#install if necessary
install.packages("XML")
#load library
library(XML)
# Function to set the API key.
# Stores the query fragment "apiKey=<key>" in the global variable `my_key`,
# which get_omim() reads when it builds the request URL.
#   key: the OMIM API key, as a string.
# Returns the stored fragment invisibly.
set_key <- function(key) {
  api_key_param <- paste0("apiKey=", key)
  # A plain `<-` here would only create a function-local variable that is
  # discarded on return, so write to the global environment explicitly.
  assign("my_key", api_key_param, envir = globalenv())
  invisible(api_key_param)
}
# Function to build the URL and to perform the query.
# Builds an OMIM API "entry" request for one OMIM ID, appends one
# `include=<name>` parameter for every flag argument that is TRUE, and
# passes the resulting URL to xmlParse().
# Reads the global `my_key` ("apiKey=<key>"); stops with an error if it
# has not been defined.
# Returns whatever xmlParse() returns for the request URL.
get_omim <- function(omim_id = 100100, # default OMIM ID
                     text = FALSE, # Includes the text field sections with the entry.
                     existflags = FALSE, # Include the 'exists' flags with the entry (clinical synopsis, allelic variant, gene map & phenotype map).
                     allelicVariantList = FALSE, # Includes the allelic variant list with the entry.
                     clinicalSynopsis = FALSE, # Include the clinical synopsis with the entry.
                     seeAlso = FALSE, # Includes the 'see also' field with the entry.
                     referenceList = FALSE, # Include the reference list with the entry.
                     geneMap = FALSE, # Include the gene map/phenotype map data with the entry.
                     externalLinks = FALSE, # Include the external links with the entry.
                     contributors = FALSE, # Includes the 'contributors' field with the entry.
                     creationDate = FALSE, # Includes the 'creation date' field with the entry.
                     editHistory = FALSE, # Includes the 'edit history' field with the entry.
                     dates = FALSE, # Include the dates data with the entry.
                     all = FALSE # Include the above data with the entry.
) {
  if (length(omim_id) != 1L) {
    stop("omim_id must be a single OMIM ID", call. = FALSE)
  }
  if (!exists("my_key")) {
    stop("API key not found: `my_key` is not defined (set it with set_key())",
         call. = FALSE)
  }

  # Collect the flag arguments by value rather than via match.call(), so
  # that an explicit `geneMap = FALSE` does not request the section.
  # NOTE(review): each include value is sent exactly as the argument is
  # spelled (e.g. `existflags`); confirm the spelling the OMIM API expects.
  include_flags <- list(
    text = text,
    existflags = existflags,
    allelicVariantList = allelicVariantList,
    clinicalSynopsis = clinicalSynopsis,
    seeAlso = seeAlso,
    referenceList = referenceList,
    geneMap = geneMap,
    externalLinks = externalLinks,
    contributors = contributors,
    creationDate = creationDate,
    editHistory = editHistory,
    dates = dates,
    all = all
  )
  requested <- names(include_flags)[vapply(include_flags, isTRUE, logical(1))]

  # format() keeps numeric IDs such as 300000 from being rendered as "3e+05"
  my_mim <- paste0(
    "mimNumber=",
    format(omim_id, scientific = FALSE, trim = TRUE)
  )
  query_parts <- c(my_mim, my_key)
  # paste0("include=", character(0)) would yield a bare "include=", so only
  # add include parameters when at least one flag was requested
  if (length(requested) > 0) {
    query_parts <- c(query_parts, paste0("include=", requested))
  }

  # Join the parameters with "&" only between them (no "&" right after "?")
  my_query <- paste0(
    "http://api.omim.org/api/entry?",
    paste(query_parts, collapse = "&")
  )
  xmlParse(my_query)
}
# Extract the preferred title of an OMIM entry.
# Converts the XML result to a nested list with xmlToList() and walks
# entryList -> entry -> titles down to the preferredTitle element.
get_title <- function(xml) {
  parsed <- xmlToList(xml)
  titles <- parsed$entryList$entry$titles
  titles$preferredTitle
}
In the above code I wrote three functions, namely set_key(), get_omim(), and get_title(). The set_key() function simply sets the key. The get_omim() function builds the URL and performs the query. The get_title() function extracts the preferred title field from the XML result. Here's a demonstration of the functions.
set_key('NOT_A_REAL_KEY')
#I made the default OMIM ID 100100
#for the get_omim() function
get_omim()
<?xml version="1.0" encoding="UTF-8"?>
<omim version="1.0">
<entryList>
<entry>
<prefix>#</prefix>
<mimNumber>100100</mimNumber>
<status>live</status>
<titles>
<preferredTitle>ABDOMINAL MUSCLES, ABSENCE OF, WITH URINARY TRACT ABNORMALITY AND CRYPTORCHIDISM</preferredTitle>
<alternativeTitles>PRUNE BELLY SYNDROME;;
EAGLE-BARRETT SYNDROME; EGBRS</alternativeTitles>
</titles>
</entry>
</entryList>
</omim>
get_omim(geneMap=TRUE)
<?xml version="1.0" encoding="UTF-8"?>
<omim version="1.0">
<entryList>
<entry>
<prefix>#</prefix>
<mimNumber>100100</mimNumber>
<status>live</status>
<titles>
<preferredTitle>ABDOMINAL MUSCLES, ABSENCE OF, WITH URINARY TRACT ABNORMALITY AND CRYPTORCHIDISM</preferredTitle>
<alternativeTitles>PRUNE BELLY SYNDROME;;
EAGLE-BARRETT SYNDROME; EGBRS</alternativeTitles>
</titles>
<phenotypeMapList>
<phenotypeMap>
<mimNumber>118494</mimNumber>
<phenotype>?Eagle-Barrett syndrome</phenotype>
<phenotypeMimNumber>100100</phenotypeMimNumber>
<phenotypeMappingKey>3</phenotypeMappingKey>
<phenotypeInheritance>Autosomal recessive</phenotypeInheritance>
<sequenceID>1484</sequenceID>
<chromosome>1</chromosome>
<chromosomeSymbol>1</chromosomeSymbol>
<chromosomeSort>1484</chromosomeSort>
<chromosomeLocationStart>239549875</chromosomeLocationStart>
<chromosomeLocationEnd>240078749</chromosomeLocationEnd>
<transcript>uc001hyp.3</transcript>
<cytoLocation>1q41-q44</cytoLocation>
<computedCytoLocation>1q43</computedCytoLocation>
<geneSymbols>CHRM3, EGBRS</geneSymbols>
<geneInheritance/>
</phenotypeMap>
</phenotypeMapList>
</entry>
</entryList>
</omim>
get_omim(100200)
<?xml version="1.0" encoding="UTF-8"?>
<omim version="1.0">
<entryList>
<entry>
<mimNumber>100200</mimNumber>
<status>live</status>
<titles>
<preferredTitle>ABDUCENS PALSY</preferredTitle>
</titles>
</entry>
</entryList>
</omim>
get_title(get_omim(100200))
[1] "ABDUCENS PALSY"
Here's how to look up a list of 10 OMIM IDs:
# Look up the preferred titles of ten OMIM IDs and tabulate them
my_list <- as.list(c(
  100070, 100100, 100300, 100600, 100800,
  100820, 101000, 101200, 101400, 101600
))
# inner lapply() queries each ID; outer lapply() extracts each title
my_list_title <- lapply(
  lapply(my_list, get_omim),
  get_title
)
df <- data.frame(
  id = unlist(my_list),
  title = unlist(my_list_title)
)
df
1 100070 AORTIC ANEURYSM, FAMILIAL ABDOMINAL, 1; AAA1
2 100100 ABDOMINAL MUSCLES, ABSENCE OF, WITH URINARY TRACT ABNORMALITY AND CRYPTORCHIDISM
3 100300 ADAMS-OLIVER SYNDROME 1; AOS1
4 100600 ACANTHOSIS NIGRICANS
5 100800 ACHONDROPLASIA; ACH
6 100820 ACHOO SYNDROME
7 101000 NEUROFIBROMATOSIS, TYPE II; NF2
8 101200 APERT SYNDROME
9 101400 SAETHRE-CHOTZEN SYNDROME; SCS
10 101600 PFEIFFER SYNDROME
Limits
Here's the usage limit from the OMIM API help page:
The API will limit how many entries can be retrieved in a single request. Entries and clinical synopses are limited to 20 per request if any 'includes' are specified, otherwise there is no limit. Gene map entries are limited to 100 per request.
From this description, it seems there are no limits if I simply want to look up the preferred titles of OMIM IDs. Let's see!
# Read the tab-separated mim2gene.txt table without treating the first row
# as a header. FALSE is spelled out because `F` is an ordinary variable that
# can be reassigned.
my_table <- read.table("mim2gene.txt", header = FALSE, sep = "\t")
dim(my_table)
[1] 23999 5
head(my_table)
V1 V2 V3 V4 V5
1 100050 predominantly phenotypes - - -
2 100070 phenotype 100329167 - -
3 100100 phenotype - - -
4 100200 predominantly phenotypes - - -
5 100300 phenotype - - -
6 100500 moved/removed - - -
# Time the lookup of the preferred title for every OMIM ID in the table.
# Each ID is queried and reduced to its title in one step, so the parsed XML
# documents do not all have to be held in memory at once.
system.time(
  my_big_list_title <- lapply(
    my_table$V1,
    function(id) get_title(get_omim(id))
  )
)
user system elapsed
231.361 6.824 10071.044
#almost 3 hours
10071/60/60
[1] 2.7975
# Build an id/title table for every OMIM ID in my_table.
# unlist() silently drops NULL elements, which would leave fewer titles than
# IDs and make data.frame() fail; map a missing title to NA instead so every
# ID keeps its own row.
df <- data.frame(
  id = my_table$V1,
  title = vapply(
    my_big_list_title,
    function(x) if (is.null(x)) NA_character_ else x,
    character(1)
  )
)
df[4:12, 1:2]
id title
4 100200 ABDUCENS PALSY
5 100300 ADAMS-OLIVER SYNDROME 1; AOS1
6 100500 MOVED TO 200150
7 100600 ACANTHOSIS NIGRICANS
8 100640 ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1
9 100650 ALDEHYDE DEHYDROGENASE 2 FAMILY; ALDH2
10 100660 ALDEHYDE DEHYDROGENASE, FAMILY 3, SUBFAMILY A, MEMBER 1; ALDH3A1
11 100670 ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER B1; ALDH1B1
12 100675 ACETAMINOPHEN METABOLISM
# Give the mim2gene columns descriptive names; 'id' is the merge key
names(my_table) <- c(
  "id",
  "type",
  "entrez_gene_id",
  "hgnc_symbol",
  "ensembl_gene_id"
)
# Join the OMIM titles onto the original table by OMIM ID
new_table <- merge(my_table, df, by = "id")
dim(new_table)
[1] 23999 6
head(new_table)
id type entrez_gene_id hgnc_symbol ensembl_gene_id
1 100050 predominantly phenotypes - - -
2 100070 phenotype 100329167 - -
3 100100 phenotype - - -
4 100200 predominantly phenotypes - - -
5 100300 phenotype - - -
6 100500 moved/removed - - -
title
1 AARSKOG SYNDROME, AUTOSOMAL DOMINANT
2 AORTIC ANEURYSM, FAMILIAL ABDOMINAL, 1; AAA1
3 ABDOMINAL MUSCLES, ABSENCE OF, WITH URINARY TRACT ABNORMALITY AND CRYPTORCHIDISM
4 ABDUCENS PALSY
5 ADAMS-OLIVER SYNDROME 1; AOS1
6 MOVED TO 200150
tail(new_table)
id type entrez_gene_id hgnc_symbol ensembl_gene_id
23994 616257 gene 11328 FKBP9 ENSG00000122642
23995 616258 phenotype - - -
23996 616260 phenotype - - -
23997 616261 gene 54517 PUS7 ENSG00000091127
23998 616262 gene 9903 KLHL21 ENSG00000162413
23999 616263 phenotype - - -
title
23994 FK506-BINDING PROTEIN 9; FKBP9
23995 MECKEL SYNDROME 12; MKS12
23996 TENORIO SYNDROME; TNORS
23997 PSEUDOURIDYLATE SYNTHASE 7, PUTATIVE; PUS7
23998 KELCH-LIKE 21; KLHL21
23999 NEUROLOGIC, ENDOCRINE, AND PANCREATIC DISEASE, MULTISYSTEM, INFANTILE-ONSET; IMNEPD
# Write the merged table as tab-separated text, without quoting and without
# row names. FALSE is spelled out because `F` is an ordinary variable that
# can be reassigned.
write.table(
  new_table,
  file = "mim2gene_title.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)

This work is licensed under a Creative Commons
Attribution 4.0 International License.