First download the relevant tables from here. Download tss_library_expr_summary.tsv.bz2, tss_summary.tsv.bz2 and rna_lib_summary.tsv.bz2 and bunzip2 the files.
According to information derived from rna_lib_summary.tsv there are 13,897 CAGE tags from the prostate gland (rna_lib_id = HBA).
Double check this:
cat tss_library_expr_summary.tsv | awk '$3=="HBA" {print}' | cut -f5 | perl -e 'while(<>){ chomp; $i+=$_; } print "$i\n";'
13897
cat tss_library_expr_summary.tsv | awk '$3=="HBA" {print}' | cut -f2 | sort -u | wc -l
7029
Genomic coordinates of TSSs can be obtained from tss_summary.tsv. Here's a script which generates bedgraph files of all the CAGE tags for each tissue. Bugs may be present in the code; use at your own risk. Note I commented out strictures because I wanted to use scalars as filehandles (which is ambiguous code).
#!/usr/bin/perl
#use strict;
use warnings;
my $mapping = 'tss_summary.tsv';
my $tss = 'tss_library_expr_summary.tsv';
my $library = 'rna_lib_summary.tsv';
my %library = ();
open(IN,'<',$library) || die "Could not open $library: $!\n";
while(){
chomp;
my ($lib,$junk,$junk2,$junk3,$desc,@junk) = split(/\t/);
next unless $lib =~ /^H/;
$library{$lib} = $desc;
}
close(IN);
my %mapping = ();
open(IN,'<',$mapping) || die "Could not open $mapping: $!\n";
while(){
chomp;
my ($tss_id,$junk,$junk2,$chr,$junk3,$strand,$start,$end,@junk) = split(/\t/);
if ($strand eq 'F'){
$strand = '+';
} else {
$strand = '-';
}
$mapping{$tss_id}->{'CHR'} = $chr;
$mapping{$tss_id}->{'STRAND'} = $strand;
$mapping{$tss_id}->{'START'} = $start;
$mapping{$tss_id}->{'END'} = $end;
}
close(IN);
foreach my $lib (keys %library){
my $library = $lib;
open($lib,'>',"${lib}.bed") || die "Could not open $lib.bed for writing: $!\n";
print $lib <track type=bedGraph name="$library{$library}" description="FANTOM3 CAGE $library{$library}" visibility=full color=0,0,255 altColor=255,0,0 priority=20
EOF
}
open(IN,'<',$tss) || die "Could not open $tss: $!\n";
while(){
chomp;
my ($tss_rna_lib_id,$tss_id,$rna_lib_id,$priming,$tss_tags,$tpm) = split(/\t/);
next unless $rna_lib_id =~ /^H/;
if (exists $mapping{$tss_id}){
my $chr = $mapping{$tss_id}->{'CHR'};
my $strand = $mapping{$tss_id}->{'STRAND'};
my $start = $mapping{$tss_id}->{'START'};
my $end = $mapping{$tss_id}->{'END'};
if ($strand eq '-'){
$tss_tags = '-' . $tss_tags;
}
print $rna_lib_id join("\t",$chr,$start,$end,$tss_tags),"\n";
} else {
warn "Missing information for $tss_id\n";
}
}
close(IN);
exit(0);
This will generate 42 bedgraph files.
Double check:
cat HBA.bed | cut -f4 | sed 's/-//' | perl -e 'while(<>){ chomp; $i+=$_; } print "$i\n";'
13897
Loaded two of the bedgraphs into the UCSC Genome Browser.

This work is licensed under a Creative Commons
Attribution 4.0 International License.
