BLAST

From Dave's wiki
Revision as of 08:30, 23 September 2019 by Admin (talk | contribs) (Created page with "Download necessary files wget -N -c http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit wget http://hgdownload.soe.ucsc.edu/admin/exe/macOSX.x86_64/twoBitToFa...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Download necessary files

wget -N -c http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit
wget http://hgdownload.soe.ucsc.edu/admin/exe/macOSX.x86_64/twoBitToFa
wget -c -O hg38.refGene.txt.gz http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/refGene.txt.gz
ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ncbi-blast-2.7.1+-x64-macosx.tar.gz

Create database

makeblastdb -dbtype nucl \
            -in hg38.fa \
            -input_type fasta \
            -title hg38 \
            -out hg38

Convert output to BED

#!/usr/bin/env perl

use strict;
use warnings;

my $usage = "Usage: $0 <refGene.txt.gz>\n";
my $infile = shift or die $usage;

if ($infile =~ /\.gz$/){
   open(IN, '-|', "gunzip -c $infile") || die "Could not open $infile: $!\n";
} else {
   open(IN, '<', $infile) || die "Could not open $infile: $!\n";
}

while(<IN>){
   chomp;
   next if /^#/;
   my ($bin, $name, $chr, $strand, $tx_start, $tx_end, $cds_start, $cds_end, $exon_count, $exon_starts, $exon_ends, $score, $name2, @rest) = split(/\t/);
   print join("\t", $chr, $tx_start, $tx_end, $name2, 0, $strand), "\n";
}
close(IN);

exit(0);