#!/usr/bin/perl -w # kkisnp.pl - KKI SNP Analysis Tool # # Copyright (c) 2004 Jason Ting # All Rights Reserved. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER "AS IS" AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. use strict; use CGI; use GD::Graph::bars; use Term::ANSIColor; use Fcntl qw( :DEFAULT :flock ); use constant UPLOAD_DIR => "/tmp/SNPscan"; use constant BUFFER_SIZE => 16_384; use constant MAX_FILE_SIZE => 1_048_576 * 40; # Limit each upload to 40 MB use constant MAX_DIR_SIZE => 1_048_576 * 800; # Limit total uploads to 800 MB use constant MAX_OPEN_TRIES => 50; $CGI::DISABLE_UPLOADS = 0; $CGI::POST_MAX = MAX_FILE_SIZE; my %HchromGroup = ( 1 => ['1'], 2 => ['2'], 3 => ['3'], 4 => ['4'], 5 => ['5'], 6 => ['6'], 7 => ['7'], 8 => ['8'], 9 => ['9'], 10 => ['10'], 11 => ['11'], 12 => ['12'], 13 => ['13'], 14 => ['14'], 15 => ['15'], 16 => ['16'], 17 => ['17'], 18 => ['18'], 19 => ['19'], 20 => ['20'], 21 => ['21'], 22 => ['22'], X => ['X'], XY => ['XY'], Y => ['Y'], M => ['M'], A => ['1', '2', '3'], B => ['4', '5'], C => ['6', '7', '8', '9', '10', '11', '12'], D => ['13', '14', '15'], E => ['16', '17', '18'], F => ['19', '20'], G => ['21', '22'], '*' => ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22'] ); my @data; my $q = new CGI; $q->cgi_error and error( $q, "Error transferring file: " . $q->cgi_error ); my $minsize = $q->param( "minsize" ) || error( $q, "No minimum size entered." ); my $maxsize = $q->param( "maxsize" ) || error( $q, "No maximum size entered." ); my $file = $q->param( "file" ) || error( $q, "No file received." ); my $filename = $file; # $q->param( "filename" ) || error( $q, "No filename entered." ); $filename =~ s/.*[\/\\](.*)/$1/; my $action = $q->param( "action" ) || error( $q, "No action selected." ); my $track = $q->param( "track" ) || error( $q, "No track title entered." ); my $source = $q->param( "source" ) || error( $q, "No source entered." ); my $showSize = $q->param( "minSHOWsize"); my $chrm = $q->param( "chrm" ); my $sbase = $q->param( "sbase" ); my $fh = $q->upload( "file" ); my $buffer = ""; if ( dir_size( UPLOAD_DIR ) + $ENV{CONTENT_LENGTH} > MAX_DIR_SIZE ) { error( $q, "Upload directory is full." ); } # Allow letters, digits, periods, underscores, dashes # Convert anything else to an underscore $filename =~ s/[^\w.-]/_/g; if ( $filename =~ /^(\w[\w.-]*)/ ) { $filename = $1; } else { error( $q, "Invalid file name; files must start with a letter or number." ); } my $before = 0; # location, in bp, of the SNP just before a found region my $begin = 0; # index, of the stating SNP of a regoin found my $end = 0; # index, of the ending SNP of a regoin found my $after = 0; # location, in bp, of the SNP just after a found region my $chrom = 0; my $size = 0; # size of consecutive homozygosity my $id = 0; my $LOHsum = 0; my @SNPs; my $sizeNC = 0; # size of consecutive NoCall my $beforeNC = 0; # there are the same as above, but used for NoCall regions my $beginNC = 0; my $endNC = 0; my $afterNC = 0; my @SNPsNC; my @dbSNP; my @chromosome; my @location; my @call; my @CN; my @p; my @meta_p; my @LOH; my @lines; my %HoAresults; my %HoAnocalls; my %snpCN; # used for generating copy number's WIG output my %snpP; # used for generating p-value's WIG output my $line = <$fh>; # skip header line while ( $line = <$fh> ) { push @lines, $line; # $line =~ /^(\S*)\t\s?((\d*)|X|Y)\t(\d*)\t(\w*)\t(\S*)\t(\S*)\t(\S*)\t(\S*)/; next unless ( $line =~ /^(\S+)\t\s?((\d+)|X|Y)\t(\d+)\t(\w*)\t(\S*)\t(\S*)\t(\S*)\t(\S*)/ ); $snpCN{$2}{$4} = $6; # save info for WIG output $snpP{$2}{$4} = $7; # save info for WIG output my $inGroup = 0; foreach (@{$HchromGroup{$chrm}}) { if ($2 eq $_) { $inGroup = 1; last; } } unless ($chrm && !$inGroup ) { push @dbSNP, $1 ? $1 : '???????'; push @chromosome, $2 ? $2 : '???????'; push @location, $4 ? $4 : '???????'; push @call, $5 ? $5 : '???????'; push @CN, $6 ? $6 : '0'; push @p, $7 ? $7 : '0'; push @meta_p, $8 ? $8 : '0'; push @LOH, $9 ? $9 : '0'; }; } push @chromosome, '99'; # insert an end flag my $i; for ( $i = 0; $i < $#location; $i++) { if ($chromosome[$i] ne $chrom) { # the beginning of a new chromosome if ($size) { # terminate the finding at the end of previous chrom $end = $i - 1; $after = $location[$end]; # set it to the last know basepair if ( $action eq 'CHSG' || $action eq 'CHST' || $action eq 'LOHG' || $action eq 'CHLOHG' || $action eq 'CHLOHT' ) { # save all results for these cases, since the min. & max. values are not SNP counts my @SNPlist = @SNPs; my $LOHavg = $LOHsum / $size; push @{$HoAresults{$size}}, [ $chrom, $before, $location[$begin], $location[$end], $after, $LOHavg, \@SNPlist ]; } elsif ( ($size >= $minsize) && ($size <= $maxsize) ) { # the min. & max. values specify the range of valid SNP counts, save only if it's within this range my @SNPlist = @SNPs; my $LOHavg = $LOHsum / $size; push @{$HoAresults{$size}}, [ $chrom, $before, $location[$begin], $location[$end], $after, $LOHavg, \@SNPlist ]; } $size = 0; @SNPs = (); $LOHsum = 0; } $chrom = $chromosome[$i]; $before = 0; $begin = $i; $size = 0; if (($sizeNC > 1) && ($sizeNC >= $minsize) && ($sizeNC <= $maxsize) ) { # save the CNC at the end of previous chromosome $endNC = $i -1; $afterNC = $location[$endNC]; my @SNPlist = @SNPsNC; push @{$HoAnocalls{$sizeNC}}, [ $chrom, $beforeNC, $location[$beginNC], $location[$endNC], $afterNC, \@SNPlist ]; } $sizeNC = 0; @SNPsNC = (); } if ($call[$i] eq 'AA' || $call[$i] eq 'BB' || ( (($call[$i] eq 'NoCall') || ($call[$i] eq 'NC')) && # treat a single NoCall falls between ($i) && # AA or BB as one of them ($i < $#call) && (($call[$i-1] eq 'AA') || ($call[$i-1] eq 'BB')) && (($call[$i+1] eq 'AA') || ($call[$i+1] eq 'BB')) ) ) { if ($size == 0) { # the begining of a new match $before = $before ? $location[$i-1] : '1'; # set it to basepair #1 $begin = $i; $LOHsum = 0; } $size++; push @SNPs, $dbSNP[$i]; $LOHsum += $LOH[$i]; } elsif ($size) { # found the end of a consecutive region or $end = $i - 1; # found a AB or a NoCall which is not surranded by AA or BB $after = $location[$i]; if ( $action eq 'CHSG' || $action eq 'CHST' || $action eq 'LOHG' || $action eq 'CHLOHG' || $action eq 'CHLOHT' ) { # save all results for these cases, since the min. & max. values are not SNP counts my @SNPlist = @SNPs; my $LOHavg = $LOHsum / $size; push @{$HoAresults{$size}}, [ $chrom, $before, $location[$begin], $location[$end], $after, $LOHavg, \@SNPlist ]; } elsif ( ($size >= $minsize) && ($size <= $maxsize) ) { # the min. & max. values specify the range of valid SNP counts, save only if it's within this range my @SNPlist = @SNPs; my $LOHavg = $LOHsum / $size; push @{$HoAresults{$size}}, [ $chrom, $before, $location[$begin], $location[$end], $after, $LOHavg, \@SNPlist ]; } $size = 0; @SNPs = (); $LOHsum = 0; } if ($call[$i] eq 'NoCall') { if ($i) { if ($call[$i-1] ne 'NoCall') { $beginNC = $i; # the start of a new NoCall region ($chromosome[$i] eq $chromosome[$i-1]) ? $beforeNC = $location[$i-1] : $beforeNC = $location[$i]; } } else { # this NoCall is the first SNP of the input file $beginNC = 0; $beforeNC = $location[$i]; } $sizeNC++; push @SNPsNC, $dbSNP[$i]; } else { if (($sizeNC > 1) && ($sizeNC >= $minsize) && ($sizeNC <= $maxsize) ) { # just reached the end of consecutive NoCalls $afterNC = $location[$i+1]; my @SNPlist = @SNPsNC; push @{$HoAnocalls{$sizeNC}}, [ $chrom, $beforeNC, $location[$beginNC], $location[$i], $afterNC, \@SNPlist ]; } $sizeNC = 0; @SNPsNC = (); } } # check for the case of one found at the end of the last chromosome if ($size) { $end = $i; $after = $location[$i]; if ( $action eq 'CHSG' || $action eq 'CHST' || $action eq 'LOHG' || $action eq 'CHLOHG' || $action eq 'CHLOHT' ) { # save all results for these cases, since the min. & max. values are not SNP counts my @SNPlist = @SNPs; my $LOHavg = $LOHsum / $size; push @{$HoAresults{$size}}, [ $chrom, $before, $location[$begin], $location[$end], $after, $LOHavg, \@SNPlist ]; } elsif ( ($size >= $minsize) && ($size <= $maxsize) ) { # the min. & max. values specify the range of valid SNP counts, save only if it's within this range my @SNPlist = @SNPs; my $LOHavg = $LOHsum / $size; push @{$HoAresults{$size}}, [ $chrom, $before, $location[$begin], $location[$end], $after, $LOHavg, \@SNPlist ]; } $size = 0; @SNPs = (); $LOHsum = 0; if (($sizeNC > 1) && ($sizeNC >= $minsize) && ($sizeNC <= $maxsize) ) { # save the last consecutive NoCall at the end of input file $afterNC = $location[$i]; my @SNPlist = @SNPsNC; push @{$HoAnocalls{$sizeNC}}, [ $chrom, $beforeNC, $location[$beginNC], $location[$i], $afterNC, \@SNPlist ]; } $sizeNC = 0; @SNPsNC = (); } if ($action eq "table") { print_GFF(); } elsif ($action eq "CNCG") { graph_CNC(); } elsif ($action eq "CNCT") { table_CNC(); } elsif ($action eq "CHG") { graph_CH(); } elsif ($action eq "CHT") { table_CH(); } elsif ($action eq "CHSG") { graph_CHS(); } elsif ($action eq "CHST") { table_CHS(); } elsif ($action eq "CHLOHG") { graph_CHLOH(); } elsif ($action eq "CHLOHT") { table_CHLOH(); } elsif ($action eq "LOHG") { graph_LOH(); } elsif ($action eq "CNG") { graph_CN(); } elsif ($action eq "CNT") { table_CN(); } exit 1; sub print_GFF { # send the top part of the new HTML code to the browser print $q->header( "text/plain" ); print <[0]}++; my $size_100K = int( ($arrayref->[3] - $arrayref->[2] + 1) / 100000 ); my $size_M = $size_100K / 10; my $score = ($arrayref->[5] > 10) ? 1000 : int ($arrayref->[5] * 100) ; print "chr", $arrayref->[0], "\t$source\tLOH\t$arrayref->[2]\t$arrayref->[3]\t$score\t\.\t\.\tchr$arrayref->[0]_$counts{$arrayref->[0]}_$size_M\n"; } } # generate WIG for the CN track print "browser full altGraph\n", 'track type=wiggle_0 name="', $track, '-CN " description="KKI - Copy Number" ', 'visibility=full autoScale=off viewLimits=0.0:5.0 color=0,200,0 useScore=1 ', 'yLineMark=2.47 yLineOnOff=on priority=10 gridDefault=on maxHeightPixels=30:30:11 ', # 'url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=snp&cmd=search&term=$$"', "\n"; foreach my $chr (sort keys %snpCN) { print 'variableStep chrom=chr', $chr, " span=1\n"; foreach my $snp_pos (sort {$a <=> $b} keys %{$snpCN{$chr}}) { print "$snp_pos\t$snpCN{$chr}{$snp_pos}\n"; } } # generate WIG for the p-value track print "browser full altGraph\n", 'track type=wiggle_0 name="', $track, '-p_value" description="KKI - p-value" ', 'visibility=full autoScale=off viewLimits=-6.0:6.0 color=0,0,200 useScore=1 ', 'yLineMark=0.0 yLineOnOff=on priority=20 gridDefault=on maxHeightPixels=30:30:11 ', # 'url="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=snp&cmd=search&term=$$"', "\n"; foreach my $chr (sort keys %snpP) { print 'variableStep chrom=chr', $chr, " span=1\n"; foreach my $snp_pos (sort {$a <=> $b} keys %{$snpP{$chr}}) { print "$snp_pos\t$snpP{$chr}{$snp_pos}\n"; } } } #-------- end of print_text ------------ sub graph_CNC { my @counts; foreach my $key (sort {$a <=> $b} keys %HoAnocalls) { push @counts, scalar(@{$HoAnocalls{$key}}); } push @data, [ sort {$a <=> $b} keys %HoAnocalls ]; push @data, \@counts; my $max_count = $counts[0]; unless ($max_count) { print $q->header( "text/html" ), $q->start_html( "Search Result" ), $q->h1( "Search Result - 0" ), $q->p( "The input file does not contain any consecutive NoCalls." ), $q->end_html; exit; } my $digits = length scalar $max_count; my $max_y = 10 ** $digits; my $min_y = $max_y / 10; my $good_y = (int ($max_count / $min_y) + 1) * $min_y; $good_y = $good_y < 10 ? 10 : $good_y; my $good_x = 400 + ( ($#counts > 10 ) ? ($#counts - 10) * 20 : 0 ); my $graph = new GD::Graph::bars( $good_x, 600 ); my $title = "Consecutive NoCalls - $filename" . ($chrm ? " : Chrm$chrm" : ''); $graph->set( title => $title, x_label => "Size in SNP counts", y_label => "Occurrences", long_ticks => 1, y_max_value => $good_y, y_min_value => 0, y_tick_number => 10, y_label_skip => 2, bar_spacing => $#counts < 7 ? 30 : 5, types => [ "bars" ], show_values => 1, ); my $gd_image = $graph->plot( \@data ); print $q->header( -type => "image/png", -expires => "now" ); binmode STDOUT; print $gd_image->png; } sub table_CNC { # send the top part of the new HTML code to the browser print $q->header( "text/html" ), $q->start_html( "Consecutive NoCall Report" ); print "\n", "

Consecutive NoCall Report : $filename

\n"; print "\n", " \n", " \n", " \n", "
Search Criterion    Chromosome:   ", $chrm ? $chrm : 'All', "
Minimum Value  :   ", $minsize, "
Maximum Value  :   ", $maxsize, "
\n"; print <Search Summary END_top foreach my $key (reverse sort {$a <=> $b} keys %HoAnocalls) { print "\n"; } print "
Consecutive NoCall Size Occurrences
" . $key . "" . scalar(@{$HoAnocalls{$key}}) . "
\n" . "

\n" . "

Detailed Report

\n" ; foreach my $key (reverse sort {$a <=> $b} keys %HoAnocalls) { if ( $showSize && ($showSize> $key) ) { next; } print "

Consecutive NoCall SNPs: $key

\n"; while ( my $arrayref = shift @{$HoAnocalls{$key}} ) { print "

Size in bp (min.~max.) :   ", commify($arrayref->[3]-$arrayref->[2]+1), " ~ ", commify($arrayref->[4]-$arrayref->[1]-($arrayref->[1]==1?0:1)), "

\n"; my $loh = sprintf("%.2f", $arrayref->[5]); print "\n" . " \n" . " \n" . " \n" . " \n" . " \n" . # " \n" . " \n" . " \n" . "
Chromosome : " . $arrayref->[0] . "before : " . $arrayref->[1] . "start : " . $arrayref->[2] . "end : " . $arrayref->[3] . "after : " . $arrayref->[4] . "LOH : " . $loh . "
" ; # "
" ; my $i = 0; foreach my $rs (@{$arrayref->[5]}) { if (length($rs)) { print "  $rs"; if (++$i == 10) { print "  
\n"; $i = 0; } } } print "
\n" . "

 \n"; } } print <

Back to KKISNP Home Page END_bottom } #-------- end of table_CNC ------------ sub graph_CH { my @counts; foreach my $key (sort {$a <=> $b} keys %HoAresults) { push @counts, scalar(@{$HoAresults{$key}}); } push @data, [ sort {$a <=> $b} keys %HoAresults ]; push @data, \@counts; my $max_count = $counts[0]; my $digits = length scalar $max_count; my $max_y = 10 ** $digits; my $min_y = $max_y / 10; my $good_y = (int ($max_count / $min_y) + 1) * $min_y; $good_y = $good_y < 10 ? 10 : $good_y; my $good_x = 460 + ( ($#counts > 10 ) ? ($#counts - 10) * 20 : 0 ); my $graph = new GD::Graph::bars( $good_x, 600 ); my $title = "Consecutive Homo/Hemizygous SNPs - $filename" . ($chrm ? " : Chrm$chrm" : ''); $graph->set( title => $title, x_label => "Number of Consecutive Homozygous SNPs", y_label => "Occurrences", long_ticks => 1, y_max_value => $good_y, y_min_value => 0, y_tick_number => 10, y_label_skip => 2, bar_spacing => $#counts < 7 ? 30 : 5, types => [ "bars" ], show_values => 1, ); my $gd_image = $graph->plot( \@data ); print $q->header( -type => "image/png", -expires => "now" ); binmode STDOUT; print $gd_image->png; } sub table_CH { # send the top part of the new HTML code to the browser print $q->header( "text/html" ), $q->start_html( "Consecutive Homozygosity Report" ); print "\n", "

Consecutive Homozygosity/Hemizygosity Report : $filename

\n"; print "\n", " \n", " \n", " \n", "
Search Criterion    Chromosome:   ", $chrm ? $chrm : 'All', "
Minimum Value  :   ", $minsize, "
Maximum Value  :   ", $maxsize, "
\n"; print <Search Summary END_top foreach my $key (reverse sort {$a <=> $b} keys %HoAresults) { print "\n"; } print "
Consecutive Homozygous or Hemizygous SNPs Occurrences
" . $key . "" . scalar(@{$HoAresults{$key}}) . "
\n" . "

\n" . "

Detailed Report

\n" ; foreach my $key (reverse sort {$a <=> $b} keys %HoAresults) { if ( $showSize && ($showSize> $key) ) { next; } print "

Consecutive Homozygous or Hemizygous SNPs : $key

\n"; while ( my $arrayref = shift @{$HoAresults{$key}} ) { print "

Size in bp (min.~max.) :   ", commify($arrayref->[3]-$arrayref->[2]+1), " ~ ", commify($arrayref->[4]-$arrayref->[1]-($arrayref->[1]==1?0:1)), "

\n"; my $loh = sprintf("%.2f", $arrayref->[5]); print "\n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . "
Chromosome : " . $arrayref->[0] . "before : " . $arrayref->[1] . "start : " . $arrayref->[2] . "end : " . $arrayref->[3] . "after : " . $arrayref->[4] . "LOH : " . $loh . "
" ; my $i = 0; foreach my $rs (@{$arrayref->[6]}) { if (length($rs)) { print "  $rs"; if (++$i == 10) { print "  
\n"; $i = 0; } } } print "
\n" . "

 \n"; } } print <

Back to KKISNP Home Page END_bottom } #-------- end of table_CH ------------ sub graph_CHS { my %counts; foreach my $key (keys %HoAresults) { while ( my $arrayref = shift @{$HoAresults{$key}} ) { my $size_100K = int( ($arrayref->[3] - $arrayref->[2] + 1) / 100000 ); my $size_M = $size_100K / 10; my $Rsize = $arrayref->[3] - $arrayref->[2] + 1; if ( ($size_M < $minsize) || ($size_M > $maxsize) ) { next; } if ($size_M < 0.25 ) { $counts{'0.0'}++; } elsif ($size_M < 0.75) { $counts{'0.5'}++; } elsif ($size_M < 1.25) { $counts{'1.0'}++; } elsif ($size_M < 1.75) { $counts{'1.5'}++; } elsif ($size_M < 2.25) { $counts{'2.0'}++; } elsif ($size_M < 2.75) { $counts{'2.5'}++; } elsif ($size_M < 3.25) { $counts{'3.0'}++; } elsif ($size_M < 3.75) { $counts{'3.5'}++; } elsif ($size_M < 4.25) { $counts{'4.0'}++; } elsif ($size_M < 4.75) { $counts{'4.5'}++; } elsif ($size_M < 5.25) { $counts{'5.0'}++; } elsif ($size_M < 5.75) { $counts{'5.5'}++; } elsif ($size_M < 6.25) { $counts{'6.0'}++; } elsif ($size_M < 6.75) { $counts{'6.5'}++; } elsif ($size_M < 7.25) { $counts{'7.0'}++; } elsif ($size_M < 7.75) { $counts{'7.5'}++; } elsif ($size_M < 8.25) { $counts{'8.0'}++; } elsif ($size_M < 8.75) { $counts{'8.5'}++; } elsif ($size_M < 9.25) { $counts{'9.0'}++; } elsif ($size_M < 9.75) { $counts{'9.5'}++; } elsif ($size_M < 10.25) { $counts{'10.0'}++; } else { $counts{'10.0+'}++; } } } my @sums; foreach my $key (sort {$a <=> $b} keys %counts) { push @sums, $counts{$key}; #print STDERR ">>$counts{$key}<<\n";; } push @data, [ sort {$a <=> $b} keys %counts ]; push @data, \@sums; my $max_count = 0; for (my $i = 0; $i <= $#sums; $i++) { $max_count = $sums[$i] if ($max_count < $sums[$i]); } my $digits = length scalar $max_count; my $max_y = 10 ** $digits; my $min_y = $max_y / 10; my $good_y = (int ($max_count / $min_y) + 1) * $min_y; $good_y = $good_y < 10 ? 10 : $good_y; my $good_x = 540 + ( ($#sums > 10 ) ? ($#sums - 10) * 20 : 0 ); my $graph = new GD::Graph::bars( $good_x, 600 ); my $title = "Consecutive Homo/Hemizygous Regions by Size - $filename" . ($chrm ? " : Chrm$chrm" : ''); $graph->set( title => $title, x_label => "Size in Mbp", y_label => "Occurrences", long_ticks => 1, y_max_value => $good_y, y_min_value => 0, y_tick_number => 10, y_label_skip => 2, bar_spacing => scalar(keys(%counts)) < 7 ? 30 : 5, types => [ "bars" ], show_values => 1, ); my $gd_image = $graph->plot( \@data ); print $q->header( -type => "image/png", -expires => "now" ); binmode STDOUT; print $gd_image->png; } sub table_CHS { # reverse sort the whole %HoAresults based on bp sizes, keep results in a hash of array my %HoAsorted; foreach my $key (keys %HoAresults) { while (my $arrayref = shift @{$HoAresults{$key}} ) { my $size = $arrayref->[3] - $arrayref->[2] + 1; push @{$HoAsorted{$size}}, $arrayref; } } my $minMbp = (scalar $minsize) * 1000000; my $maxMbp = (scalar $maxsize) * 1000000; # send the top part of the new HTML code to the browser print $q->header( "text/html" ), $q->start_html( "Size of Consecutive Homozygosity Regions" ); print "\n", "

Size of Consecutive Homozygosity Regions :   $filename

\n"; print "\n", " \n", " \n", " \n", "
Search Criterion    Chromosome:   ", $chrm ? $chrm : 'All', "
Minimum Value  :   ", $minsize, " Mbp
Maximum Value  :   ", $maxsize, " Mbp
\n"; foreach my $key (reverse sort {$a <=> $b} keys %HoAsorted) { if ( $showSize && ($showSize> $key) ) { next; } while ( my $arrayref = shift @{$HoAsorted{$key}} ) { my $Rsize = $arrayref->[3] - $arrayref->[2] + 1; if ( ($Rsize < $minMbp) || ($Rsize > $maxMbp) ) { next; } print "

Consecutive Homozygosity Size (min.~max.) :   ", commify($key), " ~ ", commify($arrayref->[4]-$arrayref->[1]-($arrayref->[1]==1?0:1)), "
", "SNP Count :   ", scalar @{$arrayref->[6]}, "

\n"; my $loh = sprintf("%.2f", $arrayref->[5]); print "\n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . "
Chromosome : " . $arrayref->[0] . "before : " . $arrayref->[1] . "start : " . $arrayref->[2] . "end : " . $arrayref->[3] . "after : " . $arrayref->[4] . "LOH : " . $loh . "
" ; my $i = 0; foreach my $rs (@{$arrayref->[6]}) { if (length($rs)) { print "  $rs"; if (++$i == 10) { print "  
\n"; $i = 0; } } } print "
\n" . "

 \n"; } } print <

Back to KKISNP Home Page END_bottom } #-------- end of table_CHS ------------ sub graph_CHLOH { my %counts; foreach my $key (keys %HoAresults) { while ( my $arrayref = shift @{$HoAresults{$key}} ) { my $loh = $arrayref->[5]; if ( ($loh < $minsize) || ($loh > $maxsize) ) { next; } if ($loh < 0.25 ) { $counts{'0.0'}++; } elsif ($loh < 0.75) { $counts{'0.5'}++; } elsif ($loh < 1.25) { $counts{'1.0'}++; } elsif ($loh < 1.75) { $counts{'1.5'}++; } elsif ($loh < 2.25) { $counts{'2.0'}++; } elsif ($loh < 2.75) { $counts{'2.5'}++; } elsif ($loh < 3.25) { $counts{'3.0'}++; } elsif ($loh < 3.75) { $counts{'3.5'}++; } elsif ($loh < 4.25) { $counts{'4.0'}++; } elsif ($loh < 4.75) { $counts{'4.5'}++; } elsif ($loh < 5.25) { $counts{'5.0'}++; } elsif ($loh < 5.75) { $counts{'5.5'}++; } elsif ($loh < 6.25) { $counts{'6.0'}++; } elsif ($loh < 6.75) { $counts{'6.5'}++; } elsif ($loh < 7.25) { $counts{'7.0'}++; } elsif ($loh < 7.75) { $counts{'7.5'}++; } elsif ($loh < 8.25) { $counts{'8.0'}++; } elsif ($loh < 8.75) { $counts{'8.5'}++; } elsif ($loh < 9.25) { $counts{'9.0'}++; } elsif ($loh < 9.75) { $counts{'9.5'}++; } elsif ($loh < 10.25) { $counts{'10.0'}++; } else { $counts{'10.0+'}++; } } } my @sums; foreach my $key (sort {$a <=> $b} keys %counts) { push @sums, $counts{$key}; #print STDERR ">>$counts{$key}<<\n";; } push @data, [ sort {$a <=> $b} keys %counts ]; push @data, \@sums; my $max_count = 0; for (my $i = 0; $i <= $#sums; $i++) { $max_count = $sums[$i] if ($max_count < $sums[$i]); } my $digits = length scalar $max_count; my $max_y = 10 ** $digits; my $min_y = $max_y / 10; my $good_y = (int ($max_count / $min_y) + 1) * $min_y; $good_y = $good_y < 10 ? 10 : $good_y; my $good_x = 540 + ( ($#sums > 10 ) ? ($#sums - 10) * 20 : 0 ); my $graph = new GD::Graph::bars( $good_x, 600 ); my $title = "Consecutive Homo/Hemizygous Regions by LOH - $filename" . ($chrm ? " : Chrm$chrm" : ''); $graph->set( title => $title, x_label => "LOH Value", y_label => "Occurrences", long_ticks => 1, y_max_value => $good_y, y_min_value => 0, y_tick_number => 10, y_label_skip => 2, bar_spacing => scalar(keys(%counts)) < 7 ? 30 : 5, types => [ "bars" ], show_values => 1, ); my $gd_image = $graph->plot( \@data ); print $q->header( -type => "image/png", -expires => "now" ); binmode STDOUT; print $gd_image->png; } sub table_CHLOH { # reverse sort the whole %HoAresults based on CH Regions' LOH values, keep results in a hash of array my %HoAsorted; foreach my $key (keys %HoAresults) { while (my $arrayref = shift @{$HoAresults{$key}} ) { push @{$HoAsorted{$arrayref->[5]}}, $arrayref; } } # send the top part of the new HTML code to the browser print $q->header( "text/html" ), $q->start_html( "LOH of Consecutive Homozygosity Regions" ); print "\n", "

LOH of Consecutive Homozygosity/Hemogyzosity Regions :   $filename

\n"; print "\n", " \n", " \n", " \n", "
Search Criterion    Chromosome:   ", $chrm ? $chrm : 'All', "
Minimum Value  :   ", $minsize, "
Maximum Value  :   ", $maxsize, "
\n"; foreach my $key (reverse sort {$a <=> $b} keys %HoAsorted) { if ( $showSize && ($showSize> $key) ) { next; } while ( my $arrayref = shift @{$HoAsorted{$key}} ) { my $loh = sprintf("%.2f", $arrayref->[5]); if ( ($loh < $minsize) || ($loh > $maxsize) ) { next; } print "

Consecutive Homozygosity Size (min.~max.) :   ", commify($arrayref->[3]-$arrayref->[2]+1), " ~ ", commify($arrayref->[4]-$arrayref->[1]-($arrayref->[1]==1?0:1)), "
", "SNP count :   ", scalar @{$arrayref->[6]}, "

\n"; print "\n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . " \n" . "
Chromosome : " . $arrayref->[0] . "before : " . $arrayref->[1] . "start : " . $arrayref->[2] . "end : " . $arrayref->[3] . "after : " . $arrayref->[4] . "LOH : " . $loh . "
" ; my $i = 0; foreach my $rs (@{$arrayref->[6]}) { if (length($rs)) { print "  $rs"; if (++$i == 10) { print "  
\n"; $i = 0; } } } print "
\n" . "

 \n"; } } print <

Back to KKISNP Home Page END_bottom } #-------- end of table_CHLOH ------------ sub graph_LOH { my %counts; foreach my $cn (@LOH) { if ($cn < 0.25 ) { $counts{'0.0'}++; } elsif ($cn < 0.75) { $counts{'0.5'}++; } elsif ($cn < 1.25) { $counts{'1.0'}++; } elsif ($cn < 1.55) { $counts{'1.5'}++; } elsif ($cn < 2.25) { $counts{'2.0'}++; } elsif ($cn < 2.75) { $counts{'2.5'}++; } elsif ($cn < 3.25) { $counts{'3.0'}++; } elsif ($cn < 3.75) { $counts{'3.5'}++; } elsif ($cn < 4.25) { $counts{'4.0'}++; } elsif ($cn < 4.75) { $counts{'4.5'}++; } elsif ($cn < 5.25) { $counts{'5.0'}++; } elsif ($cn < 5.75) { $counts{'5.5'}++; } elsif ($cn < 6.25) { $counts{'6.0'}++; } elsif ($cn < 6.75) { $counts{'6.5'}++; } elsif ($cn < 7.25) { $counts{'7.0'}++; } elsif ($cn < 7.75) { $counts{'7.5'}++; } elsif ($cn < 8.25) { $counts{'8.0'}++; } elsif ($cn < 8.75) { $counts{'8.5'}++; } elsif ($cn < 9.25) { $counts{'9.0'}++; } elsif ($cn < 9.75) { $counts{'9.5'}++; } elsif ($cn < 10.25) { $counts{'10.0'}++; } else { $counts{'10+'}++; } } my @sums; foreach my $key (sort {$a <=> $b} keys %counts) { push @sums, $counts{$key}; } push @data, [ sort {$a <=> $b} keys %counts ]; push @data, \@sums; my $max_count = 0; for (my $i = 0; $i <= $#sums; $i++) { $max_count = $sums[$i] if ($max_count < $sums[$i]); } my $digits = length scalar $max_count; my $max_y = 10 ** $digits; my $min_y = $max_y / 10; my $good_y = (int ($max_count / $min_y) + 1) * $min_y; $good_y = $good_y < 10 ? 10 : $good_y; my $good_x = 400 + ( ($#sums > 10 ) ? ($#sums - 10) * 20 : 0 ); my $graph = new GD::Graph::bars( $good_x, 600 ); my $title = "LOH Value of Individual SNPs - $filename" . ($chrm ? " : Chrm$chrm" : ''); $graph->set( title => $title, x_label => "LOH Value", y_label => "Occurrences", long_ticks => 1, y_max_value => $good_y, y_min_value => 0, y_tick_number => 10, y_label_skip => 2, bar_spacing => $#sums < 7 ? 30 : 5, types => [ "bars" ], show_values => 1, ); my $gd_image = $graph->plot( \@data ); print $q->header( -type => "image/png", -expires => "now" ); binmode STDOUT; print $gd_image->png; } sub graph_CN { my %counts; foreach my $cn (@CN) { if ($cn < 0.25) { $counts{'0.0'}++; } elsif ($cn < 0.75) { $counts{'0.5'}++; } elsif ($cn < 1.25) { $counts{'1.0'}++; } elsif ($cn < 1.75) { $counts{'1.5'}++; } elsif ($cn < 2.25) { $counts{'2.0'}++; } elsif ($cn < 2.75) { $counts{'2.5'}++; } elsif ($cn < 3.25) { $counts{'3.0'}++; } elsif ($cn < 3.75) { $counts{'3.5'}++; } elsif ($cn < 4.25) { $counts{'4.0'}++; } elsif ($cn < 4.75) { $counts{'4.5'}++; } elsif ($cn < 5.25) { $counts{'5.0'}++; } elsif ($cn < 5.75) { $counts{'5.5'}++; } elsif ($cn < 6.25) { $counts{'6.0'}++; } elsif ($cn < 6.75) { $counts{'6.5'}++; } elsif ($cn < 7.25) { $counts{'7.0'}++; } elsif ($cn < 7.75) { $counts{'7.5'}++; } elsif ($cn < 8.25) { $counts{'8.0'}++; } elsif ($cn < 8.75) { $counts{'8.5'}++; } elsif ($cn < 9.25) { $counts{'9.0'}++; } elsif ($cn < 9.75) { $counts{'9.5'}++; } elsif ($cn < 10.25) { $counts{'10.0'}++; } else { $counts{'10+'}++; } } my @sums; foreach my $key (sort {$a <=> $b} keys %counts) { push @sums, $counts{$key}; } push @data, [ sort {$a <=> $b} keys %counts ]; push @data, \@sums; my $max_count = 0; for (my $i = 0; $i <= $#sums; $i++) { $max_count = $sums[$i] if ($max_count < $sums[$i]); } my $digits = length scalar $max_count; my $max_y = 10 ** $digits; my $min_y = $max_y / 10; my $good_y = (int ($max_count / $min_y) + 1) * $min_y; $good_y = $good_y < 10 ? 10 : $good_y; my $good_x = 400 + ( ($#sums > 10 ) ? ($#sums - 10) * 20 : 0 ); my $graph = new GD::Graph::bars( $good_x, 600 ); my $title = "SNPs Copy Number - $filename" . ($chrm ? " : Chrm$chrm" : ''); $graph->set( title => $title, x_label => "Copy Number", y_label => "Occurrences", long_ticks => 1, y_max_value => $good_y, y_min_value => 0, y_tick_number => 10, y_label_skip => 2, bar_spacing => $#sums < 7 ? 30 : 5, types => [ "bars" ], show_values => 1, ); my $gd_image = $graph->plot( \@data ); print $q->header( -type => "image/png", -expires => "now" ); binmode STDOUT; print $gd_image->png; } sub table_CN { my %counts; my %CNbins; # bins used to hold SNP_IDs belongs to each range of Copy Numbers. # ie, 0.25 - <0.75 for the bin with a lable of '0.5' foreach my $cn (@CN) { my $SNP_ID = shift @dbSNP; if ($cn < 0.25) { $counts{'0.0'}++; push @{$CNbins{'0.0'}}, $SNP_ID; } elsif ($cn < 0.75) { $counts{'0.5'}++; push @{$CNbins{'0.5'}}, $SNP_ID; } elsif ($cn < 1.25) { $counts{'1.0'}++; push @{$CNbins{'1.0'}}, $SNP_ID; } elsif ($cn < 1.75) { $counts{'1.5'}++; push @{$CNbins{'1.5'}}, $SNP_ID; } elsif ($cn < 2.25) { $counts{'2.0'}++; push @{$CNbins{'2.0'}}, $SNP_ID; } elsif ($cn < 2.75) { $counts{'2.5'}++; push @{$CNbins{'2.5'}}, $SNP_ID; } elsif ($cn < 3.25) { $counts{'3.0'}++; push @{$CNbins{'3.0'}}, $SNP_ID; } elsif ($cn < 3.75) { $counts{'3.5'}++; push @{$CNbins{'3.5'}}, $SNP_ID; } elsif ($cn < 4.25) { $counts{'4.0'}++; push @{$CNbins{'4.0'}}, $SNP_ID; } elsif ($cn < 4.75) { $counts{'4.5'}++; push @{$CNbins{'4.5'}}, $SNP_ID; } elsif ($cn < 5.25) { $counts{'5.0'}++; push @{$CNbins{'5.0'}}, $SNP_ID; } elsif ($cn < 5.75) { $counts{'5.5'}++; push @{$CNbins{'5.5'}}, $SNP_ID; } elsif ($cn < 6.25) { $counts{'6.0'}++; push @{$CNbins{'6.0'}}, $SNP_ID; } elsif ($cn < 6.75) { $counts{'6.5'}++; push @{$CNbins{'6.5'}}, $SNP_ID; } elsif ($cn < 7.25) { $counts{'7.0'}++; push @{$CNbins{'7.0'}}, $SNP_ID; } elsif ($cn < 7.75) { $counts{'7.5'}++; push @{$CNbins{'7.5'}}, $SNP_ID; } elsif ($cn < 8.25) { $counts{'8.0'}++; push @{$CNbins{'8.0'}}, $SNP_ID; } elsif ($cn < 8.75) { $counts{'8.5'}++; push @{$CNbins{'8.5'}}, $SNP_ID; } elsif ($cn < 9.25) { $counts{'9.0'}++; push @{$CNbins{'9.0'}}, $SNP_ID; } elsif ($cn < 9.75) { $counts{'9.5'}++; push @{$CNbins{'9.5'}}, $SNP_ID; } elsif ($cn < 10.25) { $counts{'10.0'}++; push @{$CNbins{'10.0'}}, $SNP_ID; } else { $counts{'10+'}++; push @{$CNbins{'10.0'}}, $SNP_ID; } } # send the top part of the new HTML code to the browser print $q->header( "text/html" ), $q->start_html( "KKI SNP Report" ); print "\n", "

SNP Copy Number Report : $filename

\n"; print "\n", " \n", " \n", " \n", "
Search Criterion    Chromosome:   ", $chrm ? $chrm : 'All', "
Minimum Value  :   ", $minsize, "
Maximum Value  :   ", $maxsize, "
\n"; print <Search Summary END_top foreach my $bin (reverse sort {$a <=> $b} keys %CNbins) { print "\n"; } print "
SNP Copy Number Bins Occurrences
" . $bin . "" . $counts{$bin} . "
\n" . "

\n" . "

Detailed Report

\n" ; foreach my $bin (reverse sort {$a <=> $b} keys %CNbins) { my $bin_min = ($bin ne '0.0') ? ($bin - 0.25) : '0.0'; my $bin_max = ($bin ne '10+') ? ($bin + 0.25) : '10+'; print "

SNP Copy Number Bin  \[" . $bin_min . ' ~ ' . $bin_max . "\)  :  $bin

\n"; print "\n" . " \n" . "
" ; my $i = 0; foreach my $snp_id (@{$CNbins{$bin}}) { if (length($snp_id)) { print "  $snp_id"; if (++$i == 10) { print "  
\n"; $i = 0; } } } print "
\n" . "

 \n"; } print <

Back to KKISNP Home Page END_bottom } sub dir_size { my $dir = shift; my $dir_size = 0; # Loop through files and sum the sizes; doesn't descend down subdirs opendir DIR, $dir or die "Unable to open $dir: $!"; while ( my $ldfile = readdir DIR ) { $dir_size += -s "$dir/$ldfile"; } return $dir_size; } sub error { my( $q, $reason ) = @_; print $q->header( "text/html" ), $q->start_html( "Error" ), $q->h1( "Error" ), $q->p( "Your file was not procesed because the following error occured: " ), $q->p( $q->i( $reason ) ), $q->end_html; exit; } sub commify { my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $text; }