#!/usr/bin/perl -w # SNPscan.pl - 2005-04-22 Written by Jason Ting # last updated: 2006-05-04 # # Copyright (c) 2005-2006 Jason Ting and Jonathan Pevsner. # All Rights Reserved. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. use strict; use CGI; use GD::Graph::bars; use Fcntl qw( :DEFAULT :flock ); use Image::Magick; use constant BUFFER_SIZE => 16_384; use constant MAX_FILE_SIZE => 1_048_576 * 80; # Limit each upload to 80 MB use constant MAX_DIR_SIZE => 1_048_576 * 800; # Limit total uploads to 800 MB use constant MAX_OPEN_TRIES => 20; $CGI::DISABLE_UPLOADS = 0; $CGI::POST_MAX = MAX_FILE_SIZE; my $upload_directory = "/tmp/SNPscan"; my $q = new CGI; $q->cgi_error and error( $q, "Error transferring file: " . $q->cgi_error ); my $file = $q->param( "file" ) || error( $q, "No file received." ); my $filename = $file; # $q->param( "filename" ) || error( $q, "No filename entered." ); $filename =~ s/.*[\/\\](.*)/$1/; my $x_axis = $q->param( "x-pixels" ) || error( $q, "No X_axis size specified." ); my $y_axis = $q->param( "y-pixels" ) || error( $q, "No Y_axis size specified." ); my $t_margin = $q->param( "t-margin" ); my $build = $q->param( "build" ); my $max_CN = $q->param( "max-CN" ); my $chr_in = $q->param( "chr" ); my $LOH = $q->param( "LOH" ); my $pValue = $q->param( "pValue" ); my $paired = $q->param( "paired" ); my $non_informative = $q->param( "non_informative" ); my $retention = $q->param( "retention" ); my $allelic_gain = $q->param( "allelic_gain" ); my $allelic_loss = $q->param( "allelic_loss" ); my $NoCall_1 = $q->param( "NoCall_1" ); my $NoCall_2 = $q->param( "NoCall_2" ); my $NoCall_both = $q->param( "NoCall_both" ); my $diploid_switch = $q->param( "diploid_switch" ); my $fh = $q->upload( "file" ); my $buffer = ""; # check the list of chromosomes my $chr_list; my $selected_chr; if ($chr_in) { foreach my $chr (split / /, $chr_in) { if ((1 <= $chr) && ($chr <= 22)) { $chr_list = $chr_list . ",$chr"; $selected_chr = $selected_chr . "\|Chromosome=='$chr'"; } elsif ($chr eq 'X') { $chr_list = $chr_list . ",23"; $selected_chr = $selected_chr . "\|Chromosome=='X'"; } elsif ($chr eq 'Y') { $chr_list = $chr_list . ",24"; $selected_chr = $selected_chr . "\|Chromosome=='Y'"; } elsif ($chr eq 'M') { $chr_list = $chr_list . ",$25"; $selected_chr = $selected_chr . "\|Chromosome=='M'"; } elsif ($chr eq '*') { $chr_list = $chr_list . ",1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22"; $selected_chr = "\|Chromosome=='1'\|Chromosome=='2'\|Chromosome=='3'\|Chromosome=='4'". "\|Chromosome=='5'\|Chromosome=='6'\|Chromosome=='7'\|Chromosome=='8'". "\|Chromosome=='9'\|Chromosome=='10'\|Chromosome=='11'\|Chromosome=='12'". "\|Chromosome=='13'\|Chromosome=='14'\|Chromosome=='15'\|Chromosome=='16'". "\|Chromosome=='17'\|Chromosome=='18'\|Chromosome=='19'\|Chromosome=='20'". "\|Chromosome=='21'\|Chromosome=='22'"; } elsif ($chr eq 'A') { $chr_list = $chr_list . ",1,2,3"; $selected_chr = "\|Chromosome=='1'\|Chromosome=='2'\|Chromosome=='3'"; } elsif ($chr eq 'B') { $chr_list = $chr_list . ",4,5"; $selected_chr = "\|Chromosome=='4'\|Chromosome=='5'"; } elsif ($chr eq 'C') { $chr_list = $chr_list . ",6,7,8,9,10,11,12"; $selected_chr = "\|Chromosome=='6'\|Chromosome=='7'\|Chromosome=='8'". "\|Chromosome=='9'\|Chromosome=='10'\|Chromosome=='11'\|Chromosome=='12'"; } elsif ($chr eq 'D') { $chr_list = $chr_list . ",13,14,15"; $selected_chr = "\|Chromosome=='13'\|Chromosome=='14'\|Chromosome=='15'"; } elsif ($chr eq 'E') { $chr_list = $chr_list . ",16,17,18"; $selected_chr = "\|Chromosome=='16'\|Chromosome=='17'\|Chromosome=='18'"; } elsif ($chr eq 'F') { $chr_list = $chr_list . ",19,20"; $selected_chr = "\|Chromosome=='19'\|Chromosome=='20'"; } elsif ($chr eq 'G') { $chr_list = $chr_list . ",21,22"; $selected_chr = "\|Chromosome=='21'\|Chromosome=='22'"; } else { error( $q, "Invalid chromosome ID." ); } } } else { $chr_list = $chr_list . ",1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23"; $selected_chr = "\|Chromosome=='1'\|Chromosome=='2'\|Chromosome=='3'\|Chromosome=='4'". "\|Chromosome=='5'\|Chromosome=='6'\|Chromosome=='7'\|Chromosome=='8'". "\|Chromosome=='9'\|Chromosome=='10'\|Chromosome=='11'\|Chromosome=='12'". "\|Chromosome=='13'\|Chromosome=='14'\|Chromosome=='15'\|Chromosome=='16'". "\|Chromosome=='17'\|Chromosome=='18'\|Chromosome=='19'\|Chromosome=='20'". "\|Chromosome=='21'\|Chromosome=='22'\|Chromosome=='X'"; } if ( dir_size( $upload_directory ) + $ENV{CONTENT_LENGTH} > MAX_DIR_SIZE ) { error( $q, "Upload directory is full." ); } # Allow letters, digits, periods, underscores, dashes # Convert anything else to an underscore $filename =~ s/[^\w.-]/_/g; if ( $filename =~ /^(\w[\w.-]*)/ ) { $filename = $1; } else { error( $q, "Invalid file name; files must start with a letter or number." ); } # Open output file, making sure the name is unique until ( sysopen OUTPUT, $upload_directory . "/$filename", O_CREAT | O_RDWR | O_EXCL ) { $filename =~ s/^(\d*)(\.?\S+)$/($1||0) + 1 . $2/e; $1 and $1 >= MAX_OPEN_TRIES and error( $q, "Unable to save your file on our server.\nYou may want to rename it and upload it again.\nAvoid filename starting with numbers." ); } # This is necessary for non-Unix systems; does nothing on Unix binmode $fh; binmode OUTPUT; # Write contents to output file while ( read( $fh, $buffer, BUFFER_SIZE ) ) { print OUTPUT $buffer; } close OUTPUT; # if the user selected paired data, check the uploaded file if ($paired eq "yes") { open USERDATA, "<$upload_directory/$filename" or die "Unable to check $upload_directory/$filename: $!"; # ; my $line1 = ; my @matches = $line1 =~ m/(_Call)/g; unless (($#matches > 0) && (($#matches % 2) == 1)) { error( $q, "You have selected the paired data option. Please make sure that the file uploaded contains paired SNP data!

$#matches" ); } close USERDATA; } ############# This section deal with R and graphics issues ################ # read in R templet from file open RCODE_TMP, "; $/ = $save_input_separator; close RCODE_TMP; # or error( $q, "Can't close R templet: $!" ); # replace filename place holder with real name $Rcode =~ s|your-input-file|$upload_directory/$filename|g; $Rcode =~ s|your-file-name|$filename|g; # replace output size holder with user's input $Rcode =~ s|x-axis-size|$x_axis|g; $Rcode =~ s|y-axis-size|$y_axis|g; $Rcode =~ s|your-text-margin|$t_margin|g; # replace the build information $Rcode =~ s|your-build-info|$build|g; # replace maximum Copy Number if it is specified if ($max_CN) { $Rcode =~ s|your-maximum-copy-number|$max_CN|g; } else { $Rcode =~ s|your-maximum-copy-number|max(MT[,patient.col])|g; } # replace chromosome place holder with selected chromosome number $selected_chr =~ s/^\|//; $Rcode =~ s|your-selected-chromosomes|$selected_chr|g; $chr_list =~ s|^,|c\(|; $chr_list = $chr_list . ')'; $Rcode =~ s|your-chromosome-list|$chr_list|g; # replace the HOL option holder if ($LOH eq "yes") { $Rcode =~ s|your-LOH-option|lines(accumAddr,MT[,LOH.cols[i]],col="ivory3",type="h")|g; } else { $Rcode =~ s|your-LOH-option||g; } # replace the p-Value option holder if ($pValue eq "SPA_pValue") { $Rcode =~ s|your-pValue-columns|pValue.cols <- grep("_SPA_pVal",names(MT))|g; $Rcode =~ s|your-pValue-option|lines(accumAddr,abs(MT[,pValue.cols[i]]),col="yellow",type="s")|g; } elsif ($pValue eq "CPA_pValue") { $Rcode =~ s|your-pValue-columns|pValue.cols <- grep("_CPA_pVal",names(MT))|g; $Rcode =~ s|your-pValue-option|lines(accumAddr,abs(MT[,pValue.cols[i]]),col="yellow",type="s")|g; } elsif ($pValue eq "GSA_pValue") { $Rcode =~ s|your-pValue-columns|pValue.cols <- grep("_GSA_pVal",names(MT))|g; $Rcode =~ s|your-pValue-option|lines(accumAddr,abs(MT[,pValue.cols[i]]),col="yellow",type="s")|g; } else { $Rcode =~ s|your-pValue-columns||g; $Rcode =~ s|your-pValue-option||g; } # replace the paired data option holder if ($paired eq "yes") { $Rcode =~ s|your-paired-data-selection|*1.5 |g; $Rcode =~ s|your-paired-option| # begin of R corde for Paried Data ------------------------ if ((pat.num %% 2) == 1) { # keep Array_1 info (calls and copy nubmers) for later use Array_1_Call = MT[,patient.col-1] Array_1_CN = MT[,patient.col] patient.ID_1 <- patient.ID } else { # keey Array_2 info Array_2_Call = MT[,patient.col-1] Array_2_CN = MT[,patient.col] # calculate changes in log2: Array_2 / Array_1 i.e., new_CN / old_CN changes = log2(Array_2_CN / Array_1_CN) # calculate type of changes. note: it's type["old" row,"new" col] change_type <- c() for (s in 1:length(MT[,patient.col-1])) { change_type[s] = type[Array_1_Call[s],Array_2_Call[s]] } # point to the specific screen and set margins scrn <- scrn + 1 screen(scrn) par(mai=c(0.05,0.5,0.05,0.1)) par(ps=9) # plot a blank to set up X and Y textMargin <- (maxAddr - minAddr) * your-text-margin plot(accumAddr,MT[,LOH.cols[i]],type="n", xlim=c(minAddr,maxAddr+textMargin), xlab="", ylab="",xaxs="i", ylim=c(-2,2), axes=T) # plot lines for copy number equals to 1,-1 lines(c(minAddr,max(accumAddr)),c(1,1),col="gray") lines(c(minAddr,max(accumAddr)),c(-1,-1),col="gray") # plot the result, starts with non-informatics - blue dots points(accumAddr[change_type==1],changes[change_type==1], non_informative_color,pch=20,cex=0.05) # plot Retention - blue dots points(accumAddr[change_type==2],changes[change_type==2], retention_color,pch=20,cex=0.05) # plot NoCall in "old" Array_1 - gray dots points(accumAddr[change_type==4],changes[change_type==4], NoCall_1_color,pch=20,cex=0.05) # plot NoCall in "new" Array_2 - gray dots points(accumAddr[change_type==5],changes[change_type==5], NoCall_2_color,pch=20,cex=0.05) # plot NoCall in both - gray dots points(accumAddr[change_type==6],changes[change_type==6], NoCall_both_color,pch=20,cex=0.05) # plot Allelic Loss (from old to new) - red dots points(accumAddr[change_type==3],changes[change_type==3], allelic_loss_color,pch=20,cex=0.05) # plot Allelic Gain (from old to new)- green dots points(accumAddr[change_type==7],changes[change_type==7], allelic_gain_color,pch=20,cex=0.05) # plot Diploid switch - yellow dots points(accumAddr[change_type==8],changes[change_type==8], diploid_switch_color,pch=20,cex=0.05) # plot lines for copy number equals to 0 lines(c(minAddr,max(accumAddr)),c(0,0),col="white") # plot lines to partition chromosomes if (length(chrList) == 1) { chr <- chrList[1] lines(c(0,0),c(-0.2,1),lwd=1) lines(c(chromSize\$size[chr],chromSize\$size[chr]),c(-1.5,1.5),lwd=1) } else { for (j in 1:24) { lines(c(chromSize\$offset[j]+1,chromSize\$offset[j]+1),c(-1.5,1.5), lwd=1,col="red") } } # plot centromere "bars" if (length(chrList) == 1) { chr <- chrList[1] polygon(c(centromere\$chromStart[chr], centromere\$chromStart[chr], centromere\$chromEnd[chr], centromere\$chromEnd[chr] ), c(-1.5,1.5,1.5,-1.5), border="gray", col="gray" ) } else { for (j in 1:23) { polygon(c(centromere\$chromStart[j]+chromSize\$offset[j], centromere\$chromStart[j]+chromSize\$offset[j], centromere\$chromEnd[j] +chromSize\$offset[j], centromere\$chromEnd[j] +chromSize\$offset[j] ), c(-1.5,1.5,1.5,-1.5), border="gray", col="gray" ) } } # write patient ID at the right text(maxAddr+200000, 1,patient.ID_1,adj=c(0, 1),cex=1) text(maxAddr+200000, 0," to", adj=c(0,0.5),cex=1) text(maxAddr+200000,-1,patient.ID, adj=c(0, 0),cex=1) text(maxAddr+textMargin, 2,paste("M", signif(mean(changes),digits=5)),adj=c(1,1),cex=0.7) text(maxAddr+textMargin,-2,paste("SD",signif( sd(changes),digits=5)),adj=c(1,0),cex=0.7) box() } # end of R corde for Paried Data ------------------------|g; # do this again to set text margin to be the same as the other two plots $Rcode =~ s|your-text-margin|$t_margin|g; if ($non_informative eq "none") { $Rcode =~ s|non_informative_color|type="n"|g; } else { $Rcode =~ s|non_informative_color|col="$non_informative"|g; } if ($retention eq "none") { $Rcode =~ s|retention_color|type="n"|g; } else { $Rcode =~ s|retention_color|col="$retention"|g; } if ($NoCall_1 eq "none") { $Rcode =~ s|NoCall_1_color|type="n"|g; } else { $Rcode =~ s|NoCall_1_color|col="$NoCall_1"|g; } if ($NoCall_2 eq "none") { $Rcode =~ s|NoCall_2_color|type="n"|g; } else { $Rcode =~ s|NoCall_2_color|col="$NoCall_2"|g; } if ($NoCall_both eq "none") { $Rcode =~ s|NoCall_both_color|type="n"|g; } else { $Rcode =~ s|NoCall_both_color|col="$NoCall_both"|g; } if ($allelic_loss eq "none") { $Rcode =~ s|allelic_loss_color|type="n"|g; } else { $Rcode =~ s|allelic_loss_color|col="$allelic_loss"|g; } if ($allelic_gain eq "none") { $Rcode =~ s|allelic_gain_color|type="n"|g; } else { $Rcode =~ s|allelic_gain_color|col="$allelic_gain"|g; } if ($diploid_switch eq "none") { $Rcode =~ s|diploid_switch_color|type="n"|g; } else { $Rcode =~ s|diploid_switch_color|col="$diploid_switch"|g; } } else { $Rcode =~ s|your-paired-data-selection||g; $Rcode =~ s|your-paired-option||g; } # write out the code to be run by R open RCODE_REAL, ">$upload_directory/$filename.R"; print RCODE_REAL $Rcode; close RCODE_REAL; # run the R code system("cd $upload_directory; /usr/bin/R --no-save --no-environ < $upload_directory/$filename.R > $upload_directory/$filename.debug"); ######################################################################### # CONVERT IMAGES TO GIF # ######################################################################### my $image = Image::Magick->new; my $x = $image->Read("$upload_directory/$filename.ps"); $x = $image->Rotate(degrees=>90); $x = $image->Sharpen(2); $x = $image->Write("$upload_directory/$filename.tif"); $x = $image->Write("$upload_directory/$filename.pdf"); my $width = $image->Get('width'); my $height = $image->Get('height'); my $new_width = int($width/1.0); my $new_height = int($height/1.0); $x = $image->Scale(geometry=>"$new_width x $new_height"); $x = $image->Write("$upload_directory/$filename.png"); system("mv $upload_directory/$filename.png /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.tif /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.ps /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.pdf /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.R /home/httpd/html/uploads/SNPscan"); # handle CN_mean $image = Image::Magick->new; $x = $image->Read("$upload_directory/$filename.mean.ps"); $x = $image->Rotate(degrees=>90); $x = $image->Sharpen(2); $x = $image->Write("$upload_directory/$filename.mean.tif"); $x = $image->Write("$upload_directory/$filename.mean.pdf"); $x = $image->Scale(geometry=>"$new_width x $new_height"); $x = $image->Write("$upload_directory/$filename.mean.png"); system("mv $upload_directory/$filename.mean.png /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.mean.tif /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.mean.ps /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.mean.pdf /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.mean.csv /home/httpd/html/uploads/SNPscan"); # handle CN_sd $image = Image::Magick->new; $x = $image->Read("$upload_directory/$filename.sd.ps"); $x = $image->Rotate(degrees=>90); $x = $image->Sharpen(2); $x = $image->Write("$upload_directory/$filename.sd.tif"); $x = $image->Write("$upload_directory/$filename.sd.pdf"); $x = $image->Scale(geometry=>"$new_width x $new_height"); $x = $image->Write("$upload_directory/$filename.sd.png"); system("mv $upload_directory/$filename.sd.png /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.sd.tif /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.sd.ps /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.sd.pdf /home/httpd/html/uploads/SNPscan"); system("mv $upload_directory/$filename.sd.csv /home/httpd/html/uploads/SNPscan"); plot_copy_number($file, $filename); exit 1; sub dir_size { my $dir = shift; my $dir_size = 0; # Loop through files and sum the sizes; doesn't descend down subdirs opendir DIR, $dir or die "Unable to open $dir: $!"; while ( my $ldfile = readdir DIR ) { $dir_size += -s "$dir/$ldfile"; } return $dir_size; } sub error { my( $q, $reason ) = @_; print $q->header( "text/html" ), $q->start_html( "Error" ), $q->h1( "Error" ), $q->p( "Your file was not procesed because the following error occured: " ), $q->p( $q->i( $reason ) ), $q->end_html; exit; } sub plot_copy_number { my( $file, $filename ) = @_; print $q->header( "text/html" ), $q->start_html( "SNPscan Plot" ); print <

SNPscan Plot

END_top print "

File uploaded  :  $file    ", "File name stored on SNPscan server  :  $filename

\n", "

Copy Number are represented in dots, color coded according to each SNP's Call info: AA, BB, AB, NoCall.

\n", "

If selected, LOHs are shown as gray bars, and p-Values are shown as yellow lines.

\n", "

CR: Call Rate of all selected chromosomes.  AB: AB calls of selected autosomes, or chromosome X when by itself.

M and SD: Mean and Standard Deviation of Copy Number for selected autosomes, or for chromosome X when it's the only one selected.

\n", "
", ""; print <The PNG output is shown above. Click on it to get the full size output in the TIF format.

Click on HERE (.pdf) to get the full size output in the PDF format.

Click on HERE (.ps) to get the full size output in the PostScript format.

Click on HERE (.R) to get a copy of the R codes generated by SNPscan. You need to change the output directory (/tmp/SNPscan) in this R code according to your system before you run it locally.


Sample Patterns:
Normal AutosomesDuplication
Normal Female Chromosome XDiploid Deletion
Normal Male Chromosome XHemizygous Deletion
Mosaic Female Chromosome XMosaic Microdeletion
Mislabelled 100K Chr XUniparental Isodisomy


The PNG output is shown above. Click on it to get the full size output in the TIF format.

Click on HERE (.pdf) to get the full size output in the PDF format.

Click on HERE (.ps) to get the full size output in the PostScript format.

Click on HERE (.csv) to get a table of detailed copy number means.


The PNG output is shown above. Click on it to get the full size output in the TIF format.

Click on HERE (.pdf) to get the full size output in the PDF format.

Click on HERE (.ps) to get the full size output in the PostScript format.

Click on HERE (.csv) to get a table of detailed copy number standard deviations.


Back to SNPscan

Go to KKI SNP Analysis Tool END_bottom } sub echo_info { my( $filename, $Rcode ) = @_; print $q->header( "text/html" ), $q->start_html( "SNPscan Plot" ); print <

SNPscan Plot

END_top print "

File name string is: '$file'

\n"; print "

File name string is: '$filename'

\n"; $Rcode =~ s/\n/
/g; print "

R codes are:

$Rcode

\n"; print <

Back to SNPscan Home Page END_bottom }