#!/usr/bin/perl -w
# SNPscan.pl - 2005-04-22 Written by Jason Ting
# last updated: 2006-05-04
#
# Copyright (c) 2005-2006 Jason Ting and Jonathan Pevsner.
# All Rights Reserved.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
use strict;
use CGI;
use GD::Graph::bars;
use Fcntl qw( :DEFAULT :flock );
use Image::Magick;
use constant BUFFER_SIZE => 16_384;
use constant MAX_FILE_SIZE => 1_048_576 * 80; # Limit each upload to 80 MB
use constant MAX_DIR_SIZE => 1_048_576 * 800; # Limit total uploads to 800 MB
use constant MAX_OPEN_TRIES => 20;
$CGI::DISABLE_UPLOADS = 0;
$CGI::POST_MAX = MAX_FILE_SIZE;
my $upload_directory = "/tmp/SNPscan";
my $q = new CGI;
$q->cgi_error and error( $q, "Error transferring file: " . $q->cgi_error );
my $file = $q->param( "file" ) || error( $q, "No file received." );
my $filename = $file; # $q->param( "filename" ) || error( $q, "No filename entered." );
$filename =~ s/.*[\/\\](.*)/$1/;
my $x_axis = $q->param( "x-pixels" ) || error( $q, "No X_axis size specified." );
my $y_axis = $q->param( "y-pixels" ) || error( $q, "No Y_axis size specified." );
my $t_margin = $q->param( "t-margin" );
my $build = $q->param( "build" );
my $max_CN = $q->param( "max-CN" );
my $chr_in = $q->param( "chr" );
my $LOH = $q->param( "LOH" );
my $pValue = $q->param( "pValue" );
my $paired = $q->param( "paired" );
my $non_informative = $q->param( "non_informative" );
my $retention = $q->param( "retention" );
my $allelic_gain = $q->param( "allelic_gain" );
my $allelic_loss = $q->param( "allelic_loss" );
my $NoCall_1 = $q->param( "NoCall_1" );
my $NoCall_2 = $q->param( "NoCall_2" );
my $NoCall_both = $q->param( "NoCall_both" );
my $diploid_switch = $q->param( "diploid_switch" );
my $fh = $q->upload( "file" );
my $buffer = "";
# check the list of chromosomes
my $chr_list;
my $selected_chr;
if ($chr_in) {
foreach my $chr (split / /, $chr_in) {
if ((1 <= $chr) && ($chr <= 22)) {
$chr_list = $chr_list . ",$chr";
$selected_chr = $selected_chr . "\|Chromosome=='$chr'";
}
elsif ($chr eq 'X') {
$chr_list = $chr_list . ",23";
$selected_chr = $selected_chr . "\|Chromosome=='X'";
}
elsif ($chr eq 'Y') {
$chr_list = $chr_list . ",24";
$selected_chr = $selected_chr . "\|Chromosome=='Y'";
}
elsif ($chr eq 'M') {
$chr_list = $chr_list . ",$25";
$selected_chr = $selected_chr . "\|Chromosome=='M'";
}
elsif ($chr eq '*') {
$chr_list = $chr_list . ",1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22";
$selected_chr =
"\|Chromosome=='1'\|Chromosome=='2'\|Chromosome=='3'\|Chromosome=='4'".
"\|Chromosome=='5'\|Chromosome=='6'\|Chromosome=='7'\|Chromosome=='8'".
"\|Chromosome=='9'\|Chromosome=='10'\|Chromosome=='11'\|Chromosome=='12'".
"\|Chromosome=='13'\|Chromosome=='14'\|Chromosome=='15'\|Chromosome=='16'".
"\|Chromosome=='17'\|Chromosome=='18'\|Chromosome=='19'\|Chromosome=='20'".
"\|Chromosome=='21'\|Chromosome=='22'";
}
elsif ($chr eq 'A') {
$chr_list = $chr_list . ",1,2,3";
$selected_chr = "\|Chromosome=='1'\|Chromosome=='2'\|Chromosome=='3'";
}
elsif ($chr eq 'B') {
$chr_list = $chr_list . ",4,5";
$selected_chr = "\|Chromosome=='4'\|Chromosome=='5'";
}
elsif ($chr eq 'C') {
$chr_list = $chr_list . ",6,7,8,9,10,11,12";
$selected_chr = "\|Chromosome=='6'\|Chromosome=='7'\|Chromosome=='8'".
"\|Chromosome=='9'\|Chromosome=='10'\|Chromosome=='11'\|Chromosome=='12'";
}
elsif ($chr eq 'D') {
$chr_list = $chr_list . ",13,14,15";
$selected_chr = "\|Chromosome=='13'\|Chromosome=='14'\|Chromosome=='15'";
}
elsif ($chr eq 'E') {
$chr_list = $chr_list . ",16,17,18";
$selected_chr = "\|Chromosome=='16'\|Chromosome=='17'\|Chromosome=='18'";
}
elsif ($chr eq 'F') {
$chr_list = $chr_list . ",19,20";
$selected_chr = "\|Chromosome=='19'\|Chromosome=='20'";
}
elsif ($chr eq 'G') {
$chr_list = $chr_list . ",21,22";
$selected_chr = "\|Chromosome=='21'\|Chromosome=='22'";
}
else {
error( $q, "Invalid chromosome ID." );
}
}
} else {
$chr_list = $chr_list . ",1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23";
$selected_chr =
"\|Chromosome=='1'\|Chromosome=='2'\|Chromosome=='3'\|Chromosome=='4'".
"\|Chromosome=='5'\|Chromosome=='6'\|Chromosome=='7'\|Chromosome=='8'".
"\|Chromosome=='9'\|Chromosome=='10'\|Chromosome=='11'\|Chromosome=='12'".
"\|Chromosome=='13'\|Chromosome=='14'\|Chromosome=='15'\|Chromosome=='16'".
"\|Chromosome=='17'\|Chromosome=='18'\|Chromosome=='19'\|Chromosome=='20'".
"\|Chromosome=='21'\|Chromosome=='22'\|Chromosome=='X'";
}
if ( dir_size( $upload_directory ) + $ENV{CONTENT_LENGTH} > MAX_DIR_SIZE ) {
error( $q, "Upload directory is full." );
}
# Allow letters, digits, periods, underscores, dashes
# Convert anything else to an underscore
$filename =~ s/[^\w.-]/_/g;
if ( $filename =~ /^(\w[\w.-]*)/ ) {
$filename = $1;
}
else {
error( $q, "Invalid file name; files must start with a letter or number." );
}
# Open output file, making sure the name is unique
until ( sysopen OUTPUT, $upload_directory . "/$filename", O_CREAT | O_RDWR | O_EXCL ) {
$filename =~ s/^(\d*)(\.?\S+)$/($1||0) + 1 . $2/e;
$1 and $1 >= MAX_OPEN_TRIES and error( $q, "Unable to save your file on our server.\nYou may want to rename it and upload it again.\nAvoid filename starting with numbers." );
}
# This is necessary for non-Unix systems; does nothing on Unix
binmode $fh;
binmode OUTPUT;
# Write contents to output file
while ( read( $fh, $buffer, BUFFER_SIZE ) ) {
print OUTPUT $buffer;
}
close OUTPUT;
# if the user selected paired data, check the uploaded file
if ($paired eq "yes") {
open USERDATA, "<$upload_directory/$filename" or die "Unable to check $upload_directory/$filename: $!";
# ;
my $line1 = ;
my @matches = $line1 =~ m/(_Call)/g;
unless (($#matches > 0) && (($#matches % 2) == 1)) {
error( $q, "You have selected the paired data option. Please make sure that the file uploaded contains paired SNP data!
$#matches" );
}
close USERDATA;
}
############# This section deal with R and graphics issues ################
# read in R templet from file
open RCODE_TMP, ";
$/ = $save_input_separator;
close RCODE_TMP; # or error( $q, "Can't close R templet: $!" );
# replace filename place holder with real name
$Rcode =~ s|your-input-file|$upload_directory/$filename|g;
$Rcode =~ s|your-file-name|$filename|g;
# replace output size holder with user's input
$Rcode =~ s|x-axis-size|$x_axis|g;
$Rcode =~ s|y-axis-size|$y_axis|g;
$Rcode =~ s|your-text-margin|$t_margin|g;
# replace the build information
$Rcode =~ s|your-build-info|$build|g;
# replace maximum Copy Number if it is specified
if ($max_CN) {
$Rcode =~ s|your-maximum-copy-number|$max_CN|g;
} else {
$Rcode =~ s|your-maximum-copy-number|max(MT[,patient.col])|g;
}
# replace chromosome place holder with selected chromosome number
$selected_chr =~ s/^\|//;
$Rcode =~ s|your-selected-chromosomes|$selected_chr|g;
$chr_list =~ s|^,|c\(|;
$chr_list = $chr_list . ')';
$Rcode =~ s|your-chromosome-list|$chr_list|g;
# replace the HOL option holder
if ($LOH eq "yes") {
$Rcode =~ s|your-LOH-option|lines(accumAddr,MT[,LOH.cols[i]],col="ivory3",type="h")|g;
} else {
$Rcode =~ s|your-LOH-option||g;
}
# replace the p-Value option holder
if ($pValue eq "SPA_pValue") {
$Rcode =~ s|your-pValue-columns|pValue.cols <- grep("_SPA_pVal",names(MT))|g;
$Rcode =~ s|your-pValue-option|lines(accumAddr,abs(MT[,pValue.cols[i]]),col="yellow",type="s")|g;
} elsif ($pValue eq "CPA_pValue") {
$Rcode =~ s|your-pValue-columns|pValue.cols <- grep("_CPA_pVal",names(MT))|g;
$Rcode =~ s|your-pValue-option|lines(accumAddr,abs(MT[,pValue.cols[i]]),col="yellow",type="s")|g;
} elsif ($pValue eq "GSA_pValue") {
$Rcode =~ s|your-pValue-columns|pValue.cols <- grep("_GSA_pVal",names(MT))|g;
$Rcode =~ s|your-pValue-option|lines(accumAddr,abs(MT[,pValue.cols[i]]),col="yellow",type="s")|g;
} else {
$Rcode =~ s|your-pValue-columns||g;
$Rcode =~ s|your-pValue-option||g;
}
# replace the paired data option holder
if ($paired eq "yes") {
$Rcode =~ s|your-paired-data-selection|*1.5 |g;
$Rcode =~ s|your-paired-option|
# begin of R corde for Paried Data ------------------------
if ((pat.num %% 2) == 1) {
# keep Array_1 info (calls and copy nubmers) for later use
Array_1_Call = MT[,patient.col-1]
Array_1_CN = MT[,patient.col]
patient.ID_1 <- patient.ID
} else {
# keey Array_2 info
Array_2_Call = MT[,patient.col-1]
Array_2_CN = MT[,patient.col]
# calculate changes in log2: Array_2 / Array_1 i.e., new_CN / old_CN
changes = log2(Array_2_CN / Array_1_CN)
# calculate type of changes. note: it's type["old" row,"new" col]
change_type <- c()
for (s in 1:length(MT[,patient.col-1])) {
change_type[s] = type[Array_1_Call[s],Array_2_Call[s]]
}
# point to the specific screen and set margins
scrn <- scrn + 1
screen(scrn)
par(mai=c(0.05,0.5,0.05,0.1))
par(ps=9)
# plot a blank to set up X and Y
textMargin <- (maxAddr - minAddr) * your-text-margin
plot(accumAddr,MT[,LOH.cols[i]],type="n",
xlim=c(minAddr,maxAddr+textMargin), xlab="", ylab="",xaxs="i",
ylim=c(-2,2), axes=T)
# plot lines for copy number equals to 1,-1
lines(c(minAddr,max(accumAddr)),c(1,1),col="gray")
lines(c(minAddr,max(accumAddr)),c(-1,-1),col="gray")
# plot the result, starts with non-informatics - blue dots
points(accumAddr[change_type==1],changes[change_type==1],
non_informative_color,pch=20,cex=0.05)
# plot Retention - blue dots
points(accumAddr[change_type==2],changes[change_type==2],
retention_color,pch=20,cex=0.05)
# plot NoCall in "old" Array_1 - gray dots
points(accumAddr[change_type==4],changes[change_type==4],
NoCall_1_color,pch=20,cex=0.05)
# plot NoCall in "new" Array_2 - gray dots
points(accumAddr[change_type==5],changes[change_type==5],
NoCall_2_color,pch=20,cex=0.05)
# plot NoCall in both - gray dots
points(accumAddr[change_type==6],changes[change_type==6],
NoCall_both_color,pch=20,cex=0.05)
# plot Allelic Loss (from old to new) - red dots
points(accumAddr[change_type==3],changes[change_type==3],
allelic_loss_color,pch=20,cex=0.05)
# plot Allelic Gain (from old to new)- green dots
points(accumAddr[change_type==7],changes[change_type==7],
allelic_gain_color,pch=20,cex=0.05)
# plot Diploid switch - yellow dots
points(accumAddr[change_type==8],changes[change_type==8],
diploid_switch_color,pch=20,cex=0.05)
# plot lines for copy number equals to 0
lines(c(minAddr,max(accumAddr)),c(0,0),col="white")
# plot lines to partition chromosomes
if (length(chrList) == 1) {
chr <- chrList[1]
lines(c(0,0),c(-0.2,1),lwd=1)
lines(c(chromSize\$size[chr],chromSize\$size[chr]),c(-1.5,1.5),lwd=1)
} else {
for (j in 1:24) {
lines(c(chromSize\$offset[j]+1,chromSize\$offset[j]+1),c(-1.5,1.5),
lwd=1,col="red")
}
}
# plot centromere "bars"
if (length(chrList) == 1) {
chr <- chrList[1]
polygon(c(centromere\$chromStart[chr],
centromere\$chromStart[chr],
centromere\$chromEnd[chr],
centromere\$chromEnd[chr] ),
c(-1.5,1.5,1.5,-1.5), border="gray", col="gray" )
} else {
for (j in 1:23) {
polygon(c(centromere\$chromStart[j]+chromSize\$offset[j],
centromere\$chromStart[j]+chromSize\$offset[j],
centromere\$chromEnd[j] +chromSize\$offset[j],
centromere\$chromEnd[j] +chromSize\$offset[j] ),
c(-1.5,1.5,1.5,-1.5), border="gray", col="gray" )
}
}
# write patient ID at the right
text(maxAddr+200000, 1,patient.ID_1,adj=c(0, 1),cex=1)
text(maxAddr+200000, 0," to", adj=c(0,0.5),cex=1)
text(maxAddr+200000,-1,patient.ID, adj=c(0, 0),cex=1)
text(maxAddr+textMargin, 2,paste("M", signif(mean(changes),digits=5)),adj=c(1,1),cex=0.7)
text(maxAddr+textMargin,-2,paste("SD",signif( sd(changes),digits=5)),adj=c(1,0),cex=0.7)
box()
}
# end of R corde for Paried Data ------------------------|g;
# do this again to set text margin to be the same as the other two plots
$Rcode =~ s|your-text-margin|$t_margin|g;
if ($non_informative eq "none") {
$Rcode =~ s|non_informative_color|type="n"|g;
} else {
$Rcode =~ s|non_informative_color|col="$non_informative"|g;
}
if ($retention eq "none") {
$Rcode =~ s|retention_color|type="n"|g;
} else {
$Rcode =~ s|retention_color|col="$retention"|g;
}
if ($NoCall_1 eq "none") {
$Rcode =~ s|NoCall_1_color|type="n"|g;
} else {
$Rcode =~ s|NoCall_1_color|col="$NoCall_1"|g;
}
if ($NoCall_2 eq "none") {
$Rcode =~ s|NoCall_2_color|type="n"|g;
} else {
$Rcode =~ s|NoCall_2_color|col="$NoCall_2"|g;
}
if ($NoCall_both eq "none") {
$Rcode =~ s|NoCall_both_color|type="n"|g;
} else {
$Rcode =~ s|NoCall_both_color|col="$NoCall_both"|g;
}
if ($allelic_loss eq "none") {
$Rcode =~ s|allelic_loss_color|type="n"|g;
} else {
$Rcode =~ s|allelic_loss_color|col="$allelic_loss"|g;
}
if ($allelic_gain eq "none") {
$Rcode =~ s|allelic_gain_color|type="n"|g;
} else {
$Rcode =~ s|allelic_gain_color|col="$allelic_gain"|g;
}
if ($diploid_switch eq "none") {
$Rcode =~ s|diploid_switch_color|type="n"|g;
} else {
$Rcode =~ s|diploid_switch_color|col="$diploid_switch"|g;
}
} else {
$Rcode =~ s|your-paired-data-selection||g;
$Rcode =~ s|your-paired-option||g;
}
# write out the code to be run by R
open RCODE_REAL, ">$upload_directory/$filename.R";
print RCODE_REAL $Rcode;
close RCODE_REAL;
# run the R code
system("cd $upload_directory; /usr/bin/R --no-save --no-environ < $upload_directory/$filename.R > $upload_directory/$filename.debug");
#########################################################################
# CONVERT IMAGES TO GIF #
#########################################################################
my $image = Image::Magick->new;
my $x = $image->Read("$upload_directory/$filename.ps");
$x = $image->Rotate(degrees=>90);
$x = $image->Sharpen(2);
$x = $image->Write("$upload_directory/$filename.tif");
$x = $image->Write("$upload_directory/$filename.pdf");
my $width = $image->Get('width');
my $height = $image->Get('height');
my $new_width = int($width/1.0);
my $new_height = int($height/1.0);
$x = $image->Scale(geometry=>"$new_width x $new_height");
$x = $image->Write("$upload_directory/$filename.png");
system("mv $upload_directory/$filename.png /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.tif /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.ps /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.pdf /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.R /home/httpd/html/uploads/SNPscan");
# handle CN_mean
$image = Image::Magick->new;
$x = $image->Read("$upload_directory/$filename.mean.ps");
$x = $image->Rotate(degrees=>90);
$x = $image->Sharpen(2);
$x = $image->Write("$upload_directory/$filename.mean.tif");
$x = $image->Write("$upload_directory/$filename.mean.pdf");
$x = $image->Scale(geometry=>"$new_width x $new_height");
$x = $image->Write("$upload_directory/$filename.mean.png");
system("mv $upload_directory/$filename.mean.png /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.mean.tif /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.mean.ps /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.mean.pdf /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.mean.csv /home/httpd/html/uploads/SNPscan");
# handle CN_sd
$image = Image::Magick->new;
$x = $image->Read("$upload_directory/$filename.sd.ps");
$x = $image->Rotate(degrees=>90);
$x = $image->Sharpen(2);
$x = $image->Write("$upload_directory/$filename.sd.tif");
$x = $image->Write("$upload_directory/$filename.sd.pdf");
$x = $image->Scale(geometry=>"$new_width x $new_height");
$x = $image->Write("$upload_directory/$filename.sd.png");
system("mv $upload_directory/$filename.sd.png /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.sd.tif /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.sd.ps /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.sd.pdf /home/httpd/html/uploads/SNPscan");
system("mv $upload_directory/$filename.sd.csv /home/httpd/html/uploads/SNPscan");
plot_copy_number($file, $filename);
exit 1;
sub dir_size {
my $dir = shift;
my $dir_size = 0;
# Loop through files and sum the sizes; doesn't descend down subdirs
opendir DIR, $dir or die "Unable to open $dir: $!";
while ( my $ldfile = readdir DIR ) {
$dir_size += -s "$dir/$ldfile";
}
return $dir_size;
}
sub error {
my( $q, $reason ) = @_;
print $q->header( "text/html" ),
$q->start_html( "Error" ),
$q->h1( "Error" ),
$q->p( "Your file was not procesed because the following error occured: " ),
$q->p( $q->i( $reason ) ),
$q->end_html;
exit;
}
sub plot_copy_number {
my( $file, $filename ) = @_;
print $q->header( "text/html" ),
$q->start_html( "SNPscan Plot" );
print <
SNPscan Plot
END_top
print "File uploaded : $file ",
"File name stored on SNPscan server : $filename
\n",
"Copy Number are represented in dots, color coded according to each SNP's Call info: AA, BB, AB, NoCall.
\n",
"If selected, LOHs are shown as gray bars, and p-Values are shown as yellow lines.
\n",
"CR: Call Rate of all selected chromosomes. AB: AB calls of selected autosomes, or chromosome X when by itself.
M and SD: Mean and Standard Deviation of Copy Number for selected autosomes, or for chromosome X when it's the only one selected.
\n",
"
",
"
";
print <The PNG output is shown above. Click on it to get the full size output in the TIF format.
Click on HERE (.pdf) to get the full size output in the PDF format.
Click on HERE (.ps) to get the full size output in the PostScript format.
Click on HERE (.R) to get a copy of the R codes generated by SNPscan. You need to change the output directory (/tmp/SNPscan) in this R code according to your system before you run it locally.
Sample Patterns:
 | Normal Autosomes |  | Duplication |
 | Normal Female Chromosome X |  | Diploid Deletion |
 | Normal Male Chromosome X |  | Hemizygous Deletion |
 | Mosaic Female Chromosome X |  | Mosaic Microdeletion |
 | Mislabelled 100K Chr X |  | Uniparental Isodisomy |
The PNG output is shown above. Click on it to get the full size output in the TIF format.
Click on HERE (.pdf) to get the full size output in the PDF format.
Click on HERE (.ps) to get the full size output in the PostScript format.
Click on HERE (.csv) to get a table of detailed copy number means.
The PNG output is shown above. Click on it to get the full size output in the TIF format.
Click on HERE (.pdf) to get the full size output in the PDF format.
Click on HERE (.ps) to get the full size output in the PostScript format.
Click on HERE (.csv) to get a table of detailed copy number standard deviations.
Back to SNPscan
Go to KKI SNP Analysis Tool