# SNPscan.R - plot the SNPscan info for all patients in input file # 2005-05-02 Jason Ting # last updated: 2006-05-04 # # Copyright (c) 2005-2006 Jason Ting and Jonathan Pevsner. # All Rights Reserved. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # read in tabales MTraw <- read.table("your-input-file", strip.white=T, header=T,sep="\t") chromSize <- read.table("/home/httpd/cgi-bin/SNPscan/chromSize.your-build-info", header=T,sep="\t") centromere <- read.table("/home/httpd/cgi-bin/SNPscan/centromere.your-build-info", header=T,sep="\t") # define type metrix type <- c(1,3,8,4,7,2,7,4,8,3,1,4,5,5,5,6) dim(type) <- c(4,4) rownames(type) <- c("AA","AB","BB","NoCall") colnames(type) <- c("AA","AB","BB","NoCall") # select only the specified chromosomes, chrX need to be identified as 'X' here MT <- subset(MTraw, your-selected-chromosomes ) # adjust positions to accumulatives # X in MT$Chromosome will cause as.numeric to generate a warning and saved as a 'NA' in o o <- as.numeric(levels(MT$Chromosome)[as.numeric(MT$Chromosome)]) # replace the 'NA' for chromosome X with '23', as needed by the offset of chromSize for (i in 1:length(o)) { if( is.na(o[i]) ) o[i] <- 23 } accumAddr <- MT$Physical.Position + chromSize$offset[o] # set list of chromosome numbers, using '23' for chrX ('24' for chrY, and '25' for chrM) chrList <- your-chromosome-list screen.ideo <- 0 # set min and max chromosomal physical positions if (length(chrList) == 1) { minAddr <- 0 maxAddr <- chromSize$size[chrList[1]] + 1000000 - (chromSize$size[chrList[1]] %% 1000000) accumAddr <- MT$Physical.Position # set it back, i.e. without offset screen.ideo <- 1 # add in the ideogram track } else { if (min(chrList) == 1) { minAddr <- 0 } else { minAddr <- chromSize$offset[min(chrList)] - (chromSize$offset[min(chrList)] %% 1000000) } maxAddr <- chromSize$offset[max(chrList)+1] } # set plot size ps.options(paper="special", width=x-axis-size, height=y-axis-size) postscript(file = "your-input-file.ps", bg="white") # set outer margins and plot a blank page par(omi=c(0.3,0.3,0.3,0.3)) # set outer margins plot(c(0,1),c(0,1),type="n",xlab="",ylab="",axes=F,xaxs="i") # count patients and set number of screens patient.cols <- grep("_SPA_CN", names(MT)) # one row per patient (+ ideogram) + footer split.screen(c(length(patient.cols) your-paired-data-selection +screen.ideo+1,1),erase=F) # get column numbers for LOH columns LOH.cols <- grep("_LOH", names(MT)) # get column numbers for Call columns Call.cols <- grep("_Call", names(MT)) # plot all the copy numbers for all patients pat.num <- 0 scrn <- 0 pat.IDs <- c() for (i in 1:length(patient.cols)) { patient.col <- patient.cols[i] your-pValue-columns # get this patient's ID patient.ID <- substring(names(MT)[patient.col],1,(nchar(names(MT)[patient.col])-7)) pat.IDs <- c(pat.IDs,patient.ID) # point to the specific screen and set margins pat.num <- pat.num + 1; scrn <- scrn + 1 screen(scrn) par(mai=c(0.05,0.5,0.05,0.1)) par(ps=9) # plot a blank to set up X and Y textMargin <- (maxAddr - minAddr) * your-text-margin plot(accumAddr,MT[,LOH.cols[i]],type="n", xlim=c(minAddr,maxAddr+textMargin), xlab="", ylab="",xaxs="i", ylim=c(0,your-maximum-copy-number), axes=T) # the textMargin above is to leave space for patient # optional HOL at back ground your-LOH-option # optional pValue line at back ground your-pValue-option # plot all the copy numbers for all chromosomes points(accumAddr,MT[,patient.col], col="royalblue",pch=20,cex=0.05) # plot over all AB calls with red color points(accumAddr[MT[,patient.col-1]=="AB"],MT[MT[,patient.col-1]=="AB",patient.col], col="red",pch=20,cex=0.05) # plot over all NoCall with green color points(accumAddr[MT[,patient.col-1]=="NoCall"],MT[MT[,patient.col-1]=="NoCall",patient.col], col="green",pch=20,cex=0.05) # plot lines for copy number equals to 1,2,3 lines(c(minAddr,max(accumAddr)),c(1,1),col="white") lines(c(minAddr,max(accumAddr)),c(2,2),col="white") lines(c(minAddr,max(accumAddr)),c(3,3),col="white") # plot lines to partition chromosomes if (length(chrList) == 1) { chr <- chrList[1] lines(c(0,0),c(-0.2,1),lwd=1) lines(c(chromSize$size[chr],chromSize$size[chr]),c(0.5,3.5),lwd=1) } else { for (j in 1:24) { lines(c(chromSize$offset[j]+1,chromSize$offset[j]+1),c(0.5,3.5), lwd=1,col="red") } } # plot centromere "bars" if (length(chrList) == 1) { chr <- chrList[1] polygon(c(centromere$chromStart[chr], centromere$chromStart[chr], centromere$chromEnd[chr], centromere$chromEnd[chr] ), c(0.5,3.5,3.5,0.5), border="gray", col="gray" ) } else { for (j in chrList) { # for (j in 1:23) { polygon(c(centromere$chromStart[j]+chromSize$offset[j], centromere$chromStart[j]+chromSize$offset[j], centromere$chromEnd[j]+chromSize$offset[j], centromere$chromEnd[j]+chromSize$offset[j] ), c(0.5,3.5,3.5,0.5), border="gray", col="gray" ) } } # get call rate info (note: the call rate info always includes chromosome X) Calls <- summary(MT[,Call.cols[i]]) AA <- Calls[grep("AA",names(Calls))] AB <- Calls[grep("AB",names(Calls))] BB <- Calls[grep("BB",names(Calls))] NoCall <- Calls[grep("NoCall",names(Calls))] text(maxAddr+100,your-maximum-copy-number,paste("CR ", signif((1-NoCall/(AA+AB+BB+NoCall))*100,digits=4),"%",spe=""),adj=c(0,1),cex=0.7) # write patient ID at the right text(maxAddr+200000,your-maximum-copy-number/2,patient.ID,adj=c(0,0.5),cex=1) # write Mean and Standard Deviation if (length(chrList)==1 & chrList[1]==23) { # chrX only text(maxAddr+textMargin-100,your-maximum-copy-number,paste("M", signif(mean(MT[,patient.col][MT$Chromosome=="X"]),digits=5)),adj=c(1,1),cex=0.7) text(maxAddr+textMargin-100,0,paste(" SD", signif( sd(MT[,patient.col][MT$Chromosome=="X"]),digits=5)),adj=c(1,0),cex=0.7) } else { text(maxAddr+textMargin-100,your-maximum-copy-number,paste("M", signif(mean(MT[,patient.col][MT$Chromosome!="X"]),digits=5)),adj=c(1,1),cex=0.7) text(maxAddr+textMargin-100,0,paste(" SD", signif( sd(MT[,patient.col][MT$Chromosome!="X"]),digits=5)),adj=c(1,0),cex=0.7) # get AB call info (note: this is excluding chromosome X) Calls <- summary(MT[,Call.cols[i]][MT$Chromosome!="X"]) AA <- Calls[grep("AA",names(Calls))] AB <- Calls[grep("AB",names(Calls))] BB <- Calls[grep("BB",names(Calls))] NoCall <- Calls[grep("NoCall",names(Calls))] } # write AB call rate text(maxAddr+100,0,paste("AB ", signif(AB/(AA+AB+BB+NoCall)*100,digits=4),"%",spe=""),adj=c(0,0),cex=0.7) box() your-paired-option } scrn.last_chr <- scrn # if single chromosome, plot that chromosomes' ideogram if (length(chrList) == 1) { if (chrList[1]==23) { chrom <- "chrX" } else if (chrList[1]==24) { chrom <- "chrY" } else { chrom <- paste("chr",chrList[1],sep="") } cytoBand <- read.table("/home/httpd/cgi-bin/SNPscan/cytoBandIdeo.your-build-info",head=T) cytoBand_chr <- cytoBand[as.character(cytoBand$chrom)==chrom,] cytoBand_chr_p <- cytoBand_chr[grep("^p",as.character(cytoBand_chr$name)),] cytoBand_chr_q <- cytoBand_chr[grep("^q",as.character(cytoBand_chr$name)),] p.bands <- length(cytoBand_chr_p$chromEnd) cut.left <- c() cut.right <- c() # 1st band of arm or 1st band after "stalk" # last band of arm or last band before "stalk" for (band in 1:length(cytoBand_chr$chromEnd)) { if (band==1) { cut.left[band] = T; cut.right[band] = F } else if (band==p.bands) { cut.left[band] = F; cut.right[band] = T } else if (band==(p.bands+1)) { cut.left[band] = T; cut.right[band] = F } else if (band==length(cytoBand_chr$chromEnd)) { cut.left[band] = F; cut.right[band] = T } else { cut.left[band] = F; cut.right[band] = F } } for (band in 1:length(cytoBand_chr$chromEnd)) { if (as.character(cytoBand_chr$gieStain[band])=="stalk") { cut.right[band-1] = T cut.left[band] = NA cut.right[band] = NA cut.left[band+1] = T } } scrn <- scrn +1 scrn.cytoBand <- split.screen(c(2,1),screen=scrn) screen(scrn.cytoBand[1]) par(mai=c(0.05,0.5,0.05,0.1)) plot(c(0,cytoBand_chr$chromEnd[length(cytoBand_chr$chromEnd)]),c(0,2), xlim=c(minAddr,maxAddr+textMargin),type="n",xlab="",ylab="",axes=F,xaxs="i") for (i in 1:length(cytoBand_chr$chromEnd)) { start <- cytoBand_chr$chromStart[i] end <- cytoBand_chr$chromEnd[i] delta = (end-start)/4 # decide color stain <- as.character(cytoBand_chr$gieStain[i]) if (stain=="gneg") { color <- "grey100" } else if (stain=="gpos25") { color <- "grey90" } else if (stain=="gpos50") { color <- "grey70" } else if (stain=="gpos75") { color <- "grey40" } else if (stain=="gpos100") { color <- "grey0" } else if (stain=="gvar") { color <- "grey100" } else if (stain=="acen") { color <- "brown4" } else if (stain=="stalk") { color <- "brown3" } else { color <- "white" } # drow this band if (is.na(cut.left[i]) & is.na(cut.right[i])) { # this is a "stalk", do not drow box. Drow two vertival lines instead delta <- (end-start)/3 lines(c(start+delta,start+delta),c(0,2),col=color) lines(c(end-delta,end-delta),c(0,2),col=color) } else if (cut.left[i] & cut.right[i]) { # cut both ends polygon(c(start,start+delta,end-delta,end,end,end-delta,start+delta,start), c(0.3,0,0,0.3,1.7,2,2,1.7),col=color) } else if (cut.left[i]) { # cut left end only polygon(c(start,start+delta,end,end,start+delta,start), c(0.3,0,0,2,2,1.7),col=color) } else if (cut.right[i]) { # cut right end only polygon(c(start,end-delta,end,end,end-delta,start), c(0,0,0.3,1.7,2,2),col=color) } else { polygon(c(start,end,end,start), c(0,0,2,2),col=color) } } screen(scrn.cytoBand[2]) par(mai=c(0.05,0.5,0.05,0.1),ps=6) plot(c(0,cytoBand_chr$chromEnd[length(cytoBand_chr$chromEnd)]),c(0,2), xlim=c(minAddr,maxAddr+textMargin),type="n",xlab="",ylab="",axes=F,xaxs="i") par(srt=90) # rotate text to vertical for (i in 1:length(cytoBand_chr$name)) { my.x <- (cytoBand_chr$chromStart[i]+cytoBand_chr$chromEnd[i])/2 text(x=my.x,y=1.7,labels=cytoBand_chr$name[i],adj=c(1,0.5)) lines(c(my.x,my.x),c(1.9,2)) } scrn <- scrn.last_chr + 1 # set back to before scree split par(srt=0) # rotate text back to horizontal } # write the footer row with chromsome numbers scrn <- scrn +1 screen(scrn) par(mai=c(0.5,0.5,0.05,0.1),ps=10) plot(c(minAddr,maxAddr+textMargin),c(-1,1),xlab="i",ylab="",axes=F,type="n",xaxs="i", xlim=c(minAddr,maxAddr+textMargin), bg="transparent") axis(1) # add the chromosome position at bottom if (length(chrList) == 1) { chr <- chrList[1] # draw chromosome bounderies first prior to changing chr23 to chrX lines(c(0,0),c(-0.2,1),lwd=1,col="red") lines(c(chromSize$size[chr],chromSize$size[chr]),c(-0.2,1),lwd=1,col="red") # write single chromosome number cntr <- chromSize$size[chr] / 2 if (chr==23) chr <- 'X' text(cntr,0.4,chr,col="red") } else { for (chr in c(1:23)) { cntr <- chromSize$offset[chr] + (chromSize$size[chr] / 2) if (chr==23) chr <- 'X' text(cntr,0.4,chr,col="red") } # plot lines to partition chromosomes for (j in 1:24) { lines(c(chromSize$offset[j]+1,chromSize$offset[j]+1),c(-0.2,1),lwd=1,col="red") } } close.screen(all=TRUE) dev.off() CN_mean <- c() # to be built as a data frame containning per chrom arm's mean CN CN_sd <- c() # or SD of CN, one column per patient case_count <- 0 case_colors <- c("red","orange","yellow","green","blue","violetred","purple", "orchid","khaki","skyblue","plum","salmon","lightcyan","brown", "lightpink","black","royalblue","darkseagreen","gold", "maroon","sienna","tomato","thistle","palegreen","steelblue", "azure","burlywood","springgreen","deeppink","indianred") #### Calculate mean and sd of Copy Number per chromosome arm for all patients for (case in patient.cols) { # form a vector per case, to be later combined into a data frame case_count <- case_count + 1 # calculate Copy Number related info case_CN_mean <- c() case_CN_sd <- c() for (chr in chrList) { # chrX is '23' in chrList if (chr == 23) { Chr <- 'X' } else { Chr <- chr } p_arm <- MT[,case][MT$Chromosome == Chr & MT$Physical.Position < centromere$chromStart[chr]] q_arm <- MT[,case][MT$Chromosome == Chr & MT$Physical.Position > centromere$chromStart[chr]] if (length(p_arm) > 0) { case_CN_mean <- c(case_CN_mean,mean(p_arm),mean(q_arm)) case_CN_sd <- c(case_CN_sd, sd(p_arm), sd(q_arm)) } else { case_CN_mean <- c(case_CN_mean,NA,mean(q_arm)) case_CN_sd <- c(case_CN_sd,NA, sd(q_arm)) } } if (case_count == 1) { CN_mean <- data.frame(case_CN_mean) CN_sd <- data.frame(case_CN_sd) } else { CN_mean <- data.frame(CN_mean,case_CN_mean) CN_sd <- data.frame(CN_sd, case_CN_sd) } } colnames(CN_mean) <- pat.IDs colnames(CN_sd) <- pat.IDs row_names <- c() for (chr in chrList) { if (chr == 23) { row_names <- c(row_names,'Xp','Xq') } else { row_names <- c(row_names,paste(chr,'p',sep=""),paste(chr,'q',sep="")) } } rownames(CN_mean) <- row_names rownames(CN_sd) <- row_names #### plot Mean of Copy Number per chromosome arm for all patient on one page postscript(file = "your-input-file.mean.ps", bg="white") # set outer margins and plot a blank page par(omi=c(0.5,0.5,0.5,0.5)) # set outer margins CN_top = as.integer(max(as.vector(CN_mean),na.rm=TRUE))+1 plot(c(0,length(row_names)+1),c(0,CN_top),type="n", main="Copy Number Means of your-file-name", xlab="Chromosome",ylab="Mean Copy Number",axes=F,xaxs="i") box() axis(2) for (i in 0:CN_top) { lines(c(0,length(row_names)+1),c(i,i),col="gray") # horizontal grid } for (j in 0:(length(row_names)/2)) { lines(c(j*2+0.5,j*2+0.5),c(0,CN_top),col="gray") # vertical grid } for (i in 1:length(patient.cols)) { case.col <- patient.cols[i] points(CN_mean[,i],col=case_colors[i],pch=3) lines(CN_mean[,i],type="l",lwd=1,col=case_colors[i]) } for (i in 1:length(rownames(CN_mean))) { text(i,-0.1,labels=rownames(CN_mean)[i],cex=0.7) } legend(1,1.7,legend=pat.IDs,lty=1,lwd=2,col=case_colors,bg="white",cex=0.7) dev.off() write.table(CN_mean,sep=',',file="your-input-file.mean.csv") #### plot SD of Copy Number per chromosome arm for all patient on one page postscript(file = "your-input-file.sd.ps", bg="white") # set outer margins and plot a blank page par(omi=c(0.5,0.5,0.5,0.5)) # set outer margins CN_top = as.integer(max(as.vector(CN_sd),na.rm=TRUE))+1 plot(c(0,length(row_names)+1),c(0,CN_top),type="n", main="Copy Number Standard Deviations of your-file-name", xlab="Chromosome",ylab="Standard Deviation",axes=F,xaxs="i") box() axis(2) for (i in 0:CN_top) { lines(c(0,length(row_names)+1),c(i,i),col="gray") # horizontal grid } for (j in 0:(length(row_names)/2)) { lines(c(j*2+0.5,j*2+0.5),c(0,CN_top),col="gray") # vertical grid } for (i in 1:length(patient.cols)) { case.col <- patient.cols[i] points(CN_sd[,i],col=case_colors[i],pch=3) lines(CN_sd[,i],type="l",lwd=1,col=case_colors[i]) } if (CN_top > 2) { for (i in 1:length(rownames(CN_sd))) { text(i,-0.1,labels=rownames(CN_sd)[i],cex=0.7) } legend(1,CN_top-0.1,legend=pat.IDs,lty=1,lwd=2,col=case_colors,bg="white",cex=0.7) } else { for (i in 1:length(rownames(CN_sd))) { text(i,-0.005,labels=rownames(CN_sd)[i],cex=0.7) } legend(1,0.3*CN_top,legend=pat.IDs,lty=1,lwd=2,col=case_colors,bg="white",cex=0.7) } dev.off() write.table(CN_sd,sep=',',file="your-input-file.sd.csv")