MAAPster_report.Rmd

---
title: "Example external report from the Microarray pipeline"
runtime: shiny
output:
  html_document:
    keep_md: yes
---

```{r setup, include=FALSE}
# Register a custom "rgl" chunk hook. It only acts after a chunk has run:
# when an rgl device is open, the current scene is written to
# <fig.path><label>.png and a LaTeX \includegraphics line is returned.
knit_hooks$set(rgl = function(before, options, envir) {
  # Nothing to do on the pre-chunk call or when no rgl device is active.
  if (before || rgl.cur() == 0) {
    return(invisible(NULL))
  }
  snapshot_stem <- paste0(options$fig.path, options$label)
  rgl.snapshot(paste0(snapshot_stem, '.png'), fmt = 'png')
  paste0('\\includegraphics{', snapshot_stem, '}\n')
})

# Register hook_webgl under the "webgl" chunk option.
knit_hooks$set(webgl = hook_webgl)

```

```{r global_options, include=FALSE}
# Global chunk defaults depend on the audience. External reports hide code,
# warnings, messages and printed results; internal reports show code,
# warnings and messages and leave `results` at its default.
external <- "TRUE"
show_internals <- !(external == "TRUE")

knitr::opts_chunk$set(
  fig.width = 12, fig.height = 8, fig.path = 'Figs/',
  echo = show_internals, warning = show_internals, message = show_internals
)
if (!show_internals) {
  # Only the external report suppresses printed results.
  knitr::opts_chunk$set(results = "hide")
}
```


#### **A) Generate QC plots on raw data before normalization**
<center> <h4> MA Plots before normalization </h4> </center>
```{r, fig.height=4,fig.width=8}  
  # MA plots of the raw data, laid out three to a row. MA plots visualize the
  # intensity-dependent ratio.
  par(mfrow = c(1, 3))

  # One plot per index in 1..nbfacs, each using samples 1..nbfacs as the
  # reference set. seq_len() is used instead of 1:nbfacs so that nbfacs == 0
  # yields no iterations rather than looping over c(1, 0).
  # NOTE(review): `which` looks like an array selector while nbfacs looks like
  # a group count — confirm the two are meant to coincide.
  ma_indices <- seq_len(nbfacs)
  for (i in ma_indices) {
    MAplot(celfiles, which = i, plotFun = smoothScatter,
           refSamples = ma_indices, cex = 1.5, cex.main = 1, main = '')
    cat("group : ", i, "plot done\n")
  }
```

```{r}
  # Intensity distribution of the raw arrays, before any normalization.
  # (A legend with sample names, e.g. legend("topright", ...), is not drawn.)
  hist(
    celfiles,
    which = "all",
    main = "Intensity distribution before normalization"
  )
```  

```{r}  
  # Boxplot of raw intensities, one box per array, labelled by SampleID.
  # The bottom and right margins are enlarged before drawing; las = 2 turns
  # the axis labels perpendicular to the axis.
  par(mar = c(8, 4, 4, 8) + 0.1)
  boxplot(
    celfiles,
    col = colors,
    which = "all",
    main = "Boxplots before normalization",
    las = 2,
    cex.axis = 0.7,
    names = pData(celfiles)$SampleID
  )
```  

```{r, message=FALSE, warning=FALSE}  
  # Relative log expression (RLE) plot. For every probe set on every array,
  # the log-scale expression estimate is compared with that probe set's median
  # across all arrays, and the resulting ratios are drawn as one box per array.
  # Since only relatively few genes are assumed to be differentially
  # expressed, the boxes should have a similar range and be centred near 0.
  # celfiles.qc must already exist when this chunk runs (presumably created
  # with fitProbeLevelModel(celfiles) — confirm against the calling code).
  par(mar = c(8, 4, 4, 8) + 0.1, las = 2, cex.axis = 0.8)

  # The plot is skipped for the two MoGene 1.x ST annotations below.
  is_mogene_1x <- celfiles@annotation == 'pd.mogene.1.0.st.v1' ||
    celfiles@annotation == 'pd.mogene.1.1.st.v1'
  if (!is_mogene_1x) {
    RLE(celfiles.qc, main = "RLE", names = celfiles$SampleID, col = colors)
  }
```

```{r}
 # NUSE plot: normalized standard error (SE) estimates from the PLM fit. The
 # SEs are normalized so that, per probe set, the median SE across arrays
 # equals 1; one box of NUSE values is drawn per array. Lower-quality arrays
 # show boxes that are centred higher and/or more spread out than the others;
 # boxes centred above about 1.1 typically indicate quality problems that
 # also show up in other QC measures.
 par(mar = c(8, 4, 4, 8) + 0.1, las = 2, cex.axis = 0.8)

 # The plot is skipped for the two MoGene 1.x ST annotations below.
 is_mogene_1x <- celfiles@annotation == 'pd.mogene.1.0.st.v1' ||
   celfiles@annotation == 'pd.mogene.1.1.st.v1'
 if (!is_mogene_1x) {
   NUSE(celfiles.qc, main = "NUSE", names = celfiles$SampleID, col = colors)
 }
```

```{r}
  #cat("Probeset presence Plot ...\n ")
  
  # present Absent Calls
  # PAp = paCalls(raw()[, 1:2])
  # if (input$Platform=="h133p2") {
  
  # Probeset presence plot: for each array, the percentage of rows in the
  # paCalls() p-value table that fall below 0.05, drawn as a line across
  # samples.
  #
  # The detection method is picked from the chip annotation:
  #   * annotations in mas5_chips -> paCalls(celfiles, "MAS5")
  #   * annotations in dabg_chips -> paCalls(celfiles, "PSDABG")
  #   * any other annotation      -> no presence plot
  # The Clariom S chips (human/mouse/rat, including the HT variants),
  # pd.hg.u219 and pd.hta.2.0 were previously listed in an explicit `if` with
  # an empty body; they belong to neither list and therefore still produce
  # no plot.
  mas5_chips <- c(
    "pd.hg.u133.plus.2", "pd.mouse430.2", "pd.hg.u133a", "pd.hg.u133a.2",
    "pd.mg.u74av2", "pd.mouse430a.2", "pd.moe430a", "pd.hg.u95av2",
    "pd.hg.u133b"
  )
  dabg_chips <- c(
    "pd.mogene.2.0.st", "pd.hugene.2.0.st", "pd.hugene.1.0.st.v1",
    "pd.mogene.1.0.st.v1", "pd.huex.1.0.st.v2", "pd.moex.1.0.st.v1",
    "pd.hugene.1.1.st.v1", "pd.mogene.1.1.st.v1"
  )
  chip <- celfiles@annotation

  # pa_pvalues stays NULL for chips without a supported detection method.
  pa_pvalues <- NULL
  if (chip %in% mas5_chips) {
    PApset <- paCalls(celfiles, "MAS5")
    # For MAS5 the p-values are read from the second element of the result.
    pa_pvalues <- PApset[[2]]
  } else if (chip %in% dabg_chips) {
    PApset <- paCalls(celfiles, "PSDABG")
    # For PSDABG the result itself is used as the p-value table.
    pa_pvalues <- PApset
  }

  if (!is.null(pa_pvalues)) {
    PApsetNC <- ncol(pa_pvalues)
    PApsetNR <- nrow(pa_pvalues)

    # Count of p-values < 0.05 per array (NA comparisons are not counted),
    # then expressed as a percentage of all rows, rounded to 2 decimals.
    pres <- colSums(pa_pvalues < 0.05, na.rm = TRUE)
    names(pres) <- pData(celfiles)$SampleID
    pres <- round((pres / PApsetNR) * 100, 2)

    par(mar = c(8, 4, 4, 8) + 0.1)
    plot(pres, type = "l", ylab = "Presence percentage",
         main = "Probesets presence", xaxt = "n", xlab = "")
    # NOTE(review): cex.lab is kept as in the original call; if smaller tick
    # labels were intended, cex.axis is probably the parameter meant — confirm.
    axis(1, at = seq_len(PApsetNC), labels = names(pres), las = 2,
         cex.lab = 0.4)
  }
  
  #if (celfiles@annotation=="pd.hg.u133.plus.2") {  
   #PApset = paCalls(raw(),"MAS5")
   #PApsetNC <- dim(PApset[[2]])[2]
   #PApsetNR <- dim(PApset[[2]])[1]
   #pres=c()
   #for (i in 1:PApsetNC) pres=c(pres,length(which(PApset[[2]][,i]<0.05)))
    #names(pres)=colnames(PApset[[2]])
  #} else {
  #PApset = paCalls(raw(),"PSDABG")
  #PApsetNC <- dim(PApset)[2]
  #PApsetNR <- dim(PApset)[1]
    
  #pres=c()
  #for (i in 1:PApsetNC) pres=c(pres,length(which(PApset[,i]<0.05)))
  # names(pres)=colnames(PApset)
  #}
  #head(PApset)
  
   #names(pres)=pData(raw())$SampleID
  
  #pres=round((pres/PApsetNR)*100,2)
  
  #hist(pres)
  #plot(density(pres))
  
  #png(paste(project,"-probesetcalls.png",sep=""),width=1000,height=1000)
  #par(mar=c(8, 4, 4, 8) + 0.1)
  #plot(pres, type="l",ylab="Presence percentage", main ="Probesets presence",xaxt="n", xlab="")
  #axis(1,at=c(1:PApsetNC),labels=names(pres),las=2,cex.lab=0.4)
```


#### **B) QC plots after normalization**
```{r}
  # Boxplot of the RMA-normalized data, one box per array, labelled by
  # SampleID. Margins are enlarged first; las = 2 turns the axis labels
  # perpendicular to the axis.
  par(mar = c(8, 4, 4, 8) + 0.1)
  boxplot(
    celfiles.rma,
    col = colors,
    main = "Boxplots after RMA normalization",
    las = 2,
    cex.axis = 0.8,
    names = pData(celfiles.rma)$SampleID
  )
```
<center> <h4> MA Plots after normalization </h4> </center>
```{r, fig.height=4,fig.width=8}  
  # MA plots of the RMA-normalized data, laid out three to a row.
  par(mfrow = c(1, 3))

  # One plot per index in 1..nbfacs, each using samples 1..nbfacs as the
  # reference set. seq_len() is used instead of 1:nbfacs so that nbfacs == 0
  # yields no iterations rather than looping over c(1, 0).
  # NOTE(review): `which` looks like an array selector while nbfacs looks like
  # a group count — confirm the two are meant to coincide.
  ma_indices <- seq_len(nbfacs)
  for (i in ma_indices) {
    MAplot(celfiles.rma, which = i, plotFun = smoothScatter,
           refSamples = ma_indices, cex = 1.5, cex.main = 1, main = '')
  }

``` 

```{r, message=FALSE, warning=FALSE}
  #trad.scatter.plot(exprs(celfiles.rma)[,1],exprs(celfiles.rma)[,2],fc.line.col="lightblue",col="blue")
 #arrayQualityMetrics(expressionset = celfiles.rma, intgroup=c("SampleGroup"), outdir = "QC_normalized", force =TRUE)
```

```{r}
  # Intensity distribution of the arrays after normalization.
  # (A legend with sample names, e.g. legend("topright", ...), is not drawn.)
  hist(
    celfiles.rma,
    main = "Intensity distribution after Normalization"
  )
```


```{r, results='asis'}


  #3D PCA

# Interactive 3D PCA of the normalized expression data.
# The expression matrix is transposed so that prcomp() treats each column of
# exprs(celfiles.rma) as one observation.
tedf <- t(exprs(celfiles.rma))

# Drop zero-variance columns (an issue with small sample sizes):
# prcomp(scale. = TRUE) cannot rescale a constant column. The variance is
# computed once; columns whose variance is NA are dropped as well instead of
# producing an NA column index.
probe_var <- apply(tedf, 2, var)
tedf <- tedf[, !is.na(probe_var) & probe_var != 0, drop = FALSE]

pca <- prcomp(tedf, scale. = TRUE)

# Draw into a null rgl device; the scene is emitted through rglwidget().
rgl.open(useNULL = TRUE)
bg3d('white')

# Spheres and their labels share the first three principal components.
# prcomp() returns no `y` or `z` element, so the coordinates are passed as one
# three-column matrix rather than as pca$x, pca$y, pca$z.
pc_scores <- pca$x[, 1:3]
plot3d(pc_scores, col = colors, type = 's', size = 2)
group.v <- as.vector(pData(celfiles.rma)$SampleID)
text3d(pc_scores, texts = group.v, cex = 0.6, adj = 2)
par3d(mouseMode = "trackball")     # not sure if this is needed
rglwidget()


#---------


```


```{r}

#2D PCA

#tedf= t(exprs(norm()))
       
 #removes zero  variances (issue with small sample sizes)
# if (length(which(apply(tedf, 2, var)==0)) >= 0){
#   tedf = tedf[ , apply(tedf, 2, var) != 0]
# }
 
# rownames(tedf)=pData(norm())$SampleID
 # tedf1 = data.frame(tedf)
# pr1=prcomp(tedf,scale.=T)
# ff <- factor(pData(norm())$SampleGroup)
# dd=cbind(tedf,group=as.character(ff))
 
# pc1.var=100*round(((pr1$sdev)**2)[1]/sum((pr1$sdev)**2),digits=2) # %var pc1 
# pc2.var=100*round(((pr1$sdev)**2)[2]/sum((pr1$sdev)**2),digits=2) # % var pc2
 
# xLab=paste("PC1 - ",pc1.var," % of variation",sep="")
# yLab=paste("PC2 - ",pc2.var," % of variation",sep="")
 
# autoplot(pr1,data=dd, colour = 'group', label = T, xlab=xLab, ylab=yLab)  

 
#  pr1=prcomp(tedf,scale.=T)
#  summary(pr1)
#  pr1$x # scores for the samples
#  (pr1$sdev)**2 ## variances for all , equiv to eigen values
#  head(pr1$rotation[,1:2]) # loading for pc1 and pc2 (pc vs genes)
  # pc are the eigen vectors
  
#  igp1=which(pData(norm())$SampleGroup==labfacs[1])
#  xrange=range(pr1$x[,1])
#  yrange=range(pr1$x[,2])
  
#  pc1.var=100*round(((pr1$sdev)**2)[1]/sum((pr1$sdev)**2),digits=2) # %var pc1 
#  pc2.var=100*round(((pr1$sdev)**2)[2]/sum((pr1$sdev)**2),digits=2) # % var pc2
  
#  xlab=paste("PC1 - ",pc1.var," % of variation",sep="")
#  ylab=paste("PC2 - ",pc2.var," % of variation",sep="")
  
  #png(paste(project,"-pca-rma.png",sep=""),width=1000,height=1000)
  
#  plot(pr1$x[igp1,1],pr1$x[igp1,2],col=1,main="prcomp after scaling",ylab=ylab,xlab=xlab,xlim=xrange,ylim=yrange)
#  for (i in 2:nbfacs)
#  {
#    igp=which(pData(norm())$SampleGroup==labfacs[i])
#    points(pr1$x[igp,1],pr1$x[igp,2],col=i)
    
#  }
#  legend('bottom', labfacs,lty=1, col=1:nbfacs, bty='n', cex=.6)
```

```{r}
  # MDS plot of the normalized expression matrix; points are labelled with
  # the SampleID of each array and coloured with `colors`.
  par(mar = c(10, 7, 1, 1))
  plotMDS(
    exprs(celfiles.rma),
    main = "MDS plot,after normalization",
    cex = 0.7,
    bty = "n",
    col = colors,
    labels = celfiles.rma$SampleID
  )
```


```{r}
  # Hierarchical clustering of the samples on the normalized expression
  # matrix. `eset` is reused by the heatmap chunk that follows, with columns
  # named "<SampleID>_<SampleGroup>".
  eset <- exprs(celfiles.rma)
  sample_info <- pData(celfiles.rma)
  colnames(eset) <- paste0(sample_info$SampleID, "_", sample_info$SampleGroup)

  # Standardize each column with scale() before clustering.
  eset.s <- scale(eset)

  # Two dissimilarities between samples: dist() on the transposed matrix
  # (Euclidean by default) and 1 - Pearson correlation. The names distance.s
  # and d2 are kept because plot() prints the hclust() call argument as the
  # x-axis label.
  distance.s <- dist(t(eset.s))
  d2 <- as.dist(1 - cor(eset.s))
  clusters.s <- hclust(distance.s)
  clusters.cor <- hclust(d2)

  # Dendrograms side by side.
  # Optional: rect.hclust(<tree>, k = 2, border = "red") outlines two clusters.
  par(mfrow = c(1, 2))
  plot(clusters.s, main = "Euclidean with scale")
  plot(clusters.cor, main = "Pearson Correlation with scale")
```

```{r, echo=FALSE, results='asis'}
  # Interactive heatmap of sample-to-sample distances computed with dist() on
  # the unscaled expression matrix `eset` from the clustering chunk.
  distylog2 <- dist(t(eset))
  mat <- as.matrix(distylog2)

  # Rows and columns are both labelled by SampleID.
  rownames(mat) <- pData(celfiles.rma)$SampleID
  colnames(mat) <- rownames(mat)

  # The palette is passed positionally, as in the original call
  # (presumably matched to heatmaply's `colors` argument — confirm).
  red_yellow <- colorRampPalette(colors = c("red", "yellow"))
  heatmaply(mat, red_yellow, margins = c(60, 100, 40, 20))
  # 
  #dev.copy(png,paste(project,"-heatmap_samplebysample-noscale.png",sep=""),width=1000,height=1000)
```

```{r}
  # One MA plot (geneplotter::plotMA) per element of mylist: average
  # expression on x, log fold change on y, and points flagged when
  # P.Value < 0.05.
  nb <- length(mylist)

  # seq_len() keeps an empty mylist from being indexed at 1 and 0, which
  # 1:nb would do.
  for (i in seq_len(nb)) {
    all.genes.con <- mylist[[i]]

    dataf <- data.frame(
      "m" = all.genes.con$AveExpr,
      "fc" = all.genes.con$logFC,
      "sig" = all.genes.con$P.Value < 0.05
    )
    geneplotter::plotMA(dataf, main = names(mylist)[i], ylim = c(-2, 2))
    legend("topright",
           legend = c("P-Value < 0.05", "Not significant"),
           col = c('red3', 'gray32'), pch = 16)
  }
 
```


```{r, echo=FALSE, results='asis'}
# Volcano plots: one interactive plotly scatter per element of mylist
# (log fold change on x, -log10 p-value on y), collected in a tagList that is
# printed at the end of the chunk.
library(plotly)
library(htmltools)

k <- htmltools::tagList()

# seq_along() keeps an empty mylist from being indexed at 1 and 0.
for (i in seq_along(mylist)) {
  dat <- mylist[[i]]

  # Drop genes without a symbol, whether stored as a real NA or as the string
  # "NA". A bare `!= 'NA'` comparison yields NA for real NAs, which row
  # subsetting turns into all-NA rows instead of removing them.
  dat <- dat[!is.na(dat$SYMBOL) & dat$SYMBOL != 'NA', ]

  p_value <- dat$P.Value
  abs_lfc <- abs(dat$logFC)

  # Significance class per gene; |logFC| >= 1 is labelled as fold change > 2.
  Significant <- rep("NotSignificant", length(dat$logFC))
  Significant[which(p_value < 0.05 & abs_lfc >= 1)] <- "AbsFoldChange>2 & PValue<0.05"
  Significant[which(p_value < 0.05 & abs_lfc < 1)] <- "PValue<0.05"
  Significant[which(p_value >= 0.05 & abs_lfc >= 1)] <- "AbsFoldChange>2"

  # data.frame() keeps the numeric columns numeric (cbind() would coerce every
  # column to character), so the plot can map columns through formulas.
  volcano_data <- data.frame(
    gene = dat$SYMBOL,
    log_FC = dat$logFC,
    log_pval = -log10(p_value),
    Significant = Significant,
    stringsAsFactors = FALSE
  )

  # The x axis is in log2 units but labelled with the signed fold change.
  k[[i]] <- plot_ly(
    type = 'scatter', data = volcano_data,
    x = ~log_FC, y = ~log_pval, text = ~gene,
    mode = "markers", color = ~Significant
  ) %>%
    layout(
      title = paste0('Volcano plot for: ', names(mylist)[i]),
      xaxis = list(
        title = "Fold Change",
        range = c(-5, 5),
        tickvals = c(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5),
        ticktext = c('-32', '-16', '-8', '-4', '-2', '1', '2', '4', '8', '16', '32')
      ),
      yaxis = list(title = "-Log10 pvalue", range = c(0, 15))
    )
}
k
```