Compare vs PNAS

Counts-based analysis

This section has the code for running edgeR-robust and DESeq2 on the simulation data set using the known exons as features.

This first code chunk loads the necessary data.

## Track time spent on making the report
startTime <- Sys.time()

library('edgeR')

## Loading required package: limma
## Loading required package: methods

library('DESeq2')

## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:limma':
## 
##     plotMA
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unlist, unsplit
## 
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: Rcpp
## Loading required package: RcppArmadillo

library('GenomicRanges')

## Load data
load("../coverageToExon/covToEx-ucsc.Rdata")
load("../derAnalysis/run3-v1.0.10/groupInfo.Rdata")
load("../derAnalysis/run3-v1.0.10/colsubset.Rdata")

## GenomicState object
if(file.exists('/home/epi/ajaffe/Lieber/Projects/RNAseq/derannotator/rdas/GenomicState.Hsapiens.UCSC.hg19.knownGene.rda')) {
    load('/home/epi/ajaffe/Lieber/Projects/RNAseq/derannotator/rdas/GenomicState.Hsapiens.UCSC.hg19.knownGene.rda')
} else if(file.exists('../../GenomicState.Hsapiens.UCSC.hg19.knownGene.rda')) {
    load('../../GenomicState.Hsapiens.UCSC.hg19.knownGene.rda')
} else {
    stop('Missing UCSC hg19 genomic state object')
}

## Annotation used
exons <- GenomicState.Hsapiens.UCSC.hg19.knownGene$fullGenome
exons <- exons[exons$theRegion == 'exon']

## Round matrix and remove exons with 0s
counts <- round(covToEx[, colsubset])
nonzero <- sapply(rowSums(counts), function(x) {x > 0})

DESeq2

The following code performs the DESeq2 analysis. Code is based on edgeR_Robust supplementary code. The main change is that it has been modified for the multi-group scenario.

## Round matrix and specify design
dse <- DESeqDataSetFromMatrix(counts[nonzero, ], data.frame(group = groupInfo), ~ group)

## converting counts to integer mode

## Perform DE analysis
system.time( dse <- DESeq(dse, test = 'LRT', reduced = ~ 1) )

## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 194 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

##    user  system elapsed 
## 680.447   2.572 683.900

## Extract results
deseq <- exons[nonzero]
mcols(deseq) <- cbind(mcols(deseq), results(dse))

## Which are significant?
deseq$sig <- deseq$padj < 0.05
deseq$sig[is.na(deseq$sig)] <- FALSE

## Save results
save(deseq, file = 'deseq.Rdata')

## Adjust by Holm
deseq_holm <- deseq
deseq_holm$sig <- p.adjust(deseq_holm$pvalue, 'holm') < 0.05

edgeR-robust

The following code performs the DESeq2 analysis. Code is based on edgeR_Robust supplementary code. The main change is that it has been modified for the multi-group scenario.

## Determine design matrix
design <- model.matrix(~ groupInfo)

## Perform DE analysis
d <- DGEList(counts = counts[nonzero, ], group = groupInfo)
d <- calcNormFactors(d)
system.time(dw <- estimateGLMRobustDisp(d, design = design, prior.df = 10, maxit = 6))

##     user   system  elapsed 
## 1555.359    9.853 1567.200

fw <- glmFit(dw, design = design, coef = 2:3)
lrw <- glmLRT(fw, coef = 2:3)

## Extract results
edger <- exons[nonzero]
mcols(edger) <- cbind(mcols(edger), DataFrame(lrw$table))
edger$pvalue <-  lrw$table$PValue
edger$padj <- p.adjust(lrw$table$PValue, 'BH')

## Which are significant?
edger$sig <- edger$padj < 0.05
edger$sig[is.na(edger$sig)] <- FALSE

## Save results
save(edger, file = 'edger.Rdata')

## Adjust by Holm
edger_holm <- edger
edger_holm$sig <- p.adjust(edger_holm$pvalue, 'holm') < 0.05

Overlap

## Load data
load('../derAnalysis/run3-v1.0.10/fullRegions.Rdata')

## Some formatting and subsets
names(fullRegions) <- seq_len(length(fullRegions))
fullRegions$sigFWER <- as.logical(fullRegions$significantFWER)
fullRegs20 <- fullRegions[width(fullRegions) >= 20]

## Overlap table for all 4 cases
ov_table <- function(ders, counts, query = 'der', minov = 0) {
    if(query == 'der') {
        if(minov == 0) {
            res <- addmargins(table('Significant DER (FWER)' = ders$sigFWER, 'Overlaps sig DE exon' = countOverlaps(ders, counts[counts$sig]) > 0))
        } else {
            res <- addmargins(table(ders$sigFWER, countOverlaps(ders, counts[counts$sig], minoverlap = minov) > 0, dnn = c('Significant DER (FWER)', paste0('Overlaps sig DE exon (min ', minov, 'bp)'))))
        }
    } else if (query == 'counts') {
        if(minov == 0) {
            res <- addmargins(table('Significant DE exon' = counts$sig, 'Overlaps sig DER (FWER)' = countOverlaps(counts, ders[ders$sigFWER]) > 0))
        } else {
            res <- addmargins(table(counts$sig[width(counts) >= minov], countOverlaps(counts[width(counts) >= minov], ders[ders$sigFWER], minoverlap = minov) > 0, dnn = c('Significant DE exon', paste0('Overlaps sig DER (FWER, min ', minov, 'bp)'))))
        }
    }
    return(res)
}

## Explore mistmatched cases for DERs vs Exons direction
explore_ov <- function(ders, counts, case = "FALSE-TRUE", minov = 0L) {
    if(case == 'FALSE-TRUE') {
        i <- which(countOverlaps(ders, counts[counts$sig], minoverlap = minov) > 0 & !ders$sigFWER)
    } else if (case == 'TRUE-FALSE') {
        i <- which(!countOverlaps(ders, counts[counts$sig], minoverlap = minov) > 0 & ders$sigFWER)
    } else{
        stop('invalid case')
    }
    if(length(i) == 0) return("No such cases")
    
    if(case == 'FALSE-TRUE') {
        res <- list(
            n_overlaps = table(countOverlaps(ders[i], counts[counts$sig], minoverlap = minov)),
            width_der = summary(width(ders[i])),
            ders_per_exon_table = table(table(subjectHits(findOverlaps(ders[i], counts[counts$sig], minoverlap = minov)))),
            ders_per_exon = sort(table(subjectHits(findOverlaps(ders[i], counts[counts$sig], minoverlap = minov)))),
            i = i
        )
    } else {
        res <- list(
            width_der = summary(width(ders[i])),
            distance_nearest_sum = summary(mcols(distanceToNearest(ders[i], counts, ignore.strand = TRUE))$distance),
            distance_nearest_sig_sum = summary(mcols(distanceToNearest(ders[i], counts[counts$sig], ignore.strand = TRUE))$distance),
            distance_nearest = distanceToNearest(ders[i], counts, ignore.strand = TRUE),
            distance_nearest_sig = distanceToNearest(ders[i], counts[counts$sig], ignore.strand = TRUE),
            i = i
        )
    }
    
    return(res)
}

## Explore mistmatched cases for Exons vs DERs direction
explore_ov_counts <- function(ders, counts, case = "FALSE-TRUE", minov = 0L) {
    counts <- counts[width(counts) >= minov]
    if(case == 'FALSE-TRUE') {
        i <- which(countOverlaps(counts, ders[ders$sigFWER], minoverlap = minov) > 0 & !counts$sig)
    } else if (case == 'TRUE-FALSE') {
        i <- which(!countOverlaps(counts, ders[ders$sigFWER], minoverlap = minov) > 0 & counts$sig)
    } else{
        stop('invalid case')
    }
    if(length(i) == 0) return("No such cases")
    
    if(case == 'FALSE-TRUE') {
        res <- list(
            n_overlaps = table(countOverlaps(counts[i], ders[ders$sigFWER], minoverlap = minov)),
            width_exon = summary(width(counts[i])),
            exons_per_der_table = table(table(subjectHits(findOverlaps(counts[i], ders[ders$sigFWER], minoverlap = minov)))),
            exons_per_der = sort(table(subjectHits(findOverlaps(counts[i], ders[ders$sigFWER], minoverlap = minov)))),
            i = i
        )
    } else {
        res <- list(
            width_exon = summary(width(counts[i])),
            distance_nearest_sum = summary(mcols(distanceToNearest(counts[i], ders, ignore.strand = TRUE))$distance),
             distance_nearest_sig_sum = summary(mcols(distanceToNearest(counts[i], ders[ders$sigFWER], ignore.strand = TRUE))$distance),
            distance_nearest = distanceToNearest(counts[i], ders, ignore.strand = TRUE),
            distance_nearest_sig = distanceToNearest(counts[i], ders[ders$sigFWER], ignore.strand = TRUE),
            i = i
        )
    }
    
    return(res)
}

noNA <- function(x) {
    x[!is.na(x)]
}

DESeq2

Query: DERs

We can first compare the results by using the DERs as the query and the exons as the subject. The following output shows the comparison using all DERs and exploring the mismatched cases. Then its repeated using the DERs ≥ 20 bp and a minimum overlap of 20bp.

For the mismatched cases of non-significant DERs overlapping a significant exon, we check:

how many exons each DER overlaps,
the width of the DERs
the frequency table of how many DERs overlap the same exon

For the other mismatched case, we check:

the width of the DERs
distance to nearest exon (regardless of exon size)
distance to nearest significant DE exon (ibidem)

## Overlap between DERs and significant DE exons
ov_table(fullRegions, deseq)

##                       Overlaps sig DE exon
## Significant DER (FWER) FALSE  TRUE   Sum
##                  FALSE 16671 14792 31463
##                  TRUE    255   260   515
##                  Sum   16926 15052 31978

## Explore mismatched cases
#noNA(explore_ov(fullRegions, deseq)[1:3])
#noNA(explore_ov(fullRegions, deseq, 'TRUE-FALSE')[1:3])

## Min 20 bp overlap, using only DERs 20 bp long
ov_table(fullRegs20, deseq, minov = 20L)

##                       Overlaps sig DE exon (min 20bp)
## Significant DER (FWER) FALSE TRUE  Sum
##                  FALSE  1127 1151 2278
##                  TRUE    255  260  515
##                  Sum    1382 1411 2793

## Explore mismatched cases, min 20bp overlap
noNA(explore_ov(fullRegs20, deseq, minov = 20L)[1:3])

## $n_overlaps
## 
##    1    2 
## 1131   20 
## 
## $width_der
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   20.00   23.00   29.00   30.22   36.00   62.00 
## 
## $ders_per_exon_table
## 
##   1   2   3   4   5   6 
## 775 109  27  12   5   4

noNA(explore_ov(fullRegs20, deseq, 'TRUE-FALSE', minov = 20L)[1:3])

## $width_der
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.00   49.00   60.00   67.99   77.00  182.00 
## 
## $distance_nearest_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0     290   11010   16860  302400 
## 
## $distance_nearest_sig_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      54    7859   26350   60970   44550 2106000

## Holm vs BH
addmargins(table('DESeq2 Holm' = deseq_holm$sig, 'DESeq2 BH' = deseq$sig))

##            DESeq2 BH
## DESeq2 Holm  FALSE   TRUE    Sum
##       FALSE 199844  14185 214029
##       TRUE       4    387    391
##       Sum   199848  14572 214420

## Use Holm and min 20 bp ov
ov_table(fullRegs20, deseq_holm, minov = 20L)

##                       Overlaps sig DE exon (min 20bp)
## Significant DER (FWER) FALSE TRUE  Sum
##                  FALSE  2143  135 2278
##                  TRUE    433   82  515
##                  Sum    2576  217 2793

Most of the DERs are shorter than 20bp (91.27 percent), so we'll focus on the longer ones. The majority of the mismatches are from non significant DERs that overlap a significant exon.

As expected, when controlling the FWER instead of the FDR, most of the DE exons are no longer significant. Using FWER-controlled DE exons, most of the DERs 20bp or longer agree with the exons as not being significantly DE.

Query: exons

We can now repeat the comparison using the exons as the query and the DERs as the subject.

For the mismatched cases of non-significant exons overlapping a significant DER, we check:

how many DERs each exon overlaps,
the width of the exons
the frequency table of how many exons overlap the same DER

For the other mismatched case, we check:

the width of the exons
distance to nearest DER (regardless of DER size)
distance to nearest significant DER (ibidem)

## Overlap between exons and significant DERs
#ov_table(fullRegions, deseq, 'counts')

## Explore mismatched cases
#noNA(explore_ov_counts(fullRegions, deseq)[1:3])
#noNA(explore_ov_counts(fullRegions, deseq, 'TRUE-FALSE')[1:3])

## Overlap between exons and significant DERs, min 20 bp
ov_table(fullRegions, deseq, 'counts', 20L)

##                    Overlaps sig DER (FWER, min 20bp)
## Significant DE exon  FALSE   TRUE    Sum
##               FALSE 199497     98 199595
##               TRUE   14338    232  14570
##               Sum   213835    330 214165

## Explore mismatched cases
noNA(explore_ov_counts(fullRegions, deseq, minov = 20L)[1:3])

## $n_overlaps
## 
##  1  2  3  9 
## 87  8  2  1 
## 
## $width_exon
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    51.0   154.8   655.0  1328.0  1899.0  9761.0 
## 
## $exons_per_der_table
## 
##   1 
## 118

noNA(explore_ov_counts(fullRegions, deseq, 'TRUE-FALSE', minov = 20L)[1:3])

## $width_exon
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     21.0    143.0    332.5   1160.0   1578.0 205000.0 
## 
## $distance_nearest_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0    6440   86290   57270 5353000 
## 
## $distance_nearest_sig_sum
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##       54   526200  2054000  4818000  5809000 48690000      229

## Now with Holm
ov_table(fullRegions, deseq_holm, 'counts', 20L)

##                    Overlaps sig DER (FWER, min 20bp)
## Significant DE exon  FALSE   TRUE    Sum
##               FALSE 213513    261 213774
##               TRUE     322     69    391
##               Sum   213835    330 214165

From these results, we can see that derfinder is more conservative.

edgeR-robust

Query: DERs

Similar comparison using DERs as query and exons as subject with edgeR-robust results.

## Overlap between DERs and significant DE exons
#ov_table(fullRegions, edger)

## Explore mismatched cases
#noNA(explore_ov(fullRegions, edger)[1:3])
#noNA(explore_ov(fullRegions, edger, 'TRUE-FALSE')[1:3])

## Min 20 bp overlap, using only DERs 20 bp long
ov_table(fullRegs20, edger, minov = 20L)

##                       Overlaps sig DE exon (min 20bp)
## Significant DER (FWER) FALSE TRUE  Sum
##                  FALSE  1256 1022 2278
##                  TRUE    263  252  515
##                  Sum    1519 1274 2793

## Explore mismatched cases, min 20bp overlap
noNA(explore_ov(fullRegs20, edger, minov = 20L)[1:3])

## $n_overlaps
## 
##    1    2 
## 1006   16 
## 
## $width_der
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   20.00   24.00   30.00   30.46   36.00   58.00 
## 
## $ders_per_exon_table
## 
##   1   2   3   4   5   6  19 
## 677  95  24   9   4   4   1

noNA(explore_ov(fullRegs20, edger, 'TRUE-FALSE', minov = 20L)[1:3])

## $width_der
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    32.0    49.0    59.0    66.0    75.5   174.0 
## 
## $distance_nearest_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0     172   10680   16300  302400 
## 
## $distance_nearest_sig_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      54    4856   21090   50470   42110 2106000

## Holm vs BH
addmargins(table('edgeR Holm' = edger_holm$sig, 'edger BH' = edger$sig))

##           edger BH
## edgeR Holm  FALSE   TRUE    Sum
##      FALSE 197071  16827 213898
##      TRUE       0    522    522
##      Sum   197071  17349 214420

## With Holm, 20bp
ov_table(fullRegs20, edger_holm, minov = 20L)

##                       Overlaps sig DE exon (min 20bp)
## Significant DER (FWER) FALSE TRUE  Sum
##                  FALSE  2155  123 2278
##                  TRUE    450   65  515
##                  Sum    2605  188 2793

The results are fairly similar to those from using DESeq2.

Query: exons

Similar comparison using exons as query and DERs as subject with edgeR-robust results.

## Overlap between exons and significant DERs
#ov_table(fullRegions, edger, 'counts')

## Explore mismatched cases
#noNA(explore_ov_counts(fullRegions, edger)[1:3])
#noNA(explore_ov_counts(fullRegions, edger, 'TRUE-FALSE')[1:3])

## Overlap between exons and significant DERs, min 20 bp
ov_table(fullRegions, edger, 'counts', 20L)

##                    Overlaps sig DER (FWER, min 20bp)
## Significant DE exon  FALSE   TRUE    Sum
##               FALSE 196707    114 196821
##               TRUE   17128    216  17344
##               Sum   213835    330 214165

## Explore mismatched cases
noNA(explore_ov_counts(fullRegions, edger, minov = 20L)[1:3])

## $n_overlaps
## 
##   1   2   3   4 
## 105   7   1   1 
## 
## $width_exon
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    66.0   181.2   954.0  1396.0  1908.0  9761.0 
## 
## $exons_per_der_table
## 
##   1   2 
## 124   1

noNA(explore_ov_counts(fullRegions, edger, 'TRUE-FALSE', minov = 20L)[1:3])

## $width_exon
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     21.0    125.0    211.5    941.3   1066.0 205000.0 
## 
## $distance_nearest_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0     148   14720  115400   89810 5353000 
## 
## $distance_nearest_sig_sum
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##       54   612100  2251000  5073000  6170000 48690000      288

## With Holm, 20 bp
ov_table(fullRegions, edger_holm, 'counts', 20L)

##                    Overlaps sig DER (FWER, min 20bp)
## Significant DE exon  FALSE   TRUE    Sum
##               FALSE 213371    272 213643
##               TRUE     464     58    522
##               Sum   213835    330 214165

overall

While the DERs vs exons results are fairly similar between edgeR-robust and DESeq2, as shown below the number of mismatched cases is high compared to the number of cases both counts-based methods agree. This is also true when controlling the FWER to determine significance.

## edgeR vs DESeq2
addmargins(table('edgeR-robust (FDR)' = edger$sig, 'DESeq2 (FDR)' = deseq$sig))

##                   DESeq2 (FDR)
## edgeR-robust (FDR)  FALSE   TRUE    Sum
##              FALSE 194436   2635 197071
##              TRUE    5412  11937  17349
##              Sum   199848  14572 214420

## Control FWER
addmargins(table('edgeR-robust (FWER)' = edger_holm$sig, 'DESeq2 (FWER)' = deseq_holm$sig))

##                    DESeq2 (FWER)
## edgeR-robust (FWER)  FALSE   TRUE    Sum
##               FALSE 213832     66 213898
##               TRUE     197    325    522
##               Sum   214029    391 214420

## Only sig if both edgeR and DEseq2 say it is
both <- deseq
both$sig <- both$sig & edger$sig

## Same, for holm
both_holm <- deseq_holm
both_holm$sig <- both_holm$sig & edger_holm$sig

We can consider an exon to be DE only if both edgeR-robust and DESeq2 find that its significantly DE. The next sections use this information.

Query: DERs

## Overlap between DERs and significant DE exons
#ov_table(fullRegions, both)

## Explore mismatched cases
#noNA(explore_ov(fullRegions, both)[1:3])
#noNA(explore_ov(fullRegions, both, 'TRUE-FALSE')[1:3])

## Min 20 bp overlap, using only DERs 20 bp long
ov_table(fullRegs20, both, minov = 20L)

##                       Overlaps sig DE exon (min 20bp)
## Significant DER (FWER) FALSE TRUE  Sum
##                  FALSE  1326  952 2278
##                  TRUE    286  229  515
##                  Sum    1612 1181 2793

## Explore mismatched cases, min 20bp overlap
noNA(explore_ov(fullRegs20, both, minov = 20L)[1:3])

## $n_overlaps
## 
##   1   2 
## 936  16 
## 
## $width_der
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   20.00   24.00   30.00   30.49   36.00   58.00 
## 
## $ders_per_exon_table
## 
##   1   2   3   4   5   6 
## 647  89  21   9   4   4

noNA(explore_ov(fullRegs20, both, 'TRUE-FALSE', minov = 20L)[1:3])

## $width_der
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   29.00   50.00   60.00   67.59   76.75  182.00 
## 
## $distance_nearest_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0    9817   15010  302400 
## 
## $distance_nearest_sig_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      54    6497   23720   61260   43820 2106000

## Holm vs BH
addmargins(table('Both Holm' = both_holm$sig, 'Both BH' = both$sig))

##          Both BH
## Both Holm  FALSE   TRUE    Sum
##     FALSE 202479  11616 214095
##     TRUE       4    321    325
##     Sum   202483  11937 214420

## Use Holm and min 20 bp ov
ov_table(fullRegs20, both_holm, minov = 20L)

##                       Overlaps sig DE exon (min 20bp)
## Significant DER (FWER) FALSE TRUE  Sum
##                  FALSE  2185   93 2278
##                  TRUE    456   59  515
##                  Sum    2641  152 2793

The trends observed previously are maintained in this comparison with a reduction of cases where the exon is DE. This is expected due to the non-perfect agreement between DESeq2 and edgeR-robust.

library('TxDb.Hsapiens.UCSC.hg19.knownGene')

## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## 
## Attaching package: 'AnnotationDbi'
## 
## The following object is masked from 'package:GenomeInfoDb':
## 
##     species

library('derfinder')
library('derfinderHelper')
library('derfinderPlot')
load('../derAnalysis/run3-v1.0.10/models.Rdata')
load('../derAnalysis/run3-v1.0.10/chr22/optionsStats.Rdata')
load("../CoverageInfo/fullCov.Rdata")

def.par <- par()
def.par <- def.par[-which(names(def.par) %in% c('cin', 'cra', 'csi', 'cxy', 'din', 'page'))]

regPlot <- function(region, title) {
    ## Calculate F-stats
    range <- start(region):end(region)
    dat <- fullCov[[as.character(seqnames(region))]][range, colsubset]

    ## Log2 transform
    for(i in seq_len(length(groupInfo))) dat[[i]] <- log2(dat[[i]] + 32) 

    ## Calculate f-stats
    fstats <- as.numeric(fstats.apply(data = dat, mod = models$mod, mod0 = models$mod0))

    ## Find annotation
    annoReg <- annotateRegions(region, GenomicState.Hsapiens.UCSC.hg19.knownGene$fullGenome, verbose = FALSE)
    symbol <- mcols(annoReg$annotationList[[1]])$symbol
    symbol <- as.character(noNA(symbol)[[1]])
    if(length(symbol) > 1) symbol <- symbol[1]
    symbol <- ifelse(is.null(symbol), NA, symbol)
    ## Remove symbol name because it gets chomped on the plot
    mcols(annoReg$annotationList[[1]])$symbol <- NA
    
    par(def.par)

    ## Plot long exon
    plotRegionCoverage(region, getRegionCoverage(fullCov, region, verbose = FALSE), groupInfo, data.frame(name = title, distance = NA, region = symbol), annoReg, verbose = FALSE, ask = FALSE, txdb = TxDb.Hsapiens.UCSC.hg19.knownGene)

    ## Add F-stat track
    par(fig = c(0, 1, 0.065, 0.125), new = TRUE, xaxt = 'n', oma = c(0, 0, 0, 0), mar = c(0, 4.5, 0, 1.1))
    plot(y = fstats, x = range, ylab = 'F-stat', type = 'l', xlab = '', bty = 'n', ylim = c(0, max(fstats[is.finite(fstats)], optionsStats$cutoffFstatUsed) * 1.1), las = 2, yaxt = 'n')
    y.max <- round(max(c(optionsStats$cutoffFstatUsed, fstats[is.finite(fstats)]), na.rm = TRUE), 0)
    axis(2, at = c(0, round(y.max / 2, 0), y.max), c(0, round(y.max / 2, 0), y.max), las = 2, tick = TRUE)
    abline(h = optionsStats$cutoffFstatUsed, col = 'red')
    abline(h = 0, col = 'grey')
}

sortWidth <- function(regions) {
    regions[order(width(regions), decreasing = TRUE)]
}

We can now make plots to explore some DERs for each of the cases.

query_der_plots <- function() {
    sapply(sortWidth(fullRegs20[countOverlaps(fullRegs20, both[both$sig], minoverlap = 20L) > 0 & fullRegs20$sigFWER])[1:10], function(reg) {
        regPlot(reg, 'DER query: DE agreement')
    })

    sapply(sortWidth(fullRegs20[countOverlaps(fullRegs20, both[both$sig], minoverlap = 20L) == 0 & !fullRegs20$sigFWER])[1:10],  function(reg) {
        regPlot(reg, 'DER query: not DE agreement')
    })

    sapply(sortWidth(fullRegs20[countOverlaps(fullRegs20, both[both$sig], minoverlap = 20L) == 0 & fullRegs20$sigFWER])[1:10], function(reg) {
        regPlot(reg, 'DER query: only exon not DE')
    })

    sapply(sortWidth(fullRegs20[countOverlaps(fullRegs20, both[both$sig], minoverlap = 20L) > 0 & !fullRegs20$sigFWER])[1:10], function(reg) {
        regPlot(reg, 'DER query: only exon DE')
    })
}
pdf(file = 'query_der_plots.pdf', width = 10, height = 7)
query_der_plots()

## $`540`
## NULL
## 
## $`625`
## NULL
## 
## $`518`
## NULL
## 
## $`589`
## NULL
## 
## $`610`
## NULL
## 
## $`616`
## NULL
## 
## $`533`
## NULL
## 
## $`643`
## NULL
## 
## $`696`
## NULL
## 
## $`520`
## NULL

dev.off()

## pdf 
##   2

query_der_plots()

## $`540`
## NULL
## 
## $`625`
## NULL
## 
## $`518`
## NULL
## 
## $`589`
## NULL
## 
## $`610`
## NULL
## 
## $`616`
## NULL
## 
## $`533`
## NULL
## 
## $`643`
## NULL
## 
## $`696`
## NULL
## 
## $`520`
## NULL

Query: exons

As was shown with either DESeq2 or edgeR-robust results, derfinder is more conservative than the counts-based methods.

## Overlap between exons and significant DERs, min 20 bp
ov_table(fullRegions, both, 'counts', 20L)

##                    Overlaps sig DER (FWER, min 20bp)
## Significant DE exon  FALSE   TRUE    Sum
##               FALSE 202105    125 202230
##               TRUE   11730    205  11935
##               Sum   213835    330 214165

## Explore mismatched cases
noNA(explore_ov_counts(fullRegions, both, minov = 20L)[1:3])

## $n_overlaps
## 
##   1   2   3   4   9 
## 112   9   2   1   1 
## 
## $width_exon
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      51     180     890    1354    1905    9761 
## 
## $exons_per_der_table
## 
##   1   2 
## 147   1

noNA(explore_ov_counts(fullRegions, both, 'TRUE-FALSE', minov = 20L)[1:3])

## $width_exon
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      21     139     321    1179    1595  205000 
## 
## $distance_nearest_sum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0    6672   89960   60730 5353000 
## 
## $distance_nearest_sig_sum
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##       54   535600  2094000  4886000  5941000 48690000      199

## With Holm, 20 bp
ov_table(fullRegions, both_holm, 'counts', 20L)

##                    Overlaps sig DER (FWER, min 20bp)
## Significant DE exon  FALSE   TRUE    Sum
##               FALSE 213563    277 213840
##               TRUE     272     53    325
##               Sum   213835    330 214165

We can now visually explore some exons for each of the four cases.

query_exon_plots <- function() {
    sapply(sortWidth(both[width(both) >= 20 & both$sig & countOverlaps(both, fullRegions[fullRegions$sigFWER], minoverlap = 20L) > 0])[1:10], function(reg) {
        regPlot(reg, 'Exon query: DE agreement')
    })

    sapply(sortWidth(both[width(both) >= 20 & !both$sig & countOverlaps(both, fullRegions[fullRegions$sigFWER], minoverlap = 20L) == 0])[1:10], function(reg) {
        regPlot(reg, 'Exon query: not DE agreement')
    })

    sapply(sortWidth(both[width(both) >= 20 & !both$sig & countOverlaps(both, fullRegions[fullRegions$sigFWER], minoverlap = 20L) > 0])[1:10], function(reg) {
        regPlot(reg, 'Exon query: only exon not DE')
    })

    sapply(sortWidth(both[width(both) >= 20 & both$sig & countOverlaps(both, fullRegions[fullRegions$sigFWER], minoverlap = 20L) == 0])[1:10], function(reg) {
        regPlot(reg, 'Exon query: only exon DE')
    })
}
pdf(file = 'query_exon_plots.pdf', width = 10, height = 7)
query_exon_plots()

## $`290397`
## NULL
## 
## $`312032`
## NULL
## 
## $`473746`
## NULL
## 
## $`262083`
## NULL
## 
## $`69671`
## NULL
## 
## $`77501`
## NULL
## 
## $`97002`
## NULL
## 
## $`183187`
## NULL
## 
## $`105322`
## NULL
## 
## $`462539`
## NULL

dev.off()

## pdf 
##   2

query_exon_plots()

## $`290397`
## NULL
## 
## $`312032`
## NULL
## 
## $`473746`
## NULL
## 
## $`262083`
## NULL
## 
## $`69671`
## NULL
## 
## $`77501`
## NULL
## 
## $`97002`
## NULL
## 
## $`183187`
## NULL
## 
## $`105322`
## NULL
## 
## $`462539`
## NULL

Finding regions of interest

The code in this section is partially based on /home/epi/ajaffe/Lieber/Projects/RNAseq/HippoPublic/clean_previous_hits.R.

First we find the regions of the genome corresponding to the genes of interest. That is, the genes from the original paper that were differentially expressed in the conditions we are analyzing.

## Required pkgs
library("GenomicRanges")
library("ggbio")

## Loading required package: ggplot2
## Need specific help about ggbio? try mailing 
##  the maintainer or visit http://tengfei.github.com/ggbio/
## 
## Attaching package: 'ggbio'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     geom_bar, geom_rect, geom_segment, ggsave, stat_bin,
##     stat_identity, xlim

library("reshape2")
library("plyr")

## 
## Attaching package: 'plyr'
## 
## The following object is masked from 'package:IRanges':
## 
##     desc
## 
## The following object is masked from 'package:S4Vectors':
## 
##     rename

library("scales")

## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:ggbio':
## 
##     rescale

library("TxDb.Hsapiens.UCSC.hg19.knownGene")
library("org.Hs.eg.db")

## Loading required package: DBI

## Gene symbol names of interest
#### Original names
## symbols <- c("HIST1H4E", "RN7SK", "CDR1", "SNORD89", "SNORA73A", "SCARNA17", "PAPD1", "CACNB2", "LRCH4", "SNORD42A", "SNORA47", "LENG8", "FAM123A", "HIVEP3", "HNRPH1", "ZGPAT", "ERF", "SNORD116-29", "C9orf139", "C9orf3", "KCNA2", "EXOC6B", "CENTB5", "TAOK2", "TNRC6C", "ADAMTS4", "MSH4", "C16orf72", "CCR5")

## http://www.genenames.org/data/hgnc_data.php?hgnc_id=25532
## http://www.genenames.org/data/hgnc_data.php?hgnc_id=26360
## http://www.genenames.org/data/hgnc_data.php?hgnc_id=5041
## http://www.genenames.org/data/hgnc_data.php?hgnc_id=16754

## Updated symbols
symbols <- c("HIST1H4E", "RN7SK", "CDR1", "SNORD89", "SNORA73A", "SCARNA17", "MTPAP", "CACNB2", "LRCH4", "SNORD42A", "SNORA47", "LENG8", "AMER2", "HIVEP3", "HNRNPH1", "ZGPAT", "ERF", "SNORD116-29", "C9orf139", "C9orf3", "KCNA2", "EXOC6B", "ACAP3", "TAOK2", "TNRC6C", "ADAMTS4", "MSH4", "C16orf72", "CCR5")


## Map gene symbol names to entrezid's

keys <- keys(org.Hs.eg.db, keytype = "ENTREZID")
columns <- c("SYMBOL")
map <- select(org.Hs.eg.db, keys, columns, keytype = "ENTREZID")
idx <- sapply(symbols, function(x) { 
    res <- which(map$SYMBOL == x)
    ifelse(length(res) > 0, res, NA)
})
ids <- map$ENTREZID[idx]
names(ids) <- names(idx)

## Remove those not-found
ids <- ids[!is.na(ids)]

## Find the exons

txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
exonsUCSC <- exons(txdb, vals = list(gene_id =  ids), columns = c("gene_id", "exon_id", "tx_name", "tx_id"))

## some gene ids have multiples, straighten out
gids <- as.list(exonsUCSC$gene_id)
for(i in seq(along=gids)) gids[[i]] <- gids[[i]][which(gids[[i]] %in% ids)]
gids <- unlist(gids)


## split into list by EID
exonListUCSC <- split(exonsUCSC, gids)
exonListUCSC <- exonListUCSC[ ids[ids %in% gids] ]
### drop duplicated exons
# exonListUCSC = lapply(exonListUCSC, function(x) x[!duplicated(x)])
# identical(names(exonListUCSC), ids[ids %in% gids]) # TRUE

## Not found
ids[which(!ids %in% gids)]

##    RN7SK SNORA73A 
## "125050"   "6080"

## Find them manually
# http://www.ncbi.nlm.nih.gov/gene/125050
# http://www.ncbi.nlm.nih.gov/gene/6080
missing <- GRanges(seqnames=c("chr6", "chr1"), ranges=IRanges(start=c(52860418, 28833877), end=c(52860749, 28834083)))
toAdd <- split(missing, 1:2)
names(toAdd) <- ids[which(!ids %in% gids)]

## Reduce to min/max per gene
windows <- c(GRangesList(lapply(exonListUCSC, range)), toAdd)

## 
## Attaching package: 'XVector'
## 
## The following object is masked from 'package:plyr':
## 
##     compact

## Save for later use
save(windows, ids, idx, file="windows.Rdata")

Original genes

In this section, we make a plot for each gene showing the coverage data and whether derfinder identified candidate DERs as described in the main text.

## Find chrs used
chrs <- as.character(unique(unlist(seqnames(windows), use.names=FALSE)))

## Build ideograms
data(hg19IdeogramCyto, package = "biovizBase")
p.ideos <- lapply(chrs, function(xx) { 
    plotIdeogram(hg19IdeogramCyto, xx)
})
names(p.ideos) <- chrs


## Filter data
fullCovSmall <- lapply(chrs, function(chr) {
    fullCov[[chr]][, colsubset]
})
names(fullCovSmall) <- chrs
rm(fullCov)

## Main plotting function
plotClusterCustom <- function(cluster, regions, titleName, coverageInfo, groupInfo, titleUse="fwer", txdb=NULL, p.ideogram=NULL, maxExtend=300L, colsubset=NULL, forceLarge=FALSE) {

    stopifnot(is.factor(groupInfo))
    if(is.null(colsubset)) colsubset <- seq_len(length(groupInfo))
    
    ## Window length
    l <-  width(cluster) + 2 * min(maxExtend, width(cluster))
    
    if(l > 1e5 & !forceLarge) {
        message(paste("No plot will be made because the data is too large. The window size exceeds 100 kb."))
        return(invisible(l))
    }
    
    wh <- resize(cluster, l, fix="center")
    title <- paste("Window view for ENTREZ Symbol", titleName)
    
    ## Plot the ideogram if not supplied
    if(is.null(p.ideogram)) {
        chr <- as.character(seqnames(wh))
        ## Now load the ideogram info
        hg19IdeogramCyto <- NULL
        load(system.file("data", "hg19IdeogramCyto.rda", package="biovizBase", mustWork=TRUE))
        p.ideogram <- plotIdeogram(hg19IdeogramCyto, chr)
    }
    
    ## Regions found (from the view)
    neighbors <- regions[queryHits(findOverlaps(regions, wh))]
    if(length(neighbors) == 0) {
        neighbors <- wh
        neighbors$significant <- NA
        neighbors$significantQval <- NA
        neighbors$significantFWER <- NA
    } 
    if(titleUse == "pval") {
        p.region <- autoplot(neighbors, aes(fill=significant)) + 
        scale_fill_manual(values=c("chartreuse4", "wheat2"), limits=c("TRUE", "FALSE")) 
    } else if (titleUse == "qval" ){
        p.region <- autoplot(neighbors, aes(fill=significantQval)) +
        scale_fill_manual(values=c("chartreuse4", "wheat2"), limits=c("TRUE", "FALSE")) 
    } else if (titleUse == "fwer" ){
        p.region <- autoplot(neighbors, aes(fill=significantFWER)) +
        scale_fill_manual(values=c("chartreuse4", "wheat2"), limits=c("TRUE", "FALSE")) 
    } else {
        p.region <- autoplot(neighbors)
    }

    ## Graphical parameters
    nGroups <- length(levels(groupInfo))
    
    ## Construct the coverage plot
    pos <- start(wh):end(wh)
    rawData <- as.data.frame(coverageInfo[pos, colsubset])
    rawData$position <- pos
    covData <- melt(rawData, id.vars="position")
    covData$group <- rep(groupInfo, each=nrow(rawData))
    p.coverage <- ggplot(covData, aes(x=position, y=value, group=variable, colour=group)) + geom_line(alpha=1/nGroups) + scale_y_continuous(trans=log2_trans())
    
    ## Construct mean by group coverage plot
    meanCoverage <- ddply(covData, c("position", "group"), summarise, meanCov=mean(value))
    p.meanCov <- ggplot(meanCoverage, aes(x=position, y=meanCov, colour=group)) + geom_line(alpha=1/max(1, 1/2 * nGroups)) + scale_y_continuous(trans=log2_trans())
    
    ## Annotation info and final plot
    if(is.null(txdb)) {
        p.transcripts <- FALSE
    } else {
        ## The tryCatch is needed because not all regions overlap a transcript
        p.transcripts <- tryCatch(autoplot(txdb, which = wh, names.expr = "tx_name(gene_id)"), error = function(e) { FALSE })
    }   
    if(!is.logical(p.transcripts)) {
        result <- tracks(p.ideogram, "Coverage" = p.coverage, "Mean coverage" = p.meanCov, "Regions" = p.region, "tx_name\n(gene_id)" = p.transcripts, heights = c(2, 4, 4, 1.5, 3), xlim=wh, title=title) + ylab("") + theme_tracks_sunset()       
    } else {
        result <- tracks(p.ideogram, "Coverage" = p.coverage, "Mean coverage" = p.meanCov, "Regions" = p.region, heights = c(2, 5, 5, 2), xlim=wh, title=title) + ylab("") + theme_tracks_sunset()
    }
    return(result)  
}

## Plotting function
regionClusterPlot <- function(i, tUse="fwer") {
    ## Chr specific selections
    chr <- as.character(seqnames(windows[[i]]))
    p.ideo <- p.ideos[[chr]]
    covInfo <- fullCovSmall[[chr]]
    
    ## Make the plot
    p <- plotClusterCustom(windows[[i]], regions=fullRegions, titleName=names(ids)[ids == names(windows)[i]], coverageInfo=covInfo, groupInfo=groupInfo, titleUse=tUse, txdb=txdb, p.ideogram=p.ideo, forceLarge=TRUE)
    print(p)
    rm(p.ideo, covInfo)
    
    return(invisible(TRUE)) 
}

## Make plots
for(i in seq_len(length(windows))) {
    regionClusterPlot(i)
}

Reproducibility

Date the report was generated.

## [1] "2015-03-14 04:52:25 EDT"

Wallclock time spent generating the report.

## Time difference of 3.266 hours

R session information.

## Session info -----------------------------------------------------------------------------------------------------------

##  setting  value                                      
##  version  R version 3.1.1 Patched (2014-10-16 r66782)
##  system   x86_64, linux-gnu                          
##  ui       X11                                        
##  language (EN)                                       
##  collate  en_US.UTF-8                                
##  tz

## Packages ---------------------------------------------------------------------------------------------------------------

##  package                           * version     date       source                                            
##  acepack                           * 1.3-3.3     2013-05-03 CRAN (R 3.1.1)                                    
##  annotate                          * 1.44.0      2014-10-15 Bioconductor                                      
##  AnnotationDbi                       1.28.1      2014-10-29 Bioconductor                                      
##  base64enc                         * 0.1-2       2014-06-26 CRAN (R 3.1.0)                                    
##  BatchJobs                         * 1.5         2014-10-30 CRAN (R 3.1.1)                                    
##  BBmisc                            * 1.9         2015-02-03 CRAN (R 3.1.1)                                    
##  Biobase                             2.26.0      2014-10-15 Bioconductor                                      
##  BiocGenerics                        0.12.1      2014-11-15 Bioconductor                                      
##  BiocParallel                      * 1.0.3       2015-02-09 Bioconductor                                      
##  biomaRt                           * 2.22.0      2014-10-15 Bioconductor                                      
##  Biostrings                        * 2.34.1      2014-12-13 Bioconductor                                      
##  biovizBase                        * 1.14.1      2014-12-14 Bioconductor                                      
##  bitops                            * 1.0-6       2013-08-17 CRAN (R 3.1.0)                                    
##  brew                              * 1.0-6       2011-04-13 CRAN (R 3.1.0)                                    
##  BSgenome                          * 1.34.1      2014-12-31 Bioconductor                                      
##  bumphunter                        * 1.6.0       2014-10-15 Bioconductor                                      
##  Cairo                             * 1.5-6       2014-06-26 CRAN (R 3.1.0)                                    
##  checkmate                         * 1.5.1       2014-12-14 CRAN (R 3.1.1)                                    
##  cluster                           * 1.15.3      2014-09-04 CRAN (R 3.1.1)                                    
##  codetools                         * 0.2-9       2014-08-21 CRAN (R 3.1.1)                                    
##  colorspace                        * 1.2-6       2015-03-11 CRAN (R 3.1.1)                                    
##  DBI                                 0.3.1       2014-09-24 CRAN (R 3.1.1)                                    
##  derfinder                           1.0.10      2015-03-14 Bioconductor                                      
##  derfinderHelper                     1.0.4       2014-11-05 Github (lcolladotor/derfinderHelper@27bcfe6)      
##  derfinderPlot                       1.0.3       2015-02-22 Github (lcolladotor/derfinderPlot-release@d1dd4bd)
##  DESeq2                              1.6.3       2015-03-12 Bioconductor                                      
##  devtools                          * 1.7.0       2015-01-17 CRAN (R 3.1.1)                                    
##  dichromat                         * 2.0-0       2013-01-24 CRAN (R 3.1.0)                                    
##  digest                            * 0.6.8       2014-12-31 CRAN (R 3.1.1)                                    
##  doRNG                             * 1.6         2014-03-07 CRAN (R 3.1.0)                                    
##  edgeR                               3.8.6       2015-03-12 Bioconductor                                      
##  evaluate                          * 0.5.5       2014-04-29 CRAN (R 3.1.0)                                    
##  fail                              * 1.2         2013-09-19 CRAN (R 3.1.0)                                    
##  foreach                           * 1.4.2       2014-04-11 CRAN (R 3.1.0)                                    
##  foreign                           * 0.8-61      2014-03-28 CRAN (R 3.1.1)                                    
##  formatR                           * 1.0         2014-08-25 CRAN (R 3.1.1)                                    
##  Formula                           * 1.2-0       2015-01-20 CRAN (R 3.1.1)                                    
##  genefilter                        * 1.48.1      2014-10-17 Bioconductor                                      
##  geneplotter                       * 1.44.0      2014-10-15 Bioconductor                                      
##  GenomeInfoDb                        1.2.4       2014-12-20 Bioconductor                                      
##  GenomicAlignments                 * 1.2.2       2015-03-04 Bioconductor                                      
##  GenomicFeatures                     1.18.3      2014-12-17 Bioconductor                                      
##  GenomicFiles                      * 1.2.1       2015-02-08 Bioconductor                                      
##  GenomicRanges                       1.18.4      2015-01-08 Bioconductor                                      
##  GGally                            * 0.5.0       2014-12-02 CRAN (R 3.1.1)                                    
##  ggbio                               1.14.0      2014-11-04 Bioconductor                                      
##  ggplot2                             1.0.0       2014-05-21 CRAN (R 3.1.0)                                    
##  graph                             * 1.44.1      2014-12-10 Bioconductor                                      
##  gridExtra                         * 0.9.1       2012-08-09 CRAN (R 3.1.1)                                    
##  gtable                            * 0.1.2       2012-12-05 CRAN (R 3.1.0)                                    
##  Hmisc                             * 3.15-0      2015-02-16 CRAN (R 3.1.1)                                    
##  htmltools                         * 0.2.6       2014-09-08 CRAN (R 3.1.1)                                    
##  IRanges                             2.0.1       2014-12-13 Bioconductor                                      
##  iterators                         * 1.0.7       2014-04-11 CRAN (R 3.1.0)                                    
##  knitr                             * 1.9         2015-01-20 CRAN (R 3.1.1)                                    
##  knitrBootstrap                    * 1.0.0       2014-11-19 Github (jimhester/knitrBootstrap@76c41f0)         
##  labeling                          * 0.3         2014-08-23 CRAN (R 3.1.1)                                    
##  lattice                           * 0.20-29     2014-04-04 CRAN (R 3.1.1)                                    
##  latticeExtra                      * 0.6-26      2013-08-15 CRAN (R 3.1.0)                                    
##  limma                               3.22.7      2015-03-13 Bioconductor                                      
##  locfit                            * 1.5-9.1     2013-04-20 CRAN (R 3.1.0)                                    
##  markdown                          * 0.7.4       2014-08-24 CRAN (R 3.1.1)                                    
##  MASS                              * 7.3-35      2014-09-30 CRAN (R 3.1.1)                                    
##  Matrix                            * 1.1-4       2014-06-15 CRAN (R 3.1.1)                                    
##  matrixStats                       * 0.14.0      2015-02-14 CRAN (R 3.1.1)                                    
##  mime                              * 0.2         2014-09-26 CRAN (R 3.1.1)                                    
##  munsell                           * 0.4.2       2013-07-11 CRAN (R 3.1.0)                                    
##  nnet                              * 7.3-8       2014-03-28 CRAN (R 3.1.1)                                    
##  OrganismDbi                       * 1.8.1       2015-03-11 Bioconductor                                      
##  org.Hs.eg.db                        3.0.0       2014-09-27 Bioconductor                                      
##  pkgmaker                          * 0.22        2014-05-14 CRAN (R 3.1.0)                                    
##  plyr                                1.8.1       2014-02-26 CRAN (R 3.1.0)                                    
##  proto                             * 0.3-10      2012-12-22 CRAN (R 3.1.0)                                    
##  qvalue                            * 1.43.0      2015-03-06 Bioconductor                                      
##  RBGL                              * 1.42.0      2014-10-15 Bioconductor                                      
##  RColorBrewer                      * 1.1-2       2014-12-07 CRAN (R 3.1.1)                                    
##  Rcpp                                0.11.5      2015-03-06 CRAN (R 3.1.1)                                    
##  RcppArmadillo                       0.4.650.1.1 2015-02-26 CRAN (R 3.1.1)                                    
##  RCurl                             * 1.95-4.5    2014-12-06 CRAN (R 3.1.1)                                    
##  registry                          * 0.2         2012-01-24 CRAN (R 3.1.0)                                    
##  reshape                           * 0.8.5       2014-04-23 CRAN (R 3.1.0)                                    
##  reshape2                            1.4.1       2014-12-06 CRAN (R 3.1.1)                                    
##  rmarkdown                           0.5.1       2015-01-26 CRAN (R 3.1.1)                                    
##  rngtools                          * 1.2.4       2014-03-06 CRAN (R 3.1.0)                                    
##  rpart                             * 4.1-8       2014-03-28 CRAN (R 3.1.1)                                    
##  Rsamtools                         * 1.18.3      2015-03-04 Bioconductor                                      
##  RSQLite                             1.0.0       2014-10-25 CRAN (R 3.1.1)                                    
##  rstudioapi                        * 0.2         2014-12-31 CRAN (R 3.1.1)                                    
##  rtracklayer                       * 1.26.2      2014-11-12 Bioconductor                                      
##  S4Vectors                           0.4.0       2014-10-15 Bioconductor                                      
##  scales                              0.2.4       2014-04-22 CRAN (R 3.1.0)                                    
##  sendmailR                         * 1.2-1       2014-09-21 CRAN (R 3.1.1)                                    
##  stringr                           * 0.6.2       2012-12-06 CRAN (R 3.1.0)                                    
##  survival                          * 2.37-7      2014-01-22 CRAN (R 3.1.1)                                    
##  TxDb.Hsapiens.UCSC.hg19.knownGene   3.0.0       2014-09-29 Bioconductor                                      
##  VariantAnnotation                 * 1.12.9      2015-01-22 Bioconductor                                      
##  XML                               * 3.98-1.1    2013-06-20 CRAN (R 3.1.0)                                    
##  xtable                            * 1.7-4       2014-09-12 CRAN (R 3.1.1)                                    
##  XVector                             0.6.0       2014-10-15 Bioconductor                                      
##  yaml                              * 2.1.13      2014-06-12 CRAN (R 3.1.1)                                    
##  zlibbioc                          * 1.12.0      2014-10-15 Bioconductor