Contents

Here we evaluate the performance of the different pipelines we used to analyze the simulated data. Evaluating the performance is tricky and depends on how you define the reference set. Here we show the results by using all exons and exons that overlap only 1 transcript. The second set removes ambiguity in determining the truth and is thus the most useful. Other possible reference sets can be considered and you can find the results in previous versions of this document. We removed them from the final version to avoid confusion.

1 Setup

First, we load the required libraries.

## Load required libraries
library('GenomicRanges')
library('TxDb.Hsapiens.UCSC.hg19.knownGene')
library('knitr')
library('devtools')
library('ggplot2')

1.1 Load data

Next we load the required data from the simulation setup as well as the statistical results from the different pipelines. We then format the results in a way that we can easily access them later on.

## Load simulation information
load(file.path('..', 'generateReads', 'simulation_info.Rdata'))

## Load transcripts used
txdb <- keepSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene, 'chr17')
txinfo <- select(txdb, keys = chosen$ucsckg_id, columns = columns(txdb), keytype = 'TXNAME')
## 'select()' returned 1:many mapping between keys and columns
## Build GRangesList with exons grouped by transcript
tx <- split(GRanges(seqnames = txinfo$EXONCHROM, IRanges(start = txinfo$EXONSTART, end = txinfo$EXONEND), strand = txinfo$EXONSTRAND), txinfo$TXNAME)
tx <- tx[match(chosen$ucsckg_id, names(tx))]


if(!identical(nrow(chosen), length(tx))) {
    message(paste(Sys.time(), 'Subsetting list of transcripts to those that were used when simulating the reads'))
    tx <- tx[names(tx) %in% chosen$gene_id]
    stopifnot(unique(sum(width(tx)) - chosen$width) == 0)
}

## Load stat results
inputs <- c('featureCounts', 'railMatrix', 'regionMatrix', 'ballgown')
reps <- 1:3
softwares <- c('DESeq2', 'edgeR', 'limma')
statuses <- c('complete', 'incomplete')

## Function for loading results from deseq2-edger folder
loadDE <- function(software, file) {
    load(file.path('..', 'deseq2-edger', paste0('stats-', file, '-', software, '.Rdata')))
    if(software == 'DESeq2') {
        DEres <- deseq
    } else if (software == 'edgeR') {
        DEres <- edger
    } else if (software == 'limma') {
        DEres <- limma
    }
    return(DEres)
}

## Function for loading ballgown results
loadBG <- function(level, file) {
    load(file.path('..', 'ballgown', paste0('bgres-', level, '-', file, '.Rdata')))
    return(bgres)
}

## Actually load the stats results
stats <- lapply(inputs, function(input) {
    inputres <- lapply(reps, function(rep) {
        if(input %in% c('featureCounts', 'ballgown')) {
            statusres <- lapply(statuses, function(status) {
                if(input == 'ballgown') {
                    res <- lapply(c('trans', 'exon'), loadBG, file = 
                        paste0('R', rep, ifelse(status == 'complete', '-comp', '-inc')))
                    names(res) <- c('trans', 'exon')
                } else if (input == 'featureCounts') {
                    res <- lapply(softwares, loadDE, file = paste0('featureCounts-R', rep, 
                        ifelse(status == 'complete', '-comp', '-inc')))
                    names(res) <- softwares
                }
                return(res)      
            })
            names(statusres) <- statuses
            return(statusres)
        } else {
            softres <- lapply(softwares, loadDE, file = paste0(input, '-R', rep))
            names(softres) <- softwares
            return(softres)
        }
    })
    names(inputres) <- reps
    return(inputres)
})
names(stats) <- inputs

## Group the stats results into a list without nested levels
stats_GR <- lapply(inputs, function(input) {
    if(input %in% c('featureCounts', 'ballgown')) {
        res <- lapply(stats[[input]], unlist)
    } else {
        res <- unlist(stats[[input]])
    }
    res <- unlist(res)
    return(res)
})
names(stats_GR) <- inputs
stats_GR <- unlist(stats_GR)

1.2 Evaluation functions

The following code defines the functions we will use to evaluate the statistical results.

## count_comp compares the information at hand versus the reference set
count_comp <- function(info, rep = 1, type = 'padj', reference, cut = 0.05) {
    if(type == 'padj') {
        idx <- mcols(info)$padj < cut
    } else if (type == 'qval') {
        idx <- mcols(info)$qval < cut
    } else if (type == 'pbonf') {
        idx <- mcols(info)$pbonf < cut
    }
    
    idx[is.na(idx)] <- FALSE

    ## Overlaps at least 1 DE 'reference' unit
    addmargins(table('DE truth' = mcols(reference)[[paste0('DEr', rep)]],
        'Overlaps DE' = countOverlaps(reference, info[idx]) > 0))
}


## Functions for evaluating empirical power, FPR and FDR, plus summarizing 
## the info
emp_power <- function(m, digits = 1) {
    round(m[2, 2] / m[2, 3] * 100, digits)
}
emp_fpr <- function(m, digits = 1) {
    round(m[1, 2] / m[1, 3] * 100, digits)
}
emp_fdr <- function(m, digits = 1) {
    round(m[1, 2] / m[3, 2] * 100, digits)
}

## Detailed table with the results for each replicate of the simulation
emp <- function(tables) {
    empirical <- data.frame(
        power = sapply(tables, emp_power),
        FPR = sapply(tables, emp_fpr),
        FDR = sapply(tables, emp_fdr)
    )
    empirical$replicate <- as.integer(sapply(strsplit(names(tables), '\\.'), '[[', 2))
    empirical$AnnotationComplete <- NA
    empirical$AnnotationComplete[grepl('\\.complete', names(tables))] <- TRUE
    empirical$AnnotationComplete[grepl('\\.incomplete', names(tables))] <- FALSE
    empirical$Aligner <- 'HISAT'
    empirical$Aligner[grepl('rail', names(tables))] <- 'Rail-RNA'
    empirical$SummaryMethod <- 'StringTie'
    empirical$SummaryMethod[grepl('Matrix', names(tables))] <- 'derfinder'
    empirical$SummaryMethod[grepl('featureCounts', names(tables))] <- 'featureCounts'
    empirical$StatMethod <- 'DESeq2'
    empirical$StatMethod[grepl('edgeR', names(tables))] <- 'edgeR'
    empirical$StatMethod[grepl('limma', names(tables))] <- 'limma'
    empirical$StatMethod[grepl('ballgown.*exon', names(tables))] <- 'ballgown-exon'
    empirical$StatMethod[grepl('ballgown.*trans', names(tables))] <- 'ballgown-trans'
    rownames(empirical) <- NULL
    return(empirical)
}

## This function takes the result from emp() and summarizes it by showing
## the minimum and maximum value per replicate
emp_sum_paste <- function(x) { paste0('(', x[1], '-', x[2], ')') }
emp_sum <- function(empinfo) {
    empinfo$situation <- paste(empinfo$AnnotationComplete, empinfo$Aligner, 
        empinfo$SummaryMethod, empinfo$StatMethod, sep=';')
    empsum <- data.frame(
        'Power' = sapply(tapply(empinfo$power, empinfo$situation, range),
            emp_sum_paste),
        'FPR' = sapply(tapply(empinfo$FPR, empinfo$situation, range),
            emp_sum_paste),
        'FDR' = sapply(tapply(empinfo$FDR, empinfo$situation, range),
            emp_sum_paste)
    )
    empsum$AnnotationComplete <- as.logical(sapply(strsplit(rownames(empsum), ';'), '[[', 1))
    empsum$Aligner <- sapply(strsplit(rownames(empsum), ';'), '[[', 2)
    empsum$SummaryMethod <- sapply(strsplit(rownames(empsum), ';'), '[[', 3)
    empsum$StatMethod <- sapply(strsplit(rownames(empsum), ';'), '[[', 4)
    empsum <- empsum[c(1, 12, 2, 13, 3, 14, 4, 15, 5, 16, 6, 9, 7, 10, 8, 11), ]
    rownames(empsum) <- NULL
    empsum 
}

## Summarize by taking the mean
emp_sum_mean <- function(empinfo) {
    empinfo$situation <- paste(empinfo$AnnotationComplete, empinfo$Aligner, 
        empinfo$SummaryMethod, empinfo$StatMethod, sep=';')
    empsum <- data.frame(
        'Power' = tapply(empinfo$power, empinfo$situation, mean, na.rm = TRUE),
        'FPR' = tapply(empinfo$FPR, empinfo$situation, mean, na.rm = TRUE),
        'FDR' = tapply(empinfo$FDR, empinfo$situation, mean, na.rm = TRUE)
    )
    empsum$AnnotationComplete <- as.logical(sapply(strsplit(rownames(empsum), ';'), '[[', 1))
    empsum$Aligner <- sapply(strsplit(rownames(empsum), ';'), '[[', 2)
    empsum$SummaryMethod <- sapply(strsplit(rownames(empsum), ';'), '[[', 3)
    empsum$StatMethod <- sapply(strsplit(rownames(empsum), ';'), '[[', 4)
    empsum <- empsum[c(1, 12, 2, 13, 3, 14, 4, 15, 5, 16, 6, 9, 7, 10, 8, 11), ]
    rownames(empsum) <- NULL
    empsum 
}

## index_comp is similar to count_comp but it returns the actual indices for
## the false positives, true positives, etc
index_comp <- function(info, rep = 1, type = 'padj', reference, cut = 0.05) {
    if(type == 'padj') {
        idx <- mcols(info)$padj < cut
    } else if (type == 'qval') {
        idx <- mcols(info)$qval < cut
    } else if (type == 'pbonf') {
        idx <- mcols(info)$pbonf < cut
    }
    
    idx[is.na(idx)] <- FALSE

    ## Overlaps at least 1 DE 'reference' unit
    ov <- countOverlaps(reference, info[idx]) > 0
    TP <- mcols(reference)[[paste0('DEr', rep)]] & ov
    TN <- !mcols(reference)[[paste0('DEr', rep)]] & !ov
    FP <- !mcols(reference)[[paste0('DEr', rep)]] & ov
    FN <- mcols(reference)[[paste0('DEr', rep)]] & !ov
    return(list(TruePositive = TP, TrueNegative = TN, FalsePositive = FP, FalseNegative = FN))
}

## case_result uses the information from index_comp to make a nice summary table
case_result <- function(idx, r, reference) { 
    res <- lapply(names(idx), function(i) {
        cases <- mcols(reference)[[paste0('case', r)]][idx[[i]]]
        if(length(cases) == 0) return(NULL)
        data.frame(case = cases, result = i, stringsAsFactors = FALSE)
    })
    res <- do.call(rbind, res)
    addmargins(table("Case" = res$case, "Result" = res$result))
}

## Chose the most frequent case, used for choosing the case in the exons 
## reference sets.exon
mostFreq <- function(cases) {
    if(length(cases) == 1) return(cases)
    names(sort(table(cases), decreasing = TRUE))[1]
}

## Process exons set to create a reference set
processExons <- function(exons_set) {
    ov <- findOverlaps(exons_set, tx)
    ## Find DE status per exon
    sHits <- subjectHits(ov)
    qHits <- queryHits(ov)
    exons_set$DEr1 <- as.vector(tapply(sHits, qHits, function(y) {
        ## Note that we could change this from any() to all() and that
        ## would impact change whether the exon is labeled as DE or not.
        any(chosen$rep1[y] != 'normal')
    }))
    exons_set$DEr2 <- as.vector(tapply(sHits, qHits, function(y) {
        any(chosen$rep2[y] != 'normal')
    }))
    exons_set$DEr3 <- as.vector(tapply(sHits, qHits, function(y) {
        any(chosen$rep3[y] != 'normal')
    }))

    exons_set$cases1 <- as.vector(tapply(sHits, qHits, function(y) {
        chosen$case1[y]
    }))
    exons_set$cases2 <- as.vector(tapply(sHits, qHits, function(y) {
        chosen$case2[y]
    }))
    exons_set$cases3 <- as.vector(tapply(sHits, qHits, function(y) {
        chosen$case3[y]
    }))
    exons_set$case1 <- unname(sapply(exons_set$cases1, mostFreq))
    exons_set$case2 <- unname(sapply(exons_set$cases2, mostFreq))
    exons_set$case3 <- unname(sapply(exons_set$cases3, mostFreq))
    return(exons_set)
}

1.3 Reference sets

We can now proceed to defining our reference sets. They are:

  1. Exon level with DE status given by whether the exon overlaps any transcript with low or high expression. Note that if two transcripts share the same exon, that exon will appear twice in this reference set.
  2. Exons that overlap only 1 transcript with DE status given by the transcript they overlap. These exons are not ambiguous in the truth assignment and is thus the most reliable reference set.
## Transcript level info
#trans <- tx
#mcols(trans)$DEr1 <- chosen$rep1 != 'normal'
#mcols(trans)$DEr2 <- chosen$rep2 != 'normal'
#mcols(trans)$DEr3 <- chosen$rep3 != 'normal'
#mcols(trans)$case1 <- chosen$case1
#mcols(trans)$case2 <- chosen$case2
#mcols(trans)$case3 <- chosen$case3

## Transcript level information with DE status given by the gene level
#trans_case <- trans
#mcols(trans_case)$DEr1 <- chosen$case1 %in% c('allDE', 'singleDE', 'someDE')
#mcols(trans_case)$DEr2 <- chosen$case2 %in% c('allDE', 'singleDE', 'someDE')
#mcols(trans_case)$DEr3 <- chosen$case3 %in% c('allDE', 'singleDE', 'someDE')


## Create exon level reference
exons <- processExons(unlist(tx))

## Create disjoint exon level reference
#exons_disjoin <- processExons(disjoin(unlist(tx)))

## Exons overlapping only 1 transcript
exons_one <- exons[countOverlaps(exons, tx) == 1]

## Disjoint exons overlapping only 1 transcript
#exons_disjoin_one <- exons_disjoin[countOverlaps(exons_disjoin, trans) == 1]


## Explore the number and percent of DE cases
percDE <- function(reference) {
    res <- lapply(1:3, function(r) {
        round(table(mcols(reference)[[paste0('DEr', r)]]) / length(reference) * 100, 2)
    })
    names(res) <- paste0('rep', 1:3)
    unlist(res)
}

## First at the transcript level, which matches exactly the 2/6 of transcripts 
## set to be DE (1/6 low, 1/6 high)
#percDE(trans)

## Next at the transcript level with DE status given by the gene level
## The percent DE is highly increased given that genes with more than 1
## transcript are likely to have at least 1 DE.
#percDE(trans_case)

## At the exon level, the percent with the truth set as DE is similar than a
## the transcript (DE by gene) level and is highly increased.
percDE(exons)
## rep1.FALSE  rep1.TRUE rep2.FALSE  rep2.TRUE rep3.FALSE  rep3.TRUE 
##      24.34      75.66      26.20      73.80      24.56      75.44
## Exons overlapping only 1 transcript. The percent is close to the one used in
## the simulation setup. That is: 2/6 of transcripts set to be DE (1/6 low, 1/6 
## high)
percDE(exons_one)
## rep1.FALSE  rep1.TRUE rep2.FALSE  rep2.TRUE rep3.FALSE  rep3.TRUE 
##      59.64      40.36      68.69      31.31      68.12      31.88
## For each reference set, see how many other units of that set each piece overlaps
table(countOverlaps(exons) - 1)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14 
## 3868 5874 7205 5901 4788 3734 3039 1311 1075  405  424  161  107  162   63 
##   15   16   17   19   20   21   23   24   25   26   27   46   47 
##   70   71   36   64   21    1    1  303  439  189   22    1    3
table(countOverlaps(exons_one) - 1)
## 
##    0 
## 3868

Note how the overlap between the different units of the same reference set is considerable for the complete exon reference set.

Number of units per reference level:

2 Evaluation (FDR controlled)

2.1 Evaluate results

The following code actually runs the evaluation functions for the different reference sets.

## Get type and replicate info
types <- ifelse(grepl('ballgown', names(stats_GR)), 'qval', 'padj') ## For FDR 
#types <- 'pbonf' ## For FWER
replicates <- as.integer(sapply(strsplit(names(stats_GR), '\\.'), '[[', 2))


## Evaluate at the exons level
## DE if any of the transcripts it overlaps is DE
tables_exons <- mapply(count_comp, stats_GR, replicates, types,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons, cut = 0.05))
empirical_exons <- emp(tables_exons)
index_exons <- mapply(index_comp, stats_GR, replicates, types,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons, cut = 0.05))
case_result_exons <- mapply(case_result, index_exons, replicates,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons))
    
## Evaluate at the exons level (overlapping only 1 transcript)
## DE if any of the transcripts it overlaps is DE
tables_exons_one <- mapply(count_comp, stats_GR, replicates, types,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one, cut = 0.05))
empirical_exons_one <- emp(tables_exons_one)
index_exons_one <- mapply(index_comp, stats_GR, replicates, types,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one, cut = 0.05))
case_result_exons_one <- mapply(case_result, index_exons_one, replicates,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one))

2.2 Summaries

Below are summaries of showing the minimum and maximum empirical power, false positive rate (FPR) and false discovery rate (FDR) per replicate for each of the analysis pipelines by the different reference sets.

## Exon level
kable(emp_sum(empirical_exons), format = 'html')
Power FPR FDR AnnotationComplete Aligner SummaryMethod StatMethod
(72.2-74) (5.2-6.7) (2.3-2.8) FALSE HISAT featureCounts DESeq2
(73.4-75) (5.6-6.6) (2.6-2.9) TRUE HISAT featureCounts DESeq2
(77.4-79.6) (10.9-12.6) (4.4-5) FALSE HISAT featureCounts edgeR
(78.4-80.6) (12.2-13.1) (5-5.1) TRUE HISAT featureCounts edgeR
(73.2-75.6) (2.4-3.7) (1.1-1.6) FALSE HISAT featureCounts limma
(74.1-76.6) (2.6-4) (1.2-1.7) TRUE HISAT featureCounts limma
(70.9-72.9) (3.1-4.1) (1.3-1.8) FALSE HISAT StringTie ballgown-exon
(72-74.6) (3.3-4.4) (1.4-1.9) TRUE HISAT StringTie ballgown-exon
(47.1-51.9) (1-1.8) (0.7-1.2) FALSE HISAT StringTie ballgown-trans
(47.8-51.6) (1.2-3.1) (0.8-1.9) TRUE HISAT StringTie ballgown-trans
(71.6-73) (8.3-10.1) (3.6-4.2) NA HISAT derfinder DESeq2
(71.6-72.7) (8.3-9.4) (3.9-4) NA Rail-RNA derfinder DESeq2
(72.6-73.4) (9.9-10.8) (4.3-4.9) NA HISAT derfinder edgeR
(72.4-73.2) (10-10.6) (4.4-4.7) NA Rail-RNA derfinder edgeR
(65.5-69.7) (15-17.8) (7.1-7.6) NA HISAT derfinder limma
(65.8-69.8) (15.8-18.3) (7.5-7.9) NA Rail-RNA derfinder limma
## Exon level (overlapping only 1 transcript)
kable(emp_sum(empirical_exons_one), format = 'html')
Power FPR FDR AnnotationComplete Aligner SummaryMethod StatMethod
(70-78.3) (2.1-3.6) (5.6-9.8) FALSE HISAT featureCounts DESeq2
(95.1-95.9) (2.9-4.8) (6.3-9.7) TRUE HISAT featureCounts DESeq2
(71.2-79.2) (5-8.1) (12-19.5) FALSE HISAT featureCounts edgeR
(96.4-97.2) (6.8-9.9) (12.7-18.1) TRUE HISAT featureCounts edgeR
(69-77.6) (2.5-3.3) (6-7.7) FALSE HISAT featureCounts limma
(94.4-95.1) (3.1-4.5) (6.5-7.5) TRUE HISAT featureCounts limma
(68.4-77) (2.8-3) (5.5-8.3) FALSE HISAT StringTie ballgown-exon
(93.7-94.6) (3.6-4) (5.9-7.8) TRUE HISAT StringTie ballgown-exon
(53.2-60) (0.6-2.2) (1.4-8.1) FALSE HISAT StringTie ballgown-trans
(67.2-71.9) (0.6-1.1) (1.4-3.2) TRUE HISAT StringTie ballgown-trans
(94.3-95.5) (6.8-9) (12.2-16.1) NA HISAT derfinder DESeq2
(94.1-95.1) (6.5-8.3) (11.1-15.7) NA Rail-RNA derfinder DESeq2
(95.6-96.1) (8.4-10.5) (13.9-18.9) NA HISAT derfinder edgeR
(94.7-95.9) (8.3-10.2) (12.8-18.5) NA Rail-RNA derfinder edgeR
(93.6-94.2) (6.4-9.3) (12.8-16.5) NA HISAT derfinder limma
(93.7-94.2) (6.5-9.1) (12.5-16.1) NA Rail-RNA derfinder limma

From these summaries we can see that in most cases the minimum and maximum values are nearly the same, with more variability for the HISAT -> StringTie -> ballgown pipeline results. Note that for this pipeline we conducted tests at the transcript level (StatMethod = ballgown-trans) and at the exon level (ballgown-exon). edgeR achieves slightly greater empirical power than DESeq2 at the cost of a higher empirical FPR and FDR.

The sets where we consider exons overlapping only 1 transcript shows a clear difference in power for the methods that rely on annotation between the analyses that used the complete annotation and those that used the incomplete one. The derfinder analyses do have higher FPR and FDR than other pipelines, but achieve nearly the same power as the other methods when the annotation is complete.

2.3 Details

The following tables show the results for all the replicates.

## Exon level
kable(empirical_exons, format = 'html', row.names = TRUE)
power FPR FDR replicate AnnotationComplete Aligner SummaryMethod StatMethod
1 75.0 6.6 2.8 1 TRUE HISAT featureCounts DESeq2
2 80.6 13.1 5.0 1 TRUE HISAT featureCounts edgeR
3 74.1 4.0 1.7 1 TRUE HISAT featureCounts limma
4 74.0 6.7 2.8 1 FALSE HISAT featureCounts DESeq2
5 79.6 12.6 4.8 1 FALSE HISAT featureCounts edgeR
6 73.2 3.7 1.6 1 FALSE HISAT featureCounts limma
7 74.0 5.6 2.6 2 TRUE HISAT featureCounts DESeq2
8 80.0 12.2 5.1 2 TRUE HISAT featureCounts edgeR
9 76.6 2.6 1.2 2 TRUE HISAT featureCounts limma
10 72.9 5.2 2.5 2 FALSE HISAT featureCounts DESeq2
11 78.9 11.7 5.0 2 FALSE HISAT featureCounts edgeR
12 75.6 2.4 1.1 2 FALSE HISAT featureCounts limma
13 73.4 6.6 2.9 3 TRUE HISAT featureCounts DESeq2
14 78.4 12.9 5.1 3 TRUE HISAT featureCounts edgeR
15 76.6 3.6 1.5 3 TRUE HISAT featureCounts limma
16 72.2 5.2 2.3 3 FALSE HISAT featureCounts DESeq2
17 77.4 10.9 4.4 3 FALSE HISAT featureCounts edgeR
18 75.1 3.3 1.4 3 FALSE HISAT featureCounts limma
19 72.7 9.4 4.0 1 NA Rail-RNA derfinder DESeq2
20 73.2 10.6 4.5 1 NA Rail-RNA derfinder edgeR
21 69.8 18.3 7.8 1 NA Rail-RNA derfinder limma
22 71.6 8.3 4.0 2 NA Rail-RNA derfinder DESeq2
23 72.4 10.0 4.7 2 NA Rail-RNA derfinder edgeR
24 65.8 15.8 7.9 2 NA Rail-RNA derfinder limma
25 72.3 8.9 3.9 3 NA Rail-RNA derfinder DESeq2
26 73.0 10.3 4.4 3 NA Rail-RNA derfinder edgeR
27 66.8 16.6 7.5 3 NA Rail-RNA derfinder limma
28 73.0 10.1 4.2 1 NA HISAT derfinder DESeq2
29 73.4 10.8 4.5 1 NA HISAT derfinder edgeR
30 69.7 17.8 7.6 1 NA HISAT derfinder limma
31 71.6 8.8 4.2 2 NA HISAT derfinder DESeq2
32 72.6 10.6 4.9 2 NA HISAT derfinder edgeR
33 65.5 15.0 7.5 2 NA HISAT derfinder limma
34 72.3 8.3 3.6 3 NA HISAT derfinder DESeq2
35 72.8 9.9 4.3 3 NA HISAT derfinder edgeR
36 66.9 15.7 7.1 3 NA HISAT derfinder limma
37 50.7 3.1 1.9 1 TRUE HISAT StringTie ballgown-trans
38 72.0 4.4 1.9 1 TRUE HISAT StringTie ballgown-exon
39 47.1 1.5 1.0 1 FALSE HISAT StringTie ballgown-trans
40 70.9 4.1 1.8 1 FALSE HISAT StringTie ballgown-exon
41 51.6 1.2 0.8 2 TRUE HISAT StringTie ballgown-trans
42 74.1 3.7 1.7 2 TRUE HISAT StringTie ballgown-exon
43 51.9 1.8 1.2 2 FALSE HISAT StringTie ballgown-trans
44 72.9 3.5 1.7 2 FALSE HISAT StringTie ballgown-exon
45 47.8 1.5 1.0 3 TRUE HISAT StringTie ballgown-trans
46 74.6 3.3 1.4 3 TRUE HISAT StringTie ballgown-exon
47 50.6 1.0 0.7 3 FALSE HISAT StringTie ballgown-trans
48 72.9 3.1 1.3 3 FALSE HISAT StringTie ballgown-exon
## Exon level (overlapping only 1 transcript)
kable(empirical_exons_one, format = 'html', row.names = TRUE)
power FPR FDR replicate AnnotationComplete Aligner SummaryMethod StatMethod
1 95.9 4.5 6.5 1 TRUE HISAT featureCounts DESeq2
2 97.2 9.5 12.7 1 TRUE HISAT featureCounts edgeR
3 95.1 4.5 6.5 1 TRUE HISAT featureCounts limma
4 78.3 3.5 6.2 1 FALSE HISAT featureCounts DESeq2
5 79.2 7.3 12.0 1 FALSE HISAT featureCounts edgeR
6 77.6 3.3 6.0 1 FALSE HISAT featureCounts limma
7 95.1 2.9 6.3 2 TRUE HISAT featureCounts DESeq2
8 96.4 6.8 13.5 2 TRUE HISAT featureCounts edgeR
9 94.8 3.1 6.7 2 TRUE HISAT featureCounts limma
10 77.5 2.1 5.6 2 FALSE HISAT featureCounts DESeq2
11 78.4 5.0 12.3 2 FALSE HISAT featureCounts edgeR
12 77.3 2.5 6.6 2 FALSE HISAT featureCounts limma
13 95.4 4.8 9.7 3 TRUE HISAT featureCounts DESeq2
14 96.4 9.9 18.1 3 TRUE HISAT featureCounts edgeR
15 94.4 3.6 7.5 3 TRUE HISAT featureCounts limma
16 70.0 3.6 9.8 3 FALSE HISAT featureCounts DESeq2
17 71.2 8.1 19.5 3 FALSE HISAT featureCounts edgeR
18 69.0 2.7 7.7 3 FALSE HISAT featureCounts limma
19 95.0 8.0 11.1 1 NA Rail-RNA derfinder DESeq2
20 95.3 9.4 12.8 1 NA Rail-RNA derfinder edgeR
21 94.1 9.1 12.5 1 NA Rail-RNA derfinder limma
22 94.1 6.5 13.1 2 NA Rail-RNA derfinder DESeq2
23 94.7 8.3 16.1 2 NA Rail-RNA derfinder edgeR
24 93.7 6.5 13.2 2 NA Rail-RNA derfinder limma
25 95.1 8.3 15.7 3 NA Rail-RNA derfinder DESeq2
26 95.9 10.2 18.5 3 NA Rail-RNA derfinder edgeR
27 94.2 8.5 16.1 3 NA Rail-RNA derfinder limma
28 95.5 9.0 12.2 1 NA HISAT derfinder DESeq2
29 95.8 10.5 13.9 1 NA HISAT derfinder edgeR
30 94.2 9.3 12.8 1 NA HISAT derfinder limma
31 94.3 6.8 13.7 2 NA HISAT derfinder DESeq2
32 95.6 8.4 16.1 2 NA HISAT derfinder edgeR
33 93.6 6.4 13.0 2 NA HISAT derfinder limma
34 95.3 8.5 16.1 3 NA HISAT derfinder DESeq2
35 96.1 10.5 18.9 3 NA HISAT derfinder edgeR
36 93.8 8.7 16.5 3 NA HISAT derfinder limma
37 70.9 0.7 1.4 1 TRUE HISAT StringTie ballgown-trans
38 94.6 4.0 5.9 1 TRUE HISAT StringTie ballgown-exon
39 60.0 0.6 1.4 1 FALSE HISAT StringTie ballgown-trans
40 77.0 3.0 5.5 1 FALSE HISAT StringTie ballgown-exon
41 71.9 1.1 3.2 2 TRUE HISAT StringTie ballgown-trans
42 94.3 3.6 7.8 2 TRUE HISAT StringTie ballgown-exon
43 58.1 2.0 6.9 2 FALSE HISAT StringTie ballgown-trans
44 77.0 2.8 7.4 2 FALSE HISAT StringTie ballgown-exon
45 67.2 0.6 1.8 3 TRUE HISAT StringTie ballgown-trans
46 93.7 3.6 7.7 3 TRUE HISAT StringTie ballgown-exon
47 53.2 2.2 8.1 3 FALSE HISAT StringTie ballgown-trans
48 68.4 2.9 8.3 3 FALSE HISAT StringTie ballgown-exon

2.4 Specific case

In this section we focus on the specific case of using HISAT -> derfinder -> edgeR from the first replicate because it has the highest empirical FPR from all the derfinder analyses when using the exon reference set (row 20 in the first table).

When using the exon level reference set, only about a tenth [(37 + 642) / 6018 = 11.28%] of the noneDE cases are incorrectly labeled. The numbers decrease in the subset of exons that overlap only 1 transcript: [(0 + 37) / 448 = 8.26%].

## Compare all references for one simulation scenario
case_result_exons[['regionMatrix.1.edgeR']]
##              Result
## Case          FalseNegative FalsePositive TrueNegative TruePositive   Sum
##   allDE                  81             0            0          595   676
##   noneDE                 37           642         5304           35  6018
##   singleDE               35             0            0          885   920
##   singleNotDE             0           149         1127            9  1285
##   someDE               7775           244         2110        20310 30439
##   Sum                  7928          1035         8541        21834 39338
case_result_exons_one[['regionMatrix.1.edgeR']]
##              Result
## Case          FalseNegative FalsePositive TrueNegative TruePositive  Sum
##   allDE                   2             0            0          137  139
##   noneDE                  0            37          411            0  448
##   singleDE               35             0            0          870  905
##   singleNotDE             0           145         1117            0 1262
##   someDE                 28            60          537          489 1114
##   Sum                    65           242         2065         1496 3868

Next we show the same tables but in percent of the total reference set units.

## Now in percent
round(case_result_exons[['regionMatrix.1.edgeR']] /
    case_result_exons[['regionMatrix.1.edgeR']][6, 5] * 100, 2)
##              Result
## Case          FalseNegative FalsePositive TrueNegative TruePositive    Sum
##   allDE                0.21          0.00         0.00         1.51   1.72
##   noneDE               0.09          1.63        13.48         0.09  15.30
##   singleDE             0.09          0.00         0.00         2.25   2.34
##   singleNotDE          0.00          0.38         2.86         0.02   3.27
##   someDE              19.76          0.62         5.36        51.63  77.38
##   Sum                 20.15          2.63        21.71        55.50 100.00
round(case_result_exons_one[['regionMatrix.1.edgeR']] /
    case_result_exons_one[['regionMatrix.1.edgeR']][6, 5] * 100, 2)
##              Result
## Case          FalseNegative FalsePositive TrueNegative TruePositive    Sum
##   allDE                0.05          0.00         0.00         3.54   3.59
##   noneDE               0.00          0.96        10.63         0.00  11.58
##   singleDE             0.90          0.00         0.00        22.49  23.40
##   singleNotDE          0.00          3.75        28.88         0.00  32.63
##   someDE               0.72          1.55        13.88        12.64  28.80
##   Sum                  1.68          6.26        53.39        38.68 100.00

In both the exons reference set, most of the false negatives are from the someDE scenario. Those are ambiguous cases and it’s the main reason why the power is decreased when using this reference set. Note how the percent of total reference units from the someDE case that result in false negatives decreases from 19.76 to 0.72 between the exons set that does not take into account multiplicity and the subset of exons overlapping only 1 transcript.

2.5 At different cutoffs

The following plots shows the observed FDR and observed FPR for controlling the FDR at 0.01, 0.05, 0.1, 0.15 and 0.2.

## Evaluate at the exons level (overlapping only 1 transcript)
## DE if any of the transcripts it overlaps is DE at different cutoffs
cuts <- c(0.01, 0.05, 0.1, 0.15, 0.2)
to_char <- function(x) { x[is.na(x)] <- 'NA'; return(x) }
emp_exons_one_cuts <- do.call(rbind, lapply(cuts, function(cut) {
    res <- emp_sum_mean(emp(mapply(count_comp, stats_GR, replicates, types,
        SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one, cut = cut))))
    res$cut <- cut
    return(res)
}))
assign_cluster <- function(sum_met, stat_met, ann) {
    if(sum_met == 'featureCounts') {
        res <- ifelse(ann, 'fC-complete', 'fC-incomplete')
    } else if(sum_met == 'derfinder') {
        res <- 'derfinder'
    } else if(sum_met == 'StringTie') {
        res <- ifelse(ann, 'ST-complete', 'ST-incomplete')
    }
    return(res)
}
emp_exons_one_cuts$cluster <- with(emp_exons_one_cuts, mapply(assign_cluster, SummaryMethod, StatMethod, AnnotationComplete))

## Use only HISAT-aligned for simplicity
g1 <- ggplot(data = subset(emp_exons_one_cuts, Aligner == 'HISAT'),
    aes(x = FDR, y = Power, shape = StatMethod, color = cluster)) +
    geom_point(size = 3) + geom_line() + ylab('Empirical power') +
    xlab('Observed FDR (in percent)') +
    theme_linedraw(base_size = 16) +
    scale_color_brewer(palette = 'Set1', name = 'Group') +
    scale_shape_discrete(name = 'Statistical\nmethod')
g1

## Use only HISAT-aligned for simplicity
g2 <- ggplot(data = subset(emp_exons_one_cuts, Aligner == 'HISAT'),
    aes(x = FPR, y = Power, shape = StatMethod, color = cluster)) +
    geom_point(size = 3) + geom_line() + ylab('Empirical power') +
    xlab('Observed FPR (in percent)') +
    theme_linedraw(base_size = 16) +
    scale_color_brewer(palette = 'Set1', name = 'Group') +
    scale_shape_discrete(name = 'Statistical\nmethod')
g2

pdf('curve_controlFDR.pdf', width = 9)
g1
g2
dev.off()
## quartz_off_screen 
##                 2
#case_res_exons_one_cuts <- lapply(cuts, function(cut) {
#    i <- mapply(index_comp, stats_GR, replicates, types,
#        SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one, cut = cut))
#    res <- mapply(case_result, i, replicates,
#        SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one))
#    return(res)
#})

The following table shows the actual numbers used in the previous two plots. The empirical power, FPR and FDR are the mean values across the 3 simulation replicates.

## Exon level (overlapping only 1 transcript), showing mean by cutoff
kable(emp_exons_one_cuts, format = 'html', row.names = TRUE)
Power FPR FDR AnnotationComplete Aligner SummaryMethod StatMethod cut cluster
1 73.46667 1.3000000 3.4333333 FALSE HISAT featureCounts DESeq2 0.01 fC-incomplete
2 93.13333 1.8333333 3.6333333 TRUE HISAT featureCounts DESeq2 0.01 fC-complete
3 75.00000 2.7666667 6.5333333 FALSE HISAT featureCounts edgeR 0.01 fC-incomplete
4 95.10000 3.6000000 6.7333333 TRUE HISAT featureCounts edgeR 0.01 fC-complete
5 69.33333 0.9333333 2.5666667 FALSE HISAT featureCounts limma 0.01 fC-incomplete
6 88.33333 1.2666667 2.6333333 TRUE HISAT featureCounts limma 0.01 fC-complete
7 68.26667 0.9000000 2.5333333 FALSE HISAT StringTie ballgown-exon 0.01 ST-incomplete
8 86.80000 1.1666667 2.5666667 TRUE HISAT StringTie ballgown-exon 0.01 ST-complete
9 52.56667 0.1000000 0.4000000 FALSE HISAT StringTie ballgown-trans 0.01 ST-incomplete
10 64.46667 0.0666667 0.1666667 TRUE HISAT StringTie ballgown-trans 0.01 ST-complete
11 92.83333 4.0000000 7.5666667 NA HISAT derfinder DESeq2 0.01 derfinder
12 92.60000 3.6000000 6.9333333 NA Rail-RNA derfinder DESeq2 0.01 derfinder
13 94.03333 4.5666667 8.5000000 NA HISAT derfinder edgeR 0.01 derfinder
14 93.80000 4.2000000 7.9000000 NA Rail-RNA derfinder edgeR 0.01 derfinder
15 87.00000 2.5000000 5.2000000 NA HISAT derfinder limma 0.01 derfinder
16 87.53333 2.6333333 5.3333333 NA Rail-RNA derfinder limma 0.01 derfinder
17 75.26667 3.0666667 7.2000000 FALSE HISAT featureCounts DESeq2 0.05 fC-incomplete
18 95.46667 4.0666667 7.5000000 TRUE HISAT featureCounts DESeq2 0.05 fC-complete
19 76.26667 6.8000000 14.6000000 FALSE HISAT featureCounts edgeR 0.05 fC-incomplete
20 96.66667 8.7333333 14.7666667 TRUE HISAT featureCounts edgeR 0.05 fC-complete
21 74.63333 2.8333333 6.7666667 FALSE HISAT featureCounts limma 0.05 fC-incomplete
22 94.76667 3.7333333 6.9000000 TRUE HISAT featureCounts limma 0.05 fC-complete
23 74.13333 2.9000000 7.0666667 FALSE HISAT StringTie ballgown-exon 0.05 ST-incomplete
24 94.20000 3.7333333 7.1333333 TRUE HISAT StringTie ballgown-exon 0.05 ST-complete
25 57.10000 1.6000000 5.4666667 FALSE HISAT StringTie ballgown-trans 0.05 ST-incomplete
26 70.00000 0.8000000 2.1333333 TRUE HISAT StringTie ballgown-trans 0.05 ST-complete
27 95.03333 8.1000000 14.0000000 NA HISAT derfinder DESeq2 0.05 derfinder
28 94.73333 7.6000000 13.3000000 NA Rail-RNA derfinder DESeq2 0.05 derfinder
29 95.83333 9.8000000 16.3000000 NA HISAT derfinder edgeR 0.05 derfinder
30 95.30000 9.3000000 15.8000000 NA Rail-RNA derfinder edgeR 0.05 derfinder
31 93.86667 8.1333333 14.1000000 NA HISAT derfinder limma 0.05 derfinder
32 94.00000 8.0333333 13.9333333 NA Rail-RNA derfinder limma 0.05 derfinder
33 76.06667 5.7666667 12.5666667 FALSE HISAT featureCounts DESeq2 0.10 fC-incomplete
34 96.36667 7.1333333 12.3000000 TRUE HISAT featureCounts DESeq2 0.10 fC-complete
35 76.86667 10.9333333 21.4000000 FALSE HISAT featureCounts edgeR 0.10 fC-incomplete
36 97.60000 13.9000000 21.3333333 TRUE HISAT featureCounts edgeR 0.10 fC-complete
37 75.70000 5.3000000 11.8333333 FALSE HISAT featureCounts limma 0.10 fC-incomplete
38 96.10000 6.8000000 11.8000000 TRUE HISAT featureCounts limma 0.10 fC-complete
39 75.53333 5.1666667 11.7000000 FALSE HISAT StringTie ballgown-exon 0.10 ST-incomplete
40 95.90000 6.7333333 11.8000000 TRUE HISAT StringTie ballgown-exon 0.10 ST-complete
41 58.16667 2.6000000 8.5333333 FALSE HISAT StringTie ballgown-trans 0.10 ST-incomplete
42 71.20000 2.2666667 5.9666667 TRUE HISAT StringTie ballgown-trans 0.10 ST-complete
43 95.80000 12.1666667 19.5666667 NA HISAT derfinder DESeq2 0.10 derfinder
44 95.63333 11.8000000 19.1666667 NA Rail-RNA derfinder DESeq2 0.10 derfinder
45 96.50000 14.8666667 22.7333333 NA HISAT derfinder edgeR 0.10 derfinder
46 95.96667 14.1666667 22.0000000 NA Rail-RNA derfinder edgeR 0.10 derfinder
47 95.70000 14.1000000 21.8333333 NA HISAT derfinder limma 0.10 derfinder
48 95.53333 14.1333333 22.0333333 NA Rail-RNA derfinder limma 0.10 derfinder
49 76.66667 8.2333333 17.0333333 FALSE HISAT featureCounts DESeq2 0.15 fC-incomplete
50 97.13333 10.2333333 16.6000000 TRUE HISAT featureCounts DESeq2 0.15 fC-complete
51 77.20000 14.8333333 26.7666667 FALSE HISAT featureCounts edgeR 0.15 fC-incomplete
52 97.96667 18.7333333 26.7333333 TRUE HISAT featureCounts edgeR 0.15 fC-complete
53 76.30000 7.9666667 16.7333333 FALSE HISAT featureCounts limma 0.15 fC-incomplete
54 96.86667 9.9000000 16.1333333 TRUE HISAT featureCounts limma 0.15 fC-complete
55 76.16667 8.2333333 17.3333333 FALSE HISAT StringTie ballgown-exon 0.15 ST-incomplete
56 96.73333 10.4000000 17.0666667 TRUE HISAT StringTie ballgown-exon 0.15 ST-complete
57 58.73333 3.9333333 11.9000000 FALSE HISAT StringTie ballgown-trans 0.15 ST-incomplete
58 72.13333 4.1000000 9.6666667 TRUE HISAT StringTie ballgown-trans 0.15 ST-complete
59 96.23333 16.4000000 24.5333333 NA HISAT derfinder DESeq2 0.15 derfinder
60 96.13333 15.6333333 23.7000000 NA Rail-RNA derfinder DESeq2 0.15 derfinder
61 97.06667 19.1333333 27.3666667 NA HISAT derfinder edgeR 0.15 derfinder
62 96.56667 18.4000000 26.7000000 NA Rail-RNA derfinder edgeR 0.15 derfinder
63 96.36667 20.2000000 28.5333333 NA HISAT derfinder limma 0.15 derfinder
64 96.20000 19.9666667 28.3666667 NA Rail-RNA derfinder limma 0.15 derfinder
65 76.96667 11.1000000 21.6333333 FALSE HISAT featureCounts DESeq2 0.20 fC-incomplete
66 97.56667 13.8666667 21.3000000 TRUE HISAT featureCounts DESeq2 0.20 fC-complete
67 77.33333 18.4666667 31.3000000 FALSE HISAT featureCounts edgeR 0.20 fC-incomplete
68 98.23333 23.1333333 30.9666667 TRUE HISAT featureCounts edgeR 0.20 fC-complete
69 76.90000 11.0666667 21.6333333 FALSE HISAT featureCounts limma 0.20 fC-incomplete
70 97.60000 13.6666667 20.9666667 TRUE HISAT featureCounts limma 0.20 fC-complete
71 76.70000 11.4333333 22.3333333 FALSE HISAT StringTie ballgown-exon 0.20 ST-incomplete
72 97.36667 14.3000000 21.8666667 TRUE HISAT StringTie ballgown-exon 0.20 ST-complete
73 59.43333 5.1333333 14.5666667 FALSE HISAT StringTie ballgown-trans 0.20 ST-incomplete
74 73.20000 5.4000000 12.3000000 TRUE HISAT StringTie ballgown-trans 0.20 ST-complete
75 96.86667 20.4000000 28.6333333 NA HISAT derfinder DESeq2 0.20 derfinder
76 96.76667 19.5333333 27.8000000 NA Rail-RNA derfinder DESeq2 0.20 derfinder
77 97.50000 23.6000000 31.5666667 NA HISAT derfinder edgeR 0.20 derfinder
78 97.13333 22.5333333 30.7000000 NA Rail-RNA derfinder edgeR 0.20 derfinder
79 96.96667 25.4333333 33.3000000 NA HISAT derfinder limma 0.20 derfinder
80 96.83333 25.4000000 33.3333333 NA Rail-RNA derfinder limma 0.20 derfinder

3 Evaluation (FWER controlled)

3.1 Evaluate results

The following code actually runs the evaluation functions for the different reference sets.

## Get type and replicate info
types <- 'pbonf' ## For FWER
replicates <- as.integer(sapply(strsplit(names(stats_GR), '\\.'), '[[', 2))


## Evaluate at the exons level
## DE if any of the transcripts it overlaps is DE
tables_exons <- mapply(count_comp, stats_GR, replicates, types,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons, cut = 0.05))
empirical_exons <- emp(tables_exons)
index_exons <- mapply(index_comp, stats_GR, replicates, types,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons, cut = 0.05))
case_result_exons <- mapply(case_result, index_exons, replicates,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons))
    
## Evaluate at the exons level (overlapping only 1 transcript)
## DE if any of the transcripts it overlaps is DE
tables_exons_one <- mapply(count_comp, stats_GR, replicates, types,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one, cut = 0.05))
empirical_exons_one <- emp(tables_exons_one)
index_exons_one <- mapply(index_comp, stats_GR, replicates, types,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one, cut = 0.05))
case_result_exons_one <- mapply(case_result, index_exons_one, replicates,
    SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one))

3.2 Summaries

Below are summaries of showing the minimum and maximum empirical power, false positive rate (FPR) and false discovery rate (FDR) per replicate for each of the analysis pipelines by the different reference sets.

## Exon level
kable(emp_sum(empirical_exons), format = 'html')
## Exon level (overlapping only 1 transcript)
kable(emp_sum(empirical_exons_one), format = 'html')

3.3 Details

The following tables show the results for all the replicates.

## Exon level
kable(empirical_exons, format = 'html', row.names = TRUE)
## Exon level (overlapping only 1 transcript)
kable(empirical_exons_one, format = 'html', row.names = TRUE)

3.4 At different cutoffs

The following plots shows the observed FDR and observed FPR for controlling the FWER at 0.01, 0.05, 0.1, 0.15 and 0.2.

## Evaluate at the exons level (overlapping only 1 transcript)
## DE if any of the transcripts it overlaps is DE at different cutoffs
emp_exons_one_cuts <- do.call(rbind, lapply(cuts, function(cut) {
    res <- emp_sum_mean(emp(mapply(count_comp, stats_GR, replicates, types,
        SIMPLIFY = FALSE, MoreArgs = list(reference = exons_one, cut = cut))))
    res$cut <- cut
    return(res)
}))

## Use only HISAT-aligned for simplicity
g3 <- ggplot(data = subset(emp_exons_one_cuts, Aligner == 'HISAT'),
    aes(x = FDR, y = Power, shape = StatMethod, color = cluster)) +
    geom_point(size = 3) + geom_line() + ylab('Empirical power') +
    xlab('Observed FDR (in percent)') +
    theme_linedraw(base_size = 16) +
    scale_color_brewer(palette = 'Set1', name = 'Group') +
    scale_shape_discrete(name = 'Statistical\nmethod')
g3

## Use only HISAT-aligned for simplicity
g4 <- ggplot(data = subset(emp_exons_one_cuts, Aligner == 'HISAT'),
    aes(x = FPR, y = Power, shape = StatMethod, color = cluster)) +
    geom_point(size = 3) + geom_line() + ylab('Empirical power') +
    xlab('Observed FPR (in percent)') +
    theme_linedraw(base_size = 16) +
    scale_color_brewer(palette = 'Set1', name = 'Group') +
    scale_shape_discrete(name = 'Statistical\nmethod')
g4

pdf('curve_controlFWER.pdf', width = 9)
g3
g4
dev.off()

The following table shows the actual numbers used in the previous two plots. The empirical power, FPR and FDR are the mean values across the 3 simulation replicates.

## Exon level (overlapping only 1 transcript), showing mean by cutoff
kable(emp_exons_one_cuts, format = 'html', row.names = TRUE)

4 Reproducibility

## Reproducibility info
Sys.time()
## [1] "2016-07-20 17:24:01 EDT"
proc.time()
##    user  system elapsed 
##  82.726   3.385  90.972
options(width = 120)
session_info()
## Session info -----------------------------------------------------------------------------------------------------------
##  setting  value                       
##  version  R version 3.3.1 (2016-06-21)
##  system   x86_64, darwin13.4.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  tz       America/New_York            
##  date     2016-07-20
## Packages ---------------------------------------------------------------------------------------------------------------
##  package                           * version  date       source                                          
##  AnnotationDbi                     * 1.35.4   2016-07-08 Bioconductor                                    
##  Biobase                           * 2.33.0   2016-05-05 Bioconductor                                    
##  BiocGenerics                      * 0.19.2   2016-07-08 Bioconductor                                    
##  BiocParallel                        1.7.4    2016-06-17 Bioconductor                                    
##  BiocStyle                         * 2.1.19   2016-07-11 Bioconductor                                    
##  biomaRt                             2.29.2   2016-05-30 Bioconductor                                    
##  Biostrings                          2.41.4   2016-06-17 Bioconductor                                    
##  bitops                              1.0-6    2013-08-17 CRAN (R 3.3.0)                                  
##  colorspace                          1.2-6    2015-03-11 CRAN (R 3.3.0)                                  
##  DBI                                 0.4-1    2016-05-08 CRAN (R 3.3.0)                                  
##  devtools                          * 1.12.0   2016-06-24 CRAN (R 3.3.0)                                  
##  digest                              0.6.9    2016-01-08 CRAN (R 3.3.0)                                  
##  evaluate                            0.9      2016-04-29 CRAN (R 3.3.0)                                  
##  formatR                             1.4      2016-05-09 CRAN (R 3.3.0)                                  
##  GenomeInfoDb                      * 1.9.4    2016-07-14 Bioconductor                                    
##  GenomicAlignments                   1.9.6    2016-07-17 Bioconductor                                    
##  GenomicFeatures                   * 1.25.15  2016-07-08 Bioconductor                                    
##  GenomicRanges                     * 1.25.9   2016-06-26 Bioconductor                                    
##  ggplot2                           * 2.1.0    2016-03-01 CRAN (R 3.3.0)                                  
##  gtable                              0.2.0    2016-02-26 CRAN (R 3.3.0)                                  
##  highr                               0.6      2016-05-09 CRAN (R 3.3.0)                                  
##  htmltools                           0.3.5    2016-03-21 CRAN (R 3.3.0)                                  
##  IRanges                           * 2.7.11   2016-06-22 Bioconductor                                    
##  knitr                             * 1.13     2016-05-09 CRAN (R 3.3.0)                                  
##  labeling                            0.3      2014-08-23 CRAN (R 3.3.0)                                  
##  lattice                             0.20-33  2015-07-14 CRAN (R 3.3.1)                                  
##  magrittr                            1.5      2014-11-22 CRAN (R 3.3.0)                                  
##  Matrix                              1.2-6    2016-05-02 CRAN (R 3.3.1)                                  
##  memoise                             1.0.0    2016-01-29 CRAN (R 3.3.0)                                  
##  munsell                             0.4.3    2016-02-13 CRAN (R 3.3.0)                                  
##  plyr                                1.8.4    2016-06-08 CRAN (R 3.3.0)                                  
##  RColorBrewer                        1.1-2    2014-12-07 CRAN (R 3.3.0)                                  
##  Rcpp                                0.12.5   2016-05-14 CRAN (R 3.3.0)                                  
##  RCurl                               1.95-4.8 2016-03-01 CRAN (R 3.3.0)                                  
##  rmarkdown                         * 1.0      2016-07-08 CRAN (R 3.3.0)                                  
##  Rsamtools                           1.25.0   2016-05-05 Bioconductor                                    
##  RSQLite                             1.0.0    2014-10-25 CRAN (R 3.3.0)                                  
##  rtracklayer                         1.33.10  2016-07-14 Github (Bioconductor-mirror/rtracklayer@136a2bd)
##  S4Vectors                         * 0.11.9   2016-07-08 Bioconductor                                    
##  scales                              0.4.0    2016-02-26 CRAN (R 3.3.0)                                  
##  stringi                             1.1.1    2016-05-27 CRAN (R 3.3.0)                                  
##  stringr                             1.0.0    2015-04-30 CRAN (R 3.3.0)                                  
##  SummarizedExperiment                1.3.7    2016-07-08 Bioconductor                                    
##  TxDb.Hsapiens.UCSC.hg19.knownGene * 3.2.2    2016-05-05 Bioconductor                                    
##  withr                               1.0.2    2016-06-20 CRAN (R 3.3.0)                                  
##  XML                                 3.98-1.4 2016-03-01 CRAN (R 3.3.0)                                  
##  XVector                             0.13.6   2016-07-08 Bioconductor                                    
##  yaml                                2.1.13   2014-06-12 CRAN (R 3.3.0)                                  
##  zlibbioc                            1.19.0   2016-05-05 Bioconductor