Evaluate simulation

This report evaluates the simulation results.

library('GenomicRanges')
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## 
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## 
## The following object is masked from 'package:stats':
## 
##     xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unlist, unsplit
## 
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
library('TxDb.Hsapiens.UCSC.hg19.knownGene')
## Loading required package: GenomicFeatures
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
library('knitr')
library('devtools')
library('derfinder')
library('derfinderHelper')
library('derfinderPlot')
library('bumphunter')
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: locfit
## locfit 1.5-9.1    2013-03-22
load('../simulation_info.Rdata')
load('../derAnalysis/run2-v1.0.10/fullRegions.Rdata')
load('../derAnalysis/run2-v1.0.10/groupInfo.Rdata')
load('../derAnalysis/run2-v1.0.10/models.Rdata')
load('../derAnalysis/run2-v1.0.10/chr22/optionsStats.Rdata')
load('../CoverageInfo/fullCov.Rdata')
names(fullRegions) <- seq_len(length(fullRegions))

Results

## Restrict the annotation to chr22 and fetch the exon records of the
## simulated transcripts
txdb <- keepSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene, 'chr22')
txinfo <- select(
    txdb,
    keys = chosen$ucsckg_id,
    columns = columns(txdb),
    keytype = 'TXNAME'
)

## Build a GRangesList with the exons grouped by transcript, then put the
## transcripts in the same order as the rows of `chosen`
tx <- split(
    GRanges(
        seqnames = txinfo$EXONCHROM,
        IRanges(start = txinfo$EXONSTART, end = txinfo$EXONEND),
        strand = txinfo$EXONSTRAND
    ),
    txinfo$TXNAME
)
tx <- tx[match(chosen$ucsckg_id, names(tx))]

## Number of candidate DERs overlapping each transcript
ctov <- countOverlaps(tx, fullRegions)

## Same count, but only against the DERs with Q-value < 0.05
fullRegions$significantFDR <- factor(
    fullRegions$qvalues < 0.05,
    levels = c('TRUE', 'FALSE')
)
fdr <- fullRegions[fullRegions$significantFDR == 'TRUE']
## Smallest width found among the significant DERs and the exons
min.ov <- min(min(width(fdr)), min(width(tx)))
ctov.fdr <- countOverlaps(tx, fdr, minoverlap = min.ov)

## Use the counts from all DERs only when they flag exactly the same
## transcripts as the significant DERs; otherwise use the significant ones
if (identical(ctov > 0, ctov.fdr > 0)) {
    ctov.use <- ctov
} else {
    ctov.use <- ctov.fdr
}

Transcripts / genes vs DERs

Overview

Table showing the results between whether the transcript was set to be differentially expressed (DE) and if it overlaps (minimum 1 bp) any candidate DER.

addmargins(table('DE status' = chosen$DE, 'Overlaps DER' = ctov > 0))
##          Overlaps DER
## DE status FALSE TRUE Sum
##     FALSE    25   23  48
##     TRUE      5   43  48
##     Sum      30   66  96

The results are not the same using a minimum overlap of 13 bp between transcripts and candidate DERs with a Q-value < 0.05. Thus, we will use only the DERs with Q-value < 0.05.

addmargins(table('DE status' = chosen$DE, 'Overlaps DER (sig Q-value)' = ctov.fdr > 0))
##          Overlaps DER (sig Q-value)
## DE status FALSE TRUE Sum
##     FALSE    37   11  48
##     TRUE      6   42  48
##     Sum      43   53  96

At a finer level, there is a difference in the number of exons per transcript overlapping all candidate DERs vs the DERs with Q-value < 0.05.

## Verify things are working properly
# table(countOverlaps(tx, fullRegions, minoverlap = min.ov) - ctov.fdr)

## Difference in overlaps found
table(ctov - ctov.fdr)
## 
##   0   1   2   3   4   5   6   7   9  10  11  12  13  15  50  61 106 109 
##  49  10   3   9   4   1   4   2   2   3   1   1   1   2   1   1   1   1

By case

We can separate the transcripts by their experiment setup case. That is, whether it is from a gene with:

  • a single transcript
    • set to be DE: singleDE
    • set not to be DE: singleNotDE
  • two transcripts
    • with both set to be DE: bothDE
    • with only one transcript set to be DE: oneDE
    • with both set not to be DE: noneDE

Then compare against the results where

  • success.DE means that the transcript was set to be DE and overlaps a DER (true positive)
  • failed.DE means that the transcript was set to be DE and doesn't overlap a DER (false negative)
  • success.DER means that the transcript was set not to be DE and doesn't overlap a DER (true negative)
  • failed.DER means that the transcript was set not to be DE and does overlap a DER (false positive)
## Row indexes of `chosen` for each outcome: the transcript's DE status
## crossed with whether it overlaps at least one DER
idx <- list(
    success = list(
        de = which(chosen$DE & ctov.use > 0),
        der = which(!chosen$DE & !(ctov.use > 0))
    ),
    failed = list(
        de = which(chosen$DE & !(ctov.use > 0)),
        der = which(!chosen$DE & ctov.use > 0)
    )
)

## Label every transcript with its outcome; rows not matched by any of the
## three index sets below keep the initial 'Success.DE' label
chosen$result <- 'Success.DE'
chosen$result[idx$failed$der] <- 'Failed.DER'
chosen$result[idx$failed$de] <- 'Failed.DE'
chosen$result[idx$success$der] <- 'Success.DER'

## Outcome counts by simulation case
kable(table(chosen$case, chosen$result), format = 'html')
Failed.DE Failed.DER Success.DE Success.DER
bothDE 0 0 24 0
noneDE 0 0 0 24
oneDE 1 11 11 1
singleDE 5 0 7 0
singleNotDE 0 0 0 12

Failed.DE (false negative)

The 6 Failed.DE cases (false negatives) are mostly short single transcript genes (one exon only) where 5 were set to have low expression on one group, normal on the other two.

## Successful cases
success.de <- tx[ idx$success$de  ]
success.der <- tx[  idx$success$der ]

## What happened with the txs set to be DE that were not picked up?
failed.de <- tx[ idx$failed$de  ]

## They are short transcripts
kable(chosen[idx$failed$de, ], format = 'html')
tx_idx tx_n tx_i gene_id ucsckg_id fasta_i DE group1 group2 group3 width readspertx mean1 mean2 mean3 case result
2 2 1 1 100126318 uc021wmk.1 181 TRUE low normal normal 78 31 16 31 31 singleDE Failed.DE
11 13 1 1 100302149 uc021wrh.1 796 TRUE normal normal low 66 26 26 26 13 singleDE Failed.DE
17 20 1 1 100422998 uc021wnn.1 317 TRUE low normal normal 86 34 17 34 34 singleDE Failed.DE
22 27 1 1 100500833 uc010gvn.2 347 TRUE normal normal high 110 44 44 44 88 singleDE Failed.DE
24 29 1 1 100500901 uc021wny.1 423 TRUE normal normal low 58 23 23 23 12 singleDE Failed.DE
126 272 2 2 3761 uc003avt.2 605 TRUE normal normal low 2075 830 830 830 415 oneDE Failed.DE

However, 2 similar cases with short transcripts were successfully detected. So it's likely that a lower F-stat cutoff would have picked up these false negative cases.

## However there are other short transcripts that were picked up
kable(chosen[ idx$success$de[sum(width(success.de)) <= 110], ], format = 'html')
tx_idx tx_n tx_i gene_id ucsckg_id fasta_i DE group1 group2 group3 width readspertx mean1 mean2 mean3 case result
10 12 1 1 100302118 uc021wls.1 127 TRUE normal high normal 78 31 31 62 31 singleDE Success.DE
23 28 1 1 100500860 uc021wlo.1 115 TRUE normal low normal 88 35 35 18 35 singleDE Success.DE

More info:

width(failed.de)
## IntegerList of length 6
## [["uc021wmk.1"]] 78
## [["uc021wrh.1"]] 66
## [["uc021wnn.1"]] 86
## [["uc010gvn.2"]] 110
## [["uc021wny.1"]] 58
## [["uc003avt.2"]] 220 1844
width(tx[ idx$success$de[sum(width(success.de)) <= 110] ])
## IntegerList of length 2
## [["uc021wls.1"]] 78
## [["uc021wlo.1"]] 88

Plots

gs <- makeGenomicState(txdb, chrs = '22', verbose = FALSE)
annoTrans <- annotateTranscripts(txdb = txdb)
## Warning:   Calling species() on a TxDb object is *deprecated*.
##   Please use organism() instead.
## No annotationPackage supplied. Trying org.Hs.eg.db.
## Loading required package: org.Hs.eg.db
## Loading required package: DBI
## 
## Getting TSS and TSE.
## Getting CSS and CSE.
## Getting exons.
## Annotating genes.
## Plot the coverage of every range in `reg` and add an F-statistic track
## at the bottom of each coverage plot.
##
## Arguments:
##   reg  Ranges to plot, one plot per range. names(reg) are printed on a
##        title page whenever the name changes between consecutive ranges.
##   gs   Object created by makeGenomicState(); only gs$fullGenome is used.
##
## Reads the global objects fullCov, annoTrans, models, groupInfo, txdb and
## optionsStats. Coverage is always taken from fullCov$chr22.
##
## Called for its plotting side effects. The graphical parameters in place
## when the function is called are restored when it exits.
makePlots <- function(reg, gs) {
    ## Prep
    strand(reg) <- '*'
    regCov <- getRegionCoverage(fullCov = fullCov, regions = reg, verbose = FALSE)
    annoReg <- annotateRegions(reg, gs$fullGenome, verbose = FALSE)
    annoNear <- matchGenes(reg, subject = annoTrans)

    ## Save only the settable graphical parameters and restore them on exit,
    ## including when one of the plotting calls fails
    def.par <- par(no.readonly = TRUE)
    on.exit(par(def.par), add = TRUE)

    ## Actually make the plots with F-stat track
    prev.name <- ''
    for(reg.i in seq_along(reg)) {
        reg.name <- names(reg)[reg.i]

        ## Title page the first time a name is seen
        if(prev.name != reg.name) {
            par(def.par)
            plot.new()
            text(0.5, 0.5, reg.name, cex = 5)
        }
        prev.name <- reg.name

        ## Base-pair positions covered by this range and their coverage
        pos <- start(reg[reg.i]):end(reg[reg.i])
        dat <- fullCov$chr22[pos, ]

        ## Skip plot if there is no coverage data
        if(max(sapply(dat, max)) == 0) {
            par(def.par)
            plot.new()
            text(0.5, 0.5, paste('No data\nReg', reg.i), cex = 5)
            next
        }

        ## Log2 transform every sample (column) with an offset of 32
        for(i in seq_len(ncol(dat))) dat[[i]] <- log2(dat[[i]] + 32)

        ## Calculate f-stats
        fstats <- as.numeric(fstats.apply(data = dat, mod = models$mod, mod0 = models$mod0))

        ## Make plot
        plotRegionCoverage(reg, regCov, groupInfo, annoNear, annoReg, txdb, reg.i, ask = FALSE, verbose = FALSE)

        ## Add F-stat track in a thin strip near the bottom of the same page;
        ## the red line marks the F-statistic cutoff that was used
        par(fig = c(0, 1, 0.075, 0.125), new = TRUE, xaxt = 'n', oma = c(0, 0, 0, 0), mar = c(0, 4.5, 0, 1.1))
        plot(y = fstats, x = pos, ylab = 'F-stat', type = 'l', xlab = '', bty = 'n', ylim = c(0, max(fstats[is.finite(fstats)], optionsStats$cutoffFstatUsed) * 1.1))
        abline(h = optionsStats$cutoffFstatUsed, col = 'red')
    }
}

Coverage plots with F-statistics shown at the bottom for the false negative cases. One plot is shown for each exon that composes these transcripts.

makePlots(unlist(failed.de), gs)

Failed.DER (false positive)

Out of the 11 Failed.DER transcripts (false positives), 11 of them are from the oneDE case. You could then argue that they are really not false positives. However, 0 and 0 transcripts are from the noneDE and singleNotDE cases respectively which would be the truly false positives.

## What happened with those set to be not DE but overlap DERs?
failed.der <- tx[ idx$failed$der ]

kable(chosen[idx$failed$der, ], format = 'html')
tx_idx tx_n tx_i gene_id ucsckg_id fasta_i DE group1 group2 group3 width readspertx mean1 mean2 mean3 case result
26 6 2 2 100130717 uc011agh.3 17 FALSE normal normal normal 650 260 260 260 260 oneDE Failed.DER
37 56 2 1 10738 uc010gwn.3 467 FALSE normal normal normal 1488 595 595 595 595 oneDE Failed.DER
47 80 2 1 128977 uc002zpi.3 84 FALSE normal normal normal 1558 623 623 623 623 oneDE Failed.DER
49 85 2 1 129138 uc003auc.3 576 FALSE normal normal normal 2149 860 860 860 860 oneDE Failed.DER
98 206 2 2 25817 uc003bio.4 836 FALSE normal normal normal 2652 1061 1061 1061 1061 oneDE Failed.DER
121 266 2 1 339669 uc003aqe.3 538 FALSE normal normal normal 1049 420 420 420 420 oneDE Failed.DER
132 283 2 2 3976 uc011aks.2 379 FALSE normal normal normal 3987 1595 1595 1595 1595 oneDE Failed.DER
150 321 2 2 4689 uc003apz.4 533 FALSE normal normal normal 1646 658 658 658 658 oneDE Failed.DER
175 400 2 1 6523 uc003amc.3 457 FALSE normal normal normal 5061 2024 2024 2024 2024 oneDE Failed.DER
190 417 2 2 6948 uc003air.2 407 FALSE normal normal normal 2006 802 802 802 802 oneDE Failed.DER
192 421 2 2 7122 uc010grr.2 91 FALSE normal normal normal 1720 688 688 688 688 oneDE Failed.DER

Plots

Coverage plots with F-statistics shown at the bottom for the false positive cases. One plot is shown for each exon that composes these transcripts. For the 11 transcripts from the oneDE case, it can be seen how at least one plot contains a DER overlapping an exon set to be DE.

Some complex situations where there are exons on both strands can be observed.

Other strand

In some simulations, we found what seemed to be false positive transcripts but turned out to overlap DERs in regions where there are exons on both the positive and negative strands and at least one of the exons was set to be DE.

## Most of the truly false positive transcripts don't overlap other transcripts
inter <- intersect(idx$failed$der, c(which(chosen$case == 'noneDE'), which(chosen$case == 'singleNotDE')))
table(countOverlaps(tx[inter], tx[-inter]))
## < table of extent 0 >
## They are not short
width(tx[inter])
## IntegerList of length 0
## To explore regions with derfinderReport
sort(subjectHits(findOverlaps(tx[inter], fullRegions)))
## integer(0)
## DERs
# fullRegions[subjectHits(findOverlaps(tx[i], fullRegions))]

Skipped the following section due to the absence of such cases in this particular simulation

As can be seen below, 0 apparent false positive transcripts from the noneDE case overlap (when strand is not taken into account) genes where at least one of two transcripts was set to be DE.

kable(chosen[ subjectHits(findOverlaps(tx[inter], tx[-inter], ignore.strand = TRUE)), ], format = 'html')
tx_idx tx_n tx_i gene_id ucsckg_id fasta_i DE group1 group2 group3 width readspertx mean1 mean2 mean3 case result

Gene level

For each gene, if at least one transcript is set to be DE then we consider the gene to be DE. Then, we check if the gene overlaps at least one DER.

## One row per gene
gene <- data.frame(gene_id = unique(chosen$gene_id))

## A gene is DE when at least one of its transcripts was set to be DE
gene$DE <- vapply(gene$gene_id, function(x) {
    any(chosen$DE[chosen$gene_id == x])
}, logical(1))

## Simulation case recorded in `chosen` for the transcripts of the gene
gene$case <- sapply(gene$gene_id, function(x) { unique(chosen$case[chosen$gene_id == x])  })

## Total number of DER overlaps across the transcripts of the gene.
## ctov.use is looked up by transcript name; as.character() guarantees a
## lookup by name even if the id column is stored as a factor.
gene$overlaps <- vapply(gene$gene_id, function(x) {
    sum(ctov.use[as.character(chosen$ucsckg_id[chosen$gene_id == x])])
}, integer(1))
gene$overlap <- gene$overlaps > 0

## Results between DE status and overlapping at least 1 DER at the gene level
addmargins(table('DE status' = gene$DE, 'Overlaps DER' = gene$overlap))
##          Overlaps DER
## DE status FALSE TRUE Sum
##     FALSE    24    0  24
##     TRUE      6   30  36
##     Sum      30   30  60

Conclusions

The results from the simulation are promising as most transcripts were correctly classified as differentially expressed or not by derfinder.

The majority of the false negative cases involved short single transcript genes with one group having low expression relative to the other two. These cases could potentially be mitigated by lowering the F-statistic threshold used in the derfinder analysis.

In some simulations there are some apparent false positives which are due to transcripts on one strand set not to be DE overlapping transcripts from the other strand set to be DE. This situation could be solved with strand-specific RNA-seq data and running derfinder for each strand separately.

Extra

Minimum number of reads per transcript as well as per sample.

## Distribution of the minimum number of reads per transcript
summary(apply(readmat, 1, min))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   213.2   453.5   625.5   783.0  2747.0
## Distribution of the minimum number of reads per sample
summary(apply(readmat, 2, min))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   7.000   9.000   9.067  11.000  19.000

The minimum number of reads per transcript for a given sample is 1.

Exonic segments

Next we can evaluate the simulation by classifying the exonic segments as whether they should be DE or not. Then, we can find if the DERs overlap such segments and vice versa. We would expect that the DERs with a Q-value < 0.05 would only overlap segments that were set to be DE.

## For every gene, split the exons of its transcripts into disjoint segments
## and flag a segment as DE when at least one of the transcripts containing
## it was set to be DE.
##
## NOTE(review): the previous version ignored which transcripts overlap a
## segment and gave every segment the gene-wide DE status, so segments found
## only in the non-DE transcript of a oneDE gene were labelled DE. Segment-level
## results rendered from the previous version need to be regenerated.
segments <- GRangesList(lapply(gene$gene_id, function(x) {
    ## Transcripts of this gene and their DE status, in the same order
    in.gene <- chosen$gene_id == x
    i <- chosen$ucsckg_id[in.gene]
    tx.de <- chosen$DE[in.gene]

    ## Find segments
    segs <- disjoin(unlist(tx[i]))
    ov <- findOverlaps(segs, tx[i])

    ## Find DE status per segment: y holds the positions (within tx[i]) of
    ## the transcripts overlapping the segment
    segs$DE <- as.vector(tapply(subjectHits(ov), queryHits(ov), function(y) {
        any(tx.de[y])
    }))

    ## Finish
    segs
}))
names(segments) <- gene$gene_id
segs <- unlist(segments)

Segments vs DERs

We can check if the exonic segments overlap one or more DERs, similarly to what we did earlier at the transcript and gene level. The results change depending on whether only the DERs with significant Q-value or all of the DERs are used.

addmargins(table('DE status' = segs$DE, 'Overlaps DER (sig Q-value)' = countOverlaps(segs, fdr) > 0))
##          Overlaps DER (sig Q-value)
## DE status FALSE TRUE Sum
##     FALSE   111    0 111
##     TRUE     30  139 169
##     Sum     141  139 280
addmargins(table('DE status' = segs$DE, 'Overlaps DER' = countOverlaps(segs, fullRegions) > 0))
##          Overlaps DER
## DE status FALSE TRUE Sum
##     FALSE   105    6 111
##     TRUE     20  149 169
##     Sum     125  155 280

Using the DERs with significant Q-values, there are 30 false negative cases. From the exploration shown below, half of them seem short. Most of the false negative segments correspond to genes from the oneDE scenario. This reveals that the complexity of that scenario makes it challenging to identify significant DERs.

## Explore false negative segments using DERs with sig Q-value
seg.fn <- which(segs$DE & !countOverlaps(segs, fdr) > 0)

## Around half of these segments are short
summary(width(segs[seg.fn]))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     5.0    73.5   119.0   242.7   241.0  1844.0
chosen[chosen$gene_id == '6523', ]
##     tx_idx tx_n tx_i gene_id  ucsckg_id fasta_i    DE group1 group2 group3
## 175    400    2    1    6523 uc003amc.3     457 FALSE normal normal normal
## 176    400    2    2    6523 uc011alz.2     458  TRUE normal normal   high
##     width readspertx mean1 mean2 mean3  case     result
## 175  5061       2024  2024  2024  2024 oneDE Failed.DER
## 176  4779       1912  1912  1912  3824 oneDE Success.DE
## 9 of the 30 segments are from gene with id 6523
tail(sort(table(names(segs[seg.fn]))))
## 
##  4689  7494 10738  3761 25817  6523 
##     1     1     2     3     4     9
## Cases of the genes with at least one FN segment
table(tapply(subset(chosen, gene_id %in% names(seg.fn))$case, subset(chosen, gene_id %in% names(seg.fn))$gene_id, unique))
## 
##   bothDE    oneDE singleDE 
##        1       10        5
## Type of gene where the segments come from. Mostly oneDE genes
table(sapply(names(segs[seg.fn]), function(x) { unique(chosen$case[chosen$gene_id == x]) }))
## 
##   bothDE    oneDE singleDE 
##        1       24        5

Plots

Coverage plots with F-statistics shown at the bottom for the false negative exonic segments grouped by their gene.

DERs vs segments

We can check how many segments each DER overlaps. Ideally they should all overlap at least one segment, but there are some cases where this does not happen (6 in this case, as shown in the table below). This is possibly because of small mismatches between the transcripts and the actual mRNA used in the simulation. Alternatively, alignment problems could explain such cases.

## Do DERs overlap segments that are set to be DE?
## Per DER: NA when it overlaps no segment, FALSE when it only overlaps
## segments set not to be DE, TRUE when it overlaps at least one DE segment.
der.ov <- findOverlaps(fullRegions, segs)
fullRegions$overlap <- local({
    status <- rep(NA, length(fullRegions))
    ## Every DER with at least one hit starts as FALSE ...
    status[queryHits(der.ov)] <- FALSE
    ## ... and becomes TRUE when any of its hits is a DE segment
    status[queryHits(der.ov)[which(segs$DE[subjectHits(der.ov)])]] <- TRUE
    status
})

## Do DERs overlap at least one segment?
table(countOverlaps(fullRegions, segs))
## 
##   0   1   2   3   4 
##   6 451   9   1   2
## Widths of DERs not overlapping any segment
width(fullRegions[is.na(fullRegions$overlap)])
## [1] 39  4  3  1  1  1
## Do these DERs have a significant Q-value?
table(fullRegions$significantFDR[is.na(fullRegions$overlap)])
## 
##  TRUE FALSE 
##     1     5

Minimum 10bp

We can repeat the same exploration but now requiring at least a 10bp overlap.

## Do DERs overlap (by at least 10bp) segments that are set to be DE?
## Per DER: NA when it has no such overlap, FALSE when it only overlaps
## segments set not to be DE, TRUE when it overlaps at least one DE segment.
der.ov10 <- findOverlaps(fullRegions, segs, minoverlap = 10)
fullRegions$overlap10 <- local({
    status <- rep(NA, length(fullRegions))
    ## Every DER with at least one hit starts as FALSE ...
    status[queryHits(der.ov10)] <- FALSE
    ## ... and becomes TRUE when any of its hits is a DE segment
    status[queryHits(der.ov10)[which(segs$DE[subjectHits(der.ov10)])]] <- TRUE
    status
})

## How many ders are smaller than 10bp?
table(width(fullRegions) < 10)
## 
## FALSE  TRUE 
##   208   261
## How many exonic segments are smaller than 10bp?
table(width(segs) < 10)
## 
## FALSE  TRUE 
##   279     1
## Do DERs (min 10bp long) overlap at least one segment?
table(countOverlaps(fullRegions[width(fullRegions) >= 10], segs, minoverlap = 10))
## 
##   0   1   2   3   4 
##   1 197   7   2   1
## Widths of DERs not overlapping any segment
width(fullRegions[is.na(fullRegions$overlap10) & width(fullRegions) >= 10])
## [1] 39
## Do these DERs have a significant Q-value?
table(fullRegions$significantFDR[is.na(fullRegions$overlap10) & width(fullRegions) >= 10])
## 
##  TRUE FALSE 
##     1     0

Minimum 20bp

And similarly with a minimum overlap of 20bp.

## Do DERs overlap (by at least 20bp) segments that are set to be DE?
## Per DER: NA when it has no such overlap, FALSE when it only overlaps
## segments set not to be DE, TRUE when it overlaps at least one DE segment.
der.ov20 <- findOverlaps(fullRegions, segs, minoverlap = 20)
fullRegions$overlap20 <- local({
    status <- rep(NA, length(fullRegions))
    ## Every DER with at least one hit starts as FALSE ...
    status[queryHits(der.ov20)] <- FALSE
    ## ... and becomes TRUE when any of its hits is a DE segment
    status[queryHits(der.ov20)[which(segs$DE[subjectHits(der.ov20)])]] <- TRUE
    status
})

## How many ders are smaller than 20bp?
table(width(fullRegions) < 20)
## 
## FALSE  TRUE 
##   169   300
## How many exonic segments are smaller than 20bp?
table(width(segs) < 20)
## 
## FALSE  TRUE 
##   277     3
## Do DERs (min 20bp long) overlap at least one segment?
table(countOverlaps(fullRegions[width(fullRegions) >= 20], segs, minoverlap = 20))
## 
##   0   1   2   3   4 
##   1 161   4   2   1
## Widths of DERs not overlapping any segment
width(fullRegions[is.na(fullRegions$overlap20) & width(fullRegions) >= 20])
## [1] 39
## Do these DERs have a significant Q-value?
table(fullRegions$significantFDR[is.na(fullRegions$overlap20) & width(fullRegions) >= 20])
## 
##  TRUE FALSE 
##     1     0

DER correctness

However, the main result is whether the DERs overlap segments expected to be DE. Note that for this comparison, DERs are unstranded and could potentially overlap two segments from different strands where only one of them was set to be DE.

## Check by whether the DER has a Q-value < 0.05
addmargins(table('Overlaps a DE segment' = fullRegions$overlap, 'Q-value < 0.05' = fullRegions$significantFDR))
##                      Q-value < 0.05
## Overlaps a DE segment TRUE FALSE Sum
##                 FALSE    0    30  30
##                 TRUE   152   281 433
##                 Sum    152   311 463

Regardless of whether the DER p-value is significant, we see that 6.48 percent of the DERs overlapping at least one segment, incorrectly overlap a segment set not to be DE.

Minimum 10bp

Out of the 469 DERs, only 208 are at least 10bp long. They are compared against 279 exonic segments at least 10bp long out of the total 280. Only 1 DER 10bp or longer does not overlap any exonic segment regardless of its DE status.

## Check by whether the DER has a Q-value < 0.05
addmargins(table('Overlaps a DE segment' = fullRegions$overlap10, 'Q-value < 0.05' = fullRegions$significantFDR))
##                      Q-value < 0.05
## Overlaps a DE segment TRUE FALSE Sum
##                 FALSE    0     4   4
##                 TRUE   152    51 203
##                 Sum    152    55 207

Regardless of whether the DER p-value is significant, we see that 1.93 percent of the DERs overlapping at least one segment (min overlap 10bp), incorrectly overlap a segment set not to be DE.

Minimum 20bp

Out of the 469 DERs, only 169 are at least 20bp long. They are compared against 277 exonic segments at least 20bp long out of the total 280. Only 1 DER 20bp or longer does not overlap any exonic segment regardless of its DE status.

## Check by whether the DER has a Q-value < 0.05
addmargins(table('Overlaps a DE segment' = fullRegions$overlap20, 'Q-value < 0.05' = fullRegions$significantFDR))
##                      Q-value < 0.05
## Overlaps a DE segment TRUE FALSE Sum
##                 FALSE    0     1   1
##                 TRUE   152    15 167
##                 Sum    152    16 168

Regardless of whether the DER p-value is significant, we see that 0.6 percent of the DERs overlapping at least one segment (min overlap 20bp), incorrectly overlap a segment set not to be DE.

Conclusions

The observed FDR is lower than 0.05, which is what we would expect.

Reproducibility

## Reproducibility info
Sys.time()
## [1] "2015-04-06 21:12:35 EDT"
proc.time()
##    user  system elapsed 
## 255.606   2.562 260.842
options(width = 120)
session_info()
## Session info-----------------------------------------------------------------------------------------------------------
##  setting  value                                             
##  version  R Under development (unstable) (2014-11-01 r66923)
##  system   x86_64, darwin10.8.0                              
##  ui       X11                                               
##  language (EN)                                              
##  collate  en_US.UTF-8                                       
##  tz       America/New_York
## Packages---------------------------------------------------------------------------------------------------------------
##  package                           * version  date       source                                    
##  acepack                             1.3.3.3  2013-05-03 CRAN (R 3.2.0)                            
##  AnnotationDbi                     * 1.29.21  2015-04-03 Bioconductor                              
##  Biobase                           * 2.27.3   2015-03-27 Bioconductor                              
##  BiocGenerics                      * 0.13.11  2015-04-03 Bioconductor                              
##  BiocParallel                        1.1.21   2015-03-24 Bioconductor                              
##  biomaRt                             2.23.5   2014-11-22 Bioconductor                              
##  Biostrings                          2.35.12  2015-03-26 Bioconductor                              
##  biovizBase                          1.15.3   2015-03-30 Bioconductor                              
##  bitops                              1.0.6    2013-08-17 CRAN (R 3.2.0)                            
##  BSgenome                            1.35.20  2015-03-27 Bioconductor                              
##  bumphunter                        * 1.7.6    2015-03-13 Github (lcolladotor/bumphunter@37d10e7)   
##  cluster                             2.0.1    2015-01-31 CRAN (R 3.2.0)                            
##  codetools                           0.2.11   2015-03-10 CRAN (R 3.2.0)                            
##  colorspace                          1.2.6    2015-03-11 CRAN (R 3.2.0)                            
##  DBI                               * 0.3.1    2014-09-24 CRAN (R 3.2.0)                            
##  derfinder                         * 1.1.18   2015-04-01 Bioconductor                              
##  derfinderHelper                   * 1.1.6    2015-03-15 Bioconductor                              
##  derfinderPlot                     * 1.1.6    2015-03-14 Github (lcolladotor/derfinderPlot@1319754)
##  devtools                          * 1.6.1    2014-10-07 CRAN (R 3.2.0)                            
##  dichromat                           2.0.0    2013-01-24 CRAN (R 3.2.0)                            
##  digest                              0.6.8    2014-12-31 CRAN (R 3.2.0)                            
##  doRNG                               1.6      2014-03-07 CRAN (R 3.2.0)                            
##  evaluate                            0.5.5    2014-04-29 CRAN (R 3.2.0)                            
##  foreach                           * 1.4.2    2014-04-11 CRAN (R 3.2.0)                            
##  foreign                             0.8.63   2015-02-20 CRAN (R 3.2.0)                            
##  formatR                             1.0      2014-08-25 CRAN (R 3.2.0)                            
##  Formula                             1.2.0    2015-01-20 CRAN (R 3.2.0)                            
##  futile.logger                       1.4      2015-03-21 CRAN (R 3.2.0)                            
##  futile.options                      1.0.0    2010-04-06 CRAN (R 3.2.0)                            
##  GenomeInfoDb                      * 1.3.16   2015-03-27 Bioconductor                              
##  GenomicAlignments                   1.3.33   2015-04-06 Bioconductor                              
##  GenomicFeatures                   * 1.19.36  2015-03-30 Bioconductor                              
##  GenomicFiles                        1.3.15   2015-04-01 Bioconductor                              
##  GenomicRanges                     * 1.19.52  2015-04-04 Bioconductor                              
##  GGally                              0.4.8    2014-08-26 CRAN (R 3.2.0)                            
##  ggbio                               1.15.3   2015-04-02 Bioconductor                              
##  ggplot2                             1.0.0    2014-05-21 CRAN (R 3.2.0)                            
##  graph                               1.45.3   2015-04-03 Bioconductor                              
##  gridExtra                           0.9.1    2012-08-09 CRAN (R 3.2.0)                            
##  gtable                              0.1.2    2012-12-05 CRAN (R 3.2.0)                            
##  Hmisc                               3.14.5   2014-09-12 CRAN (R 3.2.0)                            
##  htmltools                           0.2.6    2014-09-08 CRAN (R 3.2.0)                            
##  IRanges                           * 2.1.43   2015-03-07 Bioconductor                              
##  iterators                         * 1.0.7    2014-04-11 CRAN (R 3.2.0)                            
##  knitr                             * 1.7      2014-10-13 CRAN (R 3.2.0)                            
##  knitrBootstrap                      1.0.0    2014-11-03 Github (jimhester/knitrBootstrap@76c41f0) 
##  lambda.r                            1.1.7    2015-03-20 CRAN (R 3.2.0)                            
##  lattice                             0.20.31  2015-03-30 CRAN (R 3.2.0)                            
##  latticeExtra                        0.6.26   2013-08-15 CRAN (R 3.2.0)                            
##  locfit                            * 1.5.9.1  2013-04-20 CRAN (R 3.2.0)                            
##  markdown                            0.7.4    2014-08-24 CRAN (R 3.2.0)                            
##  MASS                                7.3.40   2015-03-21 CRAN (R 3.2.0)                            
##  Matrix                              1.2.0    2015-04-04 CRAN (R 3.2.0)                            
##  matrixStats                         0.14.0   2015-02-14 CRAN (R 3.2.0)                            
##  mime                                0.3      2015-03-29 CRAN (R 3.2.0)                            
##  munsell                             0.4.2    2013-07-11 CRAN (R 3.2.0)                            
##  nnet                                7.3.9    2015-02-11 CRAN (R 3.2.0)                            
##  org.Hs.eg.db                      * 3.0.0    2014-09-26 Bioconductor                              
##  OrganismDbi                         1.9.15   2015-03-30 Bioconductor                              
##  pkgmaker                            0.22     2014-05-14 CRAN (R 3.2.0)                            
##  plyr                                1.8.1    2014-02-26 CRAN (R 3.2.0)                            
##  proto                               0.3.10   2012-12-22 CRAN (R 3.2.0)                            
##  qvalue                              1.99.1   2015-04-04 Bioconductor                              
##  RBGL                                1.43.0   2014-10-14 Bioconductor                              
##  RColorBrewer                        1.1.2    2014-12-07 CRAN (R 3.2.0)                            
##  Rcpp                                0.11.5   2015-03-06 CRAN (R 3.2.0)                            
##  RCurl                               1.95.4.5 2014-12-28 CRAN (R 3.2.0)                            
##  registry                            0.2      2012-01-24 CRAN (R 3.2.0)                            
##  reshape                             0.8.5    2014-04-23 CRAN (R 3.2.0)                            
##  reshape2                            1.4.1    2014-12-06 CRAN (R 3.2.0)                            
##  rmarkdown                         * 0.3.3    2014-09-17 CRAN (R 3.2.0)                            
##  rngtools                            1.2.4    2014-03-06 CRAN (R 3.2.0)                            
##  rpart                               4.1.9    2015-02-24 CRAN (R 3.2.0)                            
##  Rsamtools                           1.19.49  2015-03-27 Bioconductor                              
##  RSQLite                           * 1.0.0    2014-10-25 CRAN (R 3.2.0)                            
##  rstudioapi                          0.2      2014-12-31 CRAN (R 3.2.0)                            
##  rtracklayer                         1.27.11  2015-04-01 Bioconductor                              
##  S4Vectors                         * 0.5.22   2015-03-06 Bioconductor                              
##  scales                              0.2.4    2014-04-22 CRAN (R 3.2.0)                            
##  stringr                             0.6.2    2012-12-06 CRAN (R 3.2.0)                            
##  survival                            2.38.1   2015-02-24 CRAN (R 3.2.0)                            
##  TxDb.Hsapiens.UCSC.hg19.knownGene * 3.0.0    2014-09-26 Bioconductor                              
##  VariantAnnotation                   1.13.46  2015-03-26 Bioconductor                              
##  XML                                 3.98.1.1 2013-06-20 CRAN (R 3.2.0)                            
##  xtable                              1.7.4    2014-09-12 CRAN (R 3.2.0)                            
##  XVector                           * 0.7.4    2015-02-08 Bioconductor                              
##  yaml                                2.1.13   2014-06-12 CRAN (R 3.2.0)                            
##  zlibbioc                            1.13.3   2015-03-23 Bioconductor