## Check that data was loaded
stopifnot(all(c('fullRegions', 'regionMat', 'analysisPath') %in% ls()))
## Fix DERs
ders <- fullRegions
names(ders) <- NULL
## Fix region matrices
regs <- unlist(GRangesList(lapply(regionMat, '[[', 'regions')))
names(regs) <- NULL
## Assign seqlengths
data(hg19Ideogram, package = 'biovizBase', envir = environment())
seqlengths(ders) <- seqlengths(hg19Ideogram)[names(seqlengths(ders))]
seqlengths(regs) <- seqlengths(hg19Ideogram)[names(seqlengths(regs))]
## Sort
ders <- sort(ders)
regs <- sort(regs)
## Check names match
identical(seqlengths(regs), seqlengths(ders))
## [1] TRUE
## Save the raw data
save(ders, file = file.path(analysisPath, 'dersOriginal.Rdata'))
save(regs, file = file.path(analysisPath, 'regsOriginal.Rdata'))
## Filter out those with less than 3bp
c('# ders under 3bp' = sum(width(ders) < 3), '# regs under 3bp' = sum(width(regs) < 3))
## # ders under 3bp # regs under 3bp
## 16078 36576
ders <- ders[width(ders) >= 3]
regs <- regs[width(regs) >= 3]
## Save data used
save(ders, file = file.path(analysisPath, 'ders.Rdata'))
save(regs, file = file.path(analysisPath, 'regs.Rdata'))
Construct logical indexes for DERs and regionMatrix regions.
Number of base-pairs in each index. Summary first, then overall info for the genome (in number of bases, then in percent of the genome), and finally results in interactive table.
## Merge all the indexes
index.all <- mapply(function(der, reg) {
both <- der & reg
only.der <- der & !reg
only.reg <- !der & reg
none <- !der & !reg
res <- list('both' = both, 'only.der' = only.der, 'only.reg' = only.reg,
'none' = none, 'all.der' = der, 'all.reg' = reg)
return(list(res))
}, index.ders, index.regs)
## Find number of base-pairs in each index
index.num <- data.frame(do.call(rbind, lapply(index.all, function(x) { sapply(x, sum)})))
index.num$chrLen <- seqlengths(ders)
index.num$chr <- rownames(index.num)
rownames(index.num) <- NULL
## Print info
summary(index.num)
## both only.der only.reg none
## Min. : 471 Min. : 10.0 Min. : 17706 Min. : 48039170
## 1st Qu.: 4646 1st Qu.: 663.8 1st Qu.: 300546 1st Qu.: 79841586
## Median : 6343 Median : 940.0 Median : 374345 Median :133877898
## Mean : 7251 Mean : 921.5 Mean : 420243 Mean :128558143
## 3rd Qu.: 9428 3rd Qu.:1097.8 3rd Qu.: 534129 3rd Qu.:161639197
## Max. :27552 Max. :2347.0 Max. :1040506 Max. :248194137
## all.der all.reg chrLen chr
## Min. : 481 Min. : 18177 Min. : 48129895 Length:24
## 1st Qu.: 5413 1st Qu.: 304750 1st Qu.: 80415720 Class :character
## Median : 7281 Median : 381242 Median :134429206 Mode :character
## Mean : 8172 Mean : 427494 Mean :128986559
## 3rd Qu.:10670 3rd Qu.: 541522 3rd Qu.:162132764
## Max. :28918 Max. :1054137 Max. :249250621
## Overall info
overallInfo <- colSums(index.num[, -ncol(index.num)])
overallInfo
Number of segments per index. First summary, then results for genome, and finally an interactive table.
## Find number of segments in each index
index.seg <- data.frame(do.call(rbind, lapply(index.all, function(x) {
sapply(x, function(y) {
sum(runValue(y))
})
})))
index.seg$chr <- rownames(index.seg)
rownames(index.seg) <- NULL
## Print info
summary(index.seg)
## both only.der only.reg none
## Min. : 26.0 Min. : 2.0 Min. : 391 Min. : 372
## 1st Qu.: 357.5 1st Qu.:113.2 1st Qu.: 5697 1st Qu.: 5486
## Median : 508.0 Median :136.5 Median : 7558 Median : 7276
## Mean : 546.9 Mean :143.9 Mean : 7978 Mean : 7627
## 3rd Qu.: 694.2 3rd Qu.:171.2 3rd Qu.:10105 3rd Qu.: 9745
## Max. :1726.0 Max. :372.0 Max. :19596 Max. :18908
## all.der all.reg chr
## Min. : 28.0 Min. : 369 Length:24
## 1st Qu.: 456.0 1st Qu.: 5410 Class :character
## Median : 623.5 Median : 7158 Mode :character
## Mean : 662.5 Mean : 7517
## 3rd Qu.: 808.8 3rd Qu.: 9615
## Max. :1885.0 Max. :18604
## Overall info
colSums(index.seg[, -ncol(index.seg)])
Summary of the segment widths for each index. First the overall summary, then the results for each index.
## Get an idea of the width of the segments in each index
index.width <- data.frame(do.call(rbind, lapply(index.all, function(x) {
tmp <- data.frame(do.call(rbind, lapply(x, function(y) {
summary(runLength(y)[runValue(y)])
})), check.names = FALSE)
tmp$index <- names(x)
rownames(tmp) <- NULL
return(tmp)
})), check.names = FALSE)
index.width$chr <- rep(names(seqlengths(ders)), each = 6)
rownames(index.width) <- NULL
## Print info
summary(index.width)
## Min. 1st Qu. Median Mean
## Min. :1.000 Min. : 3.00 Min. : 4.00 Min. : 5.00
## 1st Qu.:1.000 1st Qu.: 4.00 1st Qu.: 6.00 1st Qu.: 11.63
## Median :1.000 Median : 8.00 Median : 23.00 Median : 31.70
## Mean :1.694 Mean : 8.26 Mean : 33.74 Mean : 4198.87
## 3rd Qu.:3.000 3rd Qu.:12.00 3rd Qu.: 36.00 3rd Qu.: 56.19
## Max. :3.000 Max. :39.75 Max. :259.00 Max. :159600.00
## 3rd Qu. Max. index
## Min. : 6.00 Min. : 7 Length:144
## 1st Qu.: 13.00 1st Qu.: 98 Class :character
## Median : 44.25 Median : 307 Mode :character
## Mean : 276.39 Mean : 1609538
## 3rd Qu.: 70.00 3rd Qu.: 2746
## Max. :3379.00 Max. :30260000
## chr
## Length:144
## Class :character
## Mode :character
##
##
##
## setting value
## version R version 3.1.1 Patched (2014-10-16 r66782)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz