Skip to content

Commit 9ce3cdc

Browse files
authored
Add 'strandMode' argument to readGAlignmentsList() (#26)
1 parent 1f8a1c3 commit 9ce3cdc

File tree

3 files changed

+87
-11
lines changed

3 files changed

+87
-11
lines changed

R/readGAlignments.R

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -290,20 +290,54 @@ setMethod("readGAlignmentPairs", "character",
290290

291291
setGeneric("readGAlignmentsList", signature="file",
292292
function(file, index=file, use.names=FALSE, param=ScanBamParam(),
293-
with.which_label=FALSE)
293+
with.which_label=FALSE, strandMode=NA)
294294
standardGeneric("readGAlignmentsList")
295295
)
296296

297-
.matesFromBam <- function(file, use.names, param, what0, with.which_label)
297+
.setRealStrand <- function(gal, param, strandMode) {
298+
if (strandMode == 0L)
299+
strand(gal) <- Rle(strand("*"), length(gal))
300+
else {
301+
gal_mcols <- mcols(gal, use.names=FALSE)
302+
if (is.null(gal_mcols$flag))
303+
warning("Flag information missing in GAlignmentsList object. Strand information might not be accurate.")
304+
else {
305+
mask_first_mate <- bamFlagTest(gal_mcols$flag, "isFirstMateRead")
306+
if (strandMode == 1L)
307+
strand(gal[!mask_first_mate]) <-
308+
invertStrand(strand(gal[!mask_first_mate]))
309+
else if (strandMode == 2L)
310+
strand(gal[mask_first_mate]) <-
311+
invertStrand(strand(gal[mask_first_mate]))
312+
else
313+
stop("strandMode should be either 0, 1 or 2.")
314+
}
315+
}
316+
## if the user didn't request the 'flag' info
317+
## then remove it to reduce memory footprint
318+
if (!"flag" %in% bamWhat(param))
319+
mcols(gal)$flag <- NULL
320+
gal
321+
}
322+
.matesFromBam <- function(file, use.names, param, what0, with.which_label,
323+
strandMode)
298324
{
299325
bamcols <- .load_bamcols_from_BamFile(file, param, what0,
300326
with.which_label=with.which_label)
301327
seqlengths <- .load_seqlengths_from_BamFile(file, levels(bamcols$rname))
302328
gal <- GAlignments(seqnames=bamcols$rname, pos=bamcols$pos,
303329
cigar=bamcols$cigar, strand=bamcols$strand,
304330
seqlengths=seqlengths)
305-
gal <- .bindExtraData(gal, use.names=FALSE, param, bamcols,
306-
with.which_label=with.which_label)
331+
if (!is.na(strandMode)) {
332+
flag0 <- scanBamFlag()
333+
what0 <- "flag"
334+
param2 <- .normargParam(param, flag0, what0)
335+
gal <- .bindExtraData(gal, use.names=FALSE, param2, bamcols,
336+
with.which_label=with.which_label)
337+
gal <- .setRealStrand(gal, param, strandMode)
338+
} else
339+
gal <- .bindExtraData(gal, use.names=FALSE, param, bamcols,
340+
with.which_label=with.which_label)
307341
if (asMates(file)) {
308342
f <- factor(bamcols$groupid)
309343
gal <- unname(split(gal, f))
@@ -320,30 +354,34 @@ setGeneric("readGAlignmentsList", signature="file",
320354

321355
.readGAlignmentsList.BamFile <- function(file, index=file,
322356
use.names=FALSE, param=ScanBamParam(),
323-
with.which_label=FALSE)
357+
with.which_label=FALSE,
358+
strandMode=NA)
324359
{
325360
if (!isTRUEorFALSE(use.names))
326361
stop("'use.names' must be TRUE or FALSE")
327362
if (!asMates(file))
328363
bamWhat(param) <- setdiff(bamWhat(param),
329364
c("groupid", "mate_status"))
330365
what0 <- c("rname", "strand", "pos", "cigar", "groupid", "mate_status")
366+
if (!is.na(strandMode))
367+
what0 <- c(what0, "flag")
331368
if (use.names)
332369
what0 <- c(what0, "qname")
333-
.matesFromBam(file, use.names, param, what0, with.which_label)
370+
.matesFromBam(file, use.names, param, what0, with.which_label, strandMode)
334371
}
335372

336373
setMethod("readGAlignmentsList", "BamFile", .readGAlignmentsList.BamFile)
337374

338375
setMethod("readGAlignmentsList", "character",
339376
function(file, index=file, use.names=FALSE, param=ScanBamParam(),
340-
with.which_label=FALSE)
377+
with.which_label=FALSE, strandMode=NA)
341378
{
342379
bam <- .open_BamFile(file, index=index, asMates=TRUE, param=param)
343380
on.exit(close(bam))
344381
readGAlignmentsList(bam, character(0),
345382
use.names=use.names, param=param,
346-
with.which_label=with.which_label)
383+
with.which_label=with.which_label,
384+
strandMode=strandMode)
347385
}
348386
)
349387

inst/unitTests/test_readGAlignmentsList.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,18 @@ test_readGAlignmentsList_which <- function()
182182
rng2 <- as.vector(mcols(unlist(target1[2]))$which_label)
183183
checkTrue(all(rng2 %in% my_ROI_labels[4]))
184184
}
185+
186+
text_readGAlignmentsList_findOverlaps <- function()
187+
{
188+
fl <- system.file("extdata", "ex1.bam", package="Rsamtools")
189+
bf <- BamFile(fl, asMates=TRUE)
190+
galist <- readGAlignmentsList(bf, strandMode=1L)
191+
f <- GRanges(seqnames="seq1", IRanges(30, 250), strand="-")
192+
ov <- findOverlaps(galist[1], f, ignore.strand=FALSE)
193+
checkIdentical(0L, length(ov))
194+
195+
galist <- readGAlignmentsList(bf, strandMode=2L)
196+
f <- GRanges(seqnames="seq1", IRanges(30, 250), strand="-")
197+
ov <- findOverlaps(galist[1], f, ignore.strand=FALSE)
198+
checkIdentical(1L, length(ov))
199+
}

man/readGAlignments.Rd

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ readGAlignmentPairs(file, index=file, use.names=FALSE, param=NULL,
3434
with.which_label=FALSE, strandMode=1)
3535

3636
readGAlignmentsList(file, index=file, use.names=FALSE,
37-
param=ScanBamParam(), with.which_label=FALSE)
37+
param=ScanBamParam(), with.which_label=FALSE,
38+
strandMode=NA)
3839

3940
readGappedReads(file, index=file, use.names=FALSE, param=NULL,
4041
with.which_label=FALSE)
@@ -103,8 +104,12 @@ readGappedReads(file, index=file, use.names=FALSE, param=NULL,
103104
represented as a factor-\link[S4Vectors]{Rle}.
104105
}
105106
\item{strandMode}{
106-
Strand mode to set on the returned \link{GAlignmentPairs} object.
107-
See \code{?\link{strandMode}} for more information.
107+
Strand mode to set on the returned \link{GAlignmentPairs} or
108+
\link{GAlignmentsList} object. Note that the default value for
109+
this parameter is different for \code{readGAlignmentPairs()} and
110+
\code{readGAlignmentsList()}.
111+
See details below on \code{readGAlignmentsList()} and
112+
\code{?\link{strandMode}} for more information.
108113
}
109114
}
110115

@@ -155,6 +160,12 @@ readGappedReads(file, index=file, use.names=FALSE, param=NULL,
155160
See the \sQuote{asMates} section of \code{?\link[Rsamtools]{BamFile}}
156161
in the \pkg{Rsamtools} package for details.
157162

163+
Note that, by default, \code{strandMode=NA}, which is different to
164+
the default value in \code{readGAlignmentPairs()} and which implies
165+
that, by default, the strand values in the returned
166+
\code{GAlignmentsList} object correspond to the original strand of
167+
the reads.
168+
158169
\item \code{readGappedReads} reads a file containing aligned reads as a
159170
\link{GappedReads} object. See \code{?\link{GappedReads}} for a
160171
description of \link{GappedReads} objects.
@@ -398,7 +409,19 @@ readGAlignmentsList(bamfile)
398409
## Use a 'param' to fine tune the results.
399410
param <- ScanBamParam(flag=scanBamFlag(isProperPair=TRUE))
400411
galist2 <- readGAlignmentsList(bam, param=param)
412+
galist2[1:3]
401413
length(galist2)
414+
table(elementNROWS(galist2))
415+
416+
## For paired-end data, we can set the 'strandMode' parameter to
417+
## infer the strand of a pair from the strand of the first and
418+
## last alignments in the pair
419+
galist3 <- readGAlignmentsList(bam, param=param, strandMode=0)
420+
galist3[1:3]
421+
galist4 <- readGAlignmentsList(bam, param=param, strandMode=1)
422+
galist4[1:3]
423+
galist5 <- readGAlignmentsList(bam, param=param, strandMode=2)
424+
galist5[1:3]
402425

403426
## ---------------------------------------------------------------------
404427
## D. COMPARING 4 STRATEGIES FOR LOADING THE ALIGNMENTS THAT OVERLAP

0 commit comments

Comments
 (0)