gsub 在 R 中产生一个范围而不是所需的表达式
gsub producing a range rather than desired expression in R
我想读取一个表格(table),然后使用 gsub 返回文本的一部分。我知道 gsub 需要字符向量作为输入。但我没有得到所需的样本列表(如 'C516_A1_B1')和 pat 列表(如 'C516'),而是得到了 '1:5'。解决此问题的最简单方法是什么?谢谢!
# Question code: read the list of BAM file names into a data frame and try
# to extract the sample id and the patient id from each name.
bamlist <- read.table('pathtotxtfile.txt')
# NOTE(review): the loop variable `y` (one column of `bamlist` per iteration)
# is never used; gsub() is handed the whole data frame instead. This is the
# call the question reports as returning '1:5' instead of the sample names.
for (y in bamlist) {
  # Backslashes must be doubled inside R string literals: '\\S', '\\d' and
  # the backreference '\\1'. A single backslash is either a parse error
  # ('\S', '\d') or a control character ('\1').
  samp <- gsub('EPICC_(C\\S+)_S1\\S+$','\\1', bamlist)
  pat <- gsub('(C\\d+)_\\S+$','\\1', samp)
}
列表:
EPICC_C516_A1_B1_S1-GRCh38.bam
EPICC_C516_A1_G4_S1-GRCh38.bam
EPICC_C516_B1_G7_S1-GRCh38.bam
EPICC_C516_B1_G8_S1-GRCh38.bam
EPICC_C516_B3_B1_S1-GRCh38.bam
为什么要用循环?sub 对参数 x 是向量化的。
# sub() is vectorized over its `x` argument, so no loop is needed.
# The replacement must be written "\\1" (escaped backslash) to refer to the
# first capture group; "\1" would be the control character U+0001.

# Drop the leading "<prefix>_" and the trailing "_<suffix>" field, keeping
# everything in between as the sample id.
samp <- sub("^[^_]*_(.*)_[^_]*$", "\\1", bamlist)
# Keep only the text before the first underscore of the sample id.
pat <- sub("(^[^_]+)_.*$", "\\1", samp)
samp
#[1] "C516_A1_B1" "C516_A1_G4" "C516_B1_G7" "C516_B1_G8"
#[5] "C516_B3_B1"
pat
#[1] "C516" "C516" "C516" "C516" "C516"
数据
# Example input: five BAM file names read from an inline string.
# scan() splits the text on whitespace and, with a character `what`,
# returns one character element per file name.
bamlist <- scan(
  text = "
EPICC_C516_A1_B1_S1-GRCh38.bam
EPICC_C516_A1_G4_S1-GRCh38.bam
EPICC_C516_B1_G7_S1-GRCh38.bam
EPICC_C516_B1_G8_S1-GRCh38.bam
EPICC_C516_B3_B1_S1-GRCh38.bam
",
  what = ""
)
编辑
根据用户 @akrun 的评论,下面是将上述代码应用于 data.frame 的方法。
# Apply the same extraction to every column of the data frame `bamlist`.
# lapply() iterates over the columns, so the result is a list with one
# data frame (columns `samp` and `pat`) per input column.
lapply(bamlist, function(y){
  # "\\1" (escaped backslash) refers to the first capture group;
  # "\1" would be the control character U+0001.
  samp <- sub("^[^_]*_(.*)_[^_]*$", "\\1", y)
  pat <- sub("(^[^_]+)_.*$", "\\1", samp)
  data.frame(samp = samp, pat = pat)
})
#$X
#        samp  pat
#1 C516_A1_B1 C516
#2 C516_A1_G4 C516
#3 C516_B1_G7 C516
#4 C516_B1_G8 C516
#5 C516_B3_B1 C516
数据现在是
# Example input for the data.frame variant: the same five file names are
# read into the character vector `X`, which is then wrapped in a
# one-column data frame whose column is named "X".
X <- scan(
  text = "
EPICC_C516_A1_B1_S1-GRCh38.bam
EPICC_C516_A1_G4_S1-GRCh38.bam
EPICC_C516_B1_G7_S1-GRCh38.bam
EPICC_C516_B1_G8_S1-GRCh38.bam
EPICC_C516_B3_B1_S1-GRCh38.bam
",
  what = ""
)
bamlist <- data.frame(X = X)
我想读取一个表格(table),然后使用 gsub 返回文本的一部分。我知道 gsub 需要字符向量作为输入。但我没有得到所需的样本列表(如 'C516_A1_B1')和 pat 列表(如 'C516'),而是得到了 '1:5'。解决此问题的最简单方法是什么?谢谢!
# Question code: read the list of BAM file names into a data frame and try
# to extract the sample id and the patient id from each name.
bamlist <- read.table('pathtotxtfile.txt')
# NOTE(review): the loop variable `y` (one column of `bamlist` per iteration)
# is never used; gsub() is handed the whole data frame instead. This is the
# call the question reports as returning '1:5' instead of the sample names.
for (y in bamlist) {
  # Backslashes must be doubled inside R string literals: '\\S', '\\d' and
  # the backreference '\\1'. A single backslash is either a parse error
  # ('\S', '\d') or a control character ('\1').
  samp <- gsub('EPICC_(C\\S+)_S1\\S+$','\\1', bamlist)
  pat <- gsub('(C\\d+)_\\S+$','\\1', samp)
}
列表:
EPICC_C516_A1_B1_S1-GRCh38.bam
EPICC_C516_A1_G4_S1-GRCh38.bam
EPICC_C516_B1_G7_S1-GRCh38.bam
EPICC_C516_B1_G8_S1-GRCh38.bam
EPICC_C516_B3_B1_S1-GRCh38.bam
为什么要用循环?sub 对参数 x 是向量化的。
# sub() is vectorized over its `x` argument, so no loop is needed.
# The replacement must be written "\\1" (escaped backslash) to refer to the
# first capture group; "\1" would be the control character U+0001.

# Drop the leading "<prefix>_" and the trailing "_<suffix>" field, keeping
# everything in between as the sample id.
samp <- sub("^[^_]*_(.*)_[^_]*$", "\\1", bamlist)
# Keep only the text before the first underscore of the sample id.
pat <- sub("(^[^_]+)_.*$", "\\1", samp)
samp
#[1] "C516_A1_B1" "C516_A1_G4" "C516_B1_G7" "C516_B1_G8"
#[5] "C516_B3_B1"
pat
#[1] "C516" "C516" "C516" "C516" "C516"
数据
# Example input: five BAM file names read from an inline string.
# scan() splits the text on whitespace and, with a character `what`,
# returns one character element per file name.
bamlist <- scan(
  text = "
EPICC_C516_A1_B1_S1-GRCh38.bam
EPICC_C516_A1_G4_S1-GRCh38.bam
EPICC_C516_B1_G7_S1-GRCh38.bam
EPICC_C516_B1_G8_S1-GRCh38.bam
EPICC_C516_B3_B1_S1-GRCh38.bam
",
  what = ""
)
编辑
根据用户 @akrun 的评论,下面是将上述代码应用于 data.frame 的方法。
# Apply the same extraction to every column of the data frame `bamlist`.
# lapply() iterates over the columns, so the result is a list with one
# data frame (columns `samp` and `pat`) per input column.
lapply(bamlist, function(y){
  # "\\1" (escaped backslash) refers to the first capture group;
  # "\1" would be the control character U+0001.
  samp <- sub("^[^_]*_(.*)_[^_]*$", "\\1", y)
  pat <- sub("(^[^_]+)_.*$", "\\1", samp)
  data.frame(samp = samp, pat = pat)
})
#$X
#        samp  pat
#1 C516_A1_B1 C516
#2 C516_A1_G4 C516
#3 C516_B1_G7 C516
#4 C516_B1_G8 C516
#5 C516_B3_B1 C516
数据现在是
# Example input for the data.frame variant: the same five file names are
# read into the character vector `X`, which is then wrapped in a
# one-column data frame whose column is named "X".
X <- scan(
  text = "
EPICC_C516_A1_B1_S1-GRCh38.bam
EPICC_C516_A1_G4_S1-GRCh38.bam
EPICC_C516_B1_G7_S1-GRCh38.bam
EPICC_C516_B1_G8_S1-GRCh38.bam
EPICC_C516_B3_B1_S1-GRCh38.bam
",
  what = ""
)
bamlist <- data.frame(X = X)