函数只计算 R 数据的第一行 table
Function only evaluates first row of R data table
我有一个简单的数据 table apple
,其中有许多数字缩写为 40.08B、40.08M、400.08K 等。我需要删除这些字母并将它们替换为适当数量的零(即 400.08K 变成 400080),所以我写了下面的代码:
apple2 <- dplyr::case_when(
stringr::str_detect(apple[,-1], 'B') ~ readr::parse_number(as.character(apple[,-1]), na = c("", "NA")) * 1e9,
stringr::str_detect(apple[,-1], 'M') ~ readr::parse_number(as.character(apple[,-1]), na = c("", "NA")) * 1e6,
stringr::str_detect(apple[,-1], 'K') ~ readr::parse_number(as.character(apple[,-1]), na = c("", "NA")) * 1e3,
TRUE ~ parse_number(as.character(apple[,-1]), na = c("", "NA"), trim_ws = TRUE)
)
该代码在查找字符串并将其转换为适当的数字方面按预期工作,但它仅在数据的第一行上运行 table。此外,它删除了 headers。错误信息如下:
argument is not an atomic vector; coercingargument is not an atomic vector; coercingargument is not an atomic vector; coercing[1]
我试了几个小时来解决这个问题,但无济于事 - 我做错了什么?谢谢!
我们可以使用 str_replace_all
而不是多个 str_detect
。将列中的 'B'、'M'、'K' 子字符串匹配并替换为 str_replace_all
中的命名向量,然后是 separate
列,然后进行乘法运算在分隔的列上
library(stringr)
library(dplyr)
library(tidyr)
apple %>%
mutate(col1 = str_replace_all(col1, setNames(c(' 1e9', ' 1e6', ' 1e3'),
c('B', 'M', 'K')))) %>%
separate(col1, into = c('col1', 'col2'), convert = TRUE) %>%
transmute(col1 = col1 * col2)
-输出
# col1
#1 4.0e+10
#2 2.0e+08
#3 2.0e+06
#4 4.0e+05
#5 3.6e+10
数据
apple <- structure(list(col1 = c("40B", "200M", "2M", "400K", "36B")),
class = "data.frame", row.names = c(NA,
-5L))
您使用 case_when 的方式有点不正统:
## some data:
d <- cbind.data.frame(
id = LETTERS,
matrix(
paste0(
ceiling(runif(26*5, max=999)),
sample( c("","B","K","M"), size=26*5, replace=T )
), nrow=26
)
)
library(stringr)
library(readr)
d %>% mutate( across( -1,
~ case_when(
str_detect(., 'B') ~ parse_number(as.character(.), na = c("", "NA")) * 1e9,
str_detect(., 'M') ~ parse_number(as.character(.), na = c("", "NA")) * 1e6,
str_detect(., 'K') ~ parse_number(as.character(.), na = c("", "NA")) * 1e3,
TRUE ~ parse_number(as.character(.), na = c("", "NA"), trim_ws = TRUE)
)
))
输入数据:
id 1 2 3 4 5
1 A 834 27B 250 881B 988
2 B 313M 506B 309 413 141K
3 C 197 77 824 161B 43K
4 D 845K 172K 745B 922M 145M
5 E 168M 959M 990B 250K 893
6 F 430 687K 368M 10M 824M
7 G 940B 403B 655M 818 777K
8 H 281 833K 86B 849B 16K
9 I 485B 508B 349M 643M 926M
10 J 235B 10B 206M 505K 347M
11 K 897B 727M 405K 987B 674M
12 L 588B 40M 860M 58 934B
13 M 727K 375 188M 728K 201B
14 N 280K 442M 43K 400 445
15 O 988B 388M 530B 702M 240B
16 P 177M 782 410K 254K 758K
17 Q 706K 262 520B 104K 34
18 R 390B 99K 677K 965 635M
19 S 819 115M 920M 580M 295K
20 T 573M 901K 360 7K 88B
21 U 333B 593M 504B 992 241B
22 V 674 192M 841B 644B 659
23 W 524M 581M 692M 41 133
24 X 626K 686M 712K 756M 136B
25 Y 295 468 932M 486B 35K
26 Z 526K 798K 229K 958B 700B
输出:
id 1 2 3 4 5
1 A 8.34e+02 2.70e+10 2.50e+02 8.81e+11 9.88e+02
2 B 3.13e+08 5.06e+11 3.09e+02 4.13e+02 1.41e+05
3 C 1.97e+02 7.70e+01 8.24e+02 1.61e+11 4.30e+04
4 D 8.45e+05 1.72e+05 7.45e+11 9.22e+08 1.45e+08
5 E 1.68e+08 9.59e+08 9.90e+11 2.50e+05 8.93e+02
6 F 4.30e+02 6.87e+05 3.68e+08 1.00e+07 8.24e+08
7 G 9.40e+11 4.03e+11 6.55e+08 8.18e+02 7.77e+05
8 H 2.81e+02 8.33e+05 8.60e+10 8.49e+11 1.60e+04
9 I 4.85e+11 5.08e+11 3.49e+08 6.43e+08 9.26e+08
10 J 2.35e+11 1.00e+10 2.06e+08 5.05e+05 3.47e+08
11 K 8.97e+11 7.27e+08 4.05e+05 9.87e+11 6.74e+08
12 L 5.88e+11 4.00e+07 8.60e+08 5.80e+01 9.34e+11
13 M 7.27e+05 3.75e+02 1.88e+08 7.28e+05 2.01e+11
14 N 2.80e+05 4.42e+08 4.30e+04 4.00e+02 4.45e+02
15 O 9.88e+11 3.88e+08 5.30e+11 7.02e+08 2.40e+11
16 P 1.77e+08 7.82e+02 4.10e+05 2.54e+05 7.58e+05
17 Q 7.06e+05 2.62e+02 5.20e+11 1.04e+05 3.40e+01
18 R 3.90e+11 9.90e+04 6.77e+05 9.65e+02 6.35e+08
19 S 8.19e+02 1.15e+08 9.20e+08 5.80e+08 2.95e+05
20 T 5.73e+08 9.01e+05 3.60e+02 7.00e+03 8.80e+10
21 U 3.33e+11 5.93e+08 5.04e+11 9.92e+02 2.41e+11
22 V 6.74e+02 1.92e+08 8.41e+11 6.44e+11 6.59e+02
23 W 5.24e+08 5.81e+08 6.92e+08 4.10e+01 1.33e+02
24 X 6.26e+05 6.86e+08 7.12e+05 7.56e+08 1.36e+11
25 Y 2.95e+02 4.68e+02 9.32e+08 4.86e+11 3.50e+04
26 Z 5.26e+05 7.98e+05 2.29e+05 9.58e+11 7.00e+11
另请参阅将人类可读字节数转换为数字的其他方法,例如this or perhaps
我有一个简单的数据 table apple
,其中有许多数字缩写为 40.08B、40.08M、400.08K 等。我需要删除这些字母并将它们替换为适当数量的零(即 400.08K 变成 400080),所以我写了下面的代码:
apple2 <- dplyr::case_when(
stringr::str_detect(apple[,-1], 'B') ~ readr::parse_number(as.character(apple[,-1]), na = c("", "NA")) * 1e9,
stringr::str_detect(apple[,-1], 'M') ~ readr::parse_number(as.character(apple[,-1]), na = c("", "NA")) * 1e6,
stringr::str_detect(apple[,-1], 'K') ~ readr::parse_number(as.character(apple[,-1]), na = c("", "NA")) * 1e3,
TRUE ~ parse_number(as.character(apple[,-1]), na = c("", "NA"), trim_ws = TRUE)
)
该代码在查找字符串并将其转换为适当的数字方面按预期工作,但它仅在数据的第一行上运行 table。此外,它删除了 headers。错误信息如下:
argument is not an atomic vector; coercingargument is not an atomic vector; coercingargument is not an atomic vector; coercing[1]
我试了几个小时来解决这个问题,但无济于事 - 我做错了什么?谢谢!
我们可以使用 str_replace_all
而不是多个 str_detect
。将列中的 'B'、'M'、'K' 子字符串匹配并替换为 str_replace_all
中的命名向量,然后是 separate
列,然后进行乘法运算在分隔的列上
library(stringr)
library(dplyr)
library(tidyr)
apple %>%
mutate(col1 = str_replace_all(col1, setNames(c(' 1e9', ' 1e6', ' 1e3'),
c('B', 'M', 'K')))) %>%
separate(col1, into = c('col1', 'col2'), convert = TRUE) %>%
transmute(col1 = col1 * col2)
-输出
# col1
#1 4.0e+10
#2 2.0e+08
#3 2.0e+06
#4 4.0e+05
#5 3.6e+10
数据
apple <- structure(list(col1 = c("40B", "200M", "2M", "400K", "36B")),
class = "data.frame", row.names = c(NA,
-5L))
您使用 case_when 的方式有点不正统:
## some data:
d <- cbind.data.frame(
id = LETTERS,
matrix(
paste0(
ceiling(runif(26*5, max=999)),
sample( c("","B","K","M"), size=26*5, replace=T )
), nrow=26
)
)
library(stringr)
library(readr)
d %>% mutate( across( -1,
~ case_when(
str_detect(., 'B') ~ parse_number(as.character(.), na = c("", "NA")) * 1e9,
str_detect(., 'M') ~ parse_number(as.character(.), na = c("", "NA")) * 1e6,
str_detect(., 'K') ~ parse_number(as.character(.), na = c("", "NA")) * 1e3,
TRUE ~ parse_number(as.character(.), na = c("", "NA"), trim_ws = TRUE)
)
))
输入数据:
id 1 2 3 4 5
1 A 834 27B 250 881B 988
2 B 313M 506B 309 413 141K
3 C 197 77 824 161B 43K
4 D 845K 172K 745B 922M 145M
5 E 168M 959M 990B 250K 893
6 F 430 687K 368M 10M 824M
7 G 940B 403B 655M 818 777K
8 H 281 833K 86B 849B 16K
9 I 485B 508B 349M 643M 926M
10 J 235B 10B 206M 505K 347M
11 K 897B 727M 405K 987B 674M
12 L 588B 40M 860M 58 934B
13 M 727K 375 188M 728K 201B
14 N 280K 442M 43K 400 445
15 O 988B 388M 530B 702M 240B
16 P 177M 782 410K 254K 758K
17 Q 706K 262 520B 104K 34
18 R 390B 99K 677K 965 635M
19 S 819 115M 920M 580M 295K
20 T 573M 901K 360 7K 88B
21 U 333B 593M 504B 992 241B
22 V 674 192M 841B 644B 659
23 W 524M 581M 692M 41 133
24 X 626K 686M 712K 756M 136B
25 Y 295 468 932M 486B 35K
26 Z 526K 798K 229K 958B 700B
输出:
id 1 2 3 4 5
1 A 8.34e+02 2.70e+10 2.50e+02 8.81e+11 9.88e+02
2 B 3.13e+08 5.06e+11 3.09e+02 4.13e+02 1.41e+05
3 C 1.97e+02 7.70e+01 8.24e+02 1.61e+11 4.30e+04
4 D 8.45e+05 1.72e+05 7.45e+11 9.22e+08 1.45e+08
5 E 1.68e+08 9.59e+08 9.90e+11 2.50e+05 8.93e+02
6 F 4.30e+02 6.87e+05 3.68e+08 1.00e+07 8.24e+08
7 G 9.40e+11 4.03e+11 6.55e+08 8.18e+02 7.77e+05
8 H 2.81e+02 8.33e+05 8.60e+10 8.49e+11 1.60e+04
9 I 4.85e+11 5.08e+11 3.49e+08 6.43e+08 9.26e+08
10 J 2.35e+11 1.00e+10 2.06e+08 5.05e+05 3.47e+08
11 K 8.97e+11 7.27e+08 4.05e+05 9.87e+11 6.74e+08
12 L 5.88e+11 4.00e+07 8.60e+08 5.80e+01 9.34e+11
13 M 7.27e+05 3.75e+02 1.88e+08 7.28e+05 2.01e+11
14 N 2.80e+05 4.42e+08 4.30e+04 4.00e+02 4.45e+02
15 O 9.88e+11 3.88e+08 5.30e+11 7.02e+08 2.40e+11
16 P 1.77e+08 7.82e+02 4.10e+05 2.54e+05 7.58e+05
17 Q 7.06e+05 2.62e+02 5.20e+11 1.04e+05 3.40e+01
18 R 3.90e+11 9.90e+04 6.77e+05 9.65e+02 6.35e+08
19 S 8.19e+02 1.15e+08 9.20e+08 5.80e+08 2.95e+05
20 T 5.73e+08 9.01e+05 3.60e+02 7.00e+03 8.80e+10
21 U 3.33e+11 5.93e+08 5.04e+11 9.92e+02 2.41e+11
22 V 6.74e+02 1.92e+08 8.41e+11 6.44e+11 6.59e+02
23 W 5.24e+08 5.81e+08 6.92e+08 4.10e+01 1.33e+02
24 X 6.26e+05 6.86e+08 7.12e+05 7.56e+08 1.36e+11
25 Y 2.95e+02 4.68e+02 9.32e+08 4.86e+11 3.50e+04
26 Z 5.26e+05 7.98e+05 2.29e+05 9.58e+11 7.00e+11
另请参阅将人类可读字节数转换为数字的其他方法,例如this or perhaps