使用数组名称改变列
Mutate column using array names
上下文:
我的数据分析需要分别处理约 100 个不同的试验,每个试验都有 1000 多行。最终,其中一个步骤需要我把每个试验与来自另一个数据集的列值合并。我计划使用 left_join(),并以“ID”作为键,将该数据集与数组中的每个试验连接起来。
困境
我想用 mutate() 把试验名称添加为名为“ID”的新列。我觉得这应该是一项简单的任务,但在使用列表和数组方面我还是个新手。
工作代码
我不知道如何共享 .csv 文件,但您可以将示例数据集保存为名为“data”的练习文件夹中的 .csv 文件。
library(tidyverse)
# Create practice dataset
df1 <- tibble(Time = seq(1, 5, by = 1),
Point = seq(6, 10, by = 1)) %>% print()
# A tibble: 5 x 2
Time Point
<dbl> <dbl>
1 1 6
2 2 7
3 3 8
4 4 9
5 5 10
df2 <- tibble(Time = seq(6, 10, by = 1),
Point = seq(1, 5, by = 1)) %>% print()
# A tibble: 5 x 2
Time Point
<dbl> <dbl>
1 6 1
2 7 2
3 8 3
4 9 4
5 10 5
# Write both practice datasets into the "data" folder as .csv files
write_csv(df1, file.path("data", "21May27_CtYJ10.csv"))
write_csv(df2, file.path("data", "21May27_HrOW07.csv"))
这是我现在正在使用的代码:
# Isolate the .csv files in the "data" directory (returned with their paths).
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the file name; `full.names` is spelled out instead of
# relying on partial argument matching.
rawFiles_List <- list.files("data", pattern = "\\.csv$", full.names = TRUE) %>%
  print()
# Naming scheme for files w/n list
trialDate <- list(str_sub(rawFiles_List, 13, 26)) %>%
print() # Adjust the substring to include date and trial
[[1]]
[1] "21May27_CtYJ10" "21May27_HrOW07"
trial <- list(str_sub(rawFiles_List, 21, 26)) %>% print() # Only include trial
[[1]]
[1] "CtYJ10" "HrOW07"
# Combine the list and list names into an array
rawFiles <- array(map(rawFiles_List, read_csv), dimnames = trialDate) %>% print()
Parsed with column specification:
cols(
Time = col_double(),
Point = col_double()
)
Parsed with column specification:
cols(
Time = col_double(),
Point = col_double()
)
$`21May27_CtYJ10`
# A tibble: 5 x 2
Time Point
<dbl> <dbl>
1 1 6
2 2 7
3 3 8
4 4 9
5 5 10
$`21May27_HrOW07`
# A tibble: 5 x 2
Time Point
<dbl> <dbl>
1 6 1
2 7 2
3 8 3
4 9 4
5 10 5
这只部分满足了我的要求:
map(rawFiles, ~ data.frame(.) %>% # Convert to dataframe
# Create a new column with trial name
mutate(ID = map(trial, paste)) %>% # Pastes the list, not the respective value
as_tibble(.)) # Convert back to tibble
$`21May27_CtYJ10`
# A tibble: 5 x 3
Time Point MouseID
<dbl> <dbl> <list>
1 1 6 <chr [2]>
2 2 7 <chr [2]>
3 3 8 <chr [2]>
4 4 9 <chr [2]>
5 5 10 <chr [2]>
$`21May27_HrOW07`
# A tibble: 5 x 3
Time Point MouseID
<dbl> <dbl> <list>
1 6 1 <chr [2]>
2 7 2 <chr [2]>
3 8 3 <chr [2]>
4 9 4 <chr [2]>
5 10 5 <chr [2]>
问题:
你能帮我创建一个新列,并用各自的试验 ID 填充它吗?我主要尝试使用 tidyverse 函数,但也愿意使用 base R 函数。如果您能解释如何将列表元素与数组元素对应起来,或者向我推荐有用的资源,我将不胜感激。
附加问题:
我正在研究如何在所有操作之后保存每个文件,但我不确定我是否正确地编写了我的 for 循环。您能否就我应该如何编辑我的 for 循环提供一些指导?我使用以前的代码作为指导,但如果我使事情过于复杂,我愿意放弃它。以下是我目前写的:
# Save every manipulated trial to its own .csv file, named
# "<trial>_mod_<save date>.csv".
# NOTE(review): `combFiles` and `modFiles` are not defined in the code shown
# here -- presumably both hold the manipulated trials; confirm they have the
# same length as `rawFiles`.
SaveDate <- format(Sys.Date(), format = "%y%b%d") # Today's date as 2-digit year, abbreviated month, day
for (i in 1:length(combFiles)) { # One pass per element of combFiles
filename <- vector("list", length(rawFiles)) # Re-created as an empty list on every pass, so earlier entries are lost
filename[[i]] <- paste( # Build the i-th file name
as.data.frame(trial)[[1]][i], "_mod_", SaveDate, ".csv", sep = "") # i-th trial ID taken from the `trial` list
write.csv(file = filename[[i]],
modFiles[[i]], # i-th manipulated trial
sep = ",", row.names = FALSE, col.names = TRUE) # NOTE(review): write.csv() is documented to ignore `sep` and `col.names` with a warning
}
library(tidyverse)
# Create practice dataset
df1 <- tibble(Time = seq(1, 5, by = 1),
Point = seq(6, 10, by = 1)) %>% print()
#> # A tibble: 5 x 2
#> Time Point
#> <dbl> <dbl>
#> 1 1 6
#> 2 2 7
#> 3 3 8
#> 4 4 9
#> 5 5 10
df2 <- tibble(Time = seq(6, 10, by = 1),
Point = seq(1, 5, by = 1)) %>% print()
#> # A tibble: 5 x 2
#> Time Point
#> <dbl> <dbl>
#> 1 6 1
#> 2 7 2
#> 3 8 3
#> 4 9 4
#> 5 10 5
write_csv(df1, "21May27_CtYJ10.csv")
write_csv(df2, "21May27_HrOW07.csv")
rm(df1, df2)
最简单的方法是使用 imap_*() 系列函数。它会自动遍历列表中的所有文件,并在需要时将它们合并。为此,文件列表必须带有名称。
# Collect the matching file names and use each name as its own element name
rawFiles_List <- set_names(list.files(pattern = "^21May27"))
rawFiles_List
#> 21May27_CtYJ10.csv 21May27_HrOW07.csv
#> "21May27_CtYJ10.csv" "21May27_HrOW07.csv"
# Read every file and stack the rows into a single tibble, tagging each row
# with the name of the list element it was read from
imap_dfr(
  rawFiles_List,
  function(path, name) {
    add_column(read_csv(path, col_types = "dd"), source_file = name)
  }
)
#> # A tibble: 10 x 3
#> Time Point source_file
#> <dbl> <dbl> <chr>
#> 1 1 6 21May27_CtYJ10.csv
#> 2 2 7 21May27_CtYJ10.csv
#> 3 3 8 21May27_CtYJ10.csv
#> 4 4 9 21May27_CtYJ10.csv
#> 5 5 10 21May27_CtYJ10.csv
#> 6 6 1 21May27_HrOW07.csv
#> 7 7 2 21May27_HrOW07.csv
#> 8 8 3 21May27_HrOW07.csv
#> 9 9 4 21May27_HrOW07.csv
#> 10 10 5 21May27_HrOW07.csv
如果您更喜欢使用数据框列表,并在每个数据框中添加一列,请使用 imap():
# Read every file into its own tibble (kept as a named list), adding a column
# that holds the name of the list element it was read from
imap(
  rawFiles_List,
  function(path, name) {
    add_column(read_csv(path, col_types = "dd"), source_file = name)
  }
)
#> $`21May27_CtYJ10.csv`
#> # A tibble: 5 x 3
#> Time Point source_file
#> <dbl> <dbl> <chr>
#> 1 1 6 21May27_CtYJ10.csv
#> 2 2 7 21May27_CtYJ10.csv
#> 3 3 8 21May27_CtYJ10.csv
#> 4 4 9 21May27_CtYJ10.csv
#> 5 5 10 21May27_CtYJ10.csv
#>
#> $`21May27_HrOW07.csv`
#> # A tibble: 5 x 3
#> Time Point source_file
#> <dbl> <dbl> <chr>
#> 1 6 1 21May27_HrOW07.csv
#> 2 7 2 21May27_HrOW07.csv
#> 3 8 3 21May27_HrOW07.csv
#> 4 9 4 21May27_HrOW07.csv
#> 5 10 5 21May27_HrOW07.csv
当然,如果您在运行 map 命令之前先修改文件列表的名称,就可以确保在列中插入正确的值:
# Name each file after its trial ID. list.files() returns bare file names here
# (e.g. "21May27_CtYJ10.csv"), in which the ID occupies characters 9-14;
# positions 21-26 lie past the end of these names and would give empty names.
rawFiles_List <- list.files(pattern = "^21May27") %>%
  set_names(str_sub(., 9L, 14L))
关于保存,我建议你使用 iwalk()。我认为你的 for 循环并没有实现你想要的效果(你在每次迭代时都重新初始化 filename,清除了它之前的内容,这可能不是你想要的)。
上下文:
我的数据分析需要分别处理约 100 个不同的试验,每个试验都有 1000 多行。最终,其中一个步骤需要我把每个试验与来自另一个数据集的列值合并。我计划使用 left_join(),并以“ID”作为键,将该数据集与数组中的每个试验连接起来。
困境
我想用 mutate() 把试验名称添加为名为“ID”的新列。我觉得这应该是一项简单的任务,但在使用列表和数组方面我还是个新手。
工作代码
我不知道如何共享 .csv 文件,但您可以将示例数据集保存为名为“data”的练习文件夹中的 .csv 文件。
library(tidyverse)
# Create practice dataset
df1 <- tibble(Time = seq(1, 5, by = 1),
Point = seq(6, 10, by = 1)) %>% print()
# A tibble: 5 x 2
Time Point
<dbl> <dbl>
1 1 6
2 2 7
3 3 8
4 4 9
5 5 10
df2 <- tibble(Time = seq(6, 10, by = 1),
Point = seq(1, 5, by = 1)) %>% print()
# A tibble: 5 x 2
Time Point
<dbl> <dbl>
1 6 1
2 7 2
3 8 3
4 9 4
5 10 5
# Write both practice datasets into the "data" folder as .csv files
write_csv(df1, file.path("data", "21May27_CtYJ10.csv"))
write_csv(df2, file.path("data", "21May27_HrOW07.csv"))
这是我现在正在使用的代码:
# Isolate the .csv files in the "data" directory (returned with their paths).
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored to the end of the file name; `full.names` is spelled out instead of
# relying on partial argument matching.
rawFiles_List <- list.files("data", pattern = "\\.csv$", full.names = TRUE) %>%
  print()
# Naming scheme for files w/n list
trialDate <- list(str_sub(rawFiles_List, 13, 26)) %>%
print() # Adjust the substring to include date and trial
[[1]]
[1] "21May27_CtYJ10" "21May27_HrOW07"
trial <- list(str_sub(rawFiles_List, 21, 26)) %>% print() # Only include trial
[[1]]
[1] "CtYJ10" "HrOW07"
# Combine the list and list names into an array
rawFiles <- array(map(rawFiles_List, read_csv), dimnames = trialDate) %>% print()
Parsed with column specification:
cols(
Time = col_double(),
Point = col_double()
)
Parsed with column specification:
cols(
Time = col_double(),
Point = col_double()
)
$`21May27_CtYJ10`
# A tibble: 5 x 2
Time Point
<dbl> <dbl>
1 1 6
2 2 7
3 3 8
4 4 9
5 5 10
$`21May27_HrOW07`
# A tibble: 5 x 2
Time Point
<dbl> <dbl>
1 6 1
2 7 2
3 8 3
4 9 4
5 10 5
这只部分满足了我的要求:
map(rawFiles, ~ data.frame(.) %>% # Convert to dataframe
# Create a new column with trial name
mutate(ID = map(trial, paste)) %>% # Pastes the list, not the respective value
as_tibble(.)) # Convert back to tibble
$`21May27_CtYJ10`
# A tibble: 5 x 3
Time Point MouseID
<dbl> <dbl> <list>
1 1 6 <chr [2]>
2 2 7 <chr [2]>
3 3 8 <chr [2]>
4 4 9 <chr [2]>
5 5 10 <chr [2]>
$`21May27_HrOW07`
# A tibble: 5 x 3
Time Point MouseID
<dbl> <dbl> <list>
1 6 1 <chr [2]>
2 7 2 <chr [2]>
3 8 3 <chr [2]>
4 9 4 <chr [2]>
5 10 5 <chr [2]>
问题:
你能帮我创建一个新列,并用各自的试验 ID 填充它吗?我主要尝试使用 tidyverse 函数,但也愿意使用 base R 函数。如果您能解释如何将列表元素与数组元素对应起来,或者向我推荐有用的资源,我将不胜感激。
附加问题:
我正在研究如何在所有操作之后保存每个文件,但我不确定我是否正确地编写了我的 for 循环。您能否就我应该如何编辑我的 for 循环提供一些指导?我使用以前的代码作为指导,但如果我使事情过于复杂,我愿意放弃它。以下是我目前写的:
# Save every manipulated trial to its own .csv file, named
# "<trial>_mod_<save date>.csv".
# NOTE(review): `combFiles` and `modFiles` are not defined in the code shown
# here -- presumably both hold the manipulated trials; confirm they have the
# same length as `rawFiles`.
SaveDate <- format(Sys.Date(), format = "%y%b%d") # Today's date as 2-digit year, abbreviated month, day
for (i in 1:length(combFiles)) { # One pass per element of combFiles
filename <- vector("list", length(rawFiles)) # Re-created as an empty list on every pass, so earlier entries are lost
filename[[i]] <- paste( # Build the i-th file name
as.data.frame(trial)[[1]][i], "_mod_", SaveDate, ".csv", sep = "") # i-th trial ID taken from the `trial` list
write.csv(file = filename[[i]],
modFiles[[i]], # i-th manipulated trial
sep = ",", row.names = FALSE, col.names = TRUE) # NOTE(review): write.csv() is documented to ignore `sep` and `col.names` with a warning
}
library(tidyverse)
# Create practice dataset
df1 <- tibble(Time = seq(1, 5, by = 1),
Point = seq(6, 10, by = 1)) %>% print()
#> # A tibble: 5 x 2
#> Time Point
#> <dbl> <dbl>
#> 1 1 6
#> 2 2 7
#> 3 3 8
#> 4 4 9
#> 5 5 10
df2 <- tibble(Time = seq(6, 10, by = 1),
Point = seq(1, 5, by = 1)) %>% print()
#> # A tibble: 5 x 2
#> Time Point
#> <dbl> <dbl>
#> 1 6 1
#> 2 7 2
#> 3 8 3
#> 4 9 4
#> 5 10 5
write_csv(df1, "21May27_CtYJ10.csv")
write_csv(df2, "21May27_HrOW07.csv")
rm(df1, df2)
最简单的方法是使用 imap_*() 系列函数。它会自动遍历列表中的所有文件,并在需要时将它们合并。为此,文件列表必须带有名称。
# Collect the matching file names and use each name as its own element name
rawFiles_List <- set_names(list.files(pattern = "^21May27"))
rawFiles_List
#> 21May27_CtYJ10.csv 21May27_HrOW07.csv
#> "21May27_CtYJ10.csv" "21May27_HrOW07.csv"
# Read every file and stack the rows into a single tibble, tagging each row
# with the name of the list element it was read from
imap_dfr(
  rawFiles_List,
  function(path, name) {
    add_column(read_csv(path, col_types = "dd"), source_file = name)
  }
)
#> # A tibble: 10 x 3
#> Time Point source_file
#> <dbl> <dbl> <chr>
#> 1 1 6 21May27_CtYJ10.csv
#> 2 2 7 21May27_CtYJ10.csv
#> 3 3 8 21May27_CtYJ10.csv
#> 4 4 9 21May27_CtYJ10.csv
#> 5 5 10 21May27_CtYJ10.csv
#> 6 6 1 21May27_HrOW07.csv
#> 7 7 2 21May27_HrOW07.csv
#> 8 8 3 21May27_HrOW07.csv
#> 9 9 4 21May27_HrOW07.csv
#> 10 10 5 21May27_HrOW07.csv
如果您更喜欢使用数据框列表,并在每个数据框中添加一列,请使用 imap():
# Read every file into its own tibble (kept as a named list), adding a column
# that holds the name of the list element it was read from
imap(
  rawFiles_List,
  function(path, name) {
    add_column(read_csv(path, col_types = "dd"), source_file = name)
  }
)
#> $`21May27_CtYJ10.csv`
#> # A tibble: 5 x 3
#> Time Point source_file
#> <dbl> <dbl> <chr>
#> 1 1 6 21May27_CtYJ10.csv
#> 2 2 7 21May27_CtYJ10.csv
#> 3 3 8 21May27_CtYJ10.csv
#> 4 4 9 21May27_CtYJ10.csv
#> 5 5 10 21May27_CtYJ10.csv
#>
#> $`21May27_HrOW07.csv`
#> # A tibble: 5 x 3
#> Time Point source_file
#> <dbl> <dbl> <chr>
#> 1 6 1 21May27_HrOW07.csv
#> 2 7 2 21May27_HrOW07.csv
#> 3 8 3 21May27_HrOW07.csv
#> 4 9 4 21May27_HrOW07.csv
#> 5 10 5 21May27_HrOW07.csv
当然,如果您在运行 map 命令之前先修改文件列表的名称,就可以确保在列中插入正确的值:
# Name each file after its trial ID. list.files() returns bare file names here
# (e.g. "21May27_CtYJ10.csv"), in which the ID occupies characters 9-14;
# positions 21-26 lie past the end of these names and would give empty names.
rawFiles_List <- list.files(pattern = "^21May27") %>%
  set_names(str_sub(., 9L, 14L))
关于保存,我建议你使用 iwalk()。我认为你的 for 循环并没有实现你想要的效果(你在每次迭代时都重新初始化 filename,清除了它之前的内容,这可能不是你想要的)。