R:如果数据库中已存在该值,则跳到下一条记录
R Skip to Next Record if Value Already Exists in Database
这个问题让我绞尽脑汁也没能解决……我有一个脚本,如果 game_id 已经存在于我正在写入的表中,我想跳到循环中的下一条记录。
# Connect to the "sabermetrics" ODBC data source
saber <- odbcConnect("sabermetrics")
# Query that pulls every distinct game_id already stored in allbats
check_query <- "select distinct game_id from allbats"
# Build the values to compare: the ids in the db and the id in the current url
game_id_check <- sqlQuery(saber, check_query)
# sqlQuery() hands back something other than a data.frame when the query fails,
# so stop with a clear message instead of failing later in the comparison
if (!is.data.frame(game_id_check)) {
  stop("could not read existing game ids from allbats", call. = FALSE)
}
# The game id is taken from characters 66 to 95 of the box score URL
curr_gameid <- substr(thisboxscoreURL, 66, 95)
# Skip to the next record when this game is already in the table.
# `data.frame == string` yields one logical per row, which is not a valid if()
# condition; %in% against the game_id column yields a single TRUE/FALSE.
# as.character() covers the case where game_id comes back as a factor.
if (curr_gameid %in% as.character(game_id_check$game_id)) next
我试过把数据转换为矩阵、转换为列表(list)、unlist、合并等各种方法,想让数据能够匹配("match"),但全都以失败告终!
我也可以把 url 中的 game_id 放进查询字符串的 where 子句中,如果计数大于 0,就跳到下一条记录。我不确定哪种方式更好,可能是后者,因为对于循环中的每条记录,查询最多只返回一个值。
类似
# Connect to the "sabermetrics" ODBC data source
saber <- odbcConnect("sabermetrics")
# Look up only the current game's id. The value of url_game_id has to be
# spliced into the SQL text; written inside the string it is just the literal
# word "url_game_id". Single quotes in the value are doubled so that it stays
# a single SQL string literal.
check_query <- paste0(
  "select distinct game_id from allbats where game_id = '",
  gsub("'", "''", url_game_id, fixed = TRUE),
  "'"
)
# Skip to the next record when the lookup returns at least one row.
# sqlQuery() (capital Q) returns a data.frame, so nrow() counts the matches.
if (nrow(sqlQuery(saber, check_query)) > 0) next
示例数据
away home inning away_team_code home_team_code game_id
0 0 1 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 2 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 3 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 4 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 1 5 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 6 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 7 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 8 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 x 9 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
这是输出,但不确定它是否有用。
# Dump the first 20 rows of allbats as an R literal so the data can be recreated
dput(sqlQuery(saber, "SELECT TOP 20 * FROM allbats"))
# Output of the call above: a 20-row data.frame with the columns away, home,
# inning, away_team_code, home_team_code and game_id. Note that home is a
# factor (it contains the non-numeric level "x") and game_id is a factor too.
structure(list(away = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 5L, 0L, 1L, 0L, 0L, 0L), home = structure(c(1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("0", "1", "2", "x"), class = "factor"),
inning = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L), away_team_code = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("hou", "mia", "sdn"), class = "factor"),
home_team_code = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("lan",
"sea", "sfn"), class = "factor"), game_id = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("gid_2016_04_26_houmlb_seamlb_1",
"gid_2016_04_26_miamlb_lanmlb_1", "gid_2016_04_26_sdnmlb_sfnmlb_1"
), class = "factor")), .Names = c("away", "home", "inning",
"away_team_code", "home_team_code", "game_id"), row.names = c(NA,
20L), class = "data.frame")
如能提供任何帮助,我将不胜感激!谢谢!
我找到了使用 sqldf 包的方法
# library() fails loudly if sqldf is missing; require() would only warn and
# let the script break later at the sqldf() call
library(sqldf)

saber <- odbcConnect("sabermetrics")
# Pull every game_id already stored in the linescore table, then release the
# connection right away: everything below works on in-memory data frames
existingGames <- sqlQuery(saber, "select distinct game_id from linescore")
odbcClose(saber)
# Candidate game ids; gameids is defined outside this snippet
newGames <- data.frame(gameids)
# Get all new games that do not exist in existing games (linescore table)
ngNotIneg <- sqldf("SELECT distinct * FROM newGames EXCEPT SELECT distinct * FROM existingGames")
这个问题让我绞尽脑汁也没能解决……我有一个脚本,如果 game_id 已经存在于我正在写入的表中,我想跳到循环中的下一条记录。
# Connect to the "sabermetrics" ODBC data source
saber <- odbcConnect("sabermetrics")
# Query that pulls every distinct game_id already stored in allbats
check_query <- "select distinct game_id from allbats"
# Build the values to compare: the ids in the db and the id in the current url
game_id_check <- sqlQuery(saber, check_query)
# sqlQuery() hands back something other than a data.frame when the query fails,
# so stop with a clear message instead of failing later in the comparison
if (!is.data.frame(game_id_check)) {
  stop("could not read existing game ids from allbats", call. = FALSE)
}
# The game id is taken from characters 66 to 95 of the box score URL
curr_gameid <- substr(thisboxscoreURL, 66, 95)
# Skip to the next record when this game is already in the table.
# `data.frame == string` yields one logical per row, which is not a valid if()
# condition; %in% against the game_id column yields a single TRUE/FALSE.
# as.character() covers the case where game_id comes back as a factor.
if (curr_gameid %in% as.character(game_id_check$game_id)) next
我试过把数据转换为矩阵、转换为列表(list)、unlist、合并等各种方法,想让数据能够匹配("match"),但全都以失败告终!
我也可以把 url 中的 game_id 放进查询字符串的 where 子句中,如果计数大于 0,就跳到下一条记录。我不确定哪种方式更好,可能是后者,因为对于循环中的每条记录,查询最多只返回一个值。
类似
# Connect to the "sabermetrics" ODBC data source
saber <- odbcConnect("sabermetrics")
# Look up only the current game's id. The value of url_game_id has to be
# spliced into the SQL text; written inside the string it is just the literal
# word "url_game_id". Single quotes in the value are doubled so that it stays
# a single SQL string literal.
check_query <- paste0(
  "select distinct game_id from allbats where game_id = '",
  gsub("'", "''", url_game_id, fixed = TRUE),
  "'"
)
# Skip to the next record when the lookup returns at least one row.
# sqlQuery() (capital Q) returns a data.frame, so nrow() counts the matches.
if (nrow(sqlQuery(saber, check_query)) > 0) next
示例数据
away home inning away_team_code home_team_code game_id
0 0 1 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 2 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 3 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 4 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 1 5 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 6 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 7 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 0 8 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
0 x 9 sdn sfn gid_2016_04_26_sdnmlb_sfnmlb_1
这是输出,但不确定它是否有用。
# Dump the first 20 rows of allbats as an R literal so the data can be recreated
dput(sqlQuery(saber, "SELECT TOP 20 * FROM allbats"))
# Output of the call above: a 20-row data.frame with the columns away, home,
# inning, away_team_code, home_team_code and game_id. Note that home is a
# factor (it contains the non-numeric level "x") and game_id is a factor too.
structure(list(away = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 5L, 0L, 1L, 0L, 0L, 0L), home = structure(c(1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("0", "1", "2", "x"), class = "factor"),
inning = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L), away_team_code = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("hou", "mia", "sdn"), class = "factor"),
home_team_code = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("lan",
"sea", "sfn"), class = "factor"), game_id = structure(c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("gid_2016_04_26_houmlb_seamlb_1",
"gid_2016_04_26_miamlb_lanmlb_1", "gid_2016_04_26_sdnmlb_sfnmlb_1"
), class = "factor")), .Names = c("away", "home", "inning",
"away_team_code", "home_team_code", "game_id"), row.names = c(NA,
20L), class = "data.frame")
如能提供任何帮助,我将不胜感激!谢谢!
我找到了使用 sqldf 包的方法
# library() fails loudly if sqldf is missing; require() would only warn and
# let the script break later at the sqldf() call
library(sqldf)

saber <- odbcConnect("sabermetrics")
# Pull every game_id already stored in the linescore table, then release the
# connection right away: everything below works on in-memory data frames
existingGames <- sqlQuery(saber, "select distinct game_id from linescore")
odbcClose(saber)
# Candidate game ids; gameids is defined outside this snippet
newGames <- data.frame(gameids)
# Get all new games that do not exist in existing games (linescore table)
ngNotIneg <- sqldf("SELECT distinct * FROM newGames EXCEPT SELECT distinct * FROM existingGames")