具有基本功能的 R 中的 Monty Hall 游戏
Monty Hall game in R with base functions
只是为了好玩和训练 R,我试图证明 Monty Hall 游戏规则(在打开一扇门后改变你的选择会让你更有可能获胜),我制作了这个可重现的代码(每一步的解释是在代码中):
## First I set the seed
set.seed(4)
## Then I modelize the presence of the prize as a random variable between gates 1,2,3
randomgates <- ceiling(runif(10000, min = 0, max = 3))
## so do I with the random choice.
randomchoice <- ceiling(runif(10000, min = 0, max = 3))
## As the opening of a gate is dependent from the gate you chose (the gate you chose cannot be opened)
## I modelize the opening of the gate as a variable which cannot be equal to the choice.
options <- c(1:3)
randomopen <- rep(1,10000)
for (i in 1:length(randomgates)) {
realoptions <- options[options != randomchoice[i]]
randomopen[i] <- realoptions[ceiling(runif(1,min = 0, max = 2))]
}
##Just to make data more easy to handle, I make a dataset
dataset <- cbind(randomgates, randomchoice, randomopen)
## Then I creat a dataset which only keeps the realization of the games in which we carry on (
## the opened gate wasn't the one with the price within)
steptwo <- dataset[randomopen != randomgates,]
## The next step is just to check if the probability of carry on is 2/3, which indeed is
carryon <- randomopen != randomgates
sum(carryon)/length(randomgates)
## I format the dataset as a data frame
steptwo <- as.data.frame(steptwo)
## Now we check what happens if we hold our initial choice when game carries on
prizesholding <- steptwo$randomgates == steptwo$randomchoice
sum(prizesholding)
## creating a vector of changing option, dependant on the opened gate, in the dataset that
## keeps only the cases in which we carried on playing (the opened gate wasn't the one with the prize)
switchedchoice <- rep(1,length(steptwo$randomgates))
for (i in 1:length(steptwo$randomgates)) {
choice <- options[options != steptwo$randomchoice[i]]
switchedchoice[i] <- choice[ceiling(runif(1,min = 0, max = 2))]
}
## Now we check how many times you guess the prize gate when you switch your initial choice
prizesswitching <- steptwo$randomgates == switchedchoice
sum(prizesswitching)/length(steptwo$randomgates)
当我在游戏进行的情况下(开门与奖品不匹配)不改变我的初始选择时检查概率,我得到了我预期的(接近1/3的概率)中奖),参考如下指令:
carryon <- randomopen != randomgates
sum(carryon)/length(randomgates)
当我在更改选择后检查赢得奖品的概率时出现了我的问题(条件,显然是没有打开持有奖品的门),而不是像 Monty Hall 所说的那样得到 1/2,我得到1/4,指的是如下指令:
prizesswitching <- steptwo$randomgates == switchedchoice
sum(prizesswitching)/length(steptwo$randomgates)
我知道我在做坏事,因为 Monty Hall 已经证明了这一点,但我无法检测到缺陷。有人知道是什么吗?
如果您不知道什么是蒙提霍尔问题,您可以在维基百科上找到通俗易懂的信息:
编辑:正如@Dason 指出的那样,问题之一是我在更改初始选择时引入了某种随机性,这没有意义,因为只剩下一个选项。
另一个问题是我没有在 Monty Hall 知道奖品在哪里的假设下处理这个问题。我把我的代码从最初的改成了这个,问题就解决了:
# Prepare each variable for 10000 experiments
## First I set the seed
set.seed(4)
## Then I modelize the presence of the prize as a random variable between gates 1,2,3
randomgates <- ceiling(runif(10000, min = 0, max = 3))
## so do I with the random choice.
randomchoice <- ceiling(runif(10000, min = 0, max = 3))
## As the opening of a gate is dependent from the gate you chose (the gate you chose cannot be opened
##, neither the one with the prize does), I modelize the opening of the gate as a variable which cannot be equal to the choice.
options <- c(1:3)
randomopen <- rep(1,10000)
for (i in 1:length(randomgates)) {
randomopen[i] <- options[options != randomchoice[i] & options != randomgates[i]]
}
##Just to make data more easy to handle, I make a dataset
dataset <- cbind(randomgates, randomchoice, randomopen)
## I format the dataset as a data frame
steptwo <- as.data.frame(dataset)
## Now we check what happens if we hold our initial choice when game carries on
steptwo$prizesholding <- steptwo$randomgates == steptwo$randomchoice
with(steptwo, sum(prizesholding))
## creating a vector of changing option, dependant on the opened gate, in the dataset that
## keeps only the cases in which we carried on playing (the opened gate wasn't the one with the prize)
steptwo$switchedchoice <- rep(1,length(steptwo$randomgates))
for (i in 1:length(steptwo$randomgates)) {
steptwo$switchedchoice[i] <- options[options != steptwo$randomchoice[i] & options != steptwo$randomopen[i]]
}
## Now we check how many times you guess the prize gate when you switch your initial choice
steptwo$prizesswitching <- steptwo$randomgates == steptwo$switchedchoice
with(steptwo, sum(prizesswitching)/length(randomgates))
这似乎可以解决问题:
n_iter <- 10000
set.seed(4)
doors <- 1:3
prizes <- sample.int(n = 3, size = n_iter, replace = TRUE)
your_pick <- sample.int(n = 3, size = n_iter, replace = TRUE)
open_door <- rep(0, n_iter)
switched_door <- rep(0, n_iter)
for (i in 1:n_iter) {
remaining_choices <- setdiff(doors, c(your_pick[i], prizes[i]))
if (length(remaining_choices) > 1) {
open_door[i] <- sample(remaining_choices, size = 1)
} else {
open_door[i] <- remaining_choices
}
switched_door[i] <- setdiff(doors, c(your_pick[i], open_door[i]))
}
> mean(your_pick == prizes)
[1] 0.3305
> mean(switched_door == prizes)
[1] 0.6695
sample.int
和 sample
基本函数有助于简化一些事情。 remaining_choices
项包含游戏节目主持人可以打开的可能门,长度为 1 或 2,具体取决于您最初的选择。如果长度是 2,我们从那个向量中采样,如果它是 1,那扇门会自动打开。
每一轮,有一个prize_door和一个chosen_door。 Monty Hall 会打开一扇不是 prize_door 或 chosen_door 的门(set diff between 1:3 and the vector (prize_door, chosen_door), 随机选择如果 setdiff 是两个元素,则介于两者之间)。那么开关门就是没有选择或打开的门。
n <- 1e4
set.seed(2020)
df <-
data.frame(
prize_door = sample(1:3, n, replace = TRUE),
chosen_door = sample(1:3, n, replace = TRUE))
df$opened_door <-
mapply(function(x, y){
available <- setdiff(1:3, c(x, y))
available[sample(length(available), 1)]
}, df$prize_door, df$chosen_door)
df$switch_door <-
mapply(function(x, y) setdiff(1:3, c(x, y)),
df$chosen_door, df$opened_door)
with(df, mean(prize_door == chosen_door))
# [1] 0.3358
with(df, mean(prize_door == switch_door))
# [1] 0.6642
n 增加时的概率图
probs <-
data.frame(
chosen_p = with(df, cumsum(prize_door == chosen_door))/(1:n),
switch_p = with(df, cumsum(prize_door == switch_door))/(1:n))
plot(probs$switch_p, type = 'l', ylim = c(0, 1))
lines(probs$chosen_p, col = 'red')
abline(h = 1/3)
abline(h = 2/3)
只是为了好玩和训练 R,我试图证明 Monty Hall 游戏规则(在打开一扇门后改变你的选择会让你更有可能获胜),我制作了这个可重现的代码(每一步的解释是在代码中):
## First I set the seed
set.seed(4)
## Then I modelize the presence of the prize as a random variable between gates 1,2,3
randomgates <- ceiling(runif(10000, min = 0, max = 3))
## so do I with the random choice.
randomchoice <- ceiling(runif(10000, min = 0, max = 3))
## As the opening of a gate is dependent from the gate you chose (the gate you chose cannot be opened)
## I modelize the opening of the gate as a variable which cannot be equal to the choice.
options <- c(1:3)
randomopen <- rep(1,10000)
for (i in 1:length(randomgates)) {
realoptions <- options[options != randomchoice[i]]
randomopen[i] <- realoptions[ceiling(runif(1,min = 0, max = 2))]
}
##Just to make data more easy to handle, I make a dataset
dataset <- cbind(randomgates, randomchoice, randomopen)
## Then I creat a dataset which only keeps the realization of the games in which we carry on (
## the opened gate wasn't the one with the price within)
steptwo <- dataset[randomopen != randomgates,]
## The next step is just to check if the probability of carry on is 2/3, which indeed is
carryon <- randomopen != randomgates
sum(carryon)/length(randomgates)
## I format the dataset as a data frame
steptwo <- as.data.frame(steptwo)
## Now we check what happens if we hold our initial choice when game carries on
prizesholding <- steptwo$randomgates == steptwo$randomchoice
sum(prizesholding)
## creating a vector of changing option, dependant on the opened gate, in the dataset that
## keeps only the cases in which we carried on playing (the opened gate wasn't the one with the prize)
switchedchoice <- rep(1,length(steptwo$randomgates))
for (i in 1:length(steptwo$randomgates)) {
choice <- options[options != steptwo$randomchoice[i]]
switchedchoice[i] <- choice[ceiling(runif(1,min = 0, max = 2))]
}
## Now we check how many times you guess the prize gate when you switch your initial choice
prizesswitching <- steptwo$randomgates == switchedchoice
sum(prizesswitching)/length(steptwo$randomgates)
当我在游戏进行的情况下(开门与奖品不匹配)不改变我的初始选择时检查概率,我得到了我预期的(接近1/3的概率)中奖),参考如下指令:
carryon <- randomopen != randomgates
sum(carryon)/length(randomgates)
当我在更改选择后检查赢得奖品的概率时出现了我的问题(条件,显然是没有打开持有奖品的门),而不是像 Monty Hall 所说的那样得到 1/2,我得到1/4,指的是如下指令:
prizesswitching <- steptwo$randomgates == switchedchoice
sum(prizesswitching)/length(steptwo$randomgates)
我知道我在做坏事,因为 Monty Hall 已经证明了这一点,但我无法检测到缺陷。有人知道是什么吗?
如果您不知道什么是蒙提霍尔问题,您可以在维基百科上找到通俗易懂的信息:
编辑:正如@Dason 指出的那样,问题之一是我在更改初始选择时引入了某种随机性,这没有意义,因为只剩下一个选项。
另一个问题是我没有在 Monty Hall 知道奖品在哪里的假设下处理这个问题。我把我的代码从最初的改成了这个,问题就解决了:
# Prepare each variable for 10000 experiments
## First I set the seed
set.seed(4)
## Then I modelize the presence of the prize as a random variable between gates 1,2,3
randomgates <- ceiling(runif(10000, min = 0, max = 3))
## so do I with the random choice.
randomchoice <- ceiling(runif(10000, min = 0, max = 3))
## As the opening of a gate is dependent from the gate you chose (the gate you chose cannot be opened
##, neither the one with the prize does), I modelize the opening of the gate as a variable which cannot be equal to the choice.
options <- c(1:3)
randomopen <- rep(1,10000)
for (i in 1:length(randomgates)) {
randomopen[i] <- options[options != randomchoice[i] & options != randomgates[i]]
}
##Just to make data more easy to handle, I make a dataset
dataset <- cbind(randomgates, randomchoice, randomopen)
## I format the dataset as a data frame
steptwo <- as.data.frame(dataset)
## Now we check what happens if we hold our initial choice when game carries on
steptwo$prizesholding <- steptwo$randomgates == steptwo$randomchoice
with(steptwo, sum(prizesholding))
## creating a vector of changing option, dependant on the opened gate, in the dataset that
## keeps only the cases in which we carried on playing (the opened gate wasn't the one with the prize)
steptwo$switchedchoice <- rep(1,length(steptwo$randomgates))
for (i in 1:length(steptwo$randomgates)) {
steptwo$switchedchoice[i] <- options[options != steptwo$randomchoice[i] & options != steptwo$randomopen[i]]
}
## Now we check how many times you guess the prize gate when you switch your initial choice
steptwo$prizesswitching <- steptwo$randomgates == steptwo$switchedchoice
with(steptwo, sum(prizesswitching)/length(randomgates))
这似乎可以解决问题:
n_iter <- 10000
set.seed(4)
doors <- 1:3
prizes <- sample.int(n = 3, size = n_iter, replace = TRUE)
your_pick <- sample.int(n = 3, size = n_iter, replace = TRUE)
open_door <- rep(0, n_iter)
switched_door <- rep(0, n_iter)
for (i in 1:n_iter) {
remaining_choices <- setdiff(doors, c(your_pick[i], prizes[i]))
if (length(remaining_choices) > 1) {
open_door[i] <- sample(remaining_choices, size = 1)
} else {
open_door[i] <- remaining_choices
}
switched_door[i] <- setdiff(doors, c(your_pick[i], open_door[i]))
}
> mean(your_pick == prizes)
[1] 0.3305
> mean(switched_door == prizes)
[1] 0.6695
sample.int
和 sample
基本函数有助于简化一些事情。 remaining_choices
项包含游戏节目主持人可以打开的可能门,长度为 1 或 2,具体取决于您最初的选择。如果长度是 2,我们从那个向量中采样,如果它是 1,那扇门会自动打开。
每一轮,有一个prize_door和一个chosen_door。 Monty Hall 会打开一扇不是 prize_door 或 chosen_door 的门(set diff between 1:3 and the vector (prize_door, chosen_door), 随机选择如果 setdiff 是两个元素,则介于两者之间)。那么开关门就是没有选择或打开的门。
n <- 1e4
set.seed(2020)
df <-
data.frame(
prize_door = sample(1:3, n, replace = TRUE),
chosen_door = sample(1:3, n, replace = TRUE))
df$opened_door <-
mapply(function(x, y){
available <- setdiff(1:3, c(x, y))
available[sample(length(available), 1)]
}, df$prize_door, df$chosen_door)
df$switch_door <-
mapply(function(x, y) setdiff(1:3, c(x, y)),
df$chosen_door, df$opened_door)
with(df, mean(prize_door == chosen_door))
# [1] 0.3358
with(df, mean(prize_door == switch_door))
# [1] 0.6642
n 增加时的概率图
probs <-
data.frame(
chosen_p = with(df, cumsum(prize_door == chosen_door))/(1:n),
switch_p = with(df, cumsum(prize_door == switch_door))/(1:n))
plot(probs$switch_p, type = 'l', ylim = c(0, 1))
lines(probs$chosen_p, col = 'red')
abline(h = 1/3)
abline(h = 2/3)