minimax算法的实现哪里出错了?
Where's error in implementation of minimax algorithm?
Tic-Tac-Toe 游戏的实现存在一个小问题。对于以下组合:
['x', 'o', 'e',
'o', 'e', 'e',
'e', 'e', 'e']
最好的选择是
['x', 'o', 'e',
'o', 'x', 'e',
'e', 'e', 'e']
但它返回的却是下面这个(我猜是它找到的第一个看似合适的着法):
['x', 'o', 'x',
'o', 'e', 'e',
'e', 'e', 'e']
在这种情况下,AI 输了。
这是代码:
// Position from the question, as a flat array of 9 cells; 'e' marks an empty cell.
var board = ['x', 'o', 'e', 'o', 'e', 'e', 'e', 'e', 'e'];
// Mark played by the human opponent.
var signPlayer = 'o';
// The AI plays whichever of 'x' / 'o' the player did not take.
var signAI = (signPlayer === 'x') ? 'o' : 'x';
// Board helpers for a flat 9-cell Tic-Tac-Toe board ('x', 'o', or 'e' for empty).
// NOTE(review): `game` is assigned without var/let/const, so it becomes an implicit global.
game = {
// Checks rows, then columns, then both diagonals for three equal cells.
// Returns the mark on such a line, or `true` when no 'e' cell is left, or `false`.
// NOTE(review): every check returns as soon as three equal cells are found, even
// when all three are 'e'. In that case `false` is returned immediately and the
// remaining rows, columns and diagonals are never examined.
over: function(board) {
// Rows: i is the index of the first cell of each row.
for (var i = 0; i < board.length; i += 3) {
if (board[i] === board[i + 1] && board[i + 1] === board[i + 2]) {
return board[i] !== 'e' ? board[i] : false;
}
}
// Columns: j is the index of the top cell of each column.
for (var j = 0; j < 3; j++) {
if (board[j] === board[j + 3] && board[j + 3] === board[j + 6]) {
return board[j] !== 'e' ? board[j] : false;
}
}
// Diagonals: both pass through the centre cell (index 4).
if ((board[4] === board[0] && board[4] === board[8]) ||
(board[4] === board[2] && board[4] === board[6])) {
return board[4] !== 'e' ? board[4] : false;
}
// No line found: the game is over only if no empty cell remains.
return ( board.every(function(element) {
return element !== 'e';
}))
},
// Alias for over(); returns exactly what over() returns (a mark, true, or false).
winner: function(board) {
return game.over(board);
},
// Returns one copy of `board` per empty cell, with `sign` placed in that cell.
// The input board is not modified.
possible_moves: function(board, sign) {
var testBoard = [],
nextBoard;
for (var i = 0; i < board.length; i++) {
nextBoard = board.slice();
if (nextBoard[i] === 'e') {
nextBoard[i] = sign;
testBoard.push(nextBoard);
}
}
return testBoard;
}
}
// Static evaluation of a board from the AI's point of view:
// -100 when game.winner() reports the player's mark, +100 for the AI's mark, 0 otherwise.
function moveScore(board) {
var result = game.winner(board);
if (result === signPlayer) {
return -100;
}
if (result === signAI) {
return +100;
}
return 0;
// No winner reported: a draw (winner() returned true) or an unfinished game (false).
}
// AI (maximizing) step: tries every AI move and returns the successor board with
// the highest score. Returns `board` itself when game.over(board) is truthy.
// NOTE(review): the score of a move is moveScore() applied to the board that min()
// returns, i.e. a static evaluation of the position one ply deeper. The score found
// at the leaves of the search is not carried back up the tree.
function max(board) {
if (game.over(board)) {
return board;
}
var newGame = [];
var bestMove = [];
var score;
var best_score = -Infinity;
var movesArray = game.possible_moves(board, signAI);
for (var i = 0; i < movesArray.length; i++) {
newGame = movesArray[i].slice();
score = moveScore(min(newGame));
// Strict '>' keeps the first move among equally scored ones.
if (score > best_score) {
best_score = score;
bestMove = newGame;
}
}
return bestMove;
}
// Player (minimizing) step: tries every player move and returns the successor board
// with the lowest score. Returns `board` itself when game.over(board) is truthy.
// NOTE(review): as in max(), the score is moveScore() of the board returned by the
// recursive call, not a score propagated from the leaves.
function min(board) {
if (game.over(board)) {
return board;
}
var newGame = [];
var worstMove = [];
var score;
var worst_score = +Infinity;
var movesArray = game.possible_moves(board, signPlayer);
for (var i = 0; i < movesArray.length; i++) {
newGame = movesArray[i].slice();
score = moveScore(max(newGame));
// Strict '<' keeps the first move among equally scored ones.
if (score < worst_score) {
worst_score = score;
worstMove = newGame;
}
}
return worstMove;
}
// Runs the search from the starting position; the returned board is not stored or used.
max(board);
存在以下问题:
over
方法对某些板给出了错误的输出,例如这个板:
['e', 'e', 'e', 'o', 'o', 'o', 'x', 'x', 'e']
它在发现前三个元素都是 e 之后就会 return false 并停止查找,也就是说它没有看到第二行中 o 的胜利
。要修复,请更改此行:
return board[i] !== 'e' ? board[i] : false;
至:
if (board[i] !== 'e') return board[i];
这将使该函数在连续找到三个 e
时继续进行其他检查。其他循环中也需要类似的修复(最后一个循环除外)。
虽然minimax算法成功访问了搜索树中的节点,但它并没有将找到的叶子分数(0、-100或100)带回搜索树中。相反,您只需查看静态棋盘配置即可重新计算每个位置的分数,而忽略您可以从递归调用中获得的 best/worst 分数。为了解决这个问题,让 min 和 max 函数不仅 return 最好的着法,还有与之相关的分数。所以替换这个:
return bestMove;
与:
return [best_score, bestMove];
然后你从递归调用中获取分数,如果你替换这个:
score = moveScore(min(newGame));
与:
score = min(newGame)[0];
你需要对游戏结束的情况进行类似的修改。替换为:
if (game.over(board)) {
return board;
}
与:
if (game.over(board)) {
return [moveScore(board), []];
}
请注意,这是调用 moveScore 的唯一正确时机。数组的第二个元素本应是最好的着法,但由于此时已没有着法可走,最好只使用一个空数组。
这是一个小问题:您实际上并没有使用从 max 的主要调用中获得的最佳移动。通过上述更改,您可以获得最佳着法 和 它在主调用中的分数:
[score, nextBoard] = max(board);
这是您更正后的代码,末尾有一些附加代码,允许通过单击 3x3 网格来玩游戏。为此,我将表示空格的 e
改为空格字符,因为这样在显示出来的棋盘上更好看:
// Position from the question, as a flat array of 9 cells; ' ' marks an empty cell.
var board = ['x', 'o', ' ', 'o', ' ', ' ', ' ', ' ', ' '];
// Mark played by the human opponent.
var signPlayer = 'o';
// The AI plays whichever of 'x' / 'o' the player did not take.
var signAI = (signPlayer === 'x') ? 'o' : 'x';
/**
 * Board helpers for Tic-Tac-Toe on a flat array of cells
 * ('x', 'o', or ' ' for an empty cell).
 */
var game = {
  /**
   * Reports whether the game has ended.
   *
   * Lines are examined in a fixed order: rows, columns, then the two
   * diagonals. A line of three blanks is skipped so that later lines are
   * still examined.
   *
   * @param {Array} board - Flat board.
   * @returns {*} The mark on the first completed line, otherwise `true`
   *   when no blank cell is left and `false` when one is.
   */
  over: function (board) {
    var triples = [];
    var start;

    // Rows: one triple per group of three consecutive cells.
    for (start = 0; start < board.length; start += 3) {
      triples.push([start, start + 1, start + 2]);
    }
    // Columns.
    for (start = 0; start < 3; start++) {
      triples.push([start, start + 3, start + 6]);
    }
    // Diagonals, both through the centre cell.
    triples.push([4, 0, 8], [4, 2, 6]);

    for (var t = 0; t < triples.length; t++) {
      var mark = board[triples[t][0]];
      if (
        mark !== ' ' &&
        mark === board[triples[t][1]] &&
        mark === board[triples[t][2]]
      ) {
        return mark;
      }
    }

    // No completed line: finished only if every cell is taken.
    return board.every(function (cell) {
      return cell !== ' ';
    });
  },

  /**
   * Same result as `over`: the winning mark, or `true` / `false` when
   * there is no winner.
   */
  winner: function (board) {
    return game.over(board);
  },

  /**
   * Lists every position reachable by placing `sign` on one blank cell.
   *
   * @param {Array} board - Current position (left unmodified).
   * @param {string} sign - Mark to place.
   * @returns {Array<Array>} One new board per blank cell, in index order.
   */
  possible_moves: function (board, sign) {
    var successors = [];
    board.forEach(function (cell, idx) {
      if (cell === ' ') {
        var next = board.slice();
        next[idx] = sign;
        successors.push(next);
      }
    });
    return successors;
  }
}
/**
 * Scores a board from the AI's point of view.
 *
 * @param {Array} board - Position to evaluate.
 * @returns {number} -100 if `game.winner` reports the player's mark,
 *   +100 if it reports the AI's mark, and 0 when no winner is reported.
 */
function moveScore(board) {
  switch (game.winner(board)) {
    case signPlayer:
      return -100;
    case signAI:
      return +100;
    default:
      // No winner reported.
      return 0;
  }
}
/**
 * Maximizing half of the minimax search: chooses the AI's move.
 *
 * @param {Array} board - Position with the AI to move.
 * @returns {Array} `[score, nextBoard]`. For a finished game this is
 *   `[moveScore(board), []]`, since there is no move to make. Otherwise it
 *   is the highest score that `min` reports for any successor, paired with
 *   that successor board.
 */
function max(board) {
  if (game.over(board)) {
    return [moveScore(board), []];
  }

  var best = [-Infinity, []];
  game.possible_moves(board, signAI).forEach(function (candidate) {
    var child = candidate.slice();
    // The score comes from the recursive search, not from a static look at `child`.
    var value = min(child)[0];
    // Strict comparison keeps the earliest candidate on ties.
    if (value > best[0]) {
      best = [value, child];
    }
  });
  return best;
}
/**
 * Minimizing half of the minimax search: chooses the player's move.
 *
 * @param {Array} board - Position with the player to move.
 * @returns {Array} `[score, nextBoard]`. For a finished game this is
 *   `[moveScore(board), []]`, since there is no move to make. Otherwise it
 *   is the lowest score that `max` reports for any successor, paired with
 *   that successor board.
 */
function min(board) {
  if (game.over(board)) {
    return [moveScore(board), []];
  }

  var worst = [+Infinity, []];
  game.possible_moves(board, signPlayer).forEach(function (candidate) {
    var child = candidate.slice();
    // The score comes from the recursive search, not from a static look at `child`.
    var value = max(child)[0];
    // Strict comparison keeps the earliest candidate on ties.
    if (value < worst[0]) {
      worst = [value, child];
    }
  });
  return worst;
}
// Extra code for adding a simple GUI.
// Uses the page's <table> (one <td> per board cell, in document order) and the
// <span> that shows the score.
var board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '];
var score = null;
var tds = Array.from(document.querySelectorAll('td'));
var table = document.querySelector('table');
var span = document.querySelector('span');

/**
 * Writes each board cell into its table cell and shows the score.
 *
 * @param {Array} board - Flat board to render.
 * @param {number} [score] - Score to show in the <span>.
 */
function display(board, score) {
  board.forEach((v, i) => {
    tds[i].textContent = v;
  });
  span.textContent = score;
}
display(board);

// One click = the player's move followed immediately by the AI's reply.
table.onclick = function (e) {
  var i = tds.indexOf(e.target);
  // Ignore clicks outside the cells, on taken cells, or after the game ended.
  if (i === -1 || board[i] !== ' ' || game.over(board)) return;
  board[i] = signPlayer;
  display(board);
  // If the player's move ended the game there is no AI reply. max() would
  // return an empty array as the "move", which must not replace the board.
  if (game.over(board)) {
    score = moveScore(board);
    display(board, score);
    return;
  }
  [score, board] = max(board);
  display(board, score);
};
/* Board cells: bordered, centred, and shown as clickable. */
td { border: 1px solid; width: 20px; text-align: center; cursor: pointer }
/* Fixed row height with vertically centred content. */
tr { height: 25px; vertical-align: middle }
<!-- 3x3 grid: the script reads the nine <td> cells in document order as board indices 0-8. -->
<table>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
</table>
<!-- The script writes the current score into this <span>. -->
<div>
Score: <span></span>
</div>
最后的笔记
我只做了让它能够正常工作所需的更正,但请注意还有几种方法可以提高效率:使用 alpha-beta 剪枝搜索、记录已评估棋盘的分数(同时通过旋转、镜像等对称变换把等价的棋盘映射为同一个)、以及就地修改棋盘而不是每走一步都创建一个新棋盘。
Tic-Tac-Toe 游戏的实现存在一个小问题。对于以下组合:
['x', 'o', 'e',
'o', 'e', 'e',
'e', 'e', 'e']
最好的选择是
['x', 'o', 'e',
'o', 'x', 'e',
'e', 'e', 'e']
但它返回的却是下面这个(我猜是它找到的第一个看似合适的着法):
['x', 'o', 'x',
'o', 'e', 'e',
'e', 'e', 'e']
在这种情况下,AI 输了。 这是代码:
// Position from the question, as a flat array of 9 cells; 'e' marks an empty cell.
var board = ['x', 'o', 'e', 'o', 'e', 'e', 'e', 'e', 'e'];
// Mark played by the human opponent.
var signPlayer = 'o';
// The AI plays whichever of 'x' / 'o' the player did not take.
var signAI = (signPlayer === 'x') ? 'o' : 'x';
// Board helpers for a flat 9-cell Tic-Tac-Toe board ('x', 'o', or 'e' for empty).
// NOTE(review): `game` is assigned without var/let/const, so it becomes an implicit global.
game = {
// Checks rows, then columns, then both diagonals for three equal cells.
// Returns the mark on such a line, or `true` when no 'e' cell is left, or `false`.
// NOTE(review): every check returns as soon as three equal cells are found, even
// when all three are 'e'. In that case `false` is returned immediately and the
// remaining rows, columns and diagonals are never examined.
over: function(board) {
// Rows: i is the index of the first cell of each row.
for (var i = 0; i < board.length; i += 3) {
if (board[i] === board[i + 1] && board[i + 1] === board[i + 2]) {
return board[i] !== 'e' ? board[i] : false;
}
}
// Columns: j is the index of the top cell of each column.
for (var j = 0; j < 3; j++) {
if (board[j] === board[j + 3] && board[j + 3] === board[j + 6]) {
return board[j] !== 'e' ? board[j] : false;
}
}
// Diagonals: both pass through the centre cell (index 4).
if ((board[4] === board[0] && board[4] === board[8]) ||
(board[4] === board[2] && board[4] === board[6])) {
return board[4] !== 'e' ? board[4] : false;
}
// No line found: the game is over only if no empty cell remains.
return ( board.every(function(element) {
return element !== 'e';
}))
},
// Alias for over(); returns exactly what over() returns (a mark, true, or false).
winner: function(board) {
return game.over(board);
},
// Returns one copy of `board` per empty cell, with `sign` placed in that cell.
// The input board is not modified.
possible_moves: function(board, sign) {
var testBoard = [],
nextBoard;
for (var i = 0; i < board.length; i++) {
nextBoard = board.slice();
if (nextBoard[i] === 'e') {
nextBoard[i] = sign;
testBoard.push(nextBoard);
}
}
return testBoard;
}
}
// Static evaluation of a board from the AI's point of view:
// -100 when game.winner() reports the player's mark, +100 for the AI's mark, 0 otherwise.
function moveScore(board) {
var result = game.winner(board);
if (result === signPlayer) {
return -100;
}
if (result === signAI) {
return +100;
}
return 0;
// No winner reported: a draw (winner() returned true) or an unfinished game (false).
}
// AI (maximizing) step: tries every AI move and returns the successor board with
// the highest score. Returns `board` itself when game.over(board) is truthy.
// NOTE(review): the score of a move is moveScore() applied to the board that min()
// returns, i.e. a static evaluation of the position one ply deeper. The score found
// at the leaves of the search is not carried back up the tree.
function max(board) {
if (game.over(board)) {
return board;
}
var newGame = [];
var bestMove = [];
var score;
var best_score = -Infinity;
var movesArray = game.possible_moves(board, signAI);
for (var i = 0; i < movesArray.length; i++) {
newGame = movesArray[i].slice();
score = moveScore(min(newGame));
// Strict '>' keeps the first move among equally scored ones.
if (score > best_score) {
best_score = score;
bestMove = newGame;
}
}
return bestMove;
}
// Player (minimizing) step: tries every player move and returns the successor board
// with the lowest score. Returns `board` itself when game.over(board) is truthy.
// NOTE(review): as in max(), the score is moveScore() of the board returned by the
// recursive call, not a score propagated from the leaves.
function min(board) {
if (game.over(board)) {
return board;
}
var newGame = [];
var worstMove = [];
var score;
var worst_score = +Infinity;
var movesArray = game.possible_moves(board, signPlayer);
for (var i = 0; i < movesArray.length; i++) {
newGame = movesArray[i].slice();
score = moveScore(max(newGame));
// Strict '<' keeps the first move among equally scored ones.
if (score < worst_score) {
worst_score = score;
worstMove = newGame;
}
}
return worstMove;
}
// Runs the search from the starting position; the returned board is not stored or used.
max(board);
存在以下问题:
over
方法对某些板给出了错误的输出,例如这个板:['e', 'e', 'e', 'o', 'o', 'o', 'x', 'x', 'e']
它在发现前三个元素都是 e 之后就会 return false 并停止查找,也就是说它没有看到第二行中 o 的胜利
。要修复,请更改此行:return board[i] !== 'e' ? board[i] : false;
至:
if (board[i] !== 'e') return board[i];
这将使该函数在连续找到三个
e
时继续进行其他检查。其他循环中也需要类似的修复(最后一个循环除外)。虽然minimax算法成功访问了搜索树中的节点,但它并没有将找到的叶子分数(0、-100或100)带回搜索树中。相反,您只需查看静态棋盘配置即可重新计算每个位置的分数,而忽略您可以从递归调用中获得的 best/worst 分数。为了解决这个问题,让 min 和 max 函数不仅 return 最好的着法,还有与之相关的分数。所以替换这个:
return bestMove;
与:
return [best_score, bestMove];
然后你从递归调用中获取分数,如果你替换这个:
score = moveScore(min(newGame));
与:
score = min(newGame)[0];
你需要对游戏结束的情况进行类似的修改。替换为:
if (game.over(board)) { return board; }
与:
if (game.over(board)) { return [moveScore(board), []]; }
请注意,这是调用 moveScore 的唯一正确时机。数组的第二个元素本应是最好的着法,但由于此时已没有着法可走,最好只使用一个空数组。
这是一个小问题:您实际上并没有使用从 max 的主要调用中获得的最佳移动。通过上述更改,您可以获得最佳着法 和 它在主调用中的分数:
[score, nextBoard] = max(board);
这是您更正后的代码,末尾有一些附加代码,允许通过单击 3x3 网格来玩游戏。为此,我将表示空格的 e
改为空格字符,因为这样在显示出来的棋盘上更好看:
// Position from the question, as a flat array of 9 cells; ' ' marks an empty cell.
var board = ['x', 'o', ' ', 'o', ' ', ' ', ' ', ' ', ' '];
// Mark played by the human opponent.
var signPlayer = 'o';
// The AI plays whichever of 'x' / 'o' the player did not take.
var signAI = (signPlayer === 'x') ? 'o' : 'x';
/**
 * Board helpers for Tic-Tac-Toe on a flat array of cells
 * ('x', 'o', or ' ' for an empty cell).
 */
var game = {
  /**
   * Reports whether the game has ended.
   *
   * Lines are examined in a fixed order: rows, columns, then the two
   * diagonals. A line of three blanks is skipped so that later lines are
   * still examined.
   *
   * @param {Array} board - Flat board.
   * @returns {*} The mark on the first completed line, otherwise `true`
   *   when no blank cell is left and `false` when one is.
   */
  over: function (board) {
    var triples = [];
    var start;

    // Rows: one triple per group of three consecutive cells.
    for (start = 0; start < board.length; start += 3) {
      triples.push([start, start + 1, start + 2]);
    }
    // Columns.
    for (start = 0; start < 3; start++) {
      triples.push([start, start + 3, start + 6]);
    }
    // Diagonals, both through the centre cell.
    triples.push([4, 0, 8], [4, 2, 6]);

    for (var t = 0; t < triples.length; t++) {
      var mark = board[triples[t][0]];
      if (
        mark !== ' ' &&
        mark === board[triples[t][1]] &&
        mark === board[triples[t][2]]
      ) {
        return mark;
      }
    }

    // No completed line: finished only if every cell is taken.
    return board.every(function (cell) {
      return cell !== ' ';
    });
  },

  /**
   * Same result as `over`: the winning mark, or `true` / `false` when
   * there is no winner.
   */
  winner: function (board) {
    return game.over(board);
  },

  /**
   * Lists every position reachable by placing `sign` on one blank cell.
   *
   * @param {Array} board - Current position (left unmodified).
   * @param {string} sign - Mark to place.
   * @returns {Array<Array>} One new board per blank cell, in index order.
   */
  possible_moves: function (board, sign) {
    var successors = [];
    board.forEach(function (cell, idx) {
      if (cell === ' ') {
        var next = board.slice();
        next[idx] = sign;
        successors.push(next);
      }
    });
    return successors;
  }
}
/**
 * Scores a board from the AI's point of view.
 *
 * @param {Array} board - Position to evaluate.
 * @returns {number} -100 if `game.winner` reports the player's mark,
 *   +100 if it reports the AI's mark, and 0 when no winner is reported.
 */
function moveScore(board) {
  switch (game.winner(board)) {
    case signPlayer:
      return -100;
    case signAI:
      return +100;
    default:
      // No winner reported.
      return 0;
  }
}
/**
 * Maximizing half of the minimax search: chooses the AI's move.
 *
 * @param {Array} board - Position with the AI to move.
 * @returns {Array} `[score, nextBoard]`. For a finished game this is
 *   `[moveScore(board), []]`, since there is no move to make. Otherwise it
 *   is the highest score that `min` reports for any successor, paired with
 *   that successor board.
 */
function max(board) {
  if (game.over(board)) {
    return [moveScore(board), []];
  }

  var best = [-Infinity, []];
  game.possible_moves(board, signAI).forEach(function (candidate) {
    var child = candidate.slice();
    // The score comes from the recursive search, not from a static look at `child`.
    var value = min(child)[0];
    // Strict comparison keeps the earliest candidate on ties.
    if (value > best[0]) {
      best = [value, child];
    }
  });
  return best;
}
/**
 * Minimizing half of the minimax search: chooses the player's move.
 *
 * @param {Array} board - Position with the player to move.
 * @returns {Array} `[score, nextBoard]`. For a finished game this is
 *   `[moveScore(board), []]`, since there is no move to make. Otherwise it
 *   is the lowest score that `max` reports for any successor, paired with
 *   that successor board.
 */
function min(board) {
  if (game.over(board)) {
    return [moveScore(board), []];
  }

  var worst = [+Infinity, []];
  game.possible_moves(board, signPlayer).forEach(function (candidate) {
    var child = candidate.slice();
    // The score comes from the recursive search, not from a static look at `child`.
    var value = max(child)[0];
    // Strict comparison keeps the earliest candidate on ties.
    if (value < worst[0]) {
      worst = [value, child];
    }
  });
  return worst;
}
// Extra code for adding a simple GUI.
// Uses the page's <table> (one <td> per board cell, in document order) and the
// <span> that shows the score.
var board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '];
var score = null;
var tds = Array.from(document.querySelectorAll('td'));
var table = document.querySelector('table');
var span = document.querySelector('span');

/**
 * Writes each board cell into its table cell and shows the score.
 *
 * @param {Array} board - Flat board to render.
 * @param {number} [score] - Score to show in the <span>.
 */
function display(board, score) {
  board.forEach((v, i) => {
    tds[i].textContent = v;
  });
  span.textContent = score;
}
display(board);

// One click = the player's move followed immediately by the AI's reply.
table.onclick = function (e) {
  var i = tds.indexOf(e.target);
  // Ignore clicks outside the cells, on taken cells, or after the game ended.
  if (i === -1 || board[i] !== ' ' || game.over(board)) return;
  board[i] = signPlayer;
  display(board);
  // If the player's move ended the game there is no AI reply. max() would
  // return an empty array as the "move", which must not replace the board.
  if (game.over(board)) {
    score = moveScore(board);
    display(board, score);
    return;
  }
  [score, board] = max(board);
  display(board, score);
};
/* Board cells: bordered, centred, and shown as clickable. */
td { border: 1px solid; width: 20px; text-align: center; cursor: pointer }
/* Fixed row height with vertically centred content. */
tr { height: 25px; vertical-align: middle }
<!-- 3x3 grid: the script reads the nine <td> cells in document order as board indices 0-8. -->
<table>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
</table>
<!-- The script writes the current score into this <span>. -->
<div>
Score: <span></span>
</div>
最后的笔记
我只做了让它能够正常工作所需的更正,但请注意还有几种方法可以提高效率:使用 alpha-beta 剪枝搜索、记录已评估棋盘的分数(同时通过旋转、镜像等对称变换把等价的棋盘映射为同一个)、以及就地修改棋盘而不是每走一步都创建一个新棋盘。