minimax算法的实现哪里出错了?

Where's error in implementation of minimax algorithm?

Tic-Tac-Toe 游戏的实现存在一个小问题。对于以下组合:

['x', 'o', 'e',  
 'o', 'e', 'e',  
 'e', 'e', 'e']  

最好的选择是

['x', 'o', 'e',  
 'o', 'x', 'e',  
 'e', 'e', 'e']  

但它返回的却是下面这个(我认为它只是选了最先遇到的合适走法):

['x', 'o', 'x',  
 'o', 'e', 'e',  
 'e', 'e', 'e']  

在这种情况下,AI 输了。 这是代码:

// Position under test, listed row by row; 'e' marks an empty cell.
var board = [
    'x', 'o', 'e',
    'o', 'e', 'e',
    'e', 'e', 'e'
];
// The human plays one mark and the AI takes the other.
var signPlayer = 'o';
var signAI = 'x';
if (signPlayer === 'x') {
    signAI = 'o';
}

// Board helpers used by moveScore(), max() and min(). A board is a flat array
// whose cells hold 'x', 'o' or 'e' (empty); rows are consecutive groups of three.
// NOTE(review): `game` is assigned without var/let/const, i.e. as an implicit global.
game = {
    // Looks for a completed line and otherwise reports whether the board is full.
    // Returns the mark of the first line of three equal cells it encounters,
    // `true` when no cell is 'e', and `false` otherwise.
    // NOTE(review): every line check returns immediately once three equal cells
    // are found, so a line of three 'e' yields `false` without the remaining
    // rows, columns and diagonals being examined.
    over: function(board) {
        // Rows: cells i, i+1, i+2 for i = 0, 3, 6, ...
        for (var i = 0; i < board.length; i += 3) {
            if (board[i] === board[i + 1] && board[i + 1] === board[i + 2]) {
                return board[i] !== 'e' ? board[i] : false;
            }
        }
        // Columns: cells j, j+3, j+6 for j = 0, 1, 2.
        for (var j = 0; j < 3; j++) {
            if (board[j] === board[j + 3] && board[j + 3] === board[j + 6]) {
                return board[j] !== 'e' ? board[j] : false;
            }
        }
        // Both diagonals pass through the centre cell (index 4).
        if ((board[4] === board[0] && board[4] === board[8]) || 
        (board[4] === board[2] && board[4] === board[6])) {
            return board[4] !== 'e' ? board[4] : false;
        }
        // No line found: the result is true only when no empty cell is left.
        return ( board.every(function(element) {
            return element !== 'e';
        })) 
    },
    // Thin alias: returns exactly what over() returns for the same board.
    winner: function(board) {
        return game.over(board);
    },
    // Builds one new board per empty cell, each with `sign` placed in that cell.
    // The input board is copied with slice() and is not modified.
    possible_moves: function(board, sign) {
        var testBoard = [], 
        nextBoard;
        for (var i = 0; i < board.length; i++) {
            nextBoard = board.slice();
            if (nextBoard[i] === 'e') {
                nextBoard[i] = sign;
                testBoard.push(nextBoard);
            }
        }
        return testBoard;
    }
}

/**
 * Scores a board from the AI's point of view using only game.winner(board).
 *
 * @param {Array} board - Board to evaluate.
 * @returns {number} -100 when the winner is signPlayer, 100 when it is signAI,
 *     and 0 for every other result (treated as a draw).
 */
function moveScore(board) {
    switch (game.winner(board)) {
        case signPlayer:
            return -100;
        case signAI:
            return 100;
        default:
            // Neither mark was reported as the winner.
            return 0;
    }
}

/**
 * Maximizing step: tries every move available to signAI and returns the
 * resulting board with the highest score.
 *
 * Returns `board` itself when game.over(board) is truthy. If no candidate is
 * ever selected, the initial empty array is returned.
 *
 * NOTE(review): each candidate is scored with moveScore() applied to the board
 * that min() returns, so the value found deeper in the recursion is not passed
 * back up; only that returned board is evaluated statically.
 */
function max(board) {

    if (game.over(board)) {
        return board;
    }
    var newGame = [];
    var bestMove = [];
    var score;
    var best_score = -Infinity;
    var movesArray = game.possible_moves(board, signAI);

    for (var i = 0; i < movesArray.length; i++) {
        newGame = movesArray[i].slice();
        score = moveScore(min(newGame));
        // Strict comparison: on equal scores the earliest candidate is kept.
        if (score > best_score) {
            best_score = score;
            bestMove = newGame;
        }
    }
    return bestMove;
}

/**
 * Minimizing step: tries every move available to signPlayer and returns the
 * resulting board with the lowest score.
 *
 * Returns `board` itself when game.over(board) is truthy. If no candidate is
 * ever selected, the initial empty array is returned.
 *
 * NOTE(review): each candidate is scored with moveScore() applied to the board
 * that max() returns, so the value found deeper in the recursion is not passed
 * back up; only that returned board is evaluated statically.
 */
function min(board) {

    if (game.over(board)) {
        return board;
    }
    var newGame = [];
    var worstMove = [];
    var score;
    var worst_score = +Infinity;
    var movesArray = game.possible_moves(board, signPlayer);

    for (var i = 0; i < movesArray.length; i++) {
        newGame = movesArray[i].slice();
        score = moveScore(max(newGame));
        // Strict comparison: on equal scores the earliest candidate is kept.
        if (score < worst_score) {
            worst_score = score;
            worstMove = newGame;
        }
    }
    return worstMove;
}

// Run the search on the sample position; the board returned by max() is not stored or used here.
max(board);

存在以下问题:

  • over 方法对某些棋盘会给出错误的结果,例如这个棋盘:

    ['e', 'e', 'e', 'o', 'o', 'o', 'x', 'x', 'e']
    

    它在发现前三个元素是三个 e 之后就会直接 return false,不再继续查找,因此看不到第二行中 o 的获胜。要修复,请把这一行:

    return board[i] !== 'e' ? board[i] : false;
    

    改为:

    if (board[i] !== 'e') return board[i];
    

    这样,当函数遇到连续三个 e 时,就会继续进行后面的检查。列和对角线的检查也需要同样的修改,最后的 every 调用则不需要。

  • 虽然 minimax 算法成功访问了搜索树中的节点,但它并没有把找到的叶子分数(0、-100 或 100)沿搜索树向上传回。相反,您只是根据静态的棋盘局面重新计算每个位置的分数,而忽略了本可以从递归调用中得到的最佳/最差分数。为了解决这个问题,让 min 和 max 函数不仅 return 最佳着法,还 return 与之对应的分数。所以把这个:

    return bestMove;
    

    替换为:

    return [best_score, bestMove];
    

    接下来,要从递归调用中取得分数,请把这个:

    score = moveScore(min(newGame));
    

    替换为:

    score = min(newGame)[0];
    

    你需要对游戏结束的情况进行类似的修改。把这段:

    if (game.over(board)) {
        return board;
    }
    

    替换为:

    if (game.over(board)) {
        return [moveScore(board), []];
    }
    

    请注意,这是调用 moveScore 的唯一正确时机。数组的第二个元素本应是最佳着法,但由于此时已经没有着法可下,最好直接使用一个空数组。

  • 这是一个小问题:您实际上并没有使用从 max 的主调用中得到的最佳着法。通过上述更改,您可以在主调用中同时获得最佳着法和它的分数:

    [score, nextBoard] = max(board);
    

这是您更正后的代码,末尾附加了一些代码,可以通过点击 3x3 网格来玩游戏。为此,我把表示空格子的 e 改成了空格字符,因为这样在显示出来的棋盘上更好看:

// Sample position, listed row by row; a single space marks an empty cell.
var board = [
    'x', 'o', ' ',
    'o', ' ', ' ',
    ' ', ' ', ' '
];
// The human plays one mark and the AI takes the other.
var signPlayer = 'o';
var signAI = signPlayer !== 'x' ? 'x' : 'o';

// Board helpers for the search. A board is a flat array whose cells hold
// 'x', 'o' or ' ' (empty); rows are consecutive groups of three cells.
var game = {
    /**
     * Reports whether the game has ended.
     *
     * @param {Array} board - Board to inspect.
     * @returns {string|boolean} The mark occupying a completed row, column or
     *     diagonal; otherwise `true` when no empty cell remains and `false`
     *     when at least one cell is still empty.
     */
    over(board) {
        // True when the three indexed cells are equal and not blank.
        var owned = function (a, b, c) {
            return board[a] !== ' ' && board[a] === board[b] && board[b] === board[c];
        };

        for (var rowStart = 0; rowStart < board.length; rowStart += 3) {
            if (owned(rowStart, rowStart + 1, rowStart + 2)) {
                return board[rowStart];
            }
        }
        for (var col = 0; col < 3; col++) {
            if (owned(col, col + 3, col + 6)) {
                return board[col];
            }
        }
        // Both diagonals share the centre cell.
        if (owned(0, 4, 8) || owned(2, 4, 6)) {
            return board[4];
        }
        // No completed line: the game is over only if the board is full.
        return board.every((cell) => cell !== ' ');
    },

    /**
     * Alias of over(): returns exactly the same value for the same board.
     *
     * @param {Array} board - Board to inspect.
     * @returns {string|boolean} See over().
     */
    winner(board) {
        return game.over(board);
    },

    /**
     * Lists the boards reachable by placing `sign` in one empty cell.
     *
     * @param {Array} board - Current board; it is copied, never modified.
     * @param {string} sign - Mark to place.
     * @returns {Array[]} One new board per empty cell, in cell-index order.
     */
    possible_moves(board, sign) {
        var successors = [];
        for (var cell = 0; cell < board.length; cell++) {
            if (board[cell] !== ' ') {
                continue;
            }
            var child = board.slice();
            child[cell] = sign;
            successors.push(child);
        }
        return successors;
    }
};

/**
 * Scores a board from the AI's point of view using only game.winner(board).
 *
 * @param {Array} board - Board to evaluate.
 * @returns {number} -100 when the winner is signPlayer, 100 when it is signAI,
 *     and 0 for every other result (treated as a draw).
 */
function moveScore(board) {
    var outcome = game.winner(board);
    if (outcome === signPlayer) {
        return -100;
    }
    // Anything that is not a win for either mark counts as a draw.
    return outcome === signAI ? 100 : 0;
}

/**
 * Maximizing half of the minimax search: chooses the signAI move whose
 * resulting position gets the highest score from min().
 *
 * @param {Array} board - Position in which signAI is to move.
 * @returns {Array} A pair [score, nextBoard]. When game.over(board) is truthy
 *     the score is moveScore(board) and nextBoard is an empty array, because
 *     there is no move to make.
 */
function max(board) {
    if (game.over(board)) {
        return [moveScore(board), []];
    }
    // Fold over the successor boards, keeping the first one whose score is
    // strictly higher than everything seen so far.
    return game.possible_moves(board, signAI).reduce(function (best, option) {
        var candidate = option.slice();
        var value = min(candidate)[0];
        return value > best[0] ? [value, candidate] : best;
    }, [-Infinity, []]);
}

/**
 * Minimizing half of the minimax search: chooses the signPlayer move whose
 * resulting position gets the lowest score from max().
 *
 * @param {Array} board - Position in which signPlayer is to move.
 * @returns {Array} A pair [score, nextBoard]. When game.over(board) is truthy
 *     the score is moveScore(board) and nextBoard is an empty array, because
 *     there is no move to make.
 */
function min(board) {
    if (game.over(board)) {
        return [moveScore(board), []];
    }
    // Fold over the successor boards, keeping the first one whose score is
    // strictly lower than everything seen so far.
    return game.possible_moves(board, signPlayer).reduce(function (worst, option) {
        var candidate = option.slice();
        var value = max(candidate)[0];
        return value < worst[0] ? [value, candidate] : worst;
    }, [+Infinity, []]);
}

// Extra code for adding a simple GUI

// Live game state: an empty board and no score until the AI has moved.
var board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '];
var score = null;

// Page elements: the nine cells in document order, the table that receives
// the clicks, and the span that shows the score.
var tds = Array.from(document.querySelectorAll('td'));
var table = document.querySelector('table');
var span = document.querySelector('span');

/**
 * Writes the board into the table cells and the score into the span.
 *
 * @param {Array} board - Cell values, one per <td>.
 * @param {number} [score] - Value to show; omitted before the AI has replied.
 */
function display(board, score) {
    board.forEach((v, i) => {
        tds[i].textContent = v;
    });
    span.textContent = score;
}
display(board);

// One click = the player's move followed immediately by the AI's reply.
table.onclick = function (e) {
    var i = tds.indexOf(e.target);
    // Ignore clicks outside the cells, on occupied cells, or after the game ended.
    if (i === -1 || board[i] !== ' ' || game.over(board)) return;
    board[i] = signPlayer;
    display(board);
    if (game.over(board)) {
        // The player's move ended the game. max() would return an empty array
        // as the next board here, so keep the final position and only score it.
        score = moveScore(board);
    } else {
        [score, board] = max(board);
    }
    display(board, score);
};
/* Board cells: bordered, fixed width, centred marks; the pointer cursor shows a cell is clickable. */
td { border: 1px solid; width: 20px; text-align: center; cursor: pointer }
/* Fixed row height with vertically centred content. */
tr { height: 25px; vertical-align: middle }
<!-- 3x3 grid of empty cells; the script fills each <td> and handles clicks on the table. -->
<table>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
<tr><td></td><td></td><td></td></tr>
</table>
<!-- The <span> receives the score written by display(). -->
<div>
Score: <span></span>
</div>

最后的笔记

我只做了让它能够工作所需的更正,但请注意还有几种方法可以提高效率:使用 alpha-beta 剪枝、缓存已评估棋盘的分数、通过对称变换(旋转、镜像)把等价的棋盘映射到同一局面,以及直接修改棋盘而不是每次落子时都创建新棋盘。