模型介绍
博弈树是描述序贯博弈的数学工具,它将博弈过程表示为树形结构:
- 节点:表示博弈状态;
- 边:表示玩家的行动;
- 叶子节点:表示博弈结束状态,包含收益值。
在两人零和博弈中,博弈树通常包含:
- MAX节点:最大化玩家决策点;
- MIN节点:最小化玩家决策点;
- 终端节点:游戏结束状态。
历史背景
博弈树的概念最早出现在 \(20\) 世纪 \(40\) 年代,与博弈论和计算机科学的发展密切相关:
- \(1944\) 年:冯·诺依曼和摩根斯坦在《博弈论与经济行为》中奠定了理论基础;
- \(1950\) 年:香农提出计算机下棋的基本思路;
- \(1956\) 年:约翰·麦卡锡提出Alpha-Beta剪枝算法;
- \(1997\) 年:深蓝击败国际象棋世界冠军,展示了博弈树搜索的威力。
核心理论与证明
博弈树的基本性质
- 定义:博弈树是一个有根树 \(T=(V,E)\),其中:
- 每个内部节点对应一个决策点;
- 每条边对应一个可能的行动;
- 每个叶子节点对应一个收益向量。
- 定理 \(1\):任何有限完美信息博弈都可以用博弈树表示,证明:
- 博弈状态有限,可枚举所有可能状态;
- 每个状态通过有限行动转移到其他状态;
- 可构造树结构,根节点为初始状态;
- 叶子节点为终止状态,包含收益值。
Minimax 算法
- 算法思想:
- MAX 玩家选择使收益最大化的行动;
- MIN 玩家选择使收益最小化的行动;
- 通过递归计算确定最优策略。
- 数学表达:
Minimax(s) = if s是终端状态: return 效用值(s)if s是MAX节点: max(Minimax(子节点))if s是MIN节点: min(Minimax(子节点))
- 正确性证明:
- 引理:在两人零和博弈中,Minimax 值等于博弈的值,证明:
- 假设两个玩家都采用最优策略;
- 在 MAX 节点,MAX 会选择使最小值最大化的行动;
- 在 MIN 节点,MIN 会选择使最大值最小化的行动;
- 通过数学归纳法,可证明该值即为博弈值。
博弈树的复杂度
- 定理 2:完全博弈树的节点数随游戏深度指数增长,证明,设分支因子为 \(b\),深度为 \(d\):
- 叶子节点数:\(b^d\)
- 总节点数:\(1+b+b^2+\cdots+b^d=\frac{b^{d+1}-1}{b-1}\approx O(b^d)\)
- 例如国际象棋:
- \(b\approx35,d\approx100\)
- 节点数 \(\approx35^{100}\approx10^{154}\)(远大于宇宙原子数)
代码实现
基础博弈树实现
#include <bits/stdc++.h>
#define int long longusing namespace std;// 博弈树节点
class GameTreeNode {public:int value; // 节点值(对于内部节点是计算值,叶子节点是实际值)bool is_max_node; // true: MAX节点, false: MIN节点bool is_terminal; // 是否为终端节点vector<shared_ptr<GameTreeNode>> children;GameTreeNode(int val, bool is_max, bool terminal = false): value(val), is_max_node(is_max), is_terminal(terminal) {}// 添加子节点void addChild(shared_ptr<GameTreeNode> child) {children.push_back(child);}// 显示节点信息void display(int depth = 0) {string indent(depth * 2, ' ');cout << indent << (is_max_node ? "MAX" : "MIN") << "节点";if (is_terminal) {cout << "[终端, 值=" << value << "]" << endl;} else {cout << "[值=" << value << "]" << endl;}for (auto& child : children) {child->display(depth + 1);}}
};// 博弈树构建器
class GameTreeBuilder {public:// 构建一个示例博弈树static shared_ptr<GameTreeNode> buildExampleTree() {/*构建如下博弈树:MAX/ | \MIN MIN MIN/|\ |\ |\3 5 1 2 9 0 8*/// 创建叶子节点(终端节点)auto leaf1 = make_shared<GameTreeNode>(3, false, true);auto leaf2 = make_shared<GameTreeNode>(5, false, true);auto leaf3 = make_shared<GameTreeNode>(1, false, true);auto leaf4 = make_shared<GameTreeNode>(2, false, true);auto leaf5 = make_shared<GameTreeNode>(9, false, true);auto leaf6 = make_shared<GameTreeNode>(0, false, true);auto leaf7 = make_shared<GameTreeNode>(8, false, true);// 创建MIN节点auto min1 = make_shared<GameTreeNode>(0, false);min1->addChild(leaf1);min1->addChild(leaf2);min1->addChild(leaf3);auto min2 = make_shared<GameTreeNode>(0, false);min2->addChild(leaf4);min2->addChild(leaf5);auto min3 = make_shared<GameTreeNode>(0, false);min3->addChild(leaf6);min3->addChild(leaf7);// 创建根节点(MAX节点)auto root = make_shared<GameTreeNode>(0, true);root->addChild(min1);root->addChild(min2);root->addChild(min3);return root;}
};// Minimax算法实现
class MinimaxSolver {public:// 计算博弈树的最优值int solve(shared_ptr<GameTreeNode> node) {if (node->is_terminal) {return node->value;}if (node->is_max_node) {int best_value = INT_MIN;for (auto& child : node->children) {int child_value = solve(child);best_value = max(best_value, child_value);}node->value = best_value;return best_value;} else {int best_value = INT_MAX;for (auto& child : node->children) {int child_value = solve(child);best_value = min(best_value, child_value);}node->value = best_value;return best_value;}}// 显示求解过程void solveWithTrace(shared_ptr<GameTreeNode> node, int depth = 0) {string indent(depth * 2, ' ');if (node->is_terminal) {cout << indent << "终端节点返回值: " << node->value << endl;return;}cout << indent << (node->is_max_node ? "MAX" : "MIN") << "节点开始计算" << endl;if (node->is_max_node) {int best_value = INT_MIN;for (auto& child : node->children) {solveWithTrace(child, depth + 1);best_value = max(best_value, child->value);cout << indent << "考虑子节点值: " << child->value << ", 当前最佳: " << best_value << endl;}node->value = best_value;} else {int best_value = INT_MAX;for (auto& child : node->children) {solveWithTrace(child, depth + 1);best_value = min(best_value, child->value);cout << indent << "考虑子节点值: " << child->value << ", 当前最佳: " << best_value << endl;}node->value = best_value;}cout << indent << (node->is_max_node ? "MAX" : "MIN") << "节点最终值: " << node->value << endl;}
};
Alpha-Beta 剪枝实现
// Alpha-Beta剪枝算法
class AlphaBetaSolver {public:int solve(shared_ptr<GameTreeNode> node, int alpha = INT_MIN, int beta = INT_MAX) {if (node->is_terminal) {return node->value;}if (node->is_max_node) {int value = INT_MIN;for (auto& child : node->children) {value = max(value, solve(child, alpha, beta));alpha = max(alpha, value);if (alpha >= beta) {break; // Beta剪枝}}node->value = value;return value;} else {int value = INT_MAX;for (auto& child : node->children) {value = min(value, solve(child, alpha, beta));beta = min(beta, value);if (beta <= alpha) {break; // Alpha剪枝}}node->value = value;return value;}}// 带详细跟踪的Alpha-Beta搜索int solveWithTrace(shared_ptr<GameTreeNode> node, int depth = 0, int alpha = INT_MIN, int beta = INT_MAX) {string indent(depth * 2, ' ');if (node->is_terminal) {cout << indent << "终端节点返回值: " << node->value << endl;return node->value;}cout << indent << (node->is_max_node ? "MAX" : "MIN") << "节点, alpha=" << alpha << ", beta=" << beta << endl;if (node->is_max_node) {int value = INT_MIN;for (int i = 0; i < node->children.size(); i++) {auto& child = node->children[i];cout << indent << "探索子节点 " << i << endl;int child_value = solveWithTrace(child, depth + 1, alpha, beta);value = max(value, child_value);alpha = max(alpha, value);cout << indent << "子节点" << i << "返回值: " << child_value << ", 当前值: " << value << ", alpha: " << alpha << endl;if (alpha >= beta) {cout << indent << "Beta剪枝! alpha=" << alpha << " >= beta=" << beta << endl;break;}}node->value = value;cout << indent << "MAX节点返回值: " << value << endl;return value;} else {int value = INT_MAX;for (int i = 0; i < node->children.size(); i++) {auto& child = node->children[i];cout << indent << "探索子节点 " << i << endl;int child_value = solveWithTrace(child, depth + 1, alpha, beta);value = min(value, child_value);beta = min(beta, value);cout << indent << "子节点" << i << "返回值: " << child_value << ", 当前值: " << value << ", beta: " << beta << endl;if (beta <= alpha) {cout << indent << "Alpha剪枝! beta=" << beta << " <= alpha=" << alpha << endl;break;}}node->value = value;cout << indent << "MIN节点返回值: " << value << endl;return value;}}
};
变种题目与解法
变种 1:多玩家博弈树
- 问题:扩展到 \(3\) 个或更多玩家的博弈
- 解法:使用向量表示多个玩家的收益
class MultiPlayerGameNode {public:array<int, 3> payoffs; // 三个玩家的收益int current_player; // 当前行动玩家 (0,1,2)bool is_terminal;vector<shared_ptr<MultiPlayerGameNode>> children;MultiPlayerGameNode(array<int, 3> p, int player, bool terminal = false): payoffs(p), current_player(player), is_terminal(terminal) {}
};class MultiPlayerSolver {public:// 多玩家Minimax(假设每个玩家最大化自己的收益)array<int, 3> solve(shared_ptr<MultiPlayerGameNode> node) {if (node->is_terminal) {return node->payoffs;}array<int, 3> best_payoffs;bool first_child = true;for (auto& child : node->children) {array<int, 3> child_payoffs = solve(child);if (first_child) {best_payoffs = child_payoffs;first_child = false;} else {// 当前玩家选择对自己最有利的行动if (child_payoffs[node->current_player] > best_payoffs[node->current_player]) {best_payoffs = child_payoffs;}}}return best_payoffs;}
};
变种 2:随机博弈树
- 问题:包含概率事件的博弈(如掷骰子)
- 解法:添加机会节点,计算期望值
class ChanceGameNode {public:enum NodeType { MAX_NODE,MIN_NODE,CHANCE_NODE,TERMINAL };NodeType type;int value; // 终端节点的值vector<pair<shared_ptr<ChanceGameNode>, double>> children; // 子节点及其概率ChanceGameNode(NodeType t, int val = 0) : type(t), value(val) {}
};class ExpectiminimaxSolver {public:double solve(shared_ptr<ChanceGameNode> node) {switch (node->type) {case ChanceGameNode::TERMINAL:return node->value;case ChanceGameNode::MAX_NODE: {double best_value = -INFINITY;for (auto& [child, prob] : node->children) {best_value = max(best_value, solve(child));}return best_value;}case ChanceGameNode::MIN_NODE: {double best_value = INFINITY;for (auto& [child, prob] : node->children) {best_value = min(best_value, solve(child));}return best_value;}case ChanceGameNode::CHANCE_NODE: {double expected_value = 0.0;for (auto& [child, prob] : node->children) {expected_value += prob * solve(child);}return expected_value;}}return 0.0;}
};
变种 3:不完全信息博弈树
- 问题:玩家无法观察到所有信息
- 解法:使用信息集表示相同信息的决策点
class ImperfectInfoGameNode {public:string information_set; // 信息集标识符bool is_terminal;int value;vector<shared_ptr<ImperfectInfoGameNode>> children;ImperfectInfoGameNode(string info_set, bool terminal = false, int val = 0): information_set(info_set), is_terminal(terminal), value(val) {}
};class ImperfectInfoSolver {private:unordered_map<string, double> strategy; // 信息集到策略的映射public:// 使用反事实遗憾最小化等算法求解void solve(shared_ptr<ImperfectInfoGameNode> node,double reach_probability = 1.0) {if (node->is_terminal) {return;}// 简化实现:实际需要使用CFR等算法// 这里只是展示框架for (auto& child : node->children) {solve(child, reach_probability);}}
};
变种 4:实时决策博弈树
- 问题:在有限时间内做出决策
- 解法:迭代加深搜索
class RealTimeSolver {private:chrono::milliseconds time_limit;public:RealTimeSolver(int ms_limit) : time_limit(ms_limit) {}int iterativeDeepening(shared_ptr<GameTreeNode> root) {auto start_time = chrono::steady_clock::now();int depth = 1;int best_value = 0;MinimaxSolver solver;while (true) {auto current_time = chrono::steady_clock::now();auto elapsed = chrono::duration_cast<chrono::milliseconds>(current_time - start_time);if (elapsed > time_limit * 0.9) { // 留10%安全边际break;}// 使用超时保护auto future = async(launch::async, [&]() {return solver.solve(root);});if (future.wait_for(time_limit - elapsed) == future_status::ready) {best_value = future.get();} else {break; // 超时}depth++;}cout << "搜索深度: " << depth - 1 << endl;return best_value;}
};
总结
博弈树是博弈论和人工智能的核心概念:
- 理论基础:为序贯决策提供数学模型
- 算法框架:Minimax 和 Alpha-Beta 成为游戏 AI 的基础
- 扩展性:可处理多玩家、随机事件、不完全信息等复杂情况
- 实用性:在象棋、围棋、扑克等游戏中取得巨大成功