模型介绍
树形图博弈是博弈论中描述序贯博弈的数学模型,它使用树结构来表示博弈过程:
- 节点:表示博弈状态或决策点;
- 边:表示玩家的行动或选择;
- 叶子节点:表示博弈结束状态,包含各玩家的收益;
- 信息集:表示玩家无法区分的状态集合。
树形图博弈是描述完美信息博弈的理想工具,能够清晰地展示博弈的时间结构和信息结构。
历史背景
树形图博弈的形式化模型起源于 \(20\) 世纪中叶:
- \(1944\) 年:冯·诺依曼和摩根斯坦在《博弈论与经济行为》中奠定了博弈论基础;
- \(1953\) 年:库恩提出扩展式博弈的概念,正式引入博弈树;
- \(1965\) 年:塞尔滕引入精炼纳什均衡概念,完善了序贯博弈分析;
- \(1982\) 年:克雷普斯和威尔逊提出序贯均衡概念。
核心理论与证明
扩展式博弈定义
一个扩展式博弈包含以下要素:
- 玩家集合 \(N=\{1,2,\cdots,n\}\);
- 历史集合 \(H\),包含空历史 \(\emptyset\);
- 玩家函数 \(P(h)\) 指定在历史 \(h\) 后行动的玩家;
- 收益函数 \(u_i(z)\) 给每个终端历史 \(z\) 分配玩家 \(i\) 的收益;
- 信息分割 \(I_i\),将玩家 \(i\) 的决策点划分为信息集。
博弈树的基本性质
- 定理 \(1\):任何有限扩展式博弈都可以用博弈树表示,证明:
- 博弈历史有限,可构造树结构;
- 根节点对应空历史 \(\emptyset\);
- 每个节点对应一个历史 \(h\in H\);
- 边对应行动 \(a\in A(h)\);
- 叶子节点对应终端历史 \(z\in Z\)。
子博弈完美均衡
- 定义:策略组合是子博弈完美均衡,如果它在每个子博弈中都构成纳什均衡;
- 定理 \(2\)(塞尔滕,\(1965\)):每个有限扩展式博弈都有子博弈完美均衡,证明思路:
- 使用逆向归纳法;
- 从博弈树的终端节点开始;
- 在每个决策点,玩家选择最优行动;
- 逐步回推到根节点;
- 得到的策略组合是子博弈完美的。
泽尔滕定理的详细证明
- 基础情况:单阶段博弈:只有一个决策点,显然是子博弈完美的;
- 归纳步骤:假设对于所有深度小于 \(d\) 的博弈,定理成立:
- 考虑深度为 \(d\) 的博弈;
- 移除根节点,得到深度小于 \(d\) 的子博弈;
- 根据归纳假设,子博弈有子博弈完美均衡;
- 在根节点,玩家选择对子博弈均衡最优的反应;
- 得到的策略在整個博弈中是子博弈完美的。
代码实现
基础博弈树实现
#include <bits/stdc++.h>
#define int long longusing namespace std;// 博弈树节点
class GameTreeNode {public:string id;int player; // -1: 机会节点, 0,1,2,...: 玩家编号bool is_terminal;vector<double> payoffs; // 终端节点的收益vector<pair<string, shared_ptr<GameTreeNode>>> children; // 行动和子节点GameTreeNode(string node_id, int pl = -1, bool terminal = false): id(node_id), player(pl), is_terminal(terminal) {}void addChild(const string& action, shared_ptr<GameTreeNode> child) { children.push_back({action, child}); }void setPayoffs(const vector<double>& p) { payoffs = p; }void display(int depth = 0) {string indent(depth * 2, ' ');cout << indent << "节点 " << id;if (is_terminal) {cout << " [终端, 收益=(";for (size_t i = 0; i < payoffs.size(); i++) {cout << payoffs[i];if (i < payoffs.size() - 1) cout << ", ";}cout << ")]" << endl;} else if (player == -1) {cout << " [机会节点]" << endl;} else {cout << " [玩家 " << player << "]" << endl;}for (auto& [action, child] : children) {cout << indent << " " << action << " ->" << endl;child->display(depth + 1);}}
};// 博弈树构建器
class GameTreeBuilder {public:// 构建一个简单的两阶段博弈static shared_ptr<GameTreeNode> buildTwoStageGame() {/*两阶段博弈:玩家1先行动,选择L或R如果选L,博弈结束,收益(2,2)如果选R,玩家2行动,选择U或DU: 收益(3,1)D: 收益(1,3)*/auto root = make_shared<GameTreeNode>("root", 0);// 玩家1选择L,直接结束auto terminal_L = make_shared<GameTreeNode>("term_L", 0, true);terminal_L->setPayoffs({2, 2});root->addChild("L", terminal_L);// 玩家1选择R,进入玩家2的决策auto node_R = make_shared<GameTreeNode>("node_R", 1);root->addChild("R", node_R);// 玩家2的选择auto terminal_U = make_shared<GameTreeNode>("term_U", 1, true);terminal_U->setPayoffs({3, 1});node_R->addChild("U", terminal_U);auto terminal_D = make_shared<GameTreeNode>("term_D", 1, true);terminal_D->setPayoffs({1, 3});node_R->addChild("D", terminal_D);return root;}// 构建一个更复杂的多阶段博弈static shared_ptr<GameTreeNode> buildMultiStageGame() {auto root = make_shared<GameTreeNode>("root", 0);// 第一层:玩家1的选择auto node_A = make_shared<GameTreeNode>("A", 1);auto node_B = make_shared<GameTreeNode>("B", 1);root->addChild("A1", node_A);root->addChild("A2", node_B);// 第二层:玩家2的选择auto node_A1 = make_shared<GameTreeNode>("A1", 0);auto node_A2 = make_shared<GameTreeNode>("A2", 0);auto node_B1 = make_shared<GameTreeNode>("B1", 0);auto node_B2 = make_shared<GameTreeNode>("B2", 0);node_A->addChild("B1", node_A1);node_A->addChild("B2", node_A2);node_B->addChild("B1", node_B1);node_B->addChild("B2", node_B2);// 终端节点auto term1 = make_shared<GameTreeNode>("term1", -1, true);term1->setPayoffs({4, 1});auto term2 = make_shared<GameTreeNode>("term2", -1, true);term2->setPayoffs({3, 2});auto term3 = make_shared<GameTreeNode>("term3", -1, true);term3->setPayoffs({2, 3});auto term4 = make_shared<GameTreeNode>("term4", -1, true);term4->setPayoffs({1, 4});node_A1->addChild("End", term1);node_A2->addChild("End", term2);node_B1->addChild("End", term3);node_B2->addChild("End", term4);return root;}
};
逆向归纳法求解
#include <bits/stdc++.h>
#define int long longusing namespace std;class BackwardInductionSolver {private:map<string, pair<string, vector<double>>> solution; // 节点ID -> (最优行动, 收益)public:// 使用逆向归纳法求解子博弈完美均衡void solve(shared_ptr<GameTreeNode> node) {if (node->is_terminal) {solution[node->id] = {"", node->payoffs};return;}// 先求解所有子节点for (auto& [action, child] : node->children) {solve(child);}if (node->player == -1) {// 机会节点:计算期望收益vector<double> expected_payoffs(node->children[0].second->payoffs.size(), 0.0);double prob = 1.0 / node->children.size();for (auto& [action, child] : node->children) {auto& child_sol = solution[child->id];for (size_t i = 0; i < expected_payoffs.size(); i++) {expected_payoffs[i] += prob * child_sol.second[i];}}solution[node->id] = {"", expected_payoffs};} else {// 玩家节点:选择最优行动string best_action;vector<double> best_payoffs;bool first = true;for (auto& [action, child] : node->children) {auto& child_sol = solution[child->id];if (first || child_sol.second[node->player] > best_payoffs[node->player]) {best_action = action;best_payoffs = child_sol.second;first = false;}}solution[node->id] = {best_action, best_payoffs};}}void printSolution(shared_ptr<GameTreeNode> node, int depth = 0) {string indent(depth * 2, ' ');auto& sol = solution[node->id];if (node->is_terminal) {cout << indent << node->id << ": 终端, 收益=(";for (size_t i = 0; i < sol.second.size(); i++) {cout << sol.second[i];if (i < sol.second.size() - 1) cout << ", ";}cout << ")" << endl;} else if (node->player == -1) {cout << indent << node->id << ": 机会节点, 期望收益=(";for (size_t i = 0; i < sol.second.size(); i++) {cout << sol.second[i];if (i < sol.second.size() - 1) cout << ", ";}cout << ")" << endl;} else {cout << indent << node->id << ": 玩家" << node->player << ", 最优行动=" << sol.first << ", 收益=(";for (size_t i = 0; i < sol.second.size(); i++) {cout << sol.second[i];if (i < sol.second.size() - 1) cout << ", ";}cout << ")" << endl;}for (auto& [action, child] : node->children) {if (action == sol.first) {cout << indent << " -> " << action << endl;printSolution(child, depth + 1);}}}map<string, pair<string, vector<double>>> getSolution() { return solution; }
};
完美回忆博弈实现
cpp class PerfectRecallGame {private:struct InformationSet {string id;int player;vector<string> nodes; // 属于该信息集的节点vector<string> actions; // 可用行动};map<string, shared_ptr<GameTreeNode>> nodes;map<string, InformationSet> info_sets;public:void addNode(shared_ptr<GameTreeNode> node) { nodes[node->id] = node; }void addInformationSet(const string& info_id, int player, const vector<string>& node_ids, const vector<string>& action_list) {InformationSet info;info.id = info_id;info.player = player;info.nodes = node_ids;info.actions = action_list;info_sets[info_id] = info;}// 检查完美回忆条件bool checkPerfectRecall() {for (auto& [info_id, info] : info_sets) {// 对于每个信息集,检查所有节点有相同的行动集// 并且玩家记得自己之前的所有行动// 简化检查:验证行动集一致性for (const string& node_id : info.nodes) {auto node = nodes[node_id];vector<string> node_actions;for (auto& [action, child] : node->children) {node_actions.push_back(action);}// 检查行动集是否匹配if (node_actions != info.actions) {return false;}}}return true;}void displayInformationStructure() {cout << "信息结构:" << endl;for (auto& [info_id, info] : info_sets) {cout << "信息集 " << info_id << " (玩家" << info.player << "): ";cout << "节点={";for (size_t i = 0; i < info.nodes.size(); i++) {cout << info.nodes[i];if (i < info.nodes.size() - 1) cout << ", ";}cout << "}, 行动={";for (size_t i = 0; i < info.actions.size(); i++) {cout << info.actions[i];if (i < info.actions.size() - 1) cout << ", ";}cout << "}" << endl;}}
};
变种题目与解法
变种 1:不完全信息博弈树
- 问题:玩家无法观察到所有信息;
- 解法:使用信息集表示相同信息的决策点。
class ImperfectInformationGame {private:shared_ptr<GameTreeNode> root;map<string, vector<string>> information_sets; // 信息集ID -> 节点ID列表public:ImperfectInformationGame(shared_ptr<GameTreeNode> r) : root(r) {}void addInformationSet(const string& info_id, const vector<string>& node_ids) { information_sets[info_id] = node_ids; }// 使用序列形式表示法map<string, vector<double>> calculateSequentialEquilibrium() {// 简化实现:实际需要使用线性规划或学习算法map<string, vector<double>> strategies;// 为每个信息集分配混合策略for (auto& [info_id, nodes] : information_sets) {// 假设均匀随机策略int action_count = root->children.size(); // 简化vector<double> strategy(action_count, 1.0 / action_count);strategies[info_id] = strategy;}return strategies;}void displayEquilibrium() {auto strategies = calculateSequentialEquilibrium();cout << "序贯均衡策略:" << endl;for (auto& [info_id, strategy] : strategies) {cout << "信息集 " << info_id << ": ";for (size_t i = 0; i < strategy.size(); i++) {cout << "行动" << i << "=" << strategy[i];if (i < strategy.size() - 1) cout << ", ";}cout << endl;}}
};
变种 2:随机博弈树
- 问题:包含概率事件的博弈;
- 解法:添加机会节点,计算期望收益。
class StochasticGameTree {private:shared_ptr<GameTreeNode> root;public:StochasticGameTree(shared_ptr<GameTreeNode> r) : root(r) {}// 构建包含随机事件的博弈树static shared_ptr<GameTreeNode> buildStochasticGame() {auto root = make_shared<GameTreeNode>("root", 0);// 玩家1行动auto chance_node = make_shared<GameTreeNode>("chance", -1);root->addChild("行动", chance_node);// 随机事件:成功(概率0.7)或失败(概率0.3)auto success_node = make_shared<GameTreeNode>("success", 1);auto failure_node = make_shared<GameTreeNode>("failure", 1);chance_node->addChild("成功(0.7)", success_node);chance_node->addChild("失败(0.3)", failure_node);// 成功情况下的终端节点auto term_success1 = make_shared<GameTreeNode>("term_s1", -1, true);term_success1->setPayoffs({5, 2});auto term_success2 = make_shared<GameTreeNode>("term_s2", -1, true);term_success2->setPayoffs({3, 4});success_node->addChild("合作", term_success1);success_node->addChild("背叛", term_success2);// 失败情况下的终端节点auto term_failure1 = make_shared<GameTreeNode>("term_f1", -1, true);term_failure1->setPayoffs({1, 1});auto term_failure2 = make_shared<GameTreeNode>("term_f2", -1, true);term_failure2->setPayoffs({0, 0});failure_node->addChild("合作", term_failure1);failure_node->addChild("背叛", term_failure2);return root;}// 计算期望收益vector<double> calculateExpectedPayoffs(shared_ptr<GameTreeNode> node) {if (node->is_terminal) {return node->payoffs;}if (node->player == -1) {// 机会节点:计算加权平均vector<double> expected(node->children[0].second->payoffs.size(), 0.0);for (auto& [action, child] : node->children) {// 解析概率(简化处理)double prob = 0.5; // 默认概率if (action.find("(0.") != string::npos) {// 从行动描述中提取概率size_t start = action.find("(0.");if (start != string::npos) {string prob_str = action.substr(start + 1, 3);prob = stod(prob_str);}}auto child_payoffs = calculateExpectedPayoffs(child);for (size_t i = 0; i < expected.size(); i++) {expected[i] += prob * child_payoffs[i];}}return expected;} else {// 玩家节点:假设选择最优行动(简化)vector<double> best_payoffs;bool first = true;for (auto& [action, child] : node->children) {auto child_payoffs = calculateExpectedPayoffs(child);if (first || child_payoffs[node->player] > best_payoffs[node->player]) {best_payoffs = child_payoffs;first = false;}}return best_payoffs;}}
};
变种 3:重复博弈树
- 问题:多次重复的博弈;
- 解法:构建重复博弈的扩展形式。
class RepeatedGameTree {private:int stages;shared_ptr<GameTreeNode> base_game;public:RepeatedGameTree(int s, shared_ptr<GameTreeNode> base): stages(s), base_game(base) {}// 构建重复博弈树shared_ptr<GameTreeNode> buildRepeatedGame() {return buildStage(stages, "stage_" + to_string(stages));}private:shared_ptr<GameTreeNode> buildStage(int remaining, const string& id) {if (remaining == 1) {// 最后阶段,使用基础博弈return base_game;}auto node = make_shared<GameTreeNode>(id, 0);// 递归构建后续阶段for (auto& [action, child] : base_game->children) {auto next_stage = buildStage(remaining - 1, id + "_" + action);node->addChild(action, next_stage);}return node;}// 计算重复博弈的收益(折扣因子δ)vector<double> calculateDiscountedPayoffs(shared_ptr<GameTreeNode> node, double discount, int stage = 1) {if (node->is_terminal) {return node->payoffs;}vector<double> current_payoffs(node->payoffs.size(), 0.0);vector<double> future_payoffs(node->payoffs.size(), 0.0);// 假设选择第一个行动(简化)auto& [action, child] = node->children[0];future_payoffs = calculateDiscountedPayoffs(child, discount, stage + 1);// 合并当前和未来收益for (size_t i = 0; i < current_payoffs.size(); i++) {current_payoffs[i] = pow(discount, stage - 1) * future_payoffs[i];}return current_payoffs;}
};
变种 4:贝叶斯博弈树
- 问题:包含类型不确定性的博弈;
- 解法:使用类型空间和信念更新。
class BayesianGameTree {private:struct PlayerType {string type_id;double probability;vector<double> payoffs; // 类型相关的收益};map<int, vector<PlayerType>> player_types;public:void addPlayerType(int player, const string& type_id, double prob, const vector<double>& payoffs) {player_types[player].push_back({type_id, prob, payoffs});}// 构建贝叶斯博弈的扩展形式shared_ptr<GameTreeNode> buildBayesianGame() {auto root = make_shared<GameTreeNode>("root", -1); // 自然首先行动// 为每个类型组合创建子节点for (auto& type1 : player_types[0]) {for (auto& type2 : player_types[1]) {string node_id = "type_" + type1.type_id + "_" + type2.type_id;auto type_node = make_shared<GameTreeNode>(node_id, 0);// 设置类型相关的收益// 这里简化处理,实际需要更复杂的结构root->addChild(type1.type_id + "_" + type2.type_id, type_node);}}return root;}// 计算贝叶斯纳什均衡map<string, vector<double>> calculateBayesianEquilibrium() {// 简化实现map<string, vector<double>> strategies;// 为每个类型分配策略for (auto& [player, types] : player_types) {for (auto& type : types) {// 均匀随机策略vector<double> strategy(2, 0.5); // 假设两个行动strategies[type.type_id] = strategy;}}return strategies;}
};
数学性质深度分析
库恩定理
- 定理(库恩,\(1953\)):在完美回忆的扩展式博弈中,混合策略等价于行为策略,证明思路:
- 完美回忆确保玩家记得自己之前的所有选择;
- 每个信息集的决策可以独立指定;
- 混合策略可以分解为行为策略;
- 反之亦然。
塞尔滕定理的扩展
- 精炼纳什均衡:排除不可置信威胁的纳什均衡,证明方法:
- 使用颤抖手完美均衡概念;
- 考虑策略的微小扰动;
- 在极限情况下得到精炼均衡。
应用领域
- 经济学:市场进入博弈、拍卖理论;
- 政治学:国际关系、投票博弈;
- 计算机科学:多智能体系统、算法博弈论;
- 生物学:进化博弈论、物种竞争。
总结
树形图博弈是博弈论中描述序贯决策的核心工具:
- 理论基础:提供了分析动态博弈的严谨框架;
- 算法价值:逆向归纳法等求解算法在实践中很有效;
- 扩展性强:可处理不完全信息、随机事件等复杂情况;
- 应用广泛:从经济学到人工智能都有重要应用。