1. 数据准备和预处理
classdef LoadDataPreprocessorpropertiesraw_dataprocessed_datafeature_namestemporal_featuresweather_featureshistorical_featuresendmethodsfunction obj = LoadDataPreprocessor(data_file)% 构造函数if nargin > 0obj = obj.load_data(data_file);endendfunction obj = load_data(obj, data_file)% 加载电力负荷数据fprintf('加载数据文件: %s\n', data_file);% 支持多种数据格式[~, ~, ext] = fileparts(data_file);switch lower(ext)case '.csv'obj.raw_data = readtable(data_file);case '.mat'loaded_data = load(data_file);obj.raw_data = loaded_data.data;case {'.xlsx', '.xls'}obj.raw_data = readtable(data_file);otherwiseerror('不支持的文件格式');endfprintf('数据加载完成,共 %d 行,%d 列\n', ...size(obj.raw_data, 1), size(obj.raw_data, 2));endfunction obj = preprocess_data(obj)% 数据预处理流程fprintf('开始数据预处理...\n');% 1. 处理缺失值obj = obj.handle_missing_values();% 2. 异常值检测和处理obj = obj.handle_outliers();% 3. 特征工程obj = obj.feature_engineering();% 4. 数据标准化obj = obj.normalize_data();fprintf('数据预处理完成\n');endfunction obj = handle_missing_values(obj)% 处理缺失值data = obj.raw_data;% 检查缺失值missing_ratio = sum(ismissing(data)) / height(data);fprintf('缺失值比例:\n');disp(missing_ratio);% 对数值列使用插值numeric_vars = varfun(@isnumeric, data, 'OutputFormat', 'uniform');numeric_data = data(:, numeric_vars);for i = 1:width(numeric_data)if missing_ratio(i) < 0.3 % 如果缺失值比例小于30%numeric_data{:,i} = fillmissing(numeric_data{:,i}, 'linear');elsenumeric_data{:,i} = fillmissing(numeric_data{:,i}, 'constant', 0);endenddata(:, numeric_vars) = numeric_data;obj.raw_data = data;endfunction obj = handle_outliers(obj)% 异常值处理data = obj.raw_data;% 假设负荷数据在第一个数值列load_data = data{:,1};% 使用IQR方法检测异常值Q1 = quantile(load_data, 0.25);Q3 = quantile(load_data, 0.75);IQR = Q3 - Q1;lower_bound = Q1 - 1.5 * IQR;upper_bound = Q3 + 1.5 * IQR;outliers = load_data < lower_bound | load_data > upper_bound;fprintf('检测到 %d 个异常值 (%.2f%%)\n', ...sum(outliers), sum(outliers)/length(load_data)*100);% 使用移动中位数替换异常值if sum(outliers) > 0load_data_clean = load_data;for i = 1:length(load_data)if outliers(i)% 使用前后各2个小时的数据计算中位数start_idx = max(1, i-2);end_idx = min(length(load_data), i+2);neighbor_data = load_data(start_idx:end_idx);valid_neighbors = neighbor_data(~outliers(start_idx:end_idx));if ~isempty(valid_neighbors)load_data_clean(i) = median(valid_neighbors);elseload_data_clean(i) = median(load_data(~outliers));endendenddata{:,1} = load_data_clean;endobj.raw_data = data;endfunction obj = feature_engineering(obj)% 特征工程data = obj.raw_data;% 提取时间特征if isdatetime(data{:,1}) || iscell(data{:,1})time_col = data{:,1};if iscell(time_col)time_col = datetime(time_col);end% 提取多种时间特征data.Hour = hour(time_col);data.DayOfWeek = weekday(time_col);data.Month = month(time_col);data.DayOfYear = day(time_col, 'dayofyear');data.IsWeekend = isweekend(time_col);data.Season = obj.get_season(month(time_col));% 节假日特征data.IsHoliday = obj.is_holiday(time_col);% 滞后特征data.Load_Lag1 = [NaN; data{1:end-1, 2}]; % 假设第二列是负荷data.Load_Lag24 = [NaN(24,1); data{1:end-24, 2}];data.Load_Lag168 = [NaN(168,1); data{1:end-168, 2}];% 移动平均特征data.Load_MA24 = movmean(data{:,2}, [23, 0], 'omitnan');data.Load_MA168 = movmean(data{:,2}, [167, 0], 'omitnan');obj.temporal_features = {'Hour', 'DayOfWeek', 'Month', ...'DayOfYear', 'IsWeekend', 'Season', 'IsHoliday'};obj.historical_features = {'Load_Lag1', 'Load_Lag24', ...'Load_Lag168', 'Load_MA24', 'Load_MA168'};endobj.processed_data = data;obj.feature_names = data.Properties.VariableNames;endfunction season = get_season(~, month)% 根据月份获取季节season = zeros(size(month));season(month >= 3 & month <= 5) = 1; % 春季season(month >= 6 & month <= 8) = 2; % 夏季season(month >= 9 & month <= 11) = 3; % 秋季season(month == 12 | month <= 2) = 4; % 冬季endfunction is_holiday = is_holiday(~, dates)% 简单的节假日判断(需要根据实际情况完善)is_holiday = false(size(dates));% 这里可以添加具体的节假日判断逻辑% 例如:is_holiday = (month(dates) == 1 & day(dates) == 1); % 元旦endfunction obj = normalize_data(obj)% 数据标准化data = obj.processed_data;% 对数值列进行标准化numeric_vars = varfun(@isnumeric, data, 'OutputFormat', 'uniform');for i = 1:length(numeric_vars)if numeric_vars(i)col_data = data{:,i};if ~all(isnan(col_data))% Z-score标准化mu = mean(col_data, 'omitnan');sigma = std(col_data, 'omitnan');if sigma > 0data{:,i} = (col_data - mu) / sigma;endendendendobj.processed_data = data;endfunction [X_train, X_test, y_train, y_test] = split_data(obj, test_ratio, lag_hours)% 分割数据集if nargin < 2test_ratio = 0.2;endif nargin < 3lag_hours = 24;enddata = obj.processed_data;numeric_data = data{:, varfun(@isnumeric, data, 'OutputFormat', 'uniform')};numeric_data = rmmissing(numeric_data); % 移除包含NaN的行% 准备特征和目标变量X = numeric_data(:, 2:end); % 假设第一列是时间,第二列开始是特征y = numeric_data(:, 1); % 假设第一列是负荷值% 分割数据n = size(X, 1);split_point = floor(n * (1 - test_ratio));X_train = X(1:split_point, :);X_test = X(split_point+1:end, :);y_train = y(1:split_point);y_test = y(split_point+1:end);fprintf('训练集: %d 样本, 测试集: %d 样本\n', ...size(X_train, 1), size(X_test, 1));endend
end
2. 多种预测模型实现
classdef LoadForecasterpropertiesmodelsmodel_namespredictionsactual_valuesperformance_metricsfeature_importanceendmethodsfunction obj = LoadForecaster()% 构造函数obj.models = struct();obj.model_names = {};obj.predictions = struct();obj.performance_metrics = struct();endfunction obj = train_arima(obj, X_train, y_train, order)% ARIMA模型训练fprintf('训练ARIMA模型...\n');if nargin < 4order = [2, 1, 2]; % [p, d, q]endtry% 确保数据是列向量y_train = y_train(:);% 创建ARIMA模型Mdl = arima('ARLags', 1:order(1), ...'D', order(2), ...'MALags', 1:order(3), ...'Constant', 0);% 训练模型obj.models.arima = estimate(Mdl, y_train, 'Display', 'off');obj.model_names{end+1} = 'ARIMA';fprintf('ARIMA模型训练完成\n');catch MEfprintf('ARIMA训练错误: %s\n', ME.message);endendfunction obj = train_svm(obj, X_train, y_train, kernel_type)% SVM模型训练fprintf('训练SVM模型...\n');if nargin < 4kernel_type = 'gaussian';endtry% 训练SVM回归模型if strcmpi(kernel_type, 'gaussian')obj.models.svm = fitrsvm(X_train, y_train, ...'KernelFunction', 'gaussian', ...'Standardize', true, ...'KernelScale', 'auto');elseobj.models.svm = fitrsvm(X_train, y_train, ...'KernelFunction', kernel_type, ...'Standardize', true);endobj.model_names{end+1} = 'SVM';fprintf('SVM模型训练完成\n');catch MEfprintf('SVM训练错误: %s\n', ME.message);endendfunction obj = train_random_forest(obj, X_train, y_train, num_trees)% 随机森林训练fprintf('训练随机森林模型...\n');if nargin < 4num_trees = 100;endtry% 训练随机森林obj.models.random_forest = TreeBagger(num_trees, X_train, y_train, ...'Method', 'regression', ...'OOBPrediction', 'on', ...'MinLeafSize', 5);obj.model_names{end+1} = 'RandomForest';% 计算特征重要性obj.feature_importance.random_forest = ...obj.models.random_forest.OOBPermutedPredictorDeltaError;fprintf('随机森林训练完成,OOB误差: %.4f\n', ...mean(obj.models.random_forest.oobError));catch MEfprintf('随机森林训练错误: %s\n', ME.message);endendfunction obj = train_xgboost(obj, X_train, y_train, params)% XGBoost模型训练(需要安装XGBoost库)fprintf('训练XGBoost模型...\n');if nargin < 4params = struct('max_depth', 6, 'learning_rate', 0.1, ...'n_estimators', 100, 'objective', 'reg:squarederror');endtry% 转换数据为XGBoost格式dtrain = obj.mat2xgb(X_train, y_train);% 训练模型obj.models.xgboost = xgboost_train(params, dtrain, ...params.n_estimators, struct('verbose', 0));obj.model_names{end+1} = 'XGBoost';fprintf('XGBoost模型训练完成\n');catch MEfprintf('XGBoost训练错误: %s\n', ME.message);fprintf('请确保已安装XGBoost for MATLAB\n');endendfunction obj = train_lstm(obj, X_train, y_train, lstm_params)% LSTM模型训练fprintf('训练LSTM模型...\n');if nargin < 4lstm_params = struct('hidden_units', 50, 'num_layers', 2, ...'epochs', 50, 'learning_rate', 0.001);endtry% 准备LSTM数据[X_lstm, y_lstm] = obj.prepare_lstm_data(X_train, y_train, 24);% 创建LSTM网络layers = [ ...sequenceInputLayer(size(X_lstm, 2))lstmLayer(lstm_params.hidden_units, 'OutputMode', 'last')fullyConnectedLayer(50)reluLayer()fullyConnectedLayer(1)regressionLayer()];options = trainingOptions('adam', ...'MaxEpochs', lstm_params.epochs, ...'InitialLearnRate', lstm_params.learning_rate, ...'Verbose', false);% 训练网络obj.models.lstm = trainNetwork(X_lstm, y_lstm, layers, options);obj.model_names{end+1} = 'LSTM';fprintf('LSTM模型训练完成\n');catch MEfprintf('LSTM训练错误: %s\n', ME.message);endendfunction [X_seq, y_seq] = prepare_lstm_data(~, X, y, sequence_length)% 准备LSTM序列数据num_sequences = size(X, 1) - sequence_length;X_seq = cell(num_sequences, 1);y_seq = zeros(num_sequences, 1);for i = 1:num_sequencesX_seq{i} = X(i:i+sequence_length-1, :)';y_seq(i) = y(i+sequence_length);endendfunction predictions = predict_arima(obj, X_test, y_test, steps)% ARIMA模型预测if nargin < 4steps = length(y_test);endif isfield(obj.models, 'arima')try[y_pred, y_mse] = forecast(obj.models.arima, steps, 'Y0', y_test(1:min(100, length(y_test))));predictions = y_pred;catchpredictions = zeros(steps, 1);endelsepredictions = zeros(steps, 1);endendfunction predictions = predict_svm(obj, X_test)% SVM模型预测if isfield(obj.models, 'svm')predictions = predict(obj.models.svm, X_test);elsepredictions = zeros(size(X_test, 1), 1);endendfunction predictions = predict_random_forest(obj, X_test)% 随机森林预测if isfield(obj.models, 'random_forest')predictions = predict(obj.models.random_forest, X_test);elsepredictions = zeros(size(X_test, 1), 1);endendfunction obj = predict_all_models(obj, X_test, y_test)% 所有模型预测fprintf('开始模型预测...\n');obj.actual_values = y_test;% 各模型预测for i = 1:length(obj.model_names)model_name = obj.model_names{i};fprintf('使用 %s 模型进行预测...\n', model_name);switch lower(model_name)case 'arima'pred = obj.predict_arima(X_test, y_test, length(y_test));case 'svm'pred = obj.predict_svm(X_test);case 'randomforest'pred = obj.predict_random_forest(X_test);case 'xgboost'pred = obj.predict_xgboost(X_test);case 'lstm'pred = obj.predict_lstm(X_test);otherwisepred = zeros(length(y_test), 1);endobj.predictions.(model_name) = pred;% 计算性能指标obj.performance_metrics.(model_name) = ...obj.calculate_metrics(y_test, pred);endendfunction metrics = calculate_metrics(~, y_true, y_pred)% 计算预测性能指标metrics = struct();% 移除NaN值valid_idx = ~isnan(y_true) & ~isnan(y_pred);y_true = y_true(valid_idx);y_pred = y_pred(valid_idx);if isempty(y_true)metrics.MAE = NaN;metrics.RMSE = NaN;metrics.MAPE = NaN;metrics.R2 = NaN;return;end% 平均绝对误差metrics.MAE = mean(abs(y_true - y_pred));% 均方根误差metrics.RMSE = sqrt(mean((y_true - y_pred).^2));% 平均绝对百分比误差metrics.MAPE = mean(abs((y_true - y_pred) ./ y_true)) * 100;% R²决定系数SS_res = sum((y_true - y_pred).^2);SS_tot = sum((y_true - mean(y_true)).^2);metrics.R2 = 1 - (SS_res / SS_tot);endfunction plot_predictions(obj, model_names)% 绘制预测结果if nargin < 2model_names = obj.model_names;endfigure('Position', [100, 100, 1200, 800]);% 预测结果对比subplot(2, 2, 1);time_points = 1:length(obj.actual_values);plot(time_points, obj.actual_values, 'k-', 'LineWidth', 2);hold on;colors = lines(length(model_names));legend_entries = {'实际值'};for i = 1:length(model_names)model_name = model_names{i};if isfield(obj.predictions, model_name)pred = obj.predictions.(model_name);plot(time_points, pred, '--', 'Color', colors(i,:), 'LineWidth', 1.5);legend_entries{end+1} = model_name;endendxlabel('时间点');ylabel('负荷值');title('负荷预测结果对比');legend(legend_entries, 'Location', 'best');grid on;% 误差分布subplot(2, 2, 2);errors = [];error_labels = {};for i = 1:length(model_names)model_name = model_names{i};if isfield(obj.predictions, model_name)pred = obj.predictions.(model_name);error = abs(pred - obj.actual_values);errors = [errors; error];error_labels = [error_labels; repmat({model_name}, length(error), 1)];endendboxplot(errors, error_labels);ylabel('绝对误差');title('各模型误差分布');grid on;% 性能指标对比subplot(2, 2, 3);metrics = {'MAE', 'RMSE', 'MAPE'};metric_data = zeros(length(metrics), length(model_names));for i = 1:length(model_names)model_name = model_names{i};if isfield(obj.performance_metrics, model_name)for j = 1:length(metrics)metric_data(j, i) = obj.performance_metrics.(model_name).(metrics{j});endendendbar(metric_data');set(gca, 'XTickLabel', model_names);ylabel('指标值');title('模型性能指标对比');legend(metrics, 'Location', 'best');grid on;% 特征重要性(随机森林)subplot(2, 2, 4);if isfield(obj.feature_importance, 'random_forest')importance = obj.feature_importance.random_forest;[~, idx] = sort(importance, 'descend');top_idx = idx(1:min(10, length(importance)));barh(importance(top_idx));set(gca, 'YTickLabel', top_idx);xlabel('重要性得分');title('随机森林特征重要性 (Top 10)');grid on;endendfunction print_performance(obj)% 打印性能指标fprintf('\n=== 模型性能比较 ===\n');fprintf('%-15s %-8s %-8s %-8s %-8s\n', ...'模型', 'MAE', 'RMSE', 'MAPE%', 'R²');fprintf('%-15s %-8s %-8s %-8s %-8s\n', ...'-----', '---', '----', '-----', '--');for i = 1:length(obj.model_names)model_name = obj.model_names{i};if isfield(obj.performance_metrics, model_name)m = obj.performance_metrics.(model_name);fprintf('%-15s %-8.3f %-8.3f %-8.2f %-8.3f\n', ...model_name, m.MAE, m.RMSE, m.MAPE, m.R2);endendendend
end
3. 集成学习和模型融合
classdef EnsembleForecasterpropertiesbase_modelsensemble_weightsensemble_methodpredictionsperformanceendmethodsfunction obj = EnsembleForecaster(base_models, method)% 构造函数obj.base_models = base_models;if nargin < 2obj.ensemble_method = 'weighted_average';elseobj.ensemble_method = method;endendfunction obj = train_ensemble(obj, X_train, y_train, X_val, y_val)% 训练集成模型fprintf('训练集成预测模型...\n');% 获取基模型的预测base_predictions = [];model_weights = [];for i = 1:length(obj.base_models)model = obj.base_models{i};% 获取模型在验证集上的预测if isa(model, 'LoadForecaster')pred = model.predict_all_models(X_val, y_val);% 这里需要根据具体实现调整elsepred = predict(model, X_val);endbase_predictions = [base_predictions, pred];% 根据性能计算权重mae = mean(abs(pred - y_val));weight = 1 / (mae + eps); % 避免除零model_weights = [model_weights, weight];end% 归一化权重obj.ensemble_weights = model_weights / sum(model_weights);fprintf('集成模型训练完成,权重: %s\n', mat2str(obj.ensemble_weights, 2));endfunction ensemble_pred = predict_ensemble(obj, X_test)% 集成预测base_preds = zeros(size(X_test, 1), length(obj.base_models));for i = 1:length(obj.base_models)model = obj.base_models{i};if isa(model, 'LoadForecaster')% 处理LoadForecaster对象的预测pred = model.predict_all_models(X_test, zeros(size(X_test,1),1));elsepred = predict(model, X_test);endbase_preds(:, i) = pred;endswitch obj.ensemble_methodcase 'weighted_average'ensemble_pred = base_preds * obj.ensemble_weights';case 'median'ensemble_pred = median(base_preds, 2);case 'best_model'[~, best_idx] = max(obj.ensemble_weights);ensemble_pred = base_preds(:, best_idx);otherwiseensemble_pred = mean(base_preds, 2);endendend
end
4. 完整的使用示例
% 电力系统短期负荷预测主程序
function main_short_term_load_forecasting()% 清除环境clear; close all; clc;fprintf('=== 电力系统短期负荷预测系统 ===\n\n');% 1. 数据准备和预处理fprintf('步骤1: 数据准备和预处理\n');% 创建示例数据(实际应用中替换为真实数据)[X, y] = create_sample_load_data();% 数据预处理preprocessor = LoadDataPreprocessor();preprocessor.raw_data = array2table([y, X], 'VariableNames', ...{'Load', 'Feature1', 'Feature2', 'Feature3', 'Feature4'});preprocessor = preprocessor.preprocess_data();% 分割数据[X_train, X_test, y_train, y_test] = preprocessor.split_data(0.2);% 2. 训练预测模型fprintf('\n步骤2: 训练预测模型\n');forecaster = LoadForecaster();% 训练多个模型forecaster = forecaster.train_arima(X_train, y_train);forecaster = forecaster.train_svm(X_train, y_train);forecaster = forecaster.train_random_forest(X_train, y_train);% 3. 模型预测和评估fprintf('\n步骤3: 模型预测和评估\n');forecaster = forecaster.predict_all_models(X_test, y_test);% 显示性能比较forecaster.print_performance();% 4. 可视化结果fprintf('\n步骤4: 结果可视化\n');forecaster.plot_predictions();% 5. 集成学习fprintf('\n步骤5: 集成学习\n');% 创建集成模型(示例)base_models = {forecaster.models.svm, forecaster.models.random_forest};ensemble = EnsembleForecaster(base_models, 'weighted_average');% 这里需要验证集数据来训练集成权重% ensemble = ensemble.train_ensemble(X_train, y_train, X_val, y_val);fprintf('\n=== 预测完成 ===\n');
end% 创建示例数据函数
function [X, y] = create_sample_load_data()% 创建示例负荷数据rng(42); % 设置随机种子确保可重现n_points = 1000;time = (1:n_points)';% 基础负荷模式base_load = 1000 + 500 * sin(2*pi*time/24) + 200 * sin(2*pi*time/168);% 添加天气影响temperature = 20 + 10 * sin(2*pi*time/24) + 5 * randn(n_points, 1);humidity = 50 + 20 * sin(2*pi*time/12) + 10 * randn(n_points, 1);% 添加随机噪声noise = 50 * randn(n_points, 1);% 生成目标负荷y = base_load + 10 * temperature + 5 * humidity + noise;% 生成特征X = [temperature, humidity, sin(2*pi*time/24), cos(2*pi*time/24)];fprintf('生成示例数据: %d 个时间点\n', n_points);
end% 实时预测函数
function real_time_forecasting_example()% 实时预测示例fprintf('实时负荷预测示例...\n');% 加载最新数据% 这里应该是从实时数据库或API获取数据recent_data = get_recent_load_data();% 使用训练好的模型进行预测% load('trained_forecaster.mat'); % 加载已训练的模型% 进行未来24小时预测horizon = 24;% future_pred = predict_future(forecaster, recent_data, horizon);fprintf('未来%d小时负荷预测完成\n', horizon);
end
5. 性能优化和高级功能
% 模型超参数优化
function optimized_forecaster = optimize_hyperparameters(X_train, y_train)% 使用贝叶斯优化寻找最佳超参数fprintf('开始超参数优化...\n');% 定义优化变量optim_vars = [optimizableVariable('NumTrees', [50, 200], 'Type', 'integer');optimizableVariable('MinLeafSize', [1, 20], 'Type', 'integer');optimizableVariable('MaxDepth', [5, 20], 'Type', 'integer')];% 目标函数objective_function = @(params) train_evaluate_model(X_train, y_train, params);% 贝叶斯优化results = bayesopt(objective_function, optim_vars, ...'MaxTime', 3600, 'Verbose', 1, ...'PlotFcn', {@plotObjectiveModel, @plotMinObjective});% 使用最佳参数训练最终模型best_params = results.XAtMinObjective;optimized_forecaster = train_final_model(X_train, y_train, best_params);fprintf('超参数优化完成\n');
endfunction loss = train_evaluate_model(X_train, y_train, params)% 训练并评估模型cv = cvpartition(length(y_train), 'KFold', 5);losses = zeros(cv.NumTestSets, 1);for i = 1:cv.NumTestSetstrain_idx = training(cv, i);test_idx = test(cv, i);X_tr = X_train(train_idx, :);y_tr = y_train(train_idx);X_te = X_train(test_idx, :);y_te = y_train(test_idx);% 训练模型model = TreeBagger(params.NumTrees, X_tr, y_tr, ...'Method', 'regression', ...'MinLeafSize', params.MinLeafSize);% 预测和评估y_pred = predict(model, X_te);losses(i) = sqrt(mean((y_te - y_pred).^2)); % RMSEendloss = mean(losses);
end
参考代码 电力系统短期负荷预测 www.youwenfan.com.com/contentcnj/59804.html
电力系统短期负荷预测方案提供了:
- 数据预处理:缺失值处理、异常值检测、特征工程
- 多种预测模型:ARIMA、SVM、随机森林、LSTM等
- 模型评估:多种性能指标和可视化
- 集成学习:模型融合提高预测精度
- 超参数优化:自动寻找最佳模型参数