一、基础实现方案(System.Speech)
1. 环境配置
// 安装NuGet包
Install-Package System.Speech
// 添加语言包(控制面板)
控制面板 -> 语言 -> 添加中文语音包
2. 核心代码实现
using System.Speech.Recognition;
using System.Windows.Forms;

/// <summary>
/// Continuous dictation recognizer for zh-CN built on
/// <see cref="SpeechRecognitionEngine"/> and the default audio input device.
/// Accepted results are forwarded through <see cref="SpeechRecognized"/> and,
/// when a text box was supplied, appended to it on the UI thread.
/// </summary>
public class ContinuousRecognizer : IDisposable
{
    // Results whose confidence is not strictly above this value are ignored.
    private const double MinimumConfidence = 0.7;

    // Optional WinForms text box that receives the transcript; may be null.
    private readonly TextBoxBase _txtResult;
    private SpeechRecognitionEngine _engine;
    private bool _isRunning;

    /// <summary>Raised for every result whose confidence exceeds the threshold.</summary>
    public event EventHandler<SpeechRecognizedEventArgs> SpeechRecognized;

    /// <summary>Raised when the engine rejects an utterance.</summary>
    public event EventHandler<SpeechRecognitionRejectedEventArgs> SpeechRecognitionRejected;

    /// <summary>Creates a recognizer that only reports results through its events.</summary>
    public ContinuousRecognizer() : this(null)
    {
    }

    /// <summary>Creates a recognizer that also appends results to a text box.</summary>
    /// <param name="txtResult">Text box receiving the transcript, or null for none.</param>
    public ContinuousRecognizer(TextBoxBase txtResult)
    {
        _txtResult = txtResult;
        InitializeEngine();
    }

    private void InitializeEngine()
    {
        var engine = new SpeechRecognitionEngine(new System.Globalization.CultureInfo("zh-CN"));
        try
        {
            // Free-form dictation grammar (continuous recognition mode).
            engine.LoadGrammar(new DictationGrammar());
            engine.SetInputToDefaultAudioDevice();
            engine.SpeechRecognized += OnRecognized;
            engine.SpeechRecognitionRejected += OnRejected;
        }
        catch
        {
            // Do not leak the native recognizer when setup fails half-way.
            engine.Dispose();
            throw;
        }

        _engine = engine;
    }

    /// <summary>Starts continuous recognition; does nothing when already running.</summary>
    /// <exception cref="ObjectDisposedException">The recognizer has been disposed.</exception>
    public void StartRecognition()
    {
        if (_engine == null)
        {
            throw new ObjectDisposedException(nameof(ContinuousRecognizer));
        }

        if (_isRunning)
        {
            return;
        }

        _engine.RecognizeAsync(RecognizeMode.Multiple);
        _isRunning = true;
    }

    /// <summary>Stops recognition after the current operation; does nothing when idle.</summary>
    public void StopRecognition()
    {
        if (_engine == null || !_isRunning)
        {
            return;
        }

        _engine.RecognizeAsyncStop();
        _isRunning = false;
    }

    private void OnRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        if (e.Result.Confidence > MinimumConfidence)
        {
            AppendLine(e.Result.Text);
            SpeechRecognized?.Invoke(this, e);
        }
    }

    private void OnRejected(object sender, SpeechRecognitionRejectedEventArgs e)
    {
        AppendLine("[识别失败]");
        SpeechRecognitionRejected?.Invoke(this, e);
    }

    // Appends one line to the optional text box, marshalling to its UI thread when needed.
    private void AppendLine(string line)
    {
        TextBoxBase box = _txtResult;
        if (box == null || box.IsDisposed || !box.IsHandleCreated)
        {
            return;
        }

        string text = line + Environment.NewLine;
        if (box.InvokeRequired)
        {
            box.BeginInvoke(new Action(() => box.AppendText(text)));
        }
        else
        {
            box.AppendText(text);
        }
    }

    /// <summary>Cancels any running recognition, detaches handlers and releases the engine.</summary>
    public void Dispose()
    {
        SpeechRecognitionEngine engine = _engine;
        if (engine == null)
        {
            return;
        }

        _engine = null;
        engine.SpeechRecognized -= OnRecognized;
        engine.SpeechRecognitionRejected -= OnRejected;
        if (_isRunning)
        {
            engine.RecognizeAsyncCancel();
            _isRunning = false;
        }

        engine.Dispose();
    }
}
二、性能优化
1. 音频预处理
// 添加降噪模块(使用NAudio)
/// <summary>
/// Captures microphone audio with NAudio, runs each buffer through the noise
/// suppressor and hands the cleaned PCM data to a speech recognition engine.
/// </summary>
public class AudioPreprocessor : IDisposable
{
    private readonly WaveInEvent _waveIn;
    private readonly SpeechRecognitionEngine _engine;
    private bool _disposed;

    /// <param name="engine">Engine that receives the denoised audio; not owned by this class.</param>
    /// <exception cref="ArgumentNullException"><paramref name="engine"/> is null.</exception>
    public AudioPreprocessor(SpeechRecognitionEngine engine)
    {
        if (engine == null)
        {
            throw new ArgumentNullException(nameof(engine));
        }

        _engine = engine;
        _waveIn = new WaveInEvent();
        _waveIn.DataAvailable += OnDataAvailable;
    }

    /// <summary>Begins capturing from the recording device.</summary>
    public void Start()
    {
        _waveIn.StartRecording();
    }

    /// <summary>Stops capturing.</summary>
    public void Stop()
    {
        _waveIn.StopRecording();
    }

    private void OnDataAvailable(object sender, WaveInEventArgs e)
    {
        if (e.BytesRecorded <= 0)
        {
            return;
        }

        // Apply the WebRTC noise-suppression step.
        byte[] denoisedData = WebRTCNative.NoiseSuppress(e.Buffer, e.BytesRecorded);
        if (denoisedData == null || denoisedData.Length == 0)
        {
            return;
        }

        // The capture buffer is headerless PCM, so it must be supplied together with an
        // explicit format; SetInputToWaveStream would expect a RIFF/WAV header instead.
        // NOTE(review): assumes the suppressor keeps the capture sample format — confirm.
        WaveFormat captured = _waveIn.WaveFormat;
        var format = new System.Speech.AudioFormat.SpeechAudioFormatInfo(
            captured.SampleRate,
            (System.Speech.AudioFormat.AudioBitsPerSample)captured.BitsPerSample,
            (System.Speech.AudioFormat.AudioChannel)captured.Channels);

        using (var ms = new MemoryStream(denoisedData, false))
        {
            _engine.SetInputToAudioStream(ms, format);
            // NOTE(review): Recognize() is synchronous and runs on the capture callback;
            // for long utterances consider feeding one continuous stream instead.
            _engine.Recognize();
        }
    }

    /// <summary>Detaches the capture handler and releases the recording device.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _waveIn.DataAvailable -= OnDataAvailable;
        _waveIn.Dispose();
    }
}
2. 多线程处理
// 使用生产者-消费者模型
/// <summary>
/// Producer-consumer pipeline: callers enqueue audio chunks and a single
/// background thread recognizes them one after another.
/// </summary>
public class RecognitionProcessor : IDisposable
{
    private readonly BlockingCollection<byte[]> _audioQueue = new BlockingCollection<byte[]>();
    private readonly Thread _processingThread;
    private bool _disposed;

    /// <summary>Raised on the worker thread for every chunk that produced a result.</summary>
    public event EventHandler<RecognitionResult> ResultAvailable;

    public RecognitionProcessor()
    {
        // Background thread so a forgotten Dispose cannot keep the process alive.
        _processingThread = new Thread(ProcessAudio) { IsBackground = true };
        _processingThread.Start();
    }

    /// <summary>Queues one chunk of audio for recognition.</summary>
    /// <param name="audioData">
    /// Audio bytes. NOTE(review): each chunk is read as complete WAV data (with header) —
    /// confirm against the producer.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="audioData"/> is null.</exception>
    public void Enqueue(byte[] audioData)
    {
        if (audioData == null)
        {
            throw new ArgumentNullException(nameof(audioData));
        }

        _audioQueue.Add(audioData);
    }

    private void ProcessAudio()
    {
        // One engine for the lifetime of the worker: creating an engine and loading the
        // dictation grammar per chunk is expensive and the engines were never disposed.
        using (var recognizer = new SpeechRecognitionEngine())
        {
            recognizer.LoadGrammar(new DictationGrammar());

            foreach (var chunk in _audioQueue.GetConsumingEnumerable())
            {
                if (chunk.Length == 0)
                {
                    continue;
                }

                using (var stream = new MemoryStream(chunk, false))
                {
                    // Recognize() has no stream overload; the input is bound first.
                    recognizer.SetInputToWaveStream(stream);
                    RecognitionResult result = recognizer.Recognize();
                    if (result != null)
                    {
                        ResultAvailable?.Invoke(this, result);
                    }
                }
            }
        }
    }

    /// <summary>Stops accepting chunks, drains the queue and waits for the worker to exit.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _audioQueue.CompleteAdding();
        _processingThread.Join();
        _audioQueue.Dispose();
    }
}
三、高级功能实现
1. 自定义语法识别
// Build a command grammar in code with GrammarBuilder (no SRGS file is involved).
var actions = new Choices("打开", "关闭", "保存");
var grammarBuilder = new GrammarBuilder();
// Tag the spoken choice itself with the key "action". Passing a plain string here would
// instead add an extra phrase that must be spoken and would become the semantic value.
grammarBuilder.Append(new SemanticResultKey("action", actions.ToGrammarBuilder()));
var grammar = new Grammar(grammarBuilder);
_recognizer.LoadGrammar(grammar);

// Event handling
_recognizer.SpeechRecognized += (s, e) =>
{
    // Results from other loaded grammars may not carry the "action" key.
    if (e.Result.Semantics.ContainsKey("action") &&
        e.Result.Semantics["action"].Value?.ToString() == "打开")
    {
        OpenApplication();
    }
};
2. 实时转写服务
// 结合WebSocket实现流式传输
/// <summary>
/// Sends every recognized phrase over a WebSocket as a JSON text message of the
/// form <c>{"Text": ..., "Timestamp": ...}</c>.
/// </summary>
public class StreamingTranscriber : IDisposable
{
    private readonly SpeechRecognizer _recognizer;
    private readonly ClientWebSocket _webSocket;
    // A WebSocket allows only one outstanding send at a time; recognition events can overlap.
    private readonly SemaphoreSlim _sendLock = new SemaphoreSlim(1, 1);
    private bool _disposed;

    /// <param name="webSocket">Connected socket used for sending; not owned by this class.</param>
    /// <exception cref="ArgumentNullException"><paramref name="webSocket"/> is null.</exception>
    public StreamingTranscriber(ClientWebSocket webSocket)
    {
        if (webSocket == null)
        {
            throw new ArgumentNullException(nameof(webSocket));
        }

        _webSocket = webSocket;
        _recognizer = new SpeechRecognizer();
        // Without a loaded grammar the recognizer never raises SpeechRecognized.
        _recognizer.LoadGrammar(new DictationGrammar());
        _recognizer.SpeechRecognized += OnRecognized;
    }

    // async void is required for an event handler, so every failure must be handled here;
    // an escaping exception would otherwise terminate the process.
    private async void OnRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        try
        {
            var json = JsonSerializer.Serialize(new { Text = e.Result.Text, Timestamp = DateTime.Now });
            byte[] payload = Encoding.UTF8.GetBytes(json);

            await _sendLock.WaitAsync();
            try
            {
                if (_webSocket.State != WebSocketState.Open)
                {
                    return;
                }

                await _webSocket.SendAsync(
                    new ArraySegment<byte>(payload),
                    WebSocketMessageType.Text,
                    true,
                    CancellationToken.None);
            }
            finally
            {
                _sendLock.Release();
            }
        }
        catch (Exception ex) when (ex is WebSocketException
                                   || ex is ObjectDisposedException
                                   || ex is InvalidOperationException)
        {
            System.Diagnostics.Debug.WriteLine($"Transcript send failed: {ex.Message}");
        }
    }

    /// <summary>Detaches the handler and releases the recognizer; the socket is left open.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _recognizer.SpeechRecognized -= OnRecognized;
        _recognizer.Dispose();
        _sendLock.Dispose();
    }
}
四、部署与调试
1. 硬件要求
组件 | 最低配置 | 推荐配置 |
---|---|---|
CPU | i5-4代(4核) | i7-10代(8核) |
内存 | 8GB | 16GB+ |
声卡 | 普通USB声卡 | 支持ASIO的专业声卡 |
麦克风 | 16kHz采样率 | 高信噪比指向性麦克风 |
2. 调试技巧
// 启用详细日志
/// <summary>
/// Creates a SpeechConfig from placeholder subscription credentials and sets two
/// string properties on it ("SpeechServiceResponse_DebugLog" and
/// "SpeechServiceResponse_ProfanityOption").
/// </summary>
/// <remarks>
/// NOTE(review): SpeechConfig.FromSubscription looks like the Azure Speech SDK rather than
/// the System.Speech API used by the other samples — confirm which dependency is intended.
/// NOTE(review): the config is a local that is neither returned nor stored, so as written
/// these settings cannot reach any recognizer.
/// NOTE(review): "YourKey"/"YourRegion" are placeholders; verify both property names
/// against the SDK documentation before relying on them.
/// </remarks>
public static void EnableDebugLogging()
{var config = SpeechConfig.FromSubscription("YourKey", "YourRegion");config.SetProperty("SpeechServiceResponse_DebugLog", "true");config.SetProperty("SpeechServiceResponse_ProfanityOption", "Raw");
}// Performance monitoring
/// <summary>
/// Writes the recognition throughput (characters per second) to the debug output.
/// </summary>
/// <param name="duration">Elapsed recognition time.</param>
/// <param name="wordCount">Number of characters recognized in that time.</param>
public void LogPerformance(TimeSpan duration, int wordCount)
{
    double seconds = duration.TotalSeconds;
    if (seconds <= 0)
    {
        // Dividing by a zero or negative duration would log ∞, NaN or a negative rate.
        Debug.WriteLine("识别速度: N/A");
        return;
    }

    Debug.WriteLine($"识别速度: {wordCount / seconds}字/秒");
}
五、扩展应用场景
1. 会议记录系统
// 结合语音识别与PPT同步
/// <summary>
/// Turns recognized speech into slides: every recognized phrase is added to the
/// presentation builder as a new slide.
/// </summary>
public class MeetingRecorder : IDisposable
{
    private readonly ContinuousRecognizer _recognizer;
    private readonly PptxBuilder _pptBuilder;
    private bool _disposed;

    public MeetingRecorder()
    {
        _recognizer = new ContinuousRecognizer();
        _pptBuilder = new PptxBuilder();
        // NOTE(review): relies on ContinuousRecognizer exposing a SpeechRecognized event
        // whose args carry Result.Text — confirm against that class.
        _recognizer.SpeechRecognized += (s, e) =>
        {
            _pptBuilder.AddSlide(e.Result.Text);
        };
    }

    /// <summary>Starts listening; nothing is recorded until this is called.</summary>
    public void Start()
    {
        _recognizer.StartRecognition();
    }

    /// <summary>Stops listening.</summary>
    public void Stop()
    {
        _recognizer.StopRecognition();
    }

    /// <summary>Stops listening and releases the owned recognizer.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _recognizer.StopRecognition();
        _recognizer.Dispose();
    }
}
2. 工业设备控制
// 关键指令处理
/// <summary>
/// Voice-driven machine control: stops the machine as soon as the emergency-stop
/// phrase appears in a recognized utterance.
/// </summary>
public class VoiceController : IDisposable
{
    // Spoken phrase that triggers the stop.
    private const string EmergencyStopPhrase = "紧急停止";

    private readonly ContinuousRecognizer _recognizer;
    private bool _disposed;

    public VoiceController()
    {
        _recognizer = new ContinuousRecognizer();
        // NOTE(review): relies on ContinuousRecognizer exposing a SpeechRecognized event
        // whose args carry Result.Text — confirm against that class.
        _recognizer.SpeechRecognized += (s, e) =>
        {
            if (e.Result.Text.Contains(EmergencyStopPhrase))
            {
                Machine.Stop();
            }
        };
    }

    /// <summary>Starts listening for commands; nothing is detected until this is called.</summary>
    public void Start()
    {
        _recognizer.StartRecognition();
    }

    /// <summary>Stops listening for commands.</summary>
    public void Stop()
    {
        _recognizer.StopRecognition();
    }

    /// <summary>Stops listening and releases the owned recognizer.</summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _recognizer.StopRecognition();
        _recognizer.Dispose();
    }
}
参考代码 语音转文字连续识别的C#小程序 www.youwenfan.com/contentcnj/70643.html
六、常见问题解决
- 识别率低:添加领域特定词汇表;调整 InitialSilenceTimeout 和 EndSilenceTimeout 参数;使用 SetProperty("Adaptation", "True") 启用自适应训练。
- 内存泄漏:实现对象池模式管理识别引擎;限制同时处理的音频流数量;定期调用 GC.Collect()(仅作临时缓解,根本办法是及时释放识别引擎并取消事件订阅)。
- 多设备冲突:
// Explicitly look up the capture device (NAudio.CoreAudioApi).
var deviceEnumerator = new MMDeviceEnumerator();
var inputDevice = deviceEnumerator.GetDefaultAudioEndpoint(DataFlow.Capture, Role.Communications);
System.Diagnostics.Debug.WriteLine($"Capture device: {inputDevice.FriendlyName}");

// SetInputToDefaultAudioDevice() takes no arguments, so the endpoint cannot be passed to it.
// NOTE(review): to recognize from a non-default endpoint, either make it the default
// recording device in Windows or capture it with NAudio and feed the engine through
// SetInputToAudioStream.
_engine.SetInputToDefaultAudioDevice();