使用Python實現從麥克風獲取音頻并識別
更新時間:2025年02月28日 08:30:38 作者:鳳梟香
這篇文章主要為大家詳細介紹了如何使用Python實現從麥克風獲取音頻并識別功能,文中的示例代碼講解詳細,感興趣的小伙伴可以跟隨小編一起學習一下。
Python麥克風獲取音頻并識別
麥克風獲取
# Open the microphone input stream.
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK,
)
設置參數
# Recording parameters.
FORMAT = pyaudio.paInt16  # 16-bit resolution
CHANNELS = 1  # mono
RATE = 16000  # sampling rate: 16 kHz
CHUNK = 1024  # data chunk size
RECORD_SECONDS = 5  # recording duration in seconds
WAVE_OUTPUT_FILENAME = "output.wav"
讀取數據塊
# Read data chunks in a loop until RECORD_SECONDS of audio is collected.
chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
for _ in range(chunk_count):
    frames.append(stream.read(CHUNK))
阿里語音識別模型加載
# Local model directories handed to AutoModel below.
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

# Build the recognition model from the main model plus the VAD, punctuation
# and speaker models, each pinned to an explicit revision; runs on CPU.
model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
阿里語音識別
# Run recognition on the recorded WAV file.
res = model.generate(
    input=WAVE_OUTPUT_FILENAME,
    batch_size_s=16000,
    hotword='魔搭',
)
整體代碼
import os
import threading
import wave

import keyboard
import pyaudio
from funasr import AutoModel

# Recording parameters.
FORMAT = pyaudio.paInt16  # 16-bit resolution
CHANNELS = 1  # mono
RATE = 16000  # sampling rate: 16 kHz
CHUNK = 1024  # data chunk size
WAVE_OUTPUT_FILENAME = "./wav_data/output.wav"

# Shared recording state. One PyAudio instance is kept alive for the whole
# program so that several recordings can be made in a row.
audio = pyaudio.PyAudio()
frames = []
stream = None
recording = False
record_thread = None

# Local model directories handed to AutoModel below.
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
print("加載模型完成!!!")


def start_recording():
    """Open the microphone stream and start capturing in a background thread.

    Does nothing when a recording is already in progress. Audio captured by
    a previous recording is discarded first so takes do not get concatenated.
    """
    global stream, recording, record_thread
    if recording:
        return
    print("開始錄音...")
    frames.clear()
    stream = audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
    )
    # Only flag the recording as active once the stream really exists, so a
    # failed open() does not leave the state stuck at "recording".
    recording = True
    record_thread = threading.Thread(target=record_audio)
    record_thread.start()


def stop_recording():
    """Stop capturing, save the audio to disk and run speech recognition.

    Does nothing when no recording is in progress. The recognition result
    is printed, not returned.
    """
    global stream, recording, record_thread
    if not recording:
        return
    print("錄音結束.")
    recording = False
    # Let the reader thread finish its in-flight read before the stream is
    # closed; closing underneath a blocked read() is a race.
    if record_thread is not None:
        record_thread.join()
        record_thread = None
    stream.stop_stream()
    stream.close()
    stream = None
    save_audio()
    # NOTE: the PyAudio instance is intentionally NOT terminated here; doing
    # so would make every later start_recording() call fail.
    print("開始識別!!!")
    res = model.generate(
        input=WAVE_OUTPUT_FILENAME,
        batch_size_s=16000,
        hotword='魔搭',
    )
    print("識別結束!!!")
    print("識別結果:", res)


def record_audio():
    """Reader-thread body: append microphone chunks to ``frames`` until stopped."""
    while recording:
        frames.append(stream.read(CHUNK))


def save_audio():
    """Write the captured frames to ``WAVE_OUTPUT_FILENAME`` as a WAV file.

    The target directory is created when missing.
    """
    out_dir = os.path.dirname(WAVE_OUTPUT_FILENAME)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # The context manager closes the file even if a write fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))


# Ctrl starts a recording.
keyboard.add_hotkey('ctrl', start_recording)
# Alt ends the recording and triggers recognition.
keyboard.add_hotkey('alt', stop_recording)

print("按 Ctrl 開始錄音,按 Alt 結束錄音")
try:
    keyboard.wait()  # keep the program running
finally:
    # Release audio resources on exit (e.g. KeyboardInterrupt), including a
    # recording that is still active.
    recording = False
    if record_thread is not None:
        record_thread.join()
    if stream is not None:
        stream.close()
    audio.terminate()
到此這篇關于使用Python實現從麥克風獲取音頻并識別的文章就介紹到這了,更多相關Python麥克風獲取音頻內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持腳本之家!
相關文章
使用Python操作Excel中圖片的基礎示例(插入、替換、提取、刪除)
Excel是主要用于處理表格和數據的工具,我們也能在其中插入、編輯或管理圖片,為工作表增添視覺效果,提升報告的吸引力,本文將詳細介紹如何使用Python操作Excel中的圖片,文中有詳細代碼示例供大家參考,需要的朋友可以參考下 2024-07-07