# Source listing: hugvey/hugvey/speech/recorder.py
# (105 lines, 3.2 KiB, Python)

import audioop
import chunk
import logging
import os
import sys
import time
import wave
from _struct import pack
from array import array

import pyaudio
mainLogger = logging.getLogger("hugvey")
logger = mainLogger.getChild("recorder")


class Recorder:
    """
    Record the streamed audio.

    Incoming chunks of 16-bit mono samples are buffered in memory. After
    ``MUTED_FRAMES_BEFORE_SPLIT`` consecutive all-zero chunks the buffer is
    written to a numbered WAV file in the session folder, together with the
    transcription last set through :meth:`updateTranscription`.
    """

    # Number of consecutive muted (all-zero) chunks after which the buffered
    # audio is flushed to a new fragment; later muted chunks are dropped.
    MUTED_FRAMES_BEFORE_SPLIT = 4

    def __init__(self, hv_id, src_rate, out_folder):
        """
        Create an idle recorder; call :meth:`start` to begin a session.

        Args:
            hv_id: identifier used as sub-folder name below ``out_folder``.
            src_rate: frame rate written into the WAV header.
            out_folder: existing directory in which sessions are stored.

        Raises:
            FileNotFoundError: if ``out_folder`` does not exist.
        """
        if not os.path.exists(out_folder):
            raise FileNotFoundError(
                f"Invalid output folder for recordings: {out_folder}")
        self.hv_id = hv_id
        # The rate is configurable because, unfortunately, not every device
        # plays 16kHz audio streams.
        self.src_rate = src_rate
        self.main_folder = out_folder
        self.running = False
        # Initialise the buffer state up front so that shutdown() is safe
        # even when start() was never called.
        self._reset_session_state()

    def _reset_session_state(self):
        """Clear the sample buffer, the counters and the pending transcription."""
        self.subsequentMutedFrames = 0
        self.fragmentNr = 0
        self.data = array('h')
        self.currentTranscription = ""

    def start(self):
        """Begin a new recording session in a timestamped folder."""
        self._reset_session_state()
        # NOTE: the folder name has minute resolution. Two sessions started
        # within the same minute share a folder and, because fragmentNr
        # restarts at 0, the later one overwrites the earlier WAV files.
        t = time.strftime("%Y%m%d-%H%M")
        self.out_folder = os.path.join(self.main_folder, f"{self.hv_id}", t)
        if not os.path.exists(self.out_folder):
            logger.debug("Create directory %s", self.out_folder)
        os.makedirs(self.out_folder, exist_ok=True)
        # os.makedirs() returns None, so keep the actual path instead of the
        # call's return value.
        self.target_folder = self.out_folder
        self.running = True

    def writeData(self):
        """
        Flush the buffered samples to the next numbered WAV fragment.

        Also appends ``<fragment number>\\t<transcription>`` to
        ``transcriptions.txt``, logs the transcription, and resets the buffer
        and the pending transcription. Does nothing when the buffer is empty.
        """
        if not self.data:
            return
        self.fragmentNr += 1

        # Emit explicit little-endian bytes, exactly what the previous
        # struct.pack('<hhh...') call produced, without building a huge
        # format string and argument list.
        samples = self.data
        if sys.byteorder == "big":
            samples = array('h', samples)
            samples.byteswap()

        wav_path = os.path.join(self.out_folder, f"{self.fragmentNr}.wav")
        # The context manager closes the file even if writing fails.
        with wave.open(wav_path, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(self.src_rate)
            wf.writeframes(samples.tobytes())

        transcriptions_path = os.path.join(self.out_folder, "transcriptions.txt")
        with open(transcriptions_path, "a", encoding="utf-8") as fp:
            fp.write(f"{self.fragmentNr}\t{self.currentTranscription}\n")
        self.log('-', self.currentTranscription)

        self.data = array('h')
        self.currentTranscription = ""

    def receive(self, chunk):
        """
        Buffer one chunk of raw 16-bit samples.

        Chunks are ignored while the recorder is not running. An all-zero
        chunk counts as muted: the ``MUTED_FRAMES_BEFORE_SPLIT``-th muted
        chunk in a row flushes the buffer via :meth:`writeData` (and is then
        itself buffered); any further muted chunks are discarded.

        Args:
            chunk: bytes-like object holding native-order 16-bit samples.

        Raises:
            ValueError: if the byte length is not a multiple of the sample
                size.
        """
        if not self.running:
            return

        samples = array('h', chunk)
        # All samples zero means the mic is muted on the client side. This
        # is the same condition as a peak amplitude of 0, but does not rely
        # on the audioop module (removed from the stdlib in Python 3.13).
        if samples.count(0) == len(samples):
            self.subsequentMutedFrames += 1
            logger.warning("Empty frame %s", self.subsequentMutedFrames)
            if self.subsequentMutedFrames == self.MUTED_FRAMES_BEFORE_SPLIT:
                logger.warning("Create new wave!")
                self.writeData()
            if self.subsequentMutedFrames > self.MUTED_FRAMES_BEFORE_SPLIT:
                # Don't write more muted frames to the audio file.
                return
        else:
            self.subsequentMutedFrames = 0

        self.data.extend(samples)

    def shutdown(self):
        """Flush any buffered audio and stop accepting chunks."""
        self.writeData()
        self.running = False

    def triggerStart(self):
        """Alias for :meth:`start`."""
        self.start()

    def updateTranscription(self, text):
        """Set the transcription stored with the next written fragment."""
        self.currentTranscription = text

    def log(self, origin, msg):
        """Append ``<origin>: <msg>`` to ``log.txt`` in the session folder."""
        with open(os.path.join(self.out_folder, "log.txt"), "a", encoding="utf-8") as fp:
            fp.write(f"{origin}: {msg}\n")