Fix #27 and fix #17 - Transcriptions and recordings - two birds, one stone

This commit is contained in:
Ruben van de Ven 2019-04-10 10:13:35 +02:00
parent 642571d391
commit 5270994fcf
7 changed files with 135 additions and 2 deletions

View file

@ -22,6 +22,7 @@ import queue
import threading import threading
from hugvey.voice import VoiceStorage from hugvey.voice import VoiceStorage
import multiprocessing import multiprocessing
from hugvey.speech.recorder import Recorder
mainLogger = logging.getLogger("hugvey") mainLogger = logging.getLogger("hugvey")
@ -314,6 +315,7 @@ class HugveyState(object):
self.streamer = None self.streamer = None
self.status = self.STATE_PAUSE self.status = self.STATE_PAUSE
self.google = None self.google = None
self.recorder = None
self.notShuttingDown = True # TODO: allow shutdown of object self.notShuttingDown = True # TODO: allow shutdown of object
self.startMsgId = None self.startMsgId = None
self.eventLogger = eventLogger.getChild(f"{self.id}") self.eventLogger = eventLogger.getChild(f"{self.id}")
@ -494,6 +496,10 @@ class HugveyState(object):
startMsgId = self.startMsgId startMsgId = self.startMsgId
self.startMsgId = None # use only once, reset before 'run' self.startMsgId = None # use only once, reset before 'run'
self.logger.warn(f"Starting from {startMsgId}") self.logger.warn(f"Starting from {startMsgId}")
if not self.streamer:
await asyncio.sleep(1)
self.streamer.triggerStart()
self.story.setStoryData(self.command.languages[self.language_code]) self.story.setStoryData(self.command.languages[self.language_code])
await self.story.run(startMsgId) await self.story.run(startMsgId)
# self.story = None # self.story = None
@ -510,6 +516,12 @@ class HugveyState(object):
self.player = Player( self.player = Player(
self.command.config['voice']['src_rate'], self.command.config['voice']['out_rate']) self.command.config['voice']['src_rate'], self.command.config['voice']['out_rate'])
self.streamer.addConsumer(self.player) self.streamer.addConsumer(self.player)
if self.command.config['voice']['record_dir']:
self.logger.warn("Record Audio of conversation")
self.recorder = Recorder( self.id,
self.command.config['voice']['src_rate'], self.command.config['voice']['record_dir'])
self.streamer.addConsumer(self.recorder)
self.logger.debug("Start Speech") self.logger.debug("Start Speech")
self.google = GoogleVoiceClient( self.google = GoogleVoiceClient(

View file

@ -209,6 +209,9 @@ class GoogleVoiceClient(object):
def shutdown(self): def shutdown(self):
self.toBeShutdown = True self.toBeShutdown = True
def triggerStart(self):
    """
    Hook called when a (new) run starts; intentionally a no-op here.

    AudioStreamer.triggerStart() calls triggerStart() on each of its
    consumers, so a consumer needs this method even when it has nothing
    to reset.
    NOTE(review): presumably this client is registered as a streamer
    consumer elsewhere - confirm against the caller.
    """
    pass

View file

@ -61,3 +61,6 @@ class Player:
self.stream.close() self.stream.close()
self.p.terminate() self.p.terminate()
def triggerStart(self):
    """
    Hook called when a (new) run starts; intentionally a no-op here.

    The player is registered as a consumer of the audio streamer, and
    AudioStreamer.triggerStart() calls triggerStart() on every consumer,
    so this method has to exist even though there is nothing to reset.
    """
    pass

105
hugvey/speech/recorder.py Normal file
View file

@ -0,0 +1,105 @@
import pyaudio
import logging
import audioop
import wave
import time
import os
import chunk
from _struct import pack
from array import array
mainLogger = logging.getLogger("hugvey")
logger = mainLogger.getChild("recorder")


class Recorder:
    """
    Record the streamed audio.

    Chunks of 16-bit mono PCM are buffered in memory and written out as
    numbered WAV fragments (``1.wav``, ``2.wav``, ...) inside
    ``<out_folder>/<hv_id>/<YYYYmmdd-HHMM>/``. A fragment is flushed after
    ``MUTED_FRAMES_BEFORE_SPLIT`` consecutive silent chunks and on shutdown.
    For every fragment the current transcription is appended to
    ``transcriptions.txt``; a running log is kept in ``log.txt``.
    """

    # Number of consecutive all-zero chunks after which the buffered audio is
    # flushed to its own wave file; further silent chunks are dropped.
    MUTED_FRAMES_BEFORE_SPLIT = 4

    def __init__(self, hv_id, src_rate, out_folder):
        """
        :param hv_id: identifier used as the per-device sub folder name
        :param src_rate: sample rate (Hz) written into the wave header
        :param out_folder: existing directory in which recordings are stored
        :raises Exception: when out_folder is not an existing directory
        """
        # isdir instead of exists: a regular file would only fail later on,
        # when the first run tries to create its sub folder.
        if not os.path.isdir(out_folder):
            raise Exception(f"Invalid output folder for recordings: {out_folder}")

        self.hv_id = hv_id
        self.src_rate = src_rate
        self.main_folder = out_folder
        self.running = False

        # Initialise all per-run state here as well, so shutdown() and log()
        # are safe to call on a recorder that was never started.
        self.out_folder = None
        self.target_folder = None
        self.subsequentMutedFrames = 0
        self.fragmentNr = 0
        self.data = array('h')
        self.currentTranscription = ""

    def start(self):
        """Begin a new recording run in a fresh, timestamped folder."""
        if self.running:
            # A restart must not silently drop audio buffered for the
            # previous run: flush it to the previous folder first.
            self.writeData()

        self.subsequentMutedFrames = 0
        self.fragmentNr = 0
        self.data = array('h')
        self.currentTranscription = ""

        t = time.strftime("%Y%m%d-%H%M")
        self.out_folder = os.path.join(self.main_folder, f"{self.hv_id}", t)
        if not os.path.exists(self.out_folder):
            logger.debug(f"Create directory {self.out_folder}")
        os.makedirs(self.out_folder, exist_ok=True)
        # os.makedirs() returns None; keep the attribute, but make it useful.
        self.target_folder = self.out_folder
        self.running = True

    def _fragmentPath(self, nr):
        """Return the path of wave fragment number nr in the current run."""
        return os.path.join(self.out_folder, f"{nr}.wav")

    def writeData(self):
        """
        Flush the buffered samples to the next wave fragment.

        Appends the fragment number and current transcription to
        transcriptions.txt, logs the transcription, and resets the buffer
        and the transcription. Does nothing when no samples are buffered.
        """
        if not self.data or self.out_folder is None:
            return

        # The folder name has minute resolution, so two runs started within
        # the same minute share it. Skip numbers that are already taken
        # instead of overwriting an earlier fragment.
        self.fragmentNr += 1
        while os.path.exists(self._fragmentPath(self.fragmentNr)):
            self.fragmentNr += 1

        # The context manager closes the file even when writing fails.
        with wave.open(self._fragmentPath(self.fragmentNr), 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(self.src_rate)
            # adapted from https://stackoverflow.com/questions/892199/detect-record-audio-in-python#6743593
            # A repeat count yields the same bytes as 'h' * len(data) without
            # building a format string as long as the recording.
            wf.writeframes(pack(f"<{len(self.data)}h", *self.data))

        # Transcriptions may hold non-ASCII text: do not depend on the locale.
        with open(os.path.join(self.out_folder, "transcriptions.txt"), "a", encoding="utf-8") as fp:
            fp.write(f"{self.fragmentNr}\t{self.currentTranscription}\n")
        self.log('-', self.currentTranscription)

        self.data = array('h')
        self.currentTranscription = ""

    def receive(self, chunk):
        """
        Buffer one chunk of raw 16-bit audio; ignored while not running.

        An all-zero chunk counts as muted. The MUTED_FRAMES_BEFORE_SPLIT-th
        muted chunk in a row flushes the buffer; later muted chunks are
        dropped until audio comes in again.
        """
        if not self.running:
            return

        # Equivalent to audioop.max(chunk, 2) == 0 (the peak is zero exactly
        # when every byte is zero), without depending on audioop, which is
        # deprecated and removed in Python 3.13.
        if not any(chunk):
            # mic is muted on client side.
            self.subsequentMutedFrames += 1
            logger.warning(f"Empty frame {self.subsequentMutedFrames}")
            if self.subsequentMutedFrames == self.MUTED_FRAMES_BEFORE_SPLIT:
                logger.warning("Create new wave!")
                self.writeData()
            if self.subsequentMutedFrames > self.MUTED_FRAMES_BEFORE_SPLIT:
                # Don't write more muted frames to the audio file.
                return
        else:
            self.subsequentMutedFrames = 0

        self.data.extend(array('h', chunk))

    def shutdown(self):
        """Flush what is still buffered and stop accepting audio."""
        self.writeData()
        self.running = False

    def triggerStart(self):
        """Consumer hook of the audio streamer: a (new) run starts."""
        self.start()

    def updateTranscription(self, text):
        """Set the transcription that is stored with the next fragment."""
        self.currentTranscription = text

    def log(self, origin, msg):
        """
        Append "<origin>: <msg>" to log.txt of the current run.

        Before the first start() there is no run folder yet; the message is
        then dropped instead of raising, so callers are not interrupted.
        """
        if self.out_folder is None:
            logger.debug(f"Recorder not started, not logging: {origin}: {msg}")
            return
        with open(os.path.join(self.out_folder, "log.txt"), "a", encoding="utf-8") as fp:
            fp.write(f"{origin}: {msg}\n")

View file

@ -60,4 +60,7 @@ class AudioStreamer(object):
for consumer in self.consumers: for consumer in self.consumers:
consumer.receive(chunk) consumer.receive(chunk)
def triggerStart(self):
    """
    Signal the start of a (new) run on the hugvey.

    The signal is forwarded to every registered consumer, in registration
    order; each consumer decides for itself whether it needs to act on it.
    """
    for consumer in self.consumers:
        consumer.triggerStart()

View file

@ -657,7 +657,7 @@ class Story(object):
# Register variables # Register variables
for msg in self.getMessages(): for msg in self.getMessages():
print(msg.id, msg.hasVariables()) # print(msg.id, msg.hasVariables())
if not msg.hasVariables(): if not msg.hasVariables():
continue continue
@ -794,6 +794,9 @@ class Story(object):
if e['is_final']: if e['is_final']:
utterance.setFinished(self.timer.getElapsed()) utterance.setFinished(self.timer.getElapsed())
self.hugvey.eventLogger.info("speaking: stop {}".format(id(utterance))) self.hugvey.eventLogger.info("speaking: stop {}".format(id(utterance)))
if self.hugvey.recorder:
self.hugvey.recorder.updateTranscription(self.currentReply.getText())
async def _processDirections(self, directions): async def _processDirections(self, directions):
@ -873,6 +876,7 @@ class Story(object):
'action': 'stop', 'action': 'stop',
'id': self.currentMessage.id, 'id': self.currentMessage.id,
}) })
message.uuid = shortuuid.uuid() message.uuid = shortuuid.uuid()
self.currentMessage = message self.currentMessage = message
@ -891,6 +895,8 @@ class Story(object):
message.id, message.text)) message.id, message.text))
self.addToLog(message) self.addToLog(message)
self.hugvey.eventLogger.info(f"message: {message.id} {message.uuid} start \"{message.text}\"") self.hugvey.eventLogger.info(f"message: {message.id} {message.uuid} start \"{message.text}\"")
if self.hugvey.recorder:
self.hugvey.recorder.log('h',message.text)
# TODO: prep events & timer etc. # TODO: prep events & timer etc.
fn = await message.getAudioFilePath() fn = await message.getAudioFilePath()

1
recordings/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*