Add volume to playback, and better Google ASR resource management

This commit is contained in:
Ruben van de Ven 2019-04-08 17:35:10 +02:00
parent cc03473361
commit 4498e31d0c
2 changed files with 24 additions and 10 deletions

View file

@@ -58,13 +58,19 @@ class GoogleVoiceClient(object):
self.isRunning.set() self.isRunning.set()
def generator(self): def generator(self):
print('start generator')
while not self.toBeShutdown and self.isRunning.is_set(): while not self.toBeShutdown and self.isRunning.is_set():
try: try:
# set a timeout, as not to wait infinitely for the buffer when # set a timeout, as not to wait infinitely for the buffer when
# we actually want to restart # we actually want to restart
yield self.buffer.get(timeout=.2) yield self.buffer.get(timeout=.3)
except queue.Empty as e: except queue.Empty as e:
self.logger.debug('empty mic buffer - restart?') self.logger.debug('empty mic buffer - restart?')
print(self.isRunning.isSet())
print('stop generator')
self.restart = False # don't trigger double restart
# raise RequireRestart("Restart required (generator)")
def setLanguage(self, language_code): def setLanguage(self, language_code):
if self.language_code == language_code: if self.language_code == language_code:
@@ -77,20 +83,21 @@ class GoogleVoiceClient(object):
def run(self): def run(self):
self.isRunning.set() self.isRunning.set()
self.speech_client = speech.SpeechClient()
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=self.src_rate,
language_code=self.language_code)
self.streaming_config = types.StreamingRecognitionConfig(
config=config,
interim_results=True)
while not self.toBeShutdown: while not self.toBeShutdown:
try: try:
self.logger.info("wait for Google Voice") self.logger.info("wait for Google Voice")
self.isRunning.wait() self.isRunning.wait()
self.logger.info("Starting Google Voice") self.logger.info("Starting Google Voice")
self.speech_client = speech.SpeechClient()
config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=self.src_rate,
language_code=self.language_code)
self.streaming_config = types.StreamingRecognitionConfig(
config=config,
interim_results=True)
audio_generator = self.generator() audio_generator = self.generator()
requests = (types.StreamingRecognizeRequest(audio_content=content) requests = (types.StreamingRecognizeRequest(audio_content=content)
@@ -137,6 +144,8 @@ class GoogleVoiceClient(object):
self.logger.info(f"Text: {transcript}") self.logger.info(f"Text: {transcript}")
self.lastNonFinalTranscript = None self.lastNonFinalTranscript = None
if result.is_final:
print("native final")
msg = { msg = {
"event": "speech", "event": "speech",
"is_final": result.is_final, "is_final": result.is_final,
@@ -153,6 +162,7 @@ class GoogleVoiceClient(object):
self.logger.warn("Stopping voice loop") self.logger.warn("Stopping voice loop")
break break
except RequireRestart as e: except RequireRestart as e:
self.restart = False
self.logger.warn("Restart Google Voice. Language: {}".format(self.language_code)) self.logger.warn("Restart Google Voice. Language: {}".format(self.language_code))
except Exception as e: except Exception as e:
self.logger.critical(f"Crashed Google Voice: {e}") self.logger.critical(f"Crashed Google Voice: {e}")

View file

@@ -66,6 +66,9 @@ class Message(object):
msg.setStory(story) msg.setStory(story)
if 'params' in data: if 'params' in data:
msg.params = data['params'] msg.params = data['params']
if not 'vol' in msg.params:
# prevent clipping on some Lyrebird tracks
msg.params['vol'] = .8
return msg return msg
def parseForVariables(self): def parseForVariables(self):
@@ -191,7 +194,8 @@ class Reply(object):
now = self.forMessage.story.timer.getElapsed() now = self.forMessage.story.timer.getElapsed()
diff = now - u.lastUpdate diff = now - u.lastUpdate
if diff > 2: # time in seconds to force silence in utterance if diff > 5: # time in seconds to force silence in utterance
# useful for eg. 'hello', or 'no'
self.forMessage.story.logger.warn( self.forMessage.story.logger.warn(
f"Set finish time for utterance after {diff}s {u.text}" f"Set finish time for utterance after {diff}s {u.text}"
) )