From 4498e31d0c79bee1e8f61fee9405ba28aef02204 Mon Sep 17 00:00:00 2001 From: Ruben van de Ven Date: Mon, 8 Apr 2019 17:35:10 +0200 Subject: [PATCH] Add volume to playback, and better Google ASR resource management --- hugvey/speech/google.py | 28 +++++++++++++++++++--------- hugvey/story.py | 6 +++++- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/hugvey/speech/google.py b/hugvey/speech/google.py index 09995e2..ccfe417 100644 --- a/hugvey/speech/google.py +++ b/hugvey/speech/google.py @@ -58,13 +58,19 @@ class GoogleVoiceClient(object): self.isRunning.set() def generator(self): + print('start generator') while not self.toBeShutdown and self.isRunning.is_set(): try: # set a timeout, as not to wait infinitely for the buffer when # we actually want to restart - yield self.buffer.get(timeout=.2) + yield self.buffer.get(timeout=.3) except queue.Empty as e: self.logger.debug('empty mic buffer - restart?') + print(self.isRunning.isSet()) + print('stop generator') + self.restart = False # don't trigger double restart + +# raise RequireRestart("Restart required (generator)") def setLanguage(self, language_code): if self.language_code == language_code: @@ -77,20 +83,21 @@ class GoogleVoiceClient(object): def run(self): self.isRunning.set() + self.speech_client = speech.SpeechClient() + config = types.RecognitionConfig( + encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=self.src_rate, + language_code=self.language_code) + self.streaming_config = types.StreamingRecognitionConfig( + config=config, + interim_results=True) + while not self.toBeShutdown: try: self.logger.info("wait for Google Voice") self.isRunning.wait() self.logger.info("Starting Google Voice") - self.speech_client = speech.SpeechClient() - config = types.RecognitionConfig( - encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16, - sample_rate_hertz=self.src_rate, - language_code=self.language_code) - self.streaming_config = types.StreamingRecognitionConfig( - config=config, - interim_results=True) audio_generator = self.generator() requests = (types.StreamingRecognizeRequest(audio_content=content) @@ -137,6 +144,8 @@ class GoogleVoiceClient(object): self.logger.info(f"Text: {transcript}") self.lastNonFinalTranscript = None + if result.is_final: + print("native final") msg = { "event": "speech", "is_final": result.is_final, @@ -153,6 +162,7 @@ class GoogleVoiceClient(object): self.logger.warn("Stopping voice loop") break except RequireRestart as e: + self.restart = False self.logger.warn("Restart Google Voice. Language: {}".format(self.language_code)) except Exception as e: self.logger.critical(f"Crashed Google Voice: {e}") diff --git a/hugvey/story.py b/hugvey/story.py index 3a2002b..fbc7baf 100644 --- a/hugvey/story.py +++ b/hugvey/story.py @@ -66,6 +66,9 @@ class Message(object): msg.setStory(story) if 'params' in data: msg.params = data['params'] + if not 'vol' in msg.params: + # prevent clipping on some Lyrebird tracks + msg.params['vol'] = .8 return msg def parseForVariables(self): @@ -191,7 +194,8 @@ class Reply(object): now = self.forMessage.story.timer.getElapsed() diff = now - u.lastUpdate - if diff > 2: # time in seconds to force silence in utterance + if diff > 5: # time in seconds to force silence in utterance + # useful for eg. 'hello', or 'no' self.forMessage.story.logger.warn( f"Set finish time for utterance after {diff}s {u.text}" )