diff --git a/hugvey/speech/google.py b/hugvey/speech/google.py index 79bf177..09995e2 100644 --- a/hugvey/speech/google.py +++ b/hugvey/speech/google.py @@ -17,6 +17,7 @@ import threading import queue import uuid from hugvey.communication import LOG_BS +import audioop mainLogger = logging.getLogger("hugvey") logger = mainLogger.getChild("speech") @@ -44,6 +45,9 @@ class GoogleVoiceClient(object): self.task = threading.Thread(target=self.run, name=f"hugvey#{self.hugvey.id}v") self.task.setDaemon(True) self.task.start() + self.subsequentMutedFrames = 0 + + self.lastNonFinalTranscript = None def pause(self): self.isRunning.clear() @@ -55,7 +59,12 @@ class GoogleVoiceClient(object): def generator(self): while not self.toBeShutdown and self.isRunning.is_set(): - yield self.buffer.get() + try: + # set a timeout, as not to wait infinitely for the buffer when + # we actually want to restart + yield self.buffer.get(timeout=.2) + except queue.Empty as e: + self.logger.debug('empty mic buffer - restart?') def setLanguage(self, language_code): if self.language_code == language_code: @@ -68,9 +77,9 @@ class GoogleVoiceClient(object): def run(self): self.isRunning.set() - while not self.toBeShutdown: try: + self.logger.info("wait for Google Voice") self.isRunning.wait() self.logger.info("Starting Google Voice") @@ -92,6 +101,7 @@ class GoogleVoiceClient(object): self.logger.info("Starting voice loop") for response in responses: if not response.results: + self.logger.debug('...') continue """Iterates through server responses and prints them. @@ -122,8 +132,10 @@ class GoogleVoiceClient(object): if not result.is_final: self.logger.debug(f"Text: {transcript}") + self.lastNonFinalTranscript = transcript else: self.logger.info(f"Text: {transcript}") + self.lastNonFinalTranscript = None msg = { "event": "speech", @@ -144,12 +156,37 @@ class GoogleVoiceClient(object): self.logger.warn("Restart Google Voice. Language: {}".format(self.language_code)) except Exception as e: self.logger.critical(f"Crashed Google Voice: {e}") + + # make sure we always send a 'final' transcript. + if self.lastNonFinalTranscript is not None: + msg = { + "event": "speech", + "is_final": True, + "transcript": self.lastNonFinalTranscript.strip(), + } + self.hugvey.queueEvent(msg) def receive(self, chunk): if not self.task.isAlive(): raise Exception("Voice thread died") + + if audioop.max(chunk, 2) == 0: + # mic is muted on client side. + self.subsequentMutedFrames += 1 +# self.logger.debug("Muted") + if self.subsequentMutedFrames > 4 and self.isRunning.is_set(): + self.logger.warn("Pause muted stream!") + self.pause() + return + +# self.logger.debug("We have mic!") + if not self.isRunning.is_set(): + self.logger.warn("Resume voice") + self.resume() + + if not self.isRunning.is_set(): # logger.log(LOG_BS, "Don't put to queue if google is paused") return diff --git a/hugvey/story.py b/hugvey/story.py index 1570248..bbafac1 100644 --- a/hugvey/story.py +++ b/hugvey/story.py @@ -593,8 +593,8 @@ class Story(object): obj = className.initFromJson(el, self) self.add(obj) - self.logger.debug(self.elements) - self.logger.debug(self.directionsPerMsg) +# self.logger.debug(self.elements) +# self.logger.debug(self.directionsPerMsg) self.diversions = [el for el in self.elements.values() if type(el) == Diversion]