222 lines
8.4 KiB
Python
222 lines
8.4 KiB
Python
import json
|
|
import logging
|
|
import os
|
|
import pyaudio
|
|
import re
|
|
import socket
|
|
import os
|
|
from threading import Thread
|
|
|
|
from google.cloud.speech import enums
|
|
from google.cloud.speech import types
|
|
from google.auth.credentials import Credentials
|
|
|
|
from google.cloud import speech
|
|
import asyncio
|
|
import threading
|
|
import queue
|
|
import uuid
|
|
from hugvey.communication import LOG_BS
|
|
import audioop
|
|
|
|
# Application-wide root logger; per-instance loggers are derived from it.
mainLogger = logging.getLogger("hugvey")
# Module-level logger. Looking up the dotted name yields the very same logger
# object as asking the root logger above for its "google" child.
logger = logging.getLogger("hugvey.google")
|
|
|
|
class RequireRestart(Exception):
    """Raised inside the voice loop to abort the current stream and start a new one."""
|
|
|
|
class GoogleVoiceClient(object):
    """Stream microphone audio to Google Cloud Speech and queue the transcripts.

    Audio chunks are handed in through :meth:`receive`, buffered in a queue and
    forwarded by a background thread (:meth:`run`) as a streaming recognition
    request. Every transcript found in the responses is passed to
    ``hugvey.queueEvent`` as a ``"speech"`` event.
    """

    def __init__(self, hugvey, src_rate, credential_file, language_code="en_GB"):
        """Set up the client state and start the background voice thread.

        Args:
            hugvey: Owner object; its ``id`` is used for logger/thread names
                and its ``queueEvent`` method receives the speech events.
            src_rate: Sample rate (Hz) of the chunks passed to :meth:`receive`.
            credential_file: Path exported as ``GOOGLE_APPLICATION_CREDENTIALS``.
            language_code: Language code handed to the recognition config.
        """
        self.logger = mainLogger.getChild(f"{hugvey.id}").getChild('google')
        self.src_rate = src_rate
        self.hugvey = hugvey
        self.language_code = language_code
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credential_file

        # Create a thread-safe buffer of audio data
        self.buffer = queue.Queue()
        self.isRunning = threading.Event()
        self.toBeShutdown = False
        self.target_rate = 16000
        self.cv_laststate = None
        self.restart = False
        self.subsequentMutedFrames = 0
        self.lastNonFinalTranscript = None

        # Start the worker only once every attribute it reads or writes exists;
        # otherwise the assignments above could race with (and overwrite) the
        # values set by the running thread.
        self.task = threading.Thread(
            target=self.run,
            name=f"hugvey#{self.hugvey.id}v",
            daemon=True,
        )
        self.task.start()

    def pause(self):
        """Stop feeding audio to the current stream and flag it for a restart."""
        self.isRunning.clear()
        self.restart = True

    def resume(self):
        """Drop any buffered audio and let the voice thread stream again."""
        self.buffer = queue.Queue()  # have a clear queue when resuming
        self.isRunning.set()

    def generator(self):
        """Yield buffered audio chunks until the client is paused or shut down.

        Yields:
            The items put on ``self.buffer`` by :meth:`receive`.
        """
        self.logger.debug('start generator')
        while not self.toBeShutdown and self.isRunning.is_set():
            try:
                # set a timeout, as not to wait infinitely for the buffer when
                # we actually want to restart
                yield self.buffer.get(timeout=.3)
            except queue.Empty:
                self.logger.debug('empty mic buffer - restart?')
        self.logger.info('stop generator')
        self.restart = False  # don't trigger double restart

    def setLanguage(self, language_code):
        """Switch the recognition language and request a stream restart.

        Does nothing when ``language_code`` equals the current language.
        """
        if self.language_code == language_code:
            return

        self.logger.info("Change language from {} to {}".format(self.language_code, language_code))
        self.language_code = language_code
        self.restart = True

    def _build_streaming_config(self):
        """Build the streaming config for the current language and audio rate."""
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            # receive() resamples every chunk to target_rate before queueing
            # it, so that is the rate of the audio that is actually sent.
            sample_rate_hertz=self.target_rate,
            language_code=self.language_code)
        return types.StreamingRecognitionConfig(
            config=config,
            interim_results=True)

    def _queue_speech_event(self, transcript, is_final):
        """Queue a "speech" event on the owner, unless it was already detached."""
        hugvey = self.hugvey  # shutdown() may clear the attribute concurrently
        if hugvey is None:
            return
        hugvey.queueEvent({
            "event": "speech",
            "is_final": is_final,
            "transcript": transcript.strip(),
        })

    def run(self):
        """Thread body: keep opening recognition streams until shutdown.

        Each loop iteration waits until the client is running, opens a
        streaming request fed by :meth:`generator` and queues a speech event
        for every transcript. A stream ends when the generator stops, when a
        restart is requested, or when an error occurs; the loop then starts a
        new one.
        """
        self.isRunning.set()
        self.speech_client = speech.SpeechClient()

        while not self.toBeShutdown:
            try:
                self.logger.info("wait for Google Voice")
                if not self.isRunning.wait(timeout=1):
                    continue  # re-check toBeShutdown
                self.logger.info("Starting Google Voice")

                # Rebuild the config for every stream, so that a language
                # changed through setLanguage() is used after the restart.
                self.streaming_config = self._build_streaming_config()
                audio_generator = self.generator()
                requests = (types.StreamingRecognizeRequest(audio_content=content)
                            for content in audio_generator)
                responses = self.speech_client.streaming_recognize(
                    self.streaming_config, requests)

                self.logger.info("Starting voice loop")
                # `responses` is iterated as the server answers; interim
                # results are included (interim_results=True above).
                for response in responses:
                    if not response.results:
                        self.logger.debug('...')
                        continue

                    # The `results` list is consecutive. For streaming, we only care about
                    # the first result being considered, since once it's `is_final`, it
                    # moves on to considering the next utterance.
                    result = response.results[0]
                    if not result.alternatives:
                        continue

                    # Use the transcription of the top alternative.
                    transcript = result.alternatives[0].transcript

                    if result.is_final:
                        self.logger.info(f"Text: {transcript}")
                        self.lastNonFinalTranscript = None
                        self.logger.info("native final")
                    else:
                        self.logger.debug(f"Text: {transcript}")
                        # remembered so a final event can still be sent if
                        # the stream ends before Google finalises it
                        self.lastNonFinalTranscript = transcript

                    self._queue_speech_event(transcript, result.is_final)

                    if self.restart:
                        self.restart = False
                        raise RequireRestart("Restart required")

                    if self.toBeShutdown:
                        self.logger.warning("Stopping voice loop")
                        break
            except RequireRestart:
                self.restart = False
                self.logger.warning("Restart Google Voice. Language: {}".format(self.language_code))
            except Exception as e:
                # Top-level boundary of the worker thread: log and start over.
                self.logger.critical(f"Crashed Google Voice: {e}", exc_info=True)

            # make sure we always send a 'final' transcript.
            if self.lastNonFinalTranscript is not None:
                self._queue_speech_event(self.lastNonFinalTranscript, True)
                # clear it, so the same text is not sent again after the next stream
                self.lastNonFinalTranscript = None

        self.logger.warning("Stop google run()")  # finish means wrapping of hugvey#3v thread

    def receive(self, chunk):
        """Accept one chunk of microphone audio.

        Completely silent chunks are counted; after more than four in a row
        the stream is paused. The first non-silent chunk resumes it. Chunks
        are resampled from ``src_rate`` to ``target_rate`` when those differ
        and then put on the buffer.

        Args:
            chunk: Audio fragment; it is processed as 2-byte, single channel
                samples.

        Raises:
            Exception: If the background voice thread is no longer alive.
        """
        if not self.task.is_alive():
            raise Exception("Voice thread died")

        if audioop.max(chunk, 2) == 0:
            # mic is muted on client side.
            self.subsequentMutedFrames += 1
            if self.subsequentMutedFrames > 4 and self.isRunning.is_set():
                self.logger.info("Pause muted stream!")
                self.pause()
            return

        self.subsequentMutedFrames = 0
        if not self.isRunning.is_set():
            self.logger.info("Resume voice")
            self.resume()

        if not self.isRunning.is_set():
            # don't put to queue if google is paused
            return

        if self.src_rate == self.target_rate:
            data = chunk
        else:
            # cv_laststate carries the converter state from chunk to chunk
            data, self.cv_laststate = audioop.ratecv(
                chunk, 2, 1, self.src_rate, self.target_rate, self.cv_laststate)
        self.buffer.put_nowait(data)

    def shutdown(self):
        """Ask the voice thread to stop and drop the reference to the owner."""
        self.toBeShutdown = True
        self.hugvey = None

    def triggerStart(self):
        """Do nothing; no action is needed to start this client."""

    def __del__(self):
        # The logger is missing when __init__ failed before assigning it.
        instance_logger = getattr(self, "logger", None)
        if instance_logger is not None:
            instance_logger.warning("Destroyed google object")
|
|
|