diff --git a/hugvey/central_command.py b/hugvey/central_command.py index ab5707c..eaebaa8 100644 --- a/hugvey/central_command.py +++ b/hugvey/central_command.py @@ -62,6 +62,7 @@ class CentralCommand(object): self.hugveyLock = asyncio.Lock() self.start_time = time.time() self.languageFiles = {} + self.languageConfig = {} self.args = args # cli args eventLogger.addHandler(logging.handlers.QueueHandler(self.logQueue)) @@ -84,7 +85,7 @@ class CentralCommand(object): voice_dir = os.path.join(self.config['web']['files_dir'], 'voices') - self.voiceStorage = VoiceStorage(voice_dir, self.config['voice']['token']) + self.voiceStorage = VoiceStorage(voice_dir, self.languageConfig) self.panopticon = Panopticon(self, self.config, self.voiceStorage) @@ -96,6 +97,7 @@ class CentralCommand(object): for lang in self.config['languages']: lang_filename = os.path.join(self.config['web']['files_dir'], lang['file']) self.languageFiles[lang['code']] = lang['file'] + self.languageConfig[lang['code']] = lang with open(lang_filename, 'r') as fp: self.languages[lang['code']] = json.load(fp) @@ -246,7 +248,8 @@ class CentralCommand(object): r = await s.recv_json() isVariable = bool(r['variable']) text = r['text'] - fn = await self.voiceStorage.requestFile(text, isVariable) + hv = self.hugveys[hugvey_id] #: :type hv: HugveyState + fn = await self.voiceStorage.requestFile(hv.language_code, text, isVariable) if fn is None: eventLogger.getChild(f"{hugvey_id}").critical("error: No voice file fetched, check logs.") fn = 'local/crash.wav' diff --git a/hugvey/panopticon.py b/hugvey/panopticon.py index 414b2df..1ced204 100644 --- a/hugvey/panopticon.py +++ b/hugvey/panopticon.py @@ -181,9 +181,10 @@ def getVoiceHandler(voiceStorage): async def get(self): # TODO: we should be using ZMQ here... text = self.get_argument('text') + lang_code = self.get_argument('lang') isVariable = True if int(self.get_argument('variable')) >0 else False # TODO: make zmq socket request/reply pattern: - fn = await voiceStorage.requestFile(text, isVariable) + fn = await voiceStorage.requestFile(lang_code, text, isVariable) if not fn: raise Exception(f"No Filename for text: {text}") diff --git a/hugvey/voice.py b/hugvey/voice.py index 2da33b8..c13338b 100644 --- a/hugvey/voice.py +++ b/hugvey/voice.py @@ -15,33 +15,38 @@ class VoiceStorage(object): """ Store & keep voices that are not part of the story json """ - def __init__(self, cache_dir, token): + def __init__(self, cache_dir, languageConfig): self.cache_dir = cache_dir if not os.path.exists(self.cache_dir): raise Exception(f"Cache dir does not exists: {self.cache_dir}") # self.request_session = AsyncSession(n=5) self.pendingRequests = {} - self.token = token + self.languages = languageConfig + self.fetchers = {} + + for lang in self.languages: + cls = VoiceFetcher.getClass(self.languages[lang]['type']) + self.fetchers[lang] = cls(self.languages[lang]) - def getId(self, text): + def getId(self, lang_code, text): """ Get a unique id based on text and the voice token. So changing the voice or text triggers a re-download. """ - return sha1((self.token + ':' + text).encode()).hexdigest() + return sha1((f"{lang_code}:{self.languages[lang_code]['token']}:{text}").encode()).hexdigest() - def getFilename(self, text, isVariable=False): + def getFilename(self, lang_code, text, isVariable=False): subdir = 'static' if not isVariable else 'variable' - id = self.getId(text) + id = self.getId(lang_code, text) prefix = id[:2] - storageDir = os.path.join(self.cache_dir, subdir, prefix) + storageDir = os.path.join(self.cache_dir, lang_code, subdir, prefix) fn = os.path.join(storageDir, f"{id}.wav") return fn - async def requestFile(self, text, isVariable=False) -> str: - id = self.getId(text) - fn = self.getFilename(text) + async def requestFile(self, lang_code, text, isVariable=False) -> str: + id = self.getId(lang_code, text) + fn = self.getFilename(lang_code, text, isVariable) if os.path.exists(fn): return fn @@ -59,32 +64,120 @@ class VoiceStorage(object): self.pendingRequests[id] = asyncio.Event() + + try: + contents = await self.fetchers[lang_code].requestVoiceFile(text) + except Exception as e: + logger.exception(e) + self.pendingRequests[id].set() + return None + + with open(fn, "wb") as f: + logger.debug(f"Write file for {lang_code}: {text}") + f.write(contents) + self.pendingRequests[id].set() +# print(type(fn), fn) + + return fn + +class VoiceFetcher(): + def __init__(self, config): + self.config = config + + async def requestVoiceFile(self, text): + pass + + @classmethod + def getClass(cls, type): + if type == "lyrebird": + return LyrebirdVoiceFetcher + if type == "ms": + return MSVoiceFetcher + raise Exception(f"Unknown voice type: {type}") + +class LyrebirdVoiceFetcher(VoiceFetcher): + async def requestVoiceFile(self, text): http_client = AsyncHTTPClient() request = HTTPRequest( method="POST", url="https://avatar.lyrebird.ai/api/v0/generate", body=json.dumps({"text": text}), - headers={"authorization": f"Bearer {self.token}"} + headers={"authorization": f"Bearer {self.config['token']}"} ) try: response = await http_client.fetch(request) except Exception as e: - logger.exception(e) - logger.critical(request) - self.pendingRequests[id].set() http_client.close() - return None - else: - if response.code != 200: - logger.critical(f"No proper response! {response.code}") - self.pendingRequests[id].set() + raise e + + if response.code != 200: + raise Exception(f"No proper response! {response.code}") + + return response.body + +class MSVoiceFetcher(VoiceFetcher): + def __init__(self, config): + self.config = config + self.timer = 0 + self.access_token = None + + async def getToken(self): + now = time.time() + if now - self.timer > 8 * 60: # token expires after 10 min. Use 8 to be sure + headers = { + 'Ocp-Apim-Subscription-Key': self.config['token'] + } + http_client = AsyncHTTPClient() + request = HTTPRequest( + method="POST", + url=self.config['token_url'], + headers=headers, + allow_nonstandard_methods=True + ) + print(request.method, request.url, request.headers) + try: + response = await http_client.fetch(request) + except Exception as e: http_client.close() - return None - -# logger.debug(f"Wrote body: {response.code}") - with open(fn, "wb") as f: - f.write(response.body) - self.pendingRequests[id].set() -# print(type(fn), fn) + raise e + self.access_token = response.body.decode() + self.timer = time.time() http_client.close() - return fn + + return self.access_token + + async def requestVoiceFile(self, text): + + print(self.config['voice_url']) + headers = { + 'Authorization': 'Bearer ' + await self.getToken(), + 'Content-Type': 'application/ssml+xml', + 'X-Microsoft-OutputFormat': 'riff-24khz-16bit-mono-pcm', +# 'User-Agent': 'YOUR_RESOURCE_NAME' + } + body = f""" + {text} +""" + print(headers, body) + http_client = AsyncHTTPClient() + request = HTTPRequest( + method="POST", + url=self.config['voice_url'], + headers=headers, + body=body + ) + try: + response = await http_client.fetch(request) + except Exception as e: + http_client.close() + raise e + + http_client.close() + + if response.code != 200: + raise Exception(f"No proper response! {response.code}") + + return response.body + + \ No newline at end of file diff --git a/www/js/hugvey_console.js b/www/js/hugvey_console.js index 28bc1f0..a7d0322 100644 --- a/www/js/hugvey_console.js +++ b/www/js/hugvey_console.js @@ -384,7 +384,8 @@ class Graph { getAudioUrlForMsg(msg) { let isVariable = msg['text'].includes('$') ? '1' : '0'; - return `http://localhost:8888/voice?text=${encodeURIComponent(msg['text'])}&variable=${isVariable}&filename=0`; + let lang = panopticon.graph.language_code; + return `http://localhost:8888/voice?text=${encodeURIComponent(msg['text'])}&variable=${isVariable}&lang=${lang}&filename=0`; } getNumericId(prefix) {