Add Microsoft

2019-04-09 09:40:50 +02:00 · 2019-04-09 09:40:50 +02:00 · 9ef7195019
commit 9ef7195019
parent 4498e31d0c
4 changed files with 129 additions and 31 deletions
--- a/hugvey/central_command.py
+++ b/hugvey/central_command.py
@ -62,6 +62,7 @@ class CentralCommand(object):
        self.hugveyLock = asyncio.Lock()
        self.start_time = time.time()
        self.languageFiles = {}
+        self.languageConfig = {}
        self.args = args # cli args
        
        eventLogger.addHandler(logging.handlers.QueueHandler(self.logQueue))
@ -84,7 +85,7 @@ class CentralCommand(object):
        
        
        voice_dir = os.path.join(self.config['web']['files_dir'], 'voices')
-        self.voiceStorage = VoiceStorage(voice_dir, self.config['voice']['token'])
+        self.voiceStorage = VoiceStorage(voice_dir, self.languageConfig)
        
        self.panopticon = Panopticon(self, self.config, self.voiceStorage)
        
@ -96,6 +97,7 @@ class CentralCommand(object):
        for lang in self.config['languages']:
            lang_filename = os.path.join(self.config['web']['files_dir'], lang['file'])
            self.languageFiles[lang['code']] = lang['file']
+            self.languageConfig[lang['code']] = lang
            with open(lang_filename, 'r') as fp:
                self.languages[lang['code']] = json.load(fp)

@ -246,7 +248,8 @@ class CentralCommand(object):
                r = await s.recv_json()
                isVariable = bool(r['variable'])
                text = r['text']
-                fn = await self.voiceStorage.requestFile(text, isVariable)
+                hv = self.hugveys[hugvey_id] #: :type hv: HugveyState
+                fn = await self.voiceStorage.requestFile(hv.language_code, text, isVariable)
                if fn is None:
                    eventLogger.getChild(f"{hugvey_id}").critical("error: No voice file fetched, check logs.")
                    fn = 'local/crash.wav' 
--- a/hugvey/panopticon.py
+++ b/hugvey/panopticon.py
@ -181,9 +181,10 @@ def getVoiceHandler(voiceStorage):
        async def get(self):
            # TODO: we should be using ZMQ here...
            text = self.get_argument('text')
+            lang_code = self.get_argument('lang')
            isVariable = True if int(self.get_argument('variable')) >0 else False
            # TODO: make zmq socket request/reply pattern:
-            fn = await voiceStorage.requestFile(text, isVariable)
+            fn = await voiceStorage.requestFile(lang_code, text, isVariable)
            if not fn:
                raise Exception(f"No Filename for text: {text}")
            
--- a/hugvey/voice.py
+++ b/hugvey/voice.py
@ -15,33 +15,38 @@ class VoiceStorage(object):
    """
    Store & keep voices that are not part of the story json
    """
-    def __init__(self, cache_dir, token):
+    def __init__(self, cache_dir, languageConfig):
        self.cache_dir = cache_dir
        if not os.path.exists(self.cache_dir):
            raise Exception(f"Cache dir does not exists: {self.cache_dir}")
 #         self.request_session = AsyncSession(n=5)
        self.pendingRequests = {}
-        self.token = token
+        self.languages = languageConfig
+        self.fetchers = {}
+        
+        for lang in self.languages:
+            cls = VoiceFetcher.getClass(self.languages[lang]['type'])
+            self.fetchers[lang] = cls(self.languages[lang])
    
-    def getId(self, text):
+    def getId(self, lang_code, text):
        """
        Get a unique id based on text and the voice token.
        
        So changing the voice or text triggers a re-download.
        """
-        return sha1((self.token + ':' + text).encode()).hexdigest()
+        return sha1((f"{lang_code}:{self.languages[lang_code]['token']}:{text}").encode()).hexdigest()
    
-    def getFilename(self, text, isVariable=False):
+    def getFilename(self, lang_code, text, isVariable=False):
        subdir = 'static' if not isVariable else 'variable'
-        id = self.getId(text)
+        id = self.getId(lang_code, text)
        prefix = id[:2]
-        storageDir = os.path.join(self.cache_dir, subdir, prefix)
+        storageDir = os.path.join(self.cache_dir, lang_code, subdir, prefix)
        fn = os.path.join(storageDir, f"{id}.wav")
        return fn
    
-    async def requestFile(self, text, isVariable=False) -> str:
-        id = self.getId(text)
-        fn = self.getFilename(text)
+    async def requestFile(self, lang_code, text, isVariable=False) -> str:
+        id = self.getId(lang_code, text)
+        fn = self.getFilename(lang_code, text, isVariable)
        
        if os.path.exists(fn):
            return fn
@ -59,32 +64,120 @@ class VoiceStorage(object):
            
        self.pendingRequests[id] = asyncio.Event()
        
+        
+        try:
+            contents = await self.fetchers[lang_code].requestVoiceFile(text)
+        except Exception as e:
+            logger.exception(e)
+            self.pendingRequests[id].set()
+            return None
+        
+        with open(fn, "wb") as f:
+            logger.debug(f"Write file for {lang_code}: {text}")
+            f.write(contents)
+        self.pendingRequests[id].set()
+#             print(type(fn), fn)
+            
+        return fn
+        
+class VoiceFetcher():
+    def __init__(self, config):
+        self.config = config
+    
+    async def requestVoiceFile(self, text):
+        pass
+    
+    @classmethod
+    def getClass(cls, type):
+        if type == "lyrebird":
+            return LyrebirdVoiceFetcher
+        if type == "ms":
+            return MSVoiceFetcher
+        raise Exception(f"Unknown voice type: {type}")
+        
+class LyrebirdVoiceFetcher(VoiceFetcher):
+    async def requestVoiceFile(self, text):
        http_client = AsyncHTTPClient()
        request = HTTPRequest(
            method="POST",
            url="https://avatar.lyrebird.ai/api/v0/generate",
            body=json.dumps({"text": text}),
-            headers={"authorization": f"Bearer {self.token}"}
+            headers={"authorization": f"Bearer {self.config['token']}"}
        )
        try:
            response = await http_client.fetch(request)
        except Exception as e:
-            logger.exception(e)
-            logger.critical(request)
-            self.pendingRequests[id].set()
            http_client.close()
-            return None
-        else:
-            if response.code != 200:
-                logger.critical(f"No proper response! {response.code}")
-                self.pendingRequests[id].set()
+            raise e
+
+        if response.code != 200:
+            raise Exception(f"No proper response! {response.code}")
+        
+        return response.body
+
+class MSVoiceFetcher(VoiceFetcher):
+    def __init__(self, config):
+        self.config = config
+        self.timer = 0
+        self.access_token = None
+        
+    async def getToken(self):
+        now = time.time()
+        if now - self.timer > 8 * 60: # token expires after 10 min. Use 8 to be sure
+            headers = {
+                'Ocp-Apim-Subscription-Key': self.config['token']
+            }
+            http_client = AsyncHTTPClient()
+            request = HTTPRequest(
+                method="POST",
+                url=self.config['token_url'],
+                headers=headers,
+                allow_nonstandard_methods=True
+            )
+            print(request.method, request.url, request.headers)
+            try:
+                response = await http_client.fetch(request)
+            except Exception as e:
                http_client.close()
-                return None
-            
-#             logger.debug(f"Wrote body: {response.code}")
-            with open(fn, "wb") as f:
-                f.write(response.body)
-            self.pendingRequests[id].set()
-#             print(type(fn), fn)
+                raise e
+            self.access_token = response.body.decode()
+            self.timer = time.time()
            http_client.close()
-            return fn
+            
+        return self.access_token
+        
+    async def requestVoiceFile(self, text):
+        
+        print(self.config['voice_url'])
+        headers = {
+            'Authorization': 'Bearer ' + await self.getToken(),
+            'Content-Type': 'application/ssml+xml',
+            'X-Microsoft-OutputFormat': 'riff-24khz-16bit-mono-pcm',
+#             'User-Agent': 'YOUR_RESOURCE_NAME'
+        }
+        body = f"""<speak version='1.0' xml:lang='{self.config['ms_lang']}'><voice xml:lang='{self.config['ms_lang']}' xml:gender='{self.config['ms_gender']}'
+    name='{self.config['ms_name']}'>
+        {text}
+</voice></speak>"""
+        print(headers, body)
+        http_client = AsyncHTTPClient()
+        request = HTTPRequest(
+                method="POST",
+                url=self.config['voice_url'],
+                headers=headers,
+                body=body
+            )
+        try:
+            response = await http_client.fetch(request)
+        except Exception as e:
+            http_client.close()
+            raise e
+        
+        http_client.close()
+        
+        if response.code != 200:
+            raise Exception(f"No proper response! {response.code}")
+        
+        return response.body
+    
+    
--- a/www/js/hugvey_console.js
+++ b/www/js/hugvey_console.js
@ -384,7 +384,8 @@ class Graph {
    
    getAudioUrlForMsg(msg) {
    	let isVariable = msg['text'].includes('$') ? '1' : '0';
-        return `http://localhost:8888/voice?text=${encodeURIComponent(msg['text'])}&variable=${isVariable}&filename=0`;
+    	let lang = panopticon.graph.language_code;
+        return `http://localhost:8888/voice?text=${encodeURIComponent(msg['text'])}&variable=${isVariable}&lang=${lang}&filename=0`;
    }
    
    getNumericId(prefix) {