# hugvey/hugvey/tools.py

import logging
import yaml
import os
import json
from hugvey.voice import VoiceStorage
import re
import csv
from operator import indexOf
import operator

logger = logging.getLogger('toolbox')

# From https://stackoverflow.com/a/1165552
class DictDiffer(object):
    """
    Compare two dictionaries by key and by value.

    Given a current and a past dictionary, report:
    (1) keys that were added (only in the current dict)
    (2) keys that were removed (only in the past dict)
    (3) keys present in both whose values differ
    (4) keys present in both whose values are equal
    """

    def __init__(self, current_dict, past_dict):
        self.current_dict = current_dict
        self.past_dict = past_dict
        self.set_current = set(current_dict.keys())
        self.set_past = set(past_dict.keys())
        # Keys that occur in both dictionaries
        self.intersect = self.set_current & self.set_past

    def added(self):
        """Return the keys that only exist in the current dict."""
        return {key for key in self.set_current if key not in self.intersect}

    def removed(self):
        """Return the keys that only exist in the past dict."""
        return {key for key in self.set_past if key not in self.intersect}

    def changed(self):
        """Return the shared keys whose value differs between both dicts."""
        differing = set()
        for key in self.intersect:
            if self.past_dict[key] != self.current_dict[key]:
                differing.add(key)
        return differing

    def unchanged(self):
        """Return the shared keys whose value is equal in both dicts."""
        equal = set()
        for key in self.intersect:
            if self.past_dict[key] == self.current_dict[key]:
                equal.add(key)
        return equal

class Toolbox:
    def __init__(self, configFile):
        """
        Set up the toolbox from a YAML config file.

        Reads the config, derives the hugvey ids (1 up to and including
        config['hugveys']), loads the language files and creates the
        VoiceStorage for the 'voices' directory below
        config['web']['files_dir'].
        """
        self.languageFiles = {}
        self.languageConfig = {}

        with open(configFile, 'r') as config_fp:
            logger.debug('Load config from {}'.format(configFile))
            self.config = yaml.safe_load(config_fp)

        hugvey_count = self.config['hugveys']
        self.hugvey_ids = list(range(1, hugvey_count + 1))

        self.loadLanguages()

        files_dir = self.config['web']['files_dir']
        self.voiceStorage = VoiceStorage(
            os.path.join(files_dir, 'voices'),
            self.languageConfig,
        )


    def loadLanguages(self):
        """
        Load the story file of every language listed in the config.

        For each entry of config['languages'] the file name is recorded in
        self.languageFiles and the whole config entry in self.languageConfig
        (both keyed by language code); the parsed JSON story ends up in
        self.languages under the same key.

        Raises:
            Exception: when a language token still holds one of the
                placeholder values 'LB_TOKEN' or 'SECRET_KEY'.
        """
        logger.debug('load language files')
        self.languages = {}

        for lang in self.config['languages']:
            lang_filename = os.path.join(self.config['web']['files_dir'], lang['file'])
            code = lang['code']
            self.languageFiles[code] = lang['file']
            self.languageConfig[code] = lang
            # JSON text is UTF-8; without an explicit encoding open() falls
            # back to the locale's preferred encoding, which can garble or
            # reject non-ASCII story text on some systems.
            with open(lang_filename, 'r', encoding='utf-8') as fp:
                self.languages[code] = json.load(fp)

            # The placeholders indicate that a template config is being used.
            if lang['token'] in ('LB_TOKEN', 'SECRET_KEY'):
                raise Exception("Are you using the right config file? Language key not configured properly!")

    def get_audio_filenames(self):
        """
        Collect the audio file names that the loaded stories refer to.

        The list always starts with 'local/crash.wav'. For every 'Msg' node
        of every language either its explicitly configured audio file is
        added or, when the text contains no '$' variable, the file name that
        the voice storage gives for that text.
        """
        wanted = ['local/crash.wav']

        for langCode, story in self.languages.items():
            logger.info(f'lang {langCode}')
            messages = [entry for entry in story if entry['@type'] == 'Msg']
            for message in messages:
                audio = message.get('audio')
                if audio is not None:
                    # An explicit audio file overrides the generated voice
                    wanted.append(audio['file'])
                elif '$' not in message['text']:
                    # Texts with variables are skipped
                    wanted.append(
                        self.voiceStorage.getFilename(langCode, message['text'], False)
                    )

        return wanted

    def get_existing_filesnames(self):
        existing_files = []
        for path, subdirs, files in os.walk(self.config['web']['files_dir']):
            for name in files:
                if name[-4:] == '.wav':
                    existing_files.append(os.path.join(path, name))
        return existing_files

    def clean_audio_files(self):
        """
        Delete every existing .wav file that is not needed by a story.

        The needed names come from get_audio_filenames(), the existing ones
        from get_existing_filesnames(). Files that exist but are not needed
        are removed from disk; afterwards the number of needed files that do
        not exist is logged.

        NOTE(review): both lists are compared as plain strings, so they must
        use the same path form (relative to the same directory) -- verify
        this against the configured files_dir before running, because any
        mismatch makes every existing file look unneeded.
        """
        needed_files = self.get_audio_filenames()
        existing_files = self.get_existing_filesnames()

        # Sets give constant-time membership tests instead of scanning the
        # whole list for every file.
        needed_lookup = set(needed_files)
        existing_lookup = set(existing_files)

        for fn in existing_files:
            if fn in needed_lookup:
                logger.debug(f"Keep {fn}")
            else:
                logger.warning(f"Remove {fn}")
                os.unlink(fn)

        missingFiles = [fn for fn in needed_files if fn not in existing_lookup]

        logger.info("{} files missing".format(len(missingFiles)))

    @classmethod
    def find_direction_for_condition(cls, conditionId, story):
        """
        Return the first 'Direction' node in story that lists conditionId
        among its conditions, or None when no direction uses it.
        """
        for node in story:
            if node['@type'] != 'Direction':
                continue
            if any(candidate == conditionId for candidate in node['conditions']):
                return node
        return None

    def fix_story_file(self, lang_code):
        """
        Check the story of one language, repair broken directions and write
        the story back to its file.

        Repaired: a 'Direction' whose 'source' holds a whole message dict
        instead of a message id gets the id put back, provided that message
        is part of the story.

        Only reported through the log: a missing beginning or more than one
        beginning, directions that refer to unknown conditions,
        'messagePlayed' conditions for unknown messages and 'replyContains'
        conditions whose regex does not compile. Conditions that no
        direction uses are counted but left in place.

        For an unknown lang_code nothing happens apart from logging.
        """
        if lang_code not in self.languages:
            logger.critical("Invalid langauge code")
            logger.warning(f"Valid codes are {' '.join(self.languages.keys())}")
            return

        filename = os.path.join(self.config['web']['files_dir'], self.languageFiles[lang_code])
        story = self.languages[lang_code]

        beginnings = [item for item in story if item.get('beginning') is True]
        if len(beginnings) < 1:
            logger.critical("No beginning set")
        if len(beginnings) > 1:
            beginningIds = [i['@id'] for i in beginnings]
            logger.warning(f"{len(beginnings)} beginning messages configured. Set only one of {beginningIds}")

        itemsPerId = {item['@id']: item for item in story}
        orphans = 0
        for item in story:
            if item['@type'] == 'Direction':
                if isinstance(item['source'], dict):
                    logger.warning(f"Fixing broken direction {item['@id']}, please check if everything still works!")
                    sourceId = item['source']['@id']
                    validMsg = itemsPerId.get(sourceId)
                    if validMsg is None:
                        # Replacing the embedded copy by its id would leave a
                        # dangling reference and throw the message data away.
                        logger.critical(f"Direction {item['@id']} embeds message {sourceId} which is not part of the story; leaving the direction untouched")
                    else:
                        diff = DictDiffer(item['source'], validMsg)
                        if diff.changed() or diff.added() or diff.removed():
                            logger.warning("Changes found between messages")
                            logger.warning(f"Changed: {list(diff.changed())} Keys that will be remove: {list(diff.added())} Keys that will be added: {list(diff.removed())}")
                            logger.info(f"Direction pointed to {item['source']}")
                            logger.info(f"Will now point to {validMsg}")
                        item['source'] = sourceId
                for conditionId in item['conditions']:
                    if conditionId not in itemsPerId:
                        logger.critical(f"Direction {item['@id']} refers to non-existing condition {conditionId}! (This will result in a crash when playing the message)")
            elif item['@type'] == 'Condition':
                direction = self.find_direction_for_condition(item['@id'], story)
                if not direction:
                    # Orphaned conditions are only counted; they are not
                    # removed from the story.
                    orphans += 1
                    continue
                if item['type'] == 'messagePlayed':
                    msgId = item['vars']['msgId'].strip()
                    if msgId not in itemsPerId:
                        logger.warning(f"Message played condition for non-existing message {msgId} when going from {direction['source']} to {direction['target']}! (this will ignore the condition)")
                if item['type'] == 'replyContains':
                    if 'regex' in item['vars'] and len(item['vars']['regex'].rstrip()):
                        try:
                            re.compile(item['vars']['regex'].rstrip())
                        except Exception as e:
                            # Deliberately broad: any failure to compile
                            # means the regex is unusable and gets reported.
                            logger.critical(f"Invalid regex for condition {item['@id']}: {item['vars']['regex'].rstrip()}")
                            logger.exception(e)

        logger.debug( f"Can clear {orphans} orphaned conditions (uncomment code in tools.py)")

        with open(filename, 'w') as fp:
            json.dump(story, fp, indent=2)
            logger.info(f"Wrote to {filename}")

    # Column names, in order, of the CSV that generate_story_csv() writes.
    # import_story_csv() requires the 'id', 'text', 'translation' and
    # 'regex_translation' columns to be present in the file it reads.
    csv_fieldnames = ['id','type','color','text','regex','to text','translation', 'regex_translation']

    def generate_story_csv(self, lang_code):
        """
        Write the translatable parts of a story to '<story file>.csv'.

        The CSV uses the columns of csv_fieldnames and contains:
        - one 'Msg' row per message, ordered by message color;
        - directly after each message one 'Condition' row for every
          'replyContains' condition with a non-empty regex on the directions
          that start at that message, together with the target message text;
        - at the end one 'Diversion' row for every 'reply_contains' diversion
          with a non-empty regex.

        Directions with an unknown target and references to unknown
        conditions are logged and skipped. For an unknown lang_code nothing
        happens apart from logging.
        """
        if lang_code not in self.languages:
            logger.critical("Invalid langauge code")
            logger.warning(f"Valid codes are {' '.join(self.languages.keys())}")
            return

        filename = os.path.join(self.config['web']['files_dir'], self.languageFiles[lang_code])
        story = self.languages[lang_code]
        csv_filename = filename + '.csv'

        # Index the nodes once instead of scanning the story for every
        # lookup; setdefault keeps the first node when an id occurs twice.
        nodes_by_id = {}
        for node in story:
            nodes_by_id.setdefault(node.get('@id'), node)

        logger.info(f"Write csv of {lang_code} to {csv_filename}")
        with open(csv_filename, 'w', newline='') as fp:
            writer = csv.DictWriter(fp, fieldnames=self.csv_fieldnames)
            writer.writeheader()

            # A missing or empty color sorts first, as the empty string
            msgs = sorted(
                (node for node in story if node['@type'] == 'Msg'),
                key=lambda m: m.get('color') or '',
            )
            for msg in msgs:
                writer.writerow({'id': msg['@id'], 'type': 'Msg', 'color': msg.get('color') or '', 'text': msg['text']})

                directions = [subnode for subnode in story if subnode['@type'] == 'Direction' and subnode['source'] == msg['@id']]
                for direction in directions:
                    targetMsg = nodes_by_id.get(direction['target'])
                    if targetMsg is None:
                        logger.critical(f"Direction {direction['@id']} points to non-existing target {direction['target']}, skipping its conditions")
                        continue

                    for conditionId in direction['conditions']:
                        condition = nodes_by_id.get(conditionId)
                        if condition is None:
                            logger.critical(f"Direction {direction['@id']} refers to non-existing condition {conditionId}, skipping it")
                            continue
                        if condition['type'] != 'replyContains':
                            continue
                        # The regex entry is optional in the condition vars
                        text = condition['vars'].get('regex') or ''
                        if not text:
                            continue

                        writer.writerow({'id': condition['@id'], 'type': 'Condition', 'regex': text, 'to text': targetMsg['text']})

            diversions = [node for node in story if node['@type'] == 'Diversion' and node['type'] == 'reply_contains']
            for diversion in diversions:
                regex = diversion['params'].get('regex') or ''
                if not regex:
                    continue

                writer.writerow({'id': diversion['@id'], 'type': 'Diversion', 'regex': regex})

        logger.info("Done")

    def import_story_csv(self, lang_code, csv_filename):
        """
        Apply the translations from a CSV file to a story and save the story.

        The CSV needs the columns 'id', 'text', 'translation' and
        'regex_translation'. Per row the node with that id is looked up:
        - 'Msg': the text becomes the translation; the original text from
          the CSV is stored as label when the node has no label yet;
        - 'Condition': vars['regex'] becomes the regex translation;
        - 'Diversion': params['regex'] becomes the regex translation.
        Completely empty rows, rows without an id and rows with an empty
        translation for a non-empty original are skipped with a log message.

        For an unknown lang_code nothing happens apart from logging.

        Raises:
            Exception: when a required column is missing or a row refers to
                a node of an unknown type.
            IndexError: when a row refers to an id that is not in the story.
        """
        if lang_code not in self.languages:
            logger.critical("Invalid langauge code")
            logger.warning(f"Valid codes are {' '.join(self.languages.keys())}")
            return

        filename = os.path.join(self.config['web']['files_dir'], self.languageFiles[lang_code])
        story = self.languages[lang_code]

        logger.info(f"Writing translation from {csv_filename} to {filename}")

        required_fields = ('id', 'text', 'translation', 'regex_translation')

        # Index the nodes once instead of scanning the story for every row;
        # setdefault keeps the first node when an id occurs twice.
        nodes_by_id = {}
        for story_node in story:
            nodes_by_id.setdefault(story_node.get('@id'), story_node)

        # The csv module requires newline='' so that line breaks inside
        # quoted fields reach the reader unchanged.
        with open(csv_filename, 'r', newline='') as fp:
            reader = csv.DictReader(fp)
            logger.info(reader.fieldnames)
            # fieldnames is None for an empty file
            fieldnames = reader.fieldnames or []
            missing = [field for field in required_fields if field not in fieldnames]
            if missing:
                raise Exception(
                    f"Not all required fieldnames are given in csv: {', '.join(required_fields)} (missing: {', '.join(missing)})"
                )

            for row in reader:
                if not any(row.values()):
                    logger.info("Skipping empty row")
                    continue
                if not row['id']:
                    logger.critical(f"Skipping row without ID, but with data: {list(row.values())}")
                    continue

                node = nodes_by_id.get(row['id'])
                if node is None:
                    logger.critical(f"Exception finding node id {row}")
                    # IndexError is what the previous list based lookup raised
                    raise IndexError(f"No node with id {row['id']!r} in story")

                # Rows shorter than the header yield None for missing cells
                original_text = row['text'] or ''
                translation = row['translation'] or ''
                regex_translation = row['regex_translation'] or ''

                if node['@type'] == 'Msg':
                    if not translation and len(node['text']) > 0:
                        logger.warning(f"Skipping empty translation for message {node['@id']} \"{node['text']}\"")
                        continue
                    if 'label' not in node or (not node['label'] and node['text'] == original_text):
                        node['label'] = original_text  # store original text as label for readability
                    node['text'] = translation
                elif node['@type'] == 'Condition':
                    if not regex_translation and len(node['vars']['regex']) > 0:
                        logger.warning(f"Skipping empty translation for regex {node['@id']} \"{node['vars']['regex']}\"")
                        continue

                    node['vars']['regex'] = regex_translation
                elif node['@type'] == 'Diversion':
                    if not regex_translation and len(node['params']['regex']) > 0:
                        logger.warning(f"Skipping empty translation for regex {node['@id']} \"{node['params']['regex']}\"")
                        continue

                    node['params']['regex'] = regex_translation
                else:
                    raise Exception(f"Unknown type: {row}")

        with open(filename, 'w') as fp:
            json.dump(story, fp, indent=2)
            logger.info(f"Wrote to {filename}")

    def parse_cutelog(self, filename):
        with open(filename,'r') as fp:
            cutelog = json.load(fp);

        hugvey_ids = list(range(1,30))
        hugveys_stats = {}
        for id in hugvey_ids:
            print(f"HUGVEY {id}")
            log = [i for i in cutelog if 'name' in i and i['name'].startswith(f'hugvey.{id}.')]

            txts = [i for i in log if 'msg' in i and ((i['msg'].startswith('Text: ') and i['msg'] != "Text: ") or i['msg'].startswith('Current message') or i['msg'].startswith('ignore'))]
            last = None
            for txt in txts:
                if last:
                    if txt['msg'].startswith('Current'):
                        print('--------------------', txt['created'])
                    elif txt['msg'].startswith('ignore'):
                        print('/////////////////////', txt['created'])
                    else:
                        print(txt['created'] - last['created'], txt['msg'], txt['levelname']
                     )
                        last = txt
                else:
                    last = txt
            tC = [i for i in log if 'msg' in i and (i['msg'].startswith("Condition is met"))]
            tR = [i for i in log if 'msg' in i and (i['msg'].startswith("Received {'file"))]
            tP = [i for i in log if 'msg' in i and (i['msg'].startswith("['play'"))]
            for i, txt in enumerate(tP):
                print(txt['created']-tC[i]['created'], txt['msg'], tC[i]['msg'], tR[i]['msg'])


        print('===================')