
# NOTE: the following lines are residue from the web source viewer, not module code.
# 265 lines
# 12 KiB
# Raw Normal View History
#
# 2019-04-16 17:13:03 +02:00
import logging
import yaml
import os
import json
from hugvey.voice import VoiceStorage
# (blame timestamp) 2019-11-15 16:22:15 +01:00
import re
import csv
from operator import indexOf
import operator
# (blame timestamp) 2019-04-16 17:13:03 +02:00
logger = logging.getLogger('toolbox')
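# (blame timestamp) 2019-11-08 11:30:49 +01:00
# From: (source reference missing here; the original attribution appears to have been lost)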
class DictDiffer(object):
    """
    Calculate the difference between two dictionaries as:
    (1) items added
    (2) items removed
    (3) keys same in both but changed values
    (4) keys same in both and unchanged values

    "Added" and "removed" are expressed from the point of view of
    ``current_dict`` relative to ``past_dict``.
    """

    def __init__(self, current_dict, past_dict):
        """Store both dictionaries and precompute their key sets.

        :param current_dict: the newer mapping.
        :param past_dict: the older mapping that it is compared against.
        """
        self.current_dict, self.past_dict = current_dict, past_dict
        self.set_current, self.set_past = set(current_dict), set(past_dict)
        # Keys present in both mappings; only these can be (un)changed.
        self.intersect = self.set_current & self.set_past

    def added(self):
        """Return the set of keys only present in ``current_dict``."""
        return self.set_current - self.intersect

    def removed(self):
        """Return the set of keys only present in ``past_dict``."""
        return self.set_past - self.intersect

    def changed(self):
        """Return the set of shared keys whose values differ."""
        return {
            key for key in self.intersect
            if self.past_dict[key] != self.current_dict[key]
        }

    def unchanged(self):
        """Return the set of shared keys whose values are equal."""
        return {
            key for key in self.intersect
            if self.past_dict[key] == self.current_dict[key]
        }
# (blame timestamp) 2019-04-16 17:13:03 +02:00
class Toolbox:
    """Helpers for checking and converting story files and their audio.

    The configuration is read from a YAML file. The keys used by this class
    are ``hugveys``, ``languages`` (entries with ``code``, ``file`` and
    ``token``) and ``web.files_dir``.

    NOTE(review): this class was reconstructed from a view of the file in
    which indentation and a number of statements were lost. Places where a
    statement had to be inferred are marked with ``NOTE(review)``.
    """

    # Column order of the translation CSV written by generate_story_csv()
    # and expected (partly) by import_story_csv().
    csv_fieldnames = ['id','type','color','text','regex','to text','translation', 'regex_translation']

    def __init__(self, configFile):
        """Load the YAML configuration and set up the voice storage.

        :param configFile: path of the YAML configuration file.
        """
        self.languageFiles = {}
        self.languageConfig = {}
        # Filled by loadLanguages(); defined here so that the other methods
        # report an invalid language code instead of raising AttributeError.
        # NOTE(review): no call to loadLanguages() is visible in this
        # constructor - confirm whether callers are expected to call it.
        self.languages = {}

        with open(configFile, 'r') as fp:
            logger.debug('Load config from {}'.format(configFile))
            self.config = yaml.safe_load(fp)

        self.hugvey_ids = [i + 1 for i in range(self.config['hugveys'])]

        voice_dir = os.path.join(self.config['web']['files_dir'], 'voices')
        # languageConfig is passed by reference; loadLanguages() fills it.
        self.voiceStorage = VoiceStorage(voice_dir, self.languageConfig)

    def loadLanguages(self):
        """(Re)load every configured language's story JSON file.

        :raises Exception: when a language still carries a placeholder token
            ('LB_TOKEN' or 'SECRET_KEY').
        """
        logger.debug('load language files')
        self.languages = {}

        for lang in self.config['languages']:
            lang_filename = os.path.join(self.config['web']['files_dir'], lang['file'])
            self.languageFiles[lang['code']] = lang['file']
            self.languageConfig[lang['code']] = lang
            with open(lang_filename, 'r') as fp:
                self.languages[lang['code']] = json.load(fp)

            if lang['token'] == 'LB_TOKEN' or lang['token'] == 'SECRET_KEY':
                raise Exception("Are you using the right config file? Language key not configured properly!")

    def _is_valid_language(self, lang_code):
        """Return True for a loaded language code; log the valid codes otherwise."""
        if lang_code in self.languages:
            return True
        logger.critical("Invalid langauge code")
        logger.warning(f"Valid codes are {' '.join(self.languages.keys())}")
        return False

    def _story_filename(self, lang_code):
        """Return the path of the story file that belongs to lang_code."""
        return os.path.join(self.config['web']['files_dir'],self.languageFiles[lang_code])

    @staticmethod
    def _index_by_id(story):
        """Map '@id' to node; the first node wins when an id occurs twice."""
        nodes_by_id = {}
        for node in story:
            nodes_by_id.setdefault(node['@id'], node)
        return nodes_by_id

    def get_audio_filenames(self):
        """
        Get all audio files as defined through the config.

        :return: list of filenames, one per message of every loaded language
            (messages whose text contains a '$' variable are skipped).
        """
        filenames = []
        for langCode in self.languages:
            logger.info(f'lang {langCode}')
            msgs = [node for node in self.languages[langCode] if node['@type'] == 'Msg']
            for msg in msgs:
                if 'audio' in msg and msg['audio'] is not None:
                    # NOTE(review): the body of this branch was lost in the
                    # visible source. It is reconstructed as "use the audio
                    # file attached to the message"; confirm the 'file' key
                    # against the story schema.
                    filenames.append(msg['audio']['file'])
                    continue

                if '$' in msg['text']:
                    # skip variable texts
                    continue

                fn = self.voiceStorage.getFilename(langCode, msg['text'], False)
                filenames.append(fn)
        return filenames

    def get_existing_filesnames(self):
        """Return the paths of all '.wav' files below the configured files_dir."""
        existing_files = []
        for path, subdirs, files in os.walk(self.config['web']['files_dir']):
            existing_files.extend(
                os.path.join(path, name) for name in files if name.endswith('.wav')
            )
        return existing_files

    def clean_audio_files(self):
        """Compare the audio files on disk with the ones the stories need.

        Logs every existing file as either "Remove" or "Keep" and logs the
        number of needed files that are missing.

        NOTE(review): no statement that actually deletes a file is visible
        in the source this was reconstructed from, so this method only
        reports. Confirm whether a deletion step belongs after the "Remove"
        log line.
        """
        # Sets make the two membership tests below O(1) per file.
        needed_files = set(self.get_audio_filenames())
        existing_files = self.get_existing_filesnames()

        for fn in existing_files:
            if fn not in needed_files:
                logger.warning(f"Remove {fn}")
            else:
                logger.debug(f"Keep {fn}")

        existing_lookup = set(existing_files)
        missingFiles = [fn for fn in needed_files if fn not in existing_lookup]
        logger.info("{} files missing".format(len(missingFiles)))

    def fix_story_file(self, lang_code):
        """Repair known inconsistencies in a story file and write it back.

        Directions whose 'source' holds a full message dict are rewritten to
        hold only that message's '@id'. Missing/duplicate beginnings,
        'messagePlayed' conditions for unknown messages and invalid
        'replyContains' regexes are only reported through the logger.

        :param lang_code: code of a loaded language; an unknown code is
            logged and nothing is written.
        """
        if not self._is_valid_language(lang_code):
            return

        filename = self._story_filename(lang_code)
        story = self.languages[lang_code]

        beginnings = [item for item in story if 'beginning' in item and item['beginning'] is True]
        if len(beginnings) < 1:
            logger.critical("No beginning set")
        if len(beginnings) > 1:
            logger.warning(f"{len(beginnings)} beginning messages configured. Set only one")

        itemsPerId = {item['@id']: item for item in story}

        for item in story:
            if item['@type'] == 'Direction':
                if isinstance(item['source'], dict):
                    logger.warning(f"Fixing broken direction {item['@id']}, please check if everything still works!")
                    validMsg = itemsPerId[item['source']['@id']]
                    # current = embedded copy, past = real message: keys
                    # "added" only exist in the copy and disappear when the
                    # direction points at the real message.
                    diff = DictDiffer(item['source'], validMsg)
                    if diff.changed() or diff.added() or diff.removed():
                        logger.warning("Changes found between messages")
                        logger.warning(f"Changed: {list(diff.changed())} Keys that will be remove: {list(diff.added())} Keys that will be added: {list(diff.removed())}")
                        logger.info(f"Direction pointed to {item['source']}")
                        logger.info(f"Will now point to {validMsg}")
                    item['source'] = item['source']['@id']

            if item['@type'] == 'Condition':
                if item['type'] == 'messagePlayed':
                    msgId = item['vars']['msgId'].strip()
                    if msgId not in itemsPerId:
                        logger.critical(f"Message played condition for non-existing message {msgId}!")

                if item['type'] == 'replyContains':
                    if 'regex' in item['vars'] and len(item['vars']['regex'].rstrip()):
                        try:
                            re.compile(item['vars']['regex'].rstrip())
                        except re.error:
                            logger.critical(f"Invalid regex for condition {item['@id']}: {item['vars']['regex'].rstrip()}")

        with open(filename, 'w') as fp:
            json.dump(story, fp, indent=2)
        logger.info(f"Wrote to {filename}")

    def generate_story_csv(self, lang_code):
        """Write a translation CSV next to the story file ('<story file>.csv').

        Rows are written per message (sorted by colour), each followed by the
        non-empty 'replyContains' conditions of its outgoing directions, and
        finally the 'reply_contains' diversions that have a regex.

        :param lang_code: code of a loaded language; an unknown code is
            logged and nothing is written.
        :raises KeyError: when a direction refers to an unknown target or
            condition id.
        """
        if not self._is_valid_language(lang_code):
            return

        filename = self._story_filename(lang_code)
        story = self.languages[lang_code]

        csv_filename = filename + '.csv'
        logger.info(f"Write csv of {lang_code} to {csv_filename}")

        nodes_by_id = self._index_by_id(story)

        with open(csv_filename, 'w', newline='') as fp:
            writer = csv.DictWriter(fp, fieldnames=self.csv_fieldnames)
            # import_story_csv() identifies columns by name, so the header
            # row is required.
            writer.writeheader()

            msgs = [node for node in story if node['@type'] == 'Msg']
            msgs = sorted(msgs, key=lambda m: m['color'] if 'color' in m else '')
            for msg in msgs:
                writer.writerow({'id': msg['@id'], 'type':'Msg','color':msg['color'] if 'color' in msg else '', 'text': msg['text']})

                directions = [subnode for subnode in story if subnode['@type'] == 'Direction' and subnode['source'] == msg['@id']]
                for direction in directions:
                    targetMsg = nodes_by_id[direction['target']]
                    for conditionId in direction['conditions']:
                        condition = nodes_by_id[conditionId]
                        if condition['type'] == 'replyContains' and len(condition['vars']['regex']) > 0:
                            text = condition['vars']['regex']
                            writer.writerow({'id': condition['@id'], 'type':'Condition', 'regex': text, 'to text': targetMsg['text']})

            diversions = [node for node in story if node['@type'] == 'Diversion' and node['type'] == 'reply_contains']
            for diversion in diversions:
                if len(diversion['params']['regex']) < 1:
                    # nothing to translate
                    continue
                writer.writerow({'id': diversion['@id'], 'type':'Diversion','regex': diversion['params']['regex']})

        logger.info("Done")

    def import_story_csv(self, lang_code, csv_filename):
        """Apply the translations of a CSV to a story and write the story file.

        Rows with an empty translation for a non-empty original are skipped
        with a warning. For messages the original text is kept as 'label'
        when no label exists yet.

        :param lang_code: code of a loaded language; an unknown code is
            logged and nothing is written.
        :param csv_filename: path of a CSV with at least the columns 'id',
            'text', 'translation' and 'regex_translation'.
        :raises Exception: when a required column is missing or a row refers
            to a node of an unsupported type.
        :raises KeyError: when a row refers to an id that is not in the story.
        """
        if not self._is_valid_language(lang_code):
            return

        filename = self._story_filename(lang_code)
        story = self.languages[lang_code]

        logger.info(f"Writing translation from {csv_filename} to {filename}")

        nodes_by_id = self._index_by_id(story)

        with open(csv_filename, 'r', newline='') as fp:
            reader = csv.DictReader(fp)
            # fieldnames is None for an empty file; treat that as "no columns".
            fieldnames = reader.fieldnames or []
            if 'id' not in fieldnames or 'translation' not in fieldnames or 'regex_translation' not in fieldnames or 'text' not in fieldnames:
                raise Exception("Not all required fieldnames are given in csv: id, translation, regex_translation")

            for row in reader:
                node = nodes_by_id[row['id']]
                if node['@type'] == 'Msg':
                    if len(row['translation']) < 1 and len(node['text']) > 0:
                        logger.warning(f"Skipping empty translation for message {node['@id']} \"{node['text']}\"")
                        continue
                    if 'label' not in node or (len(node['label']) < 1 and node['text'] == row['text']):
                        node['label'] = row['text'] # store original text as label for readability
                    node['text'] = row['translation']
                elif node['@type'] == 'Condition':
                    if len(row['regex_translation']) < 1 and len(node['vars']['regex']) > 0:
                        logger.warning(f"Skipping empty translation for regex {node['@id']} \"{node['vars']['regex']}\"")
                        continue
                    node['vars']['regex'] = row['regex_translation']
                elif node['@type'] == 'Diversion':
                    if len(row['regex_translation']) < 1 and len(node['params']['regex']) > 0:
                        logger.warning(f"Skipping empty translation for regex {node['@id']} \"{node['params']['regex']}\"")
                        continue
                    node['params']['regex'] = row['regex_translation']
                else:
                    raise Exception(f"Unknown type: {row}")

        with open(filename, 'w') as fp:
            json.dump(story, fp, indent=2)
        logger.info(f"Wrote to {filename}")
# (blame timestamp) 2019-04-16 17:13:03 +02:00