From cc1d33c8313041d0a98c8bfbde660dd998d9f18a Mon Sep 17 00:00:00 2001
From: Ruben van de Ven
Date: Fri, 29 Nov 2019 20:34:31 +0100
Subject: [PATCH] Generate and create CSVs for easier translation

---
Review notes (git ignores the text between '---' and the first 'diff --git'):
- Line structure and indentation were rebuilt from a whitespace-collapsed
  copy; whitespace in context lines is a best guess, so apply with
  `git apply --ignore-whitespace` if the hunks do not match.
- The added code differs from commit cc1d33c (see the comments in the
  hunks), so the post-image blob hashes on the 'index' lines are stale.

 hugvey/tools.py | 158 +++++++++++++++++++++++++++++++++++++++++++++++++
 tools.py        |  19 +++++-
 2 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/hugvey/tools.py b/hugvey/tools.py
index 6793ee6..fb0ad8a 100644
--- a/hugvey/tools.py
+++ b/hugvey/tools.py
@@ -4,6 +4,9 @@ import os
 import json
 from hugvey.voice import VoiceStorage
 import re
+import csv
+from operator import indexOf
+import operator
 
 logger = logging.getLogger('toolbox')
 
@@ -165,5 +168,160 @@ class Toolbox:
         with open(filename, 'w') as fp:
             json.dump(story, fp, indent=2)
             logger.info(f"Wrote to {filename}")
+
+    csv_fieldnames = ['id','type','color','text','regex','to text','translation', 'regex_translation']
+
+    def generate_story_csv(self, lang_code):
+        """Export the translatable texts of a story to '<story file>.csv'.
+
+        One row is written per Msg (sorted by color), followed by a row for
+        every non-empty 'replyContains' condition on the directions leaving
+        that Msg, and finally a row per non-empty 'reply_contains' diversion.
+        The 'translation' and 'regex_translation' columns are left empty.
+        An invalid lang_code is logged and nothing is written.
+        """
+        if lang_code not in self.languages:
+            logger.critical("Invalid language code")
+            # Logger.warn() is a deprecated alias, removed in Python 3.13
+            logger.warning(f"Valid codes are {' '.join(self.languages.keys())}")
+            return
+
+        filename = os.path.join(self.config['web']['files_dir'],self.languageFiles[lang_code])
+        story = self.languages[lang_code]
+        csv_filename = filename + '.csv'
+
+        # Index the nodes once instead of scanning the story for every lookup;
+        # setdefault keeps the first node per id, as a front-to-back scan would.
+        nodes_by_id = {}
+        for node in story:
+            nodes_by_id.setdefault(node['@id'], node)
+
+        logger.info(f"Write csv of {lang_code} to {csv_filename}")
+        # Explicit encoding: the texts may be non-ASCII and the platform
+        # default encoding differs between machines.
+        with open(csv_filename, 'w', newline='', encoding='utf-8') as fp:
+            writer = csv.DictWriter(fp, fieldnames=self.csv_fieldnames)
+            writer.writeheader()
+
+            msgs = [node for node in story if node['@type'] == 'Msg']
+            msgs.sort(key=lambda m: m.get('color') or '')
+            for msg in msgs:
+                writer.writerow({
+                    'id': msg['@id'],
+                    'type': 'Msg',
+                    'color': msg.get('color', ''),
+                    'text': msg['text'],
+                })
+
+                directions = [
+                    node for node in story
+                    if node['@type'] == 'Direction' and node['source'] == msg['@id']
+                ]
+                for direction in directions:
+                    target_msg = nodes_by_id.get(direction['target'])
+                    if target_msg is None:
+                        logger.warning(f"Skipping direction {direction['@id']}: unknown target {direction['target']}")
+                        continue
+
+                    for condition_id in direction['conditions']:
+                        condition = nodes_by_id.get(condition_id)
+                        if condition is None:
+                            logger.warning(f"Skipping unknown condition {condition_id} of direction {direction['@id']}")
+                            continue
+                        if condition['type'] != 'replyContains':
+                            continue
+
+                        regex = condition['vars']['regex']
+                        if len(regex) < 1:
+                            continue
+
+                        # 'to text' is the text of the message the direction leads to
+                        writer.writerow({'id': condition['@id'], 'type':'Condition', 'regex': regex, 'to text': target_msg['text']})
+
+            diversions = [node for node in story if node['@type'] == 'Diversion' and node['type'] == 'reply_contains']
+            for diversion in diversions:
+                if len(diversion['params']['regex']) < 1:
+                    continue
+
+                writer.writerow({'id': diversion['@id'], 'type':'Diversion','regex': diversion['params']['regex']})
+
+        logger.info("Done")
+
+    def import_story_csv(self, lang_code, csv_filename):
+        """Apply the translations in csv_filename to a story and save it.
+
+        Each row is matched to a story node by its 'id' column.  A Msg gets
+        its text replaced by 'translation'; a Condition or Diversion gets its
+        regex replaced by 'regex_translation'.  Rows with an unknown id, or
+        with an empty translation for a non-empty original, are skipped with
+        a warning.  The story file is only written after all rows were read.
+
+        Raises:
+            Exception: if the csv lacks one of the columns id, translation,
+                regex_translation, or a row refers to a node of another type.
+        """
+        if lang_code not in self.languages:
+            logger.critical("Invalid language code")
+            # Logger.warn() is a deprecated alias, removed in Python 3.13
+            logger.warning(f"Valid codes are {' '.join(self.languages.keys())}")
+            return
+
+        filename = os.path.join(self.config['web']['files_dir'],self.languageFiles[lang_code])
+        story = self.languages[lang_code]
+
+        # First node wins for a duplicated id, as with a front-to-back scan.
+        nodes_by_id = {}
+        for node in story:
+            nodes_by_id.setdefault(node['@id'], node)
+
+        logger.info(f"Writing translation from {csv_filename} to {filename}")
+
+        # newline='' is what the csv module asks for; utf-8-sig also accepts a
+        # file that was saved with a byte order mark.
+        with open(csv_filename, 'r', newline='', encoding='utf-8-sig') as fp:
+            reader = csv.DictReader(fp)
+            logger.info(reader.fieldnames)
+            # fieldnames is None when the file is empty
+            fieldnames = reader.fieldnames or []
+            required = ('id', 'translation', 'regex_translation')
+            if any(name not in fieldnames for name in required):
+                raise Exception("Not all required fieldnames are given in csv: id, translation, regex_translation")
+
+            for row in reader:
+                node = nodes_by_id.get(row['id'])
+                if node is None:
+                    logger.warning(f"Skipping row with unknown id \"{row['id']}\"")
+                    continue
+
+                # DictReader fills the columns a short row lacks with None
+                translation = row['translation'] or ''
+                regex_translation = row['regex_translation'] or ''
+
+                if node['@type'] == 'Msg':
+                    if len(translation) < 1 and len(node['text']) > 0:
+                        logger.warning(f"Skipping empty translation for message {node['@id']} \"{node['text']}\"")
+                        continue
+
+                    node['text'] = translation
+                elif node['@type'] == 'Condition':
+                    if len(regex_translation) < 1 and len(node['vars']['regex']) > 0:
+                        logger.warning(f"Skipping empty translation for regex {node['@id']} \"{node['vars']['regex']}\"")
+                        continue
+
+                    node['vars']['regex'] = regex_translation
+                elif node['@type'] == 'Diversion':
+                    if len(regex_translation) < 1 and len(node['params']['regex']) > 0:
+                        logger.warning(f"Skipping empty translation for regex {node['@id']} \"{node['params']['regex']}\"")
+                        continue
+
+                    node['params']['regex'] = regex_translation
+                else:
+                    raise Exception(f"Unknown type: {row}")
+
+        with open(filename, 'w') as fp:
+            json.dump(story, fp, indent=2)
+        logger.info(f"Wrote to {filename}")
+
+
 
 
diff --git a/tools.py b/tools.py
index db712fc..0f2b3a8 100644
--- a/tools.py
+++ b/tools.py
@@ -27,6 +27,19 @@ if __name__ == '__main__':
         help="Find/restore issues with the story file",
         metavar="LANG_CODE"
     )
+    argParser.add_argument(
+        '--csv',
+        default=None,
+        help="Generate a csv",
+        metavar="LANG_CODE"
+    )
+    argParser.add_argument(
+        '--import_csv',
+        default=None,
+        help="Import a csv file",
+        metavar=("LANG_CODE", "CSV_FILE"),
+        nargs=2
+    )
 
     args = argParser.parse_args()
 
@@ -43,4 +56,8 @@ if __name__ == '__main__':
         logger.info("Filenames")
         tools.clean_audio_files()
     if args.story:
-        tools.fix_story_file(args.story)
\ No newline at end of file
+        tools.fix_story_file(args.story)
+    if args.csv:
+        tools.generate_story_csv(args.csv)
+    if args.import_csv:
+        tools.import_story_csv(*args.import_csv)
\ No newline at end of file