Generate and import CSVs for easier translation

2019-11-29 20:34:31 +01:00 · 2019-11-29 20:34:31 +01:00 · cc1d33c831
commit cc1d33c831
parent d65a79d378
2 changed files with 112 additions and 1 deletions
--- a/hugvey/tools.py
+++ b/hugvey/tools.py
@ -4,6 +4,9 @@ import os
 import json
 from hugvey.voice import VoiceStorage
 import re
+import csv
+from operator import indexOf
+import operator

 logger = logging.getLogger('toolbox')

@ -165,5 +168,96 @@ class Toolbox:
        with open(filename, 'w') as fp:
            json.dump(story, fp, indent=2)
            logger.info(f"Wrote to {filename}")
+    
+    csv_fieldnames = ['id','type','color','text','regex','to text','translation', 'regex_translation']  # column order of the translation CSV written by generate_story_csv
+    
+    def generate_story_csv(self, lang_code):
+        """Export the translatable texts of one story to a CSV file.
+
+        The CSV is written next to the story's JSON file (the same path with
+        ``.csv`` appended) using the columns in ``csv_fieldnames``:
+
+        * one ``Msg`` row per message, sorted by ``color``; each is followed
+          by a ``Condition`` row for every non-empty ``replyContains`` regex
+          on a direction leaving that message, with ``to text`` holding the
+          text of the direction's target;
+        * one ``Diversion`` row per ``reply_contains`` diversion that has a
+          non-empty regex.
+
+        The ``translation`` and ``regex_translation`` columns are written
+        empty; ``import_story_csv`` reads them back.
+
+        Args:
+            lang_code: Key into ``self.languages`` / ``self.languageFiles``.
+                An unknown code is logged and the method returns without
+                writing anything.
+        """
+        if lang_code not in self.languages:
+            logger.critical("Invalid language code")
+            logger.warning(f"Valid codes are {' '.join(self.languages.keys())}")
+            return
+
+        filename = os.path.join(self.config['web']['files_dir'], self.languageFiles[lang_code])
+        story = self.languages[lang_code]
+        csv_filename = filename + '.csv'
+
+        # Index the story once instead of rescanning it for every lookup.
+        # setdefault keeps the first node when an id occurs more than once,
+        # which is the node the former "[...][0]" scans selected.
+        nodes_by_id = {}
+        directions_by_source = {}
+        for node in story:
+            if '@id' in node:
+                nodes_by_id.setdefault(node['@id'], node)
+            if node['@type'] == 'Direction':
+                directions_by_source.setdefault(node['source'], []).append(node)
+
+        logger.info(f"Write csv of {lang_code} to {csv_filename}")
+        # newline='' is what the csv module expects; an explicit utf-8 keeps
+        # non-ASCII story text writable whatever the platform default is.
+        with open(csv_filename, 'w', newline='', encoding='utf-8') as fp:
+            writer = csv.DictWriter(fp, fieldnames=self.csv_fieldnames)
+            writer.writeheader()
+
+            msgs = [node for node in story if node['@type'] == 'Msg']
+            msgs.sort(key=lambda m: m.get('color', ''))
+            for msg in msgs:
+                writer.writerow({
+                    'id': msg['@id'],
+                    'type': 'Msg',
+                    'color': msg.get('color', ''),
+                    'text': msg['text'],
+                })
+
+                for direction in directions_by_source.get(msg['@id'], []):
+                    target = nodes_by_id.get(direction['target'])
+                    if target is None:
+                        # A dangling reference should not abort the export.
+                        logger.warning(f"Skipping direction from {msg['@id']}: unknown target {direction['target']}")
+                        continue
+
+                    for condition_id in direction['conditions']:
+                        condition = nodes_by_id.get(condition_id)
+                        if condition is None:
+                            logger.warning(f"Skipping unknown condition {condition_id}")
+                            continue
+                        # Only non-empty reply regexes need translating.
+                        if condition['type'] != 'replyContains' or not condition['vars']['regex']:
+                            continue
+
+                        writer.writerow({
+                            'id': condition['@id'],
+                            'type': 'Condition',
+                            'regex': condition['vars']['regex'],
+                            'to text': target['text'],
+                        })
+
+            for node in story:
+                if node['@type'] != 'Diversion' or node['type'] != 'reply_contains':
+                    continue
+                regex = node['params']['regex']
+                if not regex:
+                    continue
+
+                writer.writerow({'id': node['@id'], 'type': 'Diversion', 'regex': regex})
+
+        logger.info("Done")
+        
+    def import_story_csv(self, lang_code, csv_filename):
+        """Apply the translations from a CSV file to a story and save it.
+
+        Each CSV row is matched to a story node through its ``id`` column.
+        For ``Msg`` nodes the ``translation`` column replaces ``text``; for
+        ``Condition`` and ``Diversion`` nodes the ``regex_translation``
+        column replaces ``vars.regex`` / ``params.regex``. A row whose
+        translation is empty while the node currently has a value is skipped
+        with a warning, so existing content is never blanked. The story is
+        modified in place and then written back to its JSON file.
+
+        Args:
+            lang_code: Key into ``self.languages`` / ``self.languageFiles``.
+                An unknown code is logged and the method returns without
+                touching any file.
+            csv_filename: Path of the CSV file to read.
+
+        Raises:
+            Exception: If the CSV lacks one of the ``id``, ``translation`` or
+                ``regex_translation`` columns, or a row refers to a node of
+                an unsupported type.
+            IndexError: If a row's ``id`` does not exist in the story.
+        """
+        if lang_code not in self.languages:
+            logger.critical("Invalid language code")
+            logger.warning(f"Valid codes are {' '.join(self.languages.keys())}")
+            return
+
+        filename = os.path.join(self.config['web']['files_dir'], self.languageFiles[lang_code])
+        story = self.languages[lang_code]
+
+        logger.info(f"Writing translation from {csv_filename} to {filename}")
+
+        # One id -> node index instead of rescanning the story for every row;
+        # setdefault keeps the first node when an id occurs more than once.
+        nodes_by_id = {}
+        for node in story:
+            if '@id' in node:
+                nodes_by_id.setdefault(node['@id'], node)
+
+        # newline='' is what the csv module expects (quoted fields may hold
+        # line breaks); utf-8-sig also accepts files saved with a BOM.
+        with open(csv_filename, 'r', newline='', encoding='utf-8-sig') as fp:
+            reader = csv.DictReader(fp)
+            logger.info(reader.fieldnames)
+            # fieldnames is None for an empty file; treat that as "no columns"
+            # so the intended error below is raised instead of a TypeError.
+            fieldnames = reader.fieldnames or []
+            required = ('id', 'translation', 'regex_translation')
+            if any(name not in fieldnames for name in required):
+                raise Exception("Not all required fieldnames are given in csv: id, translation, regex_translation")
+
+            for row in reader:
+                node = nodes_by_id.get(row['id'])
+                if node is None:
+                    raise IndexError(f"Line {reader.line_num} of {csv_filename}: no node with id {row['id']!r} in story")
+
+                # Pick where the translated value lives for this node type.
+                node_type = node['@type']
+                if node_type == 'Msg':
+                    container, key, column, label = node, 'text', 'translation', 'message'
+                elif node_type == 'Condition':
+                    container, key, column, label = node['vars'], 'regex', 'regex_translation', 'regex'
+                elif node_type == 'Diversion':
+                    container, key, column, label = node['params'], 'regex', 'regex_translation', 'regex'
+                else:
+                    raise Exception(f"Unknown type: {row}")
+
+                # Short rows yield None for missing cells; treat as empty.
+                translated = row[column] or ''
+                if not translated and container[key]:
+                    logger.warning(f"Skipping empty translation for {label} {node['@id']} \"{container[key]}\"")
+                    continue
+
+                container[key] = translated
+
+        with open(filename, 'w') as fp:
+            json.dump(story, fp, indent=2)
+            logger.info(f"Wrote to {filename}")
+            
+        
--- a/tools.py
+++ b/tools.py
@ -27,6 +27,19 @@ if __name__ == '__main__':
            help="Find/restore issues with the story file",
            metavar="LANG_CODE"
        )
+    # Export the translatable texts of one language to a CSV file.
+    argParser.add_argument(
+            '--csv',
+            default=None,
+            help="Generate a csv",
+            metavar="LANG_CODE"
+        )
+    # Import a translated CSV: takes the language code and the CSV path.
+    argParser.add_argument(
+            '--import_csv',
+            default=None,
+            help="Import a csv file",
+            metavar=("LANG_CODE", "CSV_FILE"),
+            nargs=2
+        )

    args = argParser.parse_args()

@ -43,4 +56,8 @@ if __name__ == '__main__':
        logger.info("Filenames")
        tools.clean_audio_files()
    if args.story:
-        tools.fix_story_file(args.story)
+        tools.fix_story_file(args.story)
+    if args.csv:
+        tools.generate_story_csv(args.csv)
+    if args.import_csv:
+        tools.import_story_csv(*args.import_csv)